#!/usr/bin/perl

#sgml2html is part of sgmltools, which you can get from
#http://pobox.com/~cg/sgmltools
#tidy (html-tidy) is available from http://tidy.sourceforge.net/
#perl is available from http://www.cpan.org/

# Locations of the two external programs this script runs.
$sgml2html = '/usr/bin/sgml2html';
$tidy      = '/usr/bin/tidy';

# Stylesheet element inserted into the <head> of pages that do not already
# have a <style> element.  One list item per output line; the items are
# joined with newlines and the last line has no trailing newline.
$style = join( "\n",
  '   <style type="text/css">',
  '    <!--',
  '      body {background-color:#f0fff0;',
  '            font-family:helvetica,arial,sans-serif}',
  '      a:link {text-decoration:none;color:#0000ff}',
  '      a:visited {text-decoration:none;color:#7f007f}',
  '      a:hover {text-decoration:none; border-width:thin;',
  '               border-style:dotted; background-color:#f0fff0}',
  '      a:focus {text-decoration:none; background-color:#e0eee0; color:#000000}',
  '      a:active {text-decoration:none; background-color:#e0eee0; color:#000000}',
  '     -->    </style>',
);

# Splitting level given to sgml2html's -split option.  The first command
# line argument is used only when it is exactly one of the digits 0, 1 or
# 2; a missing argument or anything else falls back to 1.  The pattern is
# anchored so that values such as "12" or "file2.sgml", which merely
# contain one of those digits, are not passed on to the command line.
if( defined $ARGV[0] && $ARGV[0] =~ m/\A[012]\z/ ){
  $split = $ARGV[0];
} else {
  $split = 1;
}

# Print the copyright line and the GPL notice.  Each list item is one
# paragraph of the notice and is followed by a blank line on output.
foreach my $paragraph (
  'Copyright (c) 2002 John D Lamb',

  'This program is free software; you can redistribute '
    .'it and/or modify it under the terms of the GNU '
    .'General Public License as published by the Free Software Foundation; '
    .'either version 2 of the license, or (at your option) any later '
    .'version.',

  'This program is distributed in the hope that it will be useful, '
    .'but WITHOUT ANY WARRANTY; without even the implied warranty of '
    .'MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the '
    .'GNU General Public License for more details.',

  'You should have received a copy of the GNU General Public License '
    .'along with this program; if not, write to the Free Software '
    .'Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111 USA',
){
  print $paragraph, "\n\n";
}

#Find the sgml files in the current directory
opendir(my $src_dh, ".")
  or die "Cannot read the current directory: $!\n";
my @sgml_files = grep { /\.sgml$/ } readdir $src_dh;
closedir $src_dh;

#Convert only sgml files.  The command is given to system() as a list so
#that it is run without a shell: a file name containing spaces or shell
#metacharacters is then passed to sgml2html as one unchanged argument.
foreach my $sgml_file (@sgml_files){
  system( $sgml2html, '-split', $split, $sgml_file ) == 0
    or warn "$sgml2html failed for $sgml_file (status $?)\n";
}

#Find all the html files, including the ones just generated
opendir(my $out_dh, ".")
  or die "Cannot read the current directory: $!\n";
my @html_files = grep { /\.html$/ } readdir $out_dh;
closedir $out_dh;

#Deal with each html file in turn
foreach my $html_file (@html_files){
  cleanup( $html_file );
}

# cleanup( FILENAME )
#
# Post-processes one html file in place:
#   1. adds lang="EN" to the <html> tag when it has no lang attribute;
#   2. removes any existing Content-Type <meta> tag and puts a fresh
#      ISO-8859-1 one directly after the opening <head> tag;
#   3. inserts the global $style block in front of </head>, unless the
#      file already contains a <style> element;
#   4. runs the program named by the global $tidy over the file with
#      -asxml -indent -modify.
#
# A file that cannot be read or written is reported with warn() and
# skipped, so one bad file does not stop the others being processed.
# Returns the status from system() for the tidy run, or nothing when the
# file was skipped.
sub cleanup{ #filename
  my $filename = $_[0];
  my $meta = "<meta http-equiv=\"Content-Type\""
    ." content=\"text/html; charset=iso-8859-1\"/>";

  #now read the entire file; slurping keeps the text exactly as stored
  #(joining the lines with ' ' would add a space in front of every line)
  my $in;
  unless( open($in, '<', $filename) ){
    warn "cleanup: cannot read $filename: $!\n";
    return;
  }
  my $file_contents = do { local $/; <$in> };
  close($in);
  $file_contents = '' unless defined $file_contents;

  #make sure that the file has a lang attribute in html
  unless( $file_contents =~ m/<html[^>]*lang/i ){
    $file_contents =~ s/<html/<html lang="EN"/i;
  }

  #drop every existing Content-Type meta tag, whatever its case or quoting
  $file_contents =~
    s{<meta\b[^>]*\bhttp-equiv\s*=\s*["']?Content-Type\b[^>]*>}{}gi;

  #put our own meta tag straight after <head>; the tag itself is captured
  #so that it is kept rather than replaced (\b avoids matching <header>)
  $file_contents =~ s{(<head\b[^>]*>)}{$1$meta}i;

  #add the stylesheet in front of </head> unless there already is one
  unless( $file_contents =~ m{<style\b[^>]*>}i ){
    $file_contents =~ s{(</head[^>]*>)}{$style$1}i;
  }

  my $out;
  unless( open($out, '>', $filename) ){
    warn "cleanup: cannot write $filename: $!\n";
    return;
  }
  print {$out} $file_contents;
  #buffered write errors only show up when the handle is closed
  unless( close($out) ){
    warn "cleanup: error while writing $filename: $!\n";
    return;
  }

  #tidy up file; the list form of system() runs tidy without a shell, so
  #unusual file names reach it as a single unchanged argument
  my $status = system( $tidy, '-asxml', '-indent', '-modify', $filename );
  if( $status == -1 ){
    warn "cleanup: cannot run $tidy: $!\n";
  } elsif( ($status & 127) || ($status >> 8) > 1 ){
    #tidy is documented to exit with 1 when it only has warnings, so only
    #a signal or an exit status of 2 and above is reported
    warn "cleanup: $tidy reported errors for $filename\n";
  }
  return $status;
}
