#!/usr/local/bin/perl

# HTML fixer.  This script travels a directory tree looking for files that
# end in .htm, .html, .HTM or .HTML, and changes all occurrences of the
# strings .htm", .gif", .jpg" and .html" to upper case, and changes .html
# to .htm

# the purpose is to alter html files to conform with ISO9660 file naming
# conventions.  This tool is designed to be used with the upcase tool.

# as an added bonus, this renames .html files to .htm (ignoring case).

# original coding - Barry Zickuhr, 4/11/96

###########################################################################
# first, check to see if there are any arguments.  If none, print usage

$num = @ARGV;
if ( $num == 0 ) {
	die "Usage: htmfix.pl <directory>\n";
}

# Only the first argument is looked at.  It may name a single file (or
# symbolic link), or the top of a directory tree.

$file = $ARGV[0];

# A file or link is edited right away.  A false return from &edit is
# reported as a failure.

if ( -f $file || -l $file ) {
	&edit($file) || die "Couldn't fix file $file \n";

# A directory is handed to the "dirtrack" subroutine, which walks the
# tree below it and edits each file it finds.

} elsif ( -d $file ) {
	$topdir = $file;
	&dirtrack($file);

# Neither file, link nor directory.  Either it is not here, or it is a
# special file that needs to be handled elsewhere

} else {
	die "This is not a normal file - please double check it and edit manually";
}

# run through a directory tree, editing the files as you find them w/ &edit
#       &dirtrack(directory);

sub dirtrack {

# Walk the directory tree rooted at the given directory, calling &edit on
# every entry that is not itself a directory.
#
#   $_[0]  - directory to process (relative to the current directory, or
#            absolute)
#
# Dies if the directory cannot be opened or entered, or if the starting
# directory cannot be restored afterwards.
#
# NOTE: -d follows symbolic links, so linked directories are descended
# into as well; link cycles are not detected.

        my ($dir) = @_;

# remember where we started so we can get back there reliably; "cd .."
# lands in the wrong place when $dir is a symlink or has several path
# components

        require Cwd;
        my $start = Cwd::getcwd();
        defined($start) || die "Couldn't determine current directory.\n";

# snapshot the directory contents (minus . and ..) and close the handle
# before doing any work, so that recursion never shares a handle and
# files created by &edit are not picked up half way through the scan

        opendir(my $dh, $dir) || die "Couldn't open $dir.\n";
        my @entries = grep { $_ ne "." && $_ ne ".." } readdir($dh);
        closedir($dh);

# move into the directory so the entry names can be used as they are

        chdir($dir) || die "Couldn't change to $dir.\n";

        foreach my $entry (@entries) {

# sub-directories are walked recursively, everything else goes to &edit

                if (-d $entry) {
                        &dirtrack($entry);
                }
                else {
                        &edit($entry);
                }
        }

# go home

        chdir($start) || die "Couldn't return to $start.\n";
}

# Edit subroutine - rewrite one HTML file so that the references inside it
# follow ISO9660 naming.
#       &edit(file);
#
# Files whose names do not end in .htm or .html (any case) are ignored
# completely.  For the others, every quoted reference ending in .gif",
# .jpg", .htm" or .html" is upper-cased, .HTML" references are shortened
# to .HTM", and a file named *.html is itself renamed to *.htm.
#
# Returns 1 when the file was rewritten.  Returns 0 when the file was
# skipped, or when an I/O step failed; in the failure case a warning is
# printed and the original file is left untouched.

sub edit {

	my ($file) = @_;

# Only names that END in .htm or .html (any case) are treated as HTML, and
# only regular files (or links to them) are read.

	return 0 unless $file =~ /\.html?\z/i;
	return 0 unless -f $file;

	require File::Basename;
	require File::Temp;

# change .html file names to .htm.  Ignore case in case upcase.pl has not
# been run yet

	my $name = $file;
	$name =~ s/(\.htm)l\z/$1/i;

# the permission bits are copied to the new file further down, because the
# temporary file is created private to the owner

	my @st = stat($file);

	my $in;
	unless (open($in, '<', $file)) {
		warn "Couldn't read $file: $!\n";
		return 0;
	}

# The edited copy is written next to the original, not in /tmp, so that it
# can be moved into place with a single rename and so that file names with
# a directory part work.  tempfile dies on failure, hence the eval.

	my ($out, $tmp) = eval {
		File::Temp::tempfile("htmfixXXXXXX",
			DIR => File::Basename::dirname($file));
	};
	unless ($out) {
		warn "Couldn't create a temporary file for $file: $@";
		close($in);
		return 0;
	}

	my $ok = 1;
	while (my $line = <$in>) {

# Upcase a run of letters, digits, ".", "-", "_" or "+" that ends in .gif",
# .jpg", .htm" or .html" (extension matched in either case).  The closing
# quote stays outside the upcased text.

		$line =~ s/([A-Za-z0-9._+-]*\.(?:gif|jpg|html?))"/\U$1\E"/gi;

# This changes .HTML" to .HTM" (iso9660 stuff ya know)

		$line =~ s/\.HTML"/.HTM"/g;

		unless (print {$out} $line) {
			$ok = 0;
			last;
		}
	}
	close($in);

# buffered write errors only show up at close, so check it too

	$ok = 0 unless close($out);

	unless ($ok) {
		warn "Couldn't write edited copy of $file: $!\n";
		unlink($tmp);
		return 0;
	}

	chmod($st[2] & 07777, $tmp) if @st;

# Put the edited copy in place under the massaged name.  The original is
# only removed after this has succeeded, so a failure never loses it.

	unless (rename($tmp, $name)) {
		warn "Couldn't replace $name: $!\n";
		unlink($tmp);
		return 0;
	}

	if ($name ne $file) {
		unlink($file) || warn "Couldn't remove $file: $!\n";
	}

	return 1;
}