#!/usr/local/bin/perl
#
# HTML fixer.
#
# This script walks a directory tree looking for files whose names end in
# .htm or .html (any case).  Inside each such file it upper-cases every
# double-quoted reference ending in .gif", .jpg", .htm" or .html", and then
# shortens .HTML" to .HTM".  Finally, files named *.html are renamed to
# *.htm (ignoring case).
#
# The purpose is to make HTML files conform to ISO9660 file naming
# conventions.  This tool is designed to be used with the upcase tool.
#
# Usage: htmfix.pl <directory-or-file>
#
# original coding - Barry Zickuhr, 4/11/96
###########################################################################

use strict;
use warnings;

use File::Basename qw(dirname);
use File::Temp qw(tempfile);

# Script entry point.
#
#   @args - the command line arguments; only the first one is used.
#
# Dies with a usage message when no argument is given, when a single file
# cannot be fixed, or when the argument is neither file, link nor directory.
sub _main {
    my @args = @_;

    # First, check to see if there are any arguments.  If none, print usage.
    die "Usage: htmfix.pl <directory>\n" if !@args;

    my $target = $args[0];

    if ( -f $target || -l $target ) {
        # Plain file or link: fix it now and finish.
        edit($target) || die "Couldn't fix file $target \n";
    }
    elsif ( -d $target ) {
        # Directory: walk the whole tree below it.
        dirtrack($target);
    }
    else {
        # Neither file, link nor directory.  Either it is not here, or it
        # is a special file that needs to be handled elsewhere.
        die "This is not a normal file - please double check it and edit manually";
    }

    return 1;
}

# dirtrack(directory)
#
# Run through a directory tree, calling edit() on every non-directory entry.
#
#   $dir - path of the directory to process.
#
# Dies if $dir cannot be opened.  Returns 1 otherwise.
#
# Notes on the implementation:
#   * A lexical directory handle is used, so nested directories that share
#     a name (e.g. sub/sub) no longer clobber each other's handle.
#   * The entry list is read completely before any file is touched, because
#     edit() renames files and creates temporary files in the directory.
#   * Full paths are built instead of chdir()ing, so the walk cannot end up
#     in the wrong directory.
#   * Symlinked directories are skipped (with a warning) so that a link
#     cycle cannot cause endless recursion.
sub dirtrack {
    my ($dir) = @_;

    opendir( my $dh, $dir ) || die "Couldn't open $dir.\n";
    my @entries = sort grep { $_ ne '.' && $_ ne '..' } readdir $dh;
    closedir $dh;

    for my $entry (@entries) {
        my $path = "$dir/$entry";

        if ( -d $path ) {
            if ( -l $path ) {
                warn "Skipping symlinked directory $path\n";
                next;
            }
            dirtrack($path);
        }
        else {
            # It's a file - edit() ignores it unless it is an HTML file.
            edit($path);
        }
    }

    return 1;
}

# _fix_line(line)
#
# Return a copy of $line with the ISO9660 reference fix-ups applied:
#   1. every run of letters, digits, ".", "-", "_" or "+" that ends in
#      .gif", .jpg", .htm" or .html" (extension in any case) is upper-cased;
#   2. .HTML" is shortened to .HTM".
#
# The dots are matched literally, so ordinary text such as alt="xhtml" or
# alt="a gif" is left untouched.  Only double-quoted references are handled.
sub _fix_line {
    my ($line) = @_;

    $line =~ s{([A-Za-z0-9._+-]*\.(?:gif|jpg|html?)")}{\U$1}gi;
    $line =~ s{\.HTML"}{.HTM"}g;

    return $line;
}

# edit(file)
#
# Fix one HTML file in place and rename *.html to *.htm.
#
#   $file - path of the file; ignored unless it ends in .htm or .html
#           (any case).
#
# Returns 1 when the file was rewritten, 0 when it was skipped or when an
# error occurred.  Errors are reported with warn() and leave the original
# file untouched: the new content is written to a temporary file in the
# same directory and only moved into place once it is complete.
sub edit {
    my ($file) = @_;

    # If it is not an HTML file, we ignore it.
    return 0 unless $file =~ /\.html?\z/i;

    my $in;
    if ( !open( $in, '<', $file ) ) {
        warn "Couldn't read $file: $!\n";
        return 0;
    }

    # tempfile() croaks on failure, hence the eval.
    my ( $out, $tmp ) =
      eval { tempfile( 'htmfixXXXXXX', DIR => dirname($file), UNLINK => 0 ) };
    if ( !$out ) {
        warn "Couldn't create a temporary file for $file: $@";
        close $in;
        return 0;
    }

    # tempfile() creates the file readable by the owner only; give the
    # result the permissions of the file it is going to replace.
    my @st = stat $in;
    chmod $st[2] & 07777, $tmp if @st;

    my $problem;
    while ( my $line = <$in> ) {
        if ( !print {$out} _fix_line($line) ) {
            $problem = $!;
            last;
        }
    }
    close $in;
    if ( !close $out ) {
        $problem = $! if !defined $problem;
    }
    if ( defined $problem ) {
        warn "Couldn't write fixed copy of $file: $problem\n";
        unlink $tmp;
        return 0;
    }

    # Change .html file names to .htm.  Ignore case in case upcase.pl has
    # not been run yet.
    my $name = $file;
    chop $name if $file =~ /\.html\z/i;

    if ( !rename( $tmp, $name ) ) {
        warn "Couldn't move fixed copy into place as $name: $!\n";
        unlink $tmp;
        return 0;
    }

    # The fixed content now lives under the new name; drop the old one.
    if ( $name ne $file && !unlink $file ) {
        warn "Couldn't remove $file: $!\n";
    }

    return 1;
}

# Run only when executed as a script, so the subs can also be loaded
# (require/do) without side effects.
_main(@ARGV) unless caller;

1;