#!/usr/local/bin/perl
# HTML fixer. This script travels a directory tree looking for files that
# end in .htm, .html, .HTM or .HTML, and changes all occurrences of file
# names ending in .htm", .gif", .jpg" and .html" to upper case, and changes
# .HTML" to .HTM". The purpose is to alter html files to conform with
# ISO9660 file naming conventions. This tool is designed to be used with
# the upcase tool. As an added bonus, this renames .html files to .htm
# (ignoring case).
# original coding - Barry Zickuhr, 4/11/96
###########################################################################
# first, check to see if there are any arguments. If none, print usage
$num = @ARGV;
if ( $num == 0 ) {
die "Usage: htmfix.pl <directory>\n";
}
# Check to see if object is file or link. If so, rename it now and exit
$file = @ARGV[0];
if ( -f $file || -l $file ) {
&edit($file) || die "Couldn't fix file $file \n"
# check to see if it is a directory. If so, call "rendir" subroutine with
# this name
} elsif ( -d $file ) {
$topdir = $file;
&dirtrack("$file");
# Neither file, link or directory. Either it is not here, or it is a special
# file that needs to be handled elsewhere
} else { die "This is not a normal file - please double check it and edit
manually";
}
# run through a directory tree, editing the files as you find them w/
&edit
# &dirtrack(directory);
sub dirtrack {
# localize and initialize variables
local($dir, $file, $DIR);
$dir = $_[0];
$DIR = "\U$dir";
# open directory handle and move to that directory
opendir($DIR, "$dir") || die "Couldn't open $dir.\n";
chdir("$dir") || die "Couldn't change to $dir.\n";
# read through the directory, setting "$file" to the names in the dirctory
# if $file is a directory, recursively call this subroutine.
while ($file = readdir($DIR)) {
if (-d $file) {
# don't forget to skip . and ..
if ($file ne "." && $file ne "..")
{&dirtrack("$file");}
}
# it's a file - call &edit on it
else { &edit($file); }
}
# go home, close the dir-handle
chdir("..");
closedir("$DIR");
}
# Edit subroutine - open file, open /tmp/file, edit it, then remove and copy
the one from
# tmp. Ignore file completely if it is not an html file
sub edit {
# localize variables, and get called argument
local($file, $name);
$file = $_[0];
# If the filename has ".htm" in it we assume it is .html, .htm, .HTML or .HTM,
and we
# edit it.
# if it is not an html file, we ignore it
if ( $file =~ /.htm/i ) {
# leeeeets EDIT! input of file, output of /tmp/file
open(IN,"$file");
open(OUT,">/tmp/$file");
while(<IN>) {
# these 3 subs upcase a string of letters (either case), numbers, ".", "-", "_"
or "+" ending
# in .gif", .htm" or .html".
s/([a-zA-Z0-9\.\-\_]*.gif")/\U$1\E/g;
s/([a-zA-Z0-9\.\-\_]*.jpg")/\U$1\E/g;
s/([a-zA-Z0-9\.\-\_]*.htm")/\U$1\E/g;
s/([a-zA-Z0-9\.\-\_]*.html")/\U$1\E/g;
# This changes .HTML" to .HTM" (iso9660 stuff ya know)
s/.HTML"/.HTM"/g;
# All substitutions complete - print it
print OUT $_;
}
# Close output files
close(IN);
close(OUT);
# change .html file names to .htm. Ignore case in case upcase.pl has not been
run yet
$name = $file;
if ( $file =~ /.html$/i ) { chop($name); }
# remove the local file.
unlink("$file");
# input is now the /tmp version of the file - output is our massaged filename
($name)
open(IN,"/tmp/$file");
open(OUT,">$name");
# standard perl copy
while(<IN>) {
print OUT $_;
}
# close and clean up /tmp
close(IN);
close(OUT);
unlink("/tmp/$file");
}
}