#!/usr/local/bin/perl
#
# TxtIndex - Create look-up index file for /src/txt directory.
# V1.1 pl 00 -- 25 Nov 1997
#
# Written by
#   Jouko Valta (jopi@x2ftp.oulu.fi)
#
# Locate files on FTP and text directories and find matching pairs.
#
#
# Revision History
#   V1.0 pl 00 --  8 Apr 1997
#       Initial revision.
#
#   V1.1 pl 00 -- 25 Nov 1997
#       Misc options, comments, etc.
#       Added LongDescription Module Files.
#       Sort the Keywords file.
#
#

use strict;
use warnings;

my $Usage = <<'END_USAGE';

Options:
  -h      Show help page and exit
  -k      Sort Keywords file as well
  -nk     Suppress sorting the Keywords file
  -l      Check Long Description Module files (default)
  -nl     Skip the Long Description Module check
  -c,-nw  Don't update index file
  -w      Write updated index file

END_USAGE

# -----------------------------------------------------------------------------

## Default Filenames

my $LOGHOME  = "/pc/log";
my $TEXTDIR  = "/src/txt";
my $FTPDIR   = "/ftp-service/ftp/pub/msdos";

my $INDEXDIR = $LOGHOME . "/programming";       # Re-formatted TXT files.
my $TARGET   = $TEXTDIR . "/00index.txt";

# First column kept by cut(1): everything up to and including the directory
# prefix is stripped from each path printed by find(1).  Derived from $FTPDIR
# so the offsets stay correct if the path is changed (40 and 28 for the
# default $FTPDIR).
my $PROGS_COLUMN = length("$FTPDIR/programming/") + 1;
my $MSDOS_COLUMN = length("$FTPDIR/") + 1;

my $FIND_PROGS = "/bin/find $FTPDIR/programming -type f -print | /bin/cut -c${PROGS_COLUMN}-";
my $FIND_MSDOS = "/bin/find $FTPDIR/arcers $FTPDIR/virus -type f -print | /bin/cut -c${MSDOS_COLUMN}-";
my $SORT       = "/bin/sort";

## Defaults

my $CntOnly  = 0;       # 1 = only count, do not write $TARGET
my $SortKeys = 0;       # Set by -k / -nk; not consulted anywhere in this file.
my $DoLDesc  = 1;       # 1 = scan $INDEXDIR for long description files

## Command line options

while (@ARGV && $ARGV[0] =~ /^-/) {
    my $arg = shift @ARGV;

    last if $arg =~ /^--$/;

    if ($arg =~ /^-h/)       { print $Usage; exit 2; }
    if ($arg =~ /^-k/)       { $SortKeys = 1; next; }   # Update keywords.
    if ($arg =~ /^-nk/)      { $SortKeys = 0; next; }
    if ($arg =~ /^-c|^-nw/)  { $CntOnly  = 1; next; }   # No index update.
    if ($arg =~ /^-w/)       { $CntOnly  = 0; next; }
    if ($arg =~ /^-l/)       { $DoLDesc  = 1; next; }   # Check LDM Files.
    if ($arg =~ /^-nl/)      { $DoLDesc  = 0; next; }

    die "Unsupported argument near $arg\n Stopped";
}

# -----------------------------------------------------------------------------
#
# Initialize Counters
#

my $FtpFiles    = 0;
my $TxtFiles    = 0;    # Txt Files
my $MatchLess   = 0;    # No mainfile
my $MatchOne    = 0;
my $MatchMany   = 0;    # Multiple mainfiles

my $LDescFiles  = 0;    # Long Description Module Files
my $LDescOrphan = 0;
my $LDMatchOne  = 0;
my $LDMatchMany = 0;    # Multiple mainfiles

# Lookup tables, all keyed by txt file name ('-' for no txt).
my %IndexCategory;      # LDM sub-directory (with trailing slash)
my %IndexName;          # LDM file name
my %IndexMatch;         # Number of LDM files mapping to the txt name
my %TxtMatch;           # Number of FTP files mapping to the txt name

# Output handle for $TARGET; stays undefined when the index is not written,
# which turns emit_index() into a no-op.
my $OF;

# -----------------------------------------------------------------------------
# Check Long Description Module File matches
#
#       *.txt   *.lsm
#       others => error
#

if ($DoLDesc) {
    open(my $ldm_list, '-|', "/bin/find $INDEXDIR -type f -print")
        or die "Can't read $INDEXDIR";

    print "\nChecking files on $INDEXDIR.\n";

    while (my $path = <$ldm_list>) {
        chomp $path;

        next if is_skipped_entry($path);

        # Takes the last slash by default.
        if ($path =~ /\S*\/(\S+\/)(\S+)$/) {    # File in LDM directory
            my ($dir, $file) = ($1, $2);
            my $txt = get_txt_name($file);      # '-' for no txt

            if ($dir ne "programming/") {
                $IndexCategory{$txt} = $dir;
            }
            $IndexName{$txt} = $file;

            # Update Count
            ++$IndexMatch{$txt};
            ++$LDescFiles;
        }

        # NOTE: $LDescOrphan, $LDMatchMany and $LDMatchOne are never updated;
        # the per-file orphan / multiple-match check was already disabled
        # (commented out) in the original, so the summary reports 0 for them.
    }

    close $ldm_list;
    print "\n";
}                                               # Do LDesc

$IndexCategory{"-"} = "";                       # '-' for no txt
$IndexName{"-"}     = "-";

my $date = `/bin/date`;

# -----------------------------------------------------------------------------
# Get the FTP directory listing ...
#

open(my $progs_list, '-|', "$FIND_PROGS |$SORT")
    or die "Can't excute '$FIND_PROGS | $SORT'";

if (!$CntOnly) {
    open($OF, '>', $TARGET) or die "Can't write $TARGET: $!\n";
    print "\nCreating $TARGET\n";
    print {$OF} "% IndexFile Format v1.0 -- File created $date\n";    # Double LF
}

print "\nChecking files on $FTPDIR.\n";
process_input($progs_list);
close $progs_list;

open(my $msdos_list, '-|', "$FIND_MSDOS |$SORT")
    or die "Can't excute '$FIND_MSDOS | $SORT'";
process_input($msdos_list);
close $msdos_list;

# -----------------------------------------------------------------------------
#
# Check TXT File matches
# Count matching files for each textfile found.
#

open(my $txt_list, '-|', "/bin/ls -1 $TEXTDIR") or die "Can't read $TEXTDIR";

print "Checking for matches on $TEXTDIR.\n";

while (my $txt = <$txt_list>) {
    chomp $txt;
    ++$TxtFiles;

    if (!$TxtMatch{$txt}) {
        # print, not printf: the file name must not be used as a format.
        print "No mainfile for $txt.\n";
        emit_index(" -\t\t\t-\t\t%-8s", $txt);

        if ($IndexName{$txt}) {
            # Same last column as process_input(): category followed by name.
            emit_index("\t%s\n", index_entry_for($txt));
        }
        else {
            emit_index("\t-\n");
        }
        ++$MatchLess;
    }
    elsif ($TxtMatch{$txt} > 1) {
        print "Multiple matches for $txt.\n";   # Multiple mainfiles
        ++$MatchMany;
    }
    else {
        ++$MatchOne;
    }
}

close $txt_list;

# -----------------------------------------------------------------------------
#
# Print Summary
#

printf "\n\n";
printf "\t%4d files on FTP archive. (Index files not included.)\n", $FtpFiles;
printf "\t%4d txtfiles.\n", $TxtFiles;
if ($DoLDesc) {
    printf "\t%4d long description text files.\n", $LDescFiles;
}
printf "\n";

printf "\t%4d textfiles don't have a main file.\n", $MatchLess;
printf "\t%4d textfiles shared by multiple files.\n", $MatchMany;
printf "\t%4d files uniquely have a txt file.\n", $MatchOne;
printf "\t%4d files don't have a txt file.\n", $TxtMatch{"-"} || 0;
printf "\n";

if ($DoLDesc) {
    printf "\t%4d long text files don't have a main file.\n", $LDescOrphan;
    printf "\t%4d long text files shared by multiple files.\n", $LDMatchMany;
    printf "\t%4d files uniquely have a long txt file.\n", $LDMatchOne;
    printf "\t%4d (txt) files don't have a long txt file.\n", $IndexMatch{"-"} || 0;
    printf "\n";
}

# Finish the index file only if one was actually opened.
if (defined $OF) {
    print {$OF} "\n";
    close($OF) or die "Can't close $TARGET: $!\n";
    chmod 0664, $TARGET;
}

print "Done.\n";

# -----------------------------------------------------------------------------
#
# Write one formatted record to the index file.
# Does nothing when the index file is not being written (-c / -nw).
#
#   emit_index($format, @args)
#

sub emit_index {
    my ($format, @args) = @_;

    return unless defined $OF;
    printf {$OF} $format, @args;
    return;
}

# -----------------------------------------------------------------------------
#
# Return the long description reference (category directory followed by
# file name) recorded for a txt name, or an empty string if none is known.
#

sub index_entry_for {
    my ($txt) = @_;

    my $category = defined $IndexCategory{$txt} ? $IndexCategory{$txt} : '';
    my $name     = defined $IndexName{$txt}     ? $IndexName{$txt}     : '';

    return $category . $name;
}

# -----------------------------------------------------------------------------
#
# True for listing entries that must not be indexed: empty lines, index
# files and 'rejected.NNN' info files.
#

sub is_skipped_entry {
    my ($entry) = @_;

    return 1 if $entry =~ /^\s*$/;                              # Empty line
    return 1 if $entry =~ /^00|\/00|00\_*index|index.html/;     # Index file
    return 1 if $entry =~ /rejected\.\d+/;                      # Info file
    return 0;
}

# -----------------------------------------------------------------------------
#
# For each mainfile, define the corresponding textfile name and count
# matches.
#
#   process_input($listing)     $listing is an open handle yielding one
#                               "dir/file" path per line.
#
# Lines without a slash do not match the pattern below and are ignored.
#

sub process_input {
    my ($listing) = @_;

    while (my $entry = <$listing>) {
        next if is_skipped_entry($entry);

        # Takes the last slash by default.
        if ($entry =~ /(\S+)\/(\S+)/) {         # File in any FTP directory
            my ($dir, $file) = ($1, $2);
            my $txt = get_txt_name($file);      # '-' for no txt

            emit_index(" %-16s\t%-8s\t%-8s\t%s\n",
                       $dir, $file, $txt, index_entry_for($txt));

            # Update Count
            ++$TxtMatch{$txt};                  # '-' for no txt
            ++$FtpFiles;
        }
    }
    return;
}                                               # process_input

# -----------------------------------------------------------------------------
#
# Map a main file name to its text file name: the part before the first
# dot with ".txt" appended.  Returns that name if the file exists in
# $TEXTDIR, "-" otherwise.
#

sub get_txt_name {
    my ($mainfile) = @_;

    my ($base) = split /\./, $mainfile, 2;
    $base = '' unless defined $base;            # empty name: nothing before '.'

    my $txtfile = $base . ".txt";

    return $txtfile if -f "$TEXTDIR/$txtfile";
    return "-";
}

# -----------------------------------------------------------------------------
#
# NOTE: 'textindex' and 'longindex.pl' expect .txt or .lsm extensions only.
#       (Counting on 'dosadm')
#
# This is not very reliable method -- should use FIND to work out subdirs etc.
#
# Map a main file name to a description file directly inside $INDEXDIR:
# returns "<base>.txt" or "<base>.lsm" (checked in that order) if such a
# file exists, "-" otherwise.  Not called from within this file.
#

sub get_desc_name {
    my ($mainfile) = @_;

    my ($descfile) = split /\./, $mainfile, 2;
    $descfile = '' unless defined $descfile;

    return "$descfile.txt" if -f "$INDEXDIR/$descfile.txt";
    return "$descfile.lsm" if -f "$INDEXDIR/$descfile.lsm";
    return "-";
}