#!/usr/bin/perl use strict; # Author: Laura Melton, ITS Student Intern, KCLS # Date: September 2003 # Usage: # FormatReaderPulls.pl [-o ] [-e ] [-s ] # This program cleans up book-alert pulls. It is meant to be called from and # to work with the book-alert VOC (which I called BOOK.ALERTS). Its purpose # is to take the separate files which the VOC produces for each category and # to reformat those files into one, with the category name at the front of each # line of text. # It works by reading the &SAVEDLISTS& directory and finding, one by one, all # the files ending with the specified string (Default is ".BAO"). When the # script finds such a file, it opens the file and spews its contents to another # output file, prepending the category code to each line. (The script knows the # category code because it's the first part of the filename, the part that's not # the extension.) It then deletes the file and goes on to the next one. # This script should not have to be edited when making routine changes to the # BOOK.ALERT VOC. # If you want to change the location of the output file or the input-file # extension, or if the save-list location should change for some reason, edit # the values of $outputfile, $extension, or $savelistdir accordingly. # These values can more easily be overridden from the command line. # Default directory/outputfile/extension my $outputfile = '/tmp/bookalerts.txt'; my $extension = '.BAO'; my $savelistdir = './&SAVEDLISTS&'; # Override directory/outputfile/extension from command line my $arg; while ($arg = shift) { $outputfile = shift if ($arg =~ /^-o/); $extension = shift if ($arg =~ /^-e/); $savelistdir = shift if ($arg =~ /^-s/); } # Make sure the directory path doesn't end with / if ($savelistdir =~ /^(.*)\/$/) { $savelistdir = $1; } # Set-up for correcting diacritics (makes Perl code later easier, promise) my $code; my $base; my $entity; # hash of hashes, holds data: code, base letter, accented letter # see http://www.uottawa.ca/library/tsd/iiidiacritics.html my %charcodes = ( '225' => { # grave 'A' => 'À', 'a' => 'à', 'E' => 'È', 'e' => 'è', 'I' => 'Ì', 'i' => 'ì', 'O' => 'Ò', 'o' => 'ò', 'U' => 'Ù', 'u' => 'ù', }, '226' => { # acute 'A' => 'Á', 'a' => 'á', 'E' => 'É', 'e' => 'é', 'I' => 'Í', 'i' => 'í', 'O' => 'Ó', 'o' => 'ó', 'U' => 'Ú', 'u' => 'ú', 'Y' => 'Ý', 'y' => 'ý', }, '227' => { # circonflex 'A' => 'Â', 'a' => 'â', 'E' => 'Ê', 'e' => 'ê', 'I' => 'Î', 'i' => 'î', 'O' => 'Ô', 'o' => 'ô', 'U' => 'Û', 'u' => 'û', }, '228' => { # tilde 'A' => 'Ã', 'a' => 'ã', 'O' => 'Õ', 'o' => 'õ', 'N' => 'Ñ', 'n' => 'ñ', }, '232' => { # umlaut 'A' => 'Ä', 'a' => 'ä', 'E' => 'Ë', 'e' => 'ë', 'I' => 'Ï', 'i' => 'ï', 'O' => 'Ö', 'o' => 'ö', 'U' => 'Ü', 'u' => 'ü', 'Y' => 'Ÿ', 'y' => 'ÿ', }, '233' => { # caron 'S' => 'Š', 's' => 'š', 'Z' => 'Ž', 'z' => 'ž', }, '234' => { # angstrom 'A' => 'Å', 'a' => 'å', }, '240' => { # cedilla 'C' => 'Ç', 'c' => 'ç', }, ); my $filename; my $line; my $outputline; my $category; open (OUTPUT,">$outputfile") or die "Could not open $outputfile for writing, exiting.\n"; # Open save-list directory opendir (SAVEDLISTS, "$savelistdir") or die "Could not open $savelistdir, exiting.\n"; LOOP: while ($filename = readdir (SAVEDLISTS)) { if ($filename =~ /^([A-Z0-9]*)${extension}$/) { $category = $1; # grouped section of filename is category unless (open(INPUT,"$savelistdir/$filename")) { print STDOUT "Could not open $savelistdir/$filename for reading.\n"; next LOOP; } print STDOUT " Processing list: $category"; while($line = ) { chomp($line); # remove ending carriage return next if ($line =~ /^(|\s*)$/); # skip to next iteration if the line doesn't contain any data # if title ends with '/', remove it $line = $1.$2 if ($line =~ /\A([^\|]*\|[^\|]*)\/(|.*)\Z/); # replace ampersands (before other character codes) # $line =~ s/&/&/g; # replace foreign characters (using previously-defined hashes) foreach $code (keys %charcodes) { while (($base,$entity) = each %{$charcodes{$code}}) { $line =~ s/\\AN$code(|\s)\\$base/$entity/g; } } # remove any remaining occurrences of \ANxxx\ $line =~ s/\\AN[\d]+(|\s)\\//g; # If the line begins with a pipe, this is the first line of a book. # print the current output line (unless it's blank/undefined, as it # will be at first) and assign the new line to the output line # variable. Otherwise append it to the output line. if ($line =~ /^\|/) { print OUTPUT "$outputline\n" unless ($outputline =~ /^$/); $outputline = "$category $line"; } else { $outputline .= " $line"; #$outputline .= " *** $line"; # Paragraph-break markers? } } close INPUT; print STDOUT " ... done\n"; unlink "$savelistdir/$filename"; # delete input file when processing done } } closedir SAVEDLISTS; print OUTPUT "$outputline\n"; # print last output line close OUTPUT; print "Reformatting finished. Output file is $outputfile.\n";