use Getopt::Long;
use Term::ReadLine;
+binmode STDOUT, ":utf8";
my $term = new Term::ReadLine 'yaz-cleanup';
my $OUT = $term->OUT || \*STDOUT;
populate_trash() if ($conf->{trashfile});
# set up files, since everything appears to be in order
-open MARC, '<:utf8', (shift || 'incoming.marc.xml')
+my $marcfile = shift || 'incoming.marc.xml';
+open MARC, '<:utf8', $marcfile
or die "Can't open input file $!\n";
open my $NUMARC, '>:utf8', $conf->{output}
or die "Can't open output file $!\n";
my $EXMARC = 'EX';
print $NUMARC "<collection>\n";
+$conf->{totalrecs} = `grep -c '<record' $marcfile`;
+chomp $conf->{totalrecs};
+$conf->{percent} = 0;
+
my @record = (); # current record storage
my %recmeta = (); # metadata about current record
my $ptr = 0; # record index pointer
help => \&help,
);
-my @spinner = qw(- / | \\);
+my @spinner = qw(- \\ | /);
my $sidx = 0;
while ( buildrecord() ) {
- unless ($conf->{ricount} % 100) {
- print "\rWorking... ", $spinner[$sidx];
+ unless ($conf->{ricount} % 50) {
+ $conf->{percent} = int(($conf->{ricount} / $conf->{totalrecs}) * 100);
+ print "\rWorking (",$conf->{percent},"%) ", $spinner[$sidx];
$sidx = ($sidx == $#spinner) ? 0 : $sidx + 1;
}
$ptr = 0;
until ($ptr == $#record) {
+ # get datafield/tag data if we have it
+ my $rc = stow_record_data();
+ return $rc if $rc;
+
# naked ampersands
if ($record[$ptr] =~ /&/ && $record[$ptr] !~ /&\w+?;/)
{ edit("Naked ampersand"); $ptr= 0; next }
next;
}
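- # test for existing 901/903 unless we're autocleaning them
+ # test for existing 901/903 unless we're autocleaning them (strip9)
+ # or the check has been switched off (no-strip9)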
- unless ($conf->{'strip-nines'}) {
+ unless ($conf->{'strip9'} or $conf->{'no-strip9'}) {
if ($match == 901 or $match == 903) {
edit("Incoming 901/903 found in data");
next;
sub do_automated_cleanups {
$ptr = 0;
until ($ptr == $#record) {
- # get datafield/tag data if we have it
- my $rc = stow_record_data();
- return $rc if $rc;
-
# catch empty datafield elements
if ($record[$ptr] =~ m/<datafield tag="..."/) {
if ($record[$ptr + 1] =~ m|</datafield>|) {
'original-tag|ot=i',
'original-subfield|os=s',
'script',
- 'strip-nines',
+ 'strip9',
+ 'no-strip9',
'trashfile|t=s',
'trashhelp',
'help|h',
$c->{window} = 5;
- # autotrash 901, 903 if strip-nines
+ # autotrash 901, 903 if strip9
- if ($c->{'strip-nines'}) {
+ if ($c->{'strip9'}) {
$c->{trash}{901} = 1;
$c->{trash}{903} = 1;
}
--autoscrub -a Automatically remove non-numeric tags in data
--nocollapse -n Don't compress records to one line on output
- --strip-nines Automatically remove any existing 901/903 tags in data
+ --strip9 Automatically remove any existing 901/903 tags in data (reversible)
--trashfile -t File containing trash tag data (see --trashhelp)