-open my $OLD2NEW, '>', 'old2new.map'
-if ($conf->{'renumber-from'} and $conf->{'original-subfield'});
+# old2new.map is only opened when both 'renumber-from' and
+# 'original-subfield' are set. The handle is declared unconditionally
+# (a `my` under a statement modifier is undefined behaviour per perlsyn)
+# and a failed open is fatal instead of being silently ignored.
+my $OLD2NEW;
+if ($conf->{'renumber-from'} and $conf->{'original-subfield'}) {
+    open $OLD2NEW, '>', 'old2new.map'
+      or die "Can't open old2new.map for writing: $!\n";
+}
my $EXMARC = 'EX';
-print $NUMARC "<collection>\n";
+print $NUMARC "<collection xmlns=\"http://www.loc.gov/MARC21/slim\">\n";
$conf->{totalrecs} = `grep -c '<record' $marcfile`;
chomp $conf->{totalrecs};
$conf->{percent} = 0;
-my @record = (); # current record storage
-my %recmeta = (); # metadata about current record
-my $ptr = 0; # record index pointer
+my @record; # current record storage
+my %recmeta; # metadata about current record
+my $ptr = 0; # record index pointer
# this is the dispatch table which drives command selection in
# edit(), below
edit("Non-numerics in tag") unless $conf->{autoscrub};
next;
}
- # test for existing 901/903 unless we're autocleaning them
- unless ($conf->{'strip9'} or $conf->{'no-strip9'}) {
- if ($match == 901 or $match == 903) {
- edit("Incoming 901/903 found in data");
- next;
- }
- }
}
# subfields can't be non-alphanumeric
write_record($NUMARC);
}
print $NUMARC "</collection>\n";
-print $OUT "\nDone. \n";
+print $OUT "\nDone. ",$conf->{ricount}," in / ",$conf->{rocount}," out \n";
#-----------------------------------------------------------------------------------
my @a = @record[0 .. $ptr - 1];
my @b = @record[$ptr + 2 .. $#record];
@record = (@a, @b);
+ undef @a; undef @b; # free the slices; "@a = undef" would store a single undef element instead
message("Empty datafield scrubbed");
$ptr = 0;
next;
my @a = @record[0 .. $ptr - 1];
my @b = @record[$ptr + 1 .. $#record];
@record = (@a, @b);
+ undef @a; undef @b; # free the slices; "@a = undef" would store a single undef element instead
message("Empty subfield scrubbed");
$ptr = 0;
next;
$l = <MARC> while (defined $l and $l !~ /<record>/);
return $l unless defined $l;
- @record = ();
- %recmeta = ();
$conf->{ricount}++;
+ # reset per-record state: keep the metadata keys but blank their values,
+ # and empty the line buffer so nothing from the previous record survives
+ # (plain list assignment replaces the element-by-element delete, which
+ # also iterated one index past the end of the array)
+ for (keys %recmeta) { $recmeta{$_} = undef }
+ @record = ();
+
+ my $i = 0;
until ($l =~ m|</record>|) {
# clean up tags with spaces in them
$l =~ s/tag=" /tag="00/g;
# excise unwanted tags
if ($istrash) {
- if ($l =~ m|</datafield|)
- { $istrash = 0 }
- else
- { $l = <MARC>; next }
+ $istrash = 0 if ($l =~ m|</datafield|);
+ $l = <MARC>;
+ next;
}
if ($l =~ m/<datafield tag="(.{3})"/) {
if ($trash->has($1) or ($conf->{autoscrub} and $1 =~ /\D/))
{ $istrash = 1; next }
}
- push @record, $l;
+ $record[$i] = $l;
$l = <MARC>;
+ $i++;
}
- push @record, $l;
+ $record[$i] = $l;
return 1;
}
'" ind1=" " ind2=" "> <subfield code="',
$conf->{'renumber-subfield'},
'">', $recmeta{nid}, "</subfield></datafield>\n");
- my @tmp = @record[0 .. $#record - 1];
- my $last = $record[$#record];
+ my @tmp = @record[0 .. @record - 2];
+ my $last = $record[-1];
@record = (@tmp, $renumber, $last);
+ # release the temporaries; assigning undef to an array would leave a
+ # one-element list behind rather than freeing it
+ undef @tmp; undef $last;
$conf->{'renumber-from'}++;
}
$recmeta{prevline} = $record[$ptr];
$record[$ptr] =~ s/$from/$to/;
+ $ofrom = undef; $to = undef; $from = undef;
print_context();
return 0;
}
my $temp = $record[$ptr];
$record[$ptr] = $recmeta{prevline};
$recmeta{prevline} = $temp;
+ $temp = undef;
print_context();
return 0;
}
my @a = @record[0 .. $ptr - 1];
my @b = @record[$ptr + 1 .. $#record];
@record = (@a, @b);
+ undef @a; undef @b; # free the slices; "@a = undef" would store a single undef element instead
print_context();
return 0;
}
my @a = @record[0 .. $ptr - 1];
my @b = @record[$ptr .. $#record];
@record = (@a, $conf->{killline}, @b);
+ undef @a; undef @b; # free the slices; "@a = undef" would store a single undef element instead
print_context();
return 0;
}
my (@explanation) = @_;
print $OUT @explanation;
$recmeta{explanation} = join(' ', 'Tag', $recmeta{tag}, @explanation);
+ undef @explanation; # free the copied argument list; "@explanation = undef" would leave one undef element
write_record($EXMARC);
return 1;
}
'original-tag|ot=i',
'original-subfield|os=s',
'script',
- 'strip9',
'no-strip9',
'trashfile|t=s',
'trashhelp',
'help|h',
);
- show_help() unless $rc;
+ show_help() unless $rc and @ARGV;
show_help() if ($c->{help});
show_trashhelp() if ($c->{trashhelp});
$c->{exception} = join('.',$c->{prefix},'exception','marc','xml');
$c->{'renumber-tag'} = 903 unless defined $c->{'renumber-tag'};
$c->{'renumber-subfield'} = 'a' unless defined $c->{'renumber-subfield'};
- $c->{window} = 5;
+ $c->{window} = 9;
if ($c->{trashfile}) {
- $c->{trash} = Equinox::Migration::SimpleTagList->new(file => $conf->{trashfile})
+ # take the filename from $c, the same option hash the enclosing test
+ # checks; $conf is a different variable from the one being populated here
+ $c->{trash} = Equinox::Migration::SimpleTagList->new(file => $c->{trashfile})
} else {
$c->{trash} = Equinox::Migration::SimpleTagList->new;
}
- # remove original id sequence tag from trash hash if we know it
- $c->{trash}->remove_tag($c->{'original-tag'})
- if ( $c->{'original-tag'} and $c->{trash}->has($c->{'original-tag'}) );
-
- # autotrash 901, 903 if strip-nines
- if ($c->{'strip9'}) {
+ # autotrash 901 and 903 unless --no-strip9 was given
+ unless ($c->{'no-strip9'}) {
$c->{trash}->add_tag(901);
$c->{trash}->add_tag(903);
}
+ # remove original id sequence tag from trash hash if we know it
+ $c->{trash}->remove_tag($c->{'original-tag'})
+ if ( $c->{'original-tag'} and $c->{trash}->has($c->{'original-tag'}) );
my @keys = keys %{$c};
show_help() unless (@ARGV and @keys);
--autoscrub -a Automatically remove non-numeric tags in data
--nocollapse -n Don't compress records to one line on output
- --strip9 Automatically remove any existing 901/903 tags in data
- --no-strip9 Don't complain about 901/903 tags in data
+ --no-strip9 Don't autoremove 901/903 tags in data
--trashfile -t File containing trash tag data (see --trashhelp)