From: Shawn Boyette Date: Wed, 24 Jun 2009 19:20:19 +0000 (+0000) Subject: old2new generation now handles controlfields and will map any number of matches,... X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=commitdiff_plain;h=10808752247bab3066a1bdfb69926cd4b2928ff8 old2new generation now handles controlfields and will map any number of matches, not just one-per-record --- diff --git a/marc_cleanup b/marc_cleanup index 61c715c..7f22835 100755 --- a/marc_cleanup +++ b/marc_cleanup @@ -26,7 +26,7 @@ open MARC, '<:utf8', $marcfile open my $NUMARC, '>:utf8', $conf->{output} or die "Can't open output file $!\n"; open my $OLD2NEW, '>', 'old2new.map' - if ($conf->{'renumber-from'} and $conf->{'original-subfield'}); + if ($conf->{'renumber-from'} and $conf->{'original-tag'}); my $EXMARC = 'EX'; print $NUMARC "\n"; @@ -76,7 +76,7 @@ while ( buildrecord() ) { $ptr = 0; until ($ptr == $#record) { # get datafield/tag data if we have it - my $rc = stow_record_data(); + $rc = stow_record_data() if ($conf->{'renumber-from'} and $conf->{'original-tag'}); return $rc if $rc; # naked ampersands @@ -110,7 +110,7 @@ while ( buildrecord() ) { write_record($NUMARC); } print $NUMARC "\n"; -print $OUT "\nDone. ",$conf->{ricount}," in / ",$conf->{rocount}," out \n"; +print $OUT "\nDone. ",$conf->{ricount}," in; ",$conf->{rocount}," dumped \n"; #----------------------------------------------------------------------------------- @@ -186,37 +186,54 @@ sub do_automated_cleanups { sub stow_record_data { # get tag data if we're looking at it - if ($record[$ptr] =~ m/{'original-subfield'} and $recmeta{tag} == $conf->{'original-tag'}) { - my $line = $record[$ptr]; my $lptr = $ptr; - my $osub = $conf->{'original-subfield'}; - $recmeta{oid} = 'NONE'; - - # skim to end of this tag - until ($line =~ m||) { - if ($line =~ /(.+?){'original-tag'}) { + my $oid = 0; + if ($tag < 10) { + # controlfield + if ($record[$ptr] =~ m|(.+?)|) + { $oid = $1; print $OLD2NEW "$oid\t", $recmeta{nid}, "\n" } + } elsif ($tag >= 10 and $conf->{'original-subfield'}) { + # datafield + my $line = $record[$ptr]; my $lptr = $ptr; + my $osub = $conf->{'original-subfield'}; + # skim to end of this tag + until ($line =~ m||) { + if ($line =~ /(.+?)new map file + if ($conf->{'renumber-from'} and $conf->{'original-subfield'}) { + } + } } return 0; @@ -236,6 +253,9 @@ sub edit { my ($msg) = @_; return if $conf->{trash}->has( $recmeta{tag} ); + if ( $conf->{fullauto} ) + { dump_record($msg); return } + $conf->{editmsg} = $msg; print_fullcontext(); @@ -306,11 +326,25 @@ sub buildrecord { { $istrash = 1; next } } - $record[$i] = $l; + push @record, $l; $l = ; $i++; } - $record[$i] = $l; + + # add 903(?) with new record id + if ($conf->{'renumber-from'}) { + $recmeta{nid} = $conf->{'renumber-from'}; + push @record, join('', ' ', + $recmeta{nid}, + "\n"); + $conf->{'renumber-from'}++; + } + $i++; + + push @record, $l; return 1; } @@ -324,38 +358,19 @@ sub write_record { $FH = $EXMARC; } - $conf->{rocount}++ if ($FH eq $NUMARC); print $FH '\n" if(defined $recmeta{explanation}); - # add 903(?) with new record id - my $renumber = ''; - if ($conf->{'renumber-from'}) { - $recmeta{nid} = $conf->{'renumber-from'}; - $renumber = join('', ' ', $recmeta{nid}, "\n"); - my @tmp = @record[0 .. @record - 2]; - my $last = $record[-1]; - @record = undef; - @record = (@tmp, $renumber, $last); - @tmp = undef; $last = undef; - $conf->{'renumber-from'}++; - } - # scrub newlines (unless told not to or writing exception record) unless ($conf->{nocollapse} or $FH eq $EXMARC) { s/\n// for (@record) } - # write to old->new map file if needed - if ($conf->{'renumber-from'} and $conf->{'original-subfield'}) { - print $OLD2NEW $recmeta{oid}, "\t", $recmeta{nid}, "\n" - } - # actually write the record print $FH @record,"\n"; + # increment output record count (if not exception) + $conf->{rocount}++ if ($FH eq $EXMARC); + # if we were dumping to exception file, nuke the record and set ptr # to terminate processing loop @record = ('a'); @@ -367,7 +382,7 @@ sub print_fullcontext { print $OUT $conf->{editmsg},"\n"; print $OUT "\r Tag:",$recmeta{tag}, " Ind1:'", $recmeta{ind1},"' Ind2:'", $recmeta{ind2}, "'"; - print $OUT " @ ", $conf->{ricount}, "/", $conf->{rocount} + 1; + print $OUT " @ ", $conf->{ricount}, "/", $conf->{totalrecs}; print_context(); return 0; } @@ -387,7 +402,7 @@ sub print_context { sub message { my ($msg) = @_; - print $OUT "\r$msg at ",$conf->{ricount},"/",$conf->{rocount} + 1,"\n"; + print $OUT "\r$msg at ",$conf->{ricount},"/",$conf->{totalrecs}, "\n"; } #----------------------------------------------------------------------------------- @@ -491,9 +506,8 @@ sub display_lines { sub dump_record { my (@explanation) = @_; - print $OUT @explanation; - $recmeta{explanation} = join(' ', 'Tag', $recmeta{tag}, @explanation); - @explanation = undef; + $recmeta{explanation} = join(' ', 'DUMPING RECORD: Tag', $recmeta{tag}, @explanation); + message( $recmeta{explanation} ); write_record($EXMARC); return 1; } @@ -568,6 +582,7 @@ sub initialize { my $rc = GetOptions( $c, 'autoscrub|a', + 'fullauto', 'exception|x=s', 'output|o=s', 'prefix|p=s', @@ -589,6 +604,8 @@ sub initialize { # defaults my $pfx = $c->{prefix} // "bibs"; + $c->{ricount} = 0; + $c->{rocount} = 0; $c->{output} = join('.',$c->{prefix},'clean','marc','xml'); $c->{exception} = join('.',$c->{prefix},'exception','marc','xml'); $c->{'renumber-tag'} = 903 unless defined $c->{'renumber-tag'}; @@ -637,6 +654,8 @@ Options --no-strip9 Don't autoremove 901/903 tags in data --trashfile -t File containing trash tag data (see --trashhelp) + --fullauto No manual edits. All problematic records dumped to + exception file. --script Store human-initiated ops in scriptfile (.mcscript) Not yet implemented