X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=extract_holdings;h=51ea2b84152f73e8c4320d4a93365f49b443b14e;hp=01f6a4572fda7671049e206baffc7a21cb687d7e;hb=0445ca1b7fb163fd20346fedaaa7e8612f8f93c3;hpb=831ddfb55de051a485f7eef176f91cbfbc821195 diff --git a/extract_holdings b/extract_holdings index 01f6a45..51ea2b8 100755 --- a/extract_holdings +++ b/extract_holdings @@ -33,6 +33,7 @@ sub extract_holdings { mapfile => $c->{map}, verbose => 1, ); + print "Writing holdings to output file(s)...\n"; # open main holdings file open HOLDINGS, '>', ($c->{prefix} . "-HOLDINGS.pg"); @@ -79,27 +80,40 @@ sub extract_holdings { # now get everything else in the mapping for my $othertag ( sort keys %{$rec->{tmap}} ) { next if $othertag eq $c->{holdings}; # ignoring the holdings, o'course - my $idx = $rec->{tmap}{$othertag}[0]; # get index into tags struct - unless (defined $idx) { + my $test_idx = $rec->{tmap}{$othertag}[0]; # get index into tags struct + unless (defined $test_idx) { push @out, ''; next; } - for my $sub ( sort keys %{$rec->{tags}[$idx]{uni}} ) { - push @out, $rec->{tags}[$idx]{uni}{$sub}; - print HOLDINGS "l_", $m->name($rec->{tags}[$idx]{tag}, $sub), ", " - unless $j; + # handle only first other tag unless it is known to be multi + my $limit = 0; + if (exists($multis->{$othertag})) { + $limit = $#{ $rec->{tmap}{$othertag} }; } - for my $sub ( sort keys %{$multis->{$othertag}} ) { - for my $value ( @{$rec->{tags}[$idx]{multi}{$sub}} ) { - my $fh = $MULTIFILE{"$othertag$sub"}; - print $fh join("\t", $rec->{egid}, $j, $value), "\n"; + foreach my $idx (0..$limit) { + my $tag_idx = $rec->{tmap}{$othertag}[$idx]; + for my $sub ( sort keys %{$rec->{tags}[$tag_idx]{uni}} ) { + if ($m->first_only($rec->{tags}[$tag_idx]{tag}, $sub)) { + push @out, ($k == 1) ? $rec->{tags}[$tag_idx]{uni}{$sub} : ''; + } else { + push @out, $rec->{tags}[$tag_idx]{uni}{$sub}; + } + print HOLDINGS "l_", $m->name($rec->{tags}[$tag_idx]{tag}, $sub), ", " unless $j; + } + next unless exists($multis->{$othertag}); + for my $sub ( sort keys %{$multis->{$othertag}} ) { + next if $m->first_only($rec->{tags}[$tag_idx]{tag}, $sub) and ($k > 1); + for my $value ( @{$rec->{tags}[$tag_idx]{multi}{$sub}} ) { + my $fh = $MULTIFILE{"$othertag$sub"}; + print $fh normalize_output(join("\t", $rec->{egid}, $j, $value)), "\n"; + } } } } # and dump it print HOLDINGS "\n" unless $j; - print HOLDINGS join("\t", @out); + print HOLDINGS normalize_output(join("\t", @out)); print HOLDINGS "\n"; $j++; } @@ -203,6 +217,7 @@ sub initialize { 'holdings|h=i', 'copyid|c=s', 'prefix|p=s', + 'disable-pg-normalization', 'version|v', 'help', ); @@ -229,6 +244,12 @@ sub initialize { return $c; } +sub normalize_output { + my $str = shift; + $str =~ s!\\!\\\\!g unless $c->{'disable-pg-normalization'}; + return $str; +} + sub show_help { my ($msg) = @_; print "\nERROR - $msg\n" if $msg; @@ -256,6 +277,10 @@ HOLDINGS EXTRACTION ARGUMENTS used to extract holdings data from the input MARC file --holdings -h Specifies actual holdings tag --copyid -c Specifies subfield of holdings with unique copy identifier + --disable-pg-normalization By default, output is normalized so that a Postgres + copy or \\copy can import the data without choking on + backslashes; use this command-line option if + output is not meant to be consumed by psql. All three of these must be given together. HELP