X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=extract_holdings;h=619aec14853e961d5fa21aaf7530c1ddc1cb0ef1;hp=49de83cabd071cad3f97fbebefbb1d0887bc65a2;hb=96c4a5bf729832f704dd86f2271d6f3531ebb4e9;hpb=b731ba3ca2e1e821acbb7702b47587a6e26dbb7e diff --git a/extract_holdings b/extract_holdings index 49de83c..619aec1 100755 --- a/extract_holdings +++ b/extract_holdings @@ -1,4 +1,21 @@ #!/usr/bin/perl + +# Copyright 2009-2012, Equinox Software, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + use strict; use warnings; @@ -45,6 +62,8 @@ sub extract_holdings { { open my $fh, ">", ($c->{prefix} . "-HOLDINGS-MULT-$t$s.pg"); $MULTIFILE{"$t$s"} = $fh } } + my $parallel_fields = $m->get_parallel_fields; + my $i = 0; # record counter my $j = 0; # holdings counter @@ -72,7 +91,9 @@ sub extract_holdings { for my $sub ( sort keys %{$multis->{$tagid}} ) { for my $value ( @{$rec->{tags}[$holdidx]{multi}{$sub}} ) { my $fh = $MULTIFILE{"$tagid$sub"}; - print $fh join("\t", $rec->{egid}, $j, $value), "\n"; + my $clean_value = $value; + $clean_value =~ s/[\r\n\t]//g; + print $fh join("\t", $rec->{egid}, $j, $clean_value), "\n"; } } @@ -85,6 +106,22 @@ sub extract_holdings { push @out, ''; next; } + + # handle parallel fields + if (exists($parallel_fields->{$othertag})) { + my $num_fields = $#{ $rec->{tmap}{$othertag} }; + my $tag_idx; + if ($holdidx > $num_fields) { + $tag_idx = -1; + } else { + $tag_idx = $rec->{tmap}{$othertag}[$holdidx]; + } + for my $sub ( sort keys %{ $parallel_fields->{$othertag } } ) { + push @out, $tag_idx > -1 ? $rec->{tags}[$tag_idx]{parallel}{$sub}->[0] : ''; + print HOLDINGS "l_", $m->name($rec->{tags}[$tag_idx]{tag}, $sub), ", " unless $j; + } + } + # handle only first other tag unless it is known to be multi my $limit = 0; if (exists($multis->{$othertag})) { @@ -105,7 +142,9 @@ sub extract_holdings { next if $m->first_only($rec->{tags}[$tag_idx]{tag}, $sub) and ($k > 1); for my $value ( @{$rec->{tags}[$tag_idx]{multi}{$sub}} ) { my $fh = $MULTIFILE{"$othertag$sub"}; - print $fh normalize_output(join("\t", $rec->{egid}, $j, $value)), "\n"; + my $clean_value = $value; + $clean_value =~ s/[\r\n\t]//g; + print $fh normalize_output(join("\t", $rec->{egid}, $j, $clean_value)), "\n"; } } } @@ -156,7 +195,7 @@ sub dump_sample_detail { for my $subkey (sort keys %{ $tags->{$tag} }) { my $sub = $tags->{$tag}{$subkey}; print "|| $subkey | ", $sub->{value}, " | ", - $sub->{count}, "/", $sub->{tcnt}, " | ||\n"; + $sub->{count}, "/", $sub->{tcnt}, " | ", ($sub->{count} > $sub->{tcnt}) ? "MULTI" : "", " ||\n"; } print "\n"; }