From: Rogan Hamby Date: Fri, 23 Aug 2019 18:29:19 +0000 (-0400) Subject: moving marc manipulation to it's own file X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=commitdiff_plain;h=c240a9ee5487a1d2deee2170a05a3131df60a366 moving marc manipulation to it's own file --- diff --git a/sql/base/base.sql b/sql/base/base.sql index 60ef86c..56cab3d 100644 --- a/sql/base/base.sql +++ b/sql/base/base.sql @@ -3199,29 +3199,6 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION migration_tools.marc_parses( TEXT ) RETURNS BOOLEAN AS $func$ - -use MARC::Record; -use MARC::File::XML (BinaryEncoding => 'UTF-8'); -use MARC::Charset; - -MARC::Charset->assume_unicode(1); - -my $xml = shift; - -eval { - my $r = MARC::Record->new_from_xml( $xml ); - my $output_xml = $r->as_xml_record(); -}; -if ($@) { - return 0; -} else { - return 1; -} - -$func$ LANGUAGE PLPERLU; -COMMENT ON FUNCTION migration_tools.marc_parses(TEXT) IS 'Return boolean indicating if MARCXML string is parseable by MARC::File::XML'; - CREATE OR REPLACE FUNCTION migration_tools.simple_export_library_config(dir TEXT, orgs INT[]) RETURNS VOID AS $FUNC$ BEGIN EXECUTE $$COPY (SELECT * FROM actor.hours_of_operation WHERE id IN ($$ || @@ -3321,239 +3298,6 @@ BEGIN END; $FUNC$ LANGUAGE PLPGSQL; -CREATE OR REPLACE FUNCTION migration_tools.merge_marc_fields( TEXT, TEXT, TEXT[] ) RETURNS TEXT AS $func$ - -use strict; -use warnings; - -use MARC::Record; -use MARC::File::XML (BinaryEncoding => 'UTF-8'); -use MARC::Charset; - -MARC::Charset->assume_unicode(1); - -my $target_xml = shift; -my $source_xml = shift; -my $tags = shift; - -my $target; -my $source; - -eval { $target = MARC::Record->new_from_xml( $target_xml ); }; -if ($@) { - return; -} -eval { $source = MARC::Record->new_from_xml( $source_xml ); }; -if ($@) { - return; -} - -my $source_id = $source->subfield('901', 'c'); -$source_id = $source->subfield('903', 'a') unless $source_id; -my $target_id = $target->subfield('901', 'c'); -$target_id = $target->subfield('903', 'a') unless $target_id; - -my %existing_fields; -foreach my $tag (@$tags) { - my %existing_fields = map { $_->as_formatted() => 1 } $target->field($tag); - my @to_add = grep { not exists $existing_fields{$_->as_formatted()} } $source->field($tag); - $target->insert_fields_ordered(map { $_->clone() } @to_add); - if (@to_add) { - elog(NOTICE, "Merged $tag tag(s) from $source_id to $target_id"); - } -} - -my $xml = $target->as_xml_record; -$xml =~ s/^<\?.+?\?>$//mo; -$xml =~ s/\n//sgo; -$xml =~ s/>\s+ 'UTF-8'); -use Text::CSV; - -my $in_tags = shift; -my $in_values = shift; - -# hack-and-slash parsing of array-passed-as-string; -# this can go away once everybody is running Postgres 9.1+ -my $csv = Text::CSV->new({binary => 1}); -$in_tags =~ s/^{//; -$in_tags =~ s/}$//; -my $status = $csv->parse($in_tags); -my $tags = [ $csv->fields() ]; -$in_values =~ s/^{//; -$in_values =~ s/}$//; -$status = $csv->parse($in_values); -my $values = [ $csv->fields() ]; - -my $marc = MARC::Record->new(); - -$marc->leader('00000nam a22000007 4500'); -$marc->append_fields(MARC::Field->new('008', '000000s 000 eng d')); - -foreach my $i (0..$#$tags) { - my ($tag, $sf); - if ($tags->[$i] =~ /^(\d{3})([0-9a-z])$/) { - $tag = $1; - $sf = $2; - $marc->append_fields(MARC::Field->new($tag, ' ', ' ', $sf => $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; - } elsif ($tags->[$i] =~ /^(\d{3})$/) { - $tag = $1; - $marc->append_fields(MARC::Field->new($tag, $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; - } -} - -my $xml = $marc->as_xml_record; -$xml =~ s/^<\?.+?\?>$//mo; -$xml =~ s/\n//sgo; -$xml =~ s/>\s+ 'UTF-8'); -use Text::CSV; - -my $in_tags = shift; -my $in_ind1 = shift; -my $in_ind2 = shift; -my $in_values = shift; - -# hack-and-slash parsing of array-passed-as-string; -# this can go away once everybody is running Postgres 9.1+ -my $csv = Text::CSV->new({binary => 1}); -$in_tags =~ s/^{//; -$in_tags =~ s/}$//; -my $status = $csv->parse($in_tags); -my $tags = [ $csv->fields() ]; -$in_ind1 =~ s/^{//; -$in_ind1 =~ s/}$//; -$status = $csv->parse($in_ind1); -my $ind1s = [ $csv->fields() ]; -$in_ind2 =~ s/^{//; -$in_ind2 =~ s/}$//; -$status = $csv->parse($in_ind2); -my $ind2s = [ $csv->fields() ]; -$in_values =~ s/^{//; -$in_values =~ s/}$//; -$status = $csv->parse($in_values); -my $values = [ $csv->fields() ]; - -my $marc = MARC::Record->new(); - -$marc->leader('00000nam a22000007 4500'); -$marc->append_fields(MARC::Field->new('008', '000000s 000 eng d')); - -foreach my $i (0..$#$tags) { - my ($tag, $sf); - if ($tags->[$i] =~ /^(\d{3})([0-9a-z])$/) { - $tag = $1; - $sf = $2; - $marc->append_fields(MARC::Field->new($tag, $ind1s->[$i], $ind2s->[$i], $sf => $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; - } elsif ($tags->[$i] =~ /^(\d{3})$/) { - $tag = $1; - $marc->append_fields(MARC::Field->new($tag, $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; - } -} - -my $xml = $marc->as_xml_record; -$xml =~ s/^<\?.+?\?>$//mo; -$xml =~ s/\n//sgo; -$xml =~ s/>\s+ 'UTF-8'); -use MARC::Charset; -use strict; - -MARC::Charset->assume_unicode(1); - -elog(ERROR, 'indicator position must be either 1 or 2') unless $pos =~ /^[12]$/; -elog(ERROR, 'MARC tag must be numeric') unless $tag =~ /^\d{3}$/; -elog(ERROR, 'MARC tag must not be control field') if $tag =~ /^00/; -elog(ERROR, 'Value must be exactly one character') unless $value =~ /^.$/; - -my $xml = $marcxml; -eval { - my $marc = MARC::Record->new_from_xml($marcxml, 'UTF-8'); - - foreach my $field ($marc->field($tag)) { - $field->update("ind$pos" => $value); - } - $xml = $marc->as_xml_record; - $xml =~ s/^<\?.+?\?>$//mo; - $xml =~ s/\n//sgo; - $xml =~ s/>\s+new_from_xml($marcxml, 'UTF-8'); - $field = $marc->leader(); - }; - return $field; -$$ LANGUAGE PLPERLU STABLE; - -CREATE OR REPLACE FUNCTION migration_tools.get_marc_tag (TEXT, TEXT, TEXT, TEXT) RETURNS TEXT AS $$ - my ($marcxml, $tag, $subfield, $delimiter) = @_; - - use MARC::Record; - use MARC::File::XML; - use MARC::Field; - - my $field; - eval { - my $marc = MARC::Record->new_from_xml($marcxml, 'UTF-8'); - $field = $marc->field($tag); - }; - return $field->as_string($subfield,$delimiter) if $field; - return; -$$ LANGUAGE PLPERLU STABLE; - -CREATE OR REPLACE FUNCTION migration_tools.get_marc_tags (TEXT, TEXT, TEXT, TEXT) RETURNS TEXT[] AS $$ - my ($marcxml, $tag, $subfield, $delimiter) = @_; - - use MARC::Record; - use MARC::File::XML; - use MARC::Field; - - my @fields; - eval { - my $marc = MARC::Record->new_from_xml($marcxml, 'UTF-8'); - @fields = $marc->field($tag); - }; - my @texts; - foreach my $field (@fields) { - push @texts, $field->as_string($subfield,$delimiter); - } - return \@texts; -$$ LANGUAGE PLPERLU STABLE; - -CREATE OR REPLACE FUNCTION migration_tools.get_marc_tags_filtered (TEXT, TEXT, TEXT, TEXT, TEXT) RETURNS TEXT[] AS $$ - my ($marcxml, $tag, $subfield, $delimiter, $match) = @_; - - use MARC::Record; - use MARC::File::XML; - use MARC::Field; - - my @fields; - eval { - my $marc = MARC::Record->new_from_xml($marcxml, 'UTF-8'); - @fields = $marc->field($tag); - }; - my @texts; - foreach my $field (@fields) { - if ($field->as_string() =~ qr/$match/) { - push @texts, $field->as_string($subfield,$delimiter); - } - } - return \@texts; -$$ LANGUAGE PLPERLU STABLE; - CREATE OR REPLACE FUNCTION migration_tools.find_hold_matrix_matchpoint (INTEGER) RETURNS INTEGER AS $$ SELECT action.find_hold_matrix_matchpoint( (SELECT pickup_lib FROM action.hold_request WHERE id = $1), @@ -4640,403 +4313,7 @@ BEGIN END $$ LANGUAGE plpgsql; -DROP FUNCTION IF EXISTS migration_tools.munge_sf9(INTEGER,TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.merge_group(bib_id INTEGER,new_sf9 TEXT,force TEXT DEFAULT 'false') - RETURNS BOOLEAN AS -$BODY$ -DECLARE - marc_xml TEXT; - new_marc TEXT; -BEGIN - SELECT marc FROM biblio.record_entry WHERE id = bib_id INTO marc_xml; - - SELECT munge_sf9(marc_xml,new_sf9,force) INTO new_marc; - UPDATE biblio.record_entry SET marc = new_marc WHERE id = bib_id; - - RETURN true; -END; -$BODY$ LANGUAGE plpgsql; - -DROP FUNCTION IF EXISTS migration_tools.munge_sf9(TEXT,TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.munge_sf9(marc_xml TEXT, new_9_to_set TEXT, force TEXT) - RETURNS TEXT - LANGUAGE plperlu -AS $function$ -use strict; -use warnings; - -use MARC::Record; -use MARC::File::XML (BinaryEncoding => 'utf8'); - -binmode(STDERR, ':bytes'); -binmode(STDOUT, ':utf8'); -binmode(STDERR, ':utf8'); - -my $marc_xml = shift; -my $new_9_to_set = shift; -my $force = shift; - -$marc_xml =~ s/(.........)./${1}a/; - -eval { - $marc_xml = MARC::Record->new_from_xml($marc_xml); -}; -if ($@) { - #elog("could not parse $bibid: $@\n"); - import MARC::File::XML (BinaryEncoding => 'utf8'); - return $marc_xml; -} - -my @uris = $marc_xml->field('856'); -return $marc_xml->as_xml_record() unless @uris; - -foreach my $field (@uris) { - my $ind1 = $field->indicator('1'); - if (!defined $ind1) { next; } - if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'false') { next; } - if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'true') { $field->set_indicator(1,'4'); } - my $ind2 = $field->indicator('2'); - if (!defined $ind2) { next; } - if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'false') { next; } - if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'true') { $field->set_indicator(2,'0'); } - $field->add_subfields( '9' => $new_9_to_set ); -} - -return $marc_xml->as_xml_record(); - -$function$; - -DROP FUNCTION IF EXISTS migration_tools.munge_sf9_qualifying_match(TEXT,TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.munge_sf9_qualifying_match(marc_xml TEXT, qualifying_match TEXT, new_9_to_set TEXT, force TEXT) - RETURNS TEXT - LANGUAGE plperlu -AS $function$ -use strict; -use warnings; - -use MARC::Record; -use MARC::File::XML (BinaryEncoding => 'utf8'); - -binmode(STDERR, ':bytes'); -binmode(STDOUT, ':utf8'); -binmode(STDERR, ':utf8'); - -my $marc_xml = shift; -my $qualifying_match = shift; -my $new_9_to_set = shift; -my $force = shift; - -$marc_xml =~ s/(.........)./${1}a/; - -eval { - $marc_xml = MARC::Record->new_from_xml($marc_xml); -}; -if ($@) { - #elog("could not parse $bibid: $@\n"); - import MARC::File::XML (BinaryEncoding => 'utf8'); - return $marc_xml; -} - -my @uris = $marc_xml->field('856'); -return $marc_xml->as_xml_record() unless @uris; - -foreach my $field (@uris) { - if ($field->as_string() =~ qr/$qualifying_match/) { - my $ind1 = $field->indicator('1'); - if (!defined $ind1) { next; } - if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'false') { next; } - if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'true') { $field->set_indicator(1,'4'); } - my $ind2 = $field->indicator('2'); - if (!defined $ind2) { next; } - if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'false') { next; } - if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'true') { $field->set_indicator(2,'0'); } - $field->add_subfields( '9' => $new_9_to_set ); - } -} - -return $marc_xml->as_xml_record(); - -$function$; - -DROP FUNCTION IF EXISTS migration_tools.owner_change_sf9_substring_match(TEXT,TEXT,TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.owner_change_sf9_substring_match (marc_xml TEXT, substring_old_value TEXT, new_value TEXT, fix_indicators TEXT) - RETURNS TEXT - LANGUAGE plperlu -AS $function$ -use strict; -use warnings; - -use MARC::Record; -use MARC::File::XML (BinaryEncoding => 'utf8'); - -binmode(STDERR, ':bytes'); -binmode(STDOUT, ':utf8'); -binmode(STDERR, ':utf8'); - -my $marc_xml = shift; -my $substring_old_value = shift; -my $new_value = shift; -my $fix_indicators = shift; - -$marc_xml =~ s/(.........)./${1}a/; - -eval { - $marc_xml = MARC::Record->new_from_xml($marc_xml); -}; -if ($@) { - #elog("could not parse $bibid: $@\n"); - import MARC::File::XML (BinaryEncoding => 'utf8'); - return $marc_xml; -} - -my @uris = $marc_xml->field('856'); -return $marc_xml->as_xml_record() unless @uris; - -foreach my $field (@uris) { - my $ind1 = $field->indicator('1'); - if (defined $ind1) { - if ($ind1 ne '1' && $ind1 ne '4' && $fix_indicators eq 'true') { - $field->set_indicator(1,'4'); - } - } - my $ind2 = $field->indicator('2'); - if (defined $ind2) { - if ($ind2 ne '0' && $ind2 ne '1' && $fix_indicators eq 'true') { - $field->set_indicator(2,'0'); - } - } - if ($field->as_string('9') =~ qr/$substring_old_value/) { - $field->delete_subfield('9'); - $field->add_subfields( '9' => $new_value ); - } - $marc_xml->delete_field($field); # -- we're going to dedup and add them back -} - -my %hash = (map { ($_->as_usmarc => $_) } @uris); # -- courtesy of an old Mike Rylander post :-) -$marc_xml->insert_fields_ordered( values( %hash ) ); - -return $marc_xml->as_xml_record(); - -$function$; - -DROP FUNCTION IF EXISTS migration_tools.owner_change_sf9_substring_match2(TEXT,TEXT,TEXT,TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.owner_change_sf9_substring_match2 (marc_xml TEXT, qualifying_match TEXT, substring_old_value TEXT, new_value TEXT, fix_indicators TEXT) - RETURNS TEXT - LANGUAGE plperlu -AS $function$ -use strict; -use warnings; - -use MARC::Record; -use MARC::File::XML (BinaryEncoding => 'utf8'); - -binmode(STDERR, ':bytes'); -binmode(STDOUT, ':utf8'); -binmode(STDERR, ':utf8'); - -my $marc_xml = shift; -my $qualifying_match = shift; -my $substring_old_value = shift; -my $new_value = shift; -my $fix_indicators = shift; - -$marc_xml =~ s/(.........)./${1}a/; - -eval { - $marc_xml = MARC::Record->new_from_xml($marc_xml); -}; -if ($@) { - #elog("could not parse $bibid: $@\n"); - import MARC::File::XML (BinaryEncoding => 'utf8'); - return $marc_xml; -} - -my @unqualified_uris = $marc_xml->field('856'); -my @uris = (); -foreach my $field (@unqualified_uris) { - if ($field->as_string() =~ qr/$qualifying_match/) { - push @uris, $field; - } -} -return $marc_xml->as_xml_record() unless @uris; - -foreach my $field (@uris) { - my $ind1 = $field->indicator('1'); - if (defined $ind1) { - if ($ind1 ne '1' && $ind1 ne '4' && $fix_indicators eq 'true') { - $field->set_indicator(1,'4'); - } - } - my $ind2 = $field->indicator('2'); - if (defined $ind2) { - if ($ind2 ne '0' && $ind2 ne '1' && $fix_indicators eq 'true') { - $field->set_indicator(2,'0'); - } - } - if ($field->as_string('9') =~ qr/$substring_old_value/) { - $field->delete_subfield('9'); - $field->add_subfields( '9' => $new_value ); - } - $marc_xml->delete_field($field); # -- we're going to dedup and add them back -} - -my %hash = (map { ($_->as_usmarc => $_) } @uris); # -- courtesy of an old Mike Rylander post :-) -$marc_xml->insert_fields_ordered( values( %hash ) ); - -return $marc_xml->as_xml_record(); - -$function$; - --- strip marc tag -DROP FUNCTION IF EXISTS migration_tools.strip_tag(TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.strip_tag(marc TEXT, tag TEXT) - RETURNS TEXT - LANGUAGE plperlu -AS $function$ -use strict; -use warnings; - -use MARC::Record; -use MARC::File::XML (BinaryEncoding => 'utf8'); - -binmode(STDERR, ':bytes'); -binmode(STDOUT, ':utf8'); -binmode(STDERR, ':utf8'); - -my $marc_xml = shift; -my $tag = shift; - -$marc_xml =~ s/(.........)./${1}a/; - -eval { - $marc_xml = MARC::Record->new_from_xml($marc_xml); -}; -if ($@) { - #elog("could not parse $bibid: $@\n"); - import MARC::File::XML (BinaryEncoding => 'utf8'); - return $marc_xml; -} - -my @fields = $marc_xml->field($tag); -return $marc_xml->as_xml_record() unless @fields; - -$marc_xml->delete_fields(@fields); - -return $marc_xml->as_xml_record(); - -$function$; - --- removes tags from record based on tag, subfield and evidence --- example: strip_tag(marc, '500', 'a', 'gift') will remove 500s with 'gift' as a part of the $a -DROP FUNCTION IF EXISTS migration_tools.strip_tag(TEXT,TEXT,TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.strip_tag(marc TEXT, tag TEXT, subfield TEXT, evidence TEXT) - RETURNS TEXT - LANGUAGE plperlu -AS $function$ -use strict; -use warnings; - -use MARC::Record; -use MARC::File::XML (BinaryEncoding => 'utf8'); - -binmode(STDERR, ':bytes'); -binmode(STDOUT, ':utf8'); -binmode(STDERR, ':utf8'); - -my $marc_xml = shift; -my $tag = shift; -my $subfield = shift; -my $evidence = shift; - -$marc_xml =~ s/(.........)./${1}a/; - -eval { - $marc_xml = MARC::Record->new_from_xml($marc_xml); -}; -if ($@) { - #elog("could not parse $bibid: $@\n"); - import MARC::File::XML (BinaryEncoding => 'utf8'); - return $marc_xml; -} - -my @fields = $marc_xml->field($tag); -return $marc_xml->as_xml_record() unless @fields; - -my @fields_to_delete; - -foreach my $f (@fields) { - my $sf = lc($f->as_string($subfield)); - if ($sf =~ m/$evidence/) { push @fields_to_delete, $f; } -} - -$marc_xml->delete_fields(@fields_to_delete); - -return $marc_xml->as_xml_record(); - -$function$; - - --- consolidate marc tag -DROP FUNCTION IF EXISTS migration_tools.consolidate_tag(TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.consolidate_tag(marc TEXT, tag TEXT) - RETURNS TEXT - LANGUAGE plperlu -AS $function$ -use strict; -use warnings; - -use MARC::Record; -use MARC::File::XML (BinaryEncoding => 'utf8'); - -binmode(STDERR, ':bytes'); -binmode(STDOUT, ':utf8'); -binmode(STDERR, ':utf8'); - -my $marc_xml = shift; -my $tag = shift; - -$marc_xml =~ s/(.........)./${1}a/; - -eval { - $marc_xml = MARC::Record->new_from_xml($marc_xml); -}; -if ($@) { - #elog("could not parse $bibid: $@\n"); - import MARC::File::XML (BinaryEncoding => 'utf8'); - return $marc_xml; -} - -my @fields = $marc_xml->field($tag); -return $marc_xml->as_xml_record() unless @fields; - -my @combined_subfield_refs = (); -my @combined_subfields = (); -foreach my $field (@fields) { - my @subfield_refs = $field->subfields(); - push @combined_subfield_refs, @subfield_refs; -} - -my @sorted_subfield_refs = reverse sort { $a->[0] <=> $b->[0] } @combined_subfield_refs; - -while ( my $tuple = pop( @sorted_subfield_refs ) ) { - my ($code,$data) = @$tuple; - unshift( @combined_subfields, $code, $data ); -} - -$marc_xml->delete_fields(@fields); - -my $new_field = new MARC::Field( - $tag, - $fields[0]->indicator(1), - $fields[0]->indicator(2), - @combined_subfields -); - -$marc_xml->insert_grouped_field( $new_field ); - -return $marc_xml->as_xml_record(); - -$function$; +function$; -- convenience function for linking to the item staging table diff --git a/sql/base/marc.sql b/sql/base/marc.sql new file mode 100644 index 0000000..89984b1 --- /dev/null +++ b/sql/base/marc.sql @@ -0,0 +1,725 @@ +CREATE OR REPLACE FUNCTION migration_tools.marc_parses( TEXT ) RETURNS BOOLEAN AS $func$ + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'UTF-8'); +use MARC::Charset; + +MARC::Charset->assume_unicode(1); + +my $xml = shift; + +eval { + my $r = MARC::Record->new_from_xml( $xml ); + my $output_xml = $r->as_xml_record(); +}; +if ($@) { + return 0; +} else { + return 1; +} + +$func$ LANGUAGE PLPERLU; +COMMENT ON FUNCTION migration_tools.marc_parses(TEXT) IS 'Return boolean indicating if MARCXML string is parseable by MARC::File::XML'; + +CREATE OR REPLACE FUNCTION migration_tools.merge_marc_fields( TEXT, TEXT, TEXT[] ) RETURNS TEXT AS $func$ + +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'UTF-8'); +use MARC::Charset; + +MARC::Charset->assume_unicode(1); + +my $target_xml = shift; +my $source_xml = shift; +my $tags = shift; + +my $target; +my $source; + +eval { $target = MARC::Record->new_from_xml( $target_xml ); }; +if ($@) { + return; +} +eval { $source = MARC::Record->new_from_xml( $source_xml ); }; +if ($@) { + return; +} + +my $source_id = $source->subfield('901', 'c'); +$source_id = $source->subfield('903', 'a') unless $source_id; +my $target_id = $target->subfield('901', 'c'); +$target_id = $target->subfield('903', 'a') unless $target_id; + +my %existing_fields; +foreach my $tag (@$tags) { + my %existing_fields = map { $_->as_formatted() => 1 } $target->field($tag); + my @to_add = grep { not exists $existing_fields{$_->as_formatted()} } $source->field($tag); + $target->insert_fields_ordered(map { $_->clone() } @to_add); + if (@to_add) { + elog(NOTICE, "Merged $tag tag(s) from $source_id to $target_id"); + } +} + +my $xml = $target->as_xml_record; +$xml =~ s/^<\?.+?\?>$//mo; +$xml =~ s/\n//sgo; +$xml =~ s/>\s+ 'UTF-8'); +use Text::CSV; + +my $in_tags = shift; +my $in_values = shift; + +# hack-and-slash parsing of array-passed-as-string; +# this can go away once everybody is running Postgres 9.1+ +my $csv = Text::CSV->new({binary => 1}); +$in_tags =~ s/^{//; +$in_tags =~ s/}$//; +my $status = $csv->parse($in_tags); +my $tags = [ $csv->fields() ]; +$in_values =~ s/^{//; +$in_values =~ s/}$//; +$status = $csv->parse($in_values); +my $values = [ $csv->fields() ]; + +my $marc = MARC::Record->new(); + +$marc->leader('00000nam a22000007 4500'); +$marc->append_fields(MARC::Field->new('008', '000000s 000 eng d')); + +foreach my $i (0..$#$tags) { + my ($tag, $sf); + if ($tags->[$i] =~ /^(\d{3})([0-9a-z])$/) { + $tag = $1; + $sf = $2; + $marc->append_fields(MARC::Field->new($tag, ' ', ' ', $sf => $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; + } elsif ($tags->[$i] =~ /^(\d{3})$/) { + $tag = $1; + $marc->append_fields(MARC::Field->new($tag, $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; + } +} + +my $xml = $marc->as_xml_record; +$xml =~ s/^<\?.+?\?>$//mo; +$xml =~ s/\n//sgo; +$xml =~ s/>\s+ 'UTF-8'); +use Text::CSV; + +my $in_tags = shift; +my $in_ind1 = shift; +my $in_ind2 = shift; +my $in_values = shift; + +# hack-and-slash parsing of array-passed-as-string; +# this can go away once everybody is running Postgres 9.1+ +my $csv = Text::CSV->new({binary => 1}); +$in_tags =~ s/^{//; +$in_tags =~ s/}$//; +my $status = $csv->parse($in_tags); +my $tags = [ $csv->fields() ]; +$in_ind1 =~ s/^{//; +$in_ind1 =~ s/}$//; +$status = $csv->parse($in_ind1); +my $ind1s = [ $csv->fields() ]; +$in_ind2 =~ s/^{//; +$in_ind2 =~ s/}$//; +$status = $csv->parse($in_ind2); +my $ind2s = [ $csv->fields() ]; +$in_values =~ s/^{//; +$in_values =~ s/}$//; +$status = $csv->parse($in_values); +my $values = [ $csv->fields() ]; + +my $marc = MARC::Record->new(); + +$marc->leader('00000nam a22000007 4500'); +$marc->append_fields(MARC::Field->new('008', '000000s 000 eng d')); + +foreach my $i (0..$#$tags) { + my ($tag, $sf); + if ($tags->[$i] =~ /^(\d{3})([0-9a-z])$/) { + $tag = $1; + $sf = $2; + $marc->append_fields(MARC::Field->new($tag, $ind1s->[$i], $ind2s->[$i], $sf => $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; + } elsif ($tags->[$i] =~ /^(\d{3})$/) { + $tag = $1; + $marc->append_fields(MARC::Field->new($tag, $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; + } +} + +my $xml = $marc->as_xml_record; +$xml =~ s/^<\?.+?\?>$//mo; +$xml =~ s/\n//sgo; +$xml =~ s/>\s+ 'UTF-8'); +use MARC::Charset; +use strict; + +MARC::Charset->assume_unicode(1); + +elog(ERROR, 'indicator position must be either 1 or 2') unless $pos =~ /^[12]$/; +elog(ERROR, 'MARC tag must be numeric') unless $tag =~ /^\d{3}$/; +elog(ERROR, 'MARC tag must not be control field') if $tag =~ /^00/; +elog(ERROR, 'Value must be exactly one character') unless $value =~ /^.$/; + +my $xml = $marcxml; +eval { + my $marc = MARC::Record->new_from_xml($marcxml, 'UTF-8'); + + foreach my $field ($marc->field($tag)) { + $field->update("ind$pos" => $value); + } + $xml = $marc->as_xml_record; + $xml =~ s/^<\?.+?\?>$//mo; + $xml =~ s/\n//sgo; + $xml =~ s/>\s+new_from_xml($marcxml, 'UTF-8'); + $field = $marc->leader(); + }; + return $field; +$$ LANGUAGE PLPERLU STABLE; + +CREATE OR REPLACE FUNCTION migration_tools.get_marc_tag (TEXT, TEXT, TEXT, TEXT) RETURNS TEXT AS $$ + my ($marcxml, $tag, $subfield, $delimiter) = @_; + + use MARC::Record; + use MARC::File::XML; + use MARC::Field; + + my $field; + eval { + my $marc = MARC::Record->new_from_xml($marcxml, 'UTF-8'); + $field = $marc->field($tag); + }; + return $field->as_string($subfield,$delimiter) if $field; + return; +$$ LANGUAGE PLPERLU STABLE; + +CREATE OR REPLACE FUNCTION migration_tools.get_marc_tags (TEXT, TEXT, TEXT, TEXT) RETURNS TEXT[] AS $$ + my ($marcxml, $tag, $subfield, $delimiter) = @_; + + use MARC::Record; + use MARC::File::XML; + use MARC::Field; + + my @fields; + eval { + my $marc = MARC::Record->new_from_xml($marcxml, 'UTF-8'); + @fields = $marc->field($tag); + }; + my @texts; + foreach my $field (@fields) { + push @texts, $field->as_string($subfield,$delimiter); + } + return \@texts; +$$ LANGUAGE PLPERLU STABLE; + +CREATE OR REPLACE FUNCTION migration_tools.get_marc_tags_filtered (TEXT, TEXT, TEXT, TEXT, TEXT) RETURNS TEXT[] AS $$ + my ($marcxml, $tag, $subfield, $delimiter, $match) = @_; + + use MARC::Record; + use MARC::File::XML; + use MARC::Field; + + my @fields; + eval { + my $marc = MARC::Record->new_from_xml($marcxml, 'UTF-8'); + @fields = $marc->field($tag); + }; + my @texts; + foreach my $field (@fields) { + if ($field->as_string() =~ qr/$match/) { + push @texts, $field->as_string($subfield,$delimiter); + } + } + return \@texts; +$$ LANGUAGE PLPERLU STABLE; + +DROP FUNCTION IF EXISTS migration_tools.munge_sf9(INTEGER,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.merge_sf9(bib_id INTEGER,new_sf9 TEXT,force TEXT DEFAULT 'false') + RETURNS BOOLEAN AS +$BODY$ +DECLARE + marc_xml TEXT; + new_marc TEXT; +BEGIN + SELECT marc FROM biblio.record_entry WHERE id = bib_id INTO marc_xml; + + SELECT munge_sf9(marc_xml,new_sf9,force) INTO new_marc; + UPDATE biblio.record_entry SET marc = new_marc WHERE id = bib_id; + + RETURN true; +END; +$BODY$ LANGUAGE plpgsql; + +DROP FUNCTION IF EXISTS migration_tools.munge_sf9(TEXT,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.munge_sf9(marc_xml TEXT, new_9_to_set TEXT, force TEXT) + RETURNS TEXT + LANGUAGE plperlu +AS $function$ +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'utf8'); + +binmode(STDERR, ':bytes'); +binmode(STDOUT, ':utf8'); +binmode(STDERR, ':utf8'); + +my $marc_xml = shift; +my $new_9_to_set = shift; +my $force = shift; + +$marc_xml =~ s/(.........)./${1}a/; + +eval { + $marc_xml = MARC::Record->new_from_xml($marc_xml); +}; +if ($@) { + #elog("could not parse $bibid: $@\n"); + import MARC::File::XML (BinaryEncoding => 'utf8'); + return $marc_xml; +} + +my @uris = $marc_xml->field('856'); +return $marc_xml->as_xml_record() unless @uris; + +foreach my $field (@uris) { + my $ind1 = $field->indicator('1'); + if (!defined $ind1) { next; } + if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'false') { next; } + if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'true') { $field->set_indicator(1,'4'); } + my $ind2 = $field->indicator('2'); + if (!defined $ind2) { next; } + if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'false') { next; } + if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'true') { $field->set_indicator(2,'0'); } + $field->add_subfields( '9' => $new_9_to_set ); +} + +return $marc_xml->as_xml_record(); + +$function$; + +DROP FUNCTION IF EXISTS migration_tools.munge_sf9_qualifying_match(TEXT,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.munge_sf9_qualifying_match(marc_xml TEXT, qualifying_match TEXT, new_9_to_set TEXT, force TEXT) + RETURNS TEXT + LANGUAGE plperlu +AS $function$ +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'utf8'); + +binmode(STDERR, ':bytes'); +binmode(STDOUT, ':utf8'); +binmode(STDERR, ':utf8'); + +my $marc_xml = shift; +my $qualifying_match = shift; +my $new_9_to_set = shift; +my $force = shift; + +$marc_xml =~ s/(.........)./${1}a/; + +eval { + $marc_xml = MARC::Record->new_from_xml($marc_xml); +}; +if ($@) { + #elog("could not parse $bibid: $@\n"); + import MARC::File::XML (BinaryEncoding => 'utf8'); + return $marc_xml; +} + +my @uris = $marc_xml->field('856'); +return $marc_xml->as_xml_record() unless @uris; + +foreach my $field (@uris) { + if ($field->as_string() =~ qr/$qualifying_match/) { + my $ind1 = $field->indicator('1'); + if (!defined $ind1) { next; } + if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'false') { next; } + if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'true') { $field->set_indicator(1,'4'); } + my $ind2 = $field->indicator('2'); + if (!defined $ind2) { next; } + if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'false') { next; } + if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'true') { $field->set_indicator(2,'0'); } + $field->add_subfields( '9' => $new_9_to_set ); + } +} + +return $marc_xml->as_xml_record(); + +$function$; + +DROP FUNCTION IF EXISTS migration_tools.owner_change_sf9_substring_match(TEXT,TEXT,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.owner_change_sf9_substring_match (marc_xml TEXT, substring_old_value TEXT, new_value TEXT, fix_indicators TEXT) + RETURNS TEXT + LANGUAGE plperlu +AS $function$ +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'utf8'); + +binmode(STDERR, ':bytes'); +binmode(STDOUT, ':utf8'); +binmode(STDERR, ':utf8'); + +my $marc_xml = shift; +my $substring_old_value = shift; +my $new_value = shift; +my $fix_indicators = shift; + +$marc_xml =~ s/(.........)./${1}a/; + +eval { + $marc_xml = MARC::Record->new_from_xml($marc_xml); +}; +if ($@) { + #elog("could not parse $bibid: $@\n"); + import MARC::File::XML (BinaryEncoding => 'utf8'); + return $marc_xml; +} + +my @uris = $marc_xml->field('856'); +return $marc_xml->as_xml_record() unless @uris; + +foreach my $field (@uris) { + my $ind1 = $field->indicator('1'); + if (defined $ind1) { + if ($ind1 ne '1' && $ind1 ne '4' && $fix_indicators eq 'true') { + $field->set_indicator(1,'4'); + } + } + my $ind2 = $field->indicator('2'); + if (defined $ind2) { + if ($ind2 ne '0' && $ind2 ne '1' && $fix_indicators eq 'true') { + $field->set_indicator(2,'0'); + } + } + if ($field->as_string('9') =~ qr/$substring_old_value/) { + $field->delete_subfield('9'); + $field->add_subfields( '9' => $new_value ); + } + $marc_xml->delete_field($field); # -- we're going to dedup and add them back +} + +my %hash = (map { ($_->as_usmarc => $_) } @uris); # -- courtesy of an old Mike Rylander post :-) +$marc_xml->insert_fields_ordered( values( %hash ) ); + +return $marc_xml->as_xml_record(); + +$function$; + +DROP FUNCTION IF EXISTS migration_tools.owner_change_sf9_substring_match2(TEXT,TEXT,TEXT,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.owner_change_sf9_substring_match2 (marc_xml TEXT, qualifying_match TEXT, substring_old_value TEXT, new_value TEXT, fix_indicators TEXT) + RETURNS TEXT + LANGUAGE plperlu +AS $function$ +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'utf8'); + +binmode(STDERR, ':bytes'); +binmode(STDOUT, ':utf8'); +binmode(STDERR, ':utf8'); + +my $marc_xml = shift; +my $qualifying_match = shift; +my $substring_old_value = shift; +my $new_value = shift; +my $fix_indicators = shift; + +$marc_xml =~ s/(.........)./${1}a/; + +eval { + $marc_xml = MARC::Record->new_from_xml($marc_xml); +}; +if ($@) { + #elog("could not parse $bibid: $@\n"); + import MARC::File::XML (BinaryEncoding => 'utf8'); + return $marc_xml; +} + +my @unqualified_uris = $marc_xml->field('856'); +my @uris = (); +foreach my $field (@unqualified_uris) { + if ($field->as_string() =~ qr/$qualifying_match/) { + push @uris, $field; + } +} +return $marc_xml->as_xml_record() unless @uris; + +foreach my $field (@uris) { + my $ind1 = $field->indicator('1'); + if (defined $ind1) { + if ($ind1 ne '1' && $ind1 ne '4' && $fix_indicators eq 'true') { + $field->set_indicator(1,'4'); + } + } + my $ind2 = $field->indicator('2'); + if (defined $ind2) { + if ($ind2 ne '0' && $ind2 ne '1' && $fix_indicators eq 'true') { + $field->set_indicator(2,'0'); + } + } + if ($field->as_string('9') =~ qr/$substring_old_value/) { + $field->delete_subfield('9'); + $field->add_subfields( '9' => $new_value ); + } + $marc_xml->delete_field($field); # -- we're going to dedup and add them back +} + +my %hash = (map { ($_->as_usmarc => $_) } @uris); # -- courtesy of an old Mike Rylander post :-) +$marc_xml->insert_fields_ordered( values( %hash ) ); + +return $marc_xml->as_xml_record(); + +$function$; + +-- strip marc tag +DROP FUNCTION IF EXISTS migration_tools.strip_tag(TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.strip_tag(marc TEXT, tag TEXT) + RETURNS TEXT + LANGUAGE plperlu +AS $function$ +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'utf8'); + +binmode(STDERR, ':bytes'); +binmode(STDOUT, ':utf8'); +binmode(STDERR, ':utf8'); + +my $marc_xml = shift; +my $tag = shift; + +$marc_xml =~ s/(.........)./${1}a/; + +eval { + $marc_xml = MARC::Record->new_from_xml($marc_xml); +}; +if ($@) { + #elog("could not parse $bibid: $@\n"); + import MARC::File::XML (BinaryEncoding => 'utf8'); + return $marc_xml; +} + +my @fields = $marc_xml->field($tag); +return $marc_xml->as_xml_record() unless @fields; + +$marc_xml->delete_fields(@fields); + +return $marc_xml->as_xml_record(); + +$function$; + +-- removes tags from record based on tag, subfield and evidence +-- example: strip_tag(marc, '500', 'a', 'gift') will remove 500s with 'gift' as a part of the $a +DROP FUNCTION IF EXISTS migration_tools.strip_tag(TEXT,TEXT,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.strip_tag(marc TEXT, tag TEXT, subfield TEXT, evidence TEXT) + RETURNS TEXT + LANGUAGE plperlu +AS $function$ +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'utf8'); + +binmode(STDERR, ':bytes'); +binmode(STDOUT, ':utf8'); +binmode(STDERR, ':utf8'); + +my $marc_xml = shift; +my $tag = shift; +my $subfield = shift; +my $evidence = shift; + +$marc_xml =~ s/(.........)./${1}a/; + +eval { + $marc_xml = MARC::Record->new_from_xml($marc_xml); +}; +if ($@) { + #elog("could not parse $bibid: $@\n"); + import MARC::File::XML (BinaryEncoding => 'utf8'); + return $marc_xml; +} + +my @fields = $marc_xml->field($tag); +return $marc_xml->as_xml_record() unless @fields; + +my @fields_to_delete; + +foreach my $f (@fields) { + my $sf = lc($f->as_string($subfield)); + if ($sf =~ m/$evidence/) { push @fields_to_delete, $f; } +} + +$marc_xml->delete_fields(@fields_to_delete); + +return $marc_xml->as_xml_record(); + +$function$; + +-- consolidate marc tag +DROP FUNCTION IF EXISTS migration_tools.consolidate_tag(TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.consolidate_tag(marc TEXT, tag TEXT) + RETURNS TEXT + LANGUAGE plperlu +AS $function$ +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'utf8'); + +binmode(STDERR, ':bytes'); +binmode(STDOUT, ':utf8'); +binmode(STDERR, ':utf8'); + +my $marc_xml = shift; +my $tag = shift; + +$marc_xml =~ s/(.........)./${1}a/; + +eval { + $marc_xml = MARC::Record->new_from_xml($marc_xml); +}; +if ($@) { + #elog("could not parse $bibid: $@\n"); + import MARC::File::XML (BinaryEncoding => 'utf8'); + return $marc_xml; +} + +my @fields = $marc_xml->field($tag); +return $marc_xml->as_xml_record() unless @fields; + +my @combined_subfield_refs = (); +my @combined_subfields = (); +foreach my $field (@fields) { + my @subfield_refs = $field->subfields(); + push @combined_subfield_refs, @subfield_refs; +} + +my @sorted_subfield_refs = reverse sort { $a->[0] <=> $b->[0] } @combined_subfield_refs; + +while ( my $tuple = pop( @sorted_subfield_refs ) ) { + my ($code,$data) = @$tuple; + unshift( @combined_subfields, $code, $data ); +} + +$marc_xml->delete_fields(@fields); + +my $new_field = new MARC::Field( + $tag, + $fields[0]->indicator(1), + $fields[0]->indicator(2), + @combined_subfields +); + +$marc_xml->insert_grouped_field( $new_field ); + +return $marc_xml->as_xml_record(); + +$function$; + +