From 0dfb2b6300d7d398cb514f0d4c0db899583eaa5b Mon Sep 17 00:00:00 2001
From: Jason Etheridge
Date: Wed, 3 May 2017 15:02:43 -0400
Subject: [PATCH] some stuff I had lying around

Signed-off-by: Jason Etheridge
---
 bibs_items/filter_out_mfhd.pl     |   99 +++++++++++++++++++++++++++++++++++++
 bibs_items/stage-HOLDINGS-MULT.pl |   56 +++++++++++++++++++++
 2 files changed, 155 insertions(+), 0 deletions(-)
 create mode 100755 bibs_items/filter_out_mfhd.pl
 create mode 100755 bibs_items/stage-HOLDINGS-MULT.pl

diff --git a/bibs_items/filter_out_mfhd.pl b/bibs_items/filter_out_mfhd.pl
new file mode 100755
index 0000000..7139027
--- /dev/null
+++ b/bibs_items/filter_out_mfhd.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl -w
+# ./filter_out_mfhd.pl marcfile > out 2> err
+# Looks for tcn_id.map2 containing lines like: 001_or_035value|eg_bib_id
+# Spits out mfhd.tsv (eg_bib_id<tab>marcxml<tab>eg_bib_id) and mfhd.bad.mrc
+# For marcfile, it expects a "title record", followed by one or more MFHD records. Rinse, repeat.
+#
+# A record with an 852 or 866 field is treated as an MFHD and attached to the
+# most recently seen title record; any other record is treated as a title.
+
+use strict;
+use warnings;
+use open ':utf8';
+
+use MARC::Batch;
+use Unicode::Normalize;
+use MARC::File::XML ( BinaryEncoding => 'utf-8' );
+use MARC::Field;
+
+my $batch = MARC::Batch->new( 'USMARC', @ARGV );
+$batch->strict_off();
+$batch->warnings_off();
+
+# Identifier (001, 035 or a "fishy: ..." placeholder) of the title record the
+# following MFHD records belong to.  Starts as '' so an MFHD that shows up
+# before any title lands in mfhd.bad.mrc without undef warnings.
+my $current_title = '';
+
+# Legacy control number => Evergreen bib id, loaded from tcn_id.map2.
+my %tcn2bid;
+
+# Bib id to show for a title identifier; '' when the identifier is unmapped.
+sub bib_id_for {
+    my ($tcn) = @_;
+    return defined $tcn2bid{$tcn} ? $tcn2bid{$tcn} : '';
+}
+
+open my $map_fh, '<', 'tcn_id.map2' or die "Cannot read tcn_id.map2: $!\n";
+while (my $line = <$map_fh>) {
+    # (.+) is greedy, so the line is split on its last '|'.
+    if ($line =~ /^(.+)\|(.*)$/) {
+        $tcn2bid{$1} = $2;
+    }
+}
+close $map_fh;
+
+open my $mfhd_fh, '>', 'mfhd.tsv' or die "Cannot write mfhd.tsv: $!\n";
+open my $bad_fh, '>', 'mfhd.bad.mrc' or die "Cannot write mfhd.bad.mrc: $!\n";
+while ( my $marc = $batch->next ) {
+    my $tag001 = $marc->field('001');
+    my $tag035 = $marc->field('035');
+    my $tag245 = $marc->field('245');
+    my $tag852 = $marc->field('852');
+    my $tag866 = $marc->field('866');
+
+    if ($tag852 || $tag866) {
+        # MFHD: link it to the current title through a 004 field.
+        print "\tMFHD\n";
+        my $bib_id = $tcn2bid{$current_title};
+        my $field = MARC::Field->new(
+            '004',
+            $bib_id ? $bib_id : 'missing: ' . $current_title
+        );
+        $marc->insert_fields_ordered( $field );
+        if ($bib_id) {
+            my $string = $marc->as_xml_record();
+            $string =~ s/\n//g;
+            $string =~ s/<\?xml version="1\.0" encoding="UTF-8"\?>//;
+            print {$mfhd_fh} "$bib_id\t$string\t$bib_id\n";
+        } else {
+            # No bib id for the owning title: keep the raw record for review.
+            print {$bad_fh} $marc->as_usmarc();
+        }
+    } elsif ($tag001) {
+        my $tcnv = $tag001->as_string();
+        if ($tcnv =~ /^\d*$/) {
+            # Empty or all-digit 001 and no 852/866: reported, and the
+            # current title is left unchanged.
+            print "fishy MFHD? with 001 $tcnv\n";
+            print STDERR "=== fishy MFHD? with 001 $tcnv\n";
+            print STDERR $marc->as_formatted() . "\n";
+        } else {
+            print "title with 001 $tcnv, eg bib id = " . bib_id_for($tcnv) . "\n";
+            $current_title = $tcnv;
+        }
+    } elsif ($tag035) {
+        my $tcnv = $tag035->as_string();
+        print "title with 035 $tcnv, eg bib id = " . bib_id_for($tcnv) . "\n";
+        $current_title = $tcnv;
+    } else {
+        # Neither 001 nor 035: fall back to the 245 text (if any) for the report.
+        my $tcnv = $tag245 ? $tag245->as_string() : '';
+        print "fishy title? missing 001 and 035: $tcnv\n";
+        print STDERR "=== fishy title? missing 001 and 035: $tcnv\n";
+        print STDERR $marc->as_formatted() . "\n";
+        $current_title = "fishy: $tcnv";
+    }
+}
+close $bad_fh or die "Cannot close mfhd.bad.mrc: $!\n";
+close $mfhd_fh or die "Cannot close mfhd.tsv: $!\n";
diff --git a/bibs_items/stage-HOLDINGS-MULT.pl b/bibs_items/stage-HOLDINGS-MULT.pl
new file mode 100755
index 0000000..1ed391f
--- /dev/null
+++ b/bibs_items/stage-HOLDINGS-MULT.pl
@@ -0,0 +1,56 @@
+#!/usr/bin/perl -w
+# sed -i 's/\\/\//g' *MULT*
+# ls *MULT* | ~/git/migration-tools/bibs_items/stage-HOLDINGS-MULT.pl >> scripts/asset_copy_stage.sql
+#
+# Reads file names (one per line) and, for each name of the form
+# <schema>-<one or more characters><subfield>.pg, prints SQL that loads the
+# file into m_<schema>.asset_copy_multi_legacy and tags the new rows with
+# <subfield> (the single character right before ".pg").
+
+use strict;
+use warnings;
+
+my $first_time = 1;
+my $schema;
+
+# Print the DDL for the staging table once, and remember the schema name
+# (taken from the first matching file name) for the final UPDATE.
+sub first_time {
+    $schema = shift;
+    $first_time = 0;
+    print qq^
+DROP TABLE IF EXISTS m_$schema.asset_copy_multi_legacy;
+CREATE TABLE m_$schema.asset_copy_multi_legacy (
+    eg_bib_id BIGINT,
+    eg_copy_id INTEGER,
+    hseq TEXT,
+    subfield TEXT,
+    value TEXT
+);
+CREATE INDEX ON m_$schema.asset_copy_multi_legacy (eg_bib_id);
+CREATE INDEX ON m_$schema.asset_copy_multi_legacy (eg_copy_id);
+CREATE INDEX ON m_$schema.asset_copy_multi_legacy (hseq);
+CREATE INDEX ON m_$schema.asset_copy_multi_legacy (subfield);
+CREATE INDEX ON m_$schema.asset_copy_multi_legacy (hseq,subfield);\n\n
+^;
+
+}
+
+
+while (my $line = <>) {
+    chomp $line;
+    if ($line =~ /^(.+?)-.+(.)\.pg$/) {
+        # Copy the captures right away; $1/$2 do not survive the next match.
+        my ($file_schema, $subfield) = ($1, $2);
+        first_time($file_schema) if $first_time;
+        print "\\COPY m_$file_schema.asset_copy_multi_legacy (eg_bib_id,hseq,value) FROM '$line'\n";
+        print "UPDATE m_$file_schema.asset_copy_multi_legacy SET subfield = '$subfield' WHERE subfield IS NULL;\n\n";
+    }
+}
+
+# $schema stays undefined when no file name matched; printing the final
+# UPDATE then would append a broken "m_." statement to the SQL script.
+if (defined $schema) {
+    print "UPDATE m_$schema.asset_copy_multi_legacy SET eg_copy_id = b.id FROM m_$schema.asset_copy_legacy b WHERE x_eg_bib_id = eg_bib_id AND x_hseq = hseq;\n\n";
+} else {
+    warn "stage-HOLDINGS-MULT.pl: no matching .pg file names on input; nothing to stage\n";
+}
-- 
1.7.2.5