X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=sql%2Fbase%2Fbase.sql;h=b5ae24bd104d933adf08424be1b0b837e8ef21b2;hp=603cffebba4dfe93c9b89e9d6c29ba391f5bb900;hb=5c9cf67d5fa1128dacab0656789a72ba887e288e;hpb=56e3292334866e6b1eecb3b619d650e24ca67dba diff --git a/sql/base/base.sql b/sql/base/base.sql index 603cffe..b5ae24b 100644 --- a/sql/base/base.sql +++ b/sql/base/base.sql @@ -244,7 +244,7 @@ CREATE OR REPLACE FUNCTION migration_tools.build_specific_base_staging_table (TE BEGIN base_staging_table = REPLACE( production_table, '.', '_' ); --RAISE INFO 'In migration_tools.build_specific_base_staging_table(%,%) -> %', migration_schema, production_table, base_staging_table; - PERFORM migration_tools.exec( $1, 'CREATE UNLOGGED TABLE ' || migration_schema || '.' || base_staging_table || ' ( LIKE ' || production_table || ' INCLUDING DEFAULTS EXCLUDING CONSTRAINTS );' ); + PERFORM migration_tools.exec( $1, 'CREATE TABLE ' || migration_schema || '.' || base_staging_table || ' ( LIKE ' || production_table || ' INCLUDING DEFAULTS EXCLUDING CONSTRAINTS );' ); PERFORM migration_tools.exec( $1, ' INSERT INTO ' || migration_schema || '.fields_requiring_mapping SELECT table_schema, table_name, column_name, data_type @@ -272,7 +272,7 @@ CREATE OR REPLACE FUNCTION migration_tools.create_linked_legacy_table_from (TEXT column_list TEXT := ''; column_count INTEGER := 0; BEGIN - create_sql := 'CREATE UNLOGGED TABLE ' || migration_schema || '.' || parent_table || '_legacy ( '; + create_sql := 'CREATE TABLE ' || migration_schema || '.' || parent_table || '_legacy ( '; FOR columns IN SELECT table_schema, table_name, column_name, data_type FROM information_schema.columns @@ -573,6 +573,98 @@ CREATE OR REPLACE FUNCTION migration_tools.name_parse_out_fuller_last_first_midd END; $$ LANGUAGE PLPGSQL STRICT IMMUTABLE; +CREATE OR REPLACE FUNCTION migration_tools.name_parse_out_fuller_last_first_middle_and_random_affix2 (TEXT) RETURNS TEXT[] AS $$ + DECLARE + full_name TEXT := $1; + temp TEXT; + family_name TEXT := ''; + first_given_name TEXT := ''; + second_given_name TEXT := ''; + suffix TEXT := ''; + prefix TEXT := ''; + BEGIN + temp := BTRIM(full_name); + -- Use values, not structure, for prefix/suffix, unless we come up with a better idea + --IF temp ~ '^\S{2,}\.' THEN + -- prefix := REGEXP_REPLACE(temp, '^(\S{2,}\.).*$','\1'); + -- temp := BTRIM(REGEXP_REPLACE(temp, '^\S{2,}\.(.*)$','\1')); + --END IF; + --IF temp ~ '\S{2,}\.$' THEN + -- suffix := REGEXP_REPLACE(temp, '^.*(\S{2,}\.)$','\1'); + -- temp := REGEXP_REPLACE(temp, '^(.*)\S{2,}\.$','\1'); + --END IF; + IF temp ilike '%MR.%' THEN + prefix := 'Mr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'MR\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%MRS.%' THEN + prefix := 'Mrs.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'MRS\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%MS.%' THEN + prefix := 'Ms.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'MS\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%DR.%' THEN + prefix := 'Dr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'DR\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%JR.%' THEN + suffix := 'Jr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'JR\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%JR,%' THEN + suffix := 'Jr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'JR,\\s*', ',', 'i' )); + END IF; + IF temp ilike '%SR.%' THEN + suffix := 'Sr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'SR\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%SR,%' THEN + suffix := 'Sr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'SR,\\s*', ',', 'i' )); + END IF; + IF temp like '%III%' THEN + suffix := 'III'; + temp := BTRIM(REGEXP_REPLACE( temp, E'III', '' )); + END IF; + IF temp like '%II%' THEN + suffix := 'II'; + temp := BTRIM(REGEXP_REPLACE( temp, E'II', '' )); + END IF; + + IF temp ~ ',' THEN + family_name = BTRIM(REGEXP_REPLACE(temp,'^(.*?,).*$','\1')); + temp := BTRIM(REPLACE( temp, family_name, '' )); + family_name := REPLACE( family_name, ',', '' ); + IF temp ~ ' ' THEN + first_given_name := BTRIM( REGEXP_REPLACE(temp,'^(.+)\s(.+)$','\1') ); + second_given_name := BTRIM( REGEXP_REPLACE(temp,'^(.+)\s(.+)$','\2') ); + ELSE + first_given_name := temp; + second_given_name := ''; + END IF; + ELSE + IF temp ~ '^\S+\s+\S+\s+\S+$' THEN + first_given_name := BTRIM( REGEXP_REPLACE(temp,'^(\S+)\s*(\S+)\s*(\S+)$','\1') ); + second_given_name := BTRIM( REGEXP_REPLACE(temp,'^(\S+)\s*(\S+)\s*(\S+)$','\2') ); + family_name := BTRIM( REGEXP_REPLACE(temp,'^(\S+)\s*(\S+)\s*(\S+)$','\3') ); + ELSE + first_given_name := BTRIM( REGEXP_REPLACE(temp,'^(\S+)\s*(\S+)$','\1') ); + second_given_name := temp; + family_name := BTRIM( REGEXP_REPLACE(temp,'^(\S+)\s*(\S+)$','\2') ); + END IF; + END IF; + + family_name := BTRIM(REPLACE(REPLACE(family_name,',',''),'"','')); + first_given_name := BTRIM(REPLACE(REPLACE(first_given_name,',',''),'"','')); + second_given_name := BTRIM(REPLACE(REPLACE(second_given_name,',',''),'"','')); + + RETURN ARRAY[ family_name, prefix, first_given_name, second_given_name, suffix ]; + END; +$$ LANGUAGE PLPGSQL STRICT IMMUTABLE; + CREATE OR REPLACE FUNCTION migration_tools.address_parse_out_citystatezip (TEXT) RETURNS TEXT[] AS $$ DECLARE city_state_zip TEXT := $1; @@ -688,7 +780,7 @@ CREATE OR REPLACE FUNCTION migration_tools.parse_out_address2 (TEXT) RETURNS TEX $$ LANGUAGE PLPERLU STABLE; DROP TABLE IF EXISTS migration_tools.usps_suffixes; -CREATE UNLOGGED TABLE migration_tools.usps_suffixes ( suffix_from TEXT, suffix_to TEXT ); +CREATE TABLE migration_tools.usps_suffixes ( suffix_from TEXT, suffix_to TEXT ); INSERT INTO migration_tools.usps_suffixes VALUES ('ALLEE','ALY'), ('ALLEY','ALY'), @@ -3071,6 +3163,73 @@ The second argument is an array of text containing the values to plug into each If the value for a given field is NULL or the empty string, it is not inserted. $$; +CREATE OR REPLACE FUNCTION migration_tools.make_stub_bib (text[], text[], text[], text[]) RETURNS TEXT AS $func$ + +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'UTF-8'); +use Text::CSV; + +my $in_tags = shift; +my $in_ind1 = shift; +my $in_ind2 = shift; +my $in_values = shift; + +# hack-and-slash parsing of array-passed-as-string; +# this can go away once everybody is running Postgres 9.1+ +my $csv = Text::CSV->new({binary => 1}); +$in_tags =~ s/^{//; +$in_tags =~ s/}$//; +my $status = $csv->parse($in_tags); +my $tags = [ $csv->fields() ]; +$in_ind1 =~ s/^{//; +$in_ind1 =~ s/}$//; +$status = $csv->parse($in_ind1); +my $ind1s = [ $csv->fields() ]; +$in_ind2 =~ s/^{//; +$in_ind2 =~ s/}$//; +$status = $csv->parse($in_ind2); +my $ind2s = [ $csv->fields() ]; +$in_values =~ s/^{//; +$in_values =~ s/}$//; +$status = $csv->parse($in_values); +my $values = [ $csv->fields() ]; + +my $marc = MARC::Record->new(); + +$marc->leader('00000nam a22000007 4500'); +$marc->append_fields(MARC::Field->new('008', '000000s 000 eng d')); + +foreach my $i (0..$#$tags) { + my ($tag, $sf); + if ($tags->[$i] =~ /^(\d{3})([0-9a-z])$/) { + $tag = $1; + $sf = $2; + $marc->append_fields(MARC::Field->new($tag, $ind1s->[$i], $ind2s->[$i], $sf => $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; + } elsif ($tags->[$i] =~ /^(\d{3})$/) { + $tag = $1; + $marc->append_fields(MARC::Field->new($tag, $ind1s->[$i], $ind2s->[$i], $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; + } +} + +my $xml = $marc->as_xml_record; +$xml =~ s/^<\?.+?\?>$//mo; +$xml =~ s/\n//sgo; +$xml =~ s/>\s+new_from_xml($marcxml, 'UTF-8'); + @fields = $marc->field($tag); + }; + my @texts; + foreach my $field (@fields) { + if ($field->as_string() =~ qr/$match/) { + push @texts, $field->as_string($subfield,$delimiter); + } + } + return \@texts; +$$ LANGUAGE PLPERLU STABLE; + CREATE OR REPLACE FUNCTION migration_tools.find_hold_matrix_matchpoint (INTEGER) RETURNS INTEGER AS $$ SELECT action.find_hold_matrix_matchpoint( (SELECT pickup_lib FROM action.hold_request WHERE id = $1), @@ -3558,13 +3738,17 @@ $$ LANGUAGE PLPGSQL STRICT VOLATILE; -- convenience functions for handling copy_location maps - CREATE OR REPLACE FUNCTION migration_tools.handle_shelf (TEXT,TEXT,TEXT,INTEGER) RETURNS VOID AS $$ + SELECT migration_tools._handle_shelf($1,$2,$3,$4,TRUE); +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION migration_tools._handle_shelf (TEXT,TEXT,TEXT,INTEGER,BOOLEAN) RETURNS VOID AS $$ DECLARE table_schema ALIAS FOR $1; table_name ALIAS FOR $2; org_shortname ALIAS FOR $3; org_range ALIAS FOR $4; + make_assertion ALIAS FOR $5; proceed BOOLEAN; org INTEGER; -- if x_org is on the mapping table, it'll take precedence over the passed org_shortname param @@ -3573,6 +3757,7 @@ CREATE OR REPLACE FUNCTION migration_tools.handle_shelf (TEXT,TEXT,TEXT,INTEGER) x_org INTEGER; org_list INTEGER[]; o INTEGER; + row_count NUMERIC; BEGIN EXECUTE 'SELECT EXISTS ( SELECT 1 @@ -3608,26 +3793,28 @@ CREATE OR REPLACE FUNCTION migration_tools.handle_shelf (TEXT,TEXT,TEXT,INTEGER) || ' ADD COLUMN x_shelf INTEGER'; IF x_org_found THEN + RAISE INFO 'Found x_org column'; EXECUTE 'UPDATE ' || quote_ident(table_name) || ' a' - || ' SET x_shelf = id FROM asset_copy_location b' + || ' SET x_shelf = b.id FROM asset_copy_location b' || ' WHERE BTRIM(UPPER(a.desired_shelf)) = BTRIM(UPPER(b.name))' || ' AND b.owning_lib = x_org' || ' AND NOT b.deleted'; EXECUTE 'UPDATE ' || quote_ident(table_name) || ' a' - || ' SET x_shelf = id FROM asset.copy_location b' + || ' SET x_shelf = b.id FROM asset.copy_location b' || ' WHERE BTRIM(UPPER(a.desired_shelf)) = BTRIM(UPPER(b.name))' || ' AND b.owning_lib = x_org' || ' AND x_shelf IS NULL' || ' AND NOT b.deleted'; ELSE + RAISE INFO 'Did not find x_org column'; EXECUTE 'UPDATE ' || quote_ident(table_name) || ' a' - || ' SET x_shelf = id FROM asset_copy_location b' + || ' SET x_shelf = b.id FROM asset_copy_location b' || ' WHERE BTRIM(UPPER(a.desired_shelf)) = BTRIM(UPPER(b.name))' || ' AND b.owning_lib = $1' || ' AND NOT b.deleted' USING org; EXECUTE 'UPDATE ' || quote_ident(table_name) || ' a' - || ' SET x_shelf = id FROM asset_copy_location b' + || ' SET x_shelf = b.id FROM asset_copy_location b' || ' WHERE BTRIM(UPPER(a.desired_shelf)) = BTRIM(UPPER(b.name))' || ' AND b.owning_lib = $1' || ' AND x_shelf IS NULL' @@ -3636,19 +3823,24 @@ CREATE OR REPLACE FUNCTION migration_tools.handle_shelf (TEXT,TEXT,TEXT,INTEGER) END IF; FOREACH o IN ARRAY org_list LOOP + RAISE INFO 'Considering org %', o; EXECUTE 'UPDATE ' || quote_ident(table_name) || ' a' - || ' SET x_shelf = id FROM asset.copy_location b' + || ' SET x_shelf = b.id FROM asset.copy_location b' || ' WHERE BTRIM(UPPER(a.desired_shelf)) = BTRIM(UPPER(b.name))' || ' AND b.owning_lib = $1 AND x_shelf IS NULL' || ' AND NOT b.deleted' USING o; + GET DIAGNOSTICS row_count = ROW_COUNT; + RAISE INFO 'Updated % rows', row_count; END LOOP; - EXECUTE 'SELECT migration_tools.assert( - NOT EXISTS (SELECT 1 FROM ' || quote_ident(table_name) || ' WHERE desired_shelf <> '''' AND x_shelf IS NULL), - ''Cannot find a desired location'', - ''Found all desired locations'' - );'; + IF make_assertion THEN + EXECUTE 'SELECT migration_tools.assert( + NOT EXISTS (SELECT 1 FROM ' || quote_ident(table_name) || ' WHERE desired_shelf <> '''' AND x_shelf IS NULL), + ''Cannot find a desired location'', + ''Found all desired locations'' + );'; + END IF; END; $$ LANGUAGE PLPGSQL STRICT VOLATILE; @@ -3768,7 +3960,7 @@ CREATE OR REPLACE FUNCTION migration_tools.handle_org (TEXT,TEXT) RETURNS VOID A || ' ADD COLUMN x_org INTEGER'; EXECUTE 'UPDATE ' || quote_ident(table_name) || ' a' - || ' SET x_org = id FROM actor.org_unit b' + || ' SET x_org = b.id FROM actor.org_unit b' || ' WHERE BTRIM(a.desired_org) = BTRIM(b.shortname)'; EXECUTE 'SELECT migration_tools.assert( @@ -4182,9 +4374,25 @@ BEGIN END $$ LANGUAGE plpgsql; --- yet another subfield 9 function, this one only adds the $9 if the ind1 = 1 or 4 and ind2 = 0 or 1 -DROP FUNCTION IF EXISTS migration_tools.strict_add_sf9(TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.strict_add_sf9(marc TEXT, new_9 TEXT) +DROP FUNCTION IF EXISTS migration_tools.munge_sf9(INTEGER,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.merge_group(bib_id INTEGER,new_sf9 TEXT,force TEXT DEFAULT 'false') + RETURNS BOOLEAN AS +$BODY$ +DECLARE + marc_xml TEXT; + new_marc TEXT; +BEGIN + SELECT marc FROM biblio.record_entry WHERE id = bib_id INTO marc_xml; + + SELECT munge_sf9(marc_xml,new_sf9,force) INTO new_marc; + UPDATE biblio.record_entry SET marc = new_marc WHERE id = bib_id; + + RETURN true; +END; +$BODY$ LANGUAGE plpgsql; + +DROP FUNCTION IF EXISTS migration_tools.munge_sf9(TEXT,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.munge_sf9(marc_xml TEXT, new_9_to_set TEXT, force TEXT) RETURNS TEXT LANGUAGE plperlu AS $function$ @@ -4200,6 +4408,7 @@ binmode(STDERR, ':utf8'); my $marc_xml = shift; my $new_9_to_set = shift; +my $force = shift; $marc_xml =~ s/(.........)./${1}a/; @@ -4218,10 +4427,12 @@ return $marc_xml->as_xml_record() unless @uris; foreach my $field (@uris) { my $ind1 = $field->indicator('1'); if (!defined $ind1) { next; } - if ($ind1 ne '1' && $ind1 ne '4') { next; } + if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'false') { next; } + if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'true') { $field->set_indicator(1,'4'); } my $ind2 = $field->indicator('2'); if (!defined $ind2) { next; } - if ($ind2 ne '0' && $ind2 ne '1') { next; } + if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'false') { next; } + if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'true') { $field->set_indicator(2,'0'); } $field->add_subfields( '9' => $new_9_to_set ); } @@ -4229,10 +4440,8 @@ return $marc_xml->as_xml_record(); $function$; --- yet another subfield 9 function, this one only adds the $9 and forces --- ind1 = 4 if not already ind1 = 1 or 4 and ind2 = 0 if not already ind2 = 0 or 1 -DROP FUNCTION IF EXISTS migration_tools.force_add_sf9(TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.force_add_sf9(marc TEXT, new_9 TEXT) +DROP FUNCTION IF EXISTS migration_tools.munge_sf9_qualifying_match(TEXT,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.munge_sf9_qualifying_match(marc_xml TEXT, qualifying_match TEXT, new_9_to_set TEXT, force TEXT) RETURNS TEXT LANGUAGE plperlu AS $function$ @@ -4247,7 +4456,9 @@ binmode(STDOUT, ':utf8'); binmode(STDERR, ':utf8'); my $marc_xml = shift; +my $qualifying_match = shift; my $new_9_to_set = shift; +my $force = shift; $marc_xml =~ s/(.........)./${1}a/; @@ -4264,22 +4475,25 @@ my @uris = $marc_xml->field('856'); return $marc_xml->as_xml_record() unless @uris; foreach my $field (@uris) { - my $ind1 = $field->indicator('1'); - if (!defined $ind1) { next; } - if ($ind1 ne '1' && $ind1 ne '4') { $field->set_indicator(1,'4'); } - my $ind2 = $field->indicator('2'); - if (!defined $ind2) { next; } - if ($ind2 ne '0' && $ind2 ne '1') { $field->set_indicator(2,'0'); } - $field->add_subfields( '9' => $new_9_to_set ); + if ($field->as_string() =~ qr/$qualifying_match/) { + my $ind1 = $field->indicator('1'); + if (!defined $ind1) { next; } + if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'false') { next; } + if ($ind1 ne '1' && $ind1 ne '4' && $force eq 'true') { $field->set_indicator(1,'4'); } + my $ind2 = $field->indicator('2'); + if (!defined $ind2) { next; } + if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'false') { next; } + if ($ind2 ne '0' && $ind2 ne '1' && $force eq 'true') { $field->set_indicator(2,'0'); } + $field->add_subfields( '9' => $new_9_to_set ); + } } return $marc_xml->as_xml_record(); $function$; --- alternate adding subfield 9 function in that it adds them to existing tags where the 856$u matches a correct value only -DROP FUNCTION IF EXISTS migration_tools.add_sf9(TEXT,TEXT,TEXT); -CREATE OR REPLACE FUNCTION migration_tools.add_sf9(marc TEXT, partial_u TEXT, new_9 TEXT) +DROP FUNCTION IF EXISTS migration_tools.owner_change_sf9_substring_match(TEXT,TEXT,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.owner_change_sf9_substring_match (marc_xml TEXT, substring_old_value TEXT, new_value TEXT, fix_indicators TEXT) RETURNS TEXT LANGUAGE plperlu AS $function$ @@ -4294,8 +4508,9 @@ binmode(STDOUT, ':utf8'); binmode(STDERR, ':utf8'); my $marc_xml = shift; -my $matching_u_text = shift; -my $new_9_to_set = shift; +my $substring_old_value = shift; +my $new_value = shift; +my $fix_indicators = shift; $marc_xml =~ s/(.........)./${1}a/; @@ -4305,54 +4520,106 @@ eval { if ($@) { #elog("could not parse $bibid: $@\n"); import MARC::File::XML (BinaryEncoding => 'utf8'); - return; + return $marc_xml; } my @uris = $marc_xml->field('856'); -return unless @uris; +return $marc_xml->as_xml_record() unless @uris; foreach my $field (@uris) { - my $sfu = $field->subfield('u'); + my $ind1 = $field->indicator('1'); + if (defined $ind1) { + if ($ind1 ne '1' && $ind1 ne '4' && $fix_indicators eq 'true') { + $field->set_indicator(1,'4'); + } + } my $ind2 = $field->indicator('2'); - if (!defined $ind2) { next; } - if ($ind2 ne '0') { next; } - if (!defined $sfu) { next; } - if ($sfu =~ m/$matching_u_text/ or $matching_u_text eq 'pineapple') { - $field->add_subfields( '9' => $new_9_to_set ); - last; + if (defined $ind2) { + if ($ind2 ne '0' && $ind2 ne '1' && $fix_indicators eq 'true') { + $field->set_indicator(2,'0'); + } } + if ($field->as_string('9') =~ qr/$substring_old_value/) { + $field->delete_subfield('9'); + $field->add_subfields( '9' => $new_value ); + } + $marc_xml->delete_field($field); # -- we're going to dedup and add them back } +my %hash = (map { ($_->as_usmarc => $_) } @uris); # -- courtesy of an old Mike Rylander post :-) +$marc_xml->insert_fields_ordered( values( %hash ) ); + return $marc_xml->as_xml_record(); $function$; -DROP FUNCTION IF EXISTS migration_tools.add_sf9(BIGINT, TEXT, TEXT, REGCLASS); -CREATE OR REPLACE FUNCTION migration_tools.add_sf9(bib_id BIGINT, target_u_text TEXT, sf9_text TEXT, bib_table REGCLASS) - RETURNS BOOLEAN AS -$BODY$ -DECLARE - source_xml TEXT; - new_xml TEXT; - r BOOLEAN; -BEGIN +DROP FUNCTION IF EXISTS migration_tools.owner_change_sf9_substring_match2(TEXT,TEXT,TEXT,TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.owner_change_sf9_substring_match2 (marc_xml TEXT, qualifying_match TEXT, substring_old_value TEXT, new_value TEXT, fix_indicators TEXT) + RETURNS TEXT + LANGUAGE plperlu +AS $function$ +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'utf8'); - EXECUTE 'SELECT marc FROM ' || bib_table || ' WHERE id = ' || bib_id INTO source_xml; +binmode(STDERR, ':bytes'); +binmode(STDOUT, ':utf8'); +binmode(STDERR, ':utf8'); - SELECT migration_tools.add_sf9(source_xml, target_u_text, sf9_text) INTO new_xml; +my $marc_xml = shift; +my $qualifying_match = shift; +my $substring_old_value = shift; +my $new_value = shift; +my $fix_indicators = shift; - r = FALSE; - new_xml = '$_$' || new_xml || '$_$'; +$marc_xml =~ s/(.........)./${1}a/; - IF new_xml != source_xml THEN - EXECUTE 'UPDATE ' || bib_table || ' SET marc = ' || new_xml || ' WHERE id = ' || bib_id; - r = TRUE; - END IF; +eval { + $marc_xml = MARC::Record->new_from_xml($marc_xml); +}; +if ($@) { + #elog("could not parse $bibid: $@\n"); + import MARC::File::XML (BinaryEncoding => 'utf8'); + return $marc_xml; +} - RETURN r; +my @unqualified_uris = $marc_xml->field('856'); +my @uris = (); +foreach my $field (@unqualified_uris) { + if ($field->as_string() =~ qr/$qualifying_match/) { + push @uris, $field; + } +} +return $marc_xml->as_xml_record() unless @uris; -END; -$BODY$ LANGUAGE plpgsql; +foreach my $field (@uris) { + my $ind1 = $field->indicator('1'); + if (defined $ind1) { + if ($ind1 ne '1' && $ind1 ne '4' && $fix_indicators eq 'true') { + $field->set_indicator(1,'4'); + } + } + my $ind2 = $field->indicator('2'); + if (defined $ind2) { + if ($ind2 ne '0' && $ind2 ne '1' && $fix_indicators eq 'true') { + $field->set_indicator(2,'0'); + } + } + if ($field->as_string('9') =~ qr/$substring_old_value/) { + $field->delete_subfield('9'); + $field->add_subfields( '9' => $new_value ); + } + $marc_xml->delete_field($field); # -- we're going to dedup and add them back +} + +my %hash = (map { ($_->as_usmarc => $_) } @uris); # -- courtesy of an old Mike Rylander post :-) +$marc_xml->insert_fields_ordered( values( %hash ) ); + +return $marc_xml->as_xml_record(); + +$function$; -- strip marc tag DROP FUNCTION IF EXISTS migration_tools.strip_tag(TEXT,TEXT); @@ -5174,3 +5441,320 @@ BEGIN RETURN TRUE; END $function$; + + +-- convenience function for handling item barcode collisions in asset_copy_legacy + +CREATE OR REPLACE FUNCTION migration_tools.handle_asset_barcode_collisions(migration_schema TEXT) RETURNS VOID AS $function$ +DECLARE + x_barcode TEXT; + x_id BIGINT; + row_count NUMERIC; + internal_collision_count NUMERIC := 0; + incumbent_collision_count NUMERIC := 0; +BEGIN + FOR x_barcode IN SELECT barcode FROM asset_copy_legacy WHERE x_migrate GROUP BY 1 HAVING COUNT(*) > 1 + LOOP + FOR x_id IN SELECT id FROM asset_copy WHERE barcode = x_barcode + LOOP + UPDATE asset_copy SET barcode = migration_schema || '_internal_collision_' || id || '_' || barcode WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + internal_collision_count := internal_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% internal collisions', internal_collision_count; + FOR x_barcode IN SELECT a.barcode FROM asset.copy a, asset_copy_legacy b WHERE x_migrate AND a.deleted IS FALSE AND a.barcode = b.barcode + LOOP + FOR x_id IN SELECT id FROM asset_copy_legacy WHERE barcode = x_barcode + LOOP + UPDATE asset_copy_legacy SET barcode = migration_schema || '_incumbent_collision_' || id || '_' || barcode WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + incumbent_collision_count := incumbent_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% incumbent collisions', incumbent_collision_count; +END +$function$ LANGUAGE plpgsql; + +-- convenience function for handling patron barcode/usrname collisions in actor_usr_legacy +-- this should be ran prior to populating actor_card + +CREATE OR REPLACE FUNCTION migration_tools.handle_actor_barcode_collisions(migration_schema TEXT) RETURNS VOID AS $function$ +DECLARE + x_barcode TEXT; + x_id BIGINT; + row_count NUMERIC; + internal_collision_count NUMERIC := 0; + incumbent_barcode_collision_count NUMERIC := 0; + incumbent_usrname_collision_count NUMERIC := 0; +BEGIN + FOR x_barcode IN SELECT usrname FROM actor_usr_legacy WHERE x_migrate GROUP BY 1 HAVING COUNT(*) > 1 + LOOP + FOR x_id IN SELECT id FROM actor_usr_legacy WHERE x_migrate AND usrname = x_barcode + LOOP + UPDATE actor_usr_legacy SET usrname = migration_schema || '_internal_collision_' || id || '_' || usrname WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + internal_collision_count := internal_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% internal usrname/barcode collisions', internal_collision_count; + + FOR x_barcode IN + SELECT a.barcode FROM actor.card a, actor_usr_legacy b WHERE x_migrate AND a.barcode = b.usrname + LOOP + FOR x_id IN SELECT DISTINCT id FROM actor_usr_legacy WHERE x_migrate AND usrname = x_barcode + LOOP + UPDATE actor_usr_legacy SET usrname = migration_schema || '_incumbent_barcode_collision_' || id || '_' || usrname WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + incumbent_barcode_collision_count := incumbent_barcode_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% incumbent barcode collisions', incumbent_barcode_collision_count; + + FOR x_barcode IN + SELECT a.usrname FROM actor.usr a, actor_usr_legacy b WHERE x_migrate AND a.usrname = b.usrname + LOOP + FOR x_id IN SELECT DISTINCT id FROM actor_usr_legacy WHERE x_migrate AND usrname = x_barcode + LOOP + UPDATE actor_usr_legacy SET usrname = migration_schema || '_incumbent_usrname_collision_' || id || '_' || usrname WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + incumbent_usrname_collision_count := incumbent_usrname_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% incumbent usrname collisions (post barcode collision munging)', incumbent_usrname_collision_count; +END +$function$ LANGUAGE plpgsql; + +-- alternate version: convenience function for handling item barcode collisions in asset_copy_legacy + +CREATE OR REPLACE FUNCTION migration_tools.handle_asset_barcode_collisions2(migration_schema TEXT) RETURNS VOID AS $function$ +DECLARE + x_barcode TEXT; + x_id BIGINT; + row_count NUMERIC; + internal_collision_count NUMERIC := 0; + incumbent_collision_count NUMERIC := 0; +BEGIN + FOR x_barcode IN SELECT barcode FROM asset_copy_legacy WHERE x_migrate GROUP BY 1 HAVING COUNT(*) > 1 + LOOP + FOR x_id IN SELECT id FROM asset_copy WHERE barcode = x_barcode + LOOP + UPDATE asset_copy SET barcode = migration_schema || '_internal_collision_' || id || '_' || barcode WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + internal_collision_count := internal_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% internal collisions', internal_collision_count; + FOR x_barcode IN SELECT a.barcode FROM asset.copy a, asset_copy_legacy b WHERE x_migrate AND a.deleted IS FALSE AND a.barcode = b.barcode + LOOP + FOR x_id IN SELECT id FROM asset_copy_legacy WHERE barcode = x_barcode + LOOP + UPDATE asset_copy_legacy SET barcode = migration_schema || '_' || barcode WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + incumbent_collision_count := incumbent_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% incumbent collisions', incumbent_collision_count; +END +$function$ LANGUAGE plpgsql; + +-- alternate version: convenience function for handling patron barcode/usrname collisions in actor_usr_legacy +-- this should be ran prior to populating actor_card + +CREATE OR REPLACE FUNCTION migration_tools.handle_actor_barcode_collisions2(migration_schema TEXT) RETURNS VOID AS $function$ +DECLARE + x_barcode TEXT; + x_id BIGINT; + row_count NUMERIC; + internal_collision_count NUMERIC := 0; + incumbent_barcode_collision_count NUMERIC := 0; + incumbent_usrname_collision_count NUMERIC := 0; +BEGIN + FOR x_barcode IN SELECT usrname FROM actor_usr_legacy WHERE x_migrate GROUP BY 1 HAVING COUNT(*) > 1 + LOOP + FOR x_id IN SELECT id FROM actor_usr_legacy WHERE x_migrate AND usrname = x_barcode + LOOP + UPDATE actor_usr_legacy SET usrname = migration_schema || '_internal_collision_' || id || '_' || usrname WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + internal_collision_count := internal_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% internal usrname/barcode collisions', internal_collision_count; + + FOR x_barcode IN + SELECT a.barcode FROM actor.card a, actor_usr_legacy b WHERE x_migrate AND a.barcode = b.usrname + LOOP + FOR x_id IN SELECT DISTINCT id FROM actor_usr_legacy WHERE x_migrate AND usrname = x_barcode + LOOP + UPDATE actor_usr_legacy SET usrname = migration_schema || '_' || usrname WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + incumbent_barcode_collision_count := incumbent_barcode_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% incumbent barcode collisions', incumbent_barcode_collision_count; + + FOR x_barcode IN + SELECT a.usrname FROM actor.usr a, actor_usr_legacy b WHERE x_migrate AND a.usrname = b.usrname + LOOP + FOR x_id IN SELECT DISTINCT id FROM actor_usr_legacy WHERE x_migrate AND usrname = x_barcode + LOOP + UPDATE actor_usr_legacy SET usrname = migration_schema || '_' || usrname WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + incumbent_usrname_collision_count := incumbent_usrname_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% incumbent usrname collisions (post barcode collision munging)', incumbent_usrname_collision_count; +END +$function$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION migration_tools.is_circ_rule_safe_to_delete( test_matchpoint INTEGER ) RETURNS BOOLEAN AS $func$ +-- WARNING: Use at your own risk +-- FIXME: not considering marc_type, marc_form, marc_bib_level, marc_vr_format, usr_age_lower_bound, usr_age_upper_bound, item_age +DECLARE + item_object asset.copy%ROWTYPE; + user_object actor.usr%ROWTYPE; + test_rule_object config.circ_matrix_matchpoint%ROWTYPE; + result_rule_object config.circ_matrix_matchpoint%ROWTYPE; + safe_to_delete BOOLEAN := FALSE; + m action.found_circ_matrix_matchpoint; + result_matchpoint INTEGER; + -- ( success BOOL, matchpoint config.circ_matrix_matchpoint, buildrows INT[] ) +BEGIN + SELECT INTO test_rule_object * FROM config.circ_matrix_matchpoint WHERE id = test_matchpoint; + RAISE INFO 'testing rule: %', test_rule_object; + + INSERT INTO actor.usr ( + profile, + usrname, + passwd, + ident_type, + first_given_name, + family_name, + home_ou, + juvenile + ) SELECT + COALESCE(test_rule_object.grp, 2), + 'is_circ_rule_safe_to_delete_' || test_matchpoint || '_' || NOW()::text, + MD5(NOW()::TEXT), + 1, + 'Ima', + 'Test', + COALESCE(test_rule_object.user_home_ou, test_rule_object.org_unit), + COALESCE(test_rule_object.juvenile_flag, FALSE) + ; + + SELECT INTO user_object * FROM actor.usr WHERE id = currval('actor.usr_id_seq'); + + INSERT INTO asset.call_number ( + creator, + editor, + record, + owning_lib, + label, + label_class + ) SELECT + 1, + 1, + -1, + COALESCE(test_rule_object.copy_owning_lib,test_rule_object.org_unit), + 'is_circ_rule_safe_to_delete_' || test_matchpoint || '_' || NOW()::text, + 1 + ; + + INSERT INTO asset.copy ( + barcode, + circ_lib, + creator, + call_number, + editor, + location, + loan_duration, + fine_level, + ref, + circ_modifier + ) SELECT + 'is_circ_rule_safe_to_delete_' || test_matchpoint || '_' || NOW()::text, + COALESCE(test_rule_object.copy_circ_lib,test_rule_object.org_unit), + 1, + currval('asset.call_number_id_seq'), + 1, + COALESCE(test_rule_object.copy_location,1), + 2, + 2, + COALESCE(test_rule_object.ref_flag,FALSE), + test_rule_object.circ_modifier + ; + + SELECT INTO item_object * FROM asset.copy WHERE id = currval('asset.copy_id_seq'); + + SELECT INTO m * FROM action.find_circ_matrix_matchpoint( + test_rule_object.org_unit, + item_object, + user_object, + COALESCE(test_rule_object.is_renewal,FALSE) + ); + RAISE INFO 'action.find_circ_matrix_matchpoint(%,%,%,%) = (%,%,%)', + test_rule_object.org_unit, + item_object.id, + user_object.id, + COALESCE(test_rule_object.is_renewal,FALSE), + m.success, + (m.matchpoint).id, + m.buildrows + ; + + FOR result_matchpoint IN SELECT UNNEST(m.buildrows) + LOOP + SELECT INTO result_rule_object * FROM config.circ_matrix_matchpoint WHERE id = result_matchpoint; + RAISE INFO 'considering rule: %', result_rule_object; + IF result_rule_object.id = test_rule_object.id THEN + RAISE INFO 'found self'; + CONTINUE; + END IF; + IF (result_rule_object.circulate = test_rule_object.circulate + AND result_rule_object.duration_rule = test_rule_object.duration_rule + AND result_rule_object.recurring_fine_rule = test_rule_object.recurring_fine_rule + AND result_rule_object.max_fine_rule = test_rule_object.max_fine_rule + AND ( + (result_rule_object.hard_due_date IS NULL AND test_rule_object.hard_due_date IS NULL) + OR (result_rule_object.hard_due_date = test_rule_object.hard_due_date) + OR (result_rule_object.hard_due_date IS NOT NULL AND test_rule_object.hard_due_date IS NULL) + ) + AND ( + (result_rule_object.renewals IS NULL AND test_rule_object.renewals IS NULL) + OR (result_rule_object.renewals = test_rule_object.renewals) + OR (result_rule_object.renewals IS NOT NULL AND test_rule_object.renewals IS NULL) + ) + AND ( + (result_rule_object.grace_period IS NULL AND test_rule_object.grace_period IS NULL) + OR (result_rule_object.grace_period = test_rule_object.grace_period) + OR (result_rule_object.grace_period IS NOT NULL AND test_rule_object.grace_period IS NULL) + ) + AND NOT EXISTS ( + SELECT limit_set, fallthrough + FROM config.circ_matrix_limit_set_map + WHERE active and matchpoint = test_rule_object.id + EXCEPT + SELECT limit_set, fallthrough + FROM config.circ_matrix_limit_set_map + WHERE active and matchpoint = result_rule_object.id + ) + ) THEN + RAISE INFO 'rule has same outcome'; + safe_to_delete := TRUE; + ELSE + RAISE INFO 'rule has different outcome, bail now'; + RAISE EXCEPTION 'rollback the item and user tables'; + END IF; + END LOOP; + + RAISE EXCEPTION 'rollback the item and user tables'; + +EXCEPTION WHEN OTHERS THEN + + RAISE INFO 'inside exception block: %, %', SQLSTATE, SQLERRM; + RETURN safe_to_delete; + +END; +$func$ LANGUAGE plpgsql; +