X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=sql%2Fbase%2Fbase.sql;h=168e779e11bf688f523b759acdb54cf7eaa40bd9;hp=d5bcdc88c04fa2bd6d60b05a148779e8d1461dea;hb=b9b25104e037780b5b959849a55593ddbf1e5205;hpb=fdf506b5ca99e934d2b3a837a7d6519bbb73aa69 diff --git a/sql/base/base.sql b/sql/base/base.sql index d5bcdc8..168e779 100644 --- a/sql/base/base.sql +++ b/sql/base/base.sql @@ -1675,6 +1675,167 @@ CREATE OR REPLACE FUNCTION migration_tools.attempt_sierra_timestamp (TEXT,TEXT) END; $$ LANGUAGE PLPGSQL STRICT STABLE; +CREATE OR REPLACE FUNCTION migration_tools.openbiblio2marc (x_bibid TEXT) RETURNS TEXT AS $func$ +BEGIN +-- Expects the following table/columns: + +-- export_biblio_tsv: +-- l_bibid | 1 +-- l_create_dt | 2007-03-07 09:03:09 +-- l_last_change_dt | 2015-01-23 11:18:54 +-- l_last_change_userid | 2 +-- l_material_cd | 10 +-- l_collection_cd | 13 +-- l_call_nmbr1 | Canada +-- l_call_nmbr2 | ON +-- l_call_nmbr3 | Ottawa 18 +-- l_title | Art and the courts : France ad England +-- l_title_remainder | from 1259-1328 +-- l_responsibility_stmt | +-- l_author | National Gallery of Canada +-- l_topic1 | +-- l_topic2 | +-- l_topic3 | +-- l_topic4 | +-- l_topic5 | +-- l_opac_flg | Y +-- l_flag_attention | 0 + +-- export_biblio_field_tsv: +-- l_bibid | 1 +-- l_fieldid | 1 +-- l_tag | 720 +-- l_ind1_cd | N +-- l_ind2_cd | N +-- l_subfield_cd | a +-- l_field_data | Brieger, Peter Henry + +-- Map export_biblio_tsv as follows: +-- l_call_nmbr? -> 099a +-- l_author -> 100a +-- l_title -> 245a +-- l_title_remainder -> 245b +-- l_responsibility_stmt -> 245c +-- l_topic? -> 650a +-- l_bibid -> 001 + +RETURN + migration_tools.consolidate_tag( migration_tools.make_stub_bib(y.tag,y.ind1,y.ind2,y.data), '245' ) +FROM ( + select + array_agg(lpad(l_tag,3,'0') || l_subfield_cd) as "tag", + array_agg(l_ind1_cd) as "ind1", + array_agg(l_ind2_cd) as "ind2", + array_agg(l_field_data) as "data" + from ( + select + l_tag, + l_subfield_cd, + l_ind1_cd, + l_ind2_cd, + l_field_data + from export_biblio_field_tsv + where l_bibid = x_bibid + union + select + '099' as "l_tag", + 'a' as "l_subfield_cd", + ' ' as "l_ind1_cd", + ' ' as "l_ind2_cd", + concat_ws(' ', + nullif(btrim(l_call_nmbr1),''), + nullif(btrim(l_call_nmbr2),''), + nullif(btrim(l_call_nmbr3),'') + ) as "l_field_data" + from export_biblio_tsv + where l_bibid = x_bibid + union + select + '100' as "l_tag", + 'a' as "l_subfield_cd", + ' ' as "l_ind1_cd", + ' ' as "l_ind2_cd", + l_author as "l_field_data" + from export_biblio_tsv + where l_bibid = x_bibid and nullif(btrim(l_author),'') is not null + union + select + '245' as "l_tag", + 'a' as "l_subfield_cd", + ' ' as "l_ind1_cd", + ' ' as "l_ind2_cd", + l_title as "l_field_data" + from export_biblio_tsv + where l_bibid = x_bibid and nullif(btrim(l_title),'') is not null + union + select + '245' as "l_tag", + 'b' as "l_subfield_cd", + ' ' as "l_ind1_cd", + ' ' as "l_ind2_cd", + l_title_remainder as "l_field_data" + from export_biblio_tsv + where l_bibid = x_bibid and nullif(btrim(l_title_remainder),'') is not null + union + select + '650' as "l_tag", + 'a' as "l_subfield_cd", + ' ' as "l_ind1_cd", + ' ' as "l_ind2_cd", + l_topic1 as "l_field_data" + from export_biblio_tsv + where l_bibid = x_bibid and nullif(btrim(l_topic1),'') is not null + union + select + '650' as "l_tag", + 'a' as "l_subfield_cd", + ' ' as "l_ind1_cd", + ' ' as "l_ind2_cd", + l_topic2 as "l_field_data" + from export_biblio_tsv + where l_bibid = x_bibid and nullif(btrim(l_topic2),'') is not null + union + select + '650' as "l_tag", + 'a' as "l_subfield_cd", + ' ' as "l_ind1_cd", + ' ' as "l_ind2_cd", + l_topic3 as "l_field_data" + from export_biblio_tsv + where l_bibid = x_bibid and nullif(btrim(l_topic3),'') is not null + union + select + '650' as "l_tag", + 'a' as "l_subfield_cd", + ' ' as "l_ind1_cd", + ' ' as "l_ind2_cd", + l_topic4 as "l_field_data" + from export_biblio_tsv + where l_bibid = x_bibid and nullif(btrim(l_topic4),'') is not null + union + select + '650' as "l_tag", + 'a' as "l_subfield_cd", + ' ' as "l_ind1_cd", + ' ' as "l_ind2_cd", + l_topic5 as "l_field_data" + from export_biblio_tsv + where l_bibid = x_bibid and nullif(btrim(l_topic5),'') is not null + union + select + '001' as "l_tag", + '' as "l_subfield_cd", + '' as "l_ind1_cd", + '' as "l_ind2_cd", + l_bibid as "l_field_data" + from export_biblio_tsv + where l_bibid = x_bibid + ) x +) y; + +END +$func$ LANGUAGE plpgsql; + CREATE OR REPLACE FUNCTION migration_tools.attempt_money (TEXT,TEXT) RETURNS NUMERIC(8,2) AS $$ DECLARE attempt_value ALIAS FOR $1; @@ -3163,6 +3324,73 @@ The second argument is an array of text containing the values to plug into each If the value for a given field is NULL or the empty string, it is not inserted. $$; +CREATE OR REPLACE FUNCTION migration_tools.make_stub_bib (text[], text[], text[], text[]) RETURNS TEXT AS $func$ + +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'UTF-8'); +use Text::CSV; + +my $in_tags = shift; +my $in_ind1 = shift; +my $in_ind2 = shift; +my $in_values = shift; + +# hack-and-slash parsing of array-passed-as-string; +# this can go away once everybody is running Postgres 9.1+ +my $csv = Text::CSV->new({binary => 1}); +$in_tags =~ s/^{//; +$in_tags =~ s/}$//; +my $status = $csv->parse($in_tags); +my $tags = [ $csv->fields() ]; +$in_ind1 =~ s/^{//; +$in_ind1 =~ s/}$//; +$status = $csv->parse($in_ind1); +my $ind1s = [ $csv->fields() ]; +$in_ind2 =~ s/^{//; +$in_ind2 =~ s/}$//; +$status = $csv->parse($in_ind2); +my $ind2s = [ $csv->fields() ]; +$in_values =~ s/^{//; +$in_values =~ s/}$//; +$status = $csv->parse($in_values); +my $values = [ $csv->fields() ]; + +my $marc = MARC::Record->new(); + +$marc->leader('00000nam a22000007 4500'); +$marc->append_fields(MARC::Field->new('008', '000000s 000 eng d')); + +foreach my $i (0..$#$tags) { + my ($tag, $sf); + if ($tags->[$i] =~ /^(\d{3})([0-9a-z])$/) { + $tag = $1; + $sf = $2; + $marc->append_fields(MARC::Field->new($tag, $ind1s->[$i], $ind2s->[$i], $sf => $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; + } elsif ($tags->[$i] =~ /^(\d{3})$/) { + $tag = $1; + $marc->append_fields(MARC::Field->new($tag, $values->[$i])) if $values->[$i] !~ /^\s*$/ and $values->[$i] ne 'NULL'; + } +} + +my $xml = $marc->as_xml_record; +$xml =~ s/^<\?.+?\?>$//mo; +$xml =~ s/\n//sgo; +$xml =~ s/>\s+as_xml_record(); $function$; +-- consolidate marc tag +DROP FUNCTION IF EXISTS migration_tools.consolidate_tag(TEXT,TEXT); +CREATE OR REPLACE FUNCTION migration_tools.consolidate_tag(marc TEXT, tag TEXT) + RETURNS TEXT + LANGUAGE plperlu +AS $function$ +use strict; +use warnings; + +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'utf8'); + +binmode(STDERR, ':bytes'); +binmode(STDOUT, ':utf8'); +binmode(STDERR, ':utf8'); + +my $marc_xml = shift; +my $tag = shift; + +$marc_xml =~ s/(.........)./${1}a/; + +eval { + $marc_xml = MARC::Record->new_from_xml($marc_xml); +}; +if ($@) { + #elog("could not parse $bibid: $@\n"); + import MARC::File::XML (BinaryEncoding => 'utf8'); + return $marc_xml; +} + +my @fields = $marc_xml->field($tag); +return $marc_xml->as_xml_record() unless @fields; + +my @combined_subfield_refs = (); +my @combined_subfields = (); +foreach my $field (@fields) { + my @subfield_refs = $field->subfields(); + push @combined_subfield_refs, @subfield_refs; +} + +my @sorted_subfield_refs = reverse sort { $a->[0] <=> $b->[0] } @combined_subfield_refs; + +while ( my $tuple = pop( @sorted_subfield_refs ) ) { + my ($code,$data) = @$tuple; + unshift( @combined_subfields, $code, $data ); +} + +$marc_xml->delete_fields(@fields); + +my $new_field = new MARC::Field( + $tag, + $fields[0]->indicator(1), + $fields[0]->indicator(2), + @combined_subfields +); + +$marc_xml->insert_grouped_field( $new_field ); + +return $marc_xml->as_xml_record(); + +$function$; + -- convenience function for linking to the item staging table CREATE OR REPLACE FUNCTION migration_tools.handle_item_barcode (TEXT,TEXT,TEXT,TEXT,BOOLEAN) RETURNS VOID AS $$ @@ -5445,7 +5735,7 @@ BEGIN RAISE INFO '% incumbent barcode collisions', incumbent_barcode_collision_count; FOR x_barcode IN - SELECT a.usrname FROM actor.usr a, actor_usr_legacy b WHERE x_migrate AND a.deleted IS FALSE AND a.usrname = b.usrname + SELECT a.usrname FROM actor.usr a, actor_usr_legacy b WHERE x_migrate AND a.usrname = b.usrname LOOP FOR x_id IN SELECT DISTINCT id FROM actor_usr_legacy WHERE x_migrate AND usrname = x_barcode LOOP @@ -5457,3 +5747,237 @@ BEGIN RAISE INFO '% incumbent usrname collisions (post barcode collision munging)', incumbent_usrname_collision_count; END $function$ LANGUAGE plpgsql; + +-- alternate version: convenience function for handling item barcode collisions in asset_copy_legacy + +CREATE OR REPLACE FUNCTION migration_tools.handle_asset_barcode_collisions2(migration_schema TEXT) RETURNS VOID AS $function$ +DECLARE + x_barcode TEXT; + x_id BIGINT; + row_count NUMERIC; + internal_collision_count NUMERIC := 0; + incumbent_collision_count NUMERIC := 0; +BEGIN + FOR x_barcode IN SELECT barcode FROM asset_copy_legacy WHERE x_migrate GROUP BY 1 HAVING COUNT(*) > 1 + LOOP + FOR x_id IN SELECT id FROM asset_copy WHERE barcode = x_barcode + LOOP + UPDATE asset_copy SET barcode = migration_schema || '_internal_collision_' || id || '_' || barcode WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + internal_collision_count := internal_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% internal collisions', internal_collision_count; + FOR x_barcode IN SELECT a.barcode FROM asset.copy a, asset_copy_legacy b WHERE x_migrate AND a.deleted IS FALSE AND a.barcode = b.barcode + LOOP + FOR x_id IN SELECT id FROM asset_copy_legacy WHERE barcode = x_barcode + LOOP + UPDATE asset_copy_legacy SET barcode = migration_schema || '_' || barcode WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + incumbent_collision_count := incumbent_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% incumbent collisions', incumbent_collision_count; +END +$function$ LANGUAGE plpgsql; + +-- alternate version: convenience function for handling patron barcode/usrname collisions in actor_usr_legacy +-- this should be ran prior to populating actor_card + +CREATE OR REPLACE FUNCTION migration_tools.handle_actor_barcode_collisions2(migration_schema TEXT) RETURNS VOID AS $function$ +DECLARE + x_barcode TEXT; + x_id BIGINT; + row_count NUMERIC; + internal_collision_count NUMERIC := 0; + incumbent_barcode_collision_count NUMERIC := 0; + incumbent_usrname_collision_count NUMERIC := 0; +BEGIN + FOR x_barcode IN SELECT usrname FROM actor_usr_legacy WHERE x_migrate GROUP BY 1 HAVING COUNT(*) > 1 + LOOP + FOR x_id IN SELECT id FROM actor_usr_legacy WHERE x_migrate AND usrname = x_barcode + LOOP + UPDATE actor_usr_legacy SET usrname = migration_schema || '_internal_collision_' || id || '_' || usrname WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + internal_collision_count := internal_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% internal usrname/barcode collisions', internal_collision_count; + + FOR x_barcode IN + SELECT a.barcode FROM actor.card a, actor_usr_legacy b WHERE x_migrate AND a.barcode = b.usrname + LOOP + FOR x_id IN SELECT DISTINCT id FROM actor_usr_legacy WHERE x_migrate AND usrname = x_barcode + LOOP + UPDATE actor_usr_legacy SET usrname = migration_schema || '_' || usrname WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + incumbent_barcode_collision_count := incumbent_barcode_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% incumbent barcode collisions', incumbent_barcode_collision_count; + + FOR x_barcode IN + SELECT a.usrname FROM actor.usr a, actor_usr_legacy b WHERE x_migrate AND a.usrname = b.usrname + LOOP + FOR x_id IN SELECT DISTINCT id FROM actor_usr_legacy WHERE x_migrate AND usrname = x_barcode + LOOP + UPDATE actor_usr_legacy SET usrname = migration_schema || '_' || usrname WHERE id = x_id; + GET DIAGNOSTICS row_count = ROW_COUNT; + incumbent_usrname_collision_count := incumbent_usrname_collision_count + row_count; + END LOOP; + END LOOP; + RAISE INFO '% incumbent usrname collisions (post barcode collision munging)', incumbent_usrname_collision_count; +END +$function$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION migration_tools.is_circ_rule_safe_to_delete( test_matchpoint INTEGER ) RETURNS BOOLEAN AS $func$ +-- WARNING: Use at your own risk +-- FIXME: not considering marc_type, marc_form, marc_bib_level, marc_vr_format, usr_age_lower_bound, usr_age_upper_bound, item_age +DECLARE + item_object asset.copy%ROWTYPE; + user_object actor.usr%ROWTYPE; + test_rule_object config.circ_matrix_matchpoint%ROWTYPE; + result_rule_object config.circ_matrix_matchpoint%ROWTYPE; + safe_to_delete BOOLEAN := FALSE; + m action.found_circ_matrix_matchpoint; + result_matchpoint INTEGER; + -- ( success BOOL, matchpoint config.circ_matrix_matchpoint, buildrows INT[] ) +BEGIN + SELECT INTO test_rule_object * FROM config.circ_matrix_matchpoint WHERE id = test_matchpoint; + RAISE INFO 'testing rule: %', test_rule_object; + + INSERT INTO actor.usr ( + profile, + usrname, + passwd, + ident_type, + first_given_name, + family_name, + home_ou, + juvenile + ) SELECT + COALESCE(test_rule_object.grp, 2), + 'is_circ_rule_safe_to_delete_' || test_matchpoint || '_' || NOW()::text, + MD5(NOW()::TEXT), + 1, + 'Ima', + 'Test', + COALESCE(test_rule_object.user_home_ou, test_rule_object.org_unit), + COALESCE(test_rule_object.juvenile_flag, FALSE) + ; + + SELECT INTO user_object * FROM actor.usr WHERE id = currval('actor.usr_id_seq'); + + INSERT INTO asset.call_number ( + creator, + editor, + record, + owning_lib, + label, + label_class + ) SELECT + 1, + 1, + -1, + COALESCE(test_rule_object.copy_owning_lib,test_rule_object.org_unit), + 'is_circ_rule_safe_to_delete_' || test_matchpoint || '_' || NOW()::text, + 1 + ; + + INSERT INTO asset.copy ( + barcode, + circ_lib, + creator, + call_number, + editor, + location, + loan_duration, + fine_level, + ref, + circ_modifier + ) SELECT + 'is_circ_rule_safe_to_delete_' || test_matchpoint || '_' || NOW()::text, + COALESCE(test_rule_object.copy_circ_lib,test_rule_object.org_unit), + 1, + currval('asset.call_number_id_seq'), + 1, + COALESCE(test_rule_object.copy_location,1), + 2, + 2, + COALESCE(test_rule_object.ref_flag,FALSE), + test_rule_object.circ_modifier + ; + + SELECT INTO item_object * FROM asset.copy WHERE id = currval('asset.copy_id_seq'); + + SELECT INTO m * FROM action.find_circ_matrix_matchpoint( + test_rule_object.org_unit, + item_object, + user_object, + COALESCE(test_rule_object.is_renewal,FALSE) + ); + RAISE INFO 'action.find_circ_matrix_matchpoint(%,%,%,%) = (%,%,%)', + test_rule_object.org_unit, + item_object.id, + user_object.id, + COALESCE(test_rule_object.is_renewal,FALSE), + m.success, + (m.matchpoint).id, + m.buildrows + ; + + FOR result_matchpoint IN SELECT UNNEST(m.buildrows) + LOOP + SELECT INTO result_rule_object * FROM config.circ_matrix_matchpoint WHERE id = result_matchpoint; + RAISE INFO 'considering rule: %', result_rule_object; + IF result_rule_object.id = test_rule_object.id THEN + RAISE INFO 'found self'; + CONTINUE; + END IF; + IF (result_rule_object.circulate = test_rule_object.circulate + AND result_rule_object.duration_rule = test_rule_object.duration_rule + AND result_rule_object.recurring_fine_rule = test_rule_object.recurring_fine_rule + AND result_rule_object.max_fine_rule = test_rule_object.max_fine_rule + AND ( + (result_rule_object.hard_due_date IS NULL AND test_rule_object.hard_due_date IS NULL) + OR (result_rule_object.hard_due_date = test_rule_object.hard_due_date) + OR (result_rule_object.hard_due_date IS NOT NULL AND test_rule_object.hard_due_date IS NULL) + ) + AND ( + (result_rule_object.renewals IS NULL AND test_rule_object.renewals IS NULL) + OR (result_rule_object.renewals = test_rule_object.renewals) + OR (result_rule_object.renewals IS NOT NULL AND test_rule_object.renewals IS NULL) + ) + AND ( + (result_rule_object.grace_period IS NULL AND test_rule_object.grace_period IS NULL) + OR (result_rule_object.grace_period = test_rule_object.grace_period) + OR (result_rule_object.grace_period IS NOT NULL AND test_rule_object.grace_period IS NULL) + ) + AND NOT EXISTS ( + SELECT limit_set, fallthrough + FROM config.circ_matrix_limit_set_map + WHERE active and matchpoint = test_rule_object.id + EXCEPT + SELECT limit_set, fallthrough + FROM config.circ_matrix_limit_set_map + WHERE active and matchpoint = result_rule_object.id + ) + ) THEN + RAISE INFO 'rule has same outcome'; + safe_to_delete := TRUE; + ELSE + RAISE INFO 'rule has different outcome, bail now'; + RAISE EXCEPTION 'rollback the item and user tables'; + END IF; + END LOOP; + + RAISE EXCEPTION 'rollback the item and user tables'; + +EXCEPTION WHEN OTHERS THEN + + RAISE INFO 'inside exception block: %, %', SQLSTATE, SQLERRM; + RETURN safe_to_delete; + +END; +$func$ LANGUAGE plpgsql; +