3 # Copyright 2009-2012, Equinox Software, Inc.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 # This is still very rough.
30 # order of field separators, i think: ! ; : .
32 # pubcycle_to_scap() was derived at late hours, and isn't totally right. It's
33 # also not commented yet as you can see. Sorry, but this still needs lots of
34 # work to be reliable and understandable.
36 # $record is one of the objects loaded from serctl.data. $field is a
38 sub pubcycle_to_scap {
39 my ($record, $field) = @_;
41 if ($record->{NAM_TYPE} ne 'NUMERATION') {
42 $field->update(i => '(year)');
43 $field->update(x => '01');
45 if ($record->{NAM_TYPE} =~ /SEASON/) {
46 $field->update(j => '(season)');
47 } elsif ($record->{NAM_TYPE} =~ /MONTH|DATE/) {
48 $field->update(j => '(month)');
50 $field->update(k => '(day)') if $record->{NAM_TYPE} eq 'DATE';
54 my @pfields = split /\!/, $record->{PUBCYCLE_DEF};
56 my @dow = qw/su mo tu we th fr sa/;
58 my %periods_mo = ( 1 => 'm', 2 => 'b', 3 => 'q', 6 => 'f' );
61 if ($pfields[0] =~ /^(\d):M$/ and $pfields[1] =~ /^\d+:D$/ and $record->{NAM_TYPE} =~ /MONTH|CUSTOM|SEASON/) {
62 $pfields[0] =~ /^(\d):M$/;
64 $pfields[1] =~ /^(\d+):D$/;
66 return 0 unless exists $periods_mo{$freq_m}; # not handled
68 w => $periods_mo{$freq_m}, y => sprintf('pd%02d', $on)
72 elsif ($pfields[0] =~ /^[12]:W$/ and $pfields[1] =~ /^\d+:D$/) {
73 $pfields[0] =~ /^([12]):W$/;
75 $pfields[1] =~ /^(\d+):D$/;
78 w => ($freq_m == 1 ? 'w' : 'e'), y => sprintf('pd%s', $dow[$on-1])
82 elsif ($pfields[0] =~ /^[1-3]:Y$/ and $pfields[1] =~ /^\d+:D\.\d+:M$/) {
83 $pfields[0] =~ /^([1-3]):Y$/;
85 $pfields[1] =~ /^(\d+):D\.(\d+):M$/;
90 y => sprintf('pd%02d%02d', $of_mo, $on_day)
94 } elsif (@pfields == 3) {
95 if ($pfields[0] =~ /^\d:M$/ && $pfields[1] =~ /^\d+:D(;|$)/) {
96 $pfields[0] =~ /^(\d):M$/;
100 foreach my $date (split /;/, $pfields[1]) {
101 $date =~ /^(\d+):D/ or return 0; # not handled
102 push @dates, sprintf("%02d", $1);
104 my $potential = 12 / $months * @dates;
105 my @combos = split /;/, $pfields[2];
107 foreach my $combo (@combos) {
108 if ($combo =~ /^(\d+):M\.0:Y/) {
110 push @cparts, sprintf("%02d/%02d", $squash - 1, $squash);
111 } elsif ($combo =~ /(\d+):D\.(\d+):M\.0:Y/) {
112 push @oparts, sprintf("%02d%02d", $2, $1);
114 return 0; # abort, not handled yet
118 $u = $potential - @cparts if @cparts;
119 $u = $potential - @oparts if @oparts;
123 $w = $periods_mo{$months};
125 } elsif (exists $periods_mo{$months}) {
126 $w = $periods_mo{$months};
128 return 0; # abort, not handled
130 $field->update(u => $u, v => 'r', w => $w);
131 $field->add_subfields(y => 'cm' . join(",", @cparts)) if @cparts;
132 $field->add_subfields(y => 'od' . join(",", @oparts)) if @oparts;
133 $field->add_subfields(y => 'pd' . join(",", @dates));
134 return 1; # early out
141 sub record_bits_to_scap {
145 my $field = new MARC::Field(
146 '853', # tag doesn't really matter here
152 # when nam_type is true, use sub_iss for numeric $w
153 $field->update(a => $record->{SERC_LBL1}) if $record->{SERC_LBL1};
154 if ($record->{SERC_LBL2}) {
155 $field->update(b => $record->{SERC_LBL2});
156 $field->update(u => $record->{SERC_LMT2} || 'var');
157 $field->update(v => 'r');
160 # return the representation we need
161 return (new JSON::XS)->encode([
162 $field->indicator(1),
163 $field->indicator(2),
164 map { @$_ } $field->subfields
165 ]) if pubcycle_to_scap($record, $field);
170 # actually parses serctl.data, the file with all the DOCUMENT BOUNDARY and
172 sub load_serctl_export {
175 open FH, "<$filename" or die ("can't read $filename: $!");
182 # If we don't match this regex, move to next entry.
183 if (not /^\.(\w+)\.(?:.+\|a(.+))?$/) {
184 push @$entries, $entry if $entry;
189 # If we don't have a defined $2, just move to next line.
190 next unless defined $2;
203 foreach my $hash (@$data) {
204 foreach (keys %$hash) {
213 # The title key map is a simple text file made up of lines. Each line contains
214 # two tokens separated by a space. The first one is the value of
215 # SERC_TITLE_KEY. The second one is the id of the biblio.record_entry row
216 # that corresponds to it. I don't have a good script for making that yet.
217 # SERC_TITLE_KEY can be different things (ISxN, 035‡a, etc) so you have to
218 # build the map by using various database queries (and hand de-duping).
220 sub load_title_key_map {
223 open FH, "<$filename" or die "$filename: $!";
227 /^(\S+) (\d+)/ or next;
236 ############################# MAIN ###############################
239 "i" => "-", # input file
240 "p" => "dump", # operation
244 "dump" => sub { # Just parse input file (serctl.data) and dump resulting
247 print Dumper(load_serctl_export($opts->{i})), "\n";
251 "keys" => sub { # Over the whole list of records, how often do all keys
252 # appear? This is an analysis tool.
254 my $freq = unique_keys(load_serctl_export($opts->{i}));
256 my @keys = reverse sort { $freq->{$a} <=> $freq->{$b} } keys %$freq;
259 printf ("%-19s %d\n", $_, $freq->{$_});
263 "map" => sub { # Combine records from input file with title key map and
264 # perform transformations suitable for evergreen import.
267 my $title_key_map = load_title_key_map($opts->{t});
268 my $data = load_serctl_export($opts->{i});
270 foreach my $record (@$data) {
271 my $title_key = $record->{SERC_TITLE_KEY};
272 if (!($record->{bre} = $title_key_map->{$title_key})) {
273 $record->{unready} = 1;
277 $record->{number_of_streams} = $record->{SERC_REC_COP};
278 $record->{subscription_owning_lib} = $record->{SERC_LIB};
279 $record->{distribtion_holding_lib} = substr($record->{HOLDING_CODE}, 0, 1);
281 $record->{scap_active} =
282 $record->{SERC_STATUS} eq 'ACTIVE' ? 't' : 'f';
283 $record->{scap_pattern_code} = record_bits_to_scap($record) if $record->{PUBCYCLE_DEF};
285 # save uppercase keys' values in a note
286 my @uc_keys = grep { $_ !~ /[a-z]/ } (keys %$record);
287 $record->{note_text} = join(
289 map { "$_: $record->{$_}" } @uc_keys
292 # and remove uppercase keys now
293 delete $record->{$_} for @uc_keys;
297 my $unready = [grep { $_->{unready} } @$data];
299 open FH, ">$opts->{u}" or die "$opts->{u}: $!";
300 print FH Dumper($unready), "\n";
305 print Dumper($data), "\n";
311 getopts("i:p:t:u:", $opts) or die("usage: $0\n\t[-i infile]\n\t[-p operation]\n\t[-t title_key_map]\n\t[-u dump_file_for_unmapped_records]");
314 my $operation = lc $opts->{p};
315 exit $operations->{$operation}->($opts) if exists $operations->{$operation};
317 "specify a valid operation\n$0 -p [" .
318 join(" | ", (keys %$operations)) . "]"