--- /dev/null
+package Equinox::Migration::Utils;
+
+# Copyright 2014, Equinox Software, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+use strict;
+use warnings;
+
+BEGIN {
+    # Runs at compile time: load Exporter, then declare the package
+    # version, its inheritance from Exporter, and the export lists.
+    require Exporter;
+
+    our $VERSION   = 1.00;
+    our @ISA       = ('Exporter');
+    our @EXPORT    = ();                           # nothing exported by default
+    our @EXPORT_OK = ('normalize_oclc_number');    # exported only on request
+}
+
+# Normalize a string that is assumed to hold an OCLC control number.
+#
+# The following are stripped, in this order: leading and trailing
+# whitespace, a leading "(OCoLC)" marker, one leading prefix out of
+# "ocl7", "ocm", "ocn" and "on" (all matched case-insensitively), and
+# any leading zeroes.  If what remains is a non-empty run of ASCII
+# digits, the string "(OCoLC)" followed by those digits is returned.
+# Otherwise nothing is returned (undef in scalar context, the empty
+# list in list context); this includes undefined input and input
+# whose numeric part consists solely of zeroes.
+sub normalize_oclc_number {
+    my $str = shift;
+
+    # an undefined value can never be an OCLC number; bail out before
+    # the substitutions below emit "uninitialized value" warnings
+    return unless defined $str;
+
+    # trim
+    $str =~ s/^\s+//;
+    $str =~ s/\s+$//;
+
+    # get rid of prefixes
+    $str =~ s/^\(OCoLC\)//i;
+    $str =~ s/^(?:ocl7|ocm|ocn|on)//i;
+
+    # ... and any leading zeroes
+    $str =~ s/^0+//;
+
+    # [0-9] rather than \d: on character strings \d also matches
+    # non-ASCII Unicode digits, which must not be accepted here
+    return unless $str =~ /^[0-9]+$/;
+
+    return '(OCoLC)' . $str;
+}
+
+=head1 NAME
+
+Equinox::Migration::Utils - utility functions
+
+=head1 SYNOPSIS
+
+ use Equinox::Migration::Utils qw/normalize_oclc_number/;
+ my $normalized = normalize_oclc_number($oclc);
+
+=head1 FUNCTIONS
+
+=head2 normalize_oclc_number
+
+ my $normalized = normalize_oclc_number($oclc);
+
+Returns a normalized form of a string that is assumed to be
+an OCLC control number. The normalized form consists of the
+string "(OCoLC)" followed by the numeric portion of the OCLC
+number, sans leading zeroes.
+
+The input string is expected to be a sequence of digits with
+optional leading and trailing whitespace and an optional prefix
+from a set observed in the wild, e.g., "(OCoLC)", "ocm", and so
+forth. If the input string does not meet this condition, the
+undefined value is returned.
+
+=head1 AUTHOR
+
+Galen Charlton
+
+=head1 COPYRIGHT
+
+Copyright 2014, Equinox Software, Inc.
+
+=cut
+
+1;
--- /dev/null
+# Copyright 2014, Equinox Software, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+use strict;
+use warnings;
+
+use Test::More tests => 10;
+use Equinox::Migration::Utils qw/normalize_oclc_number/;
+
+# well-formed input, covering each prefix the normalizer strips
+is(normalize_oclc_number('ocm38548133'), '(OCoLC)38548133', 'prefixed with "ocm"');
+is(normalize_oclc_number(' ocm38548133 '), '(OCoLC)38548133', 'ignore leading/trailing whitespace');
+is(normalize_oclc_number('(OCoLC)ocm00123456'), '(OCoLC)123456', 'ignore leading zeroes in number');
+is(normalize_oclc_number('38548133'), '(OCoLC)38548133', 'bare number without prefix');
+is(normalize_oclc_number('ocn123456789'), '(OCoLC)123456789', 'prefixed with "ocn"');
+is(normalize_oclc_number('on1234567890'), '(OCoLC)1234567890', 'prefixed with "on"');
+is(normalize_oclc_number('ocl70012345'), '(OCoLC)12345', 'prefixed with "ocl7"');
+is(normalize_oclc_number('OCM38548133'), '(OCoLC)38548133', 'prefix match is case-insensitive');
+
+# malformed input: scalar() is required because the function uses a bare
+# return, which would otherwise vanish from is()'s argument list
+is(scalar(normalize_oclc_number('not-a-number')), undef, 'non-numeric input yields undef');
+is(scalar(normalize_oclc_number('(OCoLC)ocm')), undef, 'prefix without digits yields undef');
use Unicode::Normalize;
use MARC::File::XML ( BinaryEncoding => 'utf-8' );
use Equinox::Migration::SubfieldMapper;
+use Equinox::Migration::Utils qw/normalize_oclc_number/;
my $conf = {}; # configuration hashref
my $count = 0; my $scount = 0;
# oclc
$marc{oclc} = [];
- push @{ $marc{oclc} }, $record->field('001')->as_string()
- if ($record->field('001') and $record->field('003') and
- $record->field('003')->as_string() =~ /OCo{0,1}LC/);
+ # An 001 is only treated as an OCLC number when the 003 identifies
+ # OCLC as the source of the control number.
+ if ($record->field('001') &&
+     $record->field('003') &&
+     $record->field('003')->as_string() =~ /OCo{0,1}LC/) {
+     # normalize once; nothing is recorded when the 001 is not a
+     # recognizable OCLC number
+     my $normalized = normalize_oclc_number($record->field('001')->as_string());
+     push @{ $marc{oclc} }, $normalized if defined $normalized;
+ }
for ($record->field('035')) {
my $oclc = $_->subfield('a');
- push @{ $marc{oclc} }, $oclc
- if (defined $oclc and $oclc =~ /\(OCoLC\)/ and $oclc =~/([0-9]+)/);
+ # Only consider values carrying an OCLC marker or prefix.
+ if (defined $oclc &&
+     ($oclc =~ /\(OCoLC\)/ || $oclc =~ /(ocm|ocl7|ocn|on)/)) {
+     # normalize once; values that do not normalize are dropped
+     my $normalized = normalize_oclc_number($oclc);
+     push @{ $marc{oclc} }, $normalized if defined $normalized;
+ }
}
if ($record->field('999')) {