#!/usr/bin/perl
#
# split_marc.pl - split MARCXML input files into chunks of N records.
#
# Usage: split_marc.pl <records_per_file> <input.xml> [<input.xml> ...]
#
# Every record read from <input.xml> is appended to the file
# "<input.xml>.split.<K>.xml", where K is the zero-based chunk index
# floor(record_position / records_per_file).  The record position is
# cumulative across all input files given on the command line, so the
# chunk numbers of a second input file continue where the first left off.
#
# Output files are opened in append mode: re-running the script without
# removing earlier ".split." files adds to them instead of replacing them.
#
# Provenance: migration-tools.git, commit
# 92214de6bc40fc23a8a69d73d2e942c5ede13e9a ("ci split_marc.pl"),
# Don McMorris, Wed, 30 Jul 2008 23:38:54 +0000.

use strict;
use warnings;

use MARC::Batch;
use MARC::Record;
use MARC::File::XML ( BinaryEncoding => 'utf-8' );
use MARC::Field;
use POSIX;
use Scalar::Util qw(looks_like_number);

my ( $split_every, @input_files ) = @ARGV;

# Reject a missing, non-numeric or non-positive chunk size up front rather
# than failing later with "Illegal division by zero".
if ( !defined $split_every
    || !looks_like_number($split_every)
    || $split_every <= 0 )
{
    die "Usage: $0 <records_per_file> <input.xml> [<input.xml> ...]\n";
}

# Number of records seen so far, across all input files.
my $count = 0;

binmode( STDOUT, ':utf8' );
binmode( STDIN,  ':utf8' );

for my $input_file (@input_files) {

    print STDERR "Processing " . $input_file . "\n";

    my $batch = MARC::Batch->new( 'XML', $input_file );

    # Keep going past malformed records and stay quiet about them.
    $batch->strict_off();
    $batch->warnings_off();

    while ( my $record = $batch->next() ) {

        # Derive the chunk index from the zero-based position of this
        # record (i.e. before counting it) so that every chunk, including
        # chunk 0, receives exactly $split_every records.
        my $chunk = floor( $count / $split_every );
        $count++;

        my $filename = $input_file . ".split." . $chunk . ".xml";

        # NOTE(review): as_xml() is called once per record; it looks like
        # each call emits a complete XML document, so a chunk file may hold
        # several concatenated documents -- confirm that downstream tools
        # expect this before changing it.
        open( my $out, '>>', $filename )
            or die "Cannot open $filename for appending: $!\n";
        binmode( $out, ':utf8' );
        print {$out} $record->as_xml()
            or die "Cannot write to $filename: $!\n";

        # Buffered write errors only surface at close time.
        close($out)
            or die "Cannot close $filename: $!\n";
    }

    print STDERR "Processed $count records.\n";
}