From 554ccf4432cac7bb502b493f7bbbb90becba67e5 Mon Sep 17 00:00:00 2001
From: Shawn Boyette
Date: Tue, 23 Sep 2008 07:29:56 +0000
Subject: [PATCH] better non-alnum detection in subfield codes

---
 yaz-cleanup | 18 +++++++++++++-----
 1 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/yaz-cleanup b/yaz-cleanup
index f4b2452..998bfa5 100755
--- a/yaz-cleanup
+++ b/yaz-cleanup
@@ -19,12 +19,14 @@ my @record = (); # current record storage
 my %reccontext = ();
 my @linecontext= (); # last 5 lines of file

-open MARC, '<', 'incoming.marc.xml';
+my $input = shift || 'incoming.marc.xml';
+
+open MARC, '<', $input;
 open my $NUMARC, '>', 'incoming.clean.marc.xml';
 print $NUMARC '',"\n";
 open my $EXMARC, '>', 'incoming.exceptions.marc.xml';
 print $EXMARC '',"\n";
-open MARC2, '<', 'incoming.marc.xml';
+open MARC2, '<', $input;
 ;

 # this is the dispatch table which drives command selection in
@@ -81,12 +83,18 @@ while (my $line = getline()) {
 }
 # naked ampersands
- if ($line =~ /&/ && $line !~ /&\w{1,7};/)
+ if ($line =~ /&/ && $line !~ /&\w+?;/)
 { edit("Looks like naked ampersand", $line); next }
 # subfields can't be non-alphanumeric
- if ($line =~ /\n";
-- 
1.7.2.5