X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=extract_xml_tags.pl;fp=extract_xml_tags.pl;h=404e3af2685593e374ea02ee7d8e71b914793d77;hp=0000000000000000000000000000000000000000;hb=3061a223b78cd95f61cda81d49e3fa4b74d439b6;hpb=b731ba3ca2e1e821acbb7702b47587a6e26dbb7e diff --git a/extract_xml_tags.pl b/extract_xml_tags.pl new file mode 100755 index 0000000..404e3af --- /dev/null +++ b/extract_xml_tags.pl @@ -0,0 +1,42 @@ +#!/usr/bin/perl -w +use strict; + +use Getopt::Long; + +my (@tags, $infile); +GetOptions ("tags=s" => \@tags, + "infile=s" => \$infile); +@tags = split(/,/, join(',', @tags)); + +open(FH, $infile) or die "Can't open $infile for reading: $!"; + +while () { + + my %tag; + my $xml = $_; + + # Find the Evergreen bib ID + $xml =~ m/(.+?)<\/subfield>/; + my $egid = $1; + + # Find each occurrence of each tag specified + foreach (@tags) { + $tag{$_} = [ $xml =~ m/()/g ]; + } + + # Clean up the results before printing + my $output = ''; + foreach my $key (sort keys %tag) { + my $text = join("", @{$tag{$key}}); + $text =~ s/>\s+