X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=text%2Fjoin_lines_if_short;fp=text%2Fjoin_lines_if_short;h=14001041b324719ca49ee1e0990c53a44b239fe5;hp=0000000000000000000000000000000000000000;hb=f9201dc2d1699f5161e5e29690a1634e8063bb85;hpb=d1812fa8c4c9e220978d650adb3611c978a2a56b diff --git a/text/join_lines_if_short b/text/join_lines_if_short new file mode 100755 index 0000000..1400104 --- /dev/null +++ b/text/join_lines_if_short @@ -0,0 +1,87 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +join_lines_if_short - program for joining some adjacent lines from stdin or one +or more files + +=head1 SYNOPSIS + +B<join_lines_if_short> [options...] [text files...] + +=head1 DESCRIPTION + +B<join_lines_if_short> is used to join adjacent lines from stdin or one or more +files, trimming trailing line feeds and carriage returns, and sending the output +to stdout. Lines are only joined if the first line is short of the expected number +of columns. + +=head1 OVERVIEW + +B<join_lines_if_short> understands the following arguments (--columns is required): + +=over 15 + +=item --help + +Show this documentation. + +=item --delimiter=<d> + +B<join_lines_if_short> will use the specified delimiter for determining the +column count for each line. The default is to assume tab as the delimiter. + +=item --join_delimiter=<d> + +B<join_lines_if_short> will use the specified delimiter when joining lines. +The default is to use no delimiter. + +=item --columns=<n> + +B<join_lines_if_short> will expect each line to contain <n> columns. If a line +has fewer than <n> columns, then this is the trigger for joining that line with +the next line. The new line will be reconsidered and potentially joined with +the next line and so on. 

=back

=cut
###############################################################################

use strict;
use warnings;
use Pod::Usage;
use Getopt::Long;

# Command-line settings.
my $help;
my $delimiter      = "\t";  # column separator; interpolated into a regex below
my $join_delimiter = "";    # text inserted between two lines that get joined
my $colcount;               # required: minimum column count of a complete line

# Bail out with a short usage message (exit status 2) when option parsing
# fails, rather than carrying on with whatever defaults happen to be left.
# --columns is declared as an integer because it is compared numerically.
GetOptions(
    'delimiter=s'      => \$delimiter,
    'join_delimiter=s' => \$join_delimiter,
    'columns=i'        => \$colcount,
    'help|?'           => \$help
) or pod2usage(2);

# --columns has no sensible default, so its absence shows the full manual too.
pod2usage(-verbose => 2) if $help || !defined $colcount;

# Accumulates the current (possibly still incomplete) output line.  An empty
# string doubles as the "nothing pending" marker, so a completely empty input
# line is absorbed without contributing a join delimiter.
my $line_buffer = '';

while (my $line = <>) {
    chomp $line;

    $line_buffer = $line_buffer eq ''
        ? $line
        : "$line_buffer$join_delimiter$line";

    # NOTE: $delimiter is used as a regular expression, so characters such as
    # '|' or '.' must be escaped by the caller (and '\t' works as a tab).
    #
    # The LIMIT of -1 keeps trailing empty fields.  Without it split()
    # discards them, so a complete line whose last columns are empty would be
    # counted as short and glued onto the following record.
    my @fields = split /$delimiter/, $line_buffer, -1;
    next if @fields < $colcount;

    _emit_line($line_buffer);
    $line_buffer = '';
}

# Flush whatever is left when input ends in the middle of a short line.
_emit_line($line_buffer) if $line_buffer ne '';

# Print one finished line to stdout, newline-terminated, with every carriage
# return removed.  Shared by the main loop and the end-of-input flush so both
# paths produce identically cleaned output.
sub _emit_line {
    my ($text) = @_;
    $text =~ s/\x0D//g;    # strip embedded carriage returns
    print "$text\n";
    return;
}