Revision: 525
Author:
jeremy...@gmail.com
Date: Wed Feb 13 14:09:58 2013
Log: csvformat: give the user control over the output quoting.
This adds two new flags to csvformat:
--quoting={all,none,minimal,nonnumeric}
* all - quote every field (default for backwards compatibility).
* none - don't quote anything.
* minimal - only quote fields that contain commas.
* nonnumeric - only quote fields that don't look like numbers.
--fields-to-quote=<list>
For every line, quote only the specified fields.
* perllib/utils.pl: Added an expand_nums() function for Perl scripts.
Basically works like the C version.
* csvformat/csvformat.in: Add the new flags and break each quoting style
out into separate functions.
* csvformat/tests/test_04.sh: Verifies the new functionality.
http://code.google.com/p/crush-tools/source/detail?r=525
Added:
/trunk/src/csvformat/tests/test_04.sh
Modified:
/trunk/src/csvformat/args.tab
/trunk/src/csvformat/csvformat.in
/trunk/src/perllib/utils.pl
/trunk/src/perllib/utils_test.pl
=======================================
--- /dev/null
+++ /trunk/src/csvformat/tests/test_04.sh Wed Feb 13 14:09:58 2013
@@ -0,0 +1,49 @@
+test=04
+
+sub_test=0
+description="quote all"
+output=`echo "hello${DELIMITER}world" | $bin -q all 2>&1`
+if [ "$output" != '"hello","world"' ]; then
+ test_status $test $sub_test "$description" FAIL
+else
+ test_status $test $sub_test "$description" PASS
+fi
+
+sub_test=1
+description="quote none"
+output=`echo "hello${DELIMITER}there, world" | $bin -q none 2>&1`
+if [ "$output" != 'hello,there, world' ]; then
+ test_status $test $sub_test "$description" FAIL
+else
+ test_status $test $sub_test "$description" PASS
+fi
+
+sub_test=2
+description="quote minimal"
+output=`echo "hello${DELIMITER}there, world" | $bin -q minimal 2>&1`
+if [ "$output" != 'hello,"there, world"' ]; then
+ test_status $test $sub_test "$description" FAIL
+else
+ test_status $test $sub_test "$description" PASS
+fi
+
+sub_test=3
+description="quote nonnumeric"
+output=`echo "hello${DELIMITER}1${DELIMITER}2.3${DELIMITER}-4${DELIMITER}world"
|
+ $bin -q nonnumeric 2>&1`
+if [ "$output" != '"hello",1,2.3,-4,"world"' ]; then
+ test_status $test $sub_test "$description" FAIL
+else
+ test_status $test $sub_test "$description" PASS
+fi
+
+sub_test=4
+description="quote specific"
+output=`echo "hello${DELIMITER}1${DELIMITER}2.3${DELIMITER}world" |
+ $bin -f 2-3 2>&1`
+if [ "$output" != 'hello,"1","2.3",world' ]; then
+ test_status $test $sub_test "$description" FAIL
+else
+ test_status $test $sub_test "$description" PASS
+fi
+
=======================================
--- /trunk/src/csvformat/args.tab Mon Jul 14 09:50:39 2008
+++ /trunk/src/csvformat/args.tab Wed Feb 13 14:09:58 2013
@@ -10,7 +10,7 @@
do_long_opts => 1,
preproc_extra => '',
copyright => <<END_COPYRIGHT
- Copyright 2008 Google Inc.
+ Copyright 2008-2013 Google Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -36,7 +36,7 @@
main_code => 'usage(argv[0]); exit(EXIT_HELP);',
description => 'print this message and exit'
},
- {
+ {
name => 'delim',
shortopt => 'd',
longopt => 'delim',
@@ -68,5 +68,21 @@
required => 0,
description => 'do not escape internal quotes'
},
+ # -q/--quoting: selects the quoting style applied to output fields.
+ {
+ name => 'quoting',
+ shortopt => 'q',
+ longopt => 'quoting',
+ type => 'var',
+ required => 0,
+ description => 'field quoting style. Must be one of "all", "minimal", "nonnumeric" or "none".'
+ },
+ # -f/--fields-to-quote: explicit field list that overrides --quoting.
+ {
+ name => 'list',
+ longopt => 'fields-to-quote',
+ shortopt => 'f',
+ type => 'var',
+ required => 0,
+ description => 'only/always quote these specific fields. If specified, any value in --quoting is ignored.'
+ },
);
=======================================
--- /trunk/src/csvformat/csvformat.in Tue Sep 23 12:50:37 2008
+++ /trunk/src/csvformat/csvformat.in Wed Feb 13 14:09:58 2013
@@ -1,7 +1,7 @@
#!CRUSH_PERL_PATH -w
#-*-perl-*-
-# Copyright 2008 Google Inc.
+# Copyright 2008-2013 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -19,9 +19,10 @@
use Getopt::Long;
use FileHandle;
-my ($h, $d, $i, $o, $noescape, $show_version);
+my ($h, $d, $i, $o, $noescape, $show_version, $quoting, @quote_fields);
$d = $ENV{DELIMITER} || chr(0xfe);
+$quoting = 'all';
Getopt::Long::Configure( "no_ignore_case" );
&GetOptions("help" => \$h,
"delim=s" => \$d,
@@ -29,6 +30,8 @@
"output=s" => \$o,
"no-escape" => \$noescape,
"Version" => \$show_version,
+ "quoting=s" => \$quoting,
+ "fields-to-quote=s" => \@quote_fields,
);
if ($h) {
@@ -39,6 +42,21 @@
crush_version();
exit(0);
}
+
+# Select the routine used to quote each output line. An explicit
+# --fields-to-quote list takes precedence over any --quoting style.
+my $quote_fn;
+if (@quote_fields) {
+  # The option may be given more than once; merge every occurrence.
+  my $field_list = join(',', @quote_fields);
+  foreach my $item (split(/,/, $field_list, -1)) {
+    # Accept only 1-based field numbers ("3") or ranges of them ("2-5").
+    # Field 0 is rejected because the -1 adjustment below would turn it
+    # into array index -1, which addresses the last field.
+    my ($first, $last) = $item =~ /^(\d+)(?:-(\d+))?$/;
+    if (!defined($first) || $first < 1 || (defined($last) && $last < 1)) {
+      die "Bad --fields-to-quote value: $item\n";
+    }
+  }
+  # Convert the 1-based field numbers into 0-based array indexes.
+  @quote_fields = expand_nums($field_list, -1);
+  $quote_fn = \&quote_specific;
+}
+else {
+  # Map each supported --quoting style to its implementation.
+  my %quote_fn_for = (
+    all        => \&quote_all,
+    none       => \&quote_none,
+    minimal    => \&quote_minimal,
+    nonnumeric => \&quote_nonnumeric,
+  );
+  $quote_fn = $quote_fn_for{$quoting}
+    or die "Invalid --quoting value: $quoting\n";
+}
$d = expand_chars($d); # expand escape sequences
@@ -74,14 +92,54 @@
$_ =~ s/"/""/g; # "escape" existing quotes
}
- $_ = q(") . $_ . q(") . $linebreak;
- $_ =~ s/\Q$d\E/","/g;
-
- print $OUT $_;
+ print $OUT $quote_fn->($_), $linebreak;
}
exit(0);
+# Quote only the fields whose 0-based indexes are listed in the file-level
+# @quote_fields; all other fields are emitted unquoted.
+# Takes one line delimited by $d and returns the comma-joined result.
+sub quote_specific {
+  my $line = shift;
+  # A limit of -1 keeps trailing empty fields, which split() would
+  # otherwise silently drop.
+  my @fields = split(/\Q$d\E/o, $line, -1);
+  for my $i (@quote_fields) {
+    # Skip indexes this line does not have: assigning to them would append
+    # spurious fields, and a negative index would count from the end.
+    next if $i < 0 || $i > $#fields;
+    $fields[$i] = q(") . $fields[$i] . q(");
+  }
+  return join(',', @fields);
+}
+
+# Quote every field: wrap the whole line in double quotes first, then turn
+# each occurrence of the delimiter $d into a quote-comma-quote sequence.
+sub quote_all {
+  my ($text) = @_;
+  my $quoted = q(") . $text . q(");
+  $quoted =~ s/\Q$d\E/","/og;
+  return $quoted;
+}
+
+# Quote nothing: return a copy of the line in which every occurrence of
+# the delimiter $d has been replaced by a comma.
+sub quote_none {
+  (my $text = $_[0]) =~ s/\Q$d\E/,/og;
+  return $text;
+}
+
+# Quote every field that is not a plain number (optional leading minus,
+# digits, optional decimal part); numeric fields are emitted as-is.
+# Takes one line delimited by $d and returns the comma-joined result.
+sub quote_nonnumeric {
+  my $line = shift;
+  # A limit of -1 keeps trailing empty fields, which split() would
+  # otherwise silently drop.
+  my @fields = split(/\Q$d\E/o, $line, -1);
+  for my $field (@fields) {
+    # The pattern is anchored so the whole field must be numeric; without
+    # anchors any field merely containing a digit would be left unquoted.
+    if ($field !~ /\A-?\d+(?:\.\d+)?\z/) {
+      $field = q(") . $field . q(");
+    }
+  }
+  return join(',', @fields);
+}
+
+# Quote only the fields that need it to remain valid CSV: those containing
+# a comma, a double quote, or a line-break character.
+# Takes one line delimited by $d and returns the comma-joined result.
+sub quote_minimal {
+  my $line = shift;
+  # A limit of -1 keeps trailing empty fields, which split() would
+  # otherwise silently drop.
+  my @fields = split(/\Q$d\E/o, $line, -1);
+  for my $field (@fields) {
+    # Double quotes (including doubled, escaped ones) are only legal inside
+    # a quoted field, so they force quoting just as a comma does.
+    if ($field =~ /[",\r\n]/) {
+      $field = q(") . $field . q(");
+    }
+  }
+  return join(',', @fields);
+}
sub usage {
print STDERR <<"__USAGE__";
@@ -91,10 +149,14 @@
Usage: $0 [options...]
Options:
- -d, --delim <delim> input field separator (default: 0xfe)
- -i, --input <infile> input file (default: stdin)
- -o, --output <outfile> output file (default: stdout)
- -n, --no-escape do not escape quotes
+ -d, --delim <delim> input field separator (default: 0xfe)
+ -i, --input <infile> input file (default: stdin)
+ -o, --output <outfile> output file (default: stdout)
+ -n, --no-escape do not escape quotes
+ -q, --quoting <style> field quoting style. Must be one of
+ "all", "minimal", "nonnumeric" or "none".
+ --fields-to-quote <list> only/always quote these specific fields. If
+ specified, any value in --quoting is ignored.
__USAGE__
=======================================
--- /trunk/src/perllib/utils.pl Thu Sep 23 15:20:52 2010
+++ /trunk/src/perllib/utils.pl Wed Feb 13 14:09:58 2013
@@ -63,5 +63,37 @@
$end_pos = length($_[0]) - $pos if $end_pos <= 0;
return substr($_[0], $pos, $end_pos);
}
+
+=item * expand_nums($arg [, $adjust])
+
+Turn a string of comma-separated numbers and number ranges (e.g. "1-3,5")
+into a list of numbers. If specified, $adjust is added to each value after
+expansion. E.g., if turning 1-based field indexes into array indexes, pass -1
+as the adjust value.
+
+Whitespace around each element is ignored. Croaks if an element is neither
+an unsigned integer nor a range of two unsigned integers.
+
+=cut
+sub expand_nums {
+  use Carp;
+  my $arg = shift;
+  my $adjust = shift || 0;
+  my @idxs = ();
+  foreach my $f (split(',', $arg)) {
+    # Tolerate "1, 2" style lists.
+    $f =~ s/^\s+|\s+$//g;
+    # Both patterns are anchored so that malformed elements such as "a5"
+    # or "1-2x" are reported instead of being partially accepted.
+    if ($f =~ /^(\d+)-(\d+)$/) {
+      push(@idxs, $1 .. $2);
+    } elsif ($f =~ /^\d+$/) {
+      push(@idxs, $f);
+    } else {
+      croak "Invalid value in numeric list: $f";
+    }
+  }
+  if ($adjust) {
+    @idxs = map { $_ + $adjust } @idxs;
+  }
+  return @idxs;
+}
+
1;
=======================================
--- /trunk/src/perllib/utils_test.pl Thu Nov 20 09:53:54 2008
+++ /trunk/src/perllib/utils_test.pl Wed Feb 13 14:09:58 2013
@@ -18,6 +18,15 @@
$has_error++;
}
}
+
+# Compare two array references position by position using numeric equality.
+# Returns 1 when both arrays have the same length and equal elements
+# throughout, and 0 otherwise.
+sub arrays_are_equal {
+  my ($left, $right) = @_;
+  return 0 unless scalar(@$left) == scalar(@$right);
+  for my $idx (0 .. $#$left) {
+    return 0 unless $left->[$idx] == $right->[$idx];
+  }
+  return 1;
+}
# fields_in_line()
chk fields_in_line('hello,world', ',') == 2,
@@ -41,4 +50,13 @@
chk get_line_field('1|2|3|4', 1, '|') == 2,
"get_line_field(): middle pos" ;
+# expand_nums(): a range plus a single value, without and with adjustment.
+my @expanded = expand_nums('1-3,5');
+chk arrays_are_equal(\@expanded, [1, 2, 3, 5]),
+  "expand_nums(): no adjustment.";
+my @adjusted = expand_nums('1-3,5', -1);
+chk arrays_are_equal(\@adjusted, [0, 1, 2, 4]),
+  "expand_nums(): -1 adjustment.";
+
exit $has_error;