[crush-tools] r530 committed - * Allow not specifying aggregation keys: aggregate over whole file...

3 views
Skip to first unread message

crush...@googlecode.com

unread,
Mar 15, 2013, 10:44:13 PM (3/15/13)
to crush...@googlegroups.com
Revision: 530
Author: warlock.cc
Date: Fri Mar 15 19:43:59 2013
Log: * Allow not specifying aggregation keys: aggregate over whole file
* Add a test case for whole file aggregation
* Fix test cases (truncated header line)
* Add tests to Makefile.am

http://code.google.com/p/crush-tools/source/detail?r=530

Added:
/trunk/src/aggregate/test/test_11.expected
/trunk/src/aggregate/test/test_11.sh
Modified:
/trunk/src/aggregate/Makefile.am
/trunk/src/aggregate/aggregate.c
/trunk/src/aggregate/test/test_05.1.C.expected
/trunk/src/aggregate/test/test_05.1.en_US.utf8.expected
/trunk/src/aggregate/test/test_05.1.es_AR.utf8.expected
/trunk/src/aggregate/test/test_06.1.expected

=======================================
--- /dev/null
+++ /trunk/src/aggregate/test/test_11.expected Fri Mar 15 19:43:59 2013
@@ -0,0 +1,2 @@
+Numeric-1-Sum Numeric-2-Sum Numeric-1-Count Numeric-2-Count Numeric-1-Average Numeric-2-Average
+22 58 14 14 1.57 4.14
=======================================
--- /dev/null
+++ /trunk/src/aggregate/test/test_11.sh Fri Mar 15 19:43:59 2013
@@ -0,0 +1,22 @@
+test_number=11
+description="whole file aggregation, no keys"
+
+
+expected="$test_dir/test_$test_number.expected"
+outfile="$test_dir/test_$test_number.actual"
+
+
+$bin -p -L \
+ -S Numeric-1,Numeric-2 \
+ -C Numeric-1,Numeric-2 \
+ -A Numeric-1,Numeric-2 \
+ "$test_dir/test.in" "$test_dir/test.in2" \
+ > "$outfile"
+
+if [ $? -ne 0 ] ||
+ [ "`diff -q $outfile $expected`" ]; then
+ test_status $test_number 1 "$description" FAIL
+else
+ test_status $test_number 1 "$description" PASS
+ rm "$outfile"
+fi
=======================================
--- /trunk/src/aggregate/Makefile.am Mon Sep 12 08:03:16 2011
+++ /trunk/src/aggregate/Makefile.am Fri Mar 15 19:43:59 2013
@@ -13,7 +13,7 @@

CLEANFILES = $(BUILT_SOURCES)

-EXTRA_DIST = args.tab test.conf test/test.in test/test.in2 \
+EXTRA_DIST = args.tab test.conf test/test.in test/test.in2 test/test.in3 \
test/test_00.sh test/test_00.expected \
test/test_01.sh test/test_01.expected \
test/test_02.sh test/test_02.expected \
@@ -25,7 +25,11 @@
test/test_05.1.es_AR.utf8.expected test/test_05.2.C.expected \
test/test_05.2.en_US.utf8.expected test/test_05.2.es_AR.utf8.expected \
test/test_06.sh test/test_06.0.expected \
- test/test_06.1.expected test/test_06.2.expected
+ test/test_06.1.expected test/test_06.2.expected \
+ test/test_07.sh test/test_07.expected \
+ test/test_08.sh test/test_08.expected \
+ test/test_09.sh test/test_10.sh \
+ test/test_11.sh test/test_11.expected

man1_MANS = aggregate.1
aggregate.1 : args.tab
=======================================
--- /trunk/src/aggregate/aggregate.c Fri Mar 15 19:35:38 2013
+++ /trunk/src/aggregate/aggregate.c Fri Mar 15 19:43:59 2013
@@ -134,7 +134,7 @@
int i, n;

hashtbl_t aggregations;
- struct aggregation *value;
+ struct aggregation *value = NULL;
char **key_array;

size_t n_hash_elems;
@@ -147,11 +147,6 @@

char default_delim[] = { 0xFE, 0x00 }; /* default delimiter string */

- if (! args->keys && ! args->key_labels) {
- fprintf(stderr, "%s: -k or -K must be specified\n", argv[0]);
- return EXIT_HELP;
- }
-
delim = args->delim;
if (!delim)
delim = getenv("DELIMITER");
@@ -193,8 +188,8 @@
fprintf(stderr, "\n\n");
#endif

- outbuf = NULL;
- outbuf_sz = 0;
+ outbuf = xmalloc(64);
+ outbuf_sz = 64;

/* set locale with values from the environment so strcoll()
will work correctly. */
@@ -202,56 +197,60 @@
setlocale(LC_COLLATE, "");

if (args->preserve) {
- size_t str_len;
-
if (dbfr_getline(in_reader) <= 0) {
fprintf(stderr, "%s: unexpected end of file\n", getenv("_"));
exit(EXIT_FILE_ERR);
}
chomp(in_reader->current_line);

- outbuf = xmalloc(in_reader->current_line_len);
- outbuf_sz = in_reader->current_line_len;
+ if (in_reader->current_line_len > outbuf_sz) {
+ outbuf = xrealloc(outbuf, in_reader->current_line_len + 32);
+ outbuf_sz = in_reader->current_line_len + 32;
+ }

- extract_fields_to_string(in_reader->current_line, outbuf, outbuf_sz,
- conf.keys.indexes, conf.keys.count, delim,
NULL);
- fputs(outbuf, stdout);
+ n = 0; // count output columns
+ if (conf.keys.count) {
+ extract_fields_to_string(in_reader->current_line, outbuf, outbuf_sz,
+ conf.keys.indexes, conf.keys.count, delim,
NULL);
+ fputs(outbuf, stdout);
+ n++;
+ }
if (args->labels) {
- printf("%s%s", delim, args->labels);
+ printf("%s%s", (n++ > 0 ? delim : ""), args->labels);
} else {
if (conf.sums.count) {
extract_fields_to_string(in_reader->current_line, outbuf,
outbuf_sz,
conf.sums.indexes, conf.sums.count, delim,
args->auto_label ? "-Sum" : NULL);
- printf("%s%s", delim, outbuf);
+ printf("%s%s", (n++ > 0 ? delim : ""), outbuf);
}

if (conf.counts.count) {
extract_fields_to_string(in_reader->current_line, outbuf,
outbuf_sz,
conf.counts.indexes, conf.counts.count,
delim,
args->auto_label ? "-Count" : NULL);
- printf("%s%s", delim, outbuf);
+ printf("%s%s", (n++ > 0 ? delim : ""), outbuf);
}

if (conf.averages.count) {
extract_fields_to_string(in_reader->current_line, outbuf,
outbuf_sz,
conf.averages.indexes,
conf.averages.count,
delim, args->auto_label ? "-Average" :
NULL);
- printf("%s%s", delim, outbuf);
+ printf("%s%s", (n++ > 0 ? delim : ""), outbuf);
}

if (conf.mins.count) {
extract_fields_to_string(in_reader->current_line, outbuf,
outbuf_sz,
conf.mins.indexes, conf.mins.count, delim,
args->auto_label ? "-Min" : NULL);
- printf("%s%s", delim, outbuf);
+ printf("%s%s", (n++ > 0 ? delim : ""), outbuf);
}

if (conf.maxs.count) {
extract_fields_to_string(in_reader->current_line, outbuf,
outbuf_sz,
conf.maxs.indexes, conf.maxs.count, delim,
args->auto_label ? "-Max" : NULL);
- printf("%s%s", delim, outbuf);
+ printf("%s%s", (n++ > 0 ? delim : ""), outbuf);
}
}

@@ -261,6 +260,7 @@
ht_init(&aggregations, 1024, NULL, (void (*)) free_agg);

n_hash_elems = 0;
+ memset(outbuf, 0, outbuf_sz);

/* loop through all files */
while (in != NULL) {
@@ -271,15 +271,17 @@
/* loop through each line of the file */
while (dbfr_getline(in_reader) > 0) {
chomp(in_reader->current_line);
- if (in_reader->current_line_len > outbuf_sz) {
- outbuf = xrealloc(outbuf, in_reader->current_line_len + 32);
- outbuf_sz = in_reader->current_line_len + 32;
- }
+ if (conf.keys.count) {
+ if (in_reader->current_line_len > outbuf_sz) {
+ outbuf = xrealloc(outbuf, in_reader->current_line_len + 32);
+ outbuf_sz = in_reader->current_line_len + 32;
+ }
+ extract_fields_to_string(in_reader->current_line, outbuf,
outbuf_sz,
+ conf.keys.indexes, conf.keys.count,
delim, NULL);

- extract_fields_to_string(in_reader->current_line, outbuf, outbuf_sz,
- conf.keys.indexes, conf.keys.count, delim,
NULL);
+ value = (struct aggregation *) ht_get(&aggregations, outbuf);
+ }

- value = (struct aggregation *) ht_get(&aggregations, outbuf);
if (!value) {
in_hash = 0;
value = alloc_agg(conf.sums.count, conf.counts.count,
@@ -389,20 +391,21 @@
free(outbuf);

/* Print all of the output. */
- key_array = xmalloc(sizeof(char *) * n_hash_elems);
- ht_keys(&aggregations, key_array);
-
- if (! args->nosort) {
- qsort(key_array, n_hash_elems, sizeof(char *),
- (int (*)(const void *, const void *)) key_strcmp);
- }
-
- for (i = 0; i < n_hash_elems; i++) {
- value = (struct aggregation *) ht_get(&aggregations, key_array[i]);
- print_keys_and_agg_vals(key_array[i], value);
+ if (conf.keys.count) {
+ key_array = xmalloc(sizeof(char *) * n_hash_elems);
+ ht_keys(&aggregations, key_array);
+ if (! args->nosort) {
+ qsort(key_array, n_hash_elems, sizeof(char *),
+ (int (*)(const void *, const void *)) key_strcmp);
+ }
+ for (i = 0; i < n_hash_elems; i++) {
+ value = (struct aggregation *) ht_get(&aggregations, key_array[i]);
+ print_keys_and_agg_vals(key_array[i], value);
+ }
+ free(key_array);
+ } else {
+ print_keys_and_agg_vals(NULL, value);
}
-
- free(key_array);

ht_destroy(&aggregations);

@@ -451,28 +454,31 @@
}

int print_keys_and_agg_vals(char *key, struct aggregation *val) {
- int i;
- fputs(key, stdout);
+ int i, n = 0;
+ if (key) {
+ fputs(key, stdout);
+ n++;
+ }
for (i = 0; i < conf.sums.count; i++) {
- printf("%s%.*f", delim, conf.sums.precisions[i], val->sums[i]);
+ printf("%s%.*f", (n++ > 0 ? delim : ""), conf.sums.precisions[i],
val->sums[i]);
}
for (i = 0; i < conf.counts.count; i++) {
- printf("%s%d", delim, val->counts[i]);
+ printf("%s%d", (n++ > 0 ? delim : ""), val->counts[i]);
}
for (i = 0; i < conf.averages.count; i++) {
- printf("%s%.*f", delim, conf.averages.precisions[i] + 2,
+ printf("%s%.*f", (n++ > 0 ? delim : ""), conf.averages.precisions[i] +
2,
val->average_sums[i] / val->average_counts[i]);
}
for (i = 0; i < conf.mins.count; i++) {
if (val->mins_initialized[i])
- printf("%s%.*f", delim, conf.mins.precisions[i],
val->numeric_mins[i]);
- else
+ printf("%s%.*f", (n++ > 0 ? delim : ""), conf.mins.precisions[i],
val->numeric_mins[i]);
+ else if (n++ > 0)
fputs(delim, stdout);
}
for (i = 0; i < conf.maxs.count; i++) {
if (val->maxs_initialized[i])
- printf("%s%.*f", delim, conf.maxs.precisions[i],
val->numeric_maxs[i]);
- else
+ printf("%s%.*f", (n++ > 0 ? delim : ""), conf.maxs.precisions[i],
val->numeric_maxs[i]);
+ else if (n++ > 0)
fputs(delim, stdout);
}
fputs("\n", stdout);
=======================================
--- /trunk/src/aggregate/test/test_05.1.C.expected Thu Aug 25 09:38:16 2011
+++ /trunk/src/aggregate/test/test_05.1.C.expected Fri Mar 15 19:43:59 2013
@@ -1,4 +1,4 @@
-Text-1 Text-2 Numeric-1-Sum Numeric-2-Sum Numeric-1-Count Numeric-2-Count Numeric-1-Average Numeric-2-Averag
+Text-1 Text-2 Numeric-1-Sum Numeric-2-Sum Numeric-1-Count Numeric-2-Count Numeric-1-Average Numeric-2-Average
first text value a 2 13 2 2 1.00 6.50
first text value b 1 3 1 1 1.00 3.00
second text value b 2 2 1 1 2.00 2.00
=======================================
--- /trunk/src/aggregate/test/test_05.1.en_US.utf8.expected Thu Aug 25 09:38:16 2011
+++ /trunk/src/aggregate/test/test_05.1.en_US.utf8.expected Fri Mar 15 19:43:59 2013
@@ -1,4 +1,4 @@
-Text-1 Text-2 Numeric-1-Sum Numeric-2-Sum Numeric-1-Count Numeric-2-Count
Numeric-1-Average Numeric-2-Averag
+Text-1 Text-2 Numeric-1-Sum Numeric-2-Sum Numeric-1-Count Numeric-2-Count
Numeric-1-Average Numeric-2-Average
first text value a 2 13 2 2 1.00 6.50
first text value b 1 3 1 1 1.00 3.00
second text value b 2 2 1 1 2.00 2.00
=======================================
--- /trunk/src/aggregate/test/test_05.1.es_AR.utf8.expected Thu Aug 25
09:38:16 2011
+++ /trunk/src/aggregate/test/test_05.1.es_AR.utf8.expected Fri Mar 15
19:43:59 2013
@@ -1,4 +1,4 @@
-Text-1 Text-2 Numeric-1-Sum Numeric-2-Sum Numeric-1-Count Numeric-2-Count
Numeric-1-Average Numeric-2-Averag
+Text-1 Text-2 Numeric-1-Sum Numeric-2-Sum Numeric-1-Count Numeric-2-Count
Numeric-1-Average Numeric-2-Average
first text value a 2 13 2 2 1,00 6,50
first text value b 1 3 1 1 1,00 3,00
second text value b 2 2 1 1 2,00 2,00
=======================================
--- /trunk/src/aggregate/test/test_06.1.expected Mon Nov 17 11:46:40 2008
+++ /trunk/src/aggregate/test/test_06.1.expected Fri Mar 15 19:43:59 2013
@@ -1,4 +1,4 @@
-Text-1 Text-2 Numeric-1-Sum Numeric-2-Sum Numeric-1-Count Numeric-2-Count
Numeric-1-Average Numeric-2-Averag
+Text-1 Text-2 Numeric-1-Sum Numeric-2-Sum Numeric-1-Count Numeric-2-Count
Numeric-1-Average Numeric-2-Average
first text value a 4 26 4 4 1.00 6.50
first text value b 2 6 2 2 1.00 3.00
second text value b 4 4 2 2 2.00 2.00
Reply all
Reply to author
Forward
0 new messages