[redistricter] r206 committed - faster export mode in analyze tool. export to zipped-csv that should b...

0 views
Skip to first unread message

redist...@googlecode.com

unread,
Mar 4, 2011, 12:22:27 PM (3/4/11)
to redistrict...@googlegroups.com
Revision: 206
Author: brian.olson
Date: Fri Mar 4 09:21:28 2011
Log: faster export mode in analyze tool. export to zipped-csv that should
be compatible with publicmappingproject.
Use this to provide downloadable block list for all best-so-far solutions
in analyze_submissions.py

http://code.google.com/p/redistricter/source/detail?r=206

Modified:
/trunk/GeoData.h
/trunk/analyze.cpp
/trunk/analyze_submissions.py
/trunk/fileio.cpp
/trunk/new_st_index_pyt.html

=======================================
--- /trunk/GeoData.h Wed Feb 9 20:10:08 2011
+++ /trunk/GeoData.h Fri Mar 4 09:21:28 2011
@@ -70,6 +70,8 @@
uint32_t indexOfUbid( uint64_t u );
/* linear search, sloooow */
uint64_t ubidOfIndex( uint32_t index );
+ /* allocate new uint64_t[] look-up-table, fill it, pass-out min and max index for it. */
+ uint64_t* makeUbidLUT(uint32_t* minIndex, uint32_t* maxIndex);

// Map "Logical Record Number" to internal index so that we can map in
// other Census data files.
=======================================
--- /trunk/analyze.cpp Tue Aug 24 20:51:57 2010
+++ /trunk/analyze.cpp Fri Mar 4 09:21:28 2011
@@ -3,6 +3,7 @@
#include <string.h>
#include <unistd.h>
#include <math.h>
+#include <zlib.h>
#include "District2.h"
#include "Bitmap.h"
#include "Node.h"
@@ -181,11 +182,13 @@
}

if (exportPath != NULL) {
- FILE* exportf = fopen(exportPath, "w");
- if (exportf == NULL) {
- perror(exportPath);
- exit(1);
- return 1;
+ bool exportCsv = false;
+ bool gz = false;
+ if (strcasestr(exportPath, ".csv")) {
+ exportCsv = true;
+ }
+ if (strcasestr(exportPath, ".gz")) {
+ gz = true;
}
uint64_t maxUbid = 0;
for (int i = 0; i < sov.gd->numPoints; ++i) {
@@ -208,13 +211,49 @@
} else if (length < 15) {
length = 15; // round up to 2 state + 3 county + 6 tract + 4 block
}
- char format[30];
- snprintf(format, sizeof(format), "%%0%dlld%%02d\n", length);
+ char format[50];
+ if (exportCsv) {
+ snprintf(format, sizeof(format), "%%0%dlld,%%d\n", length);
+ } else {
+ // fixed length lines
+ snprintf(format, sizeof(format), "%%0%dlld%%02d\n", length);
+ }
+ FILE* exportf = NULL;
+ gzFile exportgzf = NULL;
+ if (gz) {
+ exportgzf = gzopen(exportPath, "wb");
+ if (exportgzf == NULL) {
+ int gzerrno = 0;
+ fprintf(stderr, "gzip error \"%s\" on file \"%s\"\n",
gzerror(exportgzf, &gzerrno), exportPath);
+ perror(exportPath);
+ exit(1);
+ return 1;
+ }
+ } else {
+ exportf = fopen(exportPath, "w");
+ if (exportf == NULL) {
+ perror(exportPath);
+ exit(1);
+ return 1;
+ }
+ }
+ uint32_t minIndex = 0, maxIndex = 0;
+ uint64_t* ubidLut = sov.gd->makeUbidLUT(&minIndex, &maxIndex);
for (int i = 0; i < sov.gd->numPoints; ++i) {
- uint64_t ubid = sov.gd->ubidOfIndex(i);
- fprintf(exportf, format, ubid, sov.winner[i]);
- }
- fclose(exportf);
+ uint64_t ubid = ubidLut[i - minIndex]; //sov.gd->ubidOfIndex(i);
+ if (gz) {
+ gzprintf(exportgzf, format, ubid, sov.winner[i]);
+ } else {
+ fprintf(exportf, format, ubid, sov.winner[i]);
+ }
+ }
+ delete [] ubidLut;
+ if (gz) {
+ gzflush(exportgzf, Z_FINISH);
+ gzclose(exportgzf);
+ } else {
+ fclose(exportf);
+ }
}

for (unsigned int i = 0; i < compareArgs.size(); ++i) {
=======================================
--- /trunk/analyze_submissions.py Tue Mar 1 05:05:21 2011
+++ /trunk/analyze_submissions.py Fri Mar 4 09:21:28 2011
@@ -1,6 +1,7 @@
#!/usr/bin/python

import cgi
+import gzip
import logging
import os
import random
@@ -544,7 +545,7 @@
))
out.close()

- def measureRace(self, cname, solution, htmlout):
+ def measureRace(self, cname, solution, htmlout, exportpath):
config = self.config[cname]
#zipname = 'VA/zips/va2010.pl.zip'
stl = cname[0:2].lower()
@@ -565,6 +566,8 @@
'--dsort', '1', '--notext',
'--html', htmlout,
'-P', pbfile, '-d', numd, '--loadSolution', solution]
+ if exportpath:
+ cmd += ['--export', exportpath]

p = subprocess.Popen(cmd, shell=False, stdin=subprocess.PIPE)
p.stdin.write(zf.read(part1name))
@@ -652,8 +655,20 @@
dszout.close()

racehtml = os.path.join(sdir, 'race.html')
- if self.options.redraw or (not os.path.exists(racehtml)):
- self.measureRace(cname, solpath, racehtml)
+ solutioncsvgz = os.path.join(sdir, 'solution.csv.gz')
+ if self.options.redraw or newerthan(solpath, racehtml) or
newerthan(solpath, solutioncsvgz):
+ # TODO: there could be smarter logic here to run faster if only one piece is needed.
+ self.measureRace(cname, solpath, racehtml, solutioncsvgz)
+
+ solutionzip = os.path.join(sdir, 'solution.zip')
+ if newerthan(solutioncsvgz, solutionzip):
+ try:
+ solutioncsv = gzip.open(solutioncsvgz, 'rb').read()
+ oz = zipfile.ZipFile(solutionzip, 'w', zipfile.ZIP_DEFLATED)
+ oz.writestr(cname + '.csv', solutioncsv)
+ oz.close()
+ except IOError, e:
+ logging.error('failed %s -> %s: %s', solutioncsvgz, solutionzip, e)

# Make images map.png and map500.png
if needsDrend:
@@ -707,7 +722,7 @@
google_analytics=_google_analytics(),
))
out.close()
- for x in ('map.png', 'map500.png', 'index.html', 'solution.dsz'):
+ for x in
('map.png', 'map500.png', 'index.html', 'solution.dsz', 'solution.csv.gz', 'solution.zip'):
atomicLink(os.path.join(sdir, x), os.path.join(outdir, cname, x))

def buildBestSoFarDirs(self, configs=None):
=======================================
--- /trunk/fileio.cpp Thu Feb 10 08:40:52 2011
+++ /trunk/fileio.cpp Fri Mar 4 09:21:28 2011
@@ -732,6 +732,26 @@
}
return (uint64_t)-1;
}
+uint64_t* GeoData::makeUbidLUT(uint32_t* minIndexP, uint32_t* maxIndexP) { // Builds a new[]-allocated table mapping (index - minIndex) -> ubid; the caller owns it and must delete[] it.
+ uint32_t minIndex = ubids[0].index; // NOTE(review): ubids[0] is read before numPoints is checked - confirm this is never called with numPoints == 0
+ uint32_t maxIndex = ubids[0].index;
+ for ( int i = 1; i < numPoints; i++ ) { // first pass: find the smallest and largest index present in ubids[]
+ if ( ubids[i].index > maxIndex ) {
+ maxIndex = ubids[i].index;
+ }
+ if ( ubids[i].index < minIndex ) {
+ minIndex = ubids[i].index;
+ }
+ }
+ int length = maxIndex - minIndex + 1; // one slot for every index in [minIndex, maxIndex], inclusive
+ uint64_t* out = new uint64_t[length]; // not value-initialized: a slot whose index never occurs in ubids[] keeps an indeterminate value
+ for ( int i = 0; i < numPoints; i++ ) { // second pass: store each ubid at its index, offset by minIndex
+ out[ubids[i].index - minIndex] = ubids[i].ubid;
+ }
+ *minIndexP = minIndex; // out-parameters: the index range covered by the returned table
+ *maxIndexP = maxIndex;
+ return out;
+}

/* binary search, fastish */
uint32_t GeoData::indexOfRecno( uint32_t rn ) {
=======================================
--- /trunk/new_st_index_pyt.html Sun Feb 27 12:50:04 2011
+++ /trunk/new_st_index_pyt.html Fri Mar 4 09:21:28 2011
@@ -25,7 +25,7 @@
<tr><!--<td class="myow">My&nbsp;map</td>--><td
class="myon">$my_kmpp</td><td class="myon">$my_spread</td><td
class="myon">$my_std</td></tr></table>
${extra}
${racedata}
-
+<div><a href="solution.zip">zipped csv block list for $statename</a></div>
</td></tr></table>
${google_analytics}
</body></html>

Reply all
Reply to author
Forward
0 new messages