|
1 |
import xml.dom.minidom as xml |
|
2 |
from xmlwrap import * |
|
3 |
import random |
|
4 |
import subprocess, os, sys |
|
5 |
|
|
6 |
# Main function for generating |
|
7 |
|
|
8 |
def generate(filename, key_dist, value_dist, time_dist, N, sort=None):
    """Write N sampled data points to `filename`, one per line.

    For every point a key is drawn from `key_dist.sample()`, a value from
    `value_dist.sample(key.k)` and a time from
    `time_dist.sample(key.k, value.v)`; the three are wrapped in a DataPoint
    and written through its `write` method.

    If `sort` is truthy, the external `sort` program is run afterwards to
    sort the file in place (`sort <filename> -o <filename>`).
    """
    # The context manager guarantees the file is closed (and flushed) even
    # when one of the samplers raises part-way through.
    with open(filename, 'w') as out:
        for _ in xrange(N):
            sampled_key = key_dist.sample()
            sampled_value = value_dist.sample(sampled_key.k)
            sampled_time = time_dist.sample(sampled_key.k, sampled_value.v)
            DataPoint(sampled_key, sampled_value, sampled_time).write(out)
    if sort:
        # Argument list instead of a shell string: file names containing
        # spaces or shell metacharacters are passed through verbatim.
        subprocess.call(['sort', filename, '-o', filename])
|
19 |
|
|
20 |
# Useful classes |
|
21 |
|
|
22 |
class Key(object):
    """Holder for a single key, kept in the attribute `k`."""

    def __init__(self, k):
        self.k = k

    def __str__(self):
        # Text form is the letter K followed by the string form of `k`.
        return ''.join(('K', str(self.k)))
|
27 |
|
|
28 |
class Value(object):
    """Holder for a single value, kept in the attribute `v`."""

    def __init__(self, v):
        self.v = v

    def __str__(self):
        # Text form is the letter V followed by the string form of `v`.
        return ''.join(('V', str(self.v)))
|
33 |
|
|
34 |
class Distro(object):
    """A callable bundled with the positional arguments to call it with.

    `F` holds the callable and `args` the tuple of extra positional
    arguments captured at construction time.
    """

    def __init__(self, func, *args):
        self.F, self.args = func, args

    def sample(self):
        """Call the stored function with the stored arguments; return its result."""
        func = self.F
        return func(*self.args)

    def __str__(self):
        # Rendered as "<function>(<argument tuple>)".
        shown_args = str(self.args)
        return '{0}({1})'.format(self.F, shown_args)
|
42 |
|
|
43 |
class KeyDistro(Distro):
    """Distro whose samples come back wrapped in a Key."""

    def sample(self):
        """Call the stored function with the stored arguments and wrap the result in a Key."""
        drawn = self.F(*self.args)
        return Key(drawn)
|
46 |
|
|
47 |
class ValueDistro(Distro):
    """Distro sampled with a key; results come back wrapped in a Value."""

    def __init__(self, func, *args):
        # Same state as the base class: the callable in F, extras in args.
        super(ValueDistro, self).__init__(func, *args)

    def sample(self, k):
        """Call the stored function as F(k, *args) and wrap the result in a Value."""
        drawn = self.F(k, *self.args)
        return Value(drawn)
|
53 |
|
|
54 |
class TimeDistro(Distro):
    """Distro sampled with a key and a value; the raw result is returned."""

    def __init__(self, func, *args):
        # Same state as the base class: the callable in F, extras in args.
        super(TimeDistro, self).__init__(func, *args)

    def sample(self, k, v):
        """Call the stored function as F(k, v, *args) and return its result unwrapped."""
        func = self.F
        return func(k, v, *self.args)
|
60 |
|
|
61 |
|
|
62 |
class DataPoint(object):
    """One record made of a key, a value and a time.

    The constructor takes them positionally, in that order.
    """

    def __init__(self, *args):
        # args[0] -> key, args[1] -> value, args[2] -> time, assigned in order.
        for position, attribute in enumerate(('key', 'value', 'time')):
            setattr(self, attribute, args[position])

    def write(self, FH):
        """Write the on-disk form of this point, plus a newline, to `FH`."""
        line = self.on_disk_str() + '\n'
        FH.write(line)

    def on_disk_str(self):
        """Return the record as "<key>; <value>, <time>"."""
        parts = (self.key, self.value, self.time)
        return '{0}; {1}, {2}'.format(*parts)

    def __str__(self):
        # The printable form is the same text that goes to disk.
        return self.on_disk_str()
|
73 |
|
|
74 |
|
|
75 |
#XML |
|
76 |
|
|
77 |
def getOutputFile(dom):
    """Return the text found at generator/output in the parsed config `dom`.

    The lookup is delegated to `getText`, which is not defined in this file
    (presumably supplied by the `xmlwrap` star import).
    """
    path = ['generator', 'output']
    return getText(dom, path)
|
79 |
|
|
80 |
def getDataNumber(dom):
    """Return the number found at generator/datapoints/number in `dom`.

    The lookup is delegated to `getNumber`, which is not defined in this
    file (presumably supplied by the `xmlwrap` star import).
    """
    path = ['generator', 'datapoints', 'number']
    return getNumber(dom, path)
|
82 |
|
|
83 |
def getKeyDistro(dom): |
|
84 |
function = getFunction(dom,\ |
|
85 |
['generator','key',\ |
|
86 |
'distribution', 'function']) |
|
87 |
args = getNumberList(dom,['generator','key', 'distribution', 'args']) |
|
88 |
return KeyDistro(function, *args) |
|
89 |
|
|
90 |
def getValueDistro(dom):
    """Build a ValueDistro from the generator/value/distribution section of `dom`.

    The `function` entry is resolved with `getFunction` and the `args` entry
    with `getNumberList`; both helpers are defined outside this file.
    """
    section = ['generator', 'value', 'distribution']
    function = getFunction(dom, section + ['function'])
    args = getNumberList(dom, section + ['args'])
    return ValueDistro(function, *args)
|
94 |
|
|
95 |
|
|
96 |
def getTimeDistro(dom):
    """Build a TimeDistro from the generator/time/distribution section of `dom`.

    The `function` entry is resolved with `getFunction` and the `args` entry
    with `getNumberList`; both helpers are defined outside this file.
    """
    section = ['generator', 'time', 'distribution']
    function = getFunction(dom, section + ['function'])
    args = getNumberList(dom, section + ['args'])
    return TimeDistro(function, *args)
|
100 |
|
|
101 |
# MAIN |
|
102 |
# Script body: expects exactly one command-line argument, the XML config file.
if len(sys.argv) != 2:
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Usage: python generator.py <XML config file>')
    # sys.exit rather than quit(): quit() is injected by the `site` module
    # for interactive use, and a usage error should not report success.
    sys.exit(1)

# Read every generation parameter from the config, then produce the data
# file; the trailing True asks generate() to sort the output afterwards.
dom = xml.parse(sys.argv[1])
filename = getOutputFile(dom)
N = getDataNumber(dom)
KD = getKeyDistro(dom)
VD = getValueDistro(dom)
TD = getTimeDistro(dom)
generate(filename, KD, VD, TD, N, True)
|
1 |
import re |
|
2 |
|
|
3 |
# Constant |
|
4 |
MAX_RECURSION_DEPTH = 25


def template(rmap, **kwargs):
    """Expand `@{symbol}` and `@@[rule:a,b,...]` markers in a template text.

    The template comes from exactly one of two keyword arguments: `file`
    (path of a file to read) or `string` (the text itself).  Supplying
    neither or both trips an assertion.

    Each `@{name}` is replaced by `rmap[name]`.  A name missing from `rmap`
    is reported on stdout and replaced by `#BAD SYMBOL#`.

    Each `@@[rule:a,b,...]` is unrolled into one copy of `rule` per
    comma-separated item, joined by newlines, with every `@` in `rule`
    standing for that item.  A repeat whose item list still contains `@` is
    left untouched until an earlier pass has resolved the symbols inside it.
    The text between the brackets must contain exactly one `:`, otherwise
    the unpacking raises ValueError.

    Passes are repeated until the text stops changing, at most
    MAX_RECURSION_DEPTH times; if the limit is reached the text is returned
    with a newline and `#MAX DEPTH#` appended.

    Replacement values are inserted literally (backslashes are preserved)
    and `rmap` itself is never modified.
    """
    FILE = 'file'
    STRING = 'string'

    # 1) Deal with the input: exactly one of `file` / `string` is required.
    assert FILE in kwargs or STRING in kwargs
    assert FILE not in kwargs or STRING not in kwargs

    if FILE in kwargs:
        # Context manager so the handle is closed even if read() fails.
        with open(kwargs[FILE]) as handle:
            text = handle.read()
    else:
        text = kwargs[STRING]

    # Private copy: the temporary TMP_* symbols created while unrolling
    # repeats must not leak into the caller's mapping.
    symbols = dict(rmap)
    tmp_symb_id = 0

    # 2) Replace until nothing changes any more.
    symb_re = re.compile(r'@\{(\w+?)\}')
    repeat_re = re.compile(r'@@\[(.+?)\]')
    for _ in range(MAX_RECURSION_DEPTH):
        new_text = text

        # 2.1) Collect the occurrences present at the start of this pass.
        symb_list = symb_re.findall(text)
        repeat_list = repeat_re.findall(text)

        # 2.2) Unroll every repeat whose item list is fully resolved.
        for rep in repeat_list:
            (rule, body) = rep.split(':')
            if '@' in body:
                continue
            unrolled = []
            for index, item in enumerate(body.split(',')):
                tmp_symb = 'TMP_{0}_{1}'.format(tmp_symb_id, index)
                tmp_symb_id += 1
                symbols[tmp_symb] = item
                unrolled.append(rule.replace('@', '@{' + tmp_symb + '}'))
            # Literal str.replace: re.sub would interpret backslashes in
            # the replacement text as escapes or group references.
            new_text = new_text.replace('@@[' + rep + ']',
                                        '\n'.join(unrolled))

        # 2.3) Substitute the simple symbols.
        for symb in symb_list:
            if symb in symbols:
                replacement = symbols[symb]
            else:
                print("Cannot find definition for @{" + symb + "}")
                replacement = '#BAD SYMBOL#'
            new_text = new_text.replace('@{' + symb + '}', replacement)

        # Unchanged? Then the expansion has converged.
        if new_text == text:
            return text
        text = new_text
    return text + '\n#MAX DEPTH#'
|
68 |
|
|
69 |
def rewrite(rmap, infile, outfile):
    """Expand the template file `infile` with `rmap` and write it to `outfile`.

    The expansion is done by `template(rmap, file=infile)`.  It completes
    before `outfile` is opened for writing, so the input is fully read
    before the output file is truncated.
    """
    text = template(rmap, file=infile)
    # Context manager so the handle is closed even if write() raises.
    with open(outfile, 'w') as out:
        out.write(text)
|
1 |
<document> |
|
2 |
<!-- Parameters for Data generation -->\ |
|
3 |
<generator> |
|
4 |
<output>test2.dat</output> |
|
5 |
<!-- Datapoints -->\ |
|
6 |
<datapoints> |
|
7 |
<number>400</number> |
|
8 |
</datapoints> |
|
9 |
|
|
10 |
<!-- Key Distro -->\ |
|
11 |
<key> |
|
12 |
<number>4</number> |
|
13 |
<distribution> |
|
14 |
<function>random.randint</function> |
|
15 |
<args>0,400</args> |
|
16 |
</distribution> |
|
17 |
</key> |
|
18 |
<!-- Value Distro -->\ |
|
19 |
<value> |
|
20 |
<number>4</number> |
|
21 |
<distribution> |
|
22 |
<function>distro.gauss</function> |
|
23 |
<args>2</args> |
|
24 |
</distribution> |
|
25 |
</value> |
|
26 |
<!-- Time Distro -->\ |
|
27 |
<time> |
|
28 |
<distribution> |
|
29 |
<function>distro.const</function> |
|
30 |
<args>1</args> |
|
31 |
</distribution> |
|
32 |
</time> |
|
33 |
</generator> |
|
34 |
|
|
35 |
<!-- Parameters for the Shared Object -->\ |
|
36 |
<so> |
|
37 |
<!-- PAO specification -->\ |
|
38 |
<pao_h_tmpl>templates/pao_h.tmpl</pao_h_tmpl> |
|
39 |
<pao_cpp_tmpl>templates/pao_cpp.tmpl</pao_cpp_tmpl> |
|
40 |
<pao_h_gen>gen_pao.h</pao_h_gen> |
|
41 |
<pao_cpp_gen>gen_pao.cpp</pao_cpp_gen> |
|
42 |
<pao_args> |
|
43 |
<pao_class>TestPaoClass</pao_class> |
|
44 |
<pao_state_datatype>int i;</pao_state_datatype> |
|
45 |
<pao_add_func>this->i++;</pao_add_func> |
|
46 |
<pao_merge_func>this->i += add_agg->i;</pao_merge_func> |
|
47 |
</pao_args> |
|
48 |
|
|
49 |
<!-- Mapper specification -->\ |
|
50 |
<mapper_h_tmpl>templates/mapper_h.tmpl</mapper_h_tmpl> |
|
51 |
<mapper_cpp_tmpl>templates/mapper_cpp.tmpl</mapper_cpp_tmpl> |
|
52 |
<mapper_h_gen>gen_mapper.h</mapper_h_gen> |
|
53 |
<mapper_cpp_gen>gen_mapper.cpp</mapper_cpp_gen> |
|
54 |
<mapper_args> |
|
55 |
<mapper_class>TestMapperClass</mapper_class> |
|
56 |
<mapper_state_datatype> </mapper_state_datatype> |
|
57 |
<mapper_map_func>sleep(1);</mapper_map_func> |
|
58 |
<mapper_part_func>long key_id = atoi(key.c_str()); |
|
59 |
return key_id % @{partition_number};</mapper_part_func> |
|
60 |
<mapper_header_list>stdlib.h,stdio.h</mapper_header_list> |
|
61 |
<partition_number>10</partition_number> |
|
62 |
</mapper_args> |
|
63 |
</so> |
|
64 |
|
|
65 |
|
|
66 |
|
|
67 |
</document> |
|
1 |
import xml.dom.minidom as xml |
|
2 |
from xmlwrap import * |
|
3 |
import sys |
|
4 |
import microtemplate as mt |
|
5 |
|
|
6 |
if len(sys.argv) != 2: |
|
7 |
print 'Usage: python generator.py <XML config file>' |
|
8 |
quit() |
|
9 |
|
|
10 |
# 1) Parse the XML file |
|
11 |
dom = xml.parse(sys.argv[1]) |
|
12 |
|
|
13 |
# 2) Generate the PAO file |
|
14 |
# 2.1) Get file names |
|
15 |
pao_h_tmpl = getText(dom, ['pao_h_tmpl']) |
|
16 |
pao_cpp_tmpl = getText(dom, ['pao_cpp_tmpl']) |
|
17 |
pao_h_gen = getText(dom, ['pao_h_gen']) |
|
18 |
pao_cpp_gen = getText(dom, ['pao_cpp_gen']) |
|
19 |
# 2.2) Get the substitution list |
|
20 |
pao_sub = getAttrList(dom, ['pao_args']) |
|
21 |
# 2.3) Force file naming |
|
22 |
pao_sub['pao_header'] = pao_h_gen |
|
23 |
# 2.4) Sub and write |
|
24 |
mt.rewrite(pao_sub, pao_h_tmpl, pao_h_gen) |
|
25 |
mt.rewrite(pao_sub, pao_cpp_tmpl, pao_cpp_gen) |
|
26 |
|
|
27 |
# 3) Generate the Mapper file |
|
28 |
# 3.1) Get file names |
|
29 |
mapper_h_tmpl = getText(dom, ['mapper_h_tmpl']) |
| Diff too large and stripped… |