def build_tree(c, it):
    '''
    Build a tree of vnodes from an iterable of node tuples.

    Each item produced by `it` must start with these six fields:

        (parent_gnx, gnx, childIndex, h, b, ua)

    Any additional trailing fields are ignored, so longer tuples that
    begin with the same six fields are accepted as well.

    The tuples must be in the outline order: the vnode for `parent_gnx`
    is looked up in c.fileCommands.gnxDict, so it has to be registered
    there already (either beforehand or by an earlier tuple).  If it is
    not, the lookup yields None and attaching the child fails with
    AttributeError.

    `it` may be any iterable (list, generator, ...), not only an iterator.

    Returns the vnode built for the first tuple, i.e. the root of the tree.
    Raises StopIteration if `it` produces no tuples at all.
    '''
    gnxDict = c.fileCommands.gnxDict

    def getv(gnx, h, b, ua):
        # Reuse the vnode already registered under this gnx (its headline,
        # body and uA are left untouched); otherwise create and register it.
        v = gnxDict.get(gnx)
        if v is None:
            v = leoNodes.VNode(c, gnx)
            v._headString = h
            v._bodyString = b
            if ua:
                v.unknownAttributes = ua
            gnxDict[gnx] = v
        return v

    def attach(row):
        # Link the vnode described by `row` under its parent, in both directions.
        parent_gnx, gnx, childIndex, h, b, ua, *_ = row
        vparent = gnxDict.get(parent_gnx)
        v = getv(gnx, h, b, ua)
        vparent.children.insert(childIndex, v)
        v.parents.append(vparent)
        return v

    # next() needs a real iterator; iter() makes plain iterables work too.
    it = iter(it)
    # The first tuple describes the root; everything after hangs below it.
    root = attach(next(it))
    for row in it:
        attach(row)
    return root
test_1 ( 10 files) - 4.891s ........ average:489.07ms
test_2 (1000 files) - 12.833s ........ average:12.83ms
test_3 (1000 files) - 2.216s ........ average: 2.22ms
1/2 ---> 38.11 times faster
# Benchmark summary (last line of the timing listing above): 1/3 ---> 220.71 times faster
def nodes_from_leo_xml(contents):
    '''
    Parse contents as a Leo XML document and generate node tuples.

    Yields, in outline order, tuples of the form

        (parent_gnx, gnx, childIndex, h, b, ua, descendentUas)

    intended to be piped to the build_tree function.  Top-level nodes
    get the parent gnx 'hidden-root-vnode-gnx'.

    A gnx that occurs more than once (a clone) is expanded only on its
    first occurrence: later occurrences yield a single tuple that reuses
    the recorded headline, and their children are not visited again.
    '''
    xroot = read_xml(contents)
    v_elements = xroot.find('vnodes')
    t_elements = xroot.find('tnodes')
    bodies, uas = get_bodies_and_uas(t_elements)
    # gnx -> headline of every node expanded so far; membership in this
    # dict is the "already seen" marker, so empty headlines count as seen.
    heads = {}

    def viter(parent_gnx, i, xv):
        gnx = xv.attrib.get('t')
        d_uas = xv.attrib.get('descendentVnodeUnknownAttributes')
        # A missing/empty attribute is passed through unchanged.
        d_uas = d_uas and resolve_ua('xxx', d_uas)  # key is not important here
        first_visit = gnx not in heads
        if first_visit:
            # xv[0] is presumably the headline (<vh>) element -- TODO confirm.
            heads[gnx] = xv[0].text or ''
        yield parent_gnx, gnx, i, heads[gnx], bodies[gnx], uas[gnx], d_uas
        if first_visit:
            # The remaining sub-elements are treated as the child nodes.
            for j, ch in enumerate(xv[1:]):
                yield from viter(gnx, j, ch)

    for i, xv in enumerate(v_elements):
        yield from viter('hidden-root-vnode-gnx', i, xv)
nodes_from_leo_xml() function will be used for various paste commands. This way, the xml bits have to be parsed each time the nodes are processed. I would have expected that xml not be used for internal communications, but only for external interactions. I realize that mostly the xml parts are short and quick to parse, but still I would lean to using plain text or even python objects. Are you doing it this way mostly because Leo copies nodes as xml in the first place?
Here is a function that generates tuples from the xml content:
[snip]
I have a question - you say that the nodes_from_leo_xml() function will be used for various paste commands. This way, the xml bits have to be parsed each time the nodes are processed. I would have expected that xml not be used for internal communications, but only for external interactions. I realize that mostly the xml parts are short and quick to parse, but still I would lean to using plain text or even python objects. Are you doing it this way mostly because Leo copies nodes as xml in the first place?
Here is a function that generates tuples from the xml content: