I haven't done this, but I'm curious about it as well. The application
I'm working on has a graph with ~100,000 edges. I have Node and Edge
tables/objects similar to yours but with some more fields. I pull stuff
from the DB to create an adjacency list, which I save to disk using the
marshal package (it gets saved as a .pyc file, ~7MB).
Reading the marshaled graph and running it through Dijkstra's shortest
paths algorithm is pretty fast. It takes about half a second to read in
the graph and another half second (on average) to get a result from
Dijkstra.
I'm using GIS data that is handed to me by someone else, and I don't
really have a need to invent a way to update it, since I could just use
GRASS or QGIS, but it's still an interesting problem.
Looking at your code, I'm curious about (confused by) the "upper" and
"lower" neighbors. Do you need to keep track of which neighbors are
above and below?
Here's a slightly modified version that doesn't use upper and lower and
only uses a single property, _neighbours. There is a flag for
add_neighbour that says whether the node being added (othernode) should
have the node being added to (self) as a neighbor. So, instead of this:
node1.add_neighbour(node2)
node2.add_neighbour(node1)
You'd do this:
node1.add_neighbour(node2, True) # To say that node2 also connects
to node1
or:
node1.add_neighbour(node2, False) # When node2 doesn't connect to
node1
#!/usr/bin/env python
import sys
import os
from sqlalchemy import *
engine = create_engine('sqlite://')
meta = BoundMetaData(engine)
nodes = Table('nodes', meta,
Column("nodeid", Integer, primary_key=True)
)
edges = Table('edges', meta,
Column("node_1_id", Integer, ForeignKey('nodes.nodeid')),
Column("node_2_id", Integer, ForeignKey('nodes.nodeid'))
)
class Node(object):
def __str__(self):
return "<Node %d, neighbours [%s]>" % (self.nodeid,
','.join(map(str, [n.nodeid for n in self.neighbours])))
def get_neighbours(self):
return list(self._neighbours)
neighbours = property(get_neighbours)
def add_neighbour(self, othernode, both_ways=True):
if othernode not in self._neighbours:
self._neighbours.append(othernode)
if both_ways and self not in othernode._neighbours:
othernode._neighbours.append(self)
nodes_mapper = mapper(Node, nodes)
nodes2 = nodes.alias('nodes2')
nodes2_mapper = mapper(Node, nodes2, non_primary=True)
nodes_mapper.add_property('_neighbours',
relation(nodes2_mapper,
secondary=edges,
primaryjoin=nodes.c.nodeid==edges.c.node_1_id,
secondaryjoin=nodes2.c.nodeid==edges.c.node_2_id
)
)
def main():
meta.create_all()
s = create_session(engine)
for i in range(10):
n = Node()
n.nodeid = i
s.save(n)
s.flush()
s.clear()
nodes = s.query(Node).select()
nodes[1].add_neighbour(nodes[2])
nodes[1].add_neighbour(nodes[3])
nodes[1].add_neighbour(nodes[4])
nodes[4].add_neighbour(nodes[5], False)
nodes[5].add_neighbour(nodes[1], False)
nodes[2].add_neighbour(nodes[1])
for n in nodes:
print n
if __name__ == '__main__':
main()
Maybe that's useful? Maybe not. It was fun to think about it anyway.
__wyatt
class Node(object):
def __init__(self, id):
self.nodeid = id
def add_neighbor(self, othernode):
Edge(self, othernode)
def higher_neighbors(self):
return [x.upper_node for x in self.lower_edges]
def lower_neighbors(self):
return [x.lower_node for x in self.upper_edges]
class Edge(object):
def __init__(self, n1, n2):
if n1.nodeid < n2.nodeid:
self.lower_node = n1
self.upper_node = n2
else:
self.lower_node = n2
self.upper_node = n1
mapper(Node, nodes)
mapper(Edge, edges, properties={
'lower_node':relation(Node,
primaryjoin=edges.c.lower==nodes.c.nodeid, backref='lower_edges'),
'higher_node':relation(Node,
primaryjoin=edges.c.higher==nodes.c.nodeid, backref='upper_edges')
}
)
n1 = Node(1)
n2 = Node(2)
n3 = Node(3)
n2.add_neighbor(n3)
[session.save(x) for x in [n1, n2, n3]]
session.flush()
im beginning to regret having "viewonly" and "non_primary" as options,
since i cant think of anything they do that cant be better accomplished
just by using Query.select(), or manual queries in conjunction with
query.instances(). I think im going to try to express this in the
documentation...
Looking at your code, I'm curious about (confused by) the "upper" and
"lower" neighbors. Do you need to keep track of which neighbors are
above and below?
Here's a slightly modified version that doesn't use upper and lower and
only uses a single property, _neighbours. There is a flag for
add_neighbour that says whether the node being added (othernode) should
have the node being added to (self) as a neighbor. So, instead of this:
Maybe that's useful? Maybe not. It was fun to think about it anyway.
im beginning to regret having "viewonly" and "non_primary" as options,
since i cant think of anything they do that cant be better accomplished
just by using Query.select(), or manual queries in conjunction with
query.instances(). I think im going to try to express this in the
documentation...