class FindShardableId(sqlalchemy.sql.ClauseVisitor):
def __init__(self, ids, key_fields, get_shard):
self.ids = ids
self.key_fields = key_fields
self.get_shard = get_shard
def _check_side(self, binary, side, other_side):
has_constant = (isinstance(side, sqlalchemy.Column)
and side.name in self.key_fields
and binary.operator ==
sqlalchemy.sql.operators.eq
and getattr(other_side, "value", None))
if has_constant:
self.ids.append(self.get_shard(other_side.value))
return has_constant
def visit_binary(self, binary):
if not self._check_side(binary, binary.left,
binary.right):
# Lazy load properties tend to be reversed, with the
constant on the left
self._check_side(binary, binary.right, binary.left)
def query_chooser(query):
ids = []
if query._criterion is not None:
FindShardableId(
ids,
set(["account_id", "account_guid"]),
lambda account_id:
shard_manager.shard_id_from_guid(account_id)
).traverse(query._criterion)
if len(ids) == 0:
logging.warn("\n\nExecuting query against all shards; "
"this may not be optimal:\n
\t{0}".format(str(query)))
return shards.keys()
else:
return ids
This works well most of the time, but we're finding that some queries
do not have a "value". These are all of the form
SELECT shopping_list_items.version AS shopping_list_items_version
FROM shopping_list_items
WHERE shopping_list_items.account_id = :param_1
AND shopping_list_items.shopping_list_item_id = :param_2
and :param1 is of the form _BindParamClause(u'%(63636624 param)s',
None, type_=UUID())
Typically, I'm seeing this come out of the innards of SQLAlchemy,
as one of several queries triggered by, say, a session.merge().
How do we work around this?
Thanks!
/George V. Reilly, Seattle
The only Query() I can see getting generated that would have non-valued
bindparams would be during a _get(). The values should be present in
query._params. If you need more info I can dig in to recall how the keys
of that dictionary are formatted in this case.
>
> How do we work around this?
>
> Thanks!
> /George V. Reilly, Seattle
>
> --
> You received this message because you are subscribed to the Google Groups
> "sqlalchemy" group.
> To post to this group, send email to sqlal...@googlegroups.com.
> To unsubscribe from this group, send email to
> sqlalchemy+...@googlegroups.com.
> For more options, visit this group at
> http://groups.google.com/group/sqlalchemy?hl=en.
>
>
Thanks, Michael. Here's what I came up with:
class FindShardableId(sqlalchemy.sql.ClauseVisitor):
def __init__(self, ids, key_fields, get_shard, params):
self.ids = ids
self.key_fields = key_fields
self.get_shard = get_shard
self.params = params
def visit_binary(self, binary):
if not self._check_side(binary, binary.left,
binary.right):
# Lazy load properties tend to be reversed, with the
constant on the left
self._check_side(binary, binary.right, binary.left)
def _check_side(self, binary, side, other_side):
if isinstance(side, sqlalchemy.Column) and side.name in
self.key_fields:
if binary.operator == sqlalchemy.sql.operators.eq:
value = getattr(other_side, "value", None)
if (value is None and
isinstance(other_side,
sqlalchemy.sql.expression._BindParamClause)):
value = self.params.get(other_side.key)
if value is not None:
self.ids.append(self.get_shard(value))
return True
elif binary.operator ==
sqlalchemy.sql.operators.in_op:
for bind in other_side.clauses:
self.ids.append(self.get_shard(bind.value))
return True
class QuerySharder(object):
def sessionmaker(self, **sessionmaker_args):
Session = sqlalchemy.orm.sessionmaker(
class_ = sqlalchemy.orm.shard.ShardedSession,
**sessionmaker_args)
Session.configure(
shards=self.shards,
shard_chooser=self._shard_chooser,
id_chooser=self._id_chooser,
query_chooser=self._query_chooser)
return Session
def _query_chooser(self, query):
ids = []
if query._criterion is not None:
FindShardableId(
ids,
set(["account_id", "account_guid"]),
lambda account_id:
self.shard_manager.shard_id_from_guid(account_id),
query._params
).traverse(query._criterion)
if len(ids) == 0:
logging.warn("\n\n! Executing query against all
shards; "
"this may not be optimal:\n\t{0}\n
\tParams: {1}\n".format(
str(query), str(query._params)))
return self.shards.keys()
else:
return ids
I really don't like the
isinstance(other_side,
sqlalchemy.sql.expression._BindParamClause))
in the middle of _check_side. Is there a cleaner way to do this?
I found that a combination of
* the above _check_side and two-sided visit_binary
* doing a better job of declaring ForeignKey relationships in Columns
* some explicit primaryjoins in calls to relation()
cleaned up all the cases where SA wasn't providing the ids in queries
Perhaps the sharding sample in SA 0.6 could be expanded?
--
/George V. Reilly, Seattle
I think what's needed here is a little more savvy to what the visitors
package provides. ClauseVisitor provides support for some of the hefty
transforming objects SQLA has internally in the sql util package, but for
"finding things" you're much better off using the functional API, which is
a lot more flexible. Anytime you find yourself using "isinstance()",
which is in fact sometimes necessary, you can instead try to get the
visitor dispatch system to give you that type instead. Below I've
adapted a recipe that is in the beaker example to also include columns and
operators from binary expressions. We can certainly add this function to
the sharding example. Ideally the sharding API would come with some
helper functions, if we could identify some that are of very general use.
from sqlalchemy.sql import visitors
def get_comparisons(query):
binds = {}
columns = set()
result = []
def visit_bindparam(bind):
value = query._params.get(bind.key, bind.value)
# lazyloader may dig a callable in here, intended
# to late-evaluate params after autoflush is called.
# convert to a scalar value.
if callable(value):
value = value()
binds[bind] = value
def visit_column(column):
columns.add(column)
def visit_binary(binary):
if binary.left in columns and binary.right in binds:
result.append((binary.left, binary.operator,
binds[binary.right]))
elif binary.left in binds and binary.right in columns:
result.append((binary.right, binary.operator,
binds[binary.left]))
if query._criterion is not None:
visitors.traverse_depthfirst(query._criterion, {},
{'bindparam':visit_bindparam,
'binary':visit_binary,
'column':visit_column
}
)
return result
if __name__ == '__main__':
from sqlalchemy import *
from sqlalchemy.orm import *
metadata = MetaData()
users = Table('users', metadata,
Column('id', Integer, primary_key=True, ),
Column('name', String(30), nullable=False),
)
addresses = Table('addresses', metadata,
Column('id', Integer, primary_key=True, ),
Column('user_id', None, ForeignKey('users.id')),
Column('email_address', String(50), nullable=False)
)
class User(object):
pass
class Address(object):
pass
mapper(Address, addresses, properties={
'user':relationship(User)
})
mapper(User, users)
sess = create_session()
q = sess.query(User).\
filter(User.id==Address.user_id).\
filter(User.name=='foo').\
filter(Address.email_address.like(bindparam('foo'))).\
params(foo='edward')
for col, op, value in get_comparisons(q):
print col.name, op, value
> --
> /George V. Reilly, Seattle
>
check out r0ddd638f1d90 in mercurial. I've added the function from the
example below, plus support for in_op(), to the attribute_shard example.
The old ClauseVisitor method is removed and replaced with this more robust
method.
Very nice! Thanks, Michael.
/George
I blogged about this at http://blogs.cozi.com/tech/2010/04/sqlalchemy-sharding.html
/George