Percentile calculation

308 views
Skip to first unread message

Michael

unread,
Aug 5, 2019, 10:38:46 AM8/5/19
to sqlalchemy
Hi!

I'm really having a great time with sqlalchemy so far! 

Currently I'm trying to apply a percentile function on a ORM schema with sqlite3. Average, min, max etc are working fine, but i cannot compute the median or any other percentile using 'percentile_cont'. 

A minimal example and the corresponding error messages can be found below.

Any help would be greatly appreciated!

Best
Michael

import sqlalchemy
sqlalchemy
.__version__ # '1.3.5'



from sqlalchemy import create_engine
engine
= create_engine('sqlite:///:memory:', echo=False)


from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()


from sqlalchemy import Column, Float, String, Integer
class User(Base):
    __tablename__
= 'users'


    id
= Column(Integer, primary_key=True)
    name
= Column(String)
    role
= Column(String)
    salary
= Column(Float)


   
def __repr__(self):
       
return "<User(name='%s', fullname='%s', nickname='%s')>" % (
                           
self.name, self.fullname, self.nickname)






Base.metadata.create_all(engine)


u1
= User(name='u1', role='Manager', salary = 100)
u2
= User(name='u2', role='Manager', salary = 110)
u3
= User(name='u3', role='Employee', salary = 1000)
u4
= User(name='u4', role='Employee', salary = 200)




from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=engine)
session
= Session()




session
.add(u1)
session
.add(u2)
session
.add(u3)
session
.add(u4)




from sqlalchemy.sql import func
from sqlalchemy import within_group
   
q1
= session.query(func.avg(User.salary).label('average')).group_by(User.role)
print(q1.all())
q2
= session.query(func.percentile_disc(0.5).within_group(User.salary)).group_by(User.role)
print(q2)


print(q2.all()) # ERROR


---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
/usr/lib64/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1243                     self.dialect.do_execute(
-> 1244                         cursor, statement, parameters, context
   1245                     )

/usr/lib64/python3.7/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
    549     def do_execute(self, cursor, statement, parameters, context=None):
--> 550         cursor.execute(statement, parameters)
    551

OperationalError: near "(": syntax error

The above exception was the direct cause of the following exception:

OperationalError                          Traceback (most recent call last)
<ipython-input-13-bded0b5cee0a> in <module>
----> 1 print(q2.all()) # ERROR

/usr/lib64/python3.7/site-packages/sqlalchemy/orm/query.py in all(self)
   3166
   3167         """
-> 3168         return list(self)
   3169
   3170     @_generative(_no_clauseelement_condition)

/usr/lib64/python3.7/site-packages/sqlalchemy/orm/query.py in __iter__(self)
   3322         if self._autoflush and not self._populate_existing:
   3323             self.session._autoflush()
-> 3324         return self._execute_and_instances(context)
   3325
   3326     def __str__(self):

/usr/lib64/python3.7/site-packages/sqlalchemy/orm/query.py in _execute_and_instances(self, querycontext)
   3347         )
   3348
-> 3349         result = conn.execute(querycontext.statement, self._params)
   3350         return loading.instances(querycontext.query, result, querycontext)
   3351

/usr/lib64/python3.7/site-packages/sqlalchemy/engine/base.py in execute(self, object_, *multiparams, **params)
    986             raise exc.ObjectNotExecutableError(object_)
    987         else:
--> 988             return meth(self, multiparams, params)
    989
    990     def _execute_function(self, func, multiparams, params):

/usr/lib64/python3.7/site-packages/sqlalchemy/sql/elements.py in _execute_on_connection(self, connection, multiparams, params)
    285     def _execute_on_connection(self, connection, multiparams, params):
    286         if self.supports_execution:
--> 287             return connection._execute_clauseelement(self, multiparams, params)
    288         else:
    289             raise exc.ObjectNotExecutableError(self)

/usr/lib64/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_clauseelement(self, elem, multiparams, params)
   1105             distilled_params,
   1106             compiled_sql,
-> 1107             distilled_params,
   1108         )
   1109         if self._has_events or self.engine._has_events:

/usr/lib64/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1246         except BaseException as e:
   1247             self._handle_dbapi_exception(
-> 1248                 e, statement, parameters, cursor, context
   1249             )
   1250

/usr/lib64/python3.7/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   1464                 util.raise_from_cause(newraise, exc_info)
   1465             elif should_wrap:
-> 1466                 util.raise_from_cause(sqlalchemy_exception, exc_info)
   1467             else:
   1468                 util.reraise(*exc_info)

/usr/lib64/python3.7/site-packages/sqlalchemy/util/compat.py in raise_from_cause(exception, exc_info)
    397     exc_type, exc_value, exc_tb = exc_info
    398     cause = exc_value if exc_value is not exception else None
--> 399     reraise(type(exception), exception, tb=exc_tb, cause=cause)
    400
    401

/usr/lib64/python3.7/site-packages/sqlalchemy/util/compat.py in reraise(tp, value, tb, cause)
    151             value.__cause__ = cause
    152         if value.__traceback__ is not tb:
--> 153             raise value.with_traceback(tb)
    154         raise value
    155

/usr/lib64/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1242                 if not evt_handled:
   1243                     self.dialect.do_execute(
-> 1244                         cursor, statement, parameters, context
   1245                     )
   1246         except BaseException as e:

/usr/lib64/python3.7/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
    548
    549     def do_execute(self, cursor, statement, parameters, context=None):
--> 550         cursor.execute(statement, parameters)
    551
    552     def do_execute_no_params(self, cursor, statement, context=None):

OperationalError: (sqlite3.OperationalError) near "(": syntax error
[SQL: SELECT percentile_cont(?) WITHIN GROUP (ORDER BY users.salary DESC) AS anon_1
FROM users GROUP BY users.role]
[parameters: (0.5,)]
(Background on this error at:
http://sqlalche.me/e/e3q8)



Mike Bayer

unread,
Aug 5, 2019, 12:16:38 PM8/5/19
to noreply-spamdigest via sqlalchemy
does SQLite support WITHIN GROUP ?    Try it out on PostgreSQL, I think this is just not syntax SQLite supports.
--
SQLAlchemy -
The Python SQL Toolkit and Object Relational Mapper
 
 
To post example code, please provide an MCVE: Minimal, Complete, and Verifiable Example. See http://stackoverflow.com/help/mcve for a full description.
---
You received this message because you are subscribed to the Google Groups "sqlalchemy" group.
To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+...@googlegroups.com.

Michael

unread,
Aug 5, 2019, 3:21:40 PM8/5/19
to sqlalchemy
Thanks a lot! Indeed SQLite seems to be the problem and the code works fine with PostgreSQL. 

Unfortunately, a full fledged database server is not an option. Therefore I probably have to work around the problem in Python. 

Is there an easy way to obtain a list of objects generated by multiple group_by conditions? Then I could calculate the percentiles e.g. in numpy. 
To unsubscribe from this group and stop receiving emails from it, send an email to sqlal...@googlegroups.com.

Mike Bayer

unread,
Aug 5, 2019, 4:49:10 PM8/5/19
to noreply-spamdigest via sqlalchemy


On Mon, Aug 5, 2019, at 3:21 PM, Michael wrote:
Thanks a lot! Indeed SQLite seems to be the problem and the code works fine with PostgreSQL. 

Unfortunately, a full fledged database server is not an option. Therefore I probably have to work around the problem in Python. 

Is there an easy way to obtain a list of objects generated by multiple group_by conditions? Then I could calculate the percentiles e.g. in numpy. 

group_by() accepts any number of expressions and you can use it multiple times.



To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+...@googlegroups.com.

Jonathan Vanasco

unread,
Aug 5, 2019, 6:30:53 PM8/5/19
to sqlalchemy


On Monday, August 5, 2019 at 3:21:40 PM UTC-4, Michael wrote:
Thanks a lot! Indeed SQLite seems to be the problem and the code works fine with PostgreSQL. 

Unfortunately, a full fledged database server is not an option. Therefore I probably have to work around the problem in Python. 

Is there an easy way to obtain a list of objects generated by multiple group_by conditions? Then I could calculate the percentiles e.g. in numpy. 

If you can generate a function in raw sql that works with sqlite, you may be able to use the @compiles to build a function in Python

as a very simple example, this is how i deal with sqlite wanting `least` and postgres wanting `min`:



class min_date(expression.FunctionElement):
    type = sqlalchemy.types.DateTime()
    name = 'min_date'


@compiles(min_date)
def min_date__default(element, compiler, **kw):
    # return compiler.visit_function(element)
    """
    # just return the first date
    """
    args = list(element.clauses)
    return compiler.process(args[0])


@compiles(min_date, 'postgresql')
def min_date__postgresql(element, compiler, **kw):
    """
    # select least(col_a, col_b);
    """
    args = list(element.clauses)
    return "LEAST(%s, %s)" % (
        compiler.process(args[0]),
        compiler.process(args[1]),
    )


@compiles(min_date, 'sqlite')
def min_date__sqlite(element, compiler, **kw):
    """
    # select min(col_a, col_b);
    """
    args = list(element.clauses)
    return "min(%s, %s)" % (
        compiler.process(args[0]),
        compiler.process(args[1]),
    )

Mike Bayer

unread,
Aug 5, 2019, 6:57:51 PM8/5/19
to noreply-spamdigest via sqlalchemy
note also pysqlite allows Python functions to embedded in SQL directly, which can do the percentile_cont() part but not the WITHIN GROUP part.

--
SQLAlchemy -
The Python SQL Toolkit and Object Relational Mapper
 
 
To post example code, please provide an MCVE: Minimal, Complete, and Verifiable Example. See http://stackoverflow.com/help/mcve for a full description.
---
You received this message because you are subscribed to the Google Groups "sqlalchemy" group.
To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+...@googlegroups.com.

Reply all
Reply to author
Forward
0 new messages