SQLAlchemy 1.0.12 use inner JOIN instead of LEFT OUTER JOIN

2,151 views
Skip to first unread message

Alex Dev

unread,
Apr 25, 2016, 5:16:42 PM4/25/16
to sqlalchemy
Hello,

I have a broken query when migrating from SQLAlchemy 0.9.4 to 1.0.12. It seems to be linked to a behavioral change in the ORM (http://docs.sqlalchemy.org/en/rel_1_0/changelog/migration_10.html#right-inner-join-nesting-now-the-default-for-joinedload-with-innerjoin-true)

Here is simplified version of the code:

# -*- coding: utf-8 -*-
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *


Base = declarative_base()

_plant_table
= Table('plant', Base.metadata,
       
Column('id', Integer, primary_key=True)
   
)

_plant_dimensionsseries_table
= Table('plant_dimensionsseries', Base.metadata,
       
Column('plant_id', Integer, primary_key=True),
       
Column('dimensionsseriestype_id', Integer, primary_key=True),
       
ForeignKeyConstraint(['plant_id'], ['plant.id'])
   
)

_view_plant_dimensionsseries_table
= Table('view_plant_dimensionsseries', Base.metadata,
       
Column('plant_id', Integer, primary_key=True),
       
Column('dimensionsseriestype_id', Integer, primary_key=True),
       
ForeignKeyConstraint(
           
['plant_id', 'dimensionsseriestype_id'],
           
['plant_dimensionsseries.plant_id', 'plant_dimensionsseries.dimensionsseriestype_id'])
   
)

class Plant(Base):
    __table__
= _plant_table

class PlantDimensionsseries(Base):
    __table__
= _plant_dimensionsseries_table
    _plant_id
= __table__.c.plant_id
    _dimensionsseriestype_id
= __table__.c.dimensionsseriestype_id

    plant
= relationship('Plant',
                innerjoin
=True,
                backref
=backref('plant_dimensionsseries'))

class PlantDimensionsseriesDataComputed(Base):
    __table__
= _view_plant_dimensionsseries_table
    _plant_id
= __table__.c.plant_id
    _dimensionsseriestype_id
= __table__.c.dimensionsseriestype_id

   
# One-to-one relationship
    dimensionsseries
= relationship('PlantDimensionsseries',
                innerjoin
=True,
                backref
=backref('data_computed',
                    innerjoin
=True))

if __name__ == '__main__':

    engine
= create_engine('postgresql://nurseryacme_employee@localhost:5432/nurseryacme')
   
Session = sessionmaker(bind=engine)
    session
= Session()

   
# query 1:
   
# SQLAlchemy 0.9.4: Correct SQL generated
   
# SQLAlchemy 1.0.12: Wrong SQL generated, a inner JOIN is used instead of a LEFT OUTER JOIN between plant_dimensionsseries and view_plant_dimensionsseries
   
print session.query(Plant).\
        outerjoin
(Plant.plant_dimensionsseries).\
        options
(contains_eager(Plant.plant_dimensionsseries)).\
        options
(joinedload(Plant.plant_dimensionsseries, PlantDimensionsseries.data_computed))

   
# query 2:
   
# SQLAlchemy 1.0.12: Correct SQL generated
   
print session.query(Plant).\
        outerjoin
(Plant.plant_dimensionsseries).\
        options
(contains_eager(Plant.plant_dimensionsseries)).\
        options
(joinedload(Plant.plant_dimensionsseries, PlantDimensionsseries.data_computed, innerjoin=False))


Result with SQLAlchemy 0.9.4:
# query 1
SELECT
...
FROM plant LEFT OUTER JOIN plant_dimensionsseries ON plant
.id = plant_dimensionsseries.plant_id LEFT OUTER JOIN view_plant_dimensionsseries AS view_plant_dimensionsseries_1 ON plant_dimensionsseries.plant_id = view_plant_dimensionsseries_1.plant_id AND plant_dimensionsseries.dimensionsseriestype_id = view_plant_dimensionsseries_1.dimensionsseriestype_id
# query 2
SELECT
...
FROM plant LEFT OUTER JOIN plant_dimensionsseries ON plant
.id = plant_dimensionsseries.plant_id LEFT OUTER JOIN view_plant_dimensionsseries AS view_plant_dimensionsseries_1 ON plant_dimensionsseries.plant_id = view_plant_dimensionsseries_1.plant_id AND plant_dimensionsseries.dimensionsseriestype_id = view_plant_dimensionsseries_1.dimensionsseriestype_id


Result with SQLAlchemy 1.0.12:
# query 1
SELECT
...
FROM plant LEFT OUTER JOIN plant_dimensionsseries ON plant
.id = plant_dimensionsseries.plant_id JOIN view_plant_dimensionsseries AS view_plant_dimensionsseries_1 ON plant_dimensionsseries.plant_id = view_plant_dimensionsseries_1.plant_id AND plant_dimensionsseries.dimensionsseriestype_id = view_plant_dimensionsseries_1.dimensionsseriestype_id
# query 2
SELECT
...
FROM plant LEFT OUTER JOIN plant_dimensionsseries ON plant
.id = plant_dimensionsseries.plant_id LEFT OUTER JOIN view_plant_dimensionsseries AS view_plant_dimensionsseries_1 ON plant_dimensionsseries.plant_id = view_plant_dimensionsseries_1.plant_id AND plant_dimensionsseries.dimensionsseriestype_id = view_plant_dimensionsseries_1.dimensionsseriestype_id

In query 1 under SQLAlchemy 1.0.12, the query discard many rows due the JOIN chained to the LEFT OUTER JOIN and this precisely what SQLAlchemy wanted to avoid if I refer to the description of "Right-nested inner joins available in joined eager loads" (http://docs.sqlalchemy.org/en/rel_1_0/changelog/migration_09.html#feature-2976).

I can fix the query 1 by adding the innerjoin=False as in query 2 but I am wondering if this is a bug or the expected behavior following the behavioral change in SQLAlchemy 1.0.x (http://docs.sqlalchemy.org/en/rel_1_0/changelog/migration_10.html#right-inner-join-nesting-now-the-default-for-joinedload-with-innerjoin-true). Is this a bug?

Mike Bayer

unread,
Apr 25, 2016, 6:28:10 PM4/25/16
to sqlal...@googlegroups.com
> plant =relationship('Plant',
> innerjoin=True,
> backref=backref('plant_dimensionsseries'))
>
> classPlantDimensionsseriesDataComputed(Base):
> __table__ =_view_plant_dimensionsseries_table
> _plant_id =__table__.c.plant_id
> _dimensionsseriestype_id =__table__.c.dimensionsseriestype_id
>
> # One-to-one relationship
> dimensionsseries =relationship('PlantDimensionsseries',
> innerjoin=True,
> backref=backref('data_computed',
> innerjoin=True))
>
> if__name__ =='__main__':
>
> engine
> =create_engine('postgresql://nurseryacme_employee@localhost:5432/nurseryacme')
> Session=sessionmaker(bind=engine)
> session =Session()
>
> # query 1:
> # SQLAlchemy 0.9.4: Correct SQL generated
> # SQLAlchemy 1.0.12: Wrong SQL generated, a inner JOIN is used instead
> of a LEFT OUTER JOIN between plant_dimensionsseries and
> view_plant_dimensionsseries
> printsession.query(Plant).\
> outerjoin(Plant.plant_dimensionsseries).\
> options(contains_eager(Plant.plant_dimensionsseries)).\
>
> options(joinedload(Plant.plant_dimensionsseries,PlantDimensionsseries.data_computed))

well this usage above is wrong. You can't have contains_eager() and
joinedload() along the same paths at the same time like that. Also,
chaining joinedload() from contains_eager() is not a very typical thing
to do, it works, but joinedload() does not coordinate with
contains_eager() in any way, and it has no idea that you are using
outerjoin() to the left of it.

The two correct ways to do this are:

print session.query(Plant).\
join(Plant.plant_dimensionsseries).\

options(contains_eager(Plant.plant_dimensionsseries).joinedload(PlantDimensionsseries.data_computed,
))

and

print session.query(Plant).\
outerjoin(Plant.plant_dimensionsseries).\

options(contains_eager(Plant.plant_dimensionsseries).joinedload(PlantDimensionsseries.data_computed,
innerjoin=False))


or of course:


print session.query(Plant).\
outerjoin(Plant.plant_dimensionsseries).\

options(joinedload(Plant.plant_dimensionsseries).joinedload(PlantDimensionsseries.data_computed))


where in the last one, the joinedload() to the right coordinates with
the joinedload to the left to become a LEFT OUTER JOIN.





>
> # query 2:
> # SQLAlchemy 1.0.12: Correct SQL generated
> printsession.query(Plant).\
> outerjoin(Plant.plant_dimensionsseries).\
> options(contains_eager(Plant.plant_dimensionsseries)).\
>
> options(joinedload(Plant.plant_dimensionsseries,PlantDimensionsseries.data_computed,innerjoin=False))
>
> |
>
> Result with SQLAlchemy 0.9.4:
> |
> # query 1
> SELECT ...
> FROM plant LEFT OUTER JOIN plant_dimensionsseries ON plant.id
> =plant_dimensionsseries.plant_id *LEFT OUTER
> JOIN*view_plant_dimensionsseries AS view_plant_dimensionsseries_1 ON
> plant_dimensionsseries.plant_id =view_plant_dimensionsseries_1.plant_id
> AND plant_dimensionsseries.dimensionsseriestype_id
> =view_plant_dimensionsseries_1.dimensionsseriestype_id
> # query 2
> SELECT ...
> FROM plant LEFT OUTER JOIN plant_dimensionsseries ON plant.id
> =plant_dimensionsseries.plant_id *LEFT OUTER
> JOIN*view_plant_dimensionsseries AS view_plant_dimensionsseries_1 ON
> plant_dimensionsseries.plant_id =view_plant_dimensionsseries_1.plant_id
> AND plant_dimensionsseries.dimensionsseriestype_id
> =view_plant_dimensionsseries_1.dimensionsseriestype_id
>
> |
>
> Result with SQLAlchemy 1.0.12:
> |
> # query 1
> SELECT ...
> FROM plant LEFT OUTER JOIN plant_dimensionsseries ON plant.id
> =plant_dimensionsseries.plant_id *JOIN*view_plant_dimensionsseries AS
> view_plant_dimensionsseries_1 ON plant_dimensionsseries.plant_id
> =view_plant_dimensionsseries_1.plant_id AND
> plant_dimensionsseries.dimensionsseriestype_id
> =view_plant_dimensionsseries_1.dimensionsseriestype_id
> # query 2
> SELECT ...
> FROM plant LEFT OUTER JOIN plant_dimensionsseries ON plant.id
> =plant_dimensionsseries.plant_id *LEFT OUTER
> JOIN*view_plant_dimensionsseries AS view_plant_dimensionsseries_1 ON
> plant_dimensionsseries.plant_id =view_plant_dimensionsseries_1.plant_id
> AND plant_dimensionsseries.dimensionsseriestype_id
> =view_plant_dimensionsseries_1.dimensionsseriestype_id
> |
>
> In query 1 under SQLAlchemy 1.0.12, the query discard many rows due the
> JOIN chained to the LEFT OUTER JOIN and this precisely what SQLAlchemy
> wanted to avoid if I refer to the description of "Right-nested inner
> joins available in joined eager loads"
> (http://docs.sqlalchemy.org/en/rel_1_0/changelog/migration_09.html#feature-2976).
>
> I can fix the query 1 by adding the |innerjoin=False| as in query 2 but
> I am wondering if this is a bug or the expected behavior following the
> behavioral change in SQLAlchemy 1.0.x
> (http://docs.sqlalchemy.org/en/rel_1_0/changelog/migration_10.html#right-inner-join-nesting-now-the-default-for-joinedload-with-innerjoin-true).
> Is this a bug?
>
> --
> You received this message because you are subscribed to the Google
> Groups "sqlalchemy" group.
> To unsubscribe from this group and stop receiving emails from it, send
> an email to sqlalchemy+...@googlegroups.com
> <mailto:sqlalchemy+...@googlegroups.com>.
> To post to this group, send email to sqlal...@googlegroups.com
> <mailto:sqlal...@googlegroups.com>.
> Visit this group at https://groups.google.com/group/sqlalchemy.
> For more options, visit https://groups.google.com/d/optout.

Alex Dev

unread,
Apr 26, 2016, 1:22:52 PM4/26/16
to sqlalchemy
Thank you for your quick answer Mike.


Le mardi 26 avril 2016 00:28:10 UTC+2, Mike Bayer a écrit :


well this usage above is wrong.  You can't have contains_eager() and
joinedload() along the same paths at the same time like that.   Also,
chaining joinedload() from contains_eager() is not a very typical thing
to do, it works, but joinedload() does not coordinate with
contains_eager() in any way, and it has no idea that you are using
outerjoin() to the left of it.

The two correct ways to do this are:

     print session.query(Plant).\
         join(Plant.plant_dimensionsseries).\
 
options(contains_eager(Plant.plant_dimensionsseries).joinedload(PlantDimensionsseries.data_computed,
))

I cannot use a inner JOIN between plant and plant_dimensionsseries because a plant does not necessarily have any corresponding plant_dimensionsseries so give me different result.
 

and

     print session.query(Plant).\
         outerjoin(Plant.plant_dimensionsseries).\
 
options(contains_eager(Plant.plant_dimensionsseries).joinedload(PlantDimensionsseries.data_computed,
innerjoin=False))


or of course:


     print session.query(Plant).\
         outerjoin(Plant.plant_dimensionsseries).\
 
options(joinedload(Plant.plant_dimensionsseries).joinedload(PlantDimensionsseries.data_computed))
 
These two work. However I think that I cannot do that in my real query because it is a bit more complex. For the sake of simplifying my question, I reduced the query to the minimum but I realize it is now hard so see why I was using contains_eager or joinedload in different parts of the query. Indeed, I need to do more joins in the real query (see below).

I can have the expected result by adding the innerjoin=False but you wrote that this is a wrong usage so I wonder what a correct usage would be in my real case.

# -*- coding: utf-8 -*-
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *


Base = declarative_base()

_plant_table
= Table('plant', Base.metadata,
       
Column('id', Integer, primary_key=True)
   
)


_dimensionsseriestype_table
= Table('dimensionsseriestype', Base.metadata,
       
Column('id', Integer, primary_key=True),
       
Column('sortindex', Integer),
       
Column('designation', String),

   
)

_plant_dimensionsseries_table
= Table('plant_dimensionsseries', Base.metadata,
       
Column('plant_id', Integer, primary_key=True),
       
Column('dimensionsseriestype_id', Integer, primary_key=True),

       
ForeignKeyConstraint(['plant_id'], ['plant.id']),
       
ForeignKeyConstraint(['dimensionsseriestype_id'], ['dimensionsseriestype.id']),

   
)

_view_plant_dimensionsseries_table
= Table('view_plant_dimensionsseries', Base.metadata,
       
Column('plant_id', Integer, primary_key=True),
       
Column('dimensionsseriestype_id', Integer, primary_key=True),

       
ForeignKeyConstraint(
           
['plant_id', 'dimensionsseriestype_id'],
           
['plant_dimensionsseries.plant_id', 'plant_dimensionsseries.dimensionsseriestype_id'])
   
)

class Plant(Base):
    __table__
= _plant_table

class Dimensionsseriestype(Base):
    __table__
= _dimensionsseriestype_table
    _id
= __table__.c.id

class PlantDimensionsseries(Base):
    __table__
= _plant_dimensionsseries_table
    _plant_id
= __table__.c.plant_id
    _dimensionsseriestype_id
= __table__.c.
dimensionsseriestype_id

    plant
= relationship('Plant',
                innerjoin
=True,
                backref
=backref('plant_dimensionsseries'))

    dimensionsseriestype
= relationship('Dimensionsseriestype',
                innerjoin
=True,
                backref
=backref('plant_dimensionsseries'))

class PlantDimensionsseriesDataComputed(Base):
    __table__
= _view_plant_dimensionsseries_table
    _plant_id
= __table__.c.plant_id
    _dimensionsseriestype_id
= __table__.c.dimensionsseriestype_id

   
# One-to-one relationship

    dimensionsseries
= relationship('PlantDimensionsseries',

                innerjoin
=True,
                backref
=backref('data_computed',
                    innerjoin
=True))

if __name__ == '__main__':

    engine
= create_engine('postgresql://nurseryacme_employee@localhost:5432/nurseryacme')
   
Session = sessionmaker(bind=engine)
    session
= Session()


   
# real query (almost)
   
print session.query(Plant).\
        outerjoin
(Plant.plant_dimensionsseries).\
        outerjoin
(Dimensionsseriestype, PlantDimensionsseries.dimensionsseriestype).\
        options
(contains_eager(Plant.plant_dimensionsseries, PlantDimensionsseries.dimensionsseriestype)).\
        options
(joinedload(Plant.plant_dimensionsseries, PlantDimensionsseries.data_computed)).\
        order_by
(Dimensionsseriestype.sortindex)


Mike Bayer

unread,
Apr 26, 2016, 3:42:45 PM4/26/16
to sqlal...@googlegroups.com


On 04/26/2016 12:22 PM, Alex Dev wrote:
> These two work. However I think that I cannot do that in my real query
> because it is a bit more complex. For the sake of simplifying my
> question, I reduced the query to the minimum but I realize it is now
> hard so see why I was using contains_eager or joinedload in different
> parts of the query. Indeed, I need to do more joins in the real query
> (see below).
>
> I can have the expected result by adding the |innerjoin=False| but you
> wrote that this is a wrong usage so I wonder what a correct usage would
> be in my real case.
>
> |
> # -*- coding: utf-8 -*-
> fromsqlalchemy import*
> fromsqlalchemy.ext.declarative importdeclarative_base
> fromsqlalchemy.orm import*
>
>
> Base=declarative_base()
>
> _plant_table =Table('plant',Base.metadata,
> Column('id',Integer,primary_key=True)
> )
>
> _dimensionsseriestype_table =Table('dimensionsseriestype',Base.metadata,
> Column('id',Integer,primary_key=True),
> Column('sortindex',Integer),
> Column('designation',String),
> )
>
> _plant_dimensionsseries_table =Table('plant_dimensionsseries',Base.metadata,
> Column('plant_id',Integer,primary_key=True),
> Column('dimensionsseriestype_id',Integer,primary_key=True),
> ForeignKeyConstraint(['plant_id'],['plant.id']),
> ForeignKeyConstraint(['dimensionsseriestype_id'],['dimensionsseriestype.id']),
> )
>
> _view_plant_dimensionsseries_table
> =Table('view_plant_dimensionsseries',Base.metadata,
> Column('plant_id',Integer,primary_key=True),
> Column('dimensionsseriestype_id',Integer,primary_key=True),
> ForeignKeyConstraint(
> ['plant_id','dimensionsseriestype_id'],
> ['plant_dimensionsseries.plant_id','plant_dimensionsseries.dimensionsseriestype_id'])
> )
>
> classPlant(Base):
> __table__ =_plant_table
>
> classDimensionsseriestype(Base):
> __table__ =_dimensionsseriestype_table
> _id =__table__.c.id
>
> classPlantDimensionsseries(Base):
> __table__ =_plant_dimensionsseries_table
> _plant_id =__table__.c.plant_id
> _dimensionsseriestype_id =__table__.c.dimensionsseriestype_id
>
> plant =relationship('Plant',
> innerjoin=True,
> backref=backref('plant_dimensionsseries'))
>
> dimensionsseriestype =relationship('Dimensionsseriestype',
> innerjoin=True,
> backref=backref('plant_dimensionsseries'))
>
> classPlantDimensionsseriesDataComputed(Base):
> __table__ =_view_plant_dimensionsseries_table
> _plant_id =__table__.c.plant_id
> _dimensionsseriestype_id =__table__.c.dimensionsseriestype_id
>
> # One-to-one relationship
> dimensionsseries =relationship('PlantDimensionsseries',
> innerjoin=True,
> backref=backref('data_computed',
> innerjoin=True))
>
> if__name__ =='__main__':
>
> engine
> =create_engine('postgresql://nurseryacme_employee@localhost:5432/nurseryacme')
> Session=sessionmaker(bind=engine)
> session =Session()
>
> # real query (almost)
> printsession.query(Plant).\
> outerjoin(Plant.plant_dimensionsseries).\
>
> outerjoin(Dimensionsseriestype,PlantDimensionsseries.dimensionsseriestype).\
>
> options(contains_eager(Plant.plant_dimensionsseries,PlantDimensionsseries.dimensionsseriestype)).\
>
> options(joinedload(Plant.plant_dimensionsseries,PlantDimensionsseries.data_computed)).\
> order_by(Dimensionsseriestype.sortindex)

if you're looking to joinedload() just
PlantDimentionssseries.data_computed and have the contains_eager() take
effect for the Plant.plant_dimensionseries part, and the issue is that
you want this to be two separate sets of options, you can do this:

options(contains_eager(Plant.plant_dimensionsseries)).
options(defaultload(Plant.plant_dimensionseries).joinedload(PlantDimensionsSeries.data_computed,
innerjoin=False))

if you need to use contains_eager() for one segment and joinedload() for
the second, then you need to give it innerjoin=False, this won't be
auto-detected.
Reply all
Reply to author
Forward
0 new messages