SELECT non_interval_query.metadata_value AS non_interval_query_metadata_value,
coalesce(sum(non_interval_query.coalesce_2), 0) AS coalesce_1,
timestamp
FROM
(SELECT generate_series(date_trunc('day', date('2019-01-06T00:00:00+00:00')), date_trunc('day', date('2019-01-12T00:00:00+00:00')), '1 day') AS timestamp) AS time_series
LEFT OUTER JOIN
(SELECT post_metadata_1.metadata_value AS post_metadata_1_metadata_value,
post_metadata_2.metadata_value AS post_metadata_2_metadata_value,
vw_post.created_at AS vw_post_created_at,
coalesce(count(DISTINCT vw_post.id), 0) AS coalesce_1
FROM vw_post
JOIN post_metadata AS post_metadata_1 ON post_metadata_1.post_id = vw_post.id
JOIN post_metadata AS post_metadata_2 ON post_metadata_2.post_id = vw_post.id
WHERE post_metadata_1.metadata_value IN ('<metadata_values>')
AND post_metadata_2.metadata_value IN ('<metadata_value>')
AND vw_post.created_at >= '2019-01-06T00:00:00+00:00'
AND vw_post.created_at <= '2019-01-12T00:00:00+00:00'
AND post_metadata_1.schema_uid = '<schema_uid>'
AND post_metadata_1.metadata_name = '<metadata_name>'
AND post_metadata_2.schema_uid = '<schema_uid>'
AND post_metadata_2.metadata_name = '<metadata_name>'
AND vw_post.license_id IN (<license_ids>)
GROUP BY vw_post.created_at,
post_metadata_1.metadata_value,
post_metadata_2.metadata_value,
vw_post.created_at) AS non_interval_query ON date_trunc('day', created_at) = timestamp;non_interval_query.metadata_value AS non_interval_query_metadata_value" specified at the beginning of the query is ambiguous due to the 2 "metadata_value" selects in the "non_interval_query" subquery. What I'm trying to do is have 2 selects at the top level - one for "non_interval_query.post_metadata_1_metadata_value" and one for "non_interval_query.post_metadata_2_metadata_value".
For reference, here is the code used to generate the above query:
def apply_date_group_by(self, session, query, range_gb_params):
field_name = self.db.get("column")
model = self._object.get("model")
if not field_name or not model:
raise ValueError("Invalid date group by")
gb_column = self._build_column()
interval = range_gb_params.get("interval")
interval_type = range_gb_params.get("interval_type")
time_series = func.generate_series(
func.date_trunc(interval_type, func.date(range_gb_params["start"])),
func.date_trunc(interval_type, func.date(range_gb_params["end"])),
interval,
).label("timestamp")
ts_column = column("timestamp")
time_series_query = session.query(time_series).subquery("time_series")
non_interval_query = query.subquery("non_interval_query")
# have to replace the original gb_column with the 'timestamp' column
# in order to properly merge the dataset into the time series dataset
non_gb_columns, gbs = self._prepare_non_gb_columns(
ts_column, gb_column, non_interval_query.columns
)
# construct query with correct position passed in from `range_gb_params`
query_position = range_gb_params.get("query_index_position", 0)
non_gb_columns.insert(query_position, ts_column)
date_gb_query = session.query(*non_gb_columns).select_from(
time_series_query.outerjoin(
non_interval_query,
func.date_trunc(interval_type, column(field_name)) == ts_column,
)
)
if gbs:
date_gb_query = date_gb_query.group_by(*gbs)
return date_gb_query.order_by(ts_column)
Any help on this would be greatly appreciated!
def _rebuild_non_interval_query_for_group_by(self, session, query):
from sqlalchemy import table, column, select
from sqlalchemy.orm import aliased
from collections import defaultdict, OrderedDict
post_metadata = table(
"post_metadata", column("post_id"), column("metadata_value")
)
campaign_metadata = table(
"campaign_metadata", column("campaign_id"), column("metadata_value")
)
asset_metadata = table(
"asset_metadata", column("asset_id"), column("metadata_value")
)
vw_asset = table("vw_asset", column("id"))
vw_campaign = table("vw_campaign", column("id"))
vw_post = table("vw_post", column("id"))
METADATA_PRIMARY_TABLE_MAP = {
asset_metadata.name: vw_asset,
campaign_metadata.name: vw_campaign,
post_metadata.name: vw_post,
}
METADATA_NAME_TABLE_MAP = {
asset_metadata.name: asset_metadata,
campaign_metadata.name: campaign_metadata,
post_metadata.name: post_metadata,
}
primary_tables = set()
metadata_columns_count = defaultdict(int)
metadata_alias = OrderedDict()
columns = []
for c in query.c._all_columns:
if c.name == "metadata_value":
parent_column = list(c.base_columns)[0]
table = parent_column.table
primary_tables.add(METADATA_PRIMARY_TABLE_MAP[table.name])
metadata_columns_count[METADATA_NAME_TABLE_MAP[table.name]] += 1
alias_number = metadata_columns_count[METADATA_NAME_TABLE_MAP[table.name]]
alias_name = "{}_{}".format(table.name, alias_number)
alias = aliased(parent_column.table, alias_name)
metadata_alias[alias_name] = alias
column = alias.c.metadata_value.label(
"{}_{}_{}".format(table.name, alias_number, "metadata_value")
)
columns.append(column)
else:
columns.append(c)
# start constructing query
non_interval_query = session.query(*columns).select_from(*primary_tables)
for alias_name, alias in metadata_alias.items():
object_type = self._get_object_type_from_metadata_name(alias.original.name)
non_interval_query = (
non_interval_query
.join(
alias,
getattr(alias.c, "{}_id".format(object_type)) == METADATA_PRIMARY_TABLE_MAP[alias.original.name].c.id
)
)
non_interval_query = non_interval_query.subquery("non_interval_query")
return non_interval_querySELECT post_metadata_1.metadata_value AS post_metadata_1_metadata_value,
post_metadata_2.metadata_value AS post_metadata_2_metadata_value,
non_interval_query.created_at,
non_interval_query.coalesce_1 \nFROM
(SELECT post_metadata_3.metadata_value AS metadata_value, post_metadata_4.metadata_value AS metadata_value, vw_post.created_at AS created_at, coalesce(count(DISTINCT vw_post.id), :coalesce_2) AS coalesce_1 \nFROM vw_post
JOIN post_metadata AS post_metadata_3 ON post_metadata_3.post_id = vw_post.id
JOIN post_metadata AS post_metadata_4 ON post_metadata_4.post_id = vw_post.id \nWHERE post_metadata_3.metadata_value IN (:metadata_value_1, :metadata_value_2)
AND post_metadata_4.metadata_value IN (:metadata_value_3, :metadata_value_4)
AND vw_post.created_at >= :created_at_1
AND vw_post.created_at <= :created_at_2
AND post_metadata_3.schema_uid = :schema_uid_1
AND post_metadata_3.metadata_name = :metadata_name_1
AND post_metadata_4.schema_uid = :schema_uid_2
AND post_metadata_4.metadata_name = :metadata_name_2
AND vw_post.license_id IN (:license_id_1, :license_id_2)
GROUP BY vw_post.created_at, post_metadata_3.metadata_value, post_metadata_4.metadata_value, vw_post.created_at) AS non_interval_query,
vw_post
JOIN post_metadata AS post_metadata_1 ON post_metadata_1.post_id = vw_post.id
JOIN post_metadata AS post_metadata_2 ON post_metadata_2.post_id = vw_post.id;query.c._all_columns" are still referencing the original query. I need to figure out how to rebuild those columns as well so that they correctly reference the clauses in the new query.(Pdb++) list(query.c)
[Column('metadata_value', VARCHAR(length=255), table=<non_interval_query>), Column('created_at', TIMESTAMP(), table=<non_interval_query>), <sqlalchemy.sql.elements.ColumnClause at 0x7f15e1f6fef0; %(139731962070520 coalesce)s>](Pdb++) query.c._all_columns
[Column('metadata_value', VARCHAR(length=255), table=<non_interval_query>), Column('metadata_value', VARCHAR(length=255), table=<non_interval_query>), Column('created_at', TIMESTAMP(), table=<non_interval_query>), <sqlalchemy.sql.elements.ColumnClause at 0x7f15e1f6fef0; %(139731962070520 coalesce)s>]