How to do self-join funnel

53 views
Skip to first unread message

Diz

unread,
Feb 8, 2021, 12:34:48 PM2/8/21
to ClickHouse
In the average SQL dialect if I have an events table and want to sequentially see events for a given user, I'd use a window function with rank over with a partition... I see clickhouse has a rowNumberInAllBlocks() but I don't see a partition out side of the LIMIT over that can be put at the end.

How should I implement a sequential join of events on a user ID in clickhouse? Has anyone done something similar? :)

Denis Zhuravlev

unread,
Feb 9, 2021, 2:47:15 PM2/9/21
to ClickHouse

You can do it using groupArrays and high-order functions
Also check



CREATE TABLE IF NOT EXISTS employee
(
    empid Int32,
    deptid Int32,
    salary Int64
) ENGINE = Memory


INSERT INTO `employee` (`empid`, `deptid`, `salary`) VALUES(1, 10, 5500000),(2, 10, 4500000),(3, 20, 1900000),(4, 20, 4800000),(5, 40, 6500000),(6, 40, 14500000),(7, 40, 44500000),(8, 50, 6500000),(9, 50, 7500000);


SELECT empid, deptid, salary, rank FROM (
   SELECT groupArray(empid) AS empids, groupArray(deptid) AS deptids, groupArray(salary) AS salaries
      FROM (SELECT * FROM employee ORDER BY deptid ASC, salary DESC))
      ARRAY JOIN empids AS empid, deptids AS deptid, salaries AS salary, arrayEnumerateUniq(deptids) AS rank

      ┌─empid─┬─deptid─┬───salary─┬─rank─┐
      │     1 │     10 │  5500000 │    1 │
      │     2 │     10 │  4500000 │    2 │
      │     4 │     20 │  4800000 │    1 │
      │     3 │     20 │  1900000 │    2 │
      │     7 │     40 │ 44500000 │    1 │
      │     6 │     40 │ 14500000 │    2 │
      │     5 │     40 │  6500000 │    3 │
      │     9 │     50 │  7500000 │    1 │
      │     8 │     50 │  6500000 │    2 │
      └───────┴────────┴──────────┴──────┘

select empid, deptid, salary, salaries[1] salary_max_over_dep,  rank
from (
   SELECT deptid, groupArray(empid) AS empids, groupArray(salary) AS salaries
   FROM (SELECT * FROM employee ORDER BY deptid ASC, salary DESC)
   group by deptid )
array Join empids as empid, salaries as salary, arrayEnumerateDense(salaries) as rank
order by deptid, rank

┌─empid─┬─deptid─┬───salary─┬─salary_max_over_dep─┬─rank─┐
│     1 │     10 │  5500000 │             5500000 │    1 │
│     2 │     10 │  4500000 │             5500000 │    2 │
│     4 │     20 │  4800000 │             4800000 │    1 │
│     3 │     20 │  1900000 │             4800000 │    2 │
│     7 │     40 │ 44500000 │            44500000 │    1 │
│     6 │     40 │ 14500000 │            44500000 │    2 │
│     5 │     40 │  6500000 │            44500000 │    3 │
│     9 │     50 │  7500000 │             7500000 │    1 │
│     8 │     50 │  6500000 │             7500000 │    2 │
└───────┴────────┴──────────┴─────────────────────┴──────┘     
Reply all
Reply to author
Forward
0 new messages