DB2 V9.5 FP7,
db2 "select B.NAME, COUNT(distinct A.ID), COUNT(A.ID) from (values (1),
(2),(3),(4)) as A (ID), (values (1,'A'),(1,'A'),(2,'B'),(3,'A')) as B
(ID, NAME) where A.ID = B.ID group by B.NAME"
NAME 2 3
---- ----------- -----------
A 2 3
B 1 1
I am trying to achieve the same using partitions:
select B.NAME, COUNT (distinct A.ID) over (partition by B.NAME),
COUNT(A.ID) over (partition by B.NAME) from (values (1),(2),(3),(4))
as A (ID), (values (1,'A'),(1,'A'),(2,'B'),(3,'A')) as B (ID, NAME)
where A.ID = B.ID
SQL0441N Invalid use of keyword DISTINCT or ALL with function
"COUNT".
SQLSTATE=42601
I am unable to use distinct with partitions. Is this feature not
supported ? Any available workarounds?
Thanks,
-M
Note:
I like to use capital letters for keywords and small letters(or mixed)
for others.
And I think that it is usual habit in SQL.
------------------------------ Commands Entered
------------------------------
WITH
a(id) AS (
VALUES (1), (2), (3), (4)
)
, b(id, name) AS (
VALUES (1, 'A'), (1, 'A'), (2, 'B'), (3, 'A')
)
SELECT b.name
, MAX( DENSE_RANK()
OVER(PARTITION BY b.name
ORDER BY a.id)
)
OVER(PARTITION BY b.name) AS count_distinct_a_id
, COUNT(a.id)
OVER(PARTITION BY b.name) AS count_a_id
, a.id AS a_id
FROM a
, b
WHERE a.id = b.id
ORDER BY
b.name
;
------------------------------------------------------------------------------
NAME COUNT_DISTINCT_A_ID COUNT_A_ID A_ID
---- -------------------- ----------- -----------
A 2 3 1
A 2 3 1
A 2 3 3
B 1 1 2
4 record(s) selected.
Is there a quick way to skip null values from DENSE_RANK, without
having to do a query on top of the results ?
Thanks, -M
As a general(apart from your query), if you want to use DENSE_RANK for
a workaround of COUNT(DISTINCT ...) OVER(...),
you can use
ORDER BY COALESCE( <id> , MIN(<id>) OVER(PARTITION BY name) )
instead of
ORDER BY <id>
in DENSE_RANK.
Here is an example:
------------------------------ Commands Entered
------------------------------
WITH
b(id, name) AS (
VALUES (1, 'A'), (1, 'A'), (2, 'B'), (CAST(NULL AS INT), 'A')
)
SELECT name
, MAX( DENSE_RANK()
OVER(PARTITION BY name
ORDER BY COALESCE( id , MIN(id) OVER(PARTITION
BY name) )
)
)
OVER(PARTITION BY name) AS count_distinct_id
, COUNT(id)
OVER(PARTITION BY name) AS count_id
, id
FROM b
ORDER BY
name
;
------------------------------------------------------------------------------
NAME COUNT_DISTINCT_ID COUNT_ID ID
---- -------------------- ----------- -----------
A 1 2 1
A 1 2 1
A 1 2 -
------------------------------ Commands Entered
------------------------------
WITH
b(id, name) AS (
VALUES (1, 'A'), (1, 'A'), (2, 'B'), (CAST(NULL AS INT), 'A')
)
SELECT name
, MAX( DENSE_RANK()
OVER(PARTITION BY name
ORDER BY COALESCE( id , MIN(id) OVER(PARTITION BY
name) )
)
)
OVER(PARTITION BY name) AS count_distinct_id
, COUNT(id)
OVER(PARTITION BY name) AS count_id
, COUNT(*)
OVER(PARTITION BY name) AS count_rows
, id
FROM b
ORDER BY
name
;
------------------------------------------------------------------------------
NAME COUNT_DISTINCT_ID COUNT_ID COUNT_ROWS ID
---- -------------------- ----------- ----------- -----------
A 1 2 3 1
A 1 2 3 1
A 1 2 3 -
B 1 1 1 2
4 record(s) selected.
I found another expression which worked even if all values of the
count column in a partition were null,
------------------------------ Commands Entered
------------------------------
WITH
b(id, name) AS (
VALUES (CAST(NULL AS INT), 'A'), (CAST(NULL AS INT), 'A')
, (2, 'B'), (CAST(NULL AS INT), 'A'), (CAST(NULL AS INT), 'B')
, (5, 'C'), (CAST(NULL AS INT), 'C')
, (5, 'C'), (CAST(NULL AS INT), 'C'), (7, 'C')
)
SELECT name
, COUNT( CASE ROW_NUMBER()
OVER(PARTITION BY name , id)
WHEN 1 THEN id
END
)
OVER(PARTITION BY name) AS count_distinct_id_2
, COUNT(id)
OVER(PARTITION BY name) AS count_id
, COUNT(*)
OVER(PARTITION BY name) AS count_rows
, id
FROM b
ORDER BY
name
;
------------------------------------------------------------------------------
NAME COUNT_DISTINCT_ID_2 COUNT_ID COUNT_ROWS ID
---- ------------------- ----------- ----------- -----------
A 0 0 3 -
A 0 0 3 -
A 0 0 3 -
B 1 1 2 2
B 1 1 2 -
C 2 3 5 5
C 2 3 5 5
C 2 3 5 7
C 2 3 5 -
C 2 3 5 -
10 record(s) selected.
Great solutions, Tonkuma. Thanks a lot for all your help.
with T (ID, VALUE) as (values (1,'abc'),(1,'xpto'),(1,cast (null as
varchar(3))), (2,CAST (null as varchar(3))))
select ID, VALUE, MAX( DENSE_RANK() OVER( PARTITION BY ID order by
COALESCE( VALUE, MIN(VALUE) OVER(PARTITION BY ID) ) ) ) OVER
(partition By ID) from T
;
ID 1 has 2 not null values and one null.
ID 2 has only one null value.
This query returns:
ID VALUE 3
----------- ----- --------------------
1 abc 2
1 - 2
1 xpto 2
2 - 1
Which is correct for ID 1 (as it has only two not null values), but is
incorrect for ID 2 ....
What am I missing?
Thanks again.
-M
So, please use "COUNT( CASE ROW_NUMBER() ... END ) OVER(...)",
like ...
------------------------------ Commands Entered
------------------------------
with T (ID, VALUE) as (
values
(1,'abc'), (1,'xpto')
, (1,cast (null as varchar(3)))
, (2,CAST (null as varchar(3)))
)
SELECT id
, value
, COUNT( CASE ROW_NUMBER()
OVER(PARTITION BY id , value)
WHEN 1 THEN value
END
)
OVER(PARTITION BY id) AS count_distinct
FROM t
;
------------------------------------------------------------------------------
ID VALUE COUNT_DISTINCT
----------- ----- --------------
1 abc 2
1 xpto 2
1 - 2
2 - 0
4 record(s) selected.
Note:
You wanted to give a solution which was equivalent to
COUNT(DISTINCT id) OVER(PARTITION BY name)
in your original post.
But, your previous example is corresponds to
COUNT(DISTINCT value) OVER(PARTITION BY id)