The tuple sketch should do it. The query below runs on the Wikipedia dataset and does two things:
1) Estimates the distinct count of users, by country, who contributed to both en.wikipedia and es.wikipedia.
2) Computes the mean of the "count" metric for each country (mean = total count / distinct user count).
So if you multiply intersect_mean by intersect_est from the query below, you get the total count of events for users who contributed to both Wikipedia channels.
This is available only in native queries.
{
  "queryType": "groupBy",
  "dataSource": "wikipedia1",
  "intervals": [
    "2016-06-27T00:00:00.000Z/2016-06-27T23:00:00.000Z"
  ],
  "granularity": "all",
  "dimensions": [
    {
      "type": "default",
      "dimension": "countryName",
      "outputName": "countryName"
    }
  ],
  "aggregations": [
    {
      "type": "filtered",
      "aggregator": {
        "type": "arrayOfDoublesSketch",
        "name": "sketch_en",
        "fieldName": "user",
        "metricColumns": ["count"],
        "nominalEntries": 65536
      },
      "filter": {
        "type": "selector",
        "dimension": "channel",
        "value": "#en.wikipedia",
        "extractionFn": null
      }
    },
    {
      "type": "filtered",
      "aggregator": {
        "type": "arrayOfDoublesSketch",
        "name": "sketch_es",
        "fieldName": "user",
        "metricColumns": ["count"],
        "nominalEntries": 65536
      },
      "filter": {
        "type": "selector",
        "dimension": "channel",
        "value": "#es.wikipedia",
        "extractionFn": null
      }
    }
  ],
  "postAggregations": [
    {
      "type": "arrayOfDoublesSketchToMeans",
      "name": "intersect_mean",
      "field": {
        "type": "arrayOfDoublesSketchSetOp",
        "name": "intersect",
        "operation": "INTERSECT",
        "nominalEntries": 65536,
        "fields": [
          {
            "type": "fieldAccess",
            "fieldName": "sketch_en"
          },
          {
            "type": "fieldAccess",
            "fieldName": "sketch_es"
          }
        ]
      }
    },
    {
      "type": "arrayOfDoublesSketchToEstimate",
      "name": "intersect_est",
      "field": {
        "type": "arrayOfDoublesSketchSetOp",
        "name": "intersect",
        "operation": "INTERSECT",
        "nominalEntries": 65536,
        "fields": [
          {
            "type": "fieldAccess",
            "fieldName": "sketch_en"
          },
          {
            "type": "fieldAccess",
            "fieldName": "sketch_es"
          }
        ]
      }
    }
  ]
}