PS....
I am writing to Kafka on my one broker with replication = 1 and the number of partitions set to 2.
I am randomly writing to a Kafka partition.
I have two realtime servers.
"shardSpec": {
"type": "linear",
"partitionNum": 0
},
"shardSpec": {
"type": "linear",
"partitionNum": 1
},
I would expect that when I write to Kafka, the realtime server with the matching partition would always get the data... but it's random, in the sense that if I write to partition 0 on Kafka, the realtime server with "partitionNum": 1 can get the data. What am I missing here? Below is my spec file.
{
"dataSchema" : {
"dataSource" : "datasource-topic-test",
"parser" : {
"type" : "string",
"parseSpec" : {
"format" : "json",
"timestampSpec" : {
"column": "utcdt",
"format": "iso"
},
"dimensionsSpec" : {
"dimensions": [],
"dimensionExclusions" : [],
"spatialDimensions" : []
}
}
},
"metricsSpec" : [{
"type" : "count",
"name" : "test"
}],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "minute",
"queryGranularity" : "NONE"
}
},
"ioConfig" : {
"type" : "realtime",
"firehose": {
"type": "kafka-0.8",
"consumerProps": {
"zookeeper.connect": "<%=@zookeeper%>",
"zookeeper.connection.timeout.ms" : "15000",
"zookeeper.session.timeout.ms" : "15000",
"zookeeper.sync.time.ms" : "5000",
"group.id": "topic-pixel-<%=@environment%>",
"fetch.message.max.bytes" : "1048586",
"auto.offset.reset": "largest",
"auto.commit.enable": "false"
},
"feed": "topic-test-<%=@environment%>"
},
"plumber": {
"type": "realtime"
}
},
"tuningConfig": {
"shardSpec": {
"type": "linear",
"partitionNum": <%=@partitionNum%>
},
"type" : "realtime",
"maxRowsInMemory": 500000,
"intermediatePersistPeriod": "PT10m",
"windowPeriod": "PT10m",
"basePersistDirectory": "\/tmp\/realtime\/basePersist",
"rejectionPolicy": {
"type": "serverTime"