-- The script is all done in Python using the cassandra-driver package. The export is handled by one instance which scans Redis for the keys it wants and puts them on a multiprocessing queue. Then each child process (32 total) pulls a key off the queue, gets the values for that key from Redis, creates a Cluster session and executes the insert.
-- A potential problem is creating a unique session for each insert, but when I tried creating one session per child process, I could not run the script for an hour without a timeout occurring and breaking the entire process.
-- I terminated the cluster yesterday but I'll be running some more tests today and I'll paste any updates here.
-- SCHEMA:
-- Keyspace for the Redis -> Cassandra export: 2 replicas in the single
-- 'US-EAST' datacenter. IF NOT EXISTS makes the script safe to re-run.
CREATE KEYSPACE IF NOT EXISTS record_store
    WITH replication = {'class': 'NetworkTopologyStrategy', 'US-EAST': '2'}
    AND durable_writes = true;
-- One row per user, holding the attribute/item id sets exported from Redis.
-- Keyspace-qualified so the script does not depend on a prior USE statement.
-- NOTE(review): 'user' is a CQL keyword (non-reserved, used by role management);
-- it parses today, but a rename (e.g. user_id) would be safer long-term.
-- Left unchanged here so existing queries and the export script keep working.
CREATE TABLE IF NOT EXISTS record_store.records (
    user       uuid,        -- partition key: one partition per user
    attributes set<int>,
    items      set<int>,
    PRIMARY KEY (user)
)
WITH bloom_filter_fp_chance = 0.01
    AND caching = '{"keys": "ALL", "rows_per_partition":"NONE"}'
    AND comment = ''
    AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'}
    AND compression = {'sstable_compression': 'org.apache.cassandra.io.compress.LZ4Compressor'}
    AND dclocal_read_repair_chance = 0.0
    -- BUG FIX: default_time_to_live is an integer number of seconds;
    -- the float literal 0.0 is rejected by CQL.
    AND default_time_to_live = 0
    AND gc_grace_seconds = 604800   -- 7 days
    AND max_index_interval = 2048
    AND memtable_flush_period_in_ms = 0
    AND min_index_interval = 128
    AND read_repair_chance = 0.1
    AND speculative_retry = '99.0PERCENTILE';