03:13:44 WARN org.apache.spark.rpc.netty.NettyRpcEnv - Ignored message: HeartbeatResponse(false)
03:14:31 WARN org.apache.spark.rpc.netty.NettyRpcEndpointRef - Error sending message [message = Heartbeat(driver,[Lscala.Tuple2;@3f7a6c8,BlockManagerId(driver, localhost, 40607))] in 1 attempts
org.apache.spark.rpc.RpcTimeoutException: Futures timed out after [10 seconds]. This timeout is controlled by spark.executor.heartbeatInterval
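The heartbeats are timing out at the default 10 s while the Spark job is under load. One common mitigation (not part of the original configuration) is to raise the heartbeat interval and the overall network timeout in the Spark section of the properties file; Spark requires spark.executor.heartbeatInterval to stay well below spark.network.timeout. The values here are illustrative:

spark.executor.heartbeatInterval=30s
spark.network.timeout=300s

The configuration in use follows. First, read-graph.properties, the HadoopGraph side that reads the CSV input: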
gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.script.ScriptInputFormat
gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat
gremlin.hadoop.jarsInDistributedCache=true
gremlin.hadoop.inputLocation=data/sample-bulk-import-data
gremlin.hadoop.scriptInputFormat.script=scripts/bulk-import.groovy
storage.batch-loading=true
gremlin.hadoop.defaultGraphComputer=org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer
gremlin.hadoop.outputLocation=/path/to/persist/location
gremlin.spark.graphStorageLevel=DISK_ONLY
gremlin.spark.persistStorageLevel=DISK_ONLY
#
# SparkGraphComputer Configuration
#
spark.master=local[*]
spark.serializer=org.apache.spark.serializer.KryoSerializer
spark.executor.memory=6g
spark.driver.memory=6g
spark.local.dir=/janusgraph/external/spark
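The post does not include scripts/bulk-import.groovy itself. For reference, ScriptInputFormat expects that script to define a parse(line, factory) method that turns one input line into a star vertex (returning null skips the line). A minimal sketch, assuming a hypothetical id,label,name,neighbor|neighbor CSV layout; the 'linked' edge label is a placeholder:

// Sketch of scripts/bulk-import.groovy -- the CSV layout is an assumption.
def parse(String line, ScriptElementFactory factory) {
    def (id, label, name, neighbors) = line.split(',', 4).toList()
    def v = factory.vertex(id, label)          // the vertex this line describes
    if (name) v.property('name', name)
    neighbors?.split('\\|')?.each { refId ->
        def other = factory.vertex(refId)      // stub vertex for the adjacent id
        factory.edge(v, other, 'linked')       // 'linked' is an illustrative label
    }
    return v
}

Next, write-graph.properties, the JanusGraph instance that the BulkLoaderVertexProgram writes into: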
gremlin.graph=org.janusgraph.core.JanusGraphFactory
storage.backend=cassandrathrift
storage.batch-loading=true
storage.cassandra.frame-size-mb=1000
schema.default=none
ids.block-size=25000
storage.hostname=<three IPs for cassandra ring>
storage.cassandra.keyspace=test_graph
storage.read-time=200000
storage.write-time=20000
# I've commented out the next two, but they were used to build the keyspace
#storage.cassandra.replication-strategy-options=asia-southeast1_asia_cassandra,2
#storage.cassandra.replication-strategy-class=org.apache.cassandra.locator.NetworkTopologyStrategy
storage.cassandra.write-consistency-level=ONE
storage.cassandra.read-consistency-level=ONE
#storage.cassandra.atomic-batch-mutate=false
index.edge.backend=lucene
index.edge.directory=/janusgraph/data/edgeindex
# Whether to enable JanusGraph's database-level cache, which is shared
# across all transactions. Enabling this option speeds up traversals by
# holding hot graph elements in memory, but also increases the likelihood
# of reading stale data. Disabling it forces each transaction to
# independently fetch graph elements from storage before reading/writing
# them.
cache.db-cache = true
cache.db-cache-clean-wait = 20
# Cache entry lifetime in milliseconds (180000 ms = 3 minutes)
cache.db-cache-time = 180000
# Values below 1 are a fraction of the JVM heap; values above 1 are bytes
cache.db-cache-size = 0.5
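After a load run, a quick sanity check from the Gremlin Console (a sketch; the conf path matches the driver script below):

graph = JanusGraphFactory.open('conf/coral/write-graph.properties')
g = graph.traversal()
g.V().count()   // vertices loaded so far
graph.close()

Finally, the driver script, which walks the import folder and runs one BulkLoaderVertexProgram job per CSV file: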
import groovy.io.FileType

folder = new File('/janusgraph/external/import/adjacency-list')
done_folder = new File('/janusgraph/external/import/done')
folder.eachFileRecurse(FileType.FILES) { file ->
    if (file.name.endsWith('.csv')) {
        println(file.absolutePath)
        // Open the HadoopGraph and point its input at the current CSV file
        graph = GraphFactory.open('conf/coral/read-graph.properties')
        graph.configuration().setInputLocation(file.absolutePath)
        graph.configuration().setProperty('gremlin.hadoop.scriptInputFormat.script', '/janusgraph/scripts/bulk-import.groovy')
        // Bulk-load the parsed elements into the JanusGraph write graph
        blvp = BulkLoaderVertexProgram.build().
                intermediateBatchSize(10000).
                writeGraph('conf/coral/write-graph.properties').
                create(graph)
        graph.compute(SparkGraphComputer).program(blvp).submit().get()
        graph.close()
        // Move the processed file out of the input folder
        file.renameTo(new File(done_folder, file.getName()))
    }
}
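The driver can be run non-interactively with the Gremlin Console, assuming the Hadoop and Spark plugins (tinkerpop.hadoop, tinkerpop.spark) are activated so that BulkLoaderVertexProgram and SparkGraphComputer resolve; the script filename here is illustrative:

bin/gremlin.sh -e /janusgraph/scripts/bulk-import-driver.groovy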