import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.neo4j.graphdb.RelationshipType;
import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.kernel.impl.util.FileUtils;
import org.neo4j.unsafe.batchinsert.BatchInserter;
import org.neo4j.unsafe.batchinsert.BatchInserters;
/**
 * Command-line bulk loader that builds a fresh Neo4j store from two CSV files using the
 * batch inserter.
 *
 * <p>The vertex file supplies one node per row: the external id is read from column 0 and
 * the city, country and gender from columns 3, 4 and 5. The edge file supplies one
 * {@code KNOWS} relationship per row, with the external ids of both endpoints in columns 0
 * and 1. The first row of each file is skipped as a header. Rows are split on every comma,
 * so quoted fields that themselves contain commas are not supported.
 *
 * <p>Inserter settings are read from {@code batch.properties} in the working directory.
 * Any existing directory at the target store path is deleted before the import starts.
 */
public class GraphImporter {
    /** How many rows are processed between two progress messages. */
    private static final long PROGRESS_INTERVAL = 1000000L;

    private static final int ID_COLUMN = 0;
    private static final int CITY_COLUMN = 3;
    private static final int COUNTRY_COLUMN = 4;
    private static final int GENDER_COLUMN = 5;

    private static final int EDGE_FROM_COLUMN = 0;
    private static final int EDGE_TO_COLUMN = 1;

    /** Last node id handed to the batch inserter; ids are assigned sequentially from 1. */
    private long nodeIdx = 0;

    /** Maps the external id found in the CSV files to the node id used in the store. */
    private final Map<Long, Long> nodeMap = new HashMap<Long, Long>();

    enum RelType implements RelationshipType {
        KNOWS
    }

    /**
     * Creates a node for the given external id unless one was already created.
     *
     * <p>The external id is stored on the node as property {@code "Id"} (added to
     * {@code prop}). Internal bookkeeping is updated only after the inserter accepted the
     * node, so a failing insert does not leave a mapping to a node that does not exist.
     *
     * @param pnum external id taken from the vertex file
     * @param db   batch inserter to create the node in
     * @param prop node properties; mutated to include the {@code "Id"} entry
     */
    private void createNode(long pnum, BatchInserter db, Map<String, Object> prop) {
        if (nodeMap.containsKey(pnum)) {
            return;
        }
        long nodeId = nodeIdx + 1;
        prop.put("Id", pnum);
        db.createNode(nodeId, prop);
        nodeIdx = nodeId;
        nodeMap.put(pnum, nodeId);
    }

    /**
     * Translates an external id into the node id that was assigned during node import.
     *
     * @param pnum external id taken from the edge file
     * @return the node id used in the store
     * @throws Exception if no node was created for {@code pnum}
     */
    private long getNodeNum(long pnum) throws Exception {
        Long nodeId = nodeMap.get(pnum);
        if (nodeId == null) {
            throw new Exception("Missing person: " + pnum);
        }
        return nodeId;
    }

    /**
     * Entry point.
     *
     * @param args vertex file, edge file and target store directory, in that order
     */
    public static void main(String[] args) {
        if (args == null || args.length < 3) {
            System.err.println("Usage: GraphImporter <vertexFile> <edgeFile> <dbPath>");
            System.exit(1);
            return;
        }
        GraphImporter importer = new GraphImporter();
        importer.load(args[0], args[1], args[2]);
    }

    /**
     * Runs the complete import: wipes the target directory, imports all nodes, then all
     * edges, and always shuts the inserter down afterwards.
     *
     * @param vertexFile path of the CSV file describing nodes
     * @param edgeFile   path of the CSV file describing relationships
     * @param dbpath     directory of the store to create
     */
    private void load(String vertexFile, String edgeFile, String dbpath) {
        BatchInserter db = null;
        try {
            File graphDb = new File(dbpath);
            if (graphDb.exists()) {
                FileUtils.deleteRecursively(graphDb);
            }
            Map<String, String> config = MapUtil.load(new File("batch.properties"));
            db = BatchInserters.inserter(dbpath, config);

            importNodes(vertexFile, db);
            importEdges(edgeFile, db);

            System.out.println("Data successfully imported!");
        } catch (Throwable e) {
            // Top-level boundary of a command-line tool: report whatever went wrong and
            // still fall through to the shutdown below.
            e.printStackTrace();
        } finally {
            if (db != null) {
                try {
                    db.shutdown();
                } catch (Throwable e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Reads the vertex file and creates one node per well-formed row.
     *
     * <p>Rows with too few columns or a non-numeric id are counted as error rows and
     * skipped instead of aborting the import.
     */
    private void importNodes(String vertexFile, BatchInserter db) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(new File(vertexFile)));
        try {
            System.out.println("Loading nodes..");
            reader.readLine(); // skip the header row

            long nodes = 0;
            long errorRows = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                nodes++;
                String[] lineData = splitRow(line);
                if (lineData.length <= GENDER_COLUMN) {
                    errorRows++;
                } else {
                    try {
                        long personId = Long.parseLong(unquote(lineData[ID_COLUMN]));
                        Map<String, Object> prop = new HashMap<String, Object>(10);
                        prop.put("City", unquote(lineData[CITY_COLUMN]));
                        prop.put("Country", unquote(lineData[COUNTRY_COLUMN]));
                        prop.put("Gender", unquote(lineData[GENDER_COLUMN]));
                        createNode(personId, db, prop);
                    } catch (NumberFormatException e) {
                        errorRows++;
                    }
                }
                if (nodes % PROGRESS_INTERVAL == 0) {
                    System.out.println("Nodes: " + nodes + "(" + errorRows + "); " + nodeIdx);
                }
            }
            System.out.println("Total nodes: " + nodes);
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Reads the edge file and creates one {@code KNOWS} relationship per well-formed row.
     *
     * <p>Rows that are too short, carry a non-numeric id, reference an unknown person or
     * are rejected by the inserter are counted as error rows and skipped. Unknown persons
     * and inserter failures are additionally reported on stderr as a single line each.
     */
    private void importEdges(String edgeFile, BatchInserter db) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(new File(edgeFile)));
        try {
            System.out.println("Loading edges..");
            reader.readLine(); // skip the header row

            long edges = 0;
            long errorRows = 0;
            long timestmp = System.currentTimeMillis();
            String line;
            while ((line = reader.readLine()) != null) {
                edges++;
                String[] lineData = splitRow(line);
                if (lineData.length <= EDGE_TO_COLUMN) {
                    errorRows++;
                } else {
                    try {
                        long node1 = getNodeNum(Long.parseLong(unquote(lineData[EDGE_FROM_COLUMN])));
                        long node2 = getNodeNum(Long.parseLong(unquote(lineData[EDGE_TO_COLUMN])));
                        db.createRelationship(node1, node2, RelType.KNOWS, null);
                    } catch (NumberFormatException e) {
                        errorRows++;
                    } catch (Exception e) {
                        errorRows++;
                        System.err.println("Skipping edge row " + edges + ": " + e);
                    }
                }
                if (edges % PROGRESS_INTERVAL == 0) {
                    long currTimestmp = System.currentTimeMillis();
                    System.out.println("Edges: " + edges + " (" + errorRows + ")" + " time: "
                            + (currTimestmp - timestmp) / 1000);
                    timestmp = currTimestmp;
                }
            }
            System.out.println("Total edges: " + edges + " (" + errorRows + ")");
        } finally {
            closeQuietly(reader);
        }
    }

    /** Splits a CSV row on commas, keeping trailing empty fields so column counts stay stable. */
    private static String[] splitRow(String line) {
        return line.split(",", -1);
    }

    /** Removes every double-quote character from a raw CSV field. */
    private static String unquote(String field) {
        return field.replace("\"", "");
    }

    /** Closes the reader, reporting but not propagating a failure to close. */
    private static void closeQuietly(BufferedReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Batch properties
remote_logging_host=127.0.0.1
forced_kernel_id=
read_only=false
neo4j.ext.udc.host=udc.neo4j.org
logical_log=nioneo_logical.log
online_backup_enabled=false
remote_logging_port=4560
gc_monitor_threshold=200ms
array_block_size=120
load_kernel_extensions=true
neostore.relationshipstore.db.mapped_memory=1000M
node_auto_indexing=false
intercept_committing_transactions=false
keep_logical_logs=true
dump_configuration=true
gc_monitor_wait_time=100ms
cache_type=none
intercept_deserialized_transactions=false
neostore.nodestore.db.mapped_memory=200M
neo4j.ext.udc.first_delay=600000
neo4j.ext.udc.reg=unreg
lucene_searcher_cache_size=2147483647
neo4j.ext.udc.interval=86400000
use_memory_mapped_buffers=true
rebuild_idgenerators_fast=true
neostore.propertystore.db.index.keys.mapped_memory=5M
neostore.propertystore.db.strings.mapped_memory=200M
neostore.propertystore.db.arrays.mapped_memory=130M
neo_store=neostore
logging.threshold_for_rotation=104857600
neostore.propertystore.db.index.mapped_memory=5M
backup_slave=false
neostore.propertystore.db.mapped_memory=2000M
gcr_cache_min_log_interval=60s
relationship_grab_size=100
relationship_auto_indexing=false
string_block_size=120
lucene_writer_cache_size=2147483647
node_cache_array_fraction=1.0
grab_file_lock=true
remote_logging_enabled=false
allow_store_upgrade=false
neo4j.ext.udc.enabled=true
execution_guard_enabled=false
relationship_cache_array_fraction=1.0
online_backup_port=6362
--
--
--
I have the same problem with 10 million nodes and 2 billion relationships. It looks like this:

.................................................................................................... 19633 ms for 10000000
.................................................................................................... 20871 ms for 10000000
.................................................................................................... 22767 ms for 10000000
.................................................................................................... 23296 ms for 10000000
.................................................................................................... 23286 ms for 10000000
.................................................................................................... 23988 ms for 10000000
.................................................................................................... 25374 ms for 10000000
.................................................................................................... 1197765 ms for 10000000
.................................................................................................... 8839674 ms for 10000000
.................................................................................................... 15733633 ms for 10000000
.................................................................................................... 17917691 ms for 10000000

The performance degradation is so drastic that batch importing is unusable. What can I do?

I am using https://github.com/jexp/batch-import

iotop shows that the java process is doing only approx. 1 MB/sec of writes. CPU is almost always at 0%.
Memory used (RSS) is 22 GB. My server has 128 GB of RAM.

$ cat batch.properties
dump_configuration=true
cache_type=none
use_memory_mapped_buffers=true
neostore.propertystore.db.index.keys.mapped_memory=5G
neostore.propertystore.db.index.mapped_memory=5G
neostore.nodestore.db.mapped_memory=100G
neostore.relationshipstore.db.mapped_memory=80G
neostore.propertystore.db.mapped_memory=5G
neostore.propertystore.db.strings.mapped_memory=5G
#node_auto_indexing=true
#node_keys_indexable=Name

And I am using a 40 GB heap (-Xmx40G).
dump_configuration=true
cache_type=none
use_memory_mapped_buffers=true
neostore.propertystore.db.index.keys.mapped_memory=1G
neostore.propertystore.db.index.mapped_memory=1G
neostore.nodestore.db.mapped_memory=100M
neostore.relationshipstore.db.mapped_memory=60G
neostore.propertystore.db.mapped_memory=30G
neostore.propertystore.db.strings.mapped_memory=10G
--
--
--