Hi guys,
I'm doing some performance testing with Alluxio 2.0, and I found that the under-filesystem (UFS) read throughput is very poor. I have an Alluxio cluster (3 masters, 56 workers; each worker has about 2000GB of SATA SSD), and the UFS is an HDFS cluster (160+ datanodes, 10PB capacity).
Then I tried to read data stored in HDFS and monitored the throughput with Prometheus and Grafana. I found that the UFS read throughput was only about 600MB/s (there is no network bandwidth bottleneck), and it later degraded even further, to only about 60MB/s. See the picture below.
I didn't find any error info in worker.log/worker.out or master.log.
My question is: how can I debug where the problem is, and how can I improve the UFS read throughput?
alluxio-site.properties is as below:
alluxio.debug=true
alluxio.fuse.cached.paths.max=5000
alluxio.fuse.debug.enabled=true
alluxio.fuse.user.group.translation.enabled=true
alluxio.home=/usr/local/alluxio/
alluxio.job.master.client.threads=4096
alluxio.job.master.job.capacity=10000000
alluxio.job.master.lost.worker.interval=5s
alluxio.job.master.worker.heartbeat.interval=60s
alluxio.job.master.worker.timeout=60s
alluxio.locality.compare.node.ip=true
alluxio.logs.dir=/var/log/alluxio/
alluxio.underfs.hdfs.configuration=/usr/local/alluxio/conf/core-site.xml:/usr/local/alluxio/conf/hdfs-site.xml
alluxio.underfs.hdfs.remote=true
alluxio.underfs.listing.length=10000
alluxio.underfs.object.store.service.threads=200
alluxio.webui.cors.enabled=true
alluxio.zookeeper.address=aaa:2181,bbb:2181,ccc:2181,ddd:2181,eee:2181
alluxio.zookeeper.enabled=true
alluxio.zookeeper.session.timeout=300s
## MASTER configurations
alluxio.master.activesync.interval=60s
alluxio.master.activesync.maxactivity=50
alluxio.master.audit.logging.queue.capacity=1000000
alluxio.master.backup.directory=/mnt/glusterfs-alluxio-ksyun/alluxio/backup
alluxio.master.daily.backup.enabled=true
alluxio.master.daily.backup.time=01:00
alluxio.master.executor.parallelism=36
alluxio.master.journal.checkpoint.period.entries=20000000
alluxio.master.journal.flush.batch.time=10s
alluxio.master.journal.folder=/mnt/glusterfs-alluxio-ksyun/alluxio/journal
alluxio.master.journal.log.size.bytes.max=500MB
alluxio.master.journal.type=UFS
alluxio.master.jvm.monitor.enabled=true
alluxio.master.metastore=ROCKS
alluxio.master.metastore.dir=/mnt/nvme
alluxio.master.metastore.inode.cache.evict.batch.size=100000
alluxio.master.metastore.inode.cache.high.water.mark.ratio=0.8
alluxio.master.metastore.inode.cache.low.water.mark.ratio=0.5
alluxio.master.metastore.inode.cache.max.size=100000000
alluxio.master.metastore.inode.enumerator.buffer.count=100000
alluxio.master.mount.table.root.ufs=/mnt/alluxio/data/
alluxio.master.periodic.block.integrity.check.interval=1hr
alluxio.master.rpc.executor.max.pool.size=100000
alluxio.master.ufs.block.location.cache.capacity=100000000
alluxio.master.ufs.path.cache.capacity=1000000
alluxio.master.ufs.path.cache.threads=128
alluxio.master.worker.threads.max=51200
alluxio.master.worker.threads.min=2560
## WORKER configurations
alluxio.worker.block.heartbeat.interval=60s
alluxio.worker.block.heartbeat.timeout=2min
alluxio.worker.block.master.client.pool.size=100
alluxio.worker.block.threads.max=4096
alluxio.worker.data.server.domain.socket.address=/usr/local/alluxio/run/
alluxio.worker.data.server.domain.socket.as.uuid=true
alluxio.worker.file.buffer.size=1G
alluxio.worker.filesystem.heartbeat.interval=60s
alluxio.worker.jvm.monitor.enabled=true
alluxio.worker.memory.size=20GB
alluxio.worker.network.async.cache.manager.threads.max=64
alluxio.worker.network.block.reader.threads.max=4096
alluxio.worker.network.flowcontrol.window=16MB
alluxio.worker.network.max.inbound.message.size=16MB
alluxio.worker.network.reader.buffer.size=16MB
alluxio.worker.network.reader.max.chunk.size.bytes=512MB
alluxio.worker.tieredstore.block.lock.readers=4000
alluxio.worker.tieredstore.levels=1
alluxio.worker.tieredstore.level0.alias=SSD
alluxio.worker.tieredstore.level0.dirs.path=/mnt/ssd
alluxio.worker.tieredstore.level0.dirs.quota=2300GB
alluxio.worker.tieredstore.level0.watermark.high.ratio=0.95
alluxio.worker.tieredstore.level0.watermark.low.ratio=0.5
## USER configurations
alluxio.user.block.master.client.threads=128
alluxio.user.block.remote.read.buffer.size.bytes=1GB
alluxio.user.block.size.bytes.default=128MB
alluxio.user.block.worker.client.pool.size=2048
alluxio.user.file.buffer.bytes=128MB
alluxio.user.file.master.client.threads=128
alluxio.user.file.metadata.load.type=ONCE
alluxio.user.file.metadata.sync.interval=-1
alluxio.user.file.passive.cache.enabled=true
alluxio.user.local.reader.chunk.size.bytes=128MB
alluxio.user.metrics.collection.enabled=true
alluxio.user.network.data.timeout=60s
alluxio.user.network.max.inbound.message.size=1GB
alluxio.user.network.netty.worker.threads=16
alluxio.user.network.reader.buffer.size.messages=64
alluxio.user.network.reader.chunk.size.bytes=128MB
alluxio.user.rpc.retry.base.sleep=5s
alluxio.user.rpc.retry.max.duration=2min
alluxio.user.short.circuit.enabled=true
alluxio.user.ufs.delegation.read.buffer.size.bytes=128MB
## RESOURCE configurations
## SECURITY configurations
alluxio.security.group.mapping.class=alluxio.security.group.provider.ShellBasedUnixGroupsMapping
alluxio.security.login.impersonation.username=_NONE_