2017-02-02 11:43:01,926 ERROR logger.type (UnderFileSystemDataServerHandler.java:handleFileReadRequest) - Failed to read ufs file, may have been closed due to a client timeout.
java.net.SocketException: Socket closed
at java.net.SocketInputStream.read(SocketInputStream.java:203)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at sun.security.ssl.InputRecord.readFully(InputRecord.java:465)
at sun.security.ssl.InputRecord.readV3Record(InputRecord.java:593)
at sun.security.ssl.InputRecord.read(InputRecord.java:532)
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:973)
at sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:930)
at sun.security.ssl.AppInputStream.read(AppInputStream.java:105)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:284)
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
at org.apache.commons.httpclient.ContentLengthInputStream.read(ContentLengthInputStream.java:170)
at java.io.FilterInputStream.read(FilterInputStream.java:133)
at org.apache.commons.httpclient.AutoCloseInputStream.read(AutoCloseInputStream.java:108)
at alluxio.org.jets3t.service.io.InterruptableInputStream.read(InterruptableInputStream.java:78)
at alluxio.org.jets3t.service.impl.rest.httpclient.HttpMethodReleaseInputStream.read(HttpMethodReleaseInputStream.java:136)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:284)
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
at alluxio.underfs.s3.S3InputStream.read(S3InputStream.java:101)
at com.google.common.io.CountingInputStream.read(CountingInputStream.java:62)
at alluxio.underfs.ObjectUnderFileInputStream.read(ObjectUnderFileInputStream.java:75)
at alluxio.worker.netty.UnderFileSystemDataServerHandler.handleFileReadRequest(UnderFileSystemDataServerHandler.java:83)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:78)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:346)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
2017-02-02 11:43:01,927 INFO httpclient.HttpMethodDirector (HttpMethodDirector.java:executeWithRetry) - I/O exception (java.net.SocketException) caught when processing request: Socket Closed
2017-02-02 11:43:01,927 INFO httpclient.HttpMethodDirector (HttpMethodDirector.java:executeWithRetry) - Retrying request
2017-02-02 11:43:02,005 INFO logger.type (FileUtils.java:createStorageDirPath) - Folder /mnt/ramdisk/alluxioworker/.tmp_blocks/648 was created!
2017-02-02 11:43:02,073 ERROR logger.type (UnderFileSystemDataServerHandler.java:handleFileReadRequest) - Failed to read ufs file, may have been closed due to a client timeout.
alluxio.exception.FileDoesNotExistException: Worker fileId 10206415740131496 is invalid. The worker may have crashed or cleaned up the client state due to a timeout.
at alluxio.worker.file.UnderFileSystemManager.getInputStreamAtPosition(UnderFileSystemManager.java:432)
at alluxio.worker.file.DefaultFileSystemWorker.getUfsInputStream(DefaultFileSystemWorker.java:148)
at alluxio.worker.netty.UnderFileSystemDataServerHandler.handleFileReadRequest(UnderFileSystemDataServerHandler.java:77)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:78)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollRdHupReady(AbstractEpollStreamChannel.java:772)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:338)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
2017-02-02 11:43:02,075 ERROR logger.type (UnderFileSystemDataServerHandler.java:handleFileReadRequest) - Failed to read ufs file, may have been closed due to a client timeout.
alluxio.exception.FileDoesNotExistException: Worker fileId 10206415740131490 is invalid. The worker may have crashed or cleaned up the client state due to a timeout.
at alluxio.worker.file.UnderFileSystemManager.getInputStreamAtPosition(UnderFileSystemManager.java:432)
at alluxio.worker.file.DefaultFileSystemWorker.getUfsInputStream(DefaultFileSystemWorker.java:148)
at alluxio.worker.netty.UnderFileSystemDataServerHandler.handleFileReadRequest(UnderFileSystemDataServerHandler.java:77)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:78)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollRdHupReady(AbstractEpollStreamChannel.java:772)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:338)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
2017-02-02 11:43:02,184 ERROR logger.type (UnderFileSystemDataServerHandler.java:handleFileReadRequest) - Failed to read ufs file, may have been closed due to a client timeout.
alluxio.exception.FileDoesNotExistException: Worker fileId 10206415740131455 is invalid. The worker may have crashed or cleaned up the client state due to a timeout.
at alluxio.worker.file.UnderFileSystemManager.getInputStreamAtPosition(UnderFileSystemManager.java:432)
at alluxio.worker.file.DefaultFileSystemWorker.getUfsInputStream(DefaultFileSystemWorker.java:148)
at alluxio.worker.netty.UnderFileSystemDataServerHandler.handleFileReadRequest(UnderFileSystemDataServerHandler.java:77)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:78)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:346)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
re S3 response data streams are always fully consumed or closed.
2017-02-02 11:39:35,002 INFO logger.type (FileUtils.java:createStorageDirPath) - Folder /mnt/ramdisk/alluxioworker/.tmp_blocks/743 was created!
2017-02-02 11:39:35,016 INFO logger.type (FileUtils.java:createStorageDirPath) - Folder /mnt/ramdisk/alluxioworker/.tmp_blocks/83 was created!
2017-02-02 11:39:35,162 ERROR logger.type (UnderFileSystemDataServerHandler.java:handleFileReadRequest) - Failed to read ufs file, may have been closed due to a client timeout.
javax.net.ssl.SSLProtocolException: Data received in non-data state: 6
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:1109)
at sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:930)
at sun.security.ssl.AppInputStream.read(AppInputStream.java:105)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:284)
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
at org.apache.commons.httpclient.ContentLengthInputStream.read(ContentLengthInputStream.java:170)
at java.io.FilterInputStream.read(FilterInputStream.java:133)
at org.apache.commons.httpclient.AutoCloseInputStream.read(AutoCloseInputStream.java:108)
at alluxio.org.jets3t.service.io.InterruptableInputStream.read(InterruptableInputStream.java:78)
at alluxio.org.jets3t.service.impl.rest.httpclient.HttpMethodReleaseInputStream.read(HttpMethodReleaseInputStream.java:136)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:284)
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
at alluxio.underfs.s3.S3InputStream.read(S3InputStream.java:101)
at com.google.common.io.CountingInputStream.read(CountingInputStream.java:62)
at alluxio.underfs.ObjectUnderFileInputStream.read(ObjectUnderFileInputStream.java:75)
at alluxio.worker.netty.UnderFileSystemDataServerHandler.handleFileReadRequest(UnderFileSystemDataServerHandler.java:83)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:78)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:346)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
15919,1 94%
My underfs address is a local path and I mount the s3 directory to the Alluxio mount point.
If I free all the data and run the queries again, the query runs fine. Is there any consistency issue while reading from the underfs? Could someone help me figure out the issue?
alluxio.user.network.netty.timeout.ms=60000
We tried to run the queries after that. I have attached the log file of one of the workers below.
Basically, it is giving a "socket closed" error. I receive a different error every time I rerun the query after the failure.
I am using an S3 directory as the UNDERFS path with the s3a connector.
Then, after restarting Alluxio and Presto with the same config, I tried running the same query on different amounts of data. First, I incrementally ran the simple query select count(*) from table_name where hour='00'; gradually increasing the number of hours. I was able to run those queries. When I ran count(*) on the whole table, the query failed with the same error. I am running the queries through Presto.
Could this be due to a large number of s3a connections, or should I change any settings on the Presto client to handle this issue?
2017-02-03 09:50:32,138 ERROR logger.type (BlockDataServerHandler.java:handleBlockReadRequest) - Exception reading block 2231369728
alluxio.exception.BlockDoesNotExistException: lockId 25434 has no lock record
at alluxio.worker.block.BlockLockManager.validateLock(BlockLockManager.java:249)
at alluxio.worker.block.TieredBlockStore.getBlockReader(TieredBlockStore.java:173)
at alluxio.worker.block.DefaultBlockWorker.readBlockRemote(DefaultBlockWorker.java:383)
at alluxio.worker.netty.BlockDataServerHandler.handleBlockReadRequest(BlockDataServerHandler.java:89)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:70)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollRdHupReady(AbstractEpollStreamChannel.java:772)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:338)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
2017-02-03 09:50:32,258 ERROR logger.type (BlockDataServerHandler.java:handleBlockReadRequest) - Exception reading block 2248146944
alluxio.exception.BlockDoesNotExistException: lockId 25427 has no lock record
at alluxio.worker.block.BlockLockManager.validateLock(BlockLockManager.java:249)
at alluxio.worker.block.TieredBlockStore.getBlockReader(TieredBlockStore.java:173)
at alluxio.worker.block.DefaultBlockWorker.readBlockRemote(DefaultBlockWorker.java:383)
at alluxio.worker.netty.BlockDataServerHandler.handleBlockReadRequest(BlockDataServerHandler.java:89)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:70)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollRdHupReady(AbstractEpollStreamChannel.java:772)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:338)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
2017-02-03 09:50:32,304 ERROR logger.type (BlockDataServerHandler.java:handleBlockReadRequest) - Exception reading block 1912602624
alluxio.exception.BlockDoesNotExistException: lockId 25428 has no lock record
at alluxio.worker.block.BlockLockManager.validateLock(BlockLockManager.java:249)
at alluxio.worker.block.TieredBlockStore.getBlockReader(TieredBlockStore.java:173)
at alluxio.worker.block.DefaultBlockWorker.readBlockRemote(DefaultBlockWorker.java:383)
at alluxio.worker.netty.BlockDataServerHandler.handleBlockReadRequest(BlockDataServerHandler.java:89)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:70)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollRdHupReady(AbstractEpollStreamChannel.java:772)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:338)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
2017-02-03 09:50:32,724 ERROR logger.type (BlockDataServerHandler.java:handleBlockReadRequest) - Exception reading block 2080374784
alluxio.exception.BlockDoesNotExistException: lockId 25456 has no lock record
at alluxio.worker.block.BlockLockManager.validateLock(BlockLockManager.java:249)
at alluxio.worker.block.TieredBlockStore.getBlockReader(TieredBlockStore.java:173)
at alluxio.worker.block.DefaultBlockWorker.readBlockRemote(DefaultBlockWorker.java:383)
at alluxio.worker.netty.BlockDataServerHandler.handleBlockReadRequest(BlockDataServerHandler.java:89)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:70)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollRdHupReady(AbstractEpollStreamChannel.java:772)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:338)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
2017-02-03 09:50:32,726 ERROR logger.type (UnderFileSystemDataServerHandler.java:handleFileReadRequest) - Failed to read ufs file, may have been closed due to a client timeout.
java.net.SocketException: Socket closed
at java.net.SocketInputStream.read(SocketInputStream.java:183)
at java.net.SocketInputStream.read(SocketInputStream.java:121)
at org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:139)
at org.apache.http.impl.io.SessionInputBufferImpl.read(SessionInputBufferImpl.java:200)
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:178)
at org.apache.http.conn.EofSensorInputStream.read(EofSensorInputStream.java:137)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:72)
at com.amazonaws.event.ProgressInputStream.read(ProgressInputStream.java:151)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:72)
at com.amazonaws.services.s3.model.S3ObjectInputStream.read(S3ObjectInputStream.java:155)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:72)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:72)
at com.amazonaws.event.ProgressInputStream.read(ProgressInputStream.java:151)
at java.security.DigestInputStream.read(DigestInputStream.java:161)
at com.amazonaws.services.s3.internal.DigestValidationInputStream.read(DigestValidationInputStream.java:59)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:72)
at com.amazonaws.services.s3.model.S3ObjectInputStream.read(S3ObjectInputStream.java:155)
at alluxio.underfs.s3a.S3AInputStream.read(S3AInputStream.java:97)
at com.google.common.io.CountingInputStream.read(CountingInputStream.java:62)
at alluxio.underfs.ObjectUnderFileInputStream.read(ObjectUnderFileInputStream.java:75)
at alluxio.worker.netty.UnderFileSystemDataServerHandler.handleFileReadRequest(UnderFileSystemDataServerHandler.java:83)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:78)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:346)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
2017-02-03 09:50:34,156 ERROR logger.type (UnderFileSystemDataServerHandler.java:handleFileReadRequest) - Failed to read ufs file, may have been closed due to a client timeout.
com.amazonaws.AmazonClientException: Unable to verify integrity of data download. Client calculated content hash didn't match hash calculated by Amazon S3. The data may be corrupt.
at com.amazonaws.services.s3.internal.DigestValidationInputStream.validateMD5Digest(DigestValidationInputStream.java:79)
at com.amazonaws.services.s3.internal.DigestValidationInputStream.read(DigestValidationInputStream.java:61)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:72)
at com.amazonaws.services.s3.model.S3ObjectInputStream.read(S3ObjectInputStream.java:155)
at alluxio.underfs.s3a.S3AInputStream.read(S3AInputStream.java:97)
at com.google.common.io.CountingInputStream.read(CountingInputStream.java:62)
at alluxio.underfs.ObjectUnderFileInputStream.read(ObjectUnderFileInputStream.java:75)
at alluxio.worker.netty.UnderFileSystemDataServerHandler.handleFileReadRequest(UnderFileSystemDataServerHandler.java:83)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:78)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:346)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
org.apache.http.conn.ConnectionPoolTimeoutException: Timeout waiting for connection from pool at org.apache.http.impl.conn.PoolingHttpClientConnectionManager.leaseConnection(PoolingHttpClientConnectionManager.java:286) at org.apache.http.impl.conn.PoolingHttpClientConnectionManager$1.get(PoolingHttpClientConnectionManager.java:263) at sun.reflect.GeneratedMethodAccessor65.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.amazonaws.http.conn.ClientConnectionRequestFactory$Handler.invoke(ClientConnectionRequestFactory.java:70) at com.amazonaws.http.conn.$Proxy39.get(Unknown Source) at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:190) at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:184) at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:184) at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) at com.amazonaws.http.apache.client.impl.SdkHttpClient.execute(SdkHttpClient.java:72) at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:787) at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:630) at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:405)
at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:367) at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:318) at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3787) at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1137) at alluxio.underfs.s3a.S3AInputStream.openStream(S3AInputStream.java:127) at alluxio.underfs.s3a.S3AInputStream.read(S3AInputStream.java:95)
at com.google.common.io.CountingInputStream.read(CountingInputStream.java:62) at alluxio.underfs.ObjectUnderFileInputStream.read(ObjectUnderFileInputStream.java:75) at alluxio.worker.netty.UnderFileSystemDataServerHandler.handleFileReadRequest(UnderFileSystemDataServerHandler.java:83) at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:78) at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43) at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308) at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294) at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308) at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
Thanks,
Shail Shah
com.amazonaws.AmazonClientException: Unable to verify integrity of data download. Client calculated content hash didn't match hash calculated by Amazon S3. The data may be corrupt.
alluxio.underfs.s3a.request.timeout.ms=0
alluxio.worker.session.timeout.ms=60000000
alluxio.underfs.s3a.socket.timeout.ms=5000000
and also using the following parameters in the Presto client:
alluxio.user.file.waitcompleted.poll.ms=1000000
alluxio.user.network.netty.timeout.ms=6000000
I think the issue was caused by either a client timeout or an s3a timeout. We were able to resolve it using these parameters.
I am still unable to fully diagnose a previous issue. Back when I was getting the errors mentioned earlier, if I re-ran queries spanning the same data files after hitting the timeout error, they never succeeded; they threw the same error every time, within just 2 seconds. But if I freed those files from memory and queried the same data again, it sometimes ran and sometimes didn't. Can you think of anything that might lead to this behavior? If a query gets killed due to a timeout, it should run the next time irrespective of the availability of the data in any tier.
2017-02-06 12:16:20,569 ERROR logger.type (UnderFileSystemDataServerHandler.java:handleFileReadRequest) - Failed to read ufs file, may have been closed due to a client timeout.
alluxio.exception.FileDoesNotExistException: Worker fileId 5018825212407877824 is invalid. The worker may have crashed or cleaned up the client state due to a timeout.
at alluxio.worker.file.UnderFileSystemManager.getInputStreamAtPosition(UnderFileSystemManager.java:432)
at alluxio.worker.file.DefaultFileSystemWorker.getUfsInputStream(DefaultFileSystemWorker.java:148)
at alluxio.worker.netty.UnderFileSystemDataServerHandler.handleFileReadRequest(UnderFileSystemDataServerHandler.java:77)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:78)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:346)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
2017-02-06 12:16:20,913 INFO logger.type (FileUtils.java:createStorageDirPath) - Folder /mnt/ramdisk/alluxioworker/.tmp_blocks/460 was created!
2017-02-06 12:16:21,198 INFO logger.type (FileUtils.java:createStorageDirPath) - Folder /mnt/ramdisk/alluxioworker/.tmp_blocks/255 was created!
2017-02-06 12:16:21,525 ERROR logger.type (UnderFileSystemDataServerHandler.java:handleFileReadRequest) - Failed to read ufs file, may have been closed due to a client timeout.
alluxio.exception.FileDoesNotExistException: Worker fileId 5018825212407877829 is invalid. The worker may have crashed or cleaned up the client state due to a timeout.
at alluxio.worker.file.UnderFileSystemManager.getInputStreamAtPosition(UnderFileSystemManager.java:432)
at alluxio.worker.file.DefaultFileSystemWorker.getUfsInputStream(DefaultFileSystemWorker.java:148)
at alluxio.worker.netty.UnderFileSystemDataServerHandler.handleFileReadRequest(UnderFileSystemDataServerHandler.java:77)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:78)
at alluxio.worker.netty.DataServerHandler.channelRead0(DataServerHandler.java:43)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:831)
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:346)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:254)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
alluxio.master.worker.threads.max=4096
alluxio.master.tieredstore.global.levels=2
alluxio.master.tieredstore.global.level0.alias=MEM
alluxio.master.tieredstore.global.level1.alias=SSD
# Worker properties
alluxio.worker.block.threads.max=4096
alluxio.worker.tieredstore.levels=2
alluxio.worker.tieredstore.level0.alias=MEM
alluxio.worker.tieredstore.level0.dirs.path=/mnt/ramdisk
alluxio.worker.tieredstore.level0.dirs.quota=10GB
alluxio.worker.tieredstore.level1.alias=SSD
alluxio.worker.tieredstore.level1.dirs.path=/data/alluxio
alluxio.worker.tieredstore.level1.dirs.quota=110GB
alluxio.worker.tieredstore.reserver.enabled=false
# User properties
alluxio.underfs.s3a.request.timeout.ms=0
alluxio.worker.session.timeout.ms=36000000
alluxio.underfs.s3a.socket.timeout.ms=36000000
USERS:-
-Dalluxio.user.file.waitcompleted.poll.ms=3600000
-Dalluxio.user.network.netty.timeout.ms=36000000
-Dalluxio.user.file.readtype.default=CACHE