We had an incident earlier that when one of our client had improper retry policy that leads to huge wave of requests to the server. Then all clients started to see `this StatusRuntimeException: UNKNOWN: channel closed` at client side.
@400000005f0744ef2096858c java.util.concurrent.CompletionException: io.grpc.StatusRuntimeException: UNKNOWN: channel closed
@400000005f0744ef2096858c Channel Pipeline: [SslHandler#0, ProtocolNegotiators$ClientTlsHandler#0, WriteBufferingAndExceptionHandler#0, DefaultChannelPipeline$TailContext#0]
@400000005f0744ef20968974 at java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:331) ~[?:?]
@400000005f0744ef2096df64 at java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:346) ~[?:?]
@400000005f0744ef2096df64 at java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:632) ~[?:?]
@400000005f0744ef2096e34c at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506) [?:?]
@400000005f0744ef2096e34c at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:2088) [?:?]
@400000005f0744ef2096ef04 at io.grpc.stub.ClientCalls$StreamObserverToCallListenerAdapter.onClose(ClientCalls.java:449) [grpc-stub-1.27.1.jar:1.27.1]
@400000005f0744ef2096fabc at io.grpc.internal.ClientCallImpl.closeObserver(ClientCallImpl.java:426) [grpc-core-1.29.0.jar:1.29.0]
@400000005f0744ef2096fea4 at io.grpc.internal.ClientCallImpl.access$500(ClientCallImpl.java:66) [grpc-core-1.29.0.jar:1.29.0]
@400000005f0744ef2096fea4 at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.close(ClientCallImpl.java:689) [grpc-core-1.29.0.jar:1.29.0]
@400000005f0744ef20970674 at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.access$900(ClientCallImpl.java:577) [grpc-core-1.29.0.jar:1.29.0]
@400000005f0744ef20970a5c at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInternal(ClientCallImpl.java:751) [grpc-core-1.29.0.jar:1.29.0]
@400000005f0744ef20970a5c at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInContext(ClientCallImpl.java:740) [grpc-core-1.29.0.jar:1.29.0]
@400000005f0744ef2097604c at io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37) [grpc-core-1.29.0.jar:1.29.0]
@400000005f0744ef20976434 at io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123) [grpc-core-1.29.0.jar:1.29.0]
@400000005f0744ef20976434 at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) [?:?]
@400000005f0744ef20976fec at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) [?:?]
@400000005f0744ef20976fec at java.lang.Thread.run(Thread.java:834) [?:?]
@400000005f0744ef20976fec Caused by: io.grpc.StatusRuntimeException: UNKNOWN: channel closed
@400000005f0744ef209773d4 Channel Pipeline: [SslHandler#0, ProtocolNegotiators$ClientTlsHandler#0, WriteBufferingAndExceptionHandler#0, DefaultChannelPipeline$TailContext#0]
@400000005f0744ef209777bc at io.grpc.Status.asRuntimeException(Status.java:533) ~[grpc-api-1.29.0.jar:1.29.0]
@400000005f0744ef209777bc ... 12 more
Caused by: java.nio.channels.ClosedChannelException
at io.grpc.netty.shaded.io.grpc.netty.Utils.statusFromThrowable(Utils.java:168)
at io.grpc.netty.shaded.io.grpc.netty.NettyClientTransport$5.operationComplete(NettyClientTransport.java:267)
at io.grpc.netty.shaded.io.grpc.netty.NettyClientTransport$5.operationComplete(NettyClientTransport.java:261)
at io.grpc.netty.shaded.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:511)
at io.grpc.netty.shaded.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:485)
at io.grpc.netty.shaded.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:424)
at io.grpc.netty.shaded.io.netty.util.concurrent.DefaultPromise.setFailure(DefaultPromise.java:112)
java.util.concurrent.ExecutionException: io.grpc.StatusRuntimeException: UNKNOWN: channel closed
at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) ~[?:1.8.0_92]
at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895) ~
Caused by: io.grpc.StatusRuntimeException: UNKNOWN: channel closed
at io.grpc.Status.asRuntimeException(Status.java:533)
at io.grpc.stub.ClientCalls$StreamObserverToCallListenerAdapter.onClose(ClientCalls.java:442) io.grpc.PartialForwardingClientCallListener.onClose(PartialForwardingClientCallListener.java:39) io.grpc.ForwardingClientCallListener.onClose(ForwardingClientCallListener.java:23) ~[sync-bid-strategy.jar:?]
at io.grpc.ForwardingClientCallListener$SimpleForwardingClientCallListener.onClose(ForwardingClientCallListener.java:40) ~[sync-bid-strategy.jar:?]
at io.grpc.internal.CensusStatsModule$StatsClientInterceptor$1$1.onClose(CensusStatsModule.java:700) ~[sync-bid-strategy.jar:?]
at io.grpc.PartialForwardingClientCallListener.onClose(PartialForwardingClientCallListener.java:39)
at io.grpc.ForwardingClientCallListener.onClose(ForwardingClientCallListener.java:23)
at io.grpc.ForwardingClientCallListener$SimpleForwardingClientCallListener.onClose(ForwardingClientCallListener.java:40)
at io.grpc.internal.CensusTracingModule$TracingClientInterceptor$1$1.onClose(CensusTracingModule.java:399)
at io.grpc.internal.ClientCallImpl.closeObserver(ClientCallImpl.java:507)
at io.grpc.internal.ClientCallImpl.access$300(ClientCallImpl.java:66)
at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.close(ClientCallImpl.java:627)
at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.access$700(ClientCallImpl.java:515)
at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInternal(ClientCallImpl.java:686)
at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInContext(ClientCallImpl.java:675)
at io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
at io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_92]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_92]
at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_92]
Caused by: java.nio.channels.ClosedChannelException
at io.grpc.netty.shaded.io.grpc.netty.Utils.statusFromThrowable(Utils.java:202) ~
at io.grpc.netty.shaded.io.grpc.netty.WriteBufferingAndExceptionHandler.exceptionCaught(WriteBufferingAndExceptionHandler.java:86)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:297)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:276)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.fireExceptionCaught(AbstractChannelHandlerContext.java:268)
at io.grpc.netty.shaded.io.grpc.netty.ProtocolNegotiators$ClientTlsHandler.userEventTriggered(ProtocolNegotiators.java:373) io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:341)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:327)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.fireUserEventTriggered(AbstractChannelHandlerContext.java:319)
at io.grpc.netty.shaded.io.netty.handler.ssl.SslUtils.handleHandshakeFailure(SslUtils.java:348)
at io.grpc.netty.shaded.io.netty.handler.ssl.SslHandler.setHandshakeFailure(SslHandler.java:1792)
at io.grpc.netty.shaded.io.netty.handler.ssl.SslHandler.channelInactive(SslHandler.java:1073)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:236)
at io.grpc.netty.shaded.io.netty.channel.DefaultChannelPipeline$HeadContext.channelInactive(DefaultChannelPipeline.java:1403)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243)
at io.grpc.netty.shaded.io.netty.channel.DefaultChannelPipeline.fireChannelInactive(DefaultChannelPipeline.java:912)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannel$AbstractUnsafe$8.run(AbstractChannel.java:827)
at io.grpc.netty.shaded.io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:163)
at io.grpc.netty.shaded.io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:405)
at io.grpc.netty.shaded.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:338)
at io.grpc.netty.shaded.io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:906)
at io.grpc.netty.shaded.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at io.grpc.netty.shaded.io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_92]
Caused by: java.nio.channels.ClosedChannelException
at io.grpc.netty.shaded.io.netty.handler.ssl.SslHandler.channelInactive(...)(Unknown Source)
The load is probably not very very high ~40RPS, but most of these requests are server streaming and will transmit fairly large amount of data, meaning that it will take considerable amount of time for each request to complete.
Are we hitting some kind of limitation of our server? I thought it could be `MAX_CONCURRENT_STREAM`, but not sure the `MAX_CONCURRENT_STREAM` applies to a client's connection to server, or the server's connections overall.
We are using grpc-netty-shaded(1.27.1).