Fullgc引發redis超時異常
1、現象
-|2019-11-14 11:50:01.095|ERROR|TID:2254.3964.15737033664569521|DubboServerHandler-192.168.58.20:10880-thread-317|c.s.c.s.p.StoreSkuRpcServiceImpl.selectStoreSkuByOneId:67-查詢店鋪商品信息出錯,skuId=28190648 org.springframework.dao.QueryTimeoutException: Redis command timed out; nested exception is io.lettuce.core.RedisCommandTimeoutException: Command timed out after 6 second(s) at org.springframework.data.redis.connection.lettuce.LettuceExceptionConverter.convert(LettuceExceptionConverter.java:70) at org.springframework.data.redis.connection.lettuce.LettuceExceptionConverter.convert(LettuceExceptionConverter.java:41) at org.springframework.data.redis.PassThroughExceptionTranslationStrategy.translate(PassThroughExceptionTranslationStrategy.java:44) at org.springframework.data.redis.FallbackExceptionTranslationStrategy.translate(FallbackExceptionTranslationStrategy.java:42) at org.springframework.data.redis.connection.lettuce.LettuceConnection.convertLettuceAccessException(LettuceConnection.java:268) at org.springframework.data.redis.connection.lettuce.LettuceStringCommands.convertLettuceAccessException(LettuceStringCommands.java:799) at org.springframework.data.redis.connection.lettuce.LettuceStringCommands.get(LettuceStringCommands.java:68) at org.springframework.data.redis.connection.DefaultedRedisConnection.get(DefaultedRedisConnection.java:253) at sun.reflect.GeneratedMethodAccessor229.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.springframework.data.redis.core.CloseSuppressingInvocationHandler.invoke(CloseSuppressingInvocationHandler.java:61) at com.sun.proxy.$Proxy188.get(Unknown Source) at com.xxx.chaos.server.common.redis.XxxRedisTemplate$11.doInRedis(XxxRedisTemplate.java:288) at org.springframework.data.redis.core.RedisTemplate.execute(RedisTemplate.java:224) at org.springframework.data.redis.core.RedisTemplate.execute(RedisTemplate.java:184) at org.springframework.data.redis.core.RedisTemplate.execute(RedisTemplate.java:171) at com.sexxcoo.chaos.server.common.redis.XxxRedisTemplate.get(XxxRedisTemplate.java:285) at com.xxx.chaos.server.common.redis.XxxRedisTemplate.gets(XxxRedisTemplate.java:308) at com.xxx.chaos.server.common.redis.XxxRedisTemplate.multiGet2Map(XxxRedisTemplate.java:376) at com.xxx.chaos.server.service.impl.StoreSkuServiceImpl.selectStoreSkuBySkuId(StoreSkuServiceImpl.java:47) at com.xxx.chaos.server.service.impl.StoreSkuServiceImpl$$FastClassBySpringCGLIB$$c1612837.invoke(<generated>) at org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218) at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:749) at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163) at org.springframework.aop.interceptor.ExposeInvocationInterceptor.invoke(ExposeInvocationInterceptor.java:93) at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:186) at org.springframework.aop.framework.CglibAopProxy$DynamicAdvisedInterceptor.intercept(CglibAopProxy.java:688) at com.xxx.chaos.server.service.impl.StoreSkuServiceImpl$$EnhancerBySpringCGLIB$$d569daea.selectStoreSkuBySkuId(<generated>) at com.xxx.chaos.server.provider.StoreSkuRpcServiceImpl.selectStoreSkuByOneId(StoreSkuRpcServiceImpl.java:62) at com.alibaba.dubbo.common.bytecode.Wrapper6.invokeMethod(Wrapper6.java) at com.alibaba.dubbo.rpc.proxy.javassist.JavassistProxyFactory$1.doInvoke(JavassistProxyFactory.java:47) at com.alibaba.dubbo.rpc.proxy.AbstractProxyInvoker.invoke(AbstractProxyInvoker.java:76) at com.alibaba.dubbo.config.invoker.DelegateProviderMetaDataInvoker.invoke(DelegateProviderMetaDataInvoker.java:52) at com.alibaba.dubbo.rpc.protocol.InvokerWrapper.invoke(InvokerWrapper.java:56) at com.alibaba.dubbo.rpc.filter.ExceptionFilter.invoke(ExceptionFilter.java:62) at com.alibaba.dubbo.rpc.protocol.ProtocolFilterWrapper$1.invoke(ProtocolFilterWrapper.java:72) at com.alibaba.csp.sentinel.adapter.dubbo.SentinelDubboProviderFilter.invoke(SentinelDubboProviderFilter.java:66) at com.alibaba.dubbo.rpc.protocol.ProtocolFilterWrapper$1.invoke(ProtocolFilterWrapper.java:72) at com.alibaba.dubbo.monitor.support.MonitorFilter.invoke$original$b6dOrsVO(MonitorFilter.java:75) at com.alibaba.dubbo.monitor.support.MonitorFilter.invoke$original$b6dOrsVO$accessor$6uvazkPi(MonitorFilter.java) at com.alibaba.dubbo.monitor.support.MonitorFilter$auxiliary$BaEkbyx9.call(Unknown Source) at org.apache.skywalking.apm.agent.core.plugin.interceptor.enhance.InstMethodsInter.intercept(InstMethodsInter.java:93) at com.alibaba.dubbo.monitor.support.MonitorFilter.invoke(MonitorFilter.java) at com.alibaba.dubbo.rpc.protocol.ProtocolFilterWrapper$1.invoke(ProtocolFilterWrapper.java:72) at com.alibaba.dubbo.rpc.filter.TimeoutFilter.invoke(TimeoutFilter.java:42) at com.alibaba.dubbo.rpc.protocol.ProtocolFilterWrapper$1.invoke(ProtocolFilterWrapper.java:72) at com.alibaba.dubbo.rpc.protocol.dubbo.filter.TraceFilter.invoke(TraceFilter.java:78) at com.alibaba.dubbo.rpc.protocol.ProtocolFilterWrapper$1.invoke(ProtocolFilterWrapper.java:72) at com.alibaba.dubbo.rpc.filter.ContextFilter.invoke(ContextFilter.java:72) at com.alibaba.dubbo.rpc.protocol.ProtocolFilterWrapper$1.invoke(ProtocolFilterWrapper.java:72) at com.alibaba.dubbo.rpc.filter.GenericFilter.invoke(GenericFilter.java:131) at com.alibaba.dubbo.rpc.protocol.ProtocolFilterWrapper$1.invoke(ProtocolFilterWrapper.java:72) at com.alibaba.dubbo.rpc.filter.ClassLoaderFilter.invoke(ClassLoaderFilter.java:38) at com.alibaba.dubbo.rpc.protocol.ProtocolFilterWrapper$1.invoke(ProtocolFilterWrapper.java:72) at com.alibaba.dubbo.rpc.filter.EchoFilter.invoke(EchoFilter.java:38) at com.alibaba.dubbo.rpc.protocol.ProtocolFilterWrapper$1.invoke(ProtocolFilterWrapper.java:72) at com.alibaba.dubbo.rpc.protocol.dubbo.DubboProtocol$1.reply(DubboProtocol.java:103) at com.alibaba.dubbo.remoting.exchange.support.header.HeaderExchangeHandler.handleRequest(HeaderExchangeHandler.java:96) at com.alibaba.dubbo.remoting.exchange.support.header.HeaderExchangeHandler.received(HeaderExchangeHandler.java:172) at com.alibaba.dubbo.remoting.transport.DecodeHandler.received(DecodeHandler.java:51) at com.alibaba.dubbo.remoting.transport.dispatcher.ChannelEventRunnable.run(ChannelEventRunnable.java:80) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:748) Caused by: io.lettuce.core.RedisCommandTimeoutException: Command timed out after 6 second(s) at io.lettuce.core.ExceptionFactory.createTimeoutException(ExceptionFactory.java:51) at io.lettuce.core.LettuceFutures.awaitOrCancel(LettuceFutures.java:114) at io.lettuce.core.cluster.ClusterFutureSyncInvocationHandler.handleInvocation(ClusterFutureSyncInvocationHandler.java:123) at io.lettuce.core.internal.AbstractInvocationHandler.invoke(AbstractInvocationHandler.java:80) at com.sun.proxy.$Proxy187.get(Unknown Source) at org.springframework.data.redis.connection.lettuce.LettuceStringCommands.get(LettuceStringCommands.java:66) ... 58 common frames omitted
2、通過 falcon、及grafana監控,發現應用與redis系統資源壓力正常
3、由于間歇出現,然后查看GC 情況
應用FullGC了且時間持續12.8s,時間點也剛好與報警異常時間對上,基本確認 FullGC原因引起

4、由于多環境混合部署(阿里云docker+IDC),然后查看了一下 docker 節點應用情況
查看發現 IDC 與 docker 節點的 GC 差異較大,docker節點對應gc時間都在>100ms,而 idc基本都< 100ms,

(1)請教了秋偉大神,sudo /usr/local/jdk1.8.0_131/bin/jstack pid | grep 'Paralle'
idc 結果為2,而 docker 里為96個,恰巧 docker 宿主機的cpu 配置未96,難道應用拿到的是宿主機的 cpu 核數?
(2)然后驗證了一下 dubbo io線程sudo /usr/local/jdk1.8.0_131/bin/jstack 7004 | grep 'New I/O server worker',IDC 為3,docker 里為37,基本確認應該是這個引起的差異;
(3)于是配置了-XX:ParallelGCThreads=2 -XX:ConcGCThreads=2 上線后,容器的 gc線程數據與 idc 一致,且時間也縮短了。到此基本確定差異原因就是docker 節點獲取的是宿主機資源導致,
(4)請教運維小伙伴后給出終極方案:但此配置需要 jdk版本>1.8.0_191,之前版本只能1.容器資源隔離2.使用-XX:ParallelGCThreads=2 -XX:ConcGCThreads=2,內存可以由-Xms, -Xmx限制,但這種方法不能避免很多Java庫根據availableProcessors()來做相應邏輯處理

5)其實這個問題一早las遇到過(項目基于 openresty 里面有 nginx 配置文件),其中worker_processes auto導致 openresty nginx woker 起了96個,這在4C8G 的 docker 節點上運行性能很差,后來運維部署用模板替換掉了 auto 配置項
5、總結
https://www.robberphex.com/java-and-docker-memory-and-cpu-limits/

浙公網安備 33010602011771號