I use ngx.timer.at in OpenResty's init_worker_by_lua_block phase to synchronize data, and I found that after restarting OpenResty twice, part of the data cached in mlcache ends up missing. Why is that? The reproduction steps are below:
1. OpenResty version
[root@localhost logs]# openresty -V
...
2. mlcache version
...
local _M = {
    _VERSION = "2.5.0",
    _AUTHOR  = "Thibault Charbonnier",
    _LICENSE = "MIT",
}

local mt = { __index = _M }
...
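(For reference, a one-line sanity check, e.g. dropped into the init_by_lua_block shown below, to confirm which mlcache version is actually loaded through the configured lua_package_path:)

    ngx.log(ngx.NOTICE, "mlcache version: ", require("resty.mlcache")._VERSION)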
3. nginx configuration
user root;
worker_processes 1;

#error_log logs/error.log;
#error_log logs/error.log notice;
#error_log logs/error.log info;

#pid logs/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include mime.types;
    default_type application/octet-stream;

    sendfile on;
    #tcp_nopush on;

    #keepalive_timeout 0;
    keepalive_timeout 65;

    #gzip on;

    lua_shared_dict shm_main_cache 500m;
    lua_shared_dict shm_miss_cache 50m;
    lua_shared_dict shm_lock_cache 50m;
    lua_shared_dict shm_iphm_cache 50m;

    lua_package_path "/opt/lua/?.lua;;";
    lua_package_cpath "/opt/lua/?.so;;";

    init_by_lua_block {
        _c = require("test_mlcache")
        _c.init()
    }

    init_worker_by_lua_block {
        _c.test()
    }

    server {
        listen 80;
        server_name localhost;

        location = /peek {
            content_by_lua_block {
                _c.peek()
            }
        }

        location = /access {
            content_by_lua_block {
                _c.access()
            }
        }
    }
}
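(Side note on this config, not part of the repro: all error_log directives are commented out, so the worker pid that the timer callback below logs with ngx.log() goes to the build-time default error log path. To watch the old and the new timer writing concurrently after a reload, it may be easier to enable one of the commented lines, e.g.:)

    error_log logs/error.log notice;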
4. Reproduction code (test_mlcache.lua)
local mlcache = require("resty.mlcache")
local cjson = require("cjson")
local table_nkeys = require("table.nkeys")
local _M = {
    _VERSION = 1.0
}

local default_settings = {
    ttl = nil,
    neg_ttl = nil
}
function _M.init()
    local cache, err = mlcache.new("my_cache", "shm_main_cache", {
        lru_size  = 6,                  -- size of the L1 (Lua VM) LRU cache
        ttl       = 0,                  -- 0 = cached hits never expire
        neg_ttl   = 0,                  -- 0 = cached misses never expire
        shm_miss  = "shm_miss_cache",
        shm_locks = "shm_lock_cache",
        ipc_shm   = "shm_iphm_cache"
    })
    if err then
        ngx.log(ngx.ERR, "Initialize mlcache error: ", err)
        return
    end

    -- seed the "dns" key with an empty table
    cache:set("dns", default_settings, {})

    _G.cache = cache
end
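-- Note (not part of the repro): mlcache's set() takes (key, opts, value), so
-- default_settings above is passed as the per-call opts table (all its fields
-- are nil, so it is effectively an empty opts table) and {} is the value.
-- The seeding call could equivalently be written with the opts spelled out:
--   local ok, serr = cache:set("dns", { ttl = 0, neg_ttl = 0 }, {})
--   if not ok then ngx.log(ngx.ERR, "failed to seed dns: ", serr) end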
local callback = function(_, cores)
    local id = ngx.worker.pid()
    ngx.log(ngx.ERR, id)

    for i = 1, 5000 do
        cores:update()
        ngx.sleep(0.002)

        local value, err, hit_level = cores:get("dns")
        if err then
            ngx.log(ngx.ERR, "Get mlcache error: ", err)
            return
        end

        -- add one key per iteration and write the whole table back
        value[tostring(i)] = "value"
        cores:set("dns", default_settings, value)
    end
end
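-- Note (sketch, not part of the repro): as far as I understand, with ipc_shm
-- configured, update() polls the IPC shm for invalidation events broadcast by
-- set()/delete() in other workers, so this worker's L1 copy gets refreshed.
-- It also returns ok, err, which the loop above ignores; a more defensive
-- call would be:
--   local ok, uerr = cores:update()
--   if not ok then ngx.log(ngx.ERR, "mlcache update error: ", uerr) end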
local timer = function()
    -- "cache" here resolves to the global set by _M.init() (_G.cache)
    local ok, err = ngx.timer.at(0, callback, cache)
    if not ok then
        ngx.log(ngx.ERR, err)
    end
end

function _M.test()
    timer()
end
function _M.peek()
    local ttl, err, value = cache:peek("dns")

    ngx.say("result: ", cjson.encode(value))
    ngx.say("pid: ", ngx.worker.pid())
    ngx.say("type: ", type(value))
    ngx.say("total: ", table_nkeys(value))
end
function _M.access()
    local dns_later, err, hit_level = cache:get("dns")

    ngx.say("result: ", cjson.encode(dns_later))
    ngx.say("pid: ", ngx.worker.pid())
    ngx.say("type: ", type(dns_later))
    ngx.say("hit_level: ", hit_level)
    ngx.say("total: ", table_nkeys(dns_later))
end
return _M
5. Reproduction steps
Step 1: Start OpenResty with the configuration above. This triggers the init_worker_by_lua hook and starts a background timer.
$ openresty
Step 2: As soon as OpenResty has finished starting, reload it. This triggers the init_worker_by_lua hook again and starts a new background timer, while the old timer is still running.
$ openresty -s reload
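(For reference, this is also why the old worker keeps writing after the reload: a pending timer callback runs to completion unless it checks for worker shutdown itself. A minimal sketch of such a check, not used in my repro, would be:)

    local guarded_callback = function(premature, cores)
        for i = 1, 5000 do
            -- bail out once this worker starts shutting down (e.g. after a reload)
            if premature or ngx.worker.exiting() then
                return
            end
            ngx.sleep(0.002)
            -- ... same get/modify/set body as in the repro callback ...
        end
    end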
Step 3: Request "http://localhost/access" to check the final result returned by cache:get(); part of the data is missing:
...
pid: 1834966
type: table
hit_level: 1
total: 4687 # The correct value is 5000
Step 4: Request "http://localhost/peek" to check the final result in mlcache's L2 (the shm layer); part of the data is missing there as well.
...
pid: 1834966
type: table
total: 4687 # The correct value is 5000
So I am quite puzzled. By the usual reasoning, L2 is implemented on top of lua_shared_dict, so operations on it should be atomic, and L1 is populated from L2. Even though the old timer does not stop when OpenResty is reloaded, it should not affect the new worker process, and the end result should still be consistent. I cannot tell where things go wrong; could anyone help me track this down?
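To make the "atomic" part of the reasoning concrete, this is what one iteration of the timer loop does (the names are the ones from my repro, i is the loop index). Each individual get()/set() call is atomic with respect to the shm, but nothing in the code serializes the whole read-modify-write sequence across the two timers that exist right after a reload:

    local value = cores:get("dns")              -- read the whole table (L1 or L2)
    value[tostring(i)] = "value"                -- modify it in plain Lua, no lock held
    cores:set("dns", default_settings, value)   -- write the whole table back

    -- if the other timer runs its own get()/set() pair in between, whichever
    -- set() lands last overwrites the table written by the other one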