I'm trying to get God to watch Riak on Ubuntu.
The Riak init script doesn't write a pid file; it switches to the riak user and uses 'ping' to determine whether the node is up or down, rather than tracking pids.
When I start Riak through the init script via god, god detects the wrong pid (or rather, the process that spawns Riak exits, so the pid god recorded goes away).
root@lucid32:/etc/god# god status
riak: start
root@lucid32:/etc/god# cat /var/run/god/riak.pid
16382
root@lucid32:/etc/god# ps aux | grep 16382
root 16906 0.0 0.1 2140 744 pts/0 S+ 00:16 0:00 grep --color=auto 16382
But Riak is running happily:
root@lucid32:/etc/god# ps aux | grep riak
riak 16421 0.0 0.2 2192 760 ? S 00:08 0:00 /usr/lib/riak/erts-5.7.5/bin/run_erl -daemon /tmp/riak// /var/log/riak exec /usr/sbin/riak console
riak 16422 0.2 3.9 25456 14856 pts/1 Ssl+ 00:08 0:01 /usr/lib/riak/erts-5.7.5/bin/beam -K true -A 64 -- -root /usr/lib/riak -progname riak -- -home /var/lib/riak -- -boot /usr/lib/riak/releases/0.14.2/riak -embedded -config /etc/riak/app.config -name riak@127.0.0.1 -setcookie riak -- console
riak 16447 0.0 0.1 1956 384 ? S 00:08 0:00 /usr/lib/riak/erts-5.7.5/bin/epmd -daemon
riak 16509 0.0 0.1 1832 532 ? Ss 00:08 0:00 sh -s disksup
riak 16511 0.0 0.1 1752 496 ? Ss 00:08 0:00 /usr/lib/riak/lib/os_mon-2.2.5/priv/bin/memsup
riak 16512 0.0 0.0 1748 360 ? Ss 00:08 0:00 /usr/lib/riak/lib/os_mon-2.2.5/priv/bin/cpu_sup
root 16848 0.0 0.1 2076 632 pts/2 S+ 00:11 0:00 tail -f /var/log/god.log /var/log/riak/god.log
root 16912 0.0 0.2 2144 808 pts/0 S+ 00:17 0:00 grep --color=auto riak
root@lucid32:/etc/god# /etc/init.d/riak ping
Attempting to restart script through sudo -u riak
pong
Here is the current god config:
# God watch for Riak, controlled entirely through the /etc/init.d/riak init script.
# NOTE(review): this snippet sets neither w.name nor w.pid_file; `god status` reports the
# watch as "riak", so the name is presumably set elsewhere — confirm.
God.watch do |w|
# Presumably the default polling interval for the conditions below — confirm against god docs.
w.interval = 5.seconds
# Every lifecycle command delegates to the init script.
daemon = '/etc/init.d/riak'
w.start = "#{daemon} start"
w.stop = "#{daemon} stop"
w.restart = "#{daemon} restart"
# 60-second grace period configured for each of start, stop and restart.
w.start_grace = 60.seconds
w.stop_grace = 60.seconds
w.restart_grace = 60.seconds
w.log = "/var/log/riak/god.log"
# From :init: go to :up when the process_running condition is true, to :start when false.
w.transition(:init, { true => :up, false => :start }) do |on|
on.condition(:process_running){|c| c.running = true }
end
# From :start or :restart: go to :up once the process_running condition is satisfied.
w.transition([:start, :restart], :up) do |on|
on.condition(:process_running){|c| c.running = true }
end
# From :up: go back to :start when the process_exits condition fires.
w.transition(:up, :start) do |on|
on.condition(:process_exits) do |c|
# NOTE(review): 'localhost' is presumably a contact defined elsewhere in the god config — confirm.
c.notify = %w[ localhost ]
end
end
# Flapping guard, configured as: 5 moves into :start/:restart within 5 minutes
# => :unmonitored; retry after 10 minutes, at most 5 retries within 2 hours.
# NOTE(review): god's documented examples declare :flapping inside w.lifecycle rather than
# inside a w.transition block — confirm this form behaves as intended.
w.transition(:up, :unmonitored) do |on|
on.condition(:flapping){|c|
c.to_state = [:start, :restart]
c.times = 5
c.within = 5.minute
c.transition = :unmonitored
c.retry_in = 10.minutes
c.retry_times = 5
c.retry_within = 2.hours
c.notify = %w[ localhost ]
}
end
end
How do I override the process_running test so that it uses the init script's ping instead of checking for a pid?