Hi,
We have a two-node cluster set up (unicast).
However, every 5 seconds the following messages appear in our /var/log/corosync/corosync.log:
Oct 09 10:40:53 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section constraints: OK (rc=0, origin=zabbix-node2/cibadmin/2, version=0.48753.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_new: Connecting 0x7f3f2d9d9f80 for uid=0 gid=0 pid=522 id=8abd82f8-aca6-484d-802b-2de3d1dd99bb
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section constraints: OK (rc=0, origin=local/cibadmin/2, version=0.48753.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_destroy: Destroying 0 events
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_new: Connecting 0x7f3f2d9d9f80 for uid=0 gid=0 pid=526 id=53006f9b-d121-4216-bd89-fbfe38149909
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section 'all': OK (rc=0, origin=local/crm_resource/2, version=0.48753.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_destroy: Destroying 0 events
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_new: Connecting 0x7f3f2d9d9f80 for uid=0 gid=0 pid=540 id=9dd00941-f103-4c83-ad30-ae0539e4a015
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section nodes: OK (rc=0, origin=local/crm_attribute/2, version=0.48753.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section //cib/configuration/crm_config//cluster_property_set[@id='mysql_replication']//nvpair[@name='p_mysql_REPL_INFO']: OK (rc=0, origin=local/crm_attribute/3, version=0.48753.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_destroy: Destroying 0 events
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_new: Connecting 0x7f3f2d9d9f80 for uid=0 gid=0 pid=573 id=1e5bf6a5-53f2-42a1-ba18-b006470864ce
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section nodes: OK (rc=0, origin=local/crm_attribute/2, version=0.48753.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section //cib/configuration/crm_config//cluster_property_set[@id='mysql_replication']//nvpair[@name='p_mysql_REPL_STATUS']: OK (rc=0, origin=local/crm_attribute/3, version=0.48753.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_destroy: Destroying 0 events
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_new: Connecting 0x7f3f2d9d9f80 for uid=0 gid=0 pid=590 id=fc2b7571-6c11-4b4b-9fa0-f1b94c5b7423
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section nodes: OK (rc=0, origin=local/crm_attribute/2, version=0.48753.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section //cib/configuration/crm_config//cluster_property_set[@id='mysql_replication']//nvpair[@name='p_mysql_REPL_STATUS']: OK (rc=0, origin=local/crm_attribute/3, version=0.48753.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: notice: log_cib_diff: cib:diff: Local-only Change: 0.48754.1
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_modify operation for section crm_config: OK (rc=0, origin=local/crm_attribute/4, version=0.48754.1)
Oct 09 10:40:54 [1373] zabbix-node1 crmd: info: abort_transition_graph: te_update_diff:126 - Triggered transition abort (complete=1, node=, tag=diff, id=(null), magic=NA, cib=0.48754.1) : Non-status change
Oct 09 10:40:54 [1373] zabbix-node1 crmd: notice: do_state_transition: State transition S_IDLE -> S_POLICY_ENGINE [ input=I_PE_CALC cause=C_FSA_INTERNAL origin=abort_transition_graph ]
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section 'all': OK (rc=0, origin=local/crmd/869, version=0.48754.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section crm_config: OK (rc=0, origin=local/crmd/870, version=0.48754.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_destroy: Destroying 0 events
Oct 09 10:40:54 [1372] zabbix-node1 pengine: notice: unpack_config: On loss of CCM Quorum: Ignore
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: determine_online_status: Node zabbix-node1 is online
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: determine_online_status: Node zabbix-node2 is online
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: write_cib_contents: Archived previous version as /var/lib/heartbeat/crm/cib-24.raw
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: native_print: vip (ocf::heartbeat:IPaddr2): Started zabbix-node1
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: native_print: zabbix (lsb:zabbix-server): Started zabbix-node1
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: clone_print: Master/Slave Set: ms_MySQL [p_mysql]
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: short_print: Masters: [ zabbix-node1 ]
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: short_print: Slaves: [ zabbix-node2 ]
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: master_color: Promoting p_mysql:0 (Master zabbix-node1)
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: master_color: ms_MySQL: Promoted 1 instances of a possible 1 to master
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: write_cib_contents: Wrote version 0.48754.0 of the CIB to disk (digest: b6087df8212182daed7158a74cdd05cc)
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: LogActions: Leave vip (Started zabbix-node1)
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: LogActions: Leave zabbix (Started zabbix-node1)
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: LogActions: Leave p_mysql:0 (Master zabbix-node1)
Oct 09 10:40:54 [1372] zabbix-node1 pengine: info: LogActions: Leave p_mysql:1 (Slave zabbix-node2)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: retrieveCib: Reading cluster configuration from: /var/lib/heartbeat/crm/cib.cNuPgF (digest: /var/lib/heartbeat/crm/cib.wGGHmG)
Oct 09 10:40:54 [1373] zabbix-node1 crmd: info: do_state_transition: State transition S_POLICY_ENGINE -> S_TRANSITION_ENGINE [ input=I_PE_SUCCESS cause=C_IPC_MESSAGE origin=handle_response ]
Oct 09 10:40:54 [1373] zabbix-node1 crmd: info: do_te_invoke: Processing graph 396 (ref=pe_calc-dc-1412844054-470) derived from /var/lib/pacemaker/pengine/pe-input-3176.bz2
Oct 09 10:40:54 [1373] zabbix-node1 crmd: notice: run_graph: Transition 396 (Complete=0, Pending=0, Fired=0, Skipped=0, Incomplete=0, Source=/var/lib/pacemaker/pengine/pe-input-3176.bz2): Complete
Oct 09 10:40:54 [1373] zabbix-node1 crmd: info: do_log: FSA: Input I_TE_SUCCESS from notify_crmd() received in state S_TRANSITION_ENGINE
Oct 09 10:40:54 [1373] zabbix-node1 crmd: notice: do_state_transition: State transition S_TRANSITION_ENGINE -> S_IDLE [ input=I_TE_SUCCESS cause=C_FSA_INTERNAL origin=notify_crmd ]
Oct 09 10:40:54 [1372] zabbix-node1 pengine: notice: process_pe_message: Calculated Transition 396: /var/lib/pacemaker/pengine/pe-input-3176.bz2
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_new: Connecting 0x7f3f2d9d9f80 for uid=0 gid=0 pid=592 id=a3576cc2-10bb-4828-83ef-6520192f3b51
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section nodes: OK (rc=0, origin=local/crm_attribute/2, version=0.48754.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: cib_process_request: Completed cib_query operation for section //cib/status//node_state[@id='1']//transient_attributes//nvpair[@name='p_mysql_master_crashed']: No such device or address (rc=-6, origin=local/crm_attribute/3, version=0.48754.1)
Oct 09 10:40:54 [1368] zabbix-node1 cib: info: crm_client_destroy: Destroying 0 events
Oct 09 10:40:54 [1370] zabbix-node1 lrmd: notice: operation_finished: p_mysql_monitor_5000:495:stderr [ Error performing operation: No such device or address ]
2 things I don't understand well:
*) Why does it say 'Promoting p_mysql:0 (Master zabbix-node1)'? The cluster is running normally, so the state should remain unchanged.
*) What does the error on the last line ('Error performing operation: No such device or address') mean?
Some other config:
# lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 14.04.1 LTS
Release: 14.04
Codename: trusty
# pacemakerd --version
Pacemaker 1.1.10
Written by Andrew Beekhof
# corosync -v
Corosync Cluster Engine, version '2.3.3'
Copyright (c) 2006-2009 Red Hat, Inc.
#OCF resource version:
Version: 20140901101510
#crm configure show
node $id="1" zabbix-node1 \
attributes standby="off"
node $id="2" zabbix-node2 \
attributes standby="off"
primitive p_mysql ocf:percona:mysql \
params config="/etc/mysql/my.cnf" log="/var/log/mysql/error.log" pid="/var/lib/mysql/mysqld.pid" socket="/var/run/mysqld/mysqld.sock" replication_user="repl_user" replication_passwd="xxx" max_slave_lag="60" evict_outdated_slaves="false" binary="/usr/sbin/mysqld" test_user="test_user" test_passwd="xxx" \
op monitor interval="5s" role="Master" OCF_CHECK_LEVEL="1" \
op monitor interval="2s" role="Slave" OCF_CHECK_LEVEL="1" \
op start interval="0" timeout="600s" \
op stop interval="0" timeout="600s" \
op promote interval="0" timeout="600s"
primitive vip ocf:heartbeat:IPaddr2 \
params ip="172.24.194.113" nic="bond0" \
op monitor interval="15" \
meta target-role="Started"
primitive zabbix lsb:zabbix-server \
op start interval="0" timeout="600" delay="5s" \
op monitor interval="30s" \
op stop interval="0" timeout="600s" \
meta target-role="Started"
ms ms_MySQL p_mysql \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" globally-unique="false" target-role="Started" is-managed="true"
location cli-prefer-vip vip inf: zabbix-node2
colocation vip_and_zabbix_on_master inf: vip zabbix ms_MySQL:Master
order zabbix_before_vip inf: zabbix vip:start
property $id="cib-bootstrap-options" \
dc-version="1.1.10-42f2063" \
cluster-infrastructure="corosync" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore" \
last-lrm-refresh="1381926226" \
maintenance-mode="false"
property $id="mysql_replication" \
p_mysql_REPL_INFO="zabbix-node1|mysql-bin-node1.000001|15062383" \
p_mysql_REPL_STATUS="mysql-bin-node1.000001|23909590|104857600"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
#cat /etc/corosync/corosync.conf
# Please read the corosync.conf.5 manual page
totem {
version: 2
#crypto_cipher: none
#crypto_hash: none
secauth: on
threads: 0
cluster_name: ZabbixCluster
interface {
ringnumber: 0
bindnetaddr: 172.24.192.0
mcastport: 5405
ttl: 1
}
transport: udpu
}
logging {
fileline: off
to_logfile: yes
to_syslog: yes
logfile: /var/log/corosync/corosync.log
debug: off
timestamp: on
logger_subsys {
subsys: QUORUM
debug: off
}
}
nodelist {
node {
ring0_addr: 172.24.195.59
name: zabbix-node1
nodeid: 1
}
node {
ring0_addr: 172.24.195.60
name: zabbix-node2
nodeid: 2
}
}
quorum {
# Enable and configure quorum subsystem (default: off)
# see also corosync.conf.5 and votequorum.5
provider: corosync_votequorum
}
Thanks,
Erik