Slave node stuck on Joining: receiving State Transfer

1,069 views
Skip to first unread message

Sander Groenen

unread,
Jul 6, 2017, 4:07:47 AM7/6/17
to codership
We have a docker setup with 2 nodes in master-slave configuration and for some reason after an initial SST the slave node gets stuck in Joining: receiving State Transfer and the wsrep_local_recv_queue keeps growing without ever reducing. Below are the mysql config for donor and slave nodes. We previously had a problem with IST not working due to docker container not binding to the correct address and after changing the ist.recv_bind to 127.0.0.1 this seemed to solve the problem, however this problem was introduced. Any help would be appreciated.

My.cnf of DONOR

[client]
port = 3306
socket = /var/run/mysqld/mysqld.sock

# Here is entries for some specific programs
# The following values assume you have at least 32M ram

# This was formally known as [safe_mysqld]. Both versions are currently parsed.
[mysqld_safe]
socket = /var/run/mysqld/mysqld.sock
nice = 0

[mysqld]

# Settings for replication
server_id=1
bind-address=0.0.0.0

skip-host-cache
skip-name-resolve
#
# * Basic Settings
#
#user = mysql
pid-file = /var/run/mysqld/mysqld.pid
socket = /var/run/mysqld/mysqld.sock
port = 3306
basedir = /usr
datadir = /var/lib/mysql
tmpdir = /tmp
lc_messages_dir = /usr/share/mysql
lc_messages = en_US
skip-external-locking

character-set-server=utf8
default-storage-engine=INNODB
sql-mode="NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION"


#
# Instead of skip-networking the default is now to listen only on
# localhost which is more compatible and is not less secure.
#bind-address = 127.0.0.1
#
# * Fine Tuning
#
max_connections = 251
connect_timeout     = 10000
wait_timeout = 6000
max_allowed_packet = 16M
thread_cache_size       = 128
sort_buffer_size = 4M
bulk_insert_buffer_size = 16M
tmp_table_size = 256M
max_heap_table_size = 32M
#
# * MyISAM
#
# This replaces the startup script and checks MyISAM tables if needed
# the first time they are touched. On error, make copy and try a repair.
myisam_max_sort_file_size=100G
myisam_recover_options = BACKUP
key_buffer_size = 8G
#open-files-limit = 2000
table_open_cache = 2000
myisam_sort_buffer_size = 222M
concurrent_insert = 2
read_buffer_size = 2M
read_rnd_buffer_size = 1M
#
# * Query Cache Configuration
#
# Cache only tiny result sets, so we can fit more in the query cache.
query_cache_limit = 100M
query_cache_size = 1G
# for more write intensive setups, set to DEMAND or OFF
#query_cache_type = DEMAND
#
# * Logging and Replication
#
# Both location gets rotated by the cronjob.
# Be aware that this log type is a performance killer.
# As of 5.1 you can enable the log at runtime!
#general_log_file        = /var/log/mysql/mysql.log
#general_log             = 1

#
# Error logging goes to syslog due to /etc/mysql/conf.d/mysqld_safe_syslog.cnf.
#
# we do want to know about network errors and such
log_warnings = 2
#
# Enable the slow query log to see queries with especially long duration
slow_query_log=1
slow_query_log_file = /var/lib/mysql/mariadb-slow.log
long_query_time = 2
#log_slow_rate_limit = 1000
#log_slow_verbosity = query_plan

relay-log-space-limit=6G

#log-queries-not-using-indexes
#log_slow_admin_statements
#
# The following can be used as easy to replay backup logs or for replication.
# note: if you are setting up a replication slave, see README.Debian about
#       other settings you may need to change.
#server-id = 1
#report_host = master1
#auto_increment_increment = 2
#auto_increment_offset = 1
#log_bin = /var/log/mysql/mariadb-bin
#log_bin_index = /var/log/mysql/mariadb-bin.index
# not fab for performance, but safer
#sync_binlog = 1
expire_logs_days = 10
max_binlog_size         = 100M
# slaves
#relay_log = /var/log/mysql/relay-bin
#relay_log_index = /var/log/mysql/relay-bin.index
#relay_log_info_file = /var/log/mysql/relay-bin.info
#log_slave_updates
#read_only
#
# If applications support it, this stricter sql_mode prevents some
# mistakes like inserting invalid dates etc.
#sql_mode = NO_ENGINE_SUBSTITUTION,TRADITIONAL
#
# * InnoDB
#
# InnoDB is enabled by default with a 10MB datafile in /var/lib/mysql/.
# Read the manual for more InnoDB related options. There are many!
default_storage_engine = InnoDB
# you can't just change log file size, requires special procedure
innodb_log_file_size = 2G
innodb_buffer_pool_size = 7G
innodb_log_buffer_size = 8M
innodb_file_per_table = 1
innodb_open_files = 400
innodb_io_capacity = 400
innodb_flush_method = O_DIRECT

innodb_additional_mem_pool_size=2G
innodb_flush_log_at_trx_commit=1
innodb_thread_concurrency=0
innodb_read_io_threads=64
innodb_write_io_threads=64
innodb_autoextend_increment=64
innodb_buffer_pool_instances=8
innodb_concurrency_tickets=5000
innodb_old_blocks_time=1000
innodb_stats_on_metadata=0
innodb_checksum_algorithm=0
innodb_locks_unsafe_for_binlog = 1
innodb_autoinc_lock_mode = 2





back_log=80
flush_time=0
max_connect_errors=100
open_files_limit=4161
table_definition_cache=1400
sync_master_info=1000
sync_relay_log=10000
sync_relay_log_info=10000


#
# * Security Features
#
# Read the manual, too, if you want chroot!
# chroot = /var/lib/mysql/
#
# For generating SSL certificates I recommend the OpenSSL GUI "tinyca".
#
# ssl-ca=/etc/mysql/cacert.pem
# ssl-cert=/etc/mysql/server-cert.pem
# ssl-key=/etc/mysql/server-key.pem

#
# * Galera-related settings
#
[galera]
# Mandatory settings
wsrep_on=ON

#wsrep_debug=ON

wsrep_provider=/usr/lib/galera/libgalera_smm.so
wsrep_provider_options="gcache.size=2G; gcache.recover=yes; gcs.fc_limit=1000; gcs.fc_master_slave=yes; pc.weight=2;ist.recv_bind=127.0.0.1"
wsrep_cluster_name=mariadb-cluster-nmb
#on live node gcom address must be empty
wsrep_cluster_address=gcomm://
wsrep_node_address="192.168.114.39"
wsrep_node_incoming_address="192.168.114.39"
wsrep_node_name="nmb_main"
wsrep_sst_receive_address="192.168.114.39"
wsrep_sst_method=xtrabackup-v2
wsrep_sst_auth=root:<correct-password>

binlog_format=row

wsrep_slave_threads=8
#innodb_flush_log_at_trx_commit=0

[mysqldump]
quick
quote-names
max_allowed_packet = 64M

[mysql]
#no-auto-rehash # faster start of mysql but no tab completion

[isamchk]
key_buffer = 16M

!includedir /etc/mysql/conf.d/


My.cnf of JOINER

[client]
port = 3306
socket = /var/run/mysqld/mysqld.sock

# Here is entries for some specific programs
# The following values assume you have at least 32M ram

# This was formally known as [safe_mysqld]. Both versions are currently parsed.
[mysqld_safe]
socket = /var/run/mysqld/mysqld.sock
nice = 0

[mysqld]

# Settings for replication
server_id=2
bind-address=0.0.0.0

skip-host-cache
skip-name-resolve
#
# * Basic Settings
#
#user = mysql
pid-file = /var/run/mysqld/mysqld.pid
socket = /var/run/mysqld/mysqld.sock
port = 3306
basedir = /usr
datadir = /var/lib/mysql
tmpdir = /tmp
lc_messages_dir = /usr/share/mysql
lc_messages = en_US
skip-external-locking

character-set-server=utf8
default-storage-engine=INNODB
sql-mode="NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION"


#
# Instead of skip-networking the default is now to listen only on
# localhost which is more compatible and is not less secure.
#bind-address = 127.0.0.1
#
# * Fine Tuning
#
max_connections = 251
connect_timeout     = 10000
wait_timeout = 6000
max_allowed_packet = 16M
thread_cache_size       = 128
sort_buffer_size = 4M
bulk_insert_buffer_size = 16M
tmp_table_size = 256M
max_heap_table_size = 32M
#
# * MyISAM
#
# This replaces the startup script and checks MyISAM tables if needed
# the first time they are touched. On error, make copy and try a repair.
myisam_max_sort_file_size=100G
myisam_recover_options = BACKUP
key_buffer_size = 8G
#open-files-limit = 2000
table_open_cache = 2000
myisam_sort_buffer_size = 222M
concurrent_insert = 2
read_buffer_size = 2M
read_rnd_buffer_size = 1M
#
# * Query Cache Configuration
#
# Cache only tiny result sets, so we can fit more in the query cache.
query_cache_limit = 100M
query_cache_size = 1G
# for more write intensive setups, set to DEMAND or OFF
#query_cache_type = DEMAND
#
# * Logging and Replication
#
# Both location gets rotated by the cronjob.
# Be aware that this log type is a performance killer.
# As of 5.1 you can enable the log at runtime!
#general_log_file        = /var/log/mysql/mysql.log
#general_log             = 1

#
# Error logging goes to syslog due to /etc/mysql/conf.d/mysqld_safe_syslog.cnf.
#
# we do want to know about network errors and such
log_warnings = 2
#
# Enable the slow query log to see queries with especially long duration
slow_query_log=1
slow_query_log_file = /var/lib/mysql/mariadb-slow.log
long_query_time = 4
#log_slow_rate_limit = 1000
#log_slow_verbosity = query_plan

relay-log-space-limit=6G

#log-queries-not-using-indexes
#log_slow_admin_statements
#
# The following can be used as easy to replay backup logs or for replication.
# note: if you are setting up a replication slave, see README.Debian about
#       other settings you may need to change.
#server-id = 1
#report_host = master1
#auto_increment_increment = 2
#auto_increment_offset = 1
#log_bin = /var/log/mysql/mariadb-bin
#log_bin_index = /var/log/mysql/mariadb-bin.index
# not fab for performance, but safer
#sync_binlog = 1
expire_logs_days = 10
max_binlog_size         = 100M
# slaves
#relay_log = /var/log/mysql/relay-bin
#relay_log_index = /var/log/mysql/relay-bin.index
#relay_log_info_file = /var/log/mysql/relay-bin.info
#log_slave_updates
#read_only
#
# If applications support it, this stricter sql_mode prevents some
# mistakes like inserting invalid dates etc.
#sql_mode = NO_ENGINE_SUBSTITUTION,TRADITIONAL
#
# * InnoDB
#
# InnoDB is enabled by default with a 10MB datafile in /var/lib/mysql/.
# Read the manual for more InnoDB related options. There are many!
default_storage_engine = InnoDB
# you can't just change log file size, requires special procedure
innodb_log_file_size = 2G
innodb_buffer_pool_size = 7G
innodb_log_buffer_size = 8M
innodb_file_per_table = 1
innodb_open_files = 400
innodb_io_capacity = 400
innodb_flush_method = O_DIRECT

innodb_additional_mem_pool_size=2G
innodb_flush_log_at_trx_commit=1
innodb_thread_concurrency=0
innodb_read_io_threads=64
innodb_write_io_threads=64
innodb_autoextend_increment=64
innodb_buffer_pool_instances=8
innodb_concurrency_tickets=5000
innodb_old_blocks_time=1000
innodb_stats_on_metadata=0
innodb_checksum_algorithm=0
innodb_locks_unsafe_for_binlog = 1
innodb_autoinc_lock_mode = 2




back_log=80
flush_time=0
max_connect_errors=100
open_files_limit=4161
table_definition_cache=1400
sync_master_info=1000
sync_relay_log=10000
sync_relay_log_info=10000


#
# * Galera-related settings
#
[galera]
# Mandatory settings
wsrep_on=ON

#wsrep_debug=ON

#wsrep_sst_method: xtrabackup-v2 to prevent donor node not accessible during replication!
wsrep_dirty_reads=ON
wsrep_provider=/usr/lib/galera/libgalera_smm.so
wsrep_provider_options="gcache.size=2G; gcache.recover=yes; gcs.fc_limit=1000; gcs.fc_master_slave=yes; pc.weight=0; ist.recv_bind=127.0.0.1"
wsrep_cluster_name=mariadb-cluster-nmb
wsrep_cluster_address=gcomm://192.168.114.39,192.168.60.123
wsrep_node_address="192.168.60.123"
wsrep_node_incoming_address="192.168.60.123"
wsrep_node_name="nmb_backup"
wsrep_sst_receive_address="192.168.60.123"
wsrep_sst_method=xtrabackup-v2
wsrep_sst_auth=root:<correct-password>

binlog_format=row

# Optional setting
wsrep_slave_threads=69
#innodb_flush_log_at_trx_commit=0

[mysqldump]
quick
quote-names
max_allowed_packet = 64M

[mysql]
#no-auto-rehash # faster start of mysql but no tab completion

[isamchk]
key_buffer = 16M

#
# * IMPORTANT: Additional settings that can override those from this file!
#   The files must end with '.cnf', otherwise they'll be ignored.
#
!includedir /etc/mysql/conf.d/

JOINER and DONOR logs are included as attachements

output of DONOR wsrep status

Variable_nameValue
wsrep_apply_oooe0.026346
wsrep_apply_oool0.000011
wsrep_apply_window1.029740
wsrep_causal_reads0
wsrep_cert_deps_distance63.966969
wsrep_cert_index_size47
wsrep_cert_interval0.035373
wsrep_cluster_conf_id60
wsrep_cluster_size2
wsrep_cluster_state_uuid625893a6-4554-11e7-b41d-dfe527fde688
wsrep_cluster_statusPrimary
wsrep_commit_oooe0.000000
wsrep_commit_oool0.000000
wsrep_commit_window1.003723
wsrep_connectedON
wsrep_desync_count0
wsrep_evs_delayed
wsrep_evs_evict_list
wsrep_evs_repl_latency0.00151313/0.00644966/0.157399/0.0107859/223
wsrep_evs_stateOPERATIONAL
wsrep_flow_control_paused0.000000
wsrep_flow_control_paused_ns0
wsrep_flow_control_recv0
wsrep_flow_control_sent0
wsrep_gcomm_uuid18782d4e-5cc4-11e7-b455-2b6aa59646ba
wsrep_incoming_addresses192.168.114.39:3306,192.168.60.123:3306
wsrep_last_committed9416467
wsrep_local_bf_aborts0
wsrep_local_cached_downto9275118
wsrep_local_cert_failures81
wsrep_local_commits1658018
wsrep_local_index0
wsrep_local_recv_queue0
wsrep_local_recv_queue_avg0.000149
wsrep_local_recv_queue_max2
wsrep_local_recv_queue_min0
wsrep_local_replays0
wsrep_local_send_queue0
wsrep_local_send_queue_avg0.000528
wsrep_local_send_queue_max15
wsrep_local_send_queue_min0
wsrep_local_state4
wsrep_local_state_commentSynced
wsrep_local_state_uuid625893a6-4554-11e7-b41d-dfe527fde688
wsrep_protocol_version7
wsrep_provider_nameGalera
wsrep_provider_vendorCodership Oy <in...@codership.com>
wsrep_provider_version25.3.19(r3667)
wsrep_readyON
wsrep_received13388
wsrep_received_bytes117501
wsrep_repl_data_bytes955665180
wsrep_repl_keys206671980
wsrep_repl_keys_bytes1691625502
wsrep_repl_other_bytes0
wsrep_replicated1662600
wsrep_replicated_bytes19804348644
wsrep_thread_count9

Output of status wsrep JOINER

Variable_nameValue
wsrep_apply_oooe0.052899
wsrep_apply_oool0.000000
wsrep_apply_window1.339776
wsrep_causal_reads0
wsrep_cert_deps_distance114.166667
wsrep_cert_index_size24
wsrep_cert_interval0.145765
wsrep_cluster_conf_id28
wsrep_cluster_size2
wsrep_cluster_state_uuid625893a6-4554-11e7-b41d-dfe527fde688
wsrep_cluster_statusPrimary
wsrep_commit_oooe0.000000
wsrep_commit_oool0.000000
wsrep_commit_window1.289929
wsrep_connectedON
wsrep_desync_count0
wsrep_evs_delayed
wsrep_evs_evict_list
wsrep_evs_repl_latency0/0/0/0/0
wsrep_evs_stateOPERATIONAL
wsrep_flow_control_paused0.000000
wsrep_flow_control_paused_ns0
wsrep_flow_control_recv0
wsrep_flow_control_sent0
wsrep_gcomm_uuid2813aee7-61c2-11e7-8670-364f715d3d4e
wsrep_incoming_addresses192.168.114.39:3306,192.168.60.123:3306
wsrep_last_committed9273954
wsrep_local_bf_aborts0
wsrep_local_cached_downto18446744073709551615
wsrep_local_cert_failures0
wsrep_local_commits0
wsrep_local_index1
wsrep_local_recv_queue142869
wsrep_local_recv_queue_avg70515.711604
wsrep_local_recv_queue_max142869
wsrep_local_recv_queue_min0
wsrep_local_replays0
wsrep_local_send_queue0
wsrep_local_send_queue_avg0.000000
wsrep_local_send_queue_max1
wsrep_local_send_queue_min0
wsrep_local_state1
wsrep_local_state_commentJoining: receiving State Transfer
wsrep_local_state_uuid625893a6-4554-11e7-b41d-dfe527fde688
wsrep_protocol_version7
wsrep_provider_nameGalera
wsrep_provider_vendorCodership Oy <in...@codership.com>
wsrep_provider_version25.3.19(r3667)
wsrep_readyOFF
wsrep_received1866
wsrep_received_bytes503834
wsrep_repl_data_bytes0
wsrep_repl_keys0
wsrep_repl_keys_bytes0
wsrep_repl_other_bytes0
wsrep_replicated0
wsrep_replicated_bytes0
wsrep_thread_count70


donor2_log.txt
joiner2_log.txt
Reply all
Reply to author
Forward
0 new messages