[root@slmaster ~]# systemctl status slurmdbd
● slurmdbd.service - Slurm DBD accounting daemon
Loaded: loaded (/etc/systemd/system/slurmdbd.service; enabled; vendor preset: disabled)
Active: failed (Result: timeout) since Thu 2018-10-11 20:34:42 UTC; 14min ago
Process: 1406 ExecStart=/usr/sbin/slurmdbd $SLURMDBD_OPTIONS (code=exited, status=0/SUCCESS)
Oct 11 20:33:11 slmaster systemd[1]: Starting Slurm DBD accounting daemon...
Oct 11 20:33:11 slmaster systemd[1]: PID file /var/run/slurmdbd.pid not readable (yet?) after start.
Oct 11 20:34:42 slmaster systemd[1]: slurmdbd.service start operation timed out. Terminating.
Oct 11 20:34:42 slmaster systemd[1]: Failed to start Slurm DBD accounting daemon.
Oct 11 20:34:42 slmaster systemd[1]: Unit slurmdbd.service entered failed state.
Oct 11 20:34:42 slmaster systemd[1]: slurmdbd.service failed.
[root@slmaster ~]# systemctl status mariadb
● mariadb.service - MariaDB database server
Loaded: loaded (/usr/lib/systemd/system/mariadb.service; enabled; vendor preset: disabled)
Active: active (running) since Thu 2018-10-11 20:33:11 UTC; 18min ago
Process: 991 ExecStartPost=/usr/libexec/mariadb-wait-ready $MAINPID (code=exited, status=0/SUCCESS)
Process: 943 ExecStartPre=/usr/libexec/mariadb-prepare-db-dir %n (code=exited, status=0/SUCCESS)
Main PID: 989 (mysqld_safe)
CGroup: /system.slice/mariadb.service
├─ 989 /bin/sh /usr/bin/mysqld_safe --basedir=/usr
└─1265 /usr/libexec/mysqld --basedir=/usr --datadir=/var/lib/mysql --plugin-dir=/usr/lib64/mysql/plugin --log-error=/var/log/mariadb/maria...
Oct 11 20:33:07 slmaster systemd[1]: Starting MariaDB database server...
Oct 11 20:33:07 slmaster mariadb-prepare-db-dir[943]: Database MariaDB is probably initialized in /var/lib/mysql already, nothing is done.
Oct 11 20:33:08 slmaster mysqld_safe[989]: 181011 20:33:08 mysqld_safe Logging to '/var/log/mariadb/mariadb.log'.
Oct 11 20:33:09 slmaster mysqld_safe[989]: 181011 20:33:09 mysqld_safe Starting mysqld daemon with databases from /var/lib/mysql
Oct 11 20:33:11 slmaster systemd[1]: Started MariaDB database server.
[2018-10-11T20:33:11.648] debug: Log file re-opened
[2018-10-11T20:33:11.720] debug: Munge authentication plugin loaded
[2018-10-11T20:33:11.749] debug2: mysql_connect() called for db slurm_acct_db
[2018-10-11T20:33:11.785] debug2: innodb_buffer_pool_size: 629145600
[2018-10-11T20:33:11.785] debug2: innodb_log_file_size: 67108864
[2018-10-11T20:33:11.786] debug2: innodb_lock_wait_timeout: 900
[2018-10-11T20:33:11.956] Accounting storage MYSQL plugin loaded
[2018-10-11T20:33:11.958] debug2: ArchiveDir = /tmp
[2018-10-11T20:33:11.958] debug2: ArchiveScript = (null)
[2018-10-11T20:33:11.958] debug2: AuthInfo = (null)
[2018-10-11T20:33:11.958] debug2: AuthType = auth/munge
[2018-10-11T20:33:11.958] debug2: CommitDelay = 0
[2018-10-11T20:33:11.958] debug2: DbdAddr = slmaster
[2018-10-11T20:33:11.958] debug2: DbdBackupHost = (null)
[2018-10-11T20:33:11.958] debug2: DbdHost = slmaster
[2018-10-11T20:33:11.958] debug2: DbdPort = 6819
[2018-10-11T20:33:11.958] debug2: DebugFlags = (null)
[2018-10-11T20:33:11.958] debug2: DebugLevel = 6
[2018-10-11T20:33:11.958] debug2: DebugLevelSyslog = 10
[2018-10-11T20:33:11.958] debug2: DefaultQOS = (null)
[2018-10-11T20:33:11.958] debug2: LogFile = /var/log/slurm/slurmdbd.log
[2018-10-11T20:33:11.958] debug2: MessageTimeout = 10
[2018-10-11T20:33:11.958] debug2: Parameters = (null)
[2018-10-11T20:33:11.958] debug2: PidFile = /var/spool/slurm/slurmdbd.pid
[2018-10-11T20:33:11.958] debug2: PluginDir = /usr/lib64/slurm
[2018-10-11T20:33:11.958] debug2: PrivateData = none
[2018-10-11T20:33:11.958] debug2: PurgeJobAfter = NONE
[2018-10-11T20:33:11.958] debug2: PurgeResvAfter = NONE
[2018-10-11T20:33:11.958] debug2: PurgeStepAfter = NONE
[2018-10-11T20:33:11.958] debug2: PurgeSuspendAfter = NONE
[2018-10-11T20:33:11.958] debug2: PurgeTXNAfter = NONE
[2018-10-11T20:33:11.958] debug2: PurgeUsageAfter = NONE
[2018-10-11T20:33:11.958] debug2: SlurmUser = slurm(982)
[2018-10-11T20:33:11.958] debug2: StorageBackupHost = (null)
[2018-10-11T20:33:11.958] debug2: StorageHost = localhost
[2018-10-11T20:33:11.958] debug2: StorageLoc = slurm_acct_db
[2018-10-11T20:33:11.958] debug2: StoragePort = 3306
[2018-10-11T20:33:11.958] debug2: StorageType = accounting_storage/mysql
[2018-10-11T20:33:11.958] debug2: StorageUser = slurm
[2018-10-11T20:33:11.958] debug2: TCPTimeout = 2
[2018-10-11T20:33:11.958] debug2: TrackWCKey = 0
[2018-10-11T20:33:11.958] debug2: TrackSlurmctldDown= 0
[2018-10-11T20:33:11.958] debug2: acct_storage_p_get_connection: request new connection 1
[2018-10-11T20:33:11.974] slurmdbd version 18.08.1 started
[2018-10-11T20:33:11.986] debug2: running rollup at Thu Oct 11 20:33:11 2018
[2018-10-11T20:33:11.986] debug2: Everything rolled up
[2018-10-11T20:34:42.968] Terminate signal (SIGINT or SIGTERM) received
[2018-10-11T20:34:42.969] debug: rpc_mgr shutting down
[root@slmaster ~]# sacctmgr -vvvv
sacctmgr: Accounting storage SLURMDBD plugin loaded
sacctmgr: debug2: slurm_connect failed: Connection refused
sacctmgr: debug2: Error connecting slurm stream socket at 127.0.0.1:6819: Connection refused
sacctmgr: error: slurm_persist_conn_open_without_init: failed to open persistent connection to slmaster:6819: Connection refused
sacctmgr: error: slurmdbd: Sending PersistInit msg: Connection refused
sacctmgr: error: Problem talking to the database: Connection refused
[root@slmaster ~]# mysql -u slurm -h localhost -P 3306 -p
Enter password:
Welcome to the MariaDB monitor. Commands end with ; or \g.
Your MariaDB connection id is 7
Server version: 5.5.60-MariaDB MariaDB Server
Copyright (c) 2000, 2018, Oracle, MariaDB Corporation Ab and others.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
MariaDB [(none)]>
[mysqld]
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
# Settings user and group are ignored when systemd is used.
# If you need to run mysqld under a different user or group,
# customize your systemd unit file for mariadb according to the
# instructions in http://fedoraproject.org/wiki/Systemd
[mysqld_safe]
log-error=/var/log/mariadb/mariadb.log
pid-file=/var/run/mariadb/mariadb.pid
#
# include all files from the config directory
#
!includedir /etc/my.cnf.d
@Chris and @Lachlan,
Thanks for your responses.
I resolved the issue based on a hint from Jeffrey in an earlier email. I had changed the location of the PID files in the Slurm config files, but forgot to make the same change in the systemd service unit files.
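Pointing both at the same PID file did the trick: slurmdbd was writing its PID to /var/spool/slurm/slurmdbd.pid (the PidFile value in the log above), while systemd was waiting for /var/run/slurmdbd.pid, so the start operation timed out even though the daemon had started.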