[COMMIT scylla-cluster-tests branch-6.0] feature(coredump): use hardlinks to prevent coredumps from being cleared

0 views
Skip to first unread message

Commit Bot

<bot@cloudius-systems.com>
unread,
Jun 27, 2024, 2:12:16 PMJun 27
to scylladb-dev@googlegroups.com, Israel Fruchter
From: Israel Fruchter <fr...@scylladb.com>
Committer: Israel Fruchter <israel....@gmail.com>
Branch: branch-6.0

feature(coredump): use hardlinks to prevent coredumps from being cleared

This change creates a hardlink for every core that is found, and
compresses and uploads the core through that hardlink, so it won't be
cleared by systemd-coredump in the midst of SCT processing of the coredump.

Hardlinks are created in `/var/lib/scylla/hardlinks` so that they
share the same physical device as the coredumps directory, but still
won't be scanned for deletion by systemd-coredump.

Fixes: #7255
(cherry picked from commit fdf25923e0f272121daeab17122387bff5541085)

---
diff --git a/sdcm/coredump.py b/sdcm/coredump.py
--- a/sdcm/coredump.py
+++ b/sdcm/coredump.py
@@ -22,6 +22,7 @@
from threading import Thread, Event
from dataclasses import dataclass
from pathlib import Path
+from contextlib import contextmanager

from sdcm.log import SDCMAdapter
from sdcm.remote import NETWORK_EXCEPTIONS
@@ -223,6 +224,10 @@ def _upload_coredump(self, core_info: CoreDumpInfo):
download_instructions += f'\nunlz4 {coredump.name}'
core_info.download_url, core_info.download_instructions = download_url, download_instructions

+ @contextmanager
+ def hard_link_corefile(self, corefile): # pylint: disable=unused-argument,no-self-use
+ yield
+
def upload_coredump(self, core_info: CoreDumpInfo):
if core_info.download_url:
return False
@@ -232,7 +237,10 @@ def upload_coredump(self, core_info: CoreDumpInfo):
try:
self.log.debug(f'Start uploading file: {core_info.corefile}')
core_info.download_instructions = 'Coredump upload in progress'
- self._upload_coredump(core_info)
+ with self.hard_link_corefile(core_info.corefile) as hard_link:
+ if hard_link:
+ core_info.corefile = str(hard_link)
+ self._upload_coredump(core_info)
return True
except Exception as exc: # pylint: disable=broad-except
core_info.download_instructions = 'failed to upload core'
@@ -308,6 +316,16 @@ def systemd_version(self):
self.log.warning("failed to get systemd version:", exc_info=True)
return systemd_version

+ @contextmanager
+ def hard_link_corefile(self, corefile):
+ hard_links_path = Path(corefile).parent / 'hardlinks'
+ link_path = hard_links_path / Path(corefile).name
+ self.node.remoter.sudo(f'mkdir -p {hard_links_path}', ignore_status=True)
+ self.log.debug(f'doing: ln {corefile} {link_path}')
+ self.node.remoter.sudo(f'ln {corefile} {link_path}', ignore_status=True)
+ yield link_path
+ self.node.remoter.sudo(f'rm -f {link_path}', ignore_status=True)
+
def get_list_of_cores_json(self) -> Optional[List[CoreDumpInfo]]:
result = self.node.remoter.run(
'sudo coredumpctl -q --json=short', verbose=False, ignore_status=True)
diff --git a/unit_tests/test_data/test_coredump/systemd/exceptions_limit_not_reached_test_remoter.json b/unit_tests/test_data/test_coredump/systemd/exceptions_limit_not_reached_test_remoter.json
--- a/unit_tests/test_data/test_coredump/systemd/exceptions_limit_not_reached_test_remoter.json
+++ b/unit_tests/test_data/test_coredump/systemd/exceptions_limit_not_reached_test_remoter.json
@@ -68,13 +68,76 @@
"exit_status": 0
}
],
- "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4' 'https://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4'": [
+ "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4' 'https://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4'": [
{
"__instance__": "fabric.runners.Result",
"stderr": " % Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 0 60.5M 0 0 0 65536 0 153k 0:06:43 --:--:-- 0:06:43 153k\r 5 60.5M 0 0 5 3520k 0 2703k 0:00:22 0:00:01 0:00:21 2701k\r 11 60.5M 0 0 11 7232k 0 3187k 0:00:19 0:00:02 0:00:17 3185k\r 15 60.5M 0 0 15 9856k 0 3062k 0:00:20 0:00:03 0:00:17 3061k\r 20 60.5M 0 0 20 12.3M 0 2993k 0:00:20 0:00:04 0:00:16 2992k\r 25 60.5M 0 0 25 15.4M 0 2997k 0:00:20 0:00:05 0:00:15 3241k\r 30 60.5M 0 0 30 18.3M 0 3017k 0:00:20 0:00:06 0:00:14 3099k\r 35 60.5M 0 0 35 21.4M 0 3027k 0:00:20 0:00:07 0:00:13 2954k\r 40 60.5M 0 0 40 24.5M 0 3050k 0:00:20 0:00:08 0:00:12 3042k\r 45 60.5M 0 0 45 27.4M 0 3045k 0:00:20 0:00:09 0:00:11 3089k\r 50 60.5M 0 0 50 30.6M 0 3061k 0:00:20 0:00:10 0:00:10 3128k\r 55 60.5M 0 0 55 33.8M 0 3086k 0:00:20 0:00:11 0:00:09 3172k\r 61 60.5M 0 0 61 37.1M 0 3112k 0:00:19 0:00:12 0:00:07 3235k\r 67 60.5M 0 0 67 41.0M 0 3170k 0:00:19 0:00:13 0:00:06 3366k\r 74 60.5M 0 0 74 45.1M 0 3249k 0:00:19 0:00:14 0:00:05 3626k\r 82 60.5M 0 0 82 50.0M 0 3362k 0:00:18 0:00:15 0:00:03 3983k\r 92 60.5M 0 0 92 55.7M 0 3520k 0:00:17 0:00:16 0:00:01 4495k\r100 60.5M 100 381 100 60.5M 21 3510k 0:00:18 0:00:17 0:00:01 4408k\r100 60.5M 100 381 100 60.5M 21 3509k 0:00:18 0:00:17 0:00:01 4531k\n",
"stdout": "<?xml version='1.0' encoding='UTF-8'?><Error><Code>AccessDenied</Code><Message>Access denied.</Message><Details>Anonymous caller does not have storage.objects.delete access to upload.scylladb.com/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000./core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4.</Details></Error>",
"exited": 0,
"exit_status": 0
}
+ ],
+ "sudo ln /var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4 /var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo ln /var/lib/systemd/coredump/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4 /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo ln /var/lib/systemd/coredump/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4 /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo mkdir -p /var/lib/systemd/coredump/hardlinks": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo rm -f /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo rm -f /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo rm -f /var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
]
}
diff --git a/unit_tests/test_data/test_coredump/systemd/exceptions_limit_not_reached_test_results.json b/unit_tests/test_data/test_coredump/systemd/exceptions_limit_not_reached_test_results.json
--- a/unit_tests/test_data/test_coredump/systemd/exceptions_limit_not_reached_test_results.json
+++ b/unit_tests/test_data/test_coredump/systemd/exceptions_limit_not_reached_test_results.json
@@ -25,7 +25,7 @@
{
"__instance__": "sdcm.coredump.CoreDumpInfo",
"pid": "307283",
- "corefile": "/var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
+ "corefile": "/var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
"source_timestamp": 1598239861.0,
"coredump_info": " PID: 307283 (sshd)\n UID: 1000 (dkropachev)\n GID: 1000 (dkropachev)\n Signal: 31 (SYS)\n Timestamp: Mon 2020-08-24 10:31:01 +07 (4 days ago)\n Command Line: sshd: dkropachev [net]\n Executable: /usr/sbin/sshd\n Control Group: /user.slice/user-1000.slice/us...@1000.service/gnome-launched-pycharm-professional_pycharm-professional.desktop-6641.scope\n Unit: us...@1000.service\n User Unit: gnome-launched-pycharm-professional_pycharm-professional.desktop-6641.scope\n Slice: user-1000.slice\n Owner UID: 1000 (dkropachev)\n Boot ID: 3ee441d8238246e79d2c30f6619ceeac\n Machine ID: a72dad55f1754c44ad63a008ad3a60a5\n Hostname: dkropahev-pc\n Storage: /var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4\n Message: Process 307283 (sshd) of user 1000 dumped core.\n \n Stack trace of thread 307283:\n #0 0x00007fbdaf06177b __socket (libc.so.6 + 0x12377b)\n #1 0x00007fbdaf058b43 openlog_internal (libc.so.6 + 0x11ab43)\n #2 0x00007fbdaf05901f __vsyslog_internal (libc.so.6 + 0x11b01f)\n #3 0x00007fbdaf059333 __syslog_chk (libc.so.6 + 0x11b333)\n #4 0x000055a4e7c12b18 n/a (sshd + 0x5ab18)\n #5 0x000055a4e7c10a2a n/a (sshd + 0x58a2a)\n #6 0x000055a4e7be7f96 n/a (sshd + 0x2ff96)\n #7 0x000055a4e7c12981 n/a (sshd + 0x5a981)\n #8 0x000055a4e7c10a2a n/a (sshd + 0x58a2a)\n #9 0x000055a4e7be80c8 n/a (sshd + 0x300c8)\n #10 0x000055a4e7be97e9 n/a (sshd + 0x317e9)\n #11 0x000055a4e7bc957b n/a (sshd + 0x1157b)\n #12 0x00007fbdaef650b3 __libc_start_main (libc.so.6 + 0x270b3)\n #13 0x000055a4e7bc9b7e n/a (sshd + 0x11b7e)\n",
"download_instructions": "gsutil cp gs://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4 .\nunlz4 core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
@@ -61,7 +61,7 @@
{
"__instance__": "sdcm.coredump.CoreDumpInfo",
"pid": "307283",
- "corefile": "/var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
+ "corefile": "/var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
"source_timestamp": 1598239861.0,
"coredump_info": " PID: 307283 (sshd)\n UID: 1000 (dkropachev)\n GID: 1000 (dkropachev)\n Signal: 31 (SYS)\n Timestamp: Mon 2020-08-24 10:31:01 +07 (4 days ago)\n Command Line: sshd: dkropachev [net]\n Executable: /usr/sbin/sshd\n Control Group: /user.slice/user-1000.slice/us...@1000.service/gnome-launched-pycharm-professional_pycharm-professional.desktop-6641.scope\n Unit: us...@1000.service\n User Unit: gnome-launched-pycharm-professional_pycharm-professional.desktop-6641.scope\n Slice: user-1000.slice\n Owner UID: 1000 (dkropachev)\n Boot ID: 3ee441d8238246e79d2c30f6619ceeac\n Machine ID: a72dad55f1754c44ad63a008ad3a60a5\n Hostname: dkropahev-pc\n Storage: /var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4\n Message: Process 307283 (sshd) of user 1000 dumped core.\n \n Stack trace of thread 307283:\n #0 0x00007fbdaf06177b __socket (libc.so.6 + 0x12377b)\n #1 0x00007fbdaf058b43 openlog_internal (libc.so.6 + 0x11ab43)\n #2 0x00007fbdaf05901f __vsyslog_internal (libc.so.6 + 0x11b01f)\n #3 0x00007fbdaf059333 __syslog_chk (libc.so.6 + 0x11b333)\n #4 0x000055a4e7c12b18 n/a (sshd + 0x5ab18)\n #5 0x000055a4e7c10a2a n/a (sshd + 0x58a2a)\n #6 0x000055a4e7be7f96 n/a (sshd + 0x2ff96)\n #7 0x000055a4e7c12981 n/a (sshd + 0x5a981)\n #8 0x000055a4e7c10a2a n/a (sshd + 0x58a2a)\n #9 0x000055a4e7be80c8 n/a (sshd + 0x300c8)\n #10 0x000055a4e7be97e9 n/a (sshd + 0x317e9)\n #11 0x000055a4e7bc957b n/a (sshd + 0x1157b)\n #12 0x00007fbdaef650b3 __libc_start_main (libc.so.6 + 0x270b3)\n #13 0x000055a4e7bc9b7e n/a (sshd + 0x11b7e)\n",
"download_instructions": "gsutil cp gs://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4 .\nunlz4 core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
@@ -74,7 +74,7 @@
{
"__instance__": "sdcm.coredump.CoreDumpInfo",
"pid": "307283",
- "corefile": "/var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
+ "corefile": "/var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
"source_timestamp": 1598239861.0,
"coredump_info": " PID: 307283 (sshd)\n UID: 1000 (dkropachev)\n GID: 1000 (dkropachev)\n Signal: 31 (SYS)\n Timestamp: Mon 2020-08-24 10:31:01 +07 (4 days ago)\n Command Line: sshd: dkropachev [net]\n Executable: /usr/sbin/sshd\n Control Group: /user.slice/user-1000.slice/us...@1000.service/gnome-launched-pycharm-professional_pycharm-professional.desktop-6641.scope\n Unit: us...@1000.service\n User Unit: gnome-launched-pycharm-professional_pycharm-professional.desktop-6641.scope\n Slice: user-1000.slice\n Owner UID: 1000 (dkropachev)\n Boot ID: 3ee441d8238246e79d2c30f6619ceeac\n Machine ID: a72dad55f1754c44ad63a008ad3a60a5\n Hostname: dkropahev-pc\n Storage: /var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4\n Message: Process 307283 (sshd) of user 1000 dumped core.\n \n Stack trace of thread 307283:\n #0 0x00007fbdaf06177b __socket (libc.so.6 + 0x12377b)\n #1 0x00007fbdaf058b43 openlog_internal (libc.so.6 + 0x11ab43)\n #2 0x00007fbdaf05901f __vsyslog_internal (libc.so.6 + 0x11b01f)\n #3 0x00007fbdaf059333 __syslog_chk (libc.so.6 + 0x11b333)\n #4 0x000055a4e7c12b18 n/a (sshd + 0x5ab18)\n #5 0x000055a4e7c10a2a n/a (sshd + 0x58a2a)\n #6 0x000055a4e7be7f96 n/a (sshd + 0x2ff96)\n #7 0x000055a4e7c12981 n/a (sshd + 0x5a981)\n #8 0x000055a4e7c10a2a n/a (sshd + 0x58a2a)\n #9 0x000055a4e7be80c8 n/a (sshd + 0x300c8)\n #10 0x000055a4e7be97e9 n/a (sshd + 0x317e9)\n #11 0x000055a4e7bc957b n/a (sshd + 0x1157b)\n #12 0x00007fbdaef650b3 __libc_start_main (libc.so.6 + 0x270b3)\n #13 0x000055a4e7bc9b7e n/a (sshd + 0x11b7e)\n",
"download_instructions": "gsutil cp gs://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4 .\nunlz4 core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
diff --git a/unit_tests/test_data/test_coredump/systemd/fail_upload_test_remoter.json b/unit_tests/test_data/test_coredump/systemd/fail_upload_test_remoter.json
--- a/unit_tests/test_data/test_coredump/systemd/fail_upload_test_remoter.json
+++ b/unit_tests/test_data/test_coredump/systemd/fail_upload_test_remoter.json
@@ -83,7 +83,7 @@
"exit_status": 0
}
],
- "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4' 'https://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4'": [
+ "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4' 'https://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4'": [
{
"__instance__": "invoke.exceptions.UnexpectedExit",
"result": {
@@ -95,7 +95,7 @@
"reason": null
}
],
- "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4' 'https://upload.scylladb.com/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000./core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4'": [
+ "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4' 'https://upload.scylladb.com/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000./core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4'": [
{
"__instance__": "fabric.runners.Result",
"stderr": " % Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 0 60.5M 0 0 0 65536 0 153k 0:06:43 --:--:-- 0:06:43 153k\r 5 60.5M 0 0 5 3520k 0 2703k 0:00:22 0:00:01 0:00:21 2701k\r 11 60.5M 0 0 11 7232k 0 3187k 0:00:19 0:00:02 0:00:17 3185k\r 15 60.5M 0 0 15 9856k 0 3062k 0:00:20 0:00:03 0:00:17 3061k\r 20 60.5M 0 0 20 12.3M 0 2993k 0:00:20 0:00:04 0:00:16 2992k\r 25 60.5M 0 0 25 15.4M 0 2997k 0:00:20 0:00:05 0:00:15 3241k\r 30 60.5M 0 0 30 18.3M 0 3017k 0:00:20 0:00:06 0:00:14 3099k\r 35 60.5M 0 0 35 21.4M 0 3027k 0:00:20 0:00:07 0:00:13 2954k\r 40 60.5M 0 0 40 24.5M 0 3050k 0:00:20 0:00:08 0:00:12 3042k\r 45 60.5M 0 0 45 27.4M 0 3045k 0:00:20 0:00:09 0:00:11 3089k\r 50 60.5M 0 0 50 30.6M 0 3061k 0:00:20 0:00:10 0:00:10 3128k\r 55 60.5M 0 0 55 33.8M 0 3086k 0:00:20 0:00:11 0:00:09 3172k\r 61 60.5M 0 0 61 37.1M 0 3112k 0:00:19 0:00:12 0:00:07 3235k\r 67 60.5M 0 0 67 41.0M 0 3170k 0:00:19 0:00:13 0:00:06 3366k\r 74 60.5M 0 0 74 45.1M 0 3249k 0:00:19 0:00:14 0:00:05 3626k\r 82 60.5M 0 0 82 50.0M 0 3362k 0:00:18 0:00:15 0:00:03 3983k\r 92 60.5M 0 0 92 55.7M 0 3520k 0:00:17 0:00:16 0:00:01 4495k\r100 60.5M 100 381 100 60.5M 21 3510k 0:00:18 0:00:17 0:00:01 4408k\r100 60.5M 100 381 100 60.5M 21 3509k 0:00:18 0:00:17 0:00:01 4531k\n",
@@ -104,7 +104,7 @@
"exit_status": 0
}
],
- "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4' 'https://upload.scylladb.com/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000./core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4'": [
+ "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4' 'https://upload.scylladb.com/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000./core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4'": [
{
"__instance__": "invoke.exceptions.UnexpectedExit",
"result": {
@@ -115,5 +115,68 @@
},
"reason": null
}
+ ],
+ "sudo ln /var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4 /var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo ln /var/lib/systemd/coredump/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4 /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo ln /var/lib/systemd/coredump/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4 /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo mkdir -p /var/lib/systemd/coredump/hardlinks": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo rm -f /var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo rm -f /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo rm -f /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
]
}
diff --git a/unit_tests/test_data/test_coredump/systemd/fail_upload_test_results.json b/unit_tests/test_data/test_coredump/systemd/fail_upload_test_results.json
--- a/unit_tests/test_data/test_coredump/systemd/fail_upload_test_results.json
+++ b/unit_tests/test_data/test_coredump/systemd/fail_upload_test_results.json
null
diff --git a/unit_tests/test_data/test_coredump/systemd/success_test_remoter.json b/unit_tests/test_data/test_coredump/systemd/success_test_remoter.json
--- a/unit_tests/test_data/test_coredump/systemd/success_test_remoter.json
+++ b/unit_tests/test_data/test_coredump/systemd/success_test_remoter.json
@@ -105,7 +105,7 @@
"exit_status": 0
}
],
- "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4' 'https://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4'": [
+ "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4' 'https://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4'": [
{
"__instance__": "invoke.exceptions.UnexpectedExit",
"result": {
@@ -124,7 +124,7 @@
"exit_status": 0
}
],
- "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4' 'https://upload.scylladb.com/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000./core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4'": [
+ "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4' 'https://upload.scylladb.com/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000./core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4'": [
{
"__instance__": "fabric.runners.Result",
"stderr": " % Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 0 60.5M 0 0 0 65536 0 153k 0:06:43 --:--:-- 0:06:43 153k\r 5 60.5M 0 0 5 3520k 0 2703k 0:00:22 0:00:01 0:00:21 2701k\r 11 60.5M 0 0 11 7232k 0 3187k 0:00:19 0:00:02 0:00:17 3185k\r 15 60.5M 0 0 15 9856k 0 3062k 0:00:20 0:00:03 0:00:17 3061k\r 20 60.5M 0 0 20 12.3M 0 2993k 0:00:20 0:00:04 0:00:16 2992k\r 25 60.5M 0 0 25 15.4M 0 2997k 0:00:20 0:00:05 0:00:15 3241k\r 30 60.5M 0 0 30 18.3M 0 3017k 0:00:20 0:00:06 0:00:14 3099k\r 35 60.5M 0 0 35 21.4M 0 3027k 0:00:20 0:00:07 0:00:13 2954k\r 40 60.5M 0 0 40 24.5M 0 3050k 0:00:20 0:00:08 0:00:12 3042k\r 45 60.5M 0 0 45 27.4M 0 3045k 0:00:20 0:00:09 0:00:11 3089k\r 50 60.5M 0 0 50 30.6M 0 3061k 0:00:20 0:00:10 0:00:10 3128k\r 55 60.5M 0 0 55 33.8M 0 3086k 0:00:20 0:00:11 0:00:09 3172k\r 61 60.5M 0 0 61 37.1M 0 3112k 0:00:19 0:00:12 0:00:07 3235k\r 67 60.5M 0 0 67 41.0M 0 3170k 0:00:19 0:00:13 0:00:06 3366k\r 74 60.5M 0 0 74 45.1M 0 3249k 0:00:19 0:00:14 0:00:05 3626k\r 82 60.5M 0 0 82 50.0M 0 3362k 0:00:18 0:00:15 0:00:03 3983k\r 92 60.5M 0 0 92 55.7M 0 3520k 0:00:17 0:00:16 0:00:01 4495k\r100 60.5M 100 381 100 60.5M 21 3510k 0:00:18 0:00:17 0:00:01 4408k\r100 60.5M 100 381 100 60.5M 21 3509k 0:00:18 0:00:17 0:00:01 4531k\n",
@@ -133,7 +133,7 @@
"exit_status": 0
}
],
- "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4' 'https://upload.scylladb.com/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000./core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4'": [
+ "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4' 'https://upload.scylladb.com/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000./core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4'": [
{
"__instance__": "invoke.exceptions.UnexpectedExit",
"result": {
@@ -151,5 +151,68 @@
"exited": 0,
"exit_status": 0
}
+ ],
+ "sudo ln /var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4 /var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo ln /var/lib/systemd/coredump/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4 /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo ln /var/lib/systemd/coredump/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4 /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo mkdir -p /var/lib/systemd/coredump/hardlinks": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo rm -f /var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo rm -f /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1404017.1598260030000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo rm -f /var/lib/systemd/coredump/hardlinks/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
]
}
diff --git a/unit_tests/test_data/test_coredump/systemd/success_test_results.json b/unit_tests/test_data/test_coredump/systemd/success_test_results.json
--- a/unit_tests/test_data/test_coredump/systemd/success_test_results.json
+++ b/unit_tests/test_data/test_coredump/systemd/success_test_results.json
null
diff --git a/unit_tests/test_data/test_coredump/systemd/success_test_systemd_248_remoter.json b/unit_tests/test_data/test_coredump/systemd/success_test_systemd_248_remoter.json
--- a/unit_tests/test_data/test_coredump/systemd/success_test_systemd_248_remoter.json
+++ b/unit_tests/test_data/test_coredump/systemd/success_test_systemd_248_remoter.json
@@ -114,13 +114,40 @@
"exit_status": 0
}
],
- "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4' 'https://upload.scylladb.com/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000./core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4'": [
+ "sudo curl --request PUT --fail --show-error --upload-file '/var/lib/systemd/coredump/hardlinks/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4' 'https://upload.scylladb.com/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000./core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4'": [
{
"__instance__": "fabric.runners.Result",
"stderr": " % Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 0 60.5M 0 0 0 65536 0 153k 0:06:43 --:--:-- 0:06:43 153k\r 5 60.5M 0 0 5 3520k 0 2703k 0:00:22 0:00:01 0:00:21 2701k\r 11 60.5M 0 0 11 7232k 0 3187k 0:00:19 0:00:02 0:00:17 3185k\r 15 60.5M 0 0 15 9856k 0 3062k 0:00:20 0:00:03 0:00:17 3061k\r 20 60.5M 0 0 20 12.3M 0 2993k 0:00:20 0:00:04 0:00:16 2992k\r 25 60.5M 0 0 25 15.4M 0 2997k 0:00:20 0:00:05 0:00:15 3241k\r 30 60.5M 0 0 30 18.3M 0 3017k 0:00:20 0:00:06 0:00:14 3099k\r 35 60.5M 0 0 35 21.4M 0 3027k 0:00:20 0:00:07 0:00:13 2954k\r 40 60.5M 0 0 40 24.5M 0 3050k 0:00:20 0:00:08 0:00:12 3042k\r 45 60.5M 0 0 45 27.4M 0 3045k 0:00:20 0:00:09 0:00:11 3089k\r 50 60.5M 0 0 50 30.6M 0 3061k 0:00:20 0:00:10 0:00:10 3128k\r 55 60.5M 0 0 55 33.8M 0 3086k 0:00:20 0:00:11 0:00:09 3172k\r 61 60.5M 0 0 61 37.1M 0 3112k 0:00:19 0:00:12 0:00:07 3235k\r 67 60.5M 0 0 67 41.0M 0 3170k 0:00:19 0:00:13 0:00:06 3366k\r 74 60.5M 0 0 74 45.1M 0 3249k 0:00:19 0:00:14 0:00:05 3626k\r 82 60.5M 0 0 82 50.0M 0 3362k 0:00:18 0:00:15 0:00:03 3983k\r 92 60.5M 0 0 92 55.7M 0 3520k 0:00:17 0:00:16 0:00:01 4495k\r100 60.5M 100 381 100 60.5M 21 3510k 0:00:18 0:00:17 0:00:01 4408k\r100 60.5M 100 381 100 60.5M 21 3509k 0:00:18 0:00:17 0:00:01 4531k\n",
"stdout": "<?xml version='1.0' encoding='UTF-8'?><Error><Code>AccessDenied</Code><Message>Access denied.</Message><Details>Anonymous caller does not have storage.objects.delete access to upload.scylladb.com/core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000./core.python.1000.3ee441d8238246e79d2c30f6619ceeac.1245911.1598259111000000000000.lz4.</Details></Error>",
"exited": 0,
"exit_status": 0
}
+ ],
+ "sudo ln /var/lib/systemd/coredump/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4 /var/lib/systemd/coredump/hardlinks/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo mkdir -p /var/lib/systemd/coredump/hardlinks": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
+ ],
+ "sudo rm -f /var/lib/systemd/coredump/hardlinks/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4": [
+ {
+ "__instance__": "fabric.runners.Result",
+ "stdout": "",
+ "stderr": "",
+ "exited": 0,
+ "exit_status": 0
+ }
]
}
diff --git a/unit_tests/test_data/test_coredump/systemd/success_test_systemd_248_results.json b/unit_tests/test_data/test_coredump/systemd/success_test_systemd_248_results.json
--- a/unit_tests/test_data/test_coredump/systemd/success_test_systemd_248_results.json
+++ b/unit_tests/test_data/test_coredump/systemd/success_test_systemd_248_results.json
@@ -14,7 +14,7 @@
{
"__instance__": "sdcm.coredump.CoreDumpInfo",
"pid": "5348",
- "corefile": "/var/lib/systemd/coredump/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4",
+ "corefile": "/var/lib/systemd/coredump/hardlinks/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4",
"source_timestamp": 1669637980.0,
"coredump_info": " PID: 5348 (scylla)\n UID: 112 (scylla)\n GID: 118 (scylla)\n Signal: 6 (ABRT)\n Timestamp: Mon 2022-11-28 12:19:40 UTC (15min ago)\n Command Line: /usr/bin/scylla --blocked-reactor-notify-ms 25 --abort-on-lsa-bad-alloc 1 --abort-on-seastar-bad-alloc --abort-on-internal-error 1 --abort-on-ebadf 1 --enable-sstable-key-validation 1 --smp 5 --log-to-syslog 1 --log-to-stdout 0 --default-log-level info --network-stack posix --io-properties-file=/etc/scylla.d/io_properties.yaml --cpuset 1-7 --lock-memory=1\n Executable: /opt/scylladb/libexec/scylla\n Control Group: /scylla.slice/scylla-server.slice/scylla-server.service\n Unit: scylla-server.service\n Slice: scylla-server.slice\n Boot ID: ff0f302ef93d4366812e6c5bcca67a90\n Machine ID: 4f7707a0ac3e44c5894dd86302dfd3b6\n Hostname: longevity-large-partitions-3h-fix-5-db-node-8b477908-1\n Storage: /var/lib/systemd/coredump/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4 (present)\n Disk Size: 55.6G\n Message: Process 5348 (scylla) of user 112 dumped core.\n \n Stack trace of thread 5352:\n #0 0x00007f233e38a6bd syscall (libc.so.6 + 0x1046bd)\n #1 0x0000000004f94f01 _ZN7seastar8internal13io_pgeteventsEmllPNS0_9linux_abi8io_eventEPK8timespecPK10__sigset_tb (scylla + 0x4d94f01)\n #2 0x0000000004f90c75 _ZN7seastar19reactor_backend_aio12await_eventsEiPK10__sigset_t (scylla + 0x4d90c75)\n #3 0x0000000004f91390 _ZN7seastar19reactor_backend_aio23wait_and_process_eventsEPK10__sigset_t (scylla + 0x4d91390)\n #4 0x0000000004f52b7d _ZN7seastar7reactor6do_runEv (scylla + 0x4d52b7d)\n #5 0x0000000004f6f892 _ZNSt17_Function_handlerIFvvEZN7seastar3smp9configureERKNS1_11smp_optionsERKNS1_15reactor_optionsEE4$_90E9_M_invokeERKSt9_Any_data (scylla + 0x4d6f892)\n #6 0x0000000004f2484b _ZN7seastar12posix_thread13start_routineEPv (scylla + 0x4d2484b)\n #7 0x00007f233e31114d start_thread (libc.so.6 + 0x8b14d)\n #8 0x00007f233e392950 __clone3 (libc.so.6 + 0x10c950)\n",
"download_instructions": "gsutil cp gs://upload.scylladb.com/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000./core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4 .\nunlz4 core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4",
@@ -39,7 +39,7 @@
{
"__instance__": "sdcm.coredump.CoreDumpInfo",
"pid": "5348",
- "corefile": "/var/lib/systemd/coredump/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4",
+ "corefile": "/var/lib/systemd/coredump/hardlinks/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4",
"source_timestamp": 1669637980.0,
"coredump_info": " PID: 5348 (scylla)\n UID: 112 (scylla)\n GID: 118 (scylla)\n Signal: 6 (ABRT)\n Timestamp: Mon 2022-11-28 12:19:40 UTC (15min ago)\n Command Line: /usr/bin/scylla --blocked-reactor-notify-ms 25 --abort-on-lsa-bad-alloc 1 --abort-on-seastar-bad-alloc --abort-on-internal-error 1 --abort-on-ebadf 1 --enable-sstable-key-validation 1 --smp 5 --log-to-syslog 1 --log-to-stdout 0 --default-log-level info --network-stack posix --io-properties-file=/etc/scylla.d/io_properties.yaml --cpuset 1-7 --lock-memory=1\n Executable: /opt/scylladb/libexec/scylla\n Control Group: /scylla.slice/scylla-server.slice/scylla-server.service\n Unit: scylla-server.service\n Slice: scylla-server.slice\n Boot ID: ff0f302ef93d4366812e6c5bcca67a90\n Machine ID: 4f7707a0ac3e44c5894dd86302dfd3b6\n Hostname: longevity-large-partitions-3h-fix-5-db-node-8b477908-1\n Storage: /var/lib/systemd/coredump/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4 (present)\n Disk Size: 55.6G\n Message: Process 5348 (scylla) of user 112 dumped core.\n \n Stack trace of thread 5352:\n #0 0x00007f233e38a6bd syscall (libc.so.6 + 0x1046bd)\n #1 0x0000000004f94f01 _ZN7seastar8internal13io_pgeteventsEmllPNS0_9linux_abi8io_eventEPK8timespecPK10__sigset_tb (scylla + 0x4d94f01)\n #2 0x0000000004f90c75 _ZN7seastar19reactor_backend_aio12await_eventsEiPK10__sigset_t (scylla + 0x4d90c75)\n #3 0x0000000004f91390 _ZN7seastar19reactor_backend_aio23wait_and_process_eventsEPK10__sigset_t (scylla + 0x4d91390)\n #4 0x0000000004f52b7d _ZN7seastar7reactor6do_runEv (scylla + 0x4d52b7d)\n #5 0x0000000004f6f892 _ZNSt17_Function_handlerIFvvEZN7seastar3smp9configureERKNS1_11smp_optionsERKNS1_15reactor_optionsEE4$_90E9_M_invokeERKSt9_Any_data (scylla + 0x4d6f892)\n #6 0x0000000004f2484b _ZN7seastar12posix_thread13start_routineEPv (scylla + 0x4d2484b)\n #7 0x00007f233e31114d start_thread (libc.so.6 + 0x8b14d)\n #8 0x00007f233e392950 __clone3 (libc.so.6 + 0x10c950)\n",
"download_instructions": "gsutil cp gs://upload.scylladb.com/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000./core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4 .\nunlz4 core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4",

Commit Bot

<bot@cloudius-systems.com>
unread,
Jun 30, 2024, 6:22:39 AMJun 30
to scylladb-dev@googlegroups.com, Israel Fruchter
From: Israel Fruchter <fr...@scylladb.com>
Committer: Israel Fruchter <israel....@gmail.com>
Branch: branch-2023.1

feature(coredump): use hardlinks to prevent coredumps from being cleared

This change creates a hardlink for every core that is found, and
compresses and uploads the core via that hardlink, so it won't be cleared
by systemd-coredump while SCT is still processing the coredump.

Hardlinks are created in `/var/lib/scylla/hardlinks` so that they
share the same physical device as the coredumps directory, but still
won't be scanned for deletion by systemd-coredump.

Fixes: #7255
(cherry picked from commit fdf25923e0f272121daeab17122387bff5541085)

---
diff --git a/sdcm/coredump.py b/sdcm/coredump.py
--- a/sdcm/coredump.py
+++ b/sdcm/coredump.py
@@ -21,6 +21,8 @@
from functools import cached_property
from threading import Thread, Event
from dataclasses import dataclass
+from pathlib import Path
+from contextlib import contextmanager

from sdcm.log import SDCMAdapter
from sdcm.remote import NETWORK_EXCEPTIONS
@@ -214,6 +216,10 @@ def _upload_coredump(self, core_info: CoreDumpInfo):
download_instructions = 'gsutil cp gs://%s .\ngunzip %s' % (upload_url, coredump.rsplit('/', 1)[-1])
core_info.download_url, core_info.download_instructions = download_url, download_instructions

+ @contextmanager
+ def hard_link_corefile(self, corefile): # pylint: disable=unused-argument,no-self-use
+ yield
+
def upload_coredump(self, core_info: CoreDumpInfo):
if core_info.download_url:
return False
@@ -223,7 +229,10 @@ def upload_coredump(self, core_info: CoreDumpInfo):
try:
self.log.debug(f'Start uploading file: {core_info.corefile}')
core_info.download_instructions = 'Coredump upload in progress'
- self._upload_coredump(core_info)
+ with self.hard_link_corefile(core_info.corefile) as hard_link:
+ if hard_link:
+ core_info.corefile = str(hard_link)
+ self._upload_coredump(core_info)
return True
except Exception as exc: # pylint: disable=broad-except
core_info.download_instructions = 'failed to upload core'
@@ -299,6 +308,16 @@ def systemd_version(self):
"download_instructions": "gsutil cp gs://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4 .\ngunzip core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
@@ -61,7 +61,7 @@
{
"__instance__": "sdcm.coredump.CoreDumpInfo",
"pid": "307283",
- "corefile": "/var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
+ "corefile": "/var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
"source_timestamp": 1598239861.0,
"coredump_info": " PID: 307283 (sshd)\n UID: 1000 (dkropachev)\n GID: 1000 (dkropachev)\n Signal: 31 (SYS)\n Timestamp: Mon 2020-08-24 10:31:01 +07 (4 days ago)\n Command Line: sshd: dkropachev [net]\n Executable: /usr/sbin/sshd\n Control Group: /user.slice/user-1000.slice/us...@1000.service/gnome-launched-pycharm-professional_pycharm-professional.desktop-6641.scope\n Unit: us...@1000.service\n User Unit: gnome-launched-pycharm-professional_pycharm-professional.desktop-6641.scope\n Slice: user-1000.slice\n Owner UID: 1000 (dkropachev)\n Boot ID: 3ee441d8238246e79d2c30f6619ceeac\n Machine ID: a72dad55f1754c44ad63a008ad3a60a5\n Hostname: dkropahev-pc\n Storage: /var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4\n Message: Process 307283 (sshd) of user 1000 dumped core.\n \n Stack trace of thread 307283:\n #0 0x00007fbdaf06177b __socket (libc.so.6 + 0x12377b)\n #1 0x00007fbdaf058b43 openlog_internal (libc.so.6 + 0x11ab43)\n #2 0x00007fbdaf05901f __vsyslog_internal (libc.so.6 + 0x11b01f)\n #3 0x00007fbdaf059333 __syslog_chk (libc.so.6 + 0x11b333)\n #4 0x000055a4e7c12b18 n/a (sshd + 0x5ab18)\n #5 0x000055a4e7c10a2a n/a (sshd + 0x58a2a)\n #6 0x000055a4e7be7f96 n/a (sshd + 0x2ff96)\n #7 0x000055a4e7c12981 n/a (sshd + 0x5a981)\n #8 0x000055a4e7c10a2a n/a (sshd + 0x58a2a)\n #9 0x000055a4e7be80c8 n/a (sshd + 0x300c8)\n #10 0x000055a4e7be97e9 n/a (sshd + 0x317e9)\n #11 0x000055a4e7bc957b n/a (sshd + 0x1157b)\n #12 0x00007fbdaef650b3 __libc_start_main (libc.so.6 + 0x270b3)\n #13 0x000055a4e7bc9b7e n/a (sshd + 0x11b7e)\n",
"download_instructions": "gsutil cp gs://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4 .\ngunzip core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
@@ -74,7 +74,7 @@
{
"__instance__": "sdcm.coredump.CoreDumpInfo",
"pid": "307283",
- "corefile": "/var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
+ "corefile": "/var/lib/systemd/coredump/hardlinks/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
"source_timestamp": 1598239861.0,
"coredump_info": " PID: 307283 (sshd)\n UID: 1000 (dkropachev)\n GID: 1000 (dkropachev)\n Signal: 31 (SYS)\n Timestamp: Mon 2020-08-24 10:31:01 +07 (4 days ago)\n Command Line: sshd: dkropachev [net]\n Executable: /usr/sbin/sshd\n Control Group: /user.slice/user-1000.slice/us...@1000.service/gnome-launched-pycharm-professional_pycharm-professional.desktop-6641.scope\n Unit: us...@1000.service\n User Unit: gnome-launched-pycharm-professional_pycharm-professional.desktop-6641.scope\n Slice: user-1000.slice\n Owner UID: 1000 (dkropachev)\n Boot ID: 3ee441d8238246e79d2c30f6619ceeac\n Machine ID: a72dad55f1754c44ad63a008ad3a60a5\n Hostname: dkropahev-pc\n Storage: /var/lib/systemd/coredump/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4\n Message: Process 307283 (sshd) of user 1000 dumped core.\n \n Stack trace of thread 307283:\n #0 0x00007fbdaf06177b __socket (libc.so.6 + 0x12377b)\n #1 0x00007fbdaf058b43 openlog_internal (libc.so.6 + 0x11ab43)\n #2 0x00007fbdaf05901f __vsyslog_internal (libc.so.6 + 0x11b01f)\n #3 0x00007fbdaf059333 __syslog_chk (libc.so.6 + 0x11b333)\n #4 0x000055a4e7c12b18 n/a (sshd + 0x5ab18)\n #5 0x000055a4e7c10a2a n/a (sshd + 0x58a2a)\n #6 0x000055a4e7be7f96 n/a (sshd + 0x2ff96)\n #7 0x000055a4e7c12981 n/a (sshd + 0x5a981)\n #8 0x000055a4e7c10a2a n/a (sshd + 0x58a2a)\n #9 0x000055a4e7be80c8 n/a (sshd + 0x300c8)\n #10 0x000055a4e7be97e9 n/a (sshd + 0x317e9)\n #11 0x000055a4e7bc957b n/a (sshd + 0x1157b)\n #12 0x00007fbdaef650b3 __libc_start_main (libc.so.6 + 0x270b3)\n #13 0x000055a4e7bc9b7e n/a (sshd + 0x11b7e)\n",
"download_instructions": "gsutil cp gs://upload.scylladb.com/core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000./core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4 .\ngunzip core.sshd.1000.3ee441d8238246e79d2c30f6619ceeac.307283.1598239861000000000000.lz4",
"download_instructions": "gsutil cp gs://upload.scylladb.com/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000./core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4 .\ngunzip core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4",
@@ -39,7 +39,7 @@
{
"__instance__": "sdcm.coredump.CoreDumpInfo",
"pid": "5348",
- "corefile": "/var/lib/systemd/coredump/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4",
+ "corefile": "/var/lib/systemd/coredump/hardlinks/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4",
"source_timestamp": 1669637980.0,
"coredump_info": " PID: 5348 (scylla)\n UID: 112 (scylla)\n GID: 118 (scylla)\n Signal: 6 (ABRT)\n Timestamp: Mon 2022-11-28 12:19:40 UTC (15min ago)\n Command Line: /usr/bin/scylla --blocked-reactor-notify-ms 25 --abort-on-lsa-bad-alloc 1 --abort-on-seastar-bad-alloc --abort-on-internal-error 1 --abort-on-ebadf 1 --enable-sstable-key-validation 1 --smp 5 --log-to-syslog 1 --log-to-stdout 0 --default-log-level info --network-stack posix --io-properties-file=/etc/scylla.d/io_properties.yaml --cpuset 1-7 --lock-memory=1\n Executable: /opt/scylladb/libexec/scylla\n Control Group: /scylla.slice/scylla-server.slice/scylla-server.service\n Unit: scylla-server.service\n Slice: scylla-server.slice\n Boot ID: ff0f302ef93d4366812e6c5bcca67a90\n Machine ID: 4f7707a0ac3e44c5894dd86302dfd3b6\n Hostname: longevity-large-partitions-3h-fix-5-db-node-8b477908-1\n Storage: /var/lib/systemd/coredump/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4 (present)\n Disk Size: 55.6G\n Message: Process 5348 (scylla) of user 112 dumped core.\n \n Stack trace of thread 5352:\n #0 0x00007f233e38a6bd syscall (libc.so.6 + 0x1046bd)\n #1 0x0000000004f94f01 _ZN7seastar8internal13io_pgeteventsEmllPNS0_9linux_abi8io_eventEPK8timespecPK10__sigset_tb (scylla + 0x4d94f01)\n #2 0x0000000004f90c75 _ZN7seastar19reactor_backend_aio12await_eventsEiPK10__sigset_t (scylla + 0x4d90c75)\n #3 0x0000000004f91390 _ZN7seastar19reactor_backend_aio23wait_and_process_eventsEPK10__sigset_t (scylla + 0x4d91390)\n #4 0x0000000004f52b7d _ZN7seastar7reactor6do_runEv (scylla + 0x4d52b7d)\n #5 0x0000000004f6f892 _ZNSt17_Function_handlerIFvvEZN7seastar3smp9configureERKNS1_11smp_optionsERKNS1_15reactor_optionsEE4$_90E9_M_invokeERKSt9_Any_data (scylla + 0x4d6f892)\n #6 0x0000000004f2484b _ZN7seastar12posix_thread13start_routineEPv (scylla + 0x4d2484b)\n #7 0x00007f233e31114d start_thread (libc.so.6 + 0x8b14d)\n #8 0x00007f233e392950 __clone3 (libc.so.6 + 0x10c950)\n",
"download_instructions": "gsutil cp gs://upload.scylladb.com/core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000./core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4 .\ngunzip core.scylla.112.ff0f302ef93d4366812e6c5bcca67a90.5348.1669637980000000.lz4",

Commit Bot

<bot@cloudius-systems.com>
unread,
Jun 30, 2024, 6:22:59 AMJun 30
to scylladb-dev@googlegroups.com, Israel Fruchter
From: Israel Fruchter <fr...@scylladb.com>
Committer: Israel Fruchter <israel....@gmail.com>
Branch: branch-2024.1
Reply all
Reply to author
Forward
0 new messages