Dear Linux maintainers and reviewers:
Linux version: ccd1cdca5cd433c8a5dff78b69a79b31d9b77ee1
The bisection log shows that the first bad commit is 169aaaf2e0be615ffd4a12adc02db5eb86e8eee1:
commit 169aaaf2e0be615ffd4a12adc02db5eb86e8eee1 (HEAD) Author: Qu Wenruo <w...@suse.com> Date: Fri Jun 14 13:52:30 2024 +0930 btrfs: introduce new "rescue=ignoremetacsums" mount option Introduce "rescue=ignoremetacsums" to ignore metadata csums, all the other metadata sanity checks are still kept as is. This new mount option is mostly to allow the kernel to mount an interrupted checksum conversion (at the metadata csum overwrite stage). And since the main part of metadata sanity checks is inside tree-checker, we shouldn't lose much safety, and the new mount option is rescue mount option it requires full read-only mount. Reviewed-by: Josef Bacik <jo...@toxicpanda.com> Signed-off-by: Qu Wenruo <w...@suse.com> Reviewed-by: David Sterba <dst...@suse.com> Signed-off-by: David Sterba <dst...@suse.com> diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index f59b00be26f3..f04d93109960 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -732,7 +732,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) * point, so they are handled as part of the no-checksum case. 
*/ if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) && - !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) && + !test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) && !btrfs_is_data_reloc_root(inode->root)) { if (should_async_write(bbio) && btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num)) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 686eec119eb4..2cfb7ab24aa9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -367,6 +367,7 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb, u8 result[BTRFS_CSUM_SIZE]; const u8 *header_csum; int ret = 0; + const bool ignore_csum = btrfs_test_opt(fs_info, IGNOREMETACSUMS); ASSERT(check); @@ -399,13 +400,16 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb, if (memcmp(result, header_csum, csum_size) != 0) { btrfs_warn_rl(fs_info, -"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d", +"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d%s", eb->start, eb->read_mirror, CSUM_FMT_VALUE(csum_size, header_csum), CSUM_FMT_VALUE(csum_size, result), - btrfs_header_level(eb)); - ret = -EUCLEAN; - goto out; + btrfs_header_level(eb), + ignore_csum ? ", ignored" : ""); + if (!ignore_csum) { + ret = -EUCLEAN; + goto out; + } } if (found_level != check->level) { @@ -2131,7 +2135,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root, /* If we have IGNOREDATACSUMS skip loading these roots. 
*/ if (objectid == BTRFS_CSUM_TREE_OBJECTID && btrfs_test_opt(fs_info, IGNOREDATACSUMS)) { - set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state); + set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state); return 0; } @@ -2184,7 +2188,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root, if (!found || ret) { if (objectid == BTRFS_CSUM_TREE_OBJECTID) - set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state); + set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state); if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) ret = ret ? ret : -ENOENT; @@ -2865,6 +2869,8 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block if (sb_rdonly(sb)) set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); + if (btrfs_test_opt(fs_info, IGNOREMETACSUMS)) + set_bit(BTRFS_FS_STATE_SKIP_META_CSUMS, &fs_info->fs_state); return btrfs_alloc_stripe_hash_table(fs_info); } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index e815fefaffe1..5c342fe1af61 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -353,7 +353,7 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio) u32 bio_offset = 0; if ((inode->flags & BTRFS_INODE_NODATASUM) || - test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) + test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state)) return BLK_STS_OK; /* diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 18e0d3539496..d5d473aafe98 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -98,7 +98,9 @@ enum { /* The btrfs_fs_info created for self-tests */ BTRFS_FS_STATE_DUMMY_FS_INFO, - BTRFS_FS_STATE_NO_CSUMS, + /* Checksum errors are ignored. */ + BTRFS_FS_STATE_NO_DATA_CSUMS, + BTRFS_FS_STATE_SKIP_META_CSUMS, /* Indicates there was an error cleaning up a log tree. 
*/ BTRFS_FS_STATE_LOG_CLEANUP_ERROR, @@ -224,6 +226,7 @@ enum { BTRFS_MOUNT_IGNOREDATACSUMS = (1UL << 28), BTRFS_MOUNT_NODISCARD = (1UL << 29), BTRFS_MOUNT_NOSPACECACHE = (1UL << 30), + BTRFS_MOUNT_IGNOREMETACSUMS = (1UL << 31), }; /* diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c index 210d9c82e2ae..77752eec125d 100644 --- a/fs/btrfs/messages.c +++ b/fs/btrfs/messages.c @@ -20,7 +20,8 @@ static const char fs_state_chars[] = { [BTRFS_FS_STATE_TRANS_ABORTED] = 'A', [BTRFS_FS_STATE_DEV_REPLACING] = 'R', [BTRFS_FS_STATE_DUMMY_FS_INFO] = 0, - [BTRFS_FS_STATE_NO_CSUMS] = 'C', + [BTRFS_FS_STATE_NO_DATA_CSUMS] = 'C', + [BTRFS_FS_STATE_SKIP_META_CSUMS] = 'S', [BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L', }; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 12cc1805af39..65d2abdc9975 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -176,6 +176,7 @@ enum { Opt_rescue_nologreplay, Opt_rescue_ignorebadroots, Opt_rescue_ignoredatacsums, + Opt_rescue_ignoremetacsums, Opt_rescue_parameter_all, }; @@ -185,7 +186,9 @@ static const struct constant_table btrfs_parameter_rescue[] = { { "ignorebadroots", Opt_rescue_ignorebadroots }, { "ibadroots", Opt_rescue_ignorebadroots }, { "ignoredatacsums", Opt_rescue_ignoredatacsums }, + { "ignoremetacsums", Opt_rescue_ignoremetacsums}, { "idatacsums", Opt_rescue_ignoredatacsums }, + { "imetacsums", Opt_rescue_ignoremetacsums}, { "all", Opt_rescue_parameter_all }, {} }; @@ -571,8 +574,12 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_rescue_ignoredatacsums: btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS); break; + case Opt_rescue_ignoremetacsums: + btrfs_set_opt(ctx->mount_opt, IGNOREMETACSUMS); + break; case Opt_rescue_parameter_all: btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS); + btrfs_set_opt(ctx->mount_opt, IGNOREMETACSUMS); btrfs_set_opt(ctx->mount_opt, IGNOREBADROOTS); btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); break; @@ -647,7 +654,8 @@ bool btrfs_check_options(const struct 
btrfs_fs_info *info, unsigned long *mount_ if (!(flags & SB_RDONLY) && (check_ro_option(info, *mount_opt, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") || check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") || - check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums"))) + check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums") || + check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREMETACSUMS, "ignoremetacsums"))) ret = false; if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) && @@ -1063,6 +1071,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) print_rescue_option(seq, "ignorebadroots", &printed); if (btrfs_test_opt(info, IGNOREDATACSUMS)) print_rescue_option(seq, "ignoredatacsums", &printed); + if (btrfs_test_opt(info, IGNOREMETACSUMS)) + print_rescue_option(seq, "ignoremetacsums", &printed); if (btrfs_test_opt(info, FLUSHONCOMMIT)) seq_puts(seq, ",flushoncommit"); if (btrfs_test_opt(info, DISCARD_SYNC)) @@ -1420,6 +1430,7 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, btrfs_info_if_set(info, old, USEBACKUPROOT, "trying to use backup root at mount time"); btrfs_info_if_set(info, old, IGNOREBADROOTS, "ignoring bad roots"); btrfs_info_if_set(info, old, IGNOREDATACSUMS, "ignoring data csums"); + btrfs_info_if_set(info, old, IGNOREMETACSUMS, "ignoring meta csums"); btrfs_info_if_unset(info, old, NODATACOW, "setting datacow"); btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations"); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index c58cea0da597..0e0e8eb84ca2 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -385,6 +385,7 @@ static const char *rescue_opts[] = { "nologreplay", "ignorebadroots", "ignoredatacsums", + "ignoremetacsums", "all", }; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 58e724c80a06..df7733044f7e 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1844,7 +1844,7 @@ void btrfs_finish_ordered_zoned(struct 
btrfs_ordered_extent *ordered) * here so that we don't attempt to log the csums later. */ if ((inode->flags & BTRFS_INODE_NODATASUM) || - test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) { + test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state)) { while ((sum = list_first_entry_or_null(&ordered->list, typeof(*sum), list))) { list_del(&sum->list);
The test case, kernel config and full bisection log are attached.
The crash report is as follows (the full report is attached):
BTRFS warning (device loop1 state CS): transaction 9 (with 12288 dirty metadata bytes) is not committed assertion failed: !found :: 0, in fs/btrfs/disk-io.c:4188 ------------[ cut here ]------------ kernel BUG at fs/btrfs/disk-io.c:4188! Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI CPU: 1 UID: 0 PID: 35067 Comm: syz-executor Tainted: G S 6.17.0-rc2-gcf6fc5eefc5b #3 PREEMPT(full) Tainted: [S]=CPU_OUT_OF_SPEC Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:warn_about_uncommitted_trans fs/btrfs/disk-io.c:4188 [inline] RIP: 0010:close_ctree+0x1442/0x1450 fs/btrfs/disk-io.c:4411 Code: 31 ec 0a e8 40 93 99 00 48 c7 c7 a0 73 6c 8c 48 c7 c6 20 bf 6c 8c 31 d2 48 c7 c1 20 74 6c 8c 41 b8 5c 10 00 00 e8 6e 7c ff ff <0f> 0b cc cc cc cc cc cc cc cc cc cc cc cc 90 90 90 90 90 90 90 90 RSP: 0018:ffa0000002c4fb40 EFLAGS: 00010246 RAX: 0000000000000039 RBX: ff1100012545cd38 RCX: 450f9e6765fd5300 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffa0000002c4fce0 R08: ff110004c8724253 R09: 1fe22000990e484a R10: dffffc0000000000 R11: ffe21c00990e484b R12: ffa0000002c4fbe0 R13: 1ff4000000589f80 R14: ff1100012545cd40 R15: ff11000164ea3028 FS: 0000000036427a00(0000) GS:ff11000535410000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055dd3f353020 CR3: 000000015c5a6006 CR4: 0000000000773ef0 PKRU: 80000000 Call Trace: <TASK> generic_shutdown_super+0x135/0x2c0 fs/super.c:643 kill_anon_super+0x3b/0x70 fs/super.c:1282 btrfs_kill_super+0x41/0x50 fs/btrfs/super.c:2117 deactivate_locked_super+0xc4/0x120 fs/super.c:474 cleanup_mnt+0x42e/0x4d0 fs/namespace.c:1375 task_work_run+0x25b/0x320 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xec/0x110 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] 
syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x250/0x3b0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x56a75b Code: ff ff ff f7 d8 64 89 01 48 83 c8 ff c3 66 90 f3 0f 1e fa 31 f6 e9 05 00 00 00 0f 1f 44 00 00 f3 0f 1e fa b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fff2aaf8d48 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 0000000000000007 RCX: 000000000056a75b RDX: 00007fff2aaf8df9 RSI: 0000000000000009 RDI: 00007fff2aaf8df0 RBP: 00000000000927c0 R08: 00000000005e20b0 R09: 00007fff2aaf8be0 R10: 00000000fffffffb R11: 0000000000000246 R12: 00007fff2aaf9e70 R13: 000000003643ab60 R14: 0000000000000000 R15: 0000000000000032 </TASK> Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:warn_about_uncommitted_trans fs/btrfs/disk-io.c:4188 [inline] RIP: 0010:close_ctree+0x1442/0x1450 fs/btrfs/disk-io.c:4411 Code: 31 ec 0a e8 40 93 99 00 48 c7 c7 a0 73 6c 8c 48 c7 c6 20 bf 6c 8c 31 d2 48 c7 c1 20 74 6c 8c 41 b8 5c 10 00 00 e8 6e 7c ff ff <0f> 0b cc cc cc cc cc cc cc cc cc cc cc cc 90 90 90 90 90 90 90 90 RSP: 0018:ffa0000002c4fb40 EFLAGS: 00010246 RAX: 0000000000000039 RBX: ff1100012545cd38 RCX: 450f9e6765fd5300 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffa0000002c4fce0 R08: ff110004c8724253 R09: 1fe22000990e484a R10: dffffc0000000000 R11: ffe21c00990e484b R12: ffa0000002c4fbe0 R13: 1ff4000000589f80 R14: ff1100012545cd40 R15: ff11000164ea3028 FS: 0000000036427a00(0000) GS:ff11000535410000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055dd3f353020 CR3: 000000015c5a6006 CR4: 0000000000773ef0 PKRU: 80000000