[syzbot] [mm?] WARNING: refcount bug in __reset_page_owner

11 views
Skip to first unread message

syzbot

unread,
Mar 19, 2024, 5:41:26 AM
to ak...@linux-foundation.org, linux-...@vger.kernel.org, linu...@kvack.org, syzkall...@googlegroups.com
Hello,

syzbot found the following issue on:

HEAD commit: 741e9d668aa5 Merge tag 'scsi-misc' of git://git.kernel.org..
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=112d3bb9180000
kernel config: https://syzkaller.appspot.com/x/.config?x=1c6662240382da2
dashboard link: https://syzkaller.appspot.com/bug?extid=98c1a1753a0731df2dd4
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=17d6ee31180000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=1338fc6e180000

Downloadable assets:
disk image (non-bootable): https://storage.googleapis.com/syzbot-assets/7bc7510fe41f/non_bootable_disk-741e9d66.raw.xz
vmlinux: https://storage.googleapis.com/syzbot-assets/1d3f3486333c/vmlinux-741e9d66.xz
kernel image: https://storage.googleapis.com/syzbot-assets/41a39292f22a/bzImage-741e9d66.xz

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+98c1a1...@syzkaller.appspotmail.com

------------[ cut here ]------------
refcount_t: decrement hit 0; leaking memory.
WARNING: CPU: 0 PID: 42 at lib/refcount.c:31 refcount_warn_saturate+0x1ed/0x210 lib/refcount.c:31
Modules linked in:
CPU: 0 PID: 42 Comm: kcompactd0 Not tainted 6.8.0-syzkaller-11339-g741e9d668aa5 #0
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:refcount_warn_saturate+0x1ed/0x210 lib/refcount.c:31
Code: 8b e8 17 19 d5 fc 90 0f 0b 90 90 e9 c3 fe ff ff e8 e8 7b 12 fd c6 05 a1 c1 0e 0b 01 90 48 c7 c7 80 75 6e 8b e8 f4 18 d5 fc 90 <0f> 0b 90 90 e9 a0 fe ff ff 48 89 ef e8 42 64 6d fd e9 44 fe ff ff
RSP: 0018:ffffc900009a7350 EFLAGS: 00010286
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff814fc049
RDX: ffff8880186e2440 RSI: ffffffff814fc056 RDI: 0000000000000001
RBP: ffff8881064ad33c R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8881064ad33c
R13: 0000000000000000 R14: 00000000026401ed R15: ffff888016a2c278
FS: 0000000000000000(0000) GS:ffff88806b200000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055556f261ca8 CR3: 000000000d57a000 CR4: 0000000000350ef0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
__refcount_dec include/linux/refcount.h:336 [inline]
refcount_dec include/linux/refcount.h:351 [inline]
dec_stack_record_count mm/page_owner.c:215 [inline]
__reset_page_owner+0x2ea/0x370 mm/page_owner.c:253
reset_page_owner include/linux/page_owner.h:25 [inline]
free_pages_prepare mm/page_alloc.c:1141 [inline]
free_unref_page_prepare+0x527/0xb10 mm/page_alloc.c:2347
free_unref_page+0x33/0x3c0 mm/page_alloc.c:2487
__folio_put_small mm/swap.c:119 [inline]
__folio_put+0x166/0x1f0 mm/swap.c:142
folio_put include/linux/mm.h:1506 [inline]
migrate_folio_done+0x27a/0x320 mm/migrate.c:1106
migrate_folio_move mm/migrate.c:1329 [inline]
migrate_pages_batch+0x18a4/0x2c60 mm/migrate.c:1778
migrate_pages_sync mm/migrate.c:1844 [inline]
migrate_pages+0x1b9f/0x2840 mm/migrate.c:1953
compact_zone+0x1f68/0x41c0 mm/compaction.c:2659
compact_node+0x1a2/0x2d0 mm/compaction.c:2923
kcompactd+0x73c/0xdb0 mm/compaction.c:3221
kthread+0x2c1/0x3a0 kernel/kthread.c:388
ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243
</TASK>


---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzk...@googlegroups.com.

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.

If the report is already addressed, let syzbot know by replying with:
#syz fix: exact-commit-title

If you want syzbot to run the reproducer, reply with:
#syz test: git://repo/address.git branch-or-commit-hash
If you attach or paste a git patch, syzbot will apply it before testing.

If you want to overwrite the report's subsystems, reply with:
#syz set subsystems: new-subsystem
(See the list of subsystem names on the web dashboard)

If the report is a duplicate of another one, reply with:
#syz dup: exact-subject-of-another-report

If you want to undo deduplication, reply with:
#syz undup

Tetsuo Handa

unread,
Mar 20, 2024, 4:06:55 AM
to syzbot, syzkaller-bugs, LKML
#syz test: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master

diff --git a/mm/page_owner.c b/mm/page_owner.c
index e7139952ffd9..58fc7b451f75 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -27,6 +27,7 @@ struct page_owner {
gfp_t gfp_mask;
depot_stack_handle_t handle;
depot_stack_handle_t free_handle;
+ depot_stack_handle_t migrate_handle;
u64 ts_nsec;
u64 free_ts_nsec;
char comm[TASK_COMM_LEN];
@@ -183,9 +184,11 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
spin_unlock_irqrestore(&stack_list_lock, flags);
}

-static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
+static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
+ int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
+ int old = REFCOUNT_SATURATED;

if (!stack_record)
return;
@@ -197,22 +200,21 @@ static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
* Since we do not use STACK_DEPOT_FLAG_GET API, let us
* set a refcount of 1 ourselves.
*/
- if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) {
- int old = REFCOUNT_SATURATED;
-
- if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1))
- /* Add the new stack_record to our list */
- add_stack_record_to_list(stack_record, gfp_mask);
- }
- refcount_inc(&stack_record->count);
+ if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1))
+ add_stack_record_to_list(stack_record, gfp_mask);
+ refcount_add(nr_base_pages, &stack_record->count);
}

-static void dec_stack_record_count(depot_stack_handle_t handle)
+static void dec_stack_record_count(depot_stack_handle_t handle,
+ int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);

- if (stack_record)
- refcount_dec(&stack_record->count);
+ if (!stack_record)
+ return;
+
+ if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
+ WARN(1, "%s refcount went to 0 for %u handle\n", __func__, handle);
}

void __reset_page_owner(struct page *page, unsigned short order)
@@ -229,7 +231,15 @@ void __reset_page_owner(struct page *page, unsigned short order)
return;

page_owner = get_page_owner(page_ext);
- alloc_handle = page_owner->handle;
+ /*
+ * If this page was allocated for migration purposes, its handle doesn't
+ * reference the stack it was allocated from, so make sure to use the
+ * migrate_handle in order to subtract it from the right stack.
+ */
+ if (!page_owner->migrate_handle)
+ alloc_handle = page_owner->handle;
+ else
+ alloc_handle = page_owner->migrate_handle;

handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
for (i = 0; i < (1 << order); i++) {
@@ -250,7 +260,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
* the machinery is not ready yet, we cannot decrement
* their refcount either.
*/
- dec_stack_record_count(alloc_handle);
+ dec_stack_record_count(alloc_handle, 1 << order);
}

static inline void __set_page_owner_handle(struct page_ext *page_ext,
@@ -266,6 +276,7 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext,
page_owner->handle = handle;
page_owner->order = order;
page_owner->gfp_mask = gfp_mask;
+ page_owner->migrate_handle = 0;
page_owner->last_migrate_reason = -1;
page_owner->pid = current->pid;
page_owner->tgid = current->tgid;
@@ -292,7 +303,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
return;
__set_page_owner_handle(page_ext, handle, order, gfp_mask);
page_ext_put(page_ext);
- inc_stack_record_count(handle, gfp_mask);
+ inc_stack_record_count(handle, gfp_mask, 1 << order);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -347,6 +358,7 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
new_page_owner->gfp_mask = old_page_owner->gfp_mask;
new_page_owner->last_migrate_reason =
old_page_owner->last_migrate_reason;
+ new_page_owner->migrate_handle = new_page_owner->handle;
new_page_owner->handle = old_page_owner->handle;
new_page_owner->pid = old_page_owner->pid;
new_page_owner->tgid = old_page_owner->tgid;
@@ -848,11 +860,11 @@ static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
return stack;
}

-static unsigned long page_owner_stack_threshold;
+static unsigned long page_owner_pages_threshold;

static int stack_print(struct seq_file *m, void *v)
{
- int i, stack_count;
+ int i, nr_base_pages;
struct stack *stack = v;
unsigned long *entries;
unsigned long nr_entries;
@@ -863,14 +875,14 @@ static int stack_print(struct seq_file *m, void *v)

nr_entries = stack_record->size;
entries = stack_record->entries;
- stack_count = refcount_read(&stack_record->count) - 1;
+ nr_base_pages = refcount_read(&stack_record->count) - 1;

- if (stack_count < 1 || stack_count < page_owner_stack_threshold)
+ if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
return 0;

for (i = 0; i < nr_entries; i++)
seq_printf(m, " %pS\n", (void *)entries[i]);
- seq_printf(m, "stack_count: %d\n\n", stack_count);
+ seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);

return 0;
}
@@ -900,13 +912,13 @@ static const struct file_operations page_owner_stack_operations = {

static int page_owner_threshold_get(void *data, u64 *val)
{
- *val = READ_ONCE(page_owner_stack_threshold);
+ *val = READ_ONCE(page_owner_pages_threshold);
return 0;
}

static int page_owner_threshold_set(void *data, u64 val)
{
- WRITE_ONCE(page_owner_stack_threshold, val);
+ WRITE_ONCE(page_owner_pages_threshold, val);
return 0;
}


syzbot

unread,
Mar 20, 2024, 4:31:05 AM
to linux-...@vger.kernel.org, penguin...@i-love.sakura.ne.jp, syzkall...@googlegroups.com
Hello,

syzbot has tested the proposed patch and the reproducer did not trigger any issue:

Reported-and-tested-by: syzbot+98c1a1...@syzkaller.appspotmail.com

Tested on:

commit: a4145ce1 Merge tag 'bcachefs-2024-03-19' of https://ev..
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=131b1d66180000
kernel config: https://syzkaller.appspot.com/x/.config?x=9f47e8dfa53b0b11
dashboard link: https://syzkaller.appspot.com/bug?extid=98c1a1753a0731df2dd4
compiler: gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40
patch: https://syzkaller.appspot.com/x/patch.diff?x=13972985180000

Note: testing is done by a robot and is best-effort only.

Tetsuo Handa

unread,
Mar 26, 2024, 6:17:27 AM
to syzbot, linux-...@vger.kernel.org, syzkall...@googlegroups.com
#syz fix: mm,page_owner: Fix refcount imbalance

Tetsuo Handa

unread,
Apr 20, 2024, 5:23:00 AM (7 days ago)
to syzbot, linux-...@vger.kernel.org, syzkall...@googlegroups.com
#syz fix: mm,page_owner: fix refcount imbalance

Reply all
Reply to author
Forward
0 new messages