[PATCH v1] mm/memblock: Correct totalram_pages accounting with KMSAN

0 views
Skip to first unread message

Alexander Potapenko

unread,
Sep 17, 2025, 8:32:58 AM (7 days ago) Sep 17
to gli...@google.com, ak...@linux-foundation.org, da...@redhat.com, vba...@suse.cz, rp...@kernel.org, linu...@kvack.org, linux-...@vger.kernel.org, el...@google.com, dvy...@google.com, kasa...@googlegroups.com, Aleksandr Nogikh
When KMSAN is enabled, `kmsan_memblock_free_pages()` can hold back pages
for metadata instead of returning them to the early allocator. The callers,
however, would unconditionally increment `totalram_pages`, assuming the
pages were always freed. This resulted in an incorrect calculation of the
total available RAM, causing the kernel to believe it had more memory than
it actually did.

This patch refactors `memblock_free_pages()` to return the number of pages
it successfully frees. If KMSAN stashes the pages, the function now
returns 0; otherwise, it returns the number of pages in the block.

The callers in `memblock.c` have been updated to use this return value,
ensuring that `totalram_pages` is incremented only by the number of pages
actually returned to the allocator. This corrects the total RAM accounting
when KMSAN is active.

Cc: Aleksandr Nogikh <nog...@google.com>
Fixes: 3c2065098260 ("init: kmsan: call KMSAN initialization routines")
Signed-off-by: Alexander Potapenko <gli...@google.com>
---
mm/internal.h | 4 ++--
mm/memblock.c | 18 +++++++++---------
mm/mm_init.c | 9 +++++----
3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 45b725c3dc030..ae1ee6e02eff9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -742,8 +742,8 @@ static inline void clear_zone_contiguous(struct zone *zone)
extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __putback_isolated_page(struct page *page, unsigned int order,
int mt);
-extern void memblock_free_pages(struct page *page, unsigned long pfn,
- unsigned int order);
+extern unsigned long memblock_free_pages(struct page *page, unsigned long pfn,
+ unsigned int order);
extern void __free_pages_core(struct page *page, unsigned int order,
enum meminit_context context);

diff --git a/mm/memblock.c b/mm/memblock.c
index 117d963e677c9..de7ff644d8f4f 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1834,10 +1834,9 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
cursor = PFN_UP(base);
end = PFN_DOWN(base + size);

- for (; cursor < end; cursor++) {
- memblock_free_pages(pfn_to_page(cursor), cursor, 0);
- totalram_pages_inc();
- }
+ for (; cursor < end; cursor++)
+ totalram_pages_add(
+ memblock_free_pages(pfn_to_page(cursor), cursor, 0));
}

/*
@@ -2259,9 +2258,11 @@ static void __init free_unused_memmap(void)
#endif
}

-static void __init __free_pages_memory(unsigned long start, unsigned long end)
+static unsigned long __init __free_pages_memory(unsigned long start,
+ unsigned long end)
{
int order;
+ unsigned long freed = 0;

while (start < end) {
/*
@@ -2279,10 +2280,11 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
while (start + (1UL << order) > end)
order--;

- memblock_free_pages(pfn_to_page(start), start, order);
+ freed += memblock_free_pages(pfn_to_page(start), start, order);

start += (1UL << order);
}
+ return freed;
}

static unsigned long __init __free_memory_core(phys_addr_t start,
@@ -2297,9 +2299,7 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
if (start_pfn >= end_pfn)
return 0;

- __free_pages_memory(start_pfn, end_pfn);
-
- return end_pfn - start_pfn;
+ return __free_pages_memory(start_pfn, end_pfn);
}

static void __init memmap_init_reserved_pages(void)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5c21b3af216b2..9883612768511 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,
return table;
}

-void __init memblock_free_pages(struct page *page, unsigned long pfn,
- unsigned int order)
+unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
+ unsigned int order)
{
if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
int nid = early_pfn_to_nid(pfn);

if (!early_page_initialised(pfn, nid))
- return;
+ return 0;
}

if (!kmsan_memblock_free_pages(page, order)) {
/* KMSAN will take care of these pages. */
- return;
+ return 0;
}

/* pages were reserved and not allocated */
clear_page_tag_ref(page);
__free_pages_core(page, order, MEMINIT_EARLY);
+ return 1UL << order;
}

DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
--
2.51.0.384.g4c02a37b29-goog

David Hildenbrand

unread,
Sep 17, 2025, 9:29:57 AM (7 days ago) Sep 17
to Alexander Potapenko, ak...@linux-foundation.org, vba...@suse.cz, rp...@kernel.org, linu...@kvack.org, linux-...@vger.kernel.org, el...@google.com, dvy...@google.com, kasa...@googlegroups.com, Aleksandr Nogikh
That part is clear. But for readability we should probably just do

if (memblock_free_pages(pfn_to_page(cursor), cursor, 0))
totalram_pages_inc();

Or use a temp variable as an alternative.


LGTM

Reviewed-by: David Hildenbrand <da...@redhat.com>

--
Cheers

David / dhildenb

Mike Rapoport

unread,
Sep 17, 2025, 10:20:12 AM (7 days ago) Sep 17
to David Hildenbrand, Alexander Potapenko, ak...@linux-foundation.org, vba...@suse.cz, linu...@kvack.org, linux-...@vger.kernel.org, el...@google.com, dvy...@google.com, kasa...@googlegroups.com, Aleksandr Nogikh
I prefer this one and totalram_pages_add() after the loop

> LGTM
>
> Reviewed-by: David Hildenbrand <da...@redhat.com>
>
> --
> Cheers
>
> David / dhildenb
>

--
Sincerely yours,
Mike.

Mike Rapoport

unread,
Sep 17, 2025, 10:27:50 AM (7 days ago) Sep 17
to Alexander Potapenko, ak...@linux-foundation.org, da...@redhat.com, vba...@suse.cz, linu...@kvack.org, linux-...@vger.kernel.org, el...@google.com, dvy...@google.com, kasa...@googlegroups.com, Aleksandr Nogikh
No need for extern, the inconsistency is fine here.
Please either align this with 'struct' or drop spaces and keep only tabs.

> {
> if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
> int nid = early_pfn_to_nid(pfn);
>
> if (!early_page_initialised(pfn, nid))
> - return;
> + return 0;
> }
>
> if (!kmsan_memblock_free_pages(page, order)) {
> /* KMSAN will take care of these pages. */
> - return;
> + return 0;
> }
>
> /* pages were reserved and not allocated */
> clear_page_tag_ref(page);
> __free_pages_core(page, order, MEMINIT_EARLY);
> + return 1UL << order;
> }
>
> DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);

--
Sincerely yours,
Mike.

Alexander Potapenko

unread,
5:56 AM (7 hours ago) 5:56 AM
to gli...@google.com, ak...@linux-foundation.org, da...@redhat.com, vba...@suse.cz, rp...@kernel.org, linu...@kvack.org, linux-...@vger.kernel.org, el...@google.com, dvy...@google.com, kasa...@googlegroups.com, Aleksandr Nogikh
When KMSAN is enabled, `kmsan_memblock_free_pages()` can hold back pages
for metadata instead of returning them to the early allocator. The callers,
however, would unconditionally increment `totalram_pages`, assuming the
pages were always freed. This resulted in an incorrect calculation of the
total available RAM, causing the kernel to believe it had more memory than
it actually did.

This patch refactors `memblock_free_pages()` to return the number of pages
it successfully frees. If KMSAN stashes the pages, the function now
returns 0; otherwise, it returns the number of pages in the block.

The callers in `memblock.c` have been updated to use this return value,
ensuring that `totalram_pages` is incremented only by the number of pages
actually returned to the allocator. This corrects the total RAM accounting
when KMSAN is active.

Cc: Aleksandr Nogikh <nog...@google.com>
Fixes: 3c2065098260 ("init: kmsan: call KMSAN initialization routines")
Signed-off-by: Alexander Potapenko <gli...@google.com>
Reviewed-by: David Hildenbrand <da...@redhat.com>

--- │
v2: │
- Remove extern from the declaration of memblock_free_pages() in │
mm/internal.h as suggested by Mike Rapoport. │
- Fix formatting in the definition of memblock_free_pages() in │
mm/mm_init.c as suggested by Mike Rapoport. │
- Refactor memblock_free_late() to improve readability as suggested by │
David Hildenbrand. │
---
mm/internal.h | 4 ++--
mm/memblock.c | 21 +++++++++++----------
mm/mm_init.c | 9 +++++----
3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 45b725c3dc030..ac841c53653eb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -742,8 +742,8 @@ static inline void clear_zone_contiguous(struct zone *zone)
extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __putback_isolated_page(struct page *page, unsigned int order,
int mt);
-extern void memblock_free_pages(struct page *page, unsigned long pfn,
- unsigned int order);
+unsigned long memblock_free_pages(struct page *page, unsigned long pfn,
+ unsigned int order);
extern void __free_pages_core(struct page *page, unsigned int order,
enum meminit_context context);

diff --git a/mm/memblock.c b/mm/memblock.c
index 117d963e677c9..9b23baee7dfe7 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1826,6 +1826,7 @@ void *__init __memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align,
void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
{
phys_addr_t cursor, end;
+ unsigned long freed_pages = 0;

end = base + size - 1;
memblock_dbg("%s: [%pa-%pa] %pS\n",
@@ -1834,10 +1835,9 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
cursor = PFN_UP(base);
end = PFN_DOWN(base + size);

- for (; cursor < end; cursor++) {
- memblock_free_pages(pfn_to_page(cursor), cursor, 0);
- totalram_pages_inc();
- }
+ for (; cursor < end; cursor++)
+ freed_pages += memblock_free_pages(pfn_to_page(cursor), cursor, 0);
+ totalram_pages_add(freed_pages);
}

/*
@@ -2259,9 +2259,11 @@ static void __init free_unused_memmap(void)
#endif
}

-static void __init __free_pages_memory(unsigned long start, unsigned long end)
+static unsigned long __init __free_pages_memory(unsigned long start,
+ unsigned long end)
{
int order;
+ unsigned long freed = 0;

while (start < end) {
/*
@@ -2279,14 +2281,15 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
while (start + (1UL << order) > end)
order--;

- memblock_free_pages(pfn_to_page(start), start, order);
+ freed += memblock_free_pages(pfn_to_page(start), start, order);

start += (1UL << order);
}
+ return freed;
}

static unsigned long __init __free_memory_core(phys_addr_t start,
- phys_addr_t end)
+ phys_addr_t end)
{
unsigned long start_pfn = PFN_UP(start);
unsigned long end_pfn = PFN_DOWN(end);
@@ -2297,9 +2300,7 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
if (start_pfn >= end_pfn)
return 0;

- __free_pages_memory(start_pfn, end_pfn);
-
- return end_pfn - start_pfn;
+ return __free_pages_memory(start_pfn, end_pfn);
}

static void __init memmap_init_reserved_pages(void)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5c21b3af216b2..9883612768511 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,
return table;
}

-void __init memblock_free_pages(struct page *page, unsigned long pfn,
- unsigned int order)
+unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
+ unsigned int order)
{
if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
int nid = early_pfn_to_nid(pfn);

if (!early_page_initialised(pfn, nid))
- return;
+ return 0;
}

if (!kmsan_memblock_free_pages(page, order)) {
/* KMSAN will take care of these pages. */
- return;
+ return 0;
}

/* pages were reserved and not allocated */
clear_page_tag_ref(page);
__free_pages_core(page, order, MEMINIT_EARLY);
+ return 1UL << order;
}

DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
--
2.51.0.534.gc79095c0ca-goog

Alexander Potapenko

unread,
6:03 AM (7 hours ago) 6:03 AM
to gli...@google.com, ak...@linux-foundation.org, da...@redhat.com, vba...@suse.cz, rp...@kernel.org, linu...@kvack.org, linux-...@vger.kernel.org, el...@google.com, dvy...@google.com, kasa...@googlegroups.com, Aleksandr Nogikh

Markus Elfring

unread,
9:07 AM (4 hours ago) 9:07 AM
to Alexander Potapenko, linu...@kvack.org, kasa...@googlegroups.com, LKML, Aleksandr Nogikh, Andrew Morton, David Hildenbrand, Dmitry Vyukov, Marco Elver, Mike Rapoport, Vlastimil Babka

Markus Elfring

unread,
9:23 AM (3 hours ago) 9:23 AM
to Alexander Potapenko, linu...@kvack.org, kasa...@googlegroups.com, LKML, Aleksandr Nogikh, Andrew Morton, David Hildenbrand, Dmitry Vyukov, Marco Elver, Mike Rapoport, Vlastimil Babka

> +++ b/mm/mm_init.c
> @@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,

> +unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
> + unsigned int order)
> {

> if (!kmsan_memblock_free_pages(page, order)) {
> /* KMSAN will take care of these pages. */
> - return;
> + return 0;
> }


How do you think about to omit curly brackets for this if statement?
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst?h=v6.17-rc7#n197

Regards,
Markus

Marco Elver

unread,
9:35 AM (3 hours ago) 9:35 AM
to Markus Elfring, Alexander Potapenko, linu...@kvack.org, kasa...@googlegroups.com, LKML, Aleksandr Nogikh, Andrew Morton, David Hildenbrand, Dmitry Vyukov, Mike Rapoport, Vlastimil Babka
No - with the /* .. */ comment there are 2 lines in this block.
Reply all
Reply to author
Forward
0 new messages