Re: [PATCH 0/8] __vmalloc() and no-block support

1 view
Skip to first unread message

Marco Elver

unread,
Aug 7, 2025, 7:01:11 AM8/7/25
to Uladzislau Rezki (Sony), linu...@kvack.org, Andrew Morton, Vlastimil Babka, Michal Hocko, Baoquan He, LKML, Alexander Potapenko, kasa...@googlegroups.com
On Thu, Aug 07, 2025 at 09:58AM +0200, Uladzislau Rezki (Sony) wrote:
> Hello.
>
> This is a second series of making __vmalloc() to support GFP_ATOMIC and
> GFP_NOWAIT flags. It tends to improve the non-blocking behaviour.
>
> The first one can be found here:
>
> https://lore.kernel.org/all/20250704152537...@gmail.com/
>
> that was an RFC. Using this series for testing i have not found more
> places which can trigger: scheduling during atomic. Though there is
> one which requires attention. I will explain in [1].
>
> Please note, non-blocking gets improved in the __vmalloc() call only,
> i.e. vmalloc_huge() still contains in its paths many cond_resched()
> points and can not be used as non-blocking as of now.
>
> [1] The vmap_pages_range_noflush() contains the kmsan_vmap_pages_range_noflush()
> external implementation for KCSAN specifically which is hard coded to GFP_KERNEL.
> The kernel should be built with CONFIG_KCSAN option. To me it looks like not
> straight forward to run such kernel on my box, therefore i need more time to
> investigate what is wrong with CONFIG_KCSAN and my env.

KMSAN or KCSAN?

[+Cc KMSAN maintainers]

Uladzislau Rezki

unread,
Aug 8, 2025, 4:48:34 AM8/8/25
to Marco Elver, Uladzislau Rezki (Sony), linu...@kvack.org, Andrew Morton, Vlastimil Babka, Michal Hocko, Baoquan He, LKML, Alexander Potapenko, kasa...@googlegroups.com
Sorry for type, yes, that was about CONFIG_KMSAN.

--
Uladzislau Rezki

Uladzislau Rezki

unread,
Aug 23, 2025, 5:35:13 AM8/23/25
to Alexander Potapenko, Marco Elver, linu...@kvack.org, Andrew Morton, Vlastimil Babka, Michal Hocko, Baoquan He, LKML, Alexander Potapenko, kasa...@googlegroups.com
Hello, Alexander!

I am working on making vmalloc to support extra non-blocking flags.
Currently i see one more place that i need to address:

kmsan_vmap_pages_range_noflush() function which uses hard-coded GFP_KERNEL
flags for allocation of two arrays for its internal use only.

I have a question to you, can we just get rid of those two allocations?
It is the easiest way, if possible. Otherwise i can add "gfp_t gfp_mask"
extra parameter and pass there a corresponding gfp_mask flag. See below:

<snip>
diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
index 2b1432cc16d5..e4b34e7a3b11 100644
--- a/include/linux/kmsan.h
+++ b/include/linux/kmsan.h
@@ -133,6 +133,7 @@ void kmsan_kfree_large(const void *ptr);
* @prot: page protection flags used for vmap.
* @pages: array of pages.
* @page_shift: page_shift passed to vmap_range_noflush().
+ * @gfp_mask: gfp_mask to use internally.
*
* KMSAN maps shadow and origin pages of @pages into contiguous ranges in
* vmalloc metadata address range. Returns 0 on success, callers must check
@@ -142,7 +143,8 @@ int __must_check kmsan_vmap_pages_range_noflush(unsigned long start,
unsigned long end,
pgprot_t prot,
struct page **pages,
- unsigned int page_shift);
+ unsigned int page_shift,
+ gfp_t gfp_mask);

/**
* kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap.
@@ -348,7 +350,7 @@ static inline void kmsan_kfree_large(const void *ptr)

static inline int __must_check kmsan_vmap_pages_range_noflush(
unsigned long start, unsigned long end, pgprot_t prot,
- struct page **pages, unsigned int page_shift)
+ struct page **pages, unsigned int page_shift, gfp_t gfp_mask)
{
return 0;
}
diff --git a/mm/internal.h b/mm/internal.h
index 45b725c3dc03..6a13b8ee1e6c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1359,7 +1359,7 @@ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
#ifdef CONFIG_MMU
void __init vmalloc_init(void);
int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end,
- pgprot_t prot, struct page **pages, unsigned int page_shift);
+ pgprot_t prot, struct page **pages, unsigned int page_shift, gfp_t gfp_mask);
unsigned int get_vm_area_page_order(struct vm_struct *vm);
#else
static inline void vmalloc_init(void)
@@ -1368,7 +1368,7 @@ static inline void vmalloc_init(void)

static inline
int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end,
- pgprot_t prot, struct page **pages, unsigned int page_shift)
+ pgprot_t prot, struct page **pages, unsigned int page_shift, gfp_t gfp_mask)
{
return -EINVAL;
}
diff --git a/mm/kmsan/init.c b/mm/kmsan/init.c
index b14ce3417e65..5b74d6dbf0b8 100644
--- a/mm/kmsan/init.c
+++ b/mm/kmsan/init.c
@@ -233,5 +233,6 @@ void __init kmsan_init_runtime(void)
kmsan_memblock_discard();
pr_info("Starting KernelMemorySanitizer\n");
pr_info("ATTENTION: KMSAN is a debugging tool! Do not use it on production machines!\n");
- kmsan_enabled = true;
+ /* kmsan_enabled = true; */
+ kmsan_enabled = false;
}
diff --git a/mm/kmsan/shadow.c b/mm/kmsan/shadow.c
index 54f3c3c962f0..3cd733663100 100644
--- a/mm/kmsan/shadow.c
+++ b/mm/kmsan/shadow.c
@@ -215,7 +215,7 @@ void kmsan_free_page(struct page *page, unsigned int order)

int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
pgprot_t prot, struct page **pages,
- unsigned int page_shift)
+ unsigned int page_shift, gfp_t gfp_mask)
{
unsigned long shadow_start, origin_start, shadow_end, origin_end;
struct page **s_pages, **o_pages;
@@ -230,8 +230,8 @@ int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
return 0;

nr = (end - start) / PAGE_SIZE;
- s_pages = kcalloc(nr, sizeof(*s_pages), GFP_KERNEL);
- o_pages = kcalloc(nr, sizeof(*o_pages), GFP_KERNEL);
+ s_pages = kcalloc(nr, sizeof(*s_pages), gfp_mask);
+ o_pages = kcalloc(nr, sizeof(*o_pages), gfp_mask);
if (!s_pages || !o_pages) {
err = -ENOMEM;
goto ret;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index cd69caf6aa8d..4f5937090590 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -194,7 +194,7 @@ static int __pcpu_map_pages(unsigned long addr, struct page **pages,
int nr_pages)
{
return vmap_pages_range_noflush(addr, addr + (nr_pages << PAGE_SHIFT),
- PAGE_KERNEL, pages, PAGE_SHIFT);
+ PAGE_KERNEL, pages, PAGE_SHIFT, GFP_KERNEL);
}

/**
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ee197f5b8cf0..9be01dcca690 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -671,16 +671,28 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
}

int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
- pgprot_t prot, struct page **pages, unsigned int page_shift)
+ pgprot_t prot, struct page **pages, unsigned int page_shift,
+ gfp_t gfp_mask)
{
int ret = kmsan_vmap_pages_range_noflush(addr, end, prot, pages,
- page_shift);
+ page_shift, gfp_mask);

if (ret)
return ret;
return __vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
}

+static int __vmap_pages_range(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages, unsigned int page_shift,
+ gfp_t gfp_mask)
+{
+ int err;
+
+ err = vmap_pages_range_noflush(addr, end, prot, pages, page_shift, gfp_mask);
+ flush_cache_vmap(addr, end);
+ return err;
+}
+
/**
* vmap_pages_range - map pages to a kernel virtual address
* @addr: start of the VM area to map
@@ -696,11 +708,7 @@ int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
int vmap_pages_range(unsigned long addr, unsigned long end,
pgprot_t prot, struct page **pages, unsigned int page_shift)
{
- int err;
-
- err = vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
- flush_cache_vmap(addr, end);
- return err;
+ return __vmap_pages_range(addr, end, prot, pages, page_shift, GFP_KERNEL);
}

static int check_sparse_vm_area(struct vm_struct *area, unsigned long start,
@@ -3804,8 +3812,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
flags = memalloc_noio_save();

do {
- ret = vmap_pages_range(addr, addr + size, prot, area->pages,
- page_shift);
+ ret = __vmap_pages_range(addr, addr + size, prot, area->pages,
+ page_shift, gfp_mask);
if (nofail && (ret < 0))
schedule_timeout_uninterruptible(1);
} while (nofail && (ret < 0));
<snip>

Thanks!

--
Uladzislau Rezki
Reply all
Reply to author
Forward
0 new messages