
[PATCH 1/2] mm: hugetlbfs: Add some VM_BUG_ON()s to catch non-hugetlbfs pages

Dave Hansen, Nov 14, 2013, 6:40:02 PM

From: Dave Hansen <dave....@linux.intel.com>

Dave Jiang reported that he was seeing oopses when running
on NUMA systems with default_hugepagesz=1G. I traced the issue
down to migrate_page_copy() trying to use the same code for
hugetlb pages and transparent hugepages. It should not have
been passing thp pages in there.

So, add some VM_BUG_ON()s for the next hapless VM developer that
tries the same thing.
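
Note: VM_BUG_ON() compiles to a real assertion only when CONFIG_DEBUG_VM
is set, so these checks cost nothing on production builds. Its definition
in include/linux/mmdebug.h of this era is roughly:

	#ifdef CONFIG_DEBUG_VM
	#define VM_BUG_ON(cond) BUG_ON(cond)
	#else
	#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
	#endif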

Signed-off-by: Dave Hansen <dave....@linux.intel.com>
Reviewed-by: Naoya Horiguchi <n-hor...@ah.jp.nec.com>

---

linux.git-davehans/include/linux/hugetlb.h | 1 +
linux.git-davehans/mm/hugetlb.c | 1 +
2 files changed, 2 insertions(+)

diff -puN include/linux/hugetlb.h~bug-not-hugetlbfs-in-copy_huge_page include/linux/hugetlb.h
--- linux.git/include/linux/hugetlb.h~bug-not-hugetlbfs-in-copy_huge_page 2013-11-14 15:09:38.707180957 -0800
+++ linux.git-davehans/include/linux/hugetlb.h 2013-11-14 15:09:38.710181090 -0800
@@ -355,6 +355,7 @@ static inline pte_t arch_make_huge_pte(p

static inline struct hstate *page_hstate(struct page *page)
{
+ VM_BUG_ON(!PageHuge(page));
return size_to_hstate(PAGE_SIZE << compound_order(page));
}

diff -puN mm/hugetlb.c~bug-not-hugetlbfs-in-copy_huge_page mm/hugetlb.c
--- linux.git/mm/hugetlb.c~bug-not-hugetlbfs-in-copy_huge_page 2013-11-14 15:09:38.708181001 -0800
+++ linux.git-davehans/mm/hugetlb.c 2013-11-14 15:09:38.711181135 -0800
@@ -498,6 +498,7 @@ void copy_huge_page(struct page *dst, st
int i;
struct hstate *h = page_hstate(src);

+ VM_BUG_ON(!h);
if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
copy_gigantic_page(dst, src);
return;
_
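
Note: the NULL comes from the lookup itself. page_hstate() feeds the
page's size to size_to_hstate(), which walks the registered hstates and
returns NULL when nothing matches, as happens for a THP when no 2MB
hstate has been set up. Roughly, from mm/hugetlb.c of this era:

	struct hstate *size_to_hstate(unsigned long size)
	{
		struct hstate *h;

		for_each_hstate(h) {
			if (huge_page_size(h) == size)
				return h;
		}
		return NULL;
	}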
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Dave Hansen, Nov 14, 2013, 6:40:02 PM

Changes from v1:
* removed the explicit might_sleep() in favor of the one we
get from cond_resched()
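
Note: cond_resched() already embeds a __might_sleep() check, which is
why the explicit call was redundant. Roughly, from include/linux/sched.h
of this era:

	#define cond_resched() ({			\
		__might_sleep(__FILE__, __LINE__, 0);	\
		_cond_resched();			\
	})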

--

From: Dave Hansen <dave....@linux.intel.com>

Right now, the migration code in migrate_page_copy() uses
copy_huge_page() for hugetlbfs and thp pages:

if (PageHuge(page) || PageTransHuge(page))
copy_huge_page(newpage, page);

So, yay for code reuse. But:

void copy_huge_page(struct page *dst, struct page *src)
{
struct hstate *h = page_hstate(src);

and a non-hugetlbfs page has no hstate. This works
99% of the time because page_hstate() determines the
hstate from the page order alone. Since the page order
of a THP page matches the default hugetlbfs page order,
the lookup happens to succeed.
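
Concretely, on x86-64 with 4K base pages: a THP has
compound_order() == 9 (HPAGE_PMD_ORDER), so page_hstate() looks up
PAGE_SIZE << 9 = 4096 << 9 = 2MB, which is exactly the default
hugetlbfs page size, and size_to_hstate() happens to find a match.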

But, if you change the default huge page size on the
boot command line (say default_hugepagesz=1G), then
we might not even *have* a 2MB hstate, so page_hstate()
returns NULL and copy_huge_page() oopses pretty fast
since it dereferences the hstate:

void copy_huge_page(struct page *dst, struct page *src)
{
struct hstate *h = page_hstate(src);
if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
...
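
The dereference is immediate: pages_per_huge_page() reads h->order, so
a NULL hstate faults on first use. Roughly, from include/linux/hugetlb.h
of this era:

	static inline unsigned int pages_per_huge_page(struct hstate *h)
	{
		return 1 << h->order;
	}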

This patch creates a copy_high_order_page() which can
be used on THP pages.

I believe the bug was introduced in b32967ff101:
Author: Mel Gorman <mgo...@suse.de>
Date: Mon Nov 19 12:35:47 2012 +0000
mm: numa: Add THP migration for the NUMA working set scanning fault case.

Signed-off-by: Dave Hansen <dave....@linux.intel.com>
---

linux.git-davehans/include/linux/huge_mm.h | 16 ++++++++++++++++
linux.git-davehans/mm/huge_memory.c | 12 ++++++++++++
linux.git-davehans/mm/migrate.c | 6 ++++--
3 files changed, 32 insertions(+), 2 deletions(-)

diff -puN include/linux/huge_mm.h~copy-huge-separate-from-copy-transhuge include/linux/huge_mm.h
--- linux.git/include/linux/huge_mm.h~copy-huge-separate-from-copy-transhuge 2013-11-14 15:09:38.869188202 -0800
+++ linux.git-davehans/include/linux/huge_mm.h 2013-11-14 15:09:38.873188379 -0800
@@ -177,6 +177,10 @@ static inline struct page *compound_tran
extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd, pmd_t *pmdp);

+extern void copy_high_order_page(struct page *newpage,
+ struct page *oldpage,
+ int order);
+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -227,6 +231,18 @@ static inline int do_huge_pmd_numa_page(
return 0;
}

+/*
+ * The non-stub version of this code is probably usable
+ * generically but its only user is thp at the moment,
+ * so enforce that with a BUG()
+ */
+static inline void copy_high_order_page(struct page *newpage,
+ struct page *oldpage,
+ int order)
+{
+ BUG();
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#endif /* _LINUX_HUGE_MM_H */
diff -puN mm/huge_memory.c~copy-huge-separate-from-copy-transhuge mm/huge_memory.c
--- linux.git/mm/huge_memory.c~copy-huge-separate-from-copy-transhuge 2013-11-14 15:09:38.870188245 -0800
+++ linux.git-davehans/mm/huge_memory.c 2013-11-14 15:09:38.874188424 -0800
@@ -2890,3 +2890,15 @@ void __vma_adjust_trans_huge(struct vm_a
split_huge_page_address(next->vm_mm, nstart);
}
}
+
+void copy_high_order_page(struct page *newpage,
+ struct page *oldpage,
+ int order)
+{
+ int i;
+
+ for (i = 0; i < (1<<order); i++) {
+ cond_resched();
+ copy_highpage(newpage + i, oldpage + i);
+ }
+}
diff -puN mm/migrate.c~copy-huge-separate-from-copy-transhuge mm/migrate.c
--- linux.git/mm/migrate.c~copy-huge-separate-from-copy-transhuge 2013-11-14 15:09:38.871188288 -0800
+++ linux.git-davehans/mm/migrate.c 2013-11-14 15:09:38.874188424 -0800
@@ -447,8 +447,10 @@ void migrate_page_copy(struct page *newp
{
int cpupid;

- if (PageHuge(page) || PageTransHuge(page))
- copy_huge_page(newpage, page);
+ if (PageHuge(page))
+ copy_huge_page(newpage, page);
+ else if (PageTransHuge(page))
+ copy_high_order_page(newpage, page, HPAGE_PMD_ORDER);
else
copy_highpage(newpage, page);

Dave Hansen, Nov 14, 2013, 6:40:03 PM

There were only minor comments about this the last time around.
Any reason not to merge it?

Mel Gorman, Nov 15, 2013, 5:00:02 AM

On Thu, Nov 14, 2013 at 03:33:58PM -0800, Dave Hansen wrote:
>
> From: Dave Hansen <dave....@linux.intel.com>
>
> Dave Jiang reported that he was seeing oopses when running
> on NUMA systems with default_hugepagesz=1G. I traced the issue
> down to migrate_page_copy() trying to use the same code for
> hugetlb pages and transparent hugepages. It should not have
> been passing thp pages in there.
>
> So, add some VM_BUG_ON()s for the next hapless VM developer that
> tries the same thing.
>
> Signed-off-by: Dave Hansen <dave....@linux.intel.com>
> Reviewed-by: Naoya Horiguchi <n-hor...@ah.jp.nec.com>
>

Acked-by: Mel Gorman <mgo...@suse.de>

--
Mel Gorman
SUSE Labs

Mel Gorman, Nov 15, 2013, 5:30:01 AM

It is a mild pity that there are variants of this like copy_user_huge_page
for COW. They could be collapsed, but the resulting API would not be pretty.

A rename of copy_huge_page to copy_hugetlbfs_page is justified to avoid
a repeat of the mistake. Alternatively, there seems to be little reason
to add hugetlbfs- and thp-specific APIs when you could just do something
like this (untested):

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0b7656e..784313a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -476,40 +476,6 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg)
return 0;
}

-static void copy_gigantic_page(struct page *dst, struct page *src)
-{
- int i;
- struct hstate *h = page_hstate(src);
- struct page *dst_base = dst;
- struct page *src_base = src;
-
- for (i = 0; i < pages_per_huge_page(h); ) {
- cond_resched();
- copy_highpage(dst, src);
-
- i++;
- dst = mem_map_next(dst, dst_base, i);
- src = mem_map_next(src, src_base, i);
- }
-}
-
-void copy_huge_page(struct page *dst, struct page *src)
-{
- int i;
- struct hstate *h = page_hstate(src);
-
- if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
- copy_gigantic_page(dst, src);
- return;
- }
-
- might_sleep();
- for (i = 0; i < pages_per_huge_page(h); i++) {
- cond_resched();
- copy_highpage(dst + i, src + i);
- }
-}
-
static void enqueue_huge_page(struct hstate *h, struct page *page)
{
int nid = page_to_nid(page);
diff --git a/mm/migrate.c b/mm/migrate.c
index 9167b22..843b96d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -440,6 +440,49 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
return MIGRATEPAGE_SUCCESS;
}

+static void copy_gigantic_page(struct page *dst, struct page *src,
+ int nr_pages)
+{
+ int i;
+ struct page *dst_base = dst;
+ struct page *src_base = src;
+
+ for (i = 0; i < nr_pages; ) {
+ cond_resched();
+ copy_highpage(dst, src);
+
+ i++;
+ dst = mem_map_next(dst, dst_base, i);
+ src = mem_map_next(src, src_base, i);
+ }
+}
+
+void copy_huge_page(struct page *dst, struct page *src)
+{
+ int i;
+ int nr_pages;
+
+ if (PageHuge(src)) {
+ /* hugetlbfs page */
+ struct hstate *h = page_hstate(src);
+ nr_pages = pages_per_huge_page(h);
+
+ if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
+ copy_gigantic_page(dst, src, nr_pages);
+ return;
+ }
+ } else {
+ /* thp page */
+ BUG_ON(!PageTransHuge(src));
+ nr_pages = HPAGE_PMD_NR;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ cond_resched();
+ copy_highpage(dst + i, src + i);
+ }
+}
+
/*
* Copy the page to its new location
*/
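
Note: with this consolidation, the call site in migrate_page_copy()
could stay exactly as it is today, since copy_huge_page() would
dispatch internally:

	if (PageHuge(page) || PageTransHuge(page))
		copy_huge_page(newpage, page);
	else
		copy_highpage(newpage, page);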