(trimmed off the batman/bpf Ccs)
FWIW here's a nicer reproducer that more clearly shows what's really
going on:
#define _GNU_SOURCE
#include <sys/mman.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
// for compat with older perf headers
#define uprobe_path config1
/*
 * Reproducer: open a perf uprobe event on "./bus", map that file
 * privately, then mremap() the mapping twice -- first growing its first
 * page to two pages, then moving one page to the address of the original
 * mapping.  No return value is checked anywhere in this program.
 */
int main(int argc, char *argv[])
{
// Find out what type id we need for uprobes
int perf_type_pmu_uprobe;
{
FILE *fp =
fopen("/sys/bus/event_source/devices/uprobe/type", "r");
fscanf(fp, "%d", &perf_type_pmu_uprobe);
fclose(fp);
}
// Create the target file if it does not exist and write one byte to it
const char *filename = "./bus";
int fd = open(filename, O_RDWR|O_CREAT, 0600);
write(fd, "x", 1);
// Anonymous private read/write/exec page; addr is never used again below.
// NOTE(review): unclear whether this mapping is needed to trigger the
// bug -- confirm before removing
void *addr = mmap(NULL, 4096,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
// Register a perf uprobe on "./bus"
// (only type and uprobe_path are set; every other attr field stays zero,
// and the returned event fd is discarded)
struct perf_event_attr attr = {};
attr.type = perf_type_pmu_uprobe;
attr.uprobe_path = (unsigned long) filename;
syscall(__NR_perf_event_open, &attr, 0, 0, -1, 0);
// Map two pages of the probed file, private and with no access permissions
void *addr2 = mmap(NULL, 2 * 4096,
PROT_NONE,
MAP_PRIVATE, fd, 0);
// First mremap: resize the first page of that mapping from 4096 to
// 2 * 4096 bytes, allowing the kernel to relocate it
void *addr3 = mremap((void *) addr2, 4096, 2 * 4096,
MREMAP_MAYMOVE);
// Second mremap: move one page from addr3 to the fixed address addr2,
// i.e. the start of the original file mapping
mremap(addr3, 4096, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, (void
*) addr2);
return 0;
}
This instantly reproduces the following output on current mainline for me:
BUG: Bad rss-counter state mm:(____ptrval____) type:MM_ANONPAGES val:1
AFAICT the worst thing about this bug is that it shows up on anything
that parses logs for "BUG"; it doesn't seem to have any ill effects
other than messing up the rss counters, although it may point to some
underlying problem in the uprobes/mm interaction.
If I enable the "rss_stat" tracepoint and set ftrace_dump_on_oops=1, I
see a trace roughly like this:
perf_event_open()
mmap(2 * 4096):
- uprobe_mmap()
- install_breakpoint()
- __replace_page()
- rss_stat: mm_id=0 curr=1 member=1 size=53248B
mremap(4096 => 2 * 4096):
- install_breakpoint()
- __replace_page()
- rss_stat: mm_id=0 curr=1 member=1 size=57344B
- unmap_page_range()
- rss_stat: mm_id=0 curr=1 member=1 size=53248B
mremap(4096 => 4096):
- move_vma()
- copy_vma()
- vma_merge()
- install_breakpoint()
- __replace_page()
- rss_stat: mm_id=0 curr=1 member=1 size=57344B
- do_munmap()
- install_breakpoint():
- __replace_page()
- rss_stat: mm_id=0 curr=1 member=1 size=61440B
- unmap_page_range():
- rss_stat: mm_id=0 curr=1 member=1 size=57344B
exit()
- exit_mmap()
- unmap_page_range():
- rss_stat: mm_id=0 curr=0 member=1 size=45056B
- unmap_page_range():
- rss_stat: mm_id=0 curr=0 member=1 size=32768B
- unmap_page_range():
- rss_stat: mm_id=0 curr=0 member=1 size=20480B
- unmap_page_range():
- rss_stat: mm_id=0 curr=0 member=1 size=16384B
- unmap_page_range():
- rss_stat: mm_id=0 curr=0 member=1 size=4096B
What strikes me here is that at the end of the first mremap(), we have
size 53248B (13 pages), but at the end of the second mremap(), we have
size 57344B (14 pages), even though the second mremap() is only moving 1
page. So the second mremap() is bumping it up twice, but then only
bumping down once.
Vegard