Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH v1 2/8] mm, kasan: SLAB support

53 views
Skip to first unread message

Alexander Potapenko

unread,
Jan 27, 2016, 1:30:06 PM1/27/16
to
This patch adds KASAN hooks to SLAB allocator.

This patch is based on the "mm: kasan: unified support for SLUB and
SLAB allocators" patch originally prepared by Dmitry Chernenkov.

Signed-off-by: Alexander Potapenko <gli...@google.com>
---
Documentation/kasan.txt | 5 ++-
include/linux/kasan.h | 12 +++++++
include/linux/slab.h | 6 ++++
include/linux/slab_def.h | 14 ++++++++
include/linux/slub_def.h | 11 ++++++
lib/Kconfig.kasan | 4 ++-
mm/Makefile | 1 +
mm/kasan/kasan.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++
mm/kasan/kasan.h | 34 ++++++++++++++++++
mm/kasan/report.c | 59 +++++++++++++++++++++++++------
mm/slab.c | 46 +++++++++++++++++++++---
mm/slab_common.c | 2 +-
12 files changed, 264 insertions(+), 21 deletions(-)

diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt
index aa1e0c9..7dd95b3 100644
--- a/Documentation/kasan.txt
+++ b/Documentation/kasan.txt
@@ -12,8 +12,7 @@ KASAN uses compile-time instrumentation for checking every memory access,
therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
required for detection of out-of-bounds accesses to stack or global variables.

-Currently KASAN is supported only for x86_64 architecture and requires the
-kernel to be built with the SLUB allocator.
+Currently KASAN is supported only for x86_64 architecture.

1. Usage
========
@@ -27,7 +26,7 @@ inline are compiler instrumentation types. The former produces smaller binary
the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
version 5.0 or later.

-Currently KASAN works only with the SLUB memory allocator.
+KASAN works with both SLUB and SLAB memory allocators.
For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.

To disable instrumentation for specific files or directories, add a line
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4b9f85c..4405a35 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -46,6 +46,9 @@ void kasan_unpoison_shadow(const void *address, size_t size);
void kasan_alloc_pages(struct page *page, unsigned int order);
void kasan_free_pages(struct page *page, unsigned int order);

+void kasan_cache_create(struct kmem_cache *cache, size_t *size,
+ unsigned long *flags);
+
void kasan_poison_slab(struct page *page);
void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
void kasan_poison_object_data(struct kmem_cache *cache, void *object);
@@ -59,6 +62,11 @@ void kasan_krealloc(const void *object, size_t new_size);
void kasan_slab_alloc(struct kmem_cache *s, void *object);
void kasan_slab_free(struct kmem_cache *s, void *object);

+struct kasan_cache {
+ int alloc_meta_offset;
+ int free_meta_offset;
+};
+
int kasan_module_alloc(void *addr, size_t size);
void kasan_free_shadow(const struct vm_struct *vm);

@@ -72,6 +80,10 @@ static inline void kasan_disable_current(void) {}
static inline void kasan_alloc_pages(struct page *page, unsigned int order) {}
static inline void kasan_free_pages(struct page *page, unsigned int order) {}

+static inline void kasan_cache_create(struct kmem_cache *cache,
+ size_t *size,
+ unsigned long *flags) {}
+
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
void *object) {}
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3ffee74..92f8558 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -92,6 +92,12 @@
# define SLAB_ACCOUNT 0x00000000UL
#endif

+#ifdef CONFIG_KASAN
+#define SLAB_KASAN 0x08000000UL
+#else
+#define SLAB_KASAN 0x00000000UL
+#endif
+
/* The following flags affect the page allocator grouping pages by mobility */
#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 33d0490..a25804d 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -72,8 +72,22 @@ struct kmem_cache {
#ifdef CONFIG_MEMCG_KMEM
struct memcg_cache_params memcg_params;
#endif
+#ifdef CONFIG_KASAN
+ struct kasan_cache kasan_info;
+#endif

struct kmem_cache_node *node[MAX_NUMNODES];
};

+static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+ void *x) {
+ void *object = x - (x - page->s_mem) % cache->size;
+ void *last_object = page->s_mem + (cache->num - 1) * cache->size;
+
+ if (unlikely(object > last_object))
+ return last_object;
+ else
+ return object;
+}
+
#endif /* _LINUX_SLAB_DEF_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 3388511..c553dad 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -129,4 +129,15 @@ static inline void *virt_to_obj(struct kmem_cache *s,
void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason);

+static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+ void *x) {
+ void *object = x - (x - page_address(page)) % cache->size;
+ void *last_object = page_address(page) +
+ (page->objects - 1) * cache->size;
+ if (unlikely(object > last_object))
+ return last_object;
+ else
+ return object;
+}
+
#endif /* _LINUX_SLUB_DEF_H */
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 0fee5ac..0e4d2b3 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -5,7 +5,7 @@ if HAVE_ARCH_KASAN

config KASAN
bool "KASan: runtime memory debugger"
- depends on SLUB_DEBUG
+ depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB)
select CONSTRUCTORS
help
Enables kernel address sanitizer - runtime memory debugger,
@@ -16,6 +16,8 @@ config KASAN
This feature consumes about 1/8 of available memory and brings about
~x3 performance slowdown.
For better error detection enable CONFIG_STACKTRACE.
+ Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB
+ (the resulting kernel does not boot).

choice
prompt "Instrumentation type"
diff --git a/mm/Makefile b/mm/Makefile
index 2ed4319..d675b37 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -3,6 +3,7 @@
#

KASAN_SANITIZE_slab_common.o := n
+KASAN_SANITIZE_slab.o := n
KASAN_SANITIZE_slub.o := n

mmu-y := nommu.o
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index bc0a8d8..84305c2 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -314,6 +314,59 @@ void kasan_free_pages(struct page *page, unsigned int order)
KASAN_FREE_PAGE);
}

+#ifdef CONFIG_SLAB
+/*
+ * Adaptive redzone policy taken from the userspace AddressSanitizer runtime.
+ * For larger allocations larger redzones are used.
+ */
+static size_t optimal_redzone(size_t object_size)
+{
+ int rz =
+ object_size <= 64 - 16 ? 16 :
+ object_size <= 128 - 32 ? 32 :
+ object_size <= 512 - 64 ? 64 :
+ object_size <= 4096 - 128 ? 128 :
+ object_size <= (1 << 14) - 256 ? 256 :
+ object_size <= (1 << 15) - 512 ? 512 :
+ object_size <= (1 << 16) - 1024 ? 1024 : 2048;
+ return rz;
+}
+
+void kasan_cache_create(struct kmem_cache *cache, size_t *size,
+ unsigned long *flags)
+{
+ int redzone_adjust;
+ /* Make sure the adjusted size is still less than
+ * KMALLOC_MAX_CACHE_SIZE.
+ * TODO: this check is only useful for SLAB, but not SLUB. We'll need
+ * to skip it for SLUB when it starts using kasan_cache_create().
+ */
+ if (*size > KMALLOC_MAX_CACHE_SIZE -
+ sizeof(struct kasan_alloc_meta) -
+ sizeof(struct kasan_free_meta))
+ return;
+ *flags |= SLAB_KASAN;
+ /* Add alloc meta. */
+ cache->kasan_info.alloc_meta_offset = *size;
+ *size += sizeof(struct kasan_alloc_meta);
+
+ /* Add free meta. */
+ if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor ||
+ cache->object_size < sizeof(struct kasan_free_meta)) {
+ cache->kasan_info.free_meta_offset = *size;
+ *size += sizeof(struct kasan_free_meta);
+ }
+ redzone_adjust = optimal_redzone(cache->object_size) -
+ (*size - cache->object_size);
+ if (redzone_adjust > 0)
+ *size += redzone_adjust;
+ *size = min(KMALLOC_MAX_CACHE_SIZE,
+ max(*size,
+ cache->object_size +
+ optimal_redzone(cache->object_size)));
+}
+#endif
+
void kasan_poison_slab(struct page *page)
{
kasan_poison_shadow(page_address(page),
@@ -331,8 +384,36 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object)
kasan_poison_shadow(object,
round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE),
KASAN_KMALLOC_REDZONE);
+#ifdef CONFIG_SLAB
+ if (cache->flags & SLAB_KASAN) {
+ struct kasan_alloc_meta *alloc_info =
+ get_alloc_info(cache, object);
+ alloc_info->state = KASAN_STATE_INIT;
+ }
+#endif
}

+static inline void set_track(struct kasan_track *track)
+{
+ track->cpu = raw_smp_processor_id();
+ track->pid = current->pid;
+ track->when = jiffies;
+}
+
+#ifdef CONFIG_SLAB
+struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
+ const void *object)
+{
+ return (void *)object + cache->kasan_info.alloc_meta_offset;
+}
+
+struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
+ const void *object)
+{
+ return (void *)object + cache->kasan_info.free_meta_offset;
+}
+#endif
+
void kasan_slab_alloc(struct kmem_cache *cache, void *object)
{
kasan_kmalloc(cache, object, cache->object_size);
@@ -366,6 +447,16 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size)
kasan_unpoison_shadow(object, size);
kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
KASAN_KMALLOC_REDZONE);
+#ifdef CONFIG_SLAB
+ if (cache->flags & SLAB_KASAN) {
+ struct kasan_alloc_meta *alloc_info =
+ get_alloc_info(cache, object);
+
+ alloc_info->state = KASAN_STATE_ALLOC;
+ alloc_info->alloc_size = size;
+ set_track(&alloc_info->track);
+ }
+#endif
}
EXPORT_SYMBOL(kasan_kmalloc);

diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 4f6c62e..7b9e4ab9 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -54,6 +54,40 @@ struct kasan_global {
#endif
};

+/**
+ * Structures to keep alloc and free tracks *
+ */
+
+enum kasan_state {
+ KASAN_STATE_INIT,
+ KASAN_STATE_ALLOC,
+ KASAN_STATE_FREE
+};
+
+struct kasan_track {
+ u64 cpu : 6; /* for NR_CPUS = 64 */
+ u64 pid : 16; /* 65536 processes */
+ u64 when : 42; /* ~140 years */
+};
+
+struct kasan_alloc_meta {
+ u32 state : 2; /* enum kasan_state */
+ u32 alloc_size : 30;
+ struct kasan_track track;
+};
+
+struct kasan_free_meta {
+ /* Allocator freelist pointer, unused by KASAN. */
+ void **freelist;
+ struct kasan_track track;
+};
+
+struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
+ const void *object);
+struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
+ const void *object);
+
+
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
{
return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 12f222d..2bf7218 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -115,6 +115,42 @@ static inline bool init_task_stack_addr(const void *addr)
sizeof(init_thread_union.stack));
}

+static void print_track(struct kasan_track *track)
+{
+ pr_err("PID = %lu, CPU = %lu, timestamp = %lu\n", track->pid,
+ track->cpu, track->when);
+}
+
+static void print_object(struct kmem_cache *cache, void *object)
+{
+ struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
+ struct kasan_free_meta *free_info;
+
+ pr_err("Object at %p, in cache %s\n", object, cache->name);
+ if (!(cache->flags & SLAB_KASAN))
+ return;
+ switch (alloc_info->state) {
+ case KASAN_STATE_INIT:
+ pr_err("Object not allocated yet\n");
+ break;
+ case KASAN_STATE_ALLOC:
+ pr_err("Object allocated with size %u bytes.\n",
+ alloc_info->alloc_size);
+ pr_err("Allocation:\n");
+ print_track(&alloc_info->track);
+ break;
+ case KASAN_STATE_FREE:
+ pr_err("Object freed, allocated with size %u bytes\n",
+ alloc_info->alloc_size);
+ free_info = get_free_info(cache, object);
+ pr_err("Allocation:\n");
+ print_track(&alloc_info->track);
+ pr_err("Deallocation:\n");
+ print_track(&free_info->track);
+ break;
+ }
+}
+
static void print_address_description(struct kasan_access_info *info)
{
const void *addr = info->access_addr;
@@ -126,17 +162,14 @@ static void print_address_description(struct kasan_access_info *info)
if (PageSlab(page)) {
void *object;
struct kmem_cache *cache = page->slab_cache;
- void *last_object;
-
- object = virt_to_obj(cache, page_address(page), addr);
- last_object = page_address(page) +
- page->objects * cache->size;
-
- if (unlikely(object > last_object))
- object = last_object; /* we hit into padding */
-
+ object = nearest_obj(cache, page,
+ (void *)info->access_addr);
+#ifdef CONFIG_SLAB
+ print_object(cache, object);
+#else
object_err(cache, page, object,
- "kasan: bad access detected");
+ "kasan: bad access detected");
+#endif
return;
}
dump_page(page, "kasan: bad access detected");
@@ -146,8 +179,9 @@ static void print_address_description(struct kasan_access_info *info)
if (!init_task_stack_addr(addr))
pr_err("Address belongs to variable %pS\n", addr);
}
-
+#ifdef CONFIG_SLUB
dump_stack();
+#endif
}

static bool row_is_guilty(const void *row, const void *guilty)
@@ -233,6 +267,9 @@ static void kasan_report_error(struct kasan_access_info *info)
dump_stack();
} else {
print_error_description(info);
+#ifdef CONFIG_SLAB
+ dump_stack();
+#endif
print_address_description(info);
print_shadow_for_address(info->first_bad_addr);
}
diff --git a/mm/slab.c b/mm/slab.c
index 6ecc697..739b89d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2196,6 +2196,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
#endif
#endif

+ kasan_cache_create(cachep, &size, &flags);
/*
* Determine if the slab management is 'on' or 'off' slab.
* (bootstrapping cannot cope with offslab caches so don't do
@@ -2503,8 +2504,13 @@ static void cache_init_objs(struct kmem_cache *cachep,
* cache which they are a constructor for. Otherwise, deadlock.
* They must also be threaded.
*/
- if (cachep->ctor && !(cachep->flags & SLAB_POISON))
+ if (cachep->ctor && !(cachep->flags & SLAB_POISON)) {
+ kasan_unpoison_object_data(cachep,
+ objp + obj_offset(cachep));
cachep->ctor(objp + obj_offset(cachep));
+ kasan_poison_object_data(
+ cachep, objp + obj_offset(cachep));
+ }

if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
@@ -2519,8 +2525,11 @@ static void cache_init_objs(struct kmem_cache *cachep,
kernel_map_pages(virt_to_page(objp),
cachep->size / PAGE_SIZE, 0);
#else
- if (cachep->ctor)
+ if (cachep->ctor) {
+ kasan_unpoison_object_data(cachep, objp);
cachep->ctor(objp);
+ kasan_poison_object_data(cachep, objp);
+ }
#endif
set_obj_status(page, i, OBJECT_FREE);
set_free_obj(page, i, i);
@@ -2650,6 +2659,7 @@ static int cache_grow(struct kmem_cache *cachep,

slab_map_pages(cachep, page, freelist);

+ kasan_poison_slab(page);
cache_init_objs(cachep, page);

if (gfpflags_allow_blocking(local_flags))
@@ -3364,7 +3374,10 @@ free_done:
static inline void __cache_free(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
- struct array_cache *ac = cpu_cache_get(cachep);
+ struct array_cache *ac;
+
+ kasan_slab_free(cachep, objp);
+ ac = cpu_cache_get(cachep);

check_irq_off();
kmemleak_free_recursive(objp, cachep->flags);
@@ -3403,6 +3416,8 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *ret = slab_alloc(cachep, flags, _RET_IP_);
+ if (ret)
+ kasan_slab_alloc(cachep, ret);

trace_kmem_cache_alloc(_RET_IP_, ret,
cachep->object_size, cachep->size, flags);
@@ -3432,6 +3447,8 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)

ret = slab_alloc(cachep, flags, _RET_IP_);

+ if (ret)
+ kasan_kmalloc(cachep, ret, size);
trace_kmalloc(_RET_IP_, ret,
size, cachep->size, flags);
return ret;
@@ -3455,6 +3472,8 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);

+ if (ret)
+ kasan_slab_alloc(cachep, ret);
trace_kmem_cache_alloc_node(_RET_IP_, ret,
cachep->object_size, cachep->size,
flags, nodeid);
@@ -3473,6 +3492,8 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,

ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);

+ if (ret)
+ kasan_kmalloc(cachep, ret, size);
trace_kmalloc_node(_RET_IP_, ret,
size, cachep->size,
flags, nodeid);
@@ -3485,11 +3506,16 @@ static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
{
struct kmem_cache *cachep;
+ void *ret;

cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- return kmem_cache_alloc_node_trace(cachep, flags, node, size);
+ ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
+ if (ret)
+ kasan_kmalloc(cachep, ret, size);
+
+ return ret;
}

void *__kmalloc_node(size_t size, gfp_t flags, int node)
@@ -3523,6 +3549,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
return cachep;
ret = slab_alloc(cachep, flags, caller);

+ if (ret)
+ kasan_kmalloc(cachep, ret, size);
trace_kmalloc(caller, ret,
size, cachep->size, flags);

@@ -4240,10 +4268,18 @@ module_init(slab_proc_init);
*/
size_t ksize(const void *objp)
{
+ size_t size;
+
BUG_ON(!objp);
if (unlikely(objp == ZERO_SIZE_PTR))
return 0;

- return virt_to_cache(objp)->object_size;
+ size = virt_to_cache(objp)->object_size;
+ /* We assume that ksize callers could use whole allocated area,
+ * so we need to unpoison this area.
+ */
+ kasan_krealloc(objp, size);
+
+ return size;
}
EXPORT_SYMBOL(ksize);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index e016178..8d2531d 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -35,7 +35,7 @@ struct kmem_cache *kmem_cache;
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
- SLAB_FAILSLAB)
+ SLAB_FAILSLAB | SLAB_KASAN)

#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
SLAB_NOTRACK | SLAB_ACCOUNT)
--
2.7.0.rc3.207.g0ac5344

Alexander Potapenko

unread,
Jan 27, 2016, 1:30:07 PM1/27/16
to
Signed-off-by: Alexander Potapenko <gli...@google.com>
---
lib/test_kasan.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 66dd92f..5498a78 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -286,6 +286,8 @@ static noinline void __init kmalloc_uaf2(void)
}

ptr1[40] = 'x';
+ if (ptr1 == ptr2)
+ pr_err("Could not detect use-after-free: ptr1 == ptr2\n");
kfree(ptr2);
}

--
2.7.0.rc3.207.g0ac5344

Alexander Potapenko

unread,
Jan 27, 2016, 1:30:07 PM1/27/16
to
Quarantine isolates freed objects in a separate queue. The objects are
returned to the allocator later, which helps to detect use-after-free
errors.

Freed objects are first added to per-cpu quarantine queues.
When a cache is destroyed or memory shrinking is requested, the objects
are moved into the global quarantine queue. Whenever a kmalloc call
allows memory reclaiming, the oldest objects are popped out of the
global queue until the total size of objects in quarantine is less than
3/4 of the maximum quarantine size (which is a fraction of installed
physical memory).

Right now quarantine support is only enabled in SLAB allocator.
Unification of KASAN features in SLAB and SLUB will be done later.

This patch is based on the "mm: kasan: quarantine" patch originally
prepared by Dmitry Chernenkov.

Signed-off-by: Alexander Potapenko <gli...@google.com>
---
include/linux/kasan.h | 30 ++++--
lib/test_kasan.c | 29 ++++++
mm/kasan/Makefile | 2 +-
mm/kasan/kasan.c | 68 +++++++++++-
mm/kasan/kasan.h | 11 +-
mm/kasan/quarantine.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++++++
mm/kasan/report.c | 3 +-
mm/mempool.c | 7 +-
mm/page_alloc.c | 2 +-
mm/slab.c | 12 ++-
mm/slab.h | 4 +
mm/slab_common.c | 2 +
mm/slub.c | 4 +-
13 files changed, 435 insertions(+), 23 deletions(-)

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index bf71ab0..355e722 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -44,24 +44,29 @@ static inline void kasan_disable_current(void)
void kasan_unpoison_shadow(const void *address, size_t size);

void kasan_alloc_pages(struct page *page, unsigned int order);
-void kasan_free_pages(struct page *page, unsigned int order);
+void kasan_poison_free_pages(struct page *page, unsigned int order);

void kasan_cache_create(struct kmem_cache *cache, size_t *size,
unsigned long *flags);
+void kasan_cache_shrink(struct kmem_cache *cache);
+void kasan_cache_destroy(struct kmem_cache *cache);

void kasan_poison_slab(struct page *page);
void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
void kasan_poison_object_data(struct kmem_cache *cache, void *object);

void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
-void kasan_kfree_large(const void *ptr);
-void kasan_kfree(void *ptr);
+void kasan_poison_kfree_large(const void *ptr);
+void kasan_poison_kfree(void *ptr);
void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size,
gfp_t flags);
void kasan_krealloc(const void *object, size_t new_size, gfp_t flags);

void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
-void kasan_slab_free(struct kmem_cache *s, void *object);
+/* kasan_slab_free() returns true if the object has been put into quarantine.
+ */
+bool kasan_slab_free(struct kmem_cache *s, void *object);
+void kasan_poison_slab_free(struct kmem_cache *s, void *object);

struct kasan_cache {
int alloc_meta_offset;
@@ -79,11 +84,14 @@ static inline void kasan_enable_current(void) {}
static inline void kasan_disable_current(void) {}

static inline void kasan_alloc_pages(struct page *page, unsigned int order) {}
-static inline void kasan_free_pages(struct page *page, unsigned int order) {}
+static inline void kasan_poison_free_pages(struct page *page,
+ unsigned int order) {}

static inline void kasan_cache_create(struct kmem_cache *cache,
size_t *size,
unsigned long *flags) {}
+static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
+static inline void kasan_cache_destroy(struct kmem_cache *cache) {}

static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
@@ -92,8 +100,8 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache,
void *object) {}

static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {}
-static inline void kasan_kfree_large(const void *ptr) {}
-static inline void kasan_kfree(void *ptr) {}
+static inline void kasan_poison_kfree_large(const void *ptr) {}
+static inline void kasan_poison_kfree(void *ptr) {}
static inline void kasan_kmalloc(struct kmem_cache *s, const void *object,
size_t size, gfp_t flags) {}
static inline void kasan_krealloc(const void *object, size_t new_size,
@@ -101,7 +109,13 @@ static inline void kasan_krealloc(const void *object, size_t new_size,

static inline void kasan_slab_alloc(struct kmem_cache *s, void *object,
gfp_t flags) {}
-static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
+/* kasan_slab_free() returns true if the object has been put into quarantine.
+ */
+static inline bool kasan_slab_free(struct kmem_cache *s, void *object)
+{
+ return false;
+}
+static inline void kasan_poison_slab_free(struct kmem_cache *s, void *object) {}

static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
static inline void kasan_free_shadow(const struct vm_struct *vm) {}
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 822c804..6eb6d42 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -349,6 +349,32 @@ static noinline void __init kasan_stack_oob(void)
*(volatile char *)p;
}

+#ifdef CONFIG_SLAB
+static noinline void __init kasan_quarantine_cache(void)
+{
+ struct kmem_cache *cache = kmem_cache_create(
+ "test", 137, 8, GFP_KERNEL, NULL);
+ int i;
+
+ for (i = 0; i < 100; i++) {
+ void *p = kmem_cache_alloc(cache, GFP_KERNEL);
+
+ kmem_cache_free(cache, p);
+ p = kmalloc(sizeof(u64), GFP_KERNEL);
+ kfree(p);
+ }
+ kmem_cache_shrink(cache);
+ for (i = 0; i < 100; i++) {
+ u64 *p = kmem_cache_alloc(cache, GFP_KERNEL);
+
+ kmem_cache_free(cache, p);
+ p = kmalloc(sizeof(u64), GFP_KERNEL);
+ kfree(p);
+ }
+ kmem_cache_destroy(cache);
+}
+#endif
+
static int __init kmalloc_tests_init(void)
{
kmalloc_oob_right();
@@ -372,6 +398,9 @@ static int __init kmalloc_tests_init(void)
kmem_cache_oob();
kasan_stack_oob();
kasan_global_oob();
+#ifdef CONFIG_SLAB
+ kasan_quarantine_cache();
+#endif
return -EAGAIN;
}

diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index f952515..8e59350 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -7,5 +7,5 @@ CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)

obj-y := kasan.o report.o kasan_init.o
ifdef CONFIG_SLAB
- obj-y += stackdepot.o
+ obj-y += stackdepot.o quarantine.o
endif
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index b5d04ec..43c079a 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -307,7 +307,7 @@ void kasan_alloc_pages(struct page *page, unsigned int order)
kasan_unpoison_shadow(page_address(page), PAGE_SIZE << order);
}

-void kasan_free_pages(struct page *page, unsigned int order)
+void kasan_poison_free_pages(struct page *page, unsigned int order)
{
if (likely(!PageHighMem(page)))
kasan_poison_shadow(page_address(page),
@@ -368,6 +368,20 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size,
}
#endif

+void kasan_cache_shrink(struct kmem_cache *cache)
+{
+#ifdef CONFIG_SLAB
+ quarantine_remove_cache(cache);
+#endif
+}
+
+void kasan_cache_destroy(struct kmem_cache *cache)
+{
+#ifdef CONFIG_SLAB
+ quarantine_remove_cache(cache);
+#endif
+}
+
void kasan_poison_slab(struct page *page)
{
kasan_poison_shadow(page_address(page),
@@ -464,7 +478,7 @@ void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags)
kasan_kmalloc(cache, object, cache->object_size, flags);
}

-void kasan_slab_free(struct kmem_cache *cache, void *object)
+void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
{
unsigned long size = cache->object_size;
unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
@@ -476,12 +490,53 @@ void kasan_slab_free(struct kmem_cache *cache, void *object)
kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
}

+bool kasan_slab_free(struct kmem_cache *cache, void *object)
+{
+#ifdef CONFIG_SLAB
+ /* RCU slabs could be legally used after free within the RCU period */
+ if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+ return false;
+
+ if (likely(cache->flags & SLAB_KASAN)) {
+ struct kasan_alloc_meta *alloc_info =
+ get_alloc_info(cache, object);
+ struct kasan_free_meta *free_info =
+ get_free_info(cache, object);
+
+ switch (alloc_info->state) {
+ case KASAN_STATE_ALLOC:
+ alloc_info->state = KASAN_STATE_QUARANTINE;
+ quarantine_put(free_info, cache);
+ set_track(&free_info->track, GFP_NOWAIT);
+ kasan_poison_slab_free(cache, object);
+ return true;
+ case KASAN_STATE_QUARANTINE:
+ case KASAN_STATE_FREE:
+ pr_err("Double free");
+ dump_stack();
+ break;
+ default:
+ break;
+ }
+ }
+ return false;
+#else
+ kasan_poison_slab_free(cache, object);
+ return false;
+#endif
+}
+
void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
gfp_t flags)
{
unsigned long redzone_start;
unsigned long redzone_end;

+#ifdef CONFIG_SLAB
+ if (flags & __GFP_RECLAIM)
+ quarantine_reduce();
+#endif
+
if (unlikely(object == NULL))
return;

@@ -512,6 +567,11 @@ void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
unsigned long redzone_start;
unsigned long redzone_end;

+#ifdef CONFIG_SLAB
+ if (flags & __GFP_RECLAIM)
+ quarantine_reduce();
+#endif
+
if (unlikely(ptr == NULL))
return;

@@ -540,7 +600,7 @@ void kasan_krealloc(const void *object, size_t size, gfp_t flags)
kasan_kmalloc(page->slab_cache, object, size, flags);
}

-void kasan_kfree(void *ptr)
+void kasan_poison_kfree(void *ptr)
{
struct page *page;

@@ -553,7 +613,7 @@ void kasan_kfree(void *ptr)
kasan_slab_free(page->slab_cache, ptr);
}

-void kasan_kfree_large(const void *ptr)
+void kasan_poison_kfree_large(const void *ptr)
{
struct page *page = virt_to_page(ptr);

diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index eb9de369..0fe58d9 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -61,6 +61,7 @@ struct kasan_global {
enum kasan_state {
KASAN_STATE_INIT,
KASAN_STATE_ALLOC,
+ KASAN_STATE_QUARANTINE,
KASAN_STATE_FREE
};

@@ -82,8 +83,10 @@ struct kasan_alloc_meta {
};

struct kasan_free_meta {
- /* Allocator freelist pointer, unused by KASAN. */
- void **freelist;
+ /* This field is used while the object is in the quarantine.
+ * Otherwise it might be used for the allocator freelist.
+ */
+ void **quarantine_link;
struct kasan_track track;
};

@@ -113,4 +116,8 @@ kasan_stack_handle kasan_save_stack(struct stack_trace *trace, gfp_t flags);

void kasan_fetch_stack(kasan_stack_handle handle, struct stack_trace *trace);

+void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache);
+void quarantine_reduce(void);
+void quarantine_remove_cache(struct kmem_cache *cache);
+
#endif
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
new file mode 100644
index 0000000..10c0ad6
--- /dev/null
+++ b/mm/kasan/quarantine.c
@@ -0,0 +1,284 @@
+/* Kasan quarantine */
+
+#include <linux/gfp.h>
+#include <linux/hash.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/shrinker.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "../slab.h"
+#include "kasan.h"
+
+/* Data structure and operations for quarantine queues. */
+
+/* Each queue is a signled-linked list, which also stores the total size of
+ * objects inside of it.
+ */
+struct qlist {
+ void **head;
+ void **tail;
+ size_t bytes;
+};
+
+#define QLIST_INIT { NULL, NULL, 0 }
+
+static inline bool empty_qlist(struct qlist *q)
+{
+ return !q->head;
+}
+
+static inline void init_qlist(struct qlist *q)
+{
+ q->head = q->tail = NULL;
+ q->bytes = 0;
+}
+
+static inline void qlist_put(struct qlist *q, void **qlink, size_t size)
+{
+ if (unlikely(empty_qlist(q)))
+ q->head = qlink;
+ else
+ *q->tail = qlink;
+ q->tail = qlink;
+ *qlink = NULL;
+ q->bytes += size;
+}
+
+static inline void **qlist_remove(struct qlist *q, void ***prev,
+ size_t size)
+{
+ void **qlink = *prev;
+
+ *prev = *qlink;
+ if (q->tail == qlink) {
+ if (q->head == qlink)
+ q->tail = NULL;
+ else
+ q->tail = (void **)prev;
+ }
+ q->bytes -= size;
+
+ return qlink;
+}
+
+static inline void qlist_move_all(struct qlist *from, struct qlist *to)
+{
+ if (unlikely(empty_qlist(from)))
+ return;
+
+ if (empty_qlist(to)) {
+ *to = *from;
+ init_qlist(from);
+ return;
+ }
+
+ *to->tail = from->head;
+ to->tail = from->tail;
+ to->bytes += from->bytes;
+
+ init_qlist(from);
+}
+
+static inline void qlist_move(struct qlist *from, void **last, struct qlist *to,
+ size_t size)
+{
+ if (unlikely(last == from->tail)) {
+ qlist_move_all(from, to);
+ return;
+ }
+ if (empty_qlist(to))
+ to->head = from->head;
+ else
+ *to->tail = from->head;
+ to->tail = last;
+ from->head = *last;
+ *last = NULL;
+ from->bytes -= size;
+ to->bytes += size;
+}
+
+
+/* The object quarantine consists of per-cpu queues and a global queue,
+ * guarded by quarantine_lock.
+ */
+static DEFINE_PER_CPU(struct qlist, cpu_quarantine);
+
+static struct qlist global_quarantine;
+static DEFINE_SPINLOCK(quarantine_lock);
+
+/* Maximum size of the global queue. */
+static unsigned long quarantine_size;
+
+/* The fraction of physical memory the quarantine is allowed to occupy.
+ * Quarantine doesn't support memory shrinker with SLAB allocator, so we keep
+ * the ratio low to avoid OOM.
+ */
+#define QUARANTINE_FRACTION 32
+
+#define QUARANTINE_LOW_SIZE (smp_load_acquire(&quarantine_size) * 3 / 4)
+#define QUARANTINE_PERCPU_SIZE (1 << 20)
+
+static inline struct kmem_cache *qlink_to_cache(void **qlink)
+{
+ return virt_to_head_page(qlink)->slab_cache;
+}
+
+static inline void *qlink_to_object(void **qlink, struct kmem_cache *cache)
+{
+ struct kasan_free_meta *free_info =
+ container_of((void ***)qlink, struct kasan_free_meta,
+ quarantine_link);
+
+ return ((void *)free_info) - cache->kasan_info.free_meta_offset;
+}
+
+static inline void qlink_free(void **qlink, struct kmem_cache *cache)
+{
+ void *object = qlink_to_object(qlink, cache);
+ struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
+ unsigned long flags;
+
+ local_irq_save(flags);
+ alloc_info->state = KASAN_STATE_FREE;
+ nokasan_free(cache, object, _THIS_IP_);
+ local_irq_restore(flags);
+}
+
+static inline void qlist_free_all(struct qlist *q, struct kmem_cache *cache)
+{
+ void **qlink;
+
+ if (unlikely(empty_qlist(q)))
+ return;
+
+ qlink = q->head;
+ while (qlink) {
+ struct kmem_cache *obj_cache =
+ cache ? cache : qlink_to_cache(qlink);
+ void **next = *qlink;
+
+ qlink_free(qlink, obj_cache);
+ qlink = next;
+ }
+ init_qlist(q);
+}
+
+void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
+{
+ unsigned long flags;
+ struct qlist *q;
+ struct qlist temp = QLIST_INIT;
+
+ local_irq_save(flags);
+
+ q = this_cpu_ptr(&cpu_quarantine);
+ qlist_put(q, (void **) &info->quarantine_link, cache->size);
+ if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE))
+ qlist_move_all(q, &temp);
+
+ local_irq_restore(flags);
+
+ if (unlikely(!empty_qlist(&temp))) {
+ spin_lock_irqsave(&quarantine_lock, flags);
+ qlist_move_all(&temp, &global_quarantine);
+ spin_unlock_irqrestore(&quarantine_lock, flags);
+ }
+}
+
+void quarantine_reduce(void)
+{
+ size_t new_quarantine_size;
+ unsigned long flags;
+ struct qlist to_free = QLIST_INIT;
+ size_t size_to_free = 0;
+ void **last;
+
+ if (likely(ACCESS_ONCE(global_quarantine.bytes) <=
+ smp_load_acquire(&quarantine_size)))
+ return;
+
+ spin_lock_irqsave(&quarantine_lock, flags);
+
+ /* Update quarantine size in case of hotplug. Allocate a fraction of
+ * the installed memory to quarantine minus per-cpu queue limits.
+ */
+ new_quarantine_size = (ACCESS_ONCE(totalram_pages) << PAGE_SHIFT) /
+ QUARANTINE_FRACTION;
+ new_quarantine_size -= QUARANTINE_PERCPU_SIZE * num_online_cpus();
+ smp_store_release(&quarantine_size, new_quarantine_size);
+
+ last = global_quarantine.head;
+ while (last) {
+ struct kmem_cache *cache = qlink_to_cache(last);
+
+ size_to_free += cache->size;
+ if (!*last || size_to_free >
+ global_quarantine.bytes - QUARANTINE_LOW_SIZE)
+ break;
+ last = (void **) *last;
+ }
+ qlist_move(&global_quarantine, last, &to_free, size_to_free);
+
+ spin_unlock_irqrestore(&quarantine_lock, flags);
+
+ qlist_free_all(&to_free, NULL);
+}
+
+static inline void qlist_move_cache(struct qlist *from,
+ struct qlist *to,
+ struct kmem_cache *cache)
+{
+ void ***prev;
+
+ if (unlikely(empty_qlist(from)))
+ return;
+
+ prev = &from->head;
+ while (*prev) {
+ void **qlink = *prev;
+ struct kmem_cache *obj_cache = qlink_to_cache(qlink);
+
+ if (obj_cache == cache) {
+ if (unlikely(from->tail == qlink))
+ from->tail = (void **) prev;
+ *prev = (void **) *qlink;
+ from->bytes -= cache->size;
+ qlist_put(to, qlink, cache->size);
+ } else
+ prev = (void ***) *prev;
+ }
+}
+
+static void per_cpu_remove_cache(void *arg)
+{
+ struct kmem_cache *cache = arg;
+ struct qlist to_free = QLIST_INIT;
+ struct qlist *q;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ q = this_cpu_ptr(&cpu_quarantine);
+ qlist_move_cache(q, &to_free, cache);
+ local_irq_restore(flags);
+
+ qlist_free_all(&to_free, cache);
+}
+
+void quarantine_remove_cache(struct kmem_cache *cache)
+{
+ unsigned long flags;
+ struct qlist to_free = QLIST_INIT;
+
+ on_each_cpu(per_cpu_remove_cache, cache, 0);
+
+ spin_lock_irqsave(&quarantine_lock, flags);
+ qlist_move_cache(&global_quarantine, &to_free, cache);
+ spin_unlock_irqrestore(&quarantine_lock, flags);
+
+ qlist_free_all(&to_free, cache);
+}
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 6c4afcd..a4dca25 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -148,7 +148,8 @@ static void print_object(struct kmem_cache *cache, void *object)
print_track(&alloc_info->track);
break;
case KASAN_STATE_FREE:
- pr_err("Object freed, allocated with size %u bytes\n",
+ case KASAN_STATE_QUARANTINE:
+ pr_err("Object freed, allocated with size %lu bytes\n",
alloc_info->alloc_size);
free_info = get_free_info(cache, object);
pr_err("Allocation:\n");
diff --git a/mm/mempool.c b/mm/mempool.c
index b47c8a7..4beeeef 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -105,11 +105,12 @@ static inline void poison_element(mempool_t *pool, void *element)
static void kasan_poison_element(mempool_t *pool, void *element)
{
if (pool->alloc == mempool_alloc_slab)
- kasan_slab_free(pool->pool_data, element);
+ kasan_poison_slab_free(pool->pool_data, element);
if (pool->alloc == mempool_kmalloc)
- kasan_kfree(element);
+ kasan_poison_kfree(element);
if (pool->alloc == mempool_alloc_pages)
- kasan_free_pages(element, (unsigned long)pool->pool_data);
+ kasan_poison_free_pages(element,
+ (unsigned long)pool->pool_data);
}

static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 63358d9..4f65587 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -980,7 +980,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order)

trace_mm_page_free(page, order);
kmemcheck_free_shadow(page, order);
- kasan_free_pages(page, order);
+ kasan_poison_free_pages(page, order);

if (PageAnon(page))
page->mapping = NULL;
diff --git a/mm/slab.c b/mm/slab.c
index 0ec7aa3..e2fac67 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3374,9 +3374,19 @@ free_done:
static inline void __cache_free(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
+#ifdef CONFIG_KASAN
+ if (!kasan_slab_free(cachep, objp))
+ /* The object has been put into the quarantine, don't touch it
+ * for now.
+ */
+ nokasan_free(cachep, objp, caller);
+}
+
+void nokasan_free(struct kmem_cache *cachep, void *objp, unsigned long caller)
+{
+#endif
struct array_cache *ac;

- kasan_slab_free(cachep, objp);
ac = cpu_cache_get(cachep);

check_irq_off();
diff --git a/mm/slab.h b/mm/slab.h
index c63b869..3f19e3f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -372,4 +372,8 @@ void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
int memcg_slab_show(struct seq_file *m, void *p);

+#ifdef CONFIG_KASAN
+void nokasan_free(struct kmem_cache *cache, void *x, unsigned long addr);
+#endif
+
#endif /* MM_SLAB_H */
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 8478631..8f2edde 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -710,6 +710,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
get_online_cpus();
get_online_mems();

+ kasan_cache_destroy(s);
mutex_lock(&slab_mutex);

s->refcount--;
@@ -748,6 +749,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep)

get_online_cpus();
get_online_mems();
+ kasan_cache_shrink(cachep);
ret = __kmem_cache_shrink(cachep, false);
put_online_mems();
put_online_cpus();
diff --git a/mm/slub.c b/mm/slub.c
index 945bbee..6fe45de 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1278,7 +1278,7 @@ static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
static inline void kfree_hook(const void *x)
{
kmemleak_free(x);
- kasan_kfree_large(x);
+ kasan_poison_kfree_large(x);
}

static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
@@ -1333,7 +1333,7 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(x, s->object_size);

- kasan_slab_free(s, x);
+ kasan_poison_slab_free(s, x);
}

static inline void slab_free_freelist_hook(struct kmem_cache *s,
--
2.7.0.rc3.207.g0ac5344

Alexander Potapenko

unread,
Jan 27, 2016, 1:30:07 PM1/27/16
to
This patch set implements SLAB support for KASAN

Unlike SLUB, SLAB doesn't store allocation/deallocation stacks for heap
objects, therefore we reimplement this feature in mm/kasan/stackdepot.c.
The intention is to ultimately switch SLUB to use this implementation as
well, which will remove the dependency on SLUB_DEBUG.

Also neither SLUB nor SLAB delay the reuse of freed memory chunks, which
is necessary for better detection of use-after-free errors. We introduce
memory quarantine (mm/kasan/quarantine.c), which allows delayed reuse of
deallocated memory.

Alexander Potapenko (8):
kasan: Change the behavior of kmalloc_large_oob_right test
mm, kasan: SLAB support
mm, kasan: Added GFP flags to KASAN API
arch, ftrace: For KASAN put hard/soft IRQ entries into separate
sections
mm, kasan: Stackdepot implementation. Enable stackdepot for SLAB
kasan: Test fix: Warn if the UAF could not be detected in kmalloc_uaf2
kasan: Changed kmalloc_large_oob_right, added
kmalloc_pagealloc_oob_right
mm: kasan: Initial memory quarantine implementation

Documentation/kasan.txt | 5 +-
arch/arm/kernel/vmlinux.lds.S | 1 +
arch/arm64/kernel/vmlinux.lds.S | 1 +
arch/blackfin/kernel/vmlinux.lds.S | 1 +
arch/c6x/kernel/vmlinux.lds.S | 1 +
arch/metag/kernel/vmlinux.lds.S | 1 +
arch/microblaze/kernel/vmlinux.lds.S | 1 +
arch/mips/kernel/vmlinux.lds.S | 1 +
arch/nios2/kernel/vmlinux.lds.S | 1 +
arch/openrisc/kernel/vmlinux.lds.S | 1 +
arch/parisc/kernel/vmlinux.lds.S | 1 +
arch/powerpc/kernel/vmlinux.lds.S | 1 +
arch/s390/kernel/vmlinux.lds.S | 1 +
arch/sh/kernel/vmlinux.lds.S | 1 +
arch/sparc/kernel/vmlinux.lds.S | 1 +
arch/tile/kernel/vmlinux.lds.S | 1 +
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/vmlinux.lds.S | 1 +
include/asm-generic/vmlinux.lds.h | 12 +-
include/linux/ftrace.h | 31 ++--
include/linux/kasan.h | 63 +++++---
include/linux/slab.h | 6 +
include/linux/slab_def.h | 14 ++
include/linux/slub_def.h | 11 ++
kernel/softirq.c | 3 +-
lib/Kconfig.kasan | 4 +-
lib/test_kasan.c | 66 +++++++-
mm/Makefile | 1 +
mm/kasan/Makefile | 3 +
mm/kasan/kasan.c | 221 +++++++++++++++++++++++++--
mm/kasan/kasan.h | 52 +++++++
mm/kasan/quarantine.c | 284 +++++++++++++++++++++++++++++++++++
mm/kasan/report.c | 68 +++++++--
mm/kasan/stackdepot.c | 236 +++++++++++++++++++++++++++++
mm/mempool.c | 23 +--
mm/page_alloc.c | 2 +-
mm/slab.c | 56 ++++++-
mm/slab.h | 4 +
mm/slab_common.c | 8 +-
mm/slub.c | 21 +--
40 files changed, 1122 insertions(+), 89 deletions(-)
create mode 100644 mm/kasan/quarantine.c
create mode 100644 mm/kasan/stackdepot.c

--
2.7.0.rc3.207.g0ac5344

Alexander Potapenko

unread,
Jan 27, 2016, 1:30:11 PM1/27/16
to
Stack depot will allow KASAN store allocation/deallocation stack traces
for memory chunks. The stack traces are stored in a hash table and
referenced by handles which reside in the kasan_alloc_meta and
kasan_free_meta structures in the allocated memory chunks.

IRQ stack traces are cut below the IRQ entry point to avoid unnecessary
duplication.

Right now stackdepot support is only enabled in SLAB allocator.
Once KASAN features in SLAB are on par with those in SLUB we can switch
SLUB to stackdepot as well, thus removing the dependency on SLUB_DEBUG.

This patch is based on the "mm: kasan: stack depots" patch originally
prepared by Dmitry Chernenkov.

Signed-off-by: Alexander Potapenko <gli...@google.com>
---
arch/x86/kernel/Makefile | 1 +
mm/kasan/Makefile | 3 +
mm/kasan/kasan.c | 51 +++++++++-
mm/kasan/kasan.h | 11 +++
mm/kasan/report.c | 8 ++
mm/kasan/stackdepot.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 307 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b1b78ff..500584d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -19,6 +19,7 @@ endif
KASAN_SANITIZE_head$(BITS).o := n
KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_stacktrace.o := n

CFLAGS_irq.o := -I$(src)/../include/asm/trace

diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 6471014..f952515 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -6,3 +6,6 @@ CFLAGS_REMOVE_kasan.o = -pg
CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)

obj-y := kasan.o report.o kasan_init.o
+ifdef CONFIG_SLAB
+ obj-y += stackdepot.o
+endif
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 787224a..b5d04ec 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -17,7 +17,9 @@
#define DISABLE_BRANCH_PROFILING

#include <linux/export.h>
+#include <linux/ftrace.h>
#include <linux/init.h>
+#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/memblock.h>
@@ -31,7 +33,6 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
-#include <linux/kasan.h>

#include "kasan.h"
#include "../slab.h"
@@ -393,23 +394,67 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object)
#endif
}

-static inline void set_track(struct kasan_track *track)
+static inline int in_irqentry_text(unsigned long ptr)
+{
+ return (ptr >= (unsigned long)&__irqentry_text_start &&
+ ptr < (unsigned long)&__irqentry_text_end) ||
+ (ptr >= (unsigned long)&__softirqentry_text_start &&
+ ptr < (unsigned long)&__softirqentry_text_end);
+}
+
+static inline void filter_irq_stacks(struct stack_trace *trace)
+{
+ int i;
+
+ if (!trace->nr_entries)
+ return;
+ for (i = 0; i < trace->nr_entries; i++)
+ if (in_irqentry_text(trace->entries[i])) {
+ /* Include the irqentry function into the stack. */
+ trace->nr_entries = i + 1;
+ break;
+ }
+}
+
+static inline kasan_stack_handle save_stack(gfp_t flags)
+{
+ unsigned long entries[KASAN_STACK_DEPTH];
+ struct stack_trace trace = {
+ .nr_entries = 0,
+ .entries = entries,
+ .max_entries = KASAN_STACK_DEPTH,
+ .skip = 0
+ };
+
+ save_stack_trace(&trace);
+ filter_irq_stacks(&trace);
+ if (trace.nr_entries != 0 &&
+ trace.entries[trace.nr_entries-1] == ULONG_MAX)
+ trace.nr_entries--;
+
+ return kasan_save_stack(&trace, flags);
+}
+
+static inline void set_track(struct kasan_track *track, gfp_t flags)
{
track->cpu = raw_smp_processor_id();
track->pid = current->pid;
track->when = jiffies;
+ track->stack = save_stack(flags);
}

#ifdef CONFIG_SLAB
struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
const void *object)
{
+ BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32);
return (void *)object + cache->kasan_info.alloc_meta_offset;
}

struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
const void *object)
{
+ BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32);
return (void *)object + cache->kasan_info.free_meta_offset;
}
#endif
@@ -455,7 +500,7 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,

alloc_info->state = KASAN_STATE_ALLOC;
alloc_info->alloc_size = size;
- set_track(&alloc_info->track);
+ set_track(&alloc_info->track, flags);
}
#endif
}
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 7b9e4ab9..eb9de369 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -64,10 +64,15 @@ enum kasan_state {
KASAN_STATE_FREE
};

+#define KASAN_STACK_DEPTH 64
+#define KASAN_STACK_BITS (32) /* up to 16GB of stack storage */
+typedef u32 kasan_stack_handle;
+
struct kasan_track {
u64 cpu : 6; /* for NR_CPUS = 64 */
u64 pid : 16; /* 65536 processes */
u64 when : 42; /* ~140 years */
+ kasan_stack_handle stack : KASAN_STACK_BITS;
};

struct kasan_alloc_meta {
@@ -102,4 +107,10 @@ static inline bool kasan_report_enabled(void)
void kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip);

+struct stack_trace;
+
+kasan_stack_handle kasan_save_stack(struct stack_trace *trace, gfp_t flags);
+
+void kasan_fetch_stack(kasan_stack_handle handle, struct stack_trace *trace);
+
#endif
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 2bf7218..6c4afcd 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -119,6 +119,14 @@ static void print_track(struct kasan_track *track)
{
pr_err("PID = %lu, CPU = %lu, timestamp = %lu\n", track->pid,
track->cpu, track->when);
+ if (track->stack) {
+ struct stack_trace trace;
+
+ kasan_fetch_stack(track->stack, &trace);
+ print_stack_trace(&trace, 0);
+ } else {
+ pr_err("(stack is not available)\n");
+ }
}

static void print_object(struct kmem_cache *cache, void *object)
diff --git a/mm/kasan/stackdepot.c b/mm/kasan/stackdepot.c
new file mode 100644
index 0000000..e3026a5
--- /dev/null
+++ b/mm/kasan/stackdepot.c
@@ -0,0 +1,236 @@
+/*
+ * Stack depot
+ * KASAN needs to safe alloc and free stacks per object, but storing 2 stack
+ * traces per object is too much overhead (e.g. SLUB_DEBUG needs 256 bytes per
+ * object).
+ *
+ * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc
+ * and free stacks repeat a lot, we save about 100x space.
+ * Stacks are never removed from depot, so we store them contiguously one after
+ * another in a contiguos memory allocation.
+ */
+
+
+#include <linux/gfp.h>
+#include <linux/jhash.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/stacktrace.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "kasan.h"
+
+#define STACK_ALLOC_ORDER 4 /* 'Slab' size order for stack depot, 16 pages */
+#define STACK_ALLOC_SIZE (1L << (PAGE_SHIFT + STACK_ALLOC_ORDER))
+#define STACK_ALLOC_ALIGN 4
+#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
+ STACK_ALLOC_ALIGN)
+#define STACK_ALLOC_INDEX_BITS (KASAN_STACK_BITS - STACK_ALLOC_OFFSET_BITS)
+#define STACK_ALLOC_SLABS_CAP 1024
+#define STACK_ALLOC_MAX_SLABS \
+ (((1L << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
+ (1L << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)
+
+/* The compact structure to store the reference to stacks. */
+union handle_parts {
+ kasan_stack_handle handle;
+ struct {
+ u32 slabindex : STACK_ALLOC_INDEX_BITS;
+ u32 offset : STACK_ALLOC_OFFSET_BITS;
+ };
+};
+
+struct kasan_stack {
+ struct kasan_stack *next; /* Link in the hashtable */
+ u32 hash; /* Hash in the hastable */
+ u32 size; /* Number of frames in the stack */
+ union handle_parts handle;
+ unsigned long entries[1]; /* Variable-sized array of entries. */
+};
+
+static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
+
+static int depot_index;
+static int next_slab_inited;
+static size_t depot_offset;
+static DEFINE_SPINLOCK(depot_lock);
+
+static bool init_stack_slab(void **prealloc)
+{
+ if (!*prealloc)
+ return false;
+ if (smp_load_acquire(&next_slab_inited))
+ return true;
+ if (stack_slabs[depot_index] == NULL) {
+ stack_slabs[depot_index] = *prealloc;
+ } else {
+ stack_slabs[depot_index + 1] = *prealloc;
+ smp_store_release(&next_slab_inited, 1);
+ }
+ *prealloc = NULL;
+ return true;
+}
+
+/* Allocation of a new stack in raw storage */
+static struct kasan_stack *kasan_alloc_stack(unsigned long *entries, int size,
+ u32 hash, void **prealloc, gfp_t alloc_flags)
+{
+ int required_size = offsetof(struct kasan_stack, entries) +
+ sizeof(unsigned long) * size;
+ struct kasan_stack *stack;
+
+ required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);
+
+ if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
+ if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
+ WARN_ONCE(1, "Stack depot reached limit capacity");
+ return NULL;
+ }
+ depot_index++;
+ depot_offset = 0;
+ if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
+ smp_store_release(&next_slab_inited, 0);
+ }
+ init_stack_slab(prealloc);
+ if (stack_slabs[depot_index] == NULL)
+ return NULL;
+
+ stack = stack_slabs[depot_index] + depot_offset;
+
+ stack->hash = hash;
+ stack->size = size;
+ stack->handle.slabindex = depot_index;
+ stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
+ __memcpy(stack->entries, entries, size * sizeof(unsigned long));
+ depot_offset += required_size;
+
+ return stack;
+}
+
+#define STACK_HASH_ORDER 20
+#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
+#define STACK_HASH_SEED 0x9747b28c
+
+static struct kasan_stack *stack_table[STACK_HASH_SIZE] = {
+ [0 ... STACK_HASH_SIZE - 1] = NULL
+};
+
+/* Calculate hash for a stack */
+static inline u32 hash_stack(unsigned long *entries, unsigned int size)
+{
+ return jhash2((u32 *)entries,
+ size * sizeof(unsigned long) / sizeof(u32),
+ STACK_HASH_SEED);
+}
+
+/* Find a stack that is equal to the one stored in entries in the hash */
+static inline struct kasan_stack *find_stack(struct kasan_stack *bucket,
+ unsigned long *entries, int size,
+ u32 hash)
+{
+ struct kasan_stack *found;
+
+ for (found = bucket; found; found = found->next) {
+ if (found->hash == hash &&
+ found->size == size &&
+ !memcmp(entries, found->entries,
+ size * sizeof(unsigned long))) {
+ return found;
+ }
+ }
+ return NULL;
+}
+
+void kasan_fetch_stack(kasan_stack_handle handle, struct stack_trace *trace)
+{
+ union handle_parts parts = { .handle = handle };
+ void *slab = stack_slabs[parts.slabindex];
+ size_t offset = parts.offset << STACK_ALLOC_ALIGN;
+ struct kasan_stack *stack = slab + offset;
+
+ trace->nr_entries = trace->max_entries = stack->size;
+ trace->entries = stack->entries;
+ trace->skip = 0;
+}
+
+/*
+ * kasan_save_stack - save stack in a stack depot.
+ * @trace - the stacktrace to save.
+ * @alloc_flags - flags for allocating additional memory if required.
+ *
+ * Returns the handle of the stack struct stored in depot.
+ */
+kasan_stack_handle kasan_save_stack(struct stack_trace *trace,
+ gfp_t alloc_flags)
+{
+ u32 hash;
+ kasan_stack_handle retval = 0;
+ struct kasan_stack *found = NULL, **bucket;
+ unsigned long flags;
+ struct page *page = NULL;
+ void *prealloc = NULL;
+
+ if (unlikely(trace->nr_entries == 0))
+ goto exit;
+
+ hash = hash_stack(trace->entries, trace->nr_entries);
+ /* Bad luck, we won't store this stack. */
+ if (hash == 0)
+ goto exit;
+
+ bucket = &stack_table[hash & STACK_HASH_MASK];
+
+ /* Fast path: look the stack trace up without locking. */
+ found = find_stack(smp_load_acquire(bucket), trace->entries,
+ trace->nr_entries, hash);
+ if (found)
+ goto exit;
+
+ /* Check if the current or the next stack slab need to be initialized.
+ * If so, allocate the memory - we won't be able to do that under the
+ * lock.
+ */
+ if (unlikely(!smp_load_acquire(&next_slab_inited))) {
+ if (!preempt_count() && !in_irq()) {
+ alloc_flags &= (__GFP_RECLAIM | __GFP_IO | __GFP_FS |
+ __GFP_NOWARN | __GFP_NORETRY |
+ __GFP_NOMEMALLOC | __GFP_DIRECT_RECLAIM);
+ page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
+ if (page)
+ prealloc = page_address(page);
+ }
+ }
+
+ spin_lock_irqsave(&depot_lock, flags);
+
+ found = find_stack(*bucket, trace->entries, trace->nr_entries, hash);
+ if (!found) {
+ struct kasan_stack *new =
+ kasan_alloc_stack(trace->entries, trace->nr_entries,
+ hash, &prealloc, alloc_flags);
+ if (new) {
+ new->next = *bucket;
+ smp_store_release(bucket, new);
+ found = new;
+ }
+ } else if (prealloc) {
+ /*
+ * We didn't need to store this stack trace, but let's keep
+ * the preallocated memory for the future.
+ */
+ WARN_ON(!init_stack_slab(&prealloc));
+ }
+
+ spin_unlock_irqrestore(&depot_lock, flags);
+exit:
+ if (prealloc)
+ /* Nobody used this memory, ok to free it. */
+ free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
+ if (found)
+ retval = found->handle.handle;
+ return retval;
+}
--
2.7.0.rc3.207.g0ac5344

Alexander Potapenko

unread,
Jan 27, 2016, 1:30:13 PM1/27/16
to
Add GFP flags to KASAN hooks for future patches to use.

This patch is based on the "mm: kasan: unified support for SLUB and
SLAB allocators" patch originally prepared by Dmitry Chernenkov.

Signed-off-by: Alexander Potapenko <gli...@google.com>
---
include/linux/kasan.h | 19 +++++++++++--------
mm/kasan/kasan.c | 15 ++++++++-------
mm/mempool.c | 16 ++++++++--------
mm/slab.c | 14 +++++++-------
mm/slab_common.c | 4 ++--
mm/slub.c | 17 +++++++++--------
6 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4405a35..bf71ab0 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -53,13 +53,14 @@ void kasan_poison_slab(struct page *page);
void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
void kasan_poison_object_data(struct kmem_cache *cache, void *object);

-void kasan_kmalloc_large(const void *ptr, size_t size);
+void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
void kasan_kfree_large(const void *ptr);
void kasan_kfree(void *ptr);
-void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size);
-void kasan_krealloc(const void *object, size_t new_size);
+void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size,
+ gfp_t flags);
+void kasan_krealloc(const void *object, size_t new_size, gfp_t flags);

-void kasan_slab_alloc(struct kmem_cache *s, void *object);
+void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
void kasan_slab_free(struct kmem_cache *s, void *object);

struct kasan_cache {
@@ -90,14 +91,16 @@ static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
static inline void kasan_poison_object_data(struct kmem_cache *cache,
void *object) {}

-static inline void kasan_kmalloc_large(void *ptr, size_t size) {}
+static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {}
static inline void kasan_kfree_large(const void *ptr) {}
static inline void kasan_kfree(void *ptr) {}
static inline void kasan_kmalloc(struct kmem_cache *s, const void *object,
- size_t size) {}
-static inline void kasan_krealloc(const void *object, size_t new_size) {}
+ size_t size, gfp_t flags) {}
+static inline void kasan_krealloc(const void *object, size_t new_size,
+ gfp_t flags) {}

-static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {}
+static inline void kasan_slab_alloc(struct kmem_cache *s, void *object,
+ gfp_t flags) {}
static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}

static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 84305c2..787224a 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -414,9 +414,9 @@ struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
}
#endif

-void kasan_slab_alloc(struct kmem_cache *cache, void *object)
+void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags)
{
- kasan_kmalloc(cache, object, cache->object_size);
+ kasan_kmalloc(cache, object, cache->object_size, flags);
}

void kasan_slab_free(struct kmem_cache *cache, void *object)
@@ -431,7 +431,8 @@ void kasan_slab_free(struct kmem_cache *cache, void *object)
kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
}

-void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size)
+void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
+ gfp_t flags)
{
unsigned long redzone_start;
unsigned long redzone_end;
@@ -460,7 +461,7 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size)
}
EXPORT_SYMBOL(kasan_kmalloc);

-void kasan_kmalloc_large(const void *ptr, size_t size)
+void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
{
struct page *page;
unsigned long redzone_start;
@@ -479,7 +480,7 @@ void kasan_kmalloc_large(const void *ptr, size_t size)
KASAN_PAGE_REDZONE);
}

-void kasan_krealloc(const void *object, size_t size)
+void kasan_krealloc(const void *object, size_t size, gfp_t flags)
{
struct page *page;

@@ -489,9 +490,9 @@ void kasan_krealloc(const void *object, size_t size)
page = virt_to_head_page(object);

if (unlikely(!PageSlab(page)))
- kasan_kmalloc_large(object, size);
+ kasan_kmalloc_large(object, size, flags);
else
- kasan_kmalloc(page->slab_cache, object, size);
+ kasan_kmalloc(page->slab_cache, object, size, flags);
}

void kasan_kfree(void *ptr)
diff --git a/mm/mempool.c b/mm/mempool.c
index 004d42b..b47c8a7 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -112,12 +112,12 @@ static void kasan_poison_element(mempool_t *pool, void *element)
kasan_free_pages(element, (unsigned long)pool->pool_data);
}

-static void kasan_unpoison_element(mempool_t *pool, void *element)
+static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags)
{
if (pool->alloc == mempool_alloc_slab)
- kasan_slab_alloc(pool->pool_data, element);
+ kasan_slab_alloc(pool->pool_data, element, flags);
if (pool->alloc == mempool_kmalloc)
- kasan_krealloc(element, (size_t)pool->pool_data);
+ kasan_krealloc(element, (size_t)pool->pool_data, flags);
if (pool->alloc == mempool_alloc_pages)
kasan_alloc_pages(element, (unsigned long)pool->pool_data);
}
@@ -130,13 +130,13 @@ static void add_element(mempool_t *pool, void *element)
pool->elements[pool->curr_nr++] = element;
}

-static void *remove_element(mempool_t *pool)
+static void *remove_element(mempool_t *pool, gfp_t flags)
{
void *element = pool->elements[--pool->curr_nr];

BUG_ON(pool->curr_nr < 0);
check_element(pool, element);
- kasan_unpoison_element(pool, element);
+ kasan_unpoison_element(pool, element, flags);
return element;
}

@@ -154,7 +154,7 @@ void mempool_destroy(mempool_t *pool)
return;

while (pool->curr_nr) {
- void *element = remove_element(pool);
+ void *element = remove_element(pool, GFP_KERNEL);
pool->free(element, pool->pool_data);
}
kfree(pool->elements);
@@ -250,7 +250,7 @@ int mempool_resize(mempool_t *pool, int new_min_nr)
spin_lock_irqsave(&pool->lock, flags);
if (new_min_nr <= pool->min_nr) {
while (new_min_nr < pool->curr_nr) {
- element = remove_element(pool);
+ element = remove_element(pool, GFP_KERNEL);
spin_unlock_irqrestore(&pool->lock, flags);
pool->free(element, pool->pool_data);
spin_lock_irqsave(&pool->lock, flags);
@@ -336,7 +336,7 @@ repeat_alloc:

spin_lock_irqsave(&pool->lock, flags);
if (likely(pool->curr_nr)) {
- element = remove_element(pool);
+ element = remove_element(pool, gfp_temp);
spin_unlock_irqrestore(&pool->lock, flags);
/* paired with rmb in mempool_free(), read comment there */
smp_wmb();
diff --git a/mm/slab.c b/mm/slab.c
index 739b89d..0ec7aa3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3417,7 +3417,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *ret = slab_alloc(cachep, flags, _RET_IP_);
if (ret)
- kasan_slab_alloc(cachep, ret);
+ kasan_slab_alloc(cachep, ret, flags);

trace_kmem_cache_alloc(_RET_IP_, ret,
cachep->object_size, cachep->size, flags);
@@ -3448,7 +3448,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
ret = slab_alloc(cachep, flags, _RET_IP_);

if (ret)
- kasan_kmalloc(cachep, ret, size);
+ kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc(_RET_IP_, ret,
size, cachep->size, flags);
return ret;
@@ -3473,7 +3473,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);

if (ret)
- kasan_slab_alloc(cachep, ret);
+ kasan_slab_alloc(cachep, ret, flags);
trace_kmem_cache_alloc_node(_RET_IP_, ret,
cachep->object_size, cachep->size,
flags, nodeid);
@@ -3493,7 +3493,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);

if (ret)
- kasan_kmalloc(cachep, ret, size);
+ kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc_node(_RET_IP_, ret,
size, cachep->size,
flags, nodeid);
@@ -3513,7 +3513,7 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
return cachep;
ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
if (ret)
- kasan_kmalloc(cachep, ret, size);
+ kasan_kmalloc(cachep, ret, size, flags);

return ret;
}
@@ -3550,7 +3550,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
ret = slab_alloc(cachep, flags, caller);

if (ret)
- kasan_kmalloc(cachep, ret, size);
+ kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc(caller, ret,
size, cachep->size, flags);

@@ -4278,7 +4278,7 @@ size_t ksize(const void *objp)
/* We assume that ksize callers could use whole allocated area,
* so we need to unpoison this area.
*/
- kasan_krealloc(objp, size);
+ kasan_krealloc(objp, size, GFP_NOWAIT);

return size;
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 8d2531d..8478631 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1008,7 +1008,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
page = alloc_kmem_pages(flags, order);
ret = page ? page_address(page) : NULL;
kmemleak_alloc(ret, size, 1, flags);
- kasan_kmalloc_large(ret, size);
+ kasan_kmalloc_large(ret, size, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order);
@@ -1189,7 +1189,7 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size,
ks = ksize(p);

if (ks >= new_size) {
- kasan_krealloc((void *)p, new_size);
+ kasan_krealloc((void *)p, new_size, flags);
return (void *)p;
}

diff --git a/mm/slub.c b/mm/slub.c
index b21fd24..945bbee 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1272,7 +1272,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
{
kmemleak_alloc(ptr, size, 1, flags);
- kasan_kmalloc_large(ptr, size);
+ kasan_kmalloc_large(ptr, size, flags);
}

static inline void kfree_hook(const void *x)
@@ -1306,7 +1306,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
kmemleak_alloc_recursive(object, s->object_size, 1,
s->flags, flags);
- kasan_slab_alloc(s, object);
+ kasan_slab_alloc(s, object, flags);
}
memcg_kmem_put_cache(s);
}
@@ -2590,7 +2590,7 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc(s, gfpflags, _RET_IP_);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
- kasan_kmalloc(s, ret, size);
+ kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_trace);
@@ -2618,7 +2618,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
trace_kmalloc_node(_RET_IP_, ret,
size, s->size, gfpflags, node);

- kasan_kmalloc(s, ret, size);
+ kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
@@ -3162,7 +3162,8 @@ static void early_kmem_cache_node_alloc(int node)
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
init_tracking(kmem_cache_node, n);
#endif
- kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node));
+ kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
+ GFP_KERNEL);
init_kmem_cache_node(n);
inc_slabs_node(kmem_cache_node, node, page->objects);

@@ -3535,7 +3536,7 @@ void *__kmalloc(size_t size, gfp_t flags)

trace_kmalloc(_RET_IP_, ret, size, s->size, flags);

- kasan_kmalloc(s, ret, size);
+ kasan_kmalloc(s, ret, size, flags);

return ret;
}
@@ -3580,7 +3581,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)

trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);

- kasan_kmalloc(s, ret, size);
+ kasan_kmalloc(s, ret, size, flags);

return ret;
}
@@ -3609,7 +3610,7 @@ size_t ksize(const void *object)
size_t size = __ksize(object);
/* We assume that ksize callers could use whole allocated area,
so we need unpoison this area. */
- kasan_krealloc(object, size);
+ kasan_krealloc(object, size, GFP_NOWAIT);
return size;
}
EXPORT_SYMBOL(ksize);
--
2.7.0.rc3.207.g0ac5344

Alexander Potapenko

unread,
Jan 27, 2016, 1:30:13 PM1/27/16
to
depending on which allocator (SLAB or SLUB) is being used

Signed-off-by: Alexander Potapenko <gli...@google.com>
---
lib/test_kasan.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index c32f3b0..66dd92f 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -68,7 +68,22 @@ static noinline void __init kmalloc_node_oob_right(void)
static noinline void __init kmalloc_large_oob_right(void)
{
char *ptr;
- size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
+ size_t size;
+
+ if (KMALLOC_MAX_CACHE_SIZE == KMALLOC_MAX_SIZE) {
+ /*
+ * We're using the SLAB allocator. Allocate a chunk that fits
+ * into a slab.
+ */
+ size = KMALLOC_MAX_CACHE_SIZE - 256;
+ } else {
+ /*
+ * KMALLOC_MAX_SIZE > KMALLOC_MAX_CACHE_SIZE.
+ * We're using the SLUB allocator. Allocate a chunk that does
+ * not fit into a slab to trigger the page allocator.
+ */
+ size = KMALLOC_MAX_CACHE_SIZE + 10;
+ }

pr_info("kmalloc large allocation: out-of-bounds to right\n");
ptr = kmalloc(size, GFP_KERNEL);
--
2.7.0.rc3.207.g0ac5344

Joonsoo Kim

unread,
Jan 28, 2016, 2:50:06 AM1/28/16
to
Hello,

On Wed, Jan 27, 2016 at 07:25:10PM +0100, Alexander Potapenko wrote:
> Stack depot will allow KASAN store allocation/deallocation stack traces
> for memory chunks. The stack traces are stored in a hash table and
> referenced by handles which reside in the kasan_alloc_meta and
> kasan_free_meta structures in the allocated memory chunks.

Looks really nice!

Could it be more generalized to be used by other feature that need to
store stack trace such as tracepoint or page owner?

If it could be, there is one more requirement.
I understand the fact that entry is never removed from depot makes things
very simpler, but, for general usecases, it's better to use reference count
and allow to remove. Is it possible?

Thanks.

Joonsoo Kim

unread,
Jan 28, 2016, 2:50:06 AM1/28/16
to
On Wed, Jan 27, 2016 at 07:25:07PM +0100, Alexander Potapenko wrote:
> This patch adds KASAN hooks to SLAB allocator.
>
> This patch is based on the "mm: kasan: unified support for SLUB and
> SLAB allocators" patch originally prepared by Dmitry Chernenkov.
>
> Signed-off-by: Alexander Potapenko <gli...@google.com>
> ---
> Documentation/kasan.txt | 5 ++-

...

> +#ifdef CONFIG_SLAB
> +struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
> + const void *object)
> +{
> + return (void *)object + cache->kasan_info.alloc_meta_offset;
> +}
> +
> +struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
> + const void *object)
> +{
> + return (void *)object + cache->kasan_info.free_meta_offset;
> +}
> +#endif

I cannot find the place to store stack info for free. get_free_info()
isn't used except print_object(). Plese let me know where.

Thanks.

Alexander Potapenko

unread,
Jan 28, 2016, 8:30:06 AM1/28/16
to
On Thu, Jan 28, 2016 at 1:51 PM, Alexander Potapenko <gli...@google.com> wrote:
>
> On Jan 28, 2016 8:40 AM, "Joonsoo Kim" <iamjoon...@lge.com> wrote:
>>
>> Hello,
>>
>> On Wed, Jan 27, 2016 at 07:25:10PM +0100, Alexander Potapenko wrote:
>> > Stack depot will allow KASAN store allocation/deallocation stack traces
>> > for memory chunks. The stack traces are stored in a hash table and
>> > referenced by handles which reside in the kasan_alloc_meta and
>> > kasan_free_meta structures in the allocated memory chunks.
>>
>> Looks really nice!
>>
>> Could it be more generalized to be used by other feature that need to
>> store stack trace such as tracepoint or page owner?
> Certainly yes, but see below.
>
>> If it could be, there is one more requirement.
>> I understand the fact that entry is never removed from depot makes things
>> very simpler, but, for general usecases, it's better to use reference
>> count
>> and allow to remove. Is it possible?
> For our use case reference counting is not really necessary, and it would
> introduce unwanted contention.
> There are two possible options, each having its advantages and drawbacks: we
> can let the clients store the refcounters directly in their stacks (more
> universal, but harder to use for the clients), or keep the counters in the
> depot but add an API that does not change them (easier for the clients, but
> potentially error-prone).
>
> I'd say it's better to actually find at least one more user for the stack
> depot in order to understand the requirements, and refactor the code after
> that.
>> Thanks.
>>
(resending to linux-kernel@ because the previous mail bounced)


--
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-Straße, 33
80636 München

Geschäftsführer: Matthew Scott Sucherman, Paul Terence Manicle
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg
Diese E-Mail ist vertraulich. Wenn Sie nicht der richtige Adressat sind,
leiten Sie diese bitte nicht weiter, informieren Sie den
Absender und löschen Sie die E-Mail und alle Anhänge. Vielen Dank.
This e-mail is confidential. If you are not the right addressee please
do not forward it, please inform the sender, and please erase this
e-mail including any attachments. Thanks.

Alexander Potapenko

unread,
Jan 28, 2016, 8:30:06 AM1/28/16
to
On Thu, Jan 28, 2016 at 1:37 PM, Alexander Potapenko <gli...@google.com> wrote:
> This is covered by other patches in this patchset.

Joonsoo Kim

unread,
Jan 31, 2016, 9:20:05 PM1/31/16
to
This should be covered by this patch. Stroing and printing free_info
is already done on SLUB and it is meaningful without quarantain.

Thanks.

Joonsoo Kim

unread,
Jan 31, 2016, 9:50:08 PM1/31/16
to
On Wed, Jan 27, 2016 at 07:25:13PM +0100, Alexander Potapenko wrote:
> Quarantine isolates freed objects in a separate queue. The objects are
> returned to the allocator later, which helps to detect use-after-free
> errors.
>
> Freed objects are first added to per-cpu quarantine queues.
> When a cache is destroyed or memory shrinking is requested, the objects
> are moved into the global quarantine queue. Whenever a kmalloc call
> allows memory reclaiming, the oldest objects are popped out of the
> global queue until the total size of objects in quarantine is less than
> 3/4 of the maximum quarantine size (which is a fraction of installed
> physical memory).

Just wondering why not using time based approach rather than size
based one. In heavy load condition, how much time do the object stay in
quarantine?

>
> Right now quarantine support is only enabled in SLAB allocator.
> Unification of KASAN features in SLAB and SLUB will be done later.
>
> This patch is based on the "mm: kasan: quarantine" patch originally
> prepared by Dmitry Chernenkov.
>
> Signed-off-by: Alexander Potapenko <gli...@google.com>
> ---
> include/linux/kasan.h | 30 ++++--
> lib/test_kasan.c | 29 ++++++
> mm/kasan/Makefile | 2 +-
> mm/kasan/kasan.c | 68 +++++++++++-
> mm/kasan/kasan.h | 11 +-
> mm/kasan/quarantine.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++++++
> mm/kasan/report.c | 3 +-
> mm/mempool.c | 7 +-
> mm/page_alloc.c | 2 +-
> mm/slab.c | 12 ++-
> mm/slab.h | 4 +
> mm/slab_common.c | 2 +
> mm/slub.c | 4 +-
> 13 files changed, 435 insertions(+), 23 deletions(-)
>

...

> +bool kasan_slab_free(struct kmem_cache *cache, void *object)
> +{
> +#ifdef CONFIG_SLAB
> + /* RCU slabs could be legally used after free within the RCU period */
> + if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
> + return false;
> +
> + if (likely(cache->flags & SLAB_KASAN)) {
> + struct kasan_alloc_meta *alloc_info =
> + get_alloc_info(cache, object);
> + struct kasan_free_meta *free_info =
> + get_free_info(cache, object);
> +
> + switch (alloc_info->state) {
> + case KASAN_STATE_ALLOC:
> + alloc_info->state = KASAN_STATE_QUARANTINE;
> + quarantine_put(free_info, cache);

quarantine_put() can be called regardless of SLAB_DESTROY_BY_RCU,
although it's not much meaningful without poisoning. But, I have an
idea to poison object on SLAB_DESTROY_BY_RCU cache.

quarantine_put() moves per cpu list to global queue when
list size reaches QUARANTINE_PERCPU_SIZE. If we call synchronize_rcu()
at that time, after then, we can poison objects. With appropriate size
setup, it would not be intrusive.

> + set_track(&free_info->track, GFP_NOWAIT);

set_track() can be called regardless of SLAB_DESTROY_BY_RCU.

> + kasan_poison_slab_free(cache, object);
> + return true;
> + case KASAN_STATE_QUARANTINE:
> + case KASAN_STATE_FREE:
> + pr_err("Double free");
> + dump_stack();
> + break;
> + default:
> + break;
> + }
> + }
> + return false;
> +#else
> + kasan_poison_slab_free(cache, object);
> + return false;
> +#endif
> +}
> +

...
Isn't it better to call quarantine_reduce() in shrink_slab()?
It will help to maximize quarantine time.
Should be called with wait = 1.
It looks not good to me.
Converting __cache_free() to ____cache_free() and making
__cache_free() call ____cache_free() if (!kasan_slab_free()) looks
better to me and less error-prone.

Thanks.

Joonsoo Kim

unread,
Jan 31, 2016, 10:00:06 PM1/31/16
to
On Thu, Jan 28, 2016 at 02:27:44PM +0100, Alexander Potapenko wrote:
> On Thu, Jan 28, 2016 at 1:51 PM, Alexander Potapenko <gli...@google.com> wrote:
> >
> > On Jan 28, 2016 8:40 AM, "Joonsoo Kim" <iamjoon...@lge.com> wrote:
> >>
> >> Hello,
> >>
> >> On Wed, Jan 27, 2016 at 07:25:10PM +0100, Alexander Potapenko wrote:
> >> > Stack depot will allow KASAN store allocation/deallocation stack traces
> >> > for memory chunks. The stack traces are stored in a hash table and
> >> > referenced by handles which reside in the kasan_alloc_meta and
> >> > kasan_free_meta structures in the allocated memory chunks.
> >>
> >> Looks really nice!
> >>
> >> Could it be more generalized to be used by other feature that need to
> >> store stack trace such as tracepoint or page owner?
> > Certainly yes, but see below.
> >
> >> If it could be, there is one more requirement.
> >> I understand the fact that entry is never removed from depot makes things
> >> very simpler, but, for general usecases, it's better to use reference
> >> count
> >> and allow to remove. Is it possible?
> > For our use case reference counting is not really necessary, and it would
> > introduce unwanted contention.

Okay.

> > There are two possible options, each having its advantages and drawbacks: we
> > can let the clients store the refcounters directly in their stacks (more
> > universal, but harder to use for the clients), or keep the counters in the
> > depot but add an API that does not change them (easier for the clients, but
> > potentially error-prone).
> > I'd say it's better to actually find at least one more user for the stack
> > depot in order to understand the requirements, and refactor the code after
> > that.

I re-think the page owner case and it also may not need refcount.
For now, just moving this stuff to /lib would be helpful for other future user.

BTW, is there any performance number? I guess that it could affect
the performance.

Thanks.

Andrew Morton

unread,
Feb 2, 2016, 12:40:05 AM2/2/16
to
On Wed, 27 Jan 2016 19:25:06 +0100 Alexander Potapenko <gli...@google.com> wrote:

> depending on which allocator (SLAB or SLUB) is being used
>
> ...
>
> --- a/lib/test_kasan.c
> +++ b/lib/test_kasan.c
> @@ -68,7 +68,22 @@ static noinline void __init kmalloc_node_oob_right(void)
> static noinline void __init kmalloc_large_oob_right(void)
> {
> char *ptr;
> - size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
> + size_t size;
> +
> + if (KMALLOC_MAX_CACHE_SIZE == KMALLOC_MAX_SIZE) {
> + /*
> + * We're using the SLAB allocator. Allocate a chunk that fits
> + * into a slab.
> + */
> + size = KMALLOC_MAX_CACHE_SIZE - 256;
> + } else {
> + /*
> + * KMALLOC_MAX_SIZE > KMALLOC_MAX_CACHE_SIZE.
> + * We're using the SLUB allocator. Allocate a chunk that does
> + * not fit into a slab to trigger the page allocator.
> + */
> + size = KMALLOC_MAX_CACHE_SIZE + 10;
> + }

This seems a weird way of working out whether we're using SLAB or SLUB.

Can't we use, umm, #ifdef CONFIG_SLAB? If not that then let's cook up
something standardized rather than a weird just-happens-to-work like
this.

Andrey Ryabinin

unread,
Feb 2, 2016, 10:30:08 AM2/2/16
to
Actually it would be simpler to not use KMALLOC_MAX_CACHE_SIZE at all.
Simply replace it with 2 or 3 PAGE_SIZEs.

Alexander Potapenko

unread,
Feb 2, 2016, 11:30:14 AM2/2/16
to
The intention was to detect the situation in which a new allocator
appears for which we don't know how it behaves if we allocate more
than KMALLOC_MAX_CACHE_SIZE.
I agree this makes little sense and we can just stick to
CONFIG_SLAB/CONFIG_SLUB cases.

However I think it's better to keep 'size = KMALLOC_MAX_CACHE_SIZE +
something' to keep this code working in the case the value of
KMALLOC_MAX_CACHE_SIZE changes.

Alexander Potapenko

unread,
Feb 15, 2016, 9:10:05 AM2/15/16
to
Turns out I've actually overintellectualized this test.
I've reworked this patch so that kmalloc_pagealloc_oob_right()
allocates a big chunk of memory from the page allocator (and therefore
is enabled only under CONFIG_SLUB).
kmalloc_large_oob_right() now allocates KMALLOC_MAX_CACHE_SIZE - 256
in both SLAB and SLUB modes.

I'll send the updated patch set later today.

Alexander Potapenko

unread,
Feb 16, 2016, 1:40:08 PM2/16/16
to
I agree this code may need to be moved to /lib someday, but I wouldn't
hurry with that.
Right now it is quite KASAN-specific, and it's unclear yet whether
anyone else is going to use it.
I suggest we keep it in mm/kasan for now, and factor the common parts
into /lib when the need arises.

> BTW, is there any performance number? I guess that it could affect
> the performance.
I've compared the performance of KASAN with SLAB allocator on a small
synthetic benchmark in two modes: with stack depot enabled and with
kasan_save_stack() unconditionally returning 0.
In the former case 8% more time was spent in the kernel than in the latter case.

If I am not mistaking, for SLUB allocator the bookkeeping (enabled
with the slub_debug=UZ boot options) take only 1.5 time, so the
difference is worth looking into (at least before we switch SLUB to
stack depot).


> Thanks.

Alexander Potapenko

unread,
Feb 17, 2016, 1:30:07 PM2/17/16
to
I've made additional measurements.
Previously I had been using a userspace benchmark that created and
destroyed pipes in a loop
(https://github.com/google/sanitizers/blob/master/address-sanitizer/kernel_buildbot/slave/bench_pipes.c).

Now I've made a kernel module that allocated and deallocated memory
chunks of different sizes in a loop.
There were two modes of operation:
1) all the allocations were made from the same function, therefore all
allocation/deallocation stacks were similar and there always was a hit
in the stackdepot hashtable
2) The allocations were made from 2^16 different stacks.

In the first case SLAB+stackdepot turned out to be 13% faster than
SLUB+slub_debug, in the second SLAB was 11% faster.
Note that in both cases and for both allocators most of the time (more
than 90%) was spent in the x86 stack unwinder, which is common for
both approaches.

Yet another observation regarding stackdepot: under a heavy load
(running Trinity for a hour, 101M allocations) the depot saturates at
around 20K records with the hashtable miss rate of 0.02%.
That said, I still cannot justify the results of the userspace
benchmark, but the slowdown of the stackdepot approach for SLAB sounds
acceptable, especially given the memory gain compared to SLUB
bookkeeping (which requires 128 bytes per memory allocation) and the
fact we'll be dealing with the fast path most of the time.

It will certainly be nice to compare SLUB+slub_debug to
SLUB+stackdepot once we start switching SLUB to stackdepot.

Joonsoo Kim

unread,
Feb 18, 2016, 3:00:06 AM2/18/16
to
Please consider it one more time. I really have a plan to use it on page owner,
because using page owner requires too many memory for stack trace and
it changes system behaviour a lot.

Page owner uses following structure to store stack trace.

struct page_ext {
unsigned long flags;
#ifdef CONFIG_PAGE_OWNER
unsigned int order;
gfp_t gfp_mask;
unsigned int nr_entries;
int last_migrate_reason;
unsigned long trace_entries[8];
#endif
};

Using stack depot in page owner would be straight forward if stack depot
is in /lib. It is possible to move it when needed but it requires moving
a file and it would not be desirable.

>> BTW, is there any performance number? I guess that it could affect
>> the performance.
> I've compared the performance of KASAN with SLAB allocator on a small
> synthetic benchmark in two modes: with stack depot enabled and with
> kasan_save_stack() unconditionally returning 0.
> In the former case 8% more time was spent in the kernel than in the latter case.
>
> If I am not mistaking, for SLUB allocator the bookkeeping (enabled
> with the slub_debug=UZ boot options) take only 1.5 time, so the
> difference is worth looking into (at least before we switch SLUB to
> stack depot).

Okay.

Joonsoo Kim

unread,
Feb 18, 2016, 3:20:05 AM2/18/16
to
I don't know what version of kernel you tested but, until recently,
slub_debug=UZ has a side effect not to using fastpath of SLUB. So,
comparison between them isn't appropriate. Today's linux-next branch
would have some improvements on this area so use it to compare them.

> Note that in both cases and for both allocators most of the time (more
> than 90%) was spent in the x86 stack unwinder, which is common for
> both approaches.

If more than 90% time is spent in stack unwinder which is common for
both cases, how something is better than the other by 13%?

> Yet another observation regarding stackdepot: under a heavy load
> (running Trinity for a hour, 101M allocations) the depot saturates at
> around 20K records with the hashtable miss rate of 0.02%.
> That said, I still cannot justify the results of the userspace
> benchmark, but the slowdown of the stackdepot approach for SLAB sounds
> acceptable, especially given the memory gain compared to SLUB
> bookkeeping (which requires 128 bytes per memory allocation) and the
> fact we'll be dealing with the fast path most of the time.

In fact, I don't have much concern about performance because saving
memory has enough merit to be merged. Anyway, it looks acceptable
even for performance.

> It will certainly be nice to compare SLUB+slub_debug to
> SLUB+stackdepot once we start switching SLUB to stackdepot.

Okay.

Thanks.

Alexander Potapenko

unread,
Feb 18, 2016, 8:00:10 AM2/18/16
to
However this info is meaningless without saved stack traces, which are
only introduced in the stackdepot patch (see "[PATCH v1 5/8] mm,
kasan: Stackdepot implementation. Enable stackdepot for SLAB")

Alexander Potapenko

unread,
Feb 18, 2016, 9:10:08 AM2/18/16
to
Won't this slow the quarantine down unpredictably (e.g. in the case
there're no RCU slabs in quarantine we'll still be waiting for
synchronize_rcu())?
Yet this is something worth looking into. Do you want RCU to be
handled in this patch set?

>> + set_track(&free_info->track, GFP_NOWAIT);
>
> set_track() can be called regardless of SLAB_DESTROY_BY_RCU.
Agreed, I can fix that if we decide to handle RCU in this patch
(otherwise it will lead to confusion).
This is true, however if we don't call quarantine_reduce() from
kmalloc()/kfree() the size of the quarantine will be unpredictable.
There's a tradeoff between efficiency and space here, and at least in
some cases we may want to trade efficiency for space.
Agreed, thank you.
Fixed. Will upload the new patchset soonish.

Alexander Potapenko

unread,
Feb 18, 2016, 10:10:12 AM2/18/16
to
That's good to know.
I've been using https://github.com/torvalds/linux.git, which probably
didn't have those improvements.

>> Note that in both cases and for both allocators most of the time (more
>> than 90%) was spent in the x86 stack unwinder, which is common for
>> both approaches.
>
> If more than 90% time is spent in stack unwinder which is common for
> both cases, how something is better than the other by 13%?
On the second glance, this number (90%) may be inaccurate, because I
measured the stack unwinding times separately, which could have
introduced deviation (not to mention it was incorrect for SLUB).
Yet we're talking about a significant amount of time spent in the unwinder.
My numbers were 26.111 seconds for 1024K SLAB allocation/deallocation
pairs and 30.278 seconds for 1024K alloc/dealloc pairs with SLUB.
When measured separately in the same routine that did the allocations,
2048K calls to save_stack_trace() took 25.487 seconds.

>> Yet another observation regarding stackdepot: under a heavy load
>> (running Trinity for a hour, 101M allocations) the depot saturates at
>> around 20K records with the hashtable miss rate of 0.02%.
>> That said, I still cannot justify the results of the userspace
>> benchmark, but the slowdown of the stackdepot approach for SLAB sounds
>> acceptable, especially given the memory gain compared to SLUB
>> bookkeeping (which requires 128 bytes per memory allocation) and the
>> fact we'll be dealing with the fast path most of the time.
>
> In fact, I don't have much concern about performance because saving
> memory has enough merit to be merged. Anyway, it looks acceptable
> even for performance.
>
>> It will certainly be nice to compare SLUB+slub_debug to
>> SLUB+stackdepot once we start switching SLUB to stackdepot.
>
> Okay.
>
> Thanks.



Alexander Potapenko

unread,
Feb 18, 2016, 12:20:06 PM2/18/16
to
Stack depot will allow KASAN store allocation/deallocation stack traces
for memory chunks. The stack traces are stored in a hash table and
referenced by handles which reside in the kasan_alloc_meta and
kasan_free_meta structures in the allocated memory chunks.

IRQ stack traces are cut below the IRQ entry point to avoid unnecessary
duplication.

Right now stackdepot support is only enabled in SLAB allocator.
Once KASAN features in SLAB are on par with those in SLUB we can switch
SLUB to stackdepot as well, thus removing the dependency on SLUB_DEBUG.

This patch is based on the "mm: kasan: stack depots" patch originally
prepared by Dmitry Chernenkov.

Signed-off-by: Alexander Potapenko <gli...@google.com>
---
v2: - per request from Joonsoo Kim, moved the stackdepot implementation to
lib/, as there's a plan to use it for page owner
- added copyright comments
- added comments about smp_load_acquire()/smp_store_release()
---
arch/x86/kernel/Makefile | 1 +
include/linux/stackdepot.h | 32 ++++++
lib/Makefile | 7 ++
lib/stackdepot.c | 274 +++++++++++++++++++++++++++++++++++++++++++++
mm/kasan/Makefile | 1 +
mm/kasan/kasan.c | 51 ++++++++-
mm/kasan/kasan.h | 4 +
mm/kasan/report.c | 9 ++
8 files changed, 376 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b1b78ff..500584d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -19,6 +19,7 @@ endif
KASAN_SANITIZE_head$(BITS).o := n
KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_stacktrace.o := n

CFLAGS_irq.o := -I$(src)/../include/asm/trace

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
new file mode 100644
index 0000000..b6cbe05
--- /dev/null
+++ b/include/linux/stackdepot.h
@@ -0,0 +1,32 @@
+/*
+ * A generic stack depot implementation
+ *
+ * Author: Alexander Potapenko <gli...@google.com>
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * Based on code by Dmitry Chernenkov.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_STACKDEPOT_H
+#define _LINUX_STACKDEPOT_H
+
+typedef u32 depot_stack_handle;
+
+struct stack_trace;
+
+depot_stack_handle depot_save_stack(struct stack_trace *trace, gfp_t flags);
+
+void depot_fetch_stack(depot_stack_handle handle, struct stack_trace *trace);
+
+#endif
diff --git a/lib/Makefile b/lib/Makefile
index a7c26a4..10a4ae3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -167,6 +167,13 @@ obj-$(CONFIG_SG_SPLIT) += sg_split.o
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
obj-$(CONFIG_IRQ_POLL) += irq_poll.o

+ifeq ($(CONFIG_KASAN),y)
+ifeq ($(CONFIG_SLAB),y)
+ obj-y += stackdepot.o
+ KASAN_SANITIZE_slub.o := n
+endif
+endif
+
libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
fdt_empty_tree.o
$(foreach file, $(libfdt_files), \
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
new file mode 100644
index 0000000..f09b0da
--- /dev/null
+++ b/lib/stackdepot.c
@@ -0,0 +1,274 @@
+/*
+ * Generic stack depot for storing stack traces.
+ *
+ * Some debugging tools need to save stack traces of certain events which can
+ * be later presented to the user. For example, KASAN needs to safe alloc and
+ * free stacks for each object, but storing two stack traces per object
+ * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for
+ * that).
+ *
+ * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc
+ * and free stacks repeat a lot, we save about 100x space.
+ * Stacks are never removed from depot, so we store them contiguously one after
+ * another in a contiguos memory allocation.
+ *
+ * Author: Alexander Potapenko <gli...@google.com>
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * Based on code by Dmitry Chernenkov.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <linux/gfp.h>
+#include <linux/jhash.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/stacktrace.h>
+#include <linux/stackdepot.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define DEPOT_STACK_BITS (sizeof(depot_stack_handle) * 8)
+
+#define STACK_ALLOC_ORDER 4 /* 'Slab' size order for stack depot, 16 pages */
+#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER))
+#define STACK_ALLOC_ALIGN 4
+#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
+ STACK_ALLOC_ALIGN)
+#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - STACK_ALLOC_OFFSET_BITS)
+#define STACK_ALLOC_SLABS_CAP 1024
+#define STACK_ALLOC_MAX_SLABS \
+ (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
+ (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)
+
+/* The compact structure to store the reference to stacks. */
+union handle_parts {
+ depot_stack_handle handle;
+ struct {
+ u32 slabindex : STACK_ALLOC_INDEX_BITS;
+ u32 offset : STACK_ALLOC_OFFSET_BITS;
+ };
+};
+
+struct stack_record {
+ struct stack_record *next; /* Link in the hashtable */
+ u32 hash; /* Hash in the hastable */
+ u32 size; /* Number of frames in the stack */
+ union handle_parts handle;
+ unsigned long entries[1]; /* Variable-sized array of entries. */
+};
+
+static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
+
+static int depot_index;
+static int next_slab_inited;
+static size_t depot_offset;
+static DEFINE_SPINLOCK(depot_lock);
+
+static bool init_stack_slab(void **prealloc)
+{
+ if (!*prealloc)
+ return false;
+ /* This smp_load_acquire() pairs with smp_store_release() to
+ * |next_slab_inited| below and in depot_alloc_stack().
+ */
+ if (smp_load_acquire(&next_slab_inited))
+ return true;
+ if (stack_slabs[depot_index] == NULL) {
+ stack_slabs[depot_index] = *prealloc;
+ } else {
+ stack_slabs[depot_index + 1] = *prealloc;
+ /* This smp_store_release pairs with smp_load_acquire() from
+ * |next_slab_inited| above and in depot_save_stack().
+ */
+ smp_store_release(&next_slab_inited, 1);
+ }
+ *prealloc = NULL;
+ return true;
+}
+
+/* Allocation of a new stack in raw storage */
+static struct stack_record *depot_alloc_stack(unsigned long *entries, int size,
+ u32 hash, void **prealloc, gfp_t alloc_flags)
+{
+ int required_size = offsetof(struct stack_record, entries) +
+ sizeof(unsigned long) * size;
+ struct stack_record *stack;
+
+ required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);
+
+ if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
+ if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
+ WARN_ONCE(1, "Stack depot reached limit capacity");
+ return NULL;
+ }
+ depot_index++;
+ depot_offset = 0;
+ /* smp_store_release() here pairs with smp_load_acquire() from
+ * |next_slab_inited| in depot_save_stack() and
+ * init_stack_slab().
+ */
+ if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
+ smp_store_release(&next_slab_inited, 0);
+ }
+ init_stack_slab(prealloc);
+ if (stack_slabs[depot_index] == NULL)
+ return NULL;
+
+ stack = stack_slabs[depot_index] + depot_offset;
+
+ stack->hash = hash;
+ stack->size = size;
+ stack->handle.slabindex = depot_index;
+ stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
+ __memcpy(stack->entries, entries, size * sizeof(unsigned long));
+ depot_offset += required_size;
+
+ return stack;
+}
+
+#define STACK_HASH_ORDER 20
+#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
+#define STACK_HASH_SEED 0x9747b28c
+
+static struct stack_record *stack_table[STACK_HASH_SIZE] = {
+ [0 ... STACK_HASH_SIZE - 1] = NULL
+};
+
+/* Calculate hash for a stack */
+static inline u32 hash_stack(unsigned long *entries, unsigned int size)
+{
+ return jhash2((u32 *)entries,
+ size * sizeof(unsigned long) / sizeof(u32),
+ STACK_HASH_SEED);
+}
+
+/* Find a stack that is equal to the one stored in entries in the hash */
+static inline struct stack_record *find_stack(struct stack_record *bucket,
+ unsigned long *entries, int size,
+ u32 hash)
+{
+ struct stack_record *found;
+
+ for (found = bucket; found; found = found->next) {
+ if (found->hash == hash &&
+ found->size == size &&
+ !memcmp(entries, found->entries,
+ size * sizeof(unsigned long))) {
+ return found;
+ }
+ }
+ return NULL;
+}
+
+void depot_fetch_stack(depot_stack_handle handle, struct stack_trace *trace)
+{
+ union handle_parts parts = { .handle = handle };
+ void *slab = stack_slabs[parts.slabindex];
+ size_t offset = parts.offset << STACK_ALLOC_ALIGN;
+ struct stack_record *stack = slab + offset;
+
+ trace->nr_entries = trace->max_entries = stack->size;
+ trace->entries = stack->entries;
+ trace->skip = 0;
+}
+
+/*
+ * depot_save_stack - save stack in a stack depot.
+ * @trace - the stacktrace to save.
+ * @alloc_flags - flags for allocating additional memory if required.
+ *
+ * Returns the handle of the stack struct stored in depot.
+ */
+depot_stack_handle depot_save_stack(struct stack_trace *trace,
+ gfp_t alloc_flags)
+{
+ u32 hash;
+ depot_stack_handle retval = 0;
+ struct stack_record *found = NULL, **bucket;
+ unsigned long flags;
+ struct page *page = NULL;
+ void *prealloc = NULL;
+
+ if (unlikely(trace->nr_entries == 0))
+ goto exit;
+
+ hash = hash_stack(trace->entries, trace->nr_entries);
+ /* Bad luck, we won't store this stack. */
+ if (hash == 0)
+ goto exit;
+
+ bucket = &stack_table[hash & STACK_HASH_MASK];
+
+ /* Fast path: look the stack trace up without locking.
+ *
+ * The smp_load_acquire() here pairs with smp_store_release() to
+ * |bucket| below.
+ */
+ found = find_stack(smp_load_acquire(bucket), trace->entries,
+ trace->nr_entries, hash);
+ if (found)
+ goto exit;
+
+ /* Check if the current or the next stack slab need to be initialized.
+ * If so, allocate the memory - we won't be able to do that under the
+ * lock.
+ *
+ * The smp_load_acquire() here pairs with smp_store_release() to
+ * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
+ */
+ if (unlikely(!smp_load_acquire(&next_slab_inited))) {
+ if (!preempt_count() && !in_irq()) {
+ alloc_flags &= (__GFP_RECLAIM | __GFP_IO | __GFP_FS |
+ __GFP_NOWARN | __GFP_NORETRY |
+ __GFP_NOMEMALLOC | __GFP_DIRECT_RECLAIM);
+ page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
+ if (page)
+ prealloc = page_address(page);
+ }
+ }
+
+ spin_lock_irqsave(&depot_lock, flags);
+
+ found = find_stack(*bucket, trace->entries, trace->nr_entries, hash);
+ if (!found) {
+ struct stack_record *new =
+ depot_alloc_stack(trace->entries, trace->nr_entries,
+ hash, &prealloc, alloc_flags);
+ if (new) {
+ new->next = *bucket;
+ /* This smp_store_release() pairs with
+ * smp_load_acquire() from |bucket| above.
+ */
+ smp_store_release(bucket, new);
+ found = new;
+ }
+ } else if (prealloc) {
+ /*
+ * We didn't need to store this stack trace, but let's keep
+ * the preallocated memory for the future.
+ */
+ WARN_ON(!init_stack_slab(&prealloc));
+ }
+
+ spin_unlock_irqrestore(&depot_lock, flags);
+exit:
+ if (prealloc)
+ /* Nobody used this memory, ok to free it. */
+ free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
+ if (found)
+ retval = found->handle.handle;
+ return retval;
+}
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index a61460d..32bd73a 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -7,3 +7,4 @@ CFLAGS_REMOVE_kasan.o = -pg
CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)

obj-y := kasan.o report.o kasan_init.o
+
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 787224a..fb7885d 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -17,7 +17,9 @@
#define DISABLE_BRANCH_PROFILING

#include <linux/export.h>
+#include <linux/interrupt.h>
+ break;
+ }
+}
+
+static inline depot_stack_handle save_stack(gfp_t flags)
+{
+ unsigned long entries[KASAN_STACK_DEPTH];
+ struct stack_trace trace = {
+ .nr_entries = 0,
+ .entries = entries,
+ .max_entries = KASAN_STACK_DEPTH,
+ .skip = 0
+ };
+
+ save_stack_trace(&trace);
+ filter_irq_stacks(&trace);
+ if (trace.nr_entries != 0 &&
+ trace.entries[trace.nr_entries-1] == ULONG_MAX)
+ trace.nr_entries--;
+
+ return depot_save_stack(&trace, flags);
+}
+
+static inline void set_track(struct kasan_track *track, gfp_t flags)
{
track->cpu = raw_smp_processor_id();
track->pid = current->pid;
track->when = jiffies;
+ track->stack = save_stack(flags);
}

#ifdef CONFIG_SLAB
struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
const void *object)
{
+ BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32);
return (void *)object + cache->kasan_info.alloc_meta_offset;
}

struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
const void *object)
{
+ BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32);
return (void *)object + cache->kasan_info.free_meta_offset;
}
#endif
@@ -455,7 +500,7 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,

alloc_info->state = KASAN_STATE_ALLOC;
alloc_info->alloc_size = size;
- set_track(&alloc_info->track);
+ set_track(&alloc_info->track, flags);
}
#endif
}
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 7b9e4ab9..b4e5942 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -2,6 +2,7 @@
#define __MM_KASAN_KASAN_H

#include <linux/kasan.h>
+#include <linux/stackdepot.h>

#define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT)
#define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1)
@@ -64,10 +65,13 @@ enum kasan_state {
KASAN_STATE_FREE
};

+#define KASAN_STACK_DEPTH 64
+
struct kasan_track {
u64 cpu : 6; /* for NR_CPUS = 64 */
u64 pid : 16; /* 65536 processes */
u64 when : 42; /* ~140 years */
+ depot_stack_handle stack : sizeof(depot_stack_handle);
};

struct kasan_alloc_meta {
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 2bf7218..4af52bb 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -18,6 +18,7 @@
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/stackdepot.h>
#include <linux/stacktrace.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -119,6 +120,14 @@ static void print_track(struct kasan_track *track)
{
pr_err("PID = %lu, CPU = %lu, timestamp = %lu\n", track->pid,
track->cpu, track->when);
+ if (track->stack) {
+ struct stack_trace trace;
+
+ depot_fetch_stack(track->stack, &trace);
+ print_stack_trace(&trace, 0);
+ } else {
+ pr_err("(stack is not available)\n");
+ }
}

static void print_object(struct kmem_cache *cache, void *object)
--
2.7.0.rc3.207.g0ac5344

Joonsoo Kim

unread,
Feb 18, 2016, 8:50:05 PM2/18/16
to
2016-02-18 21:58 GMT+09:00 Alexander Potapenko <gli...@google.com>:
> However this info is meaningless without saved stack traces, which are
> only introduced in the stackdepot patch (see "[PATCH v1 5/8] mm,
> kasan: Stackdepot implementation. Enable stackdepot for SLAB")

Not meaningless. You already did it for allocation caller without saved
stack traces. What makes difference between alloc/free?

Thanks.

Joonsoo Kim

unread,
Feb 18, 2016, 9:20:05 PM2/18/16
to
It could be handled by introducing one cpu variable.

> Yet this is something worth looking into. Do you want RCU to be
> handled in this patch set?

No. It would be future work.
size of the quarantine doesn't matter unless there is memory pressure.
If memory pressure, shrink_slab() would be called and we can reduce
size of quarantine. However, I don't think this is show stopper. We can
do it when needed.

Thanks.

Dmitry Vyukov

unread,
Feb 19, 2016, 4:30:06 AM2/19/16
to
No, this does not work. We've tried.
The problem is fragmentation. When all memory is occupied by slab,
it's already too late to reclaim memory. Free objects are randomly
scattered over memory, so if you have just 1% of live objects, the
chances are that you won't be able to reclaim any single page.

Alexander Potapenko

unread,
Feb 19, 2016, 8:00:07 AM2/19/16
to
Ah, yes, I see.
This patch was indeed missing the following bits in kasan_slab_free():

#ifdef CONFIG_SLAB
if (cache->flags & SLAB_KASAN) {
struct kasan_free_meta *free_info =
get_free_info(cache, object);
struct kasan_alloc_meta *alloc_info =
get_alloc_info(cache, object);
alloc_info->state = KASAN_STATE_FREE;
set_track(&free_info->track);
}
#endif

I'll include them in the next round of patches.

Christoph Lameter

unread,
Feb 19, 2016, 10:50:06 AM2/19/16
to
On Fri, 19 Feb 2016, Dmitry Vyukov wrote:

> No, this does not work. We've tried.
> The problem is fragmentation. When all memory is occupied by slab,
> it's already too late to reclaim memory. Free objects are randomly
> scattered over memory, so if you have just 1% of live objects, the
> chances are that you won't be able to reclaim any single page.

Yes that is why slab objects *need* to be *movable*!!!

Joonsoo Kim

unread,
Feb 23, 2016, 2:30:12 AM2/23/16
to
Okay. Now, I got it.

Thanks.

Alexander Potapenko

unread,
Feb 26, 2016, 8:40:06 AM2/26/16
to
Quarantine isolates freed objects in a separate queue. The objects are
returned to the allocator later, which helps to detect use-after-free
errors.

Freed objects are first added to per-cpu quarantine queues.
When a cache is destroyed or memory shrinking is requested, the objects
are moved into the global quarantine queue. Whenever a kmalloc call
allows memory reclaiming, the oldest objects are popped out of the
global queue until the total size of objects in quarantine is less than
3/4 of the maximum quarantine size (which is a fraction of installed
physical memory).

Right now quarantine support is only enabled in SLAB allocator.
Unification of KASAN features in SLAB and SLUB will be done later.

This patch is based on the "mm: kasan: quarantine" patch originally
prepared by Dmitry Chernenkov.

Signed-off-by: Alexander Potapenko <gli...@google.com>
---
v2: - added copyright comments
- per request from Joonsoo Kim made __cache_free() more straightforward
- added comments for smp_load_acquire()/smp_store_release()

v3: - incorporate changes introduced by the "mm, kasan: SLAB support" patch
---
include/linux/kasan.h | 30 +++--
lib/test_kasan.c | 29 +++++
mm/kasan/Makefile | 3 +
mm/kasan/kasan.c | 71 ++++++++++--
mm/kasan/kasan.h | 11 +-
mm/kasan/quarantine.c | 306 ++++++++++++++++++++++++++++++++++++++++++++++++++
mm/kasan/report.c | 3 +-
mm/mempool.c | 7 +-
mm/page_alloc.c | 2 +-
mm/slab.c | 14 ++-
mm/slab.h | 4 +
mm/slab_common.c | 2 +
mm/slub.c | 4 +-
13 files changed, 457 insertions(+), 29 deletions(-)

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index bf71ab0..355e722 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -44,24 +44,29 @@ static inline void kasan_disable_current(void)
void kasan_unpoison_shadow(const void *address, size_t size);

void kasan_alloc_pages(struct page *page, unsigned int order);
-void kasan_free_pages(struct page *page, unsigned int order);
+void kasan_poison_free_pages(struct page *page, unsigned int order);

void kasan_cache_create(struct kmem_cache *cache, size_t *size,
unsigned long *flags);
+void kasan_cache_shrink(struct kmem_cache *cache);
+void kasan_cache_destroy(struct kmem_cache *cache);

void kasan_poison_slab(struct page *page);
void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
void kasan_poison_object_data(struct kmem_cache *cache, void *object);

void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
-void kasan_kfree_large(const void *ptr);
-void kasan_kfree(void *ptr);
+void kasan_poison_kfree_large(const void *ptr);
+void kasan_poison_kfree(void *ptr);
void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size,
gfp_t flags);
void kasan_krealloc(const void *object, size_t new_size, gfp_t flags);

void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
-void kasan_slab_free(struct kmem_cache *s, void *object);
+/* kasan_slab_free() returns true if the object has been put into quarantine.
+ */
+bool kasan_slab_free(struct kmem_cache *s, void *object);
+void kasan_poison_slab_free(struct kmem_cache *s, void *object);

struct kasan_cache {
int alloc_meta_offset;
@@ -79,11 +84,14 @@ static inline void kasan_enable_current(void) {}
static inline void kasan_disable_current(void) {}

static inline void kasan_alloc_pages(struct page *page, unsigned int order) {}
-static inline void kasan_free_pages(struct page *page, unsigned int order) {}
+static inline void kasan_poison_free_pages(struct page *page,
+ unsigned int order) {}

static inline void kasan_cache_create(struct kmem_cache *cache,
size_t *size,
unsigned long *flags) {}
+static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
+static inline void kasan_cache_destroy(struct kmem_cache *cache) {}

static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
@@ -92,8 +100,8 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache,
void *object) {}

static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {}
-static inline void kasan_kfree_large(const void *ptr) {}
-static inline void kasan_kfree(void *ptr) {}
+static inline void kasan_poison_kfree_large(const void *ptr) {}
+static inline void kasan_poison_kfree(void *ptr) {}
static inline void kasan_kmalloc(struct kmem_cache *s, const void *object,
size_t size, gfp_t flags) {}
static inline void kasan_krealloc(const void *object, size_t new_size,
@@ -101,7 +109,13 @@ static inline void kasan_krealloc(const void *object, size_t new_size,

static inline void kasan_slab_alloc(struct kmem_cache *s, void *object,
gfp_t flags) {}
-static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
+/* kasan_slab_free() returns true if the object has been put into quarantine.
+ */
+static inline bool kasan_slab_free(struct kmem_cache *s, void *object)
+{
+ return false;
+}
+static inline void kasan_poison_slab_free(struct kmem_cache *s, void *object) {}

static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
static inline void kasan_free_shadow(const struct vm_struct *vm) {}
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 82169fb..799c98e 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -344,6 +344,32 @@ static noinline void __init kasan_stack_oob(void)
*(volatile char *)p;
}

+#ifdef CONFIG_SLAB
+static noinline void __init kasan_quarantine_cache(void)
+{
+ struct kmem_cache *cache = kmem_cache_create(
+ "test", 137, 8, GFP_KERNEL, NULL);
+ int i;
+
+ for (i = 0; i < 100; i++) {
+ void *p = kmem_cache_alloc(cache, GFP_KERNEL);
+
+ kmem_cache_free(cache, p);
+ p = kmalloc(sizeof(u64), GFP_KERNEL);
+ kfree(p);
+ }
+ kmem_cache_shrink(cache);
+ for (i = 0; i < 100; i++) {
+ u64 *p = kmem_cache_alloc(cache, GFP_KERNEL);
+
+ kmem_cache_free(cache, p);
+ p = kmalloc(sizeof(u64), GFP_KERNEL);
+ kfree(p);
+ }
+ kmem_cache_destroy(cache);
+}
+#endif
+
static int __init kmalloc_tests_init(void)
{
kmalloc_oob_right();
@@ -367,6 +393,9 @@ static int __init kmalloc_tests_init(void)
kmem_cache_oob();
kasan_stack_oob();
kasan_global_oob();
+#ifdef CONFIG_SLAB
+ kasan_quarantine_cache();
+#endif
return -EAGAIN;
}

diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 32bd73a..d1db41e 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -8,3 +8,6 @@ CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)

obj-y := kasan.o report.o kasan_init.o

+ifdef CONFIG_SLAB
+ obj-y += quarantine.o
+endif
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 088cb31..904cd9a 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -307,7 +307,7 @@ void kasan_alloc_pages(struct page *page, unsigned int order)
kasan_unpoison_shadow(page_address(page), PAGE_SIZE << order);
}

-void kasan_free_pages(struct page *page, unsigned int order)
+void kasan_poison_free_pages(struct page *page, unsigned int order)
{
if (likely(!PageHighMem(page)))
kasan_poison_shadow(page_address(page),
@@ -368,6 +368,20 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size,
}
#endif

+void kasan_cache_shrink(struct kmem_cache *cache)
+{
+#ifdef CONFIG_SLAB
+ quarantine_remove_cache(cache);
+#endif
+}
+
+void kasan_cache_destroy(struct kmem_cache *cache)
+{
+#ifdef CONFIG_SLAB
+ quarantine_remove_cache(cache);
+#endif
+}
+
void kasan_poison_slab(struct page *page)
{
kasan_poison_shadow(page_address(page),
@@ -464,7 +478,7 @@ void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags)
kasan_kmalloc(cache, object, cache->object_size, flags);
}

-void kasan_slab_free(struct kmem_cache *cache, void *object)
+void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
{
unsigned long size = cache->object_size;
unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
@@ -473,18 +487,43 @@ void kasan_slab_free(struct kmem_cache *cache, void *object)
if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
return;

+ kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
+}
+
+bool kasan_slab_free(struct kmem_cache *cache, void *object)
+{
#ifdef CONFIG_SLAB
- if (cache->flags & SLAB_KASAN) {
- struct kasan_free_meta *free_info =
- get_free_info(cache, object);
+ /* RCU slabs could be legally used after free within the RCU period */
+ if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+ return false;
+
+ if (likely(cache->flags & SLAB_KASAN)) {
struct kasan_alloc_meta *alloc_info =
get_alloc_info(cache, object);
- alloc_info->state = KASAN_STATE_FREE;
- set_track(&free_info->track);
+ struct kasan_free_meta *free_info =
+ get_free_info(cache, object);
+
+ switch (alloc_info->state) {
+ case KASAN_STATE_ALLOC:
+ alloc_info->state = KASAN_STATE_QUARANTINE;
+ quarantine_put(free_info, cache);
+ set_track(&free_info->track, GFP_NOWAIT);
+ kasan_poison_slab_free(cache, object);
+ return true;
+ case KASAN_STATE_QUARANTINE:
+ case KASAN_STATE_FREE:
+ pr_err("Double free");
+ dump_stack();
+ break;
+ default:
+ break;
+ }
}
+ return false;
+#else
+ kasan_poison_slab_free(cache, object);
+ return false;
#endif
-
- kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
}

void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
@@ -493,6 +532,11 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
unsigned long redzone_start;
unsigned long redzone_end;

+#ifdef CONFIG_SLAB
+ if (flags & __GFP_RECLAIM)
+ quarantine_reduce();
+#endif
+
if (unlikely(object == NULL))
return;

@@ -523,6 +567,11 @@ void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
unsigned long redzone_start;
unsigned long redzone_end;

+#ifdef CONFIG_SLAB
+ if (flags & __GFP_RECLAIM)
+ quarantine_reduce();
+#endif
+
if (unlikely(ptr == NULL))
return;

@@ -551,7 +600,7 @@ void kasan_krealloc(const void *object, size_t size, gfp_t flags)
kasan_kmalloc(page->slab_cache, object, size, flags);
}

-void kasan_kfree(void *ptr)
+void kasan_poison_kfree(void *ptr)
{
struct page *page;

@@ -564,7 +613,7 @@ void kasan_kfree(void *ptr)
kasan_slab_free(page->slab_cache, ptr);
}

-void kasan_kfree_large(const void *ptr)
+void kasan_poison_kfree_large(const void *ptr)
{
struct page *page = virt_to_page(ptr);

diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index b4e5942..37e0b3a 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -62,6 +62,7 @@ struct kasan_global {
enum kasan_state {
KASAN_STATE_INIT,
KASAN_STATE_ALLOC,
+ KASAN_STATE_QUARANTINE,
KASAN_STATE_FREE
};

@@ -81,8 +82,10 @@ struct kasan_alloc_meta {
};

struct kasan_free_meta {
- /* Allocator freelist pointer, unused by KASAN. */
- void **freelist;
+ /* This field is used while the object is in the quarantine.
+ * Otherwise it might be used for the allocator freelist.
+ */
+ void **quarantine_link;
struct kasan_track track;
};

@@ -106,4 +109,8 @@ static inline bool kasan_report_enabled(void)
void kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip);

+void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache);
+void quarantine_reduce(void);
+void quarantine_remove_cache(struct kmem_cache *cache);
+
#endif
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
new file mode 100644
index 0000000..cf3f1fd
--- /dev/null
+++ b/mm/kasan/quarantine.c
@@ -0,0 +1,306 @@
+/*
+ * KASAN quarantine.
+ *
+ * Author: Alexander Potapenko <gli...@google.com>
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * Based on code by Dmitry Chernenkov.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <linux/gfp.h>
+#include <linux/hash.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/shrinker.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "../slab.h"
+#include "kasan.h"
+
+/* Data structure and operations for quarantine queues. */
+
+/* Each queue is a signled-linked list, which also stores the total size of
+ * objects inside of it.
+ */
+struct qlist {
+ void **head;
+ void **tail;
+ size_t bytes;
+};
+
+#define QLIST_INIT { NULL, NULL, 0 }
+
+static inline bool empty_qlist(struct qlist *q)
+{
+ return !q->head;
+}
+
+static inline void init_qlist(struct qlist *q)
+{
+ q->head = q->tail = NULL;
+ q->bytes = 0;
+}
+
+static inline void qlist_put(struct qlist *q, void **qlink, size_t size)
+{
+ if (unlikely(empty_qlist(q)))
+ q->head = qlink;
+ else
+ *q->tail = qlink;
+ q->tail = qlink;
+ *qlink = NULL;
+ q->bytes += size;
+}
+
+static inline void **qlist_remove(struct qlist *q, void ***prev,
+ size_t size)
+{
+ void **qlink = *prev;
+
+ *prev = *qlink;
+ if (q->tail == qlink) {
+ if (q->head == qlink)
+ q->tail = NULL;
+ else
+ q->tail = (void **)prev;
+ }
+ q->bytes -= size;
+
+ return qlink;
+}
+
+static inline void qlist_move_all(struct qlist *from, struct qlist *to)
+{
+ if (unlikely(empty_qlist(from)))
+ return;
+
+ if (empty_qlist(to)) {
+ *to = *from;
+ init_qlist(from);
+ return;
+ }
+
+ *to->tail = from->head;
+ to->tail = from->tail;
+ to->bytes += from->bytes;
+
+ init_qlist(from);
+}
+
+static inline void qlist_move(struct qlist *from, void **last, struct qlist *to,
+ size_t size)
+{
+ if (unlikely(last == from->tail)) {
+ qlist_move_all(from, to);
+ return;
+ }
+ if (empty_qlist(to))
+ to->head = from->head;
+ else
+ *to->tail = from->head;
+ to->tail = last;
+ from->head = *last;
+ *last = NULL;
+ from->bytes -= size;
+ to->bytes += size;
+}
+
+
+/* The object quarantine consists of per-cpu queues and a global queue,
+ * guarded by quarantine_lock.
+ */
+static DEFINE_PER_CPU(struct qlist, cpu_quarantine);
+
+static struct qlist global_quarantine;
+static DEFINE_SPINLOCK(quarantine_lock);
+
+/* Maximum size of the global queue. */
+static unsigned long quarantine_size;
+
+/* The fraction of physical memory the quarantine is allowed to occupy.
+ * Quarantine doesn't support memory shrinker with SLAB allocator, so we keep
+ * the ratio low to avoid OOM.
+ */
+#define QUARANTINE_FRACTION 32
+
+/* smp_load_acquire() here pairs with smp_store_release() in
+ * quarantine_reduce().
+ */
+#define QUARANTINE_LOW_SIZE (smp_load_acquire(&quarantine_size) * 3 / 4)
+#define QUARANTINE_PERCPU_SIZE (1 << 20)
+
+static inline struct kmem_cache *qlink_to_cache(void **qlink)
+{
+ return virt_to_head_page(qlink)->slab_cache;
+}
+
+static inline void *qlink_to_object(void **qlink, struct kmem_cache *cache)
+{
+ struct kasan_free_meta *free_info =
+ container_of((void ***)qlink, struct kasan_free_meta,
+ quarantine_link);
+
+ return ((void *)free_info) - cache->kasan_info.free_meta_offset;
+}
+
+static inline void qlink_free(void **qlink, struct kmem_cache *cache)
+{
+ void *object = qlink_to_object(qlink, cache);
+ struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
+ unsigned long flags;
+
+ local_irq_save(flags);
+ alloc_info->state = KASAN_STATE_FREE;
+ ___cache_free(cache, object, _THIS_IP_);
+ local_irq_restore(flags);
+}
+
+static inline void qlist_free_all(struct qlist *q, struct kmem_cache *cache)
+{
+ void **qlink;
+
+ if (unlikely(empty_qlist(q)))
+ return;
+
+ qlink = q->head;
+ while (qlink) {
+ struct kmem_cache *obj_cache =
+ cache ? cache : qlink_to_cache(qlink);
+ void **next = *qlink;
+
+ qlink_free(qlink, obj_cache);
+ qlink = next;
+ }
+ init_qlist(q);
+}
+
+void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
+{
+ unsigned long flags;
+ struct qlist *q;
+ struct qlist temp = QLIST_INIT;
+
+ local_irq_save(flags);
+
+ q = this_cpu_ptr(&cpu_quarantine);
+ qlist_put(q, (void **) &info->quarantine_link, cache->size);
+ if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE))
+ qlist_move_all(q, &temp);
+
+ local_irq_restore(flags);
+
+ if (unlikely(!empty_qlist(&temp))) {
+ spin_lock_irqsave(&quarantine_lock, flags);
+ qlist_move_all(&temp, &global_quarantine);
+ spin_unlock_irqrestore(&quarantine_lock, flags);
+ }
+}
+
+void quarantine_reduce(void)
+{
+ size_t new_quarantine_size;
+ unsigned long flags;
+ struct qlist to_free = QLIST_INIT;
+ size_t size_to_free = 0;
+ void **last;
+
+ /* smp_load_acquire() here pairs with smp_store_release() below. */
+ if (likely(ACCESS_ONCE(global_quarantine.bytes) <=
+ smp_load_acquire(&quarantine_size)))
+ return;
+
+ spin_lock_irqsave(&quarantine_lock, flags);
+
+ /* Update quarantine size in case of hotplug. Allocate a fraction of
+ * the installed memory to quarantine minus per-cpu queue limits.
+ */
+ new_quarantine_size = (ACCESS_ONCE(totalram_pages) << PAGE_SHIFT) /
+ QUARANTINE_FRACTION;
+ new_quarantine_size -= QUARANTINE_PERCPU_SIZE * num_online_cpus();
+ /* Pairs with smp_load_acquire() above and in QUARANTINE_LOW_SIZE. */
+ smp_store_release(&quarantine_size, new_quarantine_size);
+
+ last = global_quarantine.head;
+ while (last) {
+ struct kmem_cache *cache = qlink_to_cache(last);
+
+ size_to_free += cache->size;
+ if (!*last || size_to_free >
+ global_quarantine.bytes - QUARANTINE_LOW_SIZE)
+ break;
+ last = (void **) *last;
+ }
+ qlist_move(&global_quarantine, last, &to_free, size_to_free);
+
+ spin_unlock_irqrestore(&quarantine_lock, flags);
+
+ qlist_free_all(&to_free, NULL);
+}
+ struct kmem_cache *cache = arg;
+ struct qlist to_free = QLIST_INIT;
+ struct qlist *q;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ q = this_cpu_ptr(&cpu_quarantine);
+ qlist_move_cache(q, &to_free, cache);
+ local_irq_restore(flags);
+
+ qlist_free_all(&to_free, cache);
+}
+
+void quarantine_remove_cache(struct kmem_cache *cache)
+{
+ unsigned long flags;
+ struct qlist to_free = QLIST_INIT;
+
+ on_each_cpu(per_cpu_remove_cache, cache, 1);
+
+ spin_lock_irqsave(&quarantine_lock, flags);
+ qlist_move_cache(&global_quarantine, &to_free, cache);
+ spin_unlock_irqrestore(&quarantine_lock, flags);
+
+ qlist_free_all(&to_free, cache);
+}
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 4af52bb..a398ad4 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -149,7 +149,8 @@ static void print_object(struct kmem_cache *cache, void *object)
index 838ca8bb..a79ae81 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -980,7 +980,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order)

trace_mm_page_free(page, order);
kmemcheck_free_shadow(page, order);
- kasan_free_pages(page, order);
+ kasan_poison_free_pages(page, order);

if (PageAnon(page))
page->mapping = NULL;
diff --git a/mm/slab.c b/mm/slab.c
index 52a7a8d..8884fdf 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3374,9 +3374,21 @@ free_done:
static inline void __cache_free(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
+#ifdef CONFIG_KASAN
+ if (kasan_slab_free(cachep, objp))
+ /* The object has been put into the quarantine, don't touch it
+ * for now.
+ */
+ return;
+#endif
+ ___cache_free(cachep, objp, caller);
+}
+
+void ___cache_free(struct kmem_cache *cachep, void *objp,
+ unsigned long caller)
+{
struct array_cache *ac;

- kasan_slab_free(cachep, objp);
ac = cpu_cache_get(cachep);

check_irq_off();
diff --git a/mm/slab.h b/mm/slab.h
index 2eedace..d877b1e 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -373,4 +373,8 @@ void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
int memcg_slab_show(struct seq_file *m, void *p);

+#ifdef CONFIG_KASAN
+void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);
+#endif
+
#endif /* MM_SLAB_H */
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 538f616..7bcb5bc 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -711,6 +711,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
get_online_cpus();
get_online_mems();

+ kasan_cache_destroy(s);
mutex_lock(&slab_mutex);

s->refcount--;
@@ -749,6 +750,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep)

get_online_cpus();
get_online_mems();
+ kasan_cache_shrink(cachep);
ret = __kmem_cache_shrink(cachep, false);
put_online_mems();
put_online_cpus();
diff --git a/mm/slub.c b/mm/slub.c
index 2978695..1819293 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1278,7 +1278,7 @@ static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
static inline void kfree_hook(const void *x)
{
kmemleak_free(x);
- kasan_kfree_large(x);
+ kasan_poison_kfree_large(x);
}

static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
@@ -1333,7 +1333,7 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(x, s->object_size);

- kasan_slab_free(s, x);
+ kasan_poison_slab_free(s, x);
}

static inline void slab_free_freelist_hook(struct kmem_cache *s,
--
2.7.0.rc3.207.g0ac5344

Alexander Potapenko

unread,
Feb 26, 2016, 11:50:09 AM2/26/16
to
Quarantine isolates freed objects in a separate queue. The objects are
returned to the allocator later, which helps to detect use-after-free
errors.

Freed objects are first added to per-cpu quarantine queues.
When a cache is destroyed or memory shrinking is requested, the objects
are moved into the global quarantine queue. Whenever a kmalloc call
allows memory reclaiming, the oldest objects are popped out of the
global queue until the total size of objects in quarantine is less than
3/4 of the maximum quarantine size (which is a fraction of installed
physical memory).

Right now quarantine support is only enabled in SLAB allocator.
Unification of KASAN features in SLAB and SLUB will be done later.

This patch is based on the "mm: kasan: quarantine" patch originally
prepared by Dmitry Chernenkov.

Signed-off-by: Alexander Potapenko <gli...@google.com>
---
v2: - added copyright comments
- per request from Joonsoo Kim made __cache_free() more straightforward
- added comments for smp_load_acquire()/smp_store_release()

v3: - incorporate changes introduced by the "mm, kasan: SLAB support" patch

v4: - fix kbuild compile-time error (missing ___cache_free() declaration)
and a warning (wrong format specifier)
---
include/linux/kasan.h | 30 +++--
lib/test_kasan.c | 29 +++++
mm/kasan/Makefile | 3 +
mm/kasan/kasan.c | 71 ++++++++++--
mm/kasan/kasan.h | 11 +-
mm/kasan/quarantine.c | 306 ++++++++++++++++++++++++++++++++++++++++++++++++++
mm/kasan/report.c | 1 +
mm/mempool.c | 7 +-
mm/page_alloc.c | 2 +-
mm/slab.c | 14 ++-
mm/slab.h | 2 +
mm/slab_common.c | 2 +
mm/slub.c | 4 +-
13 files changed, 454 insertions(+), 28 deletions(-)
index 82d8858..958af33 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -150,6 +150,7 @@ static void print_object(struct kmem_cache *cache, void *object)
print_track(&alloc_info->track);
break;
case KASAN_STATE_FREE:
+ case KASAN_STATE_QUARANTINE:
pr_err("Object freed, allocated with size %u bytes\n",
alloc_info->alloc_size);
free_info = get_free_info(cache, object);
index 2eedace..e5680ee 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -373,4 +373,6 @@ void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
int memcg_slab_show(struct seq_file *m, void *p);

+void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);
0 new messages