Re: clang compiled kernel panic when mounting zfs root on i386

Konstantin Belousov

unread,

Nov 29, 2012, 6:29:44 PM11/29/12

to

On Tue, Nov 27, 2012 at 08:21:05AM +1100, Bruce Evans wrote:
> On Mon, 26 Nov 2012, Konstantin Belousov wrote:
>
> > On Mon, Nov 26, 2012 at 06:31:34AM -0800, sig6247 wrote:
> >>
> >> Just checked out r243529, this only happens when the kernel is compiled
> >> by clang, and only on i386, either recompiling the kernel with gcc or
> >> booting from a UFS root works fine. Is it a known problem?
> > It looks like clang uses more stack than gcc, and zfs makes quite
> > deep call chains.
...
> It would be useful if the stack trace printed the stack pointer
> on every function call, so that you could see how much stack each
> function used.

Please apply the patch below and obtain the backtrace of the double fault
panic again. I will commit the patch later.

diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c
index cba90f2..2c81f87 100644
--- a/sys/amd64/amd64/db_trace.c
+++ b/sys/amd64/amd64/db_trace.c
@@ -186,7 +186,8 @@ db_ss(struct db_variable *vp, db_expr_t *valuep, int op)

static void db_nextframe(struct amd64_frame **, db_addr_t *, struct thread *);
static int db_numargs(struct amd64_frame *);
-static void db_print_stack_entry(const char *, int, char **, long *, db_addr_t);
+static void db_print_stack_entry(const char *, int, char **, long *, db_addr_t,
+ void *);
static void decode_syscall(int, struct thread *);

static const char * watchtype_str(int type);
@@ -230,12 +231,13 @@ db_numargs(fp)
}

static void
-db_print_stack_entry(name, narg, argnp, argp, callpc)
+db_print_stack_entry(name, narg, argnp, argp, callpc, frame)
const char *name;
int narg;
char **argnp;
long *argp;
db_addr_t callpc;
+ void *frame;
{
db_printf("%s(", name);
#if 0
@@ -250,6 +252,8 @@ db_print_stack_entry(name, narg, argnp, argp, callpc)
#endif
db_printf(") at ");
db_printsym(callpc, DB_STGY_PROC);
+ if (frame != NULL)
+ db_printf("/frame 0x%lx", (register_t)frame);
db_printf("\n");
}

@@ -341,7 +345,7 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td)
return;
}

- db_print_stack_entry(name, 0, 0, 0, rip);
+ db_print_stack_entry(name, 0, 0, 0, rip, &(*fp)->f_frame);

/*
* Point to base of trapframe which is just above the
@@ -437,7 +441,8 @@ db_backtrace(struct thread *td, struct trapframe *tf,
* Don't try to walk back on a stack for a
* process that hasn't actually been run yet.
*/
- db_print_stack_entry(name, 0, 0, 0, pc);
+ db_print_stack_entry(name, 0, 0, 0, pc,
+ actframe);
break;
}
first = FALSE;
@@ -451,7 +456,7 @@ db_backtrace(struct thread *td, struct trapframe *tf,
narg = db_numargs(frame);
}

- db_print_stack_entry(name, narg, argnp, argp, pc);
+ db_print_stack_entry(name, narg, argnp, argp, pc, actframe);

if (actframe != frame) {
/* `frame' belongs to caller. */
@@ -465,7 +470,7 @@ db_backtrace(struct thread *td, struct trapframe *tf,
if (INKERNEL((long)pc) && !INKERNEL((long)frame)) {
sym = db_search_symbol(pc, DB_STGY_ANY, &offset);
db_symbol_values(sym, &name, NULL);
- db_print_stack_entry(name, 0, 0, 0, pc);
+ db_print_stack_entry(name, 0, 0, 0, pc, frame);
break;
}
if (!INKERNEL((long) frame)) {
diff --git a/sys/i386/i386/db_trace.c b/sys/i386/i386/db_trace.c
index 445d9c5..822cc56 100644
--- a/sys/i386/i386/db_trace.c
+++ b/sys/i386/i386/db_trace.c
@@ -176,7 +176,8 @@ db_ss(struct db_variable *vp, db_expr_t *valuep, int op)

static void db_nextframe(struct i386_frame **, db_addr_t *, struct thread *);
static int db_numargs(struct i386_frame *);
-static void db_print_stack_entry(const char *, int, char **, int *, db_addr_t);
+static void db_print_stack_entry(const char *, int, char **, int *, db_addr_t,
+ void *);
static void decode_syscall(int, struct thread *);

static const char * watchtype_str(int type);
@@ -220,12 +221,13 @@ retry:
}

static void
-db_print_stack_entry(name, narg, argnp, argp, callpc)
+db_print_stack_entry(name, narg, argnp, argp, callpc, frame)
const char *name;
int narg;
char **argnp;
int *argp;
db_addr_t callpc;
+ void *frame;
{
int n = narg >= 0 ? narg : 5;

@@ -242,6 +244,8 @@ db_print_stack_entry(name, narg, argnp, argp, callpc)
db_printf(",...");
db_printf(") at ");
db_printsym(callpc, DB_STGY_PROC);
+ if (frame != NULL)
+ db_printf("/frame 0x%r", (register_t)frame);
db_printf("\n");
}

@@ -326,7 +330,7 @@ db_nextframe(struct i386_frame **fp, db_addr_t *ip, struct thread *td)
return;
}

- db_print_stack_entry(name, 0, 0, 0, eip);
+ db_print_stack_entry(name, 0, 0, 0, eip, &(*fp)->f_frame);

/*
* For a double fault, we have to snag the values from the
@@ -467,7 +471,8 @@ db_backtrace(struct thread *td, struct trapframe *tf, struct i386_frame *frame,
* Don't try to walk back on a stack for a
* process that hasn't actually been run yet.
*/
- db_print_stack_entry(name, 0, 0, 0, pc);
+ db_print_stack_entry(name, 0, 0, 0, pc,
+ actframe);
break;
}
first = FALSE;
@@ -481,7 +486,7 @@ db_backtrace(struct thread *td, struct trapframe *tf, struct i386_frame *frame,
narg = db_numargs(frame);
}

- db_print_stack_entry(name, narg, argnp, argp, pc);
+ db_print_stack_entry(name, narg, argnp, argp, pc, actframe);

if (actframe != frame) {
/* `frame' belongs to caller. */
@@ -495,7 +500,7 @@ db_backtrace(struct thread *td, struct trapframe *tf, struct i386_frame *frame,
if (INKERNEL((int)pc) && !INKERNEL((int) frame)) {
sym = db_search_symbol(pc, DB_STGY_ANY, &offset);
db_symbol_values(sym, &name, NULL);
- db_print_stack_entry(name, 0, 0, 0, pc);
+ db_print_stack_entry(name, 0, 0, 0, pc, frame);
break;
}
if (!INKERNEL((int) frame)) {

sig6247

unread,

Nov 30, 2012, 7:42:45 AM11/30/12

to

On Fri, 30 Nov 2012 01:29:44 +0200, Konstantin Belousov <kost...@gmail.com> wrote:

> Please apply the patch below and obtain the backtrace of the double fault
> panic again. I will commit the patch later.

Thanks for the patch.

WARNING: WITNESS option enabled, expect reduced performance.
Trying to mount root from zfs:zroot []...

Fatal double fault:
eip = 0xc0e93e07
esp = 0xc86bffbc
ebp = 0xc86c0032
cpuid = 1; apic id = 01
panic: double fault
cpuid = 1
KDB: enter: panic
[ thread pid 1 tid 100002 ]
Stopped at kdb_enter+0x3d: movl $0,kdb_why
db> bt
Tracing pid 1 tid 100002 td 0xc89efbc0
kdb_enter(c1065960,c1065960,c10b903b,c139f438,aef01785,...) at kdb_enter+0x3d/frame 0xc139f3f0
panic(c10b903b,1,1,1,c86c0032,...) at panic+0x14b/frame 0xc139f42c
dblfault_handler() at dblfault_handler+0xab/frame 0xc139f42c
--- trap 0x17, eip = 0xc0e93e07, esp = 0xc86bffbc, ebp = 0xc86c0032 ---
__qdivrem(0,aba80000,c10d,98600000,68c119,...) at __qdivrem+0x197/frame 0xc86c0032
db>
_______________________________________________
freebsd...@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-curre...@freebsd.org"

Konstantin Belousov

unread,

Nov 30, 2012, 11:47:15 AM11/30/12

to

Hm, this is not very useful. Although the panic is, most likely, again caused
by a stack overflow (please also include the output of "show thread" from
ddb), it is at a different place, and probably in a leaf function.

Can you try a few more times, so that we can see a 'big' backtrace?

Konstantin Belousov

unread,

Dec 3, 2012, 5:41:32 PM12/3/12

to

On Sat, Dec 01, 2012 at 01:34:04AM -0800, sig6247 wrote:

> On Fri, 30 Nov 2012 18:47:15 +0200, Konstantin Belousov <kost...@gmail.com> wrote:
>
> > Hm, this is not very useful. Although the panic is, most likely, again caused
> > by a stack overflow (please also include the output of "show thread" from
> > ddb), it is at a different place, and probably in a leaf function.
> >
> > Can you try a few more times, so that we can see a 'big' backtrace?
>

> Sure. Thanks.

>
> WARNING: WITNESS option enabled, expect reduced performance.
> Trying to mount root from zfs:zroot []...
>
> Fatal double fault:

> eip = 0xc0add15d
> esp = 0xc86bffc8
> ebp = 0xc86c003c

> cpuid = 1; apic id = 01
> panic: double fault
> cpuid = 1
> KDB: enter: panic
> [ thread pid 1 tid 100002 ]
> Stopped at kdb_enter+0x3d: movl $0,kdb_why
> db> bt
> Tracing pid 1 tid 100002 td 0xc89efbc0

> kdb_enter(c1065960,c1065960,c10b903b,c139f438,2243cdbd,...) at kdb_enter+0x3d/frame 0xc139f3f0
> panic(c10b903b,1,1,1,c86c003c,...) at panic+0x14b/frame 0xc139f42c
> dblfault_handler() at dblfault_handler+0xab/frame 0xc139f42c
> --- trap 0x17, eip = 0xc0add15d, esp = 0xc86bffc8, ebp = 0xc86c003c ---
> witness_checkorder(c1fd7508,9,c109ee8c,7fa,0,...) at witness_checkorder+0x37d/frame 0xc86c003c
> __mtx_lock_flags(c1fd7518,0,c109ee8c,7fa,c135e998,...) at __mtx_lock_flags+0x87/frame 0xc86c0070
> uma_zalloc_arg(c1fd66c0,0,1,4d3,c86c0110,...) at uma_zalloc_arg+0x605/frame 0xc86c00c8
> vm_map_insert(c1fd508c,c13e0ca0,bd3a000,0,cbc39000,...) at vm_map_insert+0x499/frame 0xc86c0130
>
> kmem_back(c1fd508c,cbc39000,1000,3,c86c01d4,...) at kmem_back+0x76/frame 0xc86c018c
> kmem_malloc(c1fd508c,1000,3) at kmem_malloc+0x250/frame 0xc86c01c0
> page_alloc(c1fd1d80,1000,c86c020b,3,c1fd1d80,...) at page_alloc+0x27/frame 0xc86c01d4
> keg_alloc_slab(103,4,c109ee8c,870,cbb95f6c,...) at keg_alloc_slab+0xc3/frame 0xc86c0218
> keg_fetch_slab(103,c1fd1d80,cbb95f6c,c1fc8230,c86c02c0,...) at keg_fetch_slab+0xe2/frame 0xc86c0250
> zone_fetch_slab(c1fd1d80,c1fd0480,103,826,0,...) at zone_fetch_slab+0x43/frame 0xc86c0268
> uma_zalloc_arg(c1fd1d80,0,102,3,2,...) at uma_zalloc_arg+0x3f2/frame 0xc86c02c0
> malloc(4c,c1826100,102,c86c0388,c173909a,...) at malloc+0xe9/frame 0xc86c02e8
> zfs_kmem_alloc(4c,102,cb7d8820,c89efbc0,cb7d8820,...) at zfs_kmem_alloc+0x20/frame 0xc86c02fc
> vdev_mirror_io_start(cba232e0,10,cba232e0,1,0,...) at vdev_mirror_io_start+0x14a/frame 0xc86c0388
> zio_vdev_io_start(cba232e0,c89efbc0,0,cba232e0,c86c0600,...) at zio_vdev_io_start+0x228/frame 0xc86c03e4
> zio_execute(cba232e0,cb7d8000,cbbec640,cbbe2000,600,...) at zio_execute+0x106/frame 0xc86c0418
> spa_load_verify_cb(cb7d8000,0,cbbec640,cba6bd20,c86c0600,...) at spa_load_verify_cb+0x89/frame 0xc86c0458
> traverse_visitbp(cba6bd20,cbbec640,c86c0600,c86c0ba0,0,...) at traverse_visitbp+0x29f/frame 0xc86c05e0
> traverse_dnode(cba6bd20,0,0,23,0,...) at traverse_dnode+0x92/frame 0xc86c0638
> traverse_visitbp(cba6bd98,cbbf0080,c86c0890,cba6bdd4,c16ca7e0,...) at traverse_visitbp+0xe47/frame 0xc86c07c0
> traverse_visitbp(cba6bdd4,cbbe2840,c86c0968,c86c0ba0,0,...) at traverse_visitbp+0xf32/frame 0xc86c0948
> traverse_dnode(cba6bdd4,0,0,0,0,...) at traverse_dnode+0x92/frame 0xc86c09a0
> traverse_visitbp(0,cb7d8398,c86c0b50,2,cbbdc214,...) at traverse_visitbp+0x96d/frame 0xc86c0b28
>
> traverse_impl(0,0,cb7d8398,74,0,...) at traverse_impl+0x268/frame 0xc86c0be0
> traverse_pool(cb7d8000,74,0,d,c1723830,...) at traverse_pool+0x79/frame 0xc86c0c88
> spa_load(0,1,c86c0ec4,1e,0,...) at spa_load+0x1dde/frame 0xc86c0df0
> spa_load(0,0,c13d9d14,1,3,...) at spa_load+0x11a5/frame 0xc86c0f58
> spa_load_best(0,ffffffff,ffffffff,1,c0add175,...) at spa_load_best+0x71/frame 0xc86c0fb0
> spa_open_common(c17dce4e,0,0,c86c1190,c16f1a1c,...) at spa_open_common+0x11a/frame 0xc86c100c
> spa_open(c86c1078,c86c1074,c17dce4e,c135e998,c1fd7798,...) at spa_open+0x27/frame 0xc86c1020
> dsl_dir_open_spa(0,c89770b0,c17dd1e1,c86c11f8,c86c11f4,...) at dsl_dir_open_spa+0x6c/frame 0xc86c1190
> dsl_dataset_hold(c89770b0,cb7d3800,c86c1240,cb7d3800,cb7d3800,...) at dsl_dataset_hold+0x3a/frame 0xc86c120c
> dsl_dataset_own(c89770b0,0,cb7d3800,c86c1240,c1824e30,...) at dsl_dataset_own+0x21/frame 0xc86c1228
> dmu_objset_own(c89770b0,2,1,cb7d3800,c86c1290,...) at dmu_objset_own+0x2a/frame 0xc86c1250
> zfsvfs_create(c89770b0,c86c13ac,c17ea09b,681,0,...) at zfsvfs_create+0x4c/frame 0xc86c12a8
> zfs_mount(cb99b540,c17f0160,cb98b100,c89cae80,0,...) at zfs_mount+0x42c/frame 0xc86c14e0
> vfs_donmount(c89efbc0,4000,0,c86c1790,cb98b180,...) at vfs_donmount+0xc6d/frame 0xc86c1778
> kernel_mount(c8977490,4000,0,0,1,...) at kernel_mount+0x6b/frame 0xc86c17b8
> parse_mount(cb96e0e0,c1195498,0,1,0,...) at parse_mount+0x606/frame 0xc86c19d8
> vfs_mountroot(c13da634,4,c105ceba,2bb,0,...) at vfs_mountroot+0x6cf/frame 0xc86c1c60
> start_init(0,c86c1d08,c105f7c4,3db,0,...) at start_init+0x6a/frame 0xc86c1ccc
> fork_exit(c0a429e0,0,c86c1d08) at fork_exit+0x7f/frame 0xc86c1cf4
> fork_trampoline() at fork_trampoline+0x8/frame 0xc86c1cf4
> --- trap 0, eip = 0, esp = 0xc86c1d40, ebp = 0 ---
> db> show thread
> Thread 100002 at 0xc89efbc0:
> proc (pid 1): 0xc89edb40
> name: kernel
> stack: 0xc86c0000-0xc86c1fff
> flags: 0x4 pflags: 0x10000
> state: RUNNING (CPU 1)
> priority: 84
> container lock: sched lock 1 (0xc1220000)
> db>

Please try the patch below. It might give an immediate relief, but still
there are many offenders in the backtrace.

diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c
index 83948f2..147926e 100644
--- a/sys/kern/vfs_mountroot.c
+++ b/sys/kern/vfs_mountroot.c
@@ -672,10 +672,11 @@ parse_mount_dev_present(const char *dev)
return (error != 0) ? 0 : 1;
}

+#define ERRMSGL 255
static int
parse_mount(char **conf)
{
- char errmsg[255];
+ char *errmsg;
struct mntarg *ma;
char *dev, *fs, *opts, *tok;
int delay, error, timeout;
@@ -707,7 +708,7 @@ parse_mount(char **conf)
printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
(opts != NULL) ? opts : "");

- bzero(errmsg, sizeof(errmsg));
+ errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO);

if (vfs_byname(fs) == NULL) {
strlcpy(errmsg, "unknown file system", sizeof(errmsg));
@@ -734,7 +735,7 @@ parse_mount(char **conf)
ma = mount_arg(ma, "fstype", fs, -1);
ma = mount_arg(ma, "fspath", "/", -1);
ma = mount_arg(ma, "from", dev, -1);
- ma = mount_arg(ma, "errmsg", errmsg, sizeof(errmsg));
+ ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
ma = mount_arg(ma, "ro", NULL, 0);
ma = parse_mountroot_options(ma, opts);
error = kernel_mount(ma, MNT_ROOTFS);
@@ -748,11 +749,13 @@ parse_mount(char **conf)
printf(".\n");
}
free(fs, M_TEMP);
+ free(errmsg, M_TEMP);
if (opts != NULL)
free(opts, M_TEMP);
/* kernel_mount can return -1 on error. */
return ((error < 0) ? EDOOFUS : error);
}
+#undef ERRMSGL

static int
vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs)

sig6247

unread,

Dec 4, 2012, 1:20:34 PM12/4/12

to

On Tue, 4 Dec 2012 00:41:32 +0200, Konstantin Belousov <kost...@gmail.com> wrote:

> Please try the patch below. It might give an immediate relief, but still
> there are many offenders in the backtrace.

Thanks for the patch, it works now.

Volodymyr Kostyrko

unread,

Dec 12, 2012, 8:04:23 AM12/12/12

to

04.12.2012 00:41, Konstantin Belousov:

> Please try the patch below. It might give an immediate relief, but still
> there are many offenders in the backtrace.

I'm having almost the same issue and the patch doesn't work for me.

Trying to mount root from zfs:limb0 []...

Fatal double fault:
eip = 0x835a6bce
esp = 0x875c2fd4
ebp = 0x875c3018
cpuid = 0; apic id = 00
panic: double fault
cpuid = 0
KDB: stack backtrace:
db_trace_self_wrapper(8380283b,20646920,3030203d,3831000a,a3a000a,...)
at db_trace_self_wrapper+0x36/frame 0x83a10f10
kdb_backtrace(8383658f,0,83837c3d,83a10fc0,0,...) at
kdb_backtrace+0x30/frame 0x83a10f70
panic(83837c3d,0,0,0,875c3018,...) at panic+0x1bc/frame 0x83a10fb4
dblfault_handler() at cpu_fetch_syscall_args/frame 0x83a10fb4
--- trap 0x17, eip = 0x835a6bce, esp = 0x875c2fd4, ebp = 0x875c3018 ---
witness_checkorder(843df808,9,8382a15c,7dd,0,...) at
witness_checkorder+0x2e/frame 0x875c3018
_mtx_lock_flags(843df808,0,8382a15c,7dd,202,...) at
_mtx_lock_flags+0x7a/frame 0x875c3040
uma_zalloc_arg(843de960,0,102,2,2,...) at uma_zalloc_arg+0x5df/franc
0x875c3090
malloc(38,83d03100,102,875c3138,83c01d1a,...) at malloc+0xe9/frame
0x875c30c0
zfs_kmem_alloc(38,102,8,83cab2fe,157,...) at zfs_kmem_alloc+0x20/frame
0x875c30d4
vdev_mirror_io_start(87e3eb20,10,B7e3eb20,1,87d3f618,...) at
vdev_mirror_io_start+0x14a/frame 0x875c3138
zio_vdev_io_start(87e3eb20,8795dbcO,87e3eb20,875c3340,200,...) at
zio_vdev_io_start+0x1a6/frame Ox875c3180
zio_execute(87e3eb20.87c8f000,880a0640,8807d400,200,...) at
zio_execute+0x103/frame 0x875c31b0
spa_load_verify_cb(87c8f000,0,880a0640,87f7b708,875c3340,...) at
spa_load_verify_cb+0x89/frame 0x875c31f0
traverse_visitbp(87f7b708,880a0640,875c3340,875c3db8,0,...) at
traverse_visitbp+0x1e6/frame 0x875c3320
traverse_dnode(87f7b708,15,0,3,O,...) at traverse_dnode+0x92/frame
0x875c337O
traverse_visitbp(87f7b6cc,880a4000,875c3520,87f7b744,83b92d10,...) at
traverse_visitbp+0xc40/frame 0x875c34a0
traverse_visitbp(87f7b744,88096000,875c3650,87f7b834,83b92d10,...) at
traverse_visitbp+0xd33/frame 0xB75c35d0
traverse_visitbp(87f7b834,88074000,875c3780,87f7b8ac,83b92d10,...) at
traverse_visitbp+0xd33/frame 0x875c3700

traverse_visitbp(87f7b8ac,8806c000,875c38b0,87f7b924,83b92d10,...) at
traverse_visitbp+0xd33/frame 0x875c3830
traverse_visitbp(87f7b924,88064000,875c39e0,87f7b99c,83b92d10,...) at
traverse_visitbp+0xd33/frame 0x875c3960
traverse_visitbp(87f7b99c,87fce000,875c3b10,87f7ba14,83b92d10,...) at
traverse_visitbp+0xd33/frame 0x875c3a90
traverse_visitbp(87f7ba14,88061040,875c3be0,875c3db8,0,...) at
traverse_visitbp+0xd33/frame 0x875c3bc0
traverse_dnode(87f7ba14,15,0,0,0,...) at traverse_dnode+0x92/frame
0x875c3c10
traverse_visitbp(0,87f8ee80,875c3d68,2,834,...) at
traverse_visitbp+0x822/frame 0x875c3d40
traverse_impl(15,0,87f8ee80,261400,0,...) at traverse_impl+0x268/frane
0x875c3df0
traverse_pool(87c8f000,261400,0,d,83bec290,...) at
traverse_pool+0x273/frame 0x875c3e90
spa_load(0,1,875c4034,83ca82f2,8,...) at spa_load+0x1d8f/frame 0x875c3fa8
spa_load(0,0,83a48934,1,14,...) at spa_load+0x114c/frame 0x875c40c0
spa_load_best(0,ffffffff,ffffffff,1,0,...) at spa_load_best+0x71/frame
0x875c3e90
spa_open_common(83ca3ca6,0,0,875c42f0,83bb9dec,...) at
spa_open_common+0x11a/frame 0x875c4174
spa_open(875c41e0,875c41dc,83ca3ca6,0,0,...) at spa_open+0x27/frame
0x875c4188
dsl_dir_open_spa(0,87d47350,83ca4039,875c4358,875c4354,...) at
dsl_dir_open_spa+0x6c/frame 0x875c42f0
dsl_dataset_hold(87d47350,87a36000,875c43a0,87a36000,87a36000,...) at
dsl_data_hold+0x3a/frame 0x875c436c
dsl_dataset_own(87d47350,0,87a3600,875c43a0,83d01e30,...) at
dsl_dataset_own+0x21/frame 0x875c4388
dmu_objset_own(87d4350,2,1,87a36000,875c43f0,...) at
dmu_objset_own+0x2a/frame 0x875c43b0
zfsvfs_create(87d47350,875c4504,83cb0b68,68e,87d47350,...) at
zfsvfs_create+0x4c/frame 0x875c4400
zfs_mount(87d40ce4,83cb5Bd0,87d46300,87957500,8384fd28,...) at
zfs_mount+0x4a9/frame 0x875c4630
vfs_donmount(8795dbc0,4000,0,875c48b8,87d46380,...) at
vfs_donmount+0xc94/frame 0x875c48a0
kernel_mount(87d473d0,4000,0,0,839de044,...) at kernel_mount+0x6b/frame
0x875c48e0
parse_mount(87d47400,8385a800,0,1,0,...) at parse_mount+0x622/frame
0x875c49f8
vfs_mountroot(83a491c4,4,837f68a2,2ba,0,...) at
vfs_mountroot+0x6f1/frame 0x875c4c60
start_init(0,875c4d08,837f8f83,3d8,0,...) at start_init+0x6a/frame
0x875c4ccc
fork_exit(835107b0,0,875c4d08) at fork_init+0x7c/frame 0x875c4cf4
fork_trampoline() at fork_trampoline+0x8/frame 0x875c4cf4
--- trap 0, eip = 0, esp = 0x875c4d40, ebp = 0 ---
KDB: enter: panic
[ thread pid 1 tid 100002 ]
Stopped at kdb_enter+0x3d: movl $0,kdb_why
db>

Source pictures are at
https://picasaweb.google.com/104021007361271711472/I386ZfsDoubleFault?authuser=0&feat=directlink
just in case I missed something.

--
Sphinx of black quartz, judge my vow.

Dimitry Andric

unread,

Dec 12, 2012, 2:35:42 PM12/12/12

to

On 2012-12-12 14:04, Volodymyr Kostyrko wrote:
> 04.12.2012 00:41, Konstantin Belousov:
>> Please try the patch below. It might give an immediate relief, but still
>> there are many offenders in the backtrace.
>
> I'm having almost the same issue and the patch doesn't work for me.

...

Looking at the stack frame addresses, it seems some of them are mangled.
Did you type this by hand? The differences between subsequent frames
are a bit strange because of it (and because of awk's integer
processing):

_mtx_lock_flags 40
uma_zalloc_arg 80
malloc 48
zfs_kmem_alloc 20
vdev_mirror_io_start 100
zio_vdev_io_start -2270966072
zio_execute 2270966192
spa_load_verify_cb 64
traverse_visitbp 304
traverse_dnode -2129031145
traverse_visitbp 2129031529
traverse_visitbp 805306672
traverse_visitbp -805306064
traverse_visitbp 304
traverse_visitbp 304
traverse_visitbp 304
traverse_visitbp 304
traverse_dnode 80
traverse_visitbp 304
traverse_impl 176
traverse_pool 160
spa_load 280
spa_load 280
spa_load_best -560
spa_open_common 740
spa_open 20
dsl_dir_open_spa 360
dsl_dataset_hold 124
dsl_dataset_own 28
dmu_objset_own 40
zfsvfs_create 80
zfs_mount 560
vfs_donmount 624
kernel_mount 64
parse_mount 280
vfs_mountroot 616
start_init 108
fork_exit 40
fork_trampoline 0

The kernel stack is just 8,192 bytes; since you can see these routines
are all consuming massive amounts of stack, and the calls are very
deeply nested, it is almost inevitable that it would crash.

Especially the recursive spa_load and traverse_visitbp calls are scary,
because that can grow out of hand very quickly. It is probably tricky
to remove the recursion...

Andriy Gapon

unread,

Dec 13, 2012, 5:25:48 AM12/13/12

to

on 12/12/2012 21:35 Dimitry Andric said the following:

> Especially the recursive spa_load and traverse_visitbp calls are scary,
> because that can grow out of hand very quickly. It is probably tricky
> to remove the recursion...

Re-entering spa_load once is normal and is expected.
traverse_visitbp is also expected to recurse depending on data layout.
So yeah, it's probably even trickier than teaching clang to allocate smaller stack
frames ;-)

--
Andriy Gapon

Volodymyr Kostyrko

unread,

Dec 13, 2012, 7:29:16 AM12/13/12

to

12.12.2012 21:35, Dimitry Andric:

> On 2012-12-12 14:04, Volodymyr Kostyrko wrote:
>> 04.12.2012 00:41, Konstantin Belousov:
>>> Please try the patch below. It might give an immediate relief, but still
>>> there are many offenders in the backtrace.
>>
>> I'm having almost the same issue and the patch doesn't work for me.
> ...
>
> Looking at the stack frame addresses, it seems some of them are mangled.
> Did you type this by hand? The differences between subsequent frames
> are a bit strange because of it (and because of awk's integer
> processing):

Yes, I had typed that by hand. I attached link to the pictures just in case.

> The kernel stack is just 8,192 bytes; since you can see these routines
> are all consuming massive amounts of stack, and the calls are very
> deeply nested, it is almost inevitable that it would crash.
>

> Especially the recursive spa_load and traverse_visitbp calls are scary,
> because that can grow out of hand very quickly. It is probably tricky
> to remove the recursion...

After playing more with this kernel I also found it can crash not only
by this scenario. There are different possible ways.

I actually don't think there's a point fixing it right now. New clang is
coming anyway...

--
Sphinx of black quartz, judge my vow.

Volodymyr Kostyrko

unread,

Dec 17, 2012, 7:57:20 AM12/17/12

to

13.12.2012 12:25, Andriy Gapon:

> on 12/12/2012 21:35 Dimitry Andric said the following:

>> Especially the recursive spa_load and traverse_visitbp calls are scary,
>> because that can grow out of hand very quickly. It is probably tricky
>> to remove the recursion...
>

> Re-entering spa_load once is normal and is expected.
> traverse_visitbp is also expected to recurse depending on data layout.
> So yeah, it's probably even trickier than teaching clang to allocate smaller stack
> frames ;-)

I hit this one again, but this time my world and kernel are compiled
with stock gcc. Pictures 3 to 5:

https://picasaweb.google.com/104021007361271711472/I386ZfsDoubleFault

This happens on mounting root after unclean shutdown. I fixed my pool
with booting amd64 kernel, after this i386 kernel starts fine.

Maybe it's just time to accept that ZFS on i386 is not stable? Current
handbook elaborates on ZFS like it's known to work on i386.

Andriy Gapon

unread,

Dec 17, 2012, 5:20:25 PM12/17/12

to

on 17/12/2012 14:57 Volodymyr Kostyrko said the following:

> 13.12.2012 12:25, Andriy Gapon:
>> on 12/12/2012 21:35 Dimitry Andric said the following:
>>> Especially the recursive spa_load and traverse_visitbp calls are scary,
>>> because that can grow out of hand very quickly. It is probably tricky
>>> to remove the recursion...
>>
>> Re-entering spa_load once is normal and is expected.
>> traverse_visitbp is also expected to recurse depending on data layout.
>> So yeah, it's probably even trickier than teaching clang to allocate smaller
>> stack
>> frames ;-)
>
> I hit this one again, but this time my world and kernel are compiled with stock
> gcc. Pictures 3 to 5:
>
> https://picasaweb.google.com/104021007361271711472/I386ZfsDoubleFault
>
> This happens on mounting root after unclean shutdown. I fixed my pool with
> booting amd64 kernel, after this i386 kernel starts fine.
>
> Maybe it's just time to accept that ZFS on i386 is not stable? Current handbook
> elaborates on ZFS like it's known to work on i386.

Yes, it is known to work.

It's been already mentioned many times that ZFS works much better on amd64.
It's up to a (potential) user to understand limitations of i386 and to decide
whether to use ZFS, in what situations and how.

You may want to consider using KSTACK_PAGES=4 in your kernel configuration.

--
Andriy Gapon