Rudolf Čejka <cej...@fit.vutbr.cz> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |cej...@fit.vutbr.cz
--- Comment #18 from Rudolf Čejka <cej...@fit.vutbr.cz> ---
Hello,
another +1 for us.
--- Comment #19 from Rudolf Čejka <cej...@fit.vutbr.cz> ---
And now the debugging info. Unfortunately, we are new for this bug, so we have
the kernel without INVARIANTS and without debugging.patch yet.
We have locked bacula-fd, which is backup client with similar function to find
& stat and it occured during incremental backup operation:
procstat -kk (I can send full procstat -kk -a personally, it has 429876 bytes):
1331 101013 bacula-fd - mi_switch+0xbe
sleepq_catch_signals+0xab sleepq_wait_sig+0xc _cv_wait_sig+0x17a seltdwait+0xae
kern_select+0x8fa sys_select+0x54 amd64_syscall+0x321 Xfast_syscall+0xfb
1331 101035 bacula-fd - mi_switch+0xbe
sleepq_catch_signals+0xab sleepq_timedwait_sig+0xf _sleep+0x238
umtxq_sleep+0x125 do_wait+0x387 __umtx_op_wait_uint_private+0x83
amd64_syscall+0x321 Xfast_syscall+0xfb
1331 102066 bacula-fd - mi_switch+0xbe sleepq_wait+0x3a
_sleep+0x287 vnode_create_vobject+0x100 ufs_lookup_ino+0xa0
VOP_CACHEDLOOKUP_APV+0x7a vfs_cache_lookup+0xd6 VOP_LOOKUP_APV+0x7a
lookup+0x571 namei+0x344 kern_statat_vnhook+0xae sys_lstat+0x30
amd64_syscall+0x321 Xfast_syscall+0xfb
1331 103370 bacula-fd - mi_switch+0xbe
sleepq_catch_signals+0xab sleepq_timedwait_sig+0xf _cv_timedwait_sig_sbt+0x19e
seltdwait+0xa4 kern_select+0x8fa sys_select+0x54 amd64_syscall+0x321
Xfast_syscall+0xfb
where the important thread is TID 102066, with vnode_create_vobject.
(kgdb) info threads
...
701 Thread 102066 (PID=1331: bacula-fd) sched_switch (td=0xfffff803ec526960,
newtd=0x0, flags=<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1945
...
(kgdb) thread 701
[Switching to thread 701 (Thread 102066)]#0 sched_switch
(td=0xfffff803ec526960, newtd=0x0, flags=<value optimized out>) at
/usr/src/sys/kern/sched_ule.c:1945
1945 cpuid = PCPU_GET(cpuid);
(kgdb) bt
#0 sched_switch (td=0xfffff803ec526960, newtd=0x0, flags=<value optimized
out>) at /usr/src/sys/kern/sched_ule.c:1945
#1 0xffffffff80580d4e in mi_switch (flags=260, newtd=0x0) at
/usr/src/sys/kern/kern_synch.c:491
#2 0xffffffff805c155a in sleepq_wait (wchan=0x0, pri=0) at
/usr/src/sys/kern/subr_sleepqueue.c:618
#3 0xffffffff80580837 in _sleep (ident=<value optimized out>, lock=<value
optimized out>, priority=<value optimized out>, wmesg=<value optimized out>,
sbt=<value optimized out>, pr=<value optimized out>, flags=<value optimized
out>)
at /usr/src/sys/kern/kern_synch.c:255
#4 0xffffffff80766e60 in vnode_create_vobject (vp=0xfffff816fe4f3588,
isize=512, td=0xfffff803ec526960) at /usr/src/sys/vm/vnode_pager.c:120
#5 0xffffffff8072fe80 in ufs_lookup_ino (vdp=0xfffff816fe4f3588,
vpp=0xfffffe17f2e63858, cnp=0xfffffe17f2e63880, dd_ino=0x0) at
/usr/src/sys/ufs/ufs/ufs_lookup.c:259
#6 0xffffffff807f897a in VOP_CACHEDLOOKUP_APV (vop=<value optimized out>,
a=<value optimized out>) at vnode_if.c:197
#7 0xffffffff806092e6 in vfs_cache_lookup (ap=<value optimized out>) at
vnode_if.h:80
#8 0xffffffff807f883a in VOP_LOOKUP_APV (vop=<value optimized out>, a=<value
optimized out>) at vnode_if.c:129
#9 0xffffffff806111f1 in lookup (ndp=0xfffffe17f2e637f8) at vnode_if.h:54
#10 0xffffffff80610a04 in namei (ndp=0xfffffe17f2e637f8) at
/usr/src/sys/kern/vfs_lookup.c:302
#11 0xffffffff8062631e in kern_statat_vnhook (td=0xfffff803ec526960,
flag=<value optimized out>, fd=-100, path=0x43c18128 <Error reading address
0x43c18128: Bad address>, pathseg=UIO_USERSPACE, sbp=0xfffffe17f2e63910,
hook=0)
at /usr/src/sys/kern/vfs_syscalls.c:2300
#12 0xffffffff80626420 in sys_lstat (td=0x0, uap=0xfffffe17f2e63a40) at
/usr/src/sys/kern/vfs_syscalls.c:2280
#13 0xffffffff8078acd1 in amd64_syscall (td=0xfffff803ec526960, traced=0) at
subr_syscall.c:141
#14 0xffffffff8076fffb in Xfast_syscall () at
/usr/src/sys/amd64/amd64/exception.S:396
#15 0x00000000425a731a in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) frame 4
#4 0xffffffff80766e60 in vnode_create_vobject (vp=0xfffff816fe4f3588,
isize=512, td=0xfffff803ec526960) at /usr/src/sys/vm/vnode_pager.c:120
120 VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vodead",
0);
(kgdb) p *vp
$1 = {v_tag = 0xffffffff8085c651 "ufs", v_op = 0xffffffff80b0f950, v_data =
0xfffff81488311160, v_mount = 0xfffff8019a27d000, v_nmntvnodes = {tqe_next =
0xfffff816894dc3b0, tqe_prev = 0xfffff80b0d4991f8}, v_un = {vu_mount = 0x0,
vu_socket = 0x0, vu_cdev = 0x0, vu_fifoinfo = 0x0}, v_hashlist = {le_next =
0xfffff80824c363b0, le_prev = 0xfffff81742127b48}, v_cache_src = {lh_first =
0x0}, v_cache_dst = {tqh_first = 0xfffff80e5f6a5af0,
tqh_last = 0xfffff80e5f6a5b10}, v_cache_dd = 0xfffff80e5f6a5af0, v_lock =
{lock_object = {lo_name = 0xffffffff8085c651 "ufs", lo_flags = 117112832,
lo_data = 0, lo_witness = 0x0}, lk_lock = 1, lk_exslpfail = 0, lk_timo = 51,
lk_pri = 96}, v_interlock = {lock_object = {lo_name = 0xffffffff80870687
"vnode interlock", lo_flags = 16973824, lo_data = 0, lo_witness = 0x0},
mtx_lock = 4}, v_vnlock = 0xfffff816fe4f35f0, v_actfreelist = {
tqe_next = 0xfffff80b0d4991d8, tqe_prev = 0xfffff816894dc470}, v_bufobj =
{bo_lock = {lock_object = {lo_name = 0xffffffff80870697 "bufobj interlock",
lo_flags = 86179840, lo_data = 0, lo_witness = 0x0}, rw_lock = 1},
bo_ops = 0xffffffff80afc070, bo_object = 0xfffff802f8423d00, bo_synclist =
{le_next = 0x0, le_prev = 0x0}, bo_private = 0xfffff816fe4f3588, __bo_vnode =
0xfffff816fe4f3588, bo_clean = {bv_hd = {tqh_first = 0x0,
tqh_last = 0xfffff816fe4f36a8}, bv_root = {pt_root = 0}, bv_cnt = 0},
bo_dirty = {bv_hd = {tqh_first = 0x0, tqh_last = 0xfffff816fe4f36c8}, bv_root =
{pt_root = 0}, bv_cnt = 0}, bo_numoutput = 0, bo_flag = 0, bo_bsize = 32768},
v_pollinfo = 0x0, v_label = 0x0, v_lockf = 0x0, v_rl = {rl_waiters =
{tqh_first = 0x0, tqh_last = 0xfffff816fe4f3710}, rl_currdep = 0x0}, v_cstart =
0, v_lasta = 0, v_lastw = 0, v_clen = 0, v_holdcnt = 8, v_usecount = 7,
v_iflag = 512, v_vflag = 0, v_writecount = 0, v_hash = 76467851, v_type =
VDIR}
(kgdb) p *(vp->v_bufobj.bo_object)
$2 = {lock = {lock_object = {lo_name = 0xffffffff8088aeb6 "vm object", lo_flags
= 90374144, lo_data = 0, lo_witness = 0x0}, rw_lock = 1}, object_list =
{tqe_next = 0xfffff802f8423e00, tqe_prev = 0xfffff802f8423c20}, shadow_head = {
lh_first = 0x0}, shadow_list = {le_next = 0x0, le_prev =
0xfffff801744afa30}, memq = {tqh_first = 0xfffff8183ea423d0, tqh_last =
0xfffff8183ea423e0}, rtree = {rt_root = 18446735381746688977, rt_flags = 0
'\0'}, size = 1,
generation = 1, ref_count = 0, shadow_count = 0, memattr = 6 '\006', type = 2
'\002', flags = 16392, pg_color = 1608, paging_in_progress = 0,
resident_page_count = 1, backing_object = 0x0, backing_object_offset = 0,
pager_object_list = {tqe_next = 0x0, tqe_prev = 0x0}, rvq = {lh_first = 0x0},
cache = {rt_root = 0, rt_flags = 0 '\0'}, handle = 0xfffff816fe4f3588, un_pager
= {vnp = {vnp_size = 512, writemappings = 0}, devp = {devp_pglist = {
tqh_first = 0x200, tqh_last = 0x0}, ops = 0x0, dev = 0x0}, sgp =
{sgp_pglist = {tqh_first = 0x200, tqh_last = 0x0}}, swp = {swp_tmpfs = 0x200,
swp_bcount = 0}}, cred = 0x0, charge = 0}
(kgdb) p *td
$3 = {td_lock = 0xffffffff80b9d4d0, td_proc = 0xfffff8025dd694f0, td_plist =
{tqe_next = 0xfffff801744ac4b0, tqe_prev = 0xfffff805374dd970}, td_runq =
{tqe_next = 0x0, tqe_prev = 0xffffffff80b728b0}, td_slpq = {
tqe_next = 0xfffff80389d11960, tqe_prev = 0xfffff80223243e80}, td_lockq =
{tqe_next = 0x0, tqe_prev = 0xfffffe00003849b8}, td_hash = {le_next = 0x0,
le_prev = 0xfffffe00007d7590}, td_cpuset = 0xfffff801100593a8,
td_sel = 0xfffff806ce36eb00, td_sleepqueue = 0x0, td_turnstile =
0xfffff80174d07e40, td_rlqe = 0xfffff8017477ba28, td_umtxq =
0xfffff803a5a44f00, td_tid = 102066, td_sigqueue = {sq_signals = {__bits = {0,
0, 0, 0}}, sq_kill = {
__bits = {0, 0, 0, 0}}, sq_list = {tqh_first = 0x0, tqh_last =
0xfffff803ec526a18}, sq_proc = 0xfffff8025dd694f0, sq_flags = 1},
td_lend_user_pri = 255 '˙', td_flags = 4, td_inhibitors = 2, td_pflags = 0,
td_dupfd = 0,
td_sqqueue = 0, td_wchan = 0xfffff802f8423d00, td_wmesg = 0xffffffff8088be0a
"vodead", td_lastcpu = 13 '\r', td_oncpu = 255 '˙', td_owepreempt = 0 '\0',
td_tsqueue = 0 '\0', td_locks = 26721, td_rw_rlocks = 0, td_lk_slocks = 0,
td_stopsched = 0, td_blocked = 0x0, td_lockname = 0x0, td_contested =
{lh_first = 0x0}, td_sleeplocks = 0x0, td_intr_nesting_level = 0, td_pinned =
0, td_ucred = 0xfffff8017438e800, td_estcpu = 0, td_slptick = 1266348642,
td_blktick = 0, td_swvoltick = 1266348642, td_cow = 0, td_ru = {ru_utime =
{tv_sec = 0, tv_usec = 0}, ru_stime = {tv_sec = 0, tv_usec = 0}, ru_maxrss =
7952, ru_ixrss = 21233268, ru_idrss = 1299996, ru_isrss = 13866624,
ru_minflt = 1, ru_majflt = 0, ru_nswap = 0, ru_inblock = 2105608,
ru_oublock = 0, ru_msgsnd = 1531599, ru_msgrcv = 100, ru_nsignals = 0, ru_nvcsw
= 1685809, ru_nivcsw = 9633}, td_rux = {rux_runtime = 2226241352945,
rux_uticks = 46227, rux_sticks = 62106, rux_iticks = 0, rux_uu = 365363300,
rux_su = 490865795, rux_tu = 856229096}, td_incruntime = 0, td_runtime =
2226241352945, td_pticks = 0, td_sticks = 0, td_iticks = 0, td_uticks = 0,
td_intrval = 0, td_oldsigmask = {__bits = {0, 0, 0, 0}}, td_generation =
1695442, td_sigstk = {ss_sp = 0x0, ss_size = 0, ss_flags = 0}, td_xsig = 0,
td_profil_addr = 0, td_profil_ticks = 0,
td_name = "bacula-fd\000\000\000\000\000\000\000\000\000\000", td_fpop = 0x0,
td_dbgflags = 0, td_dbgksi = {ksi_link = {tqe_next = 0x0, tqe_prev = 0x0},
ksi_info = {si_signo = 0, si_errno = 0, si_code = 0, si_pid = 0, si_uid = 0,
si_status = 0, si_addr = 0x0, si_value = {sival_int = 0, sival_ptr = 0x0,
sigval_int = 0, sigval_ptr = 0x0}, _reason = {_fault = {_trapno = 0}, _timer =
{_timerid = 0, _overrun = 0}, _mesgq = {_mqd = 0}, _poll = {_band = 0},
__spare__ = {__spare1__ = 0, __spare2__ = {0, 0, 0, 0, 0, 0, 0}}}},
ksi_flags = 0, ksi_sigq = 0x0}, td_ng_outbound = 0, td_osd = {osd_nslots = 0,
osd_slots = 0x0, osd_next = {le_next = 0x0, le_prev = 0x0}},
td_map_def_user = 0x0, td_dbg_forked = 0, td_vp_reserv = 0, td_no_sleeping =
0, td_dom_rr_idx = 0, td_sigmask = {__bits = {0, 0, 0, 0}}, td_rqindex = 48
'0', td_base_pri = 152 '\230', td_priority = 152 '\230',
td_pri_class = 3 '\003', td_user_pri = 183 'ˇ', td_base_user_pri = 183 'ˇ',
td_pcb = 0xfffffe17f2e63b80, td_state = TDS_INHIBITED, td_retval = {0,
1084620448}, td_slpcallout = {c_links = {le = {le_next = 0x0,
le_prev = 0xfffffe0000ad9288}, sle = {sle_next = 0x0}, tqe = {tqe_next
= 0x0, tqe_prev = 0xfffffe0000ad9288}}, c_time = 5430875852315327, c_precision
= 26843, c_arg = 0xfffff803ec526960,
c_func = 0xffffffff805c1370 <sleepq_timeout>, c_lock = 0x0, c_flags = 2,
c_iflags = 272, c_cpu = 11}, td_frame = 0xfffffe17f2e63ac0, td_kstack_obj =
0xfffff8076ec32a00, td_kstack = 18446741977545703424, td_kstack_pages = 4,
td_critnest = 1, td_md = {md_spinlock_count = 1, md_saved_flags = 582,
md_spurflt_addr = 1131679744}, td_sched = 0xfffff803ec526de0, td_ar = 0x0,
td_lprof = {{lh_first = 0x0}, {lh_first = 0x0}}, td_dtrace = 0x0, td_errno = 0,
td_vnet = 0x0, td_vnet_lpush = 0x0, td_intr_frame = 0x0, td_rfppwait_p =
0xfffff80616f0a000, td_ma = 0x0, td_ma_cnt = 0, td_su = 0x0, td_dbg_sc_code =
0, td_dbg_sc_narg = 0, td_emuldata = 0x0}
(kgdb) p *object
$4 = {lock = {lock_object = {lo_name = 0xffffffff8088aeb6 "vm object", lo_flags
= 90374144, lo_data = 0, lo_witness = 0x0}, rw_lock = 1}, object_list =
{tqe_next = 0xfffff802f8423e00, tqe_prev = 0xfffff802f8423c20}, shadow_head = {
lh_first = 0x0}, shadow_list = {le_next = 0x0, le_prev =
0xfffff801744afa30}, memq = {tqh_first = 0xfffff8183ea423d0, tqh_last =
0xfffff8183ea423e0}, rtree = {rt_root = 18446735381746688977, rt_flags = 0
'\0'}, size = 1,
generation = 1, ref_count = 0, shadow_count = 0, memattr = 6 '\006', type = 2
'\002', flags = 16392, pg_color = 1608, paging_in_progress = 0,
resident_page_count = 1, backing_object = 0x0, backing_object_offset = 0,
pager_object_list = {tqe_next = 0x0, tqe_prev = 0x0}, rvq = {lh_first = 0x0},
cache = {rt_root = 0, rt_flags = 0 '\0'}, handle = 0xfffff816fe4f3588, un_pager
= {vnp = {vnp_size = 512, writemappings = 0}, devp = {devp_pglist = {
tqh_first = 0x200, tqh_last = 0x0}, ops = 0x0, dev = 0x0}, sgp =
{sgp_pglist = {tqh_first = 0x200, tqh_last = 0x0}}, swp = {swp_tmpfs = 0x200,
swp_bcount = 0}}, cred = 0x0, charge = 0}
--
--- Comment #20 from Konstantin Belousov <k...@FreeBSD.org> ---
(In reply to Rudolf Čejka from comment #19)
From your report, I am almost certain (with ~95% level) that this issue, and
the issue reported in PR204426 are the same. I would follow on that PR since
the reporter seems to have low time to reproduce, and more involved debugging
patches are already in flight there.
ro...@qsp.nl changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |ro...@qsp.nl
--- Comment #22 from ro...@qsp.nl ---
And another +1. Deadlock occurs on a raidz1 pool with 4 1.2TB members. This
system is running on r294572. The issue started after approximately 59 days of
production use with an average load of about 150 io transactions per second.
The deadlock is on a directory, which is located on a +-600GB ZFS dataset and
contains 7 files and two subdirs, a few kb total.
procstat -kk for a stuck du process:
PID TID COMM TDNAME KSTACK
97300 101745 du - mi_switch+0xe1 sleepq_wait+0x3a
_sleep+0x287 vnode_create_vobject+0x100 zfs_freebsd_open+0xf5 VOP_OPEN_APV+0xa1
vn_open_vnode+0x234 vn_open_cred+0x36a kern_openat+0x26f amd64_syscall+0x40f
Xfast_syscall+0xfb
And here is an rsync that is stuck:
25689 101762 rsync-backup - mi_switch+0xe1 sleepq_wait+0x3a
_sleep+0x287 vnode_create_vobject+0x100 zfs_freebsd_open+0xf5 VOP_OPEN_APV+0xa1
vn_open_vnode+0x234 vn_open_cred+0x36a kern_openat+0x26f amd64_syscall+0x40f
Xfast_syscall+0xfb
This filesystem is exported through nfs. We have a few nfsd threads stuck as
well:
root@nfs01:/home/roel # procstat -kk 857
PID TID COMM TDNAME KSTACK
857 100485 nfsd nfsd: master mi_switch+0xe1
sleepq_catch_signals+0xab sleepq_timedwait_sig+0x10 _cv_timedwait_sig_sbt+0x18b
svc_run_internal+0x46d svc_run+0x182 nfsrvd_nfsd+0x242 nfssvc_nfsd+0x107
sys_nfssvc+0x9c amd64_syscall+0x40f Xfast_syscall+0xfb
857 100505 nfsd nfsd: service mi_switch+0xe1
sleepq_catch_signals+0xab sleepq_wait_sig+0xf _cv_wait_sig+0x16a
svc_run_internal+0x86e svc_thread_start+0xb fork_exit+0x9a fork_trampoline+0xe
... lots of similar threads, except for: ...
857 100541 nfsd nfsd: service mi_switch+0xe1 sleepq_wait+0x3a
_sleep+0x287 vnode_create_vobject+0x100 zfs_fhtovp+0x45b nfsvno_fhtovp+0x7c
nfsd_fhtovp+0xc8 nfsrvd_dorpc+0x12c nfssvc_program+0x54e svc_run_internal+0xd2b
svc_thread_start+0xb fork_exit+0x9a fork_trampoline+0xe
857 100588 nfsd nfsd: service mi_switch+0xe1 sleepq_wait+0x3a
_sleep+0x287 vnode_create_vobject+0x100 zfs_fhtovp+0x45b nfsvno_fhtovp+0x7c
nfsd_fhtovp+0xc8 nfsrvd_dorpc+0x12c nfssvc_program+0x54e svc_run_internal+0xd2b
svc_thread_start+0xb fork_exit+0x9a fork_trampoline+0xe
Some kgdb output for one of the stuck du threads:
[Switching to thread 555 (Thread 101720)]#0 sched_switch
(td=0xfffff8033a17e000, newtd=<value optimized out>, flags=<value optimized
out>) at /usr/src/sys/kern/sched_ule.c:1945
1945 cpuid = PCPU_GET(cpuid);
(kgdb) bt
#0 sched_switch (td=0xfffff8033a17e000, newtd=<value optimized out>,
flags=<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1945
#1 0xffffffff80959ff1 in mi_switch (flags=260, newtd=0x0) at
/usr/src/sys/kern/kern_synch.c:491
#2 0xffffffff8099b31a in sleepq_wait (wchan=0x0, pri=0) at
/usr/src/sys/kern/subr_sleepqueue.c:617
#3 0xffffffff80959a17 in _sleep (ident=<value optimized out>, lock=<value
optimized out>, priority=<value optimized out>, wmesg=<value optimized out>,
sbt=<value optimized out>, pr=<value optimized out>,
flags=<value optimized out>) at /usr/src/sys/kern/kern_synch.c:255
#4 0xffffffff80bed620 in vnode_create_vobject (vp=0xfffff801d3e11b10,
isize=16, td=0xfffff8033a17e000) at /usr/src/sys/vm/vnode_pager.c:120
#5 0xffffffff81a7d5f5 in zfs_freebsd_open (ap=0xfffffe0466890658) at
/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c:6076
#6 0xffffffff80e7e651 in VOP_OPEN_APV (vop=<value optimized out>, a=<value
optimized out>) at vnode_if.c:469
#7 0xffffffff80a05694 in vn_open_vnode (vp=0xfffff801d3e11b10, fmode=1179653,
cred=0xfffff803682bd400, td=0xfffff8033a17e000, fp=0xfffff80240d403c0) at
vnode_if.h:196
#8 0xffffffff80a0528a in vn_open_cred (ndp=0xfffffe0466890870,
flagp=0xfffffe046689094c, cmode=<value optimized out>, vn_open_flags=<value
optimized out>, cred=0x0, fp=0xfffff80240d403c0)
at /usr/src/sys/kern/vfs_vnops.c:268
#9 0xffffffff809fe44f in kern_openat (td=0xfffff8033a17e000, fd=-100,
path=0x801422d48 <Error reading address 0x801422d48: Bad address>,
pathseg=UIO_USERSPACE, flags=1179653, mode=<value optimized out>)
at /usr/src/sys/kern/vfs_syscalls.c:1093
#10 0xffffffff80d55aff in amd64_syscall (td=0xfffff8033a17e000, traced=0) at
subr_syscall.c:141
#11 0xffffffff80d3ad7b in Xfast_syscall () at
/usr/src/sys/amd64/amd64/exception.S:396
#12 0x0000000800b7570a in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) frame 4
#4 0xffffffff80bed620 in vnode_create_vobject (vp=0xfffff801d3e11b10,
isize=16, td=0xfffff8033a17e000) at /usr/src/sys/vm/vnode_pager.c:120
120 VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vodead",
0);
(kgdb) print vp
$4 = (struct vnode *) 0xfffff801d3e11b10
(kgdb) print *vp
$5 = {v_tag = 0xffffffff81aecb3b "zfs", v_op = 0xffffffff81aff790, v_data =
0xfffff801a2d1ca10, v_mount = 0xfffff8011a46e660, v_nmntvnodes = {tqe_next =
0xfffff801a53281d8, tqe_prev = 0xfffff801a6b8e5a8}, v_un = {
vu_mount = 0x0, vu_socket = 0x0, vu_cdev = 0x0, vu_fifoinfo = 0x0},
v_hashlist = {le_next = 0x0, le_prev = 0x0}, v_cache_src = {lh_first =
0xfffff80019708850}, v_cache_dst = {tqh_first = 0xfffff80257063850,
tqh_last = 0xfffff80257063870}, v_cache_dd = 0xfffff80257063850, v_lock =
{lock_object = {lo_name = 0xffffffff81aecb3b "zfs", lo_flags = 117112832,
lo_data = 0, lo_witness = 0x0}, lk_lock = 1, lk_exslpfail = 0,
lk_timo = 51, lk_pri = 96}, v_interlock = {lock_object = {lo_name =
0xffffffff80fe1a6c "vnode interlock", lo_flags = 16973824, lo_data = 0,
lo_witness = 0x0}, mtx_lock = 4}, v_vnlock = 0xfffff801d3e11b78,
v_actfreelist = {tqe_next = 0xfffff801a6b8e588, tqe_prev =
0xfffff801a5328298}, v_bufobj = {bo_lock = {lock_object = {lo_name =
0xffffffff80fe1a7c "bufobj interlock", lo_flags = 86179840, lo_data = 0,
lo_witness = 0x0},
rw_lock = 1}, bo_ops = 0xffffffff814b3900, bo_object =
0xfffff801fd0ea100, bo_synclist = {le_next = 0x0, le_prev = 0x0}, bo_private =
0xfffff801d3e11b10, __bo_vnode = 0xfffff801d3e11b10, bo_clean = {bv_hd = {
tqh_first = 0x0, tqh_last = 0xfffff801d3e11c30}, bv_root = {pt_root =
0}, bv_cnt = 0}, bo_dirty = {bv_hd = {tqh_first = 0x0, tqh_last =
0xfffff801d3e11c50}, bv_root = {pt_root = 0}, bv_cnt = 0}, bo_numoutput = 0,
bo_flag = 0, bo_bsize = 131072}, v_pollinfo = 0x0, v_label = 0x0, v_lockf =
0x0, v_rl = {rl_waiters = {tqh_first = 0x0, tqh_last = 0xfffff801d3e11c98},
rl_currdep = 0x0}, v_cstart = 0, v_lasta = 0, v_lastw = 0,
v_clen = 0, v_holdcnt = 127, v_usecount = 126, v_iflag = 512, v_vflag = 0,
v_writecount = 0, v_hash = 7955980, v_type = VDIR}
(kgdb) print *td
$6 = {td_lock = 0xffffffff81619670, td_proc = 0xfffff8006233f4f0, td_plist =
{tqe_next = 0x0, tqe_prev = 0xfffff8006233f500}, td_runq = {tqe_next = 0x0,
tqe_prev = 0xffffffff815e3998}, td_slpq = {
tqe_next = 0xfffff803f7d36960, tqe_prev = 0xfffff802836ed030}, td_lockq =
{tqe_next = 0x0, tqe_prev = 0xfffffe03e44e7908}, td_hash = {le_next = 0x0,
le_prev = 0xfffffe000087aac0}, td_cpuset = 0xfffff800083cd3a8,
td_sel = 0xfffff80039695e00, td_sleepqueue = 0x0, td_turnstile =
0xfffff8015b0cacc0, td_rlqe = 0xfffff80117ffa4d8, td_umtxq =
0xfffff802d770a780, td_tid = 101720, td_sigqueue = {sq_signals = {__bits = {0,
0, 0, 0}},
sq_kill = {__bits = {0, 0, 0, 0}}, sq_list = {tqh_first = 0x0, tqh_last =
0xfffff8033a17e0b8}, sq_proc = 0xfffff8006233f4f0, sq_flags = 1},
td_lend_user_pri = 255 'ÿ', td_flags = 133124, td_inhibitors = 2,
td_pflags = 0, td_dupfd = -1, td_sqqueue = 0, td_wchan = 0xfffff801fd0ea100,
td_wmesg = 0xffffffff81006e8d "vodead", td_lastcpu = 1 '\001', td_oncpu = 255
'ÿ', td_owepreempt = 0 '\0', td_tsqueue = 0 '\0',
td_locks = 4348, td_rw_rlocks = 0, td_lk_slocks = 0, td_stopsched = 0,
td_blocked = 0x0, td_lockname = 0x0, td_contested = {lh_first = 0x0},
td_sleeplocks = 0x0, td_intr_nesting_level = 0, td_pinned = 0,
td_ucred = 0xfffff803682bd400, td_estcpu = 0, td_slptick = 444244584,
td_blktick = 0, td_swvoltick = 444244584, td_cow = 39, td_ru = {ru_utime =
{tv_sec = 0, tv_usec = 0}, ru_stime = {tv_sec = 0, tv_usec = 0},
ru_maxrss = 1656, ru_ixrss = 192, ru_idrss = 64, ru_isrss = 2048, ru_minflt
= 119, ru_majflt = 0, ru_nswap = 0, ru_inblock = 749, ru_oublock = 0, ru_msgsnd
= 0, ru_msgrcv = 0, ru_nsignals = 0, ru_nvcsw = 739,
ru_nivcsw = 2}, td_rux = {rux_runtime = 514120625, rux_uticks = 1,
rux_sticks = 15, rux_iticks = 0, rux_uu = 12852, rux_su = 192791, rux_tu =
205644}, td_incruntime = 0, td_runtime = 514120625, td_pticks = 0,
td_sticks = 0, td_iticks = 0, td_uticks = 0, td_intrval = 0, td_oldsigmask =
{__bits = {0, 0, 0, 0}}, td_generation = 741, td_sigstk = {ss_sp = 0x0, ss_size
= 0, ss_flags = 4}, td_xsig = 0, td_profil_addr = 0,
td_profil_ticks = 0, td_name = "du", '\0' <repeats 17 times>, td_fpop = 0x0,
td_dbgflags = 0, td_dbgksi = {ksi_link = {tqe_next = 0x0, tqe_prev = 0x0},
ksi_info = {si_signo = 0, si_errno = 0, si_code = 0, si_pid = 0,
si_uid = 0, si_status = 0, si_addr = 0x0, si_value = {sival_int = 0,
sival_ptr = 0x0, sigval_int = 0, sigval_ptr = 0x0}, _reason = {_fault =
{_trapno = 0}, _timer = {_timerid = 0, _overrun = 0}, _mesgq = {_mqd = 0},
_poll = {_band = 0}, __spare__ = {__spare1__ = 0, __spare2__ = {0, 0,
0, 0, 0, 0, 0}}}}, ksi_flags = 0, ksi_sigq = 0x0}, td_ng_outbound = 0, td_osd =
{osd_nslots = 0, osd_slots = 0x0, osd_next = {le_next = 0x0,
le_prev = 0x0}}, td_map_def_user = 0x0, td_dbg_forked = 0, td_vp_reserv =
0, td_no_sleeping = 0, td_dom_rr_idx = 0, td_sigmask = {__bits = {0, 0, 0, 0}},
td_rqindex = 31 '\037', td_base_pri = 120 'x',
td_priority = 120 'x', td_pri_class = 3 '\003', td_user_pri = 126 '~',
td_base_user_pri = 126 '~', td_pcb = 0xfffffe0466890b80, td_state =
TDS_INHIBITED, td_retval = {0, 1}, td_slpcallout = {c_links = {le = {
le_next = 0x0, le_prev = 0xfffffe00008d0a10}, sle = {sle_next = 0x0},
tqe = {tqe_next = 0x0, tqe_prev = 0xfffffe00008d0a10}}, c_time =
20354765951718809, c_precision = 268435437, c_arg = 0xfffff8033a17e000,
c_func = 0xffffffff8099b0d0 <sleepq_timeout>, c_lock = 0x0, c_flags = 0,
c_iflags = 272, c_cpu = 2}, td_frame = 0xfffffe0466890ac0, td_kstack_obj =
0xfffff803af9b7200, td_kstack = 18446741893586407424,
td_kstack_pages = 4, td_critnest = 1, td_md = {md_spinlock_count = 1,
md_saved_flags = 582, md_spurflt_addr = 0}, td_sched = 0xfffff8033a17e480,
td_ar = 0x0, td_lprof = {{lh_first = 0x0}, {lh_first = 0x0}},
td_dtrace = 0xfffff8037c56e700, td_errno = 0, td_vnet = 0x0, td_vnet_lpush =
0x0, td_intr_frame = 0x0, td_rfppwait_p = 0xfffff802656e09e0, td_ma = 0x0,
td_ma_cnt = 0, td_su = 0x0, td_dbg_sc_code = 0, td_dbg_sc_narg = 0,
td_emuldata = 0x0}
And another stuck du process:
(kgdb) thread 795
[Switching to thread 795 (Thread 101745)]#0 sched_switch
(td=0xfffff8006f2e84b0, newtd=<value optimized out>, flags=<value optimized
out>) at /usr/src/sys/kern/sched_ule.c:1945
1945 cpuid = PCPU_GET(cpuid);
(kgdb) bt
#0 sched_switch (td=0xfffff8006f2e84b0, newtd=<value optimized out>,
flags=<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1945
#1 0xffffffff80959ff1 in mi_switch (flags=260, newtd=0x0) at
/usr/src/sys/kern/kern_synch.c:491
#2 0xffffffff8099b31a in sleepq_wait (wchan=0x0, pri=0) at
/usr/src/sys/kern/subr_sleepqueue.c:617
#3 0xffffffff80959a17 in _sleep (ident=<value optimized out>, lock=<value
optimized out>, priority=<value optimized out>, wmesg=<value optimized out>,
sbt=<value optimized out>, pr=<value optimized out>,
flags=<value optimized out>) at /usr/src/sys/kern/kern_synch.c:255
#4 0xffffffff80bed620 in vnode_create_vobject (vp=0xfffff801d3e11b10,
isize=16, td=0xfffff8006f2e84b0) at /usr/src/sys/vm/vnode_pager.c:120
#5 0xffffffff81a7d5f5 in zfs_freebsd_open (ap=0xfffffe046690d658) at
/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c:6076
#6 0xffffffff80e7e651 in VOP_OPEN_APV (vop=<value optimized out>, a=<value
optimized out>) at vnode_if.c:469
#7 0xffffffff80a05694 in vn_open_vnode (vp=0xfffff801d3e11b10, fmode=1179653,
cred=0xfffff802f6e2f000, td=0xfffff8006f2e84b0, fp=0xfffff80104fcc5a0) at
vnode_if.h:196
#8 0xffffffff80a0528a in vn_open_cred (ndp=0xfffffe046690d870,
flagp=0xfffffe046690d94c, cmode=<value optimized out>, vn_open_flags=<value
optimized out>, cred=0x0, fp=0xfffff80104fcc5a0)
at /usr/src/sys/kern/vfs_vnops.c:268
#9 0xffffffff809fe44f in kern_openat (td=0xfffff8006f2e84b0, fd=-100,
path=0x801422d48 <Error reading address 0x801422d48: Bad address>,
pathseg=UIO_USERSPACE, flags=1179653, mode=<value optimized out>)
at /usr/src/sys/kern/vfs_syscalls.c:1093
#10 0xffffffff80d55aff in amd64_syscall (td=0xfffff8006f2e84b0, traced=0) at
subr_syscall.c:141
#11 0xffffffff80d3ad7b in Xfast_syscall () at
/usr/src/sys/amd64/amd64/exception.S:396
#12 0x0000000800b7570a in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) frame 4
#4 0xffffffff80bed620 in vnode_create_vobject (vp=0xfffff801d3e11b10,
isize=16, td=0xfffff8006f2e84b0) at /usr/src/sys/vm/vnode_pager.c:120
120 VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vodead",
0);
(kgdb) p *vp
$7 = {v_tag = 0xffffffff81aecb3b "zfs", v_op = 0xffffffff81aff790, v_data =
0xfffff801a2d1ca10, v_mount = 0xfffff8011a46e660, v_nmntvnodes = {tqe_next =
0xfffff801a53281d8, tqe_prev = 0xfffff801a6b8e5a8}, v_un = {
vu_mount = 0x0, vu_socket = 0x0, vu_cdev = 0x0, vu_fifoinfo = 0x0},
v_hashlist = {le_next = 0x0, le_prev = 0x0}, v_cache_src = {lh_first =
0xfffff80019708850}, v_cache_dst = {tqh_first = 0xfffff80257063850,
tqh_last = 0xfffff80257063870}, v_cache_dd = 0xfffff80257063850, v_lock =
{lock_object = {lo_name = 0xffffffff81aecb3b "zfs", lo_flags = 117112832,
lo_data = 0, lo_witness = 0x0}, lk_lock = 1, lk_exslpfail = 0,
lk_timo = 51, lk_pri = 96}, v_interlock = {lock_object = {lo_name =
0xffffffff80fe1a6c "vnode interlock", lo_flags = 16973824, lo_data = 0,
lo_witness = 0x0}, mtx_lock = 4}, v_vnlock = 0xfffff801d3e11b78,
v_actfreelist = {tqe_next = 0xfffff801a6b8e588, tqe_prev =
0xfffff801a5328298}, v_bufobj = {bo_lock = {lock_object = {lo_name =
0xffffffff80fe1a7c "bufobj interlock", lo_flags = 86179840, lo_data = 0,
lo_witness = 0x0},
rw_lock = 1}, bo_ops = 0xffffffff814b3900, bo_object =
0xfffff801fd0ea100, bo_synclist = {le_next = 0x0, le_prev = 0x0}, bo_private =
0xfffff801d3e11b10, __bo_vnode = 0xfffff801d3e11b10, bo_clean = {bv_hd = {
tqh_first = 0x0, tqh_last = 0xfffff801d3e11c30}, bv_root = {pt_root =
0}, bv_cnt = 0}, bo_dirty = {bv_hd = {tqh_first = 0x0, tqh_last =
0xfffff801d3e11c50}, bv_root = {pt_root = 0}, bv_cnt = 0}, bo_numoutput = 0,
bo_flag = 0, bo_bsize = 131072}, v_pollinfo = 0x0, v_label = 0x0, v_lockf =
0x0, v_rl = {rl_waiters = {tqh_first = 0x0, tqh_last = 0xfffff801d3e11c98},
rl_currdep = 0x0}, v_cstart = 0, v_lasta = 0, v_lastw = 0,
v_clen = 0, v_holdcnt = 127, v_usecount = 126, v_iflag = 512, v_vflag = 0,
v_writecount = 0, v_hash = 7955980, v_type = VDIR}
(kgdb) p *td
$8 = {td_lock = 0xffffffff81619670, td_proc = 0xfffff80009eb59e0, td_plist =
{tqe_next = 0x0, tqe_prev = 0xfffff80009eb59f0}, td_runq = {tqe_next = 0x0,
tqe_prev = 0xffffffff815e87a8}, td_slpq = {
tqe_next = 0xfffff803cfb27960, tqe_prev = 0xfffff8006f2e8990}, td_lockq =
{tqe_next = 0x0, tqe_prev = 0xfffffe04662f90c8}, td_hash = {le_next = 0x0,
le_prev = 0xfffffe000087ab88}, td_cpuset = 0xfffff800083cd3a8,
td_sel = 0xfffff80290088500, td_sleepqueue = 0x0, td_turnstile =
0xfffff800b09c4840, td_rlqe = 0xfffff8015a73a910, td_umtxq =
0xfffff8012d91b280, td_tid = 101745, td_sigqueue = {sq_signals = {__bits = {0,
0, 0, 0}},
sq_kill = {__bits = {0, 0, 0, 0}}, sq_list = {tqh_first = 0x0, tqh_last =
0xfffff8006f2e8568}, sq_proc = 0xfffff80009eb59e0, sq_flags = 1},
td_lend_user_pri = 255 'ÿ', td_flags = 133124, td_inhibitors = 2,
td_pflags = 0, td_dupfd = -1, td_sqqueue = 0, td_wchan = 0xfffff801fd0ea100,
td_wmesg = 0xffffffff81006e8d "vodead", td_lastcpu = 7 '\a', td_oncpu = 255
'ÿ', td_owepreempt = 0 '\0', td_tsqueue = 0 '\0', td_locks = 3768,
td_rw_rlocks = 0, td_lk_slocks = 0, td_stopsched = 0, td_blocked = 0x0,
td_lockname = 0x0, td_contested = {lh_first = 0x0}, td_sleeplocks = 0x0,
td_intr_nesting_level = 0, td_pinned = 0, td_ucred = 0xfffff802f6e2f000,
td_estcpu = 0, td_slptick = 729400807, td_blktick = 0, td_swvoltick =
729400807, td_cow = 41, td_ru = {ru_utime = {tv_sec = 0, tv_usec = 0}, ru_stime
= {tv_sec = 0, tv_usec = 0}, ru_maxrss = 1696, ru_ixrss = 312,
ru_idrss = 104, ru_isrss = 3328, ru_minflt = 121, ru_majflt = 0, ru_nswap =
0, ru_inblock = 840, ru_oublock = 0, ru_msgsnd = 0, ru_msgrcv = 0, ru_nsignals
= 0, ru_nvcsw = 795, ru_nivcsw = 0}, td_rux = {
rux_runtime = 547891773, rux_uticks = 1, rux_sticks = 25, rux_iticks = 0,
rux_uu = 8428, rux_su = 210723, rux_tu = 219152}, td_incruntime = 0, td_runtime
= 547891773, td_pticks = 0, td_sticks = 0, td_iticks = 0,
td_uticks = 0, td_intrval = 0, td_oldsigmask = {__bits = {0, 0, 0, 0}},
td_generation = 795, td_sigstk = {ss_sp = 0x0, ss_size = 0, ss_flags = 4},
td_xsig = 0, td_profil_addr = 0, td_profil_ticks = 0,
td_name = "du", '\0' <repeats 17 times>, td_fpop = 0x0, td_dbgflags = 0,
td_dbgksi = {ksi_link = {tqe_next = 0x0, tqe_prev = 0x0}, ksi_info = {si_signo
= 0, si_errno = 0, si_code = 0, si_pid = 0, si_uid = 0,
si_status = 0, si_addr = 0x0, si_value = {sival_int = 0, sival_ptr = 0x0,
sigval_int = 0, sigval_ptr = 0x0}, _reason = {_fault = {_trapno = 0}, _timer =
{_timerid = 0, _overrun = 0}, _mesgq = {_mqd = 0}, _poll = {
_band = 0}, __spare__ = {__spare1__ = 0, __spare2__ = {0, 0, 0, 0, 0,
0, 0}}}}, ksi_flags = 0, ksi_sigq = 0x0}, td_ng_outbound = 0, td_osd =
{osd_nslots = 0, osd_slots = 0x0, osd_next = {le_next = 0x0,
le_prev = 0x0}}, td_map_def_user = 0x0, td_dbg_forked = 0, td_vp_reserv =
0, td_no_sleeping = 0, td_dom_rr_idx = 0, td_sigmask = {__bits = {0, 0, 0, 0}},
td_rqindex = 32 ' ', td_base_pri = 120 'x',
td_priority = 120 'x', td_pri_class = 3 '\003', td_user_pri = 130 '\202',
td_base_user_pri = 130 '\202', td_pcb = 0xfffffe046690db80, td_state =
TDS_INHIBITED, td_retval = {0, 1}, td_slpcallout = {c_links = {le = {
le_next = 0x0, le_prev = 0xfffffe0000ac29b0}, sle = {sle_next = 0x0},
tqe = {tqe_next = 0x0, tqe_prev = 0xfffffe0000ac29b0}}, c_time =
21579501454196634, c_precision = 268435437, c_arg = 0xfffff8006f2e84b0,
c_func = 0xffffffff8099b0d0 <sleepq_timeout>, c_lock = 0x0, c_flags = 0,
c_iflags = 272, c_cpu = 9}, td_frame = 0xfffffe046690dac0, td_kstack_obj =
0xfffff80198fc0e00, td_kstack = 18446741893586919424,
td_kstack_pages = 4, td_critnest = 1, td_md = {md_spinlock_count = 1,
md_saved_flags = 582, md_spurflt_addr = 0}, td_sched = 0xfffff8006f2e8930,
td_ar = 0x0, td_lprof = {{lh_first = 0x0}, {lh_first = 0x0}},
td_dtrace = 0xfffff802a4f1fc00, td_errno = 0, td_vnet = 0x0, td_vnet_lpush =
0x0, td_intr_frame = 0x0, td_rfppwait_p = 0xfffff803cb6dd4f0, td_ma = 0x0,
td_ma_cnt = 0, td_su = 0x0, td_dbg_sc_code = 0, td_dbg_sc_narg = 0,
td_emuldata = 0x0}
We are replicating this filesystem using incremental ZFS snapshots onto a
secondary host (using zrep). I can mount the replicated fs and read the file
there without issue.
I am currently compiling the latest -STABLE kernel and am scheduling a reboot
tonight.