#0  sched::thread::switch_to (this=0xffff8000009c8040, this@entry=0xffff80007fee1040) at arch/x64/arch-switch.hh:108
#1  0x000000004040c57a in sched::cpu::reschedule_from_interrupt (this=0xffff80000001f040, called_from_yield=called_from_yield@entry=false, preempt_after=..., preempt_after@entry=...) at core/sched.cc:339
#2  0x000000004040d2e8 in sched::cpu::schedule () at include/osv/sched.hh:1316
#3  0x000000004040d406 in sched::thread::wait (this=this@entry=0xffff800003fa8040) at core/sched.cc:1216
#4  0x000000004043a856 in sched::thread::do_wait_for<lockfree::mutex, sched::wait_object<waitqueue> > (mtx=...) at include/osv/mutex.h:41
#5  sched::thread::wait_for<waitqueue&> (mtx=...) at include/osv/sched.hh:1226
#6  waitqueue::wait (this=this@entry=0x408f04d0 <mmu::vma_list_mutex+48>, mtx=...) at core/waitqueue.cc:56
#7  0x00000000403ea41b in rwlock::reader_wait_lockable (this=<optimized out>) at core/rwlock.cc:174
#8  rwlock::rlock (this=this@entry=0x408f04a0 <mmu::vma_list_mutex>) at core/rwlock.cc:29
#9  0x000000004034cbac in rwlock_for_read::lock (this=0x408f04a0 <mmu::vma_list_mutex>) at include/osv/rwlock.h:113
#10 std::lock_guard<rwlock_for_read&>::lock_guard (__m=..., this=<synthetic pointer>) at /usr/include/c++/9/bits/std_mutex.h:159
#11 lock_guard_for_with_lock<rwlock_for_read&>::lock_guard_for_with_lock (lock=..., this=<synthetic pointer>) at include/osv/mutex.h:89
#12 mmu::vm_fault (addr=35184666537984, addr@entry=35184666541728, ef=ef@entry=0xffff800003fad068) at core/mmu.cc:1333
#13 0x00000000403ad539 in page_fault (ef=0xffff800003fad068) at arch/x64/mmu.cc:42
#14 <signal handler called>
#15 arch::ensure_next_stack_page () at arch/x64/arch.hh:37
#16 sched::preempt_disable () at include/osv/sched.hh:1008
#17 preempt_lock_t::lock (this=<optimized out>) at include/osv/preempt-lock.hh:15
#18 std::lock_guard<preempt_lock_t>::lock_guard (__m=..., this=<synthetic pointer>) at /usr/include/c++/9/bits/std_mutex.h:159
#19 lock_guard_for_with_lock<preempt_lock_t>::lock_guard_for_with_lock (lock=..., this=<synthetic pointer>) at include/osv/mutex.h:89
#20 memory::pool::alloc (this=0x40907608 <memory::malloc_pools+168>) at core/mempool.cc:214
#21 0x00000000403f936f in std_malloc (size=80, alignment=16) at core/mempool.cc:1679
#22 0x00000000403f97db in malloc (size=80) at core/mempool.cc:1887
#23 0x00000000404c0089 in operator new(unsigned long) ()
#24 0x000000004034c9d2 in mmu::anon_vma::split (this=0xffffa00001f69e80, edge=4136108032) at include/osv/addr_range.hh:16
#25 0x000000004034ef93 in mmu::evacuate (start=<optimized out>, end=<optimized out>) at /usr/include/boost/move/detail/meta_utils.hpp:267
#26 0x000000004035007c in mmu::allocate (v=v@entry=0xffffa000050e4600, start=start@entry=4127195136, size=size@entry=8912896, search=search@entry=false) at core/mmu.cc:1116
#27 0x0000000040350e87 in mmu::map_anon (addr=addr@entry=0xf6000000, size=size@entry=8912896, flags=flags@entry=1, perm=perm@entry=3) at core/mmu.cc:1219
#28 0x000000004047d9f0 in mmap (addr=0xf6000000, length=8912896, prot=<optimized out>, flags=<optimized out>, fd=<optimized out>, offset=0) at libc/mman.cc:152
#29 0x0000100000f2dcef in os::Linux::commit_memory_impl(char*, unsigned long, bool) ()
#30 0x0000100000f2dfe9 in os::pd_commit_memory(char*, unsigned long, unsigned long, bool) ()
#31 0x0000100000f22a4a in os::commit_memory(char*, unsigned long, unsigned long, bool) ()
#32 0x0000100000f956db in PSVirtualSpace::expand_by(unsigned long) ()
#33 0x0000100000f96998 in PSYoungGen::resize_generation(unsigned long, unsigned long) ()
#34 0x0000100000f95ad2 in PSYoungGen::resize(unsigned long, unsigned long) ()
#35 0x0000100000f92e9f in PSScavenge::invoke_no_policy() ()
#36 0x0000100000f93673 in PSScavenge::invoke() ()
#37 0x0000100000f4de5d in ParallelScavengeHeap::failed_mem_allocate(unsigned long) ()
#38 0x00001000010cfd97 in VM_ParallelGCFailedAllocation::doit() ()
#39 0x00001000010d7572 in VM_Operation::evaluate() ()
#40 0x00001000010d526b in VMThread::evaluate_operation(VM_Operation*) ()
#41 0x00001000010d65ab in VMThread::loop() ()
#42 0x00001000010d6a13 in VMThread::run() ()
#43 0x0000100000f2e152 in java_start(Thread*) ()
#44 0x00000000404773ba in pthread_private::pthread::<lambda()>::operator() (__closure=0xffffa00002ddbd00) at libc/pthread.cc:115
#45 std::_Function_handler<void(), pthread_private::pthread::pthread(void* (*)(void*), void*, sigset_t, const pthread_private::thread_attr*)::<lambda()> >::_M_invoke(const std::_Any_data &) (__functor=...) at /usr/include/c++/9/bits/std_function.h:300
#46 0x000000004040da1c in sched::thread_main_c (t=0xffff800003fa8040) at arch/x64/arch-switch.hh:326
#47 0x00000000403ad2f3 in thread_main () at arch/x64/entry.S:113

The first bug is not really the reason for the deadlock you see above - see frame 27 (core/mmu.cc:1219) and frame 12 (core/mmu.cc:1333). I think we have a situation where an ongoing memory allocation (see memory::pool::alloc in frame 20) needs to disable preemption, which makes ensure_next_stack_page() trigger a page fault; handling that fault tries to allocate memory and ends up trying to acquire the same lock - mmu::vma_list_mutex.

I am not sure what the right way to fix it is. One way to prevent this deadlock could be to prevent the fault itself. I experimented a bit, and adding these 3 lines to mmap() to trigger the fault earlier and ensure 2 pages of stack made the deadlock go away:

--- a/libc/mman.cc
+++ b/libc/mman.cc
@@ -154,6 +149,9 @@ void *mmap(void *addr, size_t length, int prot, int flags,
         }
     }
     try {
+        char i;
+        asm volatile("movb -4096(%%rsp), %0" : "=r"(i));
+        asm volatile("movb -8096(%%rsp), %0" : "=r"(i));
         ret = mmu::map_anon(addr, length, mmap_flags, mmap_perm);
     } catch (error& err) {
         err.to_libc(); // sets errno

Now the most troubling question is whether this is just one example of a situation where a memory allocation causes a page fault via ensure_next_stack_page() when disabling preemption or interrupts, and we simply need to pre-fault deeper in all such places (hopefully not too many), or whether there are many other types of scenarios we have not foreseen.
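If more call sites turn out to need the same treatment, the two probes above could be pulled into a small helper so the pre-faulting happens in one place. The following is only a rough sketch of that idea, not code from the tree - the name prefault_stack_pages() and the hard-coded 4096-byte page size are my own assumptions:

// Hypothetical helper (not in the tree): touch one byte in each of the
// next "pages" stack pages so that any lazy-stack page fault is taken
// here, before the caller disables preemption or grabs vma_list_mutex.
static inline void prefault_stack_pages(int pages)
{
    volatile char probe = 0;
    for (int i = 1; i <= pages; i++) {
        // The volatile read cannot be optimized away, so each page below
        // the current frame really gets faulted in.
        (void)*((volatile char*)&probe - i * 4096);
    }
}

mmap() could then call something like prefault_stack_pages(2) before taking any locks, which should have the same effect as the two open-coded asm statements in the patch above.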
I am not sure if this patch still applies to the current source tree, but could you please try to review it as is? Otherwise I will revise it against the latest master.
Waldek