sched::thread::switch_to (this=this@entry=0xffff8000001ba040) at arch/x64/arch-switch.hh:108#1 0x00000000403ff794 in sched::cpu::reschedule_from_interrupt (this=0xffff80000001d040, called_from_yield=called_from_yield@entry=false, preempt_after=..., preempt_after@entry=...) at core/sched.cc:339#2 0x00000000403ffc8c in sched::cpu::schedule () at include/osv/sched.hh:1310#3 0x0000000040400372 in sched::thread::wait (this=this@entry=0xffff8000014a1040) at core/sched.cc:1214#4 0x0000000040428072 in sched::thread::do_wait_for<lockfree::mutex, sched::wait_object<waitqueue> > (mtx=...) at include/osv/mutex.h:41#5 sched::thread::wait_for<waitqueue&> (mtx=...) at include/osv/sched.hh:1220#6 waitqueue::wait (this=this@entry=0x408ec550 <mmu::vma_list_mutex+48>, mtx=...) at core/waitqueue.cc:56#7 0x00000000403e2d83 in rwlock::reader_wait_lockable (this=<optimized out>) at core/rwlock.cc:174#8 rwlock::rlock (this=this@entry=0x408ec520 <mmu::vma_list_mutex>) at core/rwlock.cc:29#9 0x000000004034ad98 in rwlock_for_read::lock (this=0x408ec520 <mmu::vma_list_mutex>) at include/osv/rwlock.h:113#10 std::lock_guard<rwlock_for_read&>::lock_guard (__m=..., this=<synthetic pointer>) at /usr/include/c++/8/bits/std_mutex.h:162#11 lock_guard_for_with_lock<rwlock_for_read&>::lock_guard_for_with_lock (lock=..., this=<synthetic pointer>) at include/osv/mutex.h:89#12 mmu::vm_fault (addr=18446603337326391296, addr@entry=18446603337326395384, ef=ef@entry=0xffff8000014a6068) at core/mmu.cc:1334#13 0x00000000403a746e in page_fault (ef=0xffff8000014a6068) at arch/x64/mmu.cc:38#14 <signal handler called>#15 0x00000000403f2114 in memory::page_range_allocator::insert<true> (this=this@entry=0x40904300 <memory::free_page_ranges>, pr=...) 
at core/mempool.cc:575#16 0x00000000403ef83c in memory::page_range_allocator::<lambda(memory::page_range&)>::operator() (header=..., __closure=<synthetic pointer>) at core/mempool.cc:751#17 memory::page_range_allocator::<lambda(memory::page_range&)>::operator() (header=..., __closure=<synthetic pointer>) at core/mempool.cc:736#18 memory::page_range_allocator::for_each<memory::page_range_allocator::alloc_aligned(size_t, size_t, size_t, bool)::<lambda(memory::page_range&)> > (f=..., min_order=<optimized out>, this=0x40904300 <memory::free_page_ranges>) at core/mempool.cc:809#19 memory::page_range_allocator::alloc_aligned (this=this@entry=0x40904300 <memory::free_page_ranges>, size=size@entry=2097152, offset=offset@entry=0, alignment=alignment@entry=2097152, fill=fill@entry=true) at core/mempool.cc:736#20 0x00000000403f0164 in memory::alloc_huge_page (N=N@entry=2097152) at core/mempool.cc:1601#21 0x000000004035030e in mmu::uninitialized_anonymous_page_provider::map (this=0x40873150 <mmu::page_allocator_init>, offset=83886080, ptep=..., pte=..., write=<optimized out>) at include/osv/mmu-defs.hh:219#22 0x0000000040355b94 in mmu::populate<(mmu::account_opt)1>::page<1> (offset=83886080, ptep=..., this=0x2000001ffd70) at include/osv/mmu-defs.hh:235#23 mmu::page<mmu::populate<>, 1> (ptep=..., offset=83886080, pops=...) 
at core/mmu.cc:311#24 mmu::map_level<mmu::populate<(mmu::account_opt)1>, 2>::operator() (base_virt=35185397596160, parent=..., this=<synthetic pointer>) at core/mmu.cc:437#25 mmu::map_level<mmu::populate<(mmu::account_opt)1>, 3>::map_range<2> (this=<synthetic pointer>, ptep=..., base_virt=35184372088832, slop=4096, page_mapper=..., size=132120576, vcur=<optimized out>) at core/mmu.cc:399#26 mmu::map_level<mmu::populate<(mmu::account_opt)1>, 3>::operator() (base_virt=35184372088832, parent=..., this=<synthetic pointer>) at core/mmu.cc:449#27 mmu::map_level<mmu::populate<(mmu::account_opt)1>, 4>::map_range<3> (this=<synthetic pointer>, ptep=..., base_virt=35184372088832, slop=4096, page_mapper=..., size=134217728, vcur=<optimized out>) at core/mmu.cc:399#28 mmu::map_level<mmu::populate<(mmu::account_opt)1>, 4>::operator() (base_virt=35184372088832, parent=..., this=<synthetic pointer>)--Type <RET> for more, q to quit, c to continue without paging-- at core/mmu.cc:449#29 mmu::map_range<mmu::populate<(mmu::account_opt)1> > (vma_start=vma_start@entry=35185313710080, vstart=vstart@entry=35185313710080, size=<optimized out>, page_mapper=..., slop=slop@entry=4096) at core/mmu.cc:354#30 0x0000000040356385 in mmu::operate_range<mmu::populate<(mmu::account_opt)1> > (size=<optimized out>, start=0x200038200000, vma_start=<optimized out>, mapper=...) 
at core/mmu.cc:801#31 mmu::vma::operate_range<mmu::populate<(mmu::account_opt)1> > (size=3, addr=0x200038200000, mapper=..., this=0xffffa000012deb00) at core/mmu.cc:1412#32 mmu::populate_vma<(mmu::account_opt)1> (vma=vma@entry=0xffffa000012deb00, v=v@entry=0x200038200000, size=size@entry=134217728, write=write@entry=false) at core/mmu.cc:1206#33 0x000000004034e8d2 in mmu::map_anon (addr=addr@entry=0x0, size=size@entry=134217728, flags=flags@entry=2, perm=perm@entry=3) at core/mmu.cc:1222#34 0x000000004046503d in mmap (addr=addr@entry=0x0, length=length@entry=134217728, prot=prot@entry=3, flags=flags@entry=32802, fd=fd@entry=-1, offset=offset@entry=0) at libc/mman.cc:156#35 0x0000100000006624 in exhaust_memory (size=size@entry=134217728) at /home/wkozaczuk/projects/osv/tests/tst-huge.cc:31#36 0x000010000000621e in main (argc=<optimized out>, argv=<optimized out>) at /home/wkozaczuk/projects/osv/tests/tst-huge.cc:99#37 0x000000004043090d in osv::application::run_main (this=0xffffa00001130e10) at /usr/include/c++/8/bits/stl_vector.h:805#38 0x0000000040226b51 in osv::application::main (this=0xffffa00001130e10) at core/app.cc:320#39 0x0000000040430ab9 in osv::application::<lambda(void*)>::operator() (__closure=0x0, app=<optimized out>) at core/app.cc:233#40 osv::application::<lambda(void*)>::_FUN(void *) () at core/app.cc:235#41 0x000000004045eec6 in pthread_private::pthread::<lambda()>::operator() (__closure=0xffffa0000149c200) at libc/pthread.cc:114#42 std::_Function_handler<void(), pthread_private::pthread::pthread(void* (*)(void*), void*, sigset_t, const pthread_private::thread_attr*)::<lambda()> >::_M_invoke(const std::_Any_data &) (__functor=...) at /usr/include/c++/8/bits/std_function.h:297#43 0x0000000040401117 in sched::thread_main_c (t=0xffff8000014a1040) at arch/x64/arch-switch.hh:271#44 0x00000000403a7263 in thread_main () at arch/x64/entry.S:113This patch definitely fixes an apparent bug I introduced myself in the past. 
I have tested that issue #1048 goes away with 4,5,6, 7 or 8GB of memory. I have also verified using cli module that free memory is reported properly now.
However, there is still 1 question and 1 issue outstanding:1. I do not understand how this bug in arch_setup_free_memory() would lead to a page fault reported by issue 1048 or other "read errors" with higher memory (8GB, and so on). I would expect this bug to lead to OSv missing to use the memory above 1GB in the e820 block but still be able to operate properly without the page fault. Is there another underlying bug that this patch actually covers?
2. After this patch the tst-huge.so does not pass - actually hangs or never completes. I have played with it a bit and discovered that it passes if I run it with the right amount of memory - 128M < m <= 1G, but fails with anything above 1GB (the default is 2GB). It could be that the test is flaky and has to have the right amount of free memory to pass (?).
Here is the stacktrace of where it was stuck:sched::thread::switch_to (this=this@entry=0xffff8000001ba040) at arch/x64/arch-switch.hh:108#1 0x00000000403ff794 in sched::cpu::reschedule_from_interrupt (this=0xffff80000001d040, called_from_yield=called_from_yield@entry=false,preempt_after=..., preempt_after@entry=...) at core/sched.cc:339#2 0x00000000403ffc8c in sched::cpu::schedule () at include/osv/sched.hh:1310#3 0x0000000040400372 in sched::thread::wait (this=this@entry=0xffff8000014a1040) at core/sched.cc:1214#4 0x0000000040428072 in sched::thread::do_wait_for<lockfree::mutex, sched::wait_object<waitqueue> > (mtx=...) at include/osv/mutex.h:41#5 sched::thread::wait_for<waitqueue&> (mtx=...) at include/osv/sched.hh:1220#6 waitqueue::wait (this=this@entry=0x408ec550 <mmu::vma_list_mutex+48>, mtx=...) at core/waitqueue.cc:56#7 0x00000000403e2d83 in rwlock::reader_wait_lockable (this=<optimized out>) at core/rwlock.cc:174#8 rwlock::rlock (this=this@entry=0x408ec520 <mmu::vma_list_mutex>) at core/rwlock.cc:29#9 0x000000004034ad98 in rwlock_for_read::lock (this=0x408ec520 <mmu::vma_list_mutex>) at include/osv/rwlock.h:113#10 std::lock_guard<rwlock_for_read&>::lock_guard (__m=..., this=<synthetic pointer>) at /usr/include/c++/8/bits/std_mutex.h:162#11 lock_guard_for_with_lock<rwlock_for_read&>::lock_guard_for_with_lock (lock=..., this=<synthetic pointer>) at include/osv/mutex.h:89#12 mmu::vm_fault (addr=18446603337326391296, addr@entry=18446603337326395384, ef=ef@entry=0xffff8000014a6068) at core/mmu.cc:1334#13 0x00000000403a746e in page_fault (ef=0xffff8000014a6068) at arch/x64/mmu.cc:38
--
You received this message because you are subscribed to the Google Groups "OSv Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+u...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/d929a80d-7bac-4db9-beb2-47e46fc695ef%40googlegroups.com.
Thanks for this - I was hitting a weird page fault issue on our
application as we've recently moved from 0.52 to the latest OSv.
Something like this, which occurs early on in startup:
Assertion failed: ef->rflags & processor::rflags_if (arch/x64/mmu.cc:
page_fault: 34)
--
You received this message because you are subscribed to the Google Groups "OSv Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+u...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/6e20f4ba3085db676f28f9799cf2c7eb28fb94bf.camel%40rossfell.co.uk.
Thanks for committing this patch. But unfortunately the tst-huge.so problem is going to make the unit tests fail.
I saw you opened the issue about it. Shall we disable the test until we fix it or somehow change setup and make this test run with 1GB memory instead?
Waldek
Hi,
Thanks for committing this patch. But unfortunately the tst-huge.so problem is going to make the unit tests fail.
I saw you opened the issue about it. Shall we disable the test until we fix it or somehow change setup and make this test run with 1GB memory instead?
On Wed, Aug 21, 2019 at 6:04 AM Waldek Kozaczuk <jwkoz...@gmail.com> wrote:This patch definitely fixes an apparent bug I introduced myself in the past. I have tested that issue #1048 goes away with 4,5,6, 7 or 8GB of memory. I have also verified using cli module that free memory is reported properly now.Thanks, I committed your patch, which indeed fixes that bug. But you are asking good questions:However, there is still 1 question and 1 issue outstanding:1. I do not understand how this bug in arch_setup_free_memory() would lead to a page fault reported by issue 1048 or other "read errors" with higher memory (8GB, and so on). I would expect this bug to lead to OSv missing to use the memory above 1GB in the e820 block but still be able to operate properly without the page fault. Is there another underlying bug that this patch actually covers?What I don't understand (I don't understand the details of this initialization code, unfortunately), is that if your code by mistake did not use the code above 1GB, how does changing the amount of memory from 2GB to 4GB, etc., make any difference at all
mmu::linear_map(elf_start, elf_phys, elf_size, OSV_KERNEL_BASE); // get rid of the command line, before low memory is unmapped parse_cmdline(mb);+ memset((void*)0x0, 0, 0x9fc00);+ memset((void*)0x100000, 0, 0x100000); // now that we have some free memory, we can start mapping the rest mmu::switch_to_runtime_page_tables();1G)
start: 0x0000000000000000, size: 0x000000000009fc00start: 0x0000000000100000, size: 0x000000003fede000
2G)
start: 0x0000000000000000, size: 0x000000000009fc00start: 0x0000000000100000, size: 0x000000007fede000
3.5G)
start: 0x0000000000000000, size: 0x000000000009fc00start: 0x0000000000100000, size: 0x00000000bfede000
start: 0x0000000100000000, size: 0x0000000020000000
4G)
start: 0x0000000000000000, size: 0x000000000009fc00start: 0x0000000000100000, size: 0x00000000bfede000
start: 0x0000000100000000, size: 0x0000000040000000
8G)start: 0x0000000000000000, size: 0x000000000009fc00start: 0x0000000000100000, size: 0x00000000bfede000
start: 0x0000000100000000, size: 0x0000000140000000
free_initial_memory_range, addr: 000000000093daf4free_initial_memory_range, size: 000000003f6c250c
free_initial_memory_range, addr: 0000000000000000free_initial_memory_range, size: 000000000009fc00
free_initial_memory_range, addr: 0000000000100000free_initial_memory_range, size: 0000000000100000
free_initial_memory_range, addr: 0000000040000000free_initial_memory_range, size: 000000003ffde000free_initial_memory_range, addr: 000000000093daf4free_initial_memory_range, size: 000000003f6c250c
free_initial_memory_range, addr: 0000000000000000free_initial_memory_range, size: 000000000009fc00
free_initial_memory_range, addr: 0000000000100000free_initial_memory_range, size: 0000000000100000
To unsubscribe from this group and stop receiving emails from it, send an email to osv...@googlegroups.com.
On Wed, 2019-08-21 at 12:22 +0300, Nadav Har'El wrote:
> I am guessing (need to verify...) that our rwlock implementation is
> not recursive - a thread already holding the write lock needs to wait
> (forever) for the read lock. If this is true, this is an rwlock bug.
So I was puzzled a bit by the rwlock code. It handles recursive write
lock acquisitions by incrementing a counter:
// recursive write lock
if (_wowner == sched::thread::current()) {
_wrecurse++;
}
On the unlock side though, it does decrement the counter but then goes
on to wake a write_waiter - which seems wrong. Probably I am missing
something - but why is the second part not inside the else clause where
_wowner is set to nullptr?
// Release the write lock. Supports recursive acquisition: if the
// current thread took the write lock more than once, only the
// outermost wunlock() actually frees the lock.
//
// Bug fix: the original version performed the waiter wakeup
// unconditionally, so an *inner* recursive unlock (the `_wrecurse > 0`
// branch) woke a write waiter (or all read waiters) even though the
// lock was still held by the current thread — a spurious wakeup at
// best, and the woken writer would find the lock unavailable. The
// wakeup now happens only on the path that actually releases the lock.
void rwlock::wunlock()
{
    WITH_LOCK(_mtx) {
        // Only the owning thread may release the write lock.
        assert(_wowner == sched::thread::current());
        if (_wrecurse > 0) {
            // Inner recursive release: the lock stays held, so nobody
            // can be admitted — just pop one recursion level.
            _wrecurse--;
        } else {
            // Outermost release: the lock becomes free.
            _wowner = nullptr;
            // Writer preference: hand the lock to one waiting writer
            // if any; otherwise admit all waiting readers.
            if (!_write_waiters.empty()) {
                _write_waiters.wake_one(_mtx);
            } else {
                _read_waiters.wake_all(_mtx);
            }
        }
    }
}
I think you're right that if you hold the write lock and then try and
readlock it will fail:
// Tell whether a read lock may be granted right now: true only when no
// writer owns the lock and no writer is queued (writer preference keeps
// pending writers from being starved by a stream of readers).
//
// NOTE(review): this predicate does not special-case the current thread
// already holding the write lock — a write owner asking for a read lock
// will block; presumably intentional (non-recursive across modes), but
// worth confirming against the callers.
bool rwlock::read_lockable()
{
    bool writer_active = (_wowner != nullptr);
    bool writers_queued = !_write_waiters.empty();
    return !(writer_active || writers_queued);
}
Rick
--
You received this message because you are subscribed to the Google Groups "OSv Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+u...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/d75b970c54f9fe25cda79f2cc0aa0e7eaee3ec2f.camel%40rossfell.co.uk.
Rick
#50 <signal handler called>
#51 0x0000100005bc6d43 in ?? ()
}
}
Rick
--
You received this message because you are subscribed to a topic in the Google Groups "OSv Development" group.
To unsubscribe from this topic, visit https://groups.google.com/d/topic/osv-dev/N5Knl4HE25o/unsubscribe.
To unsubscribe from this group and all its topics, send an email to osv-dev+u...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/8d0280c9332d190ec647129e1a2afb30db2629db.camel%40rossfell.co.uk.