Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Call for testing: VM bugs in 10.3

1 view
Skip to first unread message

Konstantin Belousov

unread,
Aug 2, 2016, 3:26:11 PM8/2/16
to
Below is the merge of some high-profile virtual memory subsystem bug
fixes from stable/10 to 10.3. I merged fixes for bugs reported by
users; issues which are unlikely, even theoretically, to occur in
real-world loads are not included in the patch set. The latter are mostly
corrections for the handling of radix insertion failures. Included fixes
are for random SIGSEGV delivered to processes, hangs on "vodead" state
on filesystem operations, and several others.

List of the merged revisions:
r301184 prevent parallel object collapses, fixes object lifecycle
r301436 do not leak the vm object lock, fixes overcommit disable
r302243 avoid the active object marking for vm.vmtotal sysctl, fixes
"vodead" hangs
r302513 vm_fault() race with the vm_object_collapse(), fixes spurious SIGSEGV
r303291 postpone BO_DEAD, fixes panic on fast vnode reclaim

I am asking for some testing; it is not necessary for your system to
exhibit the problematic behaviour for your testing to be useful. I am
mostly looking for a smoke-testing kind of confirmation that the patch is
fine. Neither I nor the people who usually help me with testing run 10.3
systems.

If everything appears to be fine, my intent is to ask re/so to issue
an Errata Notice with these changes in about a week from now.

Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c (revision 303659)
+++ sys/kern/vfs_subr.c (working copy)
@@ -2934,7 +2934,13 @@ vgonel(struct vnode *vp)
TAILQ_EMPTY(&vp->v_bufobj.bo_clean.bv_hd) &&
vp->v_bufobj.bo_clean.bv_cnt == 0,
("vp %p bufobj not invalidated", vp));
- vp->v_bufobj.bo_flag |= BO_DEAD;
+
+ /*
+ * For VMIO bufobj, BO_DEAD is set in vm_object_terminate()
+ * after the object's page queue is flushed.
+ */
+ if (vp->v_bufobj.bo_object == NULL)
+ vp->v_bufobj.bo_flag |= BO_DEAD;
BO_UNLOCK(&vp->v_bufobj);

/*
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c (revision 303659)
+++ sys/vm/vm_fault.c (working copy)
@@ -286,7 +286,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_
vm_prot_t prot;
long ahead, behind;
int alloc_req, era, faultcount, nera, reqpage, result;
- boolean_t growstack, is_first_object_locked, wired;
+ boolean_t dead, growstack, is_first_object_locked, wired;
int map_generation;
vm_object_t next_object;
vm_page_t marray[VM_FAULT_READ_MAX];
@@ -423,11 +423,18 @@ fast_failed:
fs.pindex = fs.first_pindex;
while (TRUE) {
/*
- * If the object is dead, we stop here
+ * If the object is marked for imminent termination,
+ * we retry here, since the collapse pass has raced
+ * with us. Otherwise, if we see terminally dead
+ * object, return fail.
*/
- if (fs.object->flags & OBJ_DEAD) {
+ if ((fs.object->flags & OBJ_DEAD) != 0) {
+ dead = fs.object->type == OBJT_DEAD;
unlock_and_deallocate(&fs);
- return (KERN_PROTECTION_FAILURE);
+ if (dead)
+ return (KERN_PROTECTION_FAILURE);
+ pause("vmf_de", 1);
+ goto RetryFault;
}

/*
Index: sys/vm/vm_meter.c
===================================================================
--- sys/vm/vm_meter.c (revision 303659)
+++ sys/vm/vm_meter.c (working copy)
@@ -93,30 +93,32 @@ SYSCTL_PROC(_vm, VM_LOADAVG, loadavg, CTLTYPE_STRU
CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_loadavg, "S,loadavg",
"Machine loadaverage history");

+/*
+ * This function aims to determine if the object is mapped,
+ * specifically, if it is referenced by a vm_map_entry. Because
+ * objects occasionally acquire transient references that do not
+ * represent a mapping, the method used here is inexact. However, it
+ * has very low overhead and is good enough for the advisory
+ * vm.vmtotal sysctl.
+ */
+static bool
+is_object_active(vm_object_t obj)
+{
+
+ return (obj->ref_count > obj->shadow_count);
+}
+
static int
vmtotal(SYSCTL_HANDLER_ARGS)
{
- struct proc *p;
struct vmtotal total;
- vm_map_entry_t entry;
vm_object_t object;
- vm_map_t map;
- int paging;
+ struct proc *p;
struct thread *td;
- struct vmspace *vm;

bzero(&total, sizeof(total));
+
/*
- * Mark all objects as inactive.
- */
- mtx_lock(&vm_object_list_mtx);
- TAILQ_FOREACH(object, &vm_object_list, object_list) {
- VM_OBJECT_WLOCK(object);
- vm_object_clear_flag(object, OBJ_ACTIVE);
- VM_OBJECT_WUNLOCK(object);
- }
- mtx_unlock(&vm_object_list_mtx);
- /*
* Calculate process statistics.
*/
sx_slock(&allproc_lock);
@@ -136,11 +138,15 @@ vmtotal(SYSCTL_HANDLER_ARGS)
case TDS_INHIBITED:
if (TD_IS_SWAPPED(td))
total.t_sw++;
- else if (TD_IS_SLEEPING(td) &&
- td->td_priority <= PZERO)
- total.t_dw++;
- else
- total.t_sl++;
+ else if (TD_IS_SLEEPING(td)) {
+ if (td->td_priority <= PZERO)
+ total.t_dw++;
+ else
+ total.t_sl++;
+ if (td->td_wchan ==
+ &cnt.v_free_count)
+ total.t_pw++;
+ }
break;

case TDS_CAN_RUN:
@@ -158,29 +164,6 @@ vmtotal(SYSCTL_HANDLER_ARGS)
}
}
PROC_UNLOCK(p);
- /*
- * Note active objects.
- */
- paging = 0;
- vm = vmspace_acquire_ref(p);
- if (vm == NULL)
- continue;
- map = &vm->vm_map;
- vm_map_lock_read(map);
- for (entry = map->header.next;
- entry != &map->header; entry = entry->next) {
- if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
- (object = entry->object.vm_object) == NULL)
- continue;
- VM_OBJECT_WLOCK(object);
- vm_object_set_flag(object, OBJ_ACTIVE);
- paging |= object->paging_in_progress;
- VM_OBJECT_WUNLOCK(object);
- }
- vm_map_unlock_read(map);
- vmspace_free(vm);
- if (paging)
- total.t_pw++;
}
sx_sunlock(&allproc_lock);
/*
@@ -206,9 +189,18 @@ vmtotal(SYSCTL_HANDLER_ARGS)
*/
continue;
}
+ if (object->ref_count == 1 &&
+ (object->flags & OBJ_NOSPLIT) != 0) {
+ /*
+ * Also skip otherwise unreferenced swap
+ * objects backing tmpfs vnodes, and POSIX or
+ * SysV shared memory.
+ */
+ continue;
+ }
total.t_vm += object->size;
total.t_rm += object->resident_page_count;
- if (object->flags & OBJ_ACTIVE) {
+ if (is_object_active(object)) {
total.t_avm += object->size;
total.t_arm += object->resident_page_count;
}
@@ -216,7 +208,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
/* shared object */
total.t_vmshr += object->size;
total.t_rmshr += object->resident_page_count;
- if (object->flags & OBJ_ACTIVE) {
+ if (is_object_active(object)) {
total.t_avmshr += object->size;
total.t_armshr += object->resident_page_count;
}
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c (revision 303659)
+++ sys/vm/vm_object.c (working copy)
@@ -737,6 +737,10 @@ vm_object_terminate(vm_object_t object)

vinvalbuf(vp, V_SAVE, 0, 0);

+ BO_LOCK(&vp->v_bufobj);
+ vp->v_bufobj.bo_flag |= BO_DEAD;
+ BO_UNLOCK(&vp->v_bufobj);
+
VM_OBJECT_WLOCK(object);
}

@@ -1722,6 +1726,9 @@ vm_object_collapse(vm_object_t object)
* case.
*/
if (backing_object->ref_count == 1) {
+ vm_object_pip_add(object, 1);
+ vm_object_pip_add(backing_object, 1);
+
/*
* If there is exactly one reference to the backing
* object, we can collapse it into the parent.
@@ -1793,11 +1800,13 @@ vm_object_collapse(vm_object_t object)
KASSERT(backing_object->ref_count == 1, (
"backing_object %p was somehow re-referenced during collapse!",
backing_object));
+ vm_object_pip_wakeup(backing_object);
backing_object->type = OBJT_DEAD;
backing_object->ref_count = 0;
VM_OBJECT_WUNLOCK(backing_object);
vm_object_destroy(backing_object);

+ vm_object_pip_wakeup(object);
object_collapses++;
} else {
vm_object_t new_backing_object;
@@ -2130,6 +2139,7 @@ vm_object_coalesce(vm_object_t prev_object, vm_oof
*/
if (!reserved && !swap_reserve_by_cred(ptoa(next_size),
prev_object->cred)) {
+ VM_OBJECT_WUNLOCK(prev_object);
return (FALSE);
}
prev_object->charge += ptoa(next_size);
Index: sys/vm/vm_object.h
===================================================================
--- sys/vm/vm_object.h (revision 303659)
+++ sys/vm/vm_object.h (working copy)
@@ -181,7 +181,6 @@ struct vm_object {
*/
#define OBJ_FICTITIOUS 0x0001 /* (c) contains fictitious pages */
#define OBJ_UNMANAGED 0x0002 /* (c) contains unmanaged pages */
-#define OBJ_ACTIVE 0x0004 /* active objects */
#define OBJ_DEAD 0x0008 /* dead objects (during rundown) */
#define OBJ_NOSPLIT 0x0010 /* dont split this object */
#define OBJ_PIPWNT 0x0040 /* paging in progress wanted */
Index: .
===================================================================
--- . (revision 303659)
+++ . (working copy)

Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,2 ##
Merged /head:r300758,300959,302063,302236,302317,302567,302580
Merged /stable/10:r301184,301436,302243,302513,303291
_______________________________________________
freebsd...@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stabl...@freebsd.org"

pete wright

unread,
Aug 2, 2016, 3:47:52 PM8/2/16
to
On Aug 2, 2016 12:26 PM, "Konstantin Belousov" <kost...@gmail.com> wrote:
>
> Below is the merge of some high-profile virtual memory subsystem bug
> fixes from stable/10 to 10.3. I merged fixes for bugs reported by
> users, issues which are even theoretically unlikely to occur in real
> world loads, are not included into the patch set. The later is mostly
> corrections for the handling of radix insertion failures. Included fixes
> are for random SIGSEGV delivered to processes, hangs on "vodead" state
> on filesystem operations, and several others.
>
> List of the merged revisions:
> r301184 prevent parallel object collapses, fixes object lifecycle
> r301436 do not leak the vm object lock, fixes overcommit disable
> r302243 avoid the active object marking for vm.vmtotal sysctl, fixes
> "vodead" hangs
> r302513 vm_fault() race with the vm_object_collapse(), fixes spurious
SIGSEGV
> r303291 postpone BO_DEAD, fixes panic on fast vnode reclaim
>
> I am asking for some testing, it is not necessary for your system to
> exhibit the problematic behaviour for your testing to be useful. I am
> more looking for smoke-testing kind of confirmation that patch is fine.
> Neither I nor people who usually help me with testing, run 10.3 systems.
>

Is testing on 10.3-RELEASE useful, or is this only for people tracking
STABLE?

Thanks!
-pete

Ian Lepore

unread,
Aug 2, 2016, 3:58:19 PM8/2/16
to
On Tue, 2016-08-02 at 22:25 +0300, Konstantin Belousov wrote:
> Below is the merge of some high-profile virtual memory subsystem bug
> fixes from stable/10 to 10.3. I merged fixes for bugs reported by
> users, issues which are even theoretically unlikely to occur in real
> world loads, are not included into the patch set. The later is mostly
> corrections for the handling of radix insertion failures. Included
> fixes
> are for random SIGSEGV delivered to processes, hangs on "vodead"
> state
> on filesystem operations, and several others.
>
> List of the merged revisions:
> r301184 prevent parallel object collapses, fixes object lifecycle
> r301436 do not leak the vm object lock, fixes overcommit disable
> r302243 avoid the active object marking for vm.vmtotal sysctl, fixes
> "vodead" hangs
> r302513 vm_fault() race with the vm_object_collapse(), fixes spurious
> SIGSEGV
> r303291 postpone BO_DEAD, fixes panic on fast vnode reclaim
>
> I am asking for some testing, it is not necessary for your system to
> exhibit the problematic behaviour for your testing to be useful. I am
> more looking for smoke-testing kind of confirmation that patch is
> fine.
> Neither I nor people who usually help me with testing, run 10.3
> systems.
>
> If everything appear to be fine, my intent is to ask re/so to issue
> Errata Notice with these changes in about a week from now.
>
> Index: sys/kern/vfs_subr.c
> [...]

I run 10-stable on my everyday desktop/build machine, but my mail
client ruined the format of the patches. Can I just 'svn up' on the 10
-stable branch and then MFC the revs you list above, or are there hand
-tweaks to the patches you attached?

-- Ian

Konstantin Belousov

unread,
Aug 2, 2016, 3:59:18 PM8/2/16
to
On Tue, Aug 02, 2016 at 12:47:23PM -0700, pete wright wrote:
> On Aug 2, 2016 12:26 PM, "Konstantin Belousov" <kost...@gmail.com> wrote:
> >
> > Below is the merge of some high-profile virtual memory subsystem bug
> > fixes from stable/10 to 10.3. I merged fixes for bugs reported by
> > users, issues which are even theoretically unlikely to occur in real
> > world loads, are not included into the patch set. The later is mostly
> > corrections for the handling of radix insertion failures. Included fixes
> > are for random SIGSEGV delivered to processes, hangs on "vodead" state
> > on filesystem operations, and several others.
> >
> > List of the merged revisions:
> > r301184 prevent parallel object collapses, fixes object lifecycle
> > r301436 do not leak the vm object lock, fixes overcommit disable
> > r302243 avoid the active object marking for vm.vmtotal sysctl, fixes
> > "vodead" hangs
> > r302513 vm_fault() race with the vm_object_collapse(), fixes spurious
> SIGSEGV
> > r303291 postpone BO_DEAD, fixes panic on fast vnode reclaim
> >
> > I am asking for some testing, it is not necessary for your system to
> > exhibit the problematic behaviour for your testing to be useful. I am
> > more looking for smoke-testing kind of confirmation that patch is fine.
> > Neither I nor people who usually help me with testing, run 10.3 systems.
> >
>
> Is testing on 10.3-RELEASE useful, or is this only for people tracking
> STABLE?
This is only for people running 10.3. The list of merged revisions is from
stable/10, where the fixes were already merged a month or more ago.

Konstantin Belousov

unread,
Aug 2, 2016, 4:16:54 PM8/2/16
to
On Tue, Aug 02, 2016 at 01:57:49PM -0600, Ian Lepore wrote:
> On Tue, 2016-08-02 at 22:25 +0300, Konstantin Belousov wrote:
> > Below is the merge of some high-profile virtual memory subsystem bug
> > fixes from stable/10 to 10.3.

> I run 10-stable on my everyday desktop/build machine, but my mail
> client ruined the format of the patches. Can I just 'svn up' on the 10
> -stable branch and then MFC the revs you list above, or are there hand
> -tweaks to the patches you attached?
If you svn up stable/10, or if your existing sources are already past
r303291, then you already have these patches.

The first sentence of my mail stated that the backport is from stable/10
to 10.3.

Anyway, I put the patch at https://kib.kiev.ua/kib/vm-10.3-bp.1.patch .
0 new messages