Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

reduce size of pv_pte

4 views
Skip to first unread message

YAMAMOTO Takashi

unread,
Apr 25, 2011, 6:40:54 PM4/25/11
to
hi,

the following patch reduces the size of pv_pte, thus pv_entry and vm_page.
comments?

YAMAMOTO Takashi

Index: include/pmap_pv.h
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/include/pmap_pv.h,v
retrieving revision 1.2
diff -u -p -r1.2 pmap_pv.h
--- include/pmap_pv.h 28 Jan 2008 11:06:42 -0000 1.2
+++ include/pmap_pv.h 25 Apr 2011 22:35:38 -0000
@@ -44,9 +44,10 @@ struct vm_page;
* pv_pte: describe a pte
*/

+typedef paddr_t pvkey_t;
+
struct pv_pte {
- struct vm_page *pte_ptp; /* PTP; NULL for pmap_kernel() */
- vaddr_t pte_va; /* VA */
+ pvkey_t pte_key;
};

/*
Index: x86/pmap.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/pmap.c,v
retrieving revision 1.119
diff -u -p -r1.119 pmap.c
--- x86/pmap.c 14 Apr 2011 16:00:21 -0000 1.119
+++ x86/pmap.c 25 Apr 2011 22:35:38 -0000
@@ -438,11 +438,127 @@ struct pv_hash_head {
SLIST_HEAD(, pv_entry) hh_list;
} pv_hash_heads[PV_HASH_SIZE];

+/*
+ * to save memory, we convert a (ptp, va) tuple to an opaque type, pvkey_t.
+ * pvkey_t is logically a pointer to a pte.
+ */
+
+#define PVKEY_KERNEL 1
+
+/*
+ * pvkey_decode: decode the (ptp, va) tuple for the given pvkey.
+ */
+
+static void
+pvkey_decode(const pvkey_t key, struct vm_page **ptpp, vaddr_t *vap)
+{
+ struct vm_page *ptp;
+ vaddr_t va;
+
+ if ((key & PVKEY_KERNEL) != 0) {
+ ptp = NULL;
+ va = key & ~PVKEY_KERNEL;
+ } else {
+ vaddr_t l2_frame;
+ vaddr_t l1_mask;
+
+ ptp = PHYS_TO_VM_PAGE(key);
+ l2_frame = ptp->offset / PAGE_SIZE * NBPD_L2;
+ l1_mask = (key & PAGE_MASK) / sizeof(pt_entry_t) * PAGE_SIZE;
+ KASSERT((l2_frame & ~L2_FRAME) == 0);
+ KASSERT((l1_mask & L2_FRAME) == 0);
+ KASSERT((l1_mask & PAGE_MASK) == 0);
+ va = l2_frame + l1_mask;
+ }
+ KASSERT((va & PAGE_MASK) == 0);
+ *vap = va;
+ *ptpp = ptp;
+}
+
+/*
+ * pvkey_encode: generate a pvkey for the given (ptp, va) tuple.
+ */
+
+static pvkey_t
+pvkey_encode(struct vm_page *ptp, vaddr_t va)
+{
+ pvkey_t key;
+
+ KASSERT((va & PAGE_MASK) == 0);
+ if (ptp == NULL) {
+ /*
+ * kernel pmap
+ *
+ * use (va | PVKEY_KERNEL) as a key.
+ */
+ KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
+ CTASSERT(sizeof(va) <= sizeof(pvkey_t));
+ key = va | PVKEY_KERNEL;
+ } else {
+ /*
+ * user pmap
+ *
+ * use the physical address of the pte as a key.
+ */
+ const paddr_t ptppa = VM_PAGE_TO_PHYS(ptp);
+
+ KASSERT(va < VM_MIN_KERNEL_ADDRESS);
+ KASSERT(ptp->offset == ptp_va2o(va, 1));
+ CTASSERT(sizeof(paddr_t) <= sizeof(pvkey_t));
+ key = (pvkey_t)(ptppa + sizeof(pt_entry_t) * pl1_pi(va));
+ KASSERT(key < ptppa + PAGE_SIZE);
+ KASSERT((key & PVKEY_KERNEL) == 0);
+ }
+#if defined(DEBUG)
+ /*
+ * check if the pvkey is decodable to the original tuple.
+ */
+ {
+ struct vm_page *tptp;
+ vaddr_t tva;
+
+ pvkey_decode(key, &tptp, &tva);
+ KDASSERT(tptp == ptp);
+ KDASSERT(tva == va);
+ }
+#endif /* defined(DEBUG) */
+ return key;
+}
+
+/*
+ * pvkey_advance: calculate the pvkey for the next pte.
+ *
+ * basically the faster equivalent of
+ * pvkey_decode(key, &ptp, &va);
+ * pvkey_encode(ptp, va + PAGE_SIZE)
+ *
+ * note that pvkey_advance returns a garbage after crossing a ptp boundary.
+ * it's caller's responsibility not to use the garbage.
+ *
+ * XXX this could be micro-optimized to an uncoditional add if we adjust
+ * the pvkey encoding. is it worth?
+ */
+
+static pvkey_t
+pvkey_advance(const pvkey_t key)
+{
+ pvkey_t nextkey;
+
+ if ((key & PVKEY_KERNEL) != 0) {
+ nextkey = key + PAGE_SIZE;
+ } else {
+ nextkey = key + sizeof(pt_entry_t);
+ }
+ return nextkey;
+}
+
static u_int
-pvhash_hash(struct vm_page *ptp, vaddr_t va)
+pvhash_hash(const pvkey_t key)
{
+ const u_int ptppn = key / NBPD_L2;
+ const u_int pfn = key / sizeof(pt_entry_t);

- return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT);
+ return ptppn + pfn;
}

static struct pv_hash_head *
@@ -460,15 +576,14 @@ pvhash_lock(u_int hash)
}

static struct pv_entry *
-pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va)
+pvhash_remove(struct pv_hash_head *hh, const pvkey_t key)
{
struct pv_entry *pve;
struct pv_entry *prev;

prev = NULL;
SLIST_FOREACH(pve, &hh->hh_list, pve_hash) {
- if (pve->pve_pte.pte_ptp == ptp &&
- pve->pve_pte.pte_va == va) {
+ if (pve->pve_pte.pte_key == key) {
if (prev != NULL) {
SLIST_REMOVE_AFTER(prev, pve_hash);
} else {
@@ -1779,7 +1894,7 @@ insert_pv(struct pmap_page *pp, struct p

KASSERT(pp_locked(pp));

- hash = pvhash_hash(pve->pve_pte.pte_ptp, pve->pve_pte.pte_va);
+ hash = pvhash_hash(pve->pve_pte.pte_key);
lock = pvhash_lock(hash);
hh = pvhash_head(hash);
mutex_spin_enter(lock);
@@ -1800,20 +1915,23 @@ static struct pv_entry *
pmap_enter_pv(struct pmap_page *pp,
struct pv_entry *pve, /* preallocated pve for us to use */
struct pv_entry **sparepve,
- struct vm_page *ptp,
- vaddr_t va)
+ const pvkey_t key)
{
+#if defined(DEBUG)
+ struct vm_page *ptp;
+ vaddr_t va;

- KASSERT(ptp == NULL || ptp->wire_count >= 2);
- KASSERT(ptp == NULL || ptp->uobject != NULL);
- KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
+ pvkey_decode(key, &ptp, &va);
+ KDASSERT(ptp == NULL || ptp->wire_count >= 2);
+ KDASSERT(ptp == NULL || ptp->uobject != NULL);
+ KDASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
+#endif /* defined(DEBUG) */
KASSERT(pp_locked(pp));

if ((pp->pp_flags & PP_EMBEDDED) == 0) {
if (LIST_EMPTY(&pp->pp_head.pvh_list)) {
pp->pp_flags |= PP_EMBEDDED;
- pp->pp_pte.pte_ptp = ptp;
- pp->pp_pte.pte_va = va;
+ pp->pp_pte.pte_key = key;

return pve;
}
@@ -1829,8 +1947,7 @@ pmap_enter_pv(struct pmap_page *pp,
insert_pv(pp, pve2);
}

- pve->pve_pte.pte_ptp = ptp;
- pve->pve_pte.pte_va = va;
+ pve->pve_pte.pte_key = key;
insert_pv(pp, pve);

return NULL;
@@ -1845,20 +1962,24 @@ pmap_enter_pv(struct pmap_page *pp,
*/

static struct pv_entry *
-pmap_remove_pv(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va)
+pmap_remove_pv(struct pmap_page *pp, const pvkey_t key)
{
struct pv_hash_head *hh;
struct pv_entry *pve;
kmutex_t *lock;
u_int hash;
+#if defined(DEBUG)
+ struct vm_page *ptp;
+ vaddr_t va;

- KASSERT(ptp == NULL || ptp->uobject != NULL);
- KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
+ pvkey_decode(key, &ptp, &va);
+ KDASSERT(ptp == NULL || ptp->uobject != NULL);
+ KDASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
+#endif /* defined(DEBUG) */
KASSERT(pp_locked(pp));

if ((pp->pp_flags & PP_EMBEDDED) != 0) {
- KASSERT(pp->pp_pte.pte_ptp == ptp);
- KASSERT(pp->pp_pte.pte_va == va);
+ KASSERT(pp->pp_pte.pte_key == key);

pp->pp_flags &= ~PP_EMBEDDED;
LIST_INIT(&pp->pp_head.pvh_list);
@@ -1866,11 +1987,11 @@ pmap_remove_pv(struct pmap_page *pp, str
return NULL;
}

- hash = pvhash_hash(ptp, va);
+ hash = pvhash_hash(key);
lock = pvhash_lock(hash);
hh = pvhash_head(hash);
mutex_spin_enter(lock);
- pve = pvhash_remove(hh, ptp, va);
+ pve = pvhash_remove(hh, key);
mutex_spin_exit(lock);

LIST_REMOVE(pve, pve_list);
@@ -3203,7 +3341,6 @@ pmap_unmap_pte(void)
/*
* pmap_remove_ptes: remove PTEs from a PTP
*
- * => must have proper locking on pmap_master_lock
* => caller must hold pmap's lock
* => PTP must be mapped into KVA
* => PTP should be null if pmap == pmap_kernel()
@@ -3218,9 +3355,13 @@ pmap_remove_ptes(struct pmap *pmap, stru
struct pv_entry *pve;
pt_entry_t *pte = (pt_entry_t *) ptpva;
pt_entry_t opte, xpte = 0;
+ pvkey_t key;

KASSERT(pmap == pmap_kernel() || mutex_owned(&pmap->pm_lock));
KASSERT(kpreempt_disabled());
+ KASSERT((startva & PAGE_MASK) == 0);
+ KASSERT((endva & PAGE_MASK) == 0);
+ KASSERT((startva & L2_FRAME) == ((endva - 1) & L2_FRAME));

/*
* note that ptpva points to the PTE that maps startva. this may
@@ -3231,11 +3372,13 @@ pmap_remove_ptes(struct pmap *pmap, stru
* to keep track of the number of real PTEs in the PTP).
*/

- for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1)
- ; pte++, startva += PAGE_SIZE) {
+ for (key = pvkey_encode(ptp, startva);
+ startva < endva && (ptp == NULL || ptp->wire_count > 1);
+ pte++, startva += PAGE_SIZE, key = pvkey_advance(key)) {
struct vm_page *pg;
struct pmap_page *pp;

+ KASSERT(pvkey_encode(ptp, startva) == key);
if (!pmap_valid_entry(*pte))
continue; /* VA not mapped */

@@ -3282,7 +3425,7 @@ pmap_remove_ptes(struct pmap *pmap, stru
pp = VM_PAGE_TO_PP(pg);
pp_lock(pp);
pp->pp_attrs |= opte;
- pve = pmap_remove_pv(pp, ptp, startva);
+ pve = pmap_remove_pv(pp, key);
pp_unlock(pp);

if (pve != NULL) {
@@ -3300,7 +3443,6 @@ pmap_remove_ptes(struct pmap *pmap, stru
/*
* pmap_remove_pte: remove a single PTE from a PTP
*
- * => must have proper locking on pmap_master_lock
* => caller must hold pmap's lock
* => PTP must be mapped into KVA
* => PTP should be null if pmap == pmap_kernel()
@@ -3316,6 +3458,7 @@ pmap_remove_pte(struct pmap *pmap, struc
struct pv_entry *pve;
struct vm_page *pg;
struct pmap_page *pp;
+ paddr_t key;

KASSERT(pmap == pmap_kernel() || mutex_owned(&pmap->pm_lock));
KASSERT(pmap == pmap_kernel() || kpreempt_disabled());
@@ -3364,10 +3507,11 @@ pmap_remove_pte(struct pmap *pmap, struc
#endif

/* sync R/M bits */
+ key = pvkey_encode(ptp, va);
pp = VM_PAGE_TO_PP(pg);
pp_lock(pp);
pp->pp_attrs |= opte;
- pve = pmap_remove_pv(pp, ptp, va);
+ pve = pmap_remove_pv(pp, key);
pp_unlock(pp);

if (pve) {
@@ -3487,6 +3631,8 @@ pmap_remove(struct pmap *pmap, vaddr_t s
panic("pmap_remove: unmanaged PTP "
"detected");
#endif
+ KASSERT(ptp ==
+ pmap_find_ptp(pmap, blkendva - PAGE_SIZE, -1, 1));
}
xpte |= pmap_remove_ptes(pmap, ptp,
(vaddr_t)&ptes[pl1_i(va)], va, blkendva, &pv_tofree);
@@ -3525,8 +3671,7 @@ pmap_sync_pv(struct pv_pte *pvpte, pt_en
pt_entry_t npte;
bool need_shootdown;

- ptp = pvpte->pte_ptp;
- va = pvpte->pte_va;
+ pvkey_decode(pvpte->pte_key, &ptp, &va);
KASSERT(ptp == NULL || ptp->uobject != NULL);
KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
pmap = ptp_to_pmap(ptp);
@@ -3615,7 +3760,6 @@ pmap_page_remove(struct vm_page *pg)
struct pmap_page *pp;
struct pv_pte *pvpte;
struct pv_entry *killlist = NULL;
- struct vm_page *ptp;
pt_entry_t expect;
lwp_t *l;
int count;
@@ -3631,6 +3775,7 @@ startover:
struct pmap *pmap;
struct pv_entry *pve;
pt_entry_t opte;
+ struct vm_page *ptp;
vaddr_t va;
int error;

@@ -3639,7 +3784,7 @@ startover:
* otherwise the pmap can disappear behind us.
*/

- ptp = pvpte->pte_ptp;
+ pvkey_decode(pvpte->pte_key, &ptp, &va);
pmap = ptp_to_pmap(ptp);
if (ptp != NULL) {
pmap_reference(pmap);
@@ -3659,8 +3804,7 @@ startover:
}

pp->pp_attrs |= opte;
- va = pvpte->pte_va;
- pve = pmap_remove_pv(pp, ptp, va);
+ pve = pmap_remove_pv(pp, pvpte->pte_key);
pp_unlock(pp);

/* update the PTP reference count. free if last reference. */
@@ -3986,6 +4130,7 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
int error;
bool wired = (flags & PMAP_WIRED) != 0;
struct pmap *pmap2;
+ pvkey_t key;

KASSERT(pmap_initialized);
KASSERT(curlwp->l_md.md_gc_pmap != pmap);
@@ -4124,6 +4272,8 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
goto same_pa;
}

+ key = pvkey_encode(ptp, va);
+
/*
* if old page is managed, remove pv_entry from its list.
*/
@@ -4140,7 +4290,7 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
old_pp = VM_PAGE_TO_PP(pg);

pp_lock(old_pp);
- old_pve = pmap_remove_pv(old_pp, ptp, va);
+ old_pve = pmap_remove_pv(old_pp, key);
old_pp->pp_attrs |= opte;
pp_unlock(old_pp);
}
@@ -4151,7 +4301,7 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t

if (new_pp) {
pp_lock(new_pp);
- new_pve = pmap_enter_pv(new_pp, new_pve, &new_pve2, ptp, va);
+ new_pve = pmap_enter_pv(new_pp, new_pve, &new_pve2, key);
pp_unlock(new_pp);
}

@@ -4693,6 +4843,7 @@ pmap_update(struct pmap *pmap)
ptp->flags |= PG_ZERO;
pp = VM_PAGE_TO_PP(ptp);
empty_ptps = pp->pp_link;
+ KASSERT((pp->pp_flags & PP_EMBEDDED) == 0);
LIST_INIT(&pp->pp_head.pvh_list);
uvm_pagefree(ptp);
}

--
Posted automagically by a mail2news gateway at muc.de e.V.
Please direct questions, flames, donations, etc. to news-...@muc.de

Masao Uebayashi

unread,
May 11, 2011, 11:46:51 PM5/11/11
to
In the very long run, we will be able to cache a reference to a
vm_physseg on the fault handler stack, then pass (paddr_t, vm_page) to
pmap. I agree that PHYS_TO_VM_PAGE is a bad idea in general.

Masao

On Thu, May 12, 2011 at 6:25 AM, Lars Heidieker <la...@heidieker.de> wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1


>
> On 04/26/11 00:40, YAMAMOTO Takashi wrote:
>> hi,
>>
>> the following patch reduces the size of pv_pte, thus pv_entry and vm_page.
>> comments?
>>
>> YAMAMOTO Takashi
>>
>

> Hi, that's interesting. It is cutting a pv_entry from 40 bytes down to
> 32 bytes for 64 bit.
> I've a concern about the runtime requirements because of the
> PHYS_TO_VM_PAGE lookup which might be significant.
> Do all amd64 machines have only a few physical memory segments?
>
> Lars

> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v2.0.17 (GNU/Linux)
> Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/
>
> iEYEARECAAYFAk3K/tgACgkQcxuYqjT7GRYETACfWbH1nWYgnIJjBc/ucbsbtArN
> SzgAn12V24jkJA/qsjxuIxeZgneVx4Ox
> =D0Di
> -----END PGP SIGNATURE-----

YAMAMOTO Takashi

unread,
May 12, 2011, 10:12:03 PM5/12/11
to
hi,

> In the very long run, we will be able to cache a reference to a
> vm_physseg on the fault handler stack, then pass (paddr_t, vm_page) to
> pmap. I agree that PHYS_TO_VM_PAGE is a bad idea in general.

i don't think it's relevant to this patch.
pvkey_decode is only used for P->V operations.

YAMAMOTO Takashi

>
> Masao

Lars Heidieker

unread,
May 15, 2011, 4:34:03 PM5/15/11
to
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Hi,

On 05/13/11 04:11, YAMAMOTO Takashi wrote:
> hi,


>
>> -----BEGIN PGP SIGNED MESSAGE-----
>> Hash: SHA1
>>
>> On 04/26/11 00:40, YAMAMOTO Takashi wrote:
>>> hi,
>>>
>>> the following patch reduces the size of pv_pte, thus pv_entry and vm_page.
>>> comments?
>>>
>>> YAMAMOTO Takashi
>>>
>>
>> Hi, that's interesting. It is cutting a pv_entry from 40 bytes down to
>> 32 bytes for 64 bit.
>> I've a concern about the runtime requirements because of the
>> PHYS_TO_VM_PAGE lookup which might be significant.
>

> if it's significant, it should be improved. :-)
>

yes obviously ;-) It's just something to keep an eye on. There is
already a alternative physseg_find method utilizing a binary search...

>> Do all amd64 machines have only a few physical memory segments?
>

> i don't think so.
> linux folks put amazing efforts to speed up their PHYS_TO_VM_PAGE
> equivalent for their big irons.
>

That's the point were different search strategies might become necessary.

> YAMAMOTO Takashi
>
>>
>> Lars
>

As you stated that only P -> V operations need pvkey_decode the
performance hit should be very low if measurable at all...
And that's what I found, at least on my amd64 quad-core with 8gig, the
overhead seems to be totally in the noise.
I'm fine with that and reducing the pv_pte size is definitely a win.

Lars


-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.17 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iEYEARECAAYFAk3QOLoACgkQcxuYqjT7GRbnrgCgsnZlv1QtxVThvpuQACAoGWaD
92cAniKVszUVB46QcIl7wTpjYlOM8/Xh
=52Uv
-----END PGP SIGNATURE-----

0 new messages