Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

armv7 and unified TLBs

25 views
Skip to first unread message

Mark Kettenis

unread,
Aug 1, 2016, 11:11:32 AM8/1/16
to
So the ARMv7 ARM says in B4.2.2:

- on an implementation with separate data and instruction TLBs, any
unified TLB operation operates on both TLBs

- on an implementation with a unified TLB, any instruction TLB
operation, and any data TLB operation, operates on the unified TLB

- ARM deprecates use of instruction TLB operations and data TLB
operations, and recommends that software always uses the unified
TLB operations.

It seems that All the Cortex-A CPUs, with the exception of the
Cortex-A8 have a unified TLB. Since the non-unified TLB operations
are deprecated, and using the unified TLB operations leads to some
code simplifications, I think it makes sense to switch armv7 to these.
This might be a slight pessimisation on Cortex-A8, although I'm not
sure it will be noticable.

Thoughts?

Tested on Cortex-A9. Would be nice if somebody could test this on
Cortex-A8.


Index: arch/arm/arm/cpufunc_asm_armv7.S
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/cpufunc_asm_armv7.S,v
retrieving revision 1.10
diff -u -p -r1.10 cpufunc_asm_armv7.S
--- arch/arm/arm/cpufunc_asm_armv7.S 25 Apr 2016 04:46:56 -0000 1.10
+++ arch/arm/arm/cpufunc_asm_armv7.S 1 Aug 2016 14:34:36 -0000
@@ -55,15 +55,14 @@ ENTRY(armv7_setttb)
* TLB functions
*/
ENTRY(armv7_tlb_flushID_SE)
- mcr CP15_DTLBIMVA /* flush D tlb single entry */
- mcr CP15_ITLBIMVA /* flush I tlb single entry */
+ mcr CP15_TLBIMVA(r0) /* flush unified tlb single entry */
mcr CP15_BPIMVA /* flush va from BP */
dsb sy
isb sy
mov pc, lr

ENTRY(armv7_tlb_flushI_SE)
- mcr CP15_ITLBIMVA /* flush I tlb single entry */
+ mcr CP15_TLBIMVA(r0) /* flush unified tlb single entry */
mcr CP15_BPIMVA /* flush va from BP */
dsb sy
isb sy
@@ -73,27 +72,27 @@ ENTRY(armv7_tlb_flushI_SE)
* TLB functions
*/
ENTRY(armv7_tlb_flushID)
- mcr CP15_TLBIALL(r0) /* flush I+D tlb */
+ mcr CP15_TLBIALL(r0) /* flush unified tlb */
mcr CP15_BPIALL /* Flush BP cache */
dsb sy
isb sy
mov pc, lr

ENTRY(armv7_tlb_flushI)
- mcr CP15_ITLBIALL /* flush I tlb */
+ mcr CP15_TLBIALL(r0) /* flush unified tlb */
mcr CP15_BPIALL /* Flush BP cache */
dsb sy
isb sy
mov pc, lr

ENTRY(armv7_tlb_flushD)
- mcr CP15_DTLBIALL /* flush D tlb */
+ mcr CP15_TLBIALL(r0) /* flush unified tlb */
dsb sy
isb sy
mov pc, lr

ENTRY(armv7_tlb_flushD_SE)
- mcr CP15_DTLBIMVA /* flush D tlb single entry */
+ mcr CP15_TLBIMVA(r0) /* flush unified tlb single entry */
dsb sy
isb sy
mov pc, lr
Index: arch/arm/arm/pmap7.c
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/pmap7.c,v
retrieving revision 1.31
diff -u -p -r1.31 pmap7.c
--- arch/arm/arm/pmap7.c 31 Jul 2016 22:27:07 -0000 1.31
+++ arch/arm/arm/pmap7.c 1 Aug 2016 14:34:37 -0000
@@ -1028,12 +1028,8 @@ pmap_clearbit(struct vm_page *pg, u_int
*ptep = npte;
PTE_SYNC(ptep);
/* Flush the TLB entry if a current pmap. */
- if (l2pte_valid(opte)) {
- if (PV_BEEN_EXECD(oflags))
- pmap_tlb_flushID_SE(pm, pv->pv_va);
- else
- pmap_tlb_flushD_SE(pm, pv->pv_va);
- }
+ if (l2pte_valid(opte))
+ pmap_tlb_flushID_SE(pm, pv->pv_va);
}

NPDEBUG(PDB_BITS,
@@ -1281,7 +1277,6 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
ptep = &l2b->l2b_kva[l2pte_index(va)];
opte = *ptep;
npte = pa;
- oflags = 0;

if (opte != 0) { /* not l2pte_valid!!! MIOD */
/*
@@ -1367,7 +1362,6 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
* must remove it from the PV list
*/
pve = pmap_remove_pv(opg, pm, va);
- oflags = pve->pv_flags;
} else
if ((pve = pool_get(&pmap_pv_pool, PR_NOWAIT)) == NULL){
if ((flags & PMAP_CANFAIL) == 0)
@@ -1397,8 +1391,6 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
* at this address.
*/
pve = pmap_remove_pv(opg, pm, va);
- oflags = pve->pv_flags;
-
pool_put(&pmap_pv_pool, pve);
}
}
@@ -1449,12 +1441,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
}
}

- if (l2pte_valid(opte)) {
- if (PV_BEEN_EXECD(oflags))
- pmap_tlb_flushID_SE(pm, va);
- else
- pmap_tlb_flushD_SE(pm, va);
- }
+ if (l2pte_valid(opte))
+ pmap_tlb_flushID_SE(pm, va);
}

/*
@@ -1480,7 +1468,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
struct l2_bucket *l2b;
vaddr_t next_bucket;
pt_entry_t *ptep;
- u_int mappings, is_exec;
+ u_int mappings;

NPDEBUG(PDB_REMOVE, printf("pmap_remove: pmap=%p sva=%08lx eva=%08lx\n",
pm, sva, eva));
@@ -1520,7 +1508,6 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd

pm->pm_stats.resident_count--;
pa = l2pte_pa(pte);
- is_exec = 0;

/*
* Update flags. In a number of circumstances,
@@ -1531,10 +1518,8 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
if (pg != NULL) {
struct pv_entry *pve;
pve = pmap_remove_pv(pg, pm, sva);
- if (pve != NULL) {
- is_exec = PV_BEEN_EXECD(pve->pv_flags);
+ if (pve != NULL)
pool_put(&pmap_pv_pool, pve);
- }
}

/*
@@ -1547,12 +1532,8 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd

*ptep = L2_TYPE_INV;
PTE_SYNC(ptep);
- if (l2pte_valid(pte)) {
- if (is_exec)
- pmap_tlb_flushID_SE(pm, sva);
- else
- pmap_tlb_flushD_SE(pm, sva);
- }
+ if (l2pte_valid(pte))
+ pmap_tlb_flushID_SE(pm, sva);

sva += PAGE_SIZE;
ptep++;
@@ -1801,12 +1782,8 @@ NPDEBUG(PDB_PROTECT, printf("\n"));

if (flush >= 0) {
flush++;
- if (l2pte_valid(opte)) {
- if (PV_BEEN_EXECD(f))
- cpu_tlb_flushID_SE(sva);
- else
- cpu_tlb_flushD_SE(sva);
- }
+ if (l2pte_valid(opte))
+ cpu_tlb_flushID_SE(sva);
} else
flags |= f;
}
@@ -1816,12 +1793,9 @@ NPDEBUG(PDB_PROTECT, printf("\n"));
}
}

- if (flush < 0) {
- if (PV_BEEN_EXECD(flags))
- pmap_tlb_flushID(pm);
- else
- pmap_tlb_flushD(pm);
- }
+ if (flush < 0)
+ pmap_tlb_flushID(pm);
+
NPDEBUG(PDB_PROTECT, printf("\n"));
}


Mark Kettenis

unread,
Aug 2, 2016, 7:44:05 AM8/2/16
to
> Date: Tue, 2 Aug 2016 02:02:57 +1000
> From: Jonathan Gray <j...@jsg.id.au>

Adding back tech@ just in case a knwoledgable person there wants to
chime in...

> On Mon, Aug 01, 2016 at 05:10:48PM +0200, Mark Kettenis wrote:
> > So the ARMv7 ARM says in B4.2.2:
> >
> > - on an implementation with separate data and instruction TLBs, any
> > unified TLB operation operates on both TLBs
> >
> > - on an implementation with a unified TLB, any instruction TLB
> > operation, and any data TLB operation, operates on the unified TLB
> >
> > - ARM deprecates use of instruction TLB operations and data TLB
> > operations, and recommends that software always uses the unified
> > TLB operations.
> >
> > It seems that All the Cortex-A CPUs, with the exception of the
> > Cortex-A8 have a unified TLB. Since the non-unified TLB operations
> > are deprecated, and using the unified TLB operations leads to some
> > code simplifications, I think it makes sense to switch armv7 to these.
> > This might be a slight pessimisation on Cortex-A8, although I'm not
> > sure it will be noticable.
> >
> > Thoughts?
> >
> > Tested on Cortex-A9. Would be nice if somebody could test this on
> > Cortex-A8.
>
> We could just drop the I and D specific functions like FreeBSD did?
>
> /*
> * TLB functions. ARMv7 does all TLB ops based on a unified TLB model
> * whether the hardware implements separate I+D or not, so we use the
> * same 'ID' functions for all 3 variations.
> */
>
> armv7_tlb_flushID, /* tlb_flushID */
> armv7_tlb_flushID_SE, /* tlb_flushID_SE */
> armv7_tlb_flushID, /* tlb_flushD */
> armv7_tlb_flushID_SE, /* tlb_flushD_SE */
>
> https://lists.freebsd.org/pipermail/freebsd-arm/2014-March/007849.html

I think it makes sense to keep the distinction betwwen the ID and D
variants for now. The ID variants also flush the branch predictor
wheras the D variants don't.

That raises some question about my simplifications of the armv7 pmap.
Those replace the conditional executation of ID or D variants with
unconditional executaion of D variants. I think that that is still
worth doing. In theory there is some additional overhead for flushing
the branch prediction. But the elemination of the branch instructions
will at least partly compensate for that. And on most, if not all,
Cortex CPUs flushing the branch predictor isn't necessary. So on
those we can simply change the function pointers to always use the D
variants. I'll do that in a fllowup diff.

So, as a first step I'd like to commit the diff below.

ok?


Index: cpufunc_asm_armv7.S
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/cpufunc_asm_armv7.S,v
retrieving revision 1.10
diff -u -p -r1.10 cpufunc_asm_armv7.S
--- cpufunc_asm_armv7.S 25 Apr 2016 04:46:56 -0000 1.10
+++ cpufunc_asm_armv7.S 2 Aug 2016 11:26:38 -0000
@@ -45,7 +45,7 @@ ENTRY(armv7_setttb)
isb sy

mcr CP15_TTBR0(r0) /* load new TTB */
- mcr CP15_TLBIALL(r0) /* invalidate I+D TLBs */
+ mcr CP15_TLBIALL(r0) /* invalidate unified TLB */
dsb sy
isb sy

@@ -55,45 +55,27 @@ ENTRY(armv7_setttb)
* TLB functions
*/
ENTRY(armv7_tlb_flushID_SE)
- mcr CP15_DTLBIMVA /* flush D tlb single entry */
- mcr CP15_ITLBIMVA /* flush I tlb single entry */
+ mcr CP15_TLBIMVA(r0) /* flush unified tlb single entry */
mcr CP15_BPIMVA /* flush va from BP */
dsb sy
isb sy
mov pc, lr

-ENTRY(armv7_tlb_flushI_SE)
- mcr CP15_ITLBIMVA /* flush I tlb single entry */
- mcr CP15_BPIMVA /* flush va from BP */
- dsb sy
- isb sy
- mov pc, lr
-
-/*
- * TLB functions
- */
ENTRY(armv7_tlb_flushID)
- mcr CP15_TLBIALL(r0) /* flush I+D tlb */
+ mcr CP15_TLBIALL(r0) /* flush unified tlb */
mcr CP15_BPIALL /* Flush BP cache */
dsb sy
isb sy
mov pc, lr

-ENTRY(armv7_tlb_flushI)
- mcr CP15_ITLBIALL /* flush I tlb */
- mcr CP15_BPIALL /* Flush BP cache */
+ENTRY(armv7_tlb_flushD_SE)
+ mcr CP15_TLBIMVA(r0) /* flush unified tlb single entry */
dsb sy
isb sy
mov pc, lr

ENTRY(armv7_tlb_flushD)
- mcr CP15_DTLBIALL /* flush D tlb */
- dsb sy
- isb sy
- mov pc, lr
-
-ENTRY(armv7_tlb_flushD_SE)
- mcr CP15_DTLBIMVA /* flush D tlb single entry */
+ mcr CP15_TLBIALL(r0) /* flush unified tlb */
dsb sy
isb sy
mov pc, lr
@@ -251,7 +233,7 @@ ENTRY(armv7_context_switch)
isb sy

mcr CP15_TTBR0(r0) /* set the new TTB */
- mcr CP15_TLBIALL(r0) /* and flush the I+D tlbs */
+ mcr CP15_TLBIALL(r0) /* and flush the unified tlb */

Jonathan Gray

unread,
Aug 2, 2016, 8:34:50 PM8/2/16
to
You seem to be missing a cpufunc.c diff, I think this is
what you meant to send?

In which case ok jsg@

Index: cpufunc.c
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/cpufunc.c,v
retrieving revision 1.42
diff -u -p -r1.42 cpufunc.c
--- cpufunc.c 31 Jul 2016 03:49:51 -0000 1.42
+++ cpufunc.c 2 Aug 2016 12:09:04 -0000
@@ -109,8 +109,8 @@ struct cpu_functions armv7_cpufuncs = {

armv7_tlb_flushID, /* tlb_flushID */
armv7_tlb_flushID_SE, /* tlb_flushID_SE */
- armv7_tlb_flushI, /* tlb_flushI */
- armv7_tlb_flushI_SE, /* tlb_flushI_SE */
+ armv7_tlb_flushID, /* tlb_flushI */
+ armv7_tlb_flushID_SE, /* tlb_flushI_SE */
armv7_tlb_flushD, /* tlb_flushD */
armv7_tlb_flushD_SE, /* tlb_flushD_SE */

Index: cpufunc_asm_armv7.S
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/cpufunc_asm_armv7.S,v
retrieving revision 1.10
diff -u -p -r1.10 cpufunc_asm_armv7.S
--- cpufunc_asm_armv7.S 25 Apr 2016 04:46:56 -0000 1.10
+++ cpufunc_asm_armv7.S 2 Aug 2016 12:09:04 -0000

Mark Kettenis

unread,
Aug 3, 2016, 3:06:18 AM8/3/16
to
> Date: Wed, 3 Aug 2016 10:34:08 +1000
Yeah, that's what I have in my tree. Sorry. Thanks. Committed.
0 new messages