based on reading the uboot, linux & plan 9 code for handling the l2 cache.
http://code.google.com/p/inferno-kirkwood/source/detail?r=4c44c358a8
Added:
/doc/l2cache.txt
Modified:
/io.h
/l.s
/main.c
/mem.h
=======================================
--- /dev/null
+++ /doc/l2cache.txt Tue Mar 16 15:41:07 2010
@@ -0,0 +1,48 @@
+some information about the l2 cache. there are no public docs.
+
+the following has been gathered from linux, verified against uboot
+& plan 9 code.
+
+we've taken a shortcut: we only use the l2 cache in write through
+mode, so we do not have to clean (write back) it separately from the dcache.
+
+when enabling/disabling the l2 cache, the dcache & icache must be off.
+
+the instructions manipulating the l2 cache take physical addresses
+(actually, that seems to depend on the last argument).
+note that the cacheline is also 32 bytes.
+
+the l2 cache can be turned on/off (and modes set) by instruction:
+
+mrc p15, 1, r0, c15, c1, 0 // get
+mcr p15, 1, r0, c15, c1, 0 // set
+
+meaning of bits:
+l2 enable = 1<<22
+l2 prefetch disable = 1<<24
+l2 write allocate enable = 1<<28
+l2 cpu streaming enable = 1<<29
+
+
+the instructions to clean and/or invalidate cache lines (by address, or the entire cache):
+
+mcr p15, 1, r0, c15, c9, 3 // clean
+mcr p15, 1, r0, c15, c10, 3 // clean & invalidate
+mcr p15, 1, r0, c15, c11, 3 // invalidate address
+mcr p15, 1, r0, c15, c11, 0 // invalidate entire l2 cache
+
+the instructions below clean or invalidate a range of memory. first
+the begin address is written, then the end address in a separate
+instruction that only "returns" when the operation is done. the
+end address is inclusive: it is cleaned/invalidated too. i don't
+know the behaviour when the lower (sub-cacheline) address bits are
+not zero, so better keep them zero.
+
+mcr p15, 1, r0, c15, c9, 4 // clean range start
+mcr p15, 1, r0, c15, c9, 5 // clean range end (inclusive), go
+
+mcr p15, 1, r0, c15, c11, 4 // invalidate range start
+mcr p15, 1, r0, c15, c11, 5 // invalidate range end (inclusive), go
+
+perhaps there is a clean & invalidate range instruction too. i
+haven't seen it used, but also haven't tested for it.
=======================================
--- /io.h Mon Mar 15 06:57:05 2010
+++ /io.h Tue Mar 16 15:41:07 2010
@@ -114,7 +114,7 @@
/* CpucsReg.l2cfg, cpu l2 cfg */
L2ecc = 1<<2,
- L2enable = 1<<3,
+ L2exists = 1<<3,
L2wtmode = 1<<4,
Pexenable = 1<<0,
=======================================
--- /l.s Tue Mar 16 10:55:21 2010
+++ /l.s Tue Mar 16 15:41:07 2010
@@ -292,7 +292,9 @@
/*
-"write back" (including draining write buffer) and invalidating.
+ * "write back" (including draining write buffer) and invalidating.
+ * we use the l2 cache in write-through mode, so writing back the dcache
writes back the l2 cache.
+ * we do need to invalidate the l2 cache explicitly.
*/
TEXT icinvall(SB), $-4
@@ -315,10 +317,10 @@
RET
-#define DRAINWB MOVW $0, R2; \
- MCR CpMMU, 0, R2, C7, C10, 4
-
-/* arm926ej-s' special test,clean,invalidate instruction does not seem to
work. walk through each way for each set. */
+/*
+ * arm926ej-s' special test,clean,invalidate instruction (with loop) does
not seem to work.
+ * walk through each way for each set.
+ */
TEXT dcwball(SB), $-4
dcwball0:
MOVW $(127<<5), R1 /* start at set 128 */
@@ -330,13 +332,18 @@
BCS wbway
SUB.S $(1<<5), R1 /* flag C for no borrow: another set */
BCS wbset
- DRAINWB
+
+ /* drain write buffer */
+ MOVW $0, R0
+ MCR CpMMU, 0, R0, C7, C10, 4
+
RET
TEXT dcwb(SB), $-4
MOVW 4(FP), R1
CMP $(CACHESIZE), R1
BCS dcwball0
+
ADD R0, R1
BIC $(CACHELINESIZE-1), R0
dcwb0:
@@ -344,11 +351,21 @@
ADD $CACHELINESIZE, R0
CMP R1, R0
BLO dcwb0
- DRAINWB
+
+ /* drain write buffer */
+ MOVW $0, R0
+ MCR CpMMU, 0, R0, C7, C10, 4
+
RET
+/* clean & invalidate entire dcache & l2 cache */
TEXT dcwbinvall(SB), $-4
dcwbinvall0:
+ /* disable fiq/irq */
+ MOVW CPSR, R2
+ ORR $(PsrDfiq|PsrDirq), R2, R3
+ MOVW R3, CPSR
+
MOVW $(127<<5), R1 /* start at set 128 */
wbinvset:
ORR $(3<<30), R1, R0 /* start at way 4 */
@@ -358,34 +375,66 @@
BCS wbinvway
SUB.S $(1<<5), R1 /* flag C for no borrow: another set */
BCS wbinvset
- DRAINWB
+
+ /* drain write buffer */
+ MOVW $0, R0
+ MCR CpMMU, 0, R0, C7, C10, 4
+
+ MCR CpMMU, 1, R0, C15, C11, 0 /* invalidate entire l2 cache, fine since
we use l2 in write-through mode. */
+ MOVW R2, CPSR /* restore fiq/irq state */
RET
+/*
+ * clean & invalidate address range in both dcache & l2 cache
+ * don't partially flush more than half the l2 cache. dcwbinvall flushes
dcache with fewer instructions, and l2 cache with one.
+ */
TEXT dcwbinv(SB), $-4
MOVW 4(FP), R1
- CMP $(CACHESIZE), R1
+ CMP $(L2CACHESIZE/2), R1
BCS dcwbinvall0
+
ADD R0, R1
BIC $(CACHELINESIZE-1), R0
+ MOVW R0, R3 /* keep copy of start address */
+
+ /* disable fiq/irq */
+ MOVW CPSR, R2
+ ORR $(PsrDfiq|PsrDirq), R2, R4
+ MOVW R4, CPSR
+
dcwbinv0:
- MCR CpMMU, 0, R0, C7, C14, 1
+ MCR CpMMU, 0, R0, C7, C14, 1 /* clean & invalidate address */
ADD $CACHELINESIZE, R0
CMP R1, R0
BLO dcwbinv0
- DRAINWB
+
+ /* drain write buffer */
+ MOVW $0, R0
+ MCR CpMMU, 0, R0, C7, C10, 4
+
+ /* note: write-through l2 cache means lines are clean in l2 too */
+ BIC $(CACHELINESIZE-1), R1 /* round down end address */
+ MCR CpMMU, 1, R3, C15, C11, 4 /* start addr */
+ MCR CpMMU, 1, R1, C15, C11, 5 /* end addr, inclusive. go. */
+
+ MOVW R2, CPSR /* restore fiq/irq state */
RET
+/* invalidate entire dcache & l2 cache */
TEXT dcinvall(SB), $-4
MOVW $0, R0
- MCR CpMMU, 0, R0, C7, C6, 0
+ MCR CpMMU, 0, R0, C7, C6, 0 /* invalidate dcache */
+ MCR CpMMU, 1, R0, C15, C11, 0 /* invalidate l2 cache */
RET
+/* invalidate range in both dcache & l2 cache */
TEXT dcinv(SB), $-4
MOVW 4(FP), R1
ADD R0, R1
BIC $(CACHELINESIZE-1), R0
dcinv0:
- MCR CpMMU, 0, R0, C7, C6, 1
+ MCR CpMMU, 0, R0, C7, C6, 1 /* invalidate dcache line */
+ MCR CpMMU, 1, R0, C15, C11, 3 /* invalidate l2 cache line */
ADD $CACHELINESIZE, R0
CMP R1, R0
BLO dcinv0
=======================================
--- /main.c Tue Mar 16 10:55:21 2010
+++ /main.c Tue Mar 16 15:41:07 2010
@@ -126,7 +126,7 @@
ulong v;
v = CPUCSREG->l2cfg;
- print("l2: %s, ecc %s, mode %s\n", (v&L2enable) ? "on" : "off",
(v&L2ecc) ? "on" : "off", (v & L2wtmode) ? "writethrough" : "writeback");
+ print("l2: %s, ecc %s, mode %s\n", (v&L2exists) ? "on" : "off",
(v&L2ecc) ? "on" : "off", (v & L2wtmode) ? "writethrough" : "writeback");
if(0) {
int i;
@@ -188,6 +188,10 @@
Cacheable = 1<<3,
Bufferable = 1<<2,
Sectiondescr = (1<<4)|(2<<0),
+
+ /* "marvell extra features" register, for l2 cache */
+ ML2noprefetch = 1<<24,
+ ML2enable = 1<<22,
};
static void
cacheprint(void)
@@ -215,7 +219,7 @@
mmuinit(void)
{
ulong *p;
- ulong i;
+ ulong i, v;
p = xspanalloc(16*1024, 16*1024, 0);
if(p == nil)
@@ -233,22 +237,29 @@
p[Regbase>>20] = (Regbase&~MASK(20))|AP|Sectiondescr;
p[AddrPhyNand>>20] = (AddrPhyNand&~MASK(20))|AP|Sectiondescr;
- /* enable mmu & l1 caches */
+ /* enable mmu, l1 cache & l2 cache */
ttbput((ulong)p); /* translation table base address */
dacput(Manager<<0); /* we only use dom 0, all accesses allowed */
fcsepidput(0); /* pid used in mva (modified va). always 0 for us. */
+ tlbclear();
+
dclockdownput(0xfff<<4); /* bits 3..0 set the locked Ways */
+ icinvall();
dcinvall();
- tlbclear();
+ cpctlput(cpctlget()&~(Icacheena|Dcacheena));
+ CPUCSREG->l2cfg |= L2exists|L2wtmode;
+ v = CPUCSREG->l2cfg;
+ USED(v);
+ mvfeatset(mvfeatget()|ML2noprefetch|ML2enable);
+
/* xxx should set the 8 locked down tlb entries. for performance, but
also because they now may contain bad entries. */
cpctlput(cpctlget()|MMUena|Icacheena|Dcacheena|Alignfault);
}
-extern ulong flierp(ulong);
void
main(void)
{
- CPUCSREG->l2cfg &= ~L2enable;
+ CPUCSREG->l2cfg &= ~L2exists;
/* invalidate & enable l1 icache */
iclockdownput(0xfff<<4); /* bits 3..0 set the locked Ways */
=======================================
--- /mem.h Mon Mar 15 06:57:05 2010
+++ /mem.h Tue Mar 16 15:41:07 2010
@@ -96,4 +96,5 @@
#define CpCl4 0x00008000 /* L4: set T bit on PC loads */
#define CACHESIZE 4096
+#define L2CACHESIZE 262144
#define CACHELINESIZE 32