|
|
# Disabling Zynq caches
|
|
|
|
|
|
Disabling the caches may be an interesting option, at least during development, to force all load-store operations to cross the CPU boundary. When using the [AXI simple bridge], the [AXI bridge] or even the [SecBus HSM], it exposes many more memory accesses. There are probably other ways to do this, but this one works:
|
|
|
|
|
|
## Modifying the Linux kernel to disable the caches
|
|
|
|
|
|
In your `linux-xlnx` working copy, create a copy of `arch/arm/configs/xilinx_zynq_defconfig` and append some parameters to it:
|
|
|
|
|
|
```bash
|
|
|
cp arch/arm/configs/xilinx_zynq_defconfig arch/arm/configs/xilinx_zynq_nocache
|
|
|
cat <<! >> arch/arm/configs/xilinx_zynq_nocache
|
|
|
CONFIG_RELOCATABLE=y
|
|
|
CONFIG_CPU_ICACHE_DISABLE=y
|
|
|
CONFIG_CPU_DCACHE_DISABLE=y
|
|
|
!
|
|
|
```
|
|
|
|
|
|
<!--
|
|
|
The Linux boot file `arch/arm/kernel/head.S` must be modified to allow for the data cache to be disabled (first make a copy of the original version). The `__turn_mmu_on` procedure must be modified to:
|
|
|
|
|
|
```
|
|
|
ENTRY(__turn_mmu_on)
|
|
|
mov r0, r0
|
|
|
instr_sync
|
|
|
tst r4, #1
|
|
|
bne iflush
|
|
|
mrc p15, 0, r10, c0, c1, 5 @ read ID_MMFR1
|
|
|
tst r10, #0xf << 16 @ hierarchical cache (ARMv7)
|
|
|
mov r10, #0
|
|
|
beq hierarchical
|
|
|
mcr p15, 0, r10, c7, c14, 0 @ clean+invalidate D
|
|
|
b iflush
|
|
|
hierarchical:
|
|
|
mcr p15, 0, r10, c7, c10, 5 @ DMB
|
|
|
stmfd sp!, {r0-r7, r9-r11}
|
|
|
mrc p15, 1, r0, c0, c0, 1 @ read clidr
|
|
|
ands r3, r0, #0x7000000 @ extract loc from clidr
|
|
|
mov r3, r3, lsr #23 @ left align loc bit field
|
|
|
beq finished @ if loc is 0, then no need to c
|
|
|
mov r10, #0 @ start clean at cache level 0
|
|
|
loop1:
|
|
|
add r2, r10, r10, lsr #1 @ work out 3x current cache leve
|
|
|
mov r1, r0, lsr r2 @ extract cache type bits from c
|
|
|
and r1, r1, #7 @ mask of the bits for current c
|
|
|
cmp r1, #2 @ see what cache we have at this
|
|
|
blt skip @ skip if no cache, or just i-ca
|
|
|
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in
|
|
|
mcr p15, 0, r10, c7, c5, 4 @ isb to sych the new cssr&csidr
|
|
|
mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
|
|
|
and r2, r1, #7 @ extract the length of the cach
|
|
|
add r2, r2, #4 @ add 4 (line length offset)
|
|
|
ldr r4, =0x3ff
|
|
|
ands r4, r4, r1, lsr #3 @ find maximum number on the way
|
|
|
clz r5, r4 @ find bit position of way size
|
|
|
ldr r7, =0x7fff
|
|
|
ands r7, r7, r1, lsr #13 @ extract max number of the inde
|
|
|
loop2:
|
|
|
mov r9, r4 @ create working copy of max way
|
|
|
loop3:
|
|
|
ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number
|
|
|
ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
|
|
|
THUMB( lsl r6, r9, r5 )
|
|
|
THUMB( orr r11, r10, r6 ) @ factor way and cache
|
|
|
THUMB( lsl r6, r7, r2 )
|
|
|
THUMB( orr r11, r11, r6 ) @ factor index number
|
|
|
mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
|
|
|
subs r9, r9, #1 @ decrement the way
|
|
|
bge loop3
|
|
|
subs r7, r7, #1 @ decrement the index
|
|
|
bge loop2
|
|
|
skip:
|
|
|
add r10, r10, #2 @ increment cache number
|
|
|
cmp r3, r10
|
|
|
bgt loop1
|
|
|
finished:
|
|
|
ldmfd sp!, {r0-r7, r9-r11}
|
|
|
mov r10, #0 @ swith back to cache level 0
|
|
|
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in
|
|
|
iflush:
|
|
|
mcr p15, 0, r10, c7, c10, 4 @ DSB
|
|
|
mcr p15, 0, r10, c7, c5, 0 @ invalidate I+BTB
|
|
|
mcr p15, 0, r10, c7, c10, 4 @ DSB
|
|
|
mcr p15, 0, r10, c7, c5, 4 @ ISB
|
|
|
mcr p15, 0, r0, c1, c0, 0 @ write control reg
|
|
|
mrc p15, 0, r3, c0, c0, 0 @ read id reg
|
|
|
instr_sync
|
|
|
mov r3, r3
|
|
|
mov r3, r13
|
|
|
ret r3
|
|
|
__turn_mmu_on_end:
|
|
|
ENDPROC(__turn_mmu_on)
|
|
|
```
|
|
|
-->
|
|
|
|
|
|
Compile the Linux kernel:
|
|
|
|
|
|
```bash
|
|
|
$ export CROSS_COMPILE=arm-xilinx-linux-gnueabi-
|
|
|
$ make O=build-nocache ARCH=arm xilinx_zynq_nocache
|
|
|
$ make O=build-nocache ARCH=arm zImage
|
|
|
```
|
|
|
|
|
|
The kernel image is in `build-nocache/arch/arm/boot/zImage`.
|
|
|
|
|
|
## Removing the L2 cache from the device tree
|
|
|
|
|
|
The L2 cache must also be removed from the device tree by literally commenting out the following section from the `zynq-7000.dtsi` device tree source file:
|
|
|
|
|
|
```
|
|
|
/*
|
|
|
L2: cache-controller@f8f02000 {
|
|
|
compatible = "arm,pl310-cache";
|
|
|
reg = <0xF8F02000 0x1000>;
|
|
|
arm,data-latency = <3 2 2>;
|
|
|
arm,tag-latency = <2 2 2>;
|
|
|
cache-unified;
|
|
|
cache-level = <2>;
|
|
|
};
|
|
|
*/
|
|
|
```
|
|
|
|
|
|
Rebuild the device tree blob:
|
|
|
|
|
|
```bash
|
|
|
dtc -I dts -O dtb -o devicetree.dtb system.dts
|
|
|
```
|
|
|
|
|
|
## Checking the CPU state
|
|
|
|
|
|
Once the Linux kernel has booted, one can check the CPU state to verify whether the caches are in use or not. This can be done by looking at the content of the `SCTLR` (System Control Register) CPU register (using, for instance, a debugger through the JTAG port). Bits 12 and 2 of the `SCTLR` CPU register indicate whether the instruction and data caches are enabled, respectively. If these two bits are cleared (0), then all instruction and data caches are disabled.
|
|
|
|
|
|
[AXI simple bridge]: axi-simple-bridge
|
|
|
[AXI bridge]: axi-bridge
|
|
|
[SecBus HSM]: hsm-as-a-bridge
|
|
|
|
|
|
<!-- vim: set tabstop=4 softtabstop=4 shiftwidth=4 noexpandtab textwidth=0: --> |