;
; Copyright (c) Microsoft Corporation.  All rights reserved.
;
;
; Use of this sample source code is subject to the terms of the Microsoft
; license agreement under which you licensed this sample source code. If
; you did not accept the terms of the license agreement, you are not
; authorized to use this sample source code. For the terms of the license,
; please see the license agreement between you and Microsoft or, if applicable,
; see the LICENSE.RTF on your install media or the root of your tools installation.
; THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES.
;
;-------------------------------------------------------------------------------
;
;  File: flushdc.s 
;
;  This file implements the OALFlushDCache function. This implementation should
;  work on most ARM-based SoCs. Note that newer silicon supports a more efficient
;  test, clean and invalidate DCache function.
;
        INCLUDE kxarm.h
        INCLUDE armmacros.s
        INCLUDE oal_cache.inc

        IMPORT g_oalCacheInfo

        TEXTAREA

;-------------------------------------------------------------------------------
;
;  Function:  OALFlushDCache (L1/L2 FLUSH)
;
  LEAF_ENTRY OALFlushDCache

;  Cleans and invalidates the data/unified caches by set/way, walking the
;  cache levels described by CLIDR from level index 0 (L1) up to the Level
;  of Coherency (LoC). After each level that is cleaned, the Auxiliary
;  Control Register is read and the walk stops if bit 1 is clear.
;
;  Inputs:   none
;  Returns:  nothing
;
;  Register usage (r4-r7, r9-r11 and lr are pushed on entry, popped on exit):
;       r0  = CLIDR value
;       r3  = LoC shifted left by 1, so it can be compared with r10
;       r10 = current cache level in CSSELR format (level index << 1)
;       r2  = CLIDR field offset (3 * level), then log2(line length in bytes)
;       r4  = highest way number
;       r5  = left shift that places the way number at the top of r11
;       r7  = current set index (counts down to 0)
;       r9  = current way number (counts down to 0)
;       r11 = set/way operand for the clean-and-invalidate operation
;       r1, r6 = scratch (r5 is also reused as scratch by the L2 check)
;
;  NOTE(review): this routine is declared with LEAF_ENTRY but adjusts sp to
;  save registers; confirm that this is acceptable under the kxarm.h entry
;  macro conventions used by this project.

        stmfd   sp!, {r4-r6, r7, r9-r11, lr}    ; save work registers and lr on the stack

        DCD     0xf57ff05f                      ; DMB (Data Memory Barrier), emitted as a raw opcode

        mrc     p15, 1, r0, c0, c0, 1           ; r0 = CLIDR (Cache Level ID Register)
        ands    r3, r0, #0x7000000              ; isolate LoC, CLIDR[26:24]; Z is set when LoC == 0
        mov     r3, r3, lsr #23                 ; r3 = LoC << 1 (mov leaves the flags from ands intact)
        beq     donea                           ; LoC == 0: no level needs cleaning

        mov     r10, #0                         ; start at cache level index 0 (L1)
loop1a  add     r2, r10, r10, lsr #1            ; r2 = 3 * level index (r10 holds level << 1)
        mov     r1, r0, lsr r2                  ; shift this level's cache type field down to bits [2:0]
        and     r1, r1, #7                      ; r1 = 3-bit cache type for the current level
        cmp     r1, #2                          ; 0 = no cache, 1 = instruction cache only
        blt     skipa                           ; nothing to clean at this level, try the next one

        mcr     p15, 2, r10, c0, c0, 0          ; CSSELR = current level, data/unified side
        mov     r1, #0                          ; operand for the disabled mcr below; r1 is overwritten by the CCSIDR read
        ;mcr     p15, 0, r1, c7, c5, 4          ; prefetch flush to sync the change to the cachesize id reg
        DCD     0xf57ff06f                      ; ISB (Instruction Synchronization Barrier): CSSELR write takes effect before CCSIDR is read
        mrc     p15, 1, r1, c0, c0, 0           ; r1 = CCSIDR for the selected cache
        and     r2, r1, #7                      ; CCSIDR[2:0] = encoded line size
        add     r2, r2, #4                      ; r2 = log2(line length in bytes) = shift for the set index
        ldr     r4, =0x3ff
        ands    r4, r4, r1, lsr #3              ; r4 = CCSIDR[12:3] = number of ways - 1
        clz     r5, r4                          ; r5 = shift that left-aligns the way number in 32 bits
        ldr     r7, =0x7fff
        ands    r7, r7, r1, lsr #13             ; r7 = CCSIDR[27:13] = number of sets - 1

;  Outer loop (loop2a) walks the set index downwards, inner loop (loop3a)
;  walks the way number downwards; both include 0 because of the bge test.
loop2a  mov     r9, r4                          ; r9 = working way counter, restarted for every set
loop3a  orr     r11, r10, r9, lsl r5            ; r11 = level field | way field
        orr     r11, r11, r7, lsl r2            ; r11 |= set index field

        mcr     p15, 0, r11, c7, c14, 2         ; clean and invalidate data cache line by set/way

        subs    r9, r9, #1                      ; next way
        bge     loop3a                          ; loop until the way counter goes negative

        subs    r7, r7, #1                      ; next set
        bge     loop2a                          ; loop until the set counter goes negative
        
	; Check whether the L2 cache is enabled before moving to the next level.
	; r5 is reused as scratch here; it is recomputed by clz for each level.
	; NOTE(review): Auxiliary Control Register contents are implementation
	; defined; bit 1 = L2EN is taken from the original comment - confirm for
	; the target core.
	mov	r5, #0x0			; redundant clear; r5 is overwritten by the mrc below
	mrc	p15, 0, r5, c1, c0, 1		; r5 = Auxiliary Control Register
	and	r6, r5, #2			; isolate bit 1 (L2EN per the original comment)
	cmp	r6, #2
	bne	donea				; bit clear: L2 cache disabled, stop after this level

	
skipa   add     r10, r10, #2                        ; advance to the next level (CSSELR level field is bits [3:1])
        cmp     r3, r10
        bgt     loop1a                              ; continue while the level is below LoC

donea   mov     r10, #0                             ; switch back to cache level 0
        mcr     p15, 2, r10, c0, c0, 0              ; CSSELR = level 0, data/unified side

        DCD     0xf57ff04f                      ; DSB (Data Synchronization Barrier)
        DCD     0xf57ff06f                      ; ISB (Instruction Synchronization Barrier)

        ldmfd   sp!, {r4-r6, r7, r9-r11, lr}    ; restore the registers saved on entry

        RETURN


  LEAF_ENTRY OALFlushL2Cache

;  Cleans and invalidates the level 2 cache by set/way.
;
;  The set/way walk is skipped when bit 1 of the Auxiliary Control Register
;  is clear, or when CLIDR does not report a unified cache at level 2. The
;  exit sequence at doneb (CSSELR reset, DSB, ISB) runs on every path.
;
;  Inputs:   none
;  Returns:  nothing
;
;  Register usage (r4-r7, r9-r11 and lr are pushed on entry, popped on exit):
;       r10 = CSSELR value (2 selects level 2, data/unified side)
;       r2  = log2(line length in bytes), shift for the set index
;       r4  = highest way number
;       r5  = left shift that places the way number at the top of r11
;       r7  = current set index (counts down to 0)
;       r9  = current way number (counts down to 0)
;       r11 = set/way operand for the clean-and-invalidate operation
;       r0, r1, r3 = scratch
;
;  NOTE(review): this routine is declared with LEAF_ENTRY but adjusts sp to
;  save registers; confirm that this is acceptable under the kxarm.h entry
;  macro conventions used by this project.

        stmfd   sp!, {r4-r6, r7, r9-r11, lr}    ; save work registers and lr on the stack

	; Check whether the L2 cache is enabled.
	; NOTE(review): Auxiliary Control Register contents are implementation
	; defined; bit 1 = L2EN is taken from the original comment - confirm for
	; the target core.
	mov	r0, #0x0			; redundant clear; r0 is overwritten by the mrc below
	mrc	p15, 0, r0, c1, c0, 1		; r0 = Auxiliary Control Register
	and	r1, r0, #2			; isolate bit 1 (L2EN per the original comment)
	cmp	r1, #2
	bne	doneb				; bit clear: L2 cache disabled, nothing to clean

	; Check that level 2 is a unified cache.
	mrc     p15, 1, r2, c0, c0, 1		; r2 = CLIDR (Cache Level ID Register)
	mov	r3, r2, lsr #3			; move CLIDR[5:3] (level 2 cache type) down to bits [2:0]
	and	r3, r3, #7			; r3 = level 2 cache type: 0 = no cache, 4 = unified
	cmp	r3, #4
	bne 	doneb				; level 2 is not a unified cache, nothing to clean

        DCD     0xf57ff05f                      ; DMB (Data Memory Barrier), emitted as a raw opcode
    
	; Select cache level 2.
	mov	r10, #2				; CSSELR: [3:1] level (0 = L1, 1 = L2), [0] 1 = instruction, 0 = data/unified
        mcr     p15, 2, r10, c0, c0, 0          ; CSSELR = level 2, data/unified side
        mov     r1, #0                          ; operand for the disabled mcr below; r1 is overwritten by the CCSIDR read
        ;mcr     p15, 0, r1, c7, c5, 4          ; prefetch flush to sync the change to the cachesize id reg
        DCD     0xf57ff06f                      ; ISB (Instruction Synchronization Barrier): CSSELR write takes effect before CCSIDR is read

	; Read the geometry of the selected cache.
        mrc     p15, 1, r1, c0, c0, 0           ; r1 = CCSIDR for the selected cache
        and     r2, r1, #7                      ; CCSIDR[2:0] = encoded line size
        add     r2, r2, #4                      ; r2 = log2(line length in bytes) = shift for the set index
        ldr     r4, =0x3ff
        ands    r4, r4, r1, lsr #3              ; r4 = CCSIDR[12:3] = number of ways - 1
        clz     r5, r4                          ; r5 = shift that left-aligns the way number in 32 bits
        ldr     r7, =0x7fff
        ands    r7, r7, r1, lsr #13             ; r7 = CCSIDR[27:13] = number of sets - 1


;  Outer loop (loop2b) walks the set index downwards, inner loop (loop3b)
;  walks the way number downwards; both include 0 because of the bge test.
loop2b  mov     r9, r4                          ; r9 = working way counter, restarted for every set
loop3b  orr     r11, r10, r9, lsl r5            ; r11 = level field | way field
        orr     r11, r11, r7, lsl r2            ; r11 |= set index field

        mcr     p15, 0, r11, c7, c14, 2         ; clean and invalidate data cache line by set/way

        subs    r9, r9, #1                      ; next way
        bge     loop3b                          ; loop until the way counter goes negative

        subs    r7, r7, #1                      ; next set
        bge     loop2b                          ; loop until the set counter goes negative


doneb   mov     r10, #0                             ; switch back to cache level 0
        mcr     p15, 2, r10, c0, c0, 0              ; CSSELR = level 0, data/unified side

        DCD     0xf57ff04f                      ; DSB (Data Synchronization Barrier)
        DCD     0xf57ff06f                      ; ISB (Instruction Synchronization Barrier)
        
        ldmfd   sp!, {r4-r6, r7, r9-r11, lr}    ; restore the registers saved on entry

        RETURN

  LEAF_ENTRY OALFlushL1DCache

;  Cleans and invalidates only the level 1 data/unified cache by set/way.
;
;  The code follows the same sequence as the multi-level walk, but r10 stays
;  at level index 0 and there is no branch back to loop1c, so exactly one
;  level is processed. The routine goes straight to the exit sequence when
;  the Level of Coherency is 0 or when level 1 has no data/unified cache.
;
;  Inputs:   none
;  Returns:  nothing
;
;  Register usage (r4-r7, r9-r11 and lr are pushed on entry, popped on exit):
;       r0  = CLIDR value
;       r10 = cache level in CSSELR format (always 0 here)
;       r2  = CLIDR field offset (0 here), then log2(line length in bytes)
;       r4  = highest way number
;       r5  = left shift that places the way number at the top of r11
;       r7  = current set index (counts down to 0)
;       r9  = current way number (counts down to 0)
;       r11 = set/way operand for the clean-and-invalidate operation
;       r1, r3 = scratch
;
;  NOTE(review): this routine is declared with LEAF_ENTRY but adjusts sp to
;  save registers; confirm that this is acceptable under the kxarm.h entry
;  macro conventions used by this project.

        stmfd   sp!, {r4-r6, r7, r9-r11, lr}    ; save work registers and lr on the stack

        DCD     0xf57ff05f                      ; DMB (Data Memory Barrier), emitted as a raw opcode

        mrc     p15, 1, r0, c0, c0, 1           ; r0 = CLIDR (Cache Level ID Register)
        ands    r3, r0, #0x7000000              ; isolate LoC, CLIDR[26:24]; Z is set when LoC == 0
        mov     r3, r3, lsr #23                 ; r3 = LoC << 1 (not read again in this routine)
        beq     donec                           ; LoC == 0: no level needs cleaning

        mov     r10, #0                         ; select cache level index 0 (L1)
loop1c  add     r2, r10, r10, lsr #1            ; r2 = 3 * level index = 0 (label is not a branch target in this routine)
        mov     r1, r0, lsr r2                  ; shift the level 1 cache type field down to bits [2:0]
        and     r1, r1, #7                      ; r1 = 3-bit cache type for level 1
        cmp     r1, #2                          ; 0 = no cache, 1 = instruction cache only
        blt     donec                           ; no data/unified cache at level 1: exit without cleaning

        mcr     p15, 2, r10, c0, c0, 0          ; CSSELR = level 0, data/unified side
        mov     r1, #0                          ; operand for the disabled mcr below; r1 is overwritten by the CCSIDR read
        ;mcr     p15, 0, r1, c7, c5, 4          ; prefetch flush to sync the change to the cachesize id reg
        DCD     0xf57ff06f                      ; ISB (Instruction Synchronization Barrier): CSSELR write takes effect before CCSIDR is read
        mrc     p15, 1, r1, c0, c0, 0           ; r1 = CCSIDR for the selected cache
        and     r2, r1, #7                      ; CCSIDR[2:0] = encoded line size
        add     r2, r2, #4                      ; r2 = log2(line length in bytes) = shift for the set index
        ldr     r4, =0x3ff
        ands    r4, r4, r1, lsr #3              ; r4 = CCSIDR[12:3] = number of ways - 1
        clz     r5, r4                          ; r5 = shift that left-aligns the way number in 32 bits
        ldr     r7, =0x7fff
        ands    r7, r7, r1, lsr #13             ; r7 = CCSIDR[27:13] = number of sets - 1

;  Outer loop (loop2c) walks the set index downwards, inner loop (loop3c)
;  walks the way number downwards; both include 0 because of the bge test.
loop2c  mov     r9, r4                          ; r9 = working way counter, restarted for every set
loop3c  orr     r11, r10, r9, lsl r5            ; r11 = level field | way field
        orr     r11, r11, r7, lsl r2            ; r11 |= set index field

        mcr     p15, 0, r11, c7, c14, 2         ; clean and invalidate data cache line by set/way

        subs    r9, r9, #1                      ; next way
        bge     loop3c                          ; loop until the way counter goes negative

        subs    r7, r7, #1                      ; next set
        bge     loop2c                          ; loop until the set counter goes negative
        
donec   mov     r10, #0                             ; switch back to cache level 0
        mcr     p15, 2, r10, c0, c0, 0              ; CSSELR = level 0, data/unified side

        DCD     0xf57ff04f                      ; DSB (Data Synchronization Barrier)
        DCD     0xf57ff06f                      ; ISB (Instruction Synchronization Barrier)

        ldmfd   sp!, {r4-r6, r7, r9-r11, lr}    ; restore the registers saved on entry

        RETURN
        END
