gte_arm: implement RTPS, RTPT
diff --git a/libpcsxcore/new_dynarec/linkage_arm.s b/libpcsxcore/new_dynarec/linkage_arm.s
index 8744608..f5af0f5 100644
--- a/libpcsxcore/new_dynarec/linkage_arm.s
+++ b/libpcsxcore/new_dynarec/linkage_arm.s
@@ -1,6 +1,6 @@
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  *   linkage_arm.s for PCSX                                                *
- *   Copyright (C) 2009-2010 Ari64                                         *
+ *   Copyright (C) 2009-2011 Ari64                                         *
  *   Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas                     *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-.equiv HAVE_ARMV7, 1
+/* .equiv HAVE_ARMV7, 1 */
 
-.if HAVE_ARMV7
-       .cpu cortex-a8
-       .fpu vfp
-.else
-       .cpu arm9tdmi
-       .fpu softvfp
-.endif 
        .global rdram
 rdram = 0x80000000
        .global dynarec_local
@@ -62,6 +55,8 @@ rdram = 0x80000000
        .global psxRegs
        .global nd_pcsx_io
        .global psxH_ptr
+       .global inv_code_start
+       .global inv_code_end
 
        .bss
        .align  4
@@ -186,10 +181,16 @@ nd_pcsx_io_end = spu_writef + 4
 psxH_ptr = nd_pcsx_io_end
        .type   psxH_ptr, %object
        .size   psxH_ptr, 4
-align0 = psxH_ptr + 4 /* just for alignment */
+inv_code_start = psxH_ptr + 4
+       .type   inv_code_start, %object
+       .size   inv_code_start, 4
+inv_code_end = inv_code_start + 4
+       .type   inv_code_end, %object
+       .size   inv_code_end, 4
+align0 = inv_code_end + 4 /* just for alignment */
        .type   align0, %object
-       .size   align0, 4
-branch_target = align0 + 4
+       .size   align0, 12
+branch_target = align0 + 12
        .type   branch_target, %object
        .size   branch_target, 4
 mini_ht = branch_target + 4
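
The two new words above live inside dynarec_local and are exported with .global, so the C side of the dynarec can read and update them. Their meaning is inferred from how they are used later in this diff: they cache one address range that is known to need no invalidation. A minimal extern view in C (declaration style is an assumption; only the symbol names come from the diff):

    /* Sketch only: extern view of the two new dynarec_local words. */
    extern unsigned int inv_code_start;  /* lowest address of the cached
                                            no-compiled-code range */
    extern unsigned int inv_code_end;    /* highest address, inclusive */
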
@@ -203,52 +204,66 @@ memory_map = restore_candidate + 512
        .size   memory_map, 4194304
 dynarec_local_end = memory_map + 4194304
 
-       .text
-       .align  2
-       .global dyna_linker
-       .type   dyna_linker, %function
-dyna_linker:
+.macro load_var_adr reg var
+.if HAVE_ARMV7
+       movw    \reg, #:lower16:\var
+       movt    \reg, #:upper16:\var
+.else
+       ldr     \reg, =\var
+.endif
+.endm
+
+.macro dyna_linker_main
        /* r0 = virtual target address */
        /* r1 = instruction to patch */
-       mov     r12, r0
-       mov     r6, #4096
-       mov     r2, #0x80000
        ldr     r3, .jiptr
+       /* get_page */
+       lsr     r2, r0, #12
+       mov     r6, #4096
+       bic     r2, r2, #0xe0000
        sub     r6, r6, #1
+       cmp     r2, #0x1000
        ldr     r7, [r1]
-       eor     r2, r2, r12, lsr #12
-       and     r6, r6, r12, lsr #12
+       biclt   r2, #0x0e00
+       and     r6, r6, r2
        cmp     r2, #2048
        add     r12, r7, #2
        orrcs   r2, r6, #2048
        ldr     r5, [r3, r2, lsl #2]
        lsl     r12, r12, #8
+       add     r6, r1, r12, asr #6
+       mov     r8, #0
        /* jump_in lookup */
-.A1:
+1:
        movs    r4, r5
-       beq     .A3
+       beq     2f
        ldr     r3, [r5]
        ldr     r5, [r4, #12]
        teq     r3, r0
-       bne     .A1
+       bne     1b
        ldr     r3, [r4, #4]
        ldr     r4, [r4, #8]
        tst     r3, r3
-       bne     .A1
-.A2:
-       mov     r5, r1
-       add     r1, r1, r12, asr #6
-       teq     r1, r4
+       bne     1b
+       teq     r4, r6
        moveq   pc, r4 /* Stale i-cache */
+       mov     r8, r4
+       b       1b     /* jump_in may have dupes, continue search */
+2:
+       tst     r8, r8
+       beq     3f     /* r0 not in jump_in */
+
+       mov     r5, r1
+       mov     r1, r6
        bl      add_link
-       sub     r2, r4, r5
+       sub     r2, r8, r5
        and     r1, r7, #0xff000000
        lsl     r2, r2, #6
        sub     r1, r1, #2
        add     r1, r1, r2, lsr #8
        str     r1, [r5]
-       mov     pc, r4
-.A3:
+       mov     pc, r8
+3:
        /* hash_table lookup */
        cmp     r2, #2048
        ldr     r3, .jdptr
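
The get_page sequence at the top of dyna_linker_main replaces the old eor/and pair. A sketch of what those instructions compute, with the constants taken straight from the code (the helper name is ours, not from the source):

    /* Sketch of the get_page sequence above: virtual address -> page index. */
    static unsigned int get_page_sketch(unsigned int vaddr)
    {
        unsigned int page = vaddr >> 12;      /* lsr   r2, r0, #12        */
        page &= ~0xe0000u;                    /* bic   r2, r2, #0xe0000   */
        if (page < 0x1000)                    /* cmp   r2, #0x1000        */
            page &= ~0x0e00u;                 /* biclt: fold RAM mirrors  */
        if (page >= 2048)                     /* cmp   r2, #2048          */
            page = (page & 4095) | 2048;      /* orrcs r2, r6, #2048      */
        return page;
    }
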
@@ -266,14 +281,14 @@ dyna_linker:
        teq     r7, r0
        ldreq   pc, [r6, #12]
        /* jump_dirty lookup */
-.A6:
+6:
        movs    r4, r5
-       beq     .A8
+       beq     8f
        ldr     r3, [r5]
        ldr     r5, [r4, #12]
        teq     r3, r0
-       bne     .A6
-.A7:
+       bne     6b
+7:
        ldr     r1, [r4, #8]
        /* hash_table insert */
        ldr     r2, [r6]
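
The jump_in walk earlier and the jump_dirty walk above traverse the same singly linked list; the load offsets (#0, #4, #8, #12) match the dynarec's ll_entry layout. A sketch of the node as the asm sees it (field names follow our reading of new_dynarec.c, not something this diff guarantees):

    /* Node layout implied by the load offsets used in both walks. */
    struct ll_entry {
        unsigned int vaddr;     /* [#0]  guest address of the block     */
        unsigned int reg32;     /* [#4]  must be 0 for a direct link    */
        void *addr;             /* [#8]  host entry point of the block  */
        struct ll_entry *next;  /* [#12] next candidate in this page    */
    };
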
@@ -283,7 +298,18 @@ dyna_linker:
        str     r2, [r6, #8]
        str     r3, [r6, #12]
        mov     pc, r1
-.A8:
+8:
+.endm
+
+       .text
+       .align  2
+       .global dyna_linker
+       .type   dyna_linker, %function
+dyna_linker:
+       /* r0 = virtual target address */
+       /* r1 = instruction to patch */
+       dyna_linker_main
+
        mov     r4, r0
        mov     r5, r1
        bl      new_recompile_block
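
The hash_table insert completed in the hunk above is a two-way, move-to-front update: each bucket holds two (vaddr, codeptr) pairs, the old front pair is demoted to the second slot, and the freshly looked-up block takes the front. Roughly, in C (struct and helper names are illustrative, the layout comes from the 0/4/8/12 store offsets):

    /* Sketch of the four str instructions in the hash_table insert. */
    struct ht_bucket {
        unsigned int vaddr0; void *ptr0;  /* front pair, probed first */
        unsigned int vaddr1; void *ptr1;  /* demoted pair             */
    };

    static void ht_insert_sketch(struct ht_bucket *b,
                                 unsigned int vaddr, void *ptr)
    {
        b->vaddr1 = b->vaddr0;  /* old front drops to the second slot  */
        b->ptr1   = b->ptr0;
        b->vaddr0 = vaddr;      /* fresh translation becomes the front */
        b->ptr0   = ptr;
    }
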
@@ -328,80 +354,8 @@ exec_pagefault:
 dyna_linker_ds:
        /* r0 = virtual target address */
        /* r1 = instruction to patch */
-       mov     r12, r0
-       mov     r6, #4096
-       mov     r2, #0x80000
-       ldr     r3, .jiptr
-       sub     r6, r6, #1
-       ldr     r7, [r1]
-       eor     r2, r2, r12, lsr #12
-       and     r6, r6, r12, lsr #12
-       cmp     r2, #2048
-       add     r12, r7, #2
-       orrcs   r2, r6, #2048
-       ldr     r5, [r3, r2, lsl #2]
-       lsl     r12, r12, #8
-       /* jump_in lookup */
-.B1:
-       movs    r4, r5
-       beq     .B3
-       ldr     r3, [r5]
-       ldr     r5, [r4, #12]
-       teq     r3, r0
-       bne     .B1
-       ldr     r3, [r4, #4]
-       ldr     r4, [r4, #8]
-       tst     r3, r3
-       bne     .B1
-.B2:
-       mov     r5, r1
-       add     r1, r1, r12, asr #6
-       teq     r1, r4
-       moveq   pc, r4 /* Stale i-cache */
-       bl      add_link
-       sub     r2, r4, r5
-       and     r1, r7, #0xff000000
-       lsl     r2, r2, #6
-       sub     r1, r1, #2
-       add     r1, r1, r2, lsr #8
-       str     r1, [r5]
-       mov     pc, r4
-.B3:
-       /* hash_table lookup */
-       cmp     r2, #2048
-       ldr     r3, .jdptr
-       eor     r4, r0, r0, lsl #16
-       lslcc   r2, r0, #9
-       ldr     r6, .htptr
-       lsr     r4, r4, #12
-       lsrcc   r2, r2, #21
-       bic     r4, r4, #15
-       ldr     r5, [r3, r2, lsl #2]
-       ldr     r7, [r6, r4]!
-       teq     r7, r0
-       ldreq   pc, [r6, #4]
-       ldr     r7, [r6, #8]
-       teq     r7, r0
-       ldreq   pc, [r6, #12]
-       /* jump_dirty lookup */
-.B6:
-       movs    r4, r5
-       beq     .B8
-       ldr     r3, [r5]
-       ldr     r5, [r4, #12]
-       teq     r3, r0
-       bne     .B6
-.B7:
-       ldr     r1, [r4, #8]
-       /* hash_table insert */
-       ldr     r2, [r6]
-       ldr     r3, [r6, #4]
-       str     r0, [r6]
-       str     r1, [r6, #4]
-       str     r2, [r6, #8]
-       str     r3, [r6, #12]
-       mov     pc, r1
-.B8:
+       dyna_linker_main
+
        mov     r4, r0
        bic     r0, r0, #7
        mov     r5, r1
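
With dyna_linker and dyna_linker_ds both reduced to dyna_linker_main, the add_link patch path lives in one place. Its sub/and/lsl/sub/add sequence rewrites the ARM branch at the call site: keep the condition and opcode byte, recompute the 24-bit offset. As arithmetic (helper name is ours; the encoding is the standard ARM B/BL form, and the sketch ignores the borrow-propagation subtlety of the asm's sub-then-add ordering):

    /* Sketch of the patch math: old_insn was read from the patch site,
     * target is the entry point of the compiled block. */
    static unsigned int patch_branch_sketch(unsigned int old_insn,
                                            unsigned int site,
                                            unsigned int target)
    {
        /* B reaches site + 8 + 4*imm24, hence imm24 = (target-site-8)/4 */
        unsigned int imm24 = ((target - site - 8u) >> 2) & 0x00ffffffu;
        return (old_insn & 0xff000000u) | imm24;  /* keep cond + opcode */
    }
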
@@ -747,18 +701,119 @@ indirect_jump:
        .size   indirect_jump, .-indirect_jump
        .size   indirect_jump_indexed, .-indirect_jump_indexed
 
+       .align  2
+       .global invalidate_addr_r0
+       .type   invalidate_addr_r0, %function
+invalidate_addr_r0:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       b       invalidate_addr_call
+       .size   invalidate_addr_r0, .-invalidate_addr_r0
+       .align  2
+       .global invalidate_addr_r1
+       .type   invalidate_addr_r1, %function
+invalidate_addr_r1:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r1
+       b       invalidate_addr_call
+       .size   invalidate_addr_r1, .-invalidate_addr_r1
+       .align  2
+       .global invalidate_addr_r2
+       .type   invalidate_addr_r2, %function
+invalidate_addr_r2:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r2
+       b       invalidate_addr_call
+       .size   invalidate_addr_r2, .-invalidate_addr_r2
+       .align  2
+       .global invalidate_addr_r3
+       .type   invalidate_addr_r3, %function
+invalidate_addr_r3:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r3
+       b       invalidate_addr_call
+       .size   invalidate_addr_r3, .-invalidate_addr_r3
+       .align  2
+       .global invalidate_addr_r4
+       .type   invalidate_addr_r4, %function
+invalidate_addr_r4:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r4
+       b       invalidate_addr_call
+       .size   invalidate_addr_r4, .-invalidate_addr_r4
+       .align  2
+       .global invalidate_addr_r5
+       .type   invalidate_addr_r5, %function
+invalidate_addr_r5:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r5
+       b       invalidate_addr_call
+       .size   invalidate_addr_r5, .-invalidate_addr_r5
+       .align  2
+       .global invalidate_addr_r6
+       .type   invalidate_addr_r6, %function
+invalidate_addr_r6:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r6
+       b       invalidate_addr_call
+       .size   invalidate_addr_r6, .-invalidate_addr_r6
+       .align  2
+       .global invalidate_addr_r7
+       .type   invalidate_addr_r7, %function
+invalidate_addr_r7:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r7
+       b       invalidate_addr_call
+       .size   invalidate_addr_r7, .-invalidate_addr_r7
+       .align  2
+       .global invalidate_addr_r8
+       .type   invalidate_addr_r8, %function
+invalidate_addr_r8:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r8
+       b       invalidate_addr_call
+       .size   invalidate_addr_r8, .-invalidate_addr_r8
+       .align  2
+       .global invalidate_addr_r9
+       .type   invalidate_addr_r9, %function
+invalidate_addr_r9:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r9
+       b       invalidate_addr_call
+       .size   invalidate_addr_r9, .-invalidate_addr_r9
+       .align  2
+       .global invalidate_addr_r10
+       .type   invalidate_addr_r10, %function
+invalidate_addr_r10:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r10
+       b       invalidate_addr_call
+       .size   invalidate_addr_r10, .-invalidate_addr_r10
+       .align  2
+       .global invalidate_addr_r12
+       .type   invalidate_addr_r12, %function
+invalidate_addr_r12:
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       mov     r0, r12
+       .size   invalidate_addr_r12, .-invalidate_addr_r12
+       .align  2
+       .global invalidate_addr_call
+       .type   invalidate_addr_call, %function
+invalidate_addr_call:
+       ldr     r12, [fp, #inv_code_start-dynarec_local]
+       ldr     lr, [fp, #inv_code_end-dynarec_local]
+       cmp     r0, r12
+       cmpcs   lr, r0
+       blcc    invalidate_addr
+       ldmia   fp, {r0, r1, r2, r3, r12, pc}
+       .size   invalidate_addr_call, .-invalidate_addr_call
+
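+
All the invalidate_addr_rN entry points funnel into invalidate_addr_call, which spills the caller-saved registers into dynarec_local and only drops into C when the address lies outside the cached inv_code range. The check, as C (the wrapper name is ours; invalidate_addr is the existing C entry point):

    /* Sketch of the cmp/cmpcs/blcc sequence above. */
    extern unsigned int inv_code_start, inv_code_end;
    void invalidate_addr(unsigned int addr);

    static void invalidate_addr_checked(unsigned int addr)
    {
        if (addr < inv_code_start || addr > inv_code_end)
            invalidate_addr(addr);  /* outside the cached clean range */
        /* else: range already known to hold no code, skip the C call */
    }
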
        .align  2
        .global new_dyna_start
        .type   new_dyna_start, %function
 new_dyna_start:
 	/* ip is stored to conform to EABI alignment */
        stmfd   sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
-.if HAVE_ARMV7
-       movw    fp, #:lower16:dynarec_local
-       movt    fp, #:upper16:dynarec_local
-.else
-       ldr     fp, .dlptr
-.endif
+       load_var_adr fp, dynarec_local
        ldr     r0, [fp, #pcaddr-dynarec_local]
        bl      get_addr_ht
        ldr     r1, [fp, #next_interupt-dynarec_local]
@@ -766,8 +821,6 @@ new_dyna_start:
        str     r1, [fp, #last_count-dynarec_local]
        sub     r10, r10, r1
        mov     pc, r0
-.dlptr:
-       .word   dynarec_local
        .size   new_dyna_start, .-new_dyna_start
 
 /* --------------------------------------- */
@@ -785,6 +838,7 @@ new_dyna_start:
 .global        ari_write_ram_mirror8
 .global        ari_write_ram_mirror16
 .global        ari_write_ram_mirror32
+.global        ari_write_ram_mirror_ro32
 .global        ari_read_bios8
 .global        ari_read_bios16
 .global        ari_read_bios32
@@ -864,8 +918,13 @@ ari_write_ram32:
        str\pf  r1, [r0]
        tst     r2, r2
        movne   pc, lr
-       lsr     r0, r0, #12
-       b       invalidate_block
+       ldr     r1, [fp, #inv_code_start-dynarec_local]
+       ldr     r2, [fp, #inv_code_end-dynarec_local]
+       cmp     r0, r1
+       cmpcs   r2, r0
+       movcs   pc, lr
+       nop
+       b       invalidate_addr
 .endm
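
The RAM write handlers used to shift the address and branch to invalidate_block unconditionally; now they test the inv_code range inline first, so repeated writes into a data-only region stay on the fast path. The rewritten macro tail, roughly in C ('no_code' stands for whatever non-zero r2 means in the original macro, an assumption on our part; a 32-bit target is assumed for the pointer cast):

    /* Sketch of the rewritten write-handler tail. */
    static void write_ram32_sketch(volatile unsigned int *p, unsigned int val,
                                   unsigned int no_code)
    {
        unsigned int addr = (unsigned int)p;   /* 32-bit target assumed   */
        *p = val;                              /* str r1, [r0]            */
        if (no_code)                           /* tst r2, r2; movne pc,lr */
            return;
        if (addr >= inv_code_start && addr <= inv_code_end)
            return;                            /* cached clean range      */
        invalidate_addr(addr);                 /* b invalidate_addr       */
    }
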
 
 ari_write_ram_mirror8:
@@ -877,6 +936,14 @@ ari_write_ram_mirror16:
 ari_write_ram_mirror32:
        ari_write_ram_mirror (3<<11), word,
 
+ari_write_ram_mirror_ro32:
+       load_var_adr r0, pcsx_ram_is_ro
+       ldr     r0, [r0]
+       tst     r0, r0
+       movne   pc, lr
+       nop
+       b       ari_write_ram_mirror32
+
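+
ari_write_ram_mirror_ro32 is a new guard in front of the 32-bit mirror write: when pcsx_ram_is_ro is set, the store is silently dropped, otherwise it falls through to the normal mirror handler. In C terms (pcsx_ram_is_ro is the flag the asm loads; the C twin of the mirror handler is hypothetical):

    /* Sketch of the new write-protect guard. */
    extern int pcsx_ram_is_ro;
    void write_ram_mirror32_sketch(unsigned int addr, unsigned int val);

    static void write_ram_mirror_ro32_sketch(unsigned int addr,
                                             unsigned int val)
    {
        if (pcsx_ram_is_ro)
            return;  /* RAM is write-protected: drop the store */
        write_ram_mirror32_sketch(addr, val);  /* normal mirror path */
    }
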
 
 .macro ari_read_bios_mirror bic_const op
        ldr     r0, [fp, #address-dynarec_local]