X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=Pico%2Fcarthw%2Fsvp%2Fstub_arm.S;h=debcc0e8c9d2c68f2d2c1d3e0ea0265f1b689763;hb=e5fa9817777032758511868c8aaa9ff780786c3f;hp=246b4b4acb907f8cb90de3bb9018b676fe0b3229;hpb=f5d1115ffc2e9e39e7431381e6a3f367b1252a32;p=picodrive.git diff --git a/Pico/carthw/svp/stub_arm.S b/Pico/carthw/svp/stub_arm.S index 246b4b4..debcc0e 100644 --- a/Pico/carthw/svp/stub_arm.S +++ b/Pico/carthw/svp/stub_arm.S @@ -1,10 +1,17 @@ @ vim:filetype=armasm +@ Compiler helper functions and some SVP HLE code + +@ (c) Copyright 2008, Grazvydas "notaz" Ignotas +@ Free for non-commercial use. + .if 0 #include "compiler.h" .endif .global tcache +.global ssp_block_table +.global ssp_block_table_iram .global flush_inval_caches .global ssp_drc_entry @@ -13,13 +20,26 @@ .global ssp_drc_end .global ssp_hle_800 .global ssp_hle_902 - -@ translation cache buffer +.global ssp_hle_07_030 +.global ssp_hle_07_036 +.global ssp_hle_07_6d6 +.global ssp_hle_11_12c +.global ssp_hle_11_384 +.global ssp_hle_11_38a + +@ translation cache buffer + pointer table .text .align 12 @ 4096 -.size tcache, TCACHE_SIZE +.size tcache, SSP_TCACHE_SIZE +.size ssp_block_table, SSP_BLOCKTAB_SIZE +.size ssp_block_table_iram, SSP_BLOCKTAB_IRAM_SIZE tcache: - .space TCACHE_SIZE + .space SSP_TCACHE_SIZE +ssp_block_table: + .space SSP_BLOCKTAB_SIZE +ssp_block_table_iram: + .space SSP_BLOCKTAB_IRAM_SIZE + .space SSP_BLOCKTAB_ALIGN_SIZE .text @@ -56,6 +76,7 @@ flush_inval_caches: #define SSP_OFFS_PM_WRITE 0x46c // pmac_write[] #define SSP_OFFS_EMUSTAT 0x484 // emu_status #define SSP_OFFS_IRAM_ROM 0x48c // ptr_iram_rom +#define SSP_OFFS_DRAM 0x490 // ptr_dram #define SSP_OFFS_IRAM_DIRTY 0x494 #define SSP_OFFS_IRAM_CTX 0x498 // iram_context #define SSP_OFFS_BLTAB 0x49c // block_table @@ -252,6 +273,20 @@ ssp_hle_800: b ssp_drc_next +.macro hle_flushflags + bic r6, r6, #0xf + mrs r1, cpsr + orr r6, r6, r1, lsr #28 +.endm + +.macro hle_popstack + sub r6, r6, #0x20000000 + add r1, r7, #0x400 + add r1, r1, #0x048 @ stack + add r1, r1, r6, lsr #28 + ldrh r0, [r1] +.endm + ssp_hle_902: cmp r11, #0 ble ssp_drc_end @@ -302,12 +337,311 @@ ssp_hle_902_loop: str r0, [r7, #(SSP_OFFS_GR+SSP_PMC*4)] str r0, [r7, #(SSP_OFFS_PM_WRITE+4*4)] - sub r6, r6, #0x20000000 - add r1, r7, #0x400 - add r1, r1, #0x048 @ stack - add r1, r1, r6, lsr #28 - ldrh r0, [r1] + hle_popstack subs r11,r11,#16 @ timeslice is likely to end ble ssp_drc_end b ssp_drc_next + +@ this one is car rendering related +.macro hle_11_12c_mla offs_in + ldrsh r5, [r7, #(\offs_in+0)] + ldrsh r0, [r7, #(\offs_in+2)] + ldrsh r1, [r7, #(\offs_in+4)] + mul r5, r2, r5 + ldrsh r12,[r7, #(\offs_in+6)] + mla r5, r3, r0, r5 + mla r5, r4, r1, r5 + add r5, r5, r12,lsl #11 + + movs r5, r5, lsr #13 + add r1, r7, r8, lsr #23 + strh r5, [r1] + add r8, r8, #(1<<24) +.endm + +ssp_hle_11_12c: + cmp r11, #0 + ble ssp_drc_end + + mov r0, #0 + bl ssp_pm_read + mov r4, r0 + + mov r0, #0 + bl ssp_pm_read + mov r5, r0 + + mov r0, #0 + bl ssp_pm_read + + mov r2, r4, lsl #16 + mov r2, r2, asr #15 @ (r7|00) << 1 + mov r3, r5, lsl #16 + mov r3, r3, asr #15 @ (r7|01) << 1 + mov r4, r0, lsl #16 + mov r4, r4, asr #15 @ (r7|10) << 1 + + bic r8, r8, #0xff + mov r8, r8, ror #16 + + hle_11_12c_mla 0x20 + hle_11_12c_mla 0x28 + hle_11_12c_mla 0x30 + + mov r8, r8, ror #16 + orr r8, r8, #0x1c +@ hle_flushflags + hle_popstack + sub r11,r11,#33 + b ssp_drc_next + + +ssp_hle_11_384: + mov r3, #2 + b ssp_hle_11_38x + +ssp_hle_11_38a: + mov r3, #3 @ r5 + +ssp_hle_11_38x: + cmp r11, #0 + ble ssp_drc_end + + mov r2, #0 @ EFh, EEh + mov r1, #1 @ r4 + add r0, r7, #0x1c0 @ r0 (based) + +ssp_hle_11_38x_loop: + ldrh r5, [r0], #2 + ldr r12,[r7, #0x224] + mov r5, r5, lsl #16 + eor r5, r5, r5, asr #31 + add r5, r5, r5, lsr #31 @ abs(r5) + cmp r5, r12,lsl #16 + orrpl r2, r2, r1,lsl #16 @ EFh |= r4 + + ldrh r5, [r0, #2]! + ldr r12,[r7, #0x220] + cmp r5, r12,lsr #16 + orrpl r2, r2, r1,lsl #16 @ EFh |= r4 + + ldr r12,[r7, #0x1e8] + add r0, r0, #2 + mov r12,r12,lsl #16 + cmp r5, r12,lsr #16 + orrmi r2, r2, r1 + + mov r1, r1, lsl #1 + subs r3, r3, #1 + bpl ssp_hle_11_38x_loop + + str r2, [r7, #0x1dc] + sub r0, r0, r7 + bic r8, r8, #0xff + orr r8, r8, r0, lsr #1 + bic r9, r9, #0xff + orr r9, r9, r1 + +@ hle_flushflags + hle_popstack + sub r11,r11,#(9+30*4) + b ssp_drc_next + + +ssp_hle_07_6d6: + cmp r11, #0 + ble ssp_drc_end + + ldr r1, [r7, #0x20c] + and r0, r8, #0xff @ assuming alignment + add r0, r7, r0, lsl #1 + mov r2, r1, lsr #16 + mov r1, r1, lsl #16 @ 106h << 16 + mov r2, r2, lsl #16 @ 107h << 16 + +ssp_hle_07_6d6_loop: + ldr r5, [r0], #4 + tst r5, r5 + bmi ssp_hle_07_6d6_end + mov r5, r5, lsl #16 + cmp r5, r1 + movmi r1, r5 + cmp r5, r2 + sub r11,r11,#16 + bmi ssp_hle_07_6d6_loop + mov r2, r5 + b ssp_hle_07_6d6_loop + +ssp_hle_07_6d6_end: + sub r0, r0, r7 + mov r0, r0, lsr #1 + bic r8, r8, #0xff + orr r8, r8, r0 + orr r1, r2, r1, lsr #16 + str r1, [r7, #0x20c] + hle_popstack + sub r11,r11,#6 + b ssp_drc_next + + +ssp_hle_07_030: + ldrh r0, [r7] + mov r0, r0, lsl #4 + orr r0, r0, r0, lsr #16 + strh r0, [r7] + sub r11,r11,#3 + +ssp_hle_07_036: + ldr r1, [r7, #0x1e0] @ F1h F0h + rsb r5, r1, r1, lsr #16 + mov r5, r5, lsl #16 @ AL not needed + cmp r5, #(4<<16) + sub r11,r11,#5 + bmi hle_07_036_ending2 + ldr r1, [r7, #0x1dc] @ EEh + cmp r5, r1, lsl #16 + sub r11,r11,#5 + bpl hle_07_036_ret + + mov r0, r5, lsr #16 + add r1, r7, #0x100 + strh r0, [r1, #0xea] @ F5h + ldr r0, [r7, #0x1e0] @ F0h + and r0, r0, #3 + strh r0, [r1, #0xf0] @ F8h + add r2, r0, #0xc0 @ r2 + add r2, r7, r2, lsl #1 + ldrh r2, [r2] + ldr r0, [r7] + mov r1, #4 + and r0, r0, r2 + bl ssp_pm_write + @ will handle PMC later + ldr r0, [r7, #0x1e8] @ F5h << 16 + ldr r1, [r7, #0x1f0] @ F8h + ldr r2, [r7, #0x1d4] @ EAh + sub r0, r0, #(3<<16) + add r0, r0, r1, lsl #16 + sub r0, r2, r0, asr #18 + and r0, r0, #0x7f + rsbs r0, r0, #0x78 @ length + ble hle_07_036_ending1 + + sub r11,r11,r0 + + @ copy part + ldr r1, [r7, #(SSP_OFFS_GR+SSP_PMC*4)] + ldr r2, [r7, #SSP_OFFS_DRAM] + mov r1, r1, lsl #16 + add r1, r2, r1, lsr #15 @ addr (based) + ldrh r2, [r7, #0] @ pattern + ldrh r3, [r7, #6] @ mode + + mov r12, #0x4000 + orr r12,r12,#0x0018 + subs r12,r3, r12 + subnes r12,r12,#0x0400 + blne tr_unhandled + + orr r2, r2, r2, lsl #16 + tst r3, #0x400 + bne hle_07_036_ovrwr + +hle_07_036_no_ovrwr: + tst r1, #2 + strneh r2, [r1], #0x3e @ align + subne r0, r0, #1 + subs r0, r0, #4 + blt hle_07_036_l2 + +hle_07_036_l1: + subs r0, r0, #4 + str r2, [r1], #0x40 + str r2, [r1], #0x40 + bge hle_07_036_l1 + +hle_07_036_l2: + tst r0, #2 + strne r2, [r1], #0x40 + tst r0, #1 + strneh r2, [r1], #2 + b hle_07_036_end_copy + +hle_07_036_ovrwr: + tst r2, #0x000f + orreq r12,r12,#0x000f + tst r2, #0x00f0 + orreq r12,r12,#0x00f0 + tst r2, #0x0f00 + orreq r12,r12,#0x0f00 + tst r2, #0xf000 + orreq r12,r12,#0xf000 + orrs r12,r12,r12,lsl #16 + beq hle_07_036_no_ovrwr + + tst r1, #2 + beq hle_07_036_ol0 + ldrh r3, [r1] + and r3, r3, r12 + orr r3, r3, r2 + strh r3, [r1], #0x3e @ align + sub r0, r0, #1 + +hle_07_036_ol0: + subs r0, r0, #2 + blt hle_07_036_ol2 + +hle_07_036_ol1: + subs r0, r0, #2 + ldr r3, [r1] + and r3, r3, r12 + orr r3, r3, r2 + str r3, [r1], #0x40 + bge hle_07_036_ol1 + +hle_07_036_ol2: + tst r0, #1 + ldrneh r3, [r1] + andne r3, r3, r12 + orrne r3, r3, r2 + strneh r3, [r1], #2 + +hle_07_036_end_copy: + ldr r2, [r7, #SSP_OFFS_DRAM] + add r3, r7, #0x400 + sub r0, r1, r2 @ new addr + mov r0, r0, lsr #1 + strh r0, [r3, #(0x6c+4*4)] @ SSP_OFFS_PM_WRITE+4*4 (low) + +hle_07_036_ending1: + ldr r0, [r7, #0x1e0] @ F1h << 16 + add r0, r0, #(1<<16) + and r0, r0, #(3<<16) + add r0, r0, #(0xc4<<16) + bic r8, r8, #0xff0000 + orr r8, r8, r0 @ r2 + add r0, r7, r0, lsr #15 + ldrh r0, [r0] + ldr r2, [r7] + and r0, r0, r2 + movs r5, r0, lsl #16 + + ldr r1, [r7, #4] @ new mode + add r2, r7, #0x400 + strh r1, [r2, #(0x6c+4*4+2)] @ SSP_OFFS_PM_WRITE+4*4 (high) + mov r1, #4 + bl ssp_pm_write + sub r11,r11,#35 + +hle_07_036_ret: + hle_popstack + b ssp_drc_next + +hle_07_036_ending2: + sub r11,r11,#3 + movs r5, r5, lsl #1 + bmi hle_07_036_ret + mov r0, #0x87 + b ssp_drc_next @ let the dispatcher finish this +