From 66dda842eae01f47f5389b931ec9567fb0bbb6a1 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 8 Aug 2017 01:45:57 +0300 Subject: [PATCH] eliminate text relocations They are forbidden on newer versions of Android and iOS. This costs a few more instructions, but only in init code. No performance difference measured (basic tests only). --- Cyclone.h | 5 +- Main.cpp | 160 ++++++++++++++++++++++++++++++--------------------- tools/idle.s | 55 ++++++++++++------ 3 files changed, 137 insertions(+), 83 deletions(-) diff --git a/Cyclone.h b/Cyclone.h index 1933a46..fdcff59 100644 --- a/Cyclone.h +++ b/Cyclone.h @@ -48,7 +48,10 @@ struct Cyclone int (*IrqCallback)(int int_level); // [r7,#0x8c] optional irq callback function, see config.h void (*ResetCallback)(void); // [r7,#0x90] if enabled in config.h, calls this whenever RESET opcode is encountered. int (*UnrecognizedCallback)(void); // [r7,#0x94] if enabled in config.h, calls this whenever unrecognized opcode is encountered. - unsigned int internal[6]; // [r7,#0x98] reserved for internal use, do not change. + void *internal_CycloneEnd; // [r7,#0x98] internal, do not modify + int internal_s_cycles; // [r7,#0x9c] internal, do not modify + void *internal_s_CycloneEnd; // [r7,#0xa0] internal, do not modify + unsigned int internal[3]; // [r7,#0xa4] reserved for internal use, do not change. }; // Initialize. 
Used only if Cyclone was compiled with compressed jumptable, see config.h diff --git a/Main.cpp b/Main.cpp index b6f49c3..400d3a6 100644 --- a/Main.cpp +++ b/Main.cpp @@ -111,50 +111,29 @@ void ltorg() } #if (CYCLONE_FOR_GENESIS == 2) -// r12=ptr to tas in table, trashes r0,r1 -static void ChangeTAS(int norm) +static const char *tas_ops[] = { + "Op4ad0", "Op4ad8", "Op4adf", + "Op4ae0", "Op4ae7", "Op4ae8", + "Op4af0", "Op4af8", "Op4af9", +}; + +// get handler address in r0, OT (offset table) in r2 +static void ChangeTASGet(unsigned int i) { - ot(" ldr r0,=Op4ad0%s\n",norm?"_":""); - ot(" mov r1,#8\n"); - ot("setrtas_loop%i0%s ;@ 4ad0-4ad7\n",norm,ms?"":":"); - ot(" subs r1,r1,#1\n"); - ot(" str r0,[r12],#4\n"); - ot(" bne setrtas_loop%i0\n",norm); - ot(" ldr r0,=Op4ad8%s\n",norm?"_":""); - ot(" mov r1,#7\n"); - ot("setrtas_loop%i1%s ;@ 4ad8-4ade\n",norm,ms?"":":"); - ot(" subs r1,r1,#1\n"); - ot(" str r0,[r12],#4\n"); - ot(" bne setrtas_loop%i1\n",norm); - ot(" ldr r0,=Op4adf%s\n",norm?"_":""); - ot(" str r0,[r12],#4\n"); - ot(" ldr r0,=Op4ae0%s\n",norm?"_":""); - ot(" mov r1,#7\n"); - ot("setrtas_loop%i2%s ;@ 4ae0-4ae6\n",norm,ms?"":":"); - ot(" subs r1,r1,#1\n"); - ot(" str r0,[r12],#4\n"); - ot(" bne setrtas_loop%i2\n",norm); - ot(" ldr r0,=Op4ae7%s\n",norm?"_":""); - ot(" str r0,[r12],#4\n"); - ot(" ldr r0,=Op4ae8%s\n",norm?"_":""); - ot(" mov r1,#8\n"); - ot("setrtas_loop%i3%s ;@ 4ae8-4aef\n",norm,ms?"":":"); - ot(" subs r1,r1,#1\n"); - ot(" str r0,[r12],#4\n"); - ot(" bne setrtas_loop%i3\n",norm); - ot(" ldr r0,=Op4af0%s\n",norm?"_":""); - ot(" mov r1,#8\n"); - ot("setrtas_loop%i4%s ;@ 4af0-4af7\n",norm,ms?"":":"); - ot(" subs r1,r1,#1\n"); - ot(" str r0,[r12],#4\n"); - ot(" bne setrtas_loop%i4\n",norm); - ot(" ldr r0,=Op4af8%s\n",norm?"_":""); - ot(" str r0,[r12],#4\n"); - ot(" ldr r0,=Op4af9%s\n",norm?"_":""); - ot(" str r0,[r12],#4\n"); + if (i >= sizeof(tas_ops) / sizeof(tas_ops[0])) + abort(); + ot(" ldr r0,[r2,#%d*4] ;@ %s\n",i,tas_ops[i]); + ot(" add 
r0,r0,r2\n"); } #endif +static void LoadCycloneJumpTab(int reg, int tmp) +{ + ot(" adr r%d,CycloneOT_JT\n", tmp); + ot(" ldr r%d,[r%d] ;@ CycloneJumpTab-CycloneOT_JT\n", reg, tmp); + ot(" add r%d,r%d,r%d ;@ =CycloneJumpTab\n", reg, reg, tmp); +} + #if EMULATE_ADDRESS_ERRORS_JUMP || EMULATE_ADDRESS_ERRORS_IO static void AddressErrorWrapper(char rw, const char *dataprg, int iw) { @@ -198,14 +177,13 @@ static void PrintFramework() ot(" mov r7,r0 ;@ r7 = Pointer to Cpu Context\n"); ot(" ;@ r0-3 = Temporary registers\n"); ot(" ldrb r10,[r7,#0x46] ;@ r10 = Flags (NZCV)\n"); - ot(" ldr r6,=CycloneJumpTab ;@ r6 = Opcode Jump table\n"); + ot(" ldr r6,[r7,#0x54] ;@ r6 = Opcode Jump table (from reset)\n"); ot(" ldr r5,[r7,#0x5c] ;@ r5 = Cycles\n"); ot(" ldr r4,[r7,#0x40] ;@ r4 = Current PC + Memory Base\n"); ot(" ;@ r8 = Current Opcode\n"); ot(" ldr r1,[r7,#0x44] ;@ Get SR high T_S__III and irq level\n"); ot(" mov r10,r10,lsl #28;@ r10 = Flags 0xf0000000, cpsr format\n"); ot(" ;@ r11 = Source value / Memory Base\n"); - ot(" str r6,[r7,#0x54] ;@ make a copy to avoid literal pools\n"); ot("\n"); #if (CYCLONE_FOR_GENESIS == 2) || EMULATE_TRACE ot(" mov r2,#0\n"); @@ -262,7 +240,7 @@ static void PrintFramework() ot("CycloneInit%s\n", ms?"":":"); #if COMPRESS_JUMPTABLE ot(";@ decompress jump table\n"); - ot(" ldr r12,=CycloneJumpTab\n"); + LoadCycloneJumpTab(12, 1); ot(" add r0,r12,#0xe000*4 ;@ ctrl code pointer\n"); ot(" ldr r1,[r0,#-4]\n"); ot(" tst r1,r1\n"); @@ -288,7 +266,7 @@ static void PrintFramework() ot(" bgt unc_loop_in\n"); ot(" b unc_loop\n"); ot("unc_finish%s\n", ms?"":":"); - ot(" ldr r12,=CycloneJumpTab\n"); + LoadCycloneJumpTab(12, 1); ot(" ;@ set a-line and f-line handlers\n"); ot(" add r0,r12,#0xa000*4\n"); ot(" ldr r1,[r0,#4] ;@ a-line handler\n"); @@ -308,7 +286,7 @@ static void PrintFramework() ltorg(); #else ot(";@ fix final jumptable entries\n"); - ot(" ldr r12,=CycloneJumpTab\n"); + LoadCycloneJumpTab(12, 0); ot(" add r12,r12,#0x10000*4\n"); ot(" ldr 
r0,[r12,#-3*4]\n"); ot(" str r0,[r12,#-2*4]\n"); @@ -320,7 +298,9 @@ static void PrintFramework() // -------------- ot("CycloneReset%s\n", ms?"":":"); ot(" stmfd sp!,{r7,lr}\n"); + LoadCycloneJumpTab(12, 1); ot(" mov r7,r0\n"); + ot(" str r12,[r7,#0x54] ;@ save CycloneJumpTab avoid literal pools\n"); ot(" mov r0,#0\n"); ot(" str r0,[r7,#0x58] ;@ state_flags\n"); ot(" str r0,[r7,#0x48] ;@ OSP\n"); @@ -343,6 +323,77 @@ static void PrintFramework() ot(" ldmfd sp!,{r7,pc}\n"); ot("\n"); + // -------------- + ot("CycloneSetRealTAS%s\n", ms?"":":"); +#if (CYCLONE_FOR_GENESIS == 2) + LoadCycloneJumpTab(12, 1); + ot(" tst r0,r0\n"); + ot(" add r12,r12,#0x4a00*4\n"); + ot(" add r12,r12,#0x00d0*4\n"); + ot(" adr r2,CycloneOT_TAS_\n"); + ot(" addeq r2,r2,#%lu*4\n", sizeof(tas_ops) / sizeof(tas_ops[0])); + + ChangeTASGet(0); + ot(" mov r1,#8\n"); + ot("setrtas_loop0%s ;@ 4ad0-4ad7\n",ms?"":":"); + ot(" subs r1,r1,#1\n"); + ot(" str r0,[r12],#4\n"); + ot(" bne setrtas_loop0\n"); + + ChangeTASGet(1); + ot(" mov r1,#7\n"); + ot("setrtas_loop1%s ;@ 4ad8-4ade\n",ms?"":":"); + ot(" subs r1,r1,#1\n"); + ot(" str r0,[r12],#4\n"); + ot(" bne setrtas_loop1\n"); + + ChangeTASGet(2); + ot(" str r0,[r12],#4\n"); + ChangeTASGet(3); + ot(" mov r1,#7\n"); + ot("setrtas_loop2%s ;@ 4ae0-4ae6\n",ms?"":":"); + ot(" subs r1,r1,#1\n"); + ot(" str r0,[r12],#4\n"); + ot(" bne setrtas_loop2\n"); + + ChangeTASGet(4); + ot(" str r0,[r12],#4\n"); + ChangeTASGet(5); + ot(" mov r1,#8\n"); + ot("setrtas_loop3%s ;@ 4ae8-4aef\n",ms?"":":"); + ot(" subs r1,r1,#1\n"); + ot(" str r0,[r12],#4\n"); + ot(" bne setrtas_loop3\n"); + + ChangeTASGet(6); + ot(" mov r1,#8\n"); + ot("setrtas_loop4%s ;@ 4af0-4af7\n",ms?"":":"); + ot(" subs r1,r1,#1\n"); + ot(" str r0,[r12],#4\n"); + ot(" bne setrtas_loop4\n"); + + ChangeTASGet(7); + ot(" str r0,[r12],#4\n"); + ChangeTASGet(8); + ot(" str r0,[r12],#4\n"); +#endif + ot(" bx lr\n"); + ot("\n"); + + // -------------- + // offset table to avoid .text relocations (forbidden by 
Android and iOS) + ot("CycloneOT_JT%s\n", ms?"":":"); + ot(" %s %s-CycloneOT_JT\n", ms?"dcd":".long", "CycloneJumpTab"); +#if (CYCLONE_FOR_GENESIS == 2) + ot("CycloneOT_TAS_%s\n", ms?"":":"); // working TAS (no MD bug) + for (size_t i = 0; i < sizeof(tas_ops) / sizeof(tas_ops[0]); i++) + ot(" %s %s_-CycloneOT_TAS_\n", ms?"dcd":".long", tas_ops[i]); + ot("CycloneOT_TAS%s\n", ms?"":":"); // broken TAS + for (size_t i = 0; i < sizeof(tas_ops) / sizeof(tas_ops[0]); i++) + ot(" %s %s-CycloneOT_TAS\n", ms?"dcd":".long", tas_ops[i]); + ot("\n"); +#endif + // -------------- // 68k: XNZVC, ARM: NZCV ot("CycloneSetSr%s\n", ms?"":":"); @@ -494,25 +545,6 @@ static void PrintFramework() ot("\n"); ot("\n"); - // -------------- - ot("CycloneSetRealTAS%s\n", ms?"":":"); -#if (CYCLONE_FOR_GENESIS == 2) - ot(" ldr r12,=CycloneJumpTab\n"); - ot(" tst r0,r0\n"); - ot(" add r12,r12,#0x4a00*4\n"); - ot(" add r12,r12,#0x00d0*4\n"); - ot(" beq setrtas_off\n"); - ChangeTAS(1); - ot(" bx lr\n"); - ot("setrtas_off%s\n",ms?"":":"); - ChangeTAS(0); - ot(" bx lr\n"); - ltorg(); -#else - ot(" bx lr\n"); -#endif - ot("\n"); - // -------------- ot(";@ DoInterrupt - r0=IRQ level\n"); ot("CycloneDoInterruptGoBack%s\n", ms?"":":"); diff --git a/tools/idle.s b/tools/idle.s index e55b5f1..f36588f 100644 --- a/tools/idle.s +++ b/tools/idle.s @@ -33,8 +33,11 @@ patch_desc_table: .global CycloneInitIdle CycloneInitIdle: - ldr r3, =CycloneJumpTab - ldr r2, =patch_desc_table + adr r12,offset_table + ldr r3, [r12, #2*4] @ =CycloneJumpTab-ot + ldr r2, [r12, #1*4] @ =patch_desc_table-ot + add r3, r3, r12 + add r2, r2, r12 mov r12,#patch_desc_table_size cii_loop: @@ -51,22 +54,30 @@ cii_loop: subs r12,r12,#1 bgt cii_loop - ldr r0, =have_patches + adr r12,offset_table + ldr r0, [r12, #0*4] @ =have_patches-ot mov r1, #1 - str r1, [r0] + str r1, [r0, r12] bx lr .global CycloneFinishIdle CycloneFinishIdle: - ldr r0, =have_patches - ldr r0, [r0] + adr r12,offset_table + ldr r3, [r12, #0*4] @ =have_patches-ot + ldr 
r0, [r3, r12]! tst r0, r0 bxeq lr - ldr r3, =CycloneJumpTab - ldr r2, =patch_desc_table + stmfd sp!, {r4,r5} + mov r5, r3 + ldr r2, [r12, #1*4] @ =patch_desc_table-ot + ldr r3, [r12, #2*4] @ =CycloneJumpTab-ot + ldr r4, [r12, #3*4] @ =Op____-ot + add r2, r2, r12 + add r3, r3, r12 + add r4, r4, r12 mov r12,#patch_desc_table_size cfi_loop: @@ -74,17 +85,16 @@ cfi_loop: ldr r1, [r2, #12] @ normal str r1, [r3, r0, lsl #2] ldrh r0, [r2, #2] - ldr r1, =Op____ add r0, r3, r0, lsl #2 - str r1, [r0] - str r1, [r0, #0x800] + str r4, [r0] @ Op____ + str r4, [r0, #0x800] add r2, r2, #16 subs r12,r12,#1 bgt cfi_loop - ldr r0, =have_patches mov r1, #0 - str r1, [r0] + str r1, [r5] @ have_patches + ldmfd sp!, {r4,r5} bx lr @@ -154,14 +164,15 @@ idle_detector_bcc8: ble exit_detector @ remove detector from Cyclone + adr r12,offset_table mov r0, r8, lsr #8 cmp r0, #0x66 - ldrlt r1, =Op6002 - ldreq r1, =Op6602 - ldrgt r1, =Op6702 + ldrlt r1, [r12, #4*4] @ =Op6002-ot + ldreq r1, [r12, #4*5] @ =Op6602-ot + ldrgt r1, [r12, #4*6] @ =Op6702-ot + add r1, r1, r12 - ldr r3, =CycloneJumpTab - str r1, [r3, r8, lsl #2] + str r1, [r6, r8, lsl #2] bx r1 exit_detector: @@ -171,3 +182,11 @@ exit_detector: beq Op6602 b Op6702 +offset_table: + .word have_patches - offset_table + .word patch_desc_table - offset_table + .word CycloneJumpTab - offset_table + .word Op____ - offset_table + .word Op6002 - offset_table + .word Op6602 - offset_table @ 5 + .word Op6702 - offset_table -- 2.39.5