From: kub Date: Sun, 2 Jun 2024 07:48:15 +0000 (+0000) Subject: sh2 drc, fix mul/add saturation X-Git-Tag: v2.00~41 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=38fd3bd8669827e3670be8f788f182945fa6f45c;p=picodrive.git sh2 drc, fix mul/add saturation --- diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 454ff87b..7d51c357 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -1333,7 +1333,7 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs) EMITH_JMP_START(DCOND_LE); \ /* turns = sr.cycles / cycles */ \ emith_asr(t2, sr, 12); \ - emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ rcache_free_tmp(t3); \ if (reg >= 0) { \ @@ -1427,16 +1427,16 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs) emith_sext(mh, mh, 16); \ emith_mula_s64(ml, mh, rn, rm); \ /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ - /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \ - emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \ - emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \ - EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ - emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ - EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \ - emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \ - emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \ - EMITH_SJMP_END(DCOND_LE); \ + /* to check: add MACH >> 31 to MACH >> 15. 
this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_addf_r_r_r_lsr(rn, rn, mh, 31); \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP2_END(DCOND_NE); \ } while (0) @@ -1456,10 +1456,10 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs) EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ /* XXX: LSB signalling only in SH1, or in SH2 too? */ \ emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ - EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \ - emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \ - EMITH_SJMP_END(DCOND_LE); \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP2_END(DCOND_NE); \ } while (0) diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index f97583d2..b8ae926a 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -1242,7 +1242,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) EMITH_JMP_START(DCOND_LE); \ /* turns = sr.cycles / cycles */ \ emith_asr(t2, sr, 12); \ - emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ rcache_free_tmp(t3); \ if (reg >= 0) { \ @@ -1309,13 +1309,13 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) /* to check: add MACH >> 31 to MACH >> 15. 
this is 0 if no overflow */ \ emith_asr(rn, mh, 15); \ emith_addf_r_r_r_lsr(rn, rn, mh, 31); \ - EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ - emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ - EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \ - emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \ - emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \ - EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) @@ -1336,10 +1336,10 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \ emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ - EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \ - emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \ - EMITH_SJMP_END(DCOND_LE); \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index d2964dca..d775fdd8 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -1736,7 +1736,7 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force) EMITH_JMP_START(DCOND_LE); \ /* turns = sr.cycles / cycles */ \ emith_asr(t2, sr, 12); \ - emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ rcache_free_tmp(t3); \ if (reg >= 0) { \ @@ -1806,13 +1806,13 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force) emith_asr(rn, mh, 15); \ emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \ emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \ - EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ - emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ - EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \ - emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \ - emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \ - EMITH_SJMP_END(DCOND_PL); \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + 
emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) @@ -1835,10 +1835,10 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force) EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ /* XXX: LSB signalling only in SH1, or in SH2 too? */ \ emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ - EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \ - emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \ - EMITH_SJMP_END(DCOND_PL); \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) diff --git a/cpu/drc/emit_ppc.c b/cpu/drc/emit_ppc.c index d856f100..f8e05b14 100644 --- a/cpu/drc/emit_ppc.c +++ b/cpu/drc/emit_ppc.c @@ -1670,7 +1670,7 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force) EMITH_JMP_START(DCOND_LE); \ /* turns = sr.cycles / cycles */ \ emith_asr(t2, sr, 12); \ - emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ rcache_free_tmp(t3); \ if (reg >= 0) { \ @@ -1742,13 +1742,13 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force) emith_asr(rn, mh, 15); \ emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \ emith_tst_r_r(rn, rn); /* (need only N and Z flags) */ \ - EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ - emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ - EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \ - emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff
*/ \ - emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \ - EMITH_SJMP_END(DCOND_PL); \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) @@ -1771,10 +1771,10 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force) EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ /* XXX: LSB signalling only in SH1, or in SH2 too? */ \ emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ - EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \ - emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \ - EMITH_SJMP_END(DCOND_PL); \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index 832575b2..d4fd65b8 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -1494,7 +1494,7 @@ static int emith_cond_check(int cond, int *r, int *s) EMITH_JMP_START(DCOND_LE); \ /* turns = sr.cycles / cycles */ \ emith_asr(t2, sr, 12); \ - emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ rcache_free_tmp(t3); \ if (reg >= 0) { \ @@ -1564,13 +1564,13 @@ static int emith_cond_check(int cond, int *r, int *s) emith_asr(rn, mh, 15); \ emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \ 
emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \ - EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ - emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ - EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \ - emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \ - emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \ - EMITH_SJMP_END(DCOND_PL); \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) @@ -1593,10 +1593,10 @@ static int emith_cond_check(int cond, int *r, int *s) EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \ emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ - EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \ - emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \ - EMITH_SJMP_END(DCOND_PL); \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 6a815e85..53d52385 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1273,7 +1273,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common EMITH_JMP_START(DCOND_LE); \ /* turns = sr.cycles / cycles */ \ emith_asr(t2, sr, 12); \ - emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ rcache_free_tmp(t3); \ if (reg >= 0) { \ @@ -1363,16 +1363,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_tst_r_imm(sr, S); \ EMITH_SJMP_START(DCOND_EQ); \ /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ - /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \ - emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \ - emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \ - EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ - emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ - EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \ - emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \ - emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \ - EMITH_SJMP_END(DCOND_LE); \ + /* to check: add MACH >> 31 to MACH >> 15. 
this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_addf_r_r_r_lsr(rn, rn, mh, 31); \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum < 0 -> -ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) @@ -1394,10 +1394,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ /* XXX: LSB signalling only in SH1, or in SH2 too? */ \ emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ - emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ - EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \ - emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \ - EMITH_SJMP_END(DCOND_LE); \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -overflow */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +overflow */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0)