sh2 drc, fix mul/add saturation
author kub <derkub@gmail.com>
Sun, 2 Jun 2024 07:48:15 +0000 (07:48 +0000)
committer kub <derkub@gmail.com>
Sun, 2 Jun 2024 08:03:09 +0000 (08:03 +0000)
cpu/drc/emit_arm.c
cpu/drc/emit_arm64.c
cpu/drc/emit_mips.c
cpu/drc/emit_ppc.c
cpu/drc/emit_riscv.c
cpu/drc/emit_x86.c

index 454ff87..7d51c35 100644 (file)
@@ -1333,7 +1333,7 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs)
        EMITH_JMP_START(DCOND_LE);                              \
        /* turns = sr.cycles / cycles */                        \
        emith_asr(t2, sr, 12);                                  \
-       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
+       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)));     \
        emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */    \
        rcache_free_tmp(t3);                                    \
        if (reg >= 0) {                                         \
@@ -1427,16 +1427,16 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs)
        emith_sext(mh, mh, 16);                   \
        emith_mula_s64(ml, mh, rn, rm);           \
        /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
-       /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \
-       emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \
-       emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \
-       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
-       emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
-       EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \
-       emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \
-       emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \
-       EMITH_SJMP_END(DCOND_LE);                 \
+       /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
+       emith_asr(rn, mh, 15);                    \
+       emith_addf_r_r_r_lsr(rn, rn, mh, 31);     \
+       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
+       emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
+       emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP2_END(DCOND_NE);                \
 } while (0)
@@ -1456,10 +1456,10 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs)
        EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
        /* XXX: LSB signalling only in SH1, or in SH2 too? */ \
        emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
-       EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \
-       emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \
-       EMITH_SJMP_END(DCOND_LE);                 \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP2_END(DCOND_NE);                \
 } while (0)
index f97583d..b8ae926 100644 (file)
@@ -1242,7 +1242,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
        EMITH_JMP_START(DCOND_LE);                              \
        /* turns = sr.cycles / cycles */                        \
        emith_asr(t2, sr, 12);                                  \
-       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
+       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)));     \
        emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */    \
        rcache_free_tmp(t3);                                    \
        if (reg >= 0) {                                         \
@@ -1309,13 +1309,13 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
        /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
        emith_asr(rn, mh, 15);                    \
        emith_addf_r_r_r_lsr(rn, rn, mh, 31);     \
-       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
-       emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
-       EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \
-       emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \
-       emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \
-       EMITH_SJMP_END(DCOND_LE);                 \
+       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
+       emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
+       emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
 } while (0)
@@ -1336,10 +1336,10 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
        EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
        /* XXX: LSB signalling only in SH1, or in SH2 too? */ \
        emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
-       EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \
-       emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \
-       EMITH_SJMP_END(DCOND_LE);                 \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
 } while (0)
index d2964dc..d775fdd 100644 (file)
@@ -1736,7 +1736,7 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
        EMITH_JMP_START(DCOND_LE);                              \
        /* turns = sr.cycles / cycles */                        \
        emith_asr(t2, sr, 12);                                  \
-       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
+       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)));     \
        emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */    \
        rcache_free_tmp(t3);                                    \
        if (reg >= 0) {                                         \
@@ -1806,13 +1806,13 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
        emith_asr(rn, mh, 15);                    \
        emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \
        emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \
-       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
-       emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
-       EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \
-       emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \
-       emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \
-       EMITH_SJMP_END(DCOND_PL);                 \
+       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
+       emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
+       emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
 } while (0)
@@ -1835,10 +1835,10 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
        EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
        /* XXX: LSB signalling only in SH1, or in SH2 too? */ \
        emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
-       EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \
-       emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \
-       EMITH_SJMP_END(DCOND_PL);                 \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
 } while (0)
index d856f10..f8e05b1 100644 (file)
@@ -1670,7 +1670,7 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
        EMITH_JMP_START(DCOND_LE);                              \
        /* turns = sr.cycles / cycles */                        \
        emith_asr(t2, sr, 12);                                  \
-       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
+       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)));     \
        emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */    \
        rcache_free_tmp(t3);                                    \
        if (reg >= 0) {                                         \
@@ -1742,13 +1742,13 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
        emith_asr(rn, mh, 15);                    \
        emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \
        emith_tst_r_r(rn, rn); /* (need only N and Z flags) */ \
-       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
-       emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
-       EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \
-       emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \
-       emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \
-       EMITH_SJMP_END(DCOND_PL);                 \
+       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
+       emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
+       emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
 } while (0)
@@ -1771,10 +1771,10 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
        EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
        /* XXX: LSB signalling only in SH1, or in SH2 too? */ \
        emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
-       EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \
-       emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \
-       EMITH_SJMP_END(DCOND_PL);                 \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
 } while (0)
index 832575b..d4fd65b 100644 (file)
@@ -1494,7 +1494,7 @@ static int emith_cond_check(int cond, int *r, int *s)
        EMITH_JMP_START(DCOND_LE);                              \
        /* turns = sr.cycles / cycles */                        \
        emith_asr(t2, sr, 12);                                  \
-       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
+       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)));     \
        emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */    \
        rcache_free_tmp(t3);                                    \
        if (reg >= 0) {                                         \
@@ -1564,13 +1564,13 @@ static int emith_cond_check(int cond, int *r, int *s)
        emith_asr(rn, mh, 15);                    \
        emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \
        emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \
-       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
-       emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
-       EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \
-       emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \
-       emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \
-       EMITH_SJMP_END(DCOND_PL);                 \
+       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
+       emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
+       emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
 } while (0)
@@ -1593,10 +1593,10 @@ static int emith_cond_check(int cond, int *r, int *s)
        EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
        /* XXX: LSB signalling only in SH1, or in SH2 too? */ \
        emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
-       EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \
-       emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \
-       EMITH_SJMP_END(DCOND_PL);                 \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
 } while (0)
index 6a815e8..53d5238 100644 (file)
@@ -1273,7 +1273,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI,        // x86-64,i386 common
        EMITH_JMP_START(DCOND_LE);                              \
        /* turns = sr.cycles / cycles */                        \
        emith_asr(t2, sr, 12);                                  \
-       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
+       emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)));     \
        emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */    \
        rcache_free_tmp(t3);                                    \
        if (reg >= 0) {                                         \
@@ -1363,16 +1363,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI,      // x86-64,i386 common
        emith_tst_r_imm(sr, S);                   \
        EMITH_SJMP_START(DCOND_EQ);               \
        /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
-       /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \
-       emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \
-       emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \
-       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
-       emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
-       EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \
-       emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \
-       emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \
-       EMITH_SJMP_END(DCOND_LE);                 \
+       /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
+       emith_asr(rn, mh, 15);                    \
+       emith_addf_r_r_r_lsr(rn, rn, mh, 31);     \
+       EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
+       emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
+       EMITH_SJMP_START(DCOND_MI); /* sum < 0 -> -ovl */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
+       emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
 } while (0)
@@ -1394,10 +1394,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI,      // x86-64,i386 common
        EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
        /* XXX: LSB signalling only in SH1, or in SH2 too? */ \
        emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
-       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
-       EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \
-       emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \
-       EMITH_SJMP_END(DCOND_LE);                 \
+       emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -overflow */ \
+       EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +overflow */ \
+       emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
+       EMITH_SJMP_END(DCOND_MI);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
        EMITH_SJMP_END(DCOND_EQ);                 \
 } while (0)