From ed7e9150781e0d5c0f3a95a4910963ea821fbdf4 Mon Sep 17 00:00:00 2001
From: kub
Date: Thu, 25 Apr 2019 19:03:58 +0200
Subject: [PATCH] sh2 drc, improved constant handling and register allocator

---
Review notes (text in this section is not part of the commit):
 - gconst_kill(): test the const flags with '&' instead of '&='. The
   assignment form cleared the flags inside the condition, so a register
   whose only flags were GRF_CONST/GRF_CDIRTY was never removed from its
   constant slot's gregs mask.
 - FOR_ALL_BITS_SET_DO(): the loop condition now tests the shifted copy
   (__mask) rather than re-evaluating the macro argument, and the shifted
   constants are unsigned so that no signed value is shifted into the
   sign bit.
 - gconsts[] gets internal linkage (static) like the other rcache state.
 - Only '+' lines were touched and no lines were added or removed, so the
   hunk line counts and the diffstat below are unchanged.
 - NOTE(review): rcache_get_reg_arg() passes guest_regs[r].vreg to
   gconst_try_read(), which indexes cache_regs[] with it. Confirm that the
   index cannot be negative on that path.

 cpu/sh2/compiler.c | 151 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 108 insertions(+), 43 deletions(-)

diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c
index bc63e18b..cd85b373 100644
--- a/cpu/sh2/compiler.c
+++ b/cpu/sh2/compiler.c
@@ -395,10 +395,10 @@ enum {
 } guest_reg_flags;
 
 typedef struct {
-  u16 flags; // guest flags: is constant, is dirty?
+  u8 flags; // guest flags: is constant, is dirty?
   s8 sreg; // cache reg for static mapping
   s8 vreg; // cache_reg this is currently mapped to, -1 if not mapped
-  u32 val; // value if this is constant
+  s8 cnst; // const index if this is constant
 } guest_reg_t;
 
 
@@ -1153,7 +1153,7 @@ static int find_in_array(u32 *array, size_t size, u32 what)
 
 // NB rcache allocation dependencies:
 // - get_reg_arg/get_tmp_arg first (might evict other regs just allocated)
-// - get_reg(..., NULL) before get_reg(..., &x) if it might get the same reg
+// - get_reg(..., NULL) before get_reg(..., &hr) if it might get the same reg
 // - get_reg(..., RC_GR_READ/RMW, ...) before WRITE (might evict needed reg)
 
 // register cache / constant propagation stuff
@@ -1163,7 +1163,15 @@ typedef enum {
   RC_GR_RMW,
 } rc_gr_mode;
 
+typedef struct {
+  u32 gregs; // bitmask of guest regs currently sharing this constant
+  u32 val; // the constant value
+} gconst_t;
+
+static gconst_t gconsts[ARRAY_SIZE(guest_regs)];
+
 static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr);
+static void rcache_add_vreg_alias(int x, sh2_reg_e r);
 static void rcache_remove_vreg_alias(int x, sh2_reg_e r);
 
 #define RCACHE_DUMP(msg) { \
@@ -1185,11 +1193,51 @@ static void rcache_remove_vreg_alias(int x, sh2_reg_e r);
   } \
 }
 
+// binary search approach, since we don't have CLZ on ARM920T
+#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \
+  u32 __mask = (mask); \
+  for (bit = 31; bit >= 0 && __mask; bit--, __mask <<= 1) { \
+    if (!(__mask & (0xffffu << 16))) \
+      bit -= 16, __mask <<= 16; \
+    if (!(__mask & (0xffu << 24))) \
+      bit -= 8, __mask <<= 8; \
+    if (!(__mask & (0xfu << 28))) \
+      bit -= 4, __mask <<= 4; \
+    if (!(__mask & (0x3u << 30))) \
+      bit -= 2, __mask <<= 2; \
+    if (!(__mask & (0x1u << 31))) \
+      bit -= 1, __mask <<= 1; \
+    if (__mask & (0x1u << 31)) { \
+      code; \
+    } \
+  } \
+}
+
 #if PROPAGATE_CONSTANTS
+static inline int gconst_alloc(sh2_reg_e r)
+{
+  int i, n = -1;
+
+  for (i = 0; i < ARRAY_SIZE(gconsts); i++) {
+    if (gconsts[i].gregs & (1 << r)) // detach r from the slot it was using
+      gconsts[i].gregs &= ~(1 << r);
+    if (gconsts[i].gregs == 0 && n < 0) // remember the first unused slot
+      n = i;
+  }
+  if (n >= 0)
+    gconsts[n].gregs = (1 << r);
+  else
+    exit(1); // cannot happen - more constants than guest regs?
+  return n;
+}
+
 static void gconst_set(sh2_reg_e r, u32 val)
 {
+  int i = gconst_alloc(r);
+
   guest_regs[r].flags |= GRF_CONST;
-  guest_regs[r].val = val;
+  guest_regs[r].cnst = i;
+  gconsts[i].val = val;
 }
 
 static void gconst_new(sh2_reg_e r, u32 val)
@@ -1204,16 +1252,22 @@ static void gconst_new(sh2_reg_e r, u32 val)
 
 static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs)
 {
-  guest_regs[rd].flags &= ~(GRF_CONST|GRF_CDIRTY);
-  if (guest_regs[rs].flags & GRF_CONST)
-    gconst_set(rd, guest_regs[rs].val);
+  if (guest_regs[rd].flags & GRF_CONST) {
+    guest_regs[rd].flags &= ~(GRF_CONST|GRF_CDIRTY);
+    gconsts[guest_regs[rd].cnst].gregs &= ~(1 << rd);
+  }
+  if (guest_regs[rs].flags & GRF_CONST) {
+    guest_regs[rd].flags |= GRF_CONST;
+    guest_regs[rd].cnst = guest_regs[rs].cnst; // share the slot of rs
+    gconsts[guest_regs[rd].cnst].gregs |= (1 << rd);
+  }
 }
 #endif
 
 static int gconst_get(sh2_reg_e r, u32 *val)
 {
   if (guest_regs[r].flags & GRF_CONST) {
-    *val = guest_regs[r].val;
+    *val = gconsts[guest_regs[r].cnst].val;
     return 1;
   }
   return 0;
@@ -1227,11 +1281,20 @@ static int gconst_check(sh2_reg_e r)
 }
 
 // update hr if dirty, else do nothing
-static int gconst_try_read(int hr, sh2_reg_e r)
+static int gconst_try_read(int vreg, sh2_reg_e r)
 {
+  int i, x;
   if (guest_regs[r].flags & GRF_CDIRTY) {
-    emith_move_r_imm(hr, guest_regs[r].val);
-    guest_regs[r].flags &= ~GRF_CDIRTY;
+    x = guest_regs[r].cnst;
+    emith_move_r_imm(cache_regs[vreg].hreg, gconsts[x].val);
+    FOR_ALL_BITS_SET_DO(gconsts[x].gregs, i, // every guest reg sharing the constant
+      {
+        if (guest_regs[i].vreg >= 0 && i != r)
+          rcache_remove_vreg_alias(guest_regs[i].vreg, i);
+        rcache_add_vreg_alias(vreg, i);
+        guest_regs[i].flags &= ~GRF_CDIRTY;
+        guest_regs[i].flags |= GRF_DIRTY;
+      });
     return 1;
   }
   return 0;
@@ -1250,6 +1313,8 @@ static u32 gconst_dirty_mask(void)
 
 static void gconst_kill(sh2_reg_e r)
 {
+  if (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) // detach only if r is a constant
+    gconsts[guest_regs[r].cnst].gregs &= ~(1 << r);
   guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY);
 }
 
@@ -1269,8 +1334,11 @@ static void gconst_invalidate(void)
 {
   int i;
 
-  for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
+  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
+    if (guest_regs[i].flags & (GRF_CONST|GRF_CDIRTY))
+      gconsts[guest_regs[i].cnst].gregs &= ~(1 << i);
     guest_regs[i].flags &= ~(GRF_CONST|GRF_CDIRTY);
+  }
 }
 
 static u16 rcache_counter;
@@ -1278,28 +1346,9 @@ static u32 rcache_static;
 static u32 rcache_locked;
 static u32 rcache_hint_soon;
 static u32 rcache_hint_late;
+static u32 rcache_hint_write;
 #define rcache_hint (rcache_hint_soon|rcache_hint_late)
 
-// binary search approach, since we don't have CLZ on ARM920T
-#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \
-  u32 __mask = mask; \
-  for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \
-    if (!(__mask & (0xffff << 16))) \
-      bit -= 16, __mask <<= 16; \
-    if (!(__mask & (0xff << 24))) \
-      bit -= 8, __mask <<= 8; \
-    if (!(__mask & (0xf << 28))) \
-      bit -= 4, __mask <<= 4; \
-    if (!(__mask & (0x3 << 30))) \
-      bit -= 2, __mask <<= 2; \
-    if (!(__mask & (0x1 << 31))) \
-      bit -= 1, __mask <<= 1; \
-    if (__mask & (0x1 << 31)) { \
-      code; \
-    } \
-  } \
-}
-
 static void rcache_unmap_vreg(int x)
 {
   int i;
@@ -1328,8 +1377,7 @@ static void rcache_clean_vreg(int x)
                 rcache_unmap_vreg(guest_regs[r].sreg);
                 emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, cache_regs[guest_regs[r].vreg].hreg);
                 rcache_remove_vreg_alias(x, r);
-                cache_regs[guest_regs[r].sreg].gregs = (1 << r);
-                guest_regs[r].vreg = guest_regs[r].sreg;
+                rcache_add_vreg_alias(guest_regs[r].sreg, r);
               } else {
                 // must evict since sreg is locked
                 emith_ctx_write(cache_regs[x].hreg, r * 4);
@@ -1343,6 +1391,12 @@ static void rcache_clean_vreg(int x)
   }
 }
 
+static void rcache_add_vreg_alias(int x, sh2_reg_e r)
+{
+  cache_regs[x].gregs |= (1 << r);
+  guest_regs[r].vreg = x;
+}
+
 static void rcache_remove_vreg_alias(int x, sh2_reg_e r)
 {
   cache_regs[x].gregs &= ~(1 << r);
@@ -1396,9 +1450,12 @@ static cache_reg_t *rcache_evict(void)
       else if (rcache_hint_late & cache_regs[i].gregs)
         // REGs needed in some future insn
         i_prio = 3;
-      else
+      else if ((rcache_hint_write & cache_regs[i].gregs) != cache_regs[i].gregs)
         // REGs not needed soon
         i_prio = 4;
+      else
+        // REGs soon overwritten anyway
+        i_prio = 5;
 
       if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) {
         min_stamp = cache_regs[i].stamp;
@@ -1549,6 +1606,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
     h = guest_regs[r].sreg;
     rcache_evict_vreg(h);
     tr = &cache_regs[h];
+    tr->gregs = 1 << r;
     if (i >= 0) {
       if (mode != RC_GR_WRITE) {
         if (hr)
@@ -1559,14 +1617,13 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
       }
       rcache_remove_vreg_alias(guest_regs[r].vreg, r);
     } else if (mode != RC_GR_WRITE) {
-      if (gconst_try_read(tr->hreg, r)) {
+      if (gconst_try_read(h, r)) {
         tr->flags |= HRF_DIRTY;
         guest_regs[r].flags |= GRF_DIRTY;
       } else
         emith_ctx_read(tr->hreg, r * 4);
     }
     guest_regs[r].vreg = guest_regs[r].sreg;
-    tr->gregs = 1 << r;
     goto end;
   } else if (i >= 0) {
     if (mode == RC_GR_READ || !(cache_regs[i].gregs & ~(1 << r))) {
@@ -1608,7 +1665,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
   guest_regs[r].vreg = tr - cache_regs;
 
   if (mode != RC_GR_WRITE) {
-    if (gconst_try_read(tr->hreg, r)) {
+    if (gconst_try_read(guest_regs[r].vreg, r)) {
       tr->flags |= HRF_DIRTY;
       guest_regs[r].flags |= GRF_DIRTY;
     } else if (split >= 0) {
@@ -1747,7 +1804,7 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr)
     srcr = dstr;
     if (rcache_static & (1 << r))
       srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL);
-    else if (gconst_try_read(srcr, r))
+    else if (gconst_try_read(guest_regs[r].vreg, r))
       dirty = 1;
     else
       emith_ctx_read(srcr, r * 4);
@@ -1780,8 +1837,10 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr)
       emith_move_r_r(dstr, srcr);
   } else if (hr != NULL) {
     // caller will modify arg, so it will soon be out of sync with r
-    if (dirty || src_dirty)
+    if (dirty || src_dirty) {
       emith_ctx_write(dstr, r * 4); // must clean since arg will be modified
+      guest_regs[r].flags &= ~GRF_DIRTY;
+    }
   } else if (guest_regs[r].vreg < 0) {
     // keep arg as vreg for r
     cache_regs[dstid].type = HR_CACHED;
@@ -1909,6 +1968,11 @@ static inline void rcache_set_hint_late(u32 mask)
   rcache_hint_late = mask & ~rcache_static;
 }
 
+static inline void rcache_set_hint_write(u32 mask)
+{
+  rcache_hint_write = mask & ~rcache_static;
+}
+
 static inline int rcache_is_hinted(sh2_reg_e r)
 {
   // consider static REGs as always hinted, since they are always there
@@ -2038,7 +2102,7 @@ static void rcache_invalidate(void)
   }
 
   rcache_counter = 0;
-  rcache_hint_soon = rcache_hint_late = 0;
+  rcache_hint_soon = rcache_hint_late = rcache_hint_write = 0;
 
   gconst_invalidate();
 }
@@ -2155,10 +2219,9 @@ static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src)
     if (guest_regs[dst].vreg >= 0)
       rcache_remove_vreg_alias(guest_regs[dst].vreg, dst);
     // make dst an alias of src
-    cache_regs[i].gregs |= (1 << dst);
+    rcache_add_vreg_alias(i, dst);
     cache_regs[i].flags |= HRF_DIRTY;
     guest_regs[dst].flags |= GRF_DIRTY;
-    guest_regs[dst].vreg = i;
     gconst_kill(dst);
 #if PROPAGATE_CONSTANTS
     gconst_copy(dst, src);
@@ -2772,6 +2835,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
         dbg(1, "unhandled delay_dep_bk: %x", delay_dep_bk);
       rcache_set_hint_soon(0);
       rcache_set_hint_late(0);
+      rcache_set_hint_write(0);
     }
     else
     {
@@ -2802,6 +2866,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
       }
       rcache_set_hint_soon(late); // insns 1-3
       rcache_set_hint_late(late & ~soon); // insns 4-9
+      rcache_set_hint_write(write & ~(late|soon)); // next access is write
     }
     rcache_set_locked(opd[0].source); // try not to evict src regs for this op
 
-- 
2.39.2