2 * vim:shiftwidth=2:expandtab
8 #include "../../pico/pico_int.h"
11 #include "../drc/cmn.h"
// dbg(l, ...): forward the variadic arguments to elprintf(EL_STATUS, ...) only
// when the bit mask `l` intersects the compile-time DRC_DEBUG setting.
// NOTE(review): the expansion opens a bare `{` block and its closing part is
// not visible in this excerpt; a do { ... } while (0) wrapper is the usual
// shape for statement macros - confirm call sites inside if/else are safe.
18 #define dbg(l,...) { \
19 if ((l) & DRC_DEBUG) \
20 elprintf(EL_STATUS, ##__VA_ARGS__); \
23 #include "mame/sh2dasm.h"
24 #include <platform/linux/host_dasm.h>
// Statistics counters; insns_compiled, host_insn_count and hash_collisions are
// printed by the dbg() summary at the end of sh2_translate().
// NOTE(review): presumably debug-build only - the guarding preprocessor lines
// are not visible in this excerpt.
25 static int insns_compiled, hash_collisions, host_insn_count;
// Per-tcache position up to which host code has already been disassembled
// (advanced by do_host_disasm, reset by flush_tcache and sh2_drc_init).
34 static u8 *tcache_dsm_ptrs[3];
// Scratch buffer handed to DasmSH2() for the text of one SH2 instruction.
35 static char sh2dasm_buff[64];
// do_host_disasm(tcid): pass the host code emitted for translation cache
// `tcid` since the previous call (tcache_dsm_ptrs[tcid] .. tcache_ptr) to
// host_dasm(), then remember tcache_ptr as the new start position.
// NOTE(review): the expansion is two separate statements, so using it as the
// unbraced body of an if/else would only guard the first one.
36 #define do_host_disasm(tcid) \
37 host_dasm(tcache_dsm_ptrs[tcid], tcache_ptr - tcache_dsm_ptrs[tcid]); \
38 tcache_dsm_ptrs[tcid] = tcache_ptr
// Empty variant of the same macro; the preprocessor conditional that selects
// between the two definitions is not visible in this excerpt.
40 #define do_host_disasm(x)
// sh2_translate() keeps adding instructions to a block while the accumulated
// cycle count is below this limit (or a delay slot is still pending).
43 #define BLOCK_CYCLE_LIMIT 100
// Space that must remain free in a translation cache before a new block is
// started; sh2_translate() flushes the cache when less than this is left.
// NOTE(review): the 6 * 6 factor is presumably a worst-case host-bytes-per-cycle
// estimate - its derivation is not documented here.
44 #define MAX_BLOCK_SIZE (BLOCK_CYCLE_LIMIT * 6 * 6)
// Sizes of the three translation caches, indexed by tcache id: 0 is used for
// ROM/DRAM code, 1 + is_slave for BIOS/data-array code of each SH2.
// The shares add up to 8/8 of DRC_TCACHE_SIZE; sh2_drc_init() lays the caches
// out back to back in this order.
46 // we have 3 translation cache buffers, split from one drc/cmn buffer.
47 // BIOS shares tcache with data array because it's only used for init
48 // and can be discarded early
49 static const int tcache_sizes[3] = {
50 DRC_TCACHE_SIZE * 6 / 8, // ROM, DRAM
51 DRC_TCACHE_SIZE / 8, // BIOS, data array in master sh2
52 DRC_TCACHE_SIZE / 8, // ... slave
// Start address of each translation cache (set once in sh2_drc_init()).
55 static u8 *tcache_bases[3];
// Current write position inside each translation cache; reset to the base by
// flush_tcache() and updated by sh2_translate() after a block is finished.
56 static u8 *tcache_ptrs[3];
58 // ptr for code emitters
// Working emit position: loaded from tcache_ptrs[] when translation starts and
// stored back there when the block is complete.
59 static u8 *tcache_ptr;
// Host code emitter plus guest-to-host register map. Two alternatives are
// listed (ARM and x86); the preprocessor lines selecting one of them and the
// array contents are not visible in this excerpt.
// reg_map_g2h[guest_reg] is the host register holding that SH2 register, or -1
// when it is only kept in the context structure (the emit_* helpers below test
// for -1 and fall back to emith_ctx_read/emith_ctx_write).
62 #include "../drc/emit_arm.c"
64 static const int reg_map_g2h[] = {
74 #include "../drc/emit_x86.c"
76 static const int reg_map_g2h[] = {
// Guest (SH2) register indices. The emit helpers multiply these by 4 to form
// the offset of the register inside the SH2 context.
// NOTE(review): the enclosing enum header/footer is not visible here; the type
// is presumably sh2_reg_e, which the emit_* helpers take as parameter type.
88 SHR_R0 = 0, SHR_R15 = 15,
// Control/system registers follow the 16 general registers (values 16 and up).
// SHR_PPC is written by branch emitters and copied to SHR_PC by sh2_translate().
89 SHR_PC, SHR_PPC, SHR_PR, SHR_SR,
90 SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL,
// Descriptor of one translated block. Descriptors live in block_tables[] and,
// for ROM code, are chained per hash bucket through `next`.
// NOTE(review): block_stats() also reads a `refcount` member that is not among
// the fields visible in this excerpt.
93 typedef struct block_desc_ {
94 u32 addr; // SH2 PC address
// Set to the translator's final pc when the block is finished; used together
// with addr as a [addr, end_addr) range by sh2_smc_rm_block().
95 u32 end_addr; // TODO rm?
96 void *tcache_ptr; // translated block for above PC
97 struct block_desc_ *next; // next block with the same PC hash
// Capacity of each per-tcache descriptor table (initializer not visible here).
103 static const int block_max_counts[3] = {
// Descriptor tables, one per translation cache. sh2_drc_init() allocates them
// as a single calloc() block and points entries 1 and 2 into that block.
108 static block_desc *block_tables[3];
// Number of descriptors currently in use in each table.
109 static int block_counts[3];
// Number of buckets in the ROM block hash; a power of two so that HASH_MASK can
// be used to reduce an address to a bucket index.
112 #define MAX_HASH_ENTRIES 1024
113 #define HASH_MASK (MAX_HASH_ENTRIES - 1)
// Bucket array, allocated in sh2_drc_init(). Entries are accessed as
// block_desc pointers through HASH_FUNC and are only used for ROM addresses.
114 static void **hash_table;
// Entry into translated code: called by sh2_drc_dispatcher() with the SH2
// context and the host code pointer of the block to run. Defined elsewhere.
116 extern void sh2_drc_entry(SH2 *sh2, void *block);
// Exit stub: every translated block ends with a jump to this symbol.
117 extern void sh2_drc_exit(void);
// Handler for a single opcode; translated code calls it for instructions the
// translator does not emit host code for. Defined elsewhere.
120 extern void REGPARM(2) sh2_do_op(SH2 *sh2, int opcode);
// Forward declaration; the definition is further down in this file.
121 static void REGPARM(1) sh2_test_irq(SH2 *sh2);
// Discard everything compiled into translation cache `tcid`: reset its block
// count and write pointer and clear the lookup structures that refer to it.
123 static void flush_tcache(int tcid)
// NOTE(review): the first value argument is a pointer difference printed with
// %d - confirm this is acceptable on hosts where ptrdiff_t is wider than int.
125 dbg(1, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid,
126 tcache_ptrs[tcid] - tcache_bases[tcid], tcache_sizes[tcid],
127 block_counts[tcid], block_max_counts[tcid]);
129 block_counts[tcid] = 0;
130 tcache_ptrs[tcid] = tcache_bases[tcid];
// Cache 0 holds ROM and RAM blocks, so both the ROM hash table and the RAM
// block-id map are wiped.
131 if (tcid == 0) { // ROM, RAM
132 memset(hash_table, 0, sizeof(hash_table[0]) * MAX_HASH_ENTRIES);
133 memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram));
// Caches 1 and 2 correspond to drcblk_da[0] and drcblk_da[1].
// NOTE(review): presumably the else branch - the line in between is not visible.
136 memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[0]));
// Restart host disassembly tracking at the beginning of the emptied cache.
138 tcache_dsm_ptrs[tcid] = tcache_bases[tcid];
// Search the chain that follows `tab` for a block compiled for `addr` and
// return its translated code pointer.
// `tab` itself is not examined: the loop starts at tab->next, and the
// dispatcher compares the head entry before calling this function.
// On a miss a "block miss" message is printed.
// NOTE(review): the lines between the loop and the return, and the value
// returned on a miss, are not visible in this excerpt.
142 static void *dr_find_block(block_desc *tab, u32 addr)
144 for (tab = tab->next; tab != NULL; tab = tab->next)
145 if (tab->addr == addr)
149 return tab->tcache_ptr;
151 printf("block miss for %08x\n", addr);
// Take the next free descriptor from block_tables[tcache_id] and bind it to the
// current emit position (the global tcache_ptr).
//   addr      - SH2 address the block is compiled for
//   tcache_id - which translation cache / descriptor table to use
//   blk_id    - out-parameter; its assignment is not visible in this excerpt
// NOTE(review): when the table is full the function presumably returns NULL -
// sh2_translate() treats a NULL result as "flush and retry".
155 static block_desc *dr_add_block(u32 addr, int tcache_id, int *blk_id)
157 int *bcount = &block_counts[tcache_id];
// Table full check against the per-cache capacity.
160 if (*bcount >= block_max_counts[tcache_id])
163 bd = &block_tables[tcache_id][*bcount];
165 bd->tcache_ptr = tcache_ptr;
// HASH_FUNC(hash_tab, addr): lvalue for the hash bucket of `addr`, i.e. the
// block_desc pointer stored at index (addr & HASH_MASK) of `hash_tab`.
// Usable on both sides of an assignment.
172 #define HASH_FUNC(hash_tab, addr) \
173 ((block_desc **)(hash_tab))[(addr) & HASH_MASK]
175 // ---------------------------------------------------------------
// Emit host code that loads the 32-bit constant `imm` into guest register `dst`.
// NOTE(review): the declaration of `tmp` and the branching on host_dst are not
// visible here; presumably `tmp` is the mapped host register or a scratch
// register, and the context write is only emitted for unmapped registers.
177 static void emit_move_r_imm32(sh2_reg_e dst, u32 imm)
179 int host_dst = reg_map_g2h[dst];
184 emith_move_r_imm(tmp, imm);
// Guest registers are 4 bytes apart in the context, hence dst * 4.
186 emith_ctx_write(tmp, dst * 4);
// Emit host code that copies guest register `src` into guest register `dst`.
189 static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src)
191 int host_dst = reg_map_g2h[dst], host_src = reg_map_g2h[src];
// Both registers are mapped to host registers: a plain host move suffices.
194 if (host_dst != -1 && host_src != -1) {
195 emith_move_r_r(host_dst, host_src);
// Otherwise the value goes through the context structure.
// NOTE(review): `tmp` and the conditions around these two calls are on lines
// not visible in this excerpt.
205 emith_ctx_read(tmp, src * 4);
207 emith_ctx_write(tmp, dst * 4);
// Emit host code computing a register-relative branch target: guest register
// `reg` plus the constant `pc`, stored into the SHR_PPC context slot.
// Host register 0 is used as the scratch register throughout.
210 static void emit_braf(sh2_reg_e reg, u32 pc)
212 int host_reg = reg_map_g2h[reg];
// Unmapped guest register: fetch its value from the context.
213 if (host_reg == -1) {
214 emith_ctx_read(0, reg * 4);
// Mapped guest register: copy it from its host register.
// NOTE(review): presumably the else branch - the separating line is not visible.
216 emith_move_r_r(0, host_reg);
217 emith_add_r_imm(0, pc);
219 emith_ctx_write(0, SHR_PPC * 4);
// Translation helper for opcode `op`; in the visible lines it emits a call to
// sh2_do_op() with two arguments (sh2 and op).
// NOTE(review): most of the body, including the definition of `sh2` used below
// and the return value, is not visible in this excerpt.
223 static int sh2_translate_op4(int op)
229 emith_pass_arg(2, sh2, op);
230 emith_call(sh2_do_op);
// CHECK_UNHANDLED_BITS(mask): inside sh2_translate(), test whether the current
// opcode `op` has any of the `mask` bits set. The action taken in that case is
// on lines not visible in this excerpt.
// Relies on a variable named `op` being in scope at the expansion site.
241 #define CHECK_UNHANDLED_BITS(mask) { \
242 if ((op & (mask)) != 0) \
// Translate the SH2 code starting at sh2->pc into host code and register the
// new block in the lookup structures.
//   sh2         - CPU context; pc selects the code, is_slave selects the
//                 data-array/BIOS cache and block-id map
//   other_block - existing head of the hash chain for this pc (or NULL); the new
//                 block is linked in front of it
// NOTE(review): several locals used below (tmp, tmp2, cycles, block_entry) and
// the return statement are declared/written on lines not visible here.
246 static void *sh2_translate(SH2 *sh2, block_desc *other_block)
249 block_desc *this_block;
250 unsigned int pc = sh2->pc;
251 int op, delayed_op = 0, test_irq = 0;
252 int tcache_id = 0, blkid = 0;
// Reject program counters outside the supported regions.
// NOTE(review): `tmp` is computed on a line not visible here, presumably from
// the top bits of the PC.
258 if ((tmp != 0 && tmp != 1 && tmp != 6) || sh2->pc == 0) {
259 printf("invalid PC, aborting: %08x\n", sh2->pc);
260 // FIXME: be less destructive
// Pick the translation cache: data array (0xc0000000 region) and the first 4K
// (BIOS) use the per-CPU cache, everything else stays in cache 0.
264 if ((sh2->pc & 0xe0000000) == 0xc0000000 || (sh2->pc & ~0xfff) == 0) {
265 // data_array, BIOS have separate tcache (shared)
266 tcache_id = 1 + sh2->is_slave;
269 tcache_ptr = tcache_ptrs[tcache_id];
270 this_block = dr_add_block(pc, tcache_id, &blkid);
// Flush and retry when fewer than MAX_BLOCK_SIZE bytes remain in the cache or
// no descriptor could be obtained.
272 tmp = tcache_ptr - tcache_bases[tcache_id];
273 if (tmp > tcache_sizes[tcache_id] - MAX_BLOCK_SIZE || this_block == NULL) {
274 flush_tcache(tcache_id);
275 tcache_ptr = tcache_ptrs[tcache_id];
276 other_block = NULL; // also gone too due to flush
277 this_block = dr_add_block(pc, tcache_id, &blkid);
// NOTE(review): this_block is dereferenced here; whether the result of the
// second dr_add_block() is checked for NULL is not visible in this excerpt.
280 this_block->next = other_block;
// Only ROM blocks are found through the hash table; the new block becomes the
// head of its bucket.
281 if ((sh2->pc & 0xc6000000) == 0x02000000) // ROM
282 HASH_FUNC(hash_table, pc) = this_block;
284 block_entry = tcache_ptr;
286 printf("== %csh2 block #%d,%d %08x -> %p\n", sh2->is_slave ? 's' : 'm',
287 tcache_id, block_counts[tcache_id], pc, block_entry);
288 if (other_block != NULL) {
289 printf(" hash collision with %08x\n", other_block->addr);
// Main translation loop: keep going until the cycle budget for one block is
// used up, but never stop while a delay slot is still pending.
294 while (cycles < BLOCK_CYCLE_LIMIT || delayed_op)
299 op = p32x_sh2_read16(pc, sh2);
304 DasmSH2(sh2dasm_buff, pc, op);
305 printf("%08x %04x %s\n", pc, op, sh2dasm_buff);
// Dispatch on the top nibble of the opcode.
312 switch ((op >> 12) & 0x0f)
317 CHECK_UNHANDLED_BITS(0xd0);
318 // BRAF Rm 0000mmmm00100011
319 // BSRF Rm 0000mmmm00000011
322 emit_move_r_imm32(SHR_PR, pc + 2);
323 emit_braf((op >> 8) & 0x0f, pc + 2);
327 CHECK_UNHANDLED_BITS(0xf0);
328 // NOP 0000000000001001
331 CHECK_UNHANDLED_BITS(0xd0);
334 // RTS 0000000000001011
335 emit_move_r_r(SHR_PPC, SHR_PR);
// RTE is not emitted natively: the guest PC is stored, sh2_do_op() is called
// for the opcode, and the resulting PC is taken as the delayed branch target.
338 // RTE 0000000000101011
339 //emit_move_r_r(SHR_PC, SHR_PR);
340 emit_move_r_imm32(SHR_PC, pc - 2);
341 emith_pass_arg_r(0, CONTEXT_REG);
342 emith_pass_arg_imm(1, op);
343 emith_call(sh2_do_op);
344 emit_move_r_r(SHR_PPC, SHR_PC);
355 if ((op & 0xf0) != 0)
357 // LDC.L @Rm+,SR 0100mmmm00000111
361 if ((op & 0xd0) != 0)
363 // JMP @Rm 0100mmmm00101011
364 // JSR @Rm 0100mmmm00001011
367 emit_move_r_imm32(SHR_PR, pc + 2);
368 emit_move_r_r(SHR_PPC, (op >> 8) & 0x0f);
372 if ((op & 0xf0) != 0)
374 // LDC Rm,SR 0100mmmm00001110
381 switch (op & 0x0f00) {
382 // BT/S label 10001101dddddddd
384 // BF/S label 10001111dddddddd
389 // BT label 10001001dddddddd
391 // BF label 10001011dddddddd
// Sign-extend the 8-bit displacement and scale it by 2 in one step.
// NOTE(review): relies on arithmetic right shift of a negative int, which is
// implementation-defined in C.
393 tmp = ((signed int)(op << 24) >> 23);
// Delayed forms write the target to PPC, plain forms write PC directly. The
// fall-through address is stored first, then conditionally replaced by the
// branch target.
394 tmp2 = delayed_op ? SHR_PPC : SHR_PC;
395 emit_move_r_imm32(tmp2, pc + (delayed_op ? 2 : 0));
397 EMITH_CONDITIONAL(emit_move_r_imm32(tmp2, pc + tmp + 2), (op & 0x0200) ? 1 : 0);
406 // BRA label 1010dddddddddddd
// Same trick for the 12-bit displacement: sign-extend and multiply by 2.
409 tmp = ((signed int)(op << 20) >> 19);
410 emit_move_r_imm32(SHR_PPC, pc + tmp + 2);
415 // BSR label 1011dddddddddddd
417 emit_move_r_imm32(SHR_PR, pc + 2);
// Generic path: store the guest PC and call sh2_do_op() for the opcode.
// NOTE(review): pc - 2 suggests pc was already advanced past the instruction
// on a line not visible here - confirm.
422 emit_move_r_imm32(SHR_PC, pc - 2);
423 emith_pass_arg_r(0, CONTEXT_REG);
424 emith_pass_arg_imm(1, op);
425 emith_call(sh2_do_op);
// Commit a pending delayed branch target to the guest PC.
431 emit_move_r_r(SHR_PC, SHR_PPC);
// Emit an interrupt check when requested, except while delayed_op == 2.
433 if (test_irq && delayed_op != 2) {
434 emith_pass_arg_r(0, CONTEXT_REG);
435 emith_call(sh2_test_irq);
441 do_host_disasm(tcache_id);
445 this_block->end_addr = pc;
447 // mark memory blocks as containing compiled code
// Block-id map encoding: (blkid << 1) | 1 marks the first cell of a block,
// blkid << 1 marks continuation cells.
448 if ((sh2->pc & 0xe0000000) == 0xc0000000 || (sh2->pc & ~0xfff) == 0) {
450 u16 *drcblk = Pico32xMem->drcblk_da[sh2->is_slave];
451 tmp = (this_block->addr & 0xfff) >> SH2_DRCBLK_DA_SHIFT;
452 tmp2 = (this_block->end_addr & 0xfff) >> SH2_DRCBLK_DA_SHIFT;
453 Pico32xMem->drcblk_da[sh2->is_slave][tmp] = (blkid << 1) | 1;
454 for (++tmp; tmp < tmp2; tmp++) {
456 break; // don't overwrite overlay block
457 drcblk[tmp] = blkid << 1;
// NOTE(review): DRAM is matched with mask 0xc7fc0000 here, while
// sh2_drc_dispatcher() looks RAM blocks up with mask 0xc6000000 - confirm the
// two are meant to differ.
460 else if ((this_block->addr & 0xc7fc0000) == 0x06000000) { // DRAM
461 tmp = (this_block->addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT;
462 tmp2 = (this_block->end_addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT;
463 Pico32xMem->drcblk_ram[tmp] = (blkid << 1) | 1;
464 for (++tmp; tmp < tmp2; tmp++) {
465 if (Pico32xMem->drcblk_ram[tmp])
467 Pico32xMem->drcblk_ram[tmp] = blkid << 1;
// Block epilogue: subtract the consumed cycles from the counter kept in the
// upper bits of SR (bit 12 and up), then jump to the common exit stub.
471 if (reg_map_g2h[SHR_SR] == -1) {
472 emith_ctx_sub(cycles << 12, SHR_SR * 4);
474 emith_sub_r_imm(reg_map_g2h[SHR_SR], cycles << 12);
475 emith_jump(sh2_drc_exit);
// Publish the new write position of this translation cache.
476 tcache_ptrs[tcache_id] = tcache_ptr;
// Called with the range of freshly written host code before it is executed.
479 cache_flush_d_inval_i(block_entry, tcache_ptr);
482 do_host_disasm(tcache_id);
483 dbg(1, " block #%d,%d tcache %d/%d, insns %d -> %d %.3f",
484 tcache_id, block_counts[tcache_id],
485 tcache_ptr - tcache_bases[tcache_id], tcache_sizes[tcache_id],
486 insns_compiled, host_insn_count, (double)host_insn_count / insns_compiled);
487 if ((sh2->pc & 0xc6000000) == 0x02000000) // ROM
488 dbg(1, " hash collisions %d/%d", hash_collisions, block_counts[tcache_id]);
497 do_host_disasm(tcache_id);
// Main run loop: while the cycle counter kept in the upper bits of sh2->sr is
// positive, find (or translate) the block for sh2->pc and execute it.
// NOTE(review): the declaration of `block` and the conditions that guard the
// lookups and the sh2_translate() call are on lines not visible here.
502 void __attribute__((noinline)) sh2_drc_dispatcher(SH2 *sh2)
504 while (((signed int)sh2->sr >> 12) > 0)
507 block_desc *bd = NULL;
509 // FIXME: must avoid doing it so often..
512 // we have full block id tables for data_array and RAM
513 // BIOS goes to data_array table too
// NOTE(review): this test masks with 0xff000000, whereas sh2_translate() picks
// the data-array cache with mask 0xe0000000 - a PC accepted there but not here
// would never be found again by this lookup. Confirm which mask is intended.
514 if ((sh2->pc & 0xff000000) == 0xc0000000 || (sh2->pc & ~0xfff) == 0) {
515 int blkid = Pico32xMem->drcblk_da[sh2->is_slave][(sh2->pc & 0xfff) >> SH2_DRCBLK_DA_SHIFT];
// The low bit of a map entry is a flag; the descriptor index is blkid >> 1.
517 bd = &block_tables[1 + sh2->is_slave][blkid >> 1];
518 block = bd->tcache_ptr;
// RAM: same scheme using the RAM block-id map and descriptor table 0.
// NOTE(review): sh2_translate() marks DRAM blocks using mask 0xc7fc0000.
522 else if ((sh2->pc & 0xc6000000) == 0x06000000) {
523 int blkid = Pico32xMem->drcblk_ram[(sh2->pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT];
525 bd = &block_tables[0][blkid >> 1];
526 block = bd->tcache_ptr;
// ROM: look in the hash bucket; check the head entry first, then the chain.
530 else if ((sh2->pc & 0xc6000000) == 0x02000000) {
531 bd = HASH_FUNC(hash_table, sh2->pc);
534 if (bd->addr == sh2->pc)
535 block = bd->tcache_ptr;
537 block = dr_find_block(bd, sh2->pc);
// Compile the code at sh2->pc; bd is passed as the existing chain head.
542 block = sh2_translate(sh2, bd);
544 dbg(4, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 's' : 'm',
545 sh2->pc, block, (signed int)sh2->sr >> 12);
550 sh2_drc_entry(sh2, block);
// Self-modifying-code handling: invalidate the translated block that covers the
// block-id map cell `p`.
//   drcblk - start of the block-id map that `p` points into
//   p      - map cell for the written address
//   btab   - descriptor table belonging to that map
//   a      - the written SH2 address (used for the overlay range check)
// NOTE(review): the definition of `id`, the body of the first while loop and
// the head of the final do/while are on lines not visible in this excerpt.
554 static void sh2_smc_rm_block(u16 *drcblk, u16 *p, block_desc *btab, u32 a)
557 block_desc *bd = btab + id;
559 dbg(1, " killing block %08x", bd->addr);
// Zeroing the range marks the descriptor as dead (block_stats() skips addr 0).
560 bd->addr = bd->end_addr = 0;
// Scan backwards while the preceding cells belong to the same block id,
// guarding against stepping before the start of the map.
562 while (p > drcblk && (p[-1] >> 1) == id)
565 // check for possible overlay block
// NOTE(review): `p > 0` compares a pointer with 0 and is true for any valid
// pointer; `p > drcblk` (as in the loop above) looks intended, otherwise
// p[-1] is read before the start of the map when p == drcblk.
566 if (p > 0 && p[-1] != 0) {
567 bd = btab + (p[-1] >> 1);
// Recurse only if the earlier block's [addr, end_addr) range covers `a`.
568 if (bd->addr <= a && a < bd->end_addr)
569 sh2_smc_rm_block(drcblk, p - 1, btab, a);
// NOTE(review): this trailing loop condition has no visible upper bound on p -
// confirm it cannot run past the end of the map.
575 while ((*p >> 1) == id);
// Write hook for RAM: remove the translated block recorded for address `a` in
// the RAM block-id map (descriptor table 0).
//   a     - written SH2 address; only the low 18 bits select the map cell
//   val   - not used in the visible lines
//   cpuid - 0 = master, non-zero = slave; only used for the debug message
// NOTE(review): any check made before calling sh2_smc_rm_block() is on a line
// not visible in this excerpt.
578 void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid)
580 u16 *drcblk = Pico32xMem->drcblk_ram;
581 u16 *p = drcblk + ((a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT);
583 dbg(1, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a);
584 sh2_smc_rm_block(drcblk, p, block_tables[0], a);
// Write hook for the data array: remove the translated block recorded for
// address `a` in the block-id map of CPU `cpuid` (descriptor table 1 + cpuid).
//   a     - written SH2 address; only the low 12 bits select the map cell
//   val   - not used in the visible lines
//   cpuid - indexes drcblk_da[] and selects the descriptor table
// NOTE(review): any check made before calling sh2_smc_rm_block() is on a line
// not visible in this excerpt.
587 void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid)
589 u16 *drcblk = Pico32xMem->drcblk_da[cpuid];
590 u16 *p = drcblk + ((a & 0xfff) >> SH2_DRCBLK_DA_SHIFT);
592 dbg(1, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a);
593 sh2_smc_rm_block(drcblk, p, block_tables[1 + cpuid], a);
// Run the SH2 for (approximately) `cycles` more cycles through the recompiler.
// cycles_aim accumulates the requested total and cycles_done the executed
// total, so the difference carried over from earlier calls is included.
596 void sh2_execute(SH2 *sh2, int cycles)
598 sh2->cycles_aim += cycles;
599 cycles = sh2->cycles_aim - sh2->cycles_done;
601 // cycles are kept in SHR_SR unused bits (upper 20)
// NOTE(review): the value is OR-ed in, so the upper bits of sr must be clear
// at this point - presumably done on a line not visible here.
603 sh2->sr |= cycles << 12;
604 sh2_drc_dispatcher(sh2);
// Whatever is left in the counter (possibly negative) was not executed.
606 sh2->cycles_done += cycles - ((signed int)sh2->sr >> 12);
// Called from translated code: take a pending interrupt if its level is above
// the mask held in bits 4-7 of SR.
609 static void REGPARM(1) sh2_test_irq(SH2 *sh2)
611 if (sh2->pending_level > ((sh2->sr >> 4) & 0x0f))
// The external IRL request wins when its level is higher than the internal
// one; its vector is derived from the level as 64 + level / 2.
613 if (sh2->pending_irl > sh2->pending_int_irq)
614 sh2_do_irq(sh2, sh2->pending_irl, 64 + sh2->pending_irl/2);
// NOTE(review): presumably the else branch - the separating line is not visible.
// The internal request uses its stored vector and is cleared once taken, after
// which the pending level falls back to the IRL level.
616 sh2_do_irq(sh2, sh2->pending_int_irq, sh2->pending_int_vector);
617 sh2->pending_int_irq = 0; // auto-clear
618 sh2->pending_level = sh2->pending_irl;
// Debug statistics: print up to ten live blocks ranked by reference count,
// each with its share of the total, then reset all reference counts.
// NOTE(review): the declaration of `max`, the bookkeeping inside the inner if
// and the handling of maxb == NULL are on lines not visible in this excerpt.
624 static void block_stats(void)
626 int c, b, i, total = 0;
// Pass 1: sum the reference counts of all live blocks (addr != 0).
628 for (b = 0; b < ARRAY_SIZE(block_tables); b++)
629 for (i = 0; i < block_counts[b]; i++)
630 if (block_tables[b][i].addr != 0)
631 total += block_tables[b][i].refcount;
// Pass 2: ten rounds, each selecting the live block with the highest count.
633 for (c = 0; c < 10; c++) {
634 block_desc *blk, *maxb = NULL;
636 for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
637 for (i = 0; i < block_counts[b]; i++) {
638 blk = &block_tables[b][i];
639 if (blk->addr != 0 && blk->refcount > max) {
// NOTE(review): `total` can be 0 when no block has been referenced; the
// percentage is then a floating-point division by zero.
647 printf("%08x %9d %2.3f%%\n", maxb->addr, maxb->refcount,
648 (double)maxb->refcount / total * 100.0);
// Pass 3: clear the counters of every descriptor in use.
652 for (b = 0; b < ARRAY_SIZE(block_tables); b++)
653 for (i = 0; i < block_counts[b]; i++)
654 block_tables[b][i].refcount = 0;
// Empty variant so that calls compile away; the preprocessor conditional
// choosing between function and macro is not visible in this excerpt.
657 #define block_stats()
// Public entry point taking no arguments and returning nothing.
// NOTE(review): the body is not visible in this excerpt; judging by the name it
// presumably flushes every translation cache - confirm against the full file.
660 void sh2_drc_flush_all(void)
// Set up the recompiler's shared state: descriptor tables, translation cache
// layout and the ROM hash table. Both allocations are guarded by NULL checks,
// so calling this again does not allocate a second time.
//   sh2 - not used in the visible lines
// NOTE(review): the return values and the failure paths after the NULL checks
// are on lines not visible in this excerpt.
668 int sh2_drc_init(SH2 *sh2)
670 if (block_tables[0] == NULL) {
// One allocation holds the descriptor tables of all three caches.
675 cnt = block_max_counts[0] + block_max_counts[1] + block_max_counts[2];
676 block_tables[0] = calloc(cnt, sizeof(*block_tables[0]));
677 if (block_tables[0] == NULL)
680 memset(block_counts, 0, sizeof(block_counts));
681 tcache_bases[0] = tcache_ptrs[0] = tcache;
// Carve tables 1 and 2 and caches 1 and 2 out of the shared buffers, each one
// starting where the previous one ends.
683 for (i = 1; i < ARRAY_SIZE(block_tables); i++) {
684 block_tables[i] = block_tables[i - 1] + block_max_counts[i - 1];
685 tcache_bases[i] = tcache_ptrs[i] = tcache_bases[i - 1] + tcache_sizes[i - 1];
// Sets the POPT_DIS_VDP_FIFO option bit as a side effect of initialisation.
689 PicoOpt |= POPT_DIS_VDP_FIFO;
692 for (i = 0; i < ARRAY_SIZE(block_tables); i++)
693 tcache_dsm_ptrs[i] = tcache_bases[i];
700 if (hash_table == NULL) {
// NOTE(review): arguments are in (size, count) order; calloc() is declared as
// (count, size). The allocated amount is the same either way.
701 hash_table = calloc(sizeof(hash_table[0]), MAX_HASH_ENTRIES);
702 if (hash_table == NULL)
709 void sh2_drc_finish(SH2 *sh2)
711 if (block_tables[0] != NULL) {
713 free(block_tables[0]);
714 memset(block_tables, 0, sizeof(block_tables));
719 if (hash_table != NULL) {