2 * vim:shiftwidth=2:expandtab
8 #include "../../pico/pico_int.h"
11 #include "../drc/cmn.h"
// Debug logging helper: when the level bits in l overlap the DRC_DEBUG mask,
// the remaining arguments (format string first) are forwarded to
// elprintf(EL_STATUS, ...).
17 #define dbg(l,...) { \
18 if ((l) & DRC_DEBUG) \
19 elprintf(EL_STATUS, ##__VA_ARGS__); \
23 #include "mame/sh2dasm.h"
24 #include <platform/linux/host_dasm.h>
// Debug statistics counters; they are reported by the dbg() calls at the end
// of sh2_translate.
25 static int insns_compiled, hash_collisions, host_insn_count;
// Per translation cache: start of the host code that has not been passed to
// the host disassembler yet.
28 static u8 *tcache_dsm_ptrs[3];
// Scratch buffer that DasmSH2 fills with the text of one SH2 instruction.
29 static char sh2dasm_buff[64];
// Disassembles the host code emitted for this tcache since the previous call
// (from the saved marker up to tcache_ptr), then moves the marker to tcache_ptr.
// NOTE(review): expands to two statements, so it must not be used as the
// unbraced body of an if/else or loop.
30 #define do_host_disasm(tcid) \
31 host_dasm(tcache_dsm_ptrs[tcid], tcache_ptr - tcache_dsm_ptrs[tcid]); \
32 tcache_dsm_ptrs[tcid] = tcache_ptr
// No-op variant; presumably selected by a preprocessor conditional that is not
// visible here - confirm.
34 #define do_host_disasm(x)
// Cycle budget of one translated block: the decode loop in sh2_translate keeps
// going while the accumulated cycles are below this limit (or a delay slot is
// still pending).
37 #define BLOCK_CYCLE_LIMIT 100
// Free space that must remain in a translation cache before a new block is
// started in it; sh2_translate flushes the cache when less than this is left.
38 #define MAX_BLOCK_SIZE (BLOCK_CYCLE_LIMIT * 6 * 6)
40 // we have 3 translation cache buffers, split from one drc/cmn buffer.
41 // BIOS shares tcache with data array because it's only used for init
42 // and can be discarded early
// Size of each translation cache, expressed as a share of DRC_TCACHE_SIZE.
43 static const int tcache_sizes[3] = {
44 DRC_TCACHE_SIZE * 6 / 8, // ROM, DRAM
45 DRC_TCACHE_SIZE / 8, // BIOS, data array in master sh2
46 DRC_TCACHE_SIZE / 8, // ... slave
// Start address of each translation cache (set up in sh2_drc_init).
49 static u8 *tcache_bases[3];
// Current write position inside each translation cache.
50 static u8 *tcache_ptrs[3];
52 // ptr for code emitters
53 static u8 *tcache_ptr;
// Code emitter backends are textually included.
// NOTE(review): both the ARM and the x86 backend (and two reg_map_g2h
// definitions) appear below; presumably a preprocessor conditional that is not
// visible here selects exactly one pair - confirm.
56 #include "../drc/emit_arm.c"
// Guest (SH2) register to host register map, indexed by guest register number.
// The emit_* helpers in this file treat an entry of -1 as: no host register is
// assigned, access the value through the context instead.
58 static const int reg_map_g2h[] = {
68 #include "../drc/emit_x86.c"
70 static const int reg_map_g2h[] = {
// Guest register numbers.  They index reg_map_g2h and, multiplied by 4, are
// used as the context offset of the register.
82 SHR_R0 = 0, SHR_R15 = 15,
83 SHR_PC, SHR_PPC, SHR_PR, SHR_SR,
84 SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL,
// Descriptor of one translated block.  Descriptors live in block_tables[] and
// are chained through next when several blocks share a hash bucket.
// An addr of 0 marks a killed block (see sh2_smc_rm_block and block_stats).
87 typedef struct block_desc_ {
88 u32 addr; // SH2 PC address
89 u32 end_addr; // TODO rm?
90 void *tcache_ptr; // translated block for above PC
91 struct block_desc_ *next; // next block with the same PC hash
// Capacity of each block descriptor table (one table per translation cache).
97 static const int block_max_counts[3] = {
// Block descriptor tables.  sh2_drc_init allocates them as one array and
// points entries 1 and 2 into it, so only block_tables[0] is ever freed.
102 static block_desc *block_tables[3];
// Number of descriptors currently in use in each table.
103 static int block_counts[3];
// Number of hash buckets.  HASH_MASK is derived as size - 1, so the size has
// to stay a power of two.
106 #define MAX_HASH_ENTRIES 1024
107 #define HASH_MASK (MAX_HASH_ENTRIES - 1)
// Hash buckets; each holds a block_desc pointer (accessed through HASH_FUNC).
108 static void **hash_table;
// Entry into and exit from translated code; defined outside this file section.
110 extern void sh2_drc_entry(SH2 *sh2, void *block);
111 extern void sh2_drc_exit(void);
// sh2_do_op is called from generated code with the SH2 context and an opcode;
// sh2_test_irq is defined further down in this file.
114 extern void __attribute__((regparm(2))) sh2_do_op(SH2 *sh2, int opcode);
115 static void __attribute__((regparm(1))) sh2_test_irq(SH2 *sh2);
// Discards everything translated into translation cache tcid: resets its block
// count and write pointer and clears the lookup data that referenced it.
//   tcid 0 (ROM, RAM): clears the hash table and the drcblk_ram map.
//   tcid 1/2:          clears the drcblk_da map of the matching CPU (tcid - 1).
// The host disassembly marker of the cache is rewound as well.
117 static void flush_tcache(int tcid)
// The pointer difference has type ptrdiff_t; it is cast to int so that it
// matches the %d conversion (the value is bounded by the int tcache size).
119 printf("tcache #%d flush! (%d/%d, bds %d/%d)\n", tcid,
120 (int)(tcache_ptrs[tcid] - tcache_bases[tcid]), tcache_sizes[tcid],
121 block_counts[tcid], block_max_counts[tcid]);
123 block_counts[tcid] = 0;
124 tcache_ptrs[tcid] = tcache_bases[tcid];
125 if (tcid == 0) { // ROM, RAM
126 memset(hash_table, 0, sizeof(hash_table[0]) * MAX_HASH_ENTRIES);
127 memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram));
130 memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[0]));
132 tcache_dsm_ptrs[tcid] = tcache_bases[tcid];
// Searches a hash chain for the block translated for SH2 address addr.
//   tab:  head of the chain; the search starts at tab->next, so the head
//         itself is not compared here (the dispatcher checks it beforehand).
//   addr: SH2 PC to look for.
// Returns the translated code pointer of the matching descriptor.  When no
// entry matches, a block miss message is printed; the value returned in that
// case is not visible here (presumably NULL - confirm).
136 static void *dr_find_block(block_desc *tab, u32 addr)
138 for (tab = tab->next; tab != NULL; tab = tab->next)
139 if (tab->addr == addr)
143 return tab->tcache_ptr;
145 printf("block miss for %08x\n", addr);
// Takes the next free descriptor from the table of translation cache tcache_id
// and points it at the current emitter position (tcache_ptr).
//   addr:      SH2 PC of the new block.
//   tcache_id: which block table / translation cache to use.
//   blk_id:    output parameter; how it is filled is not visible here
//              (presumably the index of the new descriptor - confirm).
// The table capacity is checked first; sh2_translate treats a NULL result as
// table full and flushes the cache.
149 static block_desc *dr_add_block(u32 addr, int tcache_id, int *blk_id)
151 int *bcount = &block_counts[tcache_id];
154 if (*bcount >= block_max_counts[tcache_id])
157 bd = &block_tables[tcache_id][*bcount];
159 bd->tcache_ptr = tcache_ptr;
// Yields the hash bucket for addr as an lvalue of type block_desc *.  The
// bucket index is simply the low bits of the address (addr & HASH_MASK).
166 #define HASH_FUNC(hash_tab, addr) \
167 ((block_desc **)(hash_tab))[(addr) & HASH_MASK]
169 // ---------------------------------------------------------------
// Emits host code that loads the 32-bit constant imm into guest register dst.
// The constant is loaded into a host register (tmp) and written to the context
// slot of dst (offset dst * 4); how tmp is chosen from host_dst, and under
// which condition the context write is emitted, is not visible here.
171 static void emit_move_r_imm32(sh2_reg_e dst, u32 imm)
173 int host_dst = reg_map_g2h[dst];
178 emith_move_r_imm(tmp, imm);
180 emith_ctx_write(tmp, dst * 4);
// Emits host code that copies guest register src into guest register dst.
// When both guest registers are mapped to host registers, a plain host
// register move is emitted.  Otherwise the value goes through the context:
// it is read from slot src * 4 and/or written to slot dst * 4 via a host
// register tmp (the exact selection logic is not visible here).
183 static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src)
185 int host_dst = reg_map_g2h[dst], host_src = reg_map_g2h[src];
188 if (host_dst != -1 && host_src != -1) {
189 emith_move_r_r(host_dst, host_src);
199 emith_ctx_read(tmp, src * 4);
201 emith_ctx_write(tmp, dst * 4);
// Emits host code for the target of a register-relative branch: the value of
// guest register reg plus the constant pc is stored into the SHR_PPC context
// slot.  Host register 0 is used as scratch: it is loaded either from the
// context (reg has no host register) or from the mapped host register.
204 static void emit_braf(sh2_reg_e reg, u32 pc)
206 int host_reg = reg_map_g2h[reg];
207 if (host_reg == -1) {
208 emith_ctx_read(0, reg * 4);
210 emith_move_r_r(0, host_reg);
211 emith_add_r_imm(0, pc);
213 emith_ctx_write(0, SHR_PPC * 4);
// Translation helper for opcode op.  The visible part emits a call to the
// interpreter routine sh2_do_op with the SH2 context and op as arguments.
// The meaning of the int return value is not visible here.
217 static int sh2_translate_op4(int op)
223 emith_pass_arg(2, sh2, op);
224 emith_call(sh2_do_op);
// Tests the opcode variable op of the enclosing scope against mask.  The
// action taken when any masked bit is set is not visible here.
235 #define CHECK_UNHANDLED_BITS(mask) { \
236 if ((op & (mask)) != 0) \
// Translates the SH2 code starting at sh2->pc into host code.
//   sh2:         CPU context; pc selects the code and is_slave the per-CPU
//                cache and data array map.
//   other_block: current head of the hash chain for this pc (may be NULL);
//                the new descriptor is linked in front of it.
// Steps visible here:
//   - pick the translation cache: 0 by default, 1 + is_slave for data array
//     and BIOS addresses;
//   - take a block descriptor; if the cache has less than MAX_BLOCK_SIZE left
//     or no descriptor is free, flush the cache and take one again;
//   - for ROM addresses, store the descriptor in its hash bucket;
//   - decode opcodes while cycles < BLOCK_CYCLE_LIMIT or a delay slot is
//     pending, emitting host code per instruction;
//   - record end_addr and mark the covered drcblk_da / drcblk_ram entries;
//   - emit the cycle subtraction on SR and the jump to sh2_drc_exit.
// The return statement is not visible; the dispatcher uses the result as the
// block entry point (presumably block_entry - confirm).
240 static void *sh2_translate(SH2 *sh2, block_desc *other_block)
243 block_desc *this_block;
244 unsigned int pc = sh2->pc;
245 int op, delayed_op = 0, test_irq = 0;
246 int tcache_id = 0, blkid = 0;
// tmp is computed from the pc on a line that is not visible here.
252 if ((tmp != 0 && tmp != 1 && tmp != 6) || sh2->pc == 0) {
253 printf("invalid PC, aborting: %08x\n", sh2->pc);
254 // FIXME: be less destructive
258 if ((sh2->pc & 0xe0000000) == 0xc0000000 || (sh2->pc & ~0xfff) == 0) {
259 // data_array, BIOS have separate tcache (shared)
260 tcache_id = 1 + sh2->is_slave;
263 tcache_ptr = tcache_ptrs[tcache_id];
264 this_block = dr_add_block(pc, tcache_id, &blkid);
// Bytes already used in this cache; flush when the remaining space could not
// hold a maximum size block or when no descriptor was available.
266 tmp = tcache_ptr - tcache_bases[tcache_id];
267 if (tmp > tcache_sizes[tcache_id] - MAX_BLOCK_SIZE || this_block == NULL) {
268 flush_tcache(tcache_id);
269 tcache_ptr = tcache_ptrs[tcache_id];
270 other_block = NULL; // also gone too due to flush
271 this_block = dr_add_block(pc, tcache_id, &blkid);
274 this_block->next = other_block;
275 if ((sh2->pc & 0xc6000000) == 0x02000000) // ROM
276 HASH_FUNC(hash_table, pc) = this_block;
278 block_entry = tcache_ptr;
280 printf("== %csh2 block #%d,%d %08x -> %p\n", sh2->is_slave ? 's' : 'm',
281 tcache_id, block_counts[tcache_id], pc, block_entry);
282 if (other_block != NULL) {
283 printf(" hash collision with %08x\n", other_block->addr);
// Main decode loop: a pending delay slot keeps the loop going even after the
// cycle limit has been reached.
288 while (cycles < BLOCK_CYCLE_LIMIT || delayed_op)
293 op = p32x_sh2_read16(pc, sh2);
298 DasmSH2(sh2dasm_buff, pc, op);
299 printf("%08x %04x %s\n", pc, op, sh2dasm_buff);
306 switch ((op >> 12) & 0x0f)
311 CHECK_UNHANDLED_BITS(0xd0);
312 // BRAF Rm 0000mmmm00100011
313 // BSRF Rm 0000mmmm00000011
316 emit_move_r_imm32(SHR_PR, pc + 2);
317 emit_braf((op >> 8) & 0x0f, pc + 2);
321 CHECK_UNHANDLED_BITS(0xf0);
322 // NOP 0000000000001001
325 CHECK_UNHANDLED_BITS(0xd0);
328 // RTS 0000000000001011
329 emit_move_r_r(SHR_PPC, SHR_PR);
332 // RTE 0000000000101011
333 //emit_move_r_r(SHR_PC, SHR_PR);
334 emit_move_r_imm32(SHR_PC, pc - 2);
335 emith_pass_arg_r(0, CONTEXT_REG);
336 emith_pass_arg_imm(1, op);
337 emith_call(sh2_do_op);
338 emit_move_r_r(SHR_PPC, SHR_PC);
349 if ((op & 0xf0) != 0)
351 // LDC.L @Rm+,SR 0100mmmm00000111
355 if ((op & 0xd0) != 0)
357 // JMP @Rm 0100mmmm00101011
358 // JSR @Rm 0100mmmm00001011
361 emit_move_r_imm32(SHR_PR, pc + 2);
362 emit_move_r_r(SHR_PPC, (op >> 8) & 0x0f);
366 if ((op & 0xf0) != 0)
368 // LDC Rm,SR 0100mmmm00001110
375 switch (op & 0x0f00) {
376 // BT/S label 10001101dddddddd
378 // BF/S label 10001111dddddddd
383 // BT label 10001001dddddddd
385 // BF label 10001011dddddddd
// Sign-extend the 8-bit displacement and scale it by 2 (shift up by 24, then
// arithmetic shift down by 23).
// NOTE(review): op is a signed int, so op << 24 can overflow; shifting an
// unsigned copy would avoid relying on that.
387 tmp = ((signed int)(op << 24) >> 23);
388 tmp2 = delayed_op ? SHR_PPC : SHR_PC;
389 emit_move_r_imm32(tmp2, pc + (delayed_op ? 2 : 0));
391 EMITH_CONDITIONAL(emit_move_r_imm32(tmp2, pc + tmp + 2), (op & 0x0200) ? 1 : 0);
400 // BRA label 1010dddddddddddd
// Same scheme for the 12-bit displacement (shift up by 20, down by 19).
403 tmp = ((signed int)(op << 20) >> 19);
404 emit_move_r_imm32(SHR_PPC, pc + tmp + 2);
409 // BSR label 1011dddddddddddd
411 emit_move_r_imm32(SHR_PR, pc + 2);
416 emit_move_r_imm32(SHR_PC, pc - 2);
417 emith_pass_arg_r(0, CONTEXT_REG);
418 emith_pass_arg_imm(1, op);
419 emith_call(sh2_do_op);
425 emit_move_r_r(SHR_PC, SHR_PPC);
427 if (test_irq && delayed_op != 2) {
428 emith_pass_arg_r(0, CONTEXT_REG);
429 emith_call(sh2_test_irq);
435 do_host_disasm(tcache_id);
439 this_block->end_addr = pc;
441 // mark memory blocks as containing compiled code
// Map entry format: block id in the upper bits; bit 0 is set only on the entry
// that covers the start of the block.
442 if ((sh2->pc & 0xe0000000) == 0xc0000000 || (sh2->pc & ~0xfff) == 0) {
444 u16 *drcblk = Pico32xMem->drcblk_da[sh2->is_slave];
445 tmp = (this_block->addr & 0xfff) >> SH2_DRCBLK_DA_SHIFT;
446 tmp2 = (this_block->end_addr & 0xfff) >> SH2_DRCBLK_DA_SHIFT;
447 Pico32xMem->drcblk_da[sh2->is_slave][tmp] = (blkid << 1) | 1;
448 for (++tmp; tmp < tmp2; tmp++) {
450 break; // dont overwrite overlay block
451 drcblk[tmp] = blkid << 1;
454 else if ((this_block->addr & 0xc7fc0000) == 0x06000000) { // DRAM
455 tmp = (this_block->addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT;
456 tmp2 = (this_block->end_addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT;
457 Pico32xMem->drcblk_ram[tmp] = (blkid << 1) | 1;
458 for (++tmp; tmp < tmp2; tmp++) {
459 if (Pico32xMem->drcblk_ram[tmp])
461 Pico32xMem->drcblk_ram[tmp] = blkid << 1;
// Block epilogue: charge the cycles of this block to the counter kept in the
// upper bits of SR (in the context or in its host register), then leave.
465 if (reg_map_g2h[SHR_SR] == -1) {
466 emith_ctx_sub(cycles << 12, SHR_SR * 4);
468 emith_sub_r_imm(reg_map_g2h[SHR_SR], cycles << 12);
469 emith_jump(sh2_drc_exit);
470 tcache_ptrs[tcache_id] = tcache_ptr;
472 do_host_disasm(tcache_id);
// NOTE(review): tcache_ptr - tcache_bases[...] has type ptrdiff_t but is
// formatted with %d; cast it to int or use the t length modifier.
473 dbg(1, " block #%d,%d tcache %d/%d, insns %d -> %d %.3f",
474 tcache_id, block_counts[tcache_id],
475 tcache_ptr - tcache_bases[tcache_id], tcache_sizes[tcache_id],
476 insns_compiled, host_insn_count, (double)host_insn_count / insns_compiled);
477 if ((sh2->pc & 0xc6000000) == 0x02000000) // ROM
478 dbg(1, " hash collisions %d/%d", hash_collisions, block_counts[tcache_id]);
483 do_host_disasm(tcache_id);
// Main DRC loop: while the cycle counter kept in the upper bits of sh2->sr is
// positive, find the translated block for sh2->pc, translate it if none is
// found, and run it through sh2_drc_entry.
// Lookup depends on the address range:
//   - data array / BIOS: block id from the per-CPU drcblk_da map;
//   - RAM:               block id from the drcblk_ram map;
//   - ROM:               hash bucket, then the bucket chain (dr_find_block).
488 void __attribute__((noinline)) sh2_drc_dispatcher(SH2 *sh2)
490 while (((signed int)sh2->sr >> 12) > 0)
493 block_desc *bd = NULL;
495 // FIXME: must avoid doing it so often..
498 // we have full block id tables for data_array and RAM
499 // BIOS goes to data_array table too
// NOTE(review): sh2_translate classifies data array addresses with mask
// 0xe0000000 while this lookup uses 0xff000000, so blocks translated for
// mirrored addresses would never be found here and would be retranslated on
// every pass - confirm whether the masks should agree.
500 if ((sh2->pc & 0xff000000) == 0xc0000000 || (sh2->pc & ~0xfff) == 0) {
501 int blkid = Pico32xMem->drcblk_da[sh2->is_slave][(sh2->pc & 0xfff) >> SH2_DRCBLK_DA_SHIFT];
503 bd = &block_tables[1 + sh2->is_slave][blkid >> 1];
504 block = bd->tcache_ptr;
// NOTE(review): the RAM test here uses mask 0xc6000000, sh2_translate marks
// RAM blocks only for (addr & 0xc7fc0000) == 0x06000000 - confirm.
508 else if ((sh2->pc & 0xc6000000) == 0x06000000) {
509 int blkid = Pico32xMem->drcblk_ram[(sh2->pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT];
511 bd = &block_tables[0][blkid >> 1];
512 block = bd->tcache_ptr;
516 else if ((sh2->pc & 0xc6000000) == 0x02000000) {
517 bd = HASH_FUNC(hash_table, sh2->pc);
// The bucket head is compared directly; dr_find_block only walks the entries
// that follow it.
520 if (bd->addr == sh2->pc)
521 block = bd->tcache_ptr;
523 block = dr_find_block(bd, sh2->pc);
// bd is handed over so that the new block can be chained in front of it.
528 block = sh2_translate(sh2, bd);
530 dbg(4, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 's' : 'm',
531 sh2->pc, block, (signed int)sh2->sr >> 12);
536 sh2_drc_entry(sh2, block);
// Invalidates the translated block that covers a written address
// (self-modifying code handling).
//   drcblk: start of the block id map (drcblk_ram or one drcblk_da map).
//   p:      map entry that corresponds to the written address.
//   btab:   block descriptor table the ids in the map refer to.
//   a:      the written SH2 address.
// The descriptor is killed by zeroing addr and end_addr, p is moved back
// while the previous entries carry the same id, a block recorded just before
// that run is removed recursively when its address range also contains a, and
// finally the run of entries with this id is processed by the closing loop.
540 static void sh2_smc_rm_block(u16 *drcblk, u16 *p, block_desc *btab, u32 a)
543 block_desc *bd = btab + id;
545 dbg(1, " killing block %08x", bd->addr);
546 bd->addr = bd->end_addr = 0;
548 while (p > drcblk && (p[-1] >> 1) == id)
551 // check for possible overlay block
// After the loop above p may equal drcblk, so the p[-1] access has to be
// bounded by the start of the map.  The previous test (p > 0) compared the
// pointer against 0 and therefore never prevented a read before the map.
552 if (p > drcblk && p[-1] != 0) {
553 bd = btab + (p[-1] >> 1);
554 if (bd->addr <= a && a < bd->end_addr)
555 sh2_smc_rm_block(drcblk, p - 1, btab, a);
561 while ((*p >> 1) == id);
// Write hook for RAM: removes the translated block that covers address a.
//   a:     written SH2 address; its low 18 bits select the drcblk_ram entry.
//   val:   written value (not used in the visible lines).
//   cpuid: 0 for the master, non-zero for the slave SH2 (only used for the
//          log message here).
// RAM blocks always belong to block table 0.
564 void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid)
566 u16 *drcblk = Pico32xMem->drcblk_ram;
567 u16 *p = drcblk + ((a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT);
569 dbg(1, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a);
570 sh2_smc_rm_block(drcblk, p, block_tables[0], a);
// Write hook for the data array: removes the translated block that covers
// address a.
//   a:     written SH2 address; its low 12 bits select the drcblk_da entry.
//   val:   written value (not used in the visible lines).
//   cpuid: 0 for the master, 1 for the slave SH2; selects the drcblk_da map
//          and the block table (1 + cpuid).
573 void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid)
575 u16 *drcblk = Pico32xMem->drcblk_da[cpuid];
576 u16 *p = drcblk + ((a & 0xfff) >> SH2_DRCBLK_DA_SHIFT);
578 dbg(1, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a);
579 sh2_smc_rm_block(drcblk, p, block_tables[1 + cpuid], a);
// Runs the SH2 through the recompiler for an additional number of cycles.
//   sh2:    CPU context.
//   cycles: cycles to add to the target (cycles_aim).
// The actual budget is the distance between cycles_aim and cycles_done, so
// cycles over- or under-run in earlier calls are carried over.
582 void sh2_execute(SH2 *sh2, int cycles)
584 sh2->cycles_aim += cycles;
585 cycles = sh2->cycles_aim - sh2->cycles_done;
587 // cycles are kept in SHR_SR unused bits (upper 20)
// NOTE(review): the budget is OR-ed into sr, which only works when the upper
// bits are clear at this point; a line that clears them is not visible here -
// confirm.
589 sh2->sr |= cycles << 12;
590 sh2_drc_dispatcher(sh2);
// What is left in the upper bits of sr (possibly negative) was not consumed;
// only the difference is added to cycles_done.
592 sh2->cycles_done += cycles - ((signed int)sh2->sr >> 12);
// Called from generated code to accept a pending interrupt.
// Nothing happens unless pending_level exceeds the interrupt mask held in
// bits 4-7 of sr.  Then the higher of the two sources is taken:
//   - external IRL: level pending_irl, vector 64 + pending_irl / 2;
//   - internal:     level pending_int_irq with pending_int_vector; the
//     internal request is cleared and pending_level falls back to pending_irl.
595 static void __attribute__((regparm(1))) sh2_test_irq(SH2 *sh2)
597 if (sh2->pending_level > ((sh2->sr >> 4) & 0x0f))
599 if (sh2->pending_irl > sh2->pending_int_irq)
600 sh2_do_irq(sh2, sh2->pending_irl, 64 + sh2->pending_irl/2);
602 sh2_do_irq(sh2, sh2->pending_int_irq, sh2->pending_int_vector);
603 sh2->pending_int_irq = 0; // auto-clear
604 sh2->pending_level = sh2->pending_irl;
// Debug report: prints up to 10 live blocks (addr != 0) ranked by refcount,
// each with its share of the summed refcount of all live blocks.
// How a reported block is excluded from the following rounds is not visible
// here.
610 static void block_stats(void)
612 int c, b, i, total = 0;
// First pass: sum the refcounts of all live blocks in all three tables.
614 for (b = 0; b < ARRAY_SIZE(block_tables); b++)
615 for (i = 0; i < block_counts[b]; i++)
616 if (block_tables[b][i].addr != 0)
617 total += block_tables[b][i].refcount;
// Ten selection rounds, each looking for the block with the highest refcount.
619 for (c = 0; c < 10; c++) {
620 block_desc *blk, *maxb = NULL;
622 for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
623 for (i = 0; i < block_counts[b]; i++) {
624 blk = &block_tables[b][i];
625 if (blk->addr != 0 && blk->refcount > max) {
// NOTE(review): total can be 0 when no block has been referenced, which makes
// the percentage below a division by zero - confirm that this cannot be
// reached in that state.
633 printf("%08x %9d %2.3f%%\n", maxb->addr, maxb->refcount,
634 (double)maxb->refcount / total * 100.0);
// Sets up the recompiler state shared by both SH2s; allocations are skipped
// when they already exist.
//   - allocates one zeroed array holding the descriptors of all three block
//     tables and points block_tables[1] and [2] into it;
//   - splits the tcache buffer into the three translation caches according to
//     tcache_sizes and resets their write pointers;
//   - resets the host disassembly markers;
//   - allocates the zeroed hash table.
// The return statements are not visible here (presumably 0 on success and a
// non-zero value when an allocation fails - confirm).
640 int sh2_drc_init(SH2 *sh2)
642 if (block_tables[0] == NULL) {
647 cnt = block_max_counts[0] + block_max_counts[1] + block_max_counts[2];
648 block_tables[0] = calloc(cnt, sizeof(*block_tables[0]));
649 if (block_tables[0] == NULL)
652 memset(block_counts, 0, sizeof(block_counts));
653 tcache_bases[0] = tcache_ptrs[0] = tcache;
// Each following table / cache starts right after the previous one.
655 for (i = 1; i < ARRAY_SIZE(block_tables); i++) {
656 block_tables[i] = block_tables[i - 1] + block_max_counts[i - 1];
657 tcache_bases[i] = tcache_ptrs[i] = tcache_bases[i - 1] + tcache_sizes[i - 1];
661 for (i = 0; i < ARRAY_SIZE(block_tables); i++)
662 tcache_dsm_ptrs[i] = tcache_bases[i];
// NOTE(review): the calloc arguments are in (size, count) order here, unlike
// the (count, size) order used above; harmless, but inconsistent.
669 if (hash_table == NULL) {
670 hash_table = calloc(sizeof(hash_table[0]), MAX_HASH_ENTRIES);
671 if (hash_table == NULL)
678 void sh2_drc_finish(SH2 *sh2)
680 if (block_tables[0] != NULL) {
684 free(block_tables[0]);
685 memset(block_tables, 0, sizeof(block_tables));
690 if (hash_table != NULL) {