2 * vim:shiftwidth=2:expandtab
8 #include "../../pico/pico_int.h"
11 #include "../drc/cmn.h"
// Conditional debug log: when any bit of `l` is also set in DRC_DEBUG, the
// remaining arguments are forwarded to elprintf() with the EL_STATUS level.
17 #define dbg(l,...) { \
18 if ((l) & DRC_DEBUG) \
19 elprintf(EL_STATUS, ##__VA_ARGS__); \
23 #include "mame/sh2dasm.h"
24 #include <platform/linux/host_dasm.h>
// Debug counters; they are printed by the dbg() summary near the end of sh2_translate().
25 static int insns_compiled, hash_collisions, host_insn_count;
// Per-tcache pointer to the first host code byte not yet handed to host_dasm()
// (advanced by do_host_disasm, reset to the tcache base on flush and init).
28 static u8 *tcache_dsm_ptrs[3];
// Text buffer passed to DasmSH2() and printed right afterwards in sh2_translate().
29 static char sh2dasm_buff[64];
30 #define do_host_disasm(tcid) \
31 host_dasm(tcache_dsm_ptrs[tcid], tcache_ptr - tcache_dsm_ptrs[tcid]); \
32 tcache_dsm_ptrs[tcid] = tcache_ptr
// No-op variant of do_host_disasm. The preprocessor conditional that selects
// between the two definitions is not visible in this excerpt.
34 #define do_host_disasm(x)
// sh2_translate() keeps adding instructions to a block while its cycle count is
// below this value (or a delay slot is still pending).
37 #define BLOCK_CYCLE_LIMIT 100
// Headroom, in bytes of host code, that sh2_translate() wants free in a tcache;
// if the used size exceeds (tcache size - MAX_BLOCK_SIZE) the tcache is flushed first.
38 #define MAX_BLOCK_SIZE (BLOCK_CYCLE_LIMIT * 6 * 6)
40 // We have 3 translation cache buffers, split from one drc/cmn buffer.
41 // BIOS shares a tcache with the data array because it's only used for init
42 // and can be discarded early.
// Sizes in bytes, indexed by tcache id: 6/8, 1/8 and 1/8 of DRC_TCACHE_SIZE.
43 static const int tcache_sizes[3] = {
44 DRC_TCACHE_SIZE * 6 / 8, // ROM, DRAM
45 DRC_TCACHE_SIZE / 8, // BIOS, data array in master sh2
46 DRC_TCACHE_SIZE / 8, // ... slave
// Start of each tcache region; assigned in sh2_drc_init().
49 static u8 *tcache_bases[3];
// Current write position of each tcache; flush_tcache() resets it to the base and
// sh2_translate() stores the emit pointer back here when a block is finished.
50 static u8 *tcache_ptrs[3];
52 // current output pointer for the code emitters
53 static u8 *tcache_ptr;
55 #include "../drc/emit_x86.c"
// SH2 guest register ids (the enclosing declaration is not visible in this
// excerpt; the emit_* helpers below take these as `sh2_reg_e`).
// A value indexes reg_map_g2h[] and, multiplied by 4, is the offset passed to
// emith_ctx_read()/emith_ctx_write().
58 SHR_R0 = 0, SHR_R15 = 15,
59 SHR_PC, SHR_PPC, SHR_PR, SHR_SR,
60 SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL,
// Descriptor of one translated block. Descriptors live in block_tables[] and
// may be chained through `next`.
// (The end of the struct is not visible here; block_stats() also reads a
// `refcount` member.)
63 typedef struct block_desc_ {
64 u32 addr; // SH2 PC address the block starts at; 0 marks a removed block
65 u32 end_addr; // PC where translation stopped; exclusive bound in SMC checks. TODO rm?
66 void *tcache_ptr; // translated block for above PC
67 struct block_desc_ *next; // next block with the same PC hash
// Maximum number of block descriptors per tcache (the initializer values are
// not visible in this excerpt).
73 static const int block_max_counts[3] = {
// Per-tcache descriptor array (carved from one allocation in sh2_drc_init) and
// the number of descriptors currently in use.
78 static block_desc *block_tables[3];
79 static int block_counts[3];
// Block lookup table for ROM addresses: sh2_translate() stores a block here
// only when the PC matches its ROM test, and the dispatcher reads it for the
// same address range. Indexed through HASH_FUNC (PC masked with HASH_MASK), so
// MAX_HASH_ENTRIES must stay a power of two.
82 #define MAX_HASH_ENTRIES 1024
83 #define HASH_MASK (MAX_HASH_ENTRIES - 1)
84 static void **hash_table;
// Defined outside this file. The dispatcher calls sh2_drc_entry() with the host
// code pointer of the block to run; every translated block ends with an emitted
// jump to sh2_drc_exit.
86 extern void sh2_drc_entry(SH2 *sh2, void *block);
87 extern void sh2_drc_exit(void);
// Helpers called from generated code. regparm(N) makes GCC pass the first N
// integer arguments in registers on x86.
90 extern void __attribute__((regparm(2))) sh2_do_op(SH2 *sh2, int opcode);
91 static void __attribute__((regparm(1))) sh2_test_irq(SH2 *sh2);
// Discard everything translated into tcache `tcid`: log the fill level, reset
// the descriptor count and write pointer, and clear the lookup structures that
// refer to blocks of that tcache.
//   tcid == 0: the hash table and the DRAM block-id map are cleared.
//   otherwise: the data-array block-id map with index tcid - 1 is cleared
//              (the `else` joining the two branches is not visible in this excerpt).
// The pointer difference in the log call has type ptrdiff_t; it is cast to int
// so the argument matches its %d conversion.
93 static void flush_tcache(int tcid)
95 printf("tcache #%d flush! (%d/%d, bds %d/%d)\n", tcid,
96 (int)(tcache_ptrs[tcid] - tcache_bases[tcid]), tcache_sizes[tcid],
97 block_counts[tcid], block_max_counts[tcid]);
99 block_counts[tcid] = 0;
100 tcache_ptrs[tcid] = tcache_bases[tcid];
101 if (tcid == 0) { // ROM, RAM
102 memset(hash_table, 0, sizeof(hash_table[0]) * MAX_HASH_ENTRIES);
103 memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram));
106 memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[0]));
// restart host disassembly tracking from the beginning of the emptied tcache
108 tcache_dsm_ptrs[tcid] = tcache_bases[tcid];
// Search the collision chain that hangs off `tab` for a block whose start
// address equals `addr` and return its host code pointer.
// The walk starts at tab->next: `tab` itself is not examined here (the
// dispatcher compares the head entry before calling this).
// On a miss a message is printed; the value returned in that case is not
// visible in this excerpt.
112 static void *dr_find_block(block_desc *tab, u32 addr)
114 for (tab = tab->next; tab != NULL; tab = tab->next)
115 if (tab->addr == addr)
119 return tab->tcache_ptr;
121 printf("block miss for %08x\n", addr);
// Take the next unused descriptor of tcache `tcache_id` and bind it to the
// current emit position (global tcache_ptr).
// When the table is already full the function bails out; sh2_translate()
// tests the result against NULL, so that is presumably what is returned
// (the return statements are not visible in this excerpt).
// How `addr` and `*blk_id` are stored is likewise not visible here.
125 static block_desc *dr_add_block(u32 addr, int tcache_id, int *blk_id)
127 int *bcount = &block_counts[tcache_id];
130 if (*bcount >= block_max_counts[tcache_id])
133 bd = &block_tables[tcache_id][*bcount];
135 bd->tcache_ptr = tcache_ptr;
// Hash slot for `addr`: views `hash_tab` as an array of block_desc pointers and
// indexes it with the low bits of the address (addr & HASH_MASK).
// Expands to an lvalue, so it is used both to read and to assign a slot.
142 #define HASH_FUNC(hash_tab, addr) \
143 ((block_desc **)(hash_tab))[(addr) & HASH_MASK]
145 // ---------------------------------------------------------------
// Emit host code that sets guest register `dst` to the constant `imm`.
// reg_map_g2h[] gives the host register assigned to a guest register; other
// code in this file compares it with -1, which appears to mean "no host
// register, value lives in the context".
// The visible path loads the immediate into a host register `tmp` and writes it
// to the context at offset dst * 4; the conditions around these steps are not
// visible in this excerpt.
147 static void emit_move_r_imm32(sh2_reg_e dst, u32 imm)
149 int host_dst = reg_map_g2h[dst];
154 emith_move_r_imm(tmp, imm);
156 emith_ctx_write(tmp, dst * 4);
// Emit host code that copies guest register `src` into guest register `dst`.
// When both guest registers have a host register (map entry != -1) a direct
// host register move is emitted.
// The remaining visible statements read `src` from the context (offset src * 4)
// and write `dst` back to the context (offset dst * 4); the exact conditions
// selecting them are not visible in this excerpt.
159 static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src)
161 int host_dst = reg_map_g2h[dst], host_src = reg_map_g2h[src];
164 if (host_dst != -1 && host_src != -1) {
165 emith_move_r_r(host_dst, host_src);
175 emith_ctx_read(tmp, src * 4);
177 emith_ctx_write(tmp, dst * 4);
// Emit host code for a register-relative branch target: the value of guest
// register `reg` is brought into host register 0 (from the context when the
// guest register has no host register, otherwise by a register move), `pc` is
// added, and the result is stored to the SHR_PPC context slot.
180 static void emit_braf(sh2_reg_e reg, u32 pc)
182 int host_reg = reg_map_g2h[reg];
183 if (host_reg == -1) {
184 emith_ctx_read(0, reg * 4);
186 emith_move_r_r(0, host_reg);
187 emith_add_r_imm(0, pc);
189 emith_ctx_write(0, SHR_PPC * 4);
// Translation helper for an opcode `op`; only part of the body is visible.
// The visible statements emit a two-argument call to sh2_do_op().
// NOTE(review): `sh2` is not a parameter of this function; how emith_pass_arg()
// resolves it is not visible here - confirm.
193 static int sh2_translate_op4(int op)
199 emith_pass_arg(2, sh2, op);
200 emith_call(sh2_do_op);
// Tests the variable `op` of the enclosing scope against `mask`; the action
// taken when any of those bits is set is on lines not visible in this excerpt.
211 #define CHECK_UNHANDLED_BITS(mask) { \
212 if ((op & (mask)) != 0) \
// Translate SH2 code starting at sh2->pc into host code and return a pointer to
// it (the dispatcher passes the result to sh2_drc_entry()).
//   sh2         - CPU whose current PC is translated; is_slave selects the
//                 data-array/BIOS tcache
//   other_block - existing descriptor (or NULL) that becomes `next` of the new
//                 block, i.e. the rest of its hash collision chain
// Steps visible in this excerpt: validate the PC, pick a tcache, allocate a
// descriptor (flushing the tcache when space or descriptors run out), emit code
// opcode by opcode until BLOCK_CYCLE_LIMIT is reached, mark the covered memory
// in the block-id maps, and emit the cycle accounting plus the exit jump.
// Many lines of this function (declarations, braces, case labels) are not
// visible in this excerpt.
216 static void *sh2_translate(SH2 *sh2, block_desc *other_block)
219 block_desc *this_block;
220 unsigned int pc = sh2->pc;
221 int op, delayed_op = 0, test_irq = 0;
222 int tcache_id = 0, blkid = 0;
// `tmp` is computed on a line not visible here
228 if ((tmp != 0 && tmp != 1 && tmp != 6) || sh2->pc == 0) {
229 printf("invalid PC, aborting: %08x\n", sh2->pc);
230 // FIXME: be less destructive
// NOTE(review): this test masks the PC with 0xe0000000 while the matching lookup
// in sh2_drc_dispatcher() masks with 0xff000000 - confirm which one is intended.
234 if ((sh2->pc & 0xe0000000) == 0xc0000000 || (sh2->pc & ~0xfff) == 0) {
235 // data_array, BIOS have separate tcache (shared)
236 tcache_id = 1 + sh2->is_slave;
239 tcache_ptr = tcache_ptrs[tcache_id];
240 this_block = dr_add_block(pc, tcache_id, &blkid);
// flush when fewer than MAX_BLOCK_SIZE bytes remain or no descriptor was available
242 tmp = tcache_ptr - tcache_bases[tcache_id];
243 if (tmp > tcache_sizes[tcache_id] - MAX_BLOCK_SIZE || this_block == NULL) {
244 flush_tcache(tcache_id);
245 tcache_ptr = tcache_ptrs[tcache_id];
246 other_block = NULL; // the chain is gone as well due to the flush
// NOTE(review): no NULL check on this second result is visible before the
// dereference below - confirm it cannot fail right after a flush.
247 this_block = dr_add_block(pc, tcache_id, &blkid);
250 this_block->next = other_block;
251 if ((sh2->pc & 0xc6000000) == 0x02000000) // ROM
252 HASH_FUNC(hash_table, pc) = this_block;
254 block_entry = tcache_ptr;
256 printf("== %csh2 block #%d,%d %08x -> %p\n", sh2->is_slave ? 's' : 'm',
257 tcache_id, block_counts[tcache_id], pc, block_entry);
258 if (other_block != NULL) {
259 printf(" hash collision with %08x\n", other_block->addr);
// main translation loop: one SH2 opcode per iteration; a pending delay slot
// keeps the loop going even past the cycle limit
264 while (cycles < BLOCK_CYCLE_LIMIT || delayed_op)
269 op = p32x_sh2_read16(pc, sh2);
274 DasmSH2(sh2dasm_buff, pc, op);
275 printf("%08x %04x %s\n", pc, op, sh2dasm_buff);
// dispatch on the top 4 bits of the opcode
282 switch ((op >> 12) & 0x0f)
287 CHECK_UNHANDLED_BITS(0xd0);
288 // BRAF Rm 0000mmmm00100011
289 // BSRF Rm 0000mmmm00000011
292 emit_move_r_imm32(SHR_PR, pc + 2);
293 emit_braf((op >> 8) & 0x0f, pc + 2);
297 CHECK_UNHANDLED_BITS(0xf0);
298 // NOP 0000000000001001
301 CHECK_UNHANDLED_BITS(0xd0);
304 // RTS 0000000000001011
305 emit_move_r_r(SHR_PPC, SHR_PR);
308 // RTE 0000000000101011
309 //emit_move_r_r(SHR_PC, SHR_PR);
// RTE: set PC in the context, call sh2_do_op(context, op), then copy PC to PPC
310 emit_move_r_imm32(SHR_PC, pc - 2);
311 emith_pass_arg_r(0, CONTEXT_REG);
312 emith_pass_arg_imm(1, op);
313 emith_call(sh2_do_op);
314 emit_move_r_r(SHR_PPC, SHR_PC);
325 if ((op & 0xf0) != 0)
327 // LDC.L @Rm+,SR 0100mmmm00000111
331 if ((op & 0xd0) != 0)
333 // JMP @Rm 0100mmmm00101011
334 // JSR @Rm 0100mmmm00001011
337 emit_move_r_imm32(SHR_PR, pc + 2);
338 emit_move_r_r(SHR_PPC, (op >> 8) & 0x0f);
342 if ((op & 0xf0) != 0)
344 // LDC Rm,SR 0100mmmm00001110
351 switch (op & 0x0f00) {
352 // BT/S label 10001101dddddddd
354 // BF/S label 10001111dddddddd
359 // BT label 10001001dddddddd
361 // BF label 10001011dddddddd
// sign-extend the 8-bit displacement and scale it by 2 in one step
// (shift left by 24, then arithmetic shift right by 23)
363 tmp = ((signed int)(op << 24) >> 23);
364 tmp2 = delayed_op ? SHR_PPC : SHR_PC;
365 emit_move_r_imm32(tmp2, pc + (delayed_op ? 2 : 0));
367 EMIT_CONDITIONAL(emit_move_r_imm32(tmp2, pc + tmp + 2), (op & 0x0200) ? 1 : 0);
376 // BRA label 1010dddddddddddd
// same trick for the 12-bit displacement: shift left by 20, right by 19
379 tmp = ((signed int)(op << 20) >> 19);
380 emit_move_r_imm32(SHR_PPC, pc + tmp + 2);
385 // BSR label 1011dddddddddddd
387 emit_move_r_imm32(SHR_PR, pc + 2);
// set PC in the context and call sh2_do_op(context, op)
392 emit_move_r_imm32(SHR_PC, pc - 2);
393 emith_pass_arg_r(0, CONTEXT_REG);
394 emith_pass_arg_imm(1, op);
395 emith_call(sh2_do_op);
401 emit_move_r_r(SHR_PC, SHR_PPC);
403 if (test_irq && delayed_op != 2) {
404 emith_pass_arg_r(0, CONTEXT_REG);
405 emith_call(sh2_test_irq);
411 do_host_disasm(tcache_id);
415 this_block->end_addr = pc;
417 // mark memory blocks as containing compiled code
// Map entries hold (blkid << 1); bit 0 is set only on the entry of the block's
// first address.
418 if ((sh2->pc & 0xe0000000) == 0xc0000000 || (sh2->pc & ~0xfff) == 0) {
420 u16 *drcblk = Pico32xMem->drcblk_da[sh2->is_slave];
421 tmp = (this_block->addr & 0xfff) >> SH2_DRCBLK_DA_SHIFT;
422 tmp2 = (this_block->end_addr & 0xfff) >> SH2_DRCBLK_DA_SHIFT;
423 Pico32xMem->drcblk_da[sh2->is_slave][tmp] = (blkid << 1) | 1;
424 for (++tmp; tmp < tmp2; tmp++) {
426 break; // don't overwrite overlay block
427 drcblk[tmp] = blkid << 1;
// NOTE(review): DRAM is detected here with mask 0xc7fc0000, while the dispatcher
// uses 0xc6000000 for its DRAM lookup - confirm the two are meant to differ.
430 else if ((this_block->addr & 0xc7fc0000) == 0x06000000) { // DRAM
431 tmp = (this_block->addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT;
432 tmp2 = (this_block->end_addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT;
433 Pico32xMem->drcblk_ram[tmp] = (blkid << 1) | 1;
434 for (++tmp; tmp < tmp2; tmp++) {
435 if (Pico32xMem->drcblk_ram[tmp])
437 Pico32xMem->drcblk_ram[tmp] = blkid << 1;
// subtract the block's cycles from the counter kept in SR bits 12 and up, either
// in the context copy of SR or in its host register
441 if (reg_map_g2h[SHR_SR] == -1) {
442 emith_ctx_sub(cycles << 12, SHR_SR * 4);
444 emith_sub_r_imm(reg_map_g2h[SHR_SR], cycles << 12);
445 emith_jump(sh2_drc_exit);
446 tcache_ptrs[tcache_id] = tcache_ptr;
448 do_host_disasm(tcache_id);
// NOTE(review): the third %d below receives a pointer difference (ptrdiff_t);
// the types only match where ptrdiff_t is int.
449 dbg(1, " block #%d,%d tcache %d/%d, insns %d -> %d %.3f",
450 tcache_id, block_counts[tcache_id],
451 tcache_ptr - tcache_bases[tcache_id], tcache_sizes[tcache_id],
452 insns_compiled, host_insn_count, (double)host_insn_count / insns_compiled);
453 if ((sh2->pc & 0xc6000000) == 0x02000000) // ROM
454 dbg(1, " hash collisions %d/%d", hash_collisions, block_counts[tcache_id]);
459 do_host_disasm(tcache_id);
// Run translated blocks for `sh2` while the cycle counter stored in SR bits 12
// and up is positive.
// Each iteration looks up the block for sh2->pc - via the data-array/BIOS
// block-id map, the DRAM block-id map, or the ROM hash table - calls
// sh2_translate() (the condition guarding that call is not visible in this
// excerpt) and enters the block through sh2_drc_entry().
464 void __attribute__((noinline)) sh2_drc_dispatcher(SH2 *sh2)
466 while (((signed int)sh2->sr >> 12) > 0)
469 block_desc *bd = NULL;
471 // FIXME: must avoid doing it so often..
474 // we have full block id tables for data_array and RAM
475 // BIOS goes to data_array table too
// NOTE(review): sh2_translate() classifies data-array addresses with mask
// 0xe0000000, this lookup uses 0xff000000 - confirm which one is intended.
476 if ((sh2->pc & 0xff000000) == 0xc0000000 || (sh2->pc & ~0xfff) == 0) {
477 int blkid = Pico32xMem->drcblk_da[sh2->is_slave][(sh2->pc & 0xfff) >> SH2_DRCBLK_DA_SHIFT];
// map entries are (block index << 1) | start flag, hence the shift
479 bd = &block_tables[1 + sh2->is_slave][blkid >> 1];
480 block = bd->tcache_ptr;
// NOTE(review): sh2_translate() marks DRAM blocks using mask 0xc7fc0000, this
// test uses 0xc6000000 - confirm.
484 else if ((sh2->pc & 0xc6000000) == 0x06000000) {
485 int blkid = Pico32xMem->drcblk_ram[(sh2->pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT];
487 bd = &block_tables[0][blkid >> 1];
488 block = bd->tcache_ptr;
// ROM: check the head of the hash chain first, then walk the rest of the chain
492 else if ((sh2->pc & 0xc6000000) == 0x02000000) {
493 bd = HASH_FUNC(hash_table, sh2->pc);
496 if (bd->addr == sh2->pc)
497 block = bd->tcache_ptr;
499 block = dr_find_block(bd, sh2->pc);
504 block = sh2_translate(sh2, bd);
506 dbg(4, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 's' : 'm',
507 sh2->pc, block, (signed int)sh2->sr >> 12);
512 sh2_drc_entry(sh2, block);
// Invalidate the translated block recorded at block-id map position `p` after a
// write to address `a` (self-modifying code handling).
//   drcblk - start of the block-id map that `p` points into
//   p      - map entry for the written address
//   btab   - descriptor table the map's block ids index into
//   a      - the written SH2 address
// The descriptor's address range is zeroed, `p` is moved back while the
// preceding entries carry the same id, and an overlapping ("overlay") block
// found just before that range is removed recursively when it covers `a`.
// The final loop clears this block's map entries (its body is not visible in
// this excerpt).
// NOTE(review): the trailing `while` has no visible upper bound on `p`.
516 static void sh2_smc_rm_block(u16 *drcblk, u16 *p, block_desc *btab, u32 a)
519 block_desc *bd = btab + id;
521 dbg(1, " killing block %08x", bd->addr);
522 bd->addr = bd->end_addr = 0;
524 while (p > drcblk && (p[-1] >> 1) == id)
527 // check for possible overlay block
// bound the look-behind by the start of the map: comparing the pointer with 0
// was always true, so p[-1] was read before the array when p == drcblk
528 if (p > drcblk && p[-1] != 0) {
529 bd = btab + (p[-1] >> 1);
530 if (bd->addr <= a && a < bd->end_addr)
531 sh2_smc_rm_block(drcblk, p - 1, btab, a);
537 while ((*p >> 1) == id);
// Write hook for DRAM: locate the block-id map entry for address `a` and remove
// the translated block recorded there (tcache 0 descriptors).
//   a     - written address; only the low 18 bits select the map entry
//   val   - not used by the visible code
//   cpuid - only used to pick the 'm'/'s' prefix of the debug message
// Any condition guarding the removal is not visible in this excerpt.
540 void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid)
542 u16 *drcblk = Pico32xMem->drcblk_ram;
543 u16 *p = drcblk + ((a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT);
545 dbg(1, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a);
546 sh2_smc_rm_block(drcblk, p, block_tables[0], a);
// Write hook for the data array of CPU `cpuid`: locate the block-id map entry
// for address `a` and remove the translated block recorded there (descriptors
// of tcache 1 + cpuid).
//   a     - written address; only the low 12 bits select the map entry
//   val   - not used by the visible code
//   cpuid - selects the per-CPU map and descriptor table
// Any condition guarding the removal is not visible in this excerpt.
549 void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid)
551 u16 *drcblk = Pico32xMem->drcblk_da[cpuid];
552 u16 *p = drcblk + ((a & 0xfff) >> SH2_DRCBLK_DA_SHIFT);
554 dbg(1, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a);
555 sh2_smc_rm_block(drcblk, p, block_tables[1 + cpuid], a);
// Run `sh2` for `cycles` more cycles.
// The request is added to cycles_aim; the amount still owed (aim - done) is
// placed into SR bits 12 and up, where the generated code and the dispatcher
// count it down. Afterwards cycles_done is advanced by what was actually
// consumed (requested amount minus whatever is left in SR).
// NOTE(review): the counter is OR-ed into SR; a line not visible here
// presumably clears those bits first - confirm.
558 void sh2_execute(SH2 *sh2, int cycles)
560 sh2->cycles_aim += cycles;
561 cycles = sh2->cycles_aim - sh2->cycles_done;
563 // cycles are kept in SHR_SR unused bits (upper 20)
565 sh2->sr |= cycles << 12;
566 sh2_drc_dispatcher(sh2);
568 sh2->cycles_done += cycles - ((signed int)sh2->sr >> 12);
// Called from generated code: take a pending interrupt when its level is above
// the mask held in SR bits 4-7.
// The external IRL request is taken when it is higher than the internal
// request, using vector 64 + irl/2. The other visible call takes the internal
// request with its stored vector, after which the internal request is cleared
// and pending_level falls back to the IRL level (the `else` and braces joining
// these statements are not visible in this excerpt).
571 static void __attribute__((regparm(1))) sh2_test_irq(SH2 *sh2)
573 if (sh2->pending_level > ((sh2->sr >> 4) & 0x0f))
575 if (sh2->pending_irl > sh2->pending_int_irq)
576 sh2_do_irq(sh2, sh2->pending_irl, 64 + sh2->pending_irl/2);
578 sh2_do_irq(sh2, sh2->pending_int_irq, sh2->pending_int_vector);
579 sh2->pending_int_irq = 0; // auto-clear
580 sh2->pending_level = sh2->pending_irl;
// Debug helper: print the most frequently referenced translated blocks.
// First sums `refcount` over all live descriptors (addr != 0) of every tcache,
// then runs up to 10 passes, each searching for the live block with the highest
// refcount and printing its address, count and share of the total.
// How a printed block is excluded from later passes, and the handling of
// `maxb == NULL`, are on lines not visible in this excerpt.
586 static void block_stats(void)
588 int c, b, i, total = 0;
590 for (b = 0; b < ARRAY_SIZE(block_tables); b++)
591 for (i = 0; i < block_counts[b]; i++)
592 if (block_tables[b][i].addr != 0)
593 total += block_tables[b][i].refcount;
595 for (c = 0; c < 10; c++) {
596 block_desc *blk, *maxb = NULL;
598 for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
599 for (i = 0; i < block_counts[b]; i++) {
600 blk = &block_tables[b][i];
601 if (blk->addr != 0 && blk->refcount > max) {
609 printf("%08x %9d %2.3f%%\n", maxb->addr, maxb->refcount,
610 (double)maxb->refcount / total * 100.0);
// Set up the recompiler state; does nothing for parts that already exist.
// On first use it allocates one zeroed array holding the descriptors of all
// three tcaches, resets the block counts, and carves both the descriptor array
// and the `tcache` buffer into three consecutive regions (sizes from
// block_max_counts[] / tcache_sizes[]). The hash table is allocated separately.
// The values returned on success and on allocation failure are not visible in
// this excerpt; `sh2` is not used by the visible code.
616 int sh2_drc_init(SH2 *sh2)
618 if (block_tables[0] == NULL) {
620 cnt = block_max_counts[0] + block_max_counts[1] + block_max_counts[2];
621 block_tables[0] = calloc(cnt, sizeof(*block_tables[0]));
622 if (block_tables[0] == NULL)
625 memset(block_counts, 0, sizeof(block_counts));
626 tcache_bases[0] = tcache_ptrs[0] = tcache;
// each following region starts right after the previous one
628 for (i = 1; i < ARRAY_SIZE(block_tables); i++) {
629 block_tables[i] = block_tables[i - 1] + block_max_counts[i - 1];
630 tcache_bases[i] = tcache_ptrs[i] = tcache_bases[i - 1] + tcache_sizes[i - 1];
634 for (i = 0; i < ARRAY_SIZE(block_tables); i++)
635 tcache_dsm_ptrs[i] = tcache_bases[i];
642 if (hash_table == NULL) {
// NOTE(review): arguments are in (size, count) order here, the reverse of the
// calloc call above; the allocated size is the same.
643 hash_table = calloc(sizeof(hash_table[0]), MAX_HASH_ENTRIES);
644 if (hash_table == NULL)
651 void sh2_drc_finish(SH2 *sh2)
653 if (block_tables[0] != NULL) {
657 free(block_tables[0]);
658 memset(block_tables, 0, sizeof(block_tables));
661 if (hash_table != NULL) {