drc: rework block tracking and lookup
[picodrive.git] / cpu / sh2 / compiler.c
1/*
2 * SH2 recompiler
3 * (C) notaz, 2009,2010,2013
4 *
5 * This work is licensed under the terms of MAME license.
6 * See COPYING file in the top-level directory.
7 *
8 * notes:
9 * - tcache, block descriptor, link buffer overflows result in sh2_translate()
10 * failure, followed by full tcache invalidation for that region
11 * - jumps between blocks are tracked for SMC handling (in block_links[]),
12 * except jumps between different tcaches
13 *
14 * implemented:
15 * - static register allocation
16 * - remaining register caching and tracking in temporaries
17 * - block-local branch linking
18 * - block linking (except between tcaches)
19 * - some constant propagation
20 *
21 * TODO:
22 * - better constant propagation
23 * - stack caching?
24 * - bug fixing
25 */
26#include <stddef.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <assert.h>
30
31#include "../../pico/pico_int.h"
32#include "sh2.h"
33#include "compiler.h"
34#include "../drc/cmn.h"
35#include "../debug.h"
36
37// features
38#define PROPAGATE_CONSTANTS 1
39#define LINK_BRANCHES 1
40
41// limits (per block)
42#define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6)
43
44// max literal offset from the block end
45#define MAX_LITERAL_OFFSET 32*2
46#define MAX_LITERALS (BLOCK_INSN_LIMIT / 4)
47#define MAX_LOCAL_BRANCHES 32
48
49///
50#define FETCH_OP(pc) \
51 dr_pc_base[(pc) / 2]
52
53#define FETCH32(a) \
54 ((dr_pc_base[(a) / 2] << 16) | dr_pc_base[(a) / 2 + 1])
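// illustrative sketch (hypothetical helper, unused): what FETCH32 assembles,
// i.e. two consecutive big-endian 16-bit opcode words combined into one
// 32-bit literal. 'base' stands in for dr_pc_base, the pointer that
// sh2_translate() sets up so that indexing by guest pc works directly.
static inline u32 fetch32_example(const u16 *base, u32 pc)
{
  return ((u32)base[pc / 2] << 16) | base[pc / 2 + 1];
}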
55
56#ifdef DRC_SH2
57
58// debug stuff
59// 1 - warnings/errors
60// 2 - block info/smc
61// 4 - asm
62// 8 - runtime block entry log
63// {
64#ifndef DRC_DEBUG
65#define DRC_DEBUG 0
66#endif
67
68#if DRC_DEBUG
69#define dbg(l,...) { \
70 if ((l) & DRC_DEBUG) \
71 elprintf(EL_STATUS, ##__VA_ARGS__); \
72}
73
74#include "mame/sh2dasm.h"
75#include <platform/libpicofe/linux/host_dasm.h>
76static int insns_compiled, hash_collisions, host_insn_count;
77#define COUNT_OP \
78 host_insn_count++
79#else // !DRC_DEBUG
80#define COUNT_OP
81#define dbg(...)
82#endif
83
84#if (DRC_DEBUG & 4)
85static u8 *tcache_dsm_ptrs[3];
86static char sh2dasm_buff[64];
87#define do_host_disasm(tcid) \
88 host_dasm(tcache_dsm_ptrs[tcid], tcache_ptr - tcache_dsm_ptrs[tcid]); \
89 tcache_dsm_ptrs[tcid] = tcache_ptr
90#else
91#define do_host_disasm(x)
92#endif
93
94#if (DRC_DEBUG & 8) || defined(PDB)
95static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
96{
97 if (block != NULL) {
98 dbg(8, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 's' : 'm',
99 sh2->pc, block, (signed int)sr >> 12);
100 pdb_step(sh2, sh2->pc);
101 }
102 return block;
103}
104#endif
105// } debug
106
107#define TCACHE_BUFFERS 3
108
109// we have 3 translation cache buffers, split from one drc/cmn buffer.
110// BIOS shares tcache with data array because it's only used for init
111// and can be discarded early
112// XXX: need to tune sizes
113static const int tcache_sizes[TCACHE_BUFFERS] = {
114 DRC_TCACHE_SIZE * 6 / 8, // ROM (rarely used), DRAM
115 DRC_TCACHE_SIZE / 8, // BIOS, data array in master sh2
116 DRC_TCACHE_SIZE / 8, // ... slave
117};
118
119static u8 *tcache_bases[TCACHE_BUFFERS];
120static u8 *tcache_ptrs[TCACHE_BUFFERS];
121
122// ptr for code emitters
123static u8 *tcache_ptr;
124
125#define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 8)
126
127struct block_entry {
128 u32 pc;
129 void *tcache_ptr; // translated block for above PC
130 struct block_entry *next; // next block in hash_table with same pc hash
131#if (DRC_DEBUG & 2)
132 struct block_desc *block;
133#endif
134};
135
136struct block_desc {
137 u32 addr; // block start SH2 PC address
138 u32 end_addr; // address after last op or literal
139#if (DRC_DEBUG & 2)
140 int refcount;
141#endif
142 int entry_count;
143 struct block_entry entryp[MAX_BLOCK_ENTRIES];
144};
145
146struct block_link {
147 u32 target_pc;
148 void *jump; // insn address
149// struct block_link_ *next;
150};
151
152static const int block_max_counts[TCACHE_BUFFERS] = {
153 4*1024,
154 256,
155 256,
156};
157static struct block_desc *block_tables[TCACHE_BUFFERS];
158static int block_counts[TCACHE_BUFFERS];
159
160static const int block_link_max_counts[TCACHE_BUFFERS] = {
161 4*1024,
162 256,
163 256,
164};
165static struct block_link *block_links[TCACHE_BUFFERS];
166static int block_link_counts[TCACHE_BUFFERS];
167
168// used for invalidation
169static const int ram_sizes[TCACHE_BUFFERS] = {
170 0x40000,
171 0x1000,
172 0x1000,
173};
174#define ADDR_TO_BLOCK_PAGE 0x100
175
176struct block_list {
177 struct block_desc *block;
178 struct block_list *next;
179};
180
181// array of pointers to block_lists for RAM and 2 data arrays
182// each array has len: sizeof(mem) / ADDR_TO_BLOCK_PAGE
183static struct block_list **inval_lookup[TCACHE_BUFFERS];
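// illustrative sketch (hypothetical helper, unused): how a guest address
// would select its invalidation page list, following the layout described
// above; the masking with ram_sizes[] is an assumption based on that comment.
static inline struct block_list *inval_page_example(int tcache_id, u32 a)
{
  u32 offs = a & (ram_sizes[tcache_id] - 1); // offset within RAM/data array
  return inval_lookup[tcache_id][offs / ADDR_TO_BLOCK_PAGE];
}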
184
185static const int hash_table_sizes[TCACHE_BUFFERS] = {
186 0x1000,
187 0x100,
188 0x100,
189};
190static struct block_entry **hash_tables[TCACHE_BUFFERS];
191
192#define HASH_FUNC(hash_tab, addr, mask) \
193 (hash_tab)[(((addr) >> 20) ^ ((addr) >> 2)) & (mask)]
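// illustrative sketch (hypothetical helper, unused): the open-coded
// equivalent of HASH_FUNC above - upper and lower pc bits are mixed, then
// masked down to the (power of two) table size.
static inline struct block_entry *hash_head_example(struct block_entry **tab,
  u32 pc, u32 table_size)
{
  return tab[((pc >> 20) ^ (pc >> 2)) & (table_size - 1)];
}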
194
195// host register tracking
196enum {
197 HR_FREE,
198 HR_CACHED, // 'val' has sh2_reg_e
199// HR_CONST, // 'val' has a constant
200 HR_TEMP, // reg used for temp storage
201};
202
203enum {
204 HRF_DIRTY = 1 << 0, // reg has "dirty" value to be written to ctx
205 HRF_LOCKED = 1 << 1, // HR_CACHED can't be evicted
206};
207
208typedef struct {
209 u32 hreg:5; // "host" reg
210 u32 greg:5; // "guest" reg
211 u32 type:3;
212 u32 flags:3;
213 u32 stamp:16; // kind of a timestamp
214} temp_reg_t;
215
216// note: reg_temp[] must hold at least as many registers as
217// the handlers use in the worst case (currently 4)
218#ifdef __arm__
219#include "../drc/emit_arm.c"
220
221static const int reg_map_g2h[] = {
222 4, 5, 6, 7,
223 8, -1, -1, -1,
224 -1, -1, -1, -1,
225 -1, -1, -1, 9, // r12 .. sp
226 -1, -1, -1, 10, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR,
227 -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL,
228};
229
230static temp_reg_t reg_temp[] = {
231 { 0, },
232 { 1, },
233 { 12, },
234 { 14, },
235 { 2, },
236 { 3, },
237};
238
239#elif defined(__i386__)
240#include "../drc/emit_x86.c"
241
242static const int reg_map_g2h[] = {
243 xSI,-1, -1, -1,
244 -1, -1, -1, -1,
245 -1, -1, -1, -1,
246 -1, -1, -1, -1,
247 -1, -1, -1, xDI,
248 -1, -1, -1, -1,
249};
250
251// ax, cx, dx are usually temporaries by convention
252static temp_reg_t reg_temp[] = {
253 { xAX, },
254 { xBX, },
255 { xCX, },
256 { xDX, },
257};
258
259#else
260#error unsupported arch
261#endif
262
263#define T 0x00000001
264#define S 0x00000002
265#define I 0x000000f0
266#define Q 0x00000100
267#define M 0x00000200
268#define T_save 0x00000800
269
270#define I_SHIFT 4
271#define Q_SHIFT 8
272#define M_SHIFT 9
273
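// illustrative sketch (hypothetical helper, unused) of how the DRC packs SR:
// bits 31..12 hold the remaining cycle budget (compare FLUSH_CYCLES below and
// the ">> 12" in sh2_drc_log_entry above), bit 11 is T_save, 9/8 are M/Q,
// 7..4 the interrupt mask I, and 1/0 are S/T as on the real SH2.
static inline int sr_get_cycles_example(u32 sr)
{
  return (signed int)sr >> 12;
}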
274static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2);
275static void (*sh2_drc_dispatcher)(void);
276static void (*sh2_drc_exit)(void);
277static void (*sh2_drc_test_irq)(void);
278
279static u32 REGPARM(2) (*sh2_drc_read8)(u32 a, SH2 *sh2);
280static u32 REGPARM(2) (*sh2_drc_read16)(u32 a, SH2 *sh2);
281static u32 REGPARM(2) (*sh2_drc_read32)(u32 a, SH2 *sh2);
282static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d);
283static void REGPARM(2) (*sh2_drc_write8_slot)(u32 a, u32 d);
284static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d);
285static void REGPARM(2) (*sh2_drc_write16_slot)(u32 a, u32 d);
286static int REGPARM(3) (*sh2_drc_write32)(u32 a, u32 d, SH2 *sh2);
287
288// address space stuff
289static int dr_ctx_get_mem_ptr(u32 a, u32 *mask)
290{
291 int poffs = -1;
292
293 if ((a & ~0x7ff) == 0) {
294 // BIOS
295 poffs = offsetof(SH2, p_bios);
296 *mask = 0x7ff;
297 }
298 else if ((a & 0xfffff000) == 0xc0000000) {
299 // data array
300 poffs = offsetof(SH2, p_da);
301 *mask = 0xfff;
302 }
303 else if ((a & 0xc6000000) == 0x06000000) {
304 // SDRAM
305 poffs = offsetof(SH2, p_sdram);
306 *mask = 0x03ffff;
307 }
308 else if ((a & 0xc6000000) == 0x02000000) {
309 // ROM
310 poffs = offsetof(SH2, p_rom);
311 *mask = 0x3fffff;
312 }
313
314 return poffs;
315}
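// illustrative sketch (hypothetical helper, unused): resolving a guest
// address to a host pointer with the offset/mask pair returned by
// dr_ctx_get_mem_ptr(); emit_get_rbase_and_offs() further below emits code
// that performs the same lookup at runtime.
static inline void *host_ptr_example(SH2 *sh2, u32 a)
{
  u32 mask = 0;
  int poffs = dr_ctx_get_mem_ptr(a, &mask);
  if (poffs < 0)
    return NULL;
  // the SH2 context stores the base pointer at byte offset 'poffs'
  return *(u8 **)((u8 *)sh2 + poffs) + (a & mask);
}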
316
317static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id)
318{
319 struct block_entry *be;
320 u32 tcid = 0, mask;
321
322 // data arrays have their own caches
323 if ((pc & 0xe0000000) == 0xc0000000 || (pc & ~0xfff) == 0)
324 tcid = 1 + is_slave;
325
326 *tcache_id = tcid;
327
328 mask = hash_table_sizes[tcid] - 1;
329 be = HASH_FUNC(hash_tables[tcid], pc, mask);
330 for (; be != NULL; be = be->next)
331 if (be->pc == pc)
332 return be;
333
334 return NULL;
335}
336
337// ---------------------------------------------------------------
338
339// block management
340static void add_to_block_list(struct block_list **blist, struct block_desc *block)
341{
342 struct block_list *added = malloc(sizeof(*added));
343 if (!added) {
344 elprintf(EL_ANOMALY, "drc OOM (1)");
345 return;
346 }
347 added->block = block;
348 added->next = *blist;
349 *blist = added;
350}
351
352static void rm_from_block_list(struct block_list **blist, struct block_desc *block)
353{
354 struct block_list *prev = NULL, *current = *blist;
355 for (; current != NULL; prev = current, current = current->next) {
356 if (current->block == block) {
357 if (prev == NULL)
358 *blist = current->next;
359 else
360 prev->next = current->next;
361 free(current);
362 return;
363 }
364 }
365 dbg(1, "can't rm block %p (%08x-%08x)",
366 block, block->addr, block->end_addr);
367}
368
369static void rm_block_list(struct block_list **blist)
370{
371 struct block_list *tmp, *current = *blist;
372 while (current != NULL) {
373 tmp = current;
374 current = current->next;
375 free(tmp);
376 }
377 *blist = NULL;
378}
379
380static void REGPARM(1) flush_tcache(int tcid)
381{
382 int i;
383
384 dbg(1, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid,
385 tcache_ptrs[tcid] - tcache_bases[tcid], tcache_sizes[tcid],
386 block_counts[tcid], block_max_counts[tcid]);
387
388 block_counts[tcid] = 0;
389 block_link_counts[tcid] = 0;
390 memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * hash_table_sizes[tcid]);
391 tcache_ptrs[tcid] = tcache_bases[tcid];
392 if (Pico32xMem != NULL) {
393 if (tcid == 0) // ROM, RAM
394 memset(Pico32xMem->drcblk_ram, 0,
395 sizeof(Pico32xMem->drcblk_ram));
396 else
397 memset(Pico32xMem->drcblk_da[tcid - 1], 0,
398 sizeof(Pico32xMem->drcblk_da[0]));
399 }
400#if (DRC_DEBUG & 4)
401 tcache_dsm_ptrs[tcid] = tcache_bases[tcid];
402#endif
403
404 for (i = 0; i < ram_sizes[tcid] / ADDR_TO_BLOCK_PAGE; i++)
405 rm_block_list(&inval_lookup[tcid][i]);
406}
407
408#if LINK_BRANCHES
409// add block links (tracked branches)
410static int dr_add_block_link(u32 target_pc, void *jump, int tcache_id)
411{
412 struct block_link *bl = block_links[tcache_id];
413 int cnt = block_link_counts[tcache_id];
414
415 if (cnt >= block_link_max_counts[tcache_id]) {
416 dbg(1, "bl overflow for tcache %d\n", tcache_id);
417 return -1;
418 }
419
420 bl[cnt].target_pc = target_pc;
421 bl[cnt].jump = jump;
422 block_link_counts[tcache_id]++;
423
424 return 0;
425}
426#endif
427
428static void add_to_hashlist(struct block_entry *be, int tcache_id)
429{
430 u32 tcmask = hash_table_sizes[tcache_id] - 1;
431
432 be->next = HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask);
433 HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask) = be;
434
435#if (DRC_DEBUG & 2)
436 if (be->next != NULL) {
437 printf(" %08x: hash collision with %08x\n",
438 be->pc, be->next->pc);
439 hash_collisions++;
440 }
441#endif
442}
443
444static void rm_from_hashlist(struct block_entry *be, int tcache_id)
445{
446 u32 tcmask = hash_table_sizes[tcache_id] - 1;
447 struct block_entry *cur, *prev;
448
449 cur = HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask);
450 if (cur == NULL)
451 goto missing;
452
453 if (be == cur) { // first
454 HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask) = be->next;
455 return;
456 }
457
458 for (prev = cur, cur = cur->next; cur != NULL; cur = cur->next) {
459 if (cur == be) {
460 prev->next = cur->next;
461 return;
462 }
463 }
464
465missing:
466 dbg(1, "rm_from_hashlist: be %p %08x missing?", be, be->pc);
467}
468
469static struct block_desc *dr_add_block(u32 addr, u32 end_addr, int is_slave, int *blk_id)
470{
471 struct block_entry *be;
472 struct block_desc *bd;
473 int tcache_id;
474 int *bcount;
475
476 // do a lookup to get the tcache_id and check for an override
477 be = dr_get_entry(addr, is_slave, &tcache_id);
478 if (be != NULL)
479 dbg(1, "block override for %08x", addr);
480
481 bcount = &block_counts[tcache_id];
482 if (*bcount >= block_max_counts[tcache_id]) {
483 dbg(1, "bd overflow for tcache %d", tcache_id);
484 return NULL;
485 }
486
487 bd = &block_tables[tcache_id][*bcount];
488 bd->addr = addr;
489 bd->end_addr = end_addr;
490
491 bd->entry_count = 1;
492 bd->entryp[0].pc = addr;
493 bd->entryp[0].tcache_ptr = tcache_ptr;
494#if (DRC_DEBUG & 2)
495 bd->entryp[0].block = bd;
496 bd->refcount = 0;
497#endif
498 add_to_hashlist(&bd->entryp[0], tcache_id);
499
500 *blk_id = *bcount;
501 (*bcount)++;
502
503 return bd;
504}
505
506static void REGPARM(3) *dr_lookup_block(u32 pc, int is_slave, int *tcache_id)
507{
508 struct block_entry *be = NULL;
509 void *block = NULL;
510
511 be = dr_get_entry(pc, is_slave, tcache_id);
512 if (be != NULL)
513 block = be->tcache_ptr;
514
515#if (DRC_DEBUG & 2)
516 if (be != NULL)
517 be->block->refcount++;
518#endif
519 return block;
520}
521
522static void *dr_failure(void)
523{
524 lprintf("recompilation failed\n");
525 exit(1);
526}
527
528static void *dr_prepare_ext_branch(u32 pc, SH2 *sh2, int tcache_id)
529{
530#if LINK_BRANCHES
531 int target_tcache_id;
532 void *target;
533 int ret;
534
535 target = dr_lookup_block(pc, sh2->is_slave, &target_tcache_id);
536 if (target_tcache_id == tcache_id) {
537 // allow linking blocks only from local cache
538 ret = dr_add_block_link(pc, tcache_ptr, tcache_id);
539 if (ret < 0)
540 return NULL;
541 }
542 if (target == NULL || target_tcache_id != tcache_id)
543 target = sh2_drc_dispatcher;
544
545 return target;
546#else
547 return sh2_drc_dispatcher;
548#endif
549}
550
551static void dr_link_blocks(void *target, u32 pc, int tcache_id)
552{
553#if 0 // FIXME: invalidated blocks must not be in block_links
554//LINK_BRANCHES
555 struct block_link *bl = block_links[tcache_id];
556 int cnt = block_link_counts[tcache_id];
557 int i;
558
559 for (i = 0; i < cnt; i++) {
560 if (bl[i].target_pc == pc) {
561 dbg(2, "- link from %p", bl[i].jump);
562 emith_jump_patch(bl[i].jump, target);
563 // XXX: sync ARM caches (old jump should be fine)?
564 }
565 }
566#endif
567}
568
569#define ADD_TO_ARRAY(array, count, item, failcode) \
570 array[count++] = item; \
571 if (count >= ARRAY_SIZE(array)) { \
572 dbg(1, "warning: " #array " overflow"); \
573 failcode; \
574 }
575
576static int find_in_array(u32 *array, size_t size, u32 what)
577{
578 size_t i;
579 for (i = 0; i < size; i++)
580 if (what == array[i])
581 return i;
582
583 return -1;
584}
585
586// ---------------------------------------------------------------
587
588// register cache / constant propagation stuff
589typedef enum {
590 RC_GR_READ,
591 RC_GR_WRITE,
592 RC_GR_RMW,
593} rc_gr_mode;
594
595static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking);
596
597// guest regs with constants
598static u32 dr_gcregs[24];
599// masks of constant / dirty regs
600static u32 dr_gcregs_mask;
601static u32 dr_gcregs_dirty;
602
603#if PROPAGATE_CONSTANTS
604static void gconst_new(sh2_reg_e r, u32 val)
605{
606 int i;
607
608 dr_gcregs_mask |= 1 << r;
609 dr_gcregs_dirty |= 1 << r;
610 dr_gcregs[r] = val;
611
612 // throw away old r that we might have cached
613 for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) {
614 if ((reg_temp[i].type == HR_CACHED) &&
615 reg_temp[i].greg == r) {
616 reg_temp[i].type = HR_FREE;
617 reg_temp[i].flags = 0;
618 }
619 }
620}
621#endif
622
623static int gconst_get(sh2_reg_e r, u32 *val)
624{
625 if (dr_gcregs_mask & (1 << r)) {
626 *val = dr_gcregs[r];
627 return 1;
628 }
629 return 0;
630}
631
632static int gconst_check(sh2_reg_e r)
633{
634 if ((dr_gcregs_mask | dr_gcregs_dirty) & (1 << r))
635 return 1;
636 return 0;
637}
638
639// update hr if dirty, else do nothing
640static int gconst_try_read(int hr, sh2_reg_e r)
641{
642 if (dr_gcregs_dirty & (1 << r)) {
643 emith_move_r_imm(hr, dr_gcregs[r]);
644 dr_gcregs_dirty &= ~(1 << r);
645 return 1;
646 }
647 return 0;
648}
649
650static void gconst_check_evict(sh2_reg_e r)
651{
652 if (dr_gcregs_mask & (1 << r))
653 // no longer cached in reg, make dirty again
654 dr_gcregs_dirty |= 1 << r;
655}
656
657static void gconst_kill(sh2_reg_e r)
658{
659 dr_gcregs_mask &= ~(1 << r);
660 dr_gcregs_dirty &= ~(1 << r);
661}
662
663static void gconst_clean(void)
664{
665 int i;
666
667 for (i = 0; i < ARRAY_SIZE(dr_gcregs); i++)
668 if (dr_gcregs_dirty & (1 << i)) {
669 // using RC_GR_READ here: it will call gconst_try_read,
670 // cache the reg and mark it dirty.
671 rcache_get_reg_(i, RC_GR_READ, 0);
672 }
673}
674
675static void gconst_invalidate(void)
676{
677 dr_gcregs_mask = dr_gcregs_dirty = 0;
678}
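// illustrative sketch (hypothetical helper, unused): what the gconst state
// above encodes - a guest reg holds a known constant while its
// dr_gcregs_mask bit is set, and the dirty bit means that constant has not
// yet been materialized into a host reg or written back to the context.
static inline int gconst_pending_example(sh2_reg_e r)
{
  return (dr_gcregs_mask & dr_gcregs_dirty & (1 << r)) != 0;
}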
679
680static u16 rcache_counter;
681
682static temp_reg_t *rcache_evict(void)
683{
684 // evict reg with oldest stamp
685 int i, oldest = -1;
686 u16 min_stamp = (u16)-1;
687
688 for (i = 0; i < ARRAY_SIZE(reg_temp); i++) {
689 if (reg_temp[i].type == HR_CACHED && !(reg_temp[i].flags & HRF_LOCKED) &&
690 reg_temp[i].stamp <= min_stamp) {
691 min_stamp = reg_temp[i].stamp;
692 oldest = i;
693 }
694 }
695
696 if (oldest == -1) {
697 printf("no registers to evict, aborting\n");
698 exit(1);
699 }
700
701 i = oldest;
702 if (reg_temp[i].type == HR_CACHED) {
703 if (reg_temp[i].flags & HRF_DIRTY)
704 // writeback
705 emith_ctx_write(reg_temp[i].hreg, reg_temp[i].greg * 4);
706 gconst_check_evict(reg_temp[i].greg);
707 }
708
709 reg_temp[i].type = HR_FREE;
710 reg_temp[i].flags = 0;
711 return &reg_temp[i];
712}
713
714static int get_reg_static(sh2_reg_e r, rc_gr_mode mode)
715{
716 int i = reg_map_g2h[r];
717 if (i != -1) {
718 if (mode != RC_GR_WRITE)
719 gconst_try_read(i, r);
720 }
721 return i;
722}
723
724// note: must not be called when doing conditional code
725static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking)
726{
727 temp_reg_t *tr;
728 int i, ret;
729
730 // maybe statically mapped?
731 ret = get_reg_static(r, mode);
732 if (ret != -1)
733 goto end;
734
735 rcache_counter++;
736
737 // maybe already cached?
738 // if so, prefer it over gconst (they must be in sync)
739 for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) {
740 if (reg_temp[i].type == HR_CACHED && reg_temp[i].greg == r) {
741 reg_temp[i].stamp = rcache_counter;
742 if (mode != RC_GR_READ)
743 reg_temp[i].flags |= HRF_DIRTY;
744 ret = reg_temp[i].hreg;
745 goto end;
746 }
747 }
748
749 // use any free reg
750 for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) {
751 if (reg_temp[i].type == HR_FREE) {
752 tr = &reg_temp[i];
753 goto do_alloc;
754 }
755 }
756
757 tr = rcache_evict();
758
759do_alloc:
760 tr->type = HR_CACHED;
761 if (do_locking)
762 tr->flags |= HRF_LOCKED;
763 if (mode != RC_GR_READ)
764 tr->flags |= HRF_DIRTY;
765 tr->greg = r;
766 tr->stamp = rcache_counter;
767 ret = tr->hreg;
768
769 if (mode != RC_GR_WRITE) {
770 if (gconst_check(r)) {
771 if (gconst_try_read(ret, r))
772 tr->flags |= HRF_DIRTY;
773 }
774 else
775 emith_ctx_read(tr->hreg, r * 4);
776 }
777
778end:
779 if (mode != RC_GR_READ)
780 gconst_kill(r);
781
782 return ret;
783}
784
785static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode)
786{
787 return rcache_get_reg_(r, mode, 1);
788}
789
790static int rcache_get_tmp(void)
791{
792 temp_reg_t *tr;
793 int i;
794
795 for (i = 0; i < ARRAY_SIZE(reg_temp); i++)
796 if (reg_temp[i].type == HR_FREE) {
797 tr = &reg_temp[i];
798 goto do_alloc;
799 }
800
801 tr = rcache_evict();
802
803do_alloc:
804 tr->type = HR_TEMP;
805 return tr->hreg;
806}
807
808static int rcache_get_arg_id(int arg)
809{
810 int i, r = 0;
811 host_arg2reg(r, arg);
812
813 for (i = 0; i < ARRAY_SIZE(reg_temp); i++)
814 if (reg_temp[i].hreg == r)
815 break;
816
817 if (i == ARRAY_SIZE(reg_temp)) // can't happen
818 exit(1);
819
820 if (reg_temp[i].type == HR_CACHED) {
821 // writeback
822 if (reg_temp[i].flags & HRF_DIRTY)
823 emith_ctx_write(reg_temp[i].hreg, reg_temp[i].greg * 4);
824 gconst_check_evict(reg_temp[i].greg);
825 }
826 else if (reg_temp[i].type == HR_TEMP) {
827 printf("arg %d reg %d already used, aborting\n", arg, r);
828 exit(1);
829 }
830
831 reg_temp[i].type = HR_FREE;
832 reg_temp[i].flags = 0;
833
834 return i;
835}
836
837// get a reg to be used as function arg
838static int rcache_get_tmp_arg(int arg)
839{
840 int id = rcache_get_arg_id(arg);
841 reg_temp[id].type = HR_TEMP;
842
843 return reg_temp[id].hreg;
844}
845
846// same but caches a reg. RC_GR_READ only.
847static int rcache_get_reg_arg(int arg, sh2_reg_e r)
848{
849 int i, srcr, dstr, dstid;
850 int dirty = 0, src_dirty = 0;
851
852 dstid = rcache_get_arg_id(arg);
853 dstr = reg_temp[dstid].hreg;
854
855 // maybe already statically mapped?
856 srcr = get_reg_static(r, RC_GR_READ);
857 if (srcr != -1)
858 goto do_cache;
859
860 // maybe already cached?
861 for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) {
862 if ((reg_temp[i].type == HR_CACHED) &&
863 reg_temp[i].greg == r)
864 {
865 srcr = reg_temp[i].hreg;
866 if (reg_temp[i].flags & HRF_DIRTY)
867 src_dirty = 1;
868 goto do_cache;
869 }
870 }
871
872 // must read
873 srcr = dstr;
874 if (gconst_check(r)) {
875 if (gconst_try_read(srcr, r))
876 dirty = 1;
877 }
878 else
879 emith_ctx_read(srcr, r * 4);
880
881do_cache:
882 if (dstr != srcr)
883 emith_move_r_r(dstr, srcr);
884#if 1
885 else
886 dirty |= src_dirty;
887
888 if (dirty)
889 // must clean, callers might want to modify the arg before call
890 emith_ctx_write(dstr, r * 4);
891#else
892 if (dirty)
893 reg_temp[dstid].flags |= HRF_DIRTY;
894#endif
895
896 reg_temp[dstid].stamp = ++rcache_counter;
897 reg_temp[dstid].type = HR_CACHED;
898 reg_temp[dstid].greg = r;
899 reg_temp[dstid].flags |= HRF_LOCKED;
900 return dstr;
901}
902
903static void rcache_free_tmp(int hr)
904{
905 int i;
906 for (i = 0; i < ARRAY_SIZE(reg_temp); i++)
907 if (reg_temp[i].hreg == hr)
908 break;
909
910 if (i == ARRAY_SIZE(reg_temp) || reg_temp[i].type != HR_TEMP) {
911 printf("rcache_free_tmp fail: #%i hr %d, type %d\n", i, hr, reg_temp[i].type);
912 return;
913 }
914
915 reg_temp[i].type = HR_FREE;
916 reg_temp[i].flags = 0;
917}
918
919static void rcache_unlock(int hr)
920{
921 int i;
922 for (i = 0; i < ARRAY_SIZE(reg_temp); i++)
923 if (reg_temp[i].type == HR_CACHED && reg_temp[i].hreg == hr)
924 reg_temp[i].flags &= ~HRF_LOCKED;
925}
926
927static void rcache_unlock_all(void)
928{
929 int i;
930 for (i = 0; i < ARRAY_SIZE(reg_temp); i++)
931 reg_temp[i].flags &= ~HRF_LOCKED;
932}
933
934static void rcache_clean(void)
935{
936 int i;
937 gconst_clean();
938
939 for (i = 0; i < ARRAY_SIZE(reg_temp); i++)
940 if (reg_temp[i].type == HR_CACHED && (reg_temp[i].flags & HRF_DIRTY)) {
941 // writeback
942 emith_ctx_write(reg_temp[i].hreg, reg_temp[i].greg * 4);
943 reg_temp[i].flags &= ~HRF_DIRTY;
944 }
945}
946
947static void rcache_invalidate(void)
948{
949 int i;
950 for (i = 0; i < ARRAY_SIZE(reg_temp); i++) {
951 reg_temp[i].type = HR_FREE;
952 reg_temp[i].flags = 0;
953 }
954 rcache_counter = 0;
955
956 gconst_invalidate();
957}
958
959static void rcache_flush(void)
960{
961 rcache_clean();
962 rcache_invalidate();
963}
964
965// ---------------------------------------------------------------
966
967static int emit_get_rbase_and_offs(u32 a, u32 *offs)
968{
969 u32 mask = 0;
970 int poffs;
971 int hr;
972
973 poffs = dr_ctx_get_mem_ptr(a, &mask);
974 if (poffs == -1)
975 return -1;
976
977 // XXX: could use some related reg
978 hr = rcache_get_tmp();
979 emith_ctx_read(hr, poffs);
980 emith_add_r_imm(hr, a & mask & ~0xff);
981 *offs = a & 0xff; // XXX: ARM oriented..
982 return hr;
983}
984
985static void emit_move_r_imm32(sh2_reg_e dst, u32 imm)
986{
987#if PROPAGATE_CONSTANTS
988 gconst_new(dst, imm);
989#else
990 int hr = rcache_get_reg(dst, RC_GR_WRITE);
991 emith_move_r_imm(hr, imm);
992#endif
993}
994
995static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src)
996{
997 int hr_d = rcache_get_reg(dst, RC_GR_WRITE);
998 int hr_s = rcache_get_reg(src, RC_GR_READ);
999
1000 emith_move_r_r(hr_d, hr_s);
1001}
1002
1003// T must be clear, and comparison done just before this
1004static void emit_or_t_if_eq(int srr)
1005{
1006 EMITH_SJMP_START(DCOND_NE);
1007 emith_or_r_imm_c(DCOND_EQ, srr, T);
1008 EMITH_SJMP_END(DCOND_NE);
1009}
1010
1011// arguments must be ready
1012// reg cache must be clean before call
1013static int emit_memhandler_read_(int size, int ram_check)
1014{
1015 int arg0, arg1;
1016 host_arg2reg(arg0, 0);
1017
1018 rcache_clean();
1019
1020 // must writeback cycles for poll detection stuff
1021 // FIXME: rm
1022 if (reg_map_g2h[SHR_SR] != -1)
1023 emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4);
1024
1025 arg1 = rcache_get_tmp_arg(1);
1026 emith_move_r_r(arg1, CONTEXT_REG);
1027
1028#ifndef PDB_NET
1029 if (ram_check && Pico.rom == (void *)0x02000000 && Pico32xMem->sdram == (void *)0x06000000) {
1030 int tmp = rcache_get_tmp();
1031 emith_and_r_r_imm(tmp, arg0, 0xfb000000);
1032 emith_cmp_r_imm(tmp, 0x02000000);
1033 switch (size) {
1034 case 0: // 8
1035 EMITH_SJMP3_START(DCOND_NE);
1036 emith_eor_r_imm_c(DCOND_EQ, arg0, 1);
1037 emith_read8_r_r_offs_c(DCOND_EQ, arg0, arg0, 0);
1038 EMITH_SJMP3_MID(DCOND_NE);
1039 emith_call_cond(DCOND_NE, sh2_drc_read8);
1040 EMITH_SJMP3_END();
1041 break;
1042 case 1: // 16
1043 EMITH_SJMP3_START(DCOND_NE);
1044 emith_read16_r_r_offs_c(DCOND_EQ, arg0, arg0, 0);
1045 EMITH_SJMP3_MID(DCOND_NE);
1046 emith_call_cond(DCOND_NE, sh2_drc_read16);
1047 EMITH_SJMP3_END();
1048 break;
1049 case 2: // 32
1050 EMITH_SJMP3_START(DCOND_NE);
1051 emith_read_r_r_offs_c(DCOND_EQ, arg0, arg0, 0);
1052 emith_ror_c(DCOND_EQ, arg0, arg0, 16);
1053 EMITH_SJMP3_MID(DCOND_NE);
1054 emith_call_cond(DCOND_NE, sh2_drc_read32);
1055 EMITH_SJMP3_END();
1056 break;
1057 }
1058 }
1059 else
1060#endif
1061 {
1062 switch (size) {
1063 case 0: // 8
1064 emith_call(sh2_drc_read8);
1065 break;
1066 case 1: // 16
1067 emith_call(sh2_drc_read16);
1068 break;
1069 case 2: // 32
1070 emith_call(sh2_drc_read32);
1071 break;
1072 }
1073 }
1074 rcache_invalidate();
1075
1076 if (reg_map_g2h[SHR_SR] != -1)
1077 emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4);
1078
1079 // assuming arg0 and the retval reg match
1080 return rcache_get_tmp_arg(0);
1081}
1082
1083static int emit_memhandler_read(int size)
1084{
1085 return emit_memhandler_read_(size, 1);
1086}
1087
1088static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size)
1089{
1090 int hr, hr2, ram_check = 1;
1091 u32 val, offs2;
1092
1093 if (gconst_get(rs, &val)) {
1094 hr = emit_get_rbase_and_offs(val + offs, &offs2);
1095 if (hr != -1) {
1096 hr2 = rcache_get_reg(rd, RC_GR_WRITE);
1097 switch (size) {
1098 case 0: // 8
1099 emith_read8_r_r_offs(hr2, hr, offs2 ^ 1);
1100 emith_sext(hr2, hr2, 8);
1101 break;
1102 case 1: // 16
1103 emith_read16_r_r_offs(hr2, hr, offs2);
1104 emith_sext(hr2, hr2, 16);
1105 break;
1106 case 2: // 32
1107 emith_read_r_r_offs(hr2, hr, offs2);
1108 emith_ror(hr2, hr2, 16);
1109 break;
1110 }
1111 rcache_free_tmp(hr);
1112 return hr2;
1113 }
1114
1115 ram_check = 0;
1116 }
1117
1118 hr = rcache_get_reg_arg(0, rs);
1119 if (offs != 0)
1120 emith_add_r_imm(hr, offs);
1121 hr = emit_memhandler_read_(size, ram_check);
1122 hr2 = rcache_get_reg(rd, RC_GR_WRITE);
1123 if (size != 2) {
1124 emith_sext(hr2, hr, (size == 1) ? 16 : 8);
1125 } else
1126 emith_move_r_r(hr2, hr);
1127 rcache_free_tmp(hr);
1128
1129 return hr2;
1130}
1131
1132static void emit_memhandler_write(int size, u32 pc, int delay)
1133{
1134 int ctxr;
1135 host_arg2reg(ctxr, 2);
1136 if (reg_map_g2h[SHR_SR] != -1)
1137 emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4);
1138
1139 switch (size) {
1140 case 0: // 8
1141 // XXX: consider inlining sh2_drc_write8
1142 if (delay) {
1143 emith_call(sh2_drc_write8_slot);
1144 } else {
1145 emit_move_r_imm32(SHR_PC, pc);
1146 rcache_clean();
1147 emith_call(sh2_drc_write8);
1148 }
1149 break;
1150 case 1: // 16
1151 if (delay) {
1152 emith_call(sh2_drc_write16_slot);
1153 } else {
1154 emit_move_r_imm32(SHR_PC, pc);
1155 rcache_clean();
1156 emith_call(sh2_drc_write16);
1157 }
1158 break;
1159 case 2: // 32
1160 emith_move_r_r(ctxr, CONTEXT_REG);
1161 emith_call(sh2_drc_write32);
1162 break;
1163 }
1164
1165 if (reg_map_g2h[SHR_SR] != -1)
1166 emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4);
1167 rcache_invalidate();
1168}
1169
1170// @(Rx,Ry)
1171static int emit_indirect_indexed_read(int rx, int ry, int size)
1172{
1173 int a0, t;
1174 a0 = rcache_get_reg_arg(0, rx);
1175 t = rcache_get_reg(ry, RC_GR_READ);
1176 emith_add_r_r(a0, t);
1177 return emit_memhandler_read(size);
1178}
1179
1180// read @Rn, @Rm
1181static void emit_indirect_read_double(u32 *rnr, u32 *rmr, int rn, int rm, int size)
1182{
1183 int tmp;
1184
1185 rcache_get_reg_arg(0, rn);
1186 tmp = emit_memhandler_read(size);
1187 emith_ctx_write(tmp, offsetof(SH2, drc_tmp));
1188 rcache_free_tmp(tmp);
1189 tmp = rcache_get_reg(rn, RC_GR_RMW);
1190 emith_add_r_imm(tmp, 1 << size);
1191 rcache_unlock(tmp);
1192
1193 rcache_get_reg_arg(0, rm);
1194 *rmr = emit_memhandler_read(size);
1195 *rnr = rcache_get_tmp();
1196 emith_ctx_read(*rnr, offsetof(SH2, drc_tmp));
1197 tmp = rcache_get_reg(rm, RC_GR_RMW);
1198 emith_add_r_imm(tmp, 1 << size);
1199 rcache_unlock(tmp);
1200}
1201
1202static void emit_do_static_regs(int is_write, int tmpr)
1203{
1204 int i, r, count;
1205
1206 for (i = 0; i < ARRAY_SIZE(reg_map_g2h); i++) {
1207 r = reg_map_g2h[i];
1208 if (r == -1)
1209 continue;
1210
1211 for (count = 1; i < ARRAY_SIZE(reg_map_g2h) - 1; i++, r++) {
1212 if (reg_map_g2h[i + 1] != r + 1)
1213 break;
1214 count++;
1215 }
1216
1217 if (count > 1) {
1218 // i, r point to last item
1219 if (is_write)
1220 emith_ctx_write_multiple(r - count + 1, (i - count + 1) * 4, count, tmpr);
1221 else
1222 emith_ctx_read_multiple(r - count + 1, (i - count + 1) * 4, count, tmpr);
1223 } else {
1224 if (is_write)
1225 emith_ctx_write(r, i * 4);
1226 else
1227 emith_ctx_read(r, i * 4);
1228 }
1229 }
1230}
1231
1232static void emit_block_entry(void)
1233{
1234 int arg0;
1235
1236 host_arg2reg(arg0, 0);
1237
1238#if (DRC_DEBUG & 8) || defined(PDB)
1239 int arg1, arg2;
1240 host_arg2reg(arg1, 1);
1241 host_arg2reg(arg2, 2);
1242
1243 emit_do_static_regs(1, arg2);
1244 emith_move_r_r(arg1, CONTEXT_REG);
1245 emith_move_r_r(arg2, rcache_get_reg(SHR_SR, RC_GR_READ));
1246 emith_call(sh2_drc_log_entry);
1247 rcache_invalidate();
1248#endif
1249 emith_tst_r_r(arg0, arg0);
1250 EMITH_SJMP_START(DCOND_EQ);
1251 emith_jump_reg_c(DCOND_NE, arg0);
1252 EMITH_SJMP_END(DCOND_EQ);
1253}
1254
1255#define DELAYED_OP \
1256 drcf.delayed_op = 2
1257
1258#define DELAY_SAVE_T(sr) { \
1259 emith_bic_r_imm(sr, T_save); \
1260 emith_tst_r_imm(sr, T); \
1261 EMITH_SJMP_START(DCOND_EQ); \
1262 emith_or_r_imm_c(DCOND_NE, sr, T_save); \
1263 EMITH_SJMP_END(DCOND_EQ); \
1264 drcf.use_saved_t = 1; \
1265}
1266
1267#define FLUSH_CYCLES(sr) \
1268 if (cycles > 0) { \
1269 emith_sub_r_imm(sr, cycles << 12); \
1270 cycles = 0; \
1271 }
1272
1273#define CHECK_UNHANDLED_BITS(mask) { \
1274 if ((op & (mask)) != 0) \
1275 goto default_; \
1276}
1277
1278#define GET_Fx() \
1279 ((op >> 4) & 0x0f)
1280
1281#define GET_Rm GET_Fx
1282
1283#define GET_Rn() \
1284 ((op >> 8) & 0x0f)
1285
1286#define CHECK_FX_LT(n) \
1287 if (GET_Fx() >= n) \
1288 goto default_
1289
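// illustrative sketch (hypothetical helper, unused): the fields that
// GET_Rn()/GET_Rm() extract from a 16-bit opcode; e.g. "ADD R1,R2" encodes
// as 0x321c (0011 nnnn mmmm 1100), giving Rn=2 and Rm=1.
static inline void decode_nm_example(u32 op, u32 *rn, u32 *rm)
{
  *rn = (op >> 8) & 0x0f; // GET_Rn()
  *rm = (op >> 4) & 0x0f; // GET_Rm() / GET_Fx()
}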
1290static void *dr_get_pc_base(u32 pc, int is_slave);
1291
1292static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
1293{
1294 // XXX: maybe use structs instead?
1295 u32 branch_target_pc[MAX_LOCAL_BRANCHES];
1296 void *branch_target_ptr[MAX_LOCAL_BRANCHES];
1297 int branch_target_count = 0;
1298 void *branch_patch_ptr[MAX_LOCAL_BRANCHES];
1299 u32 branch_patch_pc[MAX_LOCAL_BRANCHES];
1300 int branch_patch_count = 0;
1301 u32 literal_addr[MAX_LITERALS];
1302 int literal_addr_count = 0;
1303 int pending_branch_cond = -1;
1304 int pending_branch_pc = 0;
1305 u8 op_flags[BLOCK_INSN_LIMIT];
1306 struct {
1307 u32 delayed_op:2;
1308 u32 test_irq:1;
1309 u32 use_saved_t:1; // delayed op modifies T
1310 } drcf = { 0, };
1311
1312 // PC of current, first, last, last_target_blk SH2 insn
1313 u32 pc, base_pc, end_pc, out_pc;
1314 void *block_entry_ptr;
1315 struct block_desc *block;
1316 u16 *dr_pc_base;
1317 int blkid_main = 0;
1318 int skip_op = 0;
1319 u32 tmp, tmp2;
1320 int cycles;
1321 int i, v;
1322 int op;
1323
1324 base_pc = sh2->pc;
1325
1326 // get base/validate PC
1327 dr_pc_base = dr_get_pc_base(base_pc, sh2->is_slave);
1328 if (dr_pc_base == (void *)-1) {
1329 printf("invalid PC, aborting: %08x\n", base_pc);
1330 // FIXME: be less destructive
1331 exit(1);
1332 }
1333
1334 tcache_ptr = tcache_ptrs[tcache_id];
1335
1336 // predict tcache overflow
1337 tmp = tcache_ptr - tcache_bases[tcache_id];
1338 if (tmp > tcache_sizes[tcache_id] - MAX_BLOCK_SIZE) {
1339 dbg(1, "tcache %d overflow", tcache_id);
1340 return NULL;
1341 }
1342
1343 // 1st pass: scan forward for local branches
1344 scan_block(base_pc, sh2->is_slave, op_flags, &end_pc);
1345
1346 block = dr_add_block(base_pc, end_pc + MAX_LITERAL_OFFSET, // XXX
1347 sh2->is_slave, &blkid_main);
1348 if (block == NULL)
1349 return NULL;
1350
1351 block_entry_ptr = tcache_ptr;
1352 dbg(2, "== %csh2 block #%d,%d %08x-%08x -> %p", sh2->is_slave ? 's' : 'm',
1353 tcache_id, blkid_main, base_pc, end_pc, block_entry_ptr);
1354
1355 dr_link_blocks(tcache_ptr, base_pc, tcache_id);
1356
1357 // collect branch_targets that don't land on delay slots
1358 for (pc = base_pc; pc < end_pc; pc += 2) {
1359 if (!(OP_FLAGS(pc) & OF_BTARGET))
1360 continue;
1361 if (OP_FLAGS(pc) & OF_DELAY_OP) {
1362 OP_FLAGS(pc) &= ~OF_BTARGET;
1363 continue;
1364 }
1365 ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, break);
1366 }
1367
1368 if (branch_target_count > 0) {
1369 memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count);
1370 }
1371
1372 // -------------------------------------------------
1373 // 2nd pass: actual compilation
1374 out_pc = 0;
1375 pc = base_pc;
1376 for (cycles = 0; pc <= end_pc || drcf.delayed_op; )
1377 {
1378 u32 tmp3, tmp4, sr;
1379
1380 if (drcf.delayed_op > 0)
1381 drcf.delayed_op--;
1382
1383 op = FETCH_OP(pc);
1384
1385 if ((OP_FLAGS(pc) & OF_BTARGET) || pc == base_pc)
1386 {
1387 i = find_in_array(branch_target_pc, branch_target_count, pc);
1388 if (pc != base_pc)
1389 {
1390 // make block entry
1391
1392 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1393 FLUSH_CYCLES(sr);
1394 // decide whether to flush rcache
1395 if ((op & 0xf0ff) == 0x4010 && FETCH_OP(pc + 2) == 0x8bfd) // DT; BF #-2
1396 rcache_clean();
1397 else
1398 rcache_flush();
1399 do_host_disasm(tcache_id);
1400
1401 v = block->entry_count;
1402 if (v < ARRAY_SIZE(block->entryp)) {
1403 block->entryp[v].pc = pc;
1404 block->entryp[v].tcache_ptr = tcache_ptr;
1405#if (DRC_DEBUG & 2)
1406 block->entryp[v].block = block;
1407#endif
1408 add_to_hashlist(&block->entryp[v], tcache_id);
1409 block->entry_count++;
1410
1411 dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p", sh2->is_slave ? 's' : 'm',
1412 tcache_id, blkid_main, pc, tcache_ptr);
1413
1414 // since we made a block entry, link any other blocks that jump to current pc
1415 dr_link_blocks(tcache_ptr, pc, tcache_id);
1416 }
1417 else {
1418 dbg(1, "too many entryp for block #%d,%d pc=%08x",
1419 tcache_id, blkid_main, pc);
1420 }
1421 }
1422 if (i >= 0)
1423 branch_target_ptr[i] = tcache_ptr;
1424
1425 // must update PC
1426 emit_move_r_imm32(SHR_PC, pc);
1427 rcache_clean();
1428
1429 // check cycles
1430 sr = rcache_get_reg(SHR_SR, RC_GR_READ);
1431 emith_cmp_r_imm(sr, 0);
1432 emith_jump_cond(DCOND_LE, sh2_drc_exit);
1433 do_host_disasm(tcache_id);
1434 rcache_unlock_all();
1435 }
1436
1437#if (DRC_DEBUG & 2)
1438 insns_compiled++;
1439#endif
1440#if (DRC_DEBUG & 4)
1441 DasmSH2(sh2dasm_buff, pc, op);
1442 printf("%c%08x %04x %s\n", (OP_FLAGS(pc) & OF_BTARGET) ? '*' : ' ',
1443 pc, op, sh2dasm_buff);
1444#endif
1445#ifdef DRC_CMP
1446 //if (out_pc != 0 && out_pc != (u32)-1)
1447 // emit_move_r_imm32(SHR_PC, out_pc);
1448 //else
1449 if (!drcf.delayed_op) {
1450 emit_move_r_imm32(SHR_PC, pc);
1451 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1452 FLUSH_CYCLES(sr);
1453 // rcache_clean(); // FIXME
1454 rcache_flush();
1455 emit_do_static_regs(1, 0);
1456 emith_pass_arg_r(0, CONTEXT_REG);
1457 emith_call(do_sh2_cmp);
1458 }
1459#endif
1460
1461 pc += 2;
1462 cycles++;
1463
1464 if (skip_op > 0) {
1465 skip_op--;
1466 continue;
1467 }
1468
1469 switch ((op >> 12) & 0x0f)
1470 {
1471 /////////////////////////////////////////////
1472 case 0x00:
1473 switch (op & 0x0f)
1474 {
1475 case 0x02:
1476 tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE);
1477 switch (GET_Fx())
1478 {
1479 case 0: // STC SR,Rn 0000nnnn00000010
1480 tmp2 = SHR_SR;
1481 break;
1482 case 1: // STC GBR,Rn 0000nnnn00010010
1483 tmp2 = SHR_GBR;
1484 break;
1485 case 2: // STC VBR,Rn 0000nnnn00100010
1486 tmp2 = SHR_VBR;
1487 break;
1488 default:
1489 goto default_;
1490 }
1491 tmp3 = rcache_get_reg(tmp2, RC_GR_READ);
1492 emith_move_r_r(tmp, tmp3);
1493 if (tmp2 == SHR_SR)
1494 emith_clear_msb(tmp, tmp, 22); // reserved bits defined by ISA as 0
1495 goto end_op;
1496 case 0x03:
1497 CHECK_UNHANDLED_BITS(0xd0);
1498 // BRAF Rm 0000mmmm00100011
1499 // BSRF Rm 0000mmmm00000011
1500 DELAYED_OP;
1501 tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE);
1502 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ);
1503 emith_move_r_r(tmp, tmp2);
1504 if (op & 0x20)
1505 emith_add_r_imm(tmp, pc + 2);
1506 else { // BSRF
1507 tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE);
1508 emith_move_r_imm(tmp3, pc + 2);
1509 emith_add_r_r(tmp, tmp3);
1510 }
1511 out_pc = (u32)-1;
1512 cycles++;
1513 goto end_op;
1514 case 0x04: // MOV.B Rm,@(R0,Rn) 0000nnnnmmmm0100
1515 case 0x05: // MOV.W Rm,@(R0,Rn) 0000nnnnmmmm0101
1516 case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110
1517 rcache_clean();
1518 tmp = rcache_get_reg_arg(1, GET_Rm());
1519 tmp2 = rcache_get_reg_arg(0, SHR_R0);
1520 tmp3 = rcache_get_reg(GET_Rn(), RC_GR_READ);
1521 emith_add_r_r(tmp2, tmp3);
1522 emit_memhandler_write(op & 3, pc, drcf.delayed_op);
1523 goto end_op;
1524 case 0x07:
1525 // MUL.L Rm,Rn 0000nnnnmmmm0111
1526 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ);
1527 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1528 tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE);
1529 emith_mul(tmp3, tmp2, tmp);
1530 cycles++;
1531 goto end_op;
1532 case 0x08:
1533 CHECK_UNHANDLED_BITS(0xf00);
1534 switch (GET_Fx())
1535 {
1536 case 0: // CLRT 0000000000001000
1537 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1538 if (drcf.delayed_op)
1539 DELAY_SAVE_T(sr);
1540 emith_bic_r_imm(sr, T);
1541 break;
1542 case 1: // SETT 0000000000011000
1543 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1544 if (drcf.delayed_op)
1545 DELAY_SAVE_T(sr);
1546 emith_or_r_imm(sr, T);
1547 break;
1548 case 2: // CLRMAC 0000000000101000
1549 emit_move_r_imm32(SHR_MACL, 0);
1550 emit_move_r_imm32(SHR_MACH, 0);
1551 break;
1552 default:
1553 goto default_;
1554 }
1555 goto end_op;
1556 case 0x09:
1557 switch (GET_Fx())
1558 {
1559 case 0: // NOP 0000000000001001
1560 CHECK_UNHANDLED_BITS(0xf00);
1561 break;
1562 case 1: // DIV0U 0000000000011001
1563 CHECK_UNHANDLED_BITS(0xf00);
1564 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1565 if (drcf.delayed_op)
1566 DELAY_SAVE_T(sr);
1567 emith_bic_r_imm(sr, M|Q|T);
1568 break;
1569 case 2: // MOVT Rn 0000nnnn00101001
1570 sr = rcache_get_reg(SHR_SR, RC_GR_READ);
1571 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE);
1572 emith_clear_msb(tmp2, sr, 31);
1573 break;
1574 default:
1575 goto default_;
1576 }
1577 goto end_op;
1578 case 0x0a:
1579 tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE);
1580 switch (GET_Fx())
1581 {
1582 case 0: // STS MACH,Rn 0000nnnn00001010
1583 tmp2 = SHR_MACH;
1584 break;
1585 case 1: // STS MACL,Rn 0000nnnn00011010
1586 tmp2 = SHR_MACL;
1587 break;
1588 case 2: // STS PR,Rn 0000nnnn00101010
1589 tmp2 = SHR_PR;
1590 break;
1591 default:
1592 goto default_;
1593 }
1594 tmp2 = rcache_get_reg(tmp2, RC_GR_READ);
1595 emith_move_r_r(tmp, tmp2);
1596 goto end_op;
1597 case 0x0b:
1598 CHECK_UNHANDLED_BITS(0xf00);
1599 switch (GET_Fx())
1600 {
1601 case 0: // RTS 0000000000001011
1602 DELAYED_OP;
1603 emit_move_r_r(SHR_PC, SHR_PR);
1604 out_pc = (u32)-1;
1605 cycles++;
1606 break;
1607 case 1: // SLEEP 0000000000011011
1608 tmp = rcache_get_reg(SHR_SR, RC_GR_RMW);
1609 emith_clear_msb(tmp, tmp, 20); // clear cycles
1610 out_pc = out_pc - 2;
1611 cycles = 1;
1612 goto end_op;
1613 case 2: // RTE 0000000000101011
1614 DELAYED_OP;
1615 // pop PC
1616 emit_memhandler_read_rr(SHR_PC, SHR_SP, 0, 2);
1617 // pop SR
1618 tmp = rcache_get_reg_arg(0, SHR_SP);
1619 emith_add_r_imm(tmp, 4);
1620 tmp = emit_memhandler_read(2);
1621 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1622 emith_write_sr(sr, tmp);
1623 rcache_free_tmp(tmp);
1624 tmp = rcache_get_reg(SHR_SP, RC_GR_RMW);
1625 emith_add_r_imm(tmp, 4*2);
1626 drcf.test_irq = 1;
1627 out_pc = (u32)-1;
1628 cycles += 3;
1629 break;
1630 default:
1631 goto default_;
1632 }
1633 goto end_op;
1634 case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100
1635 case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101
1636 case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110
1637 tmp = emit_indirect_indexed_read(SHR_R0, GET_Rm(), op & 3);
1638 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE);
1639 if ((op & 3) != 2) {
1640 emith_sext(tmp2, tmp, (op & 1) ? 16 : 8);
1641 } else
1642 emith_move_r_r(tmp2, tmp);
1643 rcache_free_tmp(tmp);
1644 goto end_op;
1645 case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111
1646 emit_indirect_read_double(&tmp, &tmp2, GET_Rn(), GET_Rm(), 2);
1647 tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW);
1648 /* MS 16 MAC bits unused if saturated */
1649 sr = rcache_get_reg(SHR_SR, RC_GR_READ);
1650 emith_tst_r_imm(sr, S);
1651 EMITH_SJMP_START(DCOND_EQ);
1652 emith_clear_msb_c(DCOND_NE, tmp4, tmp4, 16);
1653 EMITH_SJMP_END(DCOND_EQ);
1654 rcache_unlock(sr);
1655 tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW); // might evict SR
1656 emith_mula_s64(tmp3, tmp4, tmp, tmp2);
1657 rcache_free_tmp(tmp2);
1658 sr = rcache_get_reg(SHR_SR, RC_GR_READ); // reget just in case
1659 emith_tst_r_imm(sr, S);
1660
1661 EMITH_JMP_START(DCOND_EQ);
1662 emith_asr(tmp, tmp4, 15);
1663 emith_cmp_r_imm(tmp, -1); // negative overflow (0x80000000..0xffff7fff)
1664 EMITH_SJMP_START(DCOND_GE);
1665 emith_move_r_imm_c(DCOND_LT, tmp4, 0x8000);
1666 emith_move_r_imm_c(DCOND_LT, tmp3, 0x0000);
1667 EMITH_SJMP_END(DCOND_GE);
1668 emith_cmp_r_imm(tmp, 0); // positive overflow (0x00008000..0x7fffffff)
1669 EMITH_SJMP_START(DCOND_LE);
1670 emith_move_r_imm_c(DCOND_GT, tmp4, 0x00007fff);
1671 emith_move_r_imm_c(DCOND_GT, tmp3, 0xffffffff);
1672 EMITH_SJMP_END(DCOND_LE);
1673 EMITH_JMP_END(DCOND_EQ);
1674
1675 rcache_free_tmp(tmp);
1676 cycles += 2;
1677 goto end_op;
1678 }
1679 goto default_;
1680
1681 /////////////////////////////////////////////
1682 case 0x01:
1683 // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd
1684 rcache_clean();
1685 tmp = rcache_get_reg_arg(0, GET_Rn());
1686 tmp2 = rcache_get_reg_arg(1, GET_Rm());
1687 if (op & 0x0f)
1688 emith_add_r_imm(tmp, (op & 0x0f) * 4);
1689 emit_memhandler_write(2, pc, drcf.delayed_op);
1690 goto end_op;
1691
1692 case 0x02:
1693 switch (op & 0x0f)
1694 {
1695 case 0x00: // MOV.B Rm,@Rn 0010nnnnmmmm0000
1696 case 0x01: // MOV.W Rm,@Rn 0010nnnnmmmm0001
1697 case 0x02: // MOV.L Rm,@Rn 0010nnnnmmmm0010
1698 rcache_clean();
1699 rcache_get_reg_arg(0, GET_Rn());
1700 rcache_get_reg_arg(1, GET_Rm());
1701 emit_memhandler_write(op & 3, pc, drcf.delayed_op);
1702 goto end_op;
1703 case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100
1704 case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101
1705 case 0x06: // MOV.L Rm,@-Rn 0010nnnnmmmm0110
1706 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1707 emith_sub_r_imm(tmp, (1 << (op & 3)));
1708 rcache_clean();
1709 rcache_get_reg_arg(0, GET_Rn());
1710 rcache_get_reg_arg(1, GET_Rm());
1711 emit_memhandler_write(op & 3, pc, drcf.delayed_op);
1712 goto end_op;
1713 case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111
1714 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1715 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ);
1716 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1717 if (drcf.delayed_op)
1718 DELAY_SAVE_T(sr);
1719 emith_bic_r_imm(sr, M|Q|T);
1720 emith_tst_r_imm(tmp2, (1<<31));
1721 EMITH_SJMP_START(DCOND_EQ);
1722 emith_or_r_imm_c(DCOND_NE, sr, Q);
1723 EMITH_SJMP_END(DCOND_EQ);
1724 emith_tst_r_imm(tmp3, (1<<31));
1725 EMITH_SJMP_START(DCOND_EQ);
1726 emith_or_r_imm_c(DCOND_NE, sr, M);
1727 EMITH_SJMP_END(DCOND_EQ);
1728 emith_teq_r_r(tmp2, tmp3);
1729 EMITH_SJMP_START(DCOND_PL);
1730 emith_or_r_imm_c(DCOND_MI, sr, T);
1731 EMITH_SJMP_END(DCOND_PL);
1732 goto end_op;
1733 case 0x08: // TST Rm,Rn 0010nnnnmmmm1000
1734 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1735 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ);
1736 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1737 if (drcf.delayed_op)
1738 DELAY_SAVE_T(sr);
1739 emith_bic_r_imm(sr, T);
1740 emith_tst_r_r(tmp2, tmp3);
1741 emit_or_t_if_eq(sr);
1742 goto end_op;
1743 case 0x09: // AND Rm,Rn 0010nnnnmmmm1001
1744 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1745 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1746 emith_and_r_r(tmp, tmp2);
1747 goto end_op;
1748 case 0x0a: // XOR Rm,Rn 0010nnnnmmmm1010
1749 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1750 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1751 emith_eor_r_r(tmp, tmp2);
1752 goto end_op;
1753 case 0x0b: // OR Rm,Rn 0010nnnnmmmm1011
1754 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1755 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1756 emith_or_r_r(tmp, tmp2);
1757 goto end_op;
1758 case 0x0c: // CMP/STR Rm,Rn 0010nnnnmmmm1100
1759 tmp = rcache_get_tmp();
1760 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ);
1761 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1762 emith_eor_r_r_r(tmp, tmp2, tmp3);
1763 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1764 if (drcf.delayed_op)
1765 DELAY_SAVE_T(sr);
1766 emith_bic_r_imm(sr, T);
1767 emith_tst_r_imm(tmp, 0x000000ff);
1768 emit_or_t_if_eq(tmp);
1769 emith_tst_r_imm(tmp, 0x0000ff00);
1770 emit_or_t_if_eq(tmp);
1771 emith_tst_r_imm(tmp, 0x00ff0000);
1772 emit_or_t_if_eq(tmp);
1773 emith_tst_r_imm(tmp, 0xff000000);
1774 emit_or_t_if_eq(tmp);
1775 rcache_free_tmp(tmp);
1776 goto end_op;
1777 case 0x0d: // XTRCT Rm,Rn 0010nnnnmmmm1101
1778 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1779 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1780 emith_lsr(tmp, tmp, 16);
1781 emith_or_r_r_lsl(tmp, tmp2, 16);
1782 goto end_op;
1783 case 0x0e: // MULU.W Rm,Rn 0010nnnnmmmm1110
1784 case 0x0f: // MULS.W Rm,Rn 0010nnnnmmmm1111
1785 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ);
1786 tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE);
1787 if (op & 1) {
1788 emith_sext(tmp, tmp2, 16);
1789 } else
1790 emith_clear_msb(tmp, tmp2, 16);
1791 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1792 tmp2 = rcache_get_tmp();
1793 if (op & 1) {
1794 emith_sext(tmp2, tmp3, 16);
1795 } else
1796 emith_clear_msb(tmp2, tmp3, 16);
1797 emith_mul(tmp, tmp, tmp2);
1798 rcache_free_tmp(tmp2);
1799 goto end_op;
1800 }
1801 goto default_;
1802
1803 /////////////////////////////////////////////
1804 case 0x03:
1805 switch (op & 0x0f)
1806 {
1807 case 0x00: // CMP/EQ Rm,Rn 0011nnnnmmmm0000
1808 case 0x02: // CMP/HS Rm,Rn 0011nnnnmmmm0010
1809 case 0x03: // CMP/GE Rm,Rn 0011nnnnmmmm0011
1810 case 0x06: // CMP/HI Rm,Rn 0011nnnnmmmm0110
1811 case 0x07: // CMP/GT Rm,Rn 0011nnnnmmmm0111
1812 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1813 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ);
1814 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1815 if (drcf.delayed_op)
1816 DELAY_SAVE_T(sr);
1817 emith_bic_r_imm(sr, T);
1818 emith_cmp_r_r(tmp2, tmp3);
1819 switch (op & 0x07)
1820 {
1821 case 0x00: // CMP/EQ
1822 emit_or_t_if_eq(sr);
1823 break;
1824 case 0x02: // CMP/HS
1825 EMITH_SJMP_START(DCOND_LO);
1826 emith_or_r_imm_c(DCOND_HS, sr, T);
1827 EMITH_SJMP_END(DCOND_LO);
1828 break;
1829 case 0x03: // CMP/GE
1830 EMITH_SJMP_START(DCOND_LT);
1831 emith_or_r_imm_c(DCOND_GE, sr, T);
1832 EMITH_SJMP_END(DCOND_LT);
1833 break;
1834 case 0x06: // CMP/HI
1835 EMITH_SJMP_START(DCOND_LS);
1836 emith_or_r_imm_c(DCOND_HI, sr, T);
1837 EMITH_SJMP_END(DCOND_LS);
1838 break;
1839 case 0x07: // CMP/GT
1840 EMITH_SJMP_START(DCOND_LE);
1841 emith_or_r_imm_c(DCOND_GT, sr, T);
1842 EMITH_SJMP_END(DCOND_LE);
1843 break;
1844 }
1845 goto end_op;
1846 case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100
1847 // Q1 = carry(Rn = (Rn << 1) | T)
1848 // if Q ^ M
1849 // Q2 = carry(Rn += Rm)
1850 // else
1851 // Q2 = carry(Rn -= Rm)
1852 // Q = M ^ Q1 ^ Q2
1853 // T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2)
1854 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1855 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1856 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1857 if (drcf.delayed_op)
1858 DELAY_SAVE_T(sr);
1859 emith_tpop_carry(sr, 0);
1860 emith_adcf_r_r(tmp2, tmp2);
1861 emith_tpush_carry(sr, 0); // keep Q1 in T for now
1862 tmp4 = rcache_get_tmp();
1863 emith_and_r_r_imm(tmp4, sr, M);
1864 emith_eor_r_r_lsr(sr, tmp4, M_SHIFT - Q_SHIFT); // Q ^= M
1865 rcache_free_tmp(tmp4);
1866 // add or sub, invert T if carry to get Q1 ^ Q2
1867 // in: (Q ^ M) passed in Q, Q1 in T
1868 emith_sh2_div1_step(tmp2, tmp3, sr);
1869 emith_bic_r_imm(sr, Q);
1870 emith_tst_r_imm(sr, M);
1871 EMITH_SJMP_START(DCOND_EQ);
1872 emith_or_r_imm_c(DCOND_NE, sr, Q); // Q = M
1873 EMITH_SJMP_END(DCOND_EQ);
1874 emith_tst_r_imm(sr, T);
1875 EMITH_SJMP_START(DCOND_EQ);
1876 emith_eor_r_imm_c(DCOND_NE, sr, Q); // Q = M ^ Q1 ^ Q2
1877 EMITH_SJMP_END(DCOND_EQ);
1878 emith_eor_r_imm(sr, T); // T = !(Q1 ^ Q2)
1879 goto end_op;
1880 case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101
1881 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ);
1882 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1883 tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE);
1884 tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE);
1885 emith_mul_u64(tmp3, tmp4, tmp, tmp2);
1886 cycles++;
1887 goto end_op;
1888 case 0x08: // SUB Rm,Rn 0011nnnnmmmm1000
1889 case 0x0c: // ADD Rm,Rn 0011nnnnmmmm1100
1890 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1891 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1892 if (op & 4) {
1893 emith_add_r_r(tmp, tmp2);
1894 } else
1895 emith_sub_r_r(tmp, tmp2);
1896 goto end_op;
1897 case 0x0a: // SUBC Rm,Rn 0011nnnnmmmm1010
1898 case 0x0e: // ADDC Rm,Rn 0011nnnnmmmm1110
1899 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1900 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1901 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1902 if (drcf.delayed_op)
1903 DELAY_SAVE_T(sr);
1904 if (op & 4) { // adc
1905 emith_tpop_carry(sr, 0);
1906 emith_adcf_r_r(tmp, tmp2);
1907 emith_tpush_carry(sr, 0);
1908 } else {
1909 emith_tpop_carry(sr, 1);
1910 emith_sbcf_r_r(tmp, tmp2);
1911 emith_tpush_carry(sr, 1);
1912 }
1913 goto end_op;
1914 case 0x0b: // SUBV Rm,Rn 0011nnnnmmmm1011
1915 case 0x0f: // ADDV Rm,Rn 0011nnnnmmmm1111
1916 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1917 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1918 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1919 if (drcf.delayed_op)
1920 DELAY_SAVE_T(sr);
1921 emith_bic_r_imm(sr, T);
1922 if (op & 4) {
1923 emith_addf_r_r(tmp, tmp2);
1924 } else
1925 emith_subf_r_r(tmp, tmp2);
1926 EMITH_SJMP_START(DCOND_VC);
1927 emith_or_r_imm_c(DCOND_VS, sr, T);
1928 EMITH_SJMP_END(DCOND_VC);
1929 goto end_op;
1930 case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101
1931 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ);
1932 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ);
1933 tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE);
1934 tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE);
1935 emith_mul_s64(tmp3, tmp4, tmp, tmp2);
1936 cycles++;
1937 goto end_op;
1938 }
1939 goto default_;
1940
1941 /////////////////////////////////////////////
1942 case 0x04:
1943 switch (op & 0x0f)
1944 {
1945 case 0x00:
1946 switch (GET_Fx())
1947 {
1948 case 0: // SHLL Rn 0100nnnn00000000
1949 case 2: // SHAL Rn 0100nnnn00100000
1950 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1951 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1952 if (drcf.delayed_op)
1953 DELAY_SAVE_T(sr);
1954 emith_tpop_carry(sr, 0); // dummy
1955 emith_lslf(tmp, tmp, 1);
1956 emith_tpush_carry(sr, 0);
1957 goto end_op;
1958 case 1: // DT Rn 0100nnnn00010000
1959 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1960 if (drcf.delayed_op)
1961 DELAY_SAVE_T(sr);
1962#ifndef DRC_CMP
1963 if (FETCH_OP(pc) == 0x8bfd) { // BF #-2
1964 if (gconst_get(GET_Rn(), &tmp)) {
1965 // XXX: limit burned cycles
1966 emit_move_r_imm32(GET_Rn(), 0);
1967 emith_or_r_imm(sr, T);
1968 cycles += tmp * 4 + 1; // +1 syncs with noconst version, not sure why
1969 skip_op = 1;
1970 }
1971 else
1972 emith_sh2_dtbf_loop();
1973 goto end_op;
1974 }
1975#endif
1976 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1977 emith_bic_r_imm(sr, T);
1978 emith_subf_r_imm(tmp, 1);
1979 emit_or_t_if_eq(sr);
1980 goto end_op;
1981 }
1982 goto default_;
1983 case 0x01:
1984 switch (GET_Fx())
1985 {
1986 case 0: // SHLR Rn 0100nnnn00000001
1987 case 2: // SHAR Rn 0100nnnn00100001
1988 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
1989 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
1990 if (drcf.delayed_op)
1991 DELAY_SAVE_T(sr);
1992 emith_tpop_carry(sr, 0); // dummy
1993 if (op & 0x20) {
1994 emith_asrf(tmp, tmp, 1);
1995 } else
1996 emith_lsrf(tmp, tmp, 1);
1997 emith_tpush_carry(sr, 0);
1998 goto end_op;
1999 case 1: // CMP/PZ Rn 0100nnnn00010001
2000 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
2001 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2002 if (drcf.delayed_op)
2003 DELAY_SAVE_T(sr);
2004 emith_bic_r_imm(sr, T);
2005 emith_cmp_r_imm(tmp, 0);
2006 EMITH_SJMP_START(DCOND_LT);
2007 emith_or_r_imm_c(DCOND_GE, sr, T);
2008 EMITH_SJMP_END(DCOND_LT);
2009 goto end_op;
2010 }
2011 goto default_;
2012 case 0x02:
2013 case 0x03:
2014 switch (op & 0x3f)
2015 {
2016 case 0x02: // STS.L MACH,@-Rn 0100nnnn00000010
2017 tmp = SHR_MACH;
2018 break;
2019 case 0x12: // STS.L MACL,@-Rn 0100nnnn00010010
2020 tmp = SHR_MACL;
2021 break;
2022 case 0x22: // STS.L PR,@-Rn 0100nnnn00100010
2023 tmp = SHR_PR;
2024 break;
2025 case 0x03: // STC.L SR,@-Rn 0100nnnn00000011
2026 tmp = SHR_SR;
2027 cycles++;
2028 break;
2029 case 0x13: // STC.L GBR,@-Rn 0100nnnn00010011
2030 tmp = SHR_GBR;
2031 cycles++;
2032 break;
2033 case 0x23: // STC.L VBR,@-Rn 0100nnnn00100011
2034 tmp = SHR_VBR;
2035 cycles++;
2036 break;
2037 default:
2038 goto default_;
2039 }
2040 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW);
2041 emith_sub_r_imm(tmp2, 4);
2042 rcache_clean();
2043 rcache_get_reg_arg(0, GET_Rn());
2044 tmp3 = rcache_get_reg_arg(1, tmp);
2045 if (tmp == SHR_SR)
2046 emith_clear_msb(tmp3, tmp3, 22); // reserved bits defined by ISA as 0
2047 emit_memhandler_write(2, pc, drcf.delayed_op);
2048 goto end_op;
2049 case 0x04:
2050 case 0x05:
2051 switch (op & 0x3f)
2052 {
2053 case 0x04: // ROTL Rn 0100nnnn00000100
2054 case 0x05: // ROTR Rn 0100nnnn00000101
2055 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
2056 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2057 if (drcf.delayed_op)
2058 DELAY_SAVE_T(sr);
2059 emith_tpop_carry(sr, 0); // dummy
2060 if (op & 1) {
2061 emith_rorf(tmp, tmp, 1);
2062 } else
2063 emith_rolf(tmp, tmp, 1);
2064 emith_tpush_carry(sr, 0);
2065 goto end_op;
2066 case 0x24: // ROTCL Rn 0100nnnn00100100
2067 case 0x25: // ROTCR Rn 0100nnnn00100101
2068 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
2069 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2070 if (drcf.delayed_op)
2071 DELAY_SAVE_T(sr);
2072 emith_tpop_carry(sr, 0);
2073 if (op & 1) {
2074 emith_rorcf(tmp);
2075 } else
2076 emith_rolcf(tmp);
2077 emith_tpush_carry(sr, 0);
2078 goto end_op;
2079 case 0x15: // CMP/PL Rn 0100nnnn00010101
2080 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
2081 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2082 if (drcf.delayed_op)
2083 DELAY_SAVE_T(sr);
2084 emith_bic_r_imm(sr, T);
2085 emith_cmp_r_imm(tmp, 0);
2086 EMITH_SJMP_START(DCOND_LE);
2087 emith_or_r_imm_c(DCOND_GT, sr, T);
2088 EMITH_SJMP_END(DCOND_LE);
2089 goto end_op;
2090 }
2091 goto default_;
2092 case 0x06:
2093 case 0x07:
2094 switch (op & 0x3f)
2095 {
2096 case 0x06: // LDS.L @Rm+,MACH 0100mmmm00000110
2097 tmp = SHR_MACH;
2098 break;
2099 case 0x16: // LDS.L @Rm+,MACL 0100mmmm00010110
2100 tmp = SHR_MACL;
2101 break;
2102 case 0x26: // LDS.L @Rm+,PR 0100mmmm00100110
2103 tmp = SHR_PR;
2104 break;
2105 case 0x07: // LDC.L @Rm+,SR 0100mmmm00000111
2106 tmp = SHR_SR;
2107 cycles += 2;
2108 break;
2109 case 0x17: // LDC.L @Rm+,GBR 0100mmmm00010111
2110 tmp = SHR_GBR;
2111 cycles += 2;
2112 break;
2113 case 0x27: // LDC.L @Rm+,VBR 0100mmmm00100111
2114 tmp = SHR_VBR;
2115 cycles += 2;
2116 break;
2117 default:
2118 goto default_;
2119 }
2120 rcache_get_reg_arg(0, GET_Rn());
2121 tmp2 = emit_memhandler_read(2);
2122 if (tmp == SHR_SR) {
2123 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2124 if (drcf.delayed_op)
2125 DELAY_SAVE_T(sr);
2126 emith_write_sr(sr, tmp2);
2127 drcf.test_irq = 1;
2128 } else {
2129 tmp = rcache_get_reg(tmp, RC_GR_WRITE);
2130 emith_move_r_r(tmp, tmp2);
2131 }
2132 rcache_free_tmp(tmp2);
2133 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
2134 emith_add_r_imm(tmp, 4);
2135 goto end_op;
2136 case 0x08:
2137 case 0x09:
2138 switch (GET_Fx())
2139 {
2140 case 0:
2141 // SHLL2 Rn 0100nnnn00001000
2142 // SHLR2 Rn 0100nnnn00001001
2143 tmp = 2;
2144 break;
2145 case 1:
2146 // SHLL8 Rn 0100nnnn00011000
2147 // SHLR8 Rn 0100nnnn00011001
2148 tmp = 8;
2149 break;
2150 case 2:
2151 // SHLL16 Rn 0100nnnn00101000
2152 // SHLR16 Rn 0100nnnn00101001
2153 tmp = 16;
2154 break;
2155 default:
2156 goto default_;
2157 }
2158 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW);
2159 if (op & 1) {
2160 emith_lsr(tmp2, tmp2, tmp);
2161 } else
2162 emith_lsl(tmp2, tmp2, tmp);
2163 goto end_op;
2164 case 0x0a:
2165 switch (GET_Fx())
2166 {
2167 case 0: // LDS Rm,MACH 0100mmmm00001010
2168 tmp2 = SHR_MACH;
2169 break;
2170 case 1: // LDS Rm,MACL 0100mmmm00011010
2171 tmp2 = SHR_MACL;
2172 break;
2173 case 2: // LDS Rm,PR 0100mmmm00101010
2174 tmp2 = SHR_PR;
2175 break;
2176 default:
2177 goto default_;
2178 }
2179 emit_move_r_r(tmp2, GET_Rn());
2180 goto end_op;
2181 case 0x0b:
2182 switch (GET_Fx())
2183 {
2184 case 0: // JSR @Rm 0100mmmm00001011
2185 case 2: // JMP @Rm 0100mmmm00101011
2186 DELAYED_OP;
2187 if (!(op & 0x20))
2188 emit_move_r_imm32(SHR_PR, pc + 2);
2189 emit_move_r_r(SHR_PC, (op >> 8) & 0x0f);
2190 out_pc = (u32)-1;
2191 cycles++;
2192 break;
2193 case 1: // TAS.B @Rn 0100nnnn00011011
2194 // XXX: is TAS working on 32X?
2195 rcache_get_reg_arg(0, GET_Rn());
2196 tmp = emit_memhandler_read(0);
2197 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2198 if (drcf.delayed_op)
2199 DELAY_SAVE_T(sr);
2200 emith_bic_r_imm(sr, T);
2201 emith_cmp_r_imm(tmp, 0);
2202 emit_or_t_if_eq(sr);
2203 rcache_clean();
2204 emith_or_r_imm(tmp, 0x80);
2205 tmp2 = rcache_get_tmp_arg(1); // assuming it differs from tmp
2206 emith_move_r_r(tmp2, tmp);
2207 rcache_free_tmp(tmp);
2208 rcache_get_reg_arg(0, GET_Rn());
2209 emit_memhandler_write(0, pc, drcf.delayed_op);
2210 cycles += 3;
2211 break;
2212 default:
2213 goto default_;
2214 }
2215 goto end_op;
2216 case 0x0e:
2217 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ);
2218 switch (GET_Fx())
2219 {
2220 case 0: // LDC Rm,SR 0100mmmm00001110
2221 tmp2 = SHR_SR;
2222 break;
2223 case 1: // LDC Rm,GBR 0100mmmm00011110
2224 tmp2 = SHR_GBR;
2225 break;
2226 case 2: // LDC Rm,VBR 0100mmmm00101110
2227 tmp2 = SHR_VBR;
2228 break;
2229 default:
2230 goto default_;
2231 }
2232 if (tmp2 == SHR_SR) {
2233 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2234 if (drcf.delayed_op)
2235 DELAY_SAVE_T(sr);
2236 emith_write_sr(sr, tmp);
2237 drcf.test_irq = 1;
2238 } else {
2239 tmp2 = rcache_get_reg(tmp2, RC_GR_WRITE);
2240 emith_move_r_r(tmp2, tmp);
2241 }
2242 goto end_op;
2243 case 0x0f:
2244 // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111
2245 emit_indirect_read_double(&tmp, &tmp2, GET_Rn(), GET_Rm(), 1);
2246 emith_sext(tmp, tmp, 16);
2247 emith_sext(tmp2, tmp2, 16);
2248 tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW);
2249 tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW);
2250 emith_mula_s64(tmp3, tmp4, tmp, tmp2);
2251 rcache_free_tmp(tmp2);
2252 // XXX: MACH should be untouched when S is set?
2253 sr = rcache_get_reg(SHR_SR, RC_GR_READ);
2254 emith_tst_r_imm(sr, S);
2255 EMITH_JMP_START(DCOND_EQ);
2256
2257 emith_asr(tmp, tmp3, 31);
2258 emith_eorf_r_r(tmp, tmp4); // tmp = ((signed)macl >> 31) ^ mach
2259 EMITH_JMP_START(DCOND_EQ);
2260 emith_move_r_imm(tmp3, 0x80000000);
2261 emith_tst_r_r(tmp4, tmp4);
2262 EMITH_SJMP_START(DCOND_MI);
2263 emith_sub_r_imm_c(DCOND_PL, tmp3, 1); // positive
2264 EMITH_SJMP_END(DCOND_MI);
2265 EMITH_JMP_END(DCOND_EQ);
2266
2267 EMITH_JMP_END(DCOND_EQ);
2268 rcache_free_tmp(tmp);
2269 cycles += 2;
2270 goto end_op;
2271 }
2272 goto default_;
2273
2274 /////////////////////////////////////////////
2275 case 0x05:
2276 // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd
2277 emit_memhandler_read_rr(GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2);
2278 goto end_op;
2279
2280 /////////////////////////////////////////////
2281 case 0x06:
2282 switch (op & 0x0f)
2283 {
2284 case 0x00: // MOV.B @Rm,Rn 0110nnnnmmmm0000
2285 case 0x01: // MOV.W @Rm,Rn 0110nnnnmmmm0001
2286 case 0x02: // MOV.L @Rm,Rn 0110nnnnmmmm0010
2287 case 0x04: // MOV.B @Rm+,Rn 0110nnnnmmmm0100
2288 case 0x05: // MOV.W @Rm+,Rn 0110nnnnmmmm0101
2289 case 0x06: // MOV.L @Rm+,Rn 0110nnnnmmmm0110
2290 emit_memhandler_read_rr(GET_Rn(), GET_Rm(), 0, op & 3);
2291 if ((op & 7) >= 4 && GET_Rn() != GET_Rm()) {
2292 tmp = rcache_get_reg(GET_Rm(), RC_GR_RMW);
2293 emith_add_r_imm(tmp, (1 << (op & 3)));
2294 }
2295 goto end_op;
2296 case 0x03:
2297 case 0x07 ... 0x0f:
2298 tmp = rcache_get_reg(GET_Rm(), RC_GR_READ);
2299 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE);
2300 switch (op & 0x0f)
2301 {
2302 case 0x03: // MOV Rm,Rn 0110nnnnmmmm0011
2303 emith_move_r_r(tmp2, tmp);
2304 break;
2305 case 0x07: // NOT Rm,Rn 0110nnnnmmmm0111
2306 emith_mvn_r_r(tmp2, tmp);
2307 break;
2308 case 0x08: // SWAP.B Rm,Rn 0110nnnnmmmm1000
2309 tmp3 = tmp2;
2310 if (tmp == tmp2)
2311 tmp3 = rcache_get_tmp();
2312 tmp4 = rcache_get_tmp();
2313 emith_lsr(tmp3, tmp, 16);
2314 emith_or_r_r_lsl(tmp3, tmp, 24);
2315 emith_and_r_r_imm(tmp4, tmp, 0xff00);
2316 emith_or_r_r_lsl(tmp3, tmp4, 8);
2317 emith_rol(tmp2, tmp3, 16);
2318 rcache_free_tmp(tmp4);
2319 if (tmp == tmp2)
2320 rcache_free_tmp(tmp3);
2321 break;
2322 case 0x09: // SWAP.W Rm,Rn 0110nnnnmmmm1001
2323 emith_rol(tmp2, tmp, 16);
2324 break;
2325 case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010
2326 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2327 if (drcf.delayed_op)
2328 DELAY_SAVE_T(sr);
2329 emith_tpop_carry(sr, 1);
2330 emith_negcf_r_r(tmp2, tmp);
2331 emith_tpush_carry(sr, 1);
2332 break;
2333 case 0x0b: // NEG Rm,Rn 0110nnnnmmmm1011
2334 emith_neg_r_r(tmp2, tmp);
2335 break;
2336 case 0x0c: // EXTU.B Rm,Rn 0110nnnnmmmm1100
2337 emith_clear_msb(tmp2, tmp, 24);
2338 break;
2339 case 0x0d: // EXTU.W Rm,Rn 0110nnnnmmmm1101
2340 emith_clear_msb(tmp2, tmp, 16);
2341 break;
2342 case 0x0e: // EXTS.B Rm,Rn 0110nnnnmmmm1110
2343 emith_sext(tmp2, tmp, 8);
2344 break;
2345 case 0x0f: // EXTS.W Rm,Rn 0110nnnnmmmm1111
2346 emith_sext(tmp2, tmp, 16);
2347 break;
2348 }
2349 goto end_op;
2350 }
2351 goto default_;
2352
2353 /////////////////////////////////////////////
2354 case 0x07:
2355 // ADD #imm,Rn 0111nnnniiiiiiii
2356 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW);
2357 if (op & 0x80) { // adding negative
2358 emith_sub_r_imm(tmp, -op & 0xff);
2359 } else
2360 emith_add_r_imm(tmp, op & 0xff);
2361 goto end_op;
2362
2363 /////////////////////////////////////////////
2364 case 0x08:
2365 switch (op & 0x0f00)
2366 {
2367 case 0x0000: // MOV.B R0,@(disp,Rn) 10000000nnnndddd
2368 case 0x0100: // MOV.W R0,@(disp,Rn) 10000001nnnndddd
2369 rcache_clean();
2370 tmp = rcache_get_reg_arg(0, GET_Rm());
2371 tmp2 = rcache_get_reg_arg(1, SHR_R0);
2372 tmp3 = (op & 0x100) >> 8;
2373 if (op & 0x0f)
2374 emith_add_r_imm(tmp, (op & 0x0f) << tmp3);
2375 emit_memhandler_write(tmp3, pc, drcf.delayed_op);
2376 goto end_op;
2377 case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd
2378 case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd
2379 tmp = (op & 0x100) >> 8;
2380 emit_memhandler_read_rr(SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp);
2381 goto end_op;
2382 case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii
2383 // XXX: could use cmn
2384 tmp = rcache_get_tmp();
2385 tmp2 = rcache_get_reg(0, RC_GR_READ);
2386 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2387 if (drcf.delayed_op)
2388 DELAY_SAVE_T(sr);
2389 emith_move_r_imm_s8(tmp, op & 0xff);
2390 emith_bic_r_imm(sr, T);
2391 emith_cmp_r_r(tmp2, tmp);
2392 emit_or_t_if_eq(sr);
2393 rcache_free_tmp(tmp);
2394 goto end_op;
2395 case 0x0d00: // BT/S label 10001101dddddddd
2396 case 0x0f00: // BF/S label 10001111dddddddd
2397 DELAYED_OP;
2398 // fallthrough
2399 case 0x0900: // BT label 10001001dddddddd
2400 case 0x0b00: // BF label 10001011dddddddd
2401 // will handle conditional branches later
2402 pending_branch_cond = (op & 0x0200) ? DCOND_EQ : DCOND_NE;
2403 i = ((signed int)(op << 24) >> 23);
2404 pending_branch_pc = pc + i + 2;
2405 goto end_op;
2406 }
2407 goto default_;
2408
2409 /////////////////////////////////////////////
2410 case 0x09:
2411 // MOV.W @(disp,PC),Rn 1001nnnndddddddd
2412 tmp = pc + (op & 0xff) * 2 + 2;
2413#if PROPAGATE_CONSTANTS
2414 if (tmp < end_pc + MAX_LITERAL_OFFSET && literal_addr_count < MAX_LITERALS) {
2415 ADD_TO_ARRAY(literal_addr, literal_addr_count, tmp,);
2416 gconst_new(GET_Rn(), (u32)(int)(signed short)FETCH_OP(tmp));
2417 }
2418 else
2419#endif
2420 {
2421 tmp2 = rcache_get_tmp_arg(0);
2422 emith_move_r_imm(tmp2, tmp);
2423 tmp2 = emit_memhandler_read(1);
2424 tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE);
2425 emith_sext(tmp3, tmp2, 16);
2426 rcache_free_tmp(tmp2);
2427 }
2428 goto end_op;
2429
2430 /////////////////////////////////////////////
2431 case 0x0a:
2432 // BRA label 1010dddddddddddd
2433 DELAYED_OP;
2434 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2435 tmp = ((signed int)(op << 20) >> 19);
2436 out_pc = pc + tmp + 2;
2437 if (tmp == (u32)-4)
2438 emith_clear_msb(sr, sr, 20); // burn cycles
2439 cycles++;
2440 break;
2441
2442 /////////////////////////////////////////////
2443 case 0x0b:
2444 // BSR label 1011dddddddddddd
2445 DELAYED_OP;
2446 emit_move_r_imm32(SHR_PR, pc + 2);
2447 tmp = ((signed int)(op << 20) >> 19);
2448 out_pc = pc + tmp + 2;
2449 cycles++;
2450 break;
2451
2452 /////////////////////////////////////////////
2453 case 0x0c:
2454 switch (op & 0x0f00)
2455 {
2456 case 0x0000: // MOV.B R0,@(disp,GBR) 11000000dddddddd
2457 case 0x0100: // MOV.W R0,@(disp,GBR) 11000001dddddddd
2458 case 0x0200: // MOV.L R0,@(disp,GBR) 11000010dddddddd
2459 rcache_clean();
2460 tmp = rcache_get_reg_arg(0, SHR_GBR);
2461 tmp2 = rcache_get_reg_arg(1, SHR_R0);
2462 tmp3 = (op & 0x300) >> 8;
2463 emith_add_r_imm(tmp, (op & 0xff) << tmp3);
2464 emit_memhandler_write(tmp3, pc, drcf.delayed_op);
2465 goto end_op;
2466 case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd
2467 case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd
2468 case 0x0600: // MOV.L @(disp,GBR),R0 11000110dddddddd
2469 tmp = (op & 0x300) >> 8;
2470 emit_memhandler_read_rr(SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp);
2471 goto end_op;
2472 case 0x0300: // TRAPA #imm 11000011iiiiiiii
2473 tmp = rcache_get_reg(SHR_SP, RC_GR_RMW);
2474 emith_sub_r_imm(tmp, 4*2);
2475 // push SR
2476 tmp = rcache_get_reg_arg(0, SHR_SP);
2477 emith_add_r_imm(tmp, 4);
2478 tmp = rcache_get_reg_arg(1, SHR_SR);
2479 emith_clear_msb(tmp, tmp, 22);
2480 emit_memhandler_write(2, pc, drcf.delayed_op);
2481 // push PC
2482 rcache_get_reg_arg(0, SHR_SP);
2483 tmp = rcache_get_tmp_arg(1);
2484 emith_move_r_imm(tmp, pc);
2485 emit_memhandler_write(2, pc, drcf.delayed_op);
2486 // obtain new PC
2487 emit_memhandler_read_rr(SHR_PC, SHR_VBR, (op & 0xff) * 4, 2);
2488 out_pc = (u32)-1;
2489 cycles += 7;
2490 goto end_op;
2491 case 0x0700: // MOVA @(disp,PC),R0 11000111dddddddd
2492 emit_move_r_imm32(SHR_R0, (pc + (op & 0xff) * 4 + 2) & ~3);
2493 goto end_op;
2494 case 0x0800: // TST #imm,R0 11001000iiiiiiii
2495 tmp = rcache_get_reg(SHR_R0, RC_GR_READ);
2496 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2497 if (drcf.delayed_op)
2498 DELAY_SAVE_T(sr);
2499 emith_bic_r_imm(sr, T);
2500 emith_tst_r_imm(tmp, op & 0xff);
2501 emit_or_t_if_eq(sr);
2502 goto end_op;
2503 case 0x0900: // AND #imm,R0 11001001iiiiiiii
2504 tmp = rcache_get_reg(SHR_R0, RC_GR_RMW);
2505 emith_and_r_imm(tmp, op & 0xff);
2506 goto end_op;
2507 case 0x0a00: // XOR #imm,R0 11001010iiiiiiii
2508 tmp = rcache_get_reg(SHR_R0, RC_GR_RMW);
2509 emith_eor_r_imm(tmp, op & 0xff);
2510 goto end_op;
2511 case 0x0b00: // OR #imm,R0 11001011iiiiiiii
2512 tmp = rcache_get_reg(SHR_R0, RC_GR_RMW);
2513 emith_or_r_imm(tmp, op & 0xff);
2514 goto end_op;
2515 case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii
2516 tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0);
2517 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2518 if (drcf.delayed_op)
2519 DELAY_SAVE_T(sr);
2520 emith_bic_r_imm(sr, T);
2521 emith_tst_r_imm(tmp, op & 0xff);
2522 emit_or_t_if_eq(sr);
2523 rcache_free_tmp(tmp);
2524 cycles += 2;
2525 goto end_op;
2526 case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii
2527 tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0);
2528 emith_and_r_imm(tmp, op & 0xff);
2529 goto end_rmw_op;
2530 case 0x0e00: // XOR.B #imm,@(R0,GBR) 11001110iiiiiiii
2531 tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0);
2532 emith_eor_r_imm(tmp, op & 0xff);
2533 goto end_rmw_op;
2534 case 0x0f00: // OR.B #imm,@(R0,GBR) 11001111iiiiiiii
2535 tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0);
2536 emith_or_r_imm(tmp, op & 0xff);
2537 end_rmw_op:
2538 tmp2 = rcache_get_tmp_arg(1);
2539 emith_move_r_r(tmp2, tmp);
2540 rcache_free_tmp(tmp);
2541 tmp3 = rcache_get_reg_arg(0, SHR_GBR);
2542 tmp4 = rcache_get_reg(SHR_R0, RC_GR_READ);
2543 emith_add_r_r(tmp3, tmp4);
2544 emit_memhandler_write(0, pc, drcf.delayed_op);
2545 cycles += 2;
2546 goto end_op;
2547 }
2548 goto default_;
2549
2550 /////////////////////////////////////////////
2551 case 0x0d:
2552 // MOV.L @(disp,PC),Rn 1101nnnndddddddd
2553 tmp = (pc + (op & 0xff) * 4 + 2) & ~3;
2554#if PROPAGATE_CONSTANTS
2555 if (tmp < end_pc + MAX_LITERAL_OFFSET && literal_addr_count < MAX_LITERALS) {
2556 ADD_TO_ARRAY(literal_addr, literal_addr_count, tmp,);
2557 gconst_new(GET_Rn(), FETCH32(tmp));
2558 }
2559 else
2560#endif
2561 {
2562 tmp2 = rcache_get_tmp_arg(0);
2563 emith_move_r_imm(tmp2, tmp);
2564 tmp2 = emit_memhandler_read(2);
2565 tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE);
2566 emith_move_r_r(tmp3, tmp2);
2567 rcache_free_tmp(tmp2);
2568 }
2569 goto end_op;
2570
2571 /////////////////////////////////////////////
2572 case 0x0e:
2573 // MOV #imm,Rn 1110nnnniiiiiiii
2574 emit_move_r_imm32(GET_Rn(), (u32)(signed int)(signed char)op);
2575 goto end_op;
2576
2577 default:
2578 default_:
2579 elprintf(EL_ANOMALY, "%csh2 drc: unhandled op %04x @ %08x",
2580 sh2->is_slave ? 's' : 'm', op, pc - 2);
2581 break;
2582 }
2583
2584end_op:
2585 rcache_unlock_all();
2586
2587 // conditional branch handling (with/without delay)
2588 if (pending_branch_cond != -1 && drcf.delayed_op != 2)
2589 {
2590 u32 target_pc = pending_branch_pc;
2591 int ctaken = drcf.delayed_op ? 1 : 2;
2592 void *target;
2593
2594 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2595 FLUSH_CYCLES(sr);
2596 if (drcf.use_saved_t)
2597 emith_tst_r_imm(sr, T_save);
2598 else
2599 emith_tst_r_imm(sr, T);
2600
2601 // handle cycles
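      // (the cycle counter lives in SR bits 31..12, so the taken-branch penalty
      //  is subtracted as ctaken << 12, under the same condition as the branch)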
2602 emith_sub_r_imm_c(pending_branch_cond, sr, ctaken<<12);
2603 rcache_clean();
2604
2605#if LINK_BRANCHES
2606 if (find_in_array(branch_target_pc, branch_target_count, target_pc) >= 0) {
2607 // local branch
2608 // XXX: jumps back can be linked already
2609 branch_patch_pc[branch_patch_count] = target_pc;
2610 branch_patch_ptr[branch_patch_count] = tcache_ptr;
2611 emith_jump_cond_patchable(pending_branch_cond, tcache_ptr);
2612
2613 branch_patch_count++;
2614 if (branch_patch_count == MAX_LOCAL_BRANCHES) {
2615 dbg(1, "warning: too many local branches");
2616 break;
2617 }
2618 }
2619 else
2620#endif
2621 {
2622 // can't resolve branch locally, make a block exit
2623 emit_move_r_imm32(SHR_PC, target_pc);
2624 rcache_clean();
2625
2626 target = dr_prepare_ext_branch(target_pc, sh2, tcache_id);
2627 if (target == NULL)
2628 return NULL;
2629 emith_jump_cond_patchable(pending_branch_cond, target);
2630 }
2631
2632 drcf.use_saved_t = 0;
2633 pending_branch_cond = -1;
2634 }
2635
2636 // test irq?
2637 // XXX: delay slots..
2638 if (drcf.test_irq && drcf.delayed_op != 2) {
2639 if (!drcf.delayed_op)
2640 emit_move_r_imm32(SHR_PC, pc);
2641 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2642 FLUSH_CYCLES(sr);
2643 rcache_flush();
2644 emith_call(sh2_drc_test_irq);
2645 drcf.test_irq = 0;
2646 }
2647
2648 do_host_disasm(tcache_id);
2649
2650 if (out_pc != 0 && drcf.delayed_op != 2)
2651 break;
2652 }
2653
2654 tmp = rcache_get_reg(SHR_SR, RC_GR_RMW);
2655 FLUSH_CYCLES(tmp);
2656 rcache_flush();
2657
2658 if (out_pc == (u32)-1) {
2659 // indirect jump -> back to dispatcher
2660 emith_jump(sh2_drc_dispatcher);
2661 } else {
2662 void *target;
2663 if (out_pc == 0)
2664 out_pc = pc;
2665 emit_move_r_imm32(SHR_PC, out_pc);
2666 rcache_flush();
2667
2668 target = dr_prepare_ext_branch(out_pc, sh2, tcache_id);
2669 if (target == NULL)
2670 return NULL;
2671 emith_jump_patchable(target);
2672 }
2673
2674 // link local branches
2675 for (i = 0; i < branch_patch_count; i++) {
2676 void *target;
2677 int t;
2678 t = find_in_array(branch_target_pc, branch_target_count, branch_patch_pc[i]);
2679 target = branch_target_ptr[t];
2680 if (target == NULL) {
2681 // flush pc and go back to dispatcher (this should no longer happen)
2682 dbg(1, "stray branch to %08x %p", branch_patch_pc[i], tcache_ptr);
2683 target = tcache_ptr;
2684 emit_move_r_imm32(SHR_PC, branch_patch_pc[i]);
2685 rcache_flush();
2686 emith_jump(sh2_drc_dispatcher);
2687 }
2688 emith_jump_patch(branch_patch_ptr[i], target);
2689 }
2690
2691 end_pc = pc;
2692
2693 // mark memory blocks as containing compiled code
2694 // override any overlay blocks as they become unreachable anyway
2695 if (tcache_id != 0 || (block->addr & 0xc7fc0000) == 0x06000000)
2696 {
2697 u16 *drc_ram_blk = NULL;
2698 u32 addr, mask = 0, shift = 0;
2699
2700 if (tcache_id != 0) {
2701 // data array, BIOS
2702 drc_ram_blk = Pico32xMem->drcblk_da[sh2->is_slave];
2703 shift = SH2_DRCBLK_DA_SHIFT;
2704 mask = 0xfff;
2705 }
2706 else if ((block->addr & 0xc7fc0000) == 0x06000000) {
2707 // SDRAM
2708 drc_ram_blk = Pico32xMem->drcblk_ram;
2709 shift = SH2_DRCBLK_RAM_SHIFT;
2710 mask = 0x3ffff;
2711 }
2712
2713 // mark recompiled insns
2714 drc_ram_blk[(base_pc & mask) >> shift] = 1;
2715 for (pc = base_pc; pc < end_pc; pc += 2)
2716 drc_ram_blk[(pc & mask) >> shift] = 1;
2717
2718 // mark literals
2719 for (i = 0; i < literal_addr_count; i++) {
2720 tmp = literal_addr[i];
2721 drc_ram_blk[(tmp & mask) >> shift] = 1;
2722 }
2723
2724 // add to invalidation lookup lists
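    // register the block on every ADDR_TO_BLOCK_PAGE-sized page it (and its
    // literal area) touches, so a write anywhere in that range can find it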
2725 addr = base_pc & ~(ADDR_TO_BLOCK_PAGE - 1);
2726 for (; addr < end_pc + MAX_LITERAL_OFFSET; addr += ADDR_TO_BLOCK_PAGE) {
2727 i = (addr & mask) / ADDR_TO_BLOCK_PAGE;
2728 add_to_block_list(&inval_lookup[tcache_id][i], block);
2729 }
2730 }
2731
2732 tcache_ptrs[tcache_id] = tcache_ptr;
2733
2734 host_instructions_updated(block_entry_ptr, tcache_ptr);
2735
2736 do_host_disasm(tcache_id);
2737 dbg(2, " block #%d,%d tcache %d/%d, insns %d -> %d %.3f",
2738 tcache_id, blkid_main,
2739 tcache_ptr - tcache_bases[tcache_id], tcache_sizes[tcache_id],
2740 insns_compiled, host_insn_count, (float)host_insn_count / insns_compiled);
2741 if ((sh2->pc & 0xc6000000) == 0x02000000) // ROM
2742 dbg(2, " hash collisions %d/%d", hash_collisions, block_counts[tcache_id]);
2743/*
2744 printf("~~~\n");
2745 tcache_dsm_ptrs[tcache_id] = block_entry_ptr;
2746 do_host_disasm(tcache_id);
2747 printf("~~~\n");
2748*/
2749
2750#if (DRC_DEBUG & 4)
2751 fflush(stdout);
2752#endif
2753
2754 return block_entry_ptr;
2755}
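
/*
 * illustrative sketch: the PC-relative literal addressing resolved at translate
 * time by the 0x09/0x0d cases above ('pc' already points past the opcode, as in
 * the decoder loop), before the end_pc + MAX_LITERAL_OFFSET / MAX_LITERALS check
 * decides whether the value can be constant-propagated
 */
#if 0
static u32 movw_literal_addr(u32 pc, u16 op)   // MOV.W @(disp,PC),Rn
{
  return pc + (op & 0xff) * 2 + 2;
}

static u32 movl_literal_addr(u32 pc, u16 op)   // MOV.L @(disp,PC),Rn
{
  return (pc + (op & 0xff) * 4 + 2) & ~3;      // long access ignores PC bit 1
}
#endif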
2756
2757static void sh2_generate_utils(void)
2758{
2759 int arg0, arg1, arg2, sr, tmp;
2760 void *sh2_drc_write_end, *sh2_drc_write_slot_end;
2761
2762 sh2_drc_write32 = p32x_sh2_write32;
2763 sh2_drc_read8 = p32x_sh2_read8;
2764 sh2_drc_read16 = p32x_sh2_read16;
2765 sh2_drc_read32 = p32x_sh2_read32;
2766
2767 host_arg2reg(arg0, 0);
2768 host_arg2reg(arg1, 1);
2769 host_arg2reg(arg2, 2);
2770 emith_move_r_r(arg0, arg0); // nop
2771
2772 // sh2_drc_exit(void)
2773 sh2_drc_exit = (void *)tcache_ptr;
2774 emit_do_static_regs(1, arg2);
2775 emith_sh2_drc_exit();
2776
2777 // sh2_drc_dispatcher(void)
2778 sh2_drc_dispatcher = (void *)tcache_ptr;
2779 sr = rcache_get_reg(SHR_SR, RC_GR_READ);
2780 emith_cmp_r_imm(sr, 0);
2781 emith_jump_cond(DCOND_LT, sh2_drc_exit);
2782 rcache_invalidate();
2783 emith_ctx_read(arg0, SHR_PC * 4);
2784 emith_ctx_read(arg1, offsetof(SH2, is_slave));
2785 emith_add_r_r_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp));
2786 emith_call(dr_lookup_block);
2787 emit_block_entry();
2788 // lookup failed, call sh2_translate()
2789 emith_move_r_r(arg0, CONTEXT_REG);
2790 emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id
2791 emith_call(sh2_translate);
2792 emit_block_entry();
2793 // sh2_translate() failed, flush cache and retry
2794 emith_ctx_read(arg0, offsetof(SH2, drc_tmp));
2795 emith_call(flush_tcache);
2796 emith_move_r_r(arg0, CONTEXT_REG);
2797 emith_ctx_read(arg1, offsetof(SH2, drc_tmp));
2798 emith_call(sh2_translate);
2799 emit_block_entry();
2800 // XXX: can't translate, fail
2801 emith_call(dr_failure);
2802
2803 // sh2_drc_test_irq(void)
2804 // assumes it's called from main function (may jump to dispatcher)
2805 sh2_drc_test_irq = (void *)tcache_ptr;
2806 emith_ctx_read(arg1, offsetof(SH2, pending_level));
2807 sr = rcache_get_reg(SHR_SR, RC_GR_READ);
2808 emith_lsr(arg0, sr, I_SHIFT);
2809 emith_and_r_imm(arg0, 0x0f);
2810 emith_cmp_r_r(arg1, arg0); // pending_level > ((sr >> 4) & 0x0f)?
2811 EMITH_SJMP_START(DCOND_GT);
2812 emith_ret_c(DCOND_LE); // nope, return
2813 EMITH_SJMP_END(DCOND_GT);
2814 // adjust SP
2815 tmp = rcache_get_reg(SHR_SP, RC_GR_RMW);
2816 emith_sub_r_imm(tmp, 4*2);
2817 rcache_clean();
2818 // push SR
2819 tmp = rcache_get_reg_arg(0, SHR_SP);
2820 emith_add_r_imm(tmp, 4);
2821 tmp = rcache_get_reg_arg(1, SHR_SR);
2822 emith_clear_msb(tmp, tmp, 22);
2823 emith_move_r_r(arg2, CONTEXT_REG);
2824 emith_call(p32x_sh2_write32); // XXX: use sh2_drc_write32?
2825 rcache_invalidate();
2826 // push PC
2827 rcache_get_reg_arg(0, SHR_SP);
2828 emith_ctx_read(arg1, SHR_PC * 4);
2829 emith_move_r_r(arg2, CONTEXT_REG);
2830 emith_call(p32x_sh2_write32);
2831 rcache_invalidate();
2832 // update I, cycles, do callback
2833 emith_ctx_read(arg1, offsetof(SH2, pending_level));
2834 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2835 emith_bic_r_imm(sr, I);
2836 emith_or_r_r_lsl(sr, arg1, I_SHIFT);
2837 emith_sub_r_imm(sr, 13 << 12); // at least 13 cycles
2838 rcache_flush();
2839 emith_move_r_r(arg0, CONTEXT_REG);
2840 emith_call_ctx(offsetof(SH2, irq_callback)); // vector = sh2->irq_callback(sh2, level);
2841 // obtain new PC
2842 emith_lsl(arg0, arg0, 2);
2843 emith_ctx_read(arg1, SHR_VBR * 4);
2844 emith_add_r_r(arg0, arg1);
2845 emit_memhandler_read(2);
2846 emith_ctx_write(arg0, SHR_PC * 4);
2847#ifdef __i386__
2848 emith_add_r_imm(xSP, 4); // fix stack
2849#endif
2850 emith_jump(sh2_drc_dispatcher);
2851 rcache_invalidate();
2852
2853 // sh2_drc_entry(SH2 *sh2)
2854 sh2_drc_entry = (void *)tcache_ptr;
2855 emith_sh2_drc_entry();
2856 emith_move_r_r(CONTEXT_REG, arg0); // move ctx, arg0
2857 emit_do_static_regs(0, arg2);
2858 emith_call(sh2_drc_test_irq);
2859 emith_jump(sh2_drc_dispatcher);
2860
2861 // write-caused irq detection
2862 sh2_drc_write_end = tcache_ptr;
2863 emith_tst_r_r(arg0, arg0);
2864 EMITH_SJMP_START(DCOND_NE);
2865 emith_jump_ctx_c(DCOND_EQ, offsetof(SH2, drc_tmp)); // return
2866 EMITH_SJMP_END(DCOND_NE);
2867 emith_call(sh2_drc_test_irq);
2868 emith_jump_ctx(offsetof(SH2, drc_tmp));
2869
2870 // write-caused irq detection for writes in delay slot
2871 sh2_drc_write_slot_end = tcache_ptr;
2872 emith_tst_r_r(arg0, arg0);
2873 EMITH_SJMP_START(DCOND_NE);
2874 emith_jump_ctx_c(DCOND_EQ, offsetof(SH2, drc_tmp));
2875 EMITH_SJMP_END(DCOND_NE);
2876 // just burn cycles to get back to dispatcher after branch is handled
2877 sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
2878 emith_ctx_write(sr, offsetof(SH2, irq_cycles));
2879 emith_clear_msb(sr, sr, 20); // clear cycles
2880 rcache_flush();
2881 emith_jump_ctx(offsetof(SH2, drc_tmp));
2882
2883 // sh2_drc_write8(u32 a, u32 d)
2884 sh2_drc_write8 = (void *)tcache_ptr;
2885 emith_ret_to_ctx(offsetof(SH2, drc_tmp));
2886 emith_ctx_read(arg2, offsetof(SH2, write8_tab));
2887 emith_sh2_wcall(arg0, arg2, sh2_drc_write_end);
2888
2889 // sh2_drc_write16(u32 a, u32 d)
2890 sh2_drc_write16 = (void *)tcache_ptr;
2891 emith_ret_to_ctx(offsetof(SH2, drc_tmp));
2892 emith_ctx_read(arg2, offsetof(SH2, write16_tab));
2893 emith_sh2_wcall(arg0, arg2, sh2_drc_write_end);
2894
2895 // sh2_drc_write8_slot(u32 a, u32 d)
2896 sh2_drc_write8_slot = (void *)tcache_ptr;
2897 emith_ret_to_ctx(offsetof(SH2, drc_tmp));
2898 emith_ctx_read(arg2, offsetof(SH2, write8_tab));
2899 emith_sh2_wcall(arg0, arg2, sh2_drc_write_slot_end);
2900
2901 // sh2_drc_write16_slot(u32 a, u32 d)
2902 sh2_drc_write16_slot = (void *)tcache_ptr;
2903 emith_ret_to_ctx(offsetof(SH2, drc_tmp));
2904 emith_ctx_read(arg2, offsetof(SH2, write16_tab));
2905 emith_sh2_wcall(arg0, arg2, sh2_drc_write_slot_end);
2906
2907#ifdef PDB_NET
2908 // debug
2909 #define MAKE_READ_WRAPPER(func) { \
2910 void *tmp = (void *)tcache_ptr; \
2911 emith_push_ret(); \
2912 emith_call(func); \
2913 emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[0])); \
2914 emith_addf_r_r(arg2, arg0); \
2915 emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[0])); \
2916 emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[1])); \
2917 emith_adc_r_imm(arg2, 0x01000000); \
2918 emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \
2919 emith_pop_and_ret(); \
2920 func = tmp; \
2921 }
2922 #define MAKE_WRITE_WRAPPER(func) { \
2923 void *tmp = (void *)tcache_ptr; \
2924 emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[0])); \
2925 emith_addf_r_r(arg2, arg1); \
2926 emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[0])); \
2927 emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[1])); \
2928 emith_adc_r_imm(arg2, 0x01000000); \
2929 emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \
2930 emith_move_r_r(arg2, CONTEXT_REG); \
2931 emith_jump(func); \
2932 func = tmp; \
2933 }
2934
2935 MAKE_READ_WRAPPER(sh2_drc_read8);
2936 MAKE_READ_WRAPPER(sh2_drc_read16);
2937 MAKE_READ_WRAPPER(sh2_drc_read32);
2938 MAKE_WRITE_WRAPPER(sh2_drc_write8);
2939 MAKE_WRITE_WRAPPER(sh2_drc_write8_slot);
2940 MAKE_WRITE_WRAPPER(sh2_drc_write16);
2941 MAKE_WRITE_WRAPPER(sh2_drc_write16_slot);
2942 MAKE_WRITE_WRAPPER(sh2_drc_write32);
2943#if (DRC_DEBUG & 4)
2944 host_dasm_new_symbol(sh2_drc_read8);
2945 host_dasm_new_symbol(sh2_drc_read16);
2946 host_dasm_new_symbol(sh2_drc_read32);
2947 host_dasm_new_symbol(sh2_drc_write32);
2948#endif
2949#endif
2950
2951 rcache_invalidate();
2952#if (DRC_DEBUG & 4)
2953 host_dasm_new_symbol(sh2_drc_entry);
2954 host_dasm_new_symbol(sh2_drc_dispatcher);
2955 host_dasm_new_symbol(sh2_drc_exit);
2956 host_dasm_new_symbol(sh2_drc_test_irq);
2957 host_dasm_new_symbol(sh2_drc_write_end);
2958 host_dasm_new_symbol(sh2_drc_write_slot_end);
2959 host_dasm_new_symbol(sh2_drc_write8);
2960 host_dasm_new_symbol(sh2_drc_write8_slot);
2961 host_dasm_new_symbol(sh2_drc_write16);
2962 host_dasm_new_symbol(sh2_drc_write16_slot);
2963#endif
2964}
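
/*
 * illustrative sketch: the acceptance test sh2_drc_test_irq() emits above,
 * written out in plain C - the interrupt is only taken when its pending
 * level exceeds the I mask field currently held in SR
 */
#if 0
static int irq_would_be_taken(SH2 *sh2, u32 sr)
{
  return sh2->pending_level > ((sr >> I_SHIFT) & 0x0f);
}
#endif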
2965
2966static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 ram_mask)
2967{
2968 void *tmp;
2969 u32 i, addr;
2970
2971 dbg(2, " killing entry %08x-%08x, blkid %d,%d",
2972 bd->addr, bd->end_addr, tcache_id, bd - block_tables[tcache_id]);
2973 if (bd->addr == 0 || bd->entry_count == 0) {
2974 dbg(1, " killing dead block!? %08x", bd->addr);
2975 return;
2976 }
2977
2978 // remove from inval_lookup
2979 addr = bd->addr & ~(ADDR_TO_BLOCK_PAGE - 1);
2980 for (; addr < bd->end_addr; addr += ADDR_TO_BLOCK_PAGE) {
2981 i = (addr & ram_mask) / ADDR_TO_BLOCK_PAGE;
2982 rm_from_block_list(&inval_lookup[tcache_id][i], bd);
2983 }
2984
2985 tmp = tcache_ptr;
2986
2987 // remove from hash table
2988 // XXX: maybe kill links somehow instead?
2989 for (i = 0; i < bd->entry_count; i++) {
2990 rm_from_hashlist(&bd->entryp[i], tcache_id);
2991
2992 // since tcache space of dead blocks is never reused, patch each entry
2993 // with a jump to the dispatcher so blocks linked to this one stay safe
2994 tcache_ptr = bd->entryp[i].tcache_ptr;
2995 emit_move_r_imm32(SHR_PC, bd->addr);
2996 rcache_flush();
2997 emith_jump(sh2_drc_dispatcher);
2998
2999 host_instructions_updated(bd->entryp[i].tcache_ptr, tcache_ptr);
3000 }
3001
3002 tcache_ptr = tmp;
3003
3004 bd->addr = bd->end_addr = 0;
3005 bd->entry_count = 0;
3006}
3007
3008static void sh2_smc_rm_block(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, u32 mask)
3009{
3010 struct block_list **blist = NULL, *entry;
3011 u32 from = ~0, to = 0;
3012 struct block_desc *block;
3013
3014 blist = &inval_lookup[tcache_id][(a & mask) / ADDR_TO_BLOCK_PAGE];
3015 entry = *blist;
3016 while (entry != NULL) {
3017 block = entry->block;
3018 if (block->addr <= a && a < block->end_addr) {
3019 if (block->addr < from)
3020 from = block->addr;
3021 if (block->end_addr > to)
3022 to = block->end_addr;
3023
3024 sh2_smc_rm_block_entry(block, tcache_id, mask);
3025
3026 // entry lost, restart search
3027 entry = *blist;
3028 continue;
3029 }
3030 entry = entry->next;
3031 }
3032
3033 // clear entry points
3034 if (from < to) {
3035 u16 *p = drc_ram_blk + ((from & mask) >> shift);
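    // drc_ram_blk holds one u16 entry per (1 << shift) bytes of address space,
    // so (to - from) bytes of code map to (to - from) >> (shift - 1) bytes here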
3036 memset(p, 0, (to - from) >> (shift - 1));
3037 }
3038}
3039
3040void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid)
3041{
3042 dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a);
3043 sh2_smc_rm_block(a, Pico32xMem->drcblk_ram, 0, SH2_DRCBLK_RAM_SHIFT, 0x3ffff);
3044}
3045
3046void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid)
3047{
3048 dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a);
3049 sh2_smc_rm_block(a, Pico32xMem->drcblk_da[cpuid],
3050 1 + cpuid, SH2_DRCBLK_DA_SHIFT, 0xfff);
3051}
3052
3053int sh2_execute(SH2 *sh2c, int cycles)
3054{
3055 int ret_cycles;
3056
3057 sh2c->cycles_timeslice = cycles;
3058
3059 // cycles are kept in SHR_SR unused bits (upper 20)
3060 // bit11 contains T saved for delay slot
3061 // others are usual SH2 flags
3062 sh2c->sr &= 0x3f3;
3063 sh2c->sr |= cycles << 12;
3064 sh2_drc_entry(sh2c);
3065
3066 // TODO: irq cycles
3067 ret_cycles = (signed int)sh2c->sr >> 12;
3068 if (ret_cycles > 0)
3069 dbg(1, "warning: drc returned with cycles: %d", ret_cycles);
3070
3071 return sh2c->cycles_timeslice - ret_cycles;
3072}
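
/*
 * illustrative sketch of the SR packing sh2_execute() sets up above: the cycle
 * budget lives in the otherwise unused upper bits (31..12), bit 11 keeps the
 * delay-slot T copy, and the 0x3f3 mask preserves the regular SH2 flag bits
 */
#if 0
static void sr_load_timeslice(SH2 *sh2c, int cycles)
{
  sh2c->sr = (sh2c->sr & 0x3f3) | (cycles << 12);
}

static int sr_cycles_left(SH2 *sh2c)
{
  return (signed int)sh2c->sr >> 12;   // goes negative once the timeslice is used up
}
#endif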
3073
3074#if (DRC_DEBUG & 2)
3075void block_stats(void)
3076{
3077 int c, b, i, total = 0;
3078
3079 printf("block stats:\n");
3080 for (b = 0; b < ARRAY_SIZE(block_tables); b++)
3081 for (i = 0; i < block_counts[b]; i++)
3082 if (block_tables[b][i].addr != 0)
3083 total += block_tables[b][i].refcount;
3084
3085 for (c = 0; c < 10; c++) {
3086 struct block_desc *blk, *maxb = NULL;
3087 int max = 0;
3088 for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
3089 for (i = 0; i < block_counts[b]; i++) {
3090 blk = &block_tables[b][i];
3091 if (blk->addr != 0 && blk->refcount > max) {
3092 max = blk->refcount;
3093 maxb = blk;
3094 }
3095 }
3096 }
3097 if (maxb == NULL)
3098 break;
3099 printf("%08x %9d %2.3f%%\n", maxb->addr, maxb->refcount,
3100 (double)maxb->refcount / total * 100.0);
3101 maxb->refcount = 0;
3102 }
3103
3104 for (b = 0; b < ARRAY_SIZE(block_tables); b++)
3105 for (i = 0; i < block_counts[b]; i++)
3106 block_tables[b][i].refcount = 0;
3107}
3108#else
3109#define block_stats()
3110#endif
3111
3112void sh2_drc_flush_all(void)
3113{
3114 block_stats();
3115 flush_tcache(0);
3116 flush_tcache(1);
3117 flush_tcache(2);
3118}
3119
3120void sh2_drc_mem_setup(SH2 *sh2)
3121{
3122 // fill the convenience pointers
3123 sh2->p_bios = sh2->is_slave ? Pico32xMem->sh2_rom_s : Pico32xMem->sh2_rom_m;
3124 sh2->p_da = Pico32xMem->data_array[sh2->is_slave];
3125 sh2->p_sdram = Pico32xMem->sdram;
3126 sh2->p_rom = Pico.rom;
3127}
3128
3129int sh2_drc_init(SH2 *sh2)
3130{
3131 int i;
3132
3133 if (block_tables[0] == NULL)
3134 {
3135 for (i = 0; i < TCACHE_BUFFERS; i++) {
3136 block_tables[i] = calloc(block_max_counts[i], sizeof(*block_tables[0]));
3137 if (block_tables[i] == NULL)
3138 goto fail;
3139 // max 2 block links (exits) per block
3140 block_links[i] = calloc(block_link_max_counts[i], sizeof(*block_links[0]));
3141 if (block_links[i] == NULL)
3142 goto fail;
3143
3144 inval_lookup[i] = calloc(ram_sizes[i] / ADDR_TO_BLOCK_PAGE,
3145 sizeof(inval_lookup[0]));
3146 if (inval_lookup[i] == NULL)
3147 goto fail;
3148
3149 hash_tables[i] = calloc(hash_table_sizes[i], sizeof(*hash_tables[0]));
3150 if (hash_tables[i] == NULL)
3151 goto fail;
3152 }
3153 memset(block_counts, 0, sizeof(block_counts));
3154 memset(block_link_counts, 0, sizeof(block_link_counts));
3155
3156 drc_cmn_init();
3157 tcache_ptr = tcache;
3158 sh2_generate_utils();
3159 host_instructions_updated(tcache, tcache_ptr);
3160
3161 tcache_bases[0] = tcache_ptrs[0] = tcache_ptr;
3162 for (i = 1; i < ARRAY_SIZE(tcache_bases); i++)
3163 tcache_bases[i] = tcache_ptrs[i] = tcache_bases[i - 1] + tcache_sizes[i - 1];
3164
3165 // tmp
3166 PicoOpt |= POPT_DIS_VDP_FIFO;
3167
3168#if (DRC_DEBUG & 4)
3169 for (i = 0; i < ARRAY_SIZE(block_tables); i++)
3170 tcache_dsm_ptrs[i] = tcache_bases[i];
3171 // disasm the utils
3172 tcache_dsm_ptrs[0] = tcache;
3173 do_host_disasm(0);
3174#endif
3175#if (DRC_DEBUG & 1)
3176 hash_collisions = 0;
3177#endif
3178 }
3179
3180 return 0;
3181
3182fail:
3183 sh2_drc_finish(sh2);
3184 return -1;
3185}
3186
3187void sh2_drc_finish(SH2 *sh2)
3188{
3189 int i;
3190
3191 if (block_tables[0] == NULL)
3192 return;
3193
3194 sh2_drc_flush_all();
3195
3196 for (i = 0; i < TCACHE_BUFFERS; i++) {
3197#if (DRC_DEBUG & 4)
3198 printf("~~~ tcache %d\n", i);
3199 tcache_dsm_ptrs[i] = tcache_bases[i];
3200 tcache_ptr = tcache_ptrs[i];
3201 do_host_disasm(i);
3202#endif
3203
3204 if (block_tables[i] != NULL)
3205 free(block_tables[i]);
3206 block_tables[i] = NULL;
3207 if (block_links[i] != NULL)
3208 free(block_links[i]);
3209 block_links[i] = NULL;
3210
3211 if (inval_lookup[i] != NULL)
3212 free(inval_lookup[i]);
3213 inval_lookup[i] = NULL;
3214
3215 if (hash_tables[i] != NULL) {
3216 free(hash_tables[i]);
3217 hash_tables[i] = NULL;
3218 }
3219 }
3220
3221 drc_cmn_cleanup();
3222}
3223
3224#endif /* DRC_SH2 */
3225
3226static void *dr_get_pc_base(u32 pc, int is_slave)
3227{
3228 void *ret = NULL;
3229 u32 mask = 0;
3230
3231 if ((pc & ~0x7ff) == 0) {
3232 // BIOS
3233 ret = is_slave ? Pico32xMem->sh2_rom_s : Pico32xMem->sh2_rom_m;
3234 mask = 0x7ff;
3235 }
3236 else if ((pc & 0xfffff000) == 0xc0000000) {
3237 // data array
3238 ret = Pico32xMem->data_array[is_slave];
3239 mask = 0xfff;
3240 }
3241 else if ((pc & 0xc6000000) == 0x06000000) {
3242 // SDRAM
3243 ret = Pico32xMem->sdram;
3244 mask = 0x03ffff;
3245 }
3246 else if ((pc & 0xc6000000) == 0x02000000) {
3247 // ROM
3248 ret = Pico.rom;
3249 mask = 0x3fffff;
3250 }
3251
3252 if (ret == NULL)
3253 return (void *)-1; // NULL is valid value
3254
3255 return (char *)ret - (pc & ~mask);
3256}
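
/*
 * usage sketch: the pointer dr_get_pc_base() returns is pre-biased by the region
 * base, so the SH2 PC can index it directly - this is what FETCH_OP()/FETCH32()
 * and scan_block() below rely on
 */
#if 0
static u16 fetch_op_at(u32 pc, int is_slave)
{
  u16 *dr_pc_base = dr_get_pc_base(pc, is_slave);
  if (dr_pc_base == (void *)-1)    // PC not backed by a translatable region
    return 0;
  return dr_pc_base[pc / 2];       // same indexing as FETCH_OP(pc)
}
#endif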
3257
3258void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc)
3259{
3260 u16 *dr_pc_base;
3261 u32 pc, target, op;
3262 int cycles;
3263
3264 memset(op_flags, 0, BLOCK_INSN_LIMIT);
3265
3266 dr_pc_base = dr_get_pc_base(base_pc, is_slave);
3267
3268 for (cycles = 0, pc = base_pc; cycles < BLOCK_INSN_LIMIT-1; cycles++, pc += 2) {
3269 op = FETCH_OP(pc);
3270 if ((op & 0xf000) == 0xa000 || (op & 0xf000) == 0xb000) { // BRA, BSR
3271 signed int offs = ((signed int)(op << 20) >> 19);
3272 pc += 2;
3273 OP_FLAGS(pc) |= OF_DELAY_OP;
3274 target = pc + offs + 2;
3275 if (base_pc <= target && target < base_pc + BLOCK_INSN_LIMIT * 2)
3276 OP_FLAGS(target) |= OF_BTARGET;
3277 break;
3278 }
3279 if ((op & 0xf000) == 0) {
3280 op &= 0xff;
3281 if (op == 0x1b) // SLEEP
3282 break;
3283 // BRAF, BSRF, RTS, RTE
3284 if (op == 0x23 || op == 0x03 || op == 0x0b || op == 0x2b) {
3285 pc += 2;
3286 OP_FLAGS(pc) |= OF_DELAY_OP;
3287 break;
3288 }
3289 continue;
3290 }
3291 if ((op & 0xf0df) == 0x400b) { // JMP, JSR
3292 pc += 2;
3293 OP_FLAGS(pc) |= OF_DELAY_OP;
3294 break;
3295 }
3296 if ((op & 0xf900) == 0x8900) { // BT(S), BF(S)
3297 signed int offs = ((signed int)(op << 24) >> 23);
3298 if (op & 0x0400)
3299 OP_FLAGS(pc + 2) |= OF_DELAY_OP;
3300 target = pc + offs + 4;
3301 if (base_pc <= target && target < base_pc + BLOCK_INSN_LIMIT * 2)
3302 OP_FLAGS(target) |= OF_BTARGET;
3303 }
3304 if ((op & 0xff00) == 0xc300) // TRAPA
3305 break;
3306 }
3307 *end_pc = pc;
3308}
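
/*
 * illustrative sketch: the sign-extending shifts scan_block() and the translator
 * above use to turn branch displacement fields into byte offsets (a 12-bit field
 * for BRA/BSR, an 8-bit field for BT/BF and BT/S/BF/S, both scaled by 2)
 */
#if 0
static int bra_disp_bytes(u32 op)   // BRA/BSR: displacement in bits 11..0
{
  return (signed int)(op << 20) >> 19;
}

static int bt_disp_bytes(u32 op)    // BT/BF(/S): displacement in bits 7..0
{
  return (signed int)(op << 24) >> 23;
}
#endif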
3309
3310// vim:shiftwidth=2:ts=2:expandtab