drc: add a timing hack for Internal Section
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - new_dynarec.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21#include <stdlib.h>
22#include <stdint.h> //include for uint64_t
23#include <assert.h>
d848b60a 24#include <errno.h>
4600ba03 25#include <sys/mman.h>
d148d265 26#ifdef __MACH__
27#include <libkern/OSCacheControl.h>
28#endif
1e212a25 29#ifdef _3DS
30#include <3ds_utils.h>
31#endif
32#ifdef VITA
33#include <psp2/kernel/sysmem.h>
34static int sceBlock;
35#endif
57871462 36
d148d265 37#include "new_dynarec_config.h"
3968e69e 38#include "../psxhle.h"
39#include "../psxinterpreter.h"
3d624f89 40#include "emu_if.h" //emulator interface
57871462 41
d1e4ebd9 42#define noinline __attribute__((noinline,noclone))
b14b6a8f 43#ifndef ARRAY_SIZE
44#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
45#endif
46
4600ba03 47//#define DISASM
48//#define assem_debug printf
49//#define inv_debug printf
50#define assem_debug(...)
51#define inv_debug(...)
57871462 52
53#ifdef __i386__
54#include "assem_x86.h"
55#endif
56#ifdef __x86_64__
57#include "assem_x64.h"
58#endif
59#ifdef __arm__
60#include "assem_arm.h"
61#endif
be516ebe 62#ifdef __aarch64__
63#include "assem_arm64.h"
64#endif
57871462 65
66#define MAXBLOCK 4096
67#define MAX_OUTPUT_BLOCK_SIZE 262144
2573466a 68
// Layout of the dynarec's code memory: the translation cache followed by
// a small trampoline area used to reach functions that are out of range
// of a direct branch (see get_trampoline / emit_far_jump).
struct ndrc_mem
{
  u_char translation_cache[1 << TARGET_SIZE_2];
  struct
  {
    // ops[i] is a tiny thunk that transfers control to f[i]
    struct tramp_insns ops[2048 / sizeof(struct tramp_insns)];
    const void *f[2048 / sizeof(void *)];
  } tramp;
};

#ifdef BASE_ADDR_DYNAMIC
// cache is mapped at runtime (platform decides the address)
static struct ndrc_mem *ndrc;
#else
// cache lives in the image, page-aligned for mprotect()
static struct ndrc_mem ndrc_ __attribute__((aligned(4096)));
static struct ndrc_mem *ndrc = &ndrc_;
#endif
85
// stubs
// Kinds of out-of-line code fragments emitted after a block's main body
// (slow paths patched in via add_stub/add_stub_r).
enum stub_type {
  CC_STUB = 1,       // cycle-count / interrupt check
  FP_STUB = 2,
  LOADB_STUB = 3,    // slow-path memory loads...
  LOADH_STUB = 4,
  LOADW_STUB = 5,
  LOADD_STUB = 6,
  LOADBU_STUB = 7,
  LOADHU_STUB = 8,
  STOREB_STUB = 9,   // ...and stores
  STOREH_STUB = 10,
  STOREW_STUB = 11,
  STORED_STUB = 12,
  STORELR_STUB = 13, // unaligned (SWL/SWR-style) store
  INVCODE_STUB = 14, // self-modifying-code invalidation (see do_invstub)
};
103
// Register-allocation state at one instruction boundary.
// regmap[] maps host register index -> guest (MIPS) register, -1 = free
// (see get_reg / clear_all_regs / count_free_regs).
struct regstat
{
  signed char regmap_entry[HOST_REGS]; // mapping required on entry to this insn
  signed char regmap[HOST_REGS];       // current host -> guest mapping
  uint64_t wasdirty;  // per-host-reg dirty bits before the insn
  uint64_t dirty;     // host regs holding values not yet written back
  uint64_t u;         // presumably: guest regs unneeded past this point — verify against callers
  u_int wasconst;     // per-host-reg const bits before the insn
  u_int isconst;      // host regs currently holding a known constant (see set_const)
  u_int loadedconst; // host regs that have constants loaded
  u_int waswritten; // MIPS regs that were used as store base before
};
116
// note: asm depends on this layout
// Linked-list node mapping a guest virtual address to compiled code;
// used by the jump_in / jump_out / jump_dirty page tables.
struct ll_entry
{
  u_int vaddr;           // guest virtual address of the block entry
  u_int reg_sv_flags;    // flags recorded by ll_add_flags (0 for plain ll_add)
  void *addr;            // pointer into the translation cache
  struct ll_entry *next; // next entry on the same page list
};
125
// Two-way hash-table bin; slot 0 holds the most recently added entry
// (hash_table_add demotes slot 0 to slot 1). vaddr == -1 means empty.
struct ht_entry
{
  u_int vaddr[2];  // guest virtual addresses
  void *tcaddr[2]; // matching translation-cache entry points
};
131
// A pending out-of-line stub recorded during assembly (filled by
// add_stub/add_stub_r, emitted after the block body).
struct code_stub
{
  enum stub_type type;
  void *addr;    // location of the stub / branch to patch
  void *retaddr; // where the stub returns into the main code path
  u_int a;       // a..e: stub-type-specific operands (see add_stub callers)
  uintptr_t b;
  uintptr_t c;
  u_int d;
  u_int e;
};
143
// A branch in freshly emitted code that still needs to be linked to its
// target block (collected in link_addr[], resolved after assembly).
struct link_entry
{
  void *addr;   // location of the branch in the translation cache
  u_int target; // guest virtual address of the branch target
  u_int ext;    // passed through add_to_linker(); meaning defined at use sites
};
150
  // used by asm:
  u_char *out;                 // current output pointer into the translation cache
  struct ht_entry hash_table[65536] __attribute__((aligned(16)));
  struct ll_entry *jump_in[4096] __attribute__((aligned(16)));   // clean compiled blocks, per page
  struct ll_entry *jump_dirty[4096];                             // invalidated blocks, may be revalidated

  static struct ll_entry *jump_out[4096]; // outgoing links, per target page
  // --- per-block analysis state, indexed by instruction slot i ---
  static u_int start;                  // guest address of the block being compiled
  static u_int *source;                // pointer to the block's guest code
  static char insn[MAXBLOCK][10];      // disassembled mnemonic (for debug output)
  static u_char itype[MAXBLOCK];       // instruction type (LOAD, STORE, ALU, ...)
  static u_char opcode[MAXBLOCK];
  static u_char opcode2[MAXBLOCK];
  static u_char bt[MAXBLOCK];          // branch target flag
  static u_char rs1[MAXBLOCK];         // source/target guest register numbers
  static u_char rs2[MAXBLOCK];
  static u_char rt1[MAXBLOCK];
  static u_char rt2[MAXBLOCK];
  static u_char dep1[MAXBLOCK];
  static u_char dep2[MAXBLOCK];
  static u_char lt1[MAXBLOCK];
  static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
  static uint64_t gte_rt[MAXBLOCK];
  static uint64_t gte_unneeded[MAXBLOCK];
  static u_int smrv[32]; // speculated MIPS register values
  static u_int smrv_strong; // mask or regs that are likely to have correct values
  static u_int smrv_weak; // same, but somewhat less likely
  static u_int smrv_strong_next; // same, but after current insn executes
  static u_int smrv_weak_next;
  static int imm[MAXBLOCK];            // decoded immediate operand
  static u_int ba[MAXBLOCK];           // branch target address
  static char likely[MAXBLOCK];
  static char is_ds[MAXBLOCK];         // instruction is in a delay slot
  static char ooo[MAXBLOCK];
  static uint64_t unneeded_reg[MAXBLOCK];
  static uint64_t branch_unneeded_reg[MAXBLOCK];
  static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // pre-instruction i?
  // contains 'real' consts at [i] insn, but may differ from what's actually
  // loaded in host reg as 'final' value is always loaded, see get_final_value()
  static uint32_t current_constmap[HOST_REGS];
  static uint32_t constmap[MAXBLOCK][HOST_REGS];
  static struct regstat regs[MAXBLOCK];        // register state at each insn
  static struct regstat branch_regs[MAXBLOCK]; // register state at branches
  static signed char minimum_free_regs[MAXBLOCK];
  static u_int needed_reg[MAXBLOCK];
  static u_int wont_dirty[MAXBLOCK];
  static u_int will_dirty[MAXBLOCK];
  static int ccadj[MAXBLOCK];          // cycle-count adjustment per insn
  static int slen;                     // block length in instructions
  static void *instr_addr[MAXBLOCK];   // emitted address of each insn
  static struct link_entry link_addr[MAXBLOCK];
  static int linkcount;
  static struct code_stub stubs[MAXBLOCK*3];
  static int stubcount;
  static u_int literals[1024][2];
  static int literalcount;
  static int is_delayslot;
  static char shadow[1048576] __attribute__((aligned(16)));
  static void *copy;
  static int expirep;                  // translation-cache expiry cursor
  static u_int stop_after_jal;
#ifndef RAM_FIXED
  static uintptr_t ram_offset;
#else
  static const uintptr_t ram_offset=0;
#endif

  int new_dynarec_hacks;
  int new_dynarec_did_compile;

  // --- shared with the asm glue (new_dyna_leave etc.) ---
  extern int cycle_count; // ... until end of the timeslice, counts -N -> 0
  extern int last_count; // last absolute target, often = next_interupt
  extern int pcaddr;
  extern int pending_exception;
  extern int branch_target;
  extern uintptr_t mini_ht[32][2];
  extern u_char restore_candidate[512];
57871462 228
229 /* registers that may be allocated */
230 /* 1-31 gpr */
7c3a5182 231#define LOREG 32 // lo
232#define HIREG 33 // hi
00fa9369 233//#define FSREG 34 // FPU status (FCSR)
57871462 234#define CSREG 35 // Coprocessor status
235#define CCREG 36 // Cycle count
236#define INVCP 37 // Pointer to invalid_code
1edfcc68 237//#define MMREG 38 // Pointer to memory_map
9c45ca93 238//#define ROREG 39 // ram offset (if rdram!=0x80000000)
619e5ded 239#define TEMPREG 40
240#define FTEMP 40 // FPU temporary register
241#define PTEMP 41 // Prefetch temporary register
1edfcc68 242//#define TLREG 42 // TLB mapping offset
619e5ded 243#define RHASH 43 // Return address hash
244#define RHTBL 44 // Return address hash table address
245#define RTEMP 45 // JR/JALR address register
246#define MAXREG 45
247#define AGEN1 46 // Address generation temporary register
1edfcc68 248//#define AGEN2 47 // Address generation temporary register
249//#define MGEN1 48 // Maptable address generation temporary register
250//#define MGEN2 49 // Maptable address generation temporary register
619e5ded 251#define BTREG 50 // Branch target temporary register
57871462 252
253 /* instruction types */
254#define NOP 0 // No operation
255#define LOAD 1 // Load
256#define STORE 2 // Store
257#define LOADLR 3 // Unaligned load
258#define STORELR 4 // Unaligned store
9f51b4b9 259#define MOV 5 // Move
57871462 260#define ALU 6 // Arithmetic/logic
261#define MULTDIV 7 // Multiply/divide
262#define SHIFT 8 // Shift by register
263#define SHIFTIMM 9// Shift by immediate
264#define IMM16 10 // 16-bit immediate
265#define RJUMP 11 // Unconditional jump to register
266#define UJUMP 12 // Unconditional jump
267#define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
268#define SJUMP 14 // Conditional branch (regimm format)
269#define COP0 15 // Coprocessor 0
270#define COP1 16 // Coprocessor 1
271#define C1LS 17 // Coprocessor 1 load/store
ad49de89 272//#define FJUMP 18 // Conditional branch (floating point)
00fa9369 273//#define FLOAT 19 // Floating point unit
274//#define FCONV 20 // Convert integer to float
275//#define FCOMP 21 // Floating point compare (sets FSREG)
57871462 276#define SYSCALL 22// SYSCALL
277#define OTHER 23 // Other
278#define SPAN 24 // Branch/delay slot spans 2 pages
279#define NI 25 // Not implemented
7139f3c8 280#define HLECALL 26// PCSX fake opcodes for HLE
b9b61529 281#define COP2 27 // Coprocessor 2 move
282#define C2LS 28 // Coprocessor 2 load/store
283#define C2OP 29 // Coprocessor 2 operation
1e973cb0 284#define INTCALL 30// Call interpreter to handle rare corner cases
57871462 285
57871462 286 /* branch codes */
287#define TAKEN 1
288#define NOTTAKEN 2
289#define NULLDS 3
290
7c3a5182 291#define DJT_1 (void *)1l // no function, just a label in assem_debug log
292#define DJT_2 (void *)2l
293
57871462 294// asm linkage
3968e69e 295int new_recompile_block(u_int addr);
57871462 296void *get_addr_ht(u_int vaddr);
297void invalidate_block(u_int block);
298void invalidate_addr(u_int addr);
299void remove_hash(int vaddr);
57871462 300void dyna_linker();
301void dyna_linker_ds();
302void verify_code();
57871462 303void verify_code_ds();
304void cc_interrupt();
305void fp_exception();
306void fp_exception_ds();
3968e69e 307void jump_to_new_pc();
7139f3c8 308void new_dyna_leave();
57871462 309
57871462 310// Needed by assembler
ad49de89 311static void wb_register(signed char r,signed char regmap[],uint64_t dirty);
312static void wb_dirtys(signed char i_regmap[],uint64_t i_dirty);
313static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_dirty,int addr);
e2b5e7aa 314static void load_all_regs(signed char i_regmap[]);
315static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
316static void load_regs_entry(int t);
ad49de89 317static void load_all_consts(signed char regmap[],u_int dirty,int i);
e2b5e7aa 318
3968e69e 319static int verify_dirty(const u_int *ptr);
e2b5e7aa 320static int get_final_value(int hr, int i, int *value);
b14b6a8f 321static void add_stub(enum stub_type type, void *addr, void *retaddr,
322 u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e);
323static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
324 int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist);
643aeae3 325static void add_to_linker(void *addr, u_int target, int ext);
8062d65a 326static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override);
687b4580 327static void *get_direct_memhandler(void *table, u_int addr,
328 enum stub_type type, uintptr_t *addr_host);
329static void pass_args(int a0, int a1);
2a014d73 330static void emit_far_jump(const void *f);
331static void emit_far_call(const void *f);
57871462 332
// Toggle write/execute permission on a code region. Platforms that enforce
// W^X (NO_WRITE_EXEC) need the cache writable while emitting and executable
// afterwards; elsewhere this compiles to a no-op.
static void mprotect_w_x(void *start, void *end, int is_x)
{
#ifdef NO_WRITE_EXEC
  #if defined(VITA)
  // *Open* enables write on all memory that was
  // allocated by sceKernelAllocMemBlockForVM()?
  if (is_x)
    sceKernelCloseVMDomain();
  else
    sceKernelOpenVMDomain();
  #else
  u_long mstart = (u_long)start & ~4095ul; // round down to the page boundary
  u_long mend = (u_long)end;
  if (mprotect((void *)mstart, mend - mstart,
               PROT_READ | (is_x ? PROT_EXEC : PROT_WRITE)) != 0)
    SysPrintf("mprotect(%c) failed: %s\n", is_x ? 'x' : 'w', strerror(errno));
  #endif
#endif
}
352
// Make [start,end) writable (dropping execute on W^X platforms)
// before emitting or patching code in it.
static void start_tcache_write(void *start, void *end)
{
  mprotect_w_x(start, end, 0);
}
357
// Finish writing code to [start,end): flush/invalidate the instruction
// cache on ARM-family targets (each platform has its own primitive),
// then restore execute permission.
static void end_tcache_write(void *start, void *end)
{
#if defined(__arm__) || defined(__aarch64__)
  size_t len = (char *)end - (char *)start;
  #if defined(__BLACKBERRY_QNX__)
  msync(start, len, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
  #elif defined(__MACH__)
  sys_cache_control(kCacheFunctionPrepareForExecution, start, len);
  #elif defined(VITA)
  sceKernelSyncVMDomain(sceBlock, start, len);
  #elif defined(_3DS)
  ctr_flush_invalidate_cache();
  #elif defined(__aarch64__)
  // as of 2021, __clear_cache() is still broken on arm64
  // so here is a custom one :(
  clear_cache_arm64(start, end);
  #else
  __clear_cache(start, end);
  #endif
  (void)len; // some branches above don't use it
#endif

  mprotect_w_x(start, end, 1);
}
382
383static void *start_block(void)
384{
385 u_char *end = out + MAX_OUTPUT_BLOCK_SIZE;
2a014d73 386 if (end > ndrc->translation_cache + sizeof(ndrc->translation_cache))
387 end = ndrc->translation_cache + sizeof(ndrc->translation_cache);
d148d265 388 start_tcache_write(out, end);
389 return out;
390}
391
// Close a block opened with start_block(): sync the icache and restore
// execute permission for everything emitted so far, [start, out).
static void end_block(void *start)
{
  end_tcache_write(start, out);
}
396
919981d0 397// also takes care of w^x mappings when patching code
398static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
399
// Record that the 4k page containing target needs an icache flush
// (performed later in do_clear_cache) and make it writable right away
// so the caller can patch it (W^X handling).
static void mark_clear_cache(void *target)
{
  uintptr_t offset = (u_char *)target - ndrc->translation_cache;
  // one bit per 4k page, 32 pages (128k) per needs_clear_cache[] slot
  u_int mask = 1u << ((offset >> 12) & 31);
  if (!(needs_clear_cache[offset >> 17] & mask)) {
    char *start = (char *)((uintptr_t)target & ~4095l);
    start_tcache_write(start, start + 4095);
    needs_clear_cache[offset >> 17] |= mask;
  }
}
410
// Clearing the cache is rather slow on ARM Linux, so mark the areas
// that need to be cleared, and then only clear these areas once.
static void do_clear_cache(void)
{
  int i, j;
  for (i = 0; i < (1<<(TARGET_SIZE_2-17)); i++)
  {
    u_int bitmap = needs_clear_cache[i];
    if (!bitmap)
      continue;
    for (j = 0; j < 32; j++)
    {
      u_char *start, *end;
      if (!(bitmap & (1<<j)))
        continue;

      start = ndrc->translation_cache + i*131072 + j*4096;
      end = start + 4095;
      // coalesce consecutive marked pages into one flush; note this
      // inner loop advances the outer page index j on purpose
      for (j++; j < 32; j++) {
        if (!(bitmap & (1<<j)))
          break;
        end += 4096;
      }
      end_tcache_write(start, end);
    }
    needs_clear_cache[i] = 0; // all pages in this slot are now flushed
  }
}
439
57871462 440//#define DEBUG_CYCLE_COUNT 1
441
b6e87b2b 442#define NO_CYCLE_PENALTY_THR 12
443
4e9dcd7f 444int cycle_multiplier; // 100 for 1.0
a3203cf4 445int cycle_multiplier_override;
4e9dcd7f 446
447static int CLOCK_ADJUST(int x)
448{
a3203cf4 449 int m = cycle_multiplier_override
450 ? cycle_multiplier_override : cycle_multiplier;
4e9dcd7f 451 int s=(x>>31)|1;
a3203cf4 452 return (x * m + s * 50) / 100;
4e9dcd7f 453}
454
94d23bb9 455static u_int get_page(u_int vaddr)
57871462 456{
0ce47d46 457 u_int page=vaddr&~0xe0000000;
458 if (page < 0x1000000)
459 page &= ~0x0e00000; // RAM mirrors
460 page>>=12;
57871462 461 if(page>2048) page=2048+(page&2047);
94d23bb9 462 return page;
463}
464
// no virtual mem in PCSX
// Virtual page index equals the physical page index, so the dirty-block
// table (jump_dirty) shares get_page()'s mapping.
static u_int get_vpage(u_int vaddr)
{
  return get_page(vaddr);
}
94d23bb9 470
df4dc2b1 471static struct ht_entry *hash_table_get(u_int vaddr)
472{
473 return &hash_table[((vaddr>>16)^vaddr)&0xFFFF];
474}
475
476static void hash_table_add(struct ht_entry *ht_bin, u_int vaddr, void *tcaddr)
477{
478 ht_bin->vaddr[1] = ht_bin->vaddr[0];
479 ht_bin->tcaddr[1] = ht_bin->tcaddr[0];
480 ht_bin->vaddr[0] = vaddr;
481 ht_bin->tcaddr[0] = tcaddr;
482}
483
// some messy ari64's code, seems to rely on unsigned 32bit overflow
// Returns nonzero when tcaddr is far enough from the current output
// pointer (in cache-wrap distance) that the expiry sweep won't reclaim
// it soon; used to decide whether a block is worth reusing/restoring.
static int doesnt_expire_soon(void *tcaddr)
{
  // scale the distance so the whole cache maps onto the full u_int range
  u_int diff = (u_int)((u_char *)tcaddr - out) << (32-TARGET_SIZE_2);
  return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2)));
}
490
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
void noinline *get_addr(u_int vaddr)
{
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
  // 1) look for an already-compiled clean block on this page
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match %x: %p)\n",Count,next_interupt,vaddr,head->addr);
      // cache the hit so get_addr_ht() finds it on the fast path
      hash_table_add(hash_table_get(vaddr), vaddr, head->addr);
      return head->addr;
    }
    head=head->next;
  }
  // 2) no clean block - try to revalidate an invalidated ("dirty") one
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %p)\n",Count,next_interupt,vaddr,head->addr);
      // Don't restore blocks which are about to expire from the cache
      if (doesnt_expire_soon(head->addr))
      if (verify_dirty(head->addr)) {
        // guest code still matches what was compiled - block is valid again
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        invalid_code[vaddr>>12]=0;
        inv_code_start=inv_code_end=~0;
        if(vpage<2048) {
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        struct ht_entry *ht_bin = hash_table_get(vaddr);
        if (ht_bin->vaddr[0] == vaddr)
          ht_bin->tcaddr[0] = head->addr; // Replace existing entry
        else
          hash_table_add(ht_bin, vaddr, head->addr);

        return head->addr;
      }
    }
    head=head->next;
  }
  //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
  // 3) nothing usable - compile the block now and retry
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault execption
  // NOTE(review): raises MIPS COP0 TLB-miss state, a leftover from the
  // N64 (mupen64plus) dynarec; presumably unreachable on PCSX - confirm
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
}
545// Look up address in hash table first
546void *get_addr_ht(u_int vaddr)
547{
548 //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
df4dc2b1 549 const struct ht_entry *ht_bin = hash_table_get(vaddr);
550 if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0];
551 if (ht_bin->vaddr[1] == vaddr) return ht_bin->tcaddr[1];
57871462 552 return get_addr(vaddr);
553}
554
57871462 555void clear_all_regs(signed char regmap[])
556{
557 int hr;
558 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
559}
560
d1e4ebd9 561static signed char get_reg(const signed char regmap[],int r)
57871462 562{
563 int hr;
564 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
565 return -1;
566}
567
568// Find a register that is available for two consecutive cycles
d1e4ebd9 569static signed char get_reg2(signed char regmap1[], const signed char regmap2[], int r)
57871462 570{
571 int hr;
572 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
573 return -1;
574}
575
576int count_free_regs(signed char regmap[])
577{
578 int count=0;
579 int hr;
580 for(hr=0;hr<HOST_REGS;hr++)
581 {
582 if(hr!=EXCLUDE_REG) {
583 if(regmap[hr]<0) count++;
584 }
585 }
586 return count;
587}
588
589void dirty_reg(struct regstat *cur,signed char reg)
590{
591 int hr;
592 if(!reg) return;
593 for (hr=0;hr<HOST_REGS;hr++) {
594 if((cur->regmap[hr]&63)==reg) {
595 cur->dirty|=1<<hr;
596 }
597 }
598}
599
40fca85b 600static void set_const(struct regstat *cur, signed char reg, uint32_t value)
57871462 601{
602 int hr;
603 if(!reg) return;
604 for (hr=0;hr<HOST_REGS;hr++) {
605 if(cur->regmap[hr]==reg) {
606 cur->isconst|=1<<hr;
956f3129 607 current_constmap[hr]=value;
57871462 608 }
57871462 609 }
610}
611
40fca85b 612static void clear_const(struct regstat *cur, signed char reg)
57871462 613{
614 int hr;
615 if(!reg) return;
616 for (hr=0;hr<HOST_REGS;hr++) {
617 if((cur->regmap[hr]&63)==reg) {
618 cur->isconst&=~(1<<hr);
619 }
620 }
621}
622
40fca85b 623static int is_const(struct regstat *cur, signed char reg)
57871462 624{
625 int hr;
79c75f1b 626 if(reg<0) return 0;
57871462 627 if(!reg) return 1;
628 for (hr=0;hr<HOST_REGS;hr++) {
629 if((cur->regmap[hr]&63)==reg) {
630 return (cur->isconst>>hr)&1;
631 }
632 }
633 return 0;
634}
40fca85b 635
636static uint32_t get_const(struct regstat *cur, signed char reg)
57871462 637{
638 int hr;
639 if(!reg) return 0;
640 for (hr=0;hr<HOST_REGS;hr++) {
641 if(cur->regmap[hr]==reg) {
956f3129 642 return current_constmap[hr];
57871462 643 }
644 }
c43b5311 645 SysPrintf("Unknown constant in r%d\n",reg);
7c3a5182 646 abort();
57871462 647}
648
// Least soon needed registers
// Look at the next ten instructions and see which registers
// will be used. Try not to reallocate these.
// On return, hsn[r] is the distance (in insns) to the soonest use of
// guest register r within the lookahead window; smaller = keep allocated.
void lsn(u_char hsn[], int i, int *preferred_reg)
{
  int j;
  int b=-1;
  // find the window end: at most 9 insns ahead, never past the block end
  // or an unconditional jump (0x1000xxxx encodes beq $0,$0 = always taken)
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  // walk backwards so the nearest use of each register wins
  for(;j>=0;j--)
  {
    if(rs1[i+j]) hsn[rs1[i+j]]=j;
    if(rs2[i+j]) hsn[rs2[i+j]]=j;
    if(rt1[i+j]) hsn[rt1[i+j]]=j;
    if(rt2[i+j]) hsn[rt2[i+j]]=j;
    if(itype[i+j]==STORE || itype[i+j]==STORELR) {
      // Stores can allocate zero
      hsn[rs1[i+j]]=j;
      hsn[rs2[i+j]]=j;
    }
    // On some architectures stores need invc_ptr
    #if defined(HOST_IMM8)
    if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
      hsn[INVCP]=j;
    }
    #endif
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP))
    {
      hsn[CCREG]=j;
      b=j; // remember the last branch seen for the follow-through below
    }
  }
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        // uses at the branch target count as "+2" insns further away
        if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
        if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
        //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
        //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
      }
    }
    // TODO: preferred register based on backward branch
  }
  // Delay slot should preferably not overwrite branch conditions or cycle count
  if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP)) {
    if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
    if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
    hsn[CCREG]=1;
    // ...or hash tables
    hsn[RHASH]=1;
    hsn[RHTBL]=1;
  }
  // Coprocessor load/store needs FTEMP, even if not declared
  if(itype[i]==C1LS||itype[i]==C2LS) {
    hsn[FTEMP]=0;
  }
  // Load L/R also uses FTEMP as a temporary register
  if(itype[i]==LOADLR) {
    hsn[FTEMP]=0;
  }
  // Also SWL/SWR/SDL/SDR
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
    hsn[FTEMP]=0;
  }
  // Don't remove the miniht registers
  if(itype[i]==UJUMP||itype[i]==RJUMP)
  {
    hsn[RHASH]=0;
    hsn[RHTBL]=0;
  }
}
737
// We only want to allocate registers if we're going to use them again soon
// Returns 1 if guest register r is read within the next few insns after i.
int needed_again(int r, int i)
{
  int j;
  int b=-1;
  int rn=10; // distance to next use; 10 means "not used in window"

  if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
  {
    if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
      return 0; // Don't need any registers if exiting the block
  }
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
    // stop at syscall/HLE/interpreter calls and the break insn
    // ((source & 0xfc00003f) == 0x0d is SPECIAL/break)
    if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
    {
      break;
    }
  }
  for(;j>=1;j--)
  {
    if(rs1[i+j]==r) rn=j;
    if(rs2[i+j]==r) rn=j;
    if((unneeded_reg[i+j]>>r)&1) rn=10; // later marked unneeded - ignore
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP))
    {
      b=j;
    }
  }
  /*
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int o=rn;
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        if(!((unneeded_reg[t+j]>>r)&1)) {
          if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
          if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
        }
        else rn=o;
      }
    }
  }*/
  if(rn<10) return 1;
  (void)b;
  return 0;
}
800
// Try to match register allocations at the end of a loop with those
// at the beginning
// Returns the host register that backward-branch targets expect guest
// register r in, or hr unchanged if no constraint is found.
int loop_reg(int i, int r, int hr)
{
  int j,k;
  // window end: up to 9 insns or the next unconditional jump
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  k=0;
  if(i>0){
    // include the branch right before i (its delay slot is insn i)
    if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP)
      k--;
  }
  for(;k<j;k++)
  {
    assert(r < 64);
    if((unneeded_reg[i+k]>>r)&1) return hr; // r dies before any loop edge
    if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP))
    {
      if(ba[i+k]>=start && ba[i+k]<(start+i*4)) // backward branch only
      {
        int t=(ba[i+k]-start)>>2;
        int reg=get_reg(regs[t].regmap_entry,r);
        if(reg>=0) return reg; // match the allocation at the loop head
        //reg=get_reg(regs[t+1].regmap_entry,r);
        //if(reg>=0) return reg;
      }
    }
  }
  return hr;
}
842
843
844// Allocate every register, preserving source/target regs
845void alloc_all(struct regstat *cur,int i)
846{
847 int hr;
9f51b4b9 848
57871462 849 for(hr=0;hr<HOST_REGS;hr++) {
850 if(hr!=EXCLUDE_REG) {
851 if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
852 ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
853 {
854 cur->regmap[hr]=-1;
855 cur->dirty&=~(1<<hr);
856 }
857 // Don't need zeros
858 if((cur->regmap[hr]&63)==0)
859 {
860 cur->regmap[hr]=-1;
861 cur->dirty&=~(1<<hr);
862 }
863 }
864 }
865}
866
#ifndef NDEBUG
// Debug-build guard for the single scratch host register: catches
// nested acquisition, which would silently clobber the held value.
static int host_tempreg_in_use;

static void host_tempreg_acquire(void)
{
  assert(!host_tempreg_in_use);
  host_tempreg_in_use = 1;
}

static void host_tempreg_release(void)
{
  host_tempreg_in_use = 0;
}
#else
// release builds: tracking compiles away entirely
static void host_tempreg_acquire(void) {}
static void host_tempreg_release(void) {}
#endif
884
#ifdef DRC_DBG
extern void gen_interupt();
extern void do_insn_cmp();
// build a {address, " name"} pair; the leading space pads the log output
#define FUNCNAME(f) { f, " " #f }
// address -> symbol-name table for annotating debug disassembly
static const struct {
  void *addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(jump_to_new_pc),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
#ifdef __arm__
  FUNCNAME(verify_code),
#endif
};

// Map a code address back to its symbol name for debug logging;
// returns "" when the address is not in the table.
static const char *func_name(const void *a)
{
  int i;
  for (i = 0; i < sizeof(function_names)/sizeof(function_names[0]); i++)
    if (function_names[i].addr == a)
      return function_names[i].name;
  return "";
}
#else
#define func_name(x) ""
#endif
925
57871462 926#ifdef __i386__
927#include "assem_x86.c"
928#endif
929#ifdef __x86_64__
930#include "assem_x64.c"
931#endif
932#ifdef __arm__
933#include "assem_arm.c"
934#endif
be516ebe 935#ifdef __aarch64__
936#include "assem_arm64.c"
937#endif
57871462 938
2a014d73 939static void *get_trampoline(const void *f)
940{
941 size_t i;
942
943 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.f); i++) {
944 if (ndrc->tramp.f[i] == f || ndrc->tramp.f[i] == NULL)
945 break;
946 }
947 if (i == ARRAY_SIZE(ndrc->tramp.f)) {
948 SysPrintf("trampoline table is full, last func %p\n", f);
949 abort();
950 }
951 if (ndrc->tramp.f[i] == NULL) {
952 start_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]);
953 ndrc->tramp.f[i] = f;
954 end_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]);
955 }
956 return &ndrc->tramp.ops[i];
957}
958
// Emit a jump to f, routing through a trampoline when f is out of
// direct-branch range of the translation cache.
static void emit_far_jump(const void *f)
{
  if (!can_jump_or_call(f))
    f = get_trampoline(f);
  emit_jmp(f);
}
969
// Emit a call to f, routing through a trampoline when f is out of
// direct-call range of the translation cache.
static void emit_far_call(const void *f)
{
  if (!can_jump_or_call(f))
    f = get_trampoline(f);
  emit_call(f);
}
980
57871462 981// Add virtual address mapping to linked list
982void ll_add(struct ll_entry **head,int vaddr,void *addr)
983{
984 struct ll_entry *new_entry;
985 new_entry=malloc(sizeof(struct ll_entry));
986 assert(new_entry!=NULL);
987 new_entry->vaddr=vaddr;
de5a60c3 988 new_entry->reg_sv_flags=0;
57871462 989 new_entry->addr=addr;
990 new_entry->next=*head;
991 *head=new_entry;
992}
993
// Like ll_add(), but also records reg_sv_flags on the new (head) entry.
void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr)
{
  ll_add(head,vaddr,addr);
  (*head)->reg_sv_flags=reg_sv_flags;
}
999
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache
void *check_addr(u_int vaddr)
{
  // fast path: the 2-way hash bin
  struct ht_entry *ht_bin = hash_table_get(vaddr);
  size_t i;
  for (i = 0; i < ARRAY_SIZE(ht_bin->vaddr); i++) {
    if (ht_bin->vaddr[i] == vaddr)
      if (doesnt_expire_soon((u_char *)ht_bin->tcaddr[i] - MAX_OUTPUT_BLOCK_SIZE))
        if (isclean(ht_bin->tcaddr[i]))
          return ht_bin->tcaddr[i];
  }
  // slow path: walk the clean-block list for this page
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while (head != NULL) {
    if (head->vaddr == vaddr) {
      if (doesnt_expire_soon(head->addr)) {
        // Update existing entry with current address
        if (ht_bin->vaddr[0] == vaddr) {
          ht_bin->tcaddr[0] = head->addr;
          return head->addr;
        }
        if (ht_bin->vaddr[1] == vaddr) {
          ht_bin->tcaddr[1] = head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if (ht_bin->vaddr[0] == -1) {
          ht_bin->vaddr[0] = vaddr;
          ht_bin->tcaddr[0] = head->addr;
        }
        else if (ht_bin->vaddr[1] == -1) {
          ht_bin->vaddr[1] = vaddr;
          ht_bin->tcaddr[1] = head->addr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0; // not compiled (or about to expire)
}
1045
// Drop any hash-table entries for vaddr; a surviving slot-1 entry is
// shifted down to slot 0 so it keeps its lookup priority.
void remove_hash(int vaddr)
{
  //printf("remove hash: %x\n",vaddr);
  struct ht_entry *ht_bin = hash_table_get(vaddr);
  if (ht_bin->vaddr[1] == vaddr) {
    ht_bin->vaddr[1] = -1; // -1 marks an empty slot
    ht_bin->tcaddr[1] = NULL;
  }
  if (ht_bin->vaddr[0] == vaddr) {
    // promote slot 1 (possibly just emptied above) into slot 0
    ht_bin->vaddr[0] = ht_bin->vaddr[1];
    ht_bin->tcaddr[0] = ht_bin->tcaddr[1];
    ht_bin->vaddr[1] = -1;
    ht_bin->tcaddr[1] = NULL;
  }
}
1061
// Remove from the list all entries whose translated-code address falls in
// the (addr >> shift) bucket, used when that region of the translation
// cache is being expired. The second comparison also catches blocks that
// start up to MAX_OUTPUT_BLOCK_SIZE before the bucket boundary.
void ll_remove_matching_addrs(struct ll_entry **head,uintptr_t addr,int shift)
{
  struct ll_entry *next;
  while(*head) {
    if(((uintptr_t)((*head)->addr)>>shift)==(addr>>shift) ||
       ((uintptr_t)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
    {
      inv_debug("EXP: Remove pointer to %p (%x)\n",(*head)->addr,(*head)->vaddr);
      // also drop any hash table entries pointing at this block
      remove_hash((*head)->vaddr);
      next=(*head)->next;
      free(*head);
      *head=next;       // unlink in place; do not advance head
    }
    else
    {
      head=&((*head)->next);
    }
  }
}
1081
1082// Remove all entries from linked list
1083void ll_clear(struct ll_entry **head)
1084{
1085 struct ll_entry *cur;
1086 struct ll_entry *next;
581335b0 1087 if((cur=*head)) {
57871462 1088 *head=0;
1089 while(cur) {
1090 next=cur->next;
1091 free(cur);
1092 cur=next;
1093 }
1094 }
1095}
1096
1097// Dereference the pointers and remove if it matches
// Dereference the pointers and remove if it matches
// For every jump-out entry, resolve where its external jump currently
// points; if the target lies in the expiring (addr >> shift) bucket,
// rewrite the jump to go back through the slow path (its own stub at
// head->addr) instead of into soon-to-be-recycled code.
static void ll_kill_pointers(struct ll_entry *head,uintptr_t addr,int shift)
{
  while(head) {
    uintptr_t ptr = (uintptr_t)get_pointer(head->addr);
    inv_debug("EXP: Lookup pointer to %lx at %p (%x)\n",(long)ptr,head->addr,head->vaddr);
    if(((ptr>>shift)==(addr>>shift)) ||
       (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
    {
      inv_debug("EXP: Kill pointer at %p (%x)\n",head->addr,head->vaddr);
      void *host_addr=find_extjump_insn(head->addr);
      mark_clear_cache(host_addr);     // patched code must be flushed from icache
      set_jump_target(host_addr, head->addr);
    }
    head=head->next;
  }
}
1114
1115// This is called when we write to a compiled block (see do_invstub)
// This is called when we write to a compiled block (see do_invstub)
// Drops every compiled entry point for the page (jump_in) and redirects
// every outgoing link recorded for the page (jump_out) back to its stub.
static void invalidate_page(u_int page)
{
  struct ll_entry *head;
  struct ll_entry *next;
  // Free all entry points for this page and their hash table entries
  head=jump_in[page];
  jump_in[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: %x\n",head->vaddr);
    remove_hash(head->vaddr);
    next=head->next;
    free(head);
    head=next;
  }
  // Unlink all direct jumps into this page so they re-enter via stubs
  head=jump_out[page];
  jump_out[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr);
    void *host_addr=find_extjump_insn(head->addr);
    mark_clear_cache(host_addr);
    set_jump_target(host_addr, head->addr);
    next=head->next;
    free(head);
    head=next;
  }
}
9be4ba64 1141
// Invalidate the page containing `block` plus the [first, last] page range
// that compiled code spanning this block actually covers (computed by the
// caller from block bounds). `block` indexes 4K guest pages.
static void invalidate_block_range(u_int block, u_int first, u_int last)
{
  u_int page=get_page(block<<12);
  //printf("first=%d last=%d\n",first,last);
  invalidate_page(page);
  assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
  assert(last<page+5);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  while(first<page) {
    invalidate_page(first);
    first++;
  }
  for(first=page+1;first<last;first++) {
    invalidate_page(first);
  }
  do_clear_cache();   // flush icache for all code patched above

  // Don't trap writes
  invalid_code[block]=1;

  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
}
9be4ba64 1166
// Invalidate all compiled code touching the given 4K guest page (`block`
// is the page index, i.e. addr >> 12). Scans the dirty-block list to find
// how far compiled blocks overlapping this page extend, then invalidates
// the whole spanned page range.
void invalidate_block(u_int block)
{
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  while(head!=NULL) {
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      u_char *start, *end;
      // source-code bounds of the compiled block
      get_bounds(head->addr, &start, &end);
      //printf("start: %p end: %p\n", start, end);
      if (page < 2048 && start >= rdram && end < rdram+RAM_SIZE) {
        // widen [first,last] to cover every RAM page this block spans
        if (((start-rdram)>>12) <= page && ((end-1-rdram)>>12) >= page) {
          if ((((start-rdram)>>12)&2047) < first) first = ((start-rdram)>>12)&2047;
          if ((((end-1-rdram)>>12)&2047) > last) last = ((end-1-rdram)>>12)&2047;
        }
      }
    }
    head=head->next;
  }
  invalidate_block_range(block,first,last);
}
1194
// Invalidate compiled code overlapping a single written address.
// For RAM writes this also maintains inv_code_start/inv_code_end, a
// cached "known code-free" range the caller checks to skip future calls.
void invalidate_addr(u_int addr)
{
  //static int rhits;
  // this check is done by the caller
  //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
  u_int page=get_vpage(addr);
  if(page<2048) { // RAM
    struct ll_entry *head;
    u_int addr_min=~0, addr_max=0;      // bounds of blocks actually hit
    u_int mask=RAM_SIZE-1;
    u_int addr_main=0x80000000|(addr&mask);  // normalize to KSEG0 mirror
    int pg1;
    // start with the written address's whole 4K page as the candidate
    // code-free range; shrink it below as nearby blocks are found
    inv_code_start=addr_main&~0xfff;
    inv_code_end=addr_main|0xfff;
    pg1=page;
    if (pg1>0) {
      // must check previous page too because of spans..
      pg1--;
      inv_code_start-=0x1000;
    }
    for(;pg1<=page;pg1++) {
      for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
        u_char *start_h, *end_h;
        u_int start, end;
        get_bounds(head->addr, &start_h, &end_h);
        start = (uintptr_t)start_h - ram_offset;
        end = (uintptr_t)end_h - ram_offset;
        if(start<=addr_main&&addr_main<end) {
          // write landed inside this block: remember its extent
          if(start<addr_min) addr_min=start;
          if(end>addr_max) addr_max=end;
        }
        else if(addr_main<start) {
          // block lies above the write: cap the code-free range
          if(start<inv_code_end)
            inv_code_end=start-1;
        }
        else {
          // block lies below the write: raise the code-free range floor
          if(end>inv_code_start)
            inv_code_start=end;
        }
      }
    }
    if (addr_min!=~0) {
      // hit at least one block: invalidate the spanned page range
      inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
      inv_code_start=inv_code_end=~0;
      invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
      return;
    }
    else {
      // miss: publish the code-free range in the caller's address space
      inv_code_start=(addr&~mask)|(inv_code_start&mask);
      inv_code_end=(addr&~mask)|(inv_code_end&mask);
      inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
      return;
    }
  }
  // non-RAM: fall back to whole-page invalidation
  invalidate_block(addr>>12);
}
9be4ba64 1251
dd3a91a1 1252// This is called when loading a save state.
1253// Anything could have changed, so invalidate everything.
// This is called when loading a save state.
// Anything could have changed, so invalidate everything.
// Pages whose code was not known-invalid are flagged in restore_candidate
// so clean_blocks() may later restore them without recompiling.
void invalidate_all_pages(void)
{
  u_int page;
  for(page=0;page<4096;page++)
    invalidate_page(page);
  for(page=0;page<1048576;page++)
    if(!invalid_code[page]) {
      restore_candidate[(page&2047)>>3]|=1<<(page&7);
      restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
    }
  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
  do_clear_cache();
}
1269
// Emit the out-of-line invalidation stub for stub entry n: saves the live
// registers, calls invalidate_addr() with the written address (stubs[n].b
// holds the register carrying it, 0 meaning it is already in arg reg 0),
// restores registers and jumps back to the compiled code.
static void do_invstub(int n)
{
  literal_pool(20);
  u_int reglist=stubs[n].a;
  set_jump_target(stubs[n].addr, out);   // patch branch in main code to here
  save_regs(reglist);
  if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
  emit_far_call(invalidate_addr);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr); // return address
}
1281
57871462 1282// Add an entry to jump_out after making a link
d1e4ebd9 1283// src should point to code by emit_extjump2()
57871462 1284void add_link(u_int vaddr,void *src)
1285{
94d23bb9 1286 u_int page=get_page(vaddr);
643aeae3 1287 inv_debug("add_link: %p -> %x (%d)\n",src,vaddr,page);
d1e4ebd9 1288 check_extjump2(src);
57871462 1289 ll_add(jump_out+page,vaddr,src);
643aeae3 1290 //void *ptr=get_pointer(src);
1291 //inv_debug("add_link: Pointer is to %p\n",ptr);
57871462 1292}
1293
1294// If a code block was found to be unmodified (bit was set in
1295// restore_candidate) and it remains unmodified (bit is clear
1296// in invalid_code) then move the entries for that 4K page from
1297// the dirty list to the clean list.
// Promote unmodified dirty blocks on this page back to the clean list.
// A block is restorable when its code still matches guest memory
// (verify_dirty), none of its source pages are invalid, and neither its
// dirty nor its clean copy is about to expire from the cache.
void clean_blocks(u_int page)
{
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if (doesnt_expire_soon(head->addr)) {
        if(verify_dirty(head->addr)) {
          u_char *start, *end;
          //printf("Possibly Restore %x (%p)\n",head->vaddr, head->addr);
          u_int i;
          u_int inv=0;
          get_bounds(head->addr, &start, &end);
          if (start - rdram < RAM_SIZE) {
            // check every RAM page the block's source spans
            for (i = (start-rdram+0x80000000)>>12; i <= (end-1-rdram+0x80000000)>>12; i++) {
              inv|=invalid_code[i];
            }
          }
          else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
            inv=1;   // outside RAM: never restore
          }
          if(!inv) {
            void *clean_addr = get_clean_addr(head->addr);
            if (doesnt_expire_soon(clean_addr)) {
              u_int ppage=page;
              inv_debug("INV: Restored %x (%p/%p)\n",head->vaddr, head->addr, clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              // re-register the clean entry point and refresh the hash table
              ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
              struct ht_entry *ht_bin = hash_table_get(head->vaddr);
              if (ht_bin->vaddr[0] == head->vaddr)
                ht_bin->tcaddr[0] = clean_addr; // Replace existing entry
              if (ht_bin->vaddr[1] == head->vaddr)
                ht_bin->tcaddr[1] = clean_addr; // Replace existing entry
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1342
8062d65a 1343/* Register allocation */
1344
1345// Note: registers are allocated clean (unmodified state)
1346// if you intend to modify the register, you must call dirty_reg().
// Allocate a host register to hold guest register `reg` at instruction i.
// Allocation order: keep an existing mapping, try the preferred register,
// evict an unneeded register, take any free register, and finally evict
// the register least likely to be needed soon (per lsn()).
// Note: registers are allocated clean; callers that modify the value
// must call dirty_reg().
static void alloc_reg(struct regstat *cur,int i,signed char reg)
{
  int r,hr;
  int preferred_reg = (reg&7);
  if(reg==CCREG) preferred_reg=HOST_CCREG;
  if(reg==PTEMP||reg==FTEMP) preferred_reg=12;

  // Don't allocate unused registers
  if((cur->u>>reg)&1) return;

  // see if it's already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(cur->regmap[hr]==reg) return;
  }

  // Keep the same mapping if the register was already allocated in a loop
  preferred_reg = loop_reg(i,reg,preferred_reg);

  // Try to allocate the preferred register
  if(cur->regmap[preferred_reg]==-1) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  // Preferred register is taken; reuse it if its current occupant is unneeded
  r=cur->regmap[preferred_reg];
  assert(r < 64);
  if((cur->u>>r)&1) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }

  // Clear any unneeded registers
  // We try to keep the mapping consistent, if possible, because it
  // makes branches easier (especially loops).  So we try to allocate
  // first (see above) before removing old mappings.  If this is not
  // possible then go ahead and clear out the registers that are no
  // longer needed.
  for(hr=0;hr<HOST_REGS;hr++)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      assert(r < 64);
      if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
    }
  }
  // Try to allocate any available register, but prefer
  // registers that have not been used recently.
  if(i>0) {
    for(hr=0;hr<HOST_REGS;hr++) {
      if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
        if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
          cur->regmap[hr]=reg;
          cur->dirty&=~(1<<hr);
          cur->isconst&=~(1<<hr);
          return;
        }
      }
    }
  }
  // Try to allocate any available register
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // hsn[r] = distance until guest register r is next needed (10 = far away)
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg);
  //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
  //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
    if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2;
    // Evict the register needed furthest in the future first (j counts down)
    for(j=10;j>=3;j--)
    {
      // Alloc preferred register if available
      if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          // Evict both parts of a 64-bit register
          if((cur->regmap[hr]&63)==r) {
            cur->regmap[hr]=-1;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
          }
        }
        cur->regmap[preferred_reg]=reg;
        return;
      }
      for(r=1;r<=MAXREG;r++)
      {
        // Avoid evicting registers used by the previous instruction
        if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: evict anything, no restrictions
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  SysPrintf("This shouldn't happen (alloc_reg)");abort();
}
1483
1484// Allocate a temporary register. This is done without regard to
1485// dirty status or whether the register we request is on the unneeded list
1486// Note: This will only allocate one register, even if called multiple times
// Allocate a temporary register.  This is done without regard to
// dirty status or whether the register we request is on the unneeded list
// Note: This will only allocate one register, even if called multiple times
// Search order: existing mapping, any free register (high to low),
// registers holding unneeded values, then eviction by lsn() distance.
static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
{
  int r,hr;
  int preferred_reg = -1;

  // see if it's already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
  }

  // Try to allocate any available register
  for(hr=HOST_REGS-1;hr>=0;hr--) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Find an unneeded register
  for(hr=HOST_REGS-1;hr>=0;hr--)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      assert(r < 64);
      if((cur->u>>r)&1) {
        if(i==0||((unneeded_reg[i-1]>>r)&1)) {
          cur->regmap[hr]=reg;
          cur->dirty&=~(1<<hr);
          cur->isconst&=~(1<<hr);
          return;
        }
      }
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // TODO: we might want to follow unconditional jumps here
  // TODO: get rid of dupe code and make this into a function
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg);
  //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
    if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2;
    // Evict the register needed furthest in the future first (j counts down)
    for(j=10;j>=3;j--)
    {
      for(r=1;r<=MAXREG;r++)
      {
        // Avoid evicting registers used by the previous instruction
        if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||hsn[CCREG]>2) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: evict anything, no restrictions
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  SysPrintf("This shouldn't happen");abort();
}
1576
// Register allocation for MFHI/MFLO/MTHI/MTLO-style register moves:
// only the destination needs a host register.
static void mov_alloc(struct regstat *current,int i)
{
  // Note: Don't need to actually alloc the source registers
  //alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rt1[i]);

  clear_const(current,rs1[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1587
// Register allocation for shift-by-immediate instructions.
// 64-bit variants (DSLL etc.) cannot occur on the R3000A, hence assert(0).
static void shiftimm_alloc(struct regstat *current,int i)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
      else lt1[i]=rs1[i];
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
      // propagate constants through the shift when the source is known
      if(is_const(current,rs1[i])) {
        int v=get_const(current,rs1[i]);
        if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
        if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]);
        if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]);
      }
      else clear_const(current,rt1[i]);
    }
  }
  else
  {
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }

  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    assert(0);
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    assert(0);
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    assert(0);
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    assert(0);
  }
}
1629
// Register allocation for variable shifts (SLLV/SRLV/SRAV).
// 64-bit variants cannot occur on the R3000A, hence assert(0).
static void shift_alloc(struct regstat *current,int i)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg(current,i,rt1[i]);
      if(rt1[i]==rs2[i]) {
        // destination aliases the shift amount: need a scratch register
        alloc_reg_temp(current,i,-1);
        minimum_free_regs[i]=1;
      }
    } else { // DSLLV/DSRLV/DSRAV
      assert(0);
    }
    clear_const(current,rs1[i]);
    clear_const(current,rs2[i]);
    clear_const(current,rt1[i]);
    dirty_reg(current,rt1[i]);
  }
}
1651
// Register allocation for three-operand ALU instructions
// (ADD/ADDU/SUB/SUBU, SLT/SLTU, AND/OR/XOR/NOR).
// 64-bit variants cannot occur on the R3000A, hence assert(0).
static void alu_alloc(struct regstat *current,int i)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else {
        // with one operand being r0 the source only needs a register
        // if it is live again later
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      alloc_reg(current,i,rs1[i]);
      alloc_reg(current,i,rs2[i]);
      alloc_reg(current,i,rt1[i]);
    }
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else
      {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    assert(0);
  }
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1696
// Register allocation for immediate-operand instructions
// (ADDI/ADDIU, SLTI/SLTIU, ANDI/ORI/XORI, LUI), with constant
// propagation where the source value is statically known.
static void imm16_alloc(struct regstat *current,int i)
{
  if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  else lt1[i]=rs1[i];
  if(rt1[i]) alloc_reg(current,i,rt1[i]);
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    assert(0);   // 64-bit ops don't exist on R3000A
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
      if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
      if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
    }
    else clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      set_const(current,rt1[i],v+imm[i]);
    }
    else clear_const(current,rt1[i]);
  }
  else {
    set_const(current,rt1[i],imm[i]<<16); // LUI
  }
  dirty_reg(current,rt1[i]);
}
1730
// Register allocation for loads (LB/LH/LW/LBU/LHU and LWL/LWR).
// Loads to r0 or to an unneeded register still allocate an address
// temporary because the access itself must be performed.
static void load_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
  if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  if(rt1[i]&&!((current->u>>rt1[i])&1)) {
    alloc_reg(current,i,rt1[i]);
    assert(get_reg(current->regmap,rt1[i])>=0);
    if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
    {
      assert(0);   // 64-bit loads don't exist on R3000A
    }
    else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      assert(0);
    }
    dirty_reg(current,rt1[i]);
    // LWL/LWR need a temporary register for the old value
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP);
      alloc_reg_temp(current,i,-1);
      minimum_free_regs[i]=1;
    }
  }
  else
  {
    // Load to r0 or unneeded register (dummy load)
    // but we still need a register to calculate the address
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
    }
    alloc_reg_temp(current,i,-1);
    minimum_free_regs[i]=1;
    if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      assert(0);
    }
  }
}
1773
// Register allocation for stores (SB/SH/SW and SWL/SWR).
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,rs2[i]);
  if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rs2[i]);
  if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
    assert(0);   // 64-bit stores don't exist on R3000A
  }
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else alloc_reg(current,i,INVCP);
  #endif
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWR/SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1794
// Register allocation for COP1 loads/stores (LWC1/SWC1).
// NOTE(review): unlike c2ls_alloc, this does not set minimum_free_regs[i]
// after allocating a temp — presumably harmless since the PSX has no FPU
// and C1LS ops trap, but worth confirming.
void c1ls_alloc(struct regstat *current,int i)
{
  //clear_const(current,rs1[i]); // FIXME
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,CSREG); // Status
  alloc_reg(current,i,FTEMP);
  if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
    assert(0);
  }
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
}
1813
// Register allocation for GTE (COP2) loads/stores (LWC2/SWC2).
void c2ls_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,FTEMP);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1828
57871462 1829#ifndef multdiv_alloc
// Register allocation for MULT/MULTU/DIV/DIVU: results go to HI/LO.
// 64-bit variants (DMULT etc.) cannot occur on the R3000A.
void multdiv_alloc(struct regstat *current,int i)
{
  //  case 0x18: MULT
  //  case 0x19: MULTU
  //  case 0x1A: DIV
  //  case 0x1B: DIVU
  //  case 0x1C: DMULT
  //  case 0x1D: DMULTU
  //  case 0x1E: DDIV
  //  case 0x1F: DDIVU
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      // force HI/LO to be treated as needed so they get registers
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      alloc_reg(current,i,HIREG);
      alloc_reg(current,i,LOREG);
      alloc_reg(current,i,rs1[i]);
      alloc_reg(current,i,rs2[i]);
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
    else // 64-bit
    {
      assert(0);
    }
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    alloc_reg(current,i,HIREG);
    alloc_reg(current,i,LOREG);
    dirty_reg(current,HIREG);
    dirty_reg(current,LOREG);
  }
}
1871#endif
1872
// Register allocation for COP0 instructions (MFC0/MTC0/RFE etc.).
// These go through C helper calls, so all host registers are reserved
// (alloc_all) and minimum_free_regs is set to HOST_REGS.
void cop0_alloc(struct regstat *current,int i)
{
  if(opcode2[i]==0) // MFC0
  {
    if(rt1[i]) {
      clear_const(current,rt1[i]);
      alloc_all(current,i);
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      alloc_reg(current,i,rs1[i]);
      alloc_all(current,i);
    }
    else {
      alloc_all(current,i); // FIXME: Keep r0
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  else
  {
    // TLBR/TLBWI/TLBWR/TLBP/ERET
    assert(opcode2[i]==0x10);
    alloc_all(current,i);
  }
  minimum_free_regs[i]=HOST_REGS;
}
1905
// Register allocation for COP1/COP2 register moves
// (MFC/CFC to a guest register, MTC/CTC from one).
static void cop12_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  if(opcode2[i]<3) // MFC1/CFC1
  {
    if(rt1[i]){
      clear_const(current,rt1[i]);
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
    }
    alloc_reg_temp(current,i,-1);
  }
  else if(opcode2[i]>3) // MTC1/CTC1
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      alloc_reg(current,i,rs1[i]);
    }
    else {
      // source is r0: make r0 allocatable and pin it
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
    alloc_reg_temp(current,i,-1);
  }
  minimum_free_regs[i]=1;
}
00fa9369 1932
// Register allocation for GTE (COP2) operations: only a scratch register
// is needed; the actual GTE work happens in helper calls.
void c2op_alloc(struct regstat *current,int i)
{
  alloc_reg_temp(current,i,-1);
}
57871462 1937
// Register allocation for SYSCALL/BREAK: takes an exception, so the cycle
// count must be live and every host register is reserved for the handler.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  minimum_free_regs[i]=HOST_REGS;
  current->isconst=0;
}
1946
// Register allocation for the instruction in a branch delay slot:
// dispatch to the per-type allocator. A jump in the delay slot is not
// supported; speculative precompilation is disabled when one is seen.
void delayslot_alloc(struct regstat *current,int i)
{
  switch(itype[i]) {
    case UJUMP:
    case CJUMP:
    case SJUMP:
    case RJUMP:
    case SYSCALL:
    case HLECALL:
    case SPAN:
      assem_debug("jump in the delay slot.  this shouldn't happen.\n");//abort();
      SysPrintf("Disabled speculative precompilation\n");
      stop_after_jal=1;
      break;
    case IMM16:
      imm16_alloc(current,i);
      break;
    case LOAD:
    case LOADLR:
      load_alloc(current,i);
      break;
    case STORE:
    case STORELR:
      store_alloc(current,i);
      break;
    case ALU:
      alu_alloc(current,i);
      break;
    case SHIFT:
      shift_alloc(current,i);
      break;
    case MULTDIV:
      multdiv_alloc(current,i);
      break;
    case SHIFTIMM:
      shiftimm_alloc(current,i);
      break;
    case MOV:
      mov_alloc(current,i);
      break;
    case COP0:
      cop0_alloc(current,i);
      break;
    case COP1:
    case COP2:
      cop12_alloc(current,i);
      break;
    case C1LS:
      c1ls_alloc(current,i);
      break;
    case C2LS:
      c2ls_alloc(current,i);
      break;
    case C2OP:
      c2op_alloc(current,i);
      break;
  }
}
2005
2006// Special case where a branch and delay slot span two pages in virtual memory
// Special case where a branch and delay slot span two pages in virtual memory
// All host registers are reserved; only the guest registers the branch
// itself reads/writes get mappings on top of that.
static void pagespan_alloc(struct regstat *current,int i)
{
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  minimum_free_regs[i]=HOST_REGS;
  alloc_all(current,i);
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  if(opcode[i]==3) // JAL
  {
    alloc_reg(current,i,31);   // link register
    dirty_reg(current,31);
  }
  if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,rs1[i]);
    if (rt1[i]!=0) {
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(rs2[i]) alloc_reg(current,i,rs2[i]);
  }
  else
  if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
  }
  //else ...
}
2041
b14b6a8f 2042static void add_stub(enum stub_type type, void *addr, void *retaddr,
2043 u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e)
2044{
d1e4ebd9 2045 assert(stubcount < ARRAY_SIZE(stubs));
b14b6a8f 2046 stubs[stubcount].type = type;
2047 stubs[stubcount].addr = addr;
2048 stubs[stubcount].retaddr = retaddr;
2049 stubs[stubcount].a = a;
2050 stubs[stubcount].b = b;
2051 stubs[stubcount].c = c;
2052 stubs[stubcount].d = d;
2053 stubs[stubcount].e = e;
57871462 2054 stubcount++;
2055}
2056
// Convenience wrapper around add_stub for memory-access stubs: packs the
// instruction index, address register and register-state pointer into the
// generic payload slots.
static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
  int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist)
{
  add_stub(type, addr, retaddr, i, addr_reg, (uintptr_t)i_regs, ccadj, reglist);
}
2062
57871462 2063// Write out a single register
// Write out a single register
// If guest register r is held dirty in any host register, emit a store
// of it back to its memory slot.
static void wb_register(signed char r,signed char regmap[],uint64_t dirty)
{
  int hr;
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if((regmap[hr]&63)==r) {
        if((dirty>>hr)&1) {
          assert(regmap[hr]<64);
          emit_storereg(r,hr);
        }
      }
    }
  }
}
2078
// Write back registers that were dirty before this point (dirty_pre) but
// are not dirty in the target state (dirty), skipping unneeded registers
// (bitmask u). Only real guest registers (1..33) are stored.
static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t u)
{
  //if(dirty_pre==dirty) return;
  int hr,reg;
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      reg=pre[hr];
      if(((~u)>>(reg&63))&1) {          // register is needed
        if(reg>0) {
          if(((dirty_pre&~dirty)>>hr)&1) {  // was dirty, won't be tracked dirty
            if(reg>0&&reg<34) {
              emit_storereg(reg,hr);
            }
            else if(reg>=64) {
              assert(0);
            }
          }
        }
      }
    }
  }
}
2101
687b4580 2102// trashes r2
2103static void pass_args(int a0, int a1)
2104{
2105 if(a0==1&&a1==0) {
2106 // must swap
2107 emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0);
2108 }
2109 else if(a0!=0&&a1==0) {
2110 emit_mov(a1,1);
2111 if (a0>=0) emit_mov(a0,0);
2112 }
2113 else {
2114 if(a0>=0&&a0!=0) emit_mov(a0,0);
2115 if(a1>=0&&a1!=1) emit_mov(a1,1);
2116 }
2117}
2118
// Emit host code for MIPS R-type ALU ops (ADD/SUB family, SLT/SLTU,
// AND/OR/XOR/NOR) at instruction index i, using the current register
// mapping in i_regs. Writes to $zero (rt1[i]==0) are skipped; 64-bit
// doubleword variants assert (not used on PSX).
static void alu_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      signed char s1,s2,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      if(t>=0) {
        s1=get_reg(i_regs->regmap,rs1[i]);
        s2=get_reg(i_regs->regmap,rs2[i]);
        if(rs1[i]&&rs2[i]) {
          // both sources live: plain add/sub (bit 1 of opcode2 selects SUB)
          assert(s1>=0);
          assert(s2>=0);
          if(opcode2[i]&2) emit_sub(s1,s2,t);
          else emit_add(s1,s2,t);
        }
        else if(rs1[i]) {
          // rs2 is $zero: result is just rs1 (load from memory if unmapped)
          if(s1>=0) emit_mov(s1,t);
          else emit_loadreg(rs1[i],t);
        }
        else if(rs2[i]) {
          // rs1 is $zero: result is rs2, negated for SUB
          if(s2>=0) {
            if(opcode2[i]&2) emit_neg(s2,t);
            else emit_mov(s2,t);
          }
          else {
            emit_loadreg(rs2[i],t);
            if(opcode2[i]&2) emit_neg(t,t);
          }
        }
        else emit_zeroreg(t); // 0 op 0 == 0
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    assert(0);
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      signed char s1l,s2l,t;
      {
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs2[i]==0) // rx<r0
          {
            if(opcode2[i]==0x2a&&rs1[i]!=0) { // SLT
              // rx<0 is just the sign bit of rx
              assert(s1l>=0);
              emit_shrimm(s1l,31,t);
            }
            else // SLTU (unsigned can not be less than zero, 0<0)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz32(s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz32(s2l,t);
          }
          else{
            // general case: signed or unsigned compare-and-set
            assert(s1l>=0);assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less32(s1l,s2l,t);
            else // SLTU
              emit_set_if_carry32(s1l,s2l,t);
          }
        }
      }
    }
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      signed char s1l,s2l,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      {
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs1[i]&&rs2[i]) {
            // both sources live in host registers
            assert(s1l>=0);
            assert(s2l>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_not(tl,tl);
            }
          }
          else
          {
            // at least one source is $zero: fold to mov/zero/not
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
              }
              else emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else {
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else {
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
              }
              else emit_movimm(-1,tl); // NOR(0,0) == ~0
            }
          }
        }
      }
    }
  }
}
2258
// Emit host code for MIPS I-type immediate ops (LUI, ADDI/ADDIU,
// SLTI/SLTIU, ANDI/ORI/XORI) at instruction index i. Targets whose
// value is already known constant (isconst) are skipped; constant
// sources (wasconst) are folded at compile time via constmap.
void imm16_assemble(int i,struct regstat *i_regs)
{
  if (opcode[i]==0x0f) { // LUI
    if(rt1[i]) {
      signed char t;
      t=get_reg(i_regs->regmap,rt1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(!((i_regs->isconst>>t)&1))
          emit_movimm(imm[i]<<16,t);
      }
    }
  }
  if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      if(rs1[i]) {
        //assert(t>=0);
        //assert(s>=0);
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1)) {
            if(s<0) {
              // source not in a host reg: load it into the target first
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_addimm(t,imm[i],t);
            }else{
              if(!((i_regs->wasconst>>s)&1))
                emit_addimm(s,imm[i],t);
              else
                emit_movimm(constmap[i][s]+imm[i],t); // constant-fold
            }
          }
        }
      } else {
        // rs1 is $zero: ADDI becomes a move-immediate
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1))
            emit_movimm(imm[i],t);
        }
      }
    }
  }
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    if(rt1[i]) {
      signed char sl,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]) {
          assert(sl>=0);
          emit_addimm(sl,imm[i],tl);
        } else {
          emit_movimm(imm[i],tl);
        }
      }
    }
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if(rt1[i]) {
      //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
      signed char sl,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      sl=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(rs1[i]>0) {
          if(opcode[i]==0x0a) { // SLTI
            if(sl<0) {
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_slti32(t,imm[i],t);
            }else{
              emit_slti32(sl,imm[i],t);
            }
          }
          else { // SLTIU
            if(sl<0) {
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_sltiu32(t,imm[i],t);
            }else{
              emit_sltiu32(sl,imm[i],t);
            }
          }
        }else{
          // SLTI(U) with r0 is just stupid,
          // nonetheless examples can be found
          // (note: the 'else' below binds to the inner 'if(0<imm[i])')
          if(opcode[i]==0x0a) // SLTI
            if(0<imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          else // SLTIU
          {
            if(imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          }
        }
      }
    }
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(rt1[i]) {
      signed char sl,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
        if(opcode[i]==0x0c) //ANDI
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
              emit_andimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_andimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]&imm[i],tl); // constant-fold
            }
          }
          else
            emit_zeroreg(tl); // ANDI with $zero is 0
        }
        else
        {
          if(rs1[i]) {
            if(sl<0) {
              // source not in a host reg: load into target, operate in place
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
            }
            if(opcode[i]==0x0d) { // ORI
              if(sl<0) {
                emit_orimm(tl,imm[i],tl);
              }else{
                if(!((i_regs->wasconst>>sl)&1))
                  emit_orimm(sl,imm[i],tl);
                else
                  emit_movimm(constmap[i][sl]|imm[i],tl);
              }
            }
            if(opcode[i]==0x0e) { // XORI
              if(sl<0) {
                emit_xorimm(tl,imm[i],tl);
              }else{
                if(!((i_regs->wasconst>>sl)&1))
                  emit_xorimm(sl,imm[i],tl);
                else
                  emit_movimm(constmap[i][sl]^imm[i],tl);
              }
            }
          }
          else {
            // ORI/XORI with $zero is a move-immediate
            emit_movimm(imm[i],tl);
          }
        }
      }
    }
  }
}
2413
// Emit host code for shift-by-immediate ops (SLL/SRL/SRA). The 64-bit
// doubleword variants (DSLL etc.) assert — not used on PSX.
void shiftimm_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0&&!((i_regs->isconst>>t)&1)){
        if(rs1[i]==0)
        {
          emit_zeroreg(t); // shifting $zero stays zero
        }
        else
        {
          // if the source isn't mapped, load it into the target first
          if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
          if(imm[i]) {
            if(opcode2[i]==0) // SLL
            {
              emit_shlimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==2) // SRL
            {
              emit_shrimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==3) // SRA
            {
              emit_sarimm(s<0?t:s,imm[i],t);
            }
          }else{
            // Shift by zero
            if(s>=0 && s!=t) emit_mov(s,t);
          }
        }
      }
      //emit_storereg(rt1[i],t); //DEBUG
    }
  }
  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    assert(0);
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    assert(0);
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    assert(0);
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    assert(0);
  }
}
2470
2471#ifndef shift_assemble
3968e69e 2472static void shift_assemble(int i,struct regstat *i_regs)
57871462 2473{
3968e69e 2474 signed char s,t,shift;
2475 if (rt1[i] == 0)
2476 return;
2477 assert(opcode2[i]<=0x07); // SLLV/SRLV/SRAV
2478 t = get_reg(i_regs->regmap, rt1[i]);
2479 s = get_reg(i_regs->regmap, rs1[i]);
2480 shift = get_reg(i_regs->regmap, rs2[i]);
2481 if (t < 0)
2482 return;
2483
2484 if(rs1[i]==0)
2485 emit_zeroreg(t);
2486 else if(rs2[i]==0) {
2487 assert(s>=0);
2488 if(s!=t) emit_mov(s,t);
2489 }
2490 else {
2491 host_tempreg_acquire();
2492 emit_andimm(shift,31,HOST_TEMPREG);
2493 switch(opcode2[i]) {
2494 case 4: // SLLV
2495 emit_shl(s,HOST_TEMPREG,t);
2496 break;
2497 case 6: // SRLV
2498 emit_shr(s,HOST_TEMPREG,t);
2499 break;
2500 case 7: // SRAV
2501 emit_sar(s,HOST_TEMPREG,t);
2502 break;
2503 default:
2504 assert(0);
2505 }
2506 host_tempreg_release();
2507 }
57871462 2508}
3968e69e 2509
57871462 2510#endif
2511
8062d65a 2512enum {
2513 MTYPE_8000 = 0,
2514 MTYPE_8020,
2515 MTYPE_0000,
2516 MTYPE_A000,
2517 MTYPE_1F80,
2518};
2519
2520static int get_ptr_mem_type(u_int a)
2521{
2522 if(a < 0x00200000) {
2523 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
2524 // return wrong, must use memhandler for BIOS self-test to pass
2525 // 007 does similar stuff from a00 mirror, weird stuff
2526 return MTYPE_8000;
2527 return MTYPE_0000;
2528 }
2529 if(0x1f800000 <= a && a < 0x1f801000)
2530 return MTYPE_1F80;
2531 if(0x80200000 <= a && a < 0x80800000)
2532 return MTYPE_8020;
2533 if(0xa0000000 <= a && a < 0xa0200000)
2534 return MTYPE_A000;
2535 return MTYPE_8000;
2536}
2537
// Emit the fast-path address check for a load/store: predicts the target
// memory region (via smrv tracking, falling back to the region 'start'
// runs in), rewrites mirror addresses into the canonical RAM range using
// HOST_TEMPREG, and emits a conditional branch to the slow path.
// Returns the branch location to patch (for add_stub_r), or NULL if no
// slow-path branch was emitted. May set *addr_reg_override to
// HOST_TEMPREG; the CALLER must release the temp reg in that case.
static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
{
  void *jaddr = NULL;
  int type=0;
  int mr=rs1[i];
  if(((smrv_strong|smrv_weak)>>mr)&1) {
    // we have a prediction for the base register's value
    type=get_ptr_mem_type(smrv[mr]);
    //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
  }
  else {
    // use the mirror we are running on
    type=get_ptr_mem_type(start);
    //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
  }

  if(type==MTYPE_8020) { // RAM 80200000+ mirror
    host_tempreg_acquire();
    emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_0000) { // RAM 0 mirror
    host_tempreg_acquire();
    emit_orimm(addr,0x80000000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_A000) { // RAM A mirror
    host_tempreg_acquire();
    emit_andimm(addr,~0x20000000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_1F80) { // scratchpad
    if (psxH == (void *)0x1f800000) {
      // scratchpad is mapped at its guest address: range-check directly
      host_tempreg_acquire();
      emit_xorimm(addr,0x1f800000,HOST_TEMPREG);
      emit_cmpimm(HOST_TEMPREG,0x1000);
      host_tempreg_release();
      jaddr=out;
      emit_jc(0);
    }
    else {
      // do the usual RAM check, jump will go to the right handler
      type=0;
    }
  }

  if(type==0)
  {
    // generic RAM range check; out-of-range goes to the slow path
    emit_cmpimm(addr,RAM_SIZE);
    jaddr=out;
    #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
    // Hint to branch predictor that the branch is unlikely to be taken
    if(rs1[i]>=28)
      emit_jno_unlikely(0);
    else
    #endif
    emit_jno(0);
    if(ram_offset!=0) {
      // RAM is not mapped at guest address 0: add host offset
      host_tempreg_acquire();
      emit_addimm(addr,ram_offset,HOST_TEMPREG);
      addr=*addr_reg_override=HOST_TEMPREG;
    }
  }

  return jaddr;
}
2606
687b4580 2607// return memhandler, or get directly accessable address and return 0
2608static void *get_direct_memhandler(void *table, u_int addr,
2609 enum stub_type type, uintptr_t *addr_host)
2610{
2611 uintptr_t l1, l2 = 0;
2612 l1 = ((uintptr_t *)table)[addr>>12];
2613 if ((l1 & (1ul << (sizeof(l1)*8-1))) == 0) {
2614 uintptr_t v = l1 << 1;
2615 *addr_host = v + addr;
2616 return NULL;
2617 }
2618 else {
2619 l1 <<= 1;
2620 if (type == LOADB_STUB || type == LOADBU_STUB || type == STOREB_STUB)
2621 l2 = ((uintptr_t *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2622 else if (type == LOADH_STUB || type == LOADHU_STUB || type == STOREH_STUB)
2623 l2=((uintptr_t *)l1)[0x1000/4 + (addr&0xfff)/2];
2624 else
2625 l2=((uintptr_t *)l1)[(addr&0xfff)/4];
2626 if ((l2 & (1<<31)) == 0) {
2627 uintptr_t v = l2 << 1;
2628 *addr_host = v + (addr&0xfff);
2629 return NULL;
2630 }
2631 return (void *)(l2 << 1);
2632 }
2633}
2634
8062d65a 2635static void load_assemble(int i,struct regstat *i_regs)
57871462 2636{
7c3a5182 2637 int s,tl,addr;
57871462 2638 int offset;
b14b6a8f 2639 void *jaddr=0;
5bf843dc 2640 int memtarget=0,c=0;
d1e4ebd9 2641 int fastio_reg_override=-1;
57871462 2642 u_int hr,reglist=0;
57871462 2643 tl=get_reg(i_regs->regmap,rt1[i]);
2644 s=get_reg(i_regs->regmap,rs1[i]);
2645 offset=imm[i];
2646 for(hr=0;hr<HOST_REGS;hr++) {
2647 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2648 }
2649 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2650 if(s>=0) {
2651 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2652 if (c) {
2653 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2654 }
57871462 2655 }
57871462 2656 //printf("load_assemble: c=%d\n",c);
643aeae3 2657 //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset);
57871462 2658 // FIXME: Even if the load is a NOP, we should check for pagefaults...
581335b0 2659 if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80))
f18c0f46 2660 ||rt1[i]==0) {
5bf843dc 2661 // could be FIFO, must perform the read
f18c0f46 2662 // ||dummy read
5bf843dc 2663 assem_debug("(forced read)\n");
2664 tl=get_reg(i_regs->regmap,-1);
2665 assert(tl>=0);
5bf843dc 2666 }
2667 if(offset||s<0||c) addr=tl;
2668 else addr=s;
535d208a 2669 //if(tl<0) tl=get_reg(i_regs->regmap,-1);
2670 if(tl>=0) {
2671 //printf("load_assemble: c=%d\n",c);
643aeae3 2672 //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset);
535d208a 2673 assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
2674 reglist&=~(1<<tl);
1edfcc68 2675 if(!c) {
1edfcc68 2676 #ifdef R29_HACK
2677 // Strmnnrmn's speed hack
2678 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
2679 #endif
2680 {
d1e4ebd9 2681 jaddr=emit_fastpath_cmp_jump(i,addr,&fastio_reg_override);
535d208a 2682 }
1edfcc68 2683 }
2684 else if(ram_offset&&memtarget) {
d1e4ebd9 2685 host_tempreg_acquire();
1edfcc68 2686 emit_addimm(addr,ram_offset,HOST_TEMPREG);
d1e4ebd9 2687 fastio_reg_override=HOST_TEMPREG;
535d208a 2688 }
2689 int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
2690 if (opcode[i]==0x20) { // LB
2691 if(!c||memtarget) {
2692 if(!dummy) {
57871462 2693 {
535d208a 2694 int x=0,a=tl;
535d208a 2695 if(!c) a=addr;
d1e4ebd9 2696 if(fastio_reg_override>=0) a=fastio_reg_override;
b1570849 2697
9c45ca93 2698 emit_movsbl_indexed(x,a,tl);
57871462 2699 }
57871462 2700 }
535d208a 2701 if(jaddr)
b14b6a8f 2702 add_stub_r(LOADB_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2703 }
535d208a 2704 else
2705 inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2706 }
2707 if (opcode[i]==0x21) { // LH
2708 if(!c||memtarget) {
2709 if(!dummy) {
9c45ca93 2710 int x=0,a=tl;
2711 if(!c) a=addr;
d1e4ebd9 2712 if(fastio_reg_override>=0) a=fastio_reg_override;
9c45ca93 2713 emit_movswl_indexed(x,a,tl);
57871462 2714 }
535d208a 2715 if(jaddr)
b14b6a8f 2716 add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2717 }
535d208a 2718 else
2719 inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2720 }
2721 if (opcode[i]==0x23) { // LW
2722 if(!c||memtarget) {
2723 if(!dummy) {
dadf55f2 2724 int a=addr;
d1e4ebd9 2725 if(fastio_reg_override>=0) a=fastio_reg_override;
9c45ca93 2726 emit_readword_indexed(0,a,tl);
57871462 2727 }
535d208a 2728 if(jaddr)
b14b6a8f 2729 add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2730 }
535d208a 2731 else
2732 inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2733 }
2734 if (opcode[i]==0x24) { // LBU
2735 if(!c||memtarget) {
2736 if(!dummy) {
9c45ca93 2737 int x=0,a=tl;
2738 if(!c) a=addr;
d1e4ebd9 2739 if(fastio_reg_override>=0) a=fastio_reg_override;
b1570849 2740
9c45ca93 2741 emit_movzbl_indexed(x,a,tl);
57871462 2742 }
535d208a 2743 if(jaddr)
b14b6a8f 2744 add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2745 }
535d208a 2746 else
2747 inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2748 }
2749 if (opcode[i]==0x25) { // LHU
2750 if(!c||memtarget) {
2751 if(!dummy) {
9c45ca93 2752 int x=0,a=tl;
2753 if(!c) a=addr;
d1e4ebd9 2754 if(fastio_reg_override>=0) a=fastio_reg_override;
9c45ca93 2755 emit_movzwl_indexed(x,a,tl);
57871462 2756 }
535d208a 2757 if(jaddr)
b14b6a8f 2758 add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2759 }
535d208a 2760 else
2761 inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2762 }
2763 if (opcode[i]==0x27) { // LWU
7c3a5182 2764 assert(0);
535d208a 2765 }
2766 if (opcode[i]==0x37) { // LD
9c45ca93 2767 assert(0);
57871462 2768 }
535d208a 2769 }
d1e4ebd9 2770 if (fastio_reg_override == HOST_TEMPREG)
2771 host_tempreg_release();
57871462 2772}
2773
2774#ifndef loadlr_assemble
3968e69e 2775static void loadlr_assemble(int i,struct regstat *i_regs)
57871462 2776{
3968e69e 2777 int s,tl,temp,temp2,addr;
2778 int offset;
2779 void *jaddr=0;
2780 int memtarget=0,c=0;
2781 int fastio_reg_override=-1;
2782 u_int hr,reglist=0;
2783 tl=get_reg(i_regs->regmap,rt1[i]);
2784 s=get_reg(i_regs->regmap,rs1[i]);
2785 temp=get_reg(i_regs->regmap,-1);
2786 temp2=get_reg(i_regs->regmap,FTEMP);
2787 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
2788 assert(addr<0);
2789 offset=imm[i];
2790 for(hr=0;hr<HOST_REGS;hr++) {
2791 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2792 }
2793 reglist|=1<<temp;
2794 if(offset||s<0||c) addr=temp2;
2795 else addr=s;
2796 if(s>=0) {
2797 c=(i_regs->wasconst>>s)&1;
2798 if(c) {
2799 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
2800 }
2801 }
2802 if(!c) {
2803 emit_shlimm(addr,3,temp);
2804 if (opcode[i]==0x22||opcode[i]==0x26) {
2805 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
2806 }else{
2807 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
2808 }
2809 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastio_reg_override);
2810 }
2811 else {
2812 if(ram_offset&&memtarget) {
2813 host_tempreg_acquire();
2814 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
2815 fastio_reg_override=HOST_TEMPREG;
2816 }
2817 if (opcode[i]==0x22||opcode[i]==0x26) {
2818 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
2819 }else{
2820 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
2821 }
2822 }
2823 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
2824 if(!c||memtarget) {
2825 int a=temp2;
2826 if(fastio_reg_override>=0) a=fastio_reg_override;
2827 emit_readword_indexed(0,a,temp2);
2828 if(fastio_reg_override==HOST_TEMPREG) host_tempreg_release();
2829 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
2830 }
2831 else
2832 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
2833 if(rt1[i]) {
2834 assert(tl>=0);
2835 emit_andimm(temp,24,temp);
2836 if (opcode[i]==0x22) // LWL
2837 emit_xorimm(temp,24,temp);
2838 host_tempreg_acquire();
2839 emit_movimm(-1,HOST_TEMPREG);
2840 if (opcode[i]==0x26) {
2841 emit_shr(temp2,temp,temp2);
2842 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
2843 }else{
2844 emit_shl(temp2,temp,temp2);
2845 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
2846 }
2847 host_tempreg_release();
2848 emit_or(temp2,tl,tl);
2849 }
2850 //emit_storereg(rt1[i],tl); // DEBUG
2851 }
2852 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2853 assert(0);
2854 }
57871462 2855}
2856#endif
2857
// Emit host code for store instructions (SB/SH/SW). Like load_assemble:
// constant RAM addresses store directly, non-RAM goes through stubs.
// Also emits the self-modifying-code (invalid_code) check, and a
// same-block write detector that flushes state and re-enters via
// get_addr_ht when a store hits the block being compiled.
void store_assemble(int i,struct regstat *i_regs)
{
  int s,tl;
  int addr,temp;
  int offset;
  void *jaddr=0;
  enum stub_type type;
  int memtarget=0,c=0;
  int agr=AGEN1+(i&1);
  int fastio_reg_override=-1;
  u_int hr,reglist=0;
  tl=get_reg(i_regs->regmap,rs2[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,agr);
  if(temp<0) temp=get_reg(i_regs->regmap,-1);
  offset=imm[i];
  if(s>=0) {
    // c: constant base address; memtarget: it points into RAM
    c=(i_regs->wasconst>>s)&1;
    if(c) {
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
    }
  }
  assert(tl>=0);
  assert(temp>=0);
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
  if(offset||s<0||c) addr=temp;
  else addr=s;
  if(!c) {
    jaddr=emit_fastpath_cmp_jump(i,addr,&fastio_reg_override);
  }
  else if(ram_offset&&memtarget) {
    host_tempreg_acquire();
    emit_addimm(addr,ram_offset,HOST_TEMPREG);
    fastio_reg_override=HOST_TEMPREG;
  }

  if (opcode[i]==0x28) { // SB
    if(!c||memtarget) {
      int x=0,a=temp;
      if(!c) a=addr;
      if(fastio_reg_override>=0) a=fastio_reg_override;
      emit_writebyte_indexed(tl,x,a);
    }
    type=STOREB_STUB;
  }
  if (opcode[i]==0x29) { // SH
    if(!c||memtarget) {
      int x=0,a=temp;
      if(!c) a=addr;
      if(fastio_reg_override>=0) a=fastio_reg_override;
      emit_writehword_indexed(tl,x,a);
    }
    type=STOREH_STUB;
  }
  if (opcode[i]==0x2B) { // SW
    if(!c||memtarget) {
      int a=addr;
      if(fastio_reg_override>=0) a=fastio_reg_override;
      emit_writeword_indexed(tl,0,a);
    }
    type=STOREW_STUB;
  }
  if (opcode[i]==0x3F) { // SD
    assert(0);
    type=STORED_STUB;
  }
  if(fastio_reg_override==HOST_TEMPREG)
    host_tempreg_release();
  if(jaddr) {
    // PCSX store handlers don't check invcode again
    reglist|=1<<addr;
    add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
    jaddr=0;
  }
  // self-modifying code check: call invalidate if invalid_code is set
  if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
    if(!c||memtarget) {
      #ifdef DESTRUCTIVE_SHIFT
      // The x86 shift operation is 'destructive'; it overwrites the
      // source register, so we need to make a copy first and use that.
      addr=temp;
      #endif
      #if defined(HOST_IMM8)
      int ir=get_reg(i_regs->regmap,INVCP);
      assert(ir>=0);
      emit_cmpmem_indexedsr12_reg(ir,addr,1);
      #else
      emit_cmpmem_indexedsr12_imm(invalid_code,addr,1);
      #endif
      #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
      emit_callne(invalidate_addr_reg[addr]);
      #else
      void *jaddr2 = out;
      emit_jne(0);
      add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<<HOST_CCREG),addr,0,0,0);
      #endif
    }
  }
  // note: addr_val is only meaningful when c is set (used under c below)
  u_int addr_val=constmap[i][s]+offset;
  if(jaddr) {
    add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
  } else if(c&&!memtarget) {
    inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist);
  }
  // basic current block modification detection..
  // not looking back as that should be in mips cache already
  // (see Spyro2 title->attract mode)
  if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
    SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
    assert(i_regs->regmap==regs[i].regmap); // not delay slot
    if(i_regs->regmap==regs[i].regmap) {
      // flush everything and restart execution after this store
      load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i);
      wb_dirtys(regs[i].regmap_entry,regs[i].wasdirty);
      emit_movimm(start+i*4+4,0);
      emit_writeword(0,&pcaddr);
      emit_addimm(HOST_CCREG,2,HOST_CCREG);
      emit_far_call(get_addr_ht);
      emit_jmpreg(0);
    }
  }
}
2981
3968e69e 2982static void storelr_assemble(int i,struct regstat *i_regs)
57871462 2983{
9c45ca93 2984 int s,tl;
57871462 2985 int temp;
57871462 2986 int offset;
b14b6a8f 2987 void *jaddr=0;
df4dc2b1 2988 void *case1, *case2, *case3;
2989 void *done0, *done1, *done2;
af4ee1fe 2990 int memtarget=0,c=0;
fab5d06d 2991 int agr=AGEN1+(i&1);
57871462 2992 u_int hr,reglist=0;
57871462 2993 tl=get_reg(i_regs->regmap,rs2[i]);
2994 s=get_reg(i_regs->regmap,rs1[i]);
fab5d06d 2995 temp=get_reg(i_regs->regmap,agr);
2996 if(temp<0) temp=get_reg(i_regs->regmap,-1);
57871462 2997 offset=imm[i];
2998 if(s>=0) {
2999 c=(i_regs->isconst>>s)&1;
af4ee1fe 3000 if(c) {
3001 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3002 }
57871462 3003 }
3004 assert(tl>=0);
3005 for(hr=0;hr<HOST_REGS;hr++) {
3006 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3007 }
535d208a 3008 assert(temp>=0);
1edfcc68 3009 if(!c) {
3010 emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
3011 if(!offset&&s!=temp) emit_mov(s,temp);
b14b6a8f 3012 jaddr=out;
1edfcc68 3013 emit_jno(0);
3014 }
3015 else
3016 {
3017 if(!memtarget||!rs1[i]) {
b14b6a8f 3018 jaddr=out;
535d208a 3019 emit_jmp(0);
57871462 3020 }
535d208a 3021 }
3968e69e 3022 if(ram_offset)
3023 emit_addimm_no_flags(ram_offset,temp);
535d208a 3024
3025 if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
9c45ca93 3026 assert(0);
535d208a 3027 }
57871462 3028
9c45ca93 3029 emit_xorimm(temp,3,temp);
535d208a 3030 emit_testimm(temp,2);
df4dc2b1 3031 case2=out;
535d208a 3032 emit_jne(0);
3033 emit_testimm(temp,1);
df4dc2b1 3034 case1=out;
535d208a 3035 emit_jne(0);
3036 // 0
3037 if (opcode[i]==0x2A) { // SWL
3038 emit_writeword_indexed(tl,0,temp);
3039 }
3968e69e 3040 else if (opcode[i]==0x2E) { // SWR
535d208a 3041 emit_writebyte_indexed(tl,3,temp);
3042 }
3968e69e 3043 else
9c45ca93 3044 assert(0);
df4dc2b1 3045 done0=out;
535d208a 3046 emit_jmp(0);
3047 // 1
df4dc2b1 3048 set_jump_target(case1, out);
535d208a 3049 if (opcode[i]==0x2A) { // SWL
3050 // Write 3 msb into three least significant bytes
3051 if(rs2[i]) emit_rorimm(tl,8,tl);
3052 emit_writehword_indexed(tl,-1,temp);
3053 if(rs2[i]) emit_rorimm(tl,16,tl);
3054 emit_writebyte_indexed(tl,1,temp);
3055 if(rs2[i]) emit_rorimm(tl,8,tl);
3056 }
3968e69e 3057 else if (opcode[i]==0x2E) { // SWR
535d208a 3058 // Write two lsb into two most significant bytes
3059 emit_writehword_indexed(tl,1,temp);
3060 }
df4dc2b1 3061 done1=out;
535d208a 3062 emit_jmp(0);
3063 // 2
df4dc2b1 3064 set_jump_target(case2, out);
535d208a 3065 emit_testimm(temp,1);
df4dc2b1 3066 case3=out;
535d208a 3067 emit_jne(0);
3068 if (opcode[i]==0x2A) { // SWL