drc: only override default cycle_multiplier
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - new_dynarec.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21#include <stdlib.h>
22#include <stdint.h> //include for uint64_t
23#include <assert.h>
d848b60a 24#include <errno.h>
4600ba03 25#include <sys/mman.h>
d148d265 26#ifdef __MACH__
27#include <libkern/OSCacheControl.h>
28#endif
1e212a25 29#ifdef _3DS
30#include <3ds_utils.h>
31#endif
32#ifdef VITA
33#include <psp2/kernel/sysmem.h>
34static int sceBlock;
35#endif
57871462 36
d148d265 37#include "new_dynarec_config.h"
3968e69e 38#include "../psxhle.h"
39#include "../psxinterpreter.h"
81dbbf4c 40#include "../gte.h"
41#include "emu_if.h" // emulator interface
57871462 42
d1e4ebd9 43#define noinline __attribute__((noinline,noclone))
b14b6a8f 44#ifndef ARRAY_SIZE
45#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
46#endif
e3c6bdb5 47#ifndef min
48#define min(a, b) ((b) < (a) ? (b) : (a))
49#endif
32631e6a 50#ifndef max
51#define max(a, b) ((b) > (a) ? (b) : (a))
52#endif
b14b6a8f 53
4600ba03 54//#define DISASM
32631e6a 55//#define ASSEM_PRINT
56
57#ifdef ASSEM_PRINT
58#define assem_debug printf
59#else
4600ba03 60#define assem_debug(...)
32631e6a 61#endif
62//#define inv_debug printf
4600ba03 63#define inv_debug(...)
57871462 64
65#ifdef __i386__
66#include "assem_x86.h"
67#endif
68#ifdef __x86_64__
69#include "assem_x64.h"
70#endif
71#ifdef __arm__
72#include "assem_arm.h"
73#endif
be516ebe 74#ifdef __aarch64__
75#include "assem_arm64.h"
76#endif
57871462 77
81dbbf4c 78#define RAM_SIZE 0x200000
57871462 79#define MAXBLOCK 4096
80#define MAX_OUTPUT_BLOCK_SIZE 262144
2573466a 81
// Layout of the dynarec's code memory: the translation cache itself,
// followed by a small trampoline area used by emit_far_jump/emit_far_call
// to reach functions that are out of direct branch range.
struct ndrc_mem
{
  u_char translation_cache[1 << TARGET_SIZE_2];
  struct
  {
    // trampoline code slots; ops[i] presumably jumps via the pointer
    // stored in f[i] — see get_trampoline()
    struct tramp_insns ops[2048 / sizeof(struct tramp_insns)];
    // target function pointers, filled lazily by get_trampoline()
    const void *f[2048 / sizeof(void *)];
  } tramp;
};
91
92#ifdef BASE_ADDR_DYNAMIC
93static struct ndrc_mem *ndrc;
94#else
95static struct ndrc_mem ndrc_ __attribute__((aligned(4096)));
96static struct ndrc_mem *ndrc = &ndrc_;
97#endif
98
// stubs: kinds of out-of-line code fragments generated for slow paths
// (cycle checks, FPU exceptions, memory access fallbacks, invalidation)
enum stub_type {
  CC_STUB = 1,       // cycle-count / interrupt check
  FP_STUB = 2,       // FPU (COP1) unusable exception
  LOADB_STUB = 3,
  LOADH_STUB = 4,
  LOADW_STUB = 5,
  LOADD_STUB = 6,
  LOADBU_STUB = 7,
  LOADHU_STUB = 8,
  STOREB_STUB = 9,
  STOREH_STUB = 10,
  STOREW_STUB = 11,
  STORED_STUB = 12,
  STORELR_STUB = 13, // unaligned store (SWL/SWR style)
  INVCODE_STUB = 14, // self-modifying-code invalidation check
};
116
// Per-instruction register allocation state used during compilation.
struct regstat
{
  signed char regmap_entry[HOST_REGS]; // mapping required on entry to the insn
  signed char regmap[HOST_REGS];       // mapping after the insn executes
  uint64_t wasdirty;  // host regs dirty (unwritten-back) before the insn
  uint64_t dirty;     // host regs dirty after the insn
  uint64_t u;         // guest regs whose values are unneeded past this point
  u_int wasconst;     // host regs holding known constants before the insn
  u_int isconst;      // host regs holding known constants after the insn
  u_int loadedconst;  // host regs that have constants loaded
  u_int waswritten;   // MIPS regs that were used as store base before
};
129
// Linked-list node mapping a guest virtual address to translated code.
// note: asm depends on this layout — do not reorder fields
struct ll_entry
{
  u_int vaddr;         // guest virtual address of the block entry
  u_int reg_sv_flags;  // guest regs holding 32-bit sign-extended values (entry requirement)
  void *addr;          // translated code address
  struct ll_entry *next;
};
138
// One 2-way hash table bin: slot 0 is the most recently inserted entry
// (see hash_table_add), slot 1 the demoted older one.
struct ht_entry
{
  u_int vaddr[2];   // guest addresses; -1 marks an empty slot
  void *tcaddr[2];  // matching translation cache addresses
};
144
// Pending slow-path stub recorded during assembly; emitted after the
// main block body. The a..e fields are type-specific arguments passed
// through add_stub()/add_stub_r().
struct code_stub
{
  enum stub_type type;
  void *addr;     // location in the block that branches to the stub
  void *retaddr;  // where the stub returns to in the block
  u_int a;
  uintptr_t b;
  uintptr_t c;
  u_int d;
  u_int e;
};
156
// A branch in the emitted code that must be linked to another block.
struct link_entry
{
  void *addr;    // location of the branch instruction to patch
  u_int target;  // guest virtual address it should reach
  u_int ext;     // nonzero if the branch may leave the current page (external)
};
163
// Decoded form of each guest instruction in the block being compiled,
// filled by the decode pass and consulted by all later passes.
static struct decoded_insn
{
  u_char itype;     // instruction class (LOAD, STORE, ALU, ...)
  u_char opcode;    // primary opcode field
  u_char opcode2;   // function field (special/regimm/cop sub-op)
  u_char rs1;       // source register 1 (0 = none/$zero)
  u_char rs2;       // source register 2
  u_char rt1;       // target register 1
  u_char rt2;       // target register 2
  u_char lt1;
  u_char bt:1;      // branch target (block entry point)
  u_char ooo:1;     // out-of-order execution allowed
  u_char is_ds:1;   // this insn is a delay slot
  u_char is_jump:1;  // any jump/branch
  u_char is_ujump:1; // unconditional jump (J/JAL/JR/JALR style)
} dops[MAXBLOCK];
180
e2b5e7aa 181 // used by asm:
182 u_char *out;
df4dc2b1 183 struct ht_entry hash_table[65536] __attribute__((aligned(16)));
e2b5e7aa 184 struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
185 struct ll_entry *jump_dirty[4096];
186
187 static struct ll_entry *jump_out[4096];
188 static u_int start;
189 static u_int *source;
190 static char insn[MAXBLOCK][10];
bedfea38 191 static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
192 static uint64_t gte_rt[MAXBLOCK];
193 static uint64_t gte_unneeded[MAXBLOCK];
ffb0b9e0 194 static u_int smrv[32]; // speculated MIPS register values
195 static u_int smrv_strong; // mask or regs that are likely to have correct values
196 static u_int smrv_weak; // same, but somewhat less likely
197 static u_int smrv_strong_next; // same, but after current insn executes
198 static u_int smrv_weak_next;
e2b5e7aa 199 static int imm[MAXBLOCK];
200 static u_int ba[MAXBLOCK];
e2b5e7aa 201 static uint64_t unneeded_reg[MAXBLOCK];
e2b5e7aa 202 static uint64_t branch_unneeded_reg[MAXBLOCK];
afec9d44 203 static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // pre-instruction i?
40fca85b 204 // contains 'real' consts at [i] insn, but may differ from what's actually
205 // loaded in host reg as 'final' value is always loaded, see get_final_value()
206 static uint32_t current_constmap[HOST_REGS];
207 static uint32_t constmap[MAXBLOCK][HOST_REGS];
956f3129 208 static struct regstat regs[MAXBLOCK];
209 static struct regstat branch_regs[MAXBLOCK];
e2b5e7aa 210 static signed char minimum_free_regs[MAXBLOCK];
211 static u_int needed_reg[MAXBLOCK];
212 static u_int wont_dirty[MAXBLOCK];
213 static u_int will_dirty[MAXBLOCK];
214 static int ccadj[MAXBLOCK];
215 static int slen;
df4dc2b1 216 static void *instr_addr[MAXBLOCK];
643aeae3 217 static struct link_entry link_addr[MAXBLOCK];
e2b5e7aa 218 static int linkcount;
b14b6a8f 219 static struct code_stub stubs[MAXBLOCK*3];
e2b5e7aa 220 static int stubcount;
221 static u_int literals[1024][2];
222 static int literalcount;
223 static int is_delayslot;
e2b5e7aa 224 static char shadow[1048576] __attribute__((aligned(16)));
225 static void *copy;
226 static int expirep;
227 static u_int stop_after_jal;
39b71d9a 228 static u_int f1_hack; // 0 - off, ~0 - capture address, else addr
a327ad27 229#ifndef RAM_FIXED
01d26796 230 static uintptr_t ram_offset;
a327ad27 231#else
01d26796 232 static const uintptr_t ram_offset=0;
a327ad27 233#endif
e2b5e7aa 234
235 int new_dynarec_hacks;
d62c125a 236 int new_dynarec_hacks_pergame;
32631e6a 237 int new_dynarec_hacks_old;
e2b5e7aa 238 int new_dynarec_did_compile;
687b4580 239
d62c125a 240 #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x))
241
687b4580 242 extern int cycle_count; // ... until end of the timeslice, counts -N -> 0
243 extern int last_count; // last absolute target, often = next_interupt
244 extern int pcaddr;
245 extern int pending_exception;
246 extern int branch_target;
d1e4ebd9 247 extern uintptr_t mini_ht[32][2];
57871462 248 extern u_char restore_candidate[512];
57871462 249
250 /* registers that may be allocated */
251 /* 1-31 gpr */
7c3a5182 252#define LOREG 32 // lo
253#define HIREG 33 // hi
00fa9369 254//#define FSREG 34 // FPU status (FCSR)
57871462 255#define CSREG 35 // Coprocessor status
256#define CCREG 36 // Cycle count
257#define INVCP 37 // Pointer to invalid_code
1edfcc68 258//#define MMREG 38 // Pointer to memory_map
9c45ca93 259//#define ROREG 39 // ram offset (if rdram!=0x80000000)
619e5ded 260#define TEMPREG 40
261#define FTEMP 40 // FPU temporary register
262#define PTEMP 41 // Prefetch temporary register
1edfcc68 263//#define TLREG 42 // TLB mapping offset
619e5ded 264#define RHASH 43 // Return address hash
265#define RHTBL 44 // Return address hash table address
266#define RTEMP 45 // JR/JALR address register
267#define MAXREG 45
268#define AGEN1 46 // Address generation temporary register
1edfcc68 269//#define AGEN2 47 // Address generation temporary register
270//#define MGEN1 48 // Maptable address generation temporary register
271//#define MGEN2 49 // Maptable address generation temporary register
619e5ded 272#define BTREG 50 // Branch target temporary register
57871462 273
274 /* instruction types */
275#define NOP 0 // No operation
276#define LOAD 1 // Load
277#define STORE 2 // Store
278#define LOADLR 3 // Unaligned load
279#define STORELR 4 // Unaligned store
9f51b4b9 280#define MOV 5 // Move
57871462 281#define ALU 6 // Arithmetic/logic
282#define MULTDIV 7 // Multiply/divide
283#define SHIFT 8 // Shift by register
284#define SHIFTIMM 9// Shift by immediate
285#define IMM16 10 // 16-bit immediate
286#define RJUMP 11 // Unconditional jump to register
287#define UJUMP 12 // Unconditional jump
288#define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
289#define SJUMP 14 // Conditional branch (regimm format)
290#define COP0 15 // Coprocessor 0
291#define COP1 16 // Coprocessor 1
292#define C1LS 17 // Coprocessor 1 load/store
ad49de89 293//#define FJUMP 18 // Conditional branch (floating point)
00fa9369 294//#define FLOAT 19 // Floating point unit
295//#define FCONV 20 // Convert integer to float
296//#define FCOMP 21 // Floating point compare (sets FSREG)
57871462 297#define SYSCALL 22// SYSCALL
298#define OTHER 23 // Other
299#define SPAN 24 // Branch/delay slot spans 2 pages
300#define NI 25 // Not implemented
7139f3c8 301#define HLECALL 26// PCSX fake opcodes for HLE
b9b61529 302#define COP2 27 // Coprocessor 2 move
303#define C2LS 28 // Coprocessor 2 load/store
304#define C2OP 29 // Coprocessor 2 operation
1e973cb0 305#define INTCALL 30// Call interpreter to handle rare corner cases
57871462 306
57871462 307 /* branch codes */
308#define TAKEN 1
309#define NOTTAKEN 2
310#define NULLDS 3
311
7c3a5182 312#define DJT_1 (void *)1l // no function, just a label in assem_debug log
313#define DJT_2 (void *)2l
314
57871462 315// asm linkage
3968e69e 316int new_recompile_block(u_int addr);
57871462 317void *get_addr_ht(u_int vaddr);
318void invalidate_block(u_int block);
319void invalidate_addr(u_int addr);
320void remove_hash(int vaddr);
57871462 321void dyna_linker();
322void dyna_linker_ds();
323void verify_code();
57871462 324void verify_code_ds();
325void cc_interrupt();
326void fp_exception();
327void fp_exception_ds();
3968e69e 328void jump_to_new_pc();
81dbbf4c 329void call_gteStall();
7139f3c8 330void new_dyna_leave();
57871462 331
57871462 332// Needed by assembler
ad49de89 333static void wb_register(signed char r,signed char regmap[],uint64_t dirty);
334static void wb_dirtys(signed char i_regmap[],uint64_t i_dirty);
335static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_dirty,int addr);
e2b5e7aa 336static void load_all_regs(signed char i_regmap[]);
337static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
338static void load_regs_entry(int t);
ad49de89 339static void load_all_consts(signed char regmap[],u_int dirty,int i);
81dbbf4c 340static u_int get_host_reglist(const signed char *regmap);
e2b5e7aa 341
3968e69e 342static int verify_dirty(const u_int *ptr);
e2b5e7aa 343static int get_final_value(int hr, int i, int *value);
b14b6a8f 344static void add_stub(enum stub_type type, void *addr, void *retaddr,
345 u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e);
346static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
81dbbf4c 347 int i, int addr_reg, const struct regstat *i_regs, int ccadj, u_int reglist);
643aeae3 348static void add_to_linker(void *addr, u_int target, int ext);
8062d65a 349static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override);
687b4580 350static void *get_direct_memhandler(void *table, u_int addr,
351 enum stub_type type, uintptr_t *addr_host);
32631e6a 352static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist);
687b4580 353static void pass_args(int a0, int a1);
2a014d73 354static void emit_far_jump(const void *f);
355static void emit_far_call(const void *f);
57871462 356
// Toggle the code region [start, end) between writable and executable
// on W^X platforms; a no-op unless NO_WRITE_EXEC is defined.
static void mprotect_w_x(void *start, void *end, int is_x)
{
#ifdef NO_WRITE_EXEC
  #if defined(VITA)
  // *Open* enables write on all memory that was
  // allocated by sceKernelAllocMemBlockForVM()?
  if (is_x)
    sceKernelCloseVMDomain();
  else
    sceKernelOpenVMDomain();
  #else
  // round start down to a page boundary; end is passed through as-is
  u_long mstart = (u_long)start & ~4095ul;
  u_long mend = (u_long)end;
  if (mprotect((void *)mstart, mend - mstart,
               PROT_READ | (is_x ? PROT_EXEC : PROT_WRITE)) != 0)
    SysPrintf("mprotect(%c) failed: %s\n", is_x ? 'x' : 'w', strerror(errno));
  #endif
#endif
}
376
// Make a translation cache region writable before emitting/patching code.
static void start_tcache_write(void *start, void *end)
{
  mprotect_w_x(start, end, 0);
}
381
// Finish writing translated code: synchronize the instruction cache for
// the written range (platform-specific), then flip the pages back to
// executable. Cache sync is only needed on ARM/AArch64 hosts.
static void end_tcache_write(void *start, void *end)
{
#if defined(__arm__) || defined(__aarch64__)
  size_t len = (char *)end - (char *)start;
  #if defined(__BLACKBERRY_QNX__)
  msync(start, len, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
  #elif defined(__MACH__)
  sys_cache_control(kCacheFunctionPrepareForExecution, start, len);
  #elif defined(VITA)
  sceKernelSyncVMDomain(sceBlock, start, len);
  #elif defined(_3DS)
  ctr_flush_invalidate_cache();
  #elif defined(__aarch64__)
  // as of 2021, __clear_cache() is still broken on arm64
  // so here is a custom one :(
  clear_cache_arm64(start, end);
  #else
  __clear_cache(start, end);
  #endif
  (void)len;  // some branches above don't use it
#endif

  mprotect_w_x(start, end, 1);
}
406
// Open a new output block: make up to MAX_OUTPUT_BLOCK_SIZE bytes at the
// current output position writable (clamped to the end of the cache) and
// return the block's start address.
static void *start_block(void)
{
  u_char *end = out + MAX_OUTPUT_BLOCK_SIZE;
  if (end > ndrc->translation_cache + sizeof(ndrc->translation_cache))
    end = ndrc->translation_cache + sizeof(ndrc->translation_cache);
  start_tcache_write(out, end);
  return out;
}
415
// Close an output block opened with start_block(): sync caches and
// re-protect everything emitted between start and the current out pointer.
static void end_block(void *start)
{
  end_tcache_write(start, out);
}
420
// also takes care of w^x mappings when patching code

// One bit per 4k translation-cache page that needs an icache flush;
// each u_int covers a 128k (1<<17) span of the cache.
static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];

// Record that the page containing target was patched. The first time a
// page is marked, it is also made writable so the patch can proceed.
static void mark_clear_cache(void *target)
{
  uintptr_t offset = (u_char *)target - ndrc->translation_cache;
  u_int mask = 1u << ((offset >> 12) & 31);
  if (!(needs_clear_cache[offset >> 17] & mask)) {
    char *start = (char *)((uintptr_t)target & ~4095l);
    start_tcache_write(start, start + 4095);
    needs_clear_cache[offset >> 17] |= mask;
  }
}
434
// Clearing the cache is rather slow on ARM Linux, so mark the areas
// that need to be cleared, and then only clear these areas once.
static void do_clear_cache(void)
{
  int i, j;
  for (i = 0; i < (1<<(TARGET_SIZE_2-17)); i++)
  {
    u_int bitmap = needs_clear_cache[i];
    if (!bitmap)
      continue;
    for (j = 0; j < 32; j++)
    {
      u_char *start, *end;
      if (!(bitmap & (1<<j)))
        continue;

      start = ndrc->translation_cache + i*131072 + j*4096;
      end = start + 4095;
      // coalesce runs of consecutive dirty 4k pages into a single flush;
      // note the inner loop advances the outer loop's j on purpose
      for (j++; j < 32; j++) {
        if (!(bitmap & (1<<j)))
          break;
        end += 4096;
      }
      end_tcache_write(start, end);
    }
    needs_clear_cache[i] = 0;
  }
}
463
57871462 464//#define DEBUG_CYCLE_COUNT 1
465
b6e87b2b 466#define NO_CYCLE_PENALTY_THR 12
467
26bd3dad 468int cycle_multiplier = CYCLE_MULT_DEFAULT; // 100 for 1.0
a3203cf4 469int cycle_multiplier_override;
32631e6a 470int cycle_multiplier_old;
4e9dcd7f 471
472static int CLOCK_ADJUST(int x)
473{
26bd3dad 474 int m = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT
a3203cf4 475 ? cycle_multiplier_override : cycle_multiplier;
4e9dcd7f 476 int s=(x>>31)|1;
a3203cf4 477 return (x * m + s * 50) / 100;
4e9dcd7f 478}
479
// True when the delay slot at i+1 writes the register that the register
// jump at i reads (JR/JALR hazard: rs must be read before the slot runs).
static int ds_writes_rjump_rs(int i)
{
  return dops[i].rs1 != 0 && (dops[i].rs1 == dops[i+1].rt1 || dops[i].rs1 == dops[i+1].rt2);
}
484
94d23bb9 485static u_int get_page(u_int vaddr)
57871462 486{
0ce47d46 487 u_int page=vaddr&~0xe0000000;
488 if (page < 0x1000000)
489 page &= ~0x0e00000; // RAM mirrors
490 page>>=12;
57871462 491 if(page>2048) page=2048+(page&2047);
94d23bb9 492 return page;
493}
494
// no virtual mem in PCSX, so the "virtual page" used for jump_dirty
// is simply the same index as the physical page.
static u_int get_vpage(u_int vaddr)
{
  return get_page(vaddr);
}
94d23bb9 500
// Select the hash table bin for a virtual address (fold high half into
// low 16 bits with xor).
static struct ht_entry *hash_table_get(u_int vaddr)
{
  return &hash_table[((vaddr>>16)^vaddr)&0xFFFF];
}
505
// Insert into slot 0 of a bin; the previous slot-0 entry is demoted to
// slot 1 and the old slot-1 entry is dropped. Order of the assignments
// below matters.
static void hash_table_add(struct ht_entry *ht_bin, u_int vaddr, void *tcaddr)
{
  ht_bin->vaddr[1] = ht_bin->vaddr[0];
  ht_bin->tcaddr[1] = ht_bin->tcaddr[0];
  ht_bin->vaddr[0] = vaddr;
  ht_bin->tcaddr[0] = tcaddr;
}
513
// Whether code at tcaddr is far enough behind the output pointer that it
// won't be evicted soon by cache expiry.
// some messy ari64's code, seems to rely on unsigned 32bit overflow
static int doesnt_expire_soon(void *tcaddr)
{
  u_int diff = (u_int)((u_char *)tcaddr - out) << (32-TARGET_SIZE_2);
  return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2)));
}
520
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
// Looks up compiled code for vaddr, restoring a dirty block if it still
// verifies; compiles the block if nothing is found. On an unmapped page
// it raises a guest exception and dispatches the exception vector.
void noinline *get_addr(u_int vaddr)
{
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
  // First: clean compiled blocks for this page
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match %x: %p)\n",Count,next_interupt,vaddr,head->addr);
      hash_table_add(hash_table_get(vaddr), vaddr, head->addr);
      return head->addr;
    }
    head=head->next;
  }
  // Second: dirty blocks that may be restorable if their source
  // memory still matches what was compiled
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %p)\n",Count,next_interupt,vaddr,head->addr);
      // Don't restore blocks which are about to expire from the cache
      if (doesnt_expire_soon(head->addr))
      if (verify_dirty(head->addr)) {
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        invalid_code[vaddr>>12]=0;
        inv_code_start=inv_code_end=~0;
        if(vpage<2048) {
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        struct ht_entry *ht_bin = hash_table_get(vaddr);
        if (ht_bin->vaddr[0] == vaddr)
          ht_bin->tcaddr[0] = head->addr; // Replace existing entry
        else
          hash_table_add(ht_bin, vaddr, head->addr);

        return head->addr;
      }
    }
    head=head->next;
  }
  //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
  // Nothing found: compile the block now
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault execption
  // (sets up COP0 state and jumps to the guest exception vector)
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
}
// Look up address in hash table first; fall back to the full
// linked-list search / recompile path in get_addr() on a miss.
void *get_addr_ht(u_int vaddr)
{
  //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
  const struct ht_entry *ht_bin = hash_table_get(vaddr);
  if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0];
  if (ht_bin->vaddr[1] == vaddr) return ht_bin->tcaddr[1];
  return get_addr(vaddr);
}
584
57871462 585void clear_all_regs(signed char regmap[])
586{
587 int hr;
588 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
589}
590
d1e4ebd9 591static signed char get_reg(const signed char regmap[],int r)
57871462 592{
593 int hr;
594 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
595 return -1;
596}
597
598// Find a register that is available for two consecutive cycles
d1e4ebd9 599static signed char get_reg2(signed char regmap1[], const signed char regmap2[], int r)
57871462 600{
601 int hr;
602 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
603 return -1;
604}
605
606int count_free_regs(signed char regmap[])
607{
608 int count=0;
609 int hr;
610 for(hr=0;hr<HOST_REGS;hr++)
611 {
612 if(hr!=EXCLUDE_REG) {
613 if(regmap[hr]<0) count++;
614 }
615 }
616 return count;
617}
618
// Mark every host register holding guest register reg as dirty
// (needs write-back). The &63 masks off flag bits carried in the
// regmap entry. $zero (reg==0) is never dirtied.
void dirty_reg(struct regstat *cur,signed char reg)
{
  int hr;
  if(!reg) return;
  for (hr=0;hr<HOST_REGS;hr++) {
    if((cur->regmap[hr]&63)==reg) {
      cur->dirty|=1<<hr;
    }
  }
}
629
// Record that guest register reg holds the known constant value;
// marks all host registers mapped to it as const and stores the value
// in current_constmap. No-op for $zero.
static void set_const(struct regstat *cur, signed char reg, uint32_t value)
{
  int hr;
  if(!reg) return;
  for (hr=0;hr<HOST_REGS;hr++) {
    if(cur->regmap[hr]==reg) {
      cur->isconst|=1<<hr;
      current_constmap[hr]=value;
    }
  }
}
641
// Forget any known-constant status for guest register reg (e.g. after
// it is written with a non-constant value). No-op for $zero.
static void clear_const(struct regstat *cur, signed char reg)
{
  int hr;
  if(!reg) return;
  for (hr=0;hr<HOST_REGS;hr++) {
    if((cur->regmap[hr]&63)==reg) {
      cur->isconst&=~(1<<hr);
    }
  }
}
652
// Is guest register reg known to hold a constant? $zero is always
// constant; unmapped/negative regs are not.
static int is_const(struct regstat *cur, signed char reg)
{
  int hr;
  if(reg<0) return 0;
  if(!reg) return 1;
  for (hr=0;hr<HOST_REGS;hr++) {
    if((cur->regmap[hr]&63)==reg) {
      return (cur->isconst>>hr)&1;
    }
  }
  return 0;
}
40fca85b 665
// Fetch the known constant value of guest register reg. Must only be
// called when is_const() is true; aborts if reg has no mapping.
static uint32_t get_const(struct regstat *cur, signed char reg)
{
  int hr;
  if(!reg) return 0;  // $zero is always 0
  for (hr=0;hr<HOST_REGS;hr++) {
    if(cur->regmap[hr]==reg) {
      return current_constmap[hr];
    }
  }
  SysPrintf("Unknown constant in r%d\n",reg);
  abort();
}
678
// Least soon needed registers
// Look at the next ten instructions and see which registers
// will be used. Try not to reallocate these.
// On return hsn[r] holds how soon guest register r is needed (smaller =
// sooner); special slots (CCREG, FTEMP, RHASH, RHTBL, INVCP) are also
// filled for insns that implicitly need them.
void lsn(u_char hsn[], int i, int *preferred_reg)
{
  int j;
  int b=-1;
  // find the lookahead window end (up to 9 insns, stopping at block
  // end or an unconditional jump)
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if (dops[i+j].is_ujump)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  // walk backwards so nearer uses overwrite farther ones
  for(;j>=0;j--)
  {
    if(dops[i+j].rs1) hsn[dops[i+j].rs1]=j;
    if(dops[i+j].rs2) hsn[dops[i+j].rs2]=j;
    if(dops[i+j].rt1) hsn[dops[i+j].rt1]=j;
    if(dops[i+j].rt2) hsn[dops[i+j].rt2]=j;
    if(dops[i+j].itype==STORE || dops[i+j].itype==STORELR) {
      // Stores can allocate zero
      hsn[dops[i+j].rs1]=j;
      hsn[dops[i+j].rs2]=j;
    }
    // On some architectures stores need invc_ptr
    #if defined(HOST_IMM8)
    if(dops[i+j].itype==STORE || dops[i+j].itype==STORELR || (dops[i+j].opcode&0x3b)==0x39 || (dops[i+j].opcode&0x3b)==0x3a) {
      hsn[INVCP]=j;
    }
    #endif
    if(i+j>=0&&(dops[i+j].itype==UJUMP||dops[i+j].itype==CJUMP||dops[i+j].itype==SJUMP))
    {
      hsn[CCREG]=j;
      b=j;  // remember the nearest branch in the window
    }
  }
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        // uses at the branch target count as farther away (+b+2)
        if(dops[t+j].rs1) if(hsn[dops[t+j].rs1]>j+b+2) hsn[dops[t+j].rs1]=j+b+2;
        if(dops[t+j].rs2) if(hsn[dops[t+j].rs2]>j+b+2) hsn[dops[t+j].rs2]=j+b+2;
        //if(dops[t+j].rt1) if(hsn[dops[t+j].rt1]>j+b+2) hsn[dops[t+j].rt1]=j+b+2;
        //if(dops[t+j].rt2) if(hsn[dops[t+j].rt2]>j+b+2) hsn[dops[t+j].rt2]=j+b+2;
      }
    }
    // TODO: preferred register based on backward branch
  }
  // Delay slot should preferably not overwrite branch conditions or cycle count
  if (i > 0 && dops[i-1].is_jump) {
    if(dops[i-1].rs1) if(hsn[dops[i-1].rs1]>1) hsn[dops[i-1].rs1]=1;
    if(dops[i-1].rs2) if(hsn[dops[i-1].rs2]>1) hsn[dops[i-1].rs2]=1;
    hsn[CCREG]=1;
    // ...or hash tables
    hsn[RHASH]=1;
    hsn[RHTBL]=1;
  }
  // Coprocessor load/store needs FTEMP, even if not declared
  if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
    hsn[FTEMP]=0;
  }
  // Load L/R also uses FTEMP as a temporary register
  if(dops[i].itype==LOADLR) {
    hsn[FTEMP]=0;
  }
  // Also SWL/SWR/SDL/SDR
  if(dops[i].opcode==0x2a||dops[i].opcode==0x2e||dops[i].opcode==0x2c||dops[i].opcode==0x2d) {
    hsn[FTEMP]=0;
  }
  // Don't remove the miniht registers
  if(dops[i].itype==UJUMP||dops[i].itype==RJUMP)
  {
    hsn[RHASH]=0;
    hsn[RHTBL]=0;
  }
}
767
// We only want to allocate registers if we're going to use them again soon
// Returns 1 if guest register r is read within the next few insns
// (and not marked unneeded), 0 otherwise.
int needed_again(int r, int i)
{
  int j;
  int b=-1;
  int rn=10;  // 10 == "not needed in window"

  if (i > 0 && dops[i-1].is_ujump)
  {
    if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
      return 0; // Don't need any registers if exiting the block
  }
  // bound the lookahead window: block end, unconditional jump, or a
  // call into the interpreter/HLE (after which regs are reloaded anyway)
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if (dops[i+j].is_ujump)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
    if(dops[i+j].itype==SYSCALL||dops[i+j].itype==HLECALL||dops[i+j].itype==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
    {
      break;
    }
  }
  for(;j>=1;j--)
  {
    if(dops[i+j].rs1==r) rn=j;
    if(dops[i+j].rs2==r) rn=j;
    if((unneeded_reg[i+j]>>r)&1) rn=10;
    if(i+j>=0&&(dops[i+j].itype==UJUMP||dops[i+j].itype==CJUMP||dops[i+j].itype==SJUMP))
    {
      b=j;
    }
  }
  if(rn<10) return 1;
  (void)b;
  return 0;
}
811
// Try to match register allocations at the end of a loop with those
// at the beginning
// If a backward branch in the lookahead window targets a point whose
// entry regmap already assigns r, return that host register; otherwise
// keep the suggested hr.
int loop_reg(int i, int r, int hr)
{
  int j,k;
  // find the lookahead window end (same scheme as lsn/needed_again)
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if (dops[i+j].is_ujump)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  k=0;
  if(i>0){
    // include the branch just before i (we may be its delay slot)
    if(dops[i-1].itype==UJUMP||dops[i-1].itype==CJUMP||dops[i-1].itype==SJUMP)
      k--;
  }
  for(;k<j;k++)
  {
    assert(r < 64);
    if((unneeded_reg[i+k]>>r)&1) return hr;
    if(i+k>=0&&(dops[i+k].itype==UJUMP||dops[i+k].itype==CJUMP||dops[i+k].itype==SJUMP))
    {
      if(ba[i+k]>=start && ba[i+k]<(start+i*4))
      {
        // backward branch into this block: prefer the target's mapping
        int t=(ba[i+k]-start)>>2;
        int reg=get_reg(regs[t].regmap_entry,r);
        if(reg>=0) return reg;
        //reg=get_reg(regs[t+1].regmap_entry,r);
        //if(reg>=0) return reg;
      }
    }
  }
  return hr;
}
853
854
// Allocate every register, preserving source/target regs
// Frees every host register except those holding the current insn's
// rs1/rs2/rt1/rt2, making them available for temporaries.
void alloc_all(struct regstat *cur,int i)
{
  int hr;

  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if(((cur->regmap[hr]&63)!=dops[i].rs1)&&((cur->regmap[hr]&63)!=dops[i].rs2)&&
         ((cur->regmap[hr]&63)!=dops[i].rt1)&&((cur->regmap[hr]&63)!=dops[i].rt2))
      {
        cur->regmap[hr]=-1;
        cur->dirty&=~(1<<hr);
      }
      // Don't need zeros
      if((cur->regmap[hr]&63)==0)
      {
        cur->regmap[hr]=-1;
        cur->dirty&=~(1<<hr);
      }
    }
  }
}
877
// Debug-only guard against nested use of the single host scratch
// register: acquire asserts it is free, release frees it. In NDEBUG
// builds both are no-ops.
#ifndef NDEBUG
static int host_tempreg_in_use;

static void host_tempreg_acquire(void)
{
  assert(!host_tempreg_in_use);
  host_tempreg_in_use = 1;
}

static void host_tempreg_release(void)
{
  host_tempreg_in_use = 0;
}
#else
static void host_tempreg_acquire(void) {}
static void host_tempreg_release(void) {}
#endif
895
32631e6a 896#ifdef ASSEM_PRINT
8062d65a 897extern void gen_interupt();
898extern void do_insn_cmp();
d1e4ebd9 899#define FUNCNAME(f) { f, " " #f }
8062d65a 900static const struct {
d1e4ebd9 901 void *addr;
8062d65a 902 const char *name;
903} function_names[] = {
904 FUNCNAME(cc_interrupt),
905 FUNCNAME(gen_interupt),
906 FUNCNAME(get_addr_ht),
907 FUNCNAME(get_addr),
908 FUNCNAME(jump_handler_read8),
909 FUNCNAME(jump_handler_read16),
910 FUNCNAME(jump_handler_read32),
911 FUNCNAME(jump_handler_write8),
912 FUNCNAME(jump_handler_write16),
913 FUNCNAME(jump_handler_write32),
914 FUNCNAME(invalidate_addr),
3968e69e 915 FUNCNAME(jump_to_new_pc),
81dbbf4c 916 FUNCNAME(call_gteStall),
8062d65a 917 FUNCNAME(new_dyna_leave),
918 FUNCNAME(pcsx_mtc0),
919 FUNCNAME(pcsx_mtc0_ds),
32631e6a 920#ifdef DRC_DBG
8062d65a 921 FUNCNAME(do_insn_cmp),
32631e6a 922#endif
3968e69e 923#ifdef __arm__
924 FUNCNAME(verify_code),
925#endif
8062d65a 926};
927
d1e4ebd9 928static const char *func_name(const void *a)
8062d65a 929{
930 int i;
931 for (i = 0; i < sizeof(function_names)/sizeof(function_names[0]); i++)
932 if (function_names[i].addr == a)
933 return function_names[i].name;
934 return "";
935}
936#else
937#define func_name(x) ""
938#endif
939
57871462 940#ifdef __i386__
941#include "assem_x86.c"
942#endif
943#ifdef __x86_64__
944#include "assem_x64.c"
945#endif
946#ifdef __arm__
947#include "assem_arm.c"
948#endif
be516ebe 949#ifdef __aarch64__
950#include "assem_arm64.c"
951#endif
57871462 952
// Find (or lazily install) a trampoline slot that jumps to f, for
// targets out of direct branch range. Aborts if the fixed-size
// trampoline table is full. Returns the address of the trampoline code.
static void *get_trampoline(const void *f)
{
  size_t i;

  // linear scan: reuse an existing slot for f, or take the first free one
  for (i = 0; i < ARRAY_SIZE(ndrc->tramp.f); i++) {
    if (ndrc->tramp.f[i] == f || ndrc->tramp.f[i] == NULL)
      break;
  }
  if (i == ARRAY_SIZE(ndrc->tramp.f)) {
    SysPrintf("trampoline table is full, last func %p\n", f);
    abort();
  }
  if (ndrc->tramp.f[i] == NULL) {
    // new slot: store the target pointer under W^X protection
    start_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]);
    ndrc->tramp.f[i] = f;
    end_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]);
  }
  return &ndrc->tramp.ops[i];
}
972
// Emit a jump to f, routing through a trampoline when f is out of
// direct branch range for the host.
static void emit_far_jump(const void *f)
{
  if (!can_jump_or_call(f))
    f = get_trampoline(f);
  emit_jmp(f);
}
983
// Emit a call to f, routing through a trampoline when f is out of
// direct call range for the host.
static void emit_far_call(const void *f)
{
  if (!can_jump_or_call(f))
    f = get_trampoline(f);
  emit_call(f);
}
994
// Add virtual address mapping to linked list
// Prepends a new vaddr->addr entry to *head with reg_sv_flags cleared.
// NOTE(review): malloc failure is only caught by assert(), which
// vanishes under NDEBUG — would then dereference NULL.
void ll_add(struct ll_entry **head,int vaddr,void *addr)
{
  struct ll_entry *new_entry;
  new_entry=malloc(sizeof(struct ll_entry));
  assert(new_entry!=NULL);
  new_entry->vaddr=vaddr;
  new_entry->reg_sv_flags=0;
  new_entry->addr=addr;
  new_entry->next=*head;
  *head=new_entry;
}
1007
// Same as ll_add(), but also records the entry's register
// sign-extension requirement flags on the new (head) node.
void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr)
{
  ll_add(head,vaddr,addr);
  (*head)->reg_sv_flags=reg_sv_flags;
}
1013
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache
// Returns the translated code address or 0; refreshes/fills the hash
// table on the way, without evicting existing entries.
void *check_addr(u_int vaddr)
{
  // fast path: hash table
  struct ht_entry *ht_bin = hash_table_get(vaddr);
  size_t i;
  for (i = 0; i < ARRAY_SIZE(ht_bin->vaddr); i++) {
    if (ht_bin->vaddr[i] == vaddr)
      if (doesnt_expire_soon((u_char *)ht_bin->tcaddr[i] - MAX_OUTPUT_BLOCK_SIZE))
        if (isclean(ht_bin->tcaddr[i]))
          return ht_bin->tcaddr[i];
  }
  // slow path: per-page linked list of compiled blocks
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while (head != NULL) {
    if (head->vaddr == vaddr) {
      if (doesnt_expire_soon(head->addr)) {
        // Update existing entry with current address
        if (ht_bin->vaddr[0] == vaddr) {
          ht_bin->tcaddr[0] = head->addr;
          return head->addr;
        }
        if (ht_bin->vaddr[1] == vaddr) {
          ht_bin->tcaddr[1] = head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if (ht_bin->vaddr[0] == -1) {
          ht_bin->vaddr[0] = vaddr;
          ht_bin->tcaddr[0] = head->addr;
        }
        else if (ht_bin->vaddr[1] == -1) {
          ht_bin->vaddr[1] = vaddr;
          ht_bin->tcaddr[1] = head->addr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0;
}
1059
// Remove vaddr from its hash table bin, if present in either slot.
void remove_hash(int vaddr)
{
  //printf("remove hash: %x\n",vaddr);
  struct ht_entry *ht_bin = hash_table_get(vaddr);
  if (ht_bin->vaddr[1] == vaddr) {
    ht_bin->vaddr[1] = -1;
    ht_bin->tcaddr[1] = NULL;
  }
  if (ht_bin->vaddr[0] == vaddr) {
    // Promote slot 1 into slot 0 so any remaining entry stays in the
    // primary slot. Order of the two ifs matters: if both slots held
    // vaddr, the first if already cleared slot 1, so both end up empty.
    ht_bin->vaddr[0] = ht_bin->vaddr[1];
    ht_bin->tcaddr[0] = ht_bin->tcaddr[1];
    ht_bin->vaddr[1] = -1;
    ht_bin->tcaddr[1] = NULL;
  }
}
1075
943f42f3 1076static void ll_remove_matching_addrs(struct ll_entry **head,
1077 uintptr_t base_offs_s, int shift)
57871462 1078{
1079 struct ll_entry *next;
1080 while(*head) {
943f42f3 1081 uintptr_t o1 = (u_char *)(*head)->addr - ndrc->translation_cache;
1082 uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE;
1083 if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s)
57871462 1084 {
643aeae3 1085 inv_debug("EXP: Remove pointer to %p (%x)\n",(*head)->addr,(*head)->vaddr);
57871462 1086 remove_hash((*head)->vaddr);
1087 next=(*head)->next;
1088 free(*head);
1089 *head=next;
1090 }
1091 else
1092 {
1093 head=&((*head)->next);
1094 }
1095 }
1096}
1097
1098// Remove all entries from linked list
1099void ll_clear(struct ll_entry **head)
1100{
1101 struct ll_entry *cur;
1102 struct ll_entry *next;
581335b0 1103 if((cur=*head)) {
57871462 1104 *head=0;
1105 while(cur) {
1106 next=cur->next;
1107 free(cur);
1108 cur=next;
1109 }
1110 }
1111}
1112
1113// Dereference the pointers and remove if it matches
943f42f3 1114static void ll_kill_pointers(struct ll_entry *head,
1115 uintptr_t base_offs_s, int shift)
57871462 1116{
1117 while(head) {
943f42f3 1118 u_char *ptr = get_pointer(head->addr);
1119 uintptr_t o1 = ptr - ndrc->translation_cache;
1120 uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE;
1121 inv_debug("EXP: Lookup pointer to %p at %p (%x)\n",ptr,head->addr,head->vaddr);
1122 if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s)
57871462 1123 {
643aeae3 1124 inv_debug("EXP: Kill pointer at %p (%x)\n",head->addr,head->vaddr);
d148d265 1125 void *host_addr=find_extjump_insn(head->addr);
919981d0 1126 mark_clear_cache(host_addr);
df4dc2b1 1127 set_jump_target(host_addr, head->addr);
57871462 1128 }
1129 head=head->next;
1130 }
1131}
1132
1133// This is called when we write to a compiled block (see do_invstub)
d1e4ebd9 1134static void invalidate_page(u_int page)
57871462 1135{
57871462 1136 struct ll_entry *head;
1137 struct ll_entry *next;
1138 head=jump_in[page];
1139 jump_in[page]=0;
1140 while(head!=NULL) {
1141 inv_debug("INVALIDATE: %x\n",head->vaddr);
1142 remove_hash(head->vaddr);
1143 next=head->next;
1144 free(head);
1145 head=next;
1146 }
1147 head=jump_out[page];
1148 jump_out[page]=0;
1149 while(head!=NULL) {
643aeae3 1150 inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr);
d148d265 1151 void *host_addr=find_extjump_insn(head->addr);
919981d0 1152 mark_clear_cache(host_addr);
3d680478 1153 set_jump_target(host_addr, head->addr); // point back to dyna_linker
57871462 1154 next=head->next;
1155 free(head);
1156 head=next;
1157 }
57871462 1158}
9be4ba64 1159
1160static void invalidate_block_range(u_int block, u_int first, u_int last)
57871462 1161{
94d23bb9 1162 u_int page=get_page(block<<12);
57871462 1163 //printf("first=%d last=%d\n",first,last);
f76eeef9 1164 invalidate_page(page);
57871462 1165 assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
1166 assert(last<page+5);
1167 // Invalidate the adjacent pages if a block crosses a 4K boundary
1168 while(first<page) {
1169 invalidate_page(first);
1170 first++;
1171 }
1172 for(first=page+1;first<last;first++) {
1173 invalidate_page(first);
1174 }
919981d0 1175 do_clear_cache();
9f51b4b9 1176
57871462 1177 // Don't trap writes
1178 invalid_code[block]=1;
f76eeef9 1179
57871462 1180 #ifdef USE_MINI_HT
1181 memset(mini_ht,-1,sizeof(mini_ht));
1182 #endif
1183}
9be4ba64 1184
// Invalidate all compiled code derived from the 4K RAM block `block`.
// Scans the dirty list to find how far the affected blocks actually
// extend, then invalidates that whole page range.
void invalidate_block(u_int block)
{
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  while(head!=NULL) {
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      u_char *start, *end;
      // get_bounds() reports the source-code range this block was compiled from
      get_bounds(head->addr, &start, &end);
      //printf("start: %p end: %p\n", start, end);
      if (page < 2048 && start >= rdram && end < rdram+RAM_SIZE) {
        if (((start-rdram)>>12) <= page && ((end-1-rdram)>>12) >= page) {
          // Widen [first,last] to cover every RAM page this block touches
          if ((((start-rdram)>>12)&2047) < first) first = ((start-rdram)>>12)&2047;
          if ((((end-1-rdram)>>12)&2047) > last) last = ((end-1-rdram)>>12)&2047;
        }
      }
    }
    head=head->next;
  }
  invalidate_block_range(block,first,last);
}
1212
// Invalidate compiled code overlapping a single written address.
// Also maintains the [inv_code_start, inv_code_end] window: a range with
// no compiled code in it, so future writes there can skip this routine.
void invalidate_addr(u_int addr)
{
  //static int rhits;
  // this check is done by the caller
  //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
  u_int page=get_vpage(addr);
  if(page<2048) { // RAM
    struct ll_entry *head;
    u_int addr_min=~0, addr_max=0;
    u_int mask=RAM_SIZE-1;
    // normalize to the KSEG0 mirror for comparisons below
    u_int addr_main=0x80000000|(addr&mask);
    int pg1;
    // start with the full 4K page as the "no code here" window,
    // then shrink it against every dirty block we find
    inv_code_start=addr_main&~0xfff;
    inv_code_end=addr_main|0xfff;
    pg1=page;
    if (pg1>0) {
      // must check previous page too because of spans..
      pg1--;
      inv_code_start-=0x1000;
    }
    for(;pg1<=page;pg1++) {
      for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
        u_char *start_h, *end_h;
        u_int start, end;
        get_bounds(head->addr, &start_h, &end_h);
        start = (uintptr_t)start_h - ram_offset;
        end = (uintptr_t)end_h - ram_offset;
        if(start<=addr_main&&addr_main<end) {
          // direct hit: remember the widest range to invalidate
          if(start<addr_min) addr_min=start;
          if(end>addr_max) addr_max=end;
        }
        else if(addr_main<start) {
          // block lies above the write: clip the free window's top
          if(start<inv_code_end)
            inv_code_end=start-1;
        }
        else {
          // block lies below the write: clip the free window's bottom
          if(end>inv_code_start)
            inv_code_start=end;
        }
      }
    }
    if (addr_min!=~0) {
      inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
      inv_code_start=inv_code_end=~0;
      invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
      return;
    }
    else {
      // no code at this address; publish the window in the caller's mirror
      inv_code_start=(addr&~mask)|(inv_code_start&mask);
      inv_code_end=(addr&~mask)|(inv_code_end&mask);
      inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
      return;
    }
  }
  // non-RAM page: just invalidate the whole 4K block
  invalidate_block(addr>>12);
}
9be4ba64 1269
dd3a91a1 1270// This is called when loading a save state.
1271// Anything could have changed, so invalidate everything.
919981d0 1272void invalidate_all_pages(void)
57871462 1273{
581335b0 1274 u_int page;
57871462 1275 for(page=0;page<4096;page++)
1276 invalidate_page(page);
1277 for(page=0;page<1048576;page++)
1278 if(!invalid_code[page]) {
1279 restore_candidate[(page&2047)>>3]|=1<<(page&7);
1280 restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
1281 }
57871462 1282 #ifdef USE_MINI_HT
1283 memset(mini_ht,-1,sizeof(mini_ht));
1284 #endif
919981d0 1285 do_clear_cache();
57871462 1286}
1287
d1e4ebd9 1288static void do_invstub(int n)
1289{
1290 literal_pool(20);
1291 u_int reglist=stubs[n].a;
1292 set_jump_target(stubs[n].addr, out);
1293 save_regs(reglist);
1294 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2a014d73 1295 emit_far_call(invalidate_addr);
d1e4ebd9 1296 restore_regs(reglist);
1297 emit_jmp(stubs[n].retaddr); // return address
1298}
1299
57871462 1300// Add an entry to jump_out after making a link
d1e4ebd9 1301// src should point to code by emit_extjump2()
3d680478 1302void add_jump_out(u_int vaddr,void *src)
57871462 1303{
94d23bb9 1304 u_int page=get_page(vaddr);
3d680478 1305 inv_debug("add_jump_out: %p -> %x (%d)\n",src,vaddr,page);
d1e4ebd9 1306 check_extjump2(src);
57871462 1307 ll_add(jump_out+page,vaddr,src);
3d680478 1308 //inv_debug("add_jump_out: to %p\n",get_pointer(src));
57871462 1309}
1310
// If a code block was found to be unmodified (bit was set in
// restore_candidate) and it remains unmodified (bit is clear
// in invalid_code) then move the entries for that 4K page from
// the dirty list to the clean list.
void clean_blocks(u_int page)
{
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if (doesnt_expire_soon(head->addr)) {
        // verify_dirty() re-checks the source bytes against what was compiled
        if(verify_dirty(head->addr)) {
          u_char *start, *end;
          //printf("Possibly Restore %x (%p)\n",head->vaddr, head->addr);
          u_int i;
          u_int inv=0;
          get_bounds(head->addr, &start, &end);
          if (start - rdram < RAM_SIZE) {
            // any source page marked invalid poisons the whole block
            for (i = (start-rdram+0x80000000)>>12; i <= (end-1-rdram+0x80000000)>>12; i++) {
              inv|=invalid_code[i];
            }
          }
          else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
            // outside RAM — never restore; presumably not safely trackable
            inv=1;
          }
          if(!inv) {
            void *clean_addr = get_clean_addr(head->addr);
            if (doesnt_expire_soon(clean_addr)) {
              u_int ppage=page;
              inv_debug("INV: Restored %x (%p/%p)\n",head->vaddr, head->addr, clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              // re-register the clean entry point and refresh the hash table
              ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
              struct ht_entry *ht_bin = hash_table_get(head->vaddr);
              if (ht_bin->vaddr[0] == head->vaddr)
                ht_bin->tcaddr[0] = clean_addr; // Replace existing entry
              if (ht_bin->vaddr[1] == head->vaddr)
                ht_bin->tcaddr[1] = clean_addr; // Replace existing entry
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1359
/* Register allocation */

// Note: registers are allocated clean (unmodified state)
// if you intend to modify the register, you must call dirty_reg().
//
// Allocate a host register for guest register `reg` at instruction i,
// trying in order: preferred slot, any free slot, then eviction of the
// guest register least likely to be needed soon (per lsn()).
static void alloc_reg(struct regstat *cur,int i,signed char reg)
{
  int r,hr;
  int preferred_reg = (reg&7);
  if(reg==CCREG) preferred_reg=HOST_CCREG;
  if(reg==PTEMP||reg==FTEMP) preferred_reg=12;

  // Don't allocate unused registers
  if((cur->u>>reg)&1) return;

  // see if it's already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(cur->regmap[hr]==reg) return;
  }

  // Keep the same mapping if the register was already allocated in a loop
  preferred_reg = loop_reg(i,reg,preferred_reg);

  // Try to allocate the preferred register
  if(cur->regmap[preferred_reg]==-1) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  // Preferred slot is occupied — steal it if its occupant is unneeded
  r=cur->regmap[preferred_reg];
  assert(r < 64);
  if((cur->u>>r)&1) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }

  // Clear any unneeded registers
  // We try to keep the mapping consistent, if possible, because it
  // makes branches easier (especially loops). So we try to allocate
  // first (see above) before removing old mappings. If this is not
  // possible then go ahead and clear out the registers that are no
  // longer needed.
  for(hr=0;hr<HOST_REGS;hr++)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      assert(r < 64);
      if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
    }
  }
  // Try to allocate any available register, but prefer
  // registers that have not been used recently.
  if(i>0) {
    for(hr=0;hr<HOST_REGS;hr++) {
      if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
        // skip slots the previous instruction's operands mapped to
        if(regs[i-1].regmap[hr]!=dops[i-1].rs1&&regs[i-1].regmap[hr]!=dops[i-1].rs2&&regs[i-1].regmap[hr]!=dops[i-1].rt1&&regs[i-1].regmap[hr]!=dops[i-1].rt2) {
          cur->regmap[hr]=reg;
          cur->dirty&=~(1<<hr);
          cur->isconst&=~(1<<hr);
          return;
        }
      }
    }
  }
  // Try to allocate any available register
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg); // hsn[r] = how soon guest reg r is needed (smaller = sooner)
  //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
  //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(dops[i].bt&&hsn[CCREG]>2) hsn[CCREG]=2;
    if (i>1 && hsn[CCREG] > 2 && dops[i-2].is_jump) hsn[CCREG]=2;
    // Search from least-soon-needed (j=10) down to j=3
    for(j=10;j>=3;j--)
    {
      // Alloc preferred register if available
      if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          // Evict both parts of a 64-bit register
          if((cur->regmap[hr]&63)==r) {
            cur->regmap[hr]=-1;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
          }
        }
        cur->regmap[preferred_reg]=reg;
        return;
      }
      for(r=1;r<=MAXREG;r++)
      {
        // don't evict operands of the previous instruction
        if(hsn[r]==j&&r!=dops[i-1].rs1&&r!=dops[i-1].rs2&&r!=dops[i-1].rt1&&r!=dops[i-1].rt2) {
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: evict anything, least-soon-needed first
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  SysPrintf("This shouldn't happen (alloc_reg)");abort();
}
1500
// Allocate a temporary register. This is done without regard to
// dirty status or whether the register we request is on the unneeded list
// Note: This will only allocate one register, even if called multiple times
static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
{
  int r,hr;
  int preferred_reg = -1;

  // see if it's already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
  }

  // Try to allocate any available register
  // (searched top-down, unlike alloc_reg(), to keep temps away from
  // the low registers preferred for guest state)
  for(hr=HOST_REGS-1;hr>=0;hr--) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Find an unneeded register
  for(hr=HOST_REGS-1;hr>=0;hr--)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      assert(r < 64);
      if((cur->u>>r)&1) {
        if(i==0||((unneeded_reg[i-1]>>r)&1)) {
          cur->regmap[hr]=reg;
          cur->dirty&=~(1<<hr);
          cur->isconst&=~(1<<hr);
          return;
        }
      }
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // TODO: we might want to follow unconditional jumps here
  // TODO: get rid of dupe code and make this into a function
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg); // hsn[r] = how soon guest reg r is needed
  //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(dops[i].bt&&hsn[CCREG]>2) hsn[CCREG]=2;
    if (i>1 && hsn[CCREG] > 2 && dops[i-2].is_jump) hsn[CCREG]=2;
    for(j=10;j>=3;j--)
    {
      for(r=1;r<=MAXREG;r++)
      {
        // don't evict operands of the previous instruction
        if(hsn[r]==j&&r!=dops[i-1].rs1&&r!=dops[i-1].rs2&&r!=dops[i-1].rt1&&r!=dops[i-1].rt2) {
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||hsn[CCREG]>2) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: evict anything, least-soon-needed first
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  SysPrintf("This shouldn't happen");abort();
}
1593
// Register allocation for MFHI/MFLO/MTHI/MTLO-style moves: only the
// destination needs a host register.
static void mov_alloc(struct regstat *current,int i)
{
  if (dops[i].rs1 == HIREG || dops[i].rs1 == LOREG) {
    // logically this is needed but just won't work, no idea why
    //alloc_cc(current,i); // for stalls
    //dirty_reg(current,CCREG);
  }

  // Note: Don't need to actually alloc the source registers
  //alloc_reg(current,i,dops[i].rs1);
  alloc_reg(current,i,dops[i].rt1);

  clear_const(current,dops[i].rs1);
  clear_const(current,dops[i].rt1);
  dirty_reg(current,dops[i].rt1);
}
1610
// Register allocation for shift-by-immediate (SLL/SRL/SRA).
// The 64-bit variants are unreachable on the 32-bit R3000A and assert.
static void shiftimm_alloc(struct regstat *current,int i)
{
  if(dops[i].opcode2<=0x3) // SLL/SRL/SRA
  {
    if(dops[i].rt1) {
      if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
      else dops[i].lt1=dops[i].rs1;
      alloc_reg(current,i,dops[i].rt1);
      dirty_reg(current,dops[i].rt1);
      // Constant-fold the shift if the source value is known
      if(is_const(current,dops[i].rs1)) {
        int v=get_const(current,dops[i].rs1);
        if(dops[i].opcode2==0x00) set_const(current,dops[i].rt1,v<<imm[i]);
        if(dops[i].opcode2==0x02) set_const(current,dops[i].rt1,(u_int)v>>imm[i]);
        if(dops[i].opcode2==0x03) set_const(current,dops[i].rt1,v>>imm[i]);
      }
      else clear_const(current,dops[i].rt1);
    }
  }
  else
  {
    clear_const(current,dops[i].rs1);
    clear_const(current,dops[i].rt1);
  }

  if(dops[i].opcode2>=0x38&&dops[i].opcode2<=0x3b) // DSLL/DSRL/DSRA
  {
    assert(0);
  }
  if(dops[i].opcode2==0x3c) // DSLL32
  {
    assert(0);
  }
  if(dops[i].opcode2==0x3e) // DSRL32
  {
    assert(0);
  }
  if(dops[i].opcode2==0x3f) // DSRA32
  {
    assert(0);
  }
}
1652
// Register allocation for shift-by-register (SLLV/SRLV/SRAV).
static void shift_alloc(struct regstat *current,int i)
{
  if(dops[i].rt1) {
    if(dops[i].opcode2<=0x07) // SLLV/SRLV/SRAV
    {
      if(dops[i].rs1) alloc_reg(current,i,dops[i].rs1);
      if(dops[i].rs2) alloc_reg(current,i,dops[i].rs2);
      alloc_reg(current,i,dops[i].rt1);
      if(dops[i].rt1==dops[i].rs2) {
        // destination aliases the shift amount; need a scratch register
        alloc_reg_temp(current,i,-1);
        minimum_free_regs[i]=1;
      }
    } else { // DSLLV/DSRLV/DSRAV
      assert(0);
    }
    clear_const(current,dops[i].rs1);
    clear_const(current,dops[i].rs2);
    clear_const(current,dops[i].rt1);
    dirty_reg(current,dops[i].rt1);
  }
}
1674
// Register allocation for three-operand ALU ops (ADD/SUB/SLT/logic).
// 64-bit DADD-family ops are unreachable on R3000A and assert.
static void alu_alloc(struct regstat *current,int i)
{
  if(dops[i].opcode2>=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU
    if(dops[i].rt1) {
      if(dops[i].rs1&&dops[i].rs2) {
        alloc_reg(current,i,dops[i].rs1);
        alloc_reg(current,i,dops[i].rs2);
      }
      else {
        // one operand is r0: only allocate a source if it's needed later
        if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
        if(dops[i].rs2&&needed_again(dops[i].rs2,i)) alloc_reg(current,i,dops[i].rs2);
      }
      alloc_reg(current,i,dops[i].rt1);
    }
  }
  if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU
    if(dops[i].rt1) {
      alloc_reg(current,i,dops[i].rs1);
      alloc_reg(current,i,dops[i].rs2);
      alloc_reg(current,i,dops[i].rt1);
    }
  }
  if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR
    if(dops[i].rt1) {
      if(dops[i].rs1&&dops[i].rs2) {
        alloc_reg(current,i,dops[i].rs1);
        alloc_reg(current,i,dops[i].rs2);
      }
      else
      {
        if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
        if(dops[i].rs2&&needed_again(dops[i].rs2,i)) alloc_reg(current,i,dops[i].rs2);
      }
      alloc_reg(current,i,dops[i].rt1);
    }
  }
  if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    assert(0);
  }
  clear_const(current,dops[i].rs1);
  clear_const(current,dops[i].rs2);
  clear_const(current,dops[i].rt1);
  dirty_reg(current,dops[i].rt1);
}
1719
// Register allocation for immediate-operand ops (ADDI/ANDI/ORI/XORI/
// SLTI/LUI), with constant propagation where the source is known.
static void imm16_alloc(struct regstat *current,int i)
{
  if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
  else dops[i].lt1=dops[i].rs1;
  if(dops[i].rt1) alloc_reg(current,i,dops[i].rt1);
  if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU
    assert(0); // 64-bit; unreachable on R3000A
  }
  else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU
    clear_const(current,dops[i].rs1);
    clear_const(current,dops[i].rt1);
  }
  else if(dops[i].opcode>=0x0c&&dops[i].opcode<=0x0e) { // ANDI/ORI/XORI
    if(is_const(current,dops[i].rs1)) {
      int v=get_const(current,dops[i].rs1);
      if(dops[i].opcode==0x0c) set_const(current,dops[i].rt1,v&imm[i]);
      if(dops[i].opcode==0x0d) set_const(current,dops[i].rt1,v|imm[i]);
      if(dops[i].opcode==0x0e) set_const(current,dops[i].rt1,v^imm[i]);
    }
    else clear_const(current,dops[i].rt1);
  }
  else if(dops[i].opcode==0x08||dops[i].opcode==0x09) { // ADDI/ADDIU
    if(is_const(current,dops[i].rs1)) {
      int v=get_const(current,dops[i].rs1);
      set_const(current,dops[i].rt1,v+imm[i]);
    }
    else clear_const(current,dops[i].rt1);
  }
  else {
    set_const(current,dops[i].rt1,imm[i]<<16); // LUI
  }
  dirty_reg(current,dops[i].rt1);
}
1753
// Register allocation for loads. Even a load to r0 (dummy load) still
// needs a temporary for address generation.
static void load_alloc(struct regstat *current,int i)
{
  clear_const(current,dops[i].rt1);
  //if(dops[i].rs1!=dops[i].rt1&&needed_again(dops[i].rs1,i)) clear_const(current,dops[i].rs1); // Does this help or hurt?
  if(!dops[i].rs1) current->u&=~1LL; // Allow allocating r0 if it's the source register
  if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
  if(dops[i].rt1&&!((current->u>>dops[i].rt1)&1)) {
    alloc_reg(current,i,dops[i].rt1);
    assert(get_reg(current->regmap,dops[i].rt1)>=0);
    if(dops[i].opcode==0x27||dops[i].opcode==0x37) // LWU/LD
    {
      assert(0); // 64-bit; unreachable on R3000A
    }
    else if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR
    {
      assert(0);
    }
    dirty_reg(current,dops[i].rt1);
    // LWL/LWR need a temporary register for the old value
    if(dops[i].opcode==0x22||dops[i].opcode==0x26)
    {
      alloc_reg(current,i,FTEMP);
      alloc_reg_temp(current,i,-1);
      minimum_free_regs[i]=1;
    }
  }
  else
  {
    // Load to r0 or unneeded register (dummy load)
    // but we still need a register to calculate the address
    if(dops[i].opcode==0x22||dops[i].opcode==0x26)
    {
      alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
    }
    alloc_reg_temp(current,i,-1);
    minimum_free_regs[i]=1;
    if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR
    {
      assert(0);
    }
  }
}
1796
// Register allocation for stores: base, data, and an address temporary.
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,dops[i].rs2);
  if(!(dops[i].rs2)) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
  alloc_reg(current,i,dops[i].rs2);
  if(dops[i].opcode==0x2c||dops[i].opcode==0x2d||dops[i].opcode==0x3f) { // 64-bit SDL/SDR/SD
    assert(0); // unreachable on R3000A
  }
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else alloc_reg(current,i,INVCP);
  #endif
  if(dops[i].opcode==0x2a||dops[i].opcode==0x2e||dops[i].opcode==0x2c||dops[i].opcode==0x2d) { // SWL/SWL/SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1817
// Register allocation for COP1 loads/stores (LWC1/SWC1).
void c1ls_alloc(struct regstat *current,int i)
{
  //clear_const(current,dops[i].rs1); // FIXME
  clear_const(current,dops[i].rt1);
  if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
  alloc_reg(current,i,CSREG); // Status
  alloc_reg(current,i,FTEMP);
  if(dops[i].opcode==0x35||dops[i].opcode==0x3d) { // 64-bit LDC1/SDC1
    assert(0); // unreachable on R3000A
  }
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((dops[i].opcode&0x3b)==0x39) // SWC1/SDC1
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
}
1836
// Register allocation for GTE (COP2) loads/stores (LWC2/SWC2).
void c2ls_alloc(struct regstat *current,int i)
{
  clear_const(current,dops[i].rt1);
  if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
  alloc_reg(current,i,FTEMP);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  if((dops[i].opcode&0x3b)==0x3a) // SWC2/SDC2
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1851
#ifndef multdiv_alloc
// Register allocation for MULT/MULTU/DIV/DIVU: results land in HI/LO.
// 64-bit DMULT/DDIV variants are unreachable on R3000A and assert.
void multdiv_alloc(struct regstat *current,int i)
{
  //  case 0x18: MULT
  //  case 0x19: MULTU
  //  case 0x1A: DIV
  //  case 0x1B: DIVU
  //  case 0x1C: DMULT
  //  case 0x1D: DMULTU
  //  case 0x1E: DDIV
  //  case 0x1F: DDIVU
  clear_const(current,dops[i].rs1);
  clear_const(current,dops[i].rs2);
  alloc_cc(current,i); // for stalls
  if(dops[i].rs1&&dops[i].rs2)
  {
    if((dops[i].opcode2&4)==0) // 32-bit
    {
      // mark HI/LO as needed before allocating them
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      alloc_reg(current,i,HIREG);
      alloc_reg(current,i,LOREG);
      alloc_reg(current,i,dops[i].rs1);
      alloc_reg(current,i,dops[i].rs2);
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
    else // 64-bit
    {
      assert(0);
    }
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    alloc_reg(current,i,HIREG);
    alloc_reg(current,i,LOREG);
    dirty_reg(current,HIREG);
    dirty_reg(current,LOREG);
  }
}
#endif
1896
// Register allocation for COP0 ops (MFC0/MTC0/RFE etc.).
// These can touch arbitrary emulator state, so alloc_all() is used and
// no registers are guaranteed free across the instruction.
void cop0_alloc(struct regstat *current,int i)
{
  if(dops[i].opcode2==0) // MFC0
  {
    if(dops[i].rt1) {
      clear_const(current,dops[i].rt1);
      alloc_all(current,i);
      alloc_reg(current,i,dops[i].rt1);
      dirty_reg(current,dops[i].rt1);
    }
  }
  else if(dops[i].opcode2==4) // MTC0
  {
    if(dops[i].rs1){
      clear_const(current,dops[i].rs1);
      alloc_reg(current,i,dops[i].rs1);
      alloc_all(current,i);
    }
    else {
      alloc_all(current,i); // FIXME: Keep r0
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  else
  {
    // TLBR/TLBWI/TLBWR/TLBP/ERET
    assert(dops[i].opcode2==0x10);
    alloc_all(current,i);
  }
  minimum_free_regs[i]=HOST_REGS;
}
1929
// Register allocation for GTE register moves (MFC2/CFC2/MTC2/CTC2).
static void cop2_alloc(struct regstat *current,int i)
{
  if (dops[i].opcode2 < 3) // MFC2/CFC2
  {
    alloc_cc(current,i); // for stalls
    dirty_reg(current,CCREG);
    if(dops[i].rt1){
      clear_const(current,dops[i].rt1);
      alloc_reg(current,i,dops[i].rt1);
      dirty_reg(current,dops[i].rt1);
    }
  }
  else if (dops[i].opcode2 > 3) // MTC2/CTC2
  {
    if(dops[i].rs1){
      clear_const(current,dops[i].rs1);
      alloc_reg(current,i,dops[i].rs1);
    }
    else {
      // source is r0; make it allocatable and pin it
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
00fa9369 1956
// Register allocation for GTE arithmetic ops: just the cycle counter
// (for stall accounting) and one scratch register.
void c2op_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i); // for stalls
  dirty_reg(current,CCREG);
  alloc_reg_temp(current,i,-1);
}
57871462 1963
// Register allocation for SYSCALL/BREAK: everything is spilled since
// control leaves the translated code.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  minimum_free_regs[i]=HOST_REGS;
  current->isconst=0;
}
1972
// Register allocation for the instruction in a branch delay slot:
// dispatch to the allocator matching its decoded itype.
void delayslot_alloc(struct regstat *current,int i)
{
  switch(dops[i].itype) {
    case UJUMP:
    case CJUMP:
    case SJUMP:
    case RJUMP:
    case SYSCALL:
    case HLECALL:
    case SPAN:
      // a branch in a delay slot can't be compiled; give up on
      // speculative precompilation past this point
      assem_debug("jump in the delay slot.  this shouldn't happen.\n");//abort();
      SysPrintf("Disabled speculative precompilation\n");
      stop_after_jal=1;
      break;
    case IMM16:
      imm16_alloc(current,i);
      break;
    case LOAD:
    case LOADLR:
      load_alloc(current,i);
      break;
    case STORE:
    case STORELR:
      store_alloc(current,i);
      break;
    case ALU:
      alu_alloc(current,i);
      break;
    case SHIFT:
      shift_alloc(current,i);
      break;
    case MULTDIV:
      multdiv_alloc(current,i);
      break;
    case SHIFTIMM:
      shiftimm_alloc(current,i);
      break;
    case MOV:
      mov_alloc(current,i);
      break;
    case COP0:
      cop0_alloc(current,i);
      break;
    case COP1:
      // COP1 is a no-op here (no FPU on the R3000A target)
      break;
    case COP2:
      cop2_alloc(current,i);
      break;
    case C1LS:
      c1ls_alloc(current,i);
      break;
    case C2LS:
      c2ls_alloc(current,i);
      break;
    case C2OP:
      c2op_alloc(current,i);
      break;
  }
}
2032
// Special case where a branch and delay slot span two pages in virtual memory
// All host registers are made flushable (alloc_all / minimum_free_regs)
// and the registers the branch itself reads/writes are allocated depending
// on the branch encoding.
static void pagespan_alloc(struct regstat *current,int i)
{
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  minimum_free_regs[i]=HOST_REGS;
  alloc_all(current,i);
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  if(dops[i].opcode==3) // JAL
  {
    // link register is written
    alloc_reg(current,i,31);
    dirty_reg(current,31);
  }
  if(dops[i].opcode==0&&(dops[i].opcode2&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,dops[i].rs1); // jump target register
    if (dops[i].rt1!=0) {
      // JALR link destination
      alloc_reg(current,i,dops[i].rt1);
      dirty_reg(current,dops[i].rt1);
    }
  }
  if((dops[i].opcode&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    // two-operand compare
    if(dops[i].rs1) alloc_reg(current,i,dops[i].rs1);
    if(dops[i].rs2) alloc_reg(current,i,dops[i].rs2);
  }
  else
  if((dops[i].opcode&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    // compare against zero, single operand
    if(dops[i].rs1) alloc_reg(current,i,dops[i].rs1);
  }
  //else ...
}
2068
b14b6a8f 2069static void add_stub(enum stub_type type, void *addr, void *retaddr,
2070 u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e)
2071{
d1e4ebd9 2072 assert(stubcount < ARRAY_SIZE(stubs));
b14b6a8f 2073 stubs[stubcount].type = type;
2074 stubs[stubcount].addr = addr;
2075 stubs[stubcount].retaddr = retaddr;
2076 stubs[stubcount].a = a;
2077 stubs[stubcount].b = b;
2078 stubs[stubcount].c = c;
2079 stubs[stubcount].d = d;
2080 stubs[stubcount].e = e;
57871462 2081 stubcount++;
2082}
2083
b14b6a8f 2084static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
81dbbf4c 2085 int i, int addr_reg, const struct regstat *i_regs, int ccadj, u_int reglist)
b14b6a8f 2086{
2087 add_stub(type, addr, retaddr, i, addr_reg, (uintptr_t)i_regs, ccadj, reglist);
2088}
2089
57871462 2090// Write out a single register
ad49de89 2091static void wb_register(signed char r,signed char regmap[],uint64_t dirty)
57871462 2092{
2093 int hr;
2094 for(hr=0;hr<HOST_REGS;hr++) {
2095 if(hr!=EXCLUDE_REG) {
2096 if((regmap[hr]&63)==r) {
2097 if((dirty>>hr)&1) {
ad49de89 2098 assert(regmap[hr]<64);
2099 emit_storereg(r,hr);
57871462 2100 }
2101 }
2102 }
2103 }
2104}
2105
8062d65a 2106static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t u)
2107{
2108 //if(dirty_pre==dirty) return;
2109 int hr,reg;
2110 for(hr=0;hr<HOST_REGS;hr++) {
2111 if(hr!=EXCLUDE_REG) {
2112 reg=pre[hr];
2113 if(((~u)>>(reg&63))&1) {
2114 if(reg>0) {
2115 if(((dirty_pre&~dirty)>>hr)&1) {
2116 if(reg>0&&reg<34) {
2117 emit_storereg(reg,hr);
2118 }
2119 else if(reg>=64) {
2120 assert(0);
2121 }
2122 }
2123 }
2124 }
2125 }
2126 }
2127}
2128
687b4580 2129// trashes r2
2130static void pass_args(int a0, int a1)
2131{
2132 if(a0==1&&a1==0) {
2133 // must swap
2134 emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0);
2135 }
2136 else if(a0!=0&&a1==0) {
2137 emit_mov(a1,1);
2138 if (a0>=0) emit_mov(a0,0);
2139 }
2140 else {
2141 if(a0>=0&&a0!=0) emit_mov(a0,0);
2142 if(a1>=0&&a1!=1) emit_mov(a1,1);
2143 }
2144}
2145
// Emit host code for R-type ALU instructions: ADD/ADDU/SUB/SUBU, SLT/SLTU
// and AND/OR/XOR/NOR. Doubleword variants (DADD.. / DSUB..) are not
// supported (assert). Instructions whose destination is r0, or whose
// result has no host register allocated, emit nothing.
static void alu_assemble(int i,struct regstat *i_regs)
{
  if(dops[i].opcode2>=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU
    if(dops[i].rt1) {
      signed char s1,s2,t;
      t=get_reg(i_regs->regmap,dops[i].rt1);
      if(t>=0) {
        s1=get_reg(i_regs->regmap,dops[i].rs1);
        s2=get_reg(i_regs->regmap,dops[i].rs2);
        if(dops[i].rs1&&dops[i].rs2) {
          assert(s1>=0);
          assert(s2>=0);
          // opcode2 bit 1 distinguishes SUB (set) from ADD (clear)
          if(dops[i].opcode2&2) emit_sub(s1,s2,t);
          else emit_add(s1,s2,t);
        }
        else if(dops[i].rs1) {
          // rs2 is r0: result is rs1 (add) or rs1 (sub of zero) -> move
          if(s1>=0) emit_mov(s1,t);
          else emit_loadreg(dops[i].rs1,t);
        }
        else if(dops[i].rs2) {
          // rs1 is r0: result is rs2 (add) or -rs2 (sub)
          if(s2>=0) {
            if(dops[i].opcode2&2) emit_neg(s2,t);
            else emit_mov(s2,t);
          }
          else {
            emit_loadreg(dops[i].rs2,t);
            if(dops[i].opcode2&2) emit_neg(t,t);
          }
        }
        else emit_zeroreg(t); // both sources are r0
      }
    }
  }
  if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    assert(0);
  }
  if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU
    if(dops[i].rt1) {
      signed char s1l,s2l,t;
      {
        t=get_reg(i_regs->regmap,dops[i].rt1);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,dops[i].rs1);
          s2l=get_reg(i_regs->regmap,dops[i].rs2);
          if(dops[i].rs2==0) // rx<r0
          {
            if(dops[i].opcode2==0x2a&&dops[i].rs1!=0) { // SLT
              // rs1<0 iff its sign bit is set
              assert(s1l>=0);
              emit_shrimm(s1l,31,t);
            }
            else // SLTU (unsigned can not be less than zero, 0<0)
              emit_zeroreg(t);
          }
          else if(dops[i].rs1==0) // r0<rx
          {
            assert(s2l>=0);
            if(dops[i].opcode2==0x2a) // SLT
              emit_set_gz32(s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz32(s2l,t);
          }
          else{
            // general two-register compare
            assert(s1l>=0);assert(s2l>=0);
            if(dops[i].opcode2==0x2a) // SLT
              emit_set_if_less32(s1l,s2l,t);
            else // SLTU
              emit_set_if_carry32(s1l,s2l,t);
          }
        }
      }
    }
  }
  if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR
    if(dops[i].rt1) {
      signed char s1l,s2l,tl;
      tl=get_reg(i_regs->regmap,dops[i].rt1);
      {
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,dops[i].rs1);
          s2l=get_reg(i_regs->regmap,dops[i].rs2);
          if(dops[i].rs1&&dops[i].rs2) {
            assert(s1l>=0);
            assert(s2l>=0);
            if(dops[i].opcode2==0x24) { // AND
              emit_and(s1l,s2l,tl);
            } else
            if(dops[i].opcode2==0x25) { // OR
              emit_or(s1l,s2l,tl);
            } else
            if(dops[i].opcode2==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
            } else
            if(dops[i].opcode2==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_not(tl,tl);
            }
          }
          else
          {
            // at least one source is r0: each op degenerates
            if(dops[i].opcode2==0x24) { // AND
              emit_zeroreg(tl); // x & 0 == 0
            } else
            if(dops[i].opcode2==0x25||dops[i].opcode2==0x26) { // OR/XOR
              // x|0 == x^0 == x: plain move of the non-zero source
              if(dops[i].rs1){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(dops[i].rs1,tl); // CHECK: regmap_entry?
              }
              else
              if(dops[i].rs2){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(dops[i].rs2,tl); // CHECK: regmap_entry?
              }
              else emit_zeroreg(tl);
            } else
            if(dops[i].opcode2==0x27) { // NOR
              // ~(x|0) == ~x; ~(0|0) == -1
              if(dops[i].rs1){
                if(s1l>=0) emit_not(s1l,tl);
                else {
                  emit_loadreg(dops[i].rs1,tl);
                  emit_not(tl,tl);
                }
              }
              else
              if(dops[i].rs2){
                if(s2l>=0) emit_not(s2l,tl);
                else {
                  emit_loadreg(dops[i].rs2,tl);
                  emit_not(tl,tl);
                }
              }
              else emit_movimm(-1,tl);
            }
          }
        }
      }
    }
  }
}
2285
// Emit host code for I-type (16-bit immediate) instructions: LUI,
// ADDI/ADDIU, SLTI/SLTIU, ANDI/ORI/XORI. Constant propagation is honored:
// when the destination is already known constant (isconst) no code is
// emitted, and when the source register's constant value is known
// (wasconst) the result is materialized with a movimm instead.
void imm16_assemble(int i,struct regstat *i_regs)
{
  if (dops[i].opcode==0x0f) { // LUI
    if(dops[i].rt1) {
      signed char t;
      t=get_reg(i_regs->regmap,dops[i].rt1);
      //assert(t>=0);
      if(t>=0) {
        if(!((i_regs->isconst>>t)&1))
          emit_movimm(imm[i]<<16,t);
      }
    }
  }
  if(dops[i].opcode==0x08||dops[i].opcode==0x09) { // ADDI/ADDIU
    if(dops[i].rt1) {
      signed char s,t;
      t=get_reg(i_regs->regmap,dops[i].rt1);
      s=get_reg(i_regs->regmap,dops[i].rs1);
      if(dops[i].rs1) {
        //assert(t>=0);
        //assert(s>=0);
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1)) {
            if(s<0) {
              // source not in a register: reload into t, then add in place
              if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t);
              emit_addimm(t,imm[i],t);
            }else{
              if(!((i_regs->wasconst>>s)&1))
                emit_addimm(s,imm[i],t);
              else
                emit_movimm(constmap[i][s]+imm[i],t); // fold known constant
            }
          }
        }
      } else {
        // rs1 is r0: result is just the immediate
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1))
            emit_movimm(imm[i],t);
        }
      }
    }
  }
  if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU
    if(dops[i].rt1) {
      signed char sl,tl;
      tl=get_reg(i_regs->regmap,dops[i].rt1);
      sl=get_reg(i_regs->regmap,dops[i].rs1);
      if(tl>=0) {
        if(dops[i].rs1) {
          assert(sl>=0);
          emit_addimm(sl,imm[i],tl);
        } else {
          emit_movimm(imm[i],tl);
        }
      }
    }
  }
  else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU
    if(dops[i].rt1) {
      //assert(dops[i].rs1!=0); // r0 might be valid, but it's probably a bug
      signed char sl,t;
      t=get_reg(i_regs->regmap,dops[i].rt1);
      sl=get_reg(i_regs->regmap,dops[i].rs1);
      //assert(t>=0);
      if(t>=0) {
        if(dops[i].rs1>0) {
          if(dops[i].opcode==0x0a) { // SLTI
            if(sl<0) {
              if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t);
              emit_slti32(t,imm[i],t);
            }else{
              emit_slti32(sl,imm[i],t);
            }
          }
          else { // SLTIU
            if(sl<0) {
              if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t);
              emit_sltiu32(t,imm[i],t);
            }else{
              emit_sltiu32(sl,imm[i],t);
            }
          }
        }else{
          // SLTI(U) with r0 is just stupid,
          // nonetheless examples can be found
          // (dangling-else is intentional: the inner if/else belongs to
          // the SLTI branch, the final else to the opcode test)
          if(dops[i].opcode==0x0a) // SLTI
            if(0<imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          else // SLTIU
          {
            if(imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          }
        }
      }
    }
  }
  else if(dops[i].opcode>=0x0c&&dops[i].opcode<=0x0e) { // ANDI/ORI/XORI
    if(dops[i].rt1) {
      signed char sl,tl;
      tl=get_reg(i_regs->regmap,dops[i].rt1);
      sl=get_reg(i_regs->regmap,dops[i].rs1);
      if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
        if(dops[i].opcode==0x0c) //ANDI
        {
          if(dops[i].rs1) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=dops[i].rs1) emit_loadreg(dops[i].rs1,tl);
              emit_andimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_andimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]&imm[i],tl); // fold known constant
            }
          }
          else
            emit_zeroreg(tl); // 0 & imm == 0
        }
        else
        {
          if(dops[i].rs1) {
            if(sl<0) {
              // source not in a register: reload into destination first
              if(i_regs->regmap_entry[tl]!=dops[i].rs1) emit_loadreg(dops[i].rs1,tl);
            }
            if(dops[i].opcode==0x0d) { // ORI
              if(sl<0) {
                emit_orimm(tl,imm[i],tl);
              }else{
                if(!((i_regs->wasconst>>sl)&1))
                  emit_orimm(sl,imm[i],tl);
                else
                  emit_movimm(constmap[i][sl]|imm[i],tl);
              }
            }
            if(dops[i].opcode==0x0e) { // XORI
              if(sl<0) {
                emit_xorimm(tl,imm[i],tl);
              }else{
                if(!((i_regs->wasconst>>sl)&1))
                  emit_xorimm(sl,imm[i],tl);
                else
                  emit_movimm(constmap[i][sl]^imm[i],tl);
              }
            }
          }
          else {
            // rs1 is r0: 0|imm == 0^imm == imm
            emit_movimm(imm[i],tl);
          }
        }
      }
    }
  }
}
2440
// Emit host code for shift-by-immediate instructions (SLL/SRL/SRA).
// A shift amount of zero becomes a plain move; doubleword shift variants
// (DSLL.. / DSRL.. / DSRA..) are not supported (assert).
void shiftimm_assemble(int i,struct regstat *i_regs)
{
  if(dops[i].opcode2<=0x3) // SLL/SRL/SRA
  {
    if(dops[i].rt1) {
      signed char s,t;
      t=get_reg(i_regs->regmap,dops[i].rt1);
      s=get_reg(i_regs->regmap,dops[i].rs1);
      //assert(t>=0);
      if(t>=0&&!((i_regs->isconst>>t)&1)){
        if(dops[i].rs1==0)
        {
          emit_zeroreg(t); // shifting r0 always yields zero
        }
        else
        {
          // source not in a register: reload into the destination and
          // shift in place (s<0?t:s below)
          if(s<0&&i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t);
          if(imm[i]) {
            if(dops[i].opcode2==0) // SLL
            {
              emit_shlimm(s<0?t:s,imm[i],t);
            }
            if(dops[i].opcode2==2) // SRL
            {
              emit_shrimm(s<0?t:s,imm[i],t);
            }
            if(dops[i].opcode2==3) // SRA
            {
              emit_sarimm(s<0?t:s,imm[i],t);
            }
          }else{
            // Shift by zero
            if(s>=0 && s!=t) emit_mov(s,t);
          }
        }
      }
      //emit_storereg(dops[i].rt1,t); //DEBUG
    }
  }
  if(dops[i].opcode2>=0x38&&dops[i].opcode2<=0x3b) // DSLL/DSRL/DSRA
  {
    assert(0);
  }
  if(dops[i].opcode2==0x3c) // DSLL32
  {
    assert(0);
  }
  if(dops[i].opcode2==0x3e) // DSRL32
  {
    assert(0);
  }
  if(dops[i].opcode2==0x3f) // DSRA32
  {
    assert(0);
  }
}
2497
2498#ifndef shift_assemble
3968e69e 2499static void shift_assemble(int i,struct regstat *i_regs)
57871462 2500{
3968e69e 2501 signed char s,t,shift;
cf95b4f0 2502 if (dops[i].rt1 == 0)
3968e69e 2503 return;
cf95b4f0 2504 assert(dops[i].opcode2<=0x07); // SLLV/SRLV/SRAV
2505 t = get_reg(i_regs->regmap, dops[i].rt1);
2506 s = get_reg(i_regs->regmap, dops[i].rs1);
2507 shift = get_reg(i_regs->regmap, dops[i].rs2);
3968e69e 2508 if (t < 0)
2509 return;
2510
cf95b4f0 2511 if(dops[i].rs1==0)
3968e69e 2512 emit_zeroreg(t);
cf95b4f0 2513 else if(dops[i].rs2==0) {
3968e69e 2514 assert(s>=0);
2515 if(s!=t) emit_mov(s,t);
2516 }
2517 else {
2518 host_tempreg_acquire();
2519 emit_andimm(shift,31,HOST_TEMPREG);
cf95b4f0 2520 switch(dops[i].opcode2) {
3968e69e 2521 case 4: // SLLV
2522 emit_shl(s,HOST_TEMPREG,t);
2523 break;
2524 case 6: // SRLV
2525 emit_shr(s,HOST_TEMPREG,t);
2526 break;
2527 case 7: // SRAV
2528 emit_sar(s,HOST_TEMPREG,t);
2529 break;
2530 default:
2531 assert(0);
2532 }
2533 host_tempreg_release();
2534 }
57871462 2535}
3968e69e 2536
57871462 2537#endif
2538
8062d65a 2539enum {
2540 MTYPE_8000 = 0,
2541 MTYPE_8020,
2542 MTYPE_0000,
2543 MTYPE_A000,
2544 MTYPE_1F80,
2545};
2546
2547static int get_ptr_mem_type(u_int a)
2548{
2549 if(a < 0x00200000) {
2550 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
2551 // return wrong, must use memhandler for BIOS self-test to pass
2552 // 007 does similar stuff from a00 mirror, weird stuff
2553 return MTYPE_8000;
2554 return MTYPE_0000;
2555 }
2556 if(0x1f800000 <= a && a < 0x1f801000)
2557 return MTYPE_1F80;
2558 if(0x80200000 <= a && a < 0x80800000)
2559 return MTYPE_8020;
2560 if(0xa0000000 <= a && a < 0xa0200000)
2561 return MTYPE_A000;
2562 return MTYPE_8000;
2563}
2564
// Emit the fast-path address check for a memory access at instruction i
// whose effective address is in host register 'addr'. Using the
// speculative memory-region tracking (smrv), the address may first be
// normalized to the canonical RAM mirror in HOST_TEMPREG, which is then
// reported to the caller through *addr_reg_override; in that case the
// caller is responsible for releasing HOST_TEMPREG afterwards.
// Returns the location of the emitted conditional branch that must be
// patched to jump to the slow-path stub.
static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
{
  void *jaddr = NULL;
  int type=0;
  int mr=dops[i].rs1;
  if(((smrv_strong|smrv_weak)>>mr)&1) {
    // we have a speculated value for the base register: classify it
    type=get_ptr_mem_type(smrv[mr]);
    //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
  }
  else {
    // use the mirror we are running on
    type=get_ptr_mem_type(start);
    //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
  }

  // remap mirrors onto the canonical RAM range, then fall through to the
  // generic check (type=0 == MTYPE_8000)
  if(type==MTYPE_8020) { // RAM 80200000+ mirror
    host_tempreg_acquire();
    emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_0000) { // RAM 0 mirror
    host_tempreg_acquire();
    emit_orimm(addr,0x80000000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_A000) { // RAM A mirror
    host_tempreg_acquire();
    emit_andimm(addr,~0x20000000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_1F80) { // scratchpad
    if (psxH == (void *)0x1f800000) {
      // scratchpad is mapped at its native address: range-check directly
      host_tempreg_acquire();
      emit_xorimm(addr,0x1f800000,HOST_TEMPREG);
      emit_cmpimm(HOST_TEMPREG,0x1000);
      host_tempreg_release();
      jaddr=out;
      emit_jc(0);
    }
    else {
      // do the usual RAM check, jump will go to the right handler
      type=0;
    }
  }

  if(type==0)
  {
    // generic fast path: branch to the stub if addr is outside RAM
    emit_cmpimm(addr,RAM_SIZE);
    jaddr=out;
    #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
    // Hint to branch predictor that the branch is unlikely to be taken
    if(dops[i].rs1>=28)
      emit_jno_unlikely(0);
    else
    #endif
    emit_jno(0);
    if(ram_offset!=0) {
      // RAM is not mapped at 0 in host space: add the offset
      host_tempreg_acquire();
      emit_addimm(addr,ram_offset,HOST_TEMPREG);
      addr=*addr_reg_override=HOST_TEMPREG;
    }
  }

  return jaddr;
}
2633
687b4580 2634// return memhandler, or get directly accessable address and return 0
2635static void *get_direct_memhandler(void *table, u_int addr,
2636 enum stub_type type, uintptr_t *addr_host)
2637{
2638 uintptr_t l1, l2 = 0;
2639 l1 = ((uintptr_t *)table)[addr>>12];
2640 if ((l1 & (1ul << (sizeof(l1)*8-1))) == 0) {
2641 uintptr_t v = l1 << 1;
2642 *addr_host = v + addr;
2643 return NULL;
2644 }
2645 else {
2646 l1 <<= 1;
2647 if (type == LOADB_STUB || type == LOADBU_STUB || type == STOREB_STUB)
2648 l2 = ((uintptr_t *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2649 else if (type == LOADH_STUB || type == LOADHU_STUB || type == STOREH_STUB)
2650 l2=((uintptr_t *)l1)[0x1000/4 + (addr&0xfff)/2];
2651 else
2652 l2=((uintptr_t *)l1)[(addr&0xfff)/4];
2653 if ((l2 & (1<<31)) == 0) {
2654 uintptr_t v = l2 << 1;
2655 *addr_host = v + (addr&0xfff);
2656 return NULL;
2657 }
2658 return (void *)(l2 << 1);
2659 }
2660}
2661
81dbbf4c 2662static u_int get_host_reglist(const signed char *regmap)
2663{
2664 u_int reglist = 0, hr;
2665 for (hr = 0; hr < HOST_REGS; hr++) {
2666 if (hr != EXCLUDE_REG && regmap[hr] >= 0)
2667 reglist |= 1 << hr;
2668 }
2669 return reglist;
2670}
2671
2672static u_int reglist_exclude(u_int reglist, int r1, int r2)
2673{
2674 if (r1 >= 0)
2675 reglist &= ~(1u << r1);
2676 if (r2 >= 0)
2677 reglist &= ~(1u << r2);
2678 return reglist;
2679}
2680
e3c6bdb5 2681// find a temp caller-saved register not in reglist (so assumed to be free)
2682static int reglist_find_free(u_int reglist)
2683{
2684 u_int free_regs = ~reglist & CALLER_SAVE_REGS;
2685 if (free_regs == 0)
2686 return -1;
2687 return __builtin_ctz(free_regs);
2688}
2689
81dbbf4c 2690static void load_assemble(int i, const struct regstat *i_regs)
57871462 2691{
7c3a5182 2692 int s,tl,addr;
57871462 2693 int offset;
b14b6a8f 2694 void *jaddr=0;
5bf843dc 2695 int memtarget=0,c=0;
d1e4ebd9 2696 int fastio_reg_override=-1;
81dbbf4c 2697 u_int reglist=get_host_reglist(i_regs->regmap);
cf95b4f0 2698 tl=get_reg(i_regs->regmap,dops[i].rt1);
2699 s=get_reg(i_regs->regmap,dops[i].rs1);
57871462 2700 offset=imm[i];
57871462 2701 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2702 if(s>=0) {
2703 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2704 if (c) {
2705 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2706 }
57871462 2707 }
57871462 2708 //printf("load_assemble: c=%d\n",c);
643aeae3 2709 //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset);
57871462 2710 // FIXME: Even if the load is a NOP, we should check for pagefaults...
581335b0 2711 if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80))
cf95b4f0 2712 ||dops[i].rt1==0) {
5bf843dc 2713 // could be FIFO, must perform the read
f18c0f46 2714 // ||dummy read
5bf843dc 2715 assem_debug("(forced read)\n");
2716 tl=get_reg(i_regs->regmap,-1);
2717 assert(tl>=0);
5bf843dc 2718 }
2719 if(offset||s<0||c) addr=tl;
2720 else addr=s;
535d208a 2721 //if(tl<0) tl=get_reg(i_regs->regmap,-1);
2722 if(tl>=0) {
2723 //printf("load_assemble: c=%d\n",c);
643aeae3 2724 //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset);
535d208a 2725 assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
2726 reglist&=~(1<<tl);
1edfcc68 2727 if(!c) {
1edfcc68 2728 #ifdef R29_HACK
2729 // Strmnnrmn's speed hack
cf95b4f0 2730 if(dops[i].rs1!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
1edfcc68 2731 #endif
2732 {
d1e4ebd9 2733 jaddr=emit_fastpath_cmp_jump(i,addr,&fastio_reg_override);
535d208a 2734 }
1edfcc68 2735 }
2736 else if(ram_offset&&memtarget) {
d1e4ebd9 2737 host_tempreg_acquire();
1edfcc68 2738 emit_addimm(addr,ram_offset,HOST_TEMPREG);
d1e4ebd9 2739 fastio_reg_override=HOST_TEMPREG;
535d208a 2740 }
cf95b4f0 2741 int dummy=(dops[i].rt1==0)||(tl!=get_reg(i_regs->regmap,dops[i].rt1)); // ignore loads to r0 and unneeded reg
2742 if (dops[i].opcode==0x20) { // LB
535d208a 2743 if(!c||memtarget) {
2744 if(!dummy) {
57871462 2745 {
535d208a 2746 int x=0,a=tl;
535d208a 2747 if(!c) a=addr;
d1e4ebd9 2748 if(fastio_reg_override>=0) a=fastio_reg_override;
b1570849 2749
9c45ca93 2750 emit_movsbl_indexed(x,a,tl);
57871462 2751 }
57871462 2752 }
535d208a 2753 if(jaddr)
b14b6a8f 2754 add_stub_r(LOADB_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2755 }
535d208a 2756 else
cf95b4f0 2757 inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist);
535d208a 2758 }
cf95b4f0 2759 if (dops[i].opcode==0x21) { // LH
535d208a 2760 if(!c||memtarget) {
2761 if(!dummy) {
9c45ca93 2762 int x=0,a=tl;
2763 if(!c) a=addr;
d1e4ebd9 2764 if(fastio_reg_override>=0) a=fastio_reg_override;
9c45ca93 2765 emit_movswl_indexed(x,a,tl);
57871462 2766 }
535d208a 2767 if(jaddr)
b14b6a8f 2768 add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2769 }
535d208a 2770 else
cf95b4f0 2771 inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist);
535d208a 2772 }
cf95b4f0 2773 if (dops[i].opcode==0x23) { // LW
535d208a 2774 if(!c||memtarget) {
2775 if(!dummy) {
dadf55f2 2776 int a=addr;
d1e4ebd9 2777 if(fastio_reg_override>=0) a=fastio_reg_override;
9c45ca93 2778 emit_readword_indexed(0,a,tl);
57871462 2779 }
535d208a 2780 if(jaddr)
b14b6a8f 2781 add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2782 }
535d208a 2783 else
cf95b4f0 2784 inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist);
535d208a 2785 }
cf95b4f0 2786 if (dops[i].opcode==0x24) { // LBU
535d208a 2787 if(!c||memtarget) {
2788 if(!dummy) {
9c45ca93 2789 int x=0,a=tl;
2790 if(!c) a=addr;
d1e4ebd9 2791 if(fastio_reg_override>=0) a=fastio_reg_override;
b1570849 2792
9c45ca93 2793 emit_movzbl_indexed(x,a,tl);
57871462 2794 }
535d208a 2795 if(jaddr)
b14b6a8f 2796 add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2797 }
535d208a 2798 else
cf95b4f0 2799 inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist);
535d208a 2800 }
cf95b4f0 2801 if (dops[i].opcode==0x25) { // LHU
535d208a 2802 if(!c||memtarget) {
2803 if(!dummy) {
9c45ca93 2804 int x=0,a=tl;
2805 if(!c) a=addr;
d1e4ebd9 2806 if(fastio_reg_override>=0) a=fastio_reg_override;
9c45ca93 2807 emit_movzwl_indexed(x,a,tl);
57871462 2808 }
535d208a 2809 if(jaddr)
b14b6a8f 2810 add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2811 }
535d208a 2812 else
cf95b4f0 2813 inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist);
535d208a 2814 }
cf95b4f0 2815 if (dops[i].opcode==0x27) { // LWU
7c3a5182 2816 assert(0);
535d208a 2817 }
cf95b4f0 2818 if (dops[i].opcode==0x37) { // LD
9c45ca93 2819 assert(0);
57871462 2820 }
535d208a 2821 }
d1e4ebd9 2822 if (fastio_reg_override == HOST_TEMPREG)
2823 host_tempreg_release();
57871462 2824}
2825
2826#ifndef loadlr_assemble
81dbbf4c 2827static void loadlr_assemble(int i, const struct regstat *i_regs)
57871462 2828{
3968e69e 2829 int s,tl,temp,temp2,addr;
2830 int offset;
2831 void *jaddr=0;
2832 int memtarget=0,c=0;
2833 int fastio_reg_override=-1;
81dbbf4c 2834 u_int reglist=get_host_reglist(i_regs->regmap);
cf95b4f0 2835 tl=get_reg(i_regs->regmap,dops[i].rt1);
2836 s=get_reg(i_regs->regmap,dops[i].rs1);
3968e69e 2837 temp=get_reg(i_regs->regmap,-1);
2838 temp2=get_reg(i_regs->regmap,FTEMP);
2839 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
2840 assert(addr<0);
2841 offset=imm[i];
3968e69e 2842 reglist|=1<<temp;
2843 if(offset||s<0||c) addr=temp2;
2844 else addr=s;
2845 if(s>=0) {
2846 c=(i_regs->wasconst>>s)&1;
2847 if(c) {
2848 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
2849 }
2850 }
2851 if(!c) {
2852 emit_shlimm(addr,3,temp);
cf95b4f0 2853 if (dops[i].opcode==0x22||dops[i].opcode==0x26) {
3968e69e 2854 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
2855 }else{
2856 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
2857 }
2858 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastio_reg_override);
2859 }
2860 else {
2861 if(ram_offset&&memtarget) {
2862 host_tempreg_acquire();
2863 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
2864 fastio_reg_override=HOST_TEMPREG;
2865 }
cf95b4f0 2866 if (dops[i].opcode==0x22||dops[i].opcode==0x26) {
3968e69e 2867 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
2868 }else{
2869 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
2870 }
2871 }
cf95b4f0 2872 if (dops[i].opcode==0x22||dops[i].opcode==0x26) { // LWL/LWR
3968e69e 2873 if(!c||memtarget) {
2874 int a=temp2;
2875 if(fastio_reg_override>=0) a=fastio_reg_override;
2876 emit_readword_indexed(0,a,temp2);
2877 if(fastio_reg_override==HOST_TEMPREG) host_tempreg_release();
2878 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
2879 }
2880 else
2881 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
cf95b4f0 2882 if(dops[i].rt1) {
3968e69e 2883 assert(tl>=0);
2884 emit_andimm(temp,24,temp);
cf95b4f0 2885 if (dops[i].opcode==0x22) // LWL
3968e69e 2886 emit_xorimm(temp,24,temp);
2887 host_tempreg_acquire();
2888 emit_movimm(-1,HOST_TEMPREG);
cf95b4f0 2889 if (dops[i].opcode==0x26) {
3968e69e 2890 emit_shr(temp2,temp,temp2);
2891 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
2892 }else{
2893 emit_shl(temp2,temp,temp2);
2894 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
2895 }
2896 host_tempreg_release();
2897 emit_or(temp2,tl,tl);
2898 }
cf95b4f0 2899 //emit_storereg(dops[i].rt1,tl); // DEBUG
3968e69e 2900 }
cf95b4f0 2901 if (dops[i].opcode==0x1A||dops[i].opcode==0x1B) { // LDL/LDR
3968e69e 2902 assert(0);
2903 }
57871462