drc: remove old debug code
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - new_dynarec.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21#include <stdlib.h>
22#include <stdint.h> //include for uint64_t
23#include <assert.h>
d848b60a 24#include <errno.h>
4600ba03 25#include <sys/mman.h>
d148d265 26#ifdef __MACH__
27#include <libkern/OSCacheControl.h>
28#endif
1e212a25 29#ifdef _3DS
30#include <3ds_utils.h>
31#endif
32#ifdef VITA
33#include <psp2/kernel/sysmem.h>
34static int sceBlock;
35#endif
57871462 36
d148d265 37#include "new_dynarec_config.h"
dd79da89 38#include "../psxhle.h" //emulator interface
3d624f89 39#include "emu_if.h" //emulator interface
57871462 40
b14b6a8f 41#ifndef ARRAY_SIZE
42#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
43#endif
44
4600ba03 45//#define DISASM
46//#define assem_debug printf
47//#define inv_debug printf
48#define assem_debug(...)
49#define inv_debug(...)
57871462 50
51#ifdef __i386__
52#include "assem_x86.h"
53#endif
54#ifdef __x86_64__
55#include "assem_x64.h"
56#endif
57#ifdef __arm__
58#include "assem_arm.h"
59#endif
60
61#define MAXBLOCK 4096
62#define MAX_OUTPUT_BLOCK_SIZE 262144
2573466a 63
// stubs
// Kinds of out-of-line code fragments emitted after a compiled block;
// names suggest cycle-count checks, FP-unusable traps and memory access
// handlers — exact semantics live in the per-arch assem_* backends.
enum stub_type {
  CC_STUB = 1,
  FP_STUB = 2,
  LOADB_STUB = 3,
  LOADH_STUB = 4,
  LOADW_STUB = 5,
  LOADD_STUB = 6,
  LOADBU_STUB = 7,
  LOADHU_STUB = 8,
  STOREB_STUB = 9,
  STOREH_STUB = 10,
  STOREW_STUB = 11,
  STORED_STUB = 12,
  STORELR_STUB = 13,
  INVCODE_STUB = 14,
};
81
// Register mapping / liveness state at one point in a block.
// NOTE(review): the "was*" fields appear to be pre-instruction
// counterparts of the matching post-instruction fields — confirm
// against the allocator users before relying on this.
struct regstat
{
  signed char regmap_entry[HOST_REGS]; // mapping required at block entry
  signed char regmap[HOST_REGS]; // MIPS reg held by each host reg, -1 = free
  uint64_t was32;
  uint64_t is32;     // bit (reg&63) set: value is 32-bit (see flush_dirty_uppers)
  uint64_t wasdirty;
  uint64_t dirty;    // bit per host reg: value not yet written back (see dirty_reg)
  uint64_t u;
  uint64_t uu;
  u_int wasconst;
  u_int isconst;     // bit per host reg: constant tracked in current_constmap (see set_const)
  u_int loadedconst; // host regs that have constants loaded
  u_int waswritten; // MIPS regs that were used as store base before
};
97
// note: asm depends on this layout
// Linked-list node mapping a guest virtual address to translated code
// (used by jump_in/jump_out/jump_dirty page lists).
struct ll_entry
{
  u_int vaddr;           // guest virtual address of the block entry
  u_int reg_sv_flags;
  void *addr;            // translation-cache address
  struct ll_entry *next;
};
106
// 2-way hash-table bin; slot 0 is the most recently added entry
// (see hash_table_add / remove_hash).
struct ht_entry
{
  u_int vaddr[2];
  void *tcaddr[2];
};
112
// Arguments recorded for a stub to be emitted later; a..e are
// overloaded per stub type (see add_stub / add_stub_r).
struct code_stub
{
  enum stub_type type;
  void *addr;    // stub location in the translation cache
  void *retaddr; // address the stub returns to
  u_int a;
  uintptr_t b;
  uintptr_t c;
  u_int d;
  u_int e;
};
124
e2b5e7aa 125 // used by asm:
126 u_char *out;
df4dc2b1 127 struct ht_entry hash_table[65536] __attribute__((aligned(16)));
e2b5e7aa 128 struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
129 struct ll_entry *jump_dirty[4096];
130
131 static struct ll_entry *jump_out[4096];
132 static u_int start;
133 static u_int *source;
134 static char insn[MAXBLOCK][10];
135 static u_char itype[MAXBLOCK];
136 static u_char opcode[MAXBLOCK];
137 static u_char opcode2[MAXBLOCK];
138 static u_char bt[MAXBLOCK];
139 static u_char rs1[MAXBLOCK];
140 static u_char rs2[MAXBLOCK];
141 static u_char rt1[MAXBLOCK];
142 static u_char rt2[MAXBLOCK];
143 static u_char us1[MAXBLOCK];
144 static u_char us2[MAXBLOCK];
145 static u_char dep1[MAXBLOCK];
146 static u_char dep2[MAXBLOCK];
147 static u_char lt1[MAXBLOCK];
bedfea38 148 static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
149 static uint64_t gte_rt[MAXBLOCK];
150 static uint64_t gte_unneeded[MAXBLOCK];
ffb0b9e0 151 static u_int smrv[32]; // speculated MIPS register values
152 static u_int smrv_strong; // mask or regs that are likely to have correct values
153 static u_int smrv_weak; // same, but somewhat less likely
154 static u_int smrv_strong_next; // same, but after current insn executes
155 static u_int smrv_weak_next;
e2b5e7aa 156 static int imm[MAXBLOCK];
157 static u_int ba[MAXBLOCK];
158 static char likely[MAXBLOCK];
159 static char is_ds[MAXBLOCK];
160 static char ooo[MAXBLOCK];
161 static uint64_t unneeded_reg[MAXBLOCK];
162 static uint64_t unneeded_reg_upper[MAXBLOCK];
163 static uint64_t branch_unneeded_reg[MAXBLOCK];
164 static uint64_t branch_unneeded_reg_upper[MAXBLOCK];
165 static signed char regmap_pre[MAXBLOCK][HOST_REGS];
956f3129 166 static uint64_t current_constmap[HOST_REGS];
167 static uint64_t constmap[MAXBLOCK][HOST_REGS];
168 static struct regstat regs[MAXBLOCK];
169 static struct regstat branch_regs[MAXBLOCK];
e2b5e7aa 170 static signed char minimum_free_regs[MAXBLOCK];
171 static u_int needed_reg[MAXBLOCK];
172 static u_int wont_dirty[MAXBLOCK];
173 static u_int will_dirty[MAXBLOCK];
174 static int ccadj[MAXBLOCK];
175 static int slen;
df4dc2b1 176 static void *instr_addr[MAXBLOCK];
e2b5e7aa 177 static u_int link_addr[MAXBLOCK][3];
178 static int linkcount;
b14b6a8f 179 static struct code_stub stubs[MAXBLOCK*3];
e2b5e7aa 180 static int stubcount;
181 static u_int literals[1024][2];
182 static int literalcount;
183 static int is_delayslot;
184 static int cop1_usable;
185 static char shadow[1048576] __attribute__((aligned(16)));
186 static void *copy;
187 static int expirep;
188 static u_int stop_after_jal;
a327ad27 189#ifndef RAM_FIXED
190 static u_int ram_offset;
191#else
192 static const u_int ram_offset=0;
193#endif
e2b5e7aa 194
195 int new_dynarec_hacks;
196 int new_dynarec_did_compile;
57871462 197 extern u_char restore_candidate[512];
198 extern int cycle_count;
199
200 /* registers that may be allocated */
201 /* 1-31 gpr */
202#define HIREG 32 // hi
203#define LOREG 33 // lo
204#define FSREG 34 // FPU status (FCSR)
205#define CSREG 35 // Coprocessor status
206#define CCREG 36 // Cycle count
207#define INVCP 37 // Pointer to invalid_code
1edfcc68 208//#define MMREG 38 // Pointer to memory_map
619e5ded 209#define ROREG 39 // ram offset (if rdram!=0x80000000)
210#define TEMPREG 40
211#define FTEMP 40 // FPU temporary register
212#define PTEMP 41 // Prefetch temporary register
1edfcc68 213//#define TLREG 42 // TLB mapping offset
619e5ded 214#define RHASH 43 // Return address hash
215#define RHTBL 44 // Return address hash table address
216#define RTEMP 45 // JR/JALR address register
217#define MAXREG 45
218#define AGEN1 46 // Address generation temporary register
1edfcc68 219//#define AGEN2 47 // Address generation temporary register
220//#define MGEN1 48 // Maptable address generation temporary register
221//#define MGEN2 49 // Maptable address generation temporary register
619e5ded 222#define BTREG 50 // Branch target temporary register
57871462 223
224 /* instruction types */
225#define NOP 0 // No operation
226#define LOAD 1 // Load
227#define STORE 2 // Store
228#define LOADLR 3 // Unaligned load
229#define STORELR 4 // Unaligned store
9f51b4b9 230#define MOV 5 // Move
57871462 231#define ALU 6 // Arithmetic/logic
232#define MULTDIV 7 // Multiply/divide
233#define SHIFT 8 // Shift by register
234#define SHIFTIMM 9// Shift by immediate
235#define IMM16 10 // 16-bit immediate
236#define RJUMP 11 // Unconditional jump to register
237#define UJUMP 12 // Unconditional jump
238#define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
239#define SJUMP 14 // Conditional branch (regimm format)
240#define COP0 15 // Coprocessor 0
241#define COP1 16 // Coprocessor 1
242#define C1LS 17 // Coprocessor 1 load/store
243#define FJUMP 18 // Conditional branch (floating point)
244#define FLOAT 19 // Floating point unit
245#define FCONV 20 // Convert integer to float
246#define FCOMP 21 // Floating point compare (sets FSREG)
247#define SYSCALL 22// SYSCALL
248#define OTHER 23 // Other
249#define SPAN 24 // Branch/delay slot spans 2 pages
250#define NI 25 // Not implemented
7139f3c8 251#define HLECALL 26// PCSX fake opcodes for HLE
b9b61529 252#define COP2 27 // Coprocessor 2 move
253#define C2LS 28 // Coprocessor 2 load/store
254#define C2OP 29 // Coprocessor 2 operation
1e973cb0 255#define INTCALL 30// Call interpreter to handle rare corner cases
57871462 256
57871462 257 /* branch codes */
258#define TAKEN 1
259#define NOTTAKEN 2
260#define NULLDS 3
261
262// asm linkage
263int new_recompile_block(int addr);
264void *get_addr_ht(u_int vaddr);
265void invalidate_block(u_int block);
266void invalidate_addr(u_int addr);
267void remove_hash(int vaddr);
57871462 268void dyna_linker();
269void dyna_linker_ds();
270void verify_code();
271void verify_code_vm();
272void verify_code_ds();
273void cc_interrupt();
274void fp_exception();
275void fp_exception_ds();
7139f3c8 276void jump_syscall_hle();
7139f3c8 277void jump_hlecall();
1e973cb0 278void jump_intcall();
7139f3c8 279void new_dyna_leave();
57871462 280
57871462 281// Needed by assembler
e2b5e7aa 282static void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
283static void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
284static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
285static void load_all_regs(signed char i_regmap[]);
286static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
287static void load_regs_entry(int t);
288static void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
289
290static int verify_dirty(u_int *ptr);
291static int get_final_value(int hr, int i, int *value);
b14b6a8f 292static void add_stub(enum stub_type type, void *addr, void *retaddr,
293 u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e);
294static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
295 int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist);
e2b5e7aa 296static void add_to_linker(int addr,int target,int ext);
57871462 297
e2b5e7aa 298static int tracedebug=0;
57871462 299
// Toggle the translation cache between writable (is_x==0) and
// executable (is_x!=0) on platforms that enforce W^X.
// No-op unless NO_WRITE_EXEC is defined.
static void mprotect_w_x(void *start, void *end, int is_x)
{
#ifdef NO_WRITE_EXEC
  #if defined(VITA)
  // *Open* enables write on all memory that was
  // allocated by sceKernelAllocMemBlockForVM()?
  if (is_x)
    sceKernelCloseVMDomain();
  else
    sceKernelOpenVMDomain();
  #else
  // Round start down to a page boundary; mprotect requires page alignment.
  u_long mstart = (u_long)start & ~4095ul;
  u_long mend = (u_long)end;
  if (mprotect((void *)mstart, mend - mstart,
               PROT_READ | (is_x ? PROT_EXEC : PROT_WRITE)) != 0)
    SysPrintf("mprotect(%c) failed: %s\n", is_x ? 'x' : 'w', strerror(errno));
  #endif
#endif
}
319
// Make [start, end) of the translation cache writable before emitting code.
static void start_tcache_write(void *start, void *end)
{
  mprotect_w_x(start, end, 0);
}
324
// Finish writing [start, end): flush/invalidate the instruction cache
// (platform-specific) and re-protect the range as executable.
static void end_tcache_write(void *start, void *end)
{
#ifdef __arm__
  size_t len = (char *)end - (char *)start;
  #if defined(__BLACKBERRY_QNX__)
  msync(start, len, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
  #elif defined(__MACH__)
  sys_cache_control(kCacheFunctionPrepareForExecution, start, len);
  #elif defined(VITA)
  sceKernelSyncVMDomain(sceBlock, start, len);
  #elif defined(_3DS)
  ctr_flush_invalidate_cache();
  #else
  __clear_cache(start, end);
  #endif
  (void)len;
#endif

  mprotect_w_x(start, end, 1);
}
345
346static void *start_block(void)
347{
348 u_char *end = out + MAX_OUTPUT_BLOCK_SIZE;
349 if (end > (u_char *)BASE_ADDR + (1<<TARGET_SIZE_2))
350 end = (u_char *)BASE_ADDR + (1<<TARGET_SIZE_2);
351 start_tcache_write(out, end);
352 return out;
353}
354
// Close the write window opened by start_block(): sync caches and
// re-protect [start, out) as executable.
static void end_block(void *start)
{
  end_tcache_write(start, out);
}
359
57871462 360//#define DEBUG_CYCLE_COUNT 1
361
b6e87b2b 362#define NO_CYCLE_PENALTY_THR 12
363
int cycle_multiplier; // 100 for 1.0

// Scale a cycle count by cycle_multiplier/100, rounding half away
// from zero (works for negative counts too).
static int CLOCK_ADJUST(int x)
{
  int round = (x < 0) ? -50 : 50;
  return (x * cycle_multiplier + round) / 100;
}
371
94d23bb9 372static u_int get_page(u_int vaddr)
57871462 373{
0ce47d46 374 u_int page=vaddr&~0xe0000000;
375 if (page < 0x1000000)
376 page &= ~0x0e00000; // RAM mirrors
377 page>>=12;
57871462 378 if(page>2048) page=2048+(page&2047);
94d23bb9 379 return page;
380}
381
// no virtual mem in PCSX
// (the "virtual" page is identical to the physical page)
static u_int get_vpage(u_int vaddr)
{
  return get_page(vaddr);
}
94d23bb9 387
df4dc2b1 388static struct ht_entry *hash_table_get(u_int vaddr)
389{
390 return &hash_table[((vaddr>>16)^vaddr)&0xFFFF];
391}
392
// Insert (vaddr -> tcaddr) as slot 0 of a hash bin, demoting the old
// slot-0 entry to slot 1 (the previous slot-1 entry is evicted).
static void hash_table_add(struct ht_entry *ht_bin, u_int vaddr, void *tcaddr)
{
  ht_bin->vaddr[1] = ht_bin->vaddr[0];
  ht_bin->tcaddr[1] = ht_bin->tcaddr[0];
  ht_bin->vaddr[0] = vaddr;
  ht_bin->tcaddr[0] = tcaddr;
}
400
// some messy ari64's code, seems to rely on unsigned 32bit overflow
// Returns nonzero when tcaddr is far enough ahead of the output pointer
// 'out' that it won't be overwritten by the expiry sweep soon; the
// subtraction and shift deliberately wrap modulo the cache size.
static int doesnt_expire_soon(void *tcaddr)
{
  u_int diff = (u_int)((u_char *)tcaddr - out) << (32-TARGET_SIZE_2);
  return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2)));
}
407
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
// Lookup order: clean blocks (jump_in), then dirty blocks that can be
// revalidated (jump_dirty), else recompile. On total failure, raise a
// guest address-error exception and jump to the exception vector.
void *get_addr(u_int vaddr)
{
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
  // 1) Known-clean compiled blocks for this page.
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Cache the hit so get_addr_ht() finds it next time.
      hash_table_add(hash_table_get(vaddr), vaddr, head->addr);
      return head->addr;
    }
    head=head->next;
  }
  // 2) Dirty blocks: usable if their source memory still matches
  //    what was compiled (verify_dirty).
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Don't restore blocks which are about to expire from the cache
      if (doesnt_expire_soon(head->addr))
      if (verify_dirty(head->addr)) {
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        invalid_code[vaddr>>12]=0;
        inv_code_start=inv_code_end=~0;
        if(vpage<2048) {
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        struct ht_entry *ht_bin = hash_table_get(vaddr);
        if (ht_bin->vaddr[0] == vaddr)
          ht_bin->tcaddr[0] = head->addr; // Replace existing entry
        else
          hash_table_add(ht_bin, vaddr, head->addr);

        return head->addr;
      }
    }
    head=head->next;
  }
  //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
  // 3) Nothing usable: compile a fresh block now.
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault execption
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
}
462// Look up address in hash table first
463void *get_addr_ht(u_int vaddr)
464{
465 //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
df4dc2b1 466 const struct ht_entry *ht_bin = hash_table_get(vaddr);
467 if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0];
468 if (ht_bin->vaddr[1] == vaddr) return ht_bin->tcaddr[1];
57871462 469 return get_addr(vaddr);
470}
471
57871462 472void clear_all_regs(signed char regmap[])
473{
474 int hr;
475 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
476}
477
478signed char get_reg(signed char regmap[],int r)
479{
480 int hr;
481 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
482 return -1;
483}
484
485// Find a register that is available for two consecutive cycles
486signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
487{
488 int hr;
489 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
490 return -1;
491}
492
493int count_free_regs(signed char regmap[])
494{
495 int count=0;
496 int hr;
497 for(hr=0;hr<HOST_REGS;hr++)
498 {
499 if(hr!=EXCLUDE_REG) {
500 if(regmap[hr]<0) count++;
501 }
502 }
503 return count;
504}
505
506void dirty_reg(struct regstat *cur,signed char reg)
507{
508 int hr;
509 if(!reg) return;
510 for (hr=0;hr<HOST_REGS;hr++) {
511 if((cur->regmap[hr]&63)==reg) {
512 cur->dirty|=1<<hr;
513 }
514 }
515}
516
517// If we dirty the lower half of a 64 bit register which is now being
518// sign-extended, we need to dump the upper half.
519// Note: Do this only after completion of the instruction, because
520// some instructions may need to read the full 64-bit value even if
521// overwriting it (eg SLTI, DSRA32).
522static void flush_dirty_uppers(struct regstat *cur)
523{
524 int hr,reg;
525 for (hr=0;hr<HOST_REGS;hr++) {
526 if((cur->dirty>>hr)&1) {
527 reg=cur->regmap[hr];
9f51b4b9 528 if(reg>=64)
57871462 529 if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
530 }
531 }
532}
533
534void set_const(struct regstat *cur,signed char reg,uint64_t value)
535{
536 int hr;
537 if(!reg) return;
538 for (hr=0;hr<HOST_REGS;hr++) {
539 if(cur->regmap[hr]==reg) {
540 cur->isconst|=1<<hr;
956f3129 541 current_constmap[hr]=value;
57871462 542 }
543 else if((cur->regmap[hr]^64)==reg) {
544 cur->isconst|=1<<hr;
956f3129 545 current_constmap[hr]=value>>32;
57871462 546 }
547 }
548}
549
550void clear_const(struct regstat *cur,signed char reg)
551{
552 int hr;
553 if(!reg) return;
554 for (hr=0;hr<HOST_REGS;hr++) {
555 if((cur->regmap[hr]&63)==reg) {
556 cur->isconst&=~(1<<hr);
557 }
558 }
559}
560
561int is_const(struct regstat *cur,signed char reg)
562{
563 int hr;
79c75f1b 564 if(reg<0) return 0;
57871462 565 if(!reg) return 1;
566 for (hr=0;hr<HOST_REGS;hr++) {
567 if((cur->regmap[hr]&63)==reg) {
568 return (cur->isconst>>hr)&1;
569 }
570 }
571 return 0;
572}
573uint64_t get_const(struct regstat *cur,signed char reg)
574{
575 int hr;
576 if(!reg) return 0;
577 for (hr=0;hr<HOST_REGS;hr++) {
578 if(cur->regmap[hr]==reg) {
956f3129 579 return current_constmap[hr];
57871462 580 }
581 }
c43b5311 582 SysPrintf("Unknown constant in r%d\n",reg);
57871462 583 exit(1);
584}
585
// Least soon needed registers
// Look at the next ten instructions and see which registers
// will be used. Try not to reallocate these.
// On return, hsn[r] holds the distance (in instructions) to the next
// use of MIPS reg r; smaller means "needed sooner".
void lsn(u_char hsn[], int i, int *preferred_reg)
{
  int j;
  int b=-1;
  // Determine the scan limit: stop at the end of the block, or just
  // after an unconditional jump's delay slot.
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  // Walk backwards so each hsn[r] ends up as the NEAREST use distance.
  for(;j>=0;j--)
  {
    if(rs1[i+j]) hsn[rs1[i+j]]=j;
    if(rs2[i+j]) hsn[rs2[i+j]]=j;
    if(rt1[i+j]) hsn[rt1[i+j]]=j;
    if(rt2[i+j]) hsn[rt2[i+j]]=j;
    if(itype[i+j]==STORE || itype[i+j]==STORELR) {
      // Stores can allocate zero
      hsn[rs1[i+j]]=j;
      hsn[rs2[i+j]]=j;
    }
    // On some architectures stores need invc_ptr
    #if defined(HOST_IMM8)
    if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
      hsn[INVCP]=j;
    }
    #endif
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      hsn[CCREG]=j;
      b=j;
    }
  }
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        // j+b+2: uses beyond the branch are penalized by the branch depth
        if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
        if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
        //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
        //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
      }
    }
    // TODO: preferred register based on backward branch
  }
  // Delay slot should preferably not overwrite branch conditions or cycle count
  if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
    if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
    if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
    hsn[CCREG]=1;
    // ...or hash tables
    hsn[RHASH]=1;
    hsn[RHTBL]=1;
  }
  // Coprocessor load/store needs FTEMP, even if not declared
  if(itype[i]==C1LS||itype[i]==C2LS) {
    hsn[FTEMP]=0;
  }
  // Load L/R also uses FTEMP as a temporary register
  if(itype[i]==LOADLR) {
    hsn[FTEMP]=0;
  }
  // Also SWL/SWR/SDL/SDR
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
    hsn[FTEMP]=0;
  }
  // Don't remove the miniht registers
  if(itype[i]==UJUMP||itype[i]==RJUMP)
  {
    hsn[RHASH]=0;
    hsn[RHTBL]=0;
  }
}
674
// We only want to allocate registers if we're going to use them again soon
// Returns 1 if MIPS reg r is read within the next few instructions
// (and not marked unneeded before that use), 0 otherwise.
int needed_again(int r, int i)
{
  int j;
  int b=-1;
  int rn=10;

  if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
  {
    if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
      return 0; // Don't need any registers if exiting the block
  }
  // Find the scan limit: block end, past an unconditional jump's delay
  // slot, or a syscall/hlecall/intcall/break-type instruction.
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
    if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
    {
      break;
    }
  }
  // Scan backwards: rn becomes the distance to the nearest use of r,
  // reset to 10 ("not needed") past a point where r is unneeded.
  for(;j>=1;j--)
  {
    if(rs1[i+j]==r) rn=j;
    if(rs2[i+j]==r) rn=j;
    if((unneeded_reg[i+j]>>r)&1) rn=10;
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      b=j;
    }
  }
  /*
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int o=rn;
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        if(!((unneeded_reg[t+j]>>r)&1)) {
          if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
          if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
        }
        else rn=o;
      }
    }
  }*/
  if(rn<10) return 1;
  (void)b;
  return 0;
}
737
// Try to match register allocations at the end of a loop with those
// at the beginning
// If a backward branch in the lookahead window targets a point where r
// is already allocated, return that host register; otherwise return hr.
int loop_reg(int i, int r, int hr)
{
  int j,k;
  // Limit the lookahead window (same rules as lsn()).
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  k=0;
  if(i>0){
    // Include the preceding branch's delay slot in the scan.
    if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
      k--;
  }
  for(;k<j;k++)
  {
    // Give up as soon as r becomes unneeded (lower/upper half).
    if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
    if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr;
    if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
    {
      // Backward branch (loop): reuse the allocation at its target.
      if(ba[i+k]>=start && ba[i+k]<(start+i*4))
      {
        int t=(ba[i+k]-start)>>2;
        int reg=get_reg(regs[t].regmap_entry,r);
        if(reg>=0) return reg;
        //reg=get_reg(regs[t+1].regmap_entry,r);
        //if(reg>=0) return reg;
      }
    }
  }
  return hr;
}
779
780
781// Allocate every register, preserving source/target regs
782void alloc_all(struct regstat *cur,int i)
783{
784 int hr;
9f51b4b9 785
57871462 786 for(hr=0;hr<HOST_REGS;hr++) {
787 if(hr!=EXCLUDE_REG) {
788 if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
789 ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
790 {
791 cur->regmap[hr]=-1;
792 cur->dirty&=~(1<<hr);
793 }
794 // Don't need zeros
795 if((cur->regmap[hr]&63)==0)
796 {
797 cur->regmap[hr]=-1;
798 cur->dirty&=~(1<<hr);
799 }
800 }
801 }
802}
803
57871462 804#ifdef __i386__
805#include "assem_x86.c"
806#endif
807#ifdef __x86_64__
808#include "assem_x64.c"
809#endif
810#ifdef __arm__
811#include "assem_arm.c"
812#endif
813
814// Add virtual address mapping to linked list
815void ll_add(struct ll_entry **head,int vaddr,void *addr)
816{
817 struct ll_entry *new_entry;
818 new_entry=malloc(sizeof(struct ll_entry));
819 assert(new_entry!=NULL);
820 new_entry->vaddr=vaddr;
de5a60c3 821 new_entry->reg_sv_flags=0;
57871462 822 new_entry->addr=addr;
823 new_entry->next=*head;
824 *head=new_entry;
825}
826
// Same as ll_add(), but also sets reg_sv_flags on the new entry
// (which ll_add() leaves at 0).
void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr)
{
  ll_add(head,vaddr,addr);
  (*head)->reg_sv_flags=reg_sv_flags;
}
832
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache
// Returns the translated-code address, or 0 if none usable.
void *check_addr(u_int vaddr)
{
  // Fast path: probe both hash-table slots.
  struct ht_entry *ht_bin = hash_table_get(vaddr);
  size_t i;
  for (i = 0; i < ARRAY_SIZE(ht_bin->vaddr); i++) {
    if (ht_bin->vaddr[i] == vaddr)
      if (doesnt_expire_soon((u_char *)ht_bin->tcaddr[i] - MAX_OUTPUT_BLOCK_SIZE))
        if (isclean(ht_bin->tcaddr[i]))
          return ht_bin->tcaddr[i];
  }
  // Slow path: search the clean-block list for this page.
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while (head != NULL) {
    if (head->vaddr == vaddr) {
      if (doesnt_expire_soon(head->addr)) {
        // Update existing entry with current address
        if (ht_bin->vaddr[0] == vaddr) {
          ht_bin->tcaddr[0] = head->addr;
          return head->addr;
        }
        if (ht_bin->vaddr[1] == vaddr) {
          ht_bin->tcaddr[1] = head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if (ht_bin->vaddr[0] == -1) {
          ht_bin->vaddr[0] = vaddr;
          ht_bin->tcaddr[0] = head->addr;
        }
        else if (ht_bin->vaddr[1] == -1) {
          ht_bin->vaddr[1] = vaddr;
          ht_bin->tcaddr[1] = head->addr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0;
}
878
// Drop any hash-table entries for vaddr. Slot 1 is cleared first so
// that, if slot 0 also matches, the (now empty) slot 1 can be shifted
// up without resurrecting a stale entry.
void remove_hash(int vaddr)
{
  //printf("remove hash: %x\n",vaddr);
  struct ht_entry *ht_bin = hash_table_get(vaddr);
  if (ht_bin->vaddr[1] == vaddr) {
    ht_bin->vaddr[1] = -1;
    ht_bin->tcaddr[1] = NULL;
  }
  if (ht_bin->vaddr[0] == vaddr) {
    ht_bin->vaddr[0] = ht_bin->vaddr[1];
    ht_bin->tcaddr[0] = ht_bin->tcaddr[1];
    ht_bin->vaddr[1] = -1;
    ht_bin->tcaddr[1] = NULL;
  }
}
894
// Free all list entries whose translation-cache address falls in the
// (shift-aligned) region containing addr, removing their hash entries
// too. Uses a pointer-to-pointer walk so unlinking needs no prev node.
void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
{
  struct ll_entry *next;
  while(*head) {
    // Second comparison catches blocks that start up to
    // MAX_OUTPUT_BLOCK_SIZE before the region boundary.
    if(((u_int)((*head)->addr)>>shift)==(addr>>shift) ||
       ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
    {
      inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
      remove_hash((*head)->vaddr);
      next=(*head)->next;
      free(*head);
      *head=next;
    }
    else
    {
      head=&((*head)->next);
    }
  }
}
914
915// Remove all entries from linked list
916void ll_clear(struct ll_entry **head)
917{
918 struct ll_entry *cur;
919 struct ll_entry *next;
581335b0 920 if((cur=*head)) {
57871462 921 *head=0;
922 while(cur) {
923 next=cur->next;
924 free(cur);
925 cur=next;
926 }
927 }
928}
929
// Dereference the pointers and remove if it matches
// For each entry, if the jump it links to lands in the region around
// addr, retarget the jump back to its own entry point (undoing the
// direct link made by add_link).
static void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
{
  while(head) {
    int ptr=get_pointer(head->addr);
    inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
    if(((ptr>>shift)==(addr>>shift)) ||
       (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
    {
      inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
      void *host_addr=find_extjump_insn(head->addr);
      #ifdef __arm__
      // The patched instruction must be flushed from the icache later.
      mark_clear_cache(host_addr);
      #endif
      set_jump_target(host_addr, head->addr);
    }
    head=head->next;
  }
}
949
// This is called when we write to a compiled block (see do_invstub)
// Drops all compiled blocks in the page: frees the jump_in list (with
// hash entries) and unlinks every outgoing jump recorded in jump_out.
void invalidate_page(u_int page)
{
  struct ll_entry *head;
  struct ll_entry *next;
  head=jump_in[page];
  jump_in[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: %x\n",head->vaddr);
    remove_hash(head->vaddr);
    next=head->next;
    free(head);
    head=next;
  }
  // Retarget jumps that were direct-linked into this page back to
  // their dispatch stubs.
  head=jump_out[page];
  jump_out[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
    void *host_addr=find_extjump_insn(head->addr);
    #ifdef __arm__
    mark_clear_cache(host_addr);
    #endif
    set_jump_target(host_addr, head->addr);
    next=head->next;
    free(head);
    head=next;
  }
}
9be4ba64 978
// Invalidate the page containing 'block' plus the adjacent pages
// [first, last] touched by blocks spanning a 4K boundary.
static void invalidate_block_range(u_int block, u_int first, u_int last)
{
  u_int page=get_page(block<<12);
  //printf("first=%d last=%d\n",first,last);
  invalidate_page(page);
  assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
  assert(last<page+5);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  while(first<page) {
    invalidate_page(first);
    first++;
  }
  for(first=page+1;first<last;first++) {
    invalidate_page(first);
  }
  #ifdef __arm__
  do_clear_cache();
  #endif

  // Don't trap writes
  invalid_code[block]=1;

  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
}
9be4ba64 1005
// Invalidate all compiled code in the 4K page of 'block', widening the
// page range to cover dirty blocks that span into neighbouring pages.
void invalidate_block(u_int block)
{
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  // Scan dirty blocks hashed to this vpage to find how far their
  // source code extends beyond the page boundaries.
  while(head!=NULL) {
    u_int start,end;
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      get_bounds((int)head->addr,&start,&end);
      //printf("start: %x end: %x\n",start,end);
      if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) {
        if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
          if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
          if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
        }
      }
    }
    head=head->next;
  }
  invalidate_block_range(block,first,last);
}
1033
// Invalidate compiled code affected by a write to 'addr'. For RAM it
// also maintains [inv_code_start, inv_code_end], a known-code-free
// range the caller uses to skip future invalidation calls.
void invalidate_addr(u_int addr)
{
  //static int rhits;
  // this check is done by the caller
  //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
  u_int page=get_vpage(addr);
  if(page<2048) { // RAM
    struct ll_entry *head;
    u_int addr_min=~0, addr_max=0;
    u_int mask=RAM_SIZE-1;
    u_int addr_main=0x80000000|(addr&mask); // canonical (KSEG0) form
    int pg1;
    // Start with the whole surrounding 4K page as the candidate
    // code-free range, then shrink it below.
    inv_code_start=addr_main&~0xfff;
    inv_code_end=addr_main|0xfff;
    pg1=page;
    if (pg1>0) {
      // must check previous page too because of spans..
      pg1--;
      inv_code_start-=0x1000;
    }
    for(;pg1<=page;pg1++) {
      for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
        u_int start,end;
        get_bounds((int)head->addr,&start,&end);
        if(ram_offset) {
          start-=ram_offset;
          end-=ram_offset;
        }
        if(start<=addr_main&&addr_main<end) {
          // Block overlaps the written address: widen the hit range.
          if(start<addr_min) addr_min=start;
          if(end>addr_max) addr_max=end;
        }
        else if(addr_main<start) {
          // Block lies above: clip the code-free range from above.
          if(start<inv_code_end)
            inv_code_end=start-1;
        }
        else {
          // Block lies below: clip the code-free range from below.
          if(end>inv_code_start)
            inv_code_start=end;
        }
      }
    }
    if (addr_min!=~0) {
      inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
      inv_code_start=inv_code_end=~0;
      invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
      return;
    }
    else {
      // No code here; remember the range so callers can skip us.
      inv_code_start=(addr&~mask)|(inv_code_start&mask);
      inv_code_end=(addr&~mask)|(inv_code_end&mask);
      inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
      return;
    }
  }
  invalidate_block(addr>>12);
}
9be4ba64 1091
dd3a91a1 1092// This is called when loading a save state.
1093// Anything could have changed, so invalidate everything.
57871462 1094void invalidate_all_pages()
1095{
581335b0 1096 u_int page;
57871462 1097 for(page=0;page<4096;page++)
1098 invalidate_page(page);
1099 for(page=0;page<1048576;page++)
1100 if(!invalid_code[page]) {
1101 restore_candidate[(page&2047)>>3]|=1<<(page&7);
1102 restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
1103 }
57871462 1104 #ifdef USE_MINI_HT
1105 memset(mini_ht,-1,sizeof(mini_ht));
1106 #endif
57871462 1107}
1108
// Add an entry to jump_out after making a link
// Records that the code at 'src' was patched to jump directly to the
// block for 'vaddr', so the patch can be undone if that block is
// invalidated later.
void add_link(u_int vaddr,void *src)
{
  u_int page=get_page(vaddr);
  inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
  // Sanity check: the word after the branch should be an ARM
  // PC-relative load (ldr rx,[pc,...]) - the literal that gets patched.
  int *ptr=(int *)(src+4);
  assert((*ptr&0x0fff0000)==0x059f0000);
  (void)ptr;
  ll_add(jump_out+page,vaddr,src);
  //int ptr=get_pointer(src);
  //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
}
1121
// If a code block was found to be unmodified (bit was set in
// restore_candidate) and it remains unmodified (bit is clear
// in invalid_code) then move the entries for that 4K page from
// the dirty list to the clean list.
void clean_blocks(u_int page)
{
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if (doesnt_expire_soon(head->addr)) {
        u_int start,end;
        // verify_dirty checks the source code still matches the copy
        // taken at compile time
        if(verify_dirty(head->addr)) {
          //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
          u_int i;
          u_int inv=0;
          get_bounds((int)head->addr,&start,&end);
          if(start-(u_int)rdram<RAM_SIZE) {
            // any page the block spans being invalid poisons the restore
            for(i=(start-(u_int)rdram+0x80000000)>>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) {
              inv|=invalid_code[i];
            }
          }
          else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
            inv=1;
          }
          if(!inv) {
            void *clean_addr = get_clean_addr(head->addr);
            if (doesnt_expire_soon(clean_addr)) {
              u_int ppage=page;
              inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              // promote to the clean list and fix up the hash table
              ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
              struct ht_entry *ht_bin = hash_table_get(head->vaddr);
              if (ht_bin->vaddr[0] == head->vaddr)
                ht_bin->tcaddr[0] = clean_addr; // Replace existing entry
              if (ht_bin->vaddr[1] == head->vaddr)
                ht_bin->tcaddr[1] = clean_addr; // Replace existing entry
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1170
1171
// Register allocation for a register-move instruction: the target
// inherits the 32/64-bit width of the source.
void mov_alloc(struct regstat *current,int i)
{
  // Note: Don't need to actually alloc the source registers
  if((~current->is32>>rs1[i])&1) {
    // 64-bit source: target becomes 64-bit too
    //alloc_reg64(current,i,rs1[i]);
    alloc_reg64(current,i,rt1[i]);
    current->is32&=~(1LL<<rt1[i]);
  } else {
    //alloc_reg(current,i,rs1[i]);
    alloc_reg(current,i,rt1[i]);
    current->is32|=(1LL<<rt1[i]);
  }
  clear_const(current,rs1[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1188
// Register allocation for shift-by-immediate instructions
// (SLL/SRL/SRA and the 64-bit DSLL... variants).  32-bit shifts with a
// constant source propagate the constant instead of allocating work.
void shiftimm_alloc(struct regstat *current,int i)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
      else lt1[i]=rs1[i];
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
      if(is_const(current,rs1[i])) {
        // constant-fold the shift at compile time
        int v=get_const(current,rs1[i]);
        if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
        if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]);
        if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]);
      }
      else clear_const(current,rt1[i]);
    }
  }
  else
  {
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }

  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      if(imm[i]==32) {
        // shifting by exactly 32 keeps the high word significant
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      } else {
        alloc_reg(current,i,rt1[i]);
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
}
1256
// Register allocation for variable shifts (SLLV/SRLV/SRAV and 64-bit
// DSLLV/DSRLV/DSRAV).  Some combinations need a scratch register.
void shift_alloc(struct regstat *current,int i)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg(current,i,rt1[i]);
      if(rt1[i]==rs2[i]) {
        // target aliases the shift amount - need a temporary
        alloc_reg_temp(current,i,-1);
        minimum_free_regs[i]=1;
      }
      current->is32|=1LL<<rt1[i];
    } else { // DSLLV/DSRLV/DSRAV
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
      {
        alloc_reg_temp(current,i,-1);
        minimum_free_regs[i]=1;
      }
    }
    clear_const(current,rs1[i]);
    clear_const(current,rs2[i]);
    clear_const(current,rt1[i]);
    dirty_reg(current,rt1[i]);
  }
}
1287
// Register allocation for three-operand ALU instructions
// (ADD/SUB family, SLT/SLTU, logical ops, and 64-bit DADD/DSUB family).
// Tracks whether the result is 32-bit or 64-bit in current->is32.
void alu_alloc(struct regstat *current,int i)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      // if either source is 64-bit the comparison must be done in 64 bits
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      } else {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      }
    }
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else
      {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        // 64-bit result: allocate the upper half if it's live
        if(!((current->uu>>rt1[i])&1)) {
          alloc_reg64(current,i,rt1[i]);
        }
        if(get_reg(current->regmap,rt1[i]|64)>=0) {
          if(rs1[i]&&rs2[i]) {
            alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rs2[i]);
          }
          else
          {
            // Is is really worth it to keep 64-bit values in registers?
            #ifdef NATIVE_64BIT
            if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
            if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]);
            #endif
          }
        }
        current->is32&=~(1LL<<rt1[i]);
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          alloc_reg64(current,i,rs1[i]);
          alloc_reg64(current,i,rs2[i]);
          alloc_reg64(current,i,rt1[i]);
        } else {
          alloc_reg(current,i,rs1[i]);
          alloc_reg(current,i,rs2[i]);
          alloc_reg(current,i,rt1[i]);
        }
      }
      else {
        alloc_reg(current,i,rt1[i]);
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          // DADD used as move, or zeroing
          // If we have a 64-bit source, then make the target 64 bits too
          if(rs1[i]&&!((current->is32>>rs1[i])&1)) {
            if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rt1[i]);
          } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) {
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
          if(opcode2[i]>=0x2e&&rs2[i]) {
            // DSUB used as negation - 64-bit result
            // If we have a 32-bit register, extend it to 64 bits
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
        }
      }
      // update the 32/64-bit tracking for the result
      if(rs1[i]&&rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
      } else if(rs1[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs1[i])&1)
          current->is32|=1LL<<rt1[i];
      } else if(rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs2[i])&1)
          current->is32|=1LL<<rt1[i];
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1409
// Register allocation for immediate-operand instructions
// (DADDI, SLTI, ANDI/ORI/XORI, ADDI/ADDIU, LUI).  Propagates constants
// where the source value is known at compile time.
void imm16_alloc(struct regstat *current,int i)
{
  if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  else lt1[i]=rs1[i];
  if(rt1[i]) alloc_reg(current,i,rt1[i]);
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    current->is32&=~(1LL<<rt1[i]);
    if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
      // TODO: Could preserve the 32-bit flag if the immediate is zero
      alloc_reg64(current,i,rt1[i]);
      alloc_reg64(current,i,rs1[i]);
    }
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]);
    current->is32|=1LL<<rt1[i];
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
      // ORI/XORI on a 64-bit source keep the upper half
      if(rs1[i]!=rt1[i]) {
        if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      }
    }
    else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
    if(is_const(current,rs1[i])) {
      // constant-fold the logical op
      int v=get_const(current,rs1[i]);
      if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
      if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
      if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
    }
    else clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      set_const(current,rt1[i],v+imm[i]);
    }
    else clear_const(current,rt1[i]);
    current->is32|=1LL<<rt1[i];
  }
  else {
    set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
    current->is32|=1LL<<rt1[i];
  }
  dirty_reg(current,rt1[i]);
}
1462
// Register allocation for load instructions.  Loads to r0 or to a
// register that is never read still need an address register (the
// access can fault / hit hardware), but no destination.
void load_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
  if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  if(rt1[i]&&!((current->u>>rt1[i])&1)) {
    alloc_reg(current,i,rt1[i]);
    assert(get_reg(current->regmap,rt1[i])>=0);
    if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
    {
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
    }
    else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      // unaligned 64-bit loads go through a slow path needing all regs
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
    else current->is32|=1LL<<rt1[i];
    dirty_reg(current,rt1[i]);
    // LWL/LWR need a temporary register for the old value
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP);
      alloc_reg_temp(current,i,-1);
      minimum_free_regs[i]=1;
    }
  }
  else
  {
    // Load to r0 or unneeded register (dummy load)
    // but we still need a register to calculate the address
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
    }
    alloc_reg_temp(current,i,-1);
    minimum_free_regs[i]=1;
    if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
  }
}
1513
// Register allocation for store instructions: address base, value to
// store, and a temporary for address generation / self-modifying-code
// checks.
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,rs2[i]);
  if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rs2[i]);
  if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
    alloc_reg64(current,i,rs2[i]);
    if(rs2[i]) alloc_reg(current,i,FTEMP);
  }
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else alloc_reg(current,i,INVCP);
  #endif
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWL/SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1535
// Register allocation for coprocessor 1 loads/stores (LWC1/SWC1 and
// 64-bit LDC1/SDC1).  CSREG holds the COP status for the usability check.
void c1ls_alloc(struct regstat *current,int i)
{
  //clear_const(current,rs1[i]); // FIXME
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,CSREG); // Status
  alloc_reg(current,i,FTEMP);
  if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
    alloc_reg64(current,i,FTEMP);
  }
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
}
1554
b9b61529 1555void c2ls_alloc(struct regstat *current,int i)
1556{
1557 clear_const(current,rt1[i]);
1558 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1559 alloc_reg(current,i,FTEMP);
b9b61529 1560 #if defined(HOST_IMM8)
1561 // On CPUs without 32-bit immediates we need a pointer to invalid_code
1edfcc68 1562 if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
b9b61529 1563 alloc_reg(current,i,INVCP);
1564 #endif
1565 // We need a temporary register for address generation
1566 alloc_reg_temp(current,i,-1);
e1190b87 1567 minimum_free_regs[i]=1;
b9b61529 1568}
1569
#ifndef multdiv_alloc
// Register allocation for multiply/divide instructions.  Results go to
// HI/LO.  If either operand is r0 the result is known to be zero and
// only HI/LO are allocated.
void multdiv_alloc(struct regstat *current,int i)
{
  // case 0x18: MULT
  // case 0x19: MULTU
  // case 0x1A: DIV
  // case 0x1B: DIVU
  // case 0x1C: DMULT
  // case 0x1D: DMULTU
  // case 0x1E: DDIV
  // case 0x1F: DDIVU
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      alloc_reg(current,i,HIREG);
      alloc_reg(current,i,LOREG);
      alloc_reg(current,i,rs1[i]);
      alloc_reg(current,i,rs2[i]);
      current->is32|=1LL<<HIREG;
      current->is32|=1LL<<LOREG;
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
    else // 64-bit
    {
      // 64-bit multiply/divide goes through a helper needing all regs
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      current->uu&=~(1LL<<HIREG);
      current->uu&=~(1LL<<LOREG);
      alloc_reg64(current,i,HIREG);
      //if(HOST_REGS>10) alloc_reg64(current,i,LOREG);
      alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rs2[i]);
      alloc_all(current,i);
      current->is32&=~(1LL<<HIREG);
      current->is32&=~(1LL<<LOREG);
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
      minimum_free_regs[i]=HOST_REGS;
    }
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    alloc_reg(current,i,HIREG);
    alloc_reg(current,i,LOREG);
    current->is32|=1LL<<HIREG;
    current->is32|=1LL<<LOREG;
    dirty_reg(current,HIREG);
    dirty_reg(current,LOREG);
  }
}
#endif
1630
// Register allocation for coprocessor 0 instructions (MFC0/MTC0 and the
// TLB/ERET group).  All of these go through helper calls, so every host
// register must be free.
void cop0_alloc(struct regstat *current,int i)
{
  if(opcode2[i]==0) // MFC0
  {
    if(rt1[i]) {
      clear_const(current,rt1[i]);
      alloc_all(current,i);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      alloc_reg(current,i,rs1[i]);
      alloc_all(current,i);
    }
    else {
      alloc_all(current,i); // FIXME: Keep r0
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  else
  {
    // TLBR/TLBWI/TLBWR/TLBP/ERET
    assert(opcode2[i]==0x10);
    alloc_all(current,i);
  }
  minimum_free_regs[i]=HOST_REGS;
}
1664
// Register allocation for coprocessor 1 register moves
// (MFC1/DMFC1/CFC1 and MTC1/DMTC1/CTC1).  CSREG holds the COP status.
void cop1_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  if(opcode2[i]<3) // MFC1/DMFC1/CFC1
  {
    if(rt1[i]){
      clear_const(current,rt1[i]);
      if(opcode2[i]==1) {
        alloc_reg64(current,i,rt1[i]); // DMFC1
        current->is32&=~(1LL<<rt1[i]);
      }else{
        alloc_reg(current,i,rt1[i]); // MFC1/CFC1
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
    alloc_reg_temp(current,i,-1);
  }
  else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      if(opcode2[i]==5)
        alloc_reg64(current,i,rs1[i]); // DMTC1
      else
        alloc_reg(current,i,rs1[i]); // MTC1/CTC1
      alloc_reg_temp(current,i,-1);
    }
    else {
      // moving r0: allocate the zero register itself
      current->u&=~1LL;
      alloc_reg(current,i,0);
      alloc_reg_temp(current,i,-1);
    }
  }
  minimum_free_regs[i]=1;
}
// Register allocation for FPU conversion ops: just the COP status
// register and one temporary.
void fconv_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
// Register allocation for FPU arithmetic ops: COP status plus one
// temporary (the FP values themselves live in the coprocessor state).
void float_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
b9b61529 1713void c2op_alloc(struct regstat *current,int i)
1714{
1715 alloc_reg_temp(current,i,-1);
1716}
// Register allocation for FPU compare ops: COP status, the FP condition
// flag register (which gets written), and one temporary.
void fcomp_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg(current,i,FSREG); // Load flags
  dirty_reg(current,FSREG); // Flag will be modified
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1725
// Register allocation for SYSCALL: exits to the interpreter, so the
// cycle counter must be current and every host register free.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  minimum_free_regs[i]=HOST_REGS;
  current->isconst=0;
}
1734
// Dispatch register allocation for the instruction in a branch delay
// slot based on its decoded type.  A branch in a delay slot is invalid
// on MIPS; if one is seen, speculative precompilation is disabled.
void delayslot_alloc(struct regstat *current,int i)
{
  switch(itype[i]) {
    case UJUMP:
    case CJUMP:
    case SJUMP:
    case RJUMP:
    case FJUMP:
    case SYSCALL:
    case HLECALL:
    case SPAN:
      assem_debug("jump in the delay slot. this shouldn't happen.\n");//exit(1);
      SysPrintf("Disabled speculative precompilation\n");
      stop_after_jal=1;
      break;
    case IMM16:
      imm16_alloc(current,i);
      break;
    case LOAD:
    case LOADLR:
      load_alloc(current,i);
      break;
    case STORE:
    case STORELR:
      store_alloc(current,i);
      break;
    case ALU:
      alu_alloc(current,i);
      break;
    case SHIFT:
      shift_alloc(current,i);
      break;
    case MULTDIV:
      multdiv_alloc(current,i);
      break;
    case SHIFTIMM:
      shiftimm_alloc(current,i);
      break;
    case MOV:
      mov_alloc(current,i);
      break;
    case COP0:
      cop0_alloc(current,i);
      break;
    case COP1:
    case COP2:
      cop1_alloc(current,i);
      break;
    case C1LS:
      c1ls_alloc(current,i);
      break;
    case C2LS:
      c2ls_alloc(current,i);
      break;
    case FCONV:
      fconv_alloc(current,i);
      break;
    case FLOAT:
      float_alloc(current,i);
      break;
    case FCOMP:
      fcomp_alloc(current,i);
      break;
    case C2OP:
      c2op_alloc(current,i);
      break;
  }
}
1803
// Special case where a branch and delay slot span two pages in virtual memory
// Everything is done via a generic stub, so all constants are discarded
// and every host register must be free; only the branch operands and
// the cycle counter are allocated.
static void pagespan_alloc(struct regstat *current,int i)
{
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  minimum_free_regs[i]=HOST_REGS;
  alloc_all(current,i);
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  if(opcode[i]==3) // JAL
  {
    // link register gets the return address
    alloc_reg(current,i,31);
    dirty_reg(current,31);
  }
  if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,rs1[i]);
    if (rt1[i]!=0) {
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(rs2[i]) alloc_reg(current,i,rs2[i]);
    if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
    {
      // 64-bit operands: compare the full width
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg64(current,i,rs2[i]);
    }
  }
  else
  if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(!((current->is32>>rs1[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
    }
  }
  else
  if(opcode[i]==0x11) // BC1
  {
    alloc_reg(current,i,FSREG);
    alloc_reg(current,i,CSREG);
  }
  //else ...
}
1854
b14b6a8f 1855static void add_stub(enum stub_type type, void *addr, void *retaddr,
1856 u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e)
1857{
1858 assert(a < ARRAY_SIZE(stubs));
1859 stubs[stubcount].type = type;
1860 stubs[stubcount].addr = addr;
1861 stubs[stubcount].retaddr = retaddr;
1862 stubs[stubcount].a = a;
1863 stubs[stubcount].b = b;
1864 stubs[stubcount].c = c;
1865 stubs[stubcount].d = d;
1866 stubs[stubcount].e = e;
57871462 1867 stubcount++;
1868}
1869
b14b6a8f 1870static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
1871 int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist)
1872{
1873 add_stub(type, addr, retaddr, i, addr_reg, (uintptr_t)i_regs, ccadj, reglist);
1874}
1875
// Write out a single register
// If guest register r is held dirty in any host register, emit a store
// of it back to the register file (both 32-bit halves if mapped).
void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
{
  int hr;
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if((regmap[hr]&63)==r) {
        if((dirty>>hr)&1) {
          if(regmap[hr]<64) {
            emit_storereg(r,hr);   // low half
          }else{
            emit_storereg(r|64,hr); // high half of a 64-bit value
          }
        }
      }
    }
  }
}
1894
1895int mchecksum()
1896{
57871462 1897 int i;
1898 int sum=0;
1899 for(i=0;i<2097152;i++) {
1900 unsigned int temp=sum;
1901 sum<<=1;
1902 sum|=(~temp)>>31;
1903 sum^=((u_int *)rdram)[i];
1904 }
1905 return sum;
1906}
1907int rchecksum()
1908{
1909 int i;
1910 int sum=0;
1911 for(i=0;i<64;i++)
1912 sum^=((u_int *)reg)[i];
1913 return sum;
1914}
57871462 1915void rlist()
1916{
1917 int i;
1918 printf("TRACE: ");
1919 for(i=0;i<32;i++)
1920 printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
1921 printf("\n");
57871462 1922}
1923
57871462 1924void alu_assemble(int i,struct regstat *i_regs)
1925{
1926 if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
1927 if(rt1[i]) {
1928 signed char s1,s2,t;
1929 t=get_reg(i_regs->regmap,rt1[i]);
1930 if(t>=0) {
1931 s1=get_reg(i_regs->regmap,rs1[i]);
1932 s2=get_reg(i_regs->regmap,rs2[i]);
1933 if(rs1[i]&&rs2[i]) {
1934 assert(s1>=0);
1935 assert(s2>=0);
1936 if(opcode2[i]&2) emit_sub(s1,s2,t);
1937 else emit_add(s1,s2,t);
1938 }
1939 else if(rs1[i]) {
1940 if(s1>=0) emit_mov(s1,t);
1941 else emit_loadreg(rs1[i],t);
1942 }
1943 else if(rs2[i]) {
1944 if(s2>=0) {
1945 if(opcode2[i]&2) emit_neg(s2,t);
1946 else emit_mov(s2,t);
1947 }
1948 else {
1949 emit_loadreg(rs2[i],t);
1950 if(opcode2[i]&2) emit_neg(t,t);
1951 }
1952 }
1953 else emit_zeroreg(t);
1954 }
1955 }
1956 }
1957 if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
1958 if(rt1[i]) {
1959 signed char s1l,s2l,s1h,s2h,tl,th;
1960 tl=get_reg(i_regs->regmap,rt1[i]);
1961 th=get_reg(i_regs->regmap,rt1[i]|64);
1962 if(tl>=0) {
1963 s1l=get_reg(i_regs->regmap,rs1[i]);
1964 s2l=get_reg(i_regs->regmap,rs2[i]);
1965 s1h=get_reg(i_regs->regmap,rs1[i]|64);
1966 s2h=get_reg(i_regs->regmap,rs2[i]|64);
1967 if(rs1[i]&&rs2[i]) {
1968 assert(s1l>=0);
1969 assert(s2l>=0);
1970 if(opcode2[i]&2) emit_subs(s1l,s2l,tl);
1971 else emit_adds(s1l,s2l,tl);
1972 if(th>=0) {
1973 #ifdef INVERTED_CARRY
1974 if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);}
1975 #else
1976 if(opcode2[i]&2) emit_sbc(s1h,s2h,th);
1977 #endif
1978 else emit_add(s1h,s2h,th);
1979 }
1980 }
1981 else if(rs1[i]) {
1982 if(s1l>=0) emit_mov(s1l,tl);
1983 else emit_loadreg(rs1[i],tl);
1984 if(th>=0) {
1985 if(s1h>=0) emit_mov(s1h,th);
1986 else emit_loadreg(rs1[i]|64,th);
1987 }
1988 }
1989 else if(rs2[i]) {
1990 if(s2l>=0) {
1991 if(opcode2[i]&2) emit_negs(s2l,tl);
1992 else emit_mov(s2l,tl);
1993 }
1994 else {
1995 emit_loadreg(rs2[i],tl);
1996 if(opcode2[i]&2) emit_negs(tl,tl);
1997 }
1998 if(th>=0) {
1999 #ifdef INVERTED_CARRY
2000 if(s2h>=0) emit_mov(s2h,th);
2001 else emit_loadreg(rs2[i]|64,th);
2002 if(opcode2[i]&2) {
2003 emit_adcimm(-1,th); // x86 has inverted carry flag
2004 emit_not(th,th);
2005 }
2006 #else
2007 if(opcode2[i]&2) {
2008 if(s2h>=0) emit_rscimm(s2h,0,th);
2009 else {
2010 emit_loadreg(rs2[i]|64,th);
2011 emit_rscimm(th,0,th);
2012 }
2013 }else{
2014 if(s2h>=0) emit_mov(s2h,th);
2015 else emit_loadreg(rs2[i]|64,th);
2016 }
2017 #endif
2018 }
2019 }
2020 else {
2021 emit_zeroreg(tl);
2022 if(th>=0) emit_zeroreg(th);
2023 }
2024 }
2025 }
2026 }
2027 if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
2028 if(rt1[i]) {
2029 signed char s1l,s1h,s2l,s2h,t;
2030 if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
2031 {
2032 t=get_reg(i_regs->regmap,rt1[i]);
2033 //assert(t>=0);
2034 if(t>=0) {
2035 s1l=get_reg(i_regs->regmap,rs1[i]);
2036 s1h=get_reg(i_regs->regmap,rs1[i]|64);
2037 s2l=get_reg(i_regs->regmap,rs2[i]);
2038 s2h=get_reg(i_regs->regmap,rs2[i]|64);
2039 if(rs2[i]==0) // rx<r0
2040 {
2041 assert(s1h>=0);
2042 if(opcode2[i]==0x2a) // SLT
2043 emit_shrimm(s1h,31,t);
2044 else // SLTU (unsigned can not be less than zero)
2045 emit_zeroreg(t);
2046 }
2047 else if(rs1[i]==0) // r0<rx
2048 {
2049 assert(s2h>=0);
2050 if(opcode2[i]==0x2a) // SLT
2051 emit_set_gz64_32(s2h,s2l,t);
2052 else // SLTU (set if not zero)
2053 emit_set_nz64_32(s2h,s2l,t);
2054 }
2055 else {
2056 assert(s1l>=0);assert(s1h>=0);
2057 assert(s2l>=0);assert(s2h>=0);
2058 if(opcode2[i]==0x2a) // SLT
2059 emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
2060 else // SLTU
2061 emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
2062 }
2063 }
2064 } else {
2065 t=get_reg(i_regs->regmap,rt1[i]);
2066 //assert(t>=0);
2067 if(t>=0) {
2068 s1l=get_reg(i_regs->regmap,rs1[i]);
2069 s2l=get_reg(i_regs->regmap,rs2[i]);
2070 if(rs2[i]==0) // rx<r0
2071 {
2072 assert(s1l>=0);
2073 if(opcode2[i]==0x2a) // SLT
2074 emit_shrimm(s1l,31,t);
2075 else // SLTU (unsigned can not be less than zero)
2076 emit_zeroreg(t);
2077 }
2078 else if(rs1[i]==0) // r0<rx
2079 {
2080 assert(s2l>=0);
2081 if(opcode2[i]==0x2a) // SLT
2082 emit_set_gz32(s2l,t);
2083 else // SLTU (set if not zero)
2084 emit_set_nz32(s2l,t);
2085 }
2086 else{
2087 assert(s1l>=0);assert(s2l>=0);
2088 if(opcode2[i]==0x2a) // SLT
2089 emit_set_if_less32(s1l,s2l,t);
2090 else // SLTU
2091 emit_set_if_carry32(s1l,s2l,t);
2092 }
2093 }
2094 }
2095 }
2096 }
2097 if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
2098 if(rt1[i]) {
2099 signed char s1l,s1h,s2l,s2h,th,tl;
2100 tl=get_reg(i_regs->regmap,rt1[i]);
2101 th=get_reg(i_regs->regmap,rt1[i]|64);
2102 if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
2103 {
2104 assert(tl>=0);
2105 if(tl>=0) {
2106 s1l=get_reg(i_regs->regmap,rs1[i]);
2107 s1h=get_reg(i_regs->regmap,rs1[i]|64);
2108 s2l=get_reg(i_regs->regmap,rs2[i]);
2109 s2h=get_reg(i_regs->regmap,rs2[i]|64);
2110 if(rs1[i]&&rs2[i]) {
2111 assert(s1l>=0);assert(s1h>=0);
2112 assert(s2l>=0);assert(s2h>=0);
2113 if(opcode2[i]==0x24) { // AND
2114 emit_and(s1l,s2l,tl);
2115 emit_and(s1h,s2h,th);
2116 } else
2117 if(opcode2[i]==0x25) { // OR
2118 emit_or(s1l,s2l,tl);
2119 emit_or(s1h,s2h,th);
2120 } else
2121 if(opcode2[i]==0x26) { // XOR
2122 emit_xor(s1l,s2l,tl);
2123 emit_xor(s1h,s2h,th);
2124 } else
2125 if(opcode2[i]==0x27) { // NOR
2126 emit_or(s1l,s2l,tl);
2127 emit_or(s1h,s2h,th);
2128 emit_not(tl,tl);
2129 emit_not(th,th);
2130 }
2131 }
2132 else
2133 {
2134 if(opcode2[i]==0x24) { // AND
2135 emit_zeroreg(tl);
2136 emit_zeroreg(th);
2137 } else
2138 if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
2139 if(rs1[i]){
2140 if(s1l>=0) emit_mov(s1l,tl);
2141 else emit_loadreg(rs1[i],tl);
2142 if(s1h>=0) emit_mov(s1h,th);
2143 else emit_loadreg(rs1[i]|64,th);
2144 }
2145 else
2146 if(rs2[i]){
2147 if(s2l>=0) emit_mov(s2l,tl);
2148 else emit_loadreg(rs2[i],tl);
2149 if(s2h>=0) emit_mov(s2h,th);
2150 else emit_loadreg(rs2[i]|64,th);
2151 }
2152 else{
2153 emit_zeroreg(tl);
2154 emit_zeroreg(th);
2155 }
2156 } else
2157 if(opcode2[i]==0x27) { // NOR
2158 if(rs1[i]){
2159 if(s1l>=0) emit_not(s1l,tl);
2160 else{
2161 emit_loadreg(rs1[i],tl);
2162 emit_not(tl,tl);
2163 }
2164 if(s1h>=0) emit_not(s1h,th);
2165 else{
2166 emit_loadreg(rs1[i]|64,th);
2167 emit_not(th,th);
2168 }
2169 }
2170 else
2171 if(rs2[i]){
2172 if(s2l>=0) emit_not(s2l,tl);
2173 else{
2174 emit_loadreg(rs2[i],tl);
2175 emit_not(tl,tl);
2176 }
2177 if(s2h>=0) emit_not(s2h,th);
2178 else{
2179 emit_loadreg(rs2[i]|64,th);
2180 emit_not(th,th);
2181 }
2182 }
2183 else {
2184 emit_movimm(-1,tl);
2185 emit_movimm(-1,th);
2186 }
2187 }
2188 }
2189 }
2190 }
2191 else
2192 {
2193 // 32 bit
2194 if(tl>=0) {
2195 s1l=get_reg(i_regs->regmap,rs1[i]);
2196 s2l=get_reg(i_regs->regmap,rs2[i]);
2197 if(rs1[i]&&rs2[i]) {
2198 assert(s1l>=0);
2199 assert(s2l>=0);
2200 if(opcode2[i]==0x24) { // AND
2201 emit_and(s1l,s2l,tl);
2202 } else
2203 if(opcode2[i]==0x25) { // OR
2204 emit_or(s1l,s2l,tl);
2205 } else
2206 if(opcode2[i]==0x26) { // XOR
2207 emit_xor(s1l,s2l,tl);
2208 } else
2209 if(opcode2[i]==0x27) { // NOR
2210 emit_or(s1l,s2l,tl);
2211 emit_not(tl,tl);
2212 }
2213 }
2214 else
2215 {
2216 if(opcode2[i]==0x24) { // AND
2217 emit_zeroreg(tl);
2218 } else
2219 if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
2220 if(rs1[i]){
2221 if(s1l>=0) emit_mov(s1l,tl);
2222 else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
2223 }
2224 else
2225 if(rs2[i]){
2226 if(s2l>=0) emit_mov(s2l,tl);
2227 else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
2228 }
2229 else emit_zeroreg(tl);
2230 } else
2231 if(opcode2[i]==0x27) { // NOR
2232 if(rs1[i]){
2233 if(s1l>=0) emit_not(s1l,tl);
2234 else {
2235 emit_loadreg(rs1[i],tl);
2236 emit_not(tl,tl);
2237 }
2238 }
2239 else
2240 if(rs2[i]){
2241 if(s2l>=0) emit_not(s2l,tl);
2242 else {
2243 emit_loadreg(rs2[i],tl);
2244 emit_not(tl,tl);
2245 }
2246 }
2247 else emit_movimm(-1,tl);
2248 }
2249 }
2250 }
2251 }
2252 }
2253 }
2254}
2255
// Assemble one MIPS I-type (16-bit immediate) ALU instruction:
// LUI, ADDI/ADDIU, DADDI/DADDIU, SLTI/SLTIU, ANDI/ORI/XORI.
//   i      - index of the instruction in the current block (indexes the
//            opcode[]/rs1[]/rt1[]/imm[]/constmap[] decode arrays)
//   i_regs - register allocation state at this instruction
// Writes to $zero (rt1[i]==0) emit nothing.  Targets already proven
// constant by the constant-propagation pass (isconst bit set) are also
// skipped, since the constant is materialized elsewhere.
void imm16_assemble(int i,struct regstat *i_regs)
{
  if (opcode[i]==0x0f) { // LUI
    if(rt1[i]) {
      signed char t;
      t=get_reg(i_regs->regmap,rt1[i]);
      //assert(t>=0);
      if(t>=0) {
        // Skip if the result is tracked as a known constant
        if(!((i_regs->isconst>>t)&1))
          emit_movimm(imm[i]<<16,t);
      }
    }
  }
  if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      if(rs1[i]) {
        //assert(t>=0);
        //assert(s>=0);
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1)) {
            if(s<0) {
              // Source not in a host register: reload it into t, then add in place
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_addimm(t,imm[i],t);
            }else{
              if(!((i_regs->wasconst>>s)&1))
                emit_addimm(s,imm[i],t);
              else
                // Source is a known constant: fold the addition at compile time
                emit_movimm(constmap[i][s]+imm[i],t);
            }
          }
        }
      } else {
        // rs is $zero: result is just the (sign-extended) immediate
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1))
            emit_movimm(imm[i],t);
        }
      }
    }
  }
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    if(rt1[i]) {
      // 64-bit add: |64 selects the host register holding the upper 32 bits
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]) {
          assert(sh>=0);
          assert(sl>=0);
          if(th>=0) {
            emit_addimm64_32(sh,sl,imm[i],th,tl);
          }
          else {
            // Upper half of the result not allocated: 32-bit add suffices
            emit_addimm(sl,imm[i],tl);
          }
        } else {
          emit_movimm(imm[i],tl);
          // Sign-extend the immediate into the upper half if allocated
          if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
        }
      }
    }
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if(rt1[i]) {
      //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
      signed char sh,sl,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(rs1[i]>0) {
          // was32 bit set means the source is known to fit in 32 bits
          if(sh<0) assert((i_regs->was32>>rs1[i])&1);
          if(sh<0||((i_regs->was32>>rs1[i])&1)) {
            if(opcode[i]==0x0a) { // SLTI
              if(sl<0) {
                // Source spilled: reload into t and compare in place
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_slti32(t,imm[i],t);
              }else{
                emit_slti32(sl,imm[i],t);
              }
            }
            else { // SLTIU
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_sltiu32(t,imm[i],t);
              }else{
                emit_sltiu32(sl,imm[i],t);
              }
            }
          }else{ // 64-bit
            assert(sl>=0);
            if(opcode[i]==0x0a) // SLTI
              emit_slti64_32(sh,sl,imm[i],t);
            else // SLTIU
              emit_sltiu64_32(sh,sl,imm[i],t);
          }
        }else{
          // SLTI(U) with r0 is just stupid,
          // nonetheless examples can be found
          if(opcode[i]==0x0a) // SLTI
            if(0<imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          else // SLTIU
          {
            if(imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          }
        }
      }
    }
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
        if(opcode[i]==0x0c) //ANDI
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
              emit_andimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_andimm(sl,imm[i],tl);
              else
                // Fold AND with a known-constant source at compile time
                emit_movimm(constmap[i][sl]&imm[i],tl);
            }
          }
          else
            emit_zeroreg(tl);
          // The zero-extended 16-bit immediate clears the upper half
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
            }
            // OR/XOR with a 16-bit immediate leaves the upper half unchanged
            if(th>=0) {
              if(sh<0) {
                emit_loadreg(rs1[i]|64,th);
              }else{
                emit_mov(sh,th);
              }
            }
            if(opcode[i]==0x0d) { // ORI
              if(sl<0) {
                emit_orimm(tl,imm[i],tl);
              }else{
                if(!((i_regs->wasconst>>sl)&1))
                  emit_orimm(sl,imm[i],tl);
                else
                  emit_movimm(constmap[i][sl]|imm[i],tl);
              }
            }
            if(opcode[i]==0x0e) { // XORI
              if(sl<0) {
                emit_xorimm(tl,imm[i],tl);
              }else{
                if(!((i_regs->wasconst>>sl)&1))
                  emit_xorimm(sl,imm[i],tl);
                else
                  emit_movimm(constmap[i][sl]^imm[i],tl);
              }
            }
          }
          else {
            // rs is $zero: OR/XOR with zero yields the immediate itself
            emit_movimm(imm[i],tl);
            if(th>=0) emit_zeroreg(th);
          }
        }
      }
    }
  }
}
2440
// Assemble a shift-by-immediate instruction:
// SLL/SRL/SRA (32-bit) and DSLL/DSRL/DSRA plus the DSLL32/DSRL32/DSRA32
// forms (64-bit, shift amount >= 32).
//   i      - instruction index into the decode arrays
//   i_regs - register allocation state at this instruction
// As elsewhere, writes to $zero emit nothing, and constant-tracked
// targets are skipped.
void shiftimm_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0&&!((i_regs->isconst>>t)&1)){
        if(rs1[i]==0)
        {
          // Shifting $zero always produces zero
          emit_zeroreg(t);
        }
        else
        {
          // Source spilled: reload into the target and shift in place
          if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
          if(imm[i]) {
            if(opcode2[i]==0) // SLL
            {
              emit_shlimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==2) // SRL
            {
              emit_shrimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==3) // SRA
            {
              emit_sarimm(s<0?t:s,imm[i],t);
            }
          }else{
            // Shift by zero
            if(s>=0 && s!=t) emit_mov(s,t);
          }
        }
      }
      //emit_storereg(rt1[i],t); //DEBUG
    }
  }
  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      // 64-bit value split over two host registers; |64 selects the hi half
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          assert(sl>=0);
          assert(sh>=0);
          if(imm[i]) {
            if(opcode2[i]==0x38) // DSLL
            {
              // Double-register shift: hi gets bits shifted in from lo
              if(th>=0) emit_shldimm(sh,sl,imm[i],th);
              emit_shlimm(sl,imm[i],tl);
            }
            if(opcode2[i]==0x3a) // DSRL
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_shrimm(sh,imm[i],th);
            }
            if(opcode2[i]==0x3b) // DSRA
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_sarimm(sh,imm[i],th);
            }
          }else{
            // Shift by zero
            if(sl!=tl) emit_mov(sl,tl);
            if(th>=0&&sh!=th) emit_mov(sh,th);
          }
        }
      }
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      signed char sl,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(th>=0||tl>=0){
        assert(tl>=0);
        assert(th>=0);
        assert(sl>=0);
        // Shift >= 32: low word becomes zero, low source moves to the
        // high word, then shift by the remaining (imm&31) bits
        emit_mov(sl,th);
        emit_zeroreg(tl);
        if(imm[i]>32)
        {
          emit_shlimm(th,imm[i]&31,th);
        }
      }
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      signed char sh,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        // High source moves to the low word, high result is zero
        emit_mov(sh,tl);
        if(th>=0) emit_zeroreg(th);
        if(imm[i]>32)
        {
          emit_shrimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      signed char sh,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        // Arithmetic variant: only the low result word is produced here
        emit_mov(sh,tl);
        if(imm[i]>32)
        {
          emit_sarimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
}
2578
#ifndef shift_assemble
// Generic fallback: variable-amount shifts must be implemented by the
// per-target assembler (assem_x86/x64/arm define shift_assemble).
// Reaching this stub means the port is incomplete, so report the fatal
// configuration error on stderr (not stdout) and abort.
void shift_assemble(int i,struct regstat *i_regs)
{
  (void)i;       // unused in the fallback
  (void)i_regs;  // unused in the fallback
  fprintf(stderr, "Need shift_assemble for this architecture.\n");
  exit(1);
}
#endif
2586
// Assemble a load instruction: LB/LH/LW/LBU/LHU/LWU/LD.
//   i      - instruction index into the decode arrays
//   i_regs - register allocation state at this instruction
// Emits the fast path (direct read from emulated RAM) and registers a
// slow-path stub via add_stub_r for addresses that fail the range check
// (I/O, out of range).  For constant addresses outside RAM the read is
// inlined via inline_readstub instead.
void load_assemble(int i,struct regstat *i_regs)
{
  int s,th,tl,addr,map=-1;
  int offset;
  void *jaddr=0;                // slow-path branch location (0 = none)
  int memtarget=0,c=0;          // c: address is a known constant
  int fastload_reg_override=0;  // alternate address reg for the fast path
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64); // hi half of 64-bit target (LWU/LD)
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  offset=imm[i];
  // Collect live host registers so stubs can preserve them
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1;
    if (c) {
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
    }
  }
  //printf("load_assemble: c=%d\n",c);
  //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
  // FIXME: Even if the load is a NOP, we should check for pagefaults...
  // 0x1f80xxxx addresses: hardware I/O, where reads have side effects,
  // so the access must still be performed even if the result is unused
  if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80))
    ||rt1[i]==0) {
      // could be FIFO, must perform the read
      // ||dummy read
      assem_debug("(forced read)\n");
      tl=get_reg(i_regs->regmap,-1);
      assert(tl>=0);
  }
  // Address register: reuse s directly only when no adjustment is needed
  if(offset||s<0||c) addr=tl;
  else addr=s;
  //if(tl<0) tl=get_reg(i_regs->regmap,-1);
  if(tl>=0) {
    //printf("load_assemble: c=%d\n",c);
    //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
    assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
    reglist&=~(1<<tl);
    if(th>=0) reglist&=~(1<<th);
    if(!c) {
      #ifdef RAM_OFFSET
      map=get_reg(i_regs->regmap,ROREG);
      if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
      #endif
      #ifdef R29_HACK
      // Strmnnrmn's speed hack
      if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
      #endif
      {
        // Dynamic address: range check, branch to slow path on failure
        jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
      }
    }
    else if(ram_offset&&memtarget) {
      emit_addimm(addr,ram_offset,HOST_TEMPREG);
      fastload_reg_override=HOST_TEMPREG;
    }
    int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
    if (opcode[i]==0x20) { // LB
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
          else
          #endif
          {
            //emit_xorimm(addr,3,tl);
            //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
            // x/a: byte-lane adjustment and effective address register
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            if(!c) emit_xorimm(addr,3,tl);
            else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;

            emit_movsbl_indexed_tlb(x,a,map,tl);
          }
        }
        if(jaddr)
          add_stub_r(LOADB_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
      }
      else
        // Constant address outside RAM: call the read handler inline
        inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x21) { // LH
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
          else
          #endif
          {
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            if(!c) emit_xorimm(addr,2,tl);
            else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;
            //#ifdef
            //emit_movswl_indexed_tlb(x,tl,map,tl);
            //else
            if(map>=0) {
              emit_movswl_indexed(x,a,tl);
            }else{
              #if 1 //def RAM_OFFSET
              emit_movswl_indexed(x,a,tl);
              #else
              emit_movswl_indexed((int)rdram-0x80000000+x,a,tl);
              #endif
            }
          }
        }
        if(jaddr)
          add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x23) { // LW
      if(!c||memtarget) {
        if(!dummy) {
          int a=addr;
          if(fastload_reg_override) a=fastload_reg_override;
          //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_readword_tlb(constmap[i][s]+offset,map,tl);
          else
          #endif
          emit_readword_indexed_tlb(0,a,map,tl);
        }
        if(jaddr)
          add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x24) { // LBU
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
          else
          #endif
          {
            //emit_xorimm(addr,3,tl);
            //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            if(!c) emit_xorimm(addr,3,tl);
            else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;

            emit_movzbl_indexed_tlb(x,a,map,tl);
          }
        }
        if(jaddr)
          add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x25) { // LHU
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
          else
          #endif
          {
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            if(!c) emit_xorimm(addr,2,tl);
            else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;
            //#ifdef
            //emit_movzwl_indexed_tlb(x,tl,map,tl);
            //#else
            if(map>=0) {
              emit_movzwl_indexed(x,a,tl);
            }else{
              #if 1 //def RAM_OFFSET
              emit_movzwl_indexed(x,a,tl);
              #else
              emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl);
              #endif
            }
          }
        }
        if(jaddr)
          add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x27) { // LWU
      assert(th>=0);
      if(!c||memtarget) {
        if(!dummy) {
          int a=addr;
          if(fastload_reg_override) a=fastload_reg_override;
          //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_readword_tlb(constmap[i][s]+offset,map,tl);
          else
          #endif
          emit_readword_indexed_tlb(0,a,map,tl);
        }
        if(jaddr)
          add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
      }
      else {
        inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
      }
      // LWU zero-extends: the high half is always cleared
      emit_zeroreg(th);
    }
    if (opcode[i]==0x37) { // LD
      if(!c||memtarget) {
        if(!dummy) {
          int a=addr;
          if(fastload_reg_override) a=fastload_reg_override;
          //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
          //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
          else
          #endif
          emit_readdword_indexed_tlb(0,a,map,th,tl);
        }
        if(jaddr)
          add_stub_r(LOADD_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
  }
}
2842
#ifndef loadlr_assemble
// Generic fallback: unaligned loads (LWL/LWR/LDL/LDR) must be
// implemented by the per-target assembler.  Reaching this stub means the
// port is incomplete, so report the fatal configuration error on stderr
// (not stdout) and abort.
void loadlr_assemble(int i,struct regstat *i_regs)
{
  (void)i;       // unused in the fallback
  (void)i_regs;  // unused in the fallback
  fprintf(stderr, "Need loadlr_assemble for this architecture.\n");
  exit(1);
}
#endif
2850
2851void store_assemble(int i,struct regstat *i_regs)
2852{
2853 int s,th,tl,map=-1;
2854 int addr,temp;
2855 int offset;
b14b6a8f 2856 void *jaddr=0;
2857 enum stub_type type;
666a299d 2858 int memtarget=0,c=0;
57871462 2859 int agr=AGEN1+(i&1);
b1570849 2860 int faststore_reg_override=0;
57871462 2861 u_int hr,reglist=0;
2862 th=get_reg(i_regs->regmap,rs2[i]|64);
2863 tl=get_reg(i_regs->regmap,rs2[i]);
2864 s=get_reg(i_regs->regmap,rs1[i]);
2865 temp=get_reg(i_regs->regmap,agr);
2866 if(temp<0) temp=get_reg(i_regs->regmap,-1);
2867 offset=imm[i];
2868 if(s>=0) {
2869 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2870 if(c) {
2871 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2872 }
57871462 2873 }
2874 assert(tl>=0);
2875 assert(temp>=0);
2876 for(hr=0;hr<HOST_REGS;hr++) {
2877 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2878 }
2879 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2880 if(offset||s<0||c) addr=temp;
2881 else addr=s;
1edfcc68 2882 if(!c) {
2883 jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
2884 }
2885 else if(ram_offset&&memtarget) {
2886 emit_addimm(addr,ram_offset,HOST_TEMPREG);
2887 faststore_reg_override=HOST_TEMPREG;
57871462 2888 }
2889
2890 if (opcode[i]==0x28) { // SB
2891 if(!c||memtarget) {
97a238a6 2892 int x=0,a=temp;
2002a1db 2893#ifdef BIG_ENDIAN_MIPS
57871462 2894 if(!c) emit_xorimm(addr,3,temp);
2895 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 2896#else
97a238a6 2897 if(!c) a=addr;
dadf55f2 2898#endif
b1570849 2899 if(faststore_reg_override) a=faststore_reg_override;
57871462 2900 //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
97a238a6 2901 emit_writebyte_indexed_tlb(tl,x,a,map,a);
57871462 2902 }
2903 type=STOREB_STUB;
2904 }
2905 if (opcode[i]==0x29) { // SH
2906 if(!c||memtarget) {
97a238a6 2907 int x=0,a=temp;
2002a1db 2908#ifdef BIG_ENDIAN_MIPS
57871462 2909 if(!c) emit_xorimm(addr,2,temp);
2910 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 2911#else
97a238a6 2912 if(!c) a=addr;
dadf55f2 2913#endif
b1570849 2914 if(faststore_reg_override) a=faststore_reg_override;
57871462 2915 //#ifdef
2916 //emit_writehword_indexed_tlb(tl,x,temp,map,temp);
2917 //#else
2918 if(map>=0) {
97a238a6 2919 emit_writehword_indexed(tl,x,a);
57871462 2920 }else
a327ad27 2921 //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
2922 emit_writehword_indexed(tl,x,a);
57871462 2923 }
2924 type=STOREH_STUB;
2925 }
2926 if (opcode[i]==0x2B) { // SW
dadf55f2 2927 if(!c||memtarget) {
2928 int a=addr;
b1570849 2929 if(faststore_reg_override) a=faststore_reg_override;
57871462 2930 //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr);
dadf55f2 2931 emit_writeword_indexed_tlb(tl,0,a,map,temp);
2932 }
57871462 2933 type=STOREW_STUB;
2934 }
2935 if (opcode[i]==0x3F) { // SD
2936 if(!c||memtarget) {
dadf55f2 2937 int a=addr;
b1570849 2938 if(faststore_reg_override) a=faststore_reg_override;
57871462 2939 if(rs2[i]) {
2940 assert(th>=0);
2941 //emit_writeword_indexed(th,(int)rdram-0x80000000,addr);
2942 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr);
dadf55f2 2943 emit_writedword_indexed_tlb(th,tl,0,a,map,temp);
57871462 2944 }else{
2945 // Store zero
2946 //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
2947 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
dadf55f2 2948 emit_writedword_indexed_tlb(tl,tl,0,a,map,temp);
57871462 2949 }
2950 }
2951 type=STORED_STUB;
2952 }
b96d3df7 2953 if(jaddr) {
2954 // PCSX store handlers don't check invcode again
2955 reglist|=1<<addr;
b14b6a8f 2956 add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
b96d3df7 2957 jaddr=0;
2958 }
1edfcc68 2959 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
57871462 2960 if(!c||memtarget) {
2961 #ifdef DESTRUCTIVE_SHIFT
2962 // The x86 shift operation is 'destructive'; it overwrites the
2963 // source register, so we need to make a copy first and use that.
2964 addr=temp;
2965 #endif
2966 #if defined(HOST_IMM8)
2967 int ir=get_reg(i_regs->regmap,INVCP);
2968 assert(ir>=0);
2969 emit_cmpmem_indexedsr12_reg(ir,addr,1);
2970 #else
2971 emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1);
2972 #endif
0bbd1454 2973 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
2974 emit_callne(invalidate_addr_reg[addr]);
2975 #else
b14b6a8f 2976 void *jaddr2 = out;
57871462 2977 emit_jne(0);
b14b6a8f 2978 add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<<HOST_CCREG),addr,0,0,0);
0bbd1454 2979 #endif
57871462 2980 }
2981 }
7a518516 2982 u_int addr_val=constmap[i][s]+offset;
3eaa7048 2983 if(jaddr) {
b14b6a8f 2984 add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
3eaa7048 2985 } else if(c&&!memtarget) {
7a518516 2986 inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist);
2987 }
2988 // basic current block modification detection..
2989 // not looking back as that should be in mips cache already
2990 if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
c43b5311 2991 SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
7a518516 2992 assert(i_regs->regmap==regs[i].regmap); // not delay slot
2993 if(i_regs->regmap==regs[i].regmap) {
2994 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2995 wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2996 emit_movimm(start+i*4+4,0);
2997 emit_writeword(0,(int)&pcaddr);
b14b6a8f 2998 emit_jmp(do_interrupt);
7a518516 2999 }
3eaa7048 3000 }
57871462 3001}
3002
3003void storelr_assemble(int i,struct regstat *i_regs)
3004{
3005 int s,th,tl;
3006 int temp;
581335b0 3007 int temp2=-1;
57871462 3008 int offset;
b14b6a8f 3009 void *jaddr=0;
df4dc2b1 3010 void *case1, *case2, *case3;
3011 void *done0, *done1, *done2;
af4ee1fe 3012 int memtarget=0,c=0;
fab5d06d 3013 int agr=AGEN1+(i&1);
57871462 3014 u_int hr,reglist=0;
3015 th=get_reg(i_regs->regmap,rs2[i]|64);
3016 tl=get_reg(i_regs->regmap,rs2[i]);
3017 s=get_reg(i_regs->regmap,rs1[i]);
fab5d06d 3018 temp=get_reg(i_regs->regmap,agr);
3019 if(temp<0) temp=get_reg(i_regs->regmap,-1);
57871462 3020 offset=imm[i];
3021 if(s>=0) {
3022 c=(i_regs->isconst>>s)&1;
af4ee1fe 3023 if(c) {
3024 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3025 }
57871462 3026 }
3027 assert(tl>=0);
3028 for(hr=0;hr<HOST_REGS;hr++) {
3029 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3030 }
535d208a 3031 assert(temp>=0);
1edfcc68 3032 if(!c) {
3033 emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
3034 if(!offset&&s!=temp) emit_mov(s,temp);
b14b6a8f 3035 jaddr=out;
1edfcc68 3036 emit_jno(0);
3037 }
3038 else
3039 {
3040 if(!memtarget||!rs1[i]) {
b14b6a8f 3041 jaddr=out;
535d208a 3042 emit_jmp(0);
57871462 3043 }
535d208a 3044 }
1edfcc68 3045 #ifdef RAM_OFFSET
3046 int map=get_reg(i_regs->regmap,ROREG);
3047 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3048 #else
9f51b4b9 3049 if((u_int)rdram!=0x80000000)
1edfcc68 3050 emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
3051 #endif
535d208a 3052
3053 if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
3054 temp2=get_reg(i_regs->regmap,FTEMP);
3055 if(!rs2[i]) temp2=th=tl;
3056 }
57871462 3057
2002a1db 3058#ifndef BIG_ENDIAN_MIPS
3059 emit_xorimm(temp,3,temp);
3060#endif
535d208a 3061 emit_testimm(temp,2);
df4dc2b1 3062 case2=out;
535d208a 3063 emit_jne(0);
3064 emit_testimm(temp,1);
df4dc2b1 3065 case1=out;
535d208a 3066 emit_jne(0);
3067 // 0
3068 if (opcode[i]==0x2A) { // SWL
3069 emit_writeword_indexed(tl,0,temp);
3070 }
3071 if (opcode[i]==0x2E) { // SWR
3072 emit_writebyte_indexed(tl,3,temp);
3073 }
3074 if (opcode[i]==0x2C) { // SDL
3075 emit_writeword_indexed(th,0,temp);
3076 if(rs2[i]) emit_mov(tl,temp2);
3077 }
3078 if (opcode[i]==0x2D) { // SDR
3079 emit_writebyte_indexed(tl,3,temp);
3080 if(rs2[i]) emit_shldimm(th,tl,24,temp2);
3081 }
df4dc2b1 3082 done0=out;
535d208a 3083 emit_jmp(0);
3084 // 1
df4dc2b1 3085 set_jump_target(case1, out);
535d208a 3086 if (opcode[i]==0x2A) { // SWL
3087 // Write 3 msb into three least significant bytes
3088 if(rs2[i]) emit_rorimm(tl,8,tl);
3089 emit_writehword_indexed(tl,-1,temp);
3090 if(rs2[i]) emit_rorimm(tl,16,tl);
3091 emit_writebyte_indexed(tl,1,temp);
3092 if(rs2[i]) emit_rorimm(tl,8,tl);
3093 }
3094 if (opcode[i]==0x2E) { // SWR
3095 // Write two lsb into two most significant bytes
3096 emit_writehword_indexed(tl,1,temp);
3097 }
3098 if (opcode[i]==0x2C) { // SDL
3099 if(rs2[i]) emit_shrdimm(tl,th,8,temp2);
3100 // Write 3 msb into three least significant bytes
3101 if(rs2[i]) emit_rorimm(th,8,th);
3102 emit_writehword_indexed(th,-1,temp);
3103 if(rs2[i]) emit_rorimm(th,16,th);
3104 emit_writebyte_indexed(th,1,temp);
3105 if(rs2[i]) emit_rorimm(th,8,th);
3106 }
3107 if (opcode[i]==0x2D) { // SDR
3108 if(rs2[i]) emit_shldimm(th,tl,16,temp2);
3109 // Write two lsb into two most significant bytes
3110 emit_writehword_indexed(tl,1,temp);
3111 }
df4dc2b1 3112 done1=out;
535d208a 3113 emit_jmp(0);
3114 // 2
df4dc2b1 3115 set_jump_target(case2, out);
535d208a 3116 emit_testimm(temp,1);
df4dc2b1 3117 case3=out;
535d208a 3118 emit_jne(0);
3119 if (opcode[i]==0x2A) { // SWL
3120 // Write two msb into two least significant bytes
3121 if(rs2[i]) emit_rorimm(tl,16,tl);
3122 emit_writehword_indexed(tl,-2,temp);
3123 if(rs2[i]) emit_rorimm(tl,16,tl);
3124 }
3125 if (opcode[i]==0x2E) { // SWR
3126 // Write 3 lsb into three most significant bytes
3127 emit_writebyte_indexed(tl,-1,temp);
3128 if(rs2[i]) emit_rorimm(tl,8,tl);
3129 emit_writehword_indexed(tl,0,temp);
3130 if(rs2[i]) emit_rorimm(tl,24,tl);
3131 }
3132 if (opcode[i]==0x2C) { // SDL
3133 if(rs2[i]) emit_shrdimm(tl,th,16,temp2);
3134 // Write two msb into two least significant bytes
3135 if(rs2[i]) emit_rorimm(th,16,th);
3136 emit_writehword_indexed(th,-2,temp);
3137 if(rs2[i]) emit_rorimm(th,16,th);
3138 }
3139 if (opcode[i]==0x2D) { // SDR
3140 if(rs2[i]) emit_shldimm(th,tl,8,temp2);
3141 // Write 3 lsb into three most significant bytes
3142 emit_writebyte_indexed(tl,-1,temp);
3143 if(rs2[i]) emit_rorimm(tl,8,tl);
3144 emit_writehword_indexed(tl,0,temp);
3145 if(rs2[i]) emit_rorimm(tl,24,tl);
3146 }
df4dc2b1 3147 done2=out;
535d208a 3148 emit_jmp(0);
3149 // 3
df4dc2b1 3150 set_jump_target(case3, out);
535d208a 3151 if (opcode[i]==0x2A) { // SWL
3152 // Write msb into least significant byte
3153 if(rs2[i]) emit_rorimm(tl,24,tl);
3154 emit_writebyte_indexed(tl,-3,temp);
3155 if(rs2[i]) emit_rorimm(tl,8,tl);
3156 }
3157 if (opcode[i]==0x2E) { // SWR
3158 // Write entire word
3159 emit_writeword_indexed(tl,-3,temp);
3160 }
3161 if (opcode[i]==0x2C) { // SDL
3162 if(rs2[i]) emit_shrdimm(tl,th,24,temp2);
3163 // Write msb into least significant byte
3164 if(rs2[i]) emit_rorimm(th,24,th);
3165 emit_writebyte_indexed(th,-3,temp);
3166 if(rs2[i]) emit_rorimm(th,8,th);
3167 }
3168 if (opcode[i]==0x2D) { // SDR
3169 if(rs2[i]) emit_mov(th,temp2);
3170 // Write entire word
3171 emit_writeword_indexed(tl,-3,temp);
3172 }
df4dc2b1 3173 set_jump_target(done0, out);
3174 set_jump_target(done1, out);
3175 set_jump_target(done2, out);
535d208a 3176 if (opcode[i]==0x2C) { // SDL
3177 emit_testimm(temp,4);
df4dc2b1 3178 done0=out;
57871462 3179 emit_jne(0);
535d208a 3180 emit_andimm(temp,~3,temp);
3181 emit_writeword_indexed(temp2,4,temp);
df4dc2b1 3182 set_jump_target(done0, out);
535d208a 3183 }
3184 if (opcode[i]==0x2D) { // SDR
3185 emit_testimm(temp,4);
df4dc2b1