drc: remove unnecessary cache flushing
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - new_dynarec.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21#include <stdlib.h>
22#include <stdint.h> //include for uint64_t
23#include <assert.h>
d848b60a 24#include <errno.h>
4600ba03 25#include <sys/mman.h>
57871462 26
3d624f89 27#include "emu_if.h" //emulator interface
57871462 28
4600ba03 29//#define DISASM
30//#define assem_debug printf
31//#define inv_debug printf
32#define assem_debug(...)
33#define inv_debug(...)
57871462 34
35#ifdef __i386__
36#include "assem_x86.h"
37#endif
38#ifdef __x86_64__
39#include "assem_x64.h"
40#endif
41#ifdef __arm__
42#include "assem_arm.h"
43#endif
44
f23d3386 45#ifdef __BLACKBERRY_QNX__
a4874585
C
46#undef __clear_cache
47#define __clear_cache(start,end) msync(start, (size_t)((void*)end - (void*)start), MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
c7b746f0 48#elif defined(__MACH__)
49#include <libkern/OSCacheControl.h>
50#define __clear_cache mach_clear_cache
51static void __clear_cache(void *start, void *end) {
52 size_t len = (char *)end - (char *)start;
53 sys_dcache_flush(start, len);
54 sys_icache_invalidate(start, len);
55}
f23d3386 56#endif
a4874585 57
57871462 58#define MAXBLOCK 4096
59#define MAX_OUTPUT_BLOCK_SIZE 262144
2573466a 60
57871462 61struct regstat
62{
63 signed char regmap_entry[HOST_REGS];
64 signed char regmap[HOST_REGS];
65 uint64_t was32;
66 uint64_t is32;
67 uint64_t wasdirty;
68 uint64_t dirty;
69 uint64_t u;
70 uint64_t uu;
71 u_int wasconst;
72 u_int isconst;
8575a877 73 u_int loadedconst; // host regs that have constants loaded
74 u_int waswritten; // MIPS regs that were used as store base before
57871462 75};
76
de5a60c3 77// note: asm depends on this layout
57871462 78struct ll_entry
79{
80 u_int vaddr;
de5a60c3 81 u_int reg_sv_flags;
57871462 82 void *addr;
83 struct ll_entry *next;
84};
85
e2b5e7aa 86 // used by asm:
87 u_char *out;
88 u_int hash_table[65536][4] __attribute__((aligned(16)));
89 struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
90 struct ll_entry *jump_dirty[4096];
91
92 static struct ll_entry *jump_out[4096];
93 static u_int start;
94 static u_int *source;
95 static char insn[MAXBLOCK][10];
96 static u_char itype[MAXBLOCK];
97 static u_char opcode[MAXBLOCK];
98 static u_char opcode2[MAXBLOCK];
99 static u_char bt[MAXBLOCK];
100 static u_char rs1[MAXBLOCK];
101 static u_char rs2[MAXBLOCK];
102 static u_char rt1[MAXBLOCK];
103 static u_char rt2[MAXBLOCK];
104 static u_char us1[MAXBLOCK];
105 static u_char us2[MAXBLOCK];
106 static u_char dep1[MAXBLOCK];
107 static u_char dep2[MAXBLOCK];
108 static u_char lt1[MAXBLOCK];
bedfea38 109 static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
110 static uint64_t gte_rt[MAXBLOCK];
111 static uint64_t gte_unneeded[MAXBLOCK];
ffb0b9e0 112 static u_int smrv[32]; // speculated MIPS register values
 static u_int smrv_strong; // mask of regs that are likely to have correct values
114 static u_int smrv_weak; // same, but somewhat less likely
115 static u_int smrv_strong_next; // same, but after current insn executes
116 static u_int smrv_weak_next;
e2b5e7aa 117 static int imm[MAXBLOCK];
118 static u_int ba[MAXBLOCK];
119 static char likely[MAXBLOCK];
120 static char is_ds[MAXBLOCK];
121 static char ooo[MAXBLOCK];
122 static uint64_t unneeded_reg[MAXBLOCK];
123 static uint64_t unneeded_reg_upper[MAXBLOCK];
124 static uint64_t branch_unneeded_reg[MAXBLOCK];
125 static uint64_t branch_unneeded_reg_upper[MAXBLOCK];
126 static signed char regmap_pre[MAXBLOCK][HOST_REGS];
956f3129 127 static uint64_t current_constmap[HOST_REGS];
128 static uint64_t constmap[MAXBLOCK][HOST_REGS];
129 static struct regstat regs[MAXBLOCK];
130 static struct regstat branch_regs[MAXBLOCK];
e2b5e7aa 131 static signed char minimum_free_regs[MAXBLOCK];
132 static u_int needed_reg[MAXBLOCK];
133 static u_int wont_dirty[MAXBLOCK];
134 static u_int will_dirty[MAXBLOCK];
135 static int ccadj[MAXBLOCK];
136 static int slen;
137 static u_int instr_addr[MAXBLOCK];
138 static u_int link_addr[MAXBLOCK][3];
139 static int linkcount;
140 static u_int stubs[MAXBLOCK*3][8];
141 static int stubcount;
142 static u_int literals[1024][2];
143 static int literalcount;
144 static int is_delayslot;
145 static int cop1_usable;
146 static char shadow[1048576] __attribute__((aligned(16)));
147 static void *copy;
148 static int expirep;
149 static u_int stop_after_jal;
a327ad27 150#ifndef RAM_FIXED
151 static u_int ram_offset;
152#else
153 static const u_int ram_offset=0;
154#endif
e2b5e7aa 155
156 int new_dynarec_hacks;
157 int new_dynarec_did_compile;
57871462 158 extern u_char restore_candidate[512];
159 extern int cycle_count;
160
161 /* registers that may be allocated */
162 /* 1-31 gpr */
163#define HIREG 32 // hi
164#define LOREG 33 // lo
165#define FSREG 34 // FPU status (FCSR)
166#define CSREG 35 // Coprocessor status
167#define CCREG 36 // Cycle count
168#define INVCP 37 // Pointer to invalid_code
1edfcc68 169//#define MMREG 38 // Pointer to memory_map
619e5ded 170#define ROREG 39 // ram offset (if rdram!=0x80000000)
171#define TEMPREG 40
172#define FTEMP 40 // FPU temporary register
173#define PTEMP 41 // Prefetch temporary register
1edfcc68 174//#define TLREG 42 // TLB mapping offset
619e5ded 175#define RHASH 43 // Return address hash
176#define RHTBL 44 // Return address hash table address
177#define RTEMP 45 // JR/JALR address register
178#define MAXREG 45
179#define AGEN1 46 // Address generation temporary register
1edfcc68 180//#define AGEN2 47 // Address generation temporary register
181//#define MGEN1 48 // Maptable address generation temporary register
182//#define MGEN2 49 // Maptable address generation temporary register
619e5ded 183#define BTREG 50 // Branch target temporary register
57871462 184
185 /* instruction types */
186#define NOP 0 // No operation
187#define LOAD 1 // Load
188#define STORE 2 // Store
189#define LOADLR 3 // Unaligned load
190#define STORELR 4 // Unaligned store
9f51b4b9 191#define MOV 5 // Move
57871462 192#define ALU 6 // Arithmetic/logic
193#define MULTDIV 7 // Multiply/divide
194#define SHIFT 8 // Shift by register
195#define SHIFTIMM 9// Shift by immediate
196#define IMM16 10 // 16-bit immediate
197#define RJUMP 11 // Unconditional jump to register
198#define UJUMP 12 // Unconditional jump
199#define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
200#define SJUMP 14 // Conditional branch (regimm format)
201#define COP0 15 // Coprocessor 0
202#define COP1 16 // Coprocessor 1
203#define C1LS 17 // Coprocessor 1 load/store
204#define FJUMP 18 // Conditional branch (floating point)
205#define FLOAT 19 // Floating point unit
206#define FCONV 20 // Convert integer to float
207#define FCOMP 21 // Floating point compare (sets FSREG)
208#define SYSCALL 22// SYSCALL
209#define OTHER 23 // Other
210#define SPAN 24 // Branch/delay slot spans 2 pages
211#define NI 25 // Not implemented
7139f3c8 212#define HLECALL 26// PCSX fake opcodes for HLE
b9b61529 213#define COP2 27 // Coprocessor 2 move
214#define C2LS 28 // Coprocessor 2 load/store
215#define C2OP 29 // Coprocessor 2 operation
1e973cb0 216#define INTCALL 30// Call interpreter to handle rare corner cases
57871462 217
218 /* stubs */
219#define CC_STUB 1
220#define FP_STUB 2
221#define LOADB_STUB 3
222#define LOADH_STUB 4
223#define LOADW_STUB 5
224#define LOADD_STUB 6
225#define LOADBU_STUB 7
226#define LOADHU_STUB 8
227#define STOREB_STUB 9
228#define STOREH_STUB 10
229#define STOREW_STUB 11
230#define STORED_STUB 12
231#define STORELR_STUB 13
232#define INVCODE_STUB 14
233
234 /* branch codes */
235#define TAKEN 1
236#define NOTTAKEN 2
237#define NULLDS 3
238
239// asm linkage
240int new_recompile_block(int addr);
241void *get_addr_ht(u_int vaddr);
242void invalidate_block(u_int block);
243void invalidate_addr(u_int addr);
244void remove_hash(int vaddr);
57871462 245void dyna_linker();
246void dyna_linker_ds();
247void verify_code();
248void verify_code_vm();
249void verify_code_ds();
250void cc_interrupt();
251void fp_exception();
252void fp_exception_ds();
7139f3c8 253void jump_syscall_hle();
7139f3c8 254void jump_hlecall();
1e973cb0 255void jump_intcall();
7139f3c8 256void new_dyna_leave();
57871462 257
57871462 258// Needed by assembler
e2b5e7aa 259static void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
260static void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
261static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
262static void load_all_regs(signed char i_regmap[]);
263static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
264static void load_regs_entry(int t);
265static void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
266
267static int verify_dirty(u_int *ptr);
268static int get_final_value(int hr, int i, int *value);
269static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e);
270static void add_to_linker(int addr,int target,int ext);
57871462 271
e2b5e7aa 272static int tracedebug=0;
57871462 273
274//#define DEBUG_CYCLE_COUNT 1
275
b6e87b2b 276#define NO_CYCLE_PENALTY_THR 12
277
4e9dcd7f 278int cycle_multiplier; // 100 for 1.0
279
280static int CLOCK_ADJUST(int x)
281{
282 int s=(x>>31)|1;
283 return (x * cycle_multiplier + s * 50) / 100;
284}
285
94d23bb9 286static u_int get_page(u_int vaddr)
57871462 287{
0ce47d46 288 u_int page=vaddr&~0xe0000000;
289 if (page < 0x1000000)
290 page &= ~0x0e00000; // RAM mirrors
291 page>>=12;
57871462 292 if(page>2048) page=2048+(page&2047);
94d23bb9 293 return page;
294}
295
d25604ca 296// no virtual mem in PCSX
297static u_int get_vpage(u_int vaddr)
298{
299 return get_page(vaddr);
300}
94d23bb9 301
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
// Looks up (or revalidates, or compiles) the translated code for vaddr
// and returns a pointer to it, updating the 2-way hash table bucket.
void *get_addr(u_int vaddr)
{
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
  // 1) Look for an already-compiled (clean) block for this vaddr.
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Promote to the most-recently-used slot (0/1) of the hash
      // bucket, demoting the previous occupant to slot 2/3.
      u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
      ht_bin[3]=ht_bin[1];
      ht_bin[2]=ht_bin[0];
      ht_bin[1]=(u_int)head->addr;
      ht_bin[0]=vaddr;
      return head->addr;
    }
    head=head->next;
  }
  // 2) No clean block: try to revalidate a dirty one.
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(verify_dirty(head->addr)) {
        // The source RAM still matches what was compiled: make the
        // page valid again and flag it for promotion to the clean list.
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        invalid_code[vaddr>>12]=0;
        inv_code_start=inv_code_end=~0;
        if(vpage<2048) {
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(u_int)head->addr; // Replace existing entry
        }
        else
        {
          ht_bin[3]=ht_bin[1];
          ht_bin[2]=ht_bin[0];
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
  // 3) Nothing usable: compile the block now and retry.
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault exception
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
}
365// Look up address in hash table first
366void *get_addr_ht(u_int vaddr)
367{
368 //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
581335b0 369 u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
57871462 370 if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
371 if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
372 return get_addr(vaddr);
373}
374
57871462 375void clear_all_regs(signed char regmap[])
376{
377 int hr;
378 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
379}
380
381signed char get_reg(signed char regmap[],int r)
382{
383 int hr;
384 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
385 return -1;
386}
387
388// Find a register that is available for two consecutive cycles
389signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
390{
391 int hr;
392 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
393 return -1;
394}
395
396int count_free_regs(signed char regmap[])
397{
398 int count=0;
399 int hr;
400 for(hr=0;hr<HOST_REGS;hr++)
401 {
402 if(hr!=EXCLUDE_REG) {
403 if(regmap[hr]<0) count++;
404 }
405 }
406 return count;
407}
408
409void dirty_reg(struct regstat *cur,signed char reg)
410{
411 int hr;
412 if(!reg) return;
413 for (hr=0;hr<HOST_REGS;hr++) {
414 if((cur->regmap[hr]&63)==reg) {
415 cur->dirty|=1<<hr;
416 }
417 }
418}
419
420// If we dirty the lower half of a 64 bit register which is now being
421// sign-extended, we need to dump the upper half.
422// Note: Do this only after completion of the instruction, because
423// some instructions may need to read the full 64-bit value even if
424// overwriting it (eg SLTI, DSRA32).
425static void flush_dirty_uppers(struct regstat *cur)
426{
427 int hr,reg;
428 for (hr=0;hr<HOST_REGS;hr++) {
429 if((cur->dirty>>hr)&1) {
430 reg=cur->regmap[hr];
9f51b4b9 431 if(reg>=64)
57871462 432 if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
433 }
434 }
435}
436
437void set_const(struct regstat *cur,signed char reg,uint64_t value)
438{
439 int hr;
440 if(!reg) return;
441 for (hr=0;hr<HOST_REGS;hr++) {
442 if(cur->regmap[hr]==reg) {
443 cur->isconst|=1<<hr;
956f3129 444 current_constmap[hr]=value;
57871462 445 }
446 else if((cur->regmap[hr]^64)==reg) {
447 cur->isconst|=1<<hr;
956f3129 448 current_constmap[hr]=value>>32;
57871462 449 }
450 }
451}
452
453void clear_const(struct regstat *cur,signed char reg)
454{
455 int hr;
456 if(!reg) return;
457 for (hr=0;hr<HOST_REGS;hr++) {
458 if((cur->regmap[hr]&63)==reg) {
459 cur->isconst&=~(1<<hr);
460 }
461 }
462}
463
464int is_const(struct regstat *cur,signed char reg)
465{
466 int hr;
79c75f1b 467 if(reg<0) return 0;
57871462 468 if(!reg) return 1;
469 for (hr=0;hr<HOST_REGS;hr++) {
470 if((cur->regmap[hr]&63)==reg) {
471 return (cur->isconst>>hr)&1;
472 }
473 }
474 return 0;
475}
476uint64_t get_const(struct regstat *cur,signed char reg)
477{
478 int hr;
479 if(!reg) return 0;
480 for (hr=0;hr<HOST_REGS;hr++) {
481 if(cur->regmap[hr]==reg) {
956f3129 482 return current_constmap[hr];
57871462 483 }
484 }
c43b5311 485 SysPrintf("Unknown constant in r%d\n",reg);
57871462 486 exit(1);
487}
488
// Least soon needed registers
// Look at the next ten instructions and see which registers
// will be used. Try not to reallocate these.
// On return hsn[r] holds the distance (in instructions) to the next
// use of guest register r; smaller means needed sooner.
void lsn(u_char hsn[], int i, int *preferred_reg)
{
  int j;
  int b=-1;
  // Determine how far ahead we may look without leaving the block.
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditional jump
      j++;
      break;
    }
  }
  // Walk backwards so hsn[] ends up with the *nearest* use distance.
  for(;j>=0;j--)
  {
    if(rs1[i+j]) hsn[rs1[i+j]]=j;
    if(rs2[i+j]) hsn[rs2[i+j]]=j;
    if(rt1[i+j]) hsn[rt1[i+j]]=j;
    if(rt2[i+j]) hsn[rt2[i+j]]=j;
    if(itype[i+j]==STORE || itype[i+j]==STORELR) {
      // Stores can allocate zero
      hsn[rs1[i+j]]=j;
      hsn[rs2[i+j]]=j;
    }
    // On some architectures stores need invc_ptr
    #if defined(HOST_IMM8)
    if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
      hsn[INVCP]=j;
    }
    #endif
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      hsn[CCREG]=j;
      b=j;  // remember the last branch seen in the window
    }
  }
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        // Uses at the branch target count as further away (+b+2).
        if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
        if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
        //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
        //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
      }
    }
    // TODO: preferred register based on backward branch
  }
  // Delay slot should preferably not overwrite branch conditions or cycle count
  if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
    if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
    if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
    hsn[CCREG]=1;
    // ...or hash tables
    hsn[RHASH]=1;
    hsn[RHTBL]=1;
  }
  // Coprocessor load/store needs FTEMP, even if not declared
  if(itype[i]==C1LS||itype[i]==C2LS) {
    hsn[FTEMP]=0;
  }
  // Load L/R also uses FTEMP as a temporary register
  if(itype[i]==LOADLR) {
    hsn[FTEMP]=0;
  }
  // Also SWL/SWR/SDL/SDR
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
    hsn[FTEMP]=0;
  }
  // Don't remove the miniht registers
  if(itype[i]==UJUMP||itype[i]==RJUMP)
  {
    hsn[RHASH]=0;
    hsn[RHTBL]=0;
  }
}
577
// We only want to allocate registers if we're going to use them again soon
// Returns nonzero if guest register r is read within the lookahead
// window (up to ~9 instructions, stopping at block exits / syscalls)
// without becoming unneeded first.
int needed_again(int r, int i)
{
  int j;
  int b=-1;
  int rn=10;  // distance to next use; 10 == "not found in window"

  if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
  {
    if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
      return 0; // Don't need any registers if exiting the block
  }
  // Determine lookahead limit.
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditional jump
      j++;
      break;
    }
    if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
    {
      break;
    }
  }
  // Walk backwards looking for the nearest read of r.
  for(;j>=1;j--)
  {
    if(rs1[i+j]==r) rn=j;
    if(rs2[i+j]==r) rn=j;
    if((unneeded_reg[i+j]>>r)&1) rn=10;  // r dies before this point
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      b=j;
    }
  }
  /*
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int o=rn;
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        if(!((unneeded_reg[t+j]>>r)&1)) {
          if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
          if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
        }
        else rn=o;
      }
    }
  }*/
  if(rn<10) return 1;
  (void)b;
  return 0;
}
640
// Try to match register allocations at the end of a loop with those
// at the beginning
// Returns the host register r is allocated to at the target of a
// backward branch within the window, or hr if no better choice exists.
int loop_reg(int i, int r, int hr)
{
  int j,k;
  // Determine lookahead limit, stopping at an unconditional jump.
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditional jump
      j++;
      break;
    }
  }
  k=0;
  if(i>0){
    if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
      k--;  // also consider the branch immediately before i
  }
  for(;k<j;k++)
  {
    // If r becomes unneeded anywhere in the window, keep the default.
    if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
    if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr;
    if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
    {
      if(ba[i+k]>=start && ba[i+k]<(start+i*4))
      {
        // Backward branch: prefer the allocation at its target.
        int t=(ba[i+k]-start)>>2;
        int reg=get_reg(regs[t].regmap_entry,r);
        if(reg>=0) return reg;
        //reg=get_reg(regs[t+1].regmap_entry,r);
        //if(reg>=0) return reg;
      }
    }
  }
  return hr;
}
682
683
684// Allocate every register, preserving source/target regs
685void alloc_all(struct regstat *cur,int i)
686{
687 int hr;
9f51b4b9 688
57871462 689 for(hr=0;hr<HOST_REGS;hr++) {
690 if(hr!=EXCLUDE_REG) {
691 if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
692 ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
693 {
694 cur->regmap[hr]=-1;
695 cur->dirty&=~(1<<hr);
696 }
697 // Don't need zeros
698 if((cur->regmap[hr]&63)==0)
699 {
700 cur->regmap[hr]=-1;
701 cur->dirty&=~(1<<hr);
702 }
703 }
704 }
705}
706
57871462 707#ifdef __i386__
708#include "assem_x86.c"
709#endif
710#ifdef __x86_64__
711#include "assem_x64.c"
712#endif
713#ifdef __arm__
714#include "assem_arm.c"
715#endif
716
717// Add virtual address mapping to linked list
718void ll_add(struct ll_entry **head,int vaddr,void *addr)
719{
720 struct ll_entry *new_entry;
721 new_entry=malloc(sizeof(struct ll_entry));
722 assert(new_entry!=NULL);
723 new_entry->vaddr=vaddr;
de5a60c3 724 new_entry->reg_sv_flags=0;
57871462 725 new_entry->addr=addr;
726 new_entry->next=*head;
727 *head=new_entry;
728}
729
de5a60c3 730void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr)
57871462 731{
7139f3c8 732 ll_add(head,vaddr,addr);
de5a60c3 733 (*head)->reg_sv_flags=reg_sv_flags;
57871462 734}
735
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache
// Returns the translated-code pointer or 0; may refresh the hash table
// from the jump_in list on a hash miss.
void *check_addr(u_int vaddr)
{
  u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) {
    // Use the cached address only if it is far enough from the
    // translation cache's expiry point and the block is still clean.
    if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
    if(isclean(ht_bin[1])) return (void *)ht_bin[1];
  }
  if(ht_bin[2]==vaddr) {
    if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
    if(isclean(ht_bin[3])) return (void *)ht_bin[3];
  }
  // Hash miss: scan the clean-block list for this page.
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        // Update existing entry with current address
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr;
          return head->addr;
        }
        if(ht_bin[2]==vaddr) {
          ht_bin[3]=(int)head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if(ht_bin[0]==-1) {
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }else if(ht_bin[2]==-1) {
          ht_bin[3]=(int)head->addr;
          ht_bin[2]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0;
}
781
782void remove_hash(int vaddr)
783{
784 //printf("remove hash: %x\n",vaddr);
581335b0 785 u_int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF];
57871462 786 if(ht_bin[2]==vaddr) {
787 ht_bin[2]=ht_bin[3]=-1;
788 }
789 if(ht_bin[0]==vaddr) {
790 ht_bin[0]=ht_bin[2];
791 ht_bin[1]=ht_bin[3];
792 ht_bin[2]=ht_bin[3]=-1;
793 }
794}
795
// Unlink and free every list entry whose generated-code address falls
// in the (addr>>shift) region (directly, or within MAX_OUTPUT_BLOCK_SIZE
// after it), removing its hash-table entries too.  Used when expiring
// old translations from the code cache.
void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
{
  struct ll_entry *next;
  while(*head) {
    if(((u_int)((*head)->addr)>>shift)==(addr>>shift) ||
       ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
    {
      inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
      remove_hash((*head)->vaddr);
      next=(*head)->next;
      free(*head);
      // Unlink in place; stay on the same slot to examine the new *head.
      *head=next;
    }
    else
    {
      head=&((*head)->next);
    }
  }
}
815
816// Remove all entries from linked list
817void ll_clear(struct ll_entry **head)
818{
819 struct ll_entry *cur;
820 struct ll_entry *next;
581335b0 821 if((cur=*head)) {
57871462 822 *head=0;
823 while(cur) {
824 next=cur->next;
825 free(cur);
826 cur=next;
827 }
828 }
829}
830
// Dereference the pointers and remove if it matches
// For each jump_out entry, if the patched branch currently targets the
// expiring region (addr>>shift), unpatch it via kill_pointer and, on
// ARM, record which translation-cache region needs an icache flush.
void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
{
  while(head) {
    int ptr=get_pointer(head->addr);
    inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
    if(((ptr>>shift)==(addr>>shift)) ||
       (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
    {
      inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
      u_int host_addr=(u_int)kill_pointer(head->addr);
      #ifdef __arm__
      // one bit per 4K page, grouped into 128K regions of the cache
      needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
      #endif
    }
    head=head->next;
  }
}
849
// This is called when we write to a compiled block (see do_invstub)
// Frees all jump_in entries for the page (also dropping their hash
// table entries) and unpatches all jump_out links into the page.
void invalidate_page(u_int page)
{
  struct ll_entry *head;
  struct ll_entry *next;
  // Drop all compiled-block entry points in this page.
  head=jump_in[page];
  jump_in[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: %x\n",head->vaddr);
    remove_hash(head->vaddr);
    next=head->next;
    free(head);
    head=next;
  }
  // Detach outgoing branches that were patched to jump into this page.
  head=jump_out[page];
  jump_out[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
    u_int host_addr=(u_int)kill_pointer(head->addr);
    #ifdef __arm__
    // mark the affected translation-cache region for icache clearing
    needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
    #endif
    next=head->next;
    free(head);
    head=next;
  }
}
9be4ba64 877
// Invalidate the block's own page plus the adjacent page range
// [first,last] that compiled code spanning 4K boundaries may cover.
static void invalidate_block_range(u_int block, u_int first, u_int last)
{
  u_int page=get_page(block<<12);
  //printf("first=%d last=%d\n",first,last);
  invalidate_page(page);
  assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
  assert(last<page+5);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  while(first<page) {
    invalidate_page(first);
    first++;
  }
  for(first=page+1;first<last;first++) {
    invalidate_page(first);
  }
  #ifdef __arm__
  do_clear_cache();
  #endif

  // Don't trap writes
  invalid_code[block]=1;

  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
}
9be4ba64 904
// Invalidate all compiled code for the 4K page containing `block`,
// widening the range to every RAM page spanned by any dirty block
// that overlaps it.
void invalidate_block(u_int block)
{
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  while(head!=NULL) {
    u_int start,end;
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      get_bounds((int)head->addr,&start,&end);
      //printf("start: %x end: %x\n",start,end);
      if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) {
        // Widen [first,last] to cover every RAM page the block reads.
        if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
          if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
          if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
        }
      }
    }
    head=head->next;
  }
  invalidate_block_range(block,first,last);
}
932
// Invalidate compiled code covering a written-to address (RAM fast
// path).  Also maintains [inv_code_start, inv_code_end], a cached
// range known to contain no compiled code so the caller can skip
// future invalidation lookups.
void invalidate_addr(u_int addr)
{
  //static int rhits;
  // this check is done by the caller
  //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
  u_int page=get_vpage(addr);
  if(page<2048) { // RAM
    struct ll_entry *head;
    u_int addr_min=~0, addr_max=0;
    u_int mask=RAM_SIZE-1;
    u_int addr_main=0x80000000|(addr&mask); // canonical mirror of addr
    int pg1;
    inv_code_start=addr_main&~0xfff;
    inv_code_end=addr_main|0xfff;
    pg1=page;
    if (pg1>0) {
      // must check previous page too because of spans..
      pg1--;
      inv_code_start-=0x1000;
    }
    for(;pg1<=page;pg1++) {
      for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
        u_int start,end;
        get_bounds((int)head->addr,&start,&end);
        if(ram_offset) {
          start-=ram_offset;
          end-=ram_offset;
        }
        if(start<=addr_main&&addr_main<end) {
          // Block covers the written address: widen the hit range.
          if(start<addr_min) addr_min=start;
          if(end>addr_max) addr_max=end;
        }
        else if(addr_main<start) {
          // Block lies above: shrink the "no code" window from above.
          if(start<inv_code_end)
            inv_code_end=start-1;
        }
        else {
          // Block lies below: shrink the window from below.
          if(end>inv_code_start)
            inv_code_start=end;
        }
      }
    }
    if (addr_min!=~0) {
      inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
      inv_code_start=inv_code_end=~0;
      invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
      return;
    }
    else {
      // Nothing compiled here; record the clear window in addr's own mirror.
      inv_code_start=(addr&~mask)|(inv_code_start&mask);
      inv_code_end=(addr&~mask)|(inv_code_end&mask);
      inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
      return;
    }
  }
  invalidate_block(addr>>12);
}
9be4ba64 990
dd3a91a1 991// This is called when loading a save state.
992// Anything could have changed, so invalidate everything.
57871462 993void invalidate_all_pages()
994{
581335b0 995 u_int page;
57871462 996 for(page=0;page<4096;page++)
997 invalidate_page(page);
998 for(page=0;page<1048576;page++)
999 if(!invalid_code[page]) {
1000 restore_candidate[(page&2047)>>3]|=1<<(page&7);
1001 restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
1002 }
57871462 1003 #ifdef USE_MINI_HT
1004 memset(mini_ht,-1,sizeof(mini_ht));
1005 #endif
57871462 1006}
1007
// Add an entry to jump_out after making a link
// src is the branch that was patched to jump to vaddr's block; the
// jump_out entry lets invalidate_page() unpatch it later.
void add_link(u_int vaddr,void *src)
{
  u_int page=get_page(vaddr);
  inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
  int *ptr=(int *)(src+4);
  // Sanity check on the instruction following the branch; 0x059f....
  // appears to be an ARM ldr [pc,#imm] literal load — NOTE(review):
  // confirm against the emitter in assem_arm.c.
  assert((*ptr&0x0fff0000)==0x059f0000);
  (void)ptr;
  ll_add(jump_out+page,vaddr,src);
  //int ptr=get_pointer(src);
  //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
}
1020
// If a code block was found to be unmodified (bit was set in
// restore_candidate) and it remains unmodified (bit is clear
// in invalid_code) then move the entries for that 4K page from
// the dirty list to the clean list.
void clean_blocks(u_int page)
{
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        u_int start,end;
        if(verify_dirty(head->addr)) {
          //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
          u_int i;
          u_int inv=0;
          get_bounds((int)head->addr,&start,&end);
          if(start-(u_int)rdram<RAM_SIZE) {
            // Any invalid page spanned by the block keeps it dirty.
            for(i=(start-(u_int)rdram+0x80000000)>>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) {
              inv|=invalid_code[i];
            }
          }
          else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
            inv=1;  // outside RAM: never promote
          }
          if(!inv) {
            void * clean_addr=(void *)get_clean_addr((int)head->addr);
            if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
              u_int ppage=page;
              inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              // Re-register as a clean entry point and refresh the hash.
              ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
              u_int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF];
              if(ht_bin[0]==head->vaddr) {
                ht_bin[1]=(u_int)clean_addr; // Replace existing entry
              }
              if(ht_bin[2]==head->vaddr) {
                ht_bin[3]=(u_int)clean_addr; // Replace existing entry
              }
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1071
1072
1073void mov_alloc(struct regstat *current,int i)
1074{
1075 // Note: Don't need to actually alloc the source registers
1076 if((~current->is32>>rs1[i])&1) {
1077 //alloc_reg64(current,i,rs1[i]);
1078 alloc_reg64(current,i,rt1[i]);
1079 current->is32&=~(1LL<<rt1[i]);
1080 } else {
1081 //alloc_reg(current,i,rs1[i]);
1082 alloc_reg(current,i,rt1[i]);
1083 current->is32|=(1LL<<rt1[i]);
1084 }
1085 clear_const(current,rs1[i]);
1086 clear_const(current,rt1[i]);
1087 dirty_reg(current,rt1[i]);
1088}
1089
// Register allocation for shift-by-immediate instructions
// (SLL/SRL/SRA and the 64-bit DSLL/DSRL/DSRA variants).
void shiftimm_alloc(struct regstat *current,int i)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
      else lt1[i]=rs1[i]; // presumably marks the source for a lazy load - TODO confirm
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
      // Constant propagation through the shift when the source is known
      if(is_const(current,rs1[i])) {
        int v=get_const(current,rs1[i]);
        if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
        if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]); // logical
        if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]); // arithmetic
      }
      else clear_const(current,rt1[i]);
    }
  }
  else
  {
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }

  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      if(imm[i]==32) {
        // Shift by exactly 32: result is the upper word, still 64-bit alloc
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      } else {
        // Shifting more than 32 zeroes the upper bits: 32-bit result
        alloc_reg(current,i,rt1[i]);
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
}
1157
// Register allocation for variable shifts
// (SLLV/SRLV/SRAV and 64-bit DSLLV/DSRLV/DSRAV).
void shift_alloc(struct regstat *current,int i)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg(current,i,rt1[i]);
      if(rt1[i]==rs2[i]) {
        // Result would clobber the shift amount: need a scratch register
        alloc_reg_temp(current,i,-1);
        minimum_free_regs[i]=1;
      }
      current->is32|=1LL<<rt1[i];
    } else { // DSLLV/DSRLV/DSRAV
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
      {
        alloc_reg_temp(current,i,-1);
        minimum_free_regs[i]=1;
      }
    }
    clear_const(current,rs1[i]);
    clear_const(current,rs2[i]);
    clear_const(current,rt1[i]);
    dirty_reg(current,rt1[i]);
  }
}
1188
// Register allocation for three-operand ALU instructions, and tracking of
// the 32/64-bit width of the result in current->is32.
void alu_alloc(struct regstat *current,int i)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else {
        // One operand is r0: only load the other if it is needed later
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
    // NOTE(review): executed even when rt1[i]==0, setting bit 0 (r0) -
    // presumably harmless since r0 is always treated as 32-bit zero.
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      // 64-bit compare needed unless both sources are known 32-bit
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      } else {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      }
    }
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else
      {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        // Result is 64-bit; allocate the upper half unless the upper
        // word of the destination is unneeded (current->uu)
        if(!((current->uu>>rt1[i])&1)) {
          alloc_reg64(current,i,rt1[i]);
        }
        if(get_reg(current->regmap,rt1[i]|64)>=0) {
          if(rs1[i]&&rs2[i]) {
            alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rs2[i]);
          }
          else
          {
            // Is is really worth it to keep 64-bit values in registers?
            #ifdef NATIVE_64BIT
            if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
            if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]);
            #endif
          }
        }
        current->is32&=~(1LL<<rt1[i]);
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          alloc_reg64(current,i,rs1[i]);
          alloc_reg64(current,i,rs2[i]);
          alloc_reg64(current,i,rt1[i]);
        } else {
          // Upper word of the result is dead: 32-bit allocation suffices
          alloc_reg(current,i,rs1[i]);
          alloc_reg(current,i,rs2[i]);
          alloc_reg(current,i,rt1[i]);
        }
      }
      else {
        alloc_reg(current,i,rt1[i]);
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          // DADD used as move, or zeroing
          // If we have a 64-bit source, then make the target 64 bits too
          if(rs1[i]&&!((current->is32>>rs1[i])&1)) {
            if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rt1[i]);
          } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) {
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
          if(opcode2[i]>=0x2e&&rs2[i]) {
            // DSUB used as negation - 64-bit result
            // If we have a 32-bit register, extend it to 64 bits
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
        }
      }
      // Track the result width: a real 64-bit op is 64-bit; a move
      // inherits the source width; zeroing yields a 32-bit value.
      if(rs1[i]&&rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
      } else if(rs1[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs1[i])&1)
          current->is32|=1LL<<rt1[i];
      } else if(rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs2[i])&1)
          current->is32|=1LL<<rt1[i];
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1310
// Register allocation for immediate-operand instructions
// (DADDI/DADDIU, SLTI/SLTIU, ANDI/ORI/XORI, ADDI/ADDIU, LUI),
// with constant propagation where the source value is known.
void imm16_alloc(struct regstat *current,int i)
{
  if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  else lt1[i]=rs1[i]; // presumably marks the source for a lazy load - TODO confirm
  if(rt1[i]) alloc_reg(current,i,rt1[i]);
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    current->is32&=~(1LL<<rt1[i]);
    if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
      // TODO: Could preserve the 32-bit flag if the immediate is zero
      alloc_reg64(current,i,rt1[i]);
      alloc_reg64(current,i,rs1[i]);
    }
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]);
    current->is32|=1LL<<rt1[i];
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    // ORI/XORI with a 64-bit source produce a 64-bit result;
    // ANDI with a zero-extended immediate always clears the upper bits.
    if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
      if(rs1[i]!=rt1[i]) {
        if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      }
    }
    else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
      if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
      if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
    }
    else clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      set_const(current,rt1[i],v+imm[i]);
    }
    else clear_const(current,rt1[i]);
    current->is32|=1LL<<rt1[i];
  }
  else {
    // LUI: result is always a known constant
    set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
    current->is32|=1LL<<rt1[i];
  }
  dirty_reg(current,rt1[i]);
}
1363
// Register allocation for load instructions (LW/LWU/LD/LWL/LWR/LDL/LDR...).
// Loads to r0 or to a dead register still allocate an address temporary
// because the access itself may fault / hit hardware registers.
void load_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
  if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  if(rt1[i]&&!((current->u>>rt1[i])&1)) {
    alloc_reg(current,i,rt1[i]);
    assert(get_reg(current->regmap,rt1[i])>=0);
    if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
    {
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
    }
    else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      // Unaligned 64-bit loads go through a helper: need every register
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
    else current->is32|=1LL<<rt1[i];
    dirty_reg(current,rt1[i]);
    // LWL/LWR need a temporary register for the old value
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP);
      alloc_reg_temp(current,i,-1);
      minimum_free_regs[i]=1;
    }
  }
  else
  {
    // Load to r0 or unneeded register (dummy load)
    // but we still need a register to calculate the address
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
    }
    alloc_reg_temp(current,i,-1);
    minimum_free_regs[i]=1;
    if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
  }
}
1414
// Register allocation for store instructions (SW/SD/SWL/SWR/SDL/SDR...).
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,rs2[i]);
  if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rs2[i]);
  if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
    alloc_reg64(current,i,rs2[i]);
    if(rs2[i]) alloc_reg(current,i,FTEMP);
  }
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  // NOTE: this 'else' pairs with the 64-bit-store 'if' above, so under
  // HOST_IMM8 only the 32-bit stores allocate INVCP here.
  else alloc_reg(current,i,INVCP);
  #endif
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWL/SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1436
// Register allocation for COP1 (FPU) loads/stores (LWC1/SWC1/LDC1/SDC1).
void c1ls_alloc(struct regstat *current,int i)
{
  //clear_const(current,rs1[i]); // FIXME
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,CSREG); // Status
  alloc_reg(current,i,FTEMP);
  if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
    alloc_reg64(current,i,FTEMP);
  }
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  // NOTE(review): unlike c2ls_alloc and the other allocators that reserve a
  // temporary, this does not set minimum_free_regs[i]=1 - confirm whether
  // that is intentional (COP1 load/store may be unreachable on this target).
}
1455
// Register allocation for GTE (COP2) loads/stores (LWC2/SWC2).
void c2ls_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,FTEMP);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1470
#ifndef multdiv_alloc
// Generic register allocation for multiply/divide; an assembler backend may
// provide its own version and suppress this one via the multdiv_alloc macro.
void multdiv_alloc(struct regstat *current,int i)
{
  // case 0x18: MULT
  // case 0x19: MULTU
  // case 0x1A: DIV
  // case 0x1B: DIVU
  // case 0x1C: DMULT
  // case 0x1D: DMULTU
  // case 0x1E: DDIV
  // case 0x1F: DDIVU
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      // Results land in HI/LO; mark them live for this instruction
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      alloc_reg(current,i,HIREG);
      alloc_reg(current,i,LOREG);
      alloc_reg(current,i,rs1[i]);
      alloc_reg(current,i,rs2[i]);
      current->is32|=1LL<<HIREG;
      current->is32|=1LL<<LOREG;
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
    else // 64-bit
    {
      // 64-bit mul/div goes through a helper call: take all registers
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      current->uu&=~(1LL<<HIREG);
      current->uu&=~(1LL<<LOREG);
      alloc_reg64(current,i,HIREG);
      //if(HOST_REGS>10) alloc_reg64(current,i,LOREG);
      alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rs2[i]);
      alloc_all(current,i);
      current->is32&=~(1LL<<HIREG);
      current->is32&=~(1LL<<LOREG);
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
      minimum_free_regs[i]=HOST_REGS;
    }
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    alloc_reg(current,i,HIREG);
    alloc_reg(current,i,LOREG);
    current->is32|=1LL<<HIREG;
    current->is32|=1LL<<LOREG;
    dirty_reg(current,HIREG);
    dirty_reg(current,LOREG);
  }
}
#endif
1531
// Register allocation for COP0 (system control) instructions.
// All variants may call out to C code, so every host register is taken.
void cop0_alloc(struct regstat *current,int i)
{
  if(opcode2[i]==0) // MFC0
  {
    if(rt1[i]) {
      clear_const(current,rt1[i]);
      alloc_all(current,i);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      alloc_reg(current,i,rs1[i]);
      alloc_all(current,i);
    }
    else {
      // Writing r0: force a zero register to hand to the helper
      alloc_all(current,i); // FIXME: Keep r0
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  else
  {
    // TLBR/TLBWI/TLBWR/TLBP/ERET
    assert(opcode2[i]==0x10);
    alloc_all(current,i);
  }
  minimum_free_regs[i]=HOST_REGS;
}
1565
// Register allocation for COP1 move/control instructions
// (MFC1/DMFC1/CFC1 and MTC1/DMTC1/CTC1).
void cop1_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  if(opcode2[i]<3) // MFC1/DMFC1/CFC1
  {
    if(rt1[i]){
      clear_const(current,rt1[i]);
      if(opcode2[i]==1) {
        alloc_reg64(current,i,rt1[i]); // DMFC1
        current->is32&=~(1LL<<rt1[i]);
      }else{
        alloc_reg(current,i,rt1[i]); // MFC1/CFC1
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
    alloc_reg_temp(current,i,-1);
  }
  else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      if(opcode2[i]==5)
        alloc_reg64(current,i,rs1[i]); // DMTC1
      else
        alloc_reg(current,i,rs1[i]); // MTC1/CTC1
      alloc_reg_temp(current,i,-1);
    }
    else {
      // Moving r0: reserve a zero register for the write
      current->u&=~1LL;
      alloc_reg(current,i,0);
      alloc_reg_temp(current,i,-1);
    }
  }
  minimum_free_regs[i]=1;
}
// Register allocation for FPU conversion ops: status reg + one scratch.
void fconv_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
// Register allocation for FPU arithmetic ops: status reg + one scratch.
void float_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
b9b61529 1614void c2op_alloc(struct regstat *current,int i)
1615{
1616 alloc_reg_temp(current,i,-1);
1617}
// Register allocation for FPU compares: status + condition-flag registers
// plus one scratch; the flag register is written by the compare.
void fcomp_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg(current,i,FSREG); // Load flags
  dirty_reg(current,FSREG); // Flag will be modified
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1626
// Register allocation for SYSCALL: hands control to C, so the cycle
// counter must be up to date and every host register is taken.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  minimum_free_regs[i]=HOST_REGS;
  current->isconst=0;
}
1635
// Dispatch register allocation for the instruction in a branch delay slot,
// based on its decoded instruction type (itype).
void delayslot_alloc(struct regstat *current,int i)
{
  switch(itype[i]) {
    // A branch (or syscall/hlecall) in a delay slot is not supported;
    // fall back to stopping speculative precompilation after JALs.
    case UJUMP:
    case CJUMP:
    case SJUMP:
    case RJUMP:
    case FJUMP:
    case SYSCALL:
    case HLECALL:
    case SPAN:
      assem_debug("jump in the delay slot.  this shouldn't happen.\n");//exit(1);
      SysPrintf("Disabled speculative precompilation\n");
      stop_after_jal=1;
      break;
    case IMM16:
      imm16_alloc(current,i);
      break;
    case LOAD:
    case LOADLR:
      load_alloc(current,i);
      break;
    case STORE:
    case STORELR:
      store_alloc(current,i);
      break;
    case ALU:
      alu_alloc(current,i);
      break;
    case SHIFT:
      shift_alloc(current,i);
      break;
    case MULTDIV:
      multdiv_alloc(current,i);
      break;
    case SHIFTIMM:
      shiftimm_alloc(current,i);
      break;
    case MOV:
      mov_alloc(current,i);
      break;
    case COP0:
      cop0_alloc(current,i);
      break;
    case COP1:
    case COP2:
      cop1_alloc(current,i);
      break;
    case C1LS:
      c1ls_alloc(current,i);
      break;
    case C2LS:
      c2ls_alloc(current,i);
      break;
    case FCONV:
      fconv_alloc(current,i);
      break;
    case FLOAT:
      float_alloc(current,i);
      break;
    case FCOMP:
      fcomp_alloc(current,i);
      break;
    case C2OP:
      c2op_alloc(current,i);
      break;
  }
}
1704
1705// Special case where a branch and delay slot span two pages in virtual memory
// Special case where a branch and delay slot span two pages in virtual memory
// Everything is allocated conservatively: all registers are taken and the
// branch operands are kept live so the split can be stitched back together.
static void pagespan_alloc(struct regstat *current,int i)
{
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  minimum_free_regs[i]=HOST_REGS;
  alloc_all(current,i);
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  if(opcode[i]==3) // JAL
  {
    // Link register is written
    alloc_reg(current,i,31);
    dirty_reg(current,31);
  }
  if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,rs1[i]);
    if (rt1[i]!=0) {
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(rs2[i]) alloc_reg(current,i,rs2[i]);
    // 64-bit compare unless both operands are known 32-bit
    if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg64(current,i,rs2[i]);
    }
  }
  else
  if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(!((current->is32>>rs1[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
    }
  }
  else
  if(opcode[i]==0x11) // BC1
  {
    alloc_reg(current,i,FSREG);
    alloc_reg(current,i,CSREG);
  }
  //else ...
}
1755
e2b5e7aa 1756static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
57871462 1757{
1758 stubs[stubcount][0]=type;
1759 stubs[stubcount][1]=addr;
1760 stubs[stubcount][2]=retaddr;
1761 stubs[stubcount][3]=a;
1762 stubs[stubcount][4]=b;
1763 stubs[stubcount][5]=c;
1764 stubs[stubcount][6]=d;
1765 stubs[stubcount][7]=e;
1766 stubcount++;
1767}
1768
1769// Write out a single register
1770void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
1771{
1772 int hr;
1773 for(hr=0;hr<HOST_REGS;hr++) {
1774 if(hr!=EXCLUDE_REG) {
1775 if((regmap[hr]&63)==r) {
1776 if((dirty>>hr)&1) {
1777 if(regmap[hr]<64) {
1778 emit_storereg(r,hr);
57871462 1779 }else{
1780 emit_storereg(r|64,hr);
1781 }
1782 }
1783 }
1784 }
1785 }
1786}
1787
1788int mchecksum()
1789{
1790 //if(!tracedebug) return 0;
1791 int i;
1792 int sum=0;
1793 for(i=0;i<2097152;i++) {
1794 unsigned int temp=sum;
1795 sum<<=1;
1796 sum|=(~temp)>>31;
1797 sum^=((u_int *)rdram)[i];
1798 }
1799 return sum;
1800}
1801int rchecksum()
1802{
1803 int i;
1804 int sum=0;
1805 for(i=0;i<64;i++)
1806 sum^=((u_int *)reg)[i];
1807 return sum;
1808}
57871462 1809void rlist()
1810{
1811 int i;
1812 printf("TRACE: ");
1813 for(i=0;i<32;i++)
1814 printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
1815 printf("\n");
57871462 1816}
1817
// Debug aid: turn on the trace output produced by memdebug().
void enabletrace()
{
  tracedebug=1;
}
1822
// Debug aid: when the cycle counter is in a hard-coded window, print a
// memory checksum, the register file, and a peek at the host stack.
// NOTE(review): the (&i)[-1] / (&j)[N] reads inspect the caller's stack
// frame and are technically undefined behavior - debug-only code.
void memdebug(int i)
{
  //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]);
  //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum());
  //rlist();
  //if(tracedebug) {
  //if(Count>=-2084597794) {
  if((signed int)Count>=-2084597794&&(signed int)Count<0) {
  //if(0) {
    printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
    //printf("TRACE: count=%d next=%d (checksum %x) Status=%x\n",Count,next_interupt,mchecksum(),Status);
    //printf("TRACE: count=%d next=%d (checksum %x) hi=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[HIREG]>>32),(int)reg[HIREG]);
    rlist();
    #ifdef __i386__
    printf("TRACE: %x\n",(&i)[-1]);
    #endif
    #ifdef __arm__
    int j;
    printf("TRACE: %x \n",(&j)[10]);
    printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]);
    #endif
    //fflush(stdout);
  }
  //printf("TRACE: %x\n",(&i)[-1]);
}
1848
// Emit host code for three-operand ALU instructions using the register
// assignment in i_regs. A result register that was not allocated (get_reg
// returns <0) means the destination is dead, and nothing is emitted.
// Operands mapped to no host register are loaded with emit_loadreg.
void alu_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      signed char s1,s2,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      if(t>=0) {
        s1=get_reg(i_regs->regmap,rs1[i]);
        s2=get_reg(i_regs->regmap,rs2[i]);
        if(rs1[i]&&rs2[i]) {
          assert(s1>=0);
          assert(s2>=0);
          if(opcode2[i]&2) emit_sub(s1,s2,t);
          else emit_add(s1,s2,t);
        }
        else if(rs1[i]) {
          // rs2 is r0: add/sub degenerates to a move
          if(s1>=0) emit_mov(s1,t);
          else emit_loadreg(rs1[i],t);
        }
        else if(rs2[i]) {
          // rs1 is r0: result is rs2 (ADD) or its negation (SUB)
          if(s2>=0) {
            if(opcode2[i]&2) emit_neg(s2,t);
            else emit_mov(s2,t);
          }
          else {
            emit_loadreg(rs2[i],t);
            if(opcode2[i]&2) emit_neg(t,t);
          }
        }
        else emit_zeroreg(t);
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      signed char s1l,s2l,s1h,s2h,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      if(tl>=0) {
        s1l=get_reg(i_regs->regmap,rs1[i]);
        s2l=get_reg(i_regs->regmap,rs2[i]);
        s1h=get_reg(i_regs->regmap,rs1[i]|64);
        s2h=get_reg(i_regs->regmap,rs2[i]|64);
        if(rs1[i]&&rs2[i]) {
          assert(s1l>=0);
          assert(s2l>=0);
          // Low halves with flag-setting add/sub, then carry into the high halves
          if(opcode2[i]&2) emit_subs(s1l,s2l,tl);
          else emit_adds(s1l,s2l,tl);
          if(th>=0) {
            #ifdef INVERTED_CARRY
            if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);}
            #else
            if(opcode2[i]&2) emit_sbc(s1h,s2h,th);
            #endif
            else emit_add(s1h,s2h,th);
          }
        }
        else if(rs1[i]) {
          if(s1l>=0) emit_mov(s1l,tl);
          else emit_loadreg(rs1[i],tl);
          if(th>=0) {
            if(s1h>=0) emit_mov(s1h,th);
            else emit_loadreg(rs1[i]|64,th);
          }
        }
        else if(rs2[i]) {
          if(s2l>=0) {
            if(opcode2[i]&2) emit_negs(s2l,tl);
            else emit_mov(s2l,tl);
          }
          else {
            emit_loadreg(rs2[i],tl);
            if(opcode2[i]&2) emit_negs(tl,tl);
          }
          if(th>=0) {
            #ifdef INVERTED_CARRY
            if(s2h>=0) emit_mov(s2h,th);
            else emit_loadreg(rs2[i]|64,th);
            if(opcode2[i]&2) {
              emit_adcimm(-1,th); // x86 has inverted carry flag
              emit_not(th,th);
            }
            #else
            if(opcode2[i]&2) {
              if(s2h>=0) emit_rscimm(s2h,0,th);
              else {
                emit_loadreg(rs2[i]|64,th);
                emit_rscimm(th,0,th);
              }
            }else{
              if(s2h>=0) emit_mov(s2h,th);
              else emit_loadreg(rs2[i]|64,th);
            }
            #endif
          }
        }
        else {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
      }
    }
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      signed char s1l,s1h,s2l,s2h,t;
      // 64-bit compare path unless both sources were 32-bit on entry
      if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
      {
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s1h=get_reg(i_regs->regmap,rs1[i]|64);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          s2h=get_reg(i_regs->regmap,rs2[i]|64);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1h,31,t); // sign bit of the high word
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz64_32(s2h,s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz64_32(s2h,s2l,t);
          }
          else {
            assert(s1l>=0);assert(s1h>=0);
            assert(s2l>=0);assert(s2h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
            else // SLTU
              emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
          }
        }
      } else {
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1l,31,t);
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz32(s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz32(s2l,t);
          }
          else{
            assert(s1l>=0);assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less32(s1l,s2l,t);
            else // SLTU
              emit_set_if_carry32(s1l,s2l,t);
          }
        }
      }
    }
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      signed char s1l,s1h,s2l,s2h,th,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
      {
        // 64-bit path: operate on low and high halves separately
        assert(tl>=0);
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s1h=get_reg(i_regs->regmap,rs1[i]|64);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          s2h=get_reg(i_regs->regmap,rs2[i]|64);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);assert(s1h>=0);
            assert(s2l>=0);assert(s2h>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
              emit_and(s1h,s2h,th);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
              emit_or(s1h,s2h,th);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
              emit_xor(s1h,s2h,th);
            } else
            if(opcode2[i]==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_or(s1h,s2h,th);
              emit_not(tl,tl);
              emit_not(th,th);
            }
          }
          else
          {
            // One operand is r0: simplify per operation
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
              emit_zeroreg(th);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl);
                if(s1h>=0) emit_mov(s1h,th);
                else emit_loadreg(rs1[i]|64,th);
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl);
                if(s2h>=0) emit_mov(s2h,th);
                else emit_loadreg(rs2[i]|64,th);
              }
              else{
                emit_zeroreg(tl);
                emit_zeroreg(th);
              }
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else{
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
                if(s1h>=0) emit_not(s1h,th);
                else{
                  emit_loadreg(rs1[i]|64,th);
                  emit_not(th,th);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else{
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
                if(s2h>=0) emit_not(s2h,th);
                else{
                  emit_loadreg(rs2[i]|64,th);
                  emit_not(th,th);
                }
              }
              else {
                emit_movimm(-1,tl);
                emit_movimm(-1,th);
              }
            }
          }
        }
      }
      else
      {
        // 32 bit
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);
            assert(s2l>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_not(tl,tl);
            }
          }
          else
          {
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
              }
              else emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else {
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else {
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
              }
              else emit_movimm(-1,tl);
            }
          }
        }
      }
    }
  }
}
2180
/*
 * Emit host code for MIPS I-type instructions with a 16-bit immediate:
 * LUI, ADDI/ADDIU, DADDI/DADDIU, SLTI/SLTIU, ANDI/ORI/XORI.
 *
 * i      - index of the instruction in the decoded block arrays
 *          (opcode[], rs1[], rt1[], imm[], constmap[], ...)
 * i_regs - host register allocation state for this instruction
 *
 * Conventions visible below:
 *  - rt1[i]==0 (writes to MIPS r0) are skipped entirely.
 *  - get_reg() returning <0 means the value is not in a host register;
 *    the low/high 32-bit halves of a 64-bit guest reg are looked up as
 *    reg and reg|64.
 *  - If the target host reg already holds a known constant
 *    (isconst bit set), nothing is emitted; if the source was a known
 *    constant (wasconst), the operation is folded at compile time via
 *    constmap[i][] and a single movimm is emitted instead.
 */
void imm16_assemble(int i,struct regstat *i_regs)
{
 2183 if (opcode[i]==0x0f) { // LUI
 2184 if(rt1[i]) {
 2185 signed char t;
 2186 t=get_reg(i_regs->regmap,rt1[i]);
 2187 //assert(t>=0);
 2188 if(t>=0) {
 2189 if(!((i_regs->isconst>>t)&1))
 2190 emit_movimm(imm[i]<<16,t);
 2191 }
 2192 }
 2193 }
 2194 if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
 2195 if(rt1[i]) {
 2196 signed char s,t;
 2197 t=get_reg(i_regs->regmap,rt1[i]);
 2198 s=get_reg(i_regs->regmap,rs1[i]);
 2199 if(rs1[i]) {
 2200 //assert(t>=0);
 2201 //assert(s>=0);
 2202 if(t>=0) {
 2203 if(!((i_regs->isconst>>t)&1)) {
 2204 if(s<0) {
 // Source not in a host reg: reload it into the target reg first
 // (unless the target already held it on block entry), then add.
 2205 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
 2206 emit_addimm(t,imm[i],t);
 2207 }else{
 2208 if(!((i_regs->wasconst>>s)&1))
 2209 emit_addimm(s,imm[i],t);
 2210 else
 2211 emit_movimm(constmap[i][s]+imm[i],t);
 2212 }
 2213 }
 2214 }
 // rs1==0: adding to MIPS r0, so the result is just the immediate.
 2215 } else {
 2216 if(t>=0) {
 2217 if(!((i_regs->isconst>>t)&1))
 2218 emit_movimm(imm[i],t);
 2219 }
 2220 }
 2221 }
 2222 }
 2223 if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
 2224 if(rt1[i]) {
 2225 signed char sh,sl,th,tl;
 2226 th=get_reg(i_regs->regmap,rt1[i]|64);
 2227 tl=get_reg(i_regs->regmap,rt1[i]);
 2228 sh=get_reg(i_regs->regmap,rs1[i]|64);
 2229 sl=get_reg(i_regs->regmap,rs1[i]);
 2230 if(tl>=0) {
 2231 if(rs1[i]) {
 2232 assert(sh>=0);
 2233 assert(sl>=0);
 2234 if(th>=0) {
 // Full 64-bit add with carry into the high half.
 2235 emit_addimm64_32(sh,sl,imm[i],th,tl);
 2236 }
 2237 else {
 // Upper half not live: a 32-bit add of the low half suffices.
 2238 emit_addimm(sl,imm[i],tl);
 2239 }
 2240 } else {
 // rs1==0: result is the sign-extended immediate.
 2241 emit_movimm(imm[i],tl);
 2242 if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
 2243 }
 2244 }
 2245 }
 2246 }
 2247 else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
 2248 if(rt1[i]) {
 2249 //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
 2250 signed char sh,sl,t;
 2251 t=get_reg(i_regs->regmap,rt1[i]);
 2252 sh=get_reg(i_regs->regmap,rs1[i]|64);
 2253 sl=get_reg(i_regs->regmap,rs1[i]);
 2254 //assert(t>=0);
 2255 if(t>=0) {
 2256 if(rs1[i]>0) {
 // was32 tracks guest regs known to hold 32-bit (sign-extended)
 // values; with no high half allocated the source must be 32-bit.
 2257 if(sh<0) assert((i_regs->was32>>rs1[i])&1);
 2258 if(sh<0||((i_regs->was32>>rs1[i])&1)) {
 2259 if(opcode[i]==0x0a) { // SLTI
 2260 if(sl<0) {
 2261 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
 2262 emit_slti32(t,imm[i],t);
 2263 }else{
 2264 emit_slti32(sl,imm[i],t);
 2265 }
 2266 }
 2267 else { // SLTIU
 2268 if(sl<0) {
 2269 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
 2270 emit_sltiu32(t,imm[i],t);
 2271 }else{
 2272 emit_sltiu32(sl,imm[i],t);
 2273 }
 2274 }
 2275 }else{ // 64-bit
 2276 assert(sl>=0);
 2277 if(opcode[i]==0x0a) // SLTI
 2278 emit_slti64_32(sh,sl,imm[i],t);
 2279 else // SLTIU
 2280 emit_sltiu64_32(sh,sl,imm[i],t);
 2281 }
 2282 }else{
 2283 // SLTI(U) with r0 is just stupid,
 2284 // nonetheless examples can be found
 // Comparing against r0 (==0): the result is a compile-time constant.
 2285 if(opcode[i]==0x0a) // SLTI
 2286 if(0<imm[i]) emit_movimm(1,t);
 2287 else emit_zeroreg(t);
 2288 else // SLTIU
 2289 {
 2290 if(imm[i]) emit_movimm(1,t);
 2291 else emit_zeroreg(t);
 2292 }
 2293 }
 2294 }
 2295 }
 2296 }
 2297 else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
 2298 if(rt1[i]) {
 2299 signed char sh,sl,th,tl;
 2300 th=get_reg(i_regs->regmap,rt1[i]|64);
 2301 tl=get_reg(i_regs->regmap,rt1[i]);
 2302 sh=get_reg(i_regs->regmap,rs1[i]|64);
 2303 sl=get_reg(i_regs->regmap,rs1[i]);
 2304 if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
 2305 if(opcode[i]==0x0c) //ANDI
 2306 {
 2307 if(rs1[i]) {
 2308 if(sl<0) {
 2309 if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
 2310 emit_andimm(tl,imm[i],tl);
 2311 }else{
 2312 if(!((i_regs->wasconst>>sl)&1))
 2313 emit_andimm(sl,imm[i],tl);
 2314 else
 2315 emit_movimm(constmap[i][sl]&imm[i],tl);
 2316 }
 2317 }
 2318 else
 2319 emit_zeroreg(tl);
 // ANDI with a zero-extended 16-bit immediate always clears the
 // upper 32 bits of the 64-bit result.
 2320 if(th>=0) emit_zeroreg(th);
 2321 }
 2322 else
 2323 {
 // ORI/XORI: the immediate is zero-extended, so the high half of
 // the result is just a copy of the source's high half.
 2324 if(rs1[i]) {
 2325 if(sl<0) {
 2326 if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
 2327 }
 2328 if(th>=0) {
 2329 if(sh<0) {
 2330 emit_loadreg(rs1[i]|64,th);
 2331 }else{
 2332 emit_mov(sh,th);
 2333 }
 2334 }
581335b0 2335 if(opcode[i]==0x0d) { // ORI
 2336 if(sl<0) {
 2337 emit_orimm(tl,imm[i],tl);
 2338 }else{
 2339 if(!((i_regs->wasconst>>sl)&1))
 2340 emit_orimm(sl,imm[i],tl);
 2341 else
 2342 emit_movimm(constmap[i][sl]|imm[i],tl);
 2343 }
57871462 2344 }
581335b0 2345 if(opcode[i]==0x0e) { // XORI
 2346 if(sl<0) {
 2347 emit_xorimm(tl,imm[i],tl);
 2348 }else{
 2349 if(!((i_regs->wasconst>>sl)&1))
 2350 emit_xorimm(sl,imm[i],tl);
 2351 else
 2352 emit_movimm(constmap[i][sl]^imm[i],tl);
 2353 }
57871462 2354 }
 2355 }
 2356 else {
 // rs1==0: OR/XOR with zero yields the immediate itself.
 2357 emit_movimm(imm[i],tl);
 2358 if(th>=0) emit_zeroreg(th);
 2359 }
 2360 }
 2361 }
 2362 }
 2363 }
2364}
2365
/*
 * Emit host code for MIPS shift-by-immediate instructions:
 * SLL/SRL/SRA (32-bit) and DSLL/DSRL/DSRA plus the *32 variants
 * (64-bit, shift amounts 32..63 encoded separately).
 *
 * i      - instruction index into the decoded block arrays
 * i_regs - host register allocation state for this instruction
 *
 * rt1[i]==0 (target is MIPS r0) is skipped; rs1[i]==0 (shifting r0)
 * collapses to zeroing the destination. imm[i]==0 degenerates to a
 * plain register move.
 */
void shiftimm_assemble(int i,struct regstat *i_regs)
{
 2368 if(opcode2[i]<=0x3) // SLL/SRL/SRA
 2369 {
 2370 if(rt1[i]) {
 2371 signed char s,t;
 2372 t=get_reg(i_regs->regmap,rt1[i]);
 2373 s=get_reg(i_regs->regmap,rs1[i]);
 2374 //assert(t>=0);
dc49e339 2375 if(t>=0&&!((i_regs->isconst>>t)&1)){
57871462 2376 if(rs1[i]==0)
 2377 {
 2378 emit_zeroreg(t);
 2379 }
 2380 else
 2381 {
 // Source not in a host reg: reload it into the target first.
 2382 if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
 2383 if(imm[i]) {
 2384 if(opcode2[i]==0) // SLL
 2385 {
 2386 emit_shlimm(s<0?t:s,imm[i],t);
 2387 }
 2388 if(opcode2[i]==2) // SRL
 2389 {
 2390 emit_shrimm(s<0?t:s,imm[i],t);
 2391 }
 2392 if(opcode2[i]==3) // SRA
 2393 {
 2394 emit_sarimm(s<0?t:s,imm[i],t);
 2395 }
 2396 }else{
 2397 // Shift by zero
 2398 if(s>=0 && s!=t) emit_mov(s,t);
 2399 }
 2400 }
 2401 }
 2402 //emit_storereg(rt1[i],t); //DEBUG
 2403 }
 2404 }
 2405 if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
 2406 {
 2407 if(rt1[i]) {
 2408 signed char sh,sl,th,tl;
 2409 th=get_reg(i_regs->regmap,rt1[i]|64);
 2410 tl=get_reg(i_regs->regmap,rt1[i]);
 2411 sh=get_reg(i_regs->regmap,rs1[i]|64);
 2412 sl=get_reg(i_regs->regmap,rs1[i]);
 2413 if(tl>=0) {
 2414 if(rs1[i]==0)
 2415 {
 2416 emit_zeroreg(tl);
 2417 if(th>=0) emit_zeroreg(th);
 2418 }
 2419 else
 2420 {
 2421 assert(sl>=0);
 2422 assert(sh>=0);
 2423 if(imm[i]) {
 // 64-bit shifts (amount 1..31) are built from a double-precision
 // shift for the half receiving carried-in bits plus a single shift.
 2424 if(opcode2[i]==0x38) // DSLL
 2425 {
 2426 if(th>=0) emit_shldimm(sh,sl,imm[i],th);
 2427 emit_shlimm(sl,imm[i],tl);
 2428 }
 2429 if(opcode2[i]==0x3a) // DSRL
 2430 {
 2431 emit_shrdimm(sl,sh,imm[i],tl);
 2432 if(th>=0) emit_shrimm(sh,imm[i],th);
 2433 }
 2434 if(opcode2[i]==0x3b) // DSRA
 2435 {
 2436 emit_shrdimm(sl,sh,imm[i],tl);
 2437 if(th>=0) emit_sarimm(sh,imm[i],th);
 2438 }
 2439 }else{
 2440 // Shift by zero
 2441 if(sl!=tl) emit_mov(sl,tl);
 2442 if(th>=0&&sh!=th) emit_mov(sh,th);
 2443 }
 2444 }
 2445 }
 2446 }
 2447 }
 2448 if(opcode2[i]==0x3c) // DSLL32
 2449 {
 // Shift amount is 32+imm: low source half moves to the high result
 // half, low result half becomes zero, then shift the remainder.
 2450 if(rt1[i]) {
 2451 signed char sl,tl,th;
 2452 tl=get_reg(i_regs->regmap,rt1[i]);
 2453 th=get_reg(i_regs->regmap,rt1[i]|64);
 2454 sl=get_reg(i_regs->regmap,rs1[i]);
 2455 if(th>=0||tl>=0){
 2456 assert(tl>=0);
 2457 assert(th>=0);
 2458 assert(sl>=0);
 2459 emit_mov(sl,th);
 2460 emit_zeroreg(tl);
 2461 if(imm[i]>32)
 2462 {
 2463 emit_shlimm(th,imm[i]&31,th);
 2464 }
 2465 }
 2466 }
 2467 }
 2468 if(opcode2[i]==0x3e) // DSRL32
 2469 {
 // High source half moves to the low result half; high result is zero.
 2470 if(rt1[i]) {
 2471 signed char sh,tl,th;
 2472 tl=get_reg(i_regs->regmap,rt1[i]);
 2473 th=get_reg(i_regs->regmap,rt1[i]|64);
 2474 sh=get_reg(i_regs->regmap,rs1[i]|64);
 2475 if(tl>=0){
 2476 assert(sh>=0);
 2477 emit_mov(sh,tl);
 2478 if(th>=0) emit_zeroreg(th);
 2479 if(imm[i]>32)
 2480 {
 2481 emit_shrimm(tl,imm[i]&31,tl);
 2482 }
 2483 }
 2484 }
 2485 }
 2486 if(opcode2[i]==0x3f) // DSRA32
 2487 {
 // Arithmetic variant; only the low result half is produced here.
 2488 if(rt1[i]) {
 2489 signed char sh,tl;
 2490 tl=get_reg(i_regs->regmap,rt1[i]);
 2491 sh=get_reg(i_regs->regmap,rs1[i]|64);
 2492 if(tl>=0){
 2493 assert(sh>=0);
 2494 emit_mov(sh,tl);
 2495 if(imm[i]>32)
 2496 {
 2497 emit_sarimm(tl,imm[i]&31,tl);
 2498 }
 2499 }
 2500 }
 2501 }
2502}
2503
#ifndef shift_assemble
/* Fallback stub: each target architecture is expected to provide its own
 * shift_assemble (shift-by-register emitter) via its assem_*.h backend.
 * Reaching this generic version at runtime is fatal. */
void shift_assemble(int i,struct regstat *i_regs)
{
  printf("Need shift_assemble for this architecture.\n");
  exit(1);
}
#endif
2511
/*
 * Emit host code for MIPS load instructions:
 * LB/LH/LW/LBU/LHU/LWU/LD (opcodes 0x20,0x21,0x23,0x24,0x25,0x27,0x37).
 *
 * i      - instruction index into the decoded block arrays
 * i_regs - host register allocation state for this instruction
 *
 * Fast path: if the address is not a known constant (c==0), emit a
 * range check (emit_fastpath_cmp_jump) whose out-of-line slow path is
 * registered with add_stub(); if it is a known constant, either emit a
 * direct RAM access (memtarget) or fall back to an inline read stub
 * for I/O space. Loads whose result is unused (rt1==0 or target reg
 * not allocated) may still be forced through for their side effects
 * (e.g. hardware FIFO reads) — see the "(forced read)" case below.
 */
void load_assemble(int i,struct regstat *i_regs)
{
 2514 int s,th,tl,addr,map=-1;
 2515 int offset;
 2516 int jaddr=0;
5bf843dc 2517 int memtarget=0,c=0;
b1570849 2518 int fastload_reg_override=0;
57871462 2519 u_int hr,reglist=0;
 2520 th=get_reg(i_regs->regmap,rt1[i]|64);
 2521 tl=get_reg(i_regs->regmap,rt1[i]);
 2522 s=get_reg(i_regs->regmap,rs1[i]);
 2523 offset=imm[i];
 // reglist collects live host regs for the stub to save/restore.
 2524 for(hr=0;hr<HOST_REGS;hr++) {
 2525 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
 2526 }
 2527 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
 2528 if(s>=0) {
 2529 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2530 if (c) {
 // Constant address: decide at compile time whether it hits RAM.
 2531 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2532 }
57871462 2533 }
57871462 2534 //printf("load_assemble: c=%d\n",c);
 2535 //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
 2536 // FIXME: Even if the load is a NOP, we should check for pagefaults...
581335b0 2537 if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80))
f18c0f46 2538 ||rt1[i]==0) {
 2539 // could be FIFO, must perform the read
 2540 // ||dummy read
 2541 assem_debug("(forced read)\n");
 // Borrow a scratch host register for the discarded result.
 2542 tl=get_reg(i_regs->regmap,-1);
 2543 assert(tl>=0);
5bf843dc 2544 }
 2545 if(offset||s<0||c) addr=tl;
 2546 else addr=s;
535d208a 2547 //if(tl<0) tl=get_reg(i_regs->regmap,-1);
 2548 if(tl>=0) {
 2549 //printf("load_assemble: c=%d\n",c);
 2550 //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
 2551 assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
 2552 reglist&=~(1<<tl);
 2553 if(th>=0) reglist&=~(1<<th);
1edfcc68 2554 if(!c) {
 2555 #ifdef RAM_OFFSET
 2556 map=get_reg(i_regs->regmap,ROREG);
 2557 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
 2558 #endif
 2559 #ifdef R29_HACK
 2560 // Strmnnrmn's speed hack
 2561 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
 2562 #endif
 2563 {
 2564 jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
535d208a 2565 }
1edfcc68 2566 }
 2567 else if(ram_offset&&memtarget) {
 2568 emit_addimm(addr,ram_offset,HOST_TEMPREG);
 2569 fastload_reg_override=HOST_TEMPREG;
535d208a 2570 }
 2571 int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
 2572 if (opcode[i]==0x20) { // LB
 2573 if(!c||memtarget) {
 2574 if(!dummy) {
57871462 2575 #ifdef HOST_IMM_ADDR32
 2576 if(c)
 2577 emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
 2578 else
 2579 #endif
 2580 {
 2581 //emit_xorimm(addr,3,tl);
57871462 2582 //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
535d208a 2583 int x=0,a=tl;
2002a1db 2584#ifdef BIG_ENDIAN_MIPS
 // Byte lane swizzle: XOR the address (or fold into the constant
 // displacement x) to emulate big-endian byte order on LE hosts.
57871462 2585 if(!c) emit_xorimm(addr,3,tl);
 2586 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 2587#else
535d208a 2588 if(!c) a=addr;
dadf55f2 2589#endif
b1570849 2590 if(fastload_reg_override) a=fastload_reg_override;
 2591
535d208a 2592 emit_movsbl_indexed_tlb(x,a,map,tl);
57871462 2593 }
57871462 2594 }
535d208a 2595 if(jaddr)
 2596 add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2597 }
535d208a 2598 else
 // Known non-RAM address: call the read handler inline.
 2599 inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
 2600 }
 2601 if (opcode[i]==0x21) { // LH
 2602 if(!c||memtarget) {
 2603 if(!dummy) {
57871462 2604 #ifdef HOST_IMM_ADDR32
 2605 if(c)
 2606 emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
 2607 else
 2608 #endif
 2609 {
535d208a 2610 int x=0,a=tl;
2002a1db 2611#ifdef BIG_ENDIAN_MIPS
57871462 2612 if(!c) emit_xorimm(addr,2,tl);
 2613 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 2614#else
535d208a 2615 if(!c) a=addr;
dadf55f2 2616#endif
b1570849 2617 if(fastload_reg_override) a=fastload_reg_override;
57871462 2618 //#ifdef
 2619 //emit_movswl_indexed_tlb(x,tl,map,tl);
 2620 //else
 2621 if(map>=0) {
535d208a 2622 emit_movswl_indexed(x,a,tl);
 2623 }else{
a327ad27 2624 #if 1 //def RAM_OFFSET
535d208a 2625 emit_movswl_indexed(x,a,tl);
 2626 #else
 2627 emit_movswl_indexed((int)rdram-0x80000000+x,a,tl);
 2628 #endif
 2629 }
57871462 2630 }
57871462 2631 }
535d208a 2632 if(jaddr)
 2633 add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2634 }
535d208a 2635 else
 2636 inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
 2637 }
 2638 if (opcode[i]==0x23) { // LW
 2639 if(!c||memtarget) {
 2640 if(!dummy) {
dadf55f2 2641 int a=addr;
b1570849 2642 if(fastload_reg_override) a=fastload_reg_override;
57871462 2643 //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
 2644 #ifdef HOST_IMM_ADDR32
 2645 if(c)
 2646 emit_readword_tlb(constmap[i][s]+offset,map,tl);
 2647 else
 2648 #endif
dadf55f2 2649 emit_readword_indexed_tlb(0,a,map,tl);
57871462 2650 }
535d208a 2651 if(jaddr)
 2652 add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2653 }
535d208a 2654 else
 2655 inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
 2656 }
 2657 if (opcode[i]==0x24) { // LBU
 2658 if(!c||memtarget) {
 2659 if(!dummy) {
57871462 2660 #ifdef HOST_IMM_ADDR32
 2661 if(c)
 2662 emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
 2663 else
 2664 #endif
 2665 {
 2666 //emit_xorimm(addr,3,tl);
57871462 2667 //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
535d208a 2668 int x=0,a=tl;
2002a1db 2669#ifdef BIG_ENDIAN_MIPS
57871462 2670 if(!c) emit_xorimm(addr,3,tl);
 2671 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 2672#else
535d208a 2673 if(!c) a=addr;
dadf55f2 2674#endif
b1570849 2675 if(fastload_reg_override) a=fastload_reg_override;
 2676
535d208a 2677 emit_movzbl_indexed_tlb(x,a,map,tl);
57871462 2678 }
57871462 2679 }
535d208a 2680 if(jaddr)
 2681 add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2682 }
535d208a 2683 else
 2684 inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
 2685 }
 2686 if (opcode[i]==0x25) { // LHU
 2687 if(!c||memtarget) {
 2688 if(!dummy) {
57871462 2689 #ifdef HOST_IMM_ADDR32
 2690 if(c)
 2691 emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
 2692 else
 2693 #endif
 2694 {
535d208a 2695 int x=0,a=tl;
2002a1db 2696#ifdef BIG_ENDIAN_MIPS
57871462 2697 if(!c) emit_xorimm(addr,2,tl);
 2698 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 2699#else
535d208a 2700 if(!c) a=addr;
dadf55f2 2701#endif
b1570849 2702 if(fastload_reg_override) a=fastload_reg_override;
57871462 2703 //#ifdef
 2704 //emit_movzwl_indexed_tlb(x,tl,map,tl);
 2705 //#else
 2706 if(map>=0) {
535d208a 2707 emit_movzwl_indexed(x,a,tl);
 2708 }else{
a327ad27 2709 #if 1 //def RAM_OFFSET
535d208a 2710 emit_movzwl_indexed(x,a,tl);
 2711 #else
 2712 emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl);
 2713 #endif
 2714 }
57871462 2715 }
 2716 }
535d208a 2717 if(jaddr)
 2718 add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2719 }
535d208a 2720 else
 2721 inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
 2722 }
 2723 if (opcode[i]==0x27) { // LWU
 2724 assert(th>=0);
 2725 if(!c||memtarget) {
 2726 if(!dummy) {
dadf55f2 2727 int a=addr;
b1570849 2728 if(fastload_reg_override) a=fastload_reg_override;
57871462 2729 //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
 2730 #ifdef HOST_IMM_ADDR32
 2731 if(c)
 2732 emit_readword_tlb(constmap[i][s]+offset,map,tl);
 2733 else
 2734 #endif
dadf55f2 2735 emit_readword_indexed_tlb(0,a,map,tl);
57871462 2736 }
535d208a 2737 if(jaddr)
 2738 add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
 2739 }
 2740 else {
 2741 inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
57871462 2742 }
 // LWU zero-extends: clear the high half of the 64-bit destination.
535d208a 2743 emit_zeroreg(th);
 2744 }
 2745 if (opcode[i]==0x37) { // LD
 2746 if(!c||memtarget) {
 2747 if(!dummy) {
dadf55f2 2748 int a=addr;
b1570849 2749 if(fastload_reg_override) a=fastload_reg_override;
57871462 2750 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
 2751 //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
 2752 #ifdef HOST_IMM_ADDR32
 2753 if(c)
 2754 emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
 2755 else
 2756 #endif
dadf55f2 2757 emit_readdword_indexed_tlb(0,a,map,th,tl);
57871462 2758 }
535d208a 2759 if(jaddr)
 2760 add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2761 }
535d208a 2762 else
 2763 inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
57871462 2764 }
535d208a 2765 }
 2766 //emit_storereg(rt1[i],tl); // DEBUG
57871462 2767 //if(opcode[i]==0x23)
 2768 //if(opcode[i]==0x24)
 2769 //if(opcode[i]==0x23||opcode[i]==0x24)
 2770 /*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24)
 2771 {
 2772 //emit_pusha();
 2773 save_regs(0x100f);
 2774 emit_readword((int)&last_count,ECX);
 2775 #ifdef __i386__
 2776 if(get_reg(i_regs->regmap,CCREG)<0)
 2777 emit_loadreg(CCREG,HOST_CCREG);
 2778 emit_add(HOST_CCREG,ECX,HOST_CCREG);
 2779 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
 2780 emit_writeword(HOST_CCREG,(int)&Count);
 2781 #endif
 2782 #ifdef __arm__
 2783 if(get_reg(i_regs->regmap,CCREG)<0)
 2784 emit_loadreg(CCREG,0);
 2785 else
 2786 emit_mov(HOST_CCREG,0);
 2787 emit_add(0,ECX,0);
 2788 emit_addimm(0,2*ccadj[i],0);
 2789 emit_writeword(0,(int)&Count);
 2790 #endif
 2791 emit_call((int)memdebug);
 2792 //emit_popa();
 2793 restore_regs(0x100f);
581335b0 2794 }*/
57871462 2795}
2796
#ifndef loadlr_assemble
/* Fallback stub: the unaligned-load emitter (LWL/LWR/LDL/LDR) must be
 * provided by the architecture backend; there is no generic version. */
void loadlr_assemble(int i,struct regstat *i_regs)
{
  printf("Need loadlr_assemble for this architecture.\n");
  exit(1);
}
#endif
2804
/*
 * Emit host code for MIPS aligned store instructions:
 * SB/SH/SW/SD (opcodes 0x28, 0x29, 0x2B, 0x3F).
 *
 * i      - instruction index into the decoded block arrays
 * i_regs - host register allocation state for this instruction
 *
 * Layout mirrors load_assemble: a fast-path range check (or compile-time
 * decision for constant addresses) with an out-of-line stub for I/O,
 * followed by a self-modifying-code check against the invalid_code map
 * so writes into already-translated pages invalidate those blocks.
 */
void store_assemble(int i,struct regstat *i_regs)
{
 2807 int s,th,tl,map=-1;
 2808 int addr,temp;
 2809 int offset;
581335b0 2810 int jaddr=0,type;
666a299d 2811 int memtarget=0,c=0;
57871462 2812 int agr=AGEN1+(i&1);
b1570849 2813 int faststore_reg_override=0;
57871462 2814 u_int hr,reglist=0;
 // tl/th hold the value to store (rs2), s the base address (rs1).
 2815 th=get_reg(i_regs->regmap,rs2[i]|64);
 2816 tl=get_reg(i_regs->regmap,rs2[i]);
 2817 s=get_reg(i_regs->regmap,rs1[i]);
 2818 temp=get_reg(i_regs->regmap,agr);
 2819 if(temp<0) temp=get_reg(i_regs->regmap,-1);
 2820 offset=imm[i];
 2821 if(s>=0) {
 2822 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2823 if(c) {
 2824 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2825 }
57871462 2826 }
 2827 assert(tl>=0);
 2828 assert(temp>=0);
 2829 for(hr=0;hr<HOST_REGS;hr++) {
 2830 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
 2831 }
 2832 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
 2833 if(offset||s<0||c) addr=temp;
 2834 else addr=s;
1edfcc68 2835 if(!c) {
 2836 jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
 2837 }
 2838 else if(ram_offset&&memtarget) {
 2839 emit_addimm(addr,ram_offset,HOST_TEMPREG);
 2840 faststore_reg_override=HOST_TEMPREG;
57871462 2841 }
 2842
 2843 if (opcode[i]==0x28) { // SB
 2844 if(!c||memtarget) {
97a238a6 2845 int x=0,a=temp;
2002a1db 2846#ifdef BIG_ENDIAN_MIPS
 // Byte lane swizzle for big-endian guest order on an LE host.
57871462 2847 if(!c) emit_xorimm(addr,3,temp);
 2848 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 2849#else
97a238a6 2850 if(!c) a=addr;
dadf55f2 2851#endif
b1570849 2852 if(faststore_reg_override) a=faststore_reg_override;
57871462 2853 //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
97a238a6 2854 emit_writebyte_indexed_tlb(tl,x,a,map,a);
57871462 2855 }
 2856 type=STOREB_STUB;
 2857 }
 2858 if (opcode[i]==0x29) { // SH
 2859 if(!c||memtarget) {
97a238a6 2860 int x=0,a=temp;
2002a1db 2861#ifdef BIG_ENDIAN_MIPS
57871462 2862 if(!c) emit_xorimm(addr,2,temp);
 2863 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 2864#else
97a238a6 2865 if(!c) a=addr;
dadf55f2 2866#endif
b1570849 2867 if(faststore_reg_override) a=faststore_reg_override;
57871462 2868 //#ifdef
 2869 //emit_writehword_indexed_tlb(tl,x,temp,map,temp);
 2870 //#else
 2871 if(map>=0) {
97a238a6 2872 emit_writehword_indexed(tl,x,a);
57871462 2873 }else
a327ad27 2874 //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
 2875 emit_writehword_indexed(tl,x,a);
57871462 2876 }
 2877 type=STOREH_STUB;
 2878 }
 2879 if (opcode[i]==0x2B) { // SW
dadf55f2 2880 if(!c||memtarget) {
 2881 int a=addr;
b1570849 2882 if(faststore_reg_override) a=faststore_reg_override;
57871462 2883 //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr);
dadf55f2 2884 emit_writeword_indexed_tlb(tl,0,a,map,temp);
 2885 }
57871462 2886 type=STOREW_STUB;
 2887 }
 2888 if (opcode[i]==0x3F) { // SD
 2889 if(!c||memtarget) {
dadf55f2 2890 int a=addr;
b1570849 2891 if(faststore_reg_override) a=faststore_reg_override;
57871462 2892 if(rs2[i]) {
 2893 assert(th>=0);
 2894 //emit_writeword_indexed(th,(int)rdram-0x80000000,addr);
 2895 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr);
dadf55f2 2896 emit_writedword_indexed_tlb(th,tl,0,a,map,temp);
57871462 2897 }else{
 2898 // Store zero
 2899 //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
 2900 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
dadf55f2 2901 emit_writedword_indexed_tlb(tl,tl,0,a,map,temp);
57871462 2902 }
 2903 }
 2904 type=STORED_STUB;
 2905 }
b96d3df7 2906 if(jaddr) {
 2907 // PCSX store handlers don't check invcode again
 2908 reglist|=1<<addr;
 2909 add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
 2910 jaddr=0;
 2911 }
 // Self-modifying code check: compare invalid_code[addr>>12] and call
 // the invalidation handler if this page holds translated code.
1edfcc68 2912 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
57871462 2913 if(!c||memtarget) {
 2914 #ifdef DESTRUCTIVE_SHIFT
 2915 // The x86 shift operation is 'destructive'; it overwrites the
 2916 // source register, so we need to make a copy first and use that.
 2917 addr=temp;
 2918 #endif
 2919 #if defined(HOST_IMM8)
 2920 int ir=get_reg(i_regs->regmap,INVCP);
 2921 assert(ir>=0);
 2922 emit_cmpmem_indexedsr12_reg(ir,addr,1);
 2923 #else
 2924 emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1);
 2925 #endif
0bbd1454 2926 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
 2927 emit_callne(invalidate_addr_reg[addr]);
 2928 #else
581335b0 2929 int jaddr2=(int)out;
57871462 2930 emit_jne(0);
 2931 add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),addr,0,0,0);
0bbd1454 2932 #endif
57871462 2933 }
 2934 }
 // NOTE(review): addr_val is computed unconditionally, reading
 // constmap[i][s] even when s<0 or !c; it is only *used* under c, but
 // the read itself indexes with a possibly negative s — confirm s>=0
 // is guaranteed whenever this line is reached, or guard it.
7a518516 2935 u_int addr_val=constmap[i][s]+offset;
3eaa7048 2936 if(jaddr) {
 2937 add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
 2938 } else if(c&&!memtarget) {
7a518516 2939 inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist);
 2940 }
 2941 // basic current block modification detection..
 2942 // not looking back as that should be in mips cache already
 2943 if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
c43b5311 2944 SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
7a518516 2945 assert(i_regs->regmap==regs[i].regmap); // not delay slot
 2946 if(i_regs->regmap==regs[i].regmap) {
 // Write back all dirty/constant regs and restart via the
 // interrupt path at the next pc, since this block is now stale.
 2947 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
 2948 wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
 2949 emit_movimm(start+i*4+4,0);
 2950 emit_writeword(0,(int)&pcaddr);
 2951 emit_jmp((int)do_interrupt);
 2952 }
3eaa7048 2953 }
57871462 2954 //if(opcode[i]==0x2B || opcode[i]==0x3F)
 2955 //if(opcode[i]==0x2B || opcode[i]==0x28)
 2956 //if(opcode[i]==0x2B || opcode[i]==0x29)
 2957 //if(opcode[i]==0x2B)
 2958 /*if(opcode[i]==0x2B || opcode[i]==0x28 || opcode[i]==0x29 || opcode[i]==0x3F)
 2959 {
28d74ee8 2960 #ifdef __i386__
 2961 emit_pusha();
 2962 #endif
 2963 #ifdef __arm__
57871462 2964 save_regs(0x100f);
28d74ee8 2965 #endif
57871462 2966 emit_readword((int)&last_count,ECX);
 2967 #ifdef __i386__
 2968 if(get_reg(i_regs->regmap,CCREG)<0)
 2969 emit_loadreg(CCREG,HOST_CCREG);
 2970 emit_add(HOST_CCREG,ECX,HOST_CCREG);
 2971 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
 2972 emit_writeword(HOST_CCREG,(int)&Count);
 2973 #endif
 2974 #ifdef __arm__
 2975 if(get_reg(i_regs->regmap,CCREG)<0)
 2976 emit_loadreg(CCREG,0);
 2977 else
 2978 emit_mov(HOST_CCREG,0);
 2979 emit_add(0,ECX,0);
 2980 emit_addimm(0,2*ccadj[i],0);
 2981 emit_writeword(0,(int)&Count);
 2982 #endif
 2983 emit_call((int)memdebug);
28d74ee8 2984 #ifdef __i386__
 2985 emit_popa();
 2986 #endif
 2987 #ifdef __arm__
57871462 2988 restore_regs(0x100f);
28d74ee8 2989 #endif
581335b0 2990 }*/
57871462 2991}
2992
2993void storelr_assemble(int i,struct regstat *i_regs)
2994{
2995 int s,th,tl;
2996 int temp;
581335b0 2997 int temp2=-1;
57871462 2998 int offset;
581335b0 2999 int jaddr=0;
57871462 3000 int case1,case2,case3;
3001 int done0,done1,done2;
af4ee1fe 3002 int memtarget=0,c=0;
fab5d06d 3003 int agr=AGEN1+(i&1);
57871462 3004 u_int hr,reglist=0;
3005 th=get_reg(i_regs->regmap,rs2[i]|64);
3006 tl=get_reg(i_regs->regmap,rs2[i]);
3007 s=get_reg(i_regs->regmap,rs1[i]);
fab5d06d 3008 temp=get_reg(i_regs->regmap,agr);
3009 if(temp<0) temp=get_reg(i_regs->regmap,-1);
57871462 3010 offset=imm[i];
3011 if(s>=0) {
3012 c=(i_regs->isconst>>s)&1;
af4ee1fe 3013 if(c) {
3014 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3015 }
57871462 3016 }
3017 assert(tl>=0);
3018 for(hr=0;hr<HOST_REGS;hr++) {
3019 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3020 }
535d208a 3021 assert(temp>=0);
1edfcc68 3022 if(!c) {
3023 emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
3024 if(!offset&&s!=temp) emit_mov(s,temp);
3025 jaddr=(int)out;
3026 emit_jno(0);
3027 }
3028 else
3029 {
3030 if(!memtarget||!rs1[i]) {
535d208a 3031 jaddr=(int)out;
3032 emit_jmp(0);
57871462 3033 }
535d208a 3034 }
1edfcc68 3035 #ifdef RAM_OFFSET
3036 int map=get_reg(i_regs->regmap,ROREG);
3037 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3038 #else
9f51b4b9 3039 if((u_int)rdram!=0x80000000)
1edfcc68 3040 emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
3041 #endif
535d208a 3042
3043 if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
3044 temp2=get_reg(i_regs->regmap,FTEMP);
3045 if(!rs2[i]) temp2=th=tl;
3046 }
57871462 3047
2002a1db 3048#ifndef BIG_ENDIAN_MIPS
3049 emit_xorimm(temp,3,temp);
3050#endif
535d208a 3051 emit_testimm(temp,2);
3052 case2=(int)out;
3053 emit_jne(0);
3054 emit_testimm(temp,1);
3055 case1=(int)out;
3056 emit_jne(0);
3057 // 0
3058 if (opcode[i]==0x2A) { // SWL
3059 emit_writeword_indexed(tl,0,temp);
3060 }
3061 if (opcode[i]==0x2E) { // SWR
3062 emit_writebyte_indexed(tl,3,temp);
3063 }
3064 if (opcode[i]==0x2C) { // SDL
3065 emit_writeword_indexed(th,0,temp);
3066 if(rs2[i]) emit_mov(tl,temp2);
3067 }
3068 if (opcode[i]==0x2D) { // SDR
3069 emit_writebyte_indexed(tl,3,temp);
3070 if(rs2[i]) emit_shldimm(th,tl,24,temp2);
3071 }
3072 done0=(int)out;
3073 emit_jmp(0);
3074 // 1
3075 set_jump_target(case1,(int)out);
3076 if (opcode[i]==0x2A) { // SWL
3077 // Write 3 msb into three least significant bytes
3078 if(rs2[i]) emit_rorimm(tl,8,tl);
3079 emit_writehword_indexed(tl,-1,temp);
3080 if(rs2[i]) emit_rorimm(tl,16,tl);
3081 emit_writebyte_indexed(tl,1,temp);
3082 if(rs2[i]) emit_rorimm(tl,8,tl);
3083 }
3084 if (opcode[i]==0x2E) { // SWR
3085 // Write two lsb into two most significant bytes
3086 emit_writehword_indexed(tl,1,temp);
3087 }
3088 if (opcode[i]==0x2C) { // SDL
3089 if(rs2[i]) emit_shrdimm(tl,th,8,temp2);
3090 // Write 3 msb into three least significant bytes
3091 if(rs2[i]) emit_rorimm(th,8,th);
3092 emit_writehword_indexed(th,-1,temp);
3093 if(rs2[i]) emit_rorimm(th,16,th);
3094 emit_writebyte_indexed(th,1,temp);
3095 if(rs2[i]) emit_rorimm(th,8,th);
3096 }
3097 if (opcode[i]==0x2D) { // SDR
3098 if(rs2[i]) emit_shldimm(th,tl,16,temp2);
3099 // Write two lsb into two most significant bytes
3100 emit_writehword_indexed(tl,1,temp);
3101 }
3102 done1=(int)out;
3103 emit_jmp(0);
3104 // 2
3105 set_jump_target(case2,(int)out);
3106 emit_testimm(temp,1);
3107 case3=(int)out;
3108 emit_jne(0);
3109 if (opcode[i]==0x2A) { // SWL
3110 // Write two msb into two least significant bytes
3111 if(rs2[i]) emit_rorimm(tl,16,tl);
3112 emit_writehword_indexed(tl,-2,temp);
3113 if(rs2[i]) emit_rorimm(tl,16,tl);
3114 }
3115 if (opcode[i]==0x2E) { // SWR
3116 // Write 3 lsb into three most significant bytes
3117 emit_writebyte_indexed(tl,-1,temp);
3118 if(rs2[i]) emit_rorimm(tl,8,tl);
3119 emit_writehword_indexed(tl,0,temp);
3120 if(rs2[i]) emit_rorimm(tl,24,tl);
3121 }
3122 if (opcode[i]==0x2C) { // SDL
3123 if(rs2[i]) emit_shrdimm(tl,th,16,temp2);
3124 // Write two msb into two least significant bytes
3125 if(rs2[i]) emit_rorimm(th,16,th);
3126 emit_writehword_indexed(th,-2,temp);
3127 if(rs2[i]) emit_rorimm(th,16,th);
3128 }
3129 if (opcode[i]==0x2D) { // SDR
3130 if(rs2[i]) emit_shldimm(th,tl,8,temp2);
3131 // Write 3 lsb into three most significant bytes
3132 emit_writebyte_indexed(tl,-1,temp);
3133 if(rs2[i]) emit_rorimm(tl,8,tl);
3134 emit_writehword_indexed(tl,0,temp);
3135 if(rs2[i]) emit_rorimm(tl,24,tl);
3136 }
3137 done2=(int)out;
3138 emit_jmp(0);
3139 // 3
3140 set_jump_target(case3,(int)out);
3141 if (opcode[i]==0x2A) { // SWL
3142 // Write msb into least significant byte
3143 if(rs2[i]) emit_rorimm(tl,24,tl);
3144 emit_writebyte_indexed(tl,-3,temp);
3145 if(rs2[i]) emit_rorimm(tl,8,tl);
3146 }
3147 if (opcode[i]==0x2E) { // SWR
3148 // Write entire word
3149 emit_writeword_indexed(tl,-3,temp);
3150 }
3151 if (opcode[i]==0x2C) { // SDL
3152 if(rs2[i]) emit_shrdimm(tl,th,24,temp2);
3153 // Write msb into least significant byte
3154 if(rs2[i]) emit_rorimm(th,24,th);
3155 emit_writebyte_indexed(th,-3,temp);
3156 if(rs2[i]) emit_rorimm(th,8,th);
3157 }
3158 if (opcode[i]==0x2D) { // SDR
3159 if(rs2[i]) emit_mov(th,temp2);
3160 // Write entire word
3161 emit_writeword_indexed(tl,-3,temp);
3162 }
3163 set_jump_target(done0,(int)out);
3164 set_jump_target(done1,(int)out);
3165 set_jump_target(done2,(int)out);
3166 if (opcode[i]==0x2C) { // SDL
3167 emit_testimm(temp,4);
57871462 3168 done0=(int)out;
57871462 3169 emit_jne(0);
535d208a 3170 emit_andimm(temp,~3,temp);
3171 emit_writeword_indexed(temp2,4,temp);
3172 set_jump_target(done0,(int)out);
3173 }
3174 if (opcode[i]==0x2D) { // SDR
3175 emit_testimm(temp,4);
3176 done0=(int)out;
3177 emit_jeq(0);
3178 emit_andimm(temp,~3,temp);
3179 emit_writeword_indexed(temp2,-4,temp);
57871462 3180 set_jump_target(done0,(int)out);
57871462 3181 }
535d208a 3182 if(!c||!memtarget)
3183 add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist);
1edfcc68 3184 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
535d208a 3185 #ifdef RAM_OFFSET
3186 int map=get_reg(i_regs->regmap,ROREG);
3187 if(map<0) map=HOST_TEMPREG;
3188 gen_orig_addr_w(temp,map);
3189 #else
57871462 3190 emit_addimm_no_flags((u_int)0x80000000-(u_int)rdram,temp);
535d208a 3191 #endif
57871462 3192 #if defined(HOST_IMM8)
3193 int ir=get_reg(i_regs->regmap,INVCP);
3194 assert(ir>=0);
3195 emit_cmpmem_indexedsr12_reg(ir,temp,1);
3196 #else
3197 emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1);
3198 #endif
535d208a 3199 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
3200 emit_callne(invalidate_addr_reg[temp]);
3201 #else
581335b0 3202 int jaddr2=(int)out;
57871462 3203 emit_jne(0);
3204 add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),temp,0,0,0);
535d208a 3205 #endif
57871462 3206 }
3207 /*
3208 emit_pusha();
3209 //save_regs(0x100f);
3210 emit_readword((int)&last_count,ECX);
3211 if(get_reg(i_regs->regmap,CCREG)<0)
3212 emit_loadreg(CCREG,HOST_CCREG);
3213 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3214 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3215 emit_writeword(HOST_CCREG,(int)&Count);
3216 emit_call((int)memdebug);
3217 emit_popa();
3218 //restore_regs(0x100f);
581335b0