drc: starting arm64 support
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - new_dynarec.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21#include <stdlib.h>
22#include <stdint.h> //include for uint64_t
23#include <assert.h>
d848b60a 24#include <errno.h>
4600ba03 25#include <sys/mman.h>
d148d265 26#ifdef __MACH__
27#include <libkern/OSCacheControl.h>
28#endif
1e212a25 29#ifdef _3DS
30#include <3ds_utils.h>
31#endif
32#ifdef VITA
33#include <psp2/kernel/sysmem.h>
34static int sceBlock;
35#endif
57871462 36
d148d265 37#include "new_dynarec_config.h"
dd79da89 38#include "../psxhle.h" //emulator interface
3d624f89 39#include "emu_if.h" //emulator interface
57871462 40
b14b6a8f 41#ifndef ARRAY_SIZE
42#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
43#endif
44
4600ba03 45//#define DISASM
46//#define assem_debug printf
47//#define inv_debug printf
48#define assem_debug(...)
49#define inv_debug(...)
57871462 50
51#ifdef __i386__
52#include "assem_x86.h"
53#endif
54#ifdef __x86_64__
55#include "assem_x64.h"
56#endif
57#ifdef __arm__
58#include "assem_arm.h"
59#endif
be516ebe 60#ifdef __aarch64__
61#include "assem_arm64.h"
62#endif
57871462 63
64#define MAXBLOCK 4096
65#define MAX_OUTPUT_BLOCK_SIZE 262144
2573466a 66
b14b6a8f 67// stubs
68enum stub_type {
69 CC_STUB = 1,
70 FP_STUB = 2,
71 LOADB_STUB = 3,
72 LOADH_STUB = 4,
73 LOADW_STUB = 5,
74 LOADD_STUB = 6,
75 LOADBU_STUB = 7,
76 LOADHU_STUB = 8,
77 STOREB_STUB = 9,
78 STOREH_STUB = 10,
79 STOREW_STUB = 11,
80 STORED_STUB = 12,
81 STORELR_STUB = 13,
82 INVCODE_STUB = 14,
83};
84
// Per-instruction register allocation state tracked by the recompiler.
struct regstat
{
  signed char regmap_entry[HOST_REGS]; // mapping required on entry to this insn
  signed char regmap[HOST_REGS];       // host reg -> MIPS reg (-1 = free; see get_reg/clear_all_regs)
  uint64_t wasdirty;  // per-host-reg dirty bits before the insn
  uint64_t dirty;     // per-host-reg dirty bits after the insn (see dirty_reg)
  uint64_t u;         // NOTE(review): appears to mirror unneeded_reg[] — confirm
  u_int wasconst;     // host regs that held known constants before
  u_int isconst;      // host regs holding known constants now (see set_const)
  u_int loadedconst; // host regs that have constants loaded
  u_int waswritten; // MIPS regs that were used as store base before
};
97
// Linked-list node mapping a guest virtual address to compiled code
// (used for jump_in/jump_out/jump_dirty page lists).
// note: asm depends on this layout
struct ll_entry
{
  u_int vaddr;          // guest (MIPS) virtual address of the block
  u_int reg_sv_flags;   // register flags set via ll_add_flags; 0 by default
  void *addr;           // pointer into the translation cache
  struct ll_entry *next;
};
106
// Two-entry hash table bin; slot 0 is the most recently used
// (hash_table_add demotes slot 0 to slot 1, evicting the old slot 1).
struct ht_entry
{
  u_int vaddr[2];   // guest addresses; -1 marks an empty slot (see remove_hash)
  void *tcaddr[2];  // corresponding translation cache addresses
};
112
// A pending out-of-line stub queued during assembly (see add_stub/add_stub_r).
struct code_stub
{
  enum stub_type type;
  void *addr;     // location of the stub in the translation cache
  void *retaddr;  // address the stub returns to
  u_int a;        // a..e: type-specific arguments; meaning depends on 'type'
  uintptr_t b;
  uintptr_t c;
  u_int d;
  u_int e;
};
124
// Record of a jump that must be patched to point at another block
// (queued via add_to_linker, stored in link_addr[]).
struct link_entry
{
  void *addr;   // location of the jump instruction in the output
  u_int target; // guest address the jump should reach
  u_int ext;    // NOTE(review): 'ext' flag forwarded from add_to_linker — exact semantics not visible here
};
131
e2b5e7aa 132 // used by asm:
133 u_char *out;
df4dc2b1 134 struct ht_entry hash_table[65536] __attribute__((aligned(16)));
e2b5e7aa 135 struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
136 struct ll_entry *jump_dirty[4096];
137
138 static struct ll_entry *jump_out[4096];
139 static u_int start;
140 static u_int *source;
141 static char insn[MAXBLOCK][10];
142 static u_char itype[MAXBLOCK];
143 static u_char opcode[MAXBLOCK];
144 static u_char opcode2[MAXBLOCK];
145 static u_char bt[MAXBLOCK];
146 static u_char rs1[MAXBLOCK];
147 static u_char rs2[MAXBLOCK];
148 static u_char rt1[MAXBLOCK];
149 static u_char rt2[MAXBLOCK];
150 static u_char us1[MAXBLOCK];
151 static u_char us2[MAXBLOCK];
152 static u_char dep1[MAXBLOCK];
153 static u_char dep2[MAXBLOCK];
154 static u_char lt1[MAXBLOCK];
bedfea38 155 static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
156 static uint64_t gte_rt[MAXBLOCK];
157 static uint64_t gte_unneeded[MAXBLOCK];
ffb0b9e0 158 static u_int smrv[32]; // speculated MIPS register values
159 static u_int smrv_strong; // mask or regs that are likely to have correct values
160 static u_int smrv_weak; // same, but somewhat less likely
161 static u_int smrv_strong_next; // same, but after current insn executes
162 static u_int smrv_weak_next;
e2b5e7aa 163 static int imm[MAXBLOCK];
164 static u_int ba[MAXBLOCK];
165 static char likely[MAXBLOCK];
166 static char is_ds[MAXBLOCK];
167 static char ooo[MAXBLOCK];
168 static uint64_t unneeded_reg[MAXBLOCK];
e2b5e7aa 169 static uint64_t branch_unneeded_reg[MAXBLOCK];
e2b5e7aa 170 static signed char regmap_pre[MAXBLOCK][HOST_REGS];
956f3129 171 static uint64_t current_constmap[HOST_REGS];
172 static uint64_t constmap[MAXBLOCK][HOST_REGS];
173 static struct regstat regs[MAXBLOCK];
174 static struct regstat branch_regs[MAXBLOCK];
e2b5e7aa 175 static signed char minimum_free_regs[MAXBLOCK];
176 static u_int needed_reg[MAXBLOCK];
177 static u_int wont_dirty[MAXBLOCK];
178 static u_int will_dirty[MAXBLOCK];
179 static int ccadj[MAXBLOCK];
180 static int slen;
df4dc2b1 181 static void *instr_addr[MAXBLOCK];
643aeae3 182 static struct link_entry link_addr[MAXBLOCK];
e2b5e7aa 183 static int linkcount;
b14b6a8f 184 static struct code_stub stubs[MAXBLOCK*3];
e2b5e7aa 185 static int stubcount;
186 static u_int literals[1024][2];
187 static int literalcount;
188 static int is_delayslot;
e2b5e7aa 189 static char shadow[1048576] __attribute__((aligned(16)));
190 static void *copy;
191 static int expirep;
192 static u_int stop_after_jal;
a327ad27 193#ifndef RAM_FIXED
01d26796 194 static uintptr_t ram_offset;
a327ad27 195#else
01d26796 196 static const uintptr_t ram_offset=0;
a327ad27 197#endif
e2b5e7aa 198
199 int new_dynarec_hacks;
200 int new_dynarec_did_compile;
57871462 201 extern u_char restore_candidate[512];
202 extern int cycle_count;
203
204 /* registers that may be allocated */
205 /* 1-31 gpr */
206#define HIREG 32 // hi
207#define LOREG 33 // lo
00fa9369 208//#define FSREG 34 // FPU status (FCSR)
57871462 209#define CSREG 35 // Coprocessor status
210#define CCREG 36 // Cycle count
211#define INVCP 37 // Pointer to invalid_code
1edfcc68 212//#define MMREG 38 // Pointer to memory_map
9c45ca93 213//#define ROREG 39 // ram offset (if rdram!=0x80000000)
619e5ded 214#define TEMPREG 40
215#define FTEMP 40 // FPU temporary register
216#define PTEMP 41 // Prefetch temporary register
1edfcc68 217//#define TLREG 42 // TLB mapping offset
619e5ded 218#define RHASH 43 // Return address hash
219#define RHTBL 44 // Return address hash table address
220#define RTEMP 45 // JR/JALR address register
221#define MAXREG 45
222#define AGEN1 46 // Address generation temporary register
1edfcc68 223//#define AGEN2 47 // Address generation temporary register
224//#define MGEN1 48 // Maptable address generation temporary register
225//#define MGEN2 49 // Maptable address generation temporary register
619e5ded 226#define BTREG 50 // Branch target temporary register
57871462 227
228 /* instruction types */
229#define NOP 0 // No operation
230#define LOAD 1 // Load
231#define STORE 2 // Store
232#define LOADLR 3 // Unaligned load
233#define STORELR 4 // Unaligned store
9f51b4b9 234#define MOV 5 // Move
57871462 235#define ALU 6 // Arithmetic/logic
236#define MULTDIV 7 // Multiply/divide
237#define SHIFT 8 // Shift by register
238#define SHIFTIMM 9// Shift by immediate
239#define IMM16 10 // 16-bit immediate
240#define RJUMP 11 // Unconditional jump to register
241#define UJUMP 12 // Unconditional jump
242#define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
243#define SJUMP 14 // Conditional branch (regimm format)
244#define COP0 15 // Coprocessor 0
245#define COP1 16 // Coprocessor 1
246#define C1LS 17 // Coprocessor 1 load/store
ad49de89 247//#define FJUMP 18 // Conditional branch (floating point)
00fa9369 248//#define FLOAT 19 // Floating point unit
249//#define FCONV 20 // Convert integer to float
250//#define FCOMP 21 // Floating point compare (sets FSREG)
57871462 251#define SYSCALL 22// SYSCALL
252#define OTHER 23 // Other
253#define SPAN 24 // Branch/delay slot spans 2 pages
254#define NI 25 // Not implemented
7139f3c8 255#define HLECALL 26// PCSX fake opcodes for HLE
b9b61529 256#define COP2 27 // Coprocessor 2 move
257#define C2LS 28 // Coprocessor 2 load/store
258#define C2OP 29 // Coprocessor 2 operation
1e973cb0 259#define INTCALL 30// Call interpreter to handle rare corner cases
57871462 260
57871462 261 /* branch codes */
262#define TAKEN 1
263#define NOTTAKEN 2
264#define NULLDS 3
265
266// asm linkage
267int new_recompile_block(int addr);
268void *get_addr_ht(u_int vaddr);
269void invalidate_block(u_int block);
270void invalidate_addr(u_int addr);
271void remove_hash(int vaddr);
57871462 272void dyna_linker();
273void dyna_linker_ds();
274void verify_code();
275void verify_code_vm();
276void verify_code_ds();
277void cc_interrupt();
278void fp_exception();
279void fp_exception_ds();
7139f3c8 280void jump_syscall_hle();
7139f3c8 281void jump_hlecall();
1e973cb0 282void jump_intcall();
7139f3c8 283void new_dyna_leave();
57871462 284
57871462 285// Needed by assembler
ad49de89 286static void wb_register(signed char r,signed char regmap[],uint64_t dirty);
287static void wb_dirtys(signed char i_regmap[],uint64_t i_dirty);
288static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_dirty,int addr);
e2b5e7aa 289static void load_all_regs(signed char i_regmap[]);
290static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
291static void load_regs_entry(int t);
ad49de89 292static void load_all_consts(signed char regmap[],u_int dirty,int i);
e2b5e7aa 293
294static int verify_dirty(u_int *ptr);
295static int get_final_value(int hr, int i, int *value);
b14b6a8f 296static void add_stub(enum stub_type type, void *addr, void *retaddr,
297 u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e);
298static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
299 int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist);
643aeae3 300static void add_to_linker(void *addr, u_int target, int ext);
8062d65a 301static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override);
57871462 302
// Toggle the translation cache between writable and executable on
// platforms that forbid W+X mappings (NO_WRITE_EXEC); no-op elsewhere.
static void mprotect_w_x(void *start, void *end, int is_x)
{
#ifdef NO_WRITE_EXEC
  #if defined(VITA)
  // *Open* enables write on all memory that was
  // allocated by sceKernelAllocMemBlockForVM()?
  if (is_x)
    sceKernelCloseVMDomain();
  else
    sceKernelOpenVMDomain();
  #else
  u_long mstart = (u_long)start & ~4095ul;  // round start down to a page boundary
  u_long mend = (u_long)end;
  if (mprotect((void *)mstart, mend - mstart,
               PROT_READ | (is_x ? PROT_EXEC : PROT_WRITE)) != 0)
    SysPrintf("mprotect(%c) failed: %s\n", is_x ? 'x' : 'w', strerror(errno));
  #endif
#endif
}
322
// Make the [start, end) region of the translation cache writable
// before emitting code into it.
static void start_tcache_write(void *start, void *end)
{
  mprotect_w_x(start, end, 0);
}
327
// Flush the instruction cache for the freshly written [start, end)
// range and flip it back to executable.
static void end_tcache_write(void *start, void *end)
{
#ifdef __arm__
  size_t len = (char *)end - (char *)start;
  #if defined(__BLACKBERRY_QNX__)
  msync(start, len, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
  #elif defined(__MACH__)
  sys_cache_control(kCacheFunctionPrepareForExecution, start, len);
  #elif defined(VITA)
  sceKernelSyncVMDomain(sceBlock, start, len);
  #elif defined(_3DS)
  ctr_flush_invalidate_cache();
  #else
  __clear_cache(start, end);
  #endif
  (void)len; // not used by every branch above
#else
  __clear_cache(start, end);
#endif

  mprotect_w_x(start, end, 1);
}
350
// Begin emitting a block at the current output pointer: unprotect the
// (clamped) output window and return the block's start address.
static void *start_block(void)
{
  u_char *end = out + MAX_OUTPUT_BLOCK_SIZE;
  if (end > translation_cache + (1<<TARGET_SIZE_2))
    end = translation_cache + (1<<TARGET_SIZE_2);  // don't run past the cache end
  start_tcache_write(out, end);
  return out;
}
359
// Finish the block begun at 'start': sync icache for [start, out) and
// re-protect the region as executable.
static void end_block(void *start)
{
  end_tcache_write(start, out);
}
364
57871462 365//#define DEBUG_CYCLE_COUNT 1
366
b6e87b2b 367#define NO_CYCLE_PENALTY_THR 12
368
int cycle_multiplier; // 100 for 1.0

// Scale a cycle count by cycle_multiplier (a percentage), rounding to
// the nearest integer, halves away from zero.
static int CLOCK_ADJUST(int x)
{
  // Explicit sign test instead of (x>>31)|1: right-shifting a negative
  // signed value is implementation-defined in C.
  int s = (x < 0) ? -1 : 1;
  return (x * cycle_multiplier + s * 50) / 100;
}
376
// Map a PSX virtual address to an index into the jump_in/jump_out page
// tables. KSEG bits are stripped and RAM mirrors are collapsed; pages
// above 2048 are folded into the 2048..4095 range.
static u_int get_page(u_int vaddr)
{
  u_int masked = vaddr & ~0xe0000000;   // strip segment bits
  if (masked < 0x1000000)
    masked &= ~0x0e00000;               // RAM mirrors
  u_int page = masked >> 12;
  return (page > 2048) ? 2048 + (page & 2047) : page;
}

// no virtual mem in PCSX
static u_int get_vpage(u_int vaddr)
{
  return get_page(vaddr);
}
94d23bb9 392
df4dc2b1 393static struct ht_entry *hash_table_get(u_int vaddr)
394{
395 return &hash_table[((vaddr>>16)^vaddr)&0xFFFF];
396}
397
398static void hash_table_add(struct ht_entry *ht_bin, u_int vaddr, void *tcaddr)
399{
400 ht_bin->vaddr[1] = ht_bin->vaddr[0];
401 ht_bin->tcaddr[1] = ht_bin->tcaddr[0];
402 ht_bin->vaddr[0] = vaddr;
403 ht_bin->tcaddr[0] = tcaddr;
404}
405
// some messy ari64's code, seems to rely on unsigned 32bit overflow
// Returns nonzero when tcaddr is far enough behind the current output
// pointer that cache expiry won't reclaim it soon; the shift normalizes
// the distance to the full 32-bit range so the comparison wraps safely.
static int doesnt_expire_soon(void *tcaddr)
{
  u_int diff = (u_int)((u_char *)tcaddr - out) << (32-TARGET_SIZE_2);
  return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2)));
}
412
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
// Looks for a clean compiled block, then tries to revive a dirty one,
// and finally recompiles; on total failure raises a guest page fault.
void *get_addr(u_int vaddr)
{
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
  // Fast path: an already-compiled clean block in this page
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match %x: %p)\n",Count,next_interupt,vaddr,head->addr);
      hash_table_add(hash_table_get(vaddr), vaddr, head->addr);
      return head->addr;
    }
    head=head->next;
  }
  // Try to revive a dirty block whose code still matches guest RAM
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %p)\n",Count,next_interupt,vaddr,head->addr);
      // Don't restore blocks which are about to expire from the cache
      if (doesnt_expire_soon(head->addr))
      if (verify_dirty(head->addr)) {
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        invalid_code[vaddr>>12]=0;
        inv_code_start=inv_code_end=~0;
        if(vpage<2048) {
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        struct ht_entry *ht_bin = hash_table_get(vaddr);
        if (ht_bin->vaddr[0] == vaddr)
          ht_bin->tcaddr[0] = head->addr; // Replace existing entry
        else
          hash_table_add(ht_bin, vaddr, head->addr);

        return head->addr;
      }
    }
    head=head->next;
  }
  //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
  // Not compiled yet: compile now, then retry the lookup
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault exception
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
}
467// Look up address in hash table first
468void *get_addr_ht(u_int vaddr)
469{
470 //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
df4dc2b1 471 const struct ht_entry *ht_bin = hash_table_get(vaddr);
472 if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0];
473 if (ht_bin->vaddr[1] == vaddr) return ht_bin->tcaddr[1];
57871462 474 return get_addr(vaddr);
475}
476
57871462 477void clear_all_regs(signed char regmap[])
478{
479 int hr;
480 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
481}
482
483signed char get_reg(signed char regmap[],int r)
484{
485 int hr;
486 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
487 return -1;
488}
489
490// Find a register that is available for two consecutive cycles
491signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
492{
493 int hr;
494 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
495 return -1;
496}
497
498int count_free_regs(signed char regmap[])
499{
500 int count=0;
501 int hr;
502 for(hr=0;hr<HOST_REGS;hr++)
503 {
504 if(hr!=EXCLUDE_REG) {
505 if(regmap[hr]<0) count++;
506 }
507 }
508 return count;
509}
510
511void dirty_reg(struct regstat *cur,signed char reg)
512{
513 int hr;
514 if(!reg) return;
515 for (hr=0;hr<HOST_REGS;hr++) {
516 if((cur->regmap[hr]&63)==reg) {
517 cur->dirty|=1<<hr;
518 }
519 }
520}
521
// Record that MIPS reg 'reg' holds the known 64-bit constant 'value',
// updating every host reg mapped to it (low or high half).
void set_const(struct regstat *cur,signed char reg,uint64_t value)
{
  int hr;
  if(!reg) return; // $zero: constant by definition, nothing to track
  for (hr=0;hr<HOST_REGS;hr++) {
    if(cur->regmap[hr]==reg) {
      cur->isconst|=1<<hr;
      current_constmap[hr]=value;
    }
    else if((cur->regmap[hr]^64)==reg) {
      // host reg mapped to the upper 32 bits (regmap entry has bit 6 set)
      cur->isconst|=1<<hr;
      current_constmap[hr]=value>>32;
    }
  }
}
537
538void clear_const(struct regstat *cur,signed char reg)
539{
540 int hr;
541 if(!reg) return;
542 for (hr=0;hr<HOST_REGS;hr++) {
543 if((cur->regmap[hr]&63)==reg) {
544 cur->isconst&=~(1<<hr);
545 }
546 }
547}
548
549int is_const(struct regstat *cur,signed char reg)
550{
551 int hr;
79c75f1b 552 if(reg<0) return 0;
57871462 553 if(!reg) return 1;
554 for (hr=0;hr<HOST_REGS;hr++) {
555 if((cur->regmap[hr]&63)==reg) {
556 return (cur->isconst>>hr)&1;
557 }
558 }
559 return 0;
560}
561uint64_t get_const(struct regstat *cur,signed char reg)
562{
563 int hr;
564 if(!reg) return 0;
565 for (hr=0;hr<HOST_REGS;hr++) {
566 if(cur->regmap[hr]==reg) {
956f3129 567 return current_constmap[hr];
57871462 568 }
569 }
c43b5311 570 SysPrintf("Unknown constant in r%d\n",reg);
57871462 571 exit(1);
572}
573
// Least soon needed registers
// Look at the next ten instructions and see which registers
// will be used. Try not to reallocate these.
// hsn[r] receives the distance (in insns) to the next use of MIPS reg r;
// smaller values mean "needed sooner".
void lsn(u_char hsn[], int i, int *preferred_reg)
{
  int j;
  int b=-1;
  // Find how far ahead we may scan without crossing an unconditional jump
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  // Walk backwards so the nearest use wins (overwrites farther ones)
  for(;j>=0;j--)
  {
    if(rs1[i+j]) hsn[rs1[i+j]]=j;
    if(rs2[i+j]) hsn[rs2[i+j]]=j;
    if(rt1[i+j]) hsn[rt1[i+j]]=j;
    if(rt2[i+j]) hsn[rt2[i+j]]=j;
    if(itype[i+j]==STORE || itype[i+j]==STORELR) {
      // Stores can allocate zero
      hsn[rs1[i+j]]=j;
      hsn[rs2[i+j]]=j;
    }
    // On some architectures stores need invc_ptr
    #if defined(HOST_IMM8)
    if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
      hsn[INVCP]=j;
    }
    #endif
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP))
    {
      hsn[CCREG]=j;
      b=j; // remember nearest branch for the follow-branch pass below
    }
  }
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
        if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
        //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
        //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
      }
    }
    // TODO: preferred register based on backward branch
  }
  // Delay slot should preferably not overwrite branch conditions or cycle count
  if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP)) {
    if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
    if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
    hsn[CCREG]=1;
    // ...or hash tables
    hsn[RHASH]=1;
    hsn[RHTBL]=1;
  }
  // Coprocessor load/store needs FTEMP, even if not declared
  if(itype[i]==C1LS||itype[i]==C2LS) {
    hsn[FTEMP]=0;
  }
  // Load L/R also uses FTEMP as a temporary register
  if(itype[i]==LOADLR) {
    hsn[FTEMP]=0;
  }
  // Also SWL/SWR/SDL/SDR
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
    hsn[FTEMP]=0;
  }
  // Don't remove the miniht registers
  if(itype[i]==UJUMP||itype[i]==RJUMP)
  {
    hsn[RHASH]=0;
    hsn[RHTBL]=0;
  }
}
662
// We only want to allocate registers if we're going to use them again soon
// Returns 1 if MIPS reg r is read within the next few instructions
// (without becoming unneeded first), 0 otherwise.
int needed_again(int r, int i)
{
  int j;
  int b=-1;
  int rn=10; // distance to next use; 10 == "not needed"

  if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
  {
    if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
      return 0; // Don't need any registers if exiting the block
  }
  // Bound the lookahead window at block end / unconditional jump / syscall
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
    if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
    {
      break;
    }
  }
  // Scan backwards; an "unneeded" marker overrides any later use
  for(;j>=1;j--)
  {
    if(rs1[i+j]==r) rn=j;
    if(rs2[i+j]==r) rn=j;
    if((unneeded_reg[i+j]>>r)&1) rn=10;
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP))
    {
      b=j;
    }
  }
  /*
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int o=rn;
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        if(!((unneeded_reg[t+j]>>r)&1)) {
          if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
          if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
        }
        else rn=o;
      }
    }
  }*/
  if(rn<10) return 1;
  (void)b;
  return 0;
}
725
// Try to match register allocations at the end of a loop with those
// at the beginning
// Returns the host reg used for MIPS reg r at a backward branch target
// within range, or the caller's suggestion 'hr' otherwise.
int loop_reg(int i, int r, int hr)
{
  int j,k;
  // Limit lookahead to block end or the first unconditional jump
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  k=0;
  if(i>0){
    // Include the preceding branch insn in the scan (delay slot case)
    if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP)
      k--;
  }
  for(;k<j;k++)
  {
    assert(r < 64);
    if((unneeded_reg[i+k]>>r)&1) return hr; // reg dies before any branch
    if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP))
    {
      if(ba[i+k]>=start && ba[i+k]<(start+i*4))
      {
        // Backward branch: prefer the allocation at its target
        int t=(ba[i+k]-start)>>2;
        int reg=get_reg(regs[t].regmap_entry,r);
        if(reg>=0) return reg;
        //reg=get_reg(regs[t+1].regmap_entry,r);
        //if(reg>=0) return reg;
      }
    }
  }
  return hr;
}
767
768
769// Allocate every register, preserving source/target regs
770void alloc_all(struct regstat *cur,int i)
771{
772 int hr;
9f51b4b9 773
57871462 774 for(hr=0;hr<HOST_REGS;hr++) {
775 if(hr!=EXCLUDE_REG) {
776 if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
777 ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
778 {
779 cur->regmap[hr]=-1;
780 cur->dirty&=~(1<<hr);
781 }
782 // Don't need zeros
783 if((cur->regmap[hr]&63)==0)
784 {
785 cur->regmap[hr]=-1;
786 cur->dirty&=~(1<<hr);
787 }
788 }
789 }
790}
791
#ifdef DRC_DBG
extern void gen_interupt();
extern void do_insn_cmp();
#define FUNCNAME(f) { (intptr_t)f, " " #f }
// Table mapping helper-function addresses to printable names for
// disassembly/debug output.
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

// Return the name of a known helper at address 'a', or "" if unknown.
static const char *func_name(intptr_t a)
{
  size_t i; // size_t (was int): avoids signed/unsigned comparison with sizeof
  for (i = 0; i < ARRAY_SIZE(function_names); i++)
    if (function_names[i].addr == a)
      return function_names[i].name;
  return "";
}
#else
#define func_name(x) ""
#endif
832
57871462 833#ifdef __i386__
834#include "assem_x86.c"
835#endif
836#ifdef __x86_64__
837#include "assem_x64.c"
838#endif
839#ifdef __arm__
840#include "assem_arm.c"
841#endif
be516ebe 842#ifdef __aarch64__
843#include "assem_arm64.c"
844#endif
57871462 845
846// Add virtual address mapping to linked list
847void ll_add(struct ll_entry **head,int vaddr,void *addr)
848{
849 struct ll_entry *new_entry;
850 new_entry=malloc(sizeof(struct ll_entry));
851 assert(new_entry!=NULL);
852 new_entry->vaddr=vaddr;
de5a60c3 853 new_entry->reg_sv_flags=0;
57871462 854 new_entry->addr=addr;
855 new_entry->next=*head;
856 *head=new_entry;
857}
858
de5a60c3 859void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr)
57871462 860{
7139f3c8 861 ll_add(head,vaddr,addr);
de5a60c3 862 (*head)->reg_sv_flags=reg_sv_flags;
57871462 863}
864
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache
void *check_addr(u_int vaddr)
{
  // Fast path: hash table, rejecting entries close to expiry
  struct ht_entry *ht_bin = hash_table_get(vaddr);
  size_t i;
  for (i = 0; i < ARRAY_SIZE(ht_bin->vaddr); i++) {
    if (ht_bin->vaddr[i] == vaddr)
      if (doesnt_expire_soon((u_char *)ht_bin->tcaddr[i] - MAX_OUTPUT_BLOCK_SIZE))
        if (isclean(ht_bin->tcaddr[i]))
          return ht_bin->tcaddr[i];
  }
  // Slow path: scan the page's jump_in list
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while (head != NULL) {
    if (head->vaddr == vaddr) {
      if (doesnt_expire_soon(head->addr)) {
        // Update existing entry with current address
        if (ht_bin->vaddr[0] == vaddr) {
          ht_bin->tcaddr[0] = head->addr;
          return head->addr;
        }
        if (ht_bin->vaddr[1] == vaddr) {
          ht_bin->tcaddr[1] = head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if (ht_bin->vaddr[0] == -1) {
          ht_bin->vaddr[0] = vaddr;
          ht_bin->tcaddr[0] = head->addr;
        }
        else if (ht_bin->vaddr[1] == -1) {
          ht_bin->vaddr[1] = vaddr;
          ht_bin->tcaddr[1] = head->addr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0;
}
910
// Remove 'vaddr' from its hash table bin, keeping entries packed into
// slot 0 first (slot 1 is promoted when slot 0 is cleared).
void remove_hash(int vaddr)
{
  //printf("remove hash: %x\n",vaddr);
  struct ht_entry *ht_bin = hash_table_get(vaddr);
  if (ht_bin->vaddr[1] == vaddr) {
    ht_bin->vaddr[1] = -1;
    ht_bin->tcaddr[1] = NULL;
  }
  if (ht_bin->vaddr[0] == vaddr) {
    // Shift slot 1 (possibly just emptied above) into slot 0
    ht_bin->vaddr[0] = ht_bin->vaddr[1];
    ht_bin->tcaddr[0] = ht_bin->tcaddr[1];
    ht_bin->vaddr[1] = -1;
    ht_bin->tcaddr[1] = NULL;
  }
}
926
// Free every list entry whose code falls into the (shift-granular)
// region containing 'addr', removing its hash table entries too.
// Used when expiring old code from the translation cache.
void ll_remove_matching_addrs(struct ll_entry **head,uintptr_t addr,int shift)
{
  struct ll_entry *next;
  while(*head) {
    // Also match MAX_OUTPUT_BLOCK_SIZE before the entry, since a block
    // may extend across the region boundary.
    if(((uintptr_t)((*head)->addr)>>shift)==(addr>>shift) ||
       ((uintptr_t)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
    {
      inv_debug("EXP: Remove pointer to %p (%x)\n",(*head)->addr,(*head)->vaddr);
      remove_hash((*head)->vaddr);
      next=(*head)->next;
      free(*head);
      *head=next;
    }
    else
    {
      head=&((*head)->next);
    }
  }
}
946
947// Remove all entries from linked list
948void ll_clear(struct ll_entry **head)
949{
950 struct ll_entry *cur;
951 struct ll_entry *next;
581335b0 952 if((cur=*head)) {
57871462 953 *head=0;
954 while(cur) {
955 next=cur->next;
956 free(cur);
957 cur=next;
958 }
959 }
960}
961
// Dereference the pointers and remove if it matches
// For each entry, if its outgoing jump currently targets the given
// region, redirect the jump back to its own stub (unlinking it).
static void ll_kill_pointers(struct ll_entry *head,uintptr_t addr,int shift)
{
  while(head) {
    uintptr_t ptr = (uintptr_t)get_pointer(head->addr);
    inv_debug("EXP: Lookup pointer to %lx at %p (%x)\n",(long)ptr,head->addr,head->vaddr);
    if(((ptr>>shift)==(addr>>shift)) ||
       (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
    {
      inv_debug("EXP: Kill pointer at %p (%x)\n",head->addr,head->vaddr);
      void *host_addr=find_extjump_insn(head->addr);
      #ifdef __arm__
      mark_clear_cache(host_addr);
      #endif
      set_jump_target(host_addr, head->addr);
    }
    head=head->next;
  }
}
981
// This is called when we write to a compiled block (see do_invstub)
// Frees all jump_in entries for 'page' and unlinks every external jump
// recorded in jump_out so nothing branches into stale code.
void invalidate_page(u_int page)
{
  struct ll_entry *head;
  struct ll_entry *next;
  head=jump_in[page];
  jump_in[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: %x\n",head->vaddr);
    remove_hash(head->vaddr);
    next=head->next;
    free(head);
    head=next;
  }
  head=jump_out[page];
  jump_out[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr);
    // Point the external jump back at its own stub (unlink it)
    void *host_addr=find_extjump_insn(head->addr);
    #ifdef __arm__
    mark_clear_cache(host_addr);
    #endif
    set_jump_target(host_addr, head->addr);
    next=head->next;
    free(head);
    head=next;
  }
}
9be4ba64 1010
// Invalidate the page containing 'block' plus adjacent pages in
// [first, last] that compiled code spans into.
static void invalidate_block_range(u_int block, u_int first, u_int last)
{
  u_int page=get_page(block<<12);
  //printf("first=%d last=%d\n",first,last);
  invalidate_page(page);
  assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
  assert(last<page+5);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  while(first<page) {
    invalidate_page(first);
    first++;
  }
  for(first=page+1;first<last;first++) {
    invalidate_page(first);
  }
  #if defined(__arm__) || defined(__aarch64__)
  do_clear_cache();
  #endif

  // Don't trap writes
  invalid_code[block]=1;

  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
}
9be4ba64 1037
// Invalidate all compiled code touching the 4K guest page 'block',
// widening the range to every RAM page spanned by dirty blocks that
// overlap it.
void invalidate_block(u_int block)
{
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  while(head!=NULL) {
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      u_char *start, *end;
      // Ask the backend which guest bytes this block was compiled from
      get_bounds(head->addr, &start, &end);
      //printf("start: %p end: %p\n", start, end);
      if (page < 2048 && start >= rdram && end < rdram+RAM_SIZE) {
        if (((start-rdram)>>12) <= page && ((end-1-rdram)>>12) >= page) {
          if ((((start-rdram)>>12)&2047) < first) first = ((start-rdram)>>12)&2047;
          if ((((end-1-rdram)>>12)&2047) > last) last = ((end-1-rdram)>>12)&2047;
        }
      }
    }
    head=head->next;
  }
  invalidate_block_range(block,first,last);
}
1065
// Called on a write to 'addr'.  If the write hits compiled code, invalidate
// the affected blocks; otherwise grow the inv_code_start/inv_code_end
// "known clean" window so future writes in the same area can be skipped
// cheaply by the caller.
void invalidate_addr(u_int addr)
{
  //static int rhits;
  // this check is done by the caller
  //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
  u_int page=get_vpage(addr);
  if(page<2048) { // RAM
    struct ll_entry *head;
    u_int addr_min=~0, addr_max=0;
    u_int mask=RAM_SIZE-1;
    u_int addr_main=0x80000000|(addr&mask);  // canonical KSEG0 alias of addr
    int pg1;
    inv_code_start=addr_main&~0xfff;
    inv_code_end=addr_main|0xfff;
    pg1=page;
    if (pg1>0) {
      // must check previous page too because of spans..
      pg1--;
      inv_code_start-=0x1000;
    }
    for(;pg1<=page;pg1++) {
      for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
        u_char *start_h, *end_h;
        u_int start, end;
        get_bounds(head->addr, &start_h, &end_h);
        // Convert host pointers back to emulated RAM offsets
        start = (uintptr_t)start_h - ram_offset;
        end = (uintptr_t)end_h - ram_offset;
        if(start<=addr_main&&addr_main<end) {
          // Write lands inside this block: record the hit span
          if(start<addr_min) addr_min=start;
          if(end>addr_max) addr_max=end;
        }
        else if(addr_main<start) {
          // Block is above the write: shrink the clean window from above
          if(start<inv_code_end)
            inv_code_end=start-1;
        }
        else {
          // Block is below the write: shrink the clean window from below
          if(end>inv_code_start)
            inv_code_start=end;
        }
      }
    }
    if (addr_min!=~0) {
      inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
      inv_code_start=inv_code_end=~0;
      invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
      return;
    }
    else {
      // No hit: publish the clean window in the caller's address mirror
      inv_code_start=(addr&~mask)|(inv_code_start&mask);
      inv_code_end=(addr&~mask)|(inv_code_end&mask);
      inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
      return;
    }
  }
  // Not RAM (e.g. BIOS/scratchpad): fall back to whole-page invalidation
  invalidate_block(addr>>12);
}
9be4ba64 1122
dd3a91a1 1123// This is called when loading a save state.
1124// Anything could have changed, so invalidate everything.
57871462 1125void invalidate_all_pages()
1126{
581335b0 1127 u_int page;
57871462 1128 for(page=0;page<4096;page++)
1129 invalidate_page(page);
1130 for(page=0;page<1048576;page++)
1131 if(!invalid_code[page]) {
1132 restore_candidate[(page&2047)>>3]|=1<<(page&7);
1133 restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
1134 }
57871462 1135 #ifdef USE_MINI_HT
1136 memset(mini_ht,-1,sizeof(mini_ht));
1137 #endif
57871462 1138}
1139
1140// Add an entry to jump_out after making a link
// Add an entry to jump_out after making a link, so the direct jump at
// 'src' (targeting guest address 'vaddr') can be unlinked later when the
// target is invalidated.
void add_link(u_int vaddr,void *src)
{
  u_int page=get_page(vaddr);
  inv_debug("add_link: %p -> %x (%d)\n",src,vaddr,page);
  // Sanity-check the instruction after the branch: expected to be an ARM
  // "ldr pc, [pc, #...]" (0x059fxxxx) literal-pool jump.
  // NOTE(review): this encoding check is ARM32-specific — confirm it is
  // compiled out / harmless on other targets.
  int *ptr=(int *)(src+4);
  assert((*ptr&0x0fff0000)==0x059f0000);
  (void)ptr;
  ll_add(jump_out+page,vaddr,src);
  //void *ptr=get_pointer(src);
  //inv_debug("add_link: Pointer is to %p\n",ptr);
}
1152
1153// If a code block was found to be unmodified (bit was set in
1154// restore_candidate) and it remains unmodified (bit is clear
1155// in invalid_code) then move the entries for that 4K page from
1156// the dirty list to the clean list.
// If a code block was found to be unmodified (bit was set in
// restore_candidate) and it remains unmodified (bit is clear
// in invalid_code) then move the entries for that 4K page from
// the dirty list to the clean list.
void clean_blocks(u_int page)
{
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if (doesnt_expire_soon(head->addr)) {
        // Only restore if the compiled code still matches guest memory
        if(verify_dirty(head->addr)) {
          u_char *start, *end;
          //printf("Possibly Restore %x (%p)\n",head->vaddr, head->addr);
          u_int i;
          u_int inv=0;
          get_bounds(head->addr, &start, &end);
          if (start - rdram < RAM_SIZE) {
            // Check every RAM page the block spans for invalidation
            for (i = (start-rdram+0x80000000)>>12; i <= (end-1-rdram+0x80000000)>>12; i++) {
              inv|=invalid_code[i];
            }
          }
          else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
            // Outside RAM: never restore
            inv=1;
          }
          if(!inv) {
            void *clean_addr = get_clean_addr(head->addr);
            if (doesnt_expire_soon(clean_addr)) {
              u_int ppage=page;
              inv_debug("INV: Restored %x (%p/%p)\n",head->vaddr, head->addr, clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              // Move to the clean list and repoint any hash-table entries
              ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
              struct ht_entry *ht_bin = hash_table_get(head->vaddr);
              if (ht_bin->vaddr[0] == head->vaddr)
                ht_bin->tcaddr[0] = clean_addr; // Replace existing entry
              if (ht_bin->vaddr[1] == head->vaddr)
                ht_bin->tcaddr[1] = clean_addr; // Replace existing entry
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1201
8062d65a 1202/* Register allocation */
1203
1204// Note: registers are allocated clean (unmodified state)
1205// if you intend to modify the register, you must call dirty_reg().
// Allocate a host register for guest register 'reg' at instruction 'i',
// updating the current register-allocation state 'cur'.
// Note: registers are allocated clean (unmodified state)
// if you intend to modify the register, you must call dirty_reg().
static void alloc_reg(struct regstat *cur,int i,signed char reg)
{
  int r,hr;
  int preferred_reg = (reg&7);
  if(reg==CCREG) preferred_reg=HOST_CCREG;
  if(reg==PTEMP||reg==FTEMP) preferred_reg=12;

  // Don't allocate unused registers
  if((cur->u>>reg)&1) return;

  // see if it's already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(cur->regmap[hr]==reg) return;
  }

  // Keep the same mapping if the register was already allocated in a loop
  preferred_reg = loop_reg(i,reg,preferred_reg);

  // Try to allocate the preferred register
  if(cur->regmap[preferred_reg]==-1) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  // Preferred register is taken; reuse it if its occupant is unneeded
  r=cur->regmap[preferred_reg];
  assert(r < 64);
  if((cur->u>>r)&1) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }

  // Clear any unneeded registers
  // We try to keep the mapping consistent, if possible, because it
  // makes branches easier (especially loops). So we try to allocate
  // first (see above) before removing old mappings. If this is not
  // possible then go ahead and clear out the registers that are no
  // longer needed.
  for(hr=0;hr<HOST_REGS;hr++)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      assert(r < 64);
      if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
    }
  }
  // Try to allocate any available register, but prefer
  // registers that have not been used recently.
  if(i>0) {
    for(hr=0;hr<HOST_REGS;hr++) {
      if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
        // Skip registers that held the previous instruction's operands
        if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
          cur->regmap[hr]=reg;
          cur->dirty&=~(1<<hr);
          cur->isconst&=~(1<<hr);
          return;
        }
      }
    }
  }
  // Try to allocate any available register
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // hsn[] = "how soon needed" score per guest register (10 = not soon)
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg);
  //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
  //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
    if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2;
    // Evict in order of decreasing "not needed soon" score (but never
    // anything needed within the next 2 instructions, hence j>=3)
    for(j=10;j>=3;j--)
    {
      // Alloc preferred register if available
      if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          // Evict both parts of a 64-bit register
          if((cur->regmap[hr]&63)==r) {
            cur->regmap[hr]=-1;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
          }
        }
        cur->regmap[preferred_reg]=reg;
        return;
      }
      for(r=1;r<=MAXREG;r++)
      {
        if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r+64) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: evict anything, including soon-needed registers
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r+64) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
}
1360
1361// Allocate a temporary register. This is done without regard to
1362// dirty status or whether the register we request is on the unneeded list
1363// Note: This will only allocate one register, even if called multiple times
// Allocate a temporary register. This is done without regard to
// dirty status or whether the register we request is on the unneeded list
// Note: This will only allocate one register, even if called multiple times
static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
{
  int r,hr;
  int preferred_reg = -1;

  // see if it's already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
  }

  // Try to allocate any available register
  // (scan high-to-low, opposite of alloc_reg, to reduce collisions)
  for(hr=HOST_REGS-1;hr>=0;hr--) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Find an unneeded register
  for(hr=HOST_REGS-1;hr>=0;hr--)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      assert(r < 64);
      if((cur->u>>r)&1) {
        if(i==0||((unneeded_reg[i-1]>>r)&1)) {
          cur->regmap[hr]=reg;
          cur->dirty&=~(1<<hr);
          cur->isconst&=~(1<<hr);
          return;
        }
      }
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // TODO: we might want to follow unconditional jumps here
  // TODO: get rid of dupe code and make this into a function
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg);
  //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
    if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2;
    // Evict registers not needed soon (j>=3 keeps near-term operands)
    for(j=10;j>=3;j--)
    {
      for(r=1;r<=MAXREG;r++)
      {
        if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||hsn[CCREG]>2) {
              if(cur->regmap[hr]==r+64) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||hsn[CCREG]>2) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: evict anything
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r+64) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  SysPrintf("This shouldn't happen");exit(1);
}
1471
ad49de89 1472static void mov_alloc(struct regstat *current,int i)
57871462 1473{
1474 // Note: Don't need to actually alloc the source registers
ad49de89 1475 //alloc_reg(current,i,rs1[i]);
1476 alloc_reg(current,i,rt1[i]);
1477
57871462 1478 clear_const(current,rs1[i]);
1479 clear_const(current,rt1[i]);
1480 dirty_reg(current,rt1[i]);
1481}
1482
ad49de89 1483static void shiftimm_alloc(struct regstat *current,int i)
57871462 1484{
57871462 1485 if(opcode2[i]<=0x3) // SLL/SRL/SRA
1486 {
1487 if(rt1[i]) {
1488 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1489 else lt1[i]=rs1[i];
1490 alloc_reg(current,i,rt1[i]);
57871462 1491 dirty_reg(current,rt1[i]);
dc49e339 1492 if(is_const(current,rs1[i])) {
1493 int v=get_const(current,rs1[i]);
1494 if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
1495 if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]);
1496 if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]);
1497 }
1498 else clear_const(current,rt1[i]);
57871462 1499 }
1500 }
dc49e339 1501 else
1502 {
1503 clear_const(current,rs1[i]);
1504 clear_const(current,rt1[i]);
1505 }
1506
57871462 1507 if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
1508 {
9c45ca93 1509 assert(0);
57871462 1510 }
1511 if(opcode2[i]==0x3c) // DSLL32
1512 {
9c45ca93 1513 assert(0);
57871462 1514 }
1515 if(opcode2[i]==0x3e) // DSRL32
1516 {
9c45ca93 1517 assert(0);
57871462 1518 }
1519 if(opcode2[i]==0x3f) // DSRA32
1520 {
9c45ca93 1521 assert(0);
57871462 1522 }
1523}
1524
ad49de89 1525static void shift_alloc(struct regstat *current,int i)
57871462 1526{
1527 if(rt1[i]) {
1528 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
1529 {
1530 if(rs1[i]) alloc_reg(current,i,rs1[i]);
1531 if(rs2[i]) alloc_reg(current,i,rs2[i]);
1532 alloc_reg(current,i,rt1[i]);
e1190b87 1533 if(rt1[i]==rs2[i]) {
1534 alloc_reg_temp(current,i,-1);
1535 minimum_free_regs[i]=1;
1536 }
57871462 1537 } else { // DSLLV/DSRLV/DSRAV
00fa9369 1538 assert(0);
57871462 1539 }
1540 clear_const(current,rs1[i]);
1541 clear_const(current,rs2[i]);
1542 clear_const(current,rt1[i]);
1543 dirty_reg(current,rt1[i]);
1544 }
1545}
1546
ad49de89 1547static void alu_alloc(struct regstat *current,int i)
57871462 1548{
1549 if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
1550 if(rt1[i]) {
1551 if(rs1[i]&&rs2[i]) {
1552 alloc_reg(current,i,rs1[i]);
1553 alloc_reg(current,i,rs2[i]);
1554 }
1555 else {
1556 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1557 if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
1558 }
1559 alloc_reg(current,i,rt1[i]);
1560 }
57871462 1561 }
1562 if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
1563 if(rt1[i]) {
ad49de89 1564 alloc_reg(current,i,rs1[i]);
1565 alloc_reg(current,i,rs2[i]);
1566 alloc_reg(current,i,rt1[i]);
57871462 1567 }
57871462 1568 }
1569 if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
1570 if(rt1[i]) {
1571 if(rs1[i]&&rs2[i]) {
1572 alloc_reg(current,i,rs1[i]);
1573 alloc_reg(current,i,rs2[i]);
1574 }
1575 else
1576 {
1577 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1578 if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
1579 }
1580 alloc_reg(current,i,rt1[i]);
57871462 1581 }
1582 }
1583 if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
00fa9369 1584 assert(0);
57871462 1585 }
1586 clear_const(current,rs1[i]);
1587 clear_const(current,rs2[i]);
1588 clear_const(current,rt1[i]);
1589 dirty_reg(current,rt1[i]);
1590}
1591
ad49de89 1592static void imm16_alloc(struct regstat *current,int i)
57871462 1593{
1594 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1595 else lt1[i]=rs1[i];
1596 if(rt1[i]) alloc_reg(current,i,rt1[i]);
1597 if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
00fa9369 1598 assert(0);
57871462 1599 }
1600 else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
57871462 1601 clear_const(current,rs1[i]);
1602 clear_const(current,rt1[i]);
1603 }
1604 else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
57871462 1605 if(is_const(current,rs1[i])) {
1606 int v=get_const(current,rs1[i]);
1607 if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
1608 if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
1609 if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
1610 }
1611 else clear_const(current,rt1[i]);
1612 }
1613 else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
1614 if(is_const(current,rs1[i])) {
1615 int v=get_const(current,rs1[i]);
1616 set_const(current,rt1[i],v+imm[i]);
1617 }
1618 else clear_const(current,rt1[i]);
57871462 1619 }
1620 else {
1621 set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
57871462 1622 }
1623 dirty_reg(current,rt1[i]);
1624}
1625
ad49de89 1626static void load_alloc(struct regstat *current,int i)
57871462 1627{
1628 clear_const(current,rt1[i]);
1629 //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
1630 if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
1631 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
373d1d07 1632 if(rt1[i]&&!((current->u>>rt1[i])&1)) {
57871462 1633 alloc_reg(current,i,rt1[i]);
373d1d07 1634 assert(get_reg(current->regmap,rt1[i])>=0);
57871462 1635 if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
1636 {
ad49de89 1637 assert(0);
57871462 1638 }
1639 else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
1640 {
ad49de89 1641 assert(0);
57871462 1642 }
57871462 1643 dirty_reg(current,rt1[i]);
57871462 1644 // LWL/LWR need a temporary register for the old value
1645 if(opcode[i]==0x22||opcode[i]==0x26)
1646 {
1647 alloc_reg(current,i,FTEMP);
1648 alloc_reg_temp(current,i,-1);
e1190b87 1649 minimum_free_regs[i]=1;
57871462 1650 }
1651 }
1652 else
1653 {
373d1d07 1654 // Load to r0 or unneeded register (dummy load)
57871462 1655 // but we still need a register to calculate the address
535d208a 1656 if(opcode[i]==0x22||opcode[i]==0x26)
1657 {
1658 alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
1659 }
57871462 1660 alloc_reg_temp(current,i,-1);
e1190b87 1661 minimum_free_regs[i]=1;
535d208a 1662 if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
1663 {
ad49de89 1664 assert(0);
535d208a 1665 }
57871462 1666 }
1667}
1668
// Register allocation for store instructions (SB/SH/SW/SWL/SWR).
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,rs2[i]);
  if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rs2[i]);
  if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
    assert(0);
  }
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else alloc_reg(current,i,INVCP);
  #endif
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWR/SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1689
// Register allocation for COP1 loads/stores (LWC1/SWC1).
void c1ls_alloc(struct regstat *current,int i)
{
  //clear_const(current,rs1[i]); // FIXME
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,CSREG); // Status
  alloc_reg(current,i,FTEMP);
  if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
    assert(0);
  }
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  // NOTE(review): unlike c2ls_alloc this does not set minimum_free_regs[i]
  // despite allocating a temp — confirm whether that is intentional.
}
1708
b9b61529 1709void c2ls_alloc(struct regstat *current,int i)
1710{
1711 clear_const(current,rt1[i]);
1712 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1713 alloc_reg(current,i,FTEMP);
b9b61529 1714 #if defined(HOST_IMM8)
1715 // On CPUs without 32-bit immediates we need a pointer to invalid_code
1edfcc68 1716 if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
b9b61529 1717 alloc_reg(current,i,INVCP);
1718 #endif
1719 // We need a temporary register for address generation
1720 alloc_reg_temp(current,i,-1);
e1190b87 1721 minimum_free_regs[i]=1;
b9b61529 1722}
1723
#ifndef multdiv_alloc
// Register allocation for multiply/divide instructions.
// Results go to HI/LO; with a zero operand the result is forced to a
// defined value (zero) since MIPS has no divide-by-zero exception.
void multdiv_alloc(struct regstat *current,int i)
{
  // case 0x18: MULT
  // case 0x19: MULTU
  // case 0x1A: DIV
  // case 0x1B: DIVU
  // case 0x1C: DMULT
  // case 0x1D: DMULTU
  // case 0x1E: DDIV
  // case 0x1F: DDIVU
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      // Mark HI/LO as needed so they are not dropped, then allocate
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      alloc_reg(current,i,HIREG);
      alloc_reg(current,i,LOREG);
      alloc_reg(current,i,rs1[i]);
      alloc_reg(current,i,rs2[i]);
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
    else // 64-bit
    {
      assert(0);
    }
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    alloc_reg(current,i,HIREG);
    alloc_reg(current,i,LOREG);
    dirty_reg(current,HIREG);
    dirty_reg(current,LOREG);
  }
}
#endif
1767
// Register allocation for COP0 instructions (MFC0/MTC0/RFE-class).
// These can have wide side effects, so all registers are flushed.
void cop0_alloc(struct regstat *current,int i)
{
  if(opcode2[i]==0) // MFC0
  {
    if(rt1[i]) {
      clear_const(current,rt1[i]);
      alloc_all(current,i);
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      alloc_reg(current,i,rs1[i]);
      alloc_all(current,i);
    }
    else {
      alloc_all(current,i); // FIXME: Keep r0
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  else
  {
    // TLBR/TLBWI/TLBWR/TLBP/ERET
    assert(opcode2[i]==0x10);
    alloc_all(current,i);
  }
  // Everything was flushed, so no registers may be live across this op
  minimum_free_regs[i]=HOST_REGS;
}
1800
00fa9369 1801static void cop12_alloc(struct regstat *current,int i)
57871462 1802{
1803 alloc_reg(current,i,CSREG); // Load status
00fa9369 1804 if(opcode2[i]<3) // MFC1/CFC1
57871462 1805 {
7de557a6 1806 if(rt1[i]){
1807 clear_const(current,rt1[i]);
00fa9369 1808 alloc_reg(current,i,rt1[i]);
7de557a6 1809 dirty_reg(current,rt1[i]);
57871462 1810 }
57871462 1811 alloc_reg_temp(current,i,-1);
1812 }
00fa9369 1813 else if(opcode2[i]>3) // MTC1/CTC1
57871462 1814 {
1815 if(rs1[i]){
1816 clear_const(current,rs1[i]);
00fa9369 1817 alloc_reg(current,i,rs1[i]);
57871462 1818 }
1819 else {
1820 current->u&=~1LL;
1821 alloc_reg(current,i,0);
57871462 1822 }
00fa9369 1823 alloc_reg_temp(current,i,-1);
57871462 1824 }
e1190b87 1825 minimum_free_regs[i]=1;
57871462 1826}
00fa9369 1827
b9b61529 1828void c2op_alloc(struct regstat *current,int i)
1829{
1830 alloc_reg_temp(current,i,-1);
1831}
57871462 1832
// Register allocation for SYSCALL/BREAK: flush everything, since the
// exception handler may touch any guest register.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  minimum_free_regs[i]=HOST_REGS;
  current->isconst=0;
}
1841
// Register allocation for the instruction in a branch delay slot:
// dispatch to the per-type allocator.  A jump in the delay slot is
// invalid; speculation is disabled when one is encountered.
void delayslot_alloc(struct regstat *current,int i)
{
  switch(itype[i]) {
    case UJUMP:
    case CJUMP:
    case SJUMP:
    case RJUMP:
    case SYSCALL:
    case HLECALL:
    case SPAN:
      assem_debug("jump in the delay slot. this shouldn't happen.\n");//exit(1);
      SysPrintf("Disabled speculative precompilation\n");
      stop_after_jal=1;
      break;
    case IMM16:
      imm16_alloc(current,i);
      break;
    case LOAD:
    case LOADLR:
      load_alloc(current,i);
      break;
    case STORE:
    case STORELR:
      store_alloc(current,i);
      break;
    case ALU:
      alu_alloc(current,i);
      break;
    case SHIFT:
      shift_alloc(current,i);
      break;
    case MULTDIV:
      multdiv_alloc(current,i);
      break;
    case SHIFTIMM:
      shiftimm_alloc(current,i);
      break;
    case MOV:
      mov_alloc(current,i);
      break;
    case COP0:
      cop0_alloc(current,i);
      break;
    case COP1:
    case COP2:
      cop12_alloc(current,i);
      break;
    case C1LS:
      c1ls_alloc(current,i);
      break;
    case C2LS:
      c2ls_alloc(current,i);
      break;
    case C2OP:
      c2op_alloc(current,i);
      break;
  }
}
1900
1901// Special case where a branch and delay slot span two pages in virtual memory
// Special case where a branch and its delay slot span two pages in
// virtual memory: flush everything and allocate the branch operands.
static void pagespan_alloc(struct regstat *current,int i)
{
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  minimum_free_regs[i]=HOST_REGS;
  alloc_all(current,i);
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  if(opcode[i]==3) // JAL
  {
    // Link register
    alloc_reg(current,i,31);
    dirty_reg(current,31);
  }
  if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,rs1[i]);
    if (rt1[i]!=0) {
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(rs2[i]) alloc_reg(current,i,rs2[i]);
  }
  else
  if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
  }
  //else ...
}
1936
b14b6a8f 1937static void add_stub(enum stub_type type, void *addr, void *retaddr,
1938 u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e)
1939{
1940 assert(a < ARRAY_SIZE(stubs));
1941 stubs[stubcount].type = type;
1942 stubs[stubcount].addr = addr;
1943 stubs[stubcount].retaddr = retaddr;
1944 stubs[stubcount].a = a;
1945 stubs[stubcount].b = b;
1946 stubs[stubcount].c = c;
1947 stubs[stubcount].d = d;
1948 stubs[stubcount].e = e;
57871462 1949 stubcount++;
1950}
1951
b14b6a8f 1952static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
1953 int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist)
1954{
1955 add_stub(type, addr, retaddr, i, addr_reg, (uintptr_t)i_regs, ccadj, reglist);
1956}
1957
57871462 1958// Write out a single register
ad49de89 1959static void wb_register(signed char r,signed char regmap[],uint64_t dirty)
57871462 1960{
1961 int hr;
1962 for(hr=0;hr<HOST_REGS;hr++) {
1963 if(hr!=EXCLUDE_REG) {
1964 if((regmap[hr]&63)==r) {
1965 if((dirty>>hr)&1) {
ad49de89 1966 assert(regmap[hr]<64);
1967 emit_storereg(r,hr);
57871462 1968 }
1969 }
1970 }
1971 }
1972}
1973
8062d65a 1974static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t u)
1975{
1976 //if(dirty_pre==dirty) return;
1977 int hr,reg;
1978 for(hr=0;hr<HOST_REGS;hr++) {
1979 if(hr!=EXCLUDE_REG) {
1980 reg=pre[hr];
1981 if(((~u)>>(reg&63))&1) {
1982 if(reg>0) {
1983 if(((dirty_pre&~dirty)>>hr)&1) {
1984 if(reg>0&&reg<34) {
1985 emit_storereg(reg,hr);
1986 }
1987 else if(reg>=64) {
1988 assert(0);
1989 }
1990 }
1991 }
1992 }
1993 }
1994 }
1995}
1996
57871462 1997void rlist()
1998{
1999 int i;
2000 printf("TRACE: ");
2001 for(i=0;i<32;i++)
2002 printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
2003 printf("\n");
57871462 2004}
2005
// Emit host code for a three-operand ALU instruction at index 'i'
// (ADD/ADDU/SUB/SUBU, SLT/SLTU, AND/OR/XOR/NOR), using the register
// mapping in 'i_regs'.  A source not currently in a host register is
// loaded from the register file; r0 operands are special-cased.
void alu_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      signed char s1,s2,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      if(t>=0) {
        s1=get_reg(i_regs->regmap,rs1[i]);
        s2=get_reg(i_regs->regmap,rs2[i]);
        if(rs1[i]&&rs2[i]) {
          assert(s1>=0);
          assert(s2>=0);
          if(opcode2[i]&2) emit_sub(s1,s2,t);
          else emit_add(s1,s2,t);
        }
        else if(rs1[i]) {
          // rs2 is r0: result is rs1 (SUB with zero is a move)
          if(s1>=0) emit_mov(s1,t);
          else emit_loadreg(rs1[i],t);
        }
        else if(rs2[i]) {
          // rs1 is r0: result is rs2, negated for SUB
          if(s2>=0) {
            if(opcode2[i]&2) emit_neg(s2,t);
            else emit_mov(s2,t);
          }
          else {
            emit_loadreg(rs2[i],t);
            if(opcode2[i]&2) emit_neg(t,t);
          }
        }
        else emit_zeroreg(t); // both sources r0
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    assert(0);
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      signed char s1l,s2l,t;
      {
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1l,31,t);  // sign bit = (rx < 0)
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz32(s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz32(s2l,t);
          }
          else{
            assert(s1l>=0);assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less32(s1l,s2l,t);
            else // SLTU
              emit_set_if_carry32(s1l,s2l,t);
          }
        }
      }
    }
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      signed char s1l,s2l,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      {
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);
            assert(s2l>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_not(tl,tl);
            }
          }
          else
          {
            // One or both sources are r0
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
              }
              else emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else {
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else {
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
              }
              else emit_movimm(-1,tl); // NOR of zeros is all ones
            }
          }
        }
      }
    }
  }
}
2144
// Assemble an immediate-operand ALU instruction:
// LUI, ADDI/ADDIU, DADDI/DADDIU (64-bit, high half optional),
// SLTI/SLTIU, ANDI/ORI/XORI.
// i      - instruction index within the block being compiled
// i_regs - register allocation state for this instruction
void imm16_assemble(int i,struct regstat *i_regs)
{
  if (opcode[i]==0x0f) { // LUI
    if(rt1[i]) {
      signed char t;
      t=get_reg(i_regs->regmap,rt1[i]);
      //assert(t>=0);
      if(t>=0) {
        // skip the move if the target already holds a known constant
        if(!((i_regs->isconst>>t)&1))
          emit_movimm(imm[i]<<16,t);
      }
    }
  }
  if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      if(rs1[i]) {
        //assert(t>=0);
        //assert(s>=0);
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1)) {
            if(s<0) {
              // source not in a host register: reload into the target first
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_addimm(t,imm[i],t);
            }else{
              if(!((i_regs->wasconst>>s)&1))
                emit_addimm(s,imm[i],t);
              else
                // source was a known constant: fold the add at compile time
                emit_movimm(constmap[i][s]+imm[i],t);
            }
          }
        }
      } else {
        // rs is r0, so the result is just the immediate
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1))
            emit_movimm(imm[i],t);
        }
      }
    }
  }
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64); // |64 selects the upper 32-bit half
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]) {
          assert(sh>=0);
          assert(sl>=0);
          if(th>=0) {
            emit_addimm64_32(sh,sl,imm[i],th,tl);
          }
          else {
            // high half not allocated: 32-bit add is sufficient
            emit_addimm(sl,imm[i],tl);
          }
        } else {
          emit_movimm(imm[i],tl);
          if(th>=0) emit_movimm(((signed int)imm[i])>>31,th); // sign-extend into high half
        }
      }
    }
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if(rt1[i]) {
      //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
      signed char sl,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      sl=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(rs1[i]>0) {
          if(opcode[i]==0x0a) { // SLTI
            if(sl<0) {
              // source not mapped: reload into the target, then compare in place
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_slti32(t,imm[i],t);
            }else{
              emit_slti32(sl,imm[i],t);
            }
          }
          else { // SLTIU
            if(sl<0) {
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_sltiu32(t,imm[i],t);
            }else{
              emit_sltiu32(sl,imm[i],t);
            }
          }
        }else{
          // SLTI(U) with r0 is just stupid,
          // nonetheless examples can be found
          if(opcode[i]==0x0a) // SLTI
            if(0<imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          else // SLTIU
          {
            if(imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          }
        }
      }
    }
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
        if(opcode[i]==0x0c) //ANDI
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
              emit_andimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_andimm(sl,imm[i],tl);
              else
                // fold with the known constant source
                emit_movimm(constmap[i][sl]&imm[i],tl);
            }
          }
          else
            emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th); // ANDI with 16-bit imm clears the high half
        }
        else
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
            }
            if(th>=0) {
              // OR/XOR with a 16-bit immediate leaves the high half unchanged
              if(sh<0) {
                emit_loadreg(rs1[i]|64,th);
              }else{
                emit_mov(sh,th);
              }
            }
            if(opcode[i]==0x0d) { // ORI
              if(sl<0) {
                emit_orimm(tl,imm[i],tl);
              }else{
                if(!((i_regs->wasconst>>sl)&1))
                  emit_orimm(sl,imm[i],tl);
                else
                  emit_movimm(constmap[i][sl]|imm[i],tl);
              }
            }
            if(opcode[i]==0x0e) { // XORI
              if(sl<0) {
                emit_xorimm(tl,imm[i],tl);
              }else{
                if(!((i_regs->wasconst>>sl)&1))
                  emit_xorimm(sl,imm[i],tl);
                else
                  emit_movimm(constmap[i][sl]^imm[i],tl);
              }
            }
          }
          else {
            // rs is r0: result is the zero-extended immediate
            emit_movimm(imm[i],tl);
            if(th>=0) emit_zeroreg(th);
          }
        }
      }
    }
  }
}
2319
2320void shiftimm_assemble(int i,struct regstat *i_regs)
2321{
2322 if(opcode2[i]<=0x3) // SLL/SRL/SRA
2323 {
2324 if(rt1[i]) {
2325 signed char s,t;
2326 t=get_reg(i_regs->regmap,rt1[i]);
2327 s=get_reg(i_regs->regmap,rs1[i]);
2328 //assert(t>=0);
dc49e339 2329 if(t>=0&&!((i_regs->isconst>>t)&1)){
57871462 2330 if(rs1[i]==0)
2331 {
2332 emit_zeroreg(t);
2333 }
2334 else
2335 {
2336 if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2337 if(imm[i]) {
2338 if(opcode2[i]==0) // SLL
2339 {
2340 emit_shlimm(s<0?t:s,imm[i],t);
2341 }
2342 if(opcode2[i]==2) // SRL
2343 {
2344 emit_shrimm(s<0?t:s,imm[i],t);
2345 }
2346 if(opcode2[i]==3) // SRA
2347 {
2348 emit_sarimm(s<0?t:s,imm[i],t);
2349 }
2350 }else{
2351 // Shift by zero
2352 if(s>=0 && s!=t) emit_mov(s,t);
2353 }
2354 }
2355 }
2356 //emit_storereg(rt1[i],t); //DEBUG
2357 }
2358 }
2359 if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
2360 {
9c45ca93 2361 assert(0);
57871462 2362 }
2363 if(opcode2[i]==0x3c) // DSLL32
2364 {
9c45ca93 2365 assert(0);
57871462 2366 }
2367 if(opcode2[i]==0x3e) // DSRL32
2368 {
9c45ca93 2369 assert(0);
57871462 2370 }
2371 if(opcode2[i]==0x3f) // DSRA32
2372 {
9c45ca93 2373 assert(0);
57871462 2374 }
2375}
2376
#ifndef shift_assemble
// Fallback for targets that don't provide a native shift_assemble
// (variable shifts: SLLV/SRLV/SRAV); aborts at runtime if ever reached.
void shift_assemble(int i,struct regstat *i_regs)
{
  printf("Need shift_assemble for this architecture.\n");
  exit(1);
}
#endif
2384
// Memory-region classes used by emit_fastpath_cmp_jump() to choose the
// cheapest address check for a load/store; classification is done by
// get_ptr_mem_type() below.
enum {
  MTYPE_8000 = 0, // default: 0x80000000 region / anything needing the full RAM check
  MTYPE_8020,     // RAM mirror at 0x80200000-0x807fffff
  MTYPE_0000,     // RAM mirror at 0x00000000
  MTYPE_A000,     // RAM mirror at 0xa0000000
  MTYPE_1F80,     // scratchpad / hw registers at 0x1f800000-0x1f800fff
};
2392
2393static int get_ptr_mem_type(u_int a)
2394{
2395 if(a < 0x00200000) {
2396 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
2397 // return wrong, must use memhandler for BIOS self-test to pass
2398 // 007 does similar stuff from a00 mirror, weird stuff
2399 return MTYPE_8000;
2400 return MTYPE_0000;
2401 }
2402 if(0x1f800000 <= a && a < 0x1f801000)
2403 return MTYPE_1F80;
2404 if(0x80200000 <= a && a < 0x80800000)
2405 return MTYPE_8020;
2406 if(0xa0000000 <= a && a < 0xa0200000)
2407 return MTYPE_A000;
2408 return MTYPE_8000;
2409}
2410
// Emit the fast-path address check for the load/store at instruction i.
// 'addr' is the host register holding the guest address.  Based on the
// predicted memory region (from the smrv[] value prediction if available,
// otherwise the mirror this block runs in), a mirror-normalizing op may be
// emitted into HOST_TEMPREG and *addr_reg_override set to it.
// Returns the location of the emitted conditional jump (to be patched to
// the slow-path stub), or NULL if no runtime check was needed.
static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
{
  void *jaddr = NULL;
  int type=0;
  int mr=rs1[i];
  if(((smrv_strong|smrv_weak)>>mr)&1) {
    // we have a predicted value for the base register
    type=get_ptr_mem_type(smrv[mr]);
    //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
  }
  else {
    // use the mirror we are running on
    type=get_ptr_mem_type(start);
    //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
  }

  if(type==MTYPE_8020) { // RAM 80200000+ mirror
    emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0; // fall through to the generic RAM check below
  }
  else if(type==MTYPE_0000) { // RAM 0 mirror
    emit_orimm(addr,0x80000000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_A000) { // RAM A mirror
    emit_andimm(addr,~0x20000000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_1F80) { // scratchpad
    if (psxH == (void *)0x1f800000) {
      // scratchpad is mapped 1:1; range-check against its 4k size
      emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
      emit_cmpimm(HOST_TEMPREG,0x1000);
      jaddr=out;
      emit_jc(0);
    }
    else {
      // do the usual RAM check, jump will go to the right handler
      type=0;
    }
  }

  if(type==0)
  {
    // generic check: in-RAM addresses fall through, others jump to the stub
    emit_cmpimm(addr,RAM_SIZE);
    jaddr=out;
    #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
    // Hint to branch predictor that the branch is unlikely to be taken
    if(rs1[i]>=28)
      emit_jno_unlikely(0);
    else
    #endif
    emit_jno(0);
    if(ram_offset!=0) {
      // RAM is not at host address 0: apply the translation offset
      emit_addimm(addr,ram_offset,HOST_TEMPREG);
      addr=*addr_reg_override=HOST_TEMPREG;
    }
  }

  return jaddr;
}
2473
// Assemble a load instruction (LB/LH/LW/LBU/LHU/LWU; LD traps as
// unsupported).  Emits the fast RAM read plus a slow-path stub for
// I/O/scratchpad, or an inline read stub when the address is a known
// constant outside RAM.
static void load_assemble(int i,struct regstat *i_regs)
{
  int s,th,tl,addr;
  int offset;
  void *jaddr=0;
  int memtarget=0,c=0;
  int fastload_reg_override=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64); // high half of the 64-bit pair, if mapped
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  offset=imm[i];
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1; // base register holds a known constant?
    if (c) {
      // is the constant target address inside RAM?
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
    }
  }
  //printf("load_assemble: c=%d\n",c);
  //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset);
  // FIXME: Even if the load is a NOP, we should check for pagefaults...
  if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80))
    ||rt1[i]==0) {
    // could be FIFO, must perform the read
    // ||dummy read
    assem_debug("(forced read)\n");
    tl=get_reg(i_regs->regmap,-1); // borrow a temporary host register
    assert(tl>=0);
  }
  if(offset||s<0||c) addr=tl;
  else addr=s;
  //if(tl<0) tl=get_reg(i_regs->regmap,-1);
 if(tl>=0) {
  //printf("load_assemble: c=%d\n",c);
  //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset);
  assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
  reglist&=~(1<<tl);
  if(th>=0) reglist&=~(1<<th);
  if(!c) {
    #ifdef R29_HACK
    // Strmnnrmn's speed hack
    if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
    #endif
    {
      // runtime address check; jaddr patched into the slow-path stub
      jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
    }
  }
  else if(ram_offset&&memtarget) {
    // constant RAM address: just apply the host translation offset
    emit_addimm(addr,ram_offset,HOST_TEMPREG);
    fastload_reg_override=HOST_TEMPREG;
  }
  int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
  if (opcode[i]==0x20) { // LB
    if(!c||memtarget) {
      if(!dummy) {
        {
          int x=0,a=tl;
          if(!c) a=addr;
          if(fastload_reg_override) a=fastload_reg_override;

          emit_movsbl_indexed(x,a,tl);
        }
      }
      if(jaddr)
        add_stub_r(LOADB_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
    }
    else
      // constant non-RAM address: call the read handler inline
      inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
  }
  if (opcode[i]==0x21) { // LH
    if(!c||memtarget) {
      if(!dummy) {
        int x=0,a=tl;
        if(!c) a=addr;
        if(fastload_reg_override) a=fastload_reg_override;
        emit_movswl_indexed(x,a,tl);
      }
      if(jaddr)
        add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
  }
  if (opcode[i]==0x23) { // LW
    if(!c||memtarget) {
      if(!dummy) {
        int a=addr;
        if(fastload_reg_override) a=fastload_reg_override;
        emit_readword_indexed(0,a,tl);
      }
      if(jaddr)
        add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
  }
  if (opcode[i]==0x24) { // LBU
    if(!c||memtarget) {
      if(!dummy) {
        int x=0,a=tl;
        if(!c) a=addr;
        if(fastload_reg_override) a=fastload_reg_override;

        emit_movzbl_indexed(x,a,tl);
      }
      if(jaddr)
        add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
  }
  if (opcode[i]==0x25) { // LHU
    if(!c||memtarget) {
      if(!dummy) {
        int x=0,a=tl;
        if(!c) a=addr;
        if(fastload_reg_override) a=fastload_reg_override;
        emit_movzwl_indexed(x,a,tl);
      }
      if(jaddr)
        add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
  }
  if (opcode[i]==0x27) { // LWU
    assert(th>=0);
    if(!c||memtarget) {
      if(!dummy) {
        int a=addr;
        if(fastload_reg_override) a=fastload_reg_override;
        emit_readword_indexed(0,a,tl);
      }
      if(jaddr)
        add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
    }
    else {
      inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    emit_zeroreg(th); // LWU zero-extends: clear the high half
  }
  if (opcode[i]==0x37) { // LD
    assert(0);
  }
 }
}
2624
#ifndef loadlr_assemble
// Fallback for targets that don't provide a native loadlr_assemble
// (unaligned loads LWL/LWR); aborts at runtime if ever reached.
void loadlr_assemble(int i,struct regstat *i_regs)
{
  printf("Need loadlr_assemble for this architecture.\n");
  exit(1);
}
#endif
2632
2633void store_assemble(int i,struct regstat *i_regs)
2634{
9c45ca93 2635 int s,tl;
57871462 2636 int addr,temp;
2637 int offset;
b14b6a8f 2638 void *jaddr=0;
2639 enum stub_type type;
666a299d 2640 int memtarget=0,c=0;
57871462 2641 int agr=AGEN1+(i&1);
b1570849 2642 int faststore_reg_override=0;
57871462 2643 u_int hr,reglist=0;
57871462 2644 tl=get_reg(i_regs->regmap,rs2[i]);
2645 s=get_reg(i_regs->regmap,rs1[i]);
2646 temp=get_reg(i_regs->regmap,agr);
2647 if(temp<0) temp=get_reg(i_regs->regmap,-1);
2648 offset=imm[i];
2649 if(s>=0) {
2650 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2651 if(c) {
2652 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2653 }
57871462 2654 }
2655 assert(tl>=0);
2656 assert(temp>=0);
2657 for(hr=0;hr<HOST_REGS;hr++) {
2658 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2659 }
2660 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2661 if(offset||s<0||c) addr=temp;
2662 else addr=s;
1edfcc68 2663 if(!c) {
2664 jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
2665 }
2666 else if(ram_offset&&memtarget) {
2667 emit_addimm(addr,ram_offset,HOST_TEMPREG);
2668 faststore_reg_override=HOST_TEMPREG;
57871462 2669 }
2670
2671 if (opcode[i]==0x28) { // SB
2672 if(!c||memtarget) {
97a238a6 2673 int x=0,a=temp;
97a238a6 2674 if(!c) a=addr;
b1570849 2675 if(faststore_reg_override) a=faststore_reg_override;
9c45ca93 2676 emit_writebyte_indexed(tl,x,a);
57871462 2677 }
2678 type=STOREB_STUB;
2679 }
2680 if (opcode[i]==0x29) { // SH
2681 if(!c||memtarget) {
97a238a6 2682 int x=0,a=temp;
97a238a6 2683 if(!c) a=addr;
b1570849 2684 if(faststore_reg_override) a=faststore_reg_override;
9c45ca93 2685 emit_writehword_indexed(tl,x,a);
57871462 2686 }
2687 type=STOREH_STUB;
2688 }
2689 if (opcode[i]==0x2B) { // SW
dadf55f2 2690 if(!c||memtarget) {
2691 int a=addr;
b1570849 2692 if(faststore_reg_override) a=faststore_reg_override;
9c45ca93 2693 emit_writeword_indexed(tl,0,a);
dadf55f2 2694 }
57871462 2695 type=STOREW_STUB;
2696 }
2697 if (opcode[i]==0x3F) { // SD
9c45ca93 2698 assert(0);
57871462 2699 type=STORED_STUB;
2700 }
b96d3df7 2701 if(jaddr) {
2702 // PCSX store handlers don't check invcode again
2703 reglist|=1<<addr;
b14b6a8f 2704 add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
b96d3df7 2705 jaddr=0;
2706 }
1edfcc68 2707 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
57871462 2708 if(!c||memtarget) {
2709 #ifdef DESTRUCTIVE_SHIFT
2710 // The x86 shift operation is 'destructive'; it overwrites the
2711 // source register, so we need to make a copy first and use that.
2712 addr=temp;
2713 #endif
2714 #if defined(HOST_IMM8)
2715 int ir=get_reg(i_regs->regmap,INVCP);
2716 assert(ir>=0);
2717 emit_cmpmem_indexedsr12_reg(ir,addr,1);
2718 #else
643aeae3 2719 emit_cmpmem_indexedsr12_imm(invalid_code,addr,1);
57871462 2720 #endif
0bbd1454 2721 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
2722 emit_callne(invalidate_addr_reg[addr]);
2723 #else
b14b6a8f 2724 void *jaddr2 = out;
57871462 2725 emit_jne(0);
b14b6a8f 2726 add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<<HOST_CCREG),addr,0,0,0);
0bbd1454 2727 #endif
57871462 2728 }
2729 }
7a518516 2730 u_int addr_val=constmap[i][s]+offset;
3eaa7048 2731 if(jaddr) {
b14b6a8f 2732 add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
3eaa7048 2733 } else if(c&&!memtarget) {
7a518516 2734 inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist);
2735 }
2736 // basic current block modification detection..
2737 // not looking back as that should be in mips cache already
2738 if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
c43b5311 2739 SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
7a518516 2740 assert(i_regs->regmap==regs[i].regmap); // not delay slot
2741 if(i_regs->regmap==regs[i].regmap) {
ad49de89 2742 load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i);
2743 wb_dirtys(regs[i].regmap_entry,regs[i].wasdirty);
7a518516 2744 emit_movimm(start+i*4+4,0);
643aeae3 2745 emit_writeword(0,&pcaddr);
b14b6a8f 2746 emit_jmp(do_interrupt);
7a518516 2747 }
3eaa7048 2748 }
57871462 2749}
2750
// Assemble an unaligned store (SWL/SWR; SDL/SDR trap as unsupported).
// Generates a 4-way dispatch on the low two address bits; each case
// writes the appropriate partial word using byte rotations of the
// source register.
void storelr_assemble(int i,struct regstat *i_regs)
{
  int s,tl;
  int temp;
  int offset;
  void *jaddr=0;
  void *case1, *case2, *case3;   // branch targets for address&3 == 1,2,3
  void *done0, *done1, *done2;   // jumps past the remaining cases
  int memtarget=0,c=0;
  int agr=AGEN1+(i&1);
  u_int hr,reglist=0;
  tl=get_reg(i_regs->regmap,rs2[i]);  // value to store
  s=get_reg(i_regs->regmap,rs1[i]);   // base address register
  temp=get_reg(i_regs->regmap,agr);   // address-generation scratch
  if(temp<0) temp=get_reg(i_regs->regmap,-1);
  offset=imm[i];
  if(s>=0) {
    c=(i_regs->isconst>>s)&1;         // base register holds a known constant?
    if(c) {
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
    }
  }
  assert(tl>=0);
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  assert(temp>=0);
  if(!c) {
    // runtime RAM check; out-of-range addresses take the stub
    emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
    if(!offset&&s!=temp) emit_mov(s,temp);
    jaddr=out;
    emit_jno(0);
  }
  else
  {
    // constant non-RAM address (or r0 base): always go to the stub
    if(!memtarget||!rs1[i]) {
      jaddr=out;
      emit_jmp(0);
    }
  }
  emit_addimm_no_flags(ram_offset,temp); // translate to host address

  if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
    assert(0);
  }

  // dispatch on the two low address bits (xor 3 converts endianness order)
  emit_xorimm(temp,3,temp);
  emit_testimm(temp,2);
  case2=out;
  emit_jne(0);
  emit_testimm(temp,1);
  case1=out;
  emit_jne(0);
  // 0
  if (opcode[i]==0x2A) { // SWL
    emit_writeword_indexed(tl,0,temp);
  }
  if (opcode[i]==0x2E) { // SWR
    emit_writebyte_indexed(tl,3,temp);
  }
  if (opcode[i]==0x2C) { // SDL
    assert(0);
  }
  if (opcode[i]==0x2D) { // SDR
    assert(0);
  }
  done0=out;
  emit_jmp(0);
  // 1
  set_jump_target(case1, out);
  if (opcode[i]==0x2A) { // SWL
    // Write 3 msb into three least significant bytes
    if(rs2[i]) emit_rorimm(tl,8,tl);
    emit_writehword_indexed(tl,-1,temp);
    if(rs2[i]) emit_rorimm(tl,16,tl);
    emit_writebyte_indexed(tl,1,temp);
    if(rs2[i]) emit_rorimm(tl,8,tl); // restore original rotation
  }
  if (opcode[i]==0x2E) { // SWR
    // Write two lsb into two most significant bytes
    emit_writehword_indexed(tl,1,temp);
  }
  if (opcode[i]==0x2C) { // SDL
    assert(0);
  }
  if (opcode[i]==0x2D) { // SDR
    assert(0);
  }
  done1=out;
  emit_jmp(0);
  // 2
  set_jump_target(case2, out);
  emit_testimm(temp,1);
  case3=out;
  emit_jne(0);
  if (opcode[i]==0x2A) { // SWL
    // Write two msb into two least significant bytes
    if(rs2[i]) emit_rorimm(tl,16,tl);
    emit_writehword_indexed(tl,-2,temp);
    if(rs2[i]) emit_rorimm(tl,16,tl);
  }
  if (opcode[i]==0x2E) { // SWR
    // Write 3 lsb into three most significant bytes
    emit_writebyte_indexed(tl,-1,temp);
    if(rs2[i]) emit_rorimm(tl,8,tl);
    emit_writehword_indexed(tl,0,temp);
    if(rs2[i]) emit_rorimm(tl,24,tl);
  }
  if (opcode[i]==0x2C) { // SDL
    assert(0);
  }
  if (opcode[i]==0x2D) { // SDR
    assert(0);
  }
  done2=out;
  emit_jmp(0);
  // 3
  set_jump_target(case3, out);
  if (opcode[i]==0x2A) { // SWL
    // Write msb into least significant byte
    if(rs2[i]) emit_rorimm(tl,24,tl);
    emit_writebyte_indexed(tl,-3,temp);
    if(rs2[i]) emit_rorimm(tl,8,tl);
  }
  if (opcode[i]==0x2E) { // SWR
    // Write entire word
    emit_writeword_indexed(tl,-3,temp);
  }
  if (opcode[i]==0x2C) { // SDL
    assert(0);
  }
  if (opcode[i]==0x2D) { // SDR
    assert(0);
  }
  set_jump_target(done0, out);
  set_jump_target(done1, out);
  set_jump_target(done2, out);
  if (opcode[i]==0x2C) { // SDL
    assert(0);
  }
  if (opcode[i]==0x2D) { // SDR
    assert(0);
  }
  if(!c||!memtarget)
    // slow-path stub for addresses that failed the RAM check
    add_stub_r(STORELR_STUB,jaddr,out,i,temp,i_regs,ccadj[i],reglist);
  if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
    // SMC check: undo the host offset and test the invalid_code bitmap
    emit_addimm_no_flags(-ram_offset,temp);
    #if defined(HOST_IMM8)
    int ir=get_reg(i_regs->regmap,INVCP);
    assert(ir>=0);
    emit_cmpmem_indexedsr12_reg(ir,temp,1);
    #else
    emit_cmpmem_indexedsr12_imm(invalid_code,temp,1);
    #endif
    #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
    emit_callne(invalidate_addr_reg[temp]);
    #else
    void *jaddr2 = out;
    emit_jne(0);
    add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<<HOST_CCREG),temp,0,0,0);
    #endif
  }
}
2914
// Assemble a COP0 (system control coprocessor) instruction:
// MFC0 (read a cop0 reg), MTC0 (write, through the pcsx_mtc0 C helper)
// or RFE (return from exception).
static void cop0_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]==0) // MFC0
  {
    signed char t=get_reg(i_regs->regmap,rt1[i]);
    u_int copr=(source[i]>>11)&0x1f; // cop0 register number from the rd field
    //assert(t>=0); // Why does this happen? OOT is weird
    if(t>=0&&rt1[i]!=0) {
      emit_readword(&reg_cop0[copr],t);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    signed char s=get_reg(i_regs->regmap,rs1[i]);
    char copr=(source[i]>>11)&0x1f;
    assert(s>=0);
    wb_register(rs1[i],i_regs->regmap,i_regs->dirty);
    if(copr==9||copr==11||copr==12||copr==13) {
      // writes that affect timing/interrupts: materialize Count first
      emit_readword(&last_count,HOST_TEMPREG);
      emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
      emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
      emit_writeword(HOST_CCREG,&Count);
    }
    // What a mess. The status register (12) can enable interrupts,
    // so needs a special case to handle a pending interrupt.
    // The interrupt must be taken immediately, because a subsequent
    // instruction might disable interrupts again.
    if(copr==12||copr==13) {
      if (is_delayslot) {
        // burn cycles to cause cc_interrupt, which will
        // reschedule next_interupt. Relies on CCREG from above.
        assem_debug("MTC0 DS %d\n", copr);
        emit_writeword(HOST_CCREG,&last_count);
        emit_movimm(0,HOST_CCREG);
        emit_storereg(CCREG,HOST_CCREG);
        emit_loadreg(rs1[i],1);
        emit_movimm(copr,0);
        emit_call(pcsx_mtc0_ds);
        emit_loadreg(rs1[i],s);
        return;
      }
      // record the resume PC and clear pending_exception before the call
      emit_movimm(start+i*4+4,HOST_TEMPREG);
      emit_writeword(HOST_TEMPREG,&pcaddr);
      emit_movimm(0,HOST_TEMPREG);
      emit_writeword(HOST_TEMPREG,&pending_exception);
    }
    //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
    //else
    if(s==HOST_CCREG)
      emit_loadreg(rs1[i],1);
    else if(s!=1)
      emit_mov(s,1);
    // call pcsx_mtc0 with host reg 0 = copr, host reg 1 = value
    emit_movimm(copr,0);
    emit_call(pcsx_mtc0);
    if(copr==9||copr==11||copr==12||copr==13) {
      // reload the cycle-count state the helper may have changed
      emit_readword(&Count,HOST_CCREG);
      emit_readword(&next_interupt,HOST_TEMPREG);
      emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
      emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
      emit_writeword(HOST_TEMPREG,&last_count);
      emit_storereg(CCREG,HOST_CCREG);
    }
    if(copr==12||copr==13) {
      assert(!is_delayslot);
      // if the write raised an interrupt, take it now
      emit_readword(&pending_exception,14);
      emit_test(14,14);
      emit_jne(&do_interrupt);
    }
    emit_loadreg(rs1[i],s); // restore the (possibly clobbered) source reg
    if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
      emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
  }
  else
  {
    assert(opcode2[i]==0x10);
    //if((source[i]&0x3f)==0x10) // RFE
    {
      // RFE: shift the interrupt/mode bit stack in Status down one level
      emit_readword(&Status,0);
      emit_andimm(0,0x3c,1);
      emit_andimm(0,~0xf,0);
      emit_orrshr_imm(1,2,0);
      emit_writeword(0,&Status);
    }
  }
}
3001
3002static void cop1_unusable(int i,struct regstat *i_regs)
3003{
3004 // XXX: should just just do the exception instead
3005 //if(!cop1_usable)
3006 {
3007 void *jaddr=out;
3008 emit_jmp(0);
3009 add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
3010 }
3011}
3012
// COP1 arithmetic/move instructions: always unusable on PSX (no FPU).
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3017
// COP1 loads/stores (LWC1/SWC1): always unusable on PSX (no FPU).
static void c1ls_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3022
// FP_STUB
// Out-of-line handler for FP_STUB stubs: write back dirty registers,
// load the cycle counter and PC, then jump to the interpreter's
// fp_exception (or fp_exception_ds for a delay slot).
static void do_cop1stub(int n)
{
  literal_pool(256);
  assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
  set_jump_target(stubs[n].addr, out);
  int i=stubs[n].a;                  // instruction index
// int rs=stubs[n].b;
  struct regstat *i_regs=(struct regstat *)stubs[n].c;
  int ds=stubs[n].d;                 // nonzero if in a delay slot
  if(!ds) {
    // re-materialize known constants before writing registers back
    load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i);
    //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
  }
  //else {printf("fp exception in delay slot\n");}
  wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty);
  if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
  emit_movimm(start+(i-ds)*4,EAX); // Get PC
  emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
  emit_jmp(ds?fp_exception_ds:fp_exception);
}
3044
// Emit code to read GTE data register 'copr' into host register tl,
// applying the register's read-side conversions (sign/zero extension,
// the reg-15 mirror of SXY2, and the packed-color cases 28/29).
// 'temp' is a scratch host register used by the packed cases.
static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
{
  switch (copr) {
    case 1:
    case 3:
    case 5:
    case 8:
    case 9:
    case 10:
    case 11:
      // 16-bit signed registers: sign-extend on read
      emit_readword(&reg_cop2d[copr],tl);
      emit_signextend16(tl,tl);
      emit_writeword(tl,&reg_cop2d[copr]); // hmh
      break;
    case 7:
    case 16:
    case 17:
    case 18:
    case 19:
      // 16-bit unsigned registers: zero-extend on read
      emit_readword(&reg_cop2d[copr],tl);
      emit_andimm(tl,0xffff,tl);
      emit_writeword(tl,&reg_cop2d[copr]);
      break;
    case 15:
      // reg 15 reads as a mirror of reg 14
      emit_readword(&reg_cop2d[14],tl); // SXY2
      emit_writeword(tl,&reg_cop2d[copr]);
      break;
    case 28:
    case 29:
      // pack regs 9/10/11 into consecutive 5-bit fields; each field is
      // zeroed when its source has the sign bit (0x8000) set
      emit_readword(&reg_cop2d[9],temp);
      emit_testimm(temp,0x8000); // do we need this?
      emit_andimm(temp,0xf80,temp);
      emit_andne_imm(temp,0,temp);
      emit_shrimm(temp,7,tl);
      emit_readword(&reg_cop2d[10],temp);
      emit_testimm(temp,0x8000);
      emit_andimm(temp,0xf80,temp);
      emit_andne_imm(temp,0,temp);
      emit_orrshr_imm(temp,2,tl);
      emit_readword(&reg_cop2d[11],temp);
      emit_testimm(temp,0x8000);
      emit_andimm(temp,0xf80,temp);
      emit_andne_imm(temp,0,temp);
      emit_orrshl_imm(temp,3,tl);
      emit_writeword(tl,&reg_cop2d[copr]);
      break;
    default:
      // plain read, no conversion
      emit_readword(&reg_cop2d[copr],tl);
      break;
  }
}
3096
3097static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3098{
3099 switch (copr) {
3100 case 15:
3101 emit_readword(&reg_cop2d[13],temp); // SXY1
3102 emit_writeword(sl,&reg_cop2d[copr]);
3103 emit_writeword(temp,&reg_cop2d[12]); // SXY0
3104 emit_readword(&reg_cop2d[14],temp); // SXY2
3105 emit_writeword(sl,&reg_cop2d[14]);
3106 emit_writeword(temp,&reg_cop2d[13]); // SXY1
3107 break;
3108 case 28:
3109 emit_andimm(sl,0x001f,temp);
3110 emit_shlimm(temp,7,temp);
3111 emit_writeword(temp,&reg_cop2d[9]);
3112 emit_andimm(sl,0x03e0,temp);
3113 emit_shlimm(temp,2,temp);
3114 emit_writeword(temp,&reg_cop2d[10]);
3115 emit_andimm(sl,0x7c00,temp);
3116 emit_shrimm(temp,3,temp);
3117 emit_writeword(temp,&reg_cop2d[11]);
3118 emit_writeword(sl,&reg_cop2d[28]);
3119 break;
3120 case 30:
3121 emit_movs(sl,temp);
3122 emit_mvnmi(temp,temp);
be516ebe 3123#if defined(HAVE_ARMV5) || defined(__aarch64__)
8062d65a