1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
26 #include "../gte_arm.h"
27 #include "../gte_neon.h"
29 #include "arm_features.h"
// Storage for the translation cache. Two forms are visible: a plain pointer
// (BASE_ADDR_DYNAMIC) and a 4096-byte-aligned static array of
// 1<<TARGET_SIZE_2 bytes. The directives that separate and close these
// alternatives are not visible in this view.
31 #if defined(BASE_ADDR_FIXED)
32 #elif defined(BASE_ADDR_DYNAMIC)
33 u_char *translation_cache;
35 u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
// Two alternative values for CALLER_SAVE_REGS (presumably a register bit
// mask, judging by the name); the conditional selecting between them is not
// visible in this view.
39 #define CALLER_SAVE_REGS 0x100f
41 #define CALLER_SAVE_REGS 0x120f
// Shorthand for the GCC "unused" attribute.
44 #define unused __attribute__((unused))
// Suppress unused-function/variable diagnostics for the rest of this file.
47 #pragma GCC diagnostic ignored "-Wunused-function"
48 #pragma GCC diagnostic ignored "-Wunused-variable"
49 #pragma GCC diagnostic ignored "-Wunused-but-set-variable"
// Objects defined in other translation units. dynarec_local is used below as
// the base address for FP-relative loads and stores.
52 extern int cycle_count;
53 extern int last_count;
55 extern int pending_exception;
56 extern int branch_target;
57 extern uint64_t readmem_dword;
58 extern void *dynarec_local;
59 extern u_int mini_ht[32][2];
// Prototypes of jump stubs implemented outside this file view.
61 void indirect_jump_indexed();
74 void jump_vaddr_r10();
75 void jump_vaddr_r12();
// 16-entry table of code pointers; its initializer is not visible in this
// view (presumably one jump_vaddr stub per host register).
77 void * const jump_vaddr_reg[16] = {
// Prototypes of the per-register invalidate stubs (r0-r10 and r12),
// implemented outside this file view.
96 void invalidate_addr_r0();
97 void invalidate_addr_r1();
98 void invalidate_addr_r2();
99 void invalidate_addr_r3();
100 void invalidate_addr_r4();
101 void invalidate_addr_r5();
102 void invalidate_addr_r6();
103 void invalidate_addr_r7();
104 void invalidate_addr_r8();
105 void invalidate_addr_r9();
106 void invalidate_addr_r10();
107 void invalidate_addr_r12();
// Table of the stub addresses stored as integers.
// NOTE(review): the (int) casts of function pointers only round-trip where
// int is pointer-sized. The entry between r10 and r12 and any entries after
// r12 are not visible in this view.
109 const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
122 (int)invalidate_addr_r12,
// Array of 1<<(TARGET_SIZE_2-17) words. Presumably a bitmap of translation
// cache regions whose instruction cache must be flushed (from the name) --
// TODO confirm against the code that reads it.
127 static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
// Patch the instruction at addr so that it refers to target_.
// All offsets are relative to the instruction address + 8 (the ARM pc value).
// Visible cases:
//  - an instruction whose low 12 bits are rewritten with offset>>2 and
//    rotate field 0xF (offset must be < 1024),
//  - an instruction with top byte 0x72 (see emit_jno_unlikely), patched the
//    same way, or with offset>>4 and rotate field 0xE when the offset is
//    < 4096 and 16-byte aligned, or replaced by a 0x7A...... branch,
//  - a branch (asserted via the top byte), whose 24-bit offset is rewritten.
// The tests selecting the first case are not visible in this view.
131 static void set_jump_target(void *addr, void *target_)
133 u_int target = (u_int)target_;
135 u_int *ptr2=(u_int *)ptr;
137 assert((target-(u_int)ptr2-8)<1024);
138 assert(((uintptr_t)addr&3)==0);
139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
141 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
143 else if(ptr[3]==0x72) {
144 // generated by emit_jno_unlikely
145 if((target-(u_int)ptr2-8)<1024) {
146 assert(((uintptr_t)addr&3)==0);
147 assert((target&3)==0);
148 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
150 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
151 assert(((uintptr_t)addr&3)==0);
152 assert((target&3)==0);
153 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
// Out of immediate range: overwrite the whole word with a 0x7A branch;
// (x<<6)>>8 on u_int yields the 24-bit word offset.
155 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
158 assert((ptr[3]&0x0e)==0xa);
// Keep the condition/opcode byte, replace the 24-bit branch offset.
159 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
163 // This optionally copies the instruction from the target of the branch into
164 // the space before the branch. Works, but the difference in speed is
165 // usually insignificant.
// addr:   address of the instruction to patch
// target: address the instruction should refer to
// copy:   when nonzero, the word before the branch must be 0xe28dd000 (the
//         word emit_addnop produces for r13); it may be overwritten with the
//         instruction found at target.
167 static void set_jump_target_fillslot(int addr,u_int target,int copy)
169 u_char *ptr=(u_char *)addr;
170 u_int *ptr2=(u_int *)ptr;
171 assert(!copy||ptr2[-1]==0xe28dd000);
174 assert((target-(u_int)ptr2-8)<4096);
175 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
178 assert((ptr[3]&0x0e)==0xa);
179 u_int target_insn=*(u_int *)target;
// Classify the instruction at the target; the actions taken for each class
// are on lines not visible in this view.
180 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
183 if((target_insn&0x0c100000)==0x04100000) { // Load
186 if(target_insn&0x08000000) {
190 ptr2[-1]=target_insn;
// Rewrite the 24-bit branch offset, keeping the top byte.
193 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
// Store an (addr, val) pair in literals[] at index literalcount.
// Asserts that the table still has room.
199 static void add_literal(int addr,int val)
201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
207 // from a pointer to external jump stub (which was produced by emit_extjump2)
208 // find where the jumping insn is
// The word at stub+4 must be an `ldr rx, [pc, #ofs]`; the address of the
// word it loads is computed as insn address + ofs + 8.
209 static void *find_extjump_insn(void *stub)
211 int *ptr=(int *)(stub+4);
212 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
213 u_int offset=*ptr&0xfff;
214 void **l_ptr=(void *)ptr+offset+8;
218 // find where an external branch is linked to, using the address of its stub:
219 // get the address that the insn one after the stub start loads (dyna_linker arg1),
220 // treat it as a pointer to the branch insn,
221 // return the address that branch jumps to
222 static void *get_pointer(void *stub)
224 //printf("get_pointer(%x)\n",(int)stub);
225 int *i_ptr=find_extjump_insn(stub);
226 assert((*i_ptr&0x0f000000)==0x0a000000);
// (insn<<8)>>6 sign-extends the 24-bit offset field and scales it by 4;
// +8 accounts for the ARM pc being two instructions ahead.
227 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
230 // Find the "clean" entry point from a "dirty" entry point
231 // by skipping past the call to verify_code
// Steps forward to a `bl` (0xeb......); if an unconditional branch
// (0xea......) is then found, its destination is returned instead. The
// pointer advances between the visible lines are not shown in this view.
232 static void *get_clean_addr(void *addr)
234 signed int *ptr = addr;
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
243 if((*ptr&0xFF000000)==0xea000000) {
244 return (char *)ptr+((*ptr<<8)>>6)+8; // follow jump
// Decode the source address, copy address and length from the instructions
// at ptr and return nonzero when the len bytes at source and copy are equal.
// Two decodings are visible: values fetched through pc-relative `ldr`
// literal loads (0xe59f....), and values assembled from 0xe30.....-style
// immediates (low 16 bits from ptr[0]/ptr[1]/ptr[4], high 16 bits from
// ptr[2]/ptr[3]). The conditional selecting between them and the `offset`
// computations are not visible in this view.
249 static int verify_dirty(u_int *ptr)
253 // get from literal pool
254 assert((*ptr&0xFFFF0000)==0xe59f0000);
256 u_int source=*(u_int*)((void *)ptr+offset+8);
258 assert((*ptr&0xFFFF0000)==0xe59f0000);
260 u_int copy=*(u_int*)((void *)ptr+offset+8);
262 assert((*ptr&0xFFFF0000)==0xe59f0000);
264 u_int len=*(u_int*)((void *)ptr+offset+8);
269 assert((*ptr&0xFFF00000)==0xe3000000);
270 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
271 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
272 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
277 //printf("verify_dirty: %x %x %x\n",source,copy,len);
278 return !memcmp((void *)source,(void *)copy,len);
281 // This doesn't necessarily find all clean entry points, just
282 // guarantees that it's not dirty
// Returns 1 when no `bl` is found at the expected position, and 0 when the
// `bl` there targets verify_code, verify_code_vm or verify_code_ds. Two
// alternative starting offsets (+4 and +6 words) are visible; the
// conditional selecting between them is not visible in this view.
283 static int isclean(void *addr)
286 u_int *ptr=((u_int *)addr)+4;
288 u_int *ptr=((u_int *)addr)+6;
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
298 // get source that block at addr was compiled from (host pointers)
// Decodes source and len the same two ways as verify_dirty (literal-pool
// loads or immediate pairs) and writes [source, source+len) to *start/*end.
// The `ptr`/`offset` setup lines are not visible in this view.
299 static void get_bounds(void *addr, u_char **start, u_char **end)
304 // get from literal pool
305 assert((*ptr&0xFFFF0000)==0xe59f0000);
307 u_int source=*(u_int*)((void *)ptr+offset+8);
309 //assert((*ptr&0xFFFF0000)==0xe59f0000);
311 //u_int copy=*(u_int*)((void *)ptr+offset+8);
313 assert((*ptr&0xFFFF0000)==0xe59f0000);
315 u_int len=*(u_int*)((void *)ptr+offset+8);
320 assert((*ptr&0xFFF00000)==0xe3000000);
321 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
322 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
323 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
326 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
327 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
328 *start=(u_char *)source;
329 *end=(u_char *)source+len;
332 // Allocate a specific ARM register.
// cur: register state to update; reg: guest register; hr: host register.
// If reg is already mapped to some host register (EXCLUDE_REG is skipped),
// its dirty bit is carried over to hr. The isconst bit of hr is cleared.
333 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
338 // see if it's already allocated (and dealloc it)
339 for(n=0;n<HOST_REGS;n++)
341 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
342 dirty=(cur->dirty>>n)&1;
348 cur->dirty&=~(1<<hr);
349 cur->dirty|=dirty<<hr;
350 cur->isconst&=~(1<<hr);
353 // Alloc cycle count into dedicated register
// (maps CCREG to HOST_CCREG through alloc_arm_reg)
354 static void alloc_cc(struct regstat *cur,int i)
356 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
// Printable names of the 16 host registers, indexed by register number;
// used by the assem_debug() calls below. The initializer is not visible in
// this view.
361 static unused char regname[16][4] = {
// Store one 32-bit instruction word at the current output position `out`.
379 static void output_w32(u_int word)
381 *((u_int *)out)=word;
// Pack three register numbers into instruction fields:
// rn -> bits 19:16, rd -> bits 15:12, rm -> bits 3:0.
385 static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
390 return((rn<<16)|(rd<<12)|rm);
// Pack rn, rd and an immediate that is to be shifted left by `shift` bits.
// shift must be even; it is stored as the rotate value (32-shift)&30,
// placed starting at bit 7.
393 static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
398 assert((shift&1)==0);
399 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
// Try to express imm as an immediate with an even rotate count and store the
// encoded field in *encoded. Callers treat a nonzero return as success.
// Only the encoding store and the rotate-by-2 step of the search are visible
// in this view.
402 static u_int genimm(u_int imm,u_int *encoded)
410 *encoded=((i&30)<<7)|imm;
413 imm=(imm>>2)|(imm<<30);i-=2;
// Wrapper around genimm() for callers that need imm to be encodable.
// The check performed on `ret` is not visible in this view.
418 static void genimm_checked(u_int imm,u_int *encoded)
420 u_int ret=genimm(imm,encoded);
// Compute the 24-bit branch offset field for a branch emitted at `out` that
// targets addr (relative to out+8, in words). Offsets outside
// [-33554432, 33554432) are reported through SysPrintf; what follows the
// report is not visible in this view.
425 static u_int genjmp(u_int addr)
427 int offset=addr-(int)out-8;
428 if(offset<-33554432||offset>=33554432) {
430 SysPrintf("genjmp: out of range: %08x\n", offset);
435 return ((u_int)offset>>2)&0xffffff;
// Emit `mov rt,rs`.
438 static void emit_mov(int rs,int rt)
440 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
441 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
// Emit `movs rt,rs` (move and set flags).
444 static void emit_movs(int rs,int rt)
446 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
447 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
// Emit `add rt,rs1,rs2`.
450 static void emit_add(int rs1,int rs2,int rt)
452 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
453 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
// Emit `adds rt,rs1,rs2` (add and set flags).
456 static void emit_adds(int rs1,int rs2,int rt)
458 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
459 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
// Emit `adcs rt,rs1,rs2` (add with carry and set flags).
462 static void emit_adcs(int rs1,int rs2,int rt)
464 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
465 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
// Emit `rsb rt,rs,#0`, i.e. rt = 0 - rs.
468 static void emit_neg(int rs, int rt)
470 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
471 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
// Emit `sub rt,rs1,rs2`.
474 static void emit_sub(int rs1,int rs2,int rt)
476 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
477 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
// Emit `mov rt,#0`.
480 static void emit_zeroreg(int rt)
482 assem_debug("mov %s,#0\n",regname[rt]);
483 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
// Emit a pc-relative `ldr rt,[pc,#0]` and register (out, imm) with
// add_literal(). The offset field is emitted as zero here; presumably it is
// patched once the literal is placed -- TODO confirm.
486 static void emit_loadlp(u_int imm,u_int rt)
488 add_literal((int)out,imm);
489 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
490 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
// Emit `movw rt,#imm`: imm bits 11:0 go to instruction bits 11:0 and imm
// bits 15:12 to instruction bits 19:16.
493 static void emit_movw(u_int imm,u_int rt)
496 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
497 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
// Emit `movt rt,#(imm>>16)`: the upper 16 bits of imm are encoded
// (bits 27:16 -> instruction bits 11:0, bits 31:28 -> instruction bits 19:16).
500 static void emit_movt(u_int imm,u_int rt)
502 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
503 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
// Load the 32-bit constant imm into rt using the shortest visible form:
//  - `mov rt,#imm` when imm is encodable,
//  - `mvn rt,#~imm` when the complement is encodable,
//  - for imm < 65536, `mov` of the high byte followed by `add` of the low byte,
//  - a movw/movt pair.
// The conditions around the last two forms are only partly visible in this
// view.
506 static void emit_movimm(u_int imm,u_int rt)
509 if(genimm(imm,&armval)) {
510 assem_debug("mov %s,#%d\n",regname[rt],imm);
511 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
512 }else if(genimm(~imm,&armval)) {
// NOTE(review): the debug text prints imm, while the encoded operand is ~imm.
513 assem_debug("mvn %s,#%d\n",regname[rt],imm);
514 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
515 }else if(imm<65536) {
517 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
518 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
519 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
520 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
528 emit_movw(imm&0x0000FFFF,rt);
529 emit_movt(imm&0xFFFF0000,rt);
// Emit `add rt,pc,#0`; the immediate is left as zero here (the debug text
// shows it as `?`).
534 static void emit_pcreladdr(u_int rt)
536 assem_debug("add %s,pc,#?\n",regname[rt]);
537 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
// Emit a load of guest register r into host register hr.
// The address is taken from reg[] (indexed by r&63, with +4 when bit 6 of r
// is set), or from hi, lo, cycle_count, Status or invc_ptr for the special
// register numbers. It is then expressed as an offset from dynarec_local
// and loaded with `ldr hr,[fp,#offset]`. The condition guarding the
// "64bit load" message is not visible in this view.
540 static void emit_loadreg(int r, int hr)
543 SysPrintf("64bit load in 32bit mode!\n");
550 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
551 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
552 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
553 if(r==CCREG) addr=(int)&cycle_count;
554 if(r==CSREG) addr=(int)&Status;
555 if(r==INVCP) addr=(int)&invc_ptr;
556 u_int offset = addr-(u_int)&dynarec_local;
558 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
559 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
// Emit a store of host register hr into the memory slot of guest register r.
// Address selection mirrors emit_loadreg for reg[], hi, lo and cycle_count;
// the store is `str hr,[fp,#offset]` with offset relative to dynarec_local.
// The condition guarding the "64bit store" message is not visible here.
563 static void emit_storereg(int r, int hr)
566 SysPrintf("64bit store in 32bit mode!\n");
570 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
571 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
572 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
573 if(r==CCREG) addr=(int)&cycle_count;
574 u_int offset = addr-(u_int)&dynarec_local;
576 assem_debug("str %s,fp+%d\n",regname[hr],offset);
577 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
// Emit `tst rs,rt` (flags from rs AND rt; no register is written).
580 static void emit_test(int rs, int rt)
582 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
583 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
// Emit `tst rs,#imm`; imm is encoded through genimm_checked().
586 static void emit_testimm(int rs,int imm)
589 assem_debug("tst %s,#%d\n",regname[rs],imm);
590 genimm_checked(imm,&armval);
591 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
// Emit `tsteq rs,#imm` (a tst executed only when the Z flag is set).
// NOTE(review): the debug text uses `$` before the immediate where the other
// emitters use `#`.
594 static void emit_testeqimm(int rs,int imm)
597 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
598 genimm_checked(imm,&armval);
599 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
// Emit `mvn rt,rs` (bitwise complement).
602 static void emit_not(int rs,int rt)
604 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
605 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
// Emit `mvnmi rt,rs` (complement, executed only when the N flag is set).
608 static void emit_mvnmi(int rs,int rt)
610 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
611 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
// Emit `and rt,rs1,rs2`.
614 static void emit_and(u_int rs1,u_int rs2,u_int rt)
616 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
617 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
// Emit `orr rt,rs1,rs2`.
620 static void emit_or(u_int rs1,u_int rs2,u_int rt)
622 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
623 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
// Emit `orr rt,rt,rs,lsl #imm` (rt |= rs << imm).
626 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
631 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
632 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
// Emit `orr rt,rt,rs,lsr #imm` (rt |= rs >> imm, logical shift).
635 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
640 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
641 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
// Emit `eor rt,rs1,rs2`.
644 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
646 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
647 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
// Emit code for rt = rs + imm. Visible strategies, in order:
//  - a single `add` when imm is encodable, or a single `sub` when -imm is,
//  - when rt != rs and the magnitude fits in 16 bits, a movw of imm (or of
//    -imm) into rt (the instruction combining it with rs is not visible),
//  - for -imm < 65536, two `sub` instructions (high byte, then low byte),
//  - an `add` of the lowest 8-bit group of imm found with ffs() (the loop
//    around it is not visible),
//  - a plain `mov rt,rs` when rs != rt (its guarding condition is not
//    visible in this view).
650 static void emit_addimm(u_int rs,int imm,u_int rt)
656 if(genimm(imm,&armval)) {
657 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
658 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
659 }else if(genimm(-imm,&armval)) {
660 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
661 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
663 }else if(rt!=rs&&(u_int)imm<65536) {
664 emit_movw(imm&0x0000ffff,rt);
666 }else if(rt!=rs&&(u_int)-imm<65536) {
667 emit_movw(-imm&0x0000ffff,rt);
670 }else if((u_int)-imm<65536) {
671 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
672 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
673 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
674 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
// Isolate the lowest 8-bit group of imm, starting at an even bit position.
677 int shift = (ffs(imm) - 1) & ~1;
678 int imm8 = imm & (0xff << shift);
679 genimm_checked(imm8,&armval);
680 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
681 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
688 else if(rs!=rt) emit_mov(rs,rt);
// Emit rt += imm with the flags set by the final instruction.
// imm must be in (-65536, 65536). A single `adds` or `subs` is used when the
// value is encodable; otherwise the high byte is applied with add/sub and
// the low byte with adds/subs. The condition selecting between the two
// two-instruction forms is not visible in this view.
691 static void emit_addimm_and_set_flags(int imm,int rt)
693 assert(imm>-65536&&imm<65536);
695 if(genimm(imm,&armval)) {
696 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
697 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
698 }else if(genimm(-imm,&armval)) {
// NOTE(review): the debug text prints imm, while the encoded operand is -imm.
699 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
700 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
702 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
703 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
704 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
705 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
707 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
708 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
709 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
710 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
// Emit rt += imm via emit_addimm(), whose visible instruction forms are all
// non-flag-setting.
714 static void emit_addimm_no_flags(u_int imm,u_int rt)
716 emit_addimm(rt,imm,rt);
// Emit `add r,r,#0`, an instruction that leaves r unchanged (used as a nop).
719 static void emit_addnop(u_int r)
722 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
723 output_w32(0xe2800000|rd_rn_rm(r,r,0));
// Emit `adc rt,rs,#imm` (add with carry); imm is encoded through
// genimm_checked().
726 static void emit_adcimm(u_int rs,int imm,u_int rt)
729 genimm_checked(imm,&armval);
730 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
731 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
// Add a 32-bit immediate to a 64-bit value held in the register pair rsh:rsl.
// imm is loaded into HOST_TEMPREG, the low words are added with flags
// (`adds`) and the carry is added to the high word (`adc rth,rsh,#0`).
734 static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
736 // TODO: if(genimm(imm,&armval)) ...
738 emit_movimm(imm,HOST_TEMPREG);
739 emit_adds(HOST_TEMPREG,rsl,rtl);
740 emit_adcimm(rsh,0,rth);
// Emit rt = rs & imm. Visible forms:
//  - `and` with an encodable imm, or `bic` with an encodable ~imm,
//  - imm == 65535: either two `bic` instructions clearing bits 31:24 and
//    23:16, or a single `uxth` (the selecting conditional is not visible),
//  - otherwise imm (asserted to be in (0, 65535)) is built in HOST_TEMPREG,
//    shown as r14 in the debug text, and a register `and` is emitted.
// The first branch of the if-chain is not visible in this view.
743 static void emit_andimm(int rs,int imm,int rt)
748 }else if(genimm(imm,&armval)) {
749 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
750 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
751 }else if(genimm(~imm,&armval)) {
752 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
753 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
754 }else if(imm==65535) {
756 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
757 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
758 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
759 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
761 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
762 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
765 assert(imm>0&&imm<65535);
767 assem_debug("mov r14,#%d\n",imm&0xFF00);
768 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
769 assem_debug("add r14,r14,#%d\n",imm&0xFF);
770 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
772 emit_movw(imm,HOST_TEMPREG);
774 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
775 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
// Emit rt = rs | imm. Visible forms: a plain move when rs != rt (its
// guarding condition is not visible), a single `orr` for an encodable imm,
// otherwise (imm asserted in (0, 65536)) two `orr` instructions applying the
// high byte and then the low byte.
779 static void emit_orimm(int rs,int imm,int rt)
783 if(rs!=rt) emit_mov(rs,rt);
784 }else if(genimm(imm,&armval)) {
785 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
786 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
788 assert(imm>0&&imm<65536);
789 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
// NOTE(review): this debug line names rs as the source, but the second
// emitted instruction below uses rt as its source register.
790 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
791 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
792 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
// Emit rt = rs ^ imm. Visible forms: a plain move when rs != rt (its
// guarding condition is not visible), a single `eor` for an encodable imm,
// otherwise (imm asserted in (0, 65536)) two `eor` instructions applying the
// high byte and then the low byte.
796 static void emit_xorimm(int rs,int imm,int rt)
800 if(rs!=rt) emit_mov(rs,rt);
801 }else if(genimm(imm,&armval)) {
802 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
803 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
805 assert(imm>0&&imm<65536);
806 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
// NOTE(review): this debug line names rs as the source, but the second
// emitted instruction below uses rt as its source register.
807 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
808 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
809 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
// Emit `lsl rt,rs,#imm` (encoded as mov with an immediate left shift).
813 static void emit_shlimm(int rs,u_int imm,int rt)
818 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
819 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
// Emit `lsls rt,rs,#imm` (left shift, setting flags).
822 static void emit_lsls_imm(int rs,int imm,int rt)
826 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
827 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
// Emit `lslpls rt,rs,#imm` (flag-setting left shift, executed only when the
// N flag is clear).
830 static unused void emit_lslpls_imm(int rs,int imm,int rt)
834 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
835 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
// Emit `lsr rt,rs,#imm` (logical right shift by an immediate).
838 static void emit_shrimm(int rs,u_int imm,int rt)
842 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
843 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
// Emit `asr rt,rs,#imm` (arithmetic right shift by an immediate).
846 static void emit_sarimm(int rs,u_int imm,int rt)
850 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
851 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
// Emit `ror rt,rs,#imm` (rotate right by an immediate).
854 static void emit_rorimm(int rs,u_int imm,int rt)
858 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
859 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
// Sign-extend the low 16 bits of rs into rt. Two alternative sequences are
// visible: shift left then arithmetic shift right by 16, or a single `sxth`.
// The conditional selecting between them is not visible in this view.
862 static void emit_signextend16(int rs,int rt)
865 emit_shlimm(rs,16,rt);
866 emit_sarimm(rt,16,rt);
868 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
869 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
// Sign-extend the low 8 bits of rs into rt. Two alternative sequences are
// visible: shift left then arithmetic shift right by 24, or a single `sxtb`.
// The conditional selecting between them is not visible in this view.
873 static void emit_signextend8(int rs,int rt)
876 emit_shlimm(rs,24,rt);
877 emit_sarimm(rt,24,rt);
879 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
880 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
// Emit `lsl rt,rs,shift`, where `shift` is the register holding the amount.
884 static void emit_shl(u_int rs,u_int shift,u_int rt)
890 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
891 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
// Emit `lsr rt,rs,shift`, where `shift` is the register holding the amount.
894 static void emit_shr(u_int rs,u_int shift,u_int rt)
899 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
900 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
// Emit `asr rt,rs,shift`, where `shift` is the register holding the amount.
903 static void emit_sar(u_int rs,u_int shift,u_int rt)
908 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
909 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
// Emit `orr rt,rt,rs,lsl shift` (rt |= rs << register `shift`).
912 static void emit_orrshl(u_int rs,u_int shift,u_int rt)
917 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
918 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
// Emit `orr rt,rt,rs,lsr shift` (rt |= rs >> register `shift`, logical).
921 static void emit_orrshr(u_int rs,u_int shift,u_int rt)
926 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
927 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
// Emit a comparison of rs against imm. `cmp rs,#imm` is used when imm is
// encodable and `cmn rs,#-imm` when -imm is. Otherwise the constant (imm or
// -imm) is loaded into HOST_TEMPREG, shown as r14 in the debug text, and a
// register `cmp` or `cmn` is emitted. The condition selecting between the
// two register forms is not visible in this view.
930 static void emit_cmpimm(int rs,int imm)
933 if(genimm(imm,&armval)) {
934 assem_debug("cmp %s,#%d\n",regname[rs],imm);
935 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
936 }else if(genimm(-imm,&armval)) {
937 assem_debug("cmn %s,#%d\n",regname[rs],imm);
938 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
941 emit_movimm(imm,HOST_TEMPREG);
942 assem_debug("cmp %s,r14\n",regname[rs]);
943 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
946 emit_movimm(-imm,HOST_TEMPREG);
947 assem_debug("cmn %s,r14\n",regname[rs]);
948 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
// Emit `movne rt,#imm` (move executed only when the Z flag is clear).
952 static void emit_cmovne_imm(int imm,int rt)
954 assem_debug("movne %s,#%d\n",regname[rt],imm);
956 genimm_checked(imm,&armval);
957 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
// Emit `movlt rt,#imm` (move executed only on signed less-than).
960 static void emit_cmovl_imm(int imm,int rt)
962 assem_debug("movlt %s,#%d\n",regname[rt],imm);
964 genimm_checked(imm,&armval);
965 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
// Emit `movcc rt,#imm` (move executed only when the carry flag is clear,
// i.e. unsigned lower after a cmp).
968 static void emit_cmovb_imm(int imm,int rt)
970 assem_debug("movcc %s,#%d\n",regname[rt],imm);
972 genimm_checked(imm,&armval);
973 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
// Emit `movne rt,rs`.
976 static void emit_cmovne_reg(int rs,int rt)
978 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
979 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
// Emit `movlt rt,rs`.
982 static void emit_cmovl_reg(int rs,int rt)
984 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
985 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
// Emit `movmi rt,rs` (move executed only when the N flag is set).
988 static void emit_cmovs_reg(int rs,int rt)
990 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
991 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
// Set rt to 1 via `movlt`, otherwise leave it 0. rt is cleared up front when
// it differs from rs, or later with a non-flag-setting mov when rs == rt.
// The comparison between the two clears is on a line not visible in this
// view.
994 static void emit_slti32(int rs,int imm,int rt)
996 if(rs!=rt) emit_zeroreg(rt);
998 if(rs==rt) emit_movimm(0,rt);
999 emit_cmovl_imm(1,rt);
// Set rt to 1 if rs < imm (unsigned), else 0. rt is cleared before the
// compare when it differs from rs; when rs == rt it is cleared after the
// compare (the mov does not affect flags). `movcc` then writes the 1.
1002 static void emit_sltiu32(int rs,int imm,int rt)
1004 if(rs!=rt) emit_zeroreg(rt);
1005 emit_cmpimm(rs,imm);
1006 if(rs==rt) emit_movimm(0,rt);
1007 emit_cmovb_imm(1,rt);
// Emit `cmp rs,rt`.
1010 static void emit_cmp(int rs,int rt)
1012 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1013 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
// Only the final `movlt rt,#0` is visible in this view; the instructions
// emitted before it are on lines that are not shown.
1016 static void emit_set_gz32(int rs, int rt)
1018 //assem_debug("set_gz32\n");
1021 emit_cmovl_imm(0,rt);
// Set rt to 1 if rs is nonzero, else 0: copy rs to rt with flags (or just
// test rs when rs == rt), then `movne rt,#1`.
1024 static void emit_set_nz32(int rs, int rt)
1026 //assem_debug("set_nz32\n");
1027 if(rs!=rt) emit_movs(rs,rt);
1028 else emit_test(rs,rs);
1029 emit_cmovne_imm(1,rt);
// Set rt to 1 via `movlt`, otherwise 0. rt is cleared first when it aliases
// neither source, or after the comparison when it does. The comparison
// itself is on a line not visible in this view.
1032 static void emit_set_if_less32(int rs1, int rs2, int rt)
1034 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
1035 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1037 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1038 emit_cmovl_imm(1,rt);
// Set rt to 1 via `movcc`, otherwise 0. rt is cleared first when it aliases
// neither source, or after the comparison when it does. The comparison
// itself is on a line not visible in this view.
1041 static void emit_set_if_carry32(int rs1, int rs2, int rt)
1043 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
1044 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1046 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1047 emit_cmovb_imm(1,rt);
// Emit `bl` to the given address; the offset comes from genjmp(). The line
// deriving `a` from a_ is not visible in this view.
1050 static void emit_call(const void *a_)
1053 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1054 u_int offset=genjmp(a);
1055 output_w32(0xeb000000|offset);
// Emit an unconditional `b` to the given address. The line deriving `a`
// from a_ is not visible in this view.
1058 static void emit_jmp(const void *a_)
1061 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1062 u_int offset=genjmp(a);
1063 output_w32(0xea000000|offset);
// Emit `bne` to the given address. The line deriving `a` from a_ is not
// visible in this view.
1066 static void emit_jne(const void *a_)
1069 assem_debug("bne %x\n",a);
1070 u_int offset=genjmp(a);
1071 output_w32(0x1a000000|offset);
// Emit `beq a`.
1074 static void emit_jeq(int a)
1076 assem_debug("beq %x\n",a);
1077 u_int offset=genjmp(a);
1078 output_w32(0x0a000000|offset);
// Emit `bmi a` (branch if the N flag is set).
1081 static void emit_js(int a)
1083 assem_debug("bmi %x\n",a);
1084 u_int offset=genjmp(a);
1085 output_w32(0x4a000000|offset);
// Emit `bpl a` (branch if the N flag is clear).
1088 static void emit_jns(int a)
1090 assem_debug("bpl %x\n",a);
1091 u_int offset=genjmp(a);
1092 output_w32(0x5a000000|offset);
// Emit `blt a` (branch on signed less-than).
1095 static void emit_jl(int a)
1097 assem_debug("blt %x\n",a);
1098 u_int offset=genjmp(a);
1099 output_w32(0xba000000|offset);
// Emit `bge a` (branch on signed greater-or-equal).
1102 static void emit_jge(int a)
1104 assem_debug("bge %x\n",a);
1105 u_int offset=genjmp(a);
1106 output_w32(0xaa000000|offset);
// Emit `bvc a` (branch if the overflow flag is clear).
1109 static void emit_jno(int a)
1111 assem_debug("bvc %x\n",a);
1112 u_int offset=genjmp(a);
1113 output_w32(0x7a000000|offset);
// Emit `bcs a` (branch if the carry flag is set).
1116 static void emit_jc(int a)
1118 assem_debug("bcs %x\n",a);
1119 u_int offset=genjmp(a);
1120 output_w32(0x2a000000|offset);
// Emit `bcc` (branch if the carry flag is clear) to the given address. The
// line deriving `a` from a_ is not visible in this view.
1123 static void emit_jcc(void *a_)
1126 assem_debug("bcc %x\n",a);
1127 u_int offset=genjmp(a);
1128 output_w32(0x3a000000|offset);
// Emit `blx r` (call through register r).
1131 static void emit_callreg(u_int r)
1134 assem_debug("blx %s\n",regname[r]);
1135 output_w32(0xe12fff30|r);
// Emit `mov pc,r` (jump to the address held in register r).
1138 static void emit_jmpreg(u_int r)
1140 assem_debug("mov pc,%s\n",regname[r]);
1141 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
// Emit `ldr rt,[rs,#offset]`; offset must be in (-4096, 4096). Two encodings
// are visible: add-offset (0xe59.....) with offset, and subtract-offset
// (0xe51.....) with -offset. The sign test selecting between them is not
// visible in this view.
1144 static void emit_readword_indexed(int offset, int rs, int rt)
1146 assert(offset>-4096&&offset<4096);
1147 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1149 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1151 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
// Emit `ldr rt,[rs1,rs2,lsl #2]` (word load with the index scaled by 4).
1155 static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1157 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1158 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
// Emit `ldrcc rt,[rs1,rs2]` (word load, executed only when carry is clear).
1161 static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1163 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1164 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
// Emit `ldrccb rt,[rs1,rs2]` (unsigned byte load, only when carry is clear).
1167 static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1169 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1170 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
// Emit `ldrccsb rt,[rs1,rs2]` (signed byte load, only when carry is clear).
1173 static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1175 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1176 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
// Emit `ldrcch rt,[rs1,rs2]` (unsigned halfword load, only when carry is
// clear).
1179 static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1181 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1182 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
// Emit `ldrccsh rt,[rs1,rs2]` (signed halfword load, only when carry is
// clear).
1185 static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1187 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1188 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
// Emit `ldrsb rt,[rs,#offset]`; offset must be in (-256, 256). The 8-bit
// offset is split into a high nibble (bits 11:8) and a low nibble (bits
// 3:0). Add-offset and subtract-offset encodings are both visible; the sign
// test selecting between them is not visible in this view.
1191 static void emit_movsbl_indexed(int offset, int rs, int rt)
1193 assert(offset>-256&&offset<256);
1194 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1196 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1198 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
// Emit `ldrsh rt,[rs,#offset]`; offset must be in (-256, 256) and is split
// into two nibbles. The sign test selecting the add/subtract encoding is not
// visible in this view.
1202 static void emit_movswl_indexed(int offset, int rs, int rt)
1204 assert(offset>-256&&offset<256);
1205 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1207 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1209 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
// Emit `ldrb rt,[rs,#offset]`; offset must be in (-4096, 4096). The sign
// test selecting the add/subtract encoding is not visible in this view.
1213 static void emit_movzbl_indexed(int offset, int rs, int rt)
1215 assert(offset>-4096&&offset<4096);
1216 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1218 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1220 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
// Emit `ldrh rt,[rs,#offset]`; offset must be in (-256, 256) and is split
// into two nibbles. The sign test selecting the add/subtract encoding is not
// visible in this view.
1224 static void emit_movzwl_indexed(int offset, int rs, int rt)
1226 assert(offset>-256&&offset<256);
1227 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1229 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1231 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
// Emit `ldrd rt,[rs,#offset]` (doubleword load); offset must be in
// (-256, 256) and is split into two nibbles. The sign test selecting the
// add/subtract encoding is not visible in this view.
1235 static void emit_ldrd(int offset, int rs, int rt)
1237 assert(offset>-256&&offset<256);
1238 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1240 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1242 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
// Emit `ldr rt,[fp,#offset]` for a variable at addr. The offset is addr
// minus the address of dynarec_local and must be below 4096.
1246 static void emit_readword(void *addr, int rt)
1248 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1249 assert(offset<4096);
1250 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1251 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
// Emit `str rt,[rs,#offset]`; offset must be in (-4096, 4096). The sign test
// selecting the add/subtract encoding is not visible in this view.
1254 static void emit_writeword_indexed(int rt, int offset, int rs)
1256 assert(offset>-4096&&offset<4096);
1257 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1259 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1261 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
// Emit `strh rt,[rs,#offset]`; offset must be in (-256, 256) and is split
// into two nibbles. The sign test selecting the add/subtract encoding is not
// visible in this view.
1265 static void emit_writehword_indexed(int rt, int offset, int rs)
1267 assert(offset>-256&&offset<256);
1268 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1270 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1272 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
// Emit `strb rt,[rs,#offset]`; offset must be in (-4096, 4096). The sign
// test selecting the add/subtract encoding is not visible in this view.
1276 static void emit_writebyte_indexed(int rt, int offset, int rs)
1278 assert(offset>-4096&&offset<4096);
1279 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1281 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1283 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
// Emit `strcc rt,[rs1,rs2]` (word store, executed only when carry is clear).
1287 static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1289 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1290 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
// Emit `strccb rt,[rs1,rs2]` (byte store, executed only when carry is clear).
1293 static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1295 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1296 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
// Emit `strcch rt,[rs1,rs2]` (halfword store, executed only when carry is
// clear).
1299 static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1301 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1302 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
// Emit `str rt,[fp,#offset]` for a variable at addr. The offset is addr
// minus the address of dynarec_local and must be below 4096.
1305 static void emit_writeword(int rt, void *addr)
1307 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1308 assert(offset<4096);
1309 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1310 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
// Emit `umull lo,hi,rs1,rs2`: unsigned 32x32 -> 64-bit multiply, with the
// result in the hi:lo register pair.
1313 static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1315 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1320 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
// Emit `smull lo,hi,rs1,rs2`: signed 32x32 -> 64-bit multiply, with the
// result in the hi:lo register pair.
1323 static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1325 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1330 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
// Emit clz rt,rs (count leading zeros of rs into rt).
1333 static void emit_clz(int rs,int rt)
1335 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1336 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
// Emit the conditional subtract subcs rt,rs1,rs2 (executes on carry set).
1339 static void emit_subcs(int rs1,int rs2,int rt)
1341 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1342 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
// Emit the conditional shift lsrcc rt,rs,#imm.
// imm is placed unmasked at bits 7-11, so it must fit in 5 bits;
// NOTE(review): no range check on imm is visible here -- confirm callers
// or a preceding assert guarantee it.
1345 static void emit_shrcc_imm(int rs,u_int imm,int rt)
1349 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1350 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
// Emit the conditional shift lsrne rt,rs,#imm.
// imm is placed unmasked at bits 7-11, so it must fit in 5 bits;
// NOTE(review): no range check on imm is visible here -- confirm.
1353 static void emit_shrne_imm(int rs,u_int imm,int rt)
1357 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1358 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
// Emit rsbmi rt,rs,#0: negate rs into rt when the N flag is set.
1361 static void emit_negmi(int rs, int rt)
1363 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1364 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
// Emit rsbsmi rt,rs,#0: like emit_negmi, but the flag-setting form
// (the opcode differs from emit_negmi only in bit 20).
1367 static void emit_negsmi(int rs, int rt)
1369 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1370 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
// Emit bic rt,rs1,rs2 lsl shift, where shift is a register number
// (it is logged through regname[] and packed at bits 8-11).
1373 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1375 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1376 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
// Emit bic rt,rs1,rs2 lsr shift, where shift is a register number
// (it is logged through regname[] and packed at bits 8-11).
1379 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1381 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1382 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
// Emit teq rs,rt (flag-setting comparison of two registers; no
// destination register is encoded).
1385 static void emit_teq(int rs, int rt)
1387 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1388 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
// Emit rsb rt,rs,#imm (rt = imm - rs).
// genimm_checked converts imm into the encoded operand armval; presumably it
// asserts that imm is representable as an ARM immediate -- confirm.
1391 static void emit_rsbimm(int rs, int imm, int rt)
1394 genimm_checked(imm,&armval);
1395 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1396 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1399 // Load 2 immediates optimizing for small code size
// imm1 is always loaded into rt1 with emit_movimm. imm2 is then derived in
// rt2 from rt1 with a single add (if imm2-imm1 is encodable) or a single sub
// (if imm1-imm2 is encodable); otherwise it is loaded independently.
1400 static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
1402 emit_movimm(imm1,rt1);
1404 if(genimm(imm2-imm1,&armval)) {
1405 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
1406 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
1407 }else if(genimm(imm1-imm2,&armval)) {
1408 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
1409 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
// Fallback: the difference is not encodable either way.
1411 else emit_movimm(imm2,rt2);
1414 // Conditionally select one of two immediates, optimizing for small code size
1415 // This will only be called if HAVE_CMOV_IMM is defined
// rt receives imm1 unconditionally, then is changed to imm2 by instructions
// carrying the ne condition. Strategies, in order of preference:
//   1. mov imm1 + addne #(imm2-imm1), if that difference is encodable
//   2. mov imm1 + subne #(imm1-imm2), if that difference is encodable
//   3. mov imm1 + ldrne from a literal-pool entry holding imm2
//   4. movw/movt of imm1, each followed by movwne/movtne when the
//      corresponding 16-bit half of imm2 differs
// NOTE(review): strategies 3 and 4 are presumably alternatives selected by a
// preprocessor check on the target architecture -- confirm.
1416 static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1419 if(genimm(imm2-imm1,&armval)) {
1420 emit_movimm(imm1,rt);
1421 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1422 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1423 }else if(genimm(imm1-imm2,&armval)) {
1424 emit_movimm(imm1,rt);
1425 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1426 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
// Literal-pool variant: register the load site so the pool can patch the
// pc-relative offset later, then emit ldrne with a zero offset.
1430 emit_movimm(imm1,rt);
1431 add_literal((int)out,imm2);
1432 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1433 output_w32(0x15900000|rd_rn_rm(rt,15,0));
// movw/movt variant: the 16-bit immediate is split into a low 12-bit field
// (bits 0-11) and a high 4-bit field (bits 16-19) of the instruction word.
1435 emit_movw(imm1&0x0000FFFF,rt);
1436 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1437 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1438 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1440 emit_movt(imm1&0xFFFF0000,rt);
1441 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1442 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1443 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1449 // special case for checking invalid_code
// Emits a byte load from base + (r >> 12) into HOST_TEMPREG, followed by a
// compare of that byte against imm. imm must be in the range 0..127.
1450 static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1452 assert(imm<128&&imm>=0);
1454 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1455 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1456 emit_cmpimm(HOST_TEMPREG,imm);
// Emit blne (branch-with-link executed on the ne condition) to address a.
// genjmp supplies the encoded branch offset field for the target.
1459 static void emit_callne(int a)
1461 assem_debug("blne %x\n",a);
1462 u_int offset=genjmp(a);
1463 output_w32(0x1b000000|offset);
1466 // Used to preload hash table entries
// Emits pld with r as the base register. Marked unused because not every
// build configuration calls it.
1467 static unused void emit_prefetchreg(int r)
1469 assem_debug("pld %s\n",regname[r]);
1470 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1473 // Special case for mini_ht
// Emits ldreq rt,[rs,#offset]: a word load executed only on the eq
// condition. offset is unsigned and must be below 4096.
1474 static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1476 assert(offset<4096);
1477 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1478 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
// Emit orrne rt,rs,#imm (bitwise OR executed on the ne condition).
// genimm_checked produces the encoded immediate operand armval.
1481 static void emit_orrne_imm(int rs,int imm,int rt)
1484 genimm_checked(imm,&armval);
1485 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1486 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
// Emit andne rt,rs,#imm (bitwise AND executed on the ne condition).
// genimm_checked produces the encoded immediate operand armval.
1489 static void emit_andne_imm(int rs,int imm,int rt)
1492 genimm_checked(imm,&armval);
1493 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1494 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
// Emit addpl rt,rs,#imm (add executed on the pl condition).
// genimm_checked produces the encoded immediate operand armval.
1497 static unused void emit_addpl_imm(int rs,int imm,int rt)
1500 genimm_checked(imm,&armval);
1501 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1502 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
// Emit addvc pc,pc,#? as a jump taken when the overflow flag is clear.
// The instruction is written with a zero immediate; the target a is only
// logged here. NOTE(review): presumably the immediate is patched later or
// the target is registered by a statement not shown -- confirm.
1505 static void emit_jno_unlikely(int a)
1508 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1509 output_w32(0x72800000|rd_rn_rm(15,15,0));
// Emit stmia fp,{...} storing every register whose bit is set in reglist
// to memory starting at FP. Emits nothing when reglist is empty.
// The r%d lines below only build the debug listing of the register set.
1512 static void save_regs_all(u_int reglist)
1515 if(!reglist) return;
1516 assem_debug("stmia fp,{");
1519 assem_debug("r%d,",i);
1521 output_w32(0xe88b0000|reglist);
// Emit ldmia fp,{...} reloading every register whose bit is set in reglist
// from memory starting at FP; the counterpart of save_regs_all.
// Emits nothing when reglist is empty.
1524 static void restore_regs_all(u_int reglist)
1527 if(!reglist) return;
1528 assem_debug("ldmia fp,{");
1531 assem_debug("r%d,",i);
1533 output_w32(0xe89b0000|reglist);
1536 // Save registers before function call
// reglist is first restricted to CALLER_SAVE_REGS, so registers the callee
// preserves are never written out.
1537 static void save_regs(u_int reglist)
1539 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1540 save_regs_all(reglist);
1543 // Restore registers after function call
// Applies the same CALLER_SAVE_REGS mask as save_regs so the two stay
// symmetric for any given reglist.
1544 static void restore_regs(u_int reglist)
1546 reglist&=CALLER_SAVE_REGS;
1547 restore_regs_all(reglist);
1550 /* Stubs/epilogue */
// Flush pending literal-pool entries into the output stream.
// n is the number of bytes of slack the caller still needs: the pool is
// only written once the first pending load site is within n bytes of the
// 4096-byte reach of a pc-relative load. Each entry is literals[k][0] =
// address of the load instruction, literals[k][1] = the 32-bit value.
1552 static void literal_pool(int n)
1554 if(!literalcount) return;
// Still comfortably in range: defer the pool.
1556 if((int)out-literals[0][0]<4096-n) return;
1560 for(i=0;i<literalcount;i++)
// By default the value is placed at the current output position...
1562 u_int l_addr=(u_int)out;
// ...unless an earlier entry holds the same value, in which case its
// already-emitted slot is reused.
1565 if(literals[j][1]==literals[i][1]) {
1566 //printf("dup %08x\n",literals[i][1]);
1567 l_addr=literals[j][0];
// pc reads as instruction address + 8, hence the -8 in the offset.
1571 ptr=(u_int *)literals[i][0];
1572 u_int offset=l_addr-(u_int)ptr-8;
1573 assert(offset<4096);
1574 assert(!(offset&3));
// Only emit the value when it was not deduplicated.
1576 if(l_addr==(u_int)out) {
1577 literals[i][0]=l_addr; // remember for dupes
1578 output_w32(literals[i][1]);
// Variant of literal_pool for use in the middle of straight-line code.
// Uses the same early-exit tests as literal_pool; when a flush is needed,
// jaddr is finally bound to the output position after the pool.
// NOTE(review): presumably jaddr is a branch emitted just before the pool so
// execution skips over the data -- confirm.
1584 static void literal_pool_jumpover(int n)
1586 if(!literalcount) return;
1588 if((int)out-literals[0][0]<4096-n) return;
1593 set_jump_target(jaddr, out);
// Emit the stub for a jump that leaves the current block.
// addr is the address of the branch instruction inside the translation
// cache, target is the guest address, linker is the routine that resolves it.
// r0 is loaded with target and r1 with addr, both through the literal pool.
// NOTE(review): the use of linker is not shown; presumably the stub ends
// with a jump to it -- confirm.
1596 static void emit_extjump2(u_char *addr, int target, void *linker)
1598 u_char *ptr=(u_char *)addr;
// The instruction at addr must be a branch (opcode bits 101 in the top byte).
1599 assert((ptr[3]&0x0e)==0xa);
1602 emit_loadlp(target,0);
1603 emit_loadlp((u_int)addr,1);
1604 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
1605 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
// Debug-only: rebase the cycle counter against next_interupt before leaving.
1607 #ifdef DEBUG_CYCLE_COUNT
1608 emit_readword(&last_count,ECX);
1609 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1610 emit_readword(&next_interupt,ECX);
1611 emit_writeword(HOST_CCREG,&Count);
1612 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1613 emit_writeword(ECX,&last_count);
// External-jump stub that resolves through dyna_linker.
1619 static void emit_extjump(void *addr, int target)
1621 emit_extjump2(addr, target, dyna_linker);
// External-jump stub that resolves through dyna_linker_ds
// (presumably the delay-slot variant -- confirm).
1624 static void emit_extjump_ds(void *addr, int target)
1626 emit_extjump2(addr, target, dyna_linker_ds);
1629 // put rt_val into rt, potentially making use of rs with value rs_val
// Tries single-instruction forms in order: mov #rt_val, mvn #~rt_val,
// add rt,rs,#diff, sub rt,rs,#-diff; falls back to the general emit_movimm.
// NOTE(review): diff is presumably rt_val-rs_val -- confirm.
// NOTE(review): the mvn debug line logs rt_val rather than ~rt_val; the
// encoding itself uses ~rt_val.
1630 static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1634 if(genimm(rt_val,&armval)) {
1635 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1636 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1639 if(genimm(~rt_val,&armval)) {
1640 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1641 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1645 if(genimm(diff,&armval)) {
1646 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1647 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1649 }else if(genimm(-diff,&armval)) {
1650 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1651 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1654 emit_movimm(rt_val,rt);
1657 // return 1 if above function can do its job cheaply
// Equal values are trivially similar. Otherwise diff and -diff are each
// stripped of trailing pairs of zero bits; if what remains is below 0x100
// the difference fits an 8-bit immediate at an even shift, so one add or
// sub suffices.
// NOTE(review): diff is presumably v2-v1 -- confirm.
1658 static int is_similar_value(u_int v1,u_int v2)
1662 if(v1==v2) return 1;
1664 for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2)
1666 if(xs<0x100) return 1;
1667 for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2)
1669 if(xs<0x100) return 1;
// Move host registers a0 and a1 into the argument registers r0 and r1.
// A negative register number means there is no value to move.
// The cases exist so neither source is overwritten before it is read.
1674 static void pass_args(int a0, int a1)
// Swap through r2 (presumably when a0 is r1 and a1 is r0 -- confirm).
1678 emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0);
// a1 currently lives in r0: it must be moved out before r0 is loaded.
1680 else if(a0!=0&&a1==0) {
1682 if (a0>=0) emit_mov(a0,0);
// No conflict: plain moves, skipping any that would be no-ops.
1685 if(a0>=0&&a0!=0) emit_mov(a0,0);
1686 if(a1>=0&&a1!=1) emit_mov(a1,1);
// Copy a loaded value from rs to rt, applying the extension that matches
// the load type: sign-extend for LOADB/LOADH, mask to 8 or 16 bits for
// LOADBU/LOADHU, plain move (skipped when rs==rt) for LOADW.
1690 static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1693 case LOADB_STUB: emit_signextend8(rs,rt); break;
1694 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1695 case LOADH_STUB: emit_signextend16(rs,rt); break;
1696 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1697 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1702 #include "pcsxmem.h"
1703 #include "pcsxmem_inline.c"
// Emit the out-of-line slow path for a memory read (stub number n).
// Stub fields used: .addr (branch site to bind to this stub), .type (load
// width/signedness), .a (instruction index, used in the debug address),
// .c (register state), .d (cycle adjustment), .e (live host register mask),
// .retaddr (where to resume).
// The generated code looks the address up in mem_rtab; if the entry maps
// directly to memory, the load is done inline, otherwise a read handler is
// called and its result is adjusted into rt.
1705 static void do_readstub(int n)
1707 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1709 set_jump_target(stubs[n].addr, out);
1710 enum stub_type type=stubs[n].type;
1713 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1714 u_int reglist=stubs[n].e;
1715 signed char *i_regmap=i_regs->regmap;
// Coprocessor and unaligned loads deliver into FTEMP rather than rt1[i].
1717 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1718 rt=get_reg(i_regmap,FTEMP);
1720 rt=get_reg(i_regmap,rt1[i]);
1723 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1724 void *restore_jump = NULL;
// Look for a free scratch register among r0-r9 and r12 (mask 0x13ff).
// NOTE(review): the character before "list" on the next code line looks
// like a mis-encoded "&reglist" -- restore from the upstream source.
1726 for(r=0;r<=12;r++) {
1727 if(((1<<r)&0x13ff)&&((1<<r)®list)==0) {
1731 if(rt>=0&&rt1[i]!=0)
1738 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// Table lookup: entry = mem_rtab[rs >> 12]; the flag-setting shift left by
// one moves the top bit of the entry into carry, and the cc-conditional
// loads below run only when that bit was clear.
1740 emit_readword(&mem_rtab,temp);
1741 emit_shrimm(rs,12,temp2);
1742 emit_readword_dualindexedx4(temp,temp2,temp2);
1743 emit_lsls_imm(temp2,1,temp2);
1744 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1746 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1747 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1748 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1749 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1750 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
// Direct access succeeded: leave, via the register-restore code when
// registers were saved (target patched later), else straight to retaddr.
1756 emit_jcc(0); // jump to reg restore
1759 emit_jcc(stubs[n].retaddr); // return address
// Handler path: pick the generic read handler for the access width.
1764 if(type==LOADB_STUB||type==LOADBU_STUB)
1765 handler=jump_handler_read8;
1766 if(type==LOADH_STUB||type==LOADHU_STUB)
1767 handler=jump_handler_read16;
1768 if(type==LOADW_STUB)
1769 handler=jump_handler_read32;
// r0 = address, r1 = shifted table entry, r2 = adjusted cycle count.
1771 pass_args(rs,temp2);
1772 int cc=get_reg(i_regmap,CCREG);
1774 emit_loadreg(CCREG,2);
1775 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
// The handler result is taken from r0 and width-adjusted into rt.
1777 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1778 mov_loadtype_adj(type,0,rt);
1781 set_jump_target(restore_jump, out);
1782 restore_regs(reglist);
1783 emit_jmp(stubs[n].retaddr); // return address
1786 // return memhandler, or get directly accessible address and return 0
// table is a first-level array indexed by addr>>12. In each entry bit 31 is
// a flag and the remaining bits hold a pointer stored shifted right by one
// (handlers are recovered with <<1).
//  - l1 with bit 31 clear: presumably a direct mapping whose host address
//    is reported through addr_host -- confirm.
//  - otherwise l1 leads to a second-level table with three regions: word
//    entries first, halfword entries at index 0x1000/4, byte entries at
//    index 0x1000/4 + 0x1000/2.
//  - l2 with bit 31 clear: direct mapping, *addr_host = v + page offset.
//  - l2 with bit 31 set: returns the handler pointer l2<<1.
1787 static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
1790 l1=((u_int *)table)[addr>>12];
1791 if((l1&(1<<31))==0) {
1798 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
1799 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
1800 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
1801 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
1803 l2=((u_int *)l1)[(addr&0xfff)/4];
1804 if((l2&(1<<31))==0) {
1806 *addr_host=v+(addr&0xfff);
1809 return (void *)(l2<<1);
// Emit an inline read for an address known at translation time.
// addr is the constant guest address, target the guest register receiving
// the value, adj the cycle adjustment, reglist the live host registers.
// Order of attempts: pcsx_direct_read, a direct host-memory load when
// get_direct_memhandler returns NULL, otherwise a call to a read handler.
1813 static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1815 int rs=get_reg(regmap,target);
1816 int rt=get_reg(regmap,target);
// If the target has no host register, borrow a temporary for the address.
1817 if(rs<0) rs=get_reg(regmap,-1);
1819 u_int host_addr=0,is_dynamic,far_call=0;
1821 int cc=get_reg(regmap,CCREG);
1822 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1824 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1825 if (handler == NULL) {
// Direct mapping: turn rs into the host address, then load through it.
1829 emit_movimm_from(addr,rs,host_addr,rs);
1831 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1832 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1833 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1834 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1835 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
// Handlers flagged dynamic are reached through the generic
// jump_handler_read* routines (the guarding condition on is_dynamic is
// presumably on a line not shown -- confirm).
1840 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1842 if(type==LOADB_STUB||type==LOADBU_STUB)
1843 handler=jump_handler_read8;
1844 if(type==LOADH_STUB||type==LOADHU_STUB)
1845 handler=jump_handler_read16;
1846 if(type==LOADW_STUB)
1847 handler=jump_handler_read32;
1850 // call a memhandler
1851 if(rt>=0&&rt1[i]!=0)
1855 emit_movimm(addr,0);
// A handler outside the +/-32MB reach of a branch is called through r12.
1858 int offset=(u_char *)handler-out-8;
1859 if(offset<-33554432||offset>=33554432) {
1860 // unreachable memhandler, a plugin func perhaps
1861 emit_movimm((u_int)handler,12);
1865 emit_loadreg(CCREG,2);
1867 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1868 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1871 emit_readword(&last_count,3);
1872 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1874 emit_writeword(2,&Count);
// Result comes back in r0; extend or mask it into rt.
1882 if(rt>=0&&rt1[i]!=0) {
1884 case LOADB_STUB: emit_signextend8(0,rt); break;
1885 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1886 case LOADH_STUB: emit_signextend16(0,rt); break;
1887 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1888 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1892 restore_regs(reglist);
// Emit the out-of-line slow path for a memory write (stub number n).
// Mirrors do_readstub: looks the address up in mem_wtab, performs the store
// inline when the entry maps directly to memory, otherwise calls a write
// handler and takes the updated cycle count from its return value.
1895 static void do_writestub(int n)
1897 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1899 set_jump_target(stubs[n].addr, out);
1900 enum stub_type type=stubs[n].type;
1903 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1904 u_int reglist=stubs[n].e;
1905 signed char *i_regmap=i_regs->regmap;
// Coprocessor stores take their data from FTEMP instead of rs2[i].
1907 if(itype[i]==C1LS||itype[i]==C2LS) {
1908 rt=get_reg(i_regmap,r=FTEMP);
1910 rt=get_reg(i_regmap,r=rs2[i]);
1914 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1915 void *restore_jump = NULL;
// The address and data registers must not be chosen as scratch.
1916 int reglist2=reglist|(1<<rs)|(1<<rt);
// NOTE(review): the character before "list2" on the second code line below
// looks like a mis-encoded "&reglist2" -- restore from the upstream source.
1917 for(rtmp=0;rtmp<=12;rtmp++) {
1918 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)®list2)==0) {
// Fallback search among r0-r3 for a register that is neither rs nor rt.
1925 for(rtmp=0;rtmp<=3;rtmp++)
1926 if(rtmp!=rs&&rtmp!=rt)
1929 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
// Table lookup: entry = mem_wtab[rs >> 12]; the flag-setting shift puts the
// top bit of the entry into carry, and the cc-conditional stores below run
// only when that bit was clear.
1931 emit_readword(&mem_wtab,temp);
1932 emit_shrimm(rs,12,temp2);
1933 emit_readword_dualindexedx4(temp,temp2,temp2);
1934 emit_lsls_imm(temp2,1,temp2);
1936 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1937 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1938 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1943 emit_jcc(0); // jump to reg restore
1946 emit_jcc(stubs[n].retaddr); // return address (invcode check)
// Handler path: pick the generic write handler for the access width.
1952 case STOREB_STUB: handler=jump_handler_write8; break;
1953 case STOREH_STUB: handler=jump_handler_write16; break;
1954 case STOREW_STUB: handler=jump_handler_write32; break;
1961 int cc=get_reg(i_regmap,CCREG);
1963 emit_loadreg(CCREG,2);
1964 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1965 // returns new cycle_count
// Undo the adjustment added before the call so the cycle register is again
// relative to the block.
1967 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1969 emit_storereg(CCREG,2);
1971 set_jump_target(restore_jump, out);
1972 restore_regs(reglist);
1973 emit_jmp(stubs[n].retaddr);
// Emit an inline write for an address known at translation time.
// When get_direct_memhandler returns NULL the address maps directly to host
// memory and a plain store is emitted; otherwise the handler is called
// through jump_handler_write_h with the handler address in r3 and the
// adjusted cycle count in r2.
1976 static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1978 int rs=get_reg(regmap,-1);
1979 int rt=get_reg(regmap,target);
1983 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1984 if (handler == NULL) {
// Direct mapping: turn rs into the host address, then store through it.
1986 emit_movimm_from(addr,rs,host_addr,rs);
1988 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1989 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1990 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1996 // call a memhandler
1999 int cc=get_reg(regmap,CCREG);
2001 emit_loadreg(CCREG,2);
2002 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2003 emit_movimm((u_int)handler,3);
2004 // returns new cycle_count
2005 emit_call(jump_handler_write_h);
// Remove the adjustment again from the returned cycle count.
2006 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2008 emit_storereg(CCREG,2);
2009 restore_regs(reglist);
// Emit the slow path for an unaligned store (SWL/SWR only, per the assert).
// Two implementations are visible. The first calls jump_handle_swl or
// jump_handle_swr and returns. The code after its emit_jmp re-declares cc,
// so it is presumably a preprocessor-disabled read-modify-write alternative
// -- NOTE(review): confirm which branch is compiled.
2012 static void do_unalignedwritestub(int n)
2014 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
2016 set_jump_target(stubs[n].addr, out);
2019 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2020 int addr=stubs[n].b;
2021 u_int reglist=stubs[n].e;
2022 signed char *i_regmap=i_regs->regmap;
2023 int temp2=get_reg(i_regmap,FTEMP);
2025 rt=get_reg(i_regmap,rs2[i]);
2028 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// temp2 is scratch here, so it need not be preserved across the call.
2030 reglist&=~(1<<temp2);
2033 // don't bother with it and call write handler
2036 int cc=get_reg(i_regmap,CCREG);
2038 emit_loadreg(CCREG,2);
2039 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2040 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
// The handler returns the new cycle count in r0; remove the adjustment.
2041 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2043 emit_storereg(CCREG,2);
2044 restore_regs(reglist);
2045 emit_jmp(stubs[n].retaddr); // return address
// --- alternate implementation: read the aligned word, merge, write back ---
2047 emit_andimm(addr,0xfffffffc,temp2);
2048 emit_writeword(temp2,&address);
2051 emit_shrimm(addr,16,1);
2052 int cc=get_reg(i_regmap,CCREG);
2054 emit_loadreg(CCREG,2);
2056 emit_movimm((u_int)readmem,0);
2057 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
2058 emit_call((int)&indirect_jump_indexed);
2059 restore_regs(reglist);
2061 emit_readword(&readmem_dword,temp2);
// temp aliases addr: the byte offset is turned into a bit shift (0/8/16/24).
2062 int temp=addr; //hmh
2063 emit_shlimm(addr,3,temp);
2064 emit_andimm(temp,24,temp);
2065 #ifdef BIG_ENDIAN_MIPS
2066 if (opcode[i]==0x2e) // SWR
2068 if (opcode[i]==0x2a) // SWL
2070 emit_xorimm(temp,24,temp);
// Clear the bytes being replaced with an all-ones mask shifted by temp,
// then OR in the shifted source register.
2071 emit_movimm(-1,HOST_TEMPREG);
2072 if (opcode[i]==0x2a) { // SWL
2073 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2074 emit_orrshr(rt,temp,temp2);
2076 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2077 emit_orrshl(rt,temp,temp2);
2079 emit_readword(&address,addr);
2080 emit_writeword(temp2,&word);
2081 //save_regs(reglist); // don't need to, no state changes
2082 emit_shrimm(addr,16,1);
2083 emit_movimm((u_int)writemem,0);
2084 //emit_call((int)&indirect_jump_indexed);
// Loading into register 15 (pc) transfers control to the table entry.
2086 emit_readword_dualindexedx4(0,1,15);
2087 emit_readword(&Count,HOST_TEMPREG);
2088 emit_readword(&next_interupt,2);
2089 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
2090 emit_writeword(2,&last_count);
2091 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2093 emit_storereg(CCREG,HOST_TEMPREG);
2095 restore_regs(reglist);
2096 emit_jmp(stubs[n].retaddr); // return address
// Emit the stub that calls invalidate_addr for a store that may have
// touched translated code. For this stub type .a holds the live register
// mask and .b the host register holding the address, which is moved to r0
// unless it is already there.
2100 static void do_invstub(int n)
2103 u_int reglist=stubs[n].a;
2104 set_jump_target(stubs[n].addr, out);
2106 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2107 emit_call(&invalidate_addr);
2108 restore_regs(reglist);
2109 emit_jmp(stubs[n].retaddr); // return address
// Emit the dirty-block entry stub for instruction index i: it passes the
// source address (r1), the saved copy (r2), the length in bytes (r3) and
// the guest address (r0) to a verify routine, then jumps to the compiled
// instruction.
// Two argument-loading sequences are visible (literal-pool loads, and
// movw/movt pairs); presumably they are selected by a preprocessor check
// on the target architecture -- confirm.
2112 void *do_dirty_stub(int i)
2114 assem_debug("do_dirty_stub %x\n",start+i*4);
2115 u_int addr=(u_int)source;
2116 // Careful about the code output here, verify_dirty needs to parse it.
2118 emit_loadlp(addr,1);
2119 emit_loadlp((int)copy,2);
2120 emit_loadlp(slen*4,3);
2122 emit_movw(addr&0x0000FFFF,1);
2123 emit_movw(((u_int)copy)&0x0000FFFF,2);
2124 emit_movt(addr&0xFFFF0000,1);
2125 emit_movt(((u_int)copy)&0xFFFF0000,2);
2126 emit_movw(slen*4,3);
2128 emit_movimm(start+i*4,0);
// Guest addresses at or above 0xC0000000 use the _vm verifier.
2129 emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
2133 entry = instr_addr[i];
2134 emit_jmp(instr_addr[i]);
// Delay-slot variant of the dirty stub: same argument layout as
// do_dirty_stub, but r1 is source or start depending on whether start is
// below 0xC0000000, r0 is start+1, and verify_code_ds is called.
// As in do_dirty_stub, the literal-pool and movw/movt sequences are
// presumably build-time alternatives -- confirm.
2138 static void do_dirty_stub_ds()
2140 // Careful about the code output here, verify_dirty needs to parse it.
2142 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2143 emit_loadlp((int)copy,2);
2144 emit_loadlp(slen*4,3);
2146 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2147 emit_movw(((u_int)copy)&0x0000FFFF,2);
2148 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2149 emit_movt(((u_int)copy)&0xFFFF0000,2);
2150 emit_movw(slen*4,3);
2152 emit_movimm(start+1,0);
2153 emit_call(&verify_code_ds);
// Assemble the variable-shift instructions for instruction index i.
// 32-bit forms (SLLV/SRLV/SRAV): the shift amount register is masked to
// 0..31 in HOST_TEMPREG and a single shl/shr/sar is emitted.
// 64-bit forms (DSLLV/DSRLV/DSRAV): the value is held as a high/low register
// pair (the high half is mapped with |64); the shift is built from two
// 32-bit shifts plus an OR of the bits crossing the halves, and bit 5 of the
// shift amount (tested with testimm 32) selects the moved-across result.
2158 static void shift_assemble_arm(int i,struct regstat *i_regs)
2161 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2163 signed char s,t,shift;
2164 t=get_reg(i_regs->regmap,rt1[i]);
2165 s=get_reg(i_regs->regmap,rs1[i]);
2166 shift=get_reg(i_regs->regmap,rs2[i]);
2175 if(s!=t) emit_mov(s,t);
2179 emit_andimm(shift,31,HOST_TEMPREG);
2180 if(opcode2[i]==4) // SLLV
2182 emit_shl(s,HOST_TEMPREG,t);
2184 if(opcode2[i]==6) // SRLV
2186 emit_shr(s,HOST_TEMPREG,t);
2188 if(opcode2[i]==7) // SRAV
2190 emit_sar(s,HOST_TEMPREG,t);
2194 } else { // DSLLV/DSRLV/DSRAV
2195 signed char sh,sl,th,tl,shift;
2196 th=get_reg(i_regs->regmap,rt1[i]|64);
2197 tl=get_reg(i_regs->regmap,rt1[i]);
2198 sh=get_reg(i_regs->regmap,rs1[i]|64);
2199 sl=get_reg(i_regs->regmap,rs1[i]);
2200 shift=get_reg(i_regs->regmap,rs2[i]);
2205 if(th>=0) emit_zeroreg(th);
2210 if(sl!=tl) emit_mov(sl,tl);
2211 if(th>=0&&sh!=th) emit_mov(sh,th);
2215 // FIXME: What if shift==tl ?
2217 int temp=get_reg(i_regs->regmap,-1);
2219 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2222 emit_andimm(shift,31,HOST_TEMPREG);
2223 if(opcode2[i]==0x14) // DSLLV
// th = (sh << n) | (sl >> (32-n)); tl = sl << n; if bit 5 of the amount is
// set the low result moves to the high half and the low half becomes 0.
2225 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2226 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2227 emit_orrshr(sl,HOST_TEMPREG,th);
2228 emit_andimm(shift,31,HOST_TEMPREG);
2229 emit_testimm(shift,32);
2230 emit_shl(sl,HOST_TEMPREG,tl);
2231 if(th>=0) emit_cmovne_reg(tl,th);
2232 emit_cmovne_imm(0,tl);
2234 if(opcode2[i]==0x16) // DSRLV
// Mirror image of DSLLV: low half receives bits from the high half, and the
// high half is zeroed when bit 5 of the amount is set.
2237 emit_shr(sl,HOST_TEMPREG,tl);
2238 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2239 emit_orrshl(sh,HOST_TEMPREG,tl);
2240 emit_andimm(shift,31,HOST_TEMPREG);
2241 emit_testimm(shift,32);
2242 emit_shr(sh,HOST_TEMPREG,th);
2243 emit_cmovne_reg(th,tl);
2244 if(real_th>=0) emit_cmovne_imm(0,th);
2246 if(opcode2[i]==0x17) // DSRAV
// As DSRLV, but the high half is shifted arithmetically and temp holds the
// sign fill (th >> 31) used when bit 5 of the amount is set.
2249 emit_shr(sl,HOST_TEMPREG,tl);
2250 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2253 emit_sarimm(th,31,temp);
2255 emit_orrshl(sh,HOST_TEMPREG,tl);
2256 emit_andimm(shift,31,HOST_TEMPREG);
2257 emit_testimm(shift,32);
2258 emit_sar(sh,HOST_TEMPREG,th);
2259 emit_cmovne_reg(th,tl);
2260 if(real_th>=0) emit_cmovne_reg(temp,th);
// Expose the ARM implementation under the generic name.
2267 #define shift_assemble shift_assemble_arm
// Assemble the unaligned loads LWL/LWR (opcodes 0x22/0x26) for instruction
// index i; LDL/LDR (0x1A/0x1B) have only a placeholder branch here.
// The aligned word containing the address is loaded into temp2, a bit shift
// derived from the low address bits is kept in temp, and the loaded bytes
// are merged into the existing value of tl with bic + or.
2269 static void loadlr_assemble_arm(int i,struct regstat *i_regs)
2271 int s,tl,temp,temp2,addr;
2274 int memtarget=0,c=0;
2275 int fastload_reg_override=0;
2277 tl=get_reg(i_regs->regmap,rt1[i]);
2278 s=get_reg(i_regs->regmap,rs1[i]);
2279 temp=get_reg(i_regs->regmap,-1);
2280 temp2=get_reg(i_regs->regmap,FTEMP);
2281 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
// Collect the host registers currently holding guest state.
2284 for(hr=0;hr<HOST_REGS;hr++) {
2285 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2288 if(offset||s<0||c) addr=temp2;
// c: the base register held a known constant, so the address is known.
2291 c=(i_regs->wasconst>>s)&1;
2293 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
// Dynamic address: temp = addr*8 (bit shift), temp2 = aligned address.
2297 emit_shlimm(addr,3,temp);
2298 if (opcode[i]==0x22||opcode[i]==0x26) {
2299 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
2301 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
2303 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
// Constant address: apply ram_offset if needed and precompute the shift.
2306 if(ram_offset&&memtarget) {
2307 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
2308 fastload_reg_override=HOST_TEMPREG;
2310 if (opcode[i]==0x22||opcode[i]==0x26) {
2311 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
2313 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
2316 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
2319 if(fastload_reg_override) a=fastload_reg_override;
2320 emit_readword_indexed(0,a,temp2);
2321 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
2324 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
2327 emit_andimm(temp,24,temp);
2328 #ifdef BIG_ENDIAN_MIPS
2329 if (opcode[i]==0x26) // LWR
2331 if (opcode[i]==0x22) // LWL
2333 emit_xorimm(temp,24,temp);
// Shift the loaded word into place, clear the same bit range in tl using an
// all-ones mask shifted identically, then combine the two.
2334 emit_movimm(-1,HOST_TEMPREG);
2335 if (opcode[i]==0x26) {
2336 emit_shr(temp2,temp,temp2);
2337 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
2339 emit_shl(temp2,temp,temp2);
2340 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
2342 emit_or(temp2,tl,tl);
2344 //emit_storereg(rt1[i],tl); // DEBUG
2346 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
// Expose the ARM implementation under the generic name.
2350 #define loadlr_assemble loadlr_assemble_arm
// Common entry sequence for a GTE (cop2) operation: save every register in
// reglist (no caller-save masking), optionally call the pcnt_gte_start
// profiling hook, and point r0 at the cop2 data registers.
// NOTE(review): the profiling call is presumably under a preprocessor
// guard -- confirm.
2352 static void c2op_prologue(u_int op,u_int reglist)
2354 save_regs_all(reglist);
2357 emit_call((int)pcnt_gte_start);
2359 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
// Common exit sequence for a GTE operation: optionally call the
// pcnt_gte_end profiling hook, then restore the registers saved by
// c2op_prologue.
2362 static void c2op_epilogue(u_int op,u_int reglist)
2366 emit_call((int)pcnt_gte_end);
2368 restore_regs_all(reglist);
// Emit a call to the MAC-to-IR conversion routine, choosing between the
// lm0/lm1 variants by lm and between the flag-computing and _nf (no flags)
// variants, presumably by need_flags -- confirm.
2371 static void c2op_call_MACtoIR(int lm,int need_flags)
2374 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2376 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
// Emit the tail shared by the colour-producing GTE operations: after func
// (NOTE(review): its call is presumably emitted first -- confirm), reload
// r0 with the cop2 register base, run MAC-to-IR when flags or IR are
// needed, then convert MAC to RGB with or without flag computation.
2379 static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2382 // func is C code and trashes r0
2383 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2384 if(need_flags||need_ir)
2385 c2op_call_MACtoIR(lm,need_flags);
2386 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
// Assemble a GTE (cop2) operation for instruction index i.
// The operation number is the low 6 bits of the instruction word. Several
// operations have hand-written partial implementations that are called
// directly; anything else goes through the gte_handlers tables. Liveness
// information in gte_unneeded decides whether flags and IR results must be
// produced.
// NOTE(review): the selection among the per-operation sequences below is
// presumably a switch on c2op -- confirm the case labels.
2389 static void c2op_assemble(int i,struct regstat *i_regs)
2391 u_int c2op=source[i]&0x3f;
2392 u_int hr,reglist_full=0,reglist;
2393 int need_flags,need_ir;
2394 for(hr=0;hr<HOST_REGS;hr++) {
2395 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
// By default only caller-saved registers need preserving around the call.
2397 reglist=reglist_full&CALLER_SAVE_REGS;
2399 if (gte_handlers[c2op]!=NULL) {
2400 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2401 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2402 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2403 source[i],gte_unneeded[i+1],need_flags,need_ir);
2404 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
// Instruction fields: shift (sf) is bit 19, lm is bit 10.
2406 int shift = (source[i] >> 19) & 1;
2407 int lm = (source[i] >> 10) & 1;
// MVMVA: vector, translation-vector and matrix selectors.
2412 int v = (source[i] >> 15) & 3;
2413 int cv = (source[i] >> 13) & 3;
2414 int mx = (source[i] >> 17) & 3;
2415 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2416 c2op_prologue(c2op,reglist);
2417 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2421 emit_movzwl_indexed(9*4,0,4); // gteIR
2422 emit_movzwl_indexed(10*4,0,6);
2423 emit_movzwl_indexed(11*4,0,5);
2424 emit_orrshl_imm(6,16,4);
2427 emit_addimm(0,32*4+mx*8*4,6);
2429 emit_readword(&zeromem_ptr,6);
2431 emit_addimm(0,32*4+(cv*8+5)*4,7);
2433 emit_readword(&zeromem_ptr,7);
2435 emit_movimm(source[i],1); // opcode
2436 emit_call(gteMVMVA_part_neon);
2439 emit_call(gteMACtoIR_flags_neon);
2443 emit_call((int)gteMVMVA_part_cv3sh12_arm);
2445 emit_movimm(shift,1);
2446 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2448 if(need_flags||need_ir)
2449 c2op_call_MACtoIR(lm,need_flags);
2451 #else /* if not HAVE_ARMV5 */
2452 c2op_prologue(c2op,reglist);
2453 emit_movimm(source[i],1); // opcode
2454 emit_writeword(1,&psxRegs.code);
2455 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
// OP
2460 c2op_prologue(c2op,reglist);
2461 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
2462 if(need_flags||need_ir) {
2463 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2464 c2op_call_MACtoIR(lm,need_flags);
// DPCS
2468 c2op_prologue(c2op,reglist);
2469 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
// INTPL
2472 c2op_prologue(c2op,reglist);
2473 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
// SQR
2476 c2op_prologue(c2op,reglist);
2477 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2478 if(need_flags||need_ir) {
2479 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2480 c2op_call_MACtoIR(lm,need_flags);
// DCPL
2484 c2op_prologue(c2op,reglist);
2485 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
// GPF
2488 c2op_prologue(c2op,reglist);
2489 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
// GPL
2492 c2op_prologue(c2op,reglist);
2493 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
// Generic path: store the opcode in psxRegs.code and call the table handler.
2497 c2op_prologue(c2op,reglist);
2499 emit_movimm(source[i],1); // opcode
2500 emit_writeword(1,&psxRegs.code);
2502 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2505 c2op_epilogue(c2op,reglist);
// Assemble MULT/MULTU/DIV/DIVU for instruction index i.
// Multiplies map to a single smull/umull into the HI/LO host registers.
// Divides are emitted as inline shift-and-subtract loops: the divisor is
// normalised with clz, then one quotient bit is produced per iteration.
// The raw byte offsets passed to emit_jeq/emit_jns/emit_jcc are relative to
// the current output position, so the instruction counts in between must
// not change without updating them.
2509 static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2516 // case 0x1D: DMULTU
2521 if((opcode2[i]&4)==0) // 32-bit
2523 if(opcode2[i]==0x18) // MULT
2525 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2526 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2527 signed char hi=get_reg(i_regs->regmap,HIREG);
2528 signed char lo=get_reg(i_regs->regmap,LOREG);
2533 emit_smull(m1,m2,hi,lo);
2535 if(opcode2[i]==0x19) // MULTU
2537 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2538 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2539 signed char hi=get_reg(i_regs->regmap,HIREG);
2540 signed char lo=get_reg(i_regs->regmap,LOREG);
2545 emit_umull(m1,m2,hi,lo);
2547 if(opcode2[i]==0x1A) // DIV
2549 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2550 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2553 signed char quotient=get_reg(i_regs->regmap,LOREG);
2554 signed char remainder=get_reg(i_regs->regmap,HIREG);
2555 assert(quotient>=0);
2556 assert(remainder>=0);
// remainder = |dividend|; quotient is preset for the divide-by-zero result.
2557 emit_movs(d1,remainder);
2558 emit_movimm(0xffffffff,quotient);
2559 emit_negmi(quotient,quotient); // .. quotient and ..
2560 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2561 emit_movs(d2,HOST_TEMPREG);
2562 emit_jeq((int)out+52); // Division by zero
2563 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
// Normalise |divisor| so its top bit is set; quotient holds the shift count.
2565 emit_clz(HOST_TEMPREG,quotient);
2566 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2568 emit_movimm(0,quotient);
2569 emit_addpl_imm(quotient,1,quotient);
2570 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2571 emit_jns((int)out-2*4);
// Seed the quotient with a marker bit; the loop ends when the marker is
// shifted out into carry.
2573 emit_orimm(quotient,1<<31,quotient);
2574 emit_shr(quotient,quotient,quotient);
2575 emit_cmp(remainder,HOST_TEMPREG);
2576 emit_subcs(remainder,HOST_TEMPREG,remainder);
2577 emit_adcs(quotient,quotient,quotient);
2578 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2579 emit_jcc(out-16); // -4
// Apply the result signs (the flag-setting tests are presumably on lines
// between these two -- confirm).
2581 emit_negmi(quotient,quotient);
2583 emit_negmi(remainder,remainder);
2585 if(opcode2[i]==0x1B) // DIVU
2587 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2588 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2591 signed char quotient=get_reg(i_regs->regmap,LOREG);
2592 signed char remainder=get_reg(i_regs->regmap,HIREG);
2593 assert(quotient>=0);
2594 assert(remainder>=0);
2595 emit_mov(d1,remainder);
2596 emit_movimm(0xffffffff,quotient); // div0 case
2598 emit_jeq((int)out+40); // Division by zero
// Two normalisation variants are visible (clz-based, and a shift loop);
// presumably selected at build time -- confirm.
2600 emit_clz(d2,HOST_TEMPREG);
2601 emit_movimm(1<<31,quotient);
2602 emit_shl(d2,HOST_TEMPREG,d2);
2604 emit_movimm(0,HOST_TEMPREG);
2605 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2606 emit_lslpls_imm(d2,1,d2);
2607 emit_jns((int)out-2*4);
2608 emit_movimm(1<<31,quotient);
2610 emit_shr(quotient,HOST_TEMPREG,quotient);
2611 emit_cmp(remainder,d2);
2612 emit_subcs(remainder,d2,remainder);
2613 emit_adcs(quotient,quotient,quotient);
// The divisor is shifted back only while the loop continues, so d2 ends
// with its original value.
2614 emit_shrcc_imm(d2,1,d2);
2615 emit_jcc(out-16); // -4
2623 // Multiply by zero is zero.
2624 // MIPS does not have a divide by zero exception.
2625 // The result is undefined, we return zero.
2626 signed char hr=get_reg(i_regs->regmap,HIREG);
2627 signed char lr=get_reg(i_regs->regmap,LOREG);
2628 if(hr>=0) emit_zeroreg(hr);
2629 if(lr>=0) emit_zeroreg(lr);
// Expose the ARM implementation under the generic name.
2632 #define multdiv_assemble multdiv_assemble_arm
// Intentionally empty on ARM; kept so shared code can call it
// unconditionally.
2634 static void do_preload_rhash(int r) {
2635 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2636 // register. On ARM the hash can be done with a single instruction (below)
// Load the address of mini_ht into host register ht, computed as an offset
// from FP (which addresses dynarec_local).
2639 static void do_preload_rhtbl(int ht) {
2640 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
// Compute the mini_ht byte offset for the address in rs: bits 3-7 select
// one of 32 entries, each 8 bytes wide (mini_ht is u_int[32][2]).
2643 static void do_rhash(int rs,int rh) {
2644 emit_andimm(rs,0xf8,rh);
// Emit ldr rh,[ht,rh]! : loads the first word of the hash entry into rh
// and, because of the writeback (!), leaves ht pointing at that entry.
2647 static void do_miniht_load(int ht,int rh) {
2648 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2649 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
// Emit the hash-table dispatch: on eq, load the second word of the entry
// (offset 4 from ht) straight into pc; otherwise fall through to the
// generic jump_vaddr routine.
// NOTE(review): the compare that sets eq is presumably emitted on a line
// before the ldreq -- confirm. With CORTEX_A8_BRANCH_PREDICTION_HACK the
// fall-through always uses jump_vaddr_reg[7], presumably after moving rs to
// r7 -- confirm.
2652 static void do_miniht_jump(int rs,int rh,int ht) {
2654 emit_ldreq_indexed(ht,4,15);
2655 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
2657 emit_jmp(jump_vaddr_reg[7]);
2659 emit_jmp(jump_vaddr_reg[rs]);
// Emit code that records return_address in mini_ht: word [0] of the entry
// gets the guest address (also left in rt), word [1] gets the host code
// address produced by emit_pcreladdr, which is registered with the linker.
// The entry index is (return_address & 0xFF) >> 3.
// Two sequences are visible (emit_movimm, and movw/movt); presumably
// selected at build time -- confirm.
2663 static void do_miniht_insert(u_int return_address,int rt,int temp) {
2665 emit_movimm(return_address,rt); // PC into link register
2666 add_to_linker(out,return_address,1);
2667 emit_pcreladdr(temp);
2668 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2669 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2671 emit_movw(return_address&0x0000FFFF,rt);
2672 add_to_linker(out,return_address,1);
2673 emit_pcreladdr(temp);
2674 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2675 emit_movt(return_address&0xFFFF0000,rt);
2676 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Mark the 4KB page of the translation cache containing target as needing
// a cache flush. needs_clear_cache is a bitmap with one bit per 4KB page
// and one 32-bit word per 128KB (offset>>17). The first time a page is
// marked, start_tcache_write is called for that page.
2680 static void mark_clear_cache(void *target)
2682 u_long offset = (u_char *)target - translation_cache;
2683 u_int mask = 1u << ((offset >> 12) & 31);
2684 if (!(needs_clear_cache[offset >> 17] & mask)) {
2685 char *start = (char *)((u_long)target & ~4095ul);
2686 start_tcache_write(start, start + 4096);
2687 needs_clear_cache[offset >> 17] |= mask;
2691 // Clearing the cache is rather slow on ARM Linux, so mark the areas
2692 // that need to be cleared, and then only clear these areas once.
// Walks every word of the needs_clear_cache bitmap, calls end_tcache_write
// on the marked page ranges, and resets the word to zero.
// NOTE(review): presumably runs of adjacent marked pages are merged into a
// single start..end range -- confirm.
2693 static void do_clear_cache()
2696 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
2698 u_int bitmap=needs_clear_cache[i];
2700 u_char *start, *end;
// Word i covers 128KB (131072 bytes); bit j covers one 4KB page within it.
2704 start=translation_cache+i*131072+j*4096;
2712 end_tcache_write(start, end);
2718 needs_clear_cache[i]=0;
2723 // CPU-architecture-specific initialization
// No initialization statements are present for this backend.
2724 static void arch_init() {
2727 // vim:shiftwidth=2:expandtab