psxinterpreter: yet more exceptions, new config option
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#ifdef DRC_DBG
31#pragma GCC diagnostic ignored "-Wunused-function"
32#pragma GCC diagnostic ignored "-Wunused-variable"
33#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
34#endif
35
/* Entry points that are defined outside this file. */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

/* One jump_vaddr handler per host register that has one. */
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

/*
 * jump_vaddr handler indexed by host register number.
 * Slots 11 and 13..15 have no handler and are left null.
 */
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
70
71void invalidate_addr_r0();
72void invalidate_addr_r1();
73void invalidate_addr_r2();
74void invalidate_addr_r3();
75void invalidate_addr_r4();
76void invalidate_addr_r5();
77void invalidate_addr_r6();
78void invalidate_addr_r7();
79void invalidate_addr_r8();
80void invalidate_addr_r9();
81void invalidate_addr_r10();
82void invalidate_addr_r12();
83
84const u_int invalidate_addr_reg[16] = {
85 (int)invalidate_addr_r0,
86 (int)invalidate_addr_r1,
87 (int)invalidate_addr_r2,
88 (int)invalidate_addr_r3,
89 (int)invalidate_addr_r4,
90 (int)invalidate_addr_r5,
91 (int)invalidate_addr_r6,
92 (int)invalidate_addr_r7,
93 (int)invalidate_addr_r8,
94 (int)invalidate_addr_r9,
95 (int)invalidate_addr_r10,
96 0,
97 (int)invalidate_addr_r12,
98 0,
99 0,
100 0};
101
102/* Linker */
103
/*
 * Patch the instruction word at addr so that it refers to target_.
 *
 * The most significant byte of the word selects how it is patched:
 *  - 0xe2: the low 12 bits are replaced by an immediate built from the
 *    displacement (target - addr - 8), which must be below 1024;
 *  - 0x72 (generated by emit_jno_unlikely): the same immediate form is used
 *    while the displacement fits (below 1024, or below 4096 and a multiple
 *    of 16); otherwise the whole word becomes 0x7A000000 | branch offset;
 *  - anything else must pass the branch check ((byte & 0x0e) == 0xa), and
 *    only the low 24 bits (the offset field) are rewritten.
 */
static void set_jump_target(void *addr, void *target_)
{
  u_int target = (u_int)target_;
  u_char *bytes = addr;
  u_int *insn = (u_int *)bytes;
  const u_int disp = target - (u_int)insn - 8;
  const u_char top = bytes[3];

  if (top == 0xe2) {
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    return;
  }

  if (top == 0x72) {
    // generated by emit_jno_unlikely
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    }
    else if (disp < 4096 && !(disp & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    }
    else {
      *insn = (0x7A000000) | ((disp << 6) >> 8);
    }
    return;
  }

  assert((top & 0x0e) == 0xa);
  *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
}
135
136// This optionally copies the instruction from the target of the branch into
137// the space before the branch. Works, but the difference in speed is
138// usually insignificant.
#if 0
/*
 * Disabled variant of set_jump_target: when copy is set and the target
 * instruction qualifies, that instruction is copied into the slot just
 * before the branch and the branch is aimed one instruction further.
 */
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes = (u_char *)addr;
  u_int *insn = (u_int *)bytes;

  assert(!copy || insn[-1] == 0xe28dd000);
  if (bytes[3] == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((bytes[3] & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0) // ALU, no immediate, no flags
    copy = 0;
  if ((target_insn & 0x0c100000) == 0x04100000) // Load
    copy = 0;
  if (target_insn & 0x08000000)
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
170
171/* Literal pool */
172static void add_literal(int addr,int val)
173{
174 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
175 literals[literalcount][0]=addr;
176 literals[literalcount][1]=val;
177 literalcount++;
178}
179
180// from a pointer to external jump stub (which was produced by emit_extjump2)
181// find where the jumping insn is
/*
 * Read the pointer that the second instruction of the stub loads.
 * That instruction must be "ldr rx, [pc, #ofs]"; the pointer is fetched from
 * (instruction address + ofs + 8) and returned.
 */
static void *find_extjump_insn(void *stub)
{
  int *ldr_insn = (int *)(stub + 4);
  assert((*ldr_insn & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
  u_int pc_offset = *ldr_insn & 0xfff;
  void **literal = (void *)ldr_insn + pc_offset + 8;
  return *literal;
}
190
191// find where external branch is linked to, using the addr of its stub:
192// get address that insn one after stub loads (dyna_linker arg1),
193// treat it as a pointer to branch insn,
194// return addr where that branch jumps to
#if 0
/* Disabled: return the address that the branch found via the stub jumps to. */
static void *get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *branch = find_extjump_insn(stub);
  assert((*branch & 0x0f000000) == 0x0a000000); // b
  const int byte_offset = (*branch << 8) >> 6;
  return (u_char *)branch + byte_offset + 8;
}
#endif
204
205// Allocate a specific ARM register.
206static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
207{
208 int n;
209 int dirty=0;
210
211 // see if it's already allocated (and dealloc it)
212 for(n=0;n<HOST_REGS;n++)
213 {
214 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
215 dirty=(cur->dirty>>n)&1;
216 cur->regmap[n]=-1;
217 }
218 }
219
220 cur->regmap[hr]=reg;
221 cur->dirty&=~(1<<hr);
222 cur->dirty|=dirty<<hr;
223 cur->isconst&=~(1<<hr);
224}
225
226// Alloc cycle count into dedicated register
227static void alloc_cc(struct regstat *cur,int i)
228{
229 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
230}
231
232/* Assembler */
233
234static unused char regname[16][4] = {
235 "r0",
236 "r1",
237 "r2",
238 "r3",
239 "r4",
240 "r5",
241 "r6",
242 "r7",
243 "r8",
244 "r9",
245 "r10",
246 "fp",
247 "r12",
248 "sp",
249 "lr",
250 "pc"};
251
252static void output_w32(u_int word)
253{
254 *((u_int *)out)=word;
255 out+=4;
256}
257
/*
 * Pack three register numbers into their instruction fields:
 * rn at bit 16, rd at bit 12, rm at bit 0. Each must be below 16.
 */
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);

  const u_int rn_field = rn << 16;
  const u_int rd_field = rd << 12;
  return rn_field | rd_field | rm;
}
265
/*
 * Pack rd, rn and an 8-bit immediate that is shifted left by an even
 * amount. The shift is stored as ((32 - shift) & 30) starting at bit 7.
 */
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);

  const u_int regs = (rn << 16) | (rd << 12);
  const u_int rotate_field = ((32 - shift) & 30) << 7;
  return regs | rotate_field | imm;
}
274
/*
 * Try to express imm as an 8-bit value with an even rotation.
 * On success, stores the 12-bit operand field in *encoded and returns 1.
 * On failure, returns 0 and leaves *encoded at 0.
 */
static u_int genimm(u_int imm, u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    /* rotate right by two bits and try the next position */
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
290
291static void genimm_checked(u_int imm,u_int *encoded)
292{
293 u_int ret=genimm(imm,encoded);
294 assert(ret);
295 (void)ret;
296}
297
298static u_int genjmp(u_int addr)
299{
300 if (addr < 3) return 0; // a branch that will be patched later
301 int offset = addr-(int)out-8;
302 if (offset < -33554432 || offset >= 33554432) {
303 SysPrintf("genjmp: out of range: %08x\n", offset);
304 abort();
305 return 0;
306 }
307 return ((u_int)offset>>2)&0xffffff;
308}
309
310static unused void emit_breakpoint(void)
311{
312 assem_debug("bkpt #0\n");
313 //output_w32(0xe1200070);
314 output_w32(0xe7f001f0);
315}
316
317static void emit_mov(int rs,int rt)
318{
319 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
320 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
321}
322
323static void emit_movs(int rs,int rt)
324{
325 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
326 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
327}
328
329static void emit_add(int rs1,int rs2,int rt)
330{
331 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
332 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
333}
334
335static void emit_adds(int rs1,int rs2,int rt)
336{
337 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
338 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
339}
340#define emit_adds_ptr emit_adds
341
342static void emit_adcs(int rs1,int rs2,int rt)
343{
344 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
345 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
346}
347
348static void emit_neg(int rs, int rt)
349{
350 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
351 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
352}
353
354static void emit_sub(int rs1,int rs2,int rt)
355{
356 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
357 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
358}
359
360static void emit_zeroreg(int rt)
361{
362 assem_debug("mov %s,#0\n",regname[rt]);
363 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
364}
365
366static void emit_loadlp(u_int imm,u_int rt)
367{
368 add_literal((int)out,imm);
369 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
370 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
371}
372
373#ifdef HAVE_ARMV7
374static void emit_movw(u_int imm,u_int rt)
375{
376 assert(imm<65536);
377 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
378 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
379}
380
381static void emit_movt(u_int imm,u_int rt)
382{
383 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
384 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
385}
386#endif
387
388static void emit_movimm(u_int imm,u_int rt)
389{
390 u_int armval;
391 if(genimm(imm,&armval)) {
392 assem_debug("mov %s,#%d\n",regname[rt],imm);
393 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
394 }else if(genimm(~imm,&armval)) {
395 assem_debug("mvn %s,#%d\n",regname[rt],imm);
396 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
397 }else if(imm<65536) {
398 #ifndef HAVE_ARMV7
399 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
400 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
401 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
402 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
403 #else
404 emit_movw(imm,rt);
405 #endif
406 }else{
407 #ifndef HAVE_ARMV7
408 emit_loadlp(imm,rt);
409 #else
410 emit_movw(imm&0x0000FFFF,rt);
411 emit_movt(imm&0xFFFF0000,rt);
412 #endif
413 }
414}
415
416static void emit_pcreladdr(u_int rt)
417{
418 assem_debug("add %s,pc,#?\n",regname[rt]);
419 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
420}
421
422static void emit_loadreg(int r, int hr)
423{
424 assert(hr != EXCLUDE_REG);
425 if (r == 0)
426 emit_zeroreg(hr);
427 else {
428 void *addr;
429 switch (r) {
430 //case HIREG: addr = &hi; break;
431 //case LOREG: addr = &lo; break;
432 case CCREG: addr = &cycle_count; break;
433 case CSREG: addr = &psxRegs.CP0.n.SR; break;
434 case INVCP: addr = &invc_ptr; break;
435 case ROREG: addr = &ram_offset; break;
436 default:
437 assert(r < 34);
438 addr = &psxRegs.GPR.r[r];
439 break;
440 }
441 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
442 assert(offset<4096);
443 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
444 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
445 }
446}
447
448static void emit_storereg(int r, int hr)
449{
450 assert(hr != EXCLUDE_REG);
451 int addr = (int)&psxRegs.GPR.r[r];
452 switch (r) {
453 //case HIREG: addr = &hi; break;
454 //case LOREG: addr = &lo; break;
455 case CCREG: addr = (int)&cycle_count; break;
456 default: assert(r < 34); break;
457 }
458 u_int offset = addr-(u_int)&dynarec_local;
459 assert(offset<4096);
460 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
461 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
462}
463
464static void emit_test(int rs, int rt)
465{
466 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
467 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
468}
469
470static void emit_testimm(int rs,int imm)
471{
472 u_int armval;
473 assem_debug("tst %s,#%d\n",regname[rs],imm);
474 genimm_checked(imm,&armval);
475 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
476}
477
478static void emit_testeqimm(int rs,int imm)
479{
480 u_int armval;
481 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
482 genimm_checked(imm,&armval);
483 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
484}
485
486static void emit_not(int rs,int rt)
487{
488 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
489 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
490}
491
492static void emit_and(u_int rs1,u_int rs2,u_int rt)
493{
494 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
495 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
496}
497
498static void emit_or(u_int rs1,u_int rs2,u_int rt)
499{
500 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
501 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
502}
503
504static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
505{
506 assert(rs<16);
507 assert(rt<16);
508 assert(imm<32);
509 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
510 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
511}
512
513static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
514{
515 assert(rs<16);
516 assert(rt<16);
517 assert(imm<32);
518 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
519 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
520}
521
522static void emit_xor(u_int rs1,u_int rs2,u_int rt)
523{
524 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
525 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
526}
527
528static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
529{
530 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
531 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
532}
533
534static void emit_addimm(u_int rs,int imm,u_int rt)
535{
536 assert(rs<16);
537 assert(rt<16);
538 if(imm!=0) {
539 u_int armval;
540 if(genimm(imm,&armval)) {
541 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
542 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
543 }else if(genimm(-imm,&armval)) {
544 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
545 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
546 #ifdef HAVE_ARMV7
547 }else if(rt!=rs&&(u_int)imm<65536) {
548 emit_movw(imm&0x0000ffff,rt);
549 emit_add(rs,rt,rt);
550 }else if(rt!=rs&&(u_int)-imm<65536) {
551 emit_movw(-imm&0x0000ffff,rt);
552 emit_sub(rs,rt,rt);
553 #endif
554 }else if((u_int)-imm<65536) {
555 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
556 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
557 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
558 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
559 }else {
560 do {
561 int shift = (ffs(imm) - 1) & ~1;
562 int imm8 = imm & (0xff << shift);
563 genimm_checked(imm8,&armval);
564 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
565 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
566 rs = rt;
567 imm &= ~imm8;
568 }
569 while (imm != 0);
570 }
571 }
572 else if(rs!=rt) emit_mov(rs,rt);
573}
574
575static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
576{
577 emit_addimm(rs, imm, rt);
578}
579
580static void emit_addimm_and_set_flags(int imm,int rt)
581{
582 assert(imm>-65536&&imm<65536);
583 u_int armval;
584 if(genimm(imm,&armval)) {
585 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
586 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
587 }else if(genimm(-imm,&armval)) {
588 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
589 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
590 }else if(imm<0) {
591 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
592 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
593 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
594 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
595 }else{
596 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
597 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
598 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
599 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
600 }
601}
602
603static void emit_addnop(u_int r)
604{
605 assert(r<16);
606 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
607 output_w32(0xe2800000|rd_rn_rm(r,r,0));
608}
609
610static void emit_andimm(int rs,int imm,int rt)
611{
612 u_int armval;
613 if(imm==0) {
614 emit_zeroreg(rt);
615 }else if(genimm(imm,&armval)) {
616 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
617 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
618 }else if(genimm(~imm,&armval)) {
619 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
620 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
621 }else if(imm==65535) {
622 #ifndef HAVE_ARMV6
623 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
624 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
625 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
626 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
627 #else
628 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
629 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
630 #endif
631 }else{
632 assert(imm>0&&imm<65535);
633 #ifndef HAVE_ARMV7
634 assem_debug("mov r14,#%d\n",imm&0xFF00);
635 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
636 assem_debug("add r14,r14,#%d\n",imm&0xFF);
637 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
638 #else
639 emit_movw(imm,HOST_TEMPREG);
640 #endif
641 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
642 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
643 }
644}
645
646static void emit_orimm(int rs,int imm,int rt)
647{
648 u_int armval;
649 if(imm==0) {
650 if(rs!=rt) emit_mov(rs,rt);
651 }else if(genimm(imm,&armval)) {
652 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
653 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
654 }else{
655 assert(imm>0&&imm<65536);
656 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
657 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
658 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
659 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
660 }
661}
662
663static void emit_xorimm(int rs,int imm,int rt)
664{
665 u_int armval;
666 if(imm==0) {
667 if(rs!=rt) emit_mov(rs,rt);
668 }else if(genimm(imm,&armval)) {
669 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
670 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
671 }else{
672 assert(imm>0&&imm<65536);
673 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
674 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
675 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
676 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
677 }
678}
679
680static void emit_shlimm(int rs,u_int imm,int rt)
681{
682 assert(imm>0);
683 assert(imm<32);
684 //if(imm==1) ...
685 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
686 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
687}
688
689static void emit_lsls_imm(int rs,int imm,int rt)
690{
691 assert(imm>0);
692 assert(imm<32);
693 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
694 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
695}
696
697static unused void emit_lslpls_imm(int rs,int imm,int rt)
698{
699 assert(imm>0);
700 assert(imm<32);
701 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
702 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
703}
704
705static void emit_shrimm(int rs,u_int imm,int rt)
706{
707 assert(imm>0);
708 assert(imm<32);
709 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
710 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
711}
712
713static void emit_sarimm(int rs,u_int imm,int rt)
714{
715 assert(imm>0);
716 assert(imm<32);
717 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
718 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
719}
720
721static void emit_rorimm(int rs,u_int imm,int rt)
722{
723 assert(imm>0);
724 assert(imm<32);
725 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
726 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
727}
728
/*
 * Sign-extend the low 16 bits of rs into rt: a single sxth with
 * HAVE_ARMV6, otherwise a shift left then arithmetic shift right by 16.
 */
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
739
/*
 * Sign-extend the low 8 bits of rs into rt: a single sxtb with
 * HAVE_ARMV6, otherwise a shift left then arithmetic shift right by 24.
 */
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
750
751static void emit_shl(u_int rs,u_int shift,u_int rt)
752{
753 assert(rs<16);
754 assert(rt<16);
755 assert(shift<16);
756 //if(imm==1) ...
757 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
758 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
759}
760
761static void emit_shr(u_int rs,u_int shift,u_int rt)
762{
763 assert(rs<16);
764 assert(rt<16);
765 assert(shift<16);
766 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
767 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
768}
769
770static void emit_sar(u_int rs,u_int shift,u_int rt)
771{
772 assert(rs<16);
773 assert(rt<16);
774 assert(shift<16);
775 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
776 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
777}
778
779static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
780{
781 assert(rs<16);
782 assert(rt<16);
783 assert(shift<16);
784 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
785 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
786}
787
788static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
789{
790 assert(rs<16);
791 assert(rt<16);
792 assert(shift<16);
793 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
794 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
795}
796
797static void emit_cmpimm(int rs,int imm)
798{
799 u_int armval;
800 if(genimm(imm,&armval)) {
801 assem_debug("cmp %s,#%d\n",regname[rs],imm);
802 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
803 }else if(genimm(-imm,&armval)) {
804 assem_debug("cmn %s,#%d\n",regname[rs],imm);
805 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
806 }else if(imm>0) {
807 assert(imm<65536);
808 emit_movimm(imm,HOST_TEMPREG);
809 assem_debug("cmp %s,r14\n",regname[rs]);
810 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
811 }else{
812 assert(imm>-65536);
813 emit_movimm(-imm,HOST_TEMPREG);
814 assem_debug("cmn %s,r14\n",regname[rs]);
815 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
816 }
817}
818
819static void emit_cmovne_imm(int imm,int rt)
820{
821 assem_debug("movne %s,#%d\n",regname[rt],imm);
822 u_int armval;
823 genimm_checked(imm,&armval);
824 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
825}
826
827static void emit_cmovl_imm(int imm,int rt)
828{
829 assem_debug("movlt %s,#%d\n",regname[rt],imm);
830 u_int armval;
831 genimm_checked(imm,&armval);
832 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
833}
834
835static void emit_cmovb_imm(int imm,int rt)
836{
837 assem_debug("movcc %s,#%d\n",regname[rt],imm);
838 u_int armval;
839 genimm_checked(imm,&armval);
840 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
841}
842
843static void emit_cmovae_imm(int imm,int rt)
844{
845 assem_debug("movcs %s,#%d\n",regname[rt],imm);
846 u_int armval;
847 genimm_checked(imm,&armval);
848 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
849}
850
851static void emit_cmovs_imm(int imm,int rt)
852{
853 assem_debug("movmi %s,#%d\n",regname[rt],imm);
854 u_int armval;
855 genimm_checked(imm,&armval);
856 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
857}
858
859static void emit_cmovne_reg(int rs,int rt)
860{
861 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
862 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
863}
864
865static void emit_cmovl_reg(int rs,int rt)
866{
867 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
868 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
869}
870
871static void emit_cmovb_reg(int rs,int rt)
872{
873 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
874 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
875}
876
877static void emit_cmovs_reg(int rs,int rt)
878{
879 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
880 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
881}
882
/*
 * Emit code that sets rt to 1 if rs < imm (signed compare), else 0.
 * When rt aliases rs, rt is zeroed only after the compare has read rs.
 */
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
890
/*
 * Emit code that sets rt to 1 if rs is below imm (the movcc form is used),
 * else 0. When rt aliases rs, rt is zeroed only after the compare.
 */
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
898
899static void emit_cmp(int rs,int rt)
900{
901 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
902 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
903}
904
905static void emit_cmpcs(int rs,int rt)
906{
907 assem_debug("cmpcs %s,%s\n",regname[rs],regname[rt]);
908 output_w32(0x21500000|rd_rn_rm(0,rs,rt));
909}
910
/*
 * Emit code that sets rt to 1 if rs > 0 (signed), else 0:
 * compare rs with 1, load 1, then conditionally (movlt) replace it with 0.
 */
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
918
/*
 * Emit code that sets rt to 1 if rs is non-zero, else 0.
 * When rt is a different register it receives rs through movs; when it
 * aliases rs, a tst of rs with itself sets the flags instead.
 */
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
926
/*
 * Emit code that sets rt to 1 if rs1 < rs2 (signed compare), else 0.
 * When rt aliases a source, rt is zeroed only after the compare.
 */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
935
/*
 * Emit code that sets rt to 1 if rs1 is below rs2 (the movcc form is used),
 * else 0. When rt aliases a source, rt is zeroed only after the compare.
 */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
944
945static int can_jump_or_call(const void *a)
946{
947 intptr_t offset = (u_char *)a - out - 8;
948 return (-33554432 <= offset && offset < 33554432);
949}
950
951static void emit_call(const void *a_)
952{
953 int a = (int)a_;
954 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
955 u_int offset=genjmp(a);
956 output_w32(0xeb000000|offset);
957}
958
959static void emit_jmp(const void *a_)
960{
961 int a = (int)a_;
962 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
963 u_int offset=genjmp(a);
964 output_w32(0xea000000|offset);
965}
966
967static void emit_jne(const void *a_)
968{
969 int a = (int)a_;
970 assem_debug("bne %x\n",a);
971 u_int offset=genjmp(a);
972 output_w32(0x1a000000|offset);
973}
974
975static void emit_jeq(const void *a_)
976{
977 int a = (int)a_;
978 assem_debug("beq %x\n",a);
979 u_int offset=genjmp(a);
980 output_w32(0x0a000000|offset);
981}
982
983static void emit_js(const void *a_)
984{
985 int a = (int)a_;
986 assem_debug("bmi %x\n",a);
987 u_int offset=genjmp(a);
988 output_w32(0x4a000000|offset);
989}
990
991static void emit_jns(const void *a_)
992{
993 int a = (int)a_;
994 assem_debug("bpl %x\n",a);
995 u_int offset=genjmp(a);
996 output_w32(0x5a000000|offset);
997}
998
999static void emit_jl(const void *a_)
1000{
1001 int a = (int)a_;
1002 assem_debug("blt %x\n",a);
1003 u_int offset=genjmp(a);
1004 output_w32(0xba000000|offset);
1005}
1006
1007static void emit_jge(const void *a_)
1008{
1009 int a = (int)a_;
1010 assem_debug("bge %x\n",a);
1011 u_int offset=genjmp(a);
1012 output_w32(0xaa000000|offset);
1013}
1014
1015static void emit_jno(const void *a_)
1016{
1017 int a = (int)a_;
1018 assem_debug("bvc %x\n",a);
1019 u_int offset=genjmp(a);
1020 output_w32(0x7a000000|offset);
1021}
1022
1023static void emit_jc(const void *a_)
1024{
1025 int a = (int)a_;
1026 assem_debug("bcs %x\n",a);
1027 u_int offset=genjmp(a);
1028 output_w32(0x2a000000|offset);
1029}
1030
1031static void emit_jcc(const void *a_)
1032{
1033 int a = (int)a_;
1034 assem_debug("bcc %x\n",a);
1035 u_int offset=genjmp(a);
1036 output_w32(0x3a000000|offset);
1037}
1038
1039static void *emit_cbz(int rs, const void *a)
1040{
1041 void *ret;
1042 emit_test(rs, rs);
1043 ret = out;
1044 emit_jeq(a);
1045 return ret;
1046}
1047
1048static unused void emit_callreg(u_int r)
1049{
1050 assert(r<15);
1051 assem_debug("blx %s\n",regname[r]);
1052 output_w32(0xe12fff30|r);
1053}
1054
1055static void emit_jmpreg(u_int r)
1056{
1057 assem_debug("mov pc,%s\n",regname[r]);
1058 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1059}
1060
1061static void emit_ret(void)
1062{
1063 emit_jmpreg(14);
1064}
1065
1066static void emit_readword_indexed(int offset, int rs, int rt)
1067{
1068 assert(offset>-4096&&offset<4096);
1069 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1070 if(offset>=0) {
1071 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1072 }else{
1073 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1074 }
1075}
1076
1077static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1078{
1079 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1080 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1081}
1082#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1083
1084static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1085{
1086 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1087 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1088}
1089
1090static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1091{
1092 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1093 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1094}
1095
1096static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1097{
1098 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1099 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1100}
1101
1102static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1103{
1104 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1105 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1106}
1107
1108static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1109{
1110 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1111 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1112}
1113
1114static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1115{
1116 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1117 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1118}
1119
1120static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1121{
1122 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1123 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1124}
1125
1126static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1127{
1128 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1129 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1130}
1131
1132static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1133{
1134 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1135 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1136}
1137
1138static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1139{
1140 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1141 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1142}
1143
1144static void emit_str_dualindexed(int rs1, int rs2, int rt)
1145{
1146 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1147 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1148}
1149
1150static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1151{
1152 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1153 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1154}
1155
1156static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1157{
1158 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1159 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1160}
1161
1162static void emit_movsbl_indexed(int offset, int rs, int rt)
1163{
1164 assert(offset>-256&&offset<256);
1165 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1166 if(offset>=0) {
1167 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1168 }else{
1169 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1170 }
1171}
1172
1173static void emit_movswl_indexed(int offset, int rs, int rt)
1174{
1175 assert(offset>-256&&offset<256);
1176 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1177 if(offset>=0) {
1178 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1179 }else{
1180 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1181 }
1182}
1183
1184static void emit_movzbl_indexed(int offset, int rs, int rt)
1185{
1186 assert(offset>-4096&&offset<4096);
1187 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1188 if(offset>=0) {
1189 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1190 }else{
1191 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1192 }
1193}
1194
1195static void emit_movzwl_indexed(int offset, int rs, int rt)
1196{
1197 assert(offset>-256&&offset<256);
1198 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1199 if(offset>=0) {
1200 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1201 }else{
1202 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1203 }
1204}
1205
1206static void emit_ldrd(int offset, int rs, int rt)
1207{
1208 assert(offset>-256&&offset<256);
1209 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1210 if(offset>=0) {
1211 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1212 }else{
1213 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1214 }
1215}
1216
1217static void emit_readword(void *addr, int rt)
1218{
1219 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1220 assert(offset<4096);
1221 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1222 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1223}
1224#define emit_readptr emit_readword
1225
1226static void emit_writeword_indexed(int rt, int offset, int rs)
1227{
1228 assert(offset>-4096&&offset<4096);
1229 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1230 if(offset>=0) {
1231 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1232 }else{
1233 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1234 }
1235}
1236
1237static void emit_writehword_indexed(int rt, int offset, int rs)
1238{
1239 assert(offset>-256&&offset<256);
1240 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1241 if(offset>=0) {
1242 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1243 }else{
1244 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1245 }
1246}
1247
1248static void emit_writebyte_indexed(int rt, int offset, int rs)
1249{
1250 assert(offset>-4096&&offset<4096);
1251 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1252 if(offset>=0) {
1253 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1254 }else{
1255 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1256 }
1257}
1258
1259static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1260{
1261 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1262 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1263}
1264
1265static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1266{
1267 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1268 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1269}
1270
1271static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1272{
1273 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1274 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1275}
1276
1277static void emit_writeword(int rt, void *addr)
1278{
1279 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1280 assert(offset<4096);
1281 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1282 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1283}
1284
1285static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1286{
1287 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1288 assert(rs1<16);
1289 assert(rs2<16);
1290 assert(hi<16);
1291 assert(lo<16);
1292 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1293}
1294
1295static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1296{
1297 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1298 assert(rs1<16);
1299 assert(rs2<16);
1300 assert(hi<16);
1301 assert(lo<16);
1302 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1303}
1304
1305static void emit_clz(int rs,int rt)
1306{
1307 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1308 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1309}
1310
1311static void emit_subcs(int rs1,int rs2,int rt)
1312{
1313 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1314 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1315}
1316
1317static void emit_shrcc_imm(int rs,u_int imm,int rt)
1318{
1319 assert(imm>0);
1320 assert(imm<32);
1321 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1322 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1323}
1324
1325static void emit_shrne_imm(int rs,u_int imm,int rt)
1326{
1327 assert(imm>0);
1328 assert(imm<32);
1329 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1330 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1331}
1332
1333static void emit_negmi(int rs, int rt)
1334{
1335 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1336 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1337}
1338
1339static void emit_negsmi(int rs, int rt)
1340{
1341 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1342 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1343}
1344
1345static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1346{
1347 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1348 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1349}
1350
1351static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1352{
1353 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1354 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1355}
1356
1357static void emit_teq(int rs, int rt)
1358{
1359 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1360 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1361}
1362
1363static unused void emit_rsbimm(int rs, int imm, int rt)
1364{
1365 u_int armval;
1366 genimm_checked(imm,&armval);
1367 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1369}
1370
1371// Conditionally select one of two immediates, optimizing for small code size
1372// This will only be called if HAVE_CMOV_IMM is defined
1373static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1374{
1375 u_int armval;
1376 if(genimm(imm2-imm1,&armval)) {
1377 emit_movimm(imm1,rt);
1378 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1379 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1380 }else if(genimm(imm1-imm2,&armval)) {
1381 emit_movimm(imm1,rt);
1382 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1383 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1384 }
1385 else {
1386 #ifndef HAVE_ARMV7
1387 emit_movimm(imm1,rt);
1388 add_literal((int)out,imm2);
1389 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1390 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1391 #else
1392 emit_movw(imm1&0x0000FFFF,rt);
1393 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1394 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1395 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1396 }
1397 emit_movt(imm1&0xFFFF0000,rt);
1398 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1399 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1400 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1401 }
1402 #endif
1403 }
1404}
1405
1406// special case for checking invalid_code
1407static void emit_ldrb_indexedsr12_reg(int base, int r, int rt)
1408{
1409 assem_debug("ldrb %s,%s,%s lsr #12\n",regname[rt],regname[base],regname[r]);
1410 output_w32(0xe7d00000|rd_rn_rm(rt,base,r)|0x620);
1411}
1412
// Emit "blne a": branch-with-link to address a, taken only under the "ne"
// condition. The branch offset field comes from genjmp().
static void emit_callne(int a)
{
  assem_debug("blne %x\n", a);
  output_w32(0x1b000000 | genjmp(a));
}
1419
1420// Used to preload hash table entries
1421static unused void emit_prefetchreg(int r)
1422{
1423 assem_debug("pld %s\n",regname[r]);
1424 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1425}
1426
1427// Special case for mini_ht
1428static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1429{
1430 assert(offset<4096);
1431 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1432 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1433}
1434
1435static void emit_orrne_imm(int rs,int imm,int rt)
1436{
1437 u_int armval;
1438 genimm_checked(imm,&armval);
1439 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1440 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1441}
1442
1443static unused void emit_addpl_imm(int rs,int imm,int rt)
1444{
1445 u_int armval;
1446 genimm_checked(imm,&armval);
1447 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1448 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1449}
1450
1451static void emit_jno_unlikely(int a)
1452{
1453 //emit_jno(a);
1454 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1455 output_w32(0x72800000|rd_rn_rm(15,15,0));
1456}
1457
1458static void save_regs_all(u_int reglist)
1459{
1460 int i;
1461 if(!reglist) return;
1462 assem_debug("stmia fp,{");
1463 for(i=0;i<16;i++)
1464 if(reglist&(1<<i))
1465 assem_debug("r%d,",i);
1466 assem_debug("}\n");
1467 output_w32(0xe88b0000|reglist);
1468}
1469
1470static void restore_regs_all(u_int reglist)
1471{
1472 int i;
1473 if(!reglist) return;
1474 assem_debug("ldmia fp,{");
1475 for(i=0;i<16;i++)
1476 if(reglist&(1<<i))
1477 assem_debug("r%d,",i);
1478 assem_debug("}\n");
1479 output_w32(0xe89b0000|reglist);
1480}
1481
1482// Save registers before function call
1483static void save_regs(u_int reglist)
1484{
1485 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1486 save_regs_all(reglist);
1487}
1488
1489// Restore registers after function call
1490static void restore_regs(u_int reglist)
1491{
1492 reglist&=CALLER_SAVE_REGS;
1493 restore_regs_all(reglist);
1494}
1495
1496/* Stubs/epilogue */
1497
1498static void literal_pool(int n)
1499{
1500 if(!literalcount) return;
1501 if(n) {
1502 if((int)out-literals[0][0]<4096-n) return;
1503 }
1504 u_int *ptr;
1505 int i;
1506 for(i=0;i<literalcount;i++)
1507 {
1508 u_int l_addr=(u_int)out;
1509 int j;
1510 for(j=0;j<i;j++) {
1511 if(literals[j][1]==literals[i][1]) {
1512 //printf("dup %08x\n",literals[i][1]);
1513 l_addr=literals[j][0];
1514 break;
1515 }
1516 }
1517 ptr=(u_int *)literals[i][0];
1518 u_int offset=l_addr-(u_int)ptr-8;
1519 assert(offset<4096);
1520 assert(!(offset&3));
1521 *ptr|=offset;
1522 if(l_addr==(u_int)out) {
1523 literals[i][0]=l_addr; // remember for dupes
1524 output_w32(literals[i][1]);
1525 }
1526 }
1527 literalcount=0;
1528}
1529
1530static void literal_pool_jumpover(int n)
1531{
1532 if(!literalcount) return;
1533 if(n) {
1534 if((int)out-literals[0][0]<4096-n) return;
1535 }
1536 void *jaddr = out;
1537 emit_jmp(0);
1538 literal_pool(0);
1539 set_jump_target(jaddr, out);
1540}
1541
1542// parsed by get_pointer, find_extjump_insn
1543static void emit_extjump(u_char *addr, u_int target)
1544{
1545 u_char *ptr=(u_char *)addr;
1546 assert((ptr[3]&0x0e)==0xa);
1547 (void)ptr;
1548
1549 emit_loadlp(target,0);
1550 emit_loadlp((u_int)addr,1);
1551 assert(ndrc->translation_cache <= addr &&
1552 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
1553 emit_far_jump(dyna_linker);
1554}
1555
1556static void check_extjump2(void *src)
1557{
1558 u_int *ptr = src;
1559 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1560 (void)ptr;
1561}
1562
1563// put rt_val into rt, potentially making use of rs with value rs_val
1564static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1565{
1566 u_int armval;
1567 int diff;
1568 if(genimm(rt_val,&armval)) {
1569 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1570 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1571 return;
1572 }
1573 if(genimm(~rt_val,&armval)) {
1574 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1575 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1576 return;
1577 }
1578 diff=rt_val-rs_val;
1579 if(genimm(diff,&armval)) {
1580 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1581 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1582 return;
1583 }else if(genimm(-diff,&armval)) {
1584 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1585 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1586 return;
1587 }
1588 emit_movimm(rt_val,rt);
1589}
1590
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. when v2 equals
// v1 or when v2-v1 (or v1-v2) fits in 8 bits after dropping trailing pairs
// of zero bits. Returns 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if (v1 == v2)
    return 1;

  delta = v2 - v1;

  // positive direction
  bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  // negative direction
  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return bits < 0x100;
}
1606
1607static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1608{
1609 switch(type) {
1610 case LOADB_STUB: emit_signextend8(rs,rt); break;
1611 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1612 case LOADH_STUB: emit_signextend16(rs,rt); break;
1613 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1614 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1615 default: assert(0);
1616 }
1617}
1618
1619#include "pcsxmem.h"
1620#include "pcsxmem_inline.c"
1621
1622static void do_readstub(int n)
1623{
1624 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1625 literal_pool(256);
1626 set_jump_target(stubs[n].addr, out);
1627 enum stub_type type=stubs[n].type;
1628 int i=stubs[n].a;
1629 int rs=stubs[n].b;
1630 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1631 u_int reglist=stubs[n].e;
1632 const signed char *i_regmap=i_regs->regmap;
1633 int rt;
1634 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1635 rt=get_reg(i_regmap,FTEMP);
1636 }else{
1637 rt=get_reg(i_regmap,dops[i].rt1);
1638 }
1639 assert(rs>=0);
1640 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1641 void *restore_jump = NULL;
1642 reglist|=(1<<rs);
1643 for(r=0;r<=12;r++) {
1644 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1645 temp=r; break;
1646 }
1647 }
1648 if(rt>=0&&dops[i].rt1!=0)
1649 reglist&=~(1<<rt);
1650 if(temp==-1) {
1651 save_regs(reglist);
1652 regs_saved=1;
1653 temp=(rs==0)?2:0;
1654 }
1655 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1656 temp2=1;
1657 emit_readword(&mem_rtab,temp);
1658 emit_shrimm(rs,12,temp2);
1659 emit_readword_dualindexedx4(temp,temp2,temp2);
1660 emit_lsls_imm(temp2,1,temp2);
1661 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1662 switch(type) {
1663 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1664 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1665 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1666 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1667 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1668 default: assert(0);
1669 }
1670 }
1671 if(regs_saved) {
1672 restore_jump=out;
1673 emit_jcc(0); // jump to reg restore
1674 }
1675 else
1676 emit_jcc(stubs[n].retaddr); // return address
1677
1678 if(!regs_saved)
1679 save_regs(reglist);
1680 void *handler=NULL;
1681 if(type==LOADB_STUB||type==LOADBU_STUB)
1682 handler=jump_handler_read8;
1683 if(type==LOADH_STUB||type==LOADHU_STUB)
1684 handler=jump_handler_read16;
1685 if(type==LOADW_STUB)
1686 handler=jump_handler_read32;
1687 assert(handler);
1688 pass_args(rs,temp2);
1689 int cc=get_reg(i_regmap,CCREG);
1690 if(cc<0)
1691 emit_loadreg(CCREG,2);
1692 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1693 emit_far_call(handler);
1694 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1695 mov_loadtype_adj(type,0,rt);
1696 }
1697 if(restore_jump)
1698 set_jump_target(restore_jump, out);
1699 restore_regs(reglist);
1700 emit_jmp(stubs[n].retaddr); // return address
1701}
1702
1703static void inline_readstub(enum stub_type type, int i, u_int addr,
1704 const signed char regmap[], int target, int adj, u_int reglist)
1705{
1706 int rs=get_reg(regmap,target);
1707 int rt=get_reg(regmap,target);
1708 if(rs<0) rs=get_reg_temp(regmap);
1709 assert(rs>=0);
1710 u_int is_dynamic;
1711 uintptr_t host_addr = 0;
1712 void *handler;
1713 int cc=get_reg(regmap,CCREG);
1714 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1715 return;
1716 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1717 if (handler == NULL) {
1718 if(rt<0||dops[i].rt1==0)
1719 return;
1720 if(addr!=host_addr)
1721 emit_movimm_from(addr,rs,host_addr,rs);
1722 switch(type) {
1723 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1724 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1725 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1726 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1727 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1728 default: assert(0);
1729 }
1730 return;
1731 }
1732 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1733 if(is_dynamic) {
1734 if(type==LOADB_STUB||type==LOADBU_STUB)
1735 handler=jump_handler_read8;
1736 if(type==LOADH_STUB||type==LOADHU_STUB)
1737 handler=jump_handler_read16;
1738 if(type==LOADW_STUB)
1739 handler=jump_handler_read32;
1740 }
1741
1742 // call a memhandler
1743 if(rt>=0&&dops[i].rt1!=0)
1744 reglist&=~(1<<rt);
1745 save_regs(reglist);
1746 if(target==0)
1747 emit_movimm(addr,0);
1748 else if(rs!=0)
1749 emit_mov(rs,0);
1750 if(cc<0)
1751 emit_loadreg(CCREG,2);
1752 if(is_dynamic) {
1753 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1754 emit_addimm(cc<0?2:cc,adj,2);
1755 }
1756 else {
1757 emit_readword(&last_count,3);
1758 emit_addimm(cc<0?2:cc,adj,2);
1759 emit_add(2,3,2);
1760 emit_writeword(2,&psxRegs.cycle);
1761 }
1762
1763 emit_far_call(handler);
1764
1765 if(rt>=0&&dops[i].rt1!=0) {
1766 switch(type) {
1767 case LOADB_STUB: emit_signextend8(0,rt); break;
1768 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1769 case LOADH_STUB: emit_signextend16(0,rt); break;
1770 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1771 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1772 default: assert(0);
1773 }
1774 }
1775 restore_regs(reglist);
1776}
1777
1778static void do_writestub(int n)
1779{
1780 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1781 literal_pool(256);
1782 set_jump_target(stubs[n].addr, out);
1783 enum stub_type type=stubs[n].type;
1784 int i=stubs[n].a;
1785 int rs=stubs[n].b;
1786 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1787 u_int reglist=stubs[n].e;
1788 const signed char *i_regmap=i_regs->regmap;
1789 int rt,r;
1790 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1791 rt=get_reg(i_regmap,r=FTEMP);
1792 }else{
1793 rt=get_reg(i_regmap,r=dops[i].rs2);
1794 }
1795 assert(rs>=0);
1796 assert(rt>=0);
1797 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1798 void *restore_jump = NULL;
1799 int reglist2=reglist|(1<<rs)|(1<<rt);
1800 for(rtmp=0;rtmp<=12;rtmp++) {
1801 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1802 temp=rtmp; break;
1803 }
1804 }
1805 if(temp==-1) {
1806 save_regs(reglist);
1807 regs_saved=1;
1808 for(rtmp=0;rtmp<=3;rtmp++)
1809 if(rtmp!=rs&&rtmp!=rt)
1810 {temp=rtmp;break;}
1811 }
1812 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1813 temp2=3;
1814 emit_readword(&mem_wtab,temp);
1815 emit_shrimm(rs,12,temp2);
1816 emit_readword_dualindexedx4(temp,temp2,temp2);
1817 emit_lsls_imm(temp2,1,temp2);
1818 switch(type) {
1819 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1820 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1821 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1822 default: assert(0);
1823 }
1824 if(regs_saved) {
1825 restore_jump=out;
1826 emit_jcc(0); // jump to reg restore
1827 }
1828 else
1829 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1830
1831 if(!regs_saved)
1832 save_regs(reglist);
1833 void *handler=NULL;
1834 switch(type) {
1835 case STOREB_STUB: handler=jump_handler_write8; break;
1836 case STOREH_STUB: handler=jump_handler_write16; break;
1837 case STOREW_STUB: handler=jump_handler_write32; break;
1838 default: assert(0);
1839 }
1840 assert(handler);
1841 pass_args(rs,rt);
1842 if(temp2!=3)
1843 emit_mov(temp2,3);
1844 int cc=get_reg(i_regmap,CCREG);
1845 if(cc<0)
1846 emit_loadreg(CCREG,2);
1847 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1848 // returns new cycle_count
1849 emit_far_call(handler);
1850 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1851 if(cc<0)
1852 emit_storereg(CCREG,2);
1853 if(restore_jump)
1854 set_jump_target(restore_jump, out);
1855 restore_regs(reglist);
1856 emit_jmp(stubs[n].retaddr);
1857}
1858
1859static void inline_writestub(enum stub_type type, int i, u_int addr,
1860 const signed char regmap[], int target, int adj, u_int reglist)
1861{
1862 int rs=get_reg_temp(regmap);
1863 int rt=get_reg(regmap,target);
1864 assert(rs>=0);
1865 assert(rt>=0);
1866 uintptr_t host_addr = 0;
1867 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1868 if (handler == NULL) {
1869 if(addr!=host_addr)
1870 emit_movimm_from(addr,rs,host_addr,rs);
1871 switch(type) {
1872 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1873 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1874 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1875 default: assert(0);
1876 }
1877 return;
1878 }
1879
1880 // call a memhandler
1881 save_regs(reglist);
1882 pass_args(rs,rt);
1883 int cc=get_reg(regmap,CCREG);
1884 if(cc<0)
1885 emit_loadreg(CCREG,2);
1886 emit_addimm(cc<0?2:cc,adj,2);
1887 emit_movimm((u_int)handler,3);
1888 // returns new cycle_count
1889 emit_far_call(jump_handler_write_h);
1890 emit_addimm(0,-adj,cc<0?2:cc);
1891 if(cc<0)
1892 emit_storereg(CCREG,2);
1893 restore_regs(reglist);
1894}
1895
1896/* Special assem */
1897
1898static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1899{
1900 save_regs_all(reglist);
1901 cop2_do_stall_check(op, i, i_regs, 0);
1902#ifdef PCNT
1903 emit_movimm(op, 0);
1904 emit_far_call(pcnt_gte_start);
1905#endif
1906 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
1907}
1908
1909static void c2op_epilogue(u_int op,u_int reglist)
1910{
1911#ifdef PCNT
1912 emit_movimm(op,0);
1913 emit_far_call(pcnt_gte_end);
1914#endif
1915 restore_regs_all(reglist);
1916}
1917
1918static void c2op_call_MACtoIR(int lm,int need_flags)
1919{
1920 if(need_flags)
1921 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
1922 else
1923 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
1924}
1925
1926static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
1927{
1928 emit_far_call(func);
1929 // func is C code and trashes r0
1930 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1931 if(need_flags||need_ir)
1932 c2op_call_MACtoIR(lm,need_flags);
1933 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
1934}
1935
1936static void c2op_assemble(int i, const struct regstat *i_regs)
1937{
1938 u_int c2op = source[i] & 0x3f;
1939 u_int reglist_full = get_host_reglist(i_regs->regmap);
1940 u_int reglist = reglist_full & CALLER_SAVE_REGS;
1941 int need_flags, need_ir;
1942
1943 if (gte_handlers[c2op]!=NULL) {
1944 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1945 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1946 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
1947 source[i],gte_unneeded[i+1],need_flags,need_ir);
1948 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1949 need_flags=0;
1950 int shift = (source[i] >> 19) & 1;
1951 int lm = (source[i] >> 10) & 1;
1952 switch(c2op) {
1953#ifndef DRC_DBG
1954 case GTE_MVMVA: {
1955#ifdef HAVE_ARMV5
1956 int v = (source[i] >> 15) & 3;
1957 int cv = (source[i] >> 13) & 3;
1958 int mx = (source[i] >> 17) & 3;
1959 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
1960 c2op_prologue(c2op,i,i_regs,reglist);
1961 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
1962 if(v<3)
1963 emit_ldrd(v*8,0,4);
1964 else {
1965 emit_movzwl_indexed(9*4,0,4); // gteIR
1966 emit_movzwl_indexed(10*4,0,6);
1967 emit_movzwl_indexed(11*4,0,5);
1968 emit_orrshl_imm(6,16,4);
1969 }
1970 if(mx<3)
1971 emit_addimm(0,32*4+mx*8*4,6);
1972 else
1973 emit_readword(&zeromem_ptr,6);
1974 if(cv<3)
1975 emit_addimm(0,32*4+(cv*8+5)*4,7);
1976 else
1977 emit_readword(&zeromem_ptr,7);
1978#ifdef __ARM_NEON__
1979 emit_movimm(source[i],1); // opcode
1980 emit_far_call(gteMVMVA_part_neon);
1981 if(need_flags) {
1982 emit_movimm(lm,1);
1983 emit_far_call(gteMACtoIR_flags_neon);
1984 }
1985#else
1986 if(cv==3&&shift)
1987 emit_far_call(gteMVMVA_part_cv3sh12_arm);
1988 else {
1989 emit_movimm(shift,1);
1990 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
1991 }
1992 if(need_flags||need_ir)
1993 c2op_call_MACtoIR(lm,need_flags);
1994#endif
1995#else /* if not HAVE_ARMV5 */
1996 c2op_prologue(c2op,i,i_regs,reglist);
1997 emit_movimm(source[i],1); // opcode
1998 emit_writeword(1,&psxRegs.code);
1999 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2000#endif
2001 break;
2002 }
2003 case GTE_OP:
2004 c2op_prologue(c2op,i,i_regs,reglist);
2005 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2006 if(need_flags||need_ir) {
2007 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2008 c2op_call_MACtoIR(lm,need_flags);
2009 }
2010 break;
2011 case GTE_DPCS:
2012 c2op_prologue(c2op,i,i_regs,reglist);
2013 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2014 break;
2015 case GTE_INTPL:
2016 c2op_prologue(c2op,i,i_regs,reglist);
2017 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2018 break;
2019 case GTE_SQR:
2020 c2op_prologue(c2op,i,i_regs,reglist);
2021 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2022 if(need_flags||need_ir) {
2023 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2024 c2op_call_MACtoIR(lm,need_flags);
2025 }
2026 break;
2027 case GTE_DCPL:
2028 c2op_prologue(c2op,i,i_regs,reglist);
2029 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2030 break;
2031 case GTE_GPF:
2032 c2op_prologue(c2op,i,i_regs,reglist);
2033 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2034 break;
2035 case GTE_GPL:
2036 c2op_prologue(c2op,i,i_regs,reglist);
2037 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2038 break;
2039#endif
2040 default:
2041 c2op_prologue(c2op,i,i_regs,reglist);
2042#ifdef DRC_DBG
2043 emit_movimm(source[i],1); // opcode
2044 emit_writeword(1,&psxRegs.code);
2045#endif
2046 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2047 break;
2048 }
2049 c2op_epilogue(c2op,reglist);
2050 }
2051}
2052
// Assemble the value fix-up for a write to cop2 control register 31.
// Equivalent C, with sl as input and temp as result:
//   value = value & 0x7ffff000;
//   if (value & 0x7f87e000) value |= 0x80000000;
// The 0x7f87e000 test is split into three chained tests.
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // clear the low 12 bits: shift right then back left
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // chained tests; the later ones only run while the result is still "eq"
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  emit_orrne_imm(temp, 0x80000000, temp);
}
2064
2065static void do_mfc2_31_one(u_int copr,signed char temp)
2066{
2067 emit_readword(&reg_cop2d[copr],temp);
2068 emit_lsls_imm(temp,16,temp);
2069 emit_cmovs_imm(0,temp);
2070 emit_cmpimm(temp,0xf80<<16);
2071 emit_andimm(temp,0xf80<<16,temp);
2072 emit_cmovae_imm(0xf80<<16,temp);
2073}
2074
2075static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2076{
2077 if (temp < 0) {
2078 host_tempreg_acquire();
2079 temp = HOST_TEMPREG;
2080 }
2081 do_mfc2_31_one(9,temp);
2082 emit_shrimm(temp,7+16,tl);
2083 do_mfc2_31_one(10,temp);
2084 emit_orrshr_imm(temp,2+16,tl);
2085 do_mfc2_31_one(11,temp);
2086 emit_orrshr_imm(temp,-3+16,tl);
2087 emit_writeword(tl,&reg_cop2d[29]);
2088 if (temp == HOST_TEMPREG)
2089 host_tempreg_release();
2090}
2091
2092static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
2093{
2094 // case 0x18: MULT
2095 // case 0x19: MULTU
2096 // case 0x1A: DIV
2097 // case 0x1B: DIVU
2098 // case 0x1C: DMULT
2099 // case 0x1D: DMULTU
2100 // case 0x1E: DDIV
2101 // case 0x1F: DDIVU
2102 if(dops[i].rs1&&dops[i].rs2)
2103 {
2104 if((dops[i].opcode2&4)==0) // 32-bit
2105 {
2106 if(dops[i].opcode2==0x18) // MULT
2107 {
2108 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2109 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2110 signed char hi=get_reg(i_regs->regmap,HIREG);
2111 signed char lo=get_reg(i_regs->regmap,LOREG);
2112 assert(m1>=0);
2113 assert(m2>=0);
2114 assert(hi>=0);
2115 assert(lo>=0);
2116 emit_smull(m1,m2,hi,lo);
2117 }
2118 if(dops[i].opcode2==0x19) // MULTU
2119 {
2120 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2121 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2122 signed char hi=get_reg(i_regs->regmap,HIREG);
2123 signed char lo=get_reg(i_regs->regmap,LOREG);
2124 assert(m1>=0);
2125 assert(m2>=0);
2126 assert(hi>=0);
2127 assert(lo>=0);
2128 emit_umull(m1,m2,hi,lo);
2129 }
2130 if(dops[i].opcode2==0x1A) // DIV
2131 {
2132 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2133 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2134 assert(d1>=0);
2135 assert(d2>=0);
2136 signed char quotient=get_reg(i_regs->regmap,LOREG);
2137 signed char remainder=get_reg(i_regs->regmap,HIREG);
2138 assert(quotient>=0);
2139 assert(remainder>=0);
2140 emit_movs(d1,remainder);
2141 emit_movimm(0xffffffff,quotient);
2142 emit_negmi(quotient,quotient); // .. quotient and ..
2143 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2144 emit_movs(d2,HOST_TEMPREG);
2145 emit_jeq(out+52); // Division by zero
2146 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2147#ifdef HAVE_ARMV5
2148 emit_clz(HOST_TEMPREG,quotient);
2149 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2150#else
2151 emit_movimm(0,quotient);
2152 emit_addpl_imm(quotient,1,quotient);
2153 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2154 emit_jns(out-2*4);
2155#endif
2156 emit_orimm(quotient,1<<31,quotient);
2157 emit_shr(quotient,quotient,quotient);
2158 emit_cmp(remainder,HOST_TEMPREG);
2159 emit_subcs(remainder,HOST_TEMPREG,remainder);
2160 emit_adcs(quotient,quotient,quotient);
2161 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2162 emit_jcc(out-16); // -4
2163 emit_teq(d1,d2);
2164 emit_negmi(quotient,quotient);
2165 emit_test(d1,d1);
2166 emit_negmi(remainder,remainder);
2167 }
2168 if(dops[i].opcode2==0x1B) // DIVU
2169 {
2170 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2171 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2172 assert(d1>=0);
2173 assert(d2>=0);
2174 signed char quotient=get_reg(i_regs->regmap,LOREG);
2175 signed char remainder=get_reg(i_regs->regmap,HIREG);
2176 assert(quotient>=0);
2177 assert(remainder>=0);
2178 emit_mov(d1,remainder);
2179 emit_movimm(0xffffffff,quotient); // div0 case
2180 emit_test(d2,d2);
2181 emit_jeq(out+40); // Division by zero
2182#ifdef HAVE_ARMV5
2183 emit_clz(d2,HOST_TEMPREG);
2184 emit_movimm(1<<31,quotient);
2185 emit_shl(d2,HOST_TEMPREG,d2);
2186#else
2187 emit_movimm(0,HOST_TEMPREG);
2188 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2189 emit_lslpls_imm(d2,1,d2);
2190 emit_jns(out-2*4);
2191 emit_movimm(1<<31,quotient);
2192#endif
2193 emit_shr(quotient,HOST_TEMPREG,quotient);
2194 emit_cmp(remainder,d2);
2195 emit_subcs(remainder,d2,remainder);
2196 emit_adcs(quotient,quotient,quotient);
2197 emit_shrcc_imm(d2,1,d2);
2198 emit_jcc(out-16); // -4
2199 }
2200 }
2201 else // 64-bit
2202 assert(0);
2203 }
2204 else
2205 {
2206 // Multiply by zero is zero.
2207 // MIPS does not have a divide by zero exception.
2208 // The result is undefined, we return zero.
2209 signed char hr=get_reg(i_regs->regmap,HIREG);
2210 signed char lr=get_reg(i_regs->regmap,LOREG);
2211 if(hr>=0) emit_zeroreg(hr);
2212 if(lr>=0) emit_zeroreg(lr);
2213 }
2214}
2215#define multdiv_assemble multdiv_assemble_arm
2216
2217static void do_jump_vaddr(int rs)
2218{
2219 emit_far_jump(jump_vaddr_reg[rs]);
2220}
2221
// Intentionally a no-op on ARM.
// The x86 backend uses this hook to place the value 0xf8 in a register;
// here the hash is produced by a single instruction in do_rhash(), so
// nothing needs to be preloaded.
static void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
2226
2227static void do_preload_rhtbl(int ht) {
2228 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2229}
2230
// Emit the hash step for a mini hash table lookup: an AND-immediate that
// keeps bits 3..7 of host register rs (mask 0xf8) and stores them in rh.
static void do_rhash(int rs,int rh) {
  const int hash_mask = 0xf8;

  emit_andimm(rs,hash_mask,rh);
}
2234
2235static void do_miniht_load(int ht,int rh) {
2236 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2237 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2238}
2239
// Emit the tail of a mini hash table lookup:
//  - compare rh with rs,
//  - on equality, load register 15 (pc) from ht at offset 4,
//  - otherwise fall through to the jump_vaddr stub for the register holding
//    the target address.
// When CORTEX_A8_BRANCH_PREDICTION_HACK is defined, the address is first
// moved to host register 7 (if not already there) so that the same stub is
// always used.
static void do_miniht_jump(int rs,int rh,int ht) {
  int jump_reg=rs;

  emit_cmp(rh,rs);
  emit_ldreq_indexed(ht,4,15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  if(jump_reg!=7) {
    emit_mov(jump_reg,7);
    jump_reg=7;
  }
#endif
  do_jump_vaddr(jump_reg);
}
2250
2251static void do_miniht_insert(u_int return_address,int rt,int temp) {
2252 #ifndef HAVE_ARMV7
2253 emit_movimm(return_address,rt); // PC into link register
2254 add_to_linker(out,return_address,1);
2255 emit_pcreladdr(temp);
2256 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2257 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2258 #else
2259 emit_movw(return_address&0x0000FFFF,rt);
2260 add_to_linker(out,return_address,1);
2261 emit_pcreladdr(temp);
2262 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2263 emit_movt(return_address&0xFFFF0000,rt);
2264 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2265 #endif
2266}
2267
2268// CPU-architecture-specific initialization
2269static void arch_init(void)
2270{
2271 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2272 struct tramp_insns *ops = ndrc->tramp.ops;
2273 size_t i;
2274 assert(!(diff & 3));
2275 assert(diff < 0x1000);
2276 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2277 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2278 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2279 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2280}
2281
2282// vim:shiftwidth=2:expandtab