drc: rework smc checks again
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#ifdef DRC_DBG
31#pragma GCC diagnostic ignored "-Wunused-function"
32#pragma GCC diagnostic ignored "-Wunused-variable"
33#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
34#endif
35
// Routines that are only declared in this part of the file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN routines, indexed by register number N.
// Entries 11, 13, 14 and 15 have no routine and stay null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
70
71void invalidate_addr_r0();
72void invalidate_addr_r1();
73void invalidate_addr_r2();
74void invalidate_addr_r3();
75void invalidate_addr_r4();
76void invalidate_addr_r5();
77void invalidate_addr_r6();
78void invalidate_addr_r7();
79void invalidate_addr_r8();
80void invalidate_addr_r9();
81void invalidate_addr_r10();
82void invalidate_addr_r12();
83
84const u_int invalidate_addr_reg[16] = {
85 (int)invalidate_addr_r0,
86 (int)invalidate_addr_r1,
87 (int)invalidate_addr_r2,
88 (int)invalidate_addr_r3,
89 (int)invalidate_addr_r4,
90 (int)invalidate_addr_r5,
91 (int)invalidate_addr_r6,
92 (int)invalidate_addr_r7,
93 (int)invalidate_addr_r8,
94 (int)invalidate_addr_r9,
95 (int)invalidate_addr_r10,
96 0,
97 (int)invalidate_addr_r12,
98 0,
99 0,
100 0};
101
102/* Linker */
103
// Patch the instruction word at addr so that it refers to target_.
// "dist" below is target - addr - 8.  What gets rewritten depends on the
// byte at offset 3 of the existing word:
//   0xe2  - the low 12 bits become (dist >> 2) | 0xF00; dist must be < 1024
//   0x72  - (generated by emit_jno_unlikely) the same form when dist < 1024,
//           (dist >> 4) | 0xE00 when dist < 4096 and a multiple of 16,
//           otherwise the whole word is replaced by 0x7A000000 | branch field
//   other - must satisfy (byte & 0x0e) == 0xa; the low 24 bits are replaced
//           by the branch field
// The branch field is (dist << 6) >> 8.
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = (u_int *)addr;
  u_char top = ((u_char *)addr)[3];
  u_int target = (u_int)target_;
  u_int dist = target - (u_int)insn - 8;
  u_int branch_field = (dist << 6) >> 8;

  if (top == 0xe2) {
    assert(dist < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (dist >> 2) | 0xF00;
    return;
  }

  if (top == 0x72) {
    // generated by emit_jno_unlikely
    if (dist < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (dist >> 2) | 0xF00;
    }
    else if (dist < 4096 && !(dist & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (dist >> 4) | 0xE00;
    }
    else {
      *insn = 0x7A000000 | branch_field;
    }
    return;
  }

  assert((top & 0x0e) == 0xa);
  *insn = (*insn & 0xFF000000) | branch_field;
}
135
// Variant of set_jump_target that can optionally copy the instruction found
// at the branch target into the word just before the branch, and then aim the
// branch one instruction past the target.  Works, but the difference in speed
// is usually insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  u_char top = ((u_char *)addr)[3];

  assert(!copy || insn[-1] == 0xe28dd000);
  if (top == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((top & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  // Target instructions matching any of these patterns are never copied.
  if ((target_insn & 0x0e100000) == 0)          // ALU, no immediate, no flags
    copy = 0;
  if ((target_insn & 0x0c100000) == 0x04100000) // Load
    copy = 0;
  if (target_insn & 0x08000000)
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
170
171/* Literal pool */
172static void add_literal(int addr,int val)
173{
174 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
175 literals[literalcount][0]=addr;
176 literals[literalcount][1]=val;
177 literalcount++;
178}
179
180// from a pointer to external jump stub (which was produced by emit_extjump2)
181// find where the jumping insn is
182static void *find_extjump_insn(void *stub)
183{
184 int *ptr=(int *)(stub+4);
185 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
186 u_int offset=*ptr&0xfff;
187 void **l_ptr=(void *)ptr+offset+8;
188 return *l_ptr;
189}
190
// Find where an external branch is linked to, using the address of its stub:
// get the address that the insn one after the stub start loads
// (dyna_linker arg1), treat it as a pointer to the branch insn, and
// return the address that branch jumps to.
#if 0
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  assert((*branch & 0x0f000000) == 0x0a000000); // b
  int displacement = (*branch << 8) >> 6;
  return (u_char *)branch + displacement + 8;
}
#endif
204
205// Allocate a specific ARM register.
206static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
207{
208 int n;
209 int dirty=0;
210
211 // see if it's already allocated (and dealloc it)
212 for(n=0;n<HOST_REGS;n++)
213 {
214 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
215 dirty=(cur->dirty>>n)&1;
216 cur->regmap[n]=-1;
217 }
218 }
219
220 cur->regmap[hr]=reg;
221 cur->dirty&=~(1<<hr);
222 cur->dirty|=dirty<<hr;
223 cur->isconst&=~(1<<hr);
224}
225
226// Alloc cycle count into dedicated register
227static void alloc_cc(struct regstat *cur,int i)
228{
229 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
230}
231
232/* Assembler */
233
234static unused char regname[16][4] = {
235 "r0",
236 "r1",
237 "r2",
238 "r3",
239 "r4",
240 "r5",
241 "r6",
242 "r7",
243 "r8",
244 "r9",
245 "r10",
246 "fp",
247 "r12",
248 "sp",
249 "lr",
250 "pc"};
251
252static void output_w32(u_int word)
253{
254 *((u_int *)out)=word;
255 out+=4;
256}
257
// Pack three register numbers into instruction fields:
// rn at bit 16, rd at bit 12, rm at bit 0.  Each must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
265
// Pack rd (bit 12), rn (bit 16), an 8-bit immediate (bits 0-7) and a rotation
// field derived from an even left-shift amount: ((32 - shift) & 30) << 7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  u_int rotate_field = ((32 - shift) & 30) << 7;
  u_int reg_fields = (rn << 16) | (rd << 12);
  return reg_fields | rotate_field | imm;
}
274
// Try to express imm as an 8-bit value combined with an even rotation.
// On success returns 1 and stores (rotation field << 7) | value in *encoded;
// on failure returns 0 and leaves *encoded at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
290
291static void genimm_checked(u_int imm,u_int *encoded)
292{
293 u_int ret=genimm(imm,encoded);
294 assert(ret);
295 (void)ret;
296}
297
298static u_int genjmp(u_int addr)
299{
300 if (addr < 3) return 0; // a branch that will be patched later
301 int offset = addr-(int)out-8;
302 if (offset < -33554432 || offset >= 33554432) {
303 SysPrintf("genjmp: out of range: %08x\n", offset);
304 abort();
305 return 0;
306 }
307 return ((u_int)offset>>2)&0xffffff;
308}
309
310static unused void emit_breakpoint(void)
311{
312 assem_debug("bkpt #0\n");
313 //output_w32(0xe1200070);
314 output_w32(0xe7f001f0);
315}
316
317static void emit_mov(int rs,int rt)
318{
319 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
320 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
321}
322
323static void emit_movs(int rs,int rt)
324{
325 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
326 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
327}
328
329static void emit_add(int rs1,int rs2,int rt)
330{
331 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
332 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
333}
334
335static void emit_adds(int rs1,int rs2,int rt)
336{
337 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
338 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
339}
340#define emit_adds_ptr emit_adds
341
342static void emit_adcs(int rs1,int rs2,int rt)
343{
344 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
345 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
346}
347
348static void emit_neg(int rs, int rt)
349{
350 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
351 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
352}
353
354static void emit_sub(int rs1,int rs2,int rt)
355{
356 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
357 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
358}
359
360static void emit_zeroreg(int rt)
361{
362 assem_debug("mov %s,#0\n",regname[rt]);
363 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
364}
365
366static void emit_loadlp(u_int imm,u_int rt)
367{
368 add_literal((int)out,imm);
369 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
370 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
371}
372
373#ifdef HAVE_ARMV7
374static void emit_movw(u_int imm,u_int rt)
375{
376 assert(imm<65536);
377 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
378 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
379}
380
381static void emit_movt(u_int imm,u_int rt)
382{
383 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
384 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
385}
386#endif
387
388static void emit_movimm(u_int imm,u_int rt)
389{
390 u_int armval;
391 if(genimm(imm,&armval)) {
392 assem_debug("mov %s,#%d\n",regname[rt],imm);
393 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
394 }else if(genimm(~imm,&armval)) {
395 assem_debug("mvn %s,#%d\n",regname[rt],imm);
396 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
397 }else if(imm<65536) {
398 #ifndef HAVE_ARMV7
399 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
400 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
401 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
402 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
403 #else
404 emit_movw(imm,rt);
405 #endif
406 }else{
407 #ifndef HAVE_ARMV7
408 emit_loadlp(imm,rt);
409 #else
410 emit_movw(imm&0x0000FFFF,rt);
411 emit_movt(imm&0xFFFF0000,rt);
412 #endif
413 }
414}
415
416static void emit_pcreladdr(u_int rt)
417{
418 assem_debug("add %s,pc,#?\n",regname[rt]);
419 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
420}
421
422static void emit_loadreg(int r, int hr)
423{
424 assert(hr != EXCLUDE_REG);
425 if (r == 0)
426 emit_zeroreg(hr);
427 else {
428 void *addr;
429 switch (r) {
430 //case HIREG: addr = &hi; break;
431 //case LOREG: addr = &lo; break;
432 case CCREG: addr = &cycle_count; break;
433 case CSREG: addr = &Status; break;
434 case INVCP: addr = &invc_ptr; break;
435 case ROREG: addr = &ram_offset; break;
436 default:
437 assert(r < 34);
438 addr = &psxRegs.GPR.r[r];
439 break;
440 }
441 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
442 assert(offset<4096);
443 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
444 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
445 }
446}
447
448static void emit_storereg(int r, int hr)
449{
450 assert(hr != EXCLUDE_REG);
451 int addr = (int)&psxRegs.GPR.r[r];
452 switch (r) {
453 //case HIREG: addr = &hi; break;
454 //case LOREG: addr = &lo; break;
455 case CCREG: addr = (int)&cycle_count; break;
456 default: assert(r < 34); break;
457 }
458 u_int offset = addr-(u_int)&dynarec_local;
459 assert(offset<4096);
460 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
461 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
462}
463
464static void emit_test(int rs, int rt)
465{
466 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
467 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
468}
469
470static void emit_testimm(int rs,int imm)
471{
472 u_int armval;
473 assem_debug("tst %s,#%d\n",regname[rs],imm);
474 genimm_checked(imm,&armval);
475 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
476}
477
478static void emit_testeqimm(int rs,int imm)
479{
480 u_int armval;
481 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
482 genimm_checked(imm,&armval);
483 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
484}
485
486static void emit_not(int rs,int rt)
487{
488 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
489 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
490}
491
492static void emit_and(u_int rs1,u_int rs2,u_int rt)
493{
494 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
495 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
496}
497
498static void emit_or(u_int rs1,u_int rs2,u_int rt)
499{
500 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
501 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
502}
503
504static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
505{
506 assert(rs<16);
507 assert(rt<16);
508 assert(imm<32);
509 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
510 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
511}
512
513static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
514{
515 assert(rs<16);
516 assert(rt<16);
517 assert(imm<32);
518 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
519 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
520}
521
522static void emit_xor(u_int rs1,u_int rs2,u_int rt)
523{
524 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
525 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
526}
527
528static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
529{
530 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
531 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
532}
533
534static void emit_addimm(u_int rs,int imm,u_int rt)
535{
536 assert(rs<16);
537 assert(rt<16);
538 if(imm!=0) {
539 u_int armval;
540 if(genimm(imm,&armval)) {
541 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
542 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
543 }else if(genimm(-imm,&armval)) {
544 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
545 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
546 #ifdef HAVE_ARMV7
547 }else if(rt!=rs&&(u_int)imm<65536) {
548 emit_movw(imm&0x0000ffff,rt);
549 emit_add(rs,rt,rt);
550 }else if(rt!=rs&&(u_int)-imm<65536) {
551 emit_movw(-imm&0x0000ffff,rt);
552 emit_sub(rs,rt,rt);
553 #endif
554 }else if((u_int)-imm<65536) {
555 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
556 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
557 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
558 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
559 }else {
560 do {
561 int shift = (ffs(imm) - 1) & ~1;
562 int imm8 = imm & (0xff << shift);
563 genimm_checked(imm8,&armval);
564 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
565 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
566 rs = rt;
567 imm &= ~imm8;
568 }
569 while (imm != 0);
570 }
571 }
572 else if(rs!=rt) emit_mov(rs,rt);
573}
574
575static void emit_addimm_and_set_flags(int imm,int rt)
576{
577 assert(imm>-65536&&imm<65536);
578 u_int armval;
579 if(genimm(imm,&armval)) {
580 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
581 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
582 }else if(genimm(-imm,&armval)) {
583 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
584 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
585 }else if(imm<0) {
586 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
587 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
588 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
589 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
590 }else{
591 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
592 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
593 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
594 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
595 }
596}
597
598static void emit_addnop(u_int r)
599{
600 assert(r<16);
601 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
602 output_w32(0xe2800000|rd_rn_rm(r,r,0));
603}
604
605static void emit_andimm(int rs,int imm,int rt)
606{
607 u_int armval;
608 if(imm==0) {
609 emit_zeroreg(rt);
610 }else if(genimm(imm,&armval)) {
611 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
612 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
613 }else if(genimm(~imm,&armval)) {
614 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
615 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
616 }else if(imm==65535) {
617 #ifndef HAVE_ARMV6
618 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
619 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
620 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
621 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
622 #else
623 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
624 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
625 #endif
626 }else{
627 assert(imm>0&&imm<65535);
628 #ifndef HAVE_ARMV7
629 assem_debug("mov r14,#%d\n",imm&0xFF00);
630 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
631 assem_debug("add r14,r14,#%d\n",imm&0xFF);
632 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
633 #else
634 emit_movw(imm,HOST_TEMPREG);
635 #endif
636 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
637 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
638 }
639}
640
641static void emit_orimm(int rs,int imm,int rt)
642{
643 u_int armval;
644 if(imm==0) {
645 if(rs!=rt) emit_mov(rs,rt);
646 }else if(genimm(imm,&armval)) {
647 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
648 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
649 }else{
650 assert(imm>0&&imm<65536);
651 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
652 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
653 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
654 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
655 }
656}
657
658static void emit_xorimm(int rs,int imm,int rt)
659{
660 u_int armval;
661 if(imm==0) {
662 if(rs!=rt) emit_mov(rs,rt);
663 }else if(genimm(imm,&armval)) {
664 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
665 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
666 }else{
667 assert(imm>0&&imm<65536);
668 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
669 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
670 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
671 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
672 }
673}
674
675static void emit_shlimm(int rs,u_int imm,int rt)
676{
677 assert(imm>0);
678 assert(imm<32);
679 //if(imm==1) ...
680 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
681 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
682}
683
684static void emit_lsls_imm(int rs,int imm,int rt)
685{
686 assert(imm>0);
687 assert(imm<32);
688 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
689 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
690}
691
692static unused void emit_lslpls_imm(int rs,int imm,int rt)
693{
694 assert(imm>0);
695 assert(imm<32);
696 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
697 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
698}
699
700static void emit_shrimm(int rs,u_int imm,int rt)
701{
702 assert(imm>0);
703 assert(imm<32);
704 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
705 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
706}
707
708static void emit_sarimm(int rs,u_int imm,int rt)
709{
710 assert(imm>0);
711 assert(imm<32);
712 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
713 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
714}
715
716static void emit_rorimm(int rs,u_int imm,int rt)
717{
718 assert(imm>0);
719 assert(imm<32);
720 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
722}
723
// Sign-extend the low 16 bits of rs into rt: "sxth" with HAVE_ARMV6,
// otherwise a shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
  #endif
}
734
// Sign-extend the low 8 bits of rs into rt: "sxtb" with HAVE_ARMV6,
// otherwise a shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
  #endif
}
745
746static void emit_shl(u_int rs,u_int shift,u_int rt)
747{
748 assert(rs<16);
749 assert(rt<16);
750 assert(shift<16);
751 //if(imm==1) ...
752 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
753 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
754}
755
756static void emit_shr(u_int rs,u_int shift,u_int rt)
757{
758 assert(rs<16);
759 assert(rt<16);
760 assert(shift<16);
761 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
762 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
763}
764
765static void emit_sar(u_int rs,u_int shift,u_int rt)
766{
767 assert(rs<16);
768 assert(rt<16);
769 assert(shift<16);
770 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
771 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
772}
773
774static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
775{
776 assert(rs<16);
777 assert(rt<16);
778 assert(shift<16);
779 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
780 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
781}
782
783static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
784{
785 assert(rs<16);
786 assert(rt<16);
787 assert(shift<16);
788 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
789 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
790}
791
792static void emit_cmpimm(int rs,int imm)
793{
794 u_int armval;
795 if(genimm(imm,&armval)) {
796 assem_debug("cmp %s,#%d\n",regname[rs],imm);
797 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
798 }else if(genimm(-imm,&armval)) {
799 assem_debug("cmn %s,#%d\n",regname[rs],imm);
800 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
801 }else if(imm>0) {
802 assert(imm<65536);
803 emit_movimm(imm,HOST_TEMPREG);
804 assem_debug("cmp %s,r14\n",regname[rs]);
805 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
806 }else{
807 assert(imm>-65536);
808 emit_movimm(-imm,HOST_TEMPREG);
809 assem_debug("cmn %s,r14\n",regname[rs]);
810 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
811 }
812}
813
814static void emit_cmovne_imm(int imm,int rt)
815{
816 assem_debug("movne %s,#%d\n",regname[rt],imm);
817 u_int armval;
818 genimm_checked(imm,&armval);
819 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
820}
821
822static void emit_cmovl_imm(int imm,int rt)
823{
824 assem_debug("movlt %s,#%d\n",regname[rt],imm);
825 u_int armval;
826 genimm_checked(imm,&armval);
827 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
828}
829
830static void emit_cmovb_imm(int imm,int rt)
831{
832 assem_debug("movcc %s,#%d\n",regname[rt],imm);
833 u_int armval;
834 genimm_checked(imm,&armval);
835 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
836}
837
838static void emit_cmovae_imm(int imm,int rt)
839{
840 assem_debug("movcs %s,#%d\n",regname[rt],imm);
841 u_int armval;
842 genimm_checked(imm,&armval);
843 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
844}
845
846static void emit_cmovs_imm(int imm,int rt)
847{
848 assem_debug("movmi %s,#%d\n",regname[rt],imm);
849 u_int armval;
850 genimm_checked(imm,&armval);
851 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
852}
853
854static void emit_cmovne_reg(int rs,int rt)
855{
856 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
857 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
858}
859
860static void emit_cmovl_reg(int rs,int rt)
861{
862 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
863 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
864}
865
866static void emit_cmovb_reg(int rs,int rt)
867{
868 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
869 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
870}
871
872static void emit_cmovs_reg(int rs,int rt)
873{
874 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
875 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
876}
877
// rt = 1 if the comparison of rs with imm yields "lt", else 0.
// When rt is also the source, it is cleared only after the compare.
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
885
// rt = 1 if the comparison of rs with imm yields "cc", else 0.
// When rt is also the source, it is cleared only after the compare.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
893
894static void emit_cmp(int rs,int rt)
895{
896 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
897 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
898}
899
900static void emit_cmpcs(int rs,int rt)
901{
902 assem_debug("cmpcs %s,%s\n",regname[rs],regname[rt]);
903 output_w32(0x21500000|rd_rn_rm(0,rs,rt));
904}
905
// rt = 0 if comparing rs with 1 yields "lt", else 1.
// The compare is emitted before rt is written, so rs and rt may be the
// same register.
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);    // assume "not less than 1"
  emit_cmovl_imm(0, rt); // overwrite with 0 on "lt"
}
913
// rt = 1 if rs is non-zero.  Flags come from "movs rt,rs" when the
// registers differ, or from "tst rs,rs" when they are the same; a "movne"
// then writes 1 into rt.
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
921
// rt = 1 if comparing rs1 with rs2 yields "lt", else 0.
// When rt is one of the sources, it is cleared only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int dest_is_source = (rs1 == rt || rs2 == rt);

  if (!dest_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (dest_is_source)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
930
// rt = 1 if comparing rs1 with rs2 yields "cc", else 0.
// When rt is one of the sources, it is cleared only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int dest_is_source = (rs1 == rt || rs2 == rt);

  if (!dest_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (dest_is_source)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
939
940static int can_jump_or_call(const void *a)
941{
942 intptr_t offset = (u_char *)a - out - 8;
943 return (-33554432 <= offset && offset < 33554432);
944}
945
946static void emit_call(const void *a_)
947{
948 int a = (int)a_;
949 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
950 u_int offset=genjmp(a);
951 output_w32(0xeb000000|offset);
952}
953
954static void emit_jmp(const void *a_)
955{
956 int a = (int)a_;
957 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
958 u_int offset=genjmp(a);
959 output_w32(0xea000000|offset);
960}
961
// Emits "bne" to a_.  The offset field comes from genjmp.
static void emit_jne(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bne %x\n", dest);
  output_w32(0x1a000000 | genjmp(dest));
}
969
// Emits "beq" to a_.  The offset field comes from genjmp.
static void emit_jeq(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("beq %x\n", dest);
  output_w32(0x0a000000 | genjmp(dest));
}
977
// Emits "bmi" to a_.  The offset field comes from genjmp.
static void emit_js(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bmi %x\n", dest);
  output_w32(0x4a000000 | genjmp(dest));
}
985
// Emits "bpl" to a_.  The offset field comes from genjmp.
static void emit_jns(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bpl %x\n", dest);
  output_w32(0x5a000000 | genjmp(dest));
}
993
// Emits "blt" to a_.  The offset field comes from genjmp.
static void emit_jl(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("blt %x\n", dest);
  output_w32(0xba000000 | genjmp(dest));
}
1001
// Emits "bge" to a_.  The offset field comes from genjmp.
static void emit_jge(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bge %x\n", dest);
  output_w32(0xaa000000 | genjmp(dest));
}
1009
// Emits "bvc" to a_.  The offset field comes from genjmp.
static void emit_jno(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bvc %x\n", dest);
  output_w32(0x7a000000 | genjmp(dest));
}
1017
// Emits "bcs" to a_.  The offset field comes from genjmp.
static void emit_jc(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bcs %x\n", dest);
  output_w32(0x2a000000 | genjmp(dest));
}
1025
// Emits "bcc" to a_.  The offset field comes from genjmp.
static void emit_jcc(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bcc %x\n", dest);
  output_w32(0x3a000000 | genjmp(dest));
}
1033
1034static void *emit_cbz(int rs, const void *a)
1035{
1036 void *ret;
1037 emit_test(rs, rs);
1038 ret = out;
1039 emit_jeq(a);
1040 return ret;
1041}
1042
1043static unused void emit_callreg(u_int r)
1044{
1045 assert(r<15);
1046 assem_debug("blx %s\n",regname[r]);
1047 output_w32(0xe12fff30|r);
1048}
1049
1050static void emit_jmpreg(u_int r)
1051{
1052 assem_debug("mov pc,%s\n",regname[r]);
1053 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1054}
1055
1056static void emit_ret(void)
1057{
1058 emit_jmpreg(14);
1059}
1060
1061static void emit_readword_indexed(int offset, int rs, int rt)
1062{
1063 assert(offset>-4096&&offset<4096);
1064 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1065 if(offset>=0) {
1066 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1067 }else{
1068 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1069 }
1070}
1071
1072static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1073{
1074 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1075 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1076}
1077#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1078
1079static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1080{
1081 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1082 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1083}
1084
1085static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1086{
1087 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1088 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1089}
1090
1091static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1092{
1093 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1095}
1096
1097static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1098{
1099 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1100 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1101}
1102
1103static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1104{
1105 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1106 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1107}
1108
1109static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1110{
1111 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1112 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1113}
1114
1115static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1116{
1117 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1118 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1119}
1120
1121static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1122{
1123 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1124 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1125}
1126
1127static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1128{
1129 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1130 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1131}
1132
1133static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1134{
1135 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1136 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1137}
1138
1139static void emit_str_dualindexed(int rs1, int rs2, int rt)
1140{
1141 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1142 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1143}
1144
1145static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1146{
1147 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1148 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1149}
1150
1151static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1152{
1153 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1154 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1155}
1156
1157static void emit_movsbl_indexed(int offset, int rs, int rt)
1158{
1159 assert(offset>-256&&offset<256);
1160 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1161 if(offset>=0) {
1162 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1163 }else{
1164 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1165 }
1166}
1167
1168static void emit_movswl_indexed(int offset, int rs, int rt)
1169{
1170 assert(offset>-256&&offset<256);
1171 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1172 if(offset>=0) {
1173 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1174 }else{
1175 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1176 }
1177}
1178
1179static void emit_movzbl_indexed(int offset, int rs, int rt)
1180{
1181 assert(offset>-4096&&offset<4096);
1182 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1183 if(offset>=0) {
1184 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1185 }else{
1186 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1187 }
1188}
1189
1190static void emit_movzwl_indexed(int offset, int rs, int rt)
1191{
1192 assert(offset>-256&&offset<256);
1193 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1194 if(offset>=0) {
1195 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1196 }else{
1197 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1198 }
1199}
1200
1201static void emit_ldrd(int offset, int rs, int rt)
1202{
1203 assert(offset>-256&&offset<256);
1204 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1205 if(offset>=0) {
1206 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1207 }else{
1208 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1209 }
1210}
1211
1212static void emit_readword(void *addr, int rt)
1213{
1214 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1215 assert(offset<4096);
1216 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1217 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1218}
1219#define emit_readptr emit_readword
1220
1221static void emit_writeword_indexed(int rt, int offset, int rs)
1222{
1223 assert(offset>-4096&&offset<4096);
1224 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1225 if(offset>=0) {
1226 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1227 }else{
1228 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1229 }
1230}
1231
1232static void emit_writehword_indexed(int rt, int offset, int rs)
1233{
1234 assert(offset>-256&&offset<256);
1235 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1236 if(offset>=0) {
1237 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1238 }else{
1239 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1240 }
1241}
1242
1243static void emit_writebyte_indexed(int rt, int offset, int rs)
1244{
1245 assert(offset>-4096&&offset<4096);
1246 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1247 if(offset>=0) {
1248 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1249 }else{
1250 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1251 }
1252}
1253
1254static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1255{
1256 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1257 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1258}
1259
1260static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1261{
1262 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1263 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1264}
1265
1266static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1267{
1268 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1269 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1270}
1271
1272static void emit_writeword(int rt, void *addr)
1273{
1274 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1275 assert(offset<4096);
1276 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1277 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1278}
1279
1280static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1281{
1282 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1283 assert(rs1<16);
1284 assert(rs2<16);
1285 assert(hi<16);
1286 assert(lo<16);
1287 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1288}
1289
1290static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1291{
1292 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1293 assert(rs1<16);
1294 assert(rs2<16);
1295 assert(hi<16);
1296 assert(lo<16);
1297 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1298}
1299
1300static void emit_clz(int rs,int rt)
1301{
1302 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1303 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1304}
1305
1306static void emit_subcs(int rs1,int rs2,int rt)
1307{
1308 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1309 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1310}
1311
1312static void emit_shrcc_imm(int rs,u_int imm,int rt)
1313{
1314 assert(imm>0);
1315 assert(imm<32);
1316 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1317 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1318}
1319
1320static void emit_shrne_imm(int rs,u_int imm,int rt)
1321{
1322 assert(imm>0);
1323 assert(imm<32);
1324 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1325 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1326}
1327
1328static void emit_negmi(int rs, int rt)
1329{
1330 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1331 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1332}
1333
1334static void emit_negsmi(int rs, int rt)
1335{
1336 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1337 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1338}
1339
1340static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1341{
1342 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1343 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1344}
1345
1346static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1347{
1348 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1349 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1350}
1351
1352static void emit_teq(int rs, int rt)
1353{
1354 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1355 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1356}
1357
1358static unused void emit_rsbimm(int rs, int imm, int rt)
1359{
1360 u_int armval;
1361 genimm_checked(imm,&armval);
1362 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1363 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1364}
1365
1366// Conditionally select one of two immediates, optimizing for small code size
1367// This will only be called if HAVE_CMOV_IMM is defined
1368static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1369{
1370 u_int armval;
1371 if(genimm(imm2-imm1,&armval)) {
1372 emit_movimm(imm1,rt);
1373 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1374 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1375 }else if(genimm(imm1-imm2,&armval)) {
1376 emit_movimm(imm1,rt);
1377 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1378 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1379 }
1380 else {
1381 #ifndef HAVE_ARMV7
1382 emit_movimm(imm1,rt);
1383 add_literal((int)out,imm2);
1384 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1385 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1386 #else
1387 emit_movw(imm1&0x0000FFFF,rt);
1388 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1389 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1390 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1391 }
1392 emit_movt(imm1&0xFFFF0000,rt);
1393 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1394 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1395 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1396 }
1397 #endif
1398 }
1399}
1400
1401// special case for checking invalid_code
1402static void emit_ldrb_indexedsr12_reg(int base, int r, int rt)
1403{
1404 assem_debug("ldrb %s,%s,%s lsr #12\n",regname[rt],regname[base],regname[r]);
1405 output_w32(0xe7d00000|rd_rn_rm(rt,base,r)|0x620);
1406}
1407
1408static void emit_callne(int a)
1409{
1410 assem_debug("blne %x\n",a);
1411 u_int offset=genjmp(a);
1412 output_w32(0x1b000000|offset);
1413}
1414
1415// Used to preload hash table entries
1416static unused void emit_prefetchreg(int r)
1417{
1418 assem_debug("pld %s\n",regname[r]);
1419 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1420}
1421
1422// Special case for mini_ht
1423static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1424{
1425 assert(offset<4096);
1426 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1427 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1428}
1429
1430static void emit_orrne_imm(int rs,int imm,int rt)
1431{
1432 u_int armval;
1433 genimm_checked(imm,&armval);
1434 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1435 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1436}
1437
1438static unused void emit_addpl_imm(int rs,int imm,int rt)
1439{
1440 u_int armval;
1441 genimm_checked(imm,&armval);
1442 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1443 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1444}
1445
1446static void emit_jno_unlikely(int a)
1447{
1448 //emit_jno(a);
1449 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1450 output_w32(0x72800000|rd_rn_rm(15,15,0));
1451}
1452
1453static void save_regs_all(u_int reglist)
1454{
1455 int i;
1456 if(!reglist) return;
1457 assem_debug("stmia fp,{");
1458 for(i=0;i<16;i++)
1459 if(reglist&(1<<i))
1460 assem_debug("r%d,",i);
1461 assem_debug("}\n");
1462 output_w32(0xe88b0000|reglist);
1463}
1464
1465static void restore_regs_all(u_int reglist)
1466{
1467 int i;
1468 if(!reglist) return;
1469 assem_debug("ldmia fp,{");
1470 for(i=0;i<16;i++)
1471 if(reglist&(1<<i))
1472 assem_debug("r%d,",i);
1473 assem_debug("}\n");
1474 output_w32(0xe89b0000|reglist);
1475}
1476
1477// Save registers before function call
1478static void save_regs(u_int reglist)
1479{
1480 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1481 save_regs_all(reglist);
1482}
1483
1484// Restore registers after function call
1485static void restore_regs(u_int reglist)
1486{
1487 reglist&=CALLER_SAVE_REGS;
1488 restore_regs_all(reglist);
1489}
1490
1491/* Stubs/epilogue */
1492
1493static void literal_pool(int n)
1494{
1495 if(!literalcount) return;
1496 if(n) {
1497 if((int)out-literals[0][0]<4096-n) return;
1498 }
1499 u_int *ptr;
1500 int i;
1501 for(i=0;i<literalcount;i++)
1502 {
1503 u_int l_addr=(u_int)out;
1504 int j;
1505 for(j=0;j<i;j++) {
1506 if(literals[j][1]==literals[i][1]) {
1507 //printf("dup %08x\n",literals[i][1]);
1508 l_addr=literals[j][0];
1509 break;
1510 }
1511 }
1512 ptr=(u_int *)literals[i][0];
1513 u_int offset=l_addr-(u_int)ptr-8;
1514 assert(offset<4096);
1515 assert(!(offset&3));
1516 *ptr|=offset;
1517 if(l_addr==(u_int)out) {
1518 literals[i][0]=l_addr; // remember for dupes
1519 output_w32(literals[i][1]);
1520 }
1521 }
1522 literalcount=0;
1523}
1524
1525static void literal_pool_jumpover(int n)
1526{
1527 if(!literalcount) return;
1528 if(n) {
1529 if((int)out-literals[0][0]<4096-n) return;
1530 }
1531 void *jaddr = out;
1532 emit_jmp(0);
1533 literal_pool(0);
1534 set_jump_target(jaddr, out);
1535}
1536
1537// parsed by get_pointer, find_extjump_insn
1538static void emit_extjump(u_char *addr, u_int target)
1539{
1540 u_char *ptr=(u_char *)addr;
1541 assert((ptr[3]&0x0e)==0xa);
1542 (void)ptr;
1543
1544 emit_loadlp(target,0);
1545 emit_loadlp((u_int)addr,1);
1546 assert(ndrc->translation_cache <= addr &&
1547 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
1548 emit_far_jump(dyna_linker);
1549}
1550
1551static void check_extjump2(void *src)
1552{
1553 u_int *ptr = src;
1554 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1555 (void)ptr;
1556}
1557
1558// put rt_val into rt, potentially making use of rs with value rs_val
1559static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1560{
1561 u_int armval;
1562 int diff;
1563 if(genimm(rt_val,&armval)) {
1564 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1565 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1566 return;
1567 }
1568 if(genimm(~rt_val,&armval)) {
1569 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1570 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1571 return;
1572 }
1573 diff=rt_val-rs_val;
1574 if(genimm(diff,&armval)) {
1575 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1576 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1577 return;
1578 }else if(genimm(-diff,&armval)) {
1579 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1580 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1581 return;
1582 }
1583 emit_movimm(rt_val,rt);
1584}
1585
// return 1 if above function can do its job cheaply
// i.e. v1 == v2, or the difference (or its negation), after dropping
// trailing pairs of zero bits, fits in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  int delta;
  u_int bits;

  if (v1 == v2)
    return 1;
  delta = v2 - v1;

  bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return bits < 0x100 ? 1 : 0;
}
1601
1602static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1603{
1604 switch(type) {
1605 case LOADB_STUB: emit_signextend8(rs,rt); break;
1606 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1607 case LOADH_STUB: emit_signextend16(rs,rt); break;
1608 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1609 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1610 default: assert(0);
1611 }
1612}
1613
1614#include "pcsxmem.h"
1615#include "pcsxmem_inline.c"
1616
// Emit the out-of-line code for read stub number n (taken from stubs[n]).
// The emitted code looks up mem_rtab[address >> 12]; after shifting the entry
// left by one with flags set, a carry-clear result lets the conditional load
// and conditional branch below complete the access directly, otherwise one of
// the jump_handler_read* routines is called and its result (in r0) is
// adjusted into the destination register.
1617static void do_readstub(int n)
1618{
1619 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1620 literal_pool(256);
 // the branch recorded for this stub now targets the code emitted here
1621 set_jump_target(stubs[n].addr, out);
 // unpack the stub record: a = instruction index, b = address register,
 // c = register state, e = live host register list
1622 enum stub_type type=stubs[n].type;
1623 int i=stubs[n].a;
1624 int rs=stubs[n].b;
1625 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1626 u_int reglist=stubs[n].e;
1627 const signed char *i_regmap=i_regs->regmap;
1628 int rt;
 // C1LS/C2LS/LOADLR load into the FTEMP mapping, everything else into rt1
1629 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1630 rt=get_reg(i_regmap,FTEMP);
1631 }else{
1632 rt=get_reg(i_regmap,dops[i].rt1);
1633 }
1634 assert(rs>=0);
1635 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1636 void *restore_jump = NULL;
1637 reglist|=(1<<rs);
 // look for a scratch register among r0-r9,r12 (mask 0x13ff) not in reglist
1638 for(r=0;r<=12;r++) {
1639 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1640 temp=r; break;
1641 }
1642 }
 // the destination register does not need to be preserved
1643 if(rt>=0&&dops[i].rt1!=0)
1644 reglist&=~(1<<rt);
 // nothing free: save registers up front and use r0 (or r2 when rs is r0)
1645 if(temp==-1) {
1646 save_regs(reglist);
1647 regs_saved=1;
1648 temp=(rs==0)?2:0;
1649 }
 // use r1 as the second scratch when it is saved or not live, and not
 // otherwise in use here
1650 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1651 temp2=1;
 // temp2 = mem_rtab[rs >> 12], then shifted left by one (setting flags)
1652 emit_readword(&mem_rtab,temp);
1653 emit_shrimm(rs,12,temp2);
1654 emit_readword_dualindexedx4(temp,temp2,temp2);
1655 emit_lsls_imm(temp2,1,temp2);
 // carry-clear conditional load from [temp2 + rs] straight into rt
1656 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1657 switch(type) {
1658 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1659 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1660 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1661 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1662 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1663 default: assert(0);
1664 }
1665 }
 // direct access done: leave the stub, via the register restore if
 // registers were already saved above
1666 if(regs_saved) {
1667 restore_jump=out;
1668 emit_jcc(0); // jump to reg restore
1669 }
1670 else
1671 emit_jcc(stubs[n].retaddr); // return address
1672
 // handler path
1673 if(!regs_saved)
1674 save_regs(reglist);
1675 void *handler=NULL;
1676 if(type==LOADB_STUB||type==LOADBU_STUB)
1677 handler=jump_handler_read8;
1678 if(type==LOADH_STUB||type==LOADHU_STUB)
1679 handler=jump_handler_read16;
1680 if(type==LOADW_STUB)
1681 handler=jump_handler_read32;
1682 assert(handler);
1683 pass_args(rs,temp2);
 // r2 = cycle count register plus this stub's adjustment (stubs[n].d)
1684 int cc=get_reg(i_regmap,CCREG);
1685 if(cc<0)
1686 emit_loadreg(CCREG,2);
1687 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1688 emit_far_call(handler);
 // move the handler's result from r0 into rt with the proper extension
1689 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1690 mov_loadtype_adj(type,0,rt);
1691 }
1692 if(restore_jump)
1693 set_jump_target(restore_jump, out);
1694 restore_regs(reglist);
1695 emit_jmp(stubs[n].retaddr); // return address
1696}
1697
1698static void inline_readstub(enum stub_type type, int i, u_int addr,
1699 const signed char regmap[], int target, int adj, u_int reglist)
1700{
1701 int rs=get_reg(regmap,target);
1702 int rt=get_reg(regmap,target);
1703 if(rs<0) rs=get_reg_temp(regmap);
1704 assert(rs>=0);
1705 u_int is_dynamic;
1706 uintptr_t host_addr = 0;
1707 void *handler;
1708 int cc=get_reg(regmap,CCREG);
1709 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1710 return;
1711 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1712 if (handler == NULL) {
1713 if(rt<0||dops[i].rt1==0)
1714 return;
1715 if(addr!=host_addr)
1716 emit_movimm_from(addr,rs,host_addr,rs);
1717 switch(type) {
1718 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1719 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1720 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1721 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1722 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1723 default: assert(0);
1724 }
1725 return;
1726 }
1727 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1728 if(is_dynamic) {
1729 if(type==LOADB_STUB||type==LOADBU_STUB)
1730 handler=jump_handler_read8;
1731 if(type==LOADH_STUB||type==LOADHU_STUB)
1732 handler=jump_handler_read16;
1733 if(type==LOADW_STUB)
1734 handler=jump_handler_read32;
1735 }
1736
1737 // call a memhandler
1738 if(rt>=0&&dops[i].rt1!=0)
1739 reglist&=~(1<<rt);
1740 save_regs(reglist);
1741 if(target==0)
1742 emit_movimm(addr,0);
1743 else if(rs!=0)
1744 emit_mov(rs,0);
1745 if(cc<0)
1746 emit_loadreg(CCREG,2);
1747 if(is_dynamic) {
1748 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1749 emit_addimm(cc<0?2:cc,adj,2);
1750 }
1751 else {
1752 emit_readword(&last_count,3);
1753 emit_addimm(cc<0?2:cc,adj,2);
1754 emit_add(2,3,2);
1755 emit_writeword(2,&Count);
1756 }
1757
1758 emit_far_call(handler);
1759
1760 if(rt>=0&&dops[i].rt1!=0) {
1761 switch(type) {
1762 case LOADB_STUB: emit_signextend8(0,rt); break;
1763 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1764 case LOADH_STUB: emit_signextend16(0,rt); break;
1765 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1766 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1767 default: assert(0);
1768 }
1769 }
1770 restore_regs(reglist);
1771}
1772
// Emit the out-of-line code for write stub number n (taken from stubs[n]).
// The emitted code looks up mem_wtab[address >> 12]; after shifting the entry
// left by one with flags set, a carry-clear result lets the conditional store
// and conditional branch below complete the access directly, otherwise one of
// the jump_handler_write* routines is called and the cycle count it returns
// is written back.
1773static void do_writestub(int n)
1774{
1775 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1776 literal_pool(256);
 // the branch recorded for this stub now targets the code emitted here
1777 set_jump_target(stubs[n].addr, out);
 // unpack the stub record: a = instruction index, b = address register,
 // c = register state, e = live host register list
1778 enum stub_type type=stubs[n].type;
1779 int i=stubs[n].a;
1780 int rs=stubs[n].b;
1781 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1782 u_int reglist=stubs[n].e;
1783 const signed char *i_regmap=i_regs->regmap;
1784 int rt,r;
 // C1LS/C2LS store from the FTEMP mapping, everything else from rs2
1785 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1786 rt=get_reg(i_regmap,r=FTEMP);
1787 }else{
1788 rt=get_reg(i_regmap,r=dops[i].rs2);
1789 }
1790 assert(rs>=0);
1791 assert(rt>=0);
1792 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1793 void *restore_jump = NULL;
 // look for a scratch register among r0-r9,r12 (mask 0x13ff) that is
 // neither live nor used for the address or the value
1794 int reglist2=reglist|(1<<rs)|(1<<rt);
1795 for(rtmp=0;rtmp<=12;rtmp++) {
1796 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1797 temp=rtmp; break;
1798 }
1799 }
 // nothing free: save registers up front and take the first of r0-r3
 // that is neither rs nor rt
1800 if(temp==-1) {
1801 save_regs(reglist);
1802 regs_saved=1;
1803 for(rtmp=0;rtmp<=3;rtmp++)
1804 if(rtmp!=rs&&rtmp!=rt)
1805 {temp=rtmp;break;}
1806 }
 // use r3 as the second scratch when it is saved or not live, and not
 // otherwise in use here
1807 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1808 temp2=3;
 // temp2 = mem_wtab[rs >> 12], then shifted left by one (setting flags)
1809 emit_readword(&mem_wtab,temp);
1810 emit_shrimm(rs,12,temp2);
1811 emit_readword_dualindexedx4(temp,temp2,temp2);
1812 emit_lsls_imm(temp2,1,temp2);
 // carry-clear conditional store of rt to [temp2 + rs]
1813 switch(type) {
1814 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1815 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1816 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1817 default: assert(0);
1818 }
 // direct access done: leave the stub, via the register restore if
 // registers were already saved above
1819 if(regs_saved) {
1820 restore_jump=out;
1821 emit_jcc(0); // jump to reg restore
1822 }
1823 else
1824 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1825
 // handler path
1826 if(!regs_saved)
1827 save_regs(reglist);
1828 void *handler=NULL;
1829 switch(type) {
1830 case STOREB_STUB: handler=jump_handler_write8; break;
1831 case STOREH_STUB: handler=jump_handler_write16; break;
1832 case STOREW_STUB: handler=jump_handler_write32; break;
1833 default: assert(0);
1834 }
1835 assert(handler);
1836 pass_args(rs,rt);
 // the shifted table entry is passed in r3
1837 if(temp2!=3)
1838 emit_mov(temp2,3);
 // r2 = cycle count register plus this stub's adjustment (stubs[n].d)
1839 int cc=get_reg(i_regmap,CCREG);
1840 if(cc<0)
1841 emit_loadreg(CCREG,2);
1842 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1843 // returns new cycle_count
1844 emit_far_call(handler);
 // undo the adjustment on the returned count (r0) and put it back
1845 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1846 if(cc<0)
1847 emit_storereg(CCREG,2);
1848 if(restore_jump)
1849 set_jump_target(restore_jump, out);
1850 restore_regs(reglist);
1851 emit_jmp(stubs[n].retaddr);
1852}
1853
1854static void inline_writestub(enum stub_type type, int i, u_int addr,
1855 const signed char regmap[], int target, int adj, u_int reglist)
1856{
1857 int rs=get_reg_temp(regmap);
1858 int rt=get_reg(regmap,target);
1859 assert(rs>=0);
1860 assert(rt>=0);
1861 uintptr_t host_addr = 0;
1862 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1863 if (handler == NULL) {
1864 if(addr!=host_addr)
1865 emit_movimm_from(addr,rs,host_addr,rs);
1866 switch(type) {
1867 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1868 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1869 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1870 default: assert(0);
1871 }
1872 return;
1873 }
1874
1875 // call a memhandler
1876 save_regs(reglist);
1877 pass_args(rs,rt);
1878 int cc=get_reg(regmap,CCREG);
1879 if(cc<0)
1880 emit_loadreg(CCREG,2);
1881 emit_addimm(cc<0?2:cc,adj,2);
1882 emit_movimm((u_int)handler,3);
1883 // returns new cycle_count
1884 emit_far_call(jump_handler_write_h);
1885 emit_addimm(0,-adj,cc<0?2:cc);
1886 if(cc<0)
1887 emit_storereg(CCREG,2);
1888 restore_regs(reglist);
1889}
1890
1891/* Special assem */
1892
1893static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1894{
1895 save_regs_all(reglist);
1896 cop2_do_stall_check(op, i, i_regs, 0);
1897#ifdef PCNT
1898 emit_movimm(op, 0);
1899 emit_far_call(pcnt_gte_start);
1900#endif
1901 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
1902}
1903
1904static void c2op_epilogue(u_int op,u_int reglist)
1905{
1906#ifdef PCNT
1907 emit_movimm(op,0);
1908 emit_far_call(pcnt_gte_end);
1909#endif
1910 restore_regs_all(reglist);
1911}
1912
1913static void c2op_call_MACtoIR(int lm,int need_flags)
1914{
1915 if(need_flags)
1916 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
1917 else
1918 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
1919}
1920
1921static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
1922{
1923 emit_far_call(func);
1924 // func is C code and trashes r0
1925 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1926 if(need_flags||need_ir)
1927 c2op_call_MACtoIR(lm,need_flags);
1928 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
1929}
1930
// Assemble the GTE (cop2) operation found in source[i].
// Nothing is emitted when gte_handlers[] has no entry for the op. Otherwise
// the op is bracketed by c2op_prologue()/c2op_epilogue() and implemented
// either by a dedicated call sequence (the cases below, compiled out under
// DRC_DBG) or by the generic handler from gte_handlers[]/gte_handlers_nf[].
1931static void c2op_assemble(int i, const struct regstat *i_regs)
1932{
 // low 6 bits of the instruction word select the GTE op
1933 u_int c2op = source[i] & 0x3f;
1934 u_int reglist_full = get_host_reglist(i_regs->regmap);
1935 u_int reglist = reglist_full & CALLER_SAVE_REGS;
1936 int need_flags, need_ir;
1937
1938 if (gte_handlers[c2op]!=NULL) {
 // flags are needed unless bit 63 of the unneeded mask is set; IR is
 // needed unless all of the 0xe00 bits are set
1939 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1940 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1941 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
1942 source[i],gte_unneeded[i+1],need_flags,need_ir);
1943 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1944 need_flags=0;
 // instruction word fields: bit 19 = shift, bit 10 = lm
1945 int shift = (source[i] >> 19) & 1;
1946 int lm = (source[i] >> 10) & 1;
1947 switch(c2op) {
1948#ifndef DRC_DBG
1949 case GTE_MVMVA: {
1950#ifdef HAVE_ARMV5
 // instruction word fields: bits 15-16 = v, 13-14 = cv, 17-18 = mx
1951 int v = (source[i] >> 15) & 3;
1952 int cv = (source[i] >> 13) & 3;
1953 int mx = (source[i] >> 17) & 3;
1954 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
1955 c2op_prologue(c2op,i,i_regs,reglist);
1956 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
1957 if(v<3)
1958 emit_ldrd(v*8,0,4);
1959 else {
 // v==3: build the packed vector from three halfword loads at
 // word offsets 9, 10 and 11 of the cop2 data registers
1960 emit_movzwl_indexed(9*4,0,4); // gteIR
1961 emit_movzwl_indexed(10*4,0,6);
1962 emit_movzwl_indexed(11*4,0,5);
1963 emit_orrshl_imm(6,16,4);
1964 }
 // selector value 3 points r6/r7 at zeromem_ptr instead
1965 if(mx<3)
1966 emit_addimm(0,32*4+mx*8*4,6);
1967 else
1968 emit_readword(&zeromem_ptr,6);
1969 if(cv<3)
1970 emit_addimm(0,32*4+(cv*8+5)*4,7);
1971 else
1972 emit_readword(&zeromem_ptr,7);
1973#ifdef __ARM_NEON__
1974 emit_movimm(source[i],1); // opcode
1975 emit_far_call(gteMVMVA_part_neon);
1976 if(need_flags) {
1977 emit_movimm(lm,1);
1978 emit_far_call(gteMACtoIR_flags_neon);
1979 }
1980#else
1981 if(cv==3&&shift)
1982 emit_far_call(gteMVMVA_part_cv3sh12_arm);
1983 else {
1984 emit_movimm(shift,1);
1985 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
1986 }
1987 if(need_flags||need_ir)
1988 c2op_call_MACtoIR(lm,need_flags);
1989#endif
1990#else /* if not HAVE_ARMV5 */
 // no ARMv5 path: store the opcode in psxRegs.code and call the
 // generic handler
1991 c2op_prologue(c2op,i,i_regs,reglist);
1992 emit_movimm(source[i],1); // opcode
1993 emit_writeword(1,&psxRegs.code);
1994 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1995#endif
1996 break;
1997 }
1998 case GTE_OP:
1999 c2op_prologue(c2op,i,i_regs,reglist);
2000 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
 // reload r0 with the cop2 register base before MACtoIR
2001 if(need_flags||need_ir) {
2002 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2003 c2op_call_MACtoIR(lm,need_flags);
2004 }
2005 break;
2006 case GTE_DPCS:
2007 c2op_prologue(c2op,i,i_regs,reglist);
2008 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2009 break;
2010 case GTE_INTPL:
2011 c2op_prologue(c2op,i,i_regs,reglist);
2012 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2013 break;
2014 case GTE_SQR:
2015 c2op_prologue(c2op,i,i_regs,reglist);
2016 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
 // reload r0 with the cop2 register base before MACtoIR
2017 if(need_flags||need_ir) {
2018 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2019 c2op_call_MACtoIR(lm,need_flags);
2020 }
2021 break;
2022 case GTE_DCPL:
2023 c2op_prologue(c2op,i,i_regs,reglist);
2024 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2025 break;
2026 case GTE_GPF:
2027 c2op_prologue(c2op,i,i_regs,reglist);
2028 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2029 break;
2030 case GTE_GPL:
2031 c2op_prologue(c2op,i,i_regs,reglist);
2032 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2033 break;
2034#endif
 // everything else (and every op under DRC_DBG) uses the generic handler
2035 default:
2036 c2op_prologue(c2op,i,i_regs,reglist);
2037#ifdef DRC_DBG
2038 emit_movimm(source[i],1); // opcode
2039 emit_writeword(1,&psxRegs.code);
2040#endif
2041 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2042 break;
2043 }
2044 c2op_epilogue(c2op,reglist);
2045 }
2046}
2047
/*
 * Emit the value fix-up for a write to cop2 control register 31:
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 * sl holds the incoming value, temp receives the result.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // shifting right then left by 12 clears the low 12 bits
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // the 0x7f87e000 mask is tested in three pieces; each later test only
  // runs while the previous ones found no bits set
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  emit_orrne_imm(temp, 0x80000000, temp);
}
2059
2060static void do_mfc2_31_one(u_int copr,signed char temp)
2061{
2062 emit_readword(&reg_cop2d[copr],temp);
2063 emit_lsls_imm(temp,16,temp);
2064 emit_cmovs_imm(0,temp);
2065 emit_cmpimm(temp,0xf80<<16);
2066 emit_andimm(temp,0xf80<<16,temp);
2067 emit_cmovae_imm(0xf80<<16,temp);
2068}
2069
2070static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2071{
2072 if (temp < 0) {
2073 host_tempreg_acquire();
2074 temp = HOST_TEMPREG;
2075 }
2076 do_mfc2_31_one(9,temp);
2077 emit_shrimm(temp,7+16,tl);
2078 do_mfc2_31_one(10,temp);
2079 emit_orrshr_imm(temp,2+16,tl);
2080 do_mfc2_31_one(11,temp);
2081 emit_orrshr_imm(temp,-3+16,tl);
2082 emit_writeword(tl,&reg_cop2d[29]);
2083 if (temp == HOST_TEMPREG)
2084 host_tempreg_release();
2085}
2086
// Assemble MULT/MULTU/DIV/DIVU for instruction i, using the host registers
// that i_regs->regmap assigns to the source operands and to HIREG/LOREG.
// When rs1 or rs2 is register number 0 both results are simply zeroed.
// The 64-bit variants (opcode2 bit 2 set) are not supported and assert.
// NOTE: the division sequences use hard-coded relative branch distances
// (out+52, out+40, out-16, out-2*4), so the number of emitted instructions
// between a branch and its target must not change.
2087static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
2088{
2089 // case 0x18: MULT
2090 // case 0x19: MULTU
2091 // case 0x1A: DIV
2092 // case 0x1B: DIVU
2093 // case 0x1C: DMULT
2094 // case 0x1D: DMULTU
2095 // case 0x1E: DDIV
2096 // case 0x1F: DDIVU
2097 if(dops[i].rs1&&dops[i].rs2)
2098 {
2099 if((dops[i].opcode2&4)==0) // 32-bit
2100 {
2101 if(dops[i].opcode2==0x18) // MULT
2102 {
2103 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2104 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2105 signed char hi=get_reg(i_regs->regmap,HIREG);
2106 signed char lo=get_reg(i_regs->regmap,LOREG);
2107 assert(m1>=0);
2108 assert(m2>=0);
2109 assert(hi>=0);
2110 assert(lo>=0);
 // signed 64-bit product into hi:lo
2111 emit_smull(m1,m2,hi,lo);
2112 }
2113 if(dops[i].opcode2==0x19) // MULTU
2114 {
2115 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2116 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2117 signed char hi=get_reg(i_regs->regmap,HIREG);
2118 signed char lo=get_reg(i_regs->regmap,LOREG);
2119 assert(m1>=0);
2120 assert(m2>=0);
2121 assert(hi>=0);
2122 assert(lo>=0);
 // unsigned 64-bit product into hi:lo
2123 emit_umull(m1,m2,hi,lo);
2124 }
2125 if(dops[i].opcode2==0x1A) // DIV
2126 {
2127 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2128 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2129 assert(d1>=0);
2130 assert(d2>=0);
2131 signed char quotient=get_reg(i_regs->regmap,LOREG);
2132 signed char remainder=get_reg(i_regs->regmap,HIREG);
2133 assert(quotient>=0);
2134 assert(remainder>=0);
 // remainder = dividend (flags set), quotient preset to 0xffffffff;
 // both are negated when the dividend is negative
2135 emit_movs(d1,remainder);
2136 emit_movimm(0xffffffff,quotient);
2137 emit_negmi(quotient,quotient); // .. quotient and ..
2138 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
 // divisor goes to HOST_TEMPREG; a zero divisor branches 52 bytes
 // ahead of the current output position
2139 emit_movs(d2,HOST_TEMPREG);
2140 emit_jeq(out+52); // Division by zero
2141 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
 // shift the divisor left until its top bit is set; quotient holds
 // the shift count
2142#ifdef HAVE_ARMV5
2143 emit_clz(HOST_TEMPREG,quotient);
2144 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2145#else
2146 emit_movimm(0,quotient);
2147 emit_addpl_imm(quotient,1,quotient);
2148 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2149 emit_jns(out-2*4);
2150#endif
 // quotient = (count | 1<<31) >> count, then the cmp/subcs/adcs/shift
 // sequence repeats (branch back 16 bytes) while carry stays clear
2151 emit_orimm(quotient,1<<31,quotient);
2152 emit_shr(quotient,quotient,quotient);
2153 emit_cmp(remainder,HOST_TEMPREG);
2154 emit_subcs(remainder,HOST_TEMPREG,remainder);
2155 emit_adcs(quotient,quotient,quotient);
2156 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2157 emit_jcc(out-16); // -4
 // negate the quotient when the operand signs differ, and the
 // remainder when the dividend is negative
2158 emit_teq(d1,d2);
2159 emit_negmi(quotient,quotient);
2160 emit_test(d1,d1);
2161 emit_negmi(remainder,remainder);
2162 }
2163 if(dops[i].opcode2==0x1B) // DIVU
2164 {
2165 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2166 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2167 assert(d1>=0);
2168 assert(d2>=0);
2169 signed char quotient=get_reg(i_regs->regmap,LOREG);
2170 signed char remainder=get_reg(i_regs->regmap,HIREG);
2171 assert(quotient>=0);
2172 assert(remainder>=0);
 // remainder = dividend, quotient preset to 0xffffffff; a zero
 // divisor branches 40 bytes ahead of the current output position
2173 emit_mov(d1,remainder);
2174 emit_movimm(0xffffffff,quotient); // div0 case
2175 emit_test(d2,d2);
2176 emit_jeq(out+40); // Division by zero
 // shift the divisor register (d2 itself is modified) left until its
 // top bit is set; HOST_TEMPREG holds the shift count
2177#ifdef HAVE_ARMV5
2178 emit_clz(d2,HOST_TEMPREG);
2179 emit_movimm(1<<31,quotient);
2180 emit_shl(d2,HOST_TEMPREG,d2);
2181#else
2182 emit_movimm(0,HOST_TEMPREG);
2183 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2184 emit_lslpls_imm(d2,1,d2);
2185 emit_jns(out-2*4);
2186 emit_movimm(1<<31,quotient);
2187#endif
 // quotient = 1<<31 >> count, then the cmp/subcs/adcs/shift sequence
 // repeats (branch back 16 bytes) while carry stays clear
2188 emit_shr(quotient,HOST_TEMPREG,quotient);
2189 emit_cmp(remainder,d2);
2190 emit_subcs(remainder,d2,remainder);
2191 emit_adcs(quotient,quotient,quotient);
2192 emit_shrcc_imm(d2,1,d2);
2193 emit_jcc(out-16); // -4
2194 }
2195 }
2196 else // 64-bit
2197 assert(0);
2198 }
2199 else
2200 {
2201 // Multiply by zero is zero.
2202 // MIPS does not have a divide by zero exception.
2203 // The result is undefined, we return zero.
2204 signed char hr=get_reg(i_regs->regmap,HIREG);
2205 signed char lr=get_reg(i_regs->regmap,LOREG);
2206 if(hr>=0) emit_zeroreg(hr);
2207 if(lr>=0) emit_zeroreg(lr);
2208 }
2209}
2210#define multdiv_assemble multdiv_assemble_arm
2211
2212static void do_jump_vaddr(int rs)
2213{
2214 emit_far_jump(jump_vaddr_reg[rs]);
2215}
2216
// Return-hash preload hook; intentionally emits nothing on ARM.
// The x86 backend uses this step to put the value 0xf8 into register r.
// On ARM the hash is produced by a single instruction (see do_rhash),
// so there is nothing to preload here.
static void do_preload_rhash(int r)
{
  (void)r; // unused on this backend
}
2221
2222static void do_preload_rhtbl(int ht) {
2223 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2224}
2225
// Emit the mini hash table hash step: rh = rs & 0xf8.
// do_miniht_insert indexes mini_ht with (address & 0xFF) >> 3, so the masked
// value looks like the byte offset of the matching entry (assuming 8-byte
// entries -- confirm against the mini_ht declaration).
static void do_rhash(int rs,int rh)
{
  enum { MINIHT_HASH_MASK = 0xf8 };

  emit_andimm(rs, MINIHT_HASH_MASK, rh);
}
2229
2230static void do_miniht_load(int ht,int rh) {
2231 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2232 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2233}
2234
// Emit the mini hash table lookup-and-jump sequence.
//   rs - register holding the target address
//   rh - register holding the value loaded by do_miniht_load
//   ht - register pointing at the hash table entry
// Compares rh with rs and, when equal, emits a conditional load from
// [ht, #4] into register 15 (presumably a direct jump through pc to the
// cached code pointer). Otherwise falls through to the generic jump_vaddr
// stub for the register holding the address.
static void do_miniht_jump(int rs,int rh,int ht)
{
  int jump_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // With the hack enabled the slow path always goes through register 7.
  if (jump_reg != 7)
    emit_mov(jump_reg, 7);
  jump_reg = 7;
#endif
  do_jump_vaddr(jump_reg);
}
2245
2246static void do_miniht_insert(u_int return_address,int rt,int temp) {
2247 #ifndef HAVE_ARMV7
2248 emit_movimm(return_address,rt); // PC into link register
2249 add_to_linker(out,return_address,1);
2250 emit_pcreladdr(temp);
2251 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2252 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2253 #else
2254 emit_movw(return_address&0x0000FFFF,rt);
2255 add_to_linker(out,return_address,1);
2256 emit_pcreladdr(temp);
2257 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2258 emit_movt(return_address&0xFFFF0000,rt);
2259 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2260 #endif
2261}
2262
2263// CPU-architecture-specific initialization
2264static void arch_init(void)
2265{
2266 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2267 struct tramp_insns *ops = ndrc->tramp.ops;
2268 size_t i;
2269 assert(!(diff & 3));
2270 assert(diff < 0x1000);
2271 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2272 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2273 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2274 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2275}
2276
2277// vim:shiftwidth=2:expandtab