drc: more precise invalidation checking for the invstub case
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#define unused __attribute__((unused))
31
32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
/* Entry points defined outside this file. */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

/*
 * jump_vaddr_rN stub indexed by host register number.
 * Registers 11 and 13-15 have no stub; their slots stay null.
 */
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
72
73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
104/* Linker */
105
106static void set_jump_target(void *addr, void *target_)
107{
108 u_int target = (u_int)target_;
109 u_char *ptr = addr;
110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
113 assert(((uintptr_t)addr&3)==0);
114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
116 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
121 assert(((uintptr_t)addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
126 assert(((uintptr_t)addr&3)==0);
127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
// This optionally copies the instruction from the target of the branch into
// the space before the branch.  Works, but the difference in speed is
// usually insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *insn_bytes = (u_char *)addr;
  u_int *insn = (u_int *)insn_bytes;

  // copying is only allowed when the slot before the branch holds 0xe28dd000
  assert(!copy || insn[-1] == 0xe28dd000);

  if (insn_bytes[3] == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((insn_bytes[3] & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0              // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000  // Load
      || (target_insn & 0x08000000))
    copy = 0;
  if (copy) {
    // hoist the target's first insn into the slot and branch past it
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
172
173/* Literal pool */
174static void add_literal(int addr,int val)
175{
176 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
177 literals[literalcount][0]=addr;
178 literals[literalcount][1]=val;
179 literalcount++;
180}
181
182// from a pointer to external jump stub (which was produced by emit_extjump2)
183// find where the jumping insn is
184static void *find_extjump_insn(void *stub)
185{
186 int *ptr=(int *)(stub+4);
187 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
188 u_int offset=*ptr&0xfff;
189 void **l_ptr=(void *)ptr+offset+8;
190 return *l_ptr;
191}
192
// find where an external branch is linked to, using the addr of its stub:
// get the address that the insn one after the stub start loads
// (dyna_linker arg1), treat it as a pointer to the branch insn,
// return the addr that branch jumps to
#if 0
static void *get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *branch = find_extjump_insn(stub);
  assert((*branch & 0x0f000000) == 0x0a000000); // b
  int byte_offset = (*branch << 8) >> 6;
  return (u_char *)branch + byte_offset + 8;
}
#endif
206
207// Allocate a specific ARM register.
208static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
209{
210 int n;
211 int dirty=0;
212
213 // see if it's already allocated (and dealloc it)
214 for(n=0;n<HOST_REGS;n++)
215 {
216 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
217 dirty=(cur->dirty>>n)&1;
218 cur->regmap[n]=-1;
219 }
220 }
221
222 cur->regmap[hr]=reg;
223 cur->dirty&=~(1<<hr);
224 cur->dirty|=dirty<<hr;
225 cur->isconst&=~(1<<hr);
226}
227
228// Alloc cycle count into dedicated register
229static void alloc_cc(struct regstat *cur,int i)
230{
231 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
232}
233
234/* Assembler */
235
236static unused char regname[16][4] = {
237 "r0",
238 "r1",
239 "r2",
240 "r3",
241 "r4",
242 "r5",
243 "r6",
244 "r7",
245 "r8",
246 "r9",
247 "r10",
248 "fp",
249 "r12",
250 "sp",
251 "lr",
252 "pc"};
253
254static void output_w32(u_int word)
255{
256 *((u_int *)out)=word;
257 out+=4;
258}
259
/*
 * Build the register fields of an instruction word:
 * rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
 * Each register number must be below 16.
 */
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);

  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
267
/*
 * Build the rn/rd fields plus an 8-bit immediate that stands for
 * `imm << shift`.  The shift must be even; it is stored as the rotate
 * amount ((32 - shift) & 30) starting at bit 7.
 */
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);

  u_int rotate_field = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rotate_field | imm;
}
276
/*
 * Try to express imm as an 8-bit value plus an even rotation.
 * On success stores the 12-bit rotate/immediate field in *encoded and
 * returns 1; otherwise leaves *encoded at 0 and returns 0.
 */
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded = 0;
  if (imm == 0)
    return 1;

  // rotate right two bits at a time until the value fits in a byte
  for (rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
292
293static void genimm_checked(u_int imm,u_int *encoded)
294{
295 u_int ret=genimm(imm,encoded);
296 assert(ret);
297 (void)ret;
298}
299
300static u_int genjmp(u_int addr)
301{
302 if (addr < 3) return 0; // a branch that will be patched later
303 int offset = addr-(int)out-8;
304 if (offset < -33554432 || offset >= 33554432) {
305 SysPrintf("genjmp: out of range: %08x\n", offset);
306 abort();
307 return 0;
308 }
309 return ((u_int)offset>>2)&0xffffff;
310}
311
312static unused void emit_breakpoint(void)
313{
314 assem_debug("bkpt #0\n");
315 //output_w32(0xe1200070);
316 output_w32(0xe7f001f0);
317}
318
319static void emit_mov(int rs,int rt)
320{
321 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
322 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
323}
324
325static void emit_movs(int rs,int rt)
326{
327 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
328 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
329}
330
331static void emit_add(int rs1,int rs2,int rt)
332{
333 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
334 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
335}
336
337static void emit_adds(int rs1,int rs2,int rt)
338{
339 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
340 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
341}
342#define emit_adds_ptr emit_adds
343
344static void emit_adcs(int rs1,int rs2,int rt)
345{
346 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
347 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
348}
349
350static void emit_neg(int rs, int rt)
351{
352 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
353 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
354}
355
356static void emit_sub(int rs1,int rs2,int rt)
357{
358 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
359 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
360}
361
362static void emit_zeroreg(int rt)
363{
364 assem_debug("mov %s,#0\n",regname[rt]);
365 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
366}
367
368static void emit_loadlp(u_int imm,u_int rt)
369{
370 add_literal((int)out,imm);
371 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
372 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
373}
374
#ifdef HAVE_ARMV7
/* Emit "movw rt,#imm"; imm must be below 65536. */
static void emit_movw(u_int imm,u_int rt)
{
  assert(imm < 65536);
  assem_debug("movw %s,#%d (0x%x)\n", regname[rt], imm, imm);

  // the 16-bit value is split: low 12 bits in place, top 4 at bits 16-19
  u_int low12 = imm & 0xfff;
  u_int high4 = (imm << 4) & 0xf0000;
  output_w32(0xe3000000 | rd_rn_rm(rt, 0, 0) | low12 | high4);
}

/* Emit "movt rt,#(imm >> 16)"; only the upper half of imm is used. */
static void emit_movt(u_int imm,u_int rt)
{
  assem_debug("movt %s,#%d (0x%x)\n", regname[rt], imm&0xffff0000, imm&0xffff0000);

  u_int low12 = (imm >> 16) & 0xfff;
  u_int high4 = (imm >> 12) & 0xf0000;
  output_w32(0xe3400000 | rd_rn_rm(rt, 0, 0) | low12 | high4);
}
#endif
389
390static void emit_movimm(u_int imm,u_int rt)
391{
392 u_int armval;
393 if(genimm(imm,&armval)) {
394 assem_debug("mov %s,#%d\n",regname[rt],imm);
395 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
396 }else if(genimm(~imm,&armval)) {
397 assem_debug("mvn %s,#%d\n",regname[rt],imm);
398 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
399 }else if(imm<65536) {
400 #ifndef HAVE_ARMV7
401 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
402 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
403 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
404 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
405 #else
406 emit_movw(imm,rt);
407 #endif
408 }else{
409 #ifndef HAVE_ARMV7
410 emit_loadlp(imm,rt);
411 #else
412 emit_movw(imm&0x0000FFFF,rt);
413 emit_movt(imm&0xFFFF0000,rt);
414 #endif
415 }
416}
417
418static void emit_pcreladdr(u_int rt)
419{
420 assem_debug("add %s,pc,#?\n",regname[rt]);
421 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
422}
423
424static void emit_loadreg(int r, int hr)
425{
426 assert(hr != EXCLUDE_REG);
427 if (r == 0)
428 emit_zeroreg(hr);
429 else {
430 void *addr;
431 switch (r) {
432 //case HIREG: addr = &hi; break;
433 //case LOREG: addr = &lo; break;
434 case CCREG: addr = &cycle_count; break;
435 case CSREG: addr = &Status; break;
436 case INVCP: addr = &invc_ptr; break;
437 case ROREG: addr = &ram_offset; break;
438 default:
439 assert(r < 34);
440 addr = &psxRegs.GPR.r[r];
441 break;
442 }
443 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
444 assert(offset<4096);
445 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
446 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
447 }
448}
449
450static void emit_storereg(int r, int hr)
451{
452 assert(hr != EXCLUDE_REG);
453 int addr = (int)&psxRegs.GPR.r[r];
454 switch (r) {
455 //case HIREG: addr = &hi; break;
456 //case LOREG: addr = &lo; break;
457 case CCREG: addr = (int)&cycle_count; break;
458 default: assert(r < 34); break;
459 }
460 u_int offset = addr-(u_int)&dynarec_local;
461 assert(offset<4096);
462 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
463 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
464}
465
466static void emit_test(int rs, int rt)
467{
468 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
469 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
470}
471
472static void emit_testimm(int rs,int imm)
473{
474 u_int armval;
475 assem_debug("tst %s,#%d\n",regname[rs],imm);
476 genimm_checked(imm,&armval);
477 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
478}
479
480static void emit_testeqimm(int rs,int imm)
481{
482 u_int armval;
483 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
484 genimm_checked(imm,&armval);
485 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
486}
487
488static void emit_not(int rs,int rt)
489{
490 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
491 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
492}
493
494static void emit_and(u_int rs1,u_int rs2,u_int rt)
495{
496 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
497 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
498}
499
500static void emit_or(u_int rs1,u_int rs2,u_int rt)
501{
502 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
503 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
504}
505
506static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
507{
508 assert(rs<16);
509 assert(rt<16);
510 assert(imm<32);
511 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
512 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
513}
514
515static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
516{
517 assert(rs<16);
518 assert(rt<16);
519 assert(imm<32);
520 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
521 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
522}
523
524static void emit_xor(u_int rs1,u_int rs2,u_int rt)
525{
526 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
527 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
528}
529
530static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
531{
532 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
533 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
534}
535
536static void emit_addimm(u_int rs,int imm,u_int rt)
537{
538 assert(rs<16);
539 assert(rt<16);
540 if(imm!=0) {
541 u_int armval;
542 if(genimm(imm,&armval)) {
543 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
544 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
545 }else if(genimm(-imm,&armval)) {
546 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
547 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
548 #ifdef HAVE_ARMV7
549 }else if(rt!=rs&&(u_int)imm<65536) {
550 emit_movw(imm&0x0000ffff,rt);
551 emit_add(rs,rt,rt);
552 }else if(rt!=rs&&(u_int)-imm<65536) {
553 emit_movw(-imm&0x0000ffff,rt);
554 emit_sub(rs,rt,rt);
555 #endif
556 }else if((u_int)-imm<65536) {
557 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
558 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
559 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
560 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
561 }else {
562 do {
563 int shift = (ffs(imm) - 1) & ~1;
564 int imm8 = imm & (0xff << shift);
565 genimm_checked(imm8,&armval);
566 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
567 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
568 rs = rt;
569 imm &= ~imm8;
570 }
571 while (imm != 0);
572 }
573 }
574 else if(rs!=rt) emit_mov(rs,rt);
575}
576
577static void emit_addimm_and_set_flags(int imm,int rt)
578{
579 assert(imm>-65536&&imm<65536);
580 u_int armval;
581 if(genimm(imm,&armval)) {
582 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
583 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
584 }else if(genimm(-imm,&armval)) {
585 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
586 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
587 }else if(imm<0) {
588 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
589 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
590 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
591 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
592 }else{
593 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
594 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
595 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
596 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
597 }
598}
599
600static void emit_addnop(u_int r)
601{
602 assert(r<16);
603 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
604 output_w32(0xe2800000|rd_rn_rm(r,r,0));
605}
606
607static void emit_andimm(int rs,int imm,int rt)
608{
609 u_int armval;
610 if(imm==0) {
611 emit_zeroreg(rt);
612 }else if(genimm(imm,&armval)) {
613 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
614 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
615 }else if(genimm(~imm,&armval)) {
616 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
617 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
618 }else if(imm==65535) {
619 #ifndef HAVE_ARMV6
620 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
621 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
622 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
623 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
624 #else
625 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
626 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
627 #endif
628 }else{
629 assert(imm>0&&imm<65535);
630 #ifndef HAVE_ARMV7
631 assem_debug("mov r14,#%d\n",imm&0xFF00);
632 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
633 assem_debug("add r14,r14,#%d\n",imm&0xFF);
634 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
635 #else
636 emit_movw(imm,HOST_TEMPREG);
637 #endif
638 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
639 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
640 }
641}
642
643static void emit_orimm(int rs,int imm,int rt)
644{
645 u_int armval;
646 if(imm==0) {
647 if(rs!=rt) emit_mov(rs,rt);
648 }else if(genimm(imm,&armval)) {
649 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
650 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
651 }else{
652 assert(imm>0&&imm<65536);
653 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
654 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
655 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
656 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
657 }
658}
659
660static void emit_xorimm(int rs,int imm,int rt)
661{
662 u_int armval;
663 if(imm==0) {
664 if(rs!=rt) emit_mov(rs,rt);
665 }else if(genimm(imm,&armval)) {
666 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
667 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
668 }else{
669 assert(imm>0&&imm<65536);
670 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
671 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
672 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
673 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
674 }
675}
676
677static void emit_shlimm(int rs,u_int imm,int rt)
678{
679 assert(imm>0);
680 assert(imm<32);
681 //if(imm==1) ...
682 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
683 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
684}
685
686static void emit_lsls_imm(int rs,int imm,int rt)
687{
688 assert(imm>0);
689 assert(imm<32);
690 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
692}
693
694static unused void emit_lslpls_imm(int rs,int imm,int rt)
695{
696 assert(imm>0);
697 assert(imm<32);
698 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
699 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
700}
701
702static void emit_shrimm(int rs,u_int imm,int rt)
703{
704 assert(imm>0);
705 assert(imm<32);
706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
707 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
708}
709
710static void emit_sarimm(int rs,u_int imm,int rt)
711{
712 assert(imm>0);
713 assert(imm<32);
714 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
716}
717
718static void emit_rorimm(int rs,u_int imm,int rt)
719{
720 assert(imm>0);
721 assert(imm<32);
722 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
723 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
724}
725
/*
 * Sign-extend the low 16 bits of rs into rt: one sxth when HAVE_ARMV6 is
 * defined, otherwise a shift left by 16 followed by an arithmetic shift
 * right by 16.
 */
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
  #endif
}
736
/*
 * Sign-extend the low 8 bits of rs into rt: one sxtb when HAVE_ARMV6 is
 * defined, otherwise a shift left by 24 followed by an arithmetic shift
 * right by 24.
 */
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
  #endif
}
747
748static void emit_shl(u_int rs,u_int shift,u_int rt)
749{
750 assert(rs<16);
751 assert(rt<16);
752 assert(shift<16);
753 //if(imm==1) ...
754 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
755 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
756}
757
758static void emit_shr(u_int rs,u_int shift,u_int rt)
759{
760 assert(rs<16);
761 assert(rt<16);
762 assert(shift<16);
763 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
764 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
765}
766
767static void emit_sar(u_int rs,u_int shift,u_int rt)
768{
769 assert(rs<16);
770 assert(rt<16);
771 assert(shift<16);
772 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
773 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
774}
775
776static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
777{
778 assert(rs<16);
779 assert(rt<16);
780 assert(shift<16);
781 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
782 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
783}
784
785static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
786{
787 assert(rs<16);
788 assert(rt<16);
789 assert(shift<16);
790 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
791 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
792}
793
794static void emit_cmpimm(int rs,int imm)
795{
796 u_int armval;
797 if(genimm(imm,&armval)) {
798 assem_debug("cmp %s,#%d\n",regname[rs],imm);
799 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
800 }else if(genimm(-imm,&armval)) {
801 assem_debug("cmn %s,#%d\n",regname[rs],imm);
802 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
803 }else if(imm>0) {
804 assert(imm<65536);
805 emit_movimm(imm,HOST_TEMPREG);
806 assem_debug("cmp %s,r14\n",regname[rs]);
807 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
808 }else{
809 assert(imm>-65536);
810 emit_movimm(-imm,HOST_TEMPREG);
811 assem_debug("cmn %s,r14\n",regname[rs]);
812 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
813 }
814}
815
816static void emit_cmovne_imm(int imm,int rt)
817{
818 assem_debug("movne %s,#%d\n",regname[rt],imm);
819 u_int armval;
820 genimm_checked(imm,&armval);
821 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
822}
823
824static void emit_cmovl_imm(int imm,int rt)
825{
826 assem_debug("movlt %s,#%d\n",regname[rt],imm);
827 u_int armval;
828 genimm_checked(imm,&armval);
829 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
830}
831
832static void emit_cmovb_imm(int imm,int rt)
833{
834 assem_debug("movcc %s,#%d\n",regname[rt],imm);
835 u_int armval;
836 genimm_checked(imm,&armval);
837 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
838}
839
840static void emit_cmovae_imm(int imm,int rt)
841{
842 assem_debug("movcs %s,#%d\n",regname[rt],imm);
843 u_int armval;
844 genimm_checked(imm,&armval);
845 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
846}
847
848static void emit_cmovs_imm(int imm,int rt)
849{
850 assem_debug("movmi %s,#%d\n",regname[rt],imm);
851 u_int armval;
852 genimm_checked(imm,&armval);
853 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
854}
855
856static void emit_cmovne_reg(int rs,int rt)
857{
858 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
859 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
860}
861
862static void emit_cmovl_reg(int rs,int rt)
863{
864 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
865 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
866}
867
868static void emit_cmovb_reg(int rs,int rt)
869{
870 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
871 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
872}
873
874static void emit_cmovs_reg(int rs,int rt)
875{
876 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
877 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
878}
879
/*
 * rt = (rs < imm) ? 1 : 0, using a movlt after the compare.
 * When rt aliases rs, rt is zeroed only after the compare so the operand
 * is still intact; emit_movimm is used for that because it does not
 * touch the flags.
 */
static void emit_slti32(int rs,int imm,int rt)
{
  if (rs != rt) {
    emit_zeroreg(rt);
    emit_cmpimm(rs, imm);
  } else {
    emit_cmpimm(rs, imm);
    emit_movimm(0, rt);
  }
  emit_cmovl_imm(1, rt);
}
887
/*
 * Like emit_slti32 but finishes with movcc instead of movlt.
 * When rt aliases rs, rt is zeroed only after the compare.
 */
static void emit_sltiu32(int rs,int imm,int rt)
{
  if (rs != rt) {
    emit_zeroreg(rt);
    emit_cmpimm(rs, imm);
  } else {
    emit_cmpimm(rs, imm);
    emit_movimm(0, rt);
  }
  emit_cmovb_imm(1, rt);
}
895
896static void emit_cmp(int rs,int rt)
897{
898 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
899 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
900}
901
902static void emit_cmpcs(int rs,int rt)
903{
904 assem_debug("cmpcs %s,%s\n",regname[rs],regname[rt]);
905 output_w32(0x21500000|rd_rn_rm(0,rs,rt));
906}
907
/*
 * rt = (rs > 0) ? 1 : 0: compare rs with 1, preset rt to 1, then
 * overwrite it with 0 through movlt.
 */
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
915
/*
 * rt = (rs != 0) ? 1 : 0.
 * rs is copied to rt with movs (or just tested when rs == rt), after
 * which a movne writes 1; a zero value is left in place as the result.
 */
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
923
/*
 * rt = 1 when the compare of rs1 with rs2 satisfies "lt", else 0.
 * If rt is also one of the operands it is cleared only after the compare.
 */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand = (rs1 == rt || rs2 == rt);

  if (!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
932
/*
 * rt = 1 when the compare of rs1 with rs2 satisfies "cc", else 0.
 * If rt is also one of the operands it is cleared only after the compare.
 */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand = (rs1 == rt || rs2 == rt);

  if (!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
941
942static int can_jump_or_call(const void *a)
943{
944 intptr_t offset = (u_char *)a - out - 8;
945 return (-33554432 <= offset && offset < 33554432);
946}
947
948static void emit_call(const void *a_)
949{
950 int a = (int)a_;
951 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
952 u_int offset=genjmp(a);
953 output_w32(0xeb000000|offset);
954}
955
956static void emit_jmp(const void *a_)
957{
958 int a = (int)a_;
959 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
960 u_int offset=genjmp(a);
961 output_w32(0xea000000|offset);
962}
963
964static void emit_jne(const void *a_)
965{
966 int a = (int)a_;
967 assem_debug("bne %x\n",a);
968 u_int offset=genjmp(a);
969 output_w32(0x1a000000|offset);
970}
971
972static void emit_jeq(const void *a_)
973{
974 int a = (int)a_;
975 assem_debug("beq %x\n",a);
976 u_int offset=genjmp(a);
977 output_w32(0x0a000000|offset);
978}
979
980static void emit_js(const void *a_)
981{
982 int a = (int)a_;
983 assem_debug("bmi %x\n",a);
984 u_int offset=genjmp(a);
985 output_w32(0x4a000000|offset);
986}
987
988static void emit_jns(const void *a_)
989{
990 int a = (int)a_;
991 assem_debug("bpl %x\n",a);
992 u_int offset=genjmp(a);
993 output_w32(0x5a000000|offset);
994}
995
996static void emit_jl(const void *a_)
997{
998 int a = (int)a_;
999 assem_debug("blt %x\n",a);
1000 u_int offset=genjmp(a);
1001 output_w32(0xba000000|offset);
1002}
1003
1004static void emit_jge(const void *a_)
1005{
1006 int a = (int)a_;
1007 assem_debug("bge %x\n",a);
1008 u_int offset=genjmp(a);
1009 output_w32(0xaa000000|offset);
1010}
1011
1012static void emit_jno(const void *a_)
1013{
1014 int a = (int)a_;
1015 assem_debug("bvc %x\n",a);
1016 u_int offset=genjmp(a);
1017 output_w32(0x7a000000|offset);
1018}
1019
1020static void emit_jc(const void *a_)
1021{
1022 int a = (int)a_;
1023 assem_debug("bcs %x\n",a);
1024 u_int offset=genjmp(a);
1025 output_w32(0x2a000000|offset);
1026}
1027
1028static void emit_jcc(const void *a_)
1029{
1030 int a = (int)a_;
1031 assem_debug("bcc %x\n",a);
1032 u_int offset=genjmp(a);
1033 output_w32(0x3a000000|offset);
1034}
1035
1036static unused void emit_callreg(u_int r)
1037{
1038 assert(r<15);
1039 assem_debug("blx %s\n",regname[r]);
1040 output_w32(0xe12fff30|r);
1041}
1042
1043static void emit_jmpreg(u_int r)
1044{
1045 assem_debug("mov pc,%s\n",regname[r]);
1046 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1047}
1048
1049static void emit_ret(void)
1050{
1051 emit_jmpreg(14);
1052}
1053
1054static void emit_readword_indexed(int offset, int rs, int rt)
1055{
1056 assert(offset>-4096&&offset<4096);
1057 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1058 if(offset>=0) {
1059 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1060 }else{
1061 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1062 }
1063}
1064
1065static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1066{
1067 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1068 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1069}
1070#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1071
1072static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1073{
1074 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1075 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1076}
1077
1078static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1079{
1080 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1081 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1082}
1083
1084static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1085{
1086 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1087 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1088}
1089
1090static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1091{
1092 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1093 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1094}
1095
1096static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1097{
1098 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1099 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1100}
1101
1102static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1103{
1104 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1105 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1106}
1107
1108static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1109{
1110 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1111 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1112}
1113
1114static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1115{
1116 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1117 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1118}
1119
1120static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1121{
1122 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1123 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1124}
1125
1126static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1127{
1128 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1129 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1130}
1131
1132static void emit_str_dualindexed(int rs1, int rs2, int rt)
1133{
1134 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1135 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1136}
1137
1138static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1139{
1140 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1141 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1142}
1143
1144static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1145{
1146 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1147 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1148}
1149
1150static void emit_movsbl_indexed(int offset, int rs, int rt)
1151{
1152 assert(offset>-256&&offset<256);
1153 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1154 if(offset>=0) {
1155 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1156 }else{
1157 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1158 }
1159}
1160
1161static void emit_movswl_indexed(int offset, int rs, int rt)
1162{
1163 assert(offset>-256&&offset<256);
1164 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1165 if(offset>=0) {
1166 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1167 }else{
1168 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1169 }
1170}
1171
1172static void emit_movzbl_indexed(int offset, int rs, int rt)
1173{
1174 assert(offset>-4096&&offset<4096);
1175 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1176 if(offset>=0) {
1177 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1178 }else{
1179 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1180 }
1181}
1182
1183static void emit_movzwl_indexed(int offset, int rs, int rt)
1184{
1185 assert(offset>-256&&offset<256);
1186 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1187 if(offset>=0) {
1188 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1189 }else{
1190 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1191 }
1192}
1193
1194static void emit_ldrd(int offset, int rs, int rt)
1195{
1196 assert(offset>-256&&offset<256);
1197 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1198 if(offset>=0) {
1199 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1200 }else{
1201 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1202 }
1203}
1204
1205static void emit_readword(void *addr, int rt)
1206{
1207 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1208 assert(offset<4096);
1209 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1210 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1211}
1212#define emit_readptr emit_readword
1213
1214static void emit_writeword_indexed(int rt, int offset, int rs)
1215{
1216 assert(offset>-4096&&offset<4096);
1217 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1218 if(offset>=0) {
1219 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1220 }else{
1221 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1222 }
1223}
1224
1225static void emit_writehword_indexed(int rt, int offset, int rs)
1226{
1227 assert(offset>-256&&offset<256);
1228 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1229 if(offset>=0) {
1230 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1231 }else{
1232 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1233 }
1234}
1235
1236static void emit_writebyte_indexed(int rt, int offset, int rs)
1237{
1238 assert(offset>-4096&&offset<4096);
1239 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1240 if(offset>=0) {
1241 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1242 }else{
1243 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1244 }
1245}
1246
1247static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1248{
1249 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1250 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1251}
1252
1253static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1254{
1255 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1256 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1257}
1258
1259static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1260{
1261 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1262 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1263}
1264
1265static void emit_writeword(int rt, void *addr)
1266{
1267 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1268 assert(offset<4096);
1269 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1270 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1271}
1272
1273static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1274{
1275 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1276 assert(rs1<16);
1277 assert(rs2<16);
1278 assert(hi<16);
1279 assert(lo<16);
1280 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1281}
1282
1283static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1284{
1285 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1286 assert(rs1<16);
1287 assert(rs2<16);
1288 assert(hi<16);
1289 assert(lo<16);
1290 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1291}
1292
1293static void emit_clz(int rs,int rt)
1294{
1295 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1296 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1297}
1298
1299static void emit_subcs(int rs1,int rs2,int rt)
1300{
1301 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1302 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1303}
1304
1305static void emit_shrcc_imm(int rs,u_int imm,int rt)
1306{
1307 assert(imm>0);
1308 assert(imm<32);
1309 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1310 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1311}
1312
1313static void emit_shrne_imm(int rs,u_int imm,int rt)
1314{
1315 assert(imm>0);
1316 assert(imm<32);
1317 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1318 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1319}
1320
1321static void emit_negmi(int rs, int rt)
1322{
1323 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1324 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1325}
1326
1327static void emit_negsmi(int rs, int rt)
1328{
1329 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1330 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1331}
1332
1333static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1334{
1335 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1336 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1337}
1338
1339static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1340{
1341 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1342 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1343}
1344
1345static void emit_teq(int rs, int rt)
1346{
1347 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1348 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1349}
1350
1351static unused void emit_rsbimm(int rs, int imm, int rt)
1352{
1353 u_int armval;
1354 genimm_checked(imm,&armval);
1355 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1356 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1357}
1358
1359// Conditionally select one of two immediates, optimizing for small code size
1360// This will only be called if HAVE_CMOV_IMM is defined
1361static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1362{
1363 u_int armval;
1364 if(genimm(imm2-imm1,&armval)) {
1365 emit_movimm(imm1,rt);
1366 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1367 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1368 }else if(genimm(imm1-imm2,&armval)) {
1369 emit_movimm(imm1,rt);
1370 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1371 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1372 }
1373 else {
1374 #ifndef HAVE_ARMV7
1375 emit_movimm(imm1,rt);
1376 add_literal((int)out,imm2);
1377 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1378 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1379 #else
1380 emit_movw(imm1&0x0000FFFF,rt);
1381 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1382 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1383 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1384 }
1385 emit_movt(imm1&0xFFFF0000,rt);
1386 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1387 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1388 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1389 }
1390 #endif
1391 }
1392}
1393
1394// special case for checking invalid_code
1395static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1396{
1397 assert(imm<128&&imm>=0);
1398 assert(r>=0&&r<16);
1399 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1400 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1401 emit_cmpimm(HOST_TEMPREG,imm);
1402}
1403
1404static void emit_callne(int a)
1405{
1406 assem_debug("blne %x\n",a);
1407 u_int offset=genjmp(a);
1408 output_w32(0x1b000000|offset);
1409}
1410
1411// Used to preload hash table entries
1412static unused void emit_prefetchreg(int r)
1413{
1414 assem_debug("pld %s\n",regname[r]);
1415 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1416}
1417
1418// Special case for mini_ht
1419static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1420{
1421 assert(offset<4096);
1422 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1423 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1424}
1425
1426static void emit_orrne_imm(int rs,int imm,int rt)
1427{
1428 u_int armval;
1429 genimm_checked(imm,&armval);
1430 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1431 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1432}
1433
1434static unused void emit_addpl_imm(int rs,int imm,int rt)
1435{
1436 u_int armval;
1437 genimm_checked(imm,&armval);
1438 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1439 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1440}
1441
1442static void emit_jno_unlikely(int a)
1443{
1444 //emit_jno(a);
1445 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1446 output_w32(0x72800000|rd_rn_rm(15,15,0));
1447}
1448
1449static void save_regs_all(u_int reglist)
1450{
1451 int i;
1452 if(!reglist) return;
1453 assem_debug("stmia fp,{");
1454 for(i=0;i<16;i++)
1455 if(reglist&(1<<i))
1456 assem_debug("r%d,",i);
1457 assem_debug("}\n");
1458 output_w32(0xe88b0000|reglist);
1459}
1460
1461static void restore_regs_all(u_int reglist)
1462{
1463 int i;
1464 if(!reglist) return;
1465 assem_debug("ldmia fp,{");
1466 for(i=0;i<16;i++)
1467 if(reglist&(1<<i))
1468 assem_debug("r%d,",i);
1469 assem_debug("}\n");
1470 output_w32(0xe89b0000|reglist);
1471}
1472
1473// Save registers before function call
1474static void save_regs(u_int reglist)
1475{
1476 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1477 save_regs_all(reglist);
1478}
1479
1480// Restore registers after function call
1481static void restore_regs(u_int reglist)
1482{
1483 reglist&=CALLER_SAVE_REGS;
1484 restore_regs_all(reglist);
1485}
1486
1487/* Stubs/epilogue */
1488
1489static void literal_pool(int n)
1490{
1491 if(!literalcount) return;
1492 if(n) {
1493 if((int)out-literals[0][0]<4096-n) return;
1494 }
1495 u_int *ptr;
1496 int i;
1497 for(i=0;i<literalcount;i++)
1498 {
1499 u_int l_addr=(u_int)out;
1500 int j;
1501 for(j=0;j<i;j++) {
1502 if(literals[j][1]==literals[i][1]) {
1503 //printf("dup %08x\n",literals[i][1]);
1504 l_addr=literals[j][0];
1505 break;
1506 }
1507 }
1508 ptr=(u_int *)literals[i][0];
1509 u_int offset=l_addr-(u_int)ptr-8;
1510 assert(offset<4096);
1511 assert(!(offset&3));
1512 *ptr|=offset;
1513 if(l_addr==(u_int)out) {
1514 literals[i][0]=l_addr; // remember for dupes
1515 output_w32(literals[i][1]);
1516 }
1517 }
1518 literalcount=0;
1519}
1520
1521static void literal_pool_jumpover(int n)
1522{
1523 if(!literalcount) return;
1524 if(n) {
1525 if((int)out-literals[0][0]<4096-n) return;
1526 }
1527 void *jaddr = out;
1528 emit_jmp(0);
1529 literal_pool(0);
1530 set_jump_target(jaddr, out);
1531}
1532
1533// parsed by get_pointer, find_extjump_insn
1534static void emit_extjump(u_char *addr, u_int target)
1535{
1536 u_char *ptr=(u_char *)addr;
1537 assert((ptr[3]&0x0e)==0xa);
1538 (void)ptr;
1539
1540 emit_loadlp(target,0);
1541 emit_loadlp((u_int)addr,1);
1542 assert(ndrc->translation_cache <= addr &&
1543 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
1544 emit_far_jump(dyna_linker);
1545}
1546
1547static void check_extjump2(void *src)
1548{
1549 u_int *ptr = src;
1550 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1551 (void)ptr;
1552}
1553
1554// put rt_val into rt, potentially making use of rs with value rs_val
1555static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1556{
1557 u_int armval;
1558 int diff;
1559 if(genimm(rt_val,&armval)) {
1560 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1561 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1562 return;
1563 }
1564 if(genimm(~rt_val,&armval)) {
1565 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1566 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1567 return;
1568 }
1569 diff=rt_val-rs_val;
1570 if(genimm(diff,&armval)) {
1571 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1572 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1573 return;
1574 }else if(genimm(-diff,&armval)) {
1575 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1576 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1577 return;
1578 }
1579 emit_movimm(rt_val,rt);
1580}
1581
// return 1 if above function can do its job cheaply, i.e. v1==v2 or the
// difference (in either direction) fits in 8 significant bits once trailing
// pairs of zero bits are stripped.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if(v1==v2)
    return 1;
  delta=v2-v1;

  // positive direction
  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  // negative direction
  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100?1:0;
}
1597
1598static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1599{
1600 switch(type) {
1601 case LOADB_STUB: emit_signextend8(rs,rt); break;
1602 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1603 case LOADH_STUB: emit_signextend16(rs,rt); break;
1604 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1605 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1606 default: assert(0);
1607 }
1608}
1609
1610#include "pcsxmem.h"
1611#include "pcsxmem_inline.c"
1612
// Emit the out-of-line code for read stub n.
// The generated code looks up the mem_rtab entry for the page of the address
// held in rs, shifts it left by one with flags, and on carry clear performs
// the load directly and returns. Otherwise it calls the jump_handler_read*
// routine matching the stub type and moves the result from r0 into rt.
static void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n].a*4);
  literal_pool(256);
  set_jump_target(stubs[n].addr, out);
  enum stub_type type=stubs[n].type;
  int i=stubs[n].a;
  int rs=stubs[n].b;
  const struct regstat *i_regs=(struct regstat *)stubs[n].c;
  u_int reglist=stubs[n].e;
  const signed char *i_regmap=i_regs->regmap;
  int rt;
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rt=get_reg(i_regmap,dops[i].rt1);
  }
  assert(rs>=0);
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL;
  reglist|=(1<<rs);
  // look for a register among r0-r9,r12 (mask 0x13ff) that is not in reglist
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // the destination register is overwritten anyway, no need to preserve it
  if(rt>=0&&dops[i].rt1!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // nothing free: save registers up front and take r0 (or r2 if rs is r0)
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // prefer r1 as second temp when it is available
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  emit_readword(&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
    switch(type) {
      case LOADB_STUB:  emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB:  emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB:  emit_ldrcc_dualindexed(temp2,rs,rt); break;
      default:          assert(0);
    }
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n].retaddr); // return address

  // handler call path
  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=jump_handler_read16;
  if(type==LOADW_STUB)
    handler=jump_handler_read32;
  assert(handler);
  pass_args(rs,temp2);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  // r2 = cycle count plus the adjustment stored in stubs[n].d
  emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
  emit_far_call(handler);
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
    mov_loadtype_adj(type,0,rt);
  }
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr); // return address
}
1693
// Emit an inline read of the constant address addr for instruction i.
// Three cases are handled in order: pcsx_direct_read() reports it dealt with
// the access; get_direct_memhandler() returns no handler, in which case a
// plain load from host_addr is emitted; otherwise a handler call is emitted
// and the result is moved from r0 into rt with the extension for `type`.
static void inline_readstub(enum stub_type type, int i, u_int addr,
  const signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rt=get_reg(regmap,target);
  if(rs<0) rs=get_reg_temp(regmap);
  assert(rs>=0);
  u_int is_dynamic;
  uintptr_t host_addr = 0;
  void *handler;
  int cc=get_reg(regmap,CCREG);
  if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
    return;
  handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
  if (handler == NULL) {
    // nothing to do if the result is not wanted
    if(rt<0||dops[i].rt1==0)
      return;
    if(addr!=host_addr)
      emit_movimm_from(addr,rs,host_addr,rs);
    switch(type) {
      case LOADB_STUB:  emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB:  emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB:  emit_readword_indexed(0,rs,rt); break;
      default:          assert(0);
    }
    return;
  }
  is_dynamic=pcsxmem_is_handler_dynamic(addr);
  if(is_dynamic) {
    // go through the generic jump_handler_read* routine for the access width
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=jump_handler_read16;
    if(type==LOADW_STUB)
      handler=jump_handler_read32;
  }

  // call a memhandler
  if(rt>=0&&dops[i].rt1!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // r0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  if(cc<0)
    emit_loadreg(CCREG,2);
  if(is_dynamic) {
    // r1 = mem_rtab entry for this page shifted left by one, r2 = cycles+adj
    emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
    emit_addimm(cc<0?2:cc,adj,2);
  }
  else {
    // store last_count + cycles + adj to Count before calling the handler
    emit_readword(&last_count,3);
    emit_addimm(cc<0?2:cc,adj,2);
    emit_add(2,3,2);
    emit_writeword(2,&Count);
  }

  emit_far_call(handler);

  if(rt>=0&&dops[i].rt1!=0) {
    switch(type) {
      case LOADB_STUB:  emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB:  emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB:  if(rt!=0) emit_mov(0,rt); break;
      default:          assert(0);
    }
  }
  restore_regs(reglist);
}
1768
// Emit the out-of-line code for write stub n.
// The generated code looks up the mem_wtab entry for the page of the address
// held in rs, shifts it left by one with flags, and on carry clear performs
// the store directly and returns. Otherwise it calls the jump_handler_write*
// routine matching the stub type and updates the cycle count register from
// the value left in r0.
static void do_writestub(int n)
{
  assem_debug("do_writestub %x\n",start+stubs[n].a*4);
  literal_pool(256);
  set_jump_target(stubs[n].addr, out);
  enum stub_type type=stubs[n].type;
  int i=stubs[n].a;
  int rs=stubs[n].b;
  const struct regstat *i_regs=(struct regstat *)stubs[n].c;
  u_int reglist=stubs[n].e;
  const signed char *i_regmap=i_regs->regmap;
  int rt,r;
  if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
    rt=get_reg(i_regmap,r=FTEMP);
  }else{
    rt=get_reg(i_regmap,r=dops[i].rs2);
  }
  assert(rs>=0);
  assert(rt>=0);
  int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL;
  int reglist2=reglist|(1<<rs)|(1<<rt);
  // look for a register among r0-r9,r12 (mask 0x13ff) that is not in use
  for(rtmp=0;rtmp<=12;rtmp++) {
    if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
      temp=rtmp; break;
    }
  }
  if(temp==-1) {
    // nothing free: save registers up front and take the first of r0-r3
    // that is neither rs nor rt
    save_regs(reglist);
    regs_saved=1;
    for(rtmp=0;rtmp<=3;rtmp++)
      if(rtmp!=rs&&rtmp!=rt)
        {temp=rtmp;break;}
  }
  // prefer r3 as second temp when it is available
  if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
    temp2=3;
  emit_readword(&mem_wtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  switch(type) {
    case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
    case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
    case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
    default:          assert(0);
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n].retaddr); // return address (invcode check)

  // handler call path
  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  switch(type) {
    case STOREB_STUB: handler=jump_handler_write8; break;
    case STOREH_STUB: handler=jump_handler_write16; break;
    case STOREW_STUB: handler=jump_handler_write32; break;
    default:          assert(0);
  }
  assert(handler);
  pass_args(rs,rt);
  // the shifted table entry goes in r3
  if(temp2!=3)
    emit_mov(temp2,3);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  // r2 = cycle count plus the adjustment stored in stubs[n].d
  emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
  // returns new cycle_count
  emit_far_call(handler);
  // undo the adjustment on the returned count
  emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr);
}
1849
1850static void inline_writestub(enum stub_type type, int i, u_int addr,
1851 const signed char regmap[], int target, int adj, u_int reglist)
1852{
1853 int rs=get_reg_temp(regmap);
1854 int rt=get_reg(regmap,target);
1855 assert(rs>=0);
1856 assert(rt>=0);
1857 uintptr_t host_addr = 0;
1858 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1859 if (handler == NULL) {
1860 if(addr!=host_addr)
1861 emit_movimm_from(addr,rs,host_addr,rs);
1862 switch(type) {
1863 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1864 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1865 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1866 default: assert(0);
1867 }
1868 return;
1869 }
1870
1871 // call a memhandler
1872 save_regs(reglist);
1873 pass_args(rs,rt);
1874 int cc=get_reg(regmap,CCREG);
1875 if(cc<0)
1876 emit_loadreg(CCREG,2);
1877 emit_addimm(cc<0?2:cc,adj,2);
1878 emit_movimm((u_int)handler,3);
1879 // returns new cycle_count
1880 emit_far_call(jump_handler_write_h);
1881 emit_addimm(0,-adj,cc<0?2:cc);
1882 if(cc<0)
1883 emit_storereg(CCREG,2);
1884 restore_regs(reglist);
1885}
1886
1887/* Special assem */
1888
1889static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1890{
1891 save_regs_all(reglist);
1892 cop2_do_stall_check(op, i, i_regs, 0);
1893#ifdef PCNT
1894 emit_movimm(op, 0);
1895 emit_far_call(pcnt_gte_start);
1896#endif
1897 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
1898}
1899
1900static void c2op_epilogue(u_int op,u_int reglist)
1901{
1902#ifdef PCNT
1903 emit_movimm(op,0);
1904 emit_far_call(pcnt_gte_end);
1905#endif
1906 restore_regs_all(reglist);
1907}
1908
1909static void c2op_call_MACtoIR(int lm,int need_flags)
1910{
1911 if(need_flags)
1912 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
1913 else
1914 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
1915}
1916
1917static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
1918{
1919 emit_far_call(func);
1920 // func is C code and trashes r0
1921 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1922 if(need_flags||need_ir)
1923 c2op_call_MACtoIR(lm,need_flags);
1924 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
1925}
1926
// Assemble the GTE (cop2) operation encoded in source[i].
// Nothing is emitted when gte_handlers[] has no entry for the op. Otherwise
// the op is wrapped in c2op_prologue()/c2op_epilogue(); several ops have
// dedicated assembly helpers (unless DRC_DBG is defined), the rest call the
// generic handler from gte_handlers[] or gte_handlers_nf[].
static void c2op_assemble(int i, const struct regstat *i_regs)
{
  u_int c2op = source[i] & 0x3f;
  u_int reglist_full = get_host_reglist(i_regs->regmap);
  u_int reglist = reglist_full & CALLER_SAVE_REGS;
  int need_flags, need_ir;

  if (gte_handlers[c2op]!=NULL) {
    need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
    need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
    assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
      source[i],gte_unneeded[i+1],need_flags,need_ir);
    if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
      need_flags=0;
    // 'sf' and 'lm' fields of the opcode word
    int shift = (source[i] >> 19) & 1;
    int lm = (source[i] >> 10) & 1;
    switch(c2op) {
#ifndef DRC_DBG
      case GTE_MVMVA: {
#ifdef HAVE_ARMV5
        int v  = (source[i] >> 15) & 3;
        int cv = (source[i] >> 13) & 3;
        int mx = (source[i] >> 17) & 3;
        reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
        c2op_prologue(c2op,i,i_regs,reglist);
        /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
        if(v<3)
          emit_ldrd(v*8,0,4);
        else {
          emit_movzwl_indexed(9*4,0,4);  // gteIR
          emit_movzwl_indexed(10*4,0,6);
          emit_movzwl_indexed(11*4,0,5);
          emit_orrshl_imm(6,16,4);
        }
        if(mx<3)
          emit_addimm(0,32*4+mx*8*4,6);
        else
          emit_readword(&zeromem_ptr,6);
        if(cv<3)
          emit_addimm(0,32*4+(cv*8+5)*4,7);
        else
          emit_readword(&zeromem_ptr,7);
#ifdef __ARM_NEON__
        emit_movimm(source[i],1); // opcode
        emit_far_call(gteMVMVA_part_neon);
        if(need_flags) {
          emit_movimm(lm,1);
          emit_far_call(gteMACtoIR_flags_neon);
        }
#else
        if(cv==3&&shift)
          emit_far_call(gteMVMVA_part_cv3sh12_arm);
        else {
          emit_movimm(shift,1);
          emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
        }
        if(need_flags||need_ir)
          c2op_call_MACtoIR(lm,need_flags);
#endif
#else /* if not HAVE_ARMV5 */
        c2op_prologue(c2op,i,i_regs,reglist);
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,&psxRegs.code);
        emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
#endif
        break;
      }
      case GTE_OP:
        c2op_prologue(c2op,i,i_regs,reglist);
        emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
        if(need_flags||need_ir) {
          // reload r0 with the cop2 register base before the next helper
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DPCS:
        c2op_prologue(c2op,i,i_regs,reglist);
        c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_INTPL:
        c2op_prologue(c2op,i,i_regs,reglist);
        c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_SQR:
        c2op_prologue(c2op,i,i_regs,reglist);
        emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
        if(need_flags||need_ir) {
          // reload r0 with the cop2 register base before the next helper
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DCPL:
        c2op_prologue(c2op,i,i_regs,reglist);
        c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
        break;
      case GTE_GPF:
        c2op_prologue(c2op,i,i_regs,reglist);
        c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_GPL:
        c2op_prologue(c2op,i,i_regs,reglist);
        c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
        break;
#endif
      default:
        // generic path: call the C handler for this op
        c2op_prologue(c2op,i,i_regs,reglist);
#ifdef DRC_DBG
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,&psxRegs.code);
#endif
        emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
        break;
    }
    c2op_epilogue(c2op,reglist);
  }
}
2043
// Emit the value fix-up for a write to cop2 control register 31:
//   value = value & 0x7ffff000;
//   if (value & 0x7f87e000) value |= 0x80000000;
// sl holds the source value, temp receives the result.
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // clear the low 12 bits by shifting them out and back
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // the 0x7f87e000 test is split into three chained tests
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  // any hit sets the top bit
  emit_orrne_imm(temp, 0x80000000, temp);
}
2055
2056static void do_mfc2_31_one(u_int copr,signed char temp)
2057{
2058 emit_readword(&reg_cop2d[copr],temp);
2059 emit_lsls_imm(temp,16,temp);
2060 emit_cmovs_imm(0,temp);
2061 emit_cmpimm(temp,0xf80<<16);
2062 emit_andimm(temp,0xf80<<16,temp);
2063 emit_cmovae_imm(0xf80<<16,temp);
2064}
2065
2066static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2067{
2068 if (temp < 0) {
2069 host_tempreg_acquire();
2070 temp = HOST_TEMPREG;
2071 }
2072 do_mfc2_31_one(9,temp);
2073 emit_shrimm(temp,7+16,tl);
2074 do_mfc2_31_one(10,temp);
2075 emit_orrshr_imm(temp,2+16,tl);
2076 do_mfc2_31_one(11,temp);
2077 emit_orrshr_imm(temp,-3+16,tl);
2078 emit_writeword(tl,&reg_cop2d[29]);
2079 if (temp == HOST_TEMPREG)
2080 host_tempreg_release();
2081}
2082
2083static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
2084{
2085 // case 0x18: MULT
2086 // case 0x19: MULTU
2087 // case 0x1A: DIV
2088 // case 0x1B: DIVU
2089 // case 0x1C: DMULT
2090 // case 0x1D: DMULTU
2091 // case 0x1E: DDIV
2092 // case 0x1F: DDIVU
2093 if(dops[i].rs1&&dops[i].rs2)
2094 {
2095 if((dops[i].opcode2&4)==0) // 32-bit
2096 {
2097 if(dops[i].opcode2==0x18) // MULT
2098 {
2099 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2100 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2101 signed char hi=get_reg(i_regs->regmap,HIREG);
2102 signed char lo=get_reg(i_regs->regmap,LOREG);
2103 assert(m1>=0);
2104 assert(m2>=0);
2105 assert(hi>=0);
2106 assert(lo>=0);
2107 emit_smull(m1,m2,hi,lo);
2108 }
2109 if(dops[i].opcode2==0x19) // MULTU
2110 {
2111 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2112 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2113 signed char hi=get_reg(i_regs->regmap,HIREG);
2114 signed char lo=get_reg(i_regs->regmap,LOREG);
2115 assert(m1>=0);
2116 assert(m2>=0);
2117 assert(hi>=0);
2118 assert(lo>=0);
2119 emit_umull(m1,m2,hi,lo);
2120 }
2121 if(dops[i].opcode2==0x1A) // DIV
2122 {
2123 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2124 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2125 assert(d1>=0);
2126 assert(d2>=0);
2127 signed char quotient=get_reg(i_regs->regmap,LOREG);
2128 signed char remainder=get_reg(i_regs->regmap,HIREG);
2129 assert(quotient>=0);
2130 assert(remainder>=0);
2131 emit_movs(d1,remainder);
2132 emit_movimm(0xffffffff,quotient);
2133 emit_negmi(quotient,quotient); // .. quotient and ..
2134 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2135 emit_movs(d2,HOST_TEMPREG);
2136 emit_jeq(out+52); // Division by zero
2137 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2138#ifdef HAVE_ARMV5
2139 emit_clz(HOST_TEMPREG,quotient);
2140 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2141#else
2142 emit_movimm(0,quotient);
2143 emit_addpl_imm(quotient,1,quotient);
2144 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2145 emit_jns(out-2*4);
2146#endif
2147 emit_orimm(quotient,1<<31,quotient);
2148 emit_shr(quotient,quotient,quotient);
2149 emit_cmp(remainder,HOST_TEMPREG);
2150 emit_subcs(remainder,HOST_TEMPREG,remainder);
2151 emit_adcs(quotient,quotient,quotient);
2152 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2153 emit_jcc(out-16); // -4
2154 emit_teq(d1,d2);
2155 emit_negmi(quotient,quotient);
2156 emit_test(d1,d1);
2157 emit_negmi(remainder,remainder);
2158 }
2159 if(dops[i].opcode2==0x1B) // DIVU
2160 {
2161 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2162 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2163 assert(d1>=0);
2164 assert(d2>=0);
2165 signed char quotient=get_reg(i_regs->regmap,LOREG);
2166 signed char remainder=get_reg(i_regs->regmap,HIREG);
2167 assert(quotient>=0);
2168 assert(remainder>=0);
2169 emit_mov(d1,remainder);
2170 emit_movimm(0xffffffff,quotient); // div0 case
2171 emit_test(d2,d2);
2172 emit_jeq(out+40); // Division by zero
2173#ifdef HAVE_ARMV5
2174 emit_clz(d2,HOST_TEMPREG);
2175 emit_movimm(1<<31,quotient);
2176 emit_shl(d2,HOST_TEMPREG,d2);
2177#else
2178 emit_movimm(0,HOST_TEMPREG);
2179 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2180 emit_lslpls_imm(d2,1,d2);
2181 emit_jns(out-2*4);
2182 emit_movimm(1<<31,quotient);
2183#endif
2184 emit_shr(quotient,HOST_TEMPREG,quotient);
2185 emit_cmp(remainder,d2);
2186 emit_subcs(remainder,d2,remainder);
2187 emit_adcs(quotient,quotient,quotient);
2188 emit_shrcc_imm(d2,1,d2);
2189 emit_jcc(out-16); // -4
2190 }
2191 }
2192 else // 64-bit
2193 assert(0);
2194 }
2195 else
2196 {
2197 // Multiply by zero is zero.
2198 // MIPS does not have a divide by zero exception.
2199 // The result is undefined, we return zero.
2200 signed char hr=get_reg(i_regs->regmap,HIREG);
2201 signed char lr=get_reg(i_regs->regmap,LOREG);
2202 if(hr>=0) emit_zeroreg(hr);
2203 if(lr>=0) emit_zeroreg(lr);
2204 }
2205}
2206#define multdiv_assemble multdiv_assemble_arm
2207
2208static void do_jump_vaddr(int rs)
2209{
2210 emit_far_jump(jump_vaddr_reg[rs]);
2211}
2212
// Intentionally emits nothing on ARM.
// The x86 backend uses this hook to place the value 0xf8 into a register;
// on ARM the hash is computed with a single instruction instead
// (see do_rhash), so no preload is needed.
static void do_preload_rhash(int r)
{
  (void)r; // unused on this architecture
}
2217
2218static void do_preload_rhtbl(int ht) {
2219 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2220}
2221
// Emit the hash computation for a mini hash table lookup: rh = rs & 0xf8.
// The mask keeps bits 3..7 of the address, i.e. the same bits that
// do_miniht_insert uses as (return_address & 0xFF) >> 3, left unshifted.
static void do_rhash(int rs, int rh)
{
  const int hash_mask = 0xf8;

  emit_andimm(rs, hash_mask, rh);
}
2225
2226static void do_miniht_load(int ht,int rh) {
2227 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2228 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2229}
2230
// Emit the tail of a mini hash table lookup.
//   rs - register with the target address being looked up
//   rh - register with the value loaded from the table entry
//   ht - register pointing at the table entry
// If rh == rs, the emitted code loads register 15 (the ARM PC) from
// offset 4 of the entry; otherwise execution continues into the
// jump_vaddr stub for the register holding the address.
static void do_miniht_jump(int rs, int rh, int ht)
{
  int addr_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // With this hack enabled the address is always handed over in r7,
  // so a single stub (jump_vaddr_r7) is used.
  if (addr_reg != 7) {
    emit_mov(addr_reg, 7);
    addr_reg = 7;
  }
#endif
  do_jump_vaddr(addr_reg);
}
2241
2242static void do_miniht_insert(u_int return_address,int rt,int temp) {
2243 #ifndef HAVE_ARMV7
2244 emit_movimm(return_address,rt); // PC into link register
2245 add_to_linker(out,return_address,1);
2246 emit_pcreladdr(temp);
2247 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2248 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2249 #else
2250 emit_movw(return_address&0x0000FFFF,rt);
2251 add_to_linker(out,return_address,1);
2252 emit_pcreladdr(temp);
2253 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2254 emit_movt(return_address&0xFFFF0000,rt);
2255 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2256 #endif
2257}
2258
2259// CPU-architecture-specific initialization
2260static void arch_init(void)
2261{
2262 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2263 struct tramp_insns *ops = ndrc->tramp.ops;
2264 size_t i;
2265 assert(!(diff & 3));
2266 assert(diff < 0x1000);
2267 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2268 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2269 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2270 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2271}
2272
2273// vim:shiftwidth=2:expandtab