drc: rework block tracking
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#define unused __attribute__((unused))
31
32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
// Routines that are only declared in this part of the file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN routines indexed by register number N.
// Slots 11, 13, 14 and 15 have no routine and stay null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
72
73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
104/* Linker */
105
// Patch the instruction word at addr so that it refers to target_.
// The instruction kind is picked by the top byte of the word:
//   0xe2  immediate form (what emit_pcreladdr writes); the distance must be
//         below 1024 bytes and is stored as a word count
//   0x72  generated by emit_jno_unlikely; patched as an immediate when the
//         distance can be encoded, otherwise replaced by a bvc branch
//   else  a branch, whose 24-bit offset field is rewritten
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = (u_int *)addr;
  const u_char top_byte = ((u_char *)addr)[3];
  const u_int target = (u_int)target_;
  // distance measured from the instruction address + 8
  const u_int rel = target-(u_int)insn-8;

  if(top_byte==0xe2) {
    assert(rel<1024);
    assert(((uintptr_t)addr&3)==0);
    assert((target&3)==0);
    *insn=(*insn&0xFFFFF000)|(rel>>2)|0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
    return;
  }

  if(top_byte==0x72) {
    // generated by emit_jno_unlikely
    if(rel<1024) {
      assert(((uintptr_t)addr&3)==0);
      assert((target&3)==0);
      *insn=(*insn&0xFFFFF000)|(rel>>2)|0xF00;
    }
    else if(rel<4096&&!(rel&15)) {
      assert(((uintptr_t)addr&3)==0);
      assert((target&3)==0);
      *insn=(*insn&0xFFFFF000)|(rel>>4)|0xE00;
    }
    else {
      // not encodable as an immediate: turn the word into "bvc target"
      *insn=(0x7A000000)|((rel<<6)>>8);
    }
    return;
  }

  assert((top_byte&0x0e)==0xa);
  *insn=(*insn&0xFF000000)|((rel<<6)>>8);
}
137
// Variant of set_jump_target that can also copy the instruction found at
// the branch target into the slot just before the branch. It works, but
// the difference in speed is usually insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn=(u_int *)addr;
  u_char top_byte=((u_char *)addr)[3];
  assert(!copy||insn[-1]==0xe28dd000);
  if(top_byte==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }
  assert((top_byte&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  // target instructions that are never copied into the slot
  if((target_insn&0x0e100000)==0              // ALU, no immediate, no flags
     ||(target_insn&0x0c100000)==0x04100000   // Load
     ||(target_insn&0x08000000))
    copy=0;
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
172
173/* Literal pool */
174static void add_literal(int addr,int val)
175{
176 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
177 literals[literalcount][0]=addr;
178 literals[literalcount][1]=val;
179 literalcount++;
180}
181
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The second word of the stub must be "ldr rx, [pc, #ofs]"; the word that
// load refers to holds the address that is returned.
static void *find_extjump_insn(void *stub)
{
  // use byte pointers: arithmetic on void * is a GNU extension, not ISO C
  u_char *ldr_insn=(u_char *)stub+4;
  int insn=*(int *)ldr_insn;
  assert((insn&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
  u_int offset=insn&0xfff;
  // the load is relative to the instruction address + 8
  void **l_ptr=(void **)(ldr_insn+offset+8);
  return *l_ptr;
}
192
193// find where the external branch is linked to, using the addr of its stub:
194// get address that insn one after stub loads (dyna_linker arg1),
195// treat it as a pointer to branch insn,
196// return addr where that branch jumps to
#if 0
static void *get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *branch=find_extjump_insn(stub);
  int insn=*branch;
  assert((insn&0x0f000000)==0x0a000000); // b
  // sign-extended 24-bit word offset, scaled to bytes
  int rel=(insn<<8)>>6;
  return (u_char *)branch+rel+8;
}
#endif
206
207// Allocate a specific ARM register.
208static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
209{
210 int n;
211 int dirty=0;
212
213 // see if it's already allocated (and dealloc it)
214 for(n=0;n<HOST_REGS;n++)
215 {
216 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
217 dirty=(cur->dirty>>n)&1;
218 cur->regmap[n]=-1;
219 }
220 }
221
222 cur->regmap[hr]=reg;
223 cur->dirty&=~(1<<hr);
224 cur->dirty|=dirty<<hr;
225 cur->isconst&=~(1<<hr);
226}
227
228// Alloc cycle count into dedicated register
229static void alloc_cc(struct regstat *cur,int i)
230{
231 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
232}
233
234/* Assembler */
235
236static unused char regname[16][4] = {
237 "r0",
238 "r1",
239 "r2",
240 "r3",
241 "r4",
242 "r5",
243 "r6",
244 "r7",
245 "r8",
246 "r9",
247 "r10",
248 "fp",
249 "r12",
250 "sp",
251 "lr",
252 "pc"};
253
254static void output_w32(u_int word)
255{
256 *((u_int *)out)=word;
257 out+=4;
258}
259
// Combine three register numbers into one word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
267
// Combine rd, rn and an 8-bit immediate that is to be shifted left by an
// even amount `shift`. The shift is stored as (32-shift)&30 starting at
// bit 7, which is the same layout genimm produces.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rot=(32-shift)&30;
  return (rn<<16)|(rd<<12)|(rot<<7)|imm;
}
276
// Try to express imm as an 8-bit value rotated by an even amount.
// On success returns 1 and stores the rotate field (from bit 7 up) plus
// the 8-bit value in *encoded; on failure returns 0 and stores 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  u_int rot;
  // rot==32 is the unrotated value; (32&30)==0, so imm==0 encodes as 0
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  *encoded=0;
  return 0;
}
292
293static void genimm_checked(u_int imm,u_int *encoded)
294{
295 u_int ret=genimm(imm,encoded);
296 assert(ret);
297 (void)ret;
298}
299
300static u_int genjmp(u_int addr)
301{
302 if (addr < 3) return 0; // a branch that will be patched later
303 int offset = addr-(int)out-8;
304 if (offset < -33554432 || offset >= 33554432) {
305 SysPrintf("genjmp: out of range: %08x\n", offset);
306 abort();
307 return 0;
308 }
309 return ((u_int)offset>>2)&0xffffff;
310}
311
312static unused void emit_breakpoint(void)
313{
314 assem_debug("bkpt #0\n");
315 //output_w32(0xe1200070);
316 output_w32(0xe7f001f0);
317}
318
319static void emit_mov(int rs,int rt)
320{
321 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
322 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
323}
324
325static void emit_movs(int rs,int rt)
326{
327 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
328 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
329}
330
331static void emit_add(int rs1,int rs2,int rt)
332{
333 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
334 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
335}
336
337static void emit_adds(int rs1,int rs2,int rt)
338{
339 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
340 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
341}
342#define emit_adds_ptr emit_adds
343
344static void emit_adcs(int rs1,int rs2,int rt)
345{
346 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
347 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
348}
349
350static void emit_neg(int rs, int rt)
351{
352 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
353 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
354}
355
356static void emit_sub(int rs1,int rs2,int rt)
357{
358 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
359 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
360}
361
362static void emit_zeroreg(int rt)
363{
364 assem_debug("mov %s,#0\n",regname[rt]);
365 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
366}
367
368static void emit_loadlp(u_int imm,u_int rt)
369{
370 add_literal((int)out,imm);
371 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
372 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
373}
374
375#ifdef HAVE_ARMV7
376static void emit_movw(u_int imm,u_int rt)
377{
378 assert(imm<65536);
379 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
380 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
381}
382
383static void emit_movt(u_int imm,u_int rt)
384{
385 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
386 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
387}
388#endif
389
390static void emit_movimm(u_int imm,u_int rt)
391{
392 u_int armval;
393 if(genimm(imm,&armval)) {
394 assem_debug("mov %s,#%d\n",regname[rt],imm);
395 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
396 }else if(genimm(~imm,&armval)) {
397 assem_debug("mvn %s,#%d\n",regname[rt],imm);
398 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
399 }else if(imm<65536) {
400 #ifndef HAVE_ARMV7
401 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
402 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
403 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
404 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
405 #else
406 emit_movw(imm,rt);
407 #endif
408 }else{
409 #ifndef HAVE_ARMV7
410 emit_loadlp(imm,rt);
411 #else
412 emit_movw(imm&0x0000FFFF,rt);
413 emit_movt(imm&0xFFFF0000,rt);
414 #endif
415 }
416}
417
418static void emit_pcreladdr(u_int rt)
419{
420 assem_debug("add %s,pc,#?\n",regname[rt]);
421 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
422}
423
424static void emit_loadreg(int r, int hr)
425{
426 assert(hr != EXCLUDE_REG);
427 if (r == 0)
428 emit_zeroreg(hr);
429 else {
430 void *addr;
431 switch (r) {
432 //case HIREG: addr = &hi; break;
433 //case LOREG: addr = &lo; break;
434 case CCREG: addr = &cycle_count; break;
435 case CSREG: addr = &Status; break;
436 case INVCP: addr = &invc_ptr; break;
437 case ROREG: addr = &ram_offset; break;
438 default:
439 assert(r < 34);
440 addr = &psxRegs.GPR.r[r];
441 break;
442 }
443 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
444 assert(offset<4096);
445 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
446 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
447 }
448}
449
450static void emit_storereg(int r, int hr)
451{
452 assert(hr != EXCLUDE_REG);
453 int addr = (int)&psxRegs.GPR.r[r];
454 switch (r) {
455 //case HIREG: addr = &hi; break;
456 //case LOREG: addr = &lo; break;
457 case CCREG: addr = (int)&cycle_count; break;
458 default: assert(r < 34); break;
459 }
460 u_int offset = addr-(u_int)&dynarec_local;
461 assert(offset<4096);
462 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
463 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
464}
465
466static void emit_test(int rs, int rt)
467{
468 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
469 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
470}
471
472static void emit_testimm(int rs,int imm)
473{
474 u_int armval;
475 assem_debug("tst %s,#%d\n",regname[rs],imm);
476 genimm_checked(imm,&armval);
477 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
478}
479
480static void emit_testeqimm(int rs,int imm)
481{
482 u_int armval;
483 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
484 genimm_checked(imm,&armval);
485 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
486}
487
488static void emit_not(int rs,int rt)
489{
490 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
491 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
492}
493
494static void emit_and(u_int rs1,u_int rs2,u_int rt)
495{
496 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
497 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
498}
499
500static void emit_or(u_int rs1,u_int rs2,u_int rt)
501{
502 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
503 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
504}
505
506static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
507{
508 assert(rs<16);
509 assert(rt<16);
510 assert(imm<32);
511 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
512 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
513}
514
515static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
516{
517 assert(rs<16);
518 assert(rt<16);
519 assert(imm<32);
520 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
521 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
522}
523
524static void emit_xor(u_int rs1,u_int rs2,u_int rt)
525{
526 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
527 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
528}
529
530static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
531{
532 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
533 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
534}
535
536static void emit_addimm(u_int rs,int imm,u_int rt)
537{
538 assert(rs<16);
539 assert(rt<16);
540 if(imm!=0) {
541 u_int armval;
542 if(genimm(imm,&armval)) {
543 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
544 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
545 }else if(genimm(-imm,&armval)) {
546 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
547 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
548 #ifdef HAVE_ARMV7
549 }else if(rt!=rs&&(u_int)imm<65536) {
550 emit_movw(imm&0x0000ffff,rt);
551 emit_add(rs,rt,rt);
552 }else if(rt!=rs&&(u_int)-imm<65536) {
553 emit_movw(-imm&0x0000ffff,rt);
554 emit_sub(rs,rt,rt);
555 #endif
556 }else if((u_int)-imm<65536) {
557 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
558 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
559 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
560 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
561 }else {
562 do {
563 int shift = (ffs(imm) - 1) & ~1;
564 int imm8 = imm & (0xff << shift);
565 genimm_checked(imm8,&armval);
566 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
567 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
568 rs = rt;
569 imm &= ~imm8;
570 }
571 while (imm != 0);
572 }
573 }
574 else if(rs!=rt) emit_mov(rs,rt);
575}
576
577static void emit_addimm_and_set_flags(int imm,int rt)
578{
579 assert(imm>-65536&&imm<65536);
580 u_int armval;
581 if(genimm(imm,&armval)) {
582 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
583 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
584 }else if(genimm(-imm,&armval)) {
585 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
586 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
587 }else if(imm<0) {
588 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
589 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
590 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
591 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
592 }else{
593 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
594 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
595 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
596 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
597 }
598}
599
600static void emit_addnop(u_int r)
601{
602 assert(r<16);
603 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
604 output_w32(0xe2800000|rd_rn_rm(r,r,0));
605}
606
607static void emit_andimm(int rs,int imm,int rt)
608{
609 u_int armval;
610 if(imm==0) {
611 emit_zeroreg(rt);
612 }else if(genimm(imm,&armval)) {
613 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
614 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
615 }else if(genimm(~imm,&armval)) {
616 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
617 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
618 }else if(imm==65535) {
619 #ifndef HAVE_ARMV6
620 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
621 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
622 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
623 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
624 #else
625 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
626 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
627 #endif
628 }else{
629 assert(imm>0&&imm<65535);
630 #ifndef HAVE_ARMV7
631 assem_debug("mov r14,#%d\n",imm&0xFF00);
632 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
633 assem_debug("add r14,r14,#%d\n",imm&0xFF);
634 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
635 #else
636 emit_movw(imm,HOST_TEMPREG);
637 #endif
638 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
639 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
640 }
641}
642
643static void emit_orimm(int rs,int imm,int rt)
644{
645 u_int armval;
646 if(imm==0) {
647 if(rs!=rt) emit_mov(rs,rt);
648 }else if(genimm(imm,&armval)) {
649 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
650 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
651 }else{
652 assert(imm>0&&imm<65536);
653 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
654 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
655 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
656 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
657 }
658}
659
660static void emit_xorimm(int rs,int imm,int rt)
661{
662 u_int armval;
663 if(imm==0) {
664 if(rs!=rt) emit_mov(rs,rt);
665 }else if(genimm(imm,&armval)) {
666 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
667 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
668 }else{
669 assert(imm>0&&imm<65536);
670 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
671 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
672 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
673 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
674 }
675}
676
677static void emit_shlimm(int rs,u_int imm,int rt)
678{
679 assert(imm>0);
680 assert(imm<32);
681 //if(imm==1) ...
682 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
683 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
684}
685
686static void emit_lsls_imm(int rs,int imm,int rt)
687{
688 assert(imm>0);
689 assert(imm<32);
690 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
692}
693
694static unused void emit_lslpls_imm(int rs,int imm,int rt)
695{
696 assert(imm>0);
697 assert(imm<32);
698 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
699 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
700}
701
702static void emit_shrimm(int rs,u_int imm,int rt)
703{
704 assert(imm>0);
705 assert(imm<32);
706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
707 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
708}
709
710static void emit_sarimm(int rs,u_int imm,int rt)
711{
712 assert(imm>0);
713 assert(imm<32);
714 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
716}
717
718static void emit_rorimm(int rs,u_int imm,int rt)
719{
720 assert(imm>0);
721 assert(imm<32);
722 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
723 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
724}
725
// Sign-extend the low 16 bits of rs into rt: sxth with HAVE_ARMV6,
// otherwise a shift left by 16 followed by an arithmetic shift right.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
736
// Sign-extend the low 8 bits of rs into rt: sxtb with HAVE_ARMV6,
// otherwise a shift left by 24 followed by an arithmetic shift right.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
747
748static void emit_shl(u_int rs,u_int shift,u_int rt)
749{
750 assert(rs<16);
751 assert(rt<16);
752 assert(shift<16);
753 //if(imm==1) ...
754 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
755 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
756}
757
758static void emit_shr(u_int rs,u_int shift,u_int rt)
759{
760 assert(rs<16);
761 assert(rt<16);
762 assert(shift<16);
763 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
764 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
765}
766
767static void emit_sar(u_int rs,u_int shift,u_int rt)
768{
769 assert(rs<16);
770 assert(rt<16);
771 assert(shift<16);
772 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
773 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
774}
775
776static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
777{
778 assert(rs<16);
779 assert(rt<16);
780 assert(shift<16);
781 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
782 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
783}
784
785static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
786{
787 assert(rs<16);
788 assert(rt<16);
789 assert(shift<16);
790 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
791 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
792}
793
794static void emit_cmpimm(int rs,int imm)
795{
796 u_int armval;
797 if(genimm(imm,&armval)) {
798 assem_debug("cmp %s,#%d\n",regname[rs],imm);
799 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
800 }else if(genimm(-imm,&armval)) {
801 assem_debug("cmn %s,#%d\n",regname[rs],imm);
802 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
803 }else if(imm>0) {
804 assert(imm<65536);
805 emit_movimm(imm,HOST_TEMPREG);
806 assem_debug("cmp %s,r14\n",regname[rs]);
807 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
808 }else{
809 assert(imm>-65536);
810 emit_movimm(-imm,HOST_TEMPREG);
811 assem_debug("cmn %s,r14\n",regname[rs]);
812 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
813 }
814}
815
816static void emit_cmovne_imm(int imm,int rt)
817{
818 assem_debug("movne %s,#%d\n",regname[rt],imm);
819 u_int armval;
820 genimm_checked(imm,&armval);
821 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
822}
823
824static void emit_cmovl_imm(int imm,int rt)
825{
826 assem_debug("movlt %s,#%d\n",regname[rt],imm);
827 u_int armval;
828 genimm_checked(imm,&armval);
829 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
830}
831
832static void emit_cmovb_imm(int imm,int rt)
833{
834 assem_debug("movcc %s,#%d\n",regname[rt],imm);
835 u_int armval;
836 genimm_checked(imm,&armval);
837 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
838}
839
840static void emit_cmovae_imm(int imm,int rt)
841{
842 assem_debug("movcs %s,#%d\n",regname[rt],imm);
843 u_int armval;
844 genimm_checked(imm,&armval);
845 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
846}
847
848static void emit_cmovs_imm(int imm,int rt)
849{
850 assem_debug("movmi %s,#%d\n",regname[rt],imm);
851 u_int armval;
852 genimm_checked(imm,&armval);
853 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
854}
855
856static void emit_cmovne_reg(int rs,int rt)
857{
858 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
859 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
860}
861
862static void emit_cmovl_reg(int rs,int rt)
863{
864 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
865 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
866}
867
868static void emit_cmovb_reg(int rs,int rt)
869{
870 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
871 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
872}
873
874static void emit_cmovs_reg(int rs,int rt)
875{
876 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
877 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
878}
879
// rt = (rs < imm) ? 1 : 0, using the "lt" condition.
// When rt aliases rs it can only be cleared after the compare.
static void emit_slti32(int rs,int imm,int rt)
{
  if(rs==rt) {
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }else{
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }
  emit_cmovl_imm(1,rt);
}
887
// rt = (rs < imm) ? 1 : 0, using the "cc" condition.
// When rt aliases rs it can only be cleared after the compare.
static void emit_sltiu32(int rs,int imm,int rt)
{
  if(rs==rt) {
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }else{
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }
  emit_cmovb_imm(1,rt);
}
895
896static void emit_cmp(int rs,int rt)
897{
898 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
899 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
900}
901
// rt = 1, replaced by 0 when rs compares "lt" against 1.
// Sequence: cmp rs,#1; mov rt,#1; movlt rt,#0.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
909
// rt = (rs != 0) ? 1 : 0.
// The flags come from "tst rs,rs" when rt aliases rs, otherwise from
// "movs rt,rs", which also leaves 0 in rt for the zero case.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
917
// rt = (rs1 < rs2) ? 1 : 0, using the "lt" condition.
// If rt is one of the sources it is cleared only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
926
// rt = (rs1 < rs2) ? 1 : 0, using the "cc" condition.
// If rt is one of the sources it is cleared only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
935
936static int can_jump_or_call(const void *a)
937{
938 intptr_t offset = (u_char *)a - out - 8;
939 return (-33554432 <= offset && offset < 33554432);
940}
941
942static void emit_call(const void *a_)
943{
944 int a = (int)a_;
945 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
946 u_int offset=genjmp(a);
947 output_w32(0xeb000000|offset);
948}
949
950static void emit_jmp(const void *a_)
951{
952 int a = (int)a_;
953 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
954 u_int offset=genjmp(a);
955 output_w32(0xea000000|offset);
956}
957
// bne a_
static void emit_jne(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bne %x\n",dest);
  output_w32(0x1a000000|genjmp(dest));
}
965
// beq a_
static void emit_jeq(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("beq %x\n",dest);
  output_w32(0x0a000000|genjmp(dest));
}
973
// bmi a_
static void emit_js(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bmi %x\n",dest);
  output_w32(0x4a000000|genjmp(dest));
}
981
// bpl a_
static void emit_jns(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bpl %x\n",dest);
  output_w32(0x5a000000|genjmp(dest));
}
989
// blt a_
static void emit_jl(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("blt %x\n",dest);
  output_w32(0xba000000|genjmp(dest));
}
997
// bge a_
static void emit_jge(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bge %x\n",dest);
  output_w32(0xaa000000|genjmp(dest));
}
1005
// bvc a_
static void emit_jno(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bvc %x\n",dest);
  output_w32(0x7a000000|genjmp(dest));
}
1013
// bcs a_
static void emit_jc(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bcs %x\n",dest);
  output_w32(0x2a000000|genjmp(dest));
}
1021
// bcc a_
static void emit_jcc(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bcc %x\n",dest);
  output_w32(0x3a000000|genjmp(dest));
}
1029
1030static unused void emit_callreg(u_int r)
1031{
1032 assert(r<15);
1033 assem_debug("blx %s\n",regname[r]);
1034 output_w32(0xe12fff30|r);
1035}
1036
1037static void emit_jmpreg(u_int r)
1038{
1039 assem_debug("mov pc,%s\n",regname[r]);
1040 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1041}
1042
1043static void emit_ret(void)
1044{
1045 emit_jmpreg(14);
1046}
1047
1048static void emit_readword_indexed(int offset, int rs, int rt)
1049{
1050 assert(offset>-4096&&offset<4096);
1051 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1052 if(offset>=0) {
1053 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1054 }else{
1055 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1056 }
1057}
1058
1059static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1060{
1061 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1062 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1063}
1064#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1065
1066static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1067{
1068 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1069 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1070}
1071
1072static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1073{
1074 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1075 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1076}
1077
1078static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1079{
1080 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1081 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1082}
1083
1084static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1085{
1086 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1087 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1088}
1089
1090static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1091{
1092 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1093 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1094}
1095
1096static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1097{
1098 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1099 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1100}
1101
1102static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1103{
1104 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1105 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1106}
1107
1108static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1109{
1110 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1111 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1112}
1113
1114static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1115{
1116 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1117 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1118}
1119
1120static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1121{
1122 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1123 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1124}
1125
1126static void emit_str_dualindexed(int rs1, int rs2, int rt)
1127{
1128 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1129 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1130}
1131
1132static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1133{
1134 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1135 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1136}
1137
1138static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1139{
1140 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1141 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1142}
1143
1144static void emit_movsbl_indexed(int offset, int rs, int rt)
1145{
1146 assert(offset>-256&&offset<256);
1147 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1148 if(offset>=0) {
1149 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1150 }else{
1151 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1152 }
1153}
1154
1155static void emit_movswl_indexed(int offset, int rs, int rt)
1156{
1157 assert(offset>-256&&offset<256);
1158 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1159 if(offset>=0) {
1160 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1161 }else{
1162 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1163 }
1164}
1165
1166static void emit_movzbl_indexed(int offset, int rs, int rt)
1167{
1168 assert(offset>-4096&&offset<4096);
1169 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1170 if(offset>=0) {
1171 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1172 }else{
1173 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1174 }
1175}
1176
1177static void emit_movzwl_indexed(int offset, int rs, int rt)
1178{
1179 assert(offset>-256&&offset<256);
1180 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1181 if(offset>=0) {
1182 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1183 }else{
1184 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1185 }
1186}
1187
1188static void emit_ldrd(int offset, int rs, int rt)
1189{
1190 assert(offset>-256&&offset<256);
1191 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1192 if(offset>=0) {
1193 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1194 }else{
1195 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1196 }
1197}
1198
1199static void emit_readword(void *addr, int rt)
1200{
1201 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1202 assert(offset<4096);
1203 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1204 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1205}
1206#define emit_readptr emit_readword
1207
1208static void emit_writeword_indexed(int rt, int offset, int rs)
1209{
1210 assert(offset>-4096&&offset<4096);
1211 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1212 if(offset>=0) {
1213 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1214 }else{
1215 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1216 }
1217}
1218
1219static void emit_writehword_indexed(int rt, int offset, int rs)
1220{
1221 assert(offset>-256&&offset<256);
1222 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1223 if(offset>=0) {
1224 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1225 }else{
1226 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1227 }
1228}
1229
1230static void emit_writebyte_indexed(int rt, int offset, int rs)
1231{
1232 assert(offset>-4096&&offset<4096);
1233 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1234 if(offset>=0) {
1235 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1236 }else{
1237 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1238 }
1239}
1240
1241static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1242{
1243 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1244 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1245}
1246
1247static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1248{
1249 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1250 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1251}
1252
1253static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1254{
1255 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1256 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1257}
1258
1259static void emit_writeword(int rt, void *addr)
1260{
1261 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1262 assert(offset<4096);
1263 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1264 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1265}
1266
1267static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1268{
1269 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1270 assert(rs1<16);
1271 assert(rs2<16);
1272 assert(hi<16);
1273 assert(lo<16);
1274 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1275}
1276
1277static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1278{
1279 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1280 assert(rs1<16);
1281 assert(rs2<16);
1282 assert(hi<16);
1283 assert(lo<16);
1284 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1285}
1286
1287static void emit_clz(int rs,int rt)
1288{
1289 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1290 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1291}
1292
1293static void emit_subcs(int rs1,int rs2,int rt)
1294{
1295 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1296 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1297}
1298
1299static void emit_shrcc_imm(int rs,u_int imm,int rt)
1300{
1301 assert(imm>0);
1302 assert(imm<32);
1303 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1304 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1305}
1306
1307static void emit_shrne_imm(int rs,u_int imm,int rt)
1308{
1309 assert(imm>0);
1310 assert(imm<32);
1311 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1312 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1313}
1314
1315static void emit_negmi(int rs, int rt)
1316{
1317 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1318 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1319}
1320
1321static void emit_negsmi(int rs, int rt)
1322{
1323 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1324 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1325}
1326
1327static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1328{
1329 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1330 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1331}
1332
1333static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1334{
1335 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1336 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1337}
1338
1339static void emit_teq(int rs, int rt)
1340{
1341 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1342 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1343}
1344
1345static unused void emit_rsbimm(int rs, int imm, int rt)
1346{
1347 u_int armval;
1348 genimm_checked(imm,&armval);
1349 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1350 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1351}
1352
1353// Conditionally select one of two immediates, optimizing for small code size
1354// This will only be called if HAVE_CMOV_IMM is defined
1355static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1356{
1357 u_int armval;
1358 if(genimm(imm2-imm1,&armval)) {
1359 emit_movimm(imm1,rt);
1360 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1361 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1362 }else if(genimm(imm1-imm2,&armval)) {
1363 emit_movimm(imm1,rt);
1364 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1365 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1366 }
1367 else {
1368 #ifndef HAVE_ARMV7
1369 emit_movimm(imm1,rt);
1370 add_literal((int)out,imm2);
1371 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1372 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1373 #else
1374 emit_movw(imm1&0x0000FFFF,rt);
1375 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1376 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1377 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1378 }
1379 emit_movt(imm1&0xFFFF0000,rt);
1380 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1381 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1382 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1383 }
1384 #endif
1385 }
1386}
1387
1388// special case for checking invalid_code
1389static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1390{
1391 assert(imm<128&&imm>=0);
1392 assert(r>=0&&r<16);
1393 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1394 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1395 emit_cmpimm(HOST_TEMPREG,imm);
1396}
1397
// Emit "blne a"; the branch offset field comes from genjmp().
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000|genjmp(a));
}
1404
1405// Used to preload hash table entries
1406static unused void emit_prefetchreg(int r)
1407{
1408 assem_debug("pld %s\n",regname[r]);
1409 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1410}
1411
1412// Special case for mini_ht
1413static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1414{
1415 assert(offset<4096);
1416 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1417 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1418}
1419
1420static void emit_orrne_imm(int rs,int imm,int rt)
1421{
1422 u_int armval;
1423 genimm_checked(imm,&armval);
1424 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1425 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1426}
1427
1428static unused void emit_addpl_imm(int rs,int imm,int rt)
1429{
1430 u_int armval;
1431 genimm_checked(imm,&armval);
1432 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1433 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1434}
1435
1436static void emit_jno_unlikely(int a)
1437{
1438 //emit_jno(a);
1439 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1440 output_w32(0x72800000|rd_rn_rm(15,15,0));
1441}
1442
1443static void save_regs_all(u_int reglist)
1444{
1445 int i;
1446 if(!reglist) return;
1447 assem_debug("stmia fp,{");
1448 for(i=0;i<16;i++)
1449 if(reglist&(1<<i))
1450 assem_debug("r%d,",i);
1451 assem_debug("}\n");
1452 output_w32(0xe88b0000|reglist);
1453}
1454
1455static void restore_regs_all(u_int reglist)
1456{
1457 int i;
1458 if(!reglist) return;
1459 assem_debug("ldmia fp,{");
1460 for(i=0;i<16;i++)
1461 if(reglist&(1<<i))
1462 assem_debug("r%d,",i);
1463 assem_debug("}\n");
1464 output_w32(0xe89b0000|reglist);
1465}
1466
1467// Save registers before function call
1468static void save_regs(u_int reglist)
1469{
1470 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1471 save_regs_all(reglist);
1472}
1473
1474// Restore registers after function call
1475static void restore_regs(u_int reglist)
1476{
1477 reglist&=CALLER_SAVE_REGS;
1478 restore_regs_all(reglist);
1479}
1480
1481/* Stubs/epilogue */
1482
1483static void literal_pool(int n)
1484{
1485 if(!literalcount) return;
1486 if(n) {
1487 if((int)out-literals[0][0]<4096-n) return;
1488 }
1489 u_int *ptr;
1490 int i;
1491 for(i=0;i<literalcount;i++)
1492 {
1493 u_int l_addr=(u_int)out;
1494 int j;
1495 for(j=0;j<i;j++) {
1496 if(literals[j][1]==literals[i][1]) {
1497 //printf("dup %08x\n",literals[i][1]);
1498 l_addr=literals[j][0];
1499 break;
1500 }
1501 }
1502 ptr=(u_int *)literals[i][0];
1503 u_int offset=l_addr-(u_int)ptr-8;
1504 assert(offset<4096);
1505 assert(!(offset&3));
1506 *ptr|=offset;
1507 if(l_addr==(u_int)out) {
1508 literals[i][0]=l_addr; // remember for dupes
1509 output_w32(literals[i][1]);
1510 }
1511 }
1512 literalcount=0;
1513}
1514
1515static void literal_pool_jumpover(int n)
1516{
1517 if(!literalcount) return;
1518 if(n) {
1519 if((int)out-literals[0][0]<4096-n) return;
1520 }
1521 void *jaddr = out;
1522 emit_jmp(0);
1523 literal_pool(0);
1524 set_jump_target(jaddr, out);
1525}
1526
1527// parsed by get_pointer, find_extjump_insn
1528static void emit_extjump(u_char *addr, u_int target)
1529{
1530 u_char *ptr=(u_char *)addr;
1531 assert((ptr[3]&0x0e)==0xa);
1532 (void)ptr;
1533
1534 emit_loadlp(target,0);
1535 emit_loadlp((u_int)addr,1);
1536 assert(ndrc->translation_cache <= addr &&
1537 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
1538 emit_far_jump(dyna_linker);
1539}
1540
// Debug check: the second word at src must be a pc-relative ldr.
static void check_extjump2(void *src)
{
  const u_int *insn = src;
  assert((insn[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
  (void)insn;
}
1547
1548// put rt_val into rt, potentially making use of rs with value rs_val
1549static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1550{
1551 u_int armval;
1552 int diff;
1553 if(genimm(rt_val,&armval)) {
1554 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1555 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1556 return;
1557 }
1558 if(genimm(~rt_val,&armval)) {
1559 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1560 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1561 return;
1562 }
1563 diff=rt_val-rs_val;
1564 if(genimm(diff,&armval)) {
1565 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1566 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1567 return;
1568 }else if(genimm(-diff,&armval)) {
1569 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1570 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1571 return;
1572 }
1573 emit_movimm(rt_val,rt);
1574}
1575
// return 1 if emit_movimm_from can cheaply derive v2 from a register holding v1:
// the values are equal, or v2-v1 (or v1-v2) shrinks below 0x100 once its
// trailing zero bit pairs are shifted out
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2)
    return 1;

  const u_int delta=v2-v1;
  u_int bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  // same test on the negated difference
  bits=0u-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100;
}
1591
1592static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1593{
1594 switch(type) {
1595 case LOADB_STUB: emit_signextend8(rs,rt); break;
1596 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1597 case LOADH_STUB: emit_signextend16(rs,rt); break;
1598 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1599 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1600 default: assert(0);
1601 }
1602}
1603
1604#include "pcsxmem.h"
1605#include "pcsxmem_inline.c"
1606
1607static void do_readstub(int n)
1608{
1609 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1610 literal_pool(256);
1611 set_jump_target(stubs[n].addr, out);
1612 enum stub_type type=stubs[n].type;
1613 int i=stubs[n].a;
1614 int rs=stubs[n].b;
1615 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1616 u_int reglist=stubs[n].e;
1617 const signed char *i_regmap=i_regs->regmap;
1618 int rt;
1619 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1620 rt=get_reg(i_regmap,FTEMP);
1621 }else{
1622 rt=get_reg(i_regmap,dops[i].rt1);
1623 }
1624 assert(rs>=0);
1625 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1626 void *restore_jump = NULL;
1627 reglist|=(1<<rs);
1628 for(r=0;r<=12;r++) {
1629 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1630 temp=r; break;
1631 }
1632 }
1633 if(rt>=0&&dops[i].rt1!=0)
1634 reglist&=~(1<<rt);
1635 if(temp==-1) {
1636 save_regs(reglist);
1637 regs_saved=1;
1638 temp=(rs==0)?2:0;
1639 }
1640 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1641 temp2=1;
1642 emit_readword(&mem_rtab,temp);
1643 emit_shrimm(rs,12,temp2);
1644 emit_readword_dualindexedx4(temp,temp2,temp2);
1645 emit_lsls_imm(temp2,1,temp2);
1646 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1647 switch(type) {
1648 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1649 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1650 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1651 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1652 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1653 default: assert(0);
1654 }
1655 }
1656 if(regs_saved) {
1657 restore_jump=out;
1658 emit_jcc(0); // jump to reg restore
1659 }
1660 else
1661 emit_jcc(stubs[n].retaddr); // return address
1662
1663 if(!regs_saved)
1664 save_regs(reglist);
1665 void *handler=NULL;
1666 if(type==LOADB_STUB||type==LOADBU_STUB)
1667 handler=jump_handler_read8;
1668 if(type==LOADH_STUB||type==LOADHU_STUB)
1669 handler=jump_handler_read16;
1670 if(type==LOADW_STUB)
1671 handler=jump_handler_read32;
1672 assert(handler);
1673 pass_args(rs,temp2);
1674 int cc=get_reg(i_regmap,CCREG);
1675 if(cc<0)
1676 emit_loadreg(CCREG,2);
1677 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1678 emit_far_call(handler);
1679 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1680 mov_loadtype_adj(type,0,rt);
1681 }
1682 if(restore_jump)
1683 set_jump_target(restore_jump, out);
1684 restore_regs(reglist);
1685 emit_jmp(stubs[n].retaddr); // return address
1686}
1687
1688static void inline_readstub(enum stub_type type, int i, u_int addr,
1689 const signed char regmap[], int target, int adj, u_int reglist)
1690{
1691 int rs=get_reg(regmap,target);
1692 int rt=get_reg(regmap,target);
1693 if(rs<0) rs=get_reg_temp(regmap);
1694 assert(rs>=0);
1695 u_int is_dynamic;
1696 uintptr_t host_addr = 0;
1697 void *handler;
1698 int cc=get_reg(regmap,CCREG);
1699 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1700 return;
1701 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1702 if (handler == NULL) {
1703 if(rt<0||dops[i].rt1==0)
1704 return;
1705 if(addr!=host_addr)
1706 emit_movimm_from(addr,rs,host_addr,rs);
1707 switch(type) {
1708 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1709 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1710 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1711 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1712 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1713 default: assert(0);
1714 }
1715 return;
1716 }
1717 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1718 if(is_dynamic) {
1719 if(type==LOADB_STUB||type==LOADBU_STUB)
1720 handler=jump_handler_read8;
1721 if(type==LOADH_STUB||type==LOADHU_STUB)
1722 handler=jump_handler_read16;
1723 if(type==LOADW_STUB)
1724 handler=jump_handler_read32;
1725 }
1726
1727 // call a memhandler
1728 if(rt>=0&&dops[i].rt1!=0)
1729 reglist&=~(1<<rt);
1730 save_regs(reglist);
1731 if(target==0)
1732 emit_movimm(addr,0);
1733 else if(rs!=0)
1734 emit_mov(rs,0);
1735 if(cc<0)
1736 emit_loadreg(CCREG,2);
1737 if(is_dynamic) {
1738 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1739 emit_addimm(cc<0?2:cc,adj,2);
1740 }
1741 else {
1742 emit_readword(&last_count,3);
1743 emit_addimm(cc<0?2:cc,adj,2);
1744 emit_add(2,3,2);
1745 emit_writeword(2,&Count);
1746 }
1747
1748 emit_far_call(handler);
1749
1750 if(rt>=0&&dops[i].rt1!=0) {
1751 switch(type) {
1752 case LOADB_STUB: emit_signextend8(0,rt); break;
1753 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1754 case LOADH_STUB: emit_signextend16(0,rt); break;
1755 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1756 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1757 default: assert(0);
1758 }
1759 }
1760 restore_regs(reglist);
1761}
1762
1763static void do_writestub(int n)
1764{
1765 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1766 literal_pool(256);
1767 set_jump_target(stubs[n].addr, out);
1768 enum stub_type type=stubs[n].type;
1769 int i=stubs[n].a;
1770 int rs=stubs[n].b;
1771 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1772 u_int reglist=stubs[n].e;
1773 const signed char *i_regmap=i_regs->regmap;
1774 int rt,r;
1775 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1776 rt=get_reg(i_regmap,r=FTEMP);
1777 }else{
1778 rt=get_reg(i_regmap,r=dops[i].rs2);
1779 }
1780 assert(rs>=0);
1781 assert(rt>=0);
1782 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1783 void *restore_jump = NULL;
1784 int reglist2=reglist|(1<<rs)|(1<<rt);
1785 for(rtmp=0;rtmp<=12;rtmp++) {
1786 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1787 temp=rtmp; break;
1788 }
1789 }
1790 if(temp==-1) {
1791 save_regs(reglist);
1792 regs_saved=1;
1793 for(rtmp=0;rtmp<=3;rtmp++)
1794 if(rtmp!=rs&&rtmp!=rt)
1795 {temp=rtmp;break;}
1796 }
1797 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1798 temp2=3;
1799 emit_readword(&mem_wtab,temp);
1800 emit_shrimm(rs,12,temp2);
1801 emit_readword_dualindexedx4(temp,temp2,temp2);
1802 emit_lsls_imm(temp2,1,temp2);
1803 switch(type) {
1804 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1805 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1806 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1807 default: assert(0);
1808 }
1809 if(regs_saved) {
1810 restore_jump=out;
1811 emit_jcc(0); // jump to reg restore
1812 }
1813 else
1814 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1815
1816 if(!regs_saved)
1817 save_regs(reglist);
1818 void *handler=NULL;
1819 switch(type) {
1820 case STOREB_STUB: handler=jump_handler_write8; break;
1821 case STOREH_STUB: handler=jump_handler_write16; break;
1822 case STOREW_STUB: handler=jump_handler_write32; break;
1823 default: assert(0);
1824 }
1825 assert(handler);
1826 pass_args(rs,rt);
1827 if(temp2!=3)
1828 emit_mov(temp2,3);
1829 int cc=get_reg(i_regmap,CCREG);
1830 if(cc<0)
1831 emit_loadreg(CCREG,2);
1832 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1833 // returns new cycle_count
1834 emit_far_call(handler);
1835 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1836 if(cc<0)
1837 emit_storereg(CCREG,2);
1838 if(restore_jump)
1839 set_jump_target(restore_jump, out);
1840 restore_regs(reglist);
1841 emit_jmp(stubs[n].retaddr);
1842}
1843
1844static void inline_writestub(enum stub_type type, int i, u_int addr,
1845 const signed char regmap[], int target, int adj, u_int reglist)
1846{
1847 int rs=get_reg_temp(regmap);
1848 int rt=get_reg(regmap,target);
1849 assert(rs>=0);
1850 assert(rt>=0);
1851 uintptr_t host_addr = 0;
1852 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1853 if (handler == NULL) {
1854 if(addr!=host_addr)
1855 emit_movimm_from(addr,rs,host_addr,rs);
1856 switch(type) {
1857 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1858 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1859 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1860 default: assert(0);
1861 }
1862 return;
1863 }
1864
1865 // call a memhandler
1866 save_regs(reglist);
1867 pass_args(rs,rt);
1868 int cc=get_reg(regmap,CCREG);
1869 if(cc<0)
1870 emit_loadreg(CCREG,2);
1871 emit_addimm(cc<0?2:cc,adj,2);
1872 emit_movimm((u_int)handler,3);
1873 // returns new cycle_count
1874 emit_far_call(jump_handler_write_h);
1875 emit_addimm(0,-adj,cc<0?2:cc);
1876 if(cc<0)
1877 emit_storereg(CCREG,2);
1878 restore_regs(reglist);
1879}
1880
1881/* Special assem */
1882
1883static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1884{
1885 save_regs_all(reglist);
1886 cop2_do_stall_check(op, i, i_regs, 0);
1887#ifdef PCNT
1888 emit_movimm(op, 0);
1889 emit_far_call(pcnt_gte_start);
1890#endif
1891 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
1892}
1893
1894static void c2op_epilogue(u_int op,u_int reglist)
1895{
1896#ifdef PCNT
1897 emit_movimm(op,0);
1898 emit_far_call(pcnt_gte_end);
1899#endif
1900 restore_regs_all(reglist);
1901}
1902
1903static void c2op_call_MACtoIR(int lm,int need_flags)
1904{
1905 if(need_flags)
1906 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
1907 else
1908 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
1909}
1910
1911static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
1912{
1913 emit_far_call(func);
1914 // func is C code and trashes r0
1915 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1916 if(need_flags||need_ir)
1917 c2op_call_MACtoIR(lm,need_flags);
1918 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
1919}
1920
1921static void c2op_assemble(int i, const struct regstat *i_regs)
1922{
1923 u_int c2op = source[i] & 0x3f;
1924 u_int reglist_full = get_host_reglist(i_regs->regmap);
1925 u_int reglist = reglist_full & CALLER_SAVE_REGS;
1926 int need_flags, need_ir;
1927
1928 if (gte_handlers[c2op]!=NULL) {
1929 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1930 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1931 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
1932 source[i],gte_unneeded[i+1],need_flags,need_ir);
1933 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1934 need_flags=0;
1935 int shift = (source[i] >> 19) & 1;
1936 int lm = (source[i] >> 10) & 1;
1937 switch(c2op) {
1938#ifndef DRC_DBG
1939 case GTE_MVMVA: {
1940#ifdef HAVE_ARMV5
1941 int v = (source[i] >> 15) & 3;
1942 int cv = (source[i] >> 13) & 3;
1943 int mx = (source[i] >> 17) & 3;
1944 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
1945 c2op_prologue(c2op,i,i_regs,reglist);
1946 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
1947 if(v<3)
1948 emit_ldrd(v*8,0,4);
1949 else {
1950 emit_movzwl_indexed(9*4,0,4); // gteIR
1951 emit_movzwl_indexed(10*4,0,6);
1952 emit_movzwl_indexed(11*4,0,5);
1953 emit_orrshl_imm(6,16,4);
1954 }
1955 if(mx<3)
1956 emit_addimm(0,32*4+mx*8*4,6);
1957 else
1958 emit_readword(&zeromem_ptr,6);
1959 if(cv<3)
1960 emit_addimm(0,32*4+(cv*8+5)*4,7);
1961 else
1962 emit_readword(&zeromem_ptr,7);
1963#ifdef __ARM_NEON__
1964 emit_movimm(source[i],1); // opcode
1965 emit_far_call(gteMVMVA_part_neon);
1966 if(need_flags) {
1967 emit_movimm(lm,1);
1968 emit_far_call(gteMACtoIR_flags_neon);
1969 }
1970#else
1971 if(cv==3&&shift)
1972 emit_far_call(gteMVMVA_part_cv3sh12_arm);
1973 else {
1974 emit_movimm(shift,1);
1975 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
1976 }
1977 if(need_flags||need_ir)
1978 c2op_call_MACtoIR(lm,need_flags);
1979#endif
1980#else /* if not HAVE_ARMV5 */
1981 c2op_prologue(c2op,i,i_regs,reglist);
1982 emit_movimm(source[i],1); // opcode
1983 emit_writeword(1,&psxRegs.code);
1984 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1985#endif
1986 break;
1987 }
1988 case GTE_OP:
1989 c2op_prologue(c2op,i,i_regs,reglist);
1990 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
1991 if(need_flags||need_ir) {
1992 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1993 c2op_call_MACtoIR(lm,need_flags);
1994 }
1995 break;
1996 case GTE_DPCS:
1997 c2op_prologue(c2op,i,i_regs,reglist);
1998 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
1999 break;
2000 case GTE_INTPL:
2001 c2op_prologue(c2op,i,i_regs,reglist);
2002 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2003 break;
2004 case GTE_SQR:
2005 c2op_prologue(c2op,i,i_regs,reglist);
2006 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2007 if(need_flags||need_ir) {
2008 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2009 c2op_call_MACtoIR(lm,need_flags);
2010 }
2011 break;
2012 case GTE_DCPL:
2013 c2op_prologue(c2op,i,i_regs,reglist);
2014 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2015 break;
2016 case GTE_GPF:
2017 c2op_prologue(c2op,i,i_regs,reglist);
2018 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2019 break;
2020 case GTE_GPL:
2021 c2op_prologue(c2op,i,i_regs,reglist);
2022 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2023 break;
2024#endif
2025 default:
2026 c2op_prologue(c2op,i,i_regs,reglist);
2027#ifdef DRC_DBG
2028 emit_movimm(source[i],1); // opcode
2029 emit_writeword(1,&psxRegs.code);
2030#endif
2031 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2032 break;
2033 }
2034 c2op_epilogue(c2op,reglist);
2035 }
2036}
2037
// Emit code computing, from the value in register sl, into register temp:
//   value = value & 0x7ffff000;
//   if (value & 0x7f87e000) value |= 0x80000000;
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // clear the low 12 bits
  emit_shrimm(sl,12,temp);
  emit_shlimm(temp,12,temp);
  // the 0x7f87e000 mask is tested in three pieces
  emit_testimm(temp,0x7f000000);
  emit_testeqimm(temp,0x00870000);
  emit_testeqimm(temp,0x0000e000);
  emit_orrne_imm(temp,0x80000000,temp);
}
2049
2050static void do_mfc2_31_one(u_int copr,signed char temp)
2051{
2052 emit_readword(&reg_cop2d[copr],temp);
2053 emit_lsls_imm(temp,16,temp);
2054 emit_cmovs_imm(0,temp);
2055 emit_cmpimm(temp,0xf80<<16);
2056 emit_andimm(temp,0xf80<<16,temp);
2057 emit_cmovae_imm(0xf80<<16,temp);
2058}
2059
2060static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2061{
2062 if (temp < 0) {
2063 host_tempreg_acquire();
2064 temp = HOST_TEMPREG;
2065 }
2066 do_mfc2_31_one(9,temp);
2067 emit_shrimm(temp,7+16,tl);
2068 do_mfc2_31_one(10,temp);
2069 emit_orrshr_imm(temp,2+16,tl);
2070 do_mfc2_31_one(11,temp);
2071 emit_orrshr_imm(temp,-3+16,tl);
2072 emit_writeword(tl,&reg_cop2d[29]);
2073 if (temp == HOST_TEMPREG)
2074 host_tempreg_release();
2075}
2076
2077static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
2078{
2079 // case 0x18: MULT
2080 // case 0x19: MULTU
2081 // case 0x1A: DIV
2082 // case 0x1B: DIVU
2083 // case 0x1C: DMULT
2084 // case 0x1D: DMULTU
2085 // case 0x1E: DDIV
2086 // case 0x1F: DDIVU
2087 if(dops[i].rs1&&dops[i].rs2)
2088 {
2089 if((dops[i].opcode2&4)==0) // 32-bit
2090 {
2091 if(dops[i].opcode2==0x18) // MULT
2092 {
2093 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2094 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2095 signed char hi=get_reg(i_regs->regmap,HIREG);
2096 signed char lo=get_reg(i_regs->regmap,LOREG);
2097 assert(m1>=0);
2098 assert(m2>=0);
2099 assert(hi>=0);
2100 assert(lo>=0);
2101 emit_smull(m1,m2,hi,lo);
2102 }
2103 if(dops[i].opcode2==0x19) // MULTU
2104 {
2105 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2106 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2107 signed char hi=get_reg(i_regs->regmap,HIREG);
2108 signed char lo=get_reg(i_regs->regmap,LOREG);
2109 assert(m1>=0);
2110 assert(m2>=0);
2111 assert(hi>=0);
2112 assert(lo>=0);
2113 emit_umull(m1,m2,hi,lo);
2114 }
2115 if(dops[i].opcode2==0x1A) // DIV
2116 {
2117 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2118 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2119 assert(d1>=0);
2120 assert(d2>=0);
2121 signed char quotient=get_reg(i_regs->regmap,LOREG);
2122 signed char remainder=get_reg(i_regs->regmap,HIREG);
2123 assert(quotient>=0);
2124 assert(remainder>=0);
2125 emit_movs(d1,remainder);
2126 emit_movimm(0xffffffff,quotient);
2127 emit_negmi(quotient,quotient); // .. quotient and ..
2128 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2129 emit_movs(d2,HOST_TEMPREG);
2130 emit_jeq(out+52); // Division by zero
2131 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2132#ifdef HAVE_ARMV5
2133 emit_clz(HOST_TEMPREG,quotient);
2134 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2135#else
2136 emit_movimm(0,quotient);
2137 emit_addpl_imm(quotient,1,quotient);
2138 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2139 emit_jns(out-2*4);
2140#endif
2141 emit_orimm(quotient,1<<31,quotient);
2142 emit_shr(quotient,quotient,quotient);
2143 emit_cmp(remainder,HOST_TEMPREG);
2144 emit_subcs(remainder,HOST_TEMPREG,remainder);
2145 emit_adcs(quotient,quotient,quotient);
2146 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2147 emit_jcc(out-16); // -4
2148 emit_teq(d1,d2);
2149 emit_negmi(quotient,quotient);
2150 emit_test(d1,d1);
2151 emit_negmi(remainder,remainder);
2152 }
2153 if(dops[i].opcode2==0x1B) // DIVU
2154 {
2155 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2156 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2157 assert(d1>=0);
2158 assert(d2>=0);
2159 signed char quotient=get_reg(i_regs->regmap,LOREG);
2160 signed char remainder=get_reg(i_regs->regmap,HIREG);
2161 assert(quotient>=0);
2162 assert(remainder>=0);
2163 emit_mov(d1,remainder);
2164 emit_movimm(0xffffffff,quotient); // div0 case
2165 emit_test(d2,d2);
2166 emit_jeq(out+40); // Division by zero
2167#ifdef HAVE_ARMV5
2168 emit_clz(d2,HOST_TEMPREG);
2169 emit_movimm(1<<31,quotient);
2170 emit_shl(d2,HOST_TEMPREG,d2);
2171#else
2172 emit_movimm(0,HOST_TEMPREG);
2173 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2174 emit_lslpls_imm(d2,1,d2);
2175 emit_jns(out-2*4);
2176 emit_movimm(1<<31,quotient);
2177#endif
2178 emit_shr(quotient,HOST_TEMPREG,quotient);
2179 emit_cmp(remainder,d2);
2180 emit_subcs(remainder,d2,remainder);
2181 emit_adcs(quotient,quotient,quotient);
2182 emit_shrcc_imm(d2,1,d2);
2183 emit_jcc(out-16); // -4
2184 }
2185 }
2186 else // 64-bit
2187 assert(0);
2188 }
2189 else
2190 {
2191 // Multiply by zero is zero.
2192 // MIPS does not have a divide by zero exception.
2193 // The result is undefined, we return zero.
2194 signed char hr=get_reg(i_regs->regmap,HIREG);
2195 signed char lr=get_reg(i_regs->regmap,LOREG);
2196 if(hr>=0) emit_zeroreg(hr);
2197 if(lr>=0) emit_zeroreg(lr);
2198 }
2199}
2200#define multdiv_assemble multdiv_assemble_arm
2201
2202static void do_jump_vaddr(int rs)
2203{
2204 emit_far_jump(jump_vaddr_reg[rs]);
2205}
2206
// Intentionally a no-op on ARM: the hash is produced by a single
// instruction in do_rhash(), so no register needs preloading here.
// (On x86 this step puts the value 0xf8 into the register.)
static void do_preload_rhash(int r) {
  (void)r; // nothing to emit
}
2211
2212static void do_preload_rhtbl(int ht) {
2213 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2214}
2215
// Emit an and-immediate of rs with 0xf8 targeting rh; the same
// (address & 0xFF) >> 3 slot selection, pre-scaled by 8, that
// do_miniht_insert() applies when indexing mini_ht.
static void do_rhash(int rs,int rh) {
  const int hash_mask = 0xf8;

  emit_andimm(rs, hash_mask, rh);
}
2219
2220static void do_miniht_load(int ht,int rh) {
2221 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2222 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2223}
2224
// Emit the mini hash table dispatch: compare rh against rs, emit a
// conditional (eq) indexed load through ht with offset 4 into r15, then
// emit the generic jump_vaddr path for the target register.
static void do_miniht_jump(int rs,int rh,int ht) {
  int target_reg = rs;

  emit_cmp(rh, target_reg);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // With this hack the jump always goes through r7.
  if (target_reg != 7) {
    emit_mov(target_reg, 7);
    target_reg = 7;
  }
#endif
  do_jump_vaddr(target_reg);
}
2235
2236static void do_miniht_insert(u_int return_address,int rt,int temp) {
2237 #ifndef HAVE_ARMV7
2238 emit_movimm(return_address,rt); // PC into link register
2239 add_to_linker(out,return_address,1);
2240 emit_pcreladdr(temp);
2241 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2242 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2243 #else
2244 emit_movw(return_address&0x0000FFFF,rt);
2245 add_to_linker(out,return_address,1);
2246 emit_pcreladdr(temp);
2247 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2248 emit_movt(return_address&0xFFFF0000,rt);
2249 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2250 #endif
2251}
2252
2253// CPU-architecture-specific initialization
2254static void arch_init(void)
2255{
2256 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2257 struct tramp_insns *ops = ndrc->tramp.ops;
2258 size_t i;
2259 assert(!(diff & 3));
2260 assert(diff < 0x1000);
2261 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2262 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2263 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2264 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2265}
2266
2267// vim:shiftwidth=2:expandtab