drc: a bit more sophisticated f1 hack
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#define unused __attribute__((unused))
31
32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
/* Entry points defined outside this file (no bodies are visible here). */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

/* One jump_vaddr stub per host register that has one. */
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

/*
 * jump_vaddr stub indexed by host register number.
 * Slots 11 and 13-15 (fp, sp, lr, pc in regname[]) have no stub and are
 * left as null pointers.
 */
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
72
73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
104static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
105
106/* Linker */
107
/*
 * Patch the instruction at addr so that it refers to target_.
 *
 * The byte at offset 3 of the instruction word selects the patch form:
 *   0xe2  - 12-bit immediate field rewritten; distance must be < 1024
 *   0x72  - produced by emit_jno_unlikely; immediate form when the distance
 *           fits, otherwise the word is replaced with a 0x7A...... branch
 *   other - must be a branch (b/bl); its 24-bit offset field is rewritten
 */
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = (u_int *)addr;
  const u_char top = ((u_char *)addr)[3];
  const u_int target = (u_int)target_;
  // distance measured from insn + 8
  const u_int disp = target - (u_int)insn - 8;

  switch (top) {
  case 0xe2:
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*insn);
    break;

  case 0x72:
    // generated by emit_jno_unlikely
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    }
    else if (disp < 4096 && !(disp & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    }
    else {
      *insn = (0x7A000000) | ((disp << 6) >> 8);
    }
    break;

  default:
    assert((top & 0x0e) == 0xa);
    *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
    break;
  }
}
139
// Disabled variant of set_jump_target: it can additionally copy the
// instruction found at the branch target into the slot just before the
// branch.  Works, but the difference in speed is usually insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  const u_char top = ((u_char *)addr)[3];

  assert(!copy || insn[-1] == 0xe28dd000);
  if (top == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((top & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0)           // ALU, no immediate, no flags
    copy = 0;
  if ((target_insn & 0x0c100000) == 0x04100000)  // Load
    copy = 0;
  if (target_insn & 0x08000000)
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
174
175/* Literal pool */
176static void add_literal(int addr,int val)
177{
178 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
179 literals[literalcount][0]=addr;
180 literals[literalcount][1]=val;
181 literalcount++;
182}
183
184// from a pointer to external jump stub (which was produced by emit_extjump2)
185// find where the jumping insn is
186static void *find_extjump_insn(void *stub)
187{
188 int *ptr=(int *)(stub+4);
189 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
190 u_int offset=*ptr&0xfff;
191 void **l_ptr=(void *)ptr+offset+8;
192 return *l_ptr;
193}
194
195// find where external branch is liked to using addr of it's stub:
196// get address that insn one after stub loads (dyna_linker arg1),
197// treat it as a pointer to branch insn,
198// return addr where that branch jumps to
199static void *get_pointer(void *stub)
200{
201 //printf("get_pointer(%x)\n",(int)stub);
202 int *i_ptr=find_extjump_insn(stub);
203 assert((*i_ptr&0x0f000000)==0x0a000000); // b
204 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
205}
206
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code
static void *get_clean_addr(void *addr)
{
#ifdef HAVE_ARMV7
  signed int *insn = (signed int *)addr + 6;
#else
  signed int *insn = (signed int *)addr + 4;
#endif

  // the bl may sit one word further on
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;

  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
225
226static int verify_dirty(const u_int *ptr)
227{
228 #ifndef HAVE_ARMV7
229 u_int offset;
230 // get from literal pool
231 assert((*ptr&0xFFFF0000)==0xe59f0000);
232 offset=*ptr&0xfff;
233 u_int source=*(u_int*)((void *)ptr+offset+8);
234 ptr++;
235 assert((*ptr&0xFFFF0000)==0xe59f0000);
236 offset=*ptr&0xfff;
237 u_int copy=*(u_int*)((void *)ptr+offset+8);
238 ptr++;
239 assert((*ptr&0xFFFF0000)==0xe59f0000);
240 offset=*ptr&0xfff;
241 u_int len=*(u_int*)((void *)ptr+offset+8);
242 ptr++;
243 ptr++;
244 #else
245 // ARMv7 movw/movt
246 assert((*ptr&0xFFF00000)==0xe3000000);
247 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
248 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
249 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
250 ptr+=6;
251 #endif
252 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
253 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
254 //printf("verify_dirty: %x %x %x\n",source,copy,len);
255 return !memcmp((void *)source,(void *)copy,len);
256}
257
258// This doesn't necessarily find all clean entry points, just
259// guarantees that it's not dirty
260static int isclean(void *addr)
261{
262 #ifndef HAVE_ARMV7
263 u_int *ptr=((u_int *)addr)+4;
264 #else
265 u_int *ptr=((u_int *)addr)+6;
266 #endif
267 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
268 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
270 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
271 return 1;
272}
273
274// get source that block at addr was compiled from (host pointers)
275static void get_bounds(void *addr, u_char **start, u_char **end)
276{
277 u_int *ptr = addr;
278 #ifndef HAVE_ARMV7
279 u_int offset;
280 // get from literal pool
281 assert((*ptr&0xFFFF0000)==0xe59f0000);
282 offset=*ptr&0xfff;
283 u_int source=*(u_int*)((void *)ptr+offset+8);
284 ptr++;
285 //assert((*ptr&0xFFFF0000)==0xe59f0000);
286 //offset=*ptr&0xfff;
287 //u_int copy=*(u_int*)((void *)ptr+offset+8);
288 ptr++;
289 assert((*ptr&0xFFFF0000)==0xe59f0000);
290 offset=*ptr&0xfff;
291 u_int len=*(u_int*)((void *)ptr+offset+8);
292 ptr++;
293 ptr++;
294 #else
295 // ARMv7 movw/movt
296 assert((*ptr&0xFFF00000)==0xe3000000);
297 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
298 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
299 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
300 ptr+=6;
301 #endif
302 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
303 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
304 *start=(u_char *)source;
305 *end=(u_char *)source+len;
306}
307
308// Allocate a specific ARM register.
309static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
310{
311 int n;
312 int dirty=0;
313
314 // see if it's already allocated (and dealloc it)
315 for(n=0;n<HOST_REGS;n++)
316 {
317 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
318 dirty=(cur->dirty>>n)&1;
319 cur->regmap[n]=-1;
320 }
321 }
322
323 cur->regmap[hr]=reg;
324 cur->dirty&=~(1<<hr);
325 cur->dirty|=dirty<<hr;
326 cur->isconst&=~(1<<hr);
327}
328
329// Alloc cycle count into dedicated register
330static void alloc_cc(struct regstat *cur,int i)
331{
332 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
333}
334
335/* Assembler */
336
337static unused char regname[16][4] = {
338 "r0",
339 "r1",
340 "r2",
341 "r3",
342 "r4",
343 "r5",
344 "r6",
345 "r7",
346 "r8",
347 "r9",
348 "r10",
349 "fp",
350 "r12",
351 "sp",
352 "lr",
353 "pc"};
354
355static void output_w32(u_int word)
356{
357 *((u_int *)out)=word;
358 out+=4;
359}
360
// Pack three register numbers into instruction fields:
// rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);

  const u_int rn_field = rn << 16;
  const u_int rd_field = rd << 12;
  return rn_field | rd_field | rm;
}
368
// Pack rd, rn and an 8-bit immediate that is to be shifted left by an even
// amount: the rotate field (bits 8-11) is derived from (32 - shift).
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);

  const u_int rotate_field = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rotate_field | imm;
}
377
// Try to express imm as an 8-bit value plus an even rotation.
// On success stores the 12-bit rotate/imm8 field in *encoded and returns 1;
// otherwise returns 0 and leaves *encoded at 0.
static u_int genimm(u_int imm, u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two and try the next rotation amount
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
393
394static void genimm_checked(u_int imm,u_int *encoded)
395{
396 u_int ret=genimm(imm,encoded);
397 assert(ret);
398 (void)ret;
399}
400
401static u_int genjmp(u_int addr)
402{
403 if (addr < 3) return 0; // a branch that will be patched later
404 int offset = addr-(int)out-8;
405 if (offset < -33554432 || offset >= 33554432) {
406 SysPrintf("genjmp: out of range: %08x\n", offset);
407 abort();
408 return 0;
409 }
410 return ((u_int)offset>>2)&0xffffff;
411}
412
413static unused void emit_breakpoint(void)
414{
415 assem_debug("bkpt #0\n");
416 //output_w32(0xe1200070);
417 output_w32(0xe7f001f0);
418}
419
420static void emit_mov(int rs,int rt)
421{
422 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
423 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
424}
425
426static void emit_movs(int rs,int rt)
427{
428 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
430}
431
432static void emit_add(int rs1,int rs2,int rt)
433{
434 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
435 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
436}
437
438static void emit_adds(int rs1,int rs2,int rt)
439{
440 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
442}
443#define emit_adds_ptr emit_adds
444
445static void emit_adcs(int rs1,int rs2,int rt)
446{
447 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
448 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
449}
450
451static void emit_neg(int rs, int rt)
452{
453 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
454 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
455}
456
457static void emit_sub(int rs1,int rs2,int rt)
458{
459 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
460 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
461}
462
463static void emit_zeroreg(int rt)
464{
465 assem_debug("mov %s,#0\n",regname[rt]);
466 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
467}
468
469static void emit_loadlp(u_int imm,u_int rt)
470{
471 add_literal((int)out,imm);
472 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
473 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
474}
475
476#ifdef HAVE_ARMV7
477static void emit_movw(u_int imm,u_int rt)
478{
479 assert(imm<65536);
480 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
481 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
482}
483
484static void emit_movt(u_int imm,u_int rt)
485{
486 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
487 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
488}
489#endif
490
491static void emit_movimm(u_int imm,u_int rt)
492{
493 u_int armval;
494 if(genimm(imm,&armval)) {
495 assem_debug("mov %s,#%d\n",regname[rt],imm);
496 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
497 }else if(genimm(~imm,&armval)) {
498 assem_debug("mvn %s,#%d\n",regname[rt],imm);
499 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
500 }else if(imm<65536) {
501 #ifndef HAVE_ARMV7
502 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
503 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
504 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
505 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
506 #else
507 emit_movw(imm,rt);
508 #endif
509 }else{
510 #ifndef HAVE_ARMV7
511 emit_loadlp(imm,rt);
512 #else
513 emit_movw(imm&0x0000FFFF,rt);
514 emit_movt(imm&0xFFFF0000,rt);
515 #endif
516 }
517}
518
519static void emit_pcreladdr(u_int rt)
520{
521 assem_debug("add %s,pc,#?\n",regname[rt]);
522 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
523}
524
525static void emit_loadreg(int r, int hr)
526{
527 if(r&64) {
528 SysPrintf("64bit load in 32bit mode!\n");
529 assert(0);
530 return;
531 }
532 if((r&63)==0)
533 emit_zeroreg(hr);
534 else {
535 void *addr;
536 switch (r) {
537 //case HIREG: addr = &hi; break;
538 //case LOREG: addr = &lo; break;
539 case CCREG: addr = &cycle_count; break;
540 case CSREG: addr = &Status; break;
541 case INVCP: addr = &invc_ptr; break;
542 case ROREG: addr = &ram_offset; break;
543 default:
544 assert(r < 34);
545 addr = &psxRegs.GPR.r[r];
546 break;
547 }
548 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
549 assert(offset<4096);
550 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
551 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
552 }
553}
554
555static void emit_storereg(int r, int hr)
556{
557 if(r&64) {
558 SysPrintf("64bit store in 32bit mode!\n");
559 assert(0);
560 return;
561 }
562 int addr = (int)&psxRegs.GPR.r[r];
563 switch (r) {
564 //case HIREG: addr = &hi; break;
565 //case LOREG: addr = &lo; break;
566 case CCREG: addr = (int)&cycle_count; break;
567 default: assert(r < 34); break;
568 }
569 u_int offset = addr-(u_int)&dynarec_local;
570 assert(offset<4096);
571 assem_debug("str %s,fp+%d\n",regname[hr],offset);
572 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
573}
574
575static void emit_test(int rs, int rt)
576{
577 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
578 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
579}
580
581static void emit_testimm(int rs,int imm)
582{
583 u_int armval;
584 assem_debug("tst %s,#%d\n",regname[rs],imm);
585 genimm_checked(imm,&armval);
586 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
587}
588
589static void emit_testeqimm(int rs,int imm)
590{
591 u_int armval;
592 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
593 genimm_checked(imm,&armval);
594 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
595}
596
597static void emit_not(int rs,int rt)
598{
599 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
600 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
601}
602
603static void emit_and(u_int rs1,u_int rs2,u_int rt)
604{
605 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
606 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
607}
608
609static void emit_or(u_int rs1,u_int rs2,u_int rt)
610{
611 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
612 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
613}
614
615static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
616{
617 assert(rs<16);
618 assert(rt<16);
619 assert(imm<32);
620 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
621 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
622}
623
624static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
625{
626 assert(rs<16);
627 assert(rt<16);
628 assert(imm<32);
629 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
630 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
631}
632
633static void emit_xor(u_int rs1,u_int rs2,u_int rt)
634{
635 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
636 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
637}
638
639static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
640{
641 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
642 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
643}
644
645static void emit_addimm(u_int rs,int imm,u_int rt)
646{
647 assert(rs<16);
648 assert(rt<16);
649 if(imm!=0) {
650 u_int armval;
651 if(genimm(imm,&armval)) {
652 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
653 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
654 }else if(genimm(-imm,&armval)) {
655 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
656 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
657 #ifdef HAVE_ARMV7
658 }else if(rt!=rs&&(u_int)imm<65536) {
659 emit_movw(imm&0x0000ffff,rt);
660 emit_add(rs,rt,rt);
661 }else if(rt!=rs&&(u_int)-imm<65536) {
662 emit_movw(-imm&0x0000ffff,rt);
663 emit_sub(rs,rt,rt);
664 #endif
665 }else if((u_int)-imm<65536) {
666 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
667 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
668 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
669 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
670 }else {
671 do {
672 int shift = (ffs(imm) - 1) & ~1;
673 int imm8 = imm & (0xff << shift);
674 genimm_checked(imm8,&armval);
675 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
676 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
677 rs = rt;
678 imm &= ~imm8;
679 }
680 while (imm != 0);
681 }
682 }
683 else if(rs!=rt) emit_mov(rs,rt);
684}
685
686static void emit_addimm_and_set_flags(int imm,int rt)
687{
688 assert(imm>-65536&&imm<65536);
689 u_int armval;
690 if(genimm(imm,&armval)) {
691 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
692 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
693 }else if(genimm(-imm,&armval)) {
694 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
695 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
696 }else if(imm<0) {
697 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
698 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
699 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
700 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
701 }else{
702 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
703 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
704 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
705 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
706 }
707}
708
709static void emit_addnop(u_int r)
710{
711 assert(r<16);
712 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
713 output_w32(0xe2800000|rd_rn_rm(r,r,0));
714}
715
716static void emit_andimm(int rs,int imm,int rt)
717{
718 u_int armval;
719 if(imm==0) {
720 emit_zeroreg(rt);
721 }else if(genimm(imm,&armval)) {
722 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
723 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
724 }else if(genimm(~imm,&armval)) {
725 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
726 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
727 }else if(imm==65535) {
728 #ifndef HAVE_ARMV6
729 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
730 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
731 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
732 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
733 #else
734 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
735 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
736 #endif
737 }else{
738 assert(imm>0&&imm<65535);
739 #ifndef HAVE_ARMV7
740 assem_debug("mov r14,#%d\n",imm&0xFF00);
741 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
742 assem_debug("add r14,r14,#%d\n",imm&0xFF);
743 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
744 #else
745 emit_movw(imm,HOST_TEMPREG);
746 #endif
747 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
748 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
749 }
750}
751
752static void emit_orimm(int rs,int imm,int rt)
753{
754 u_int armval;
755 if(imm==0) {
756 if(rs!=rt) emit_mov(rs,rt);
757 }else if(genimm(imm,&armval)) {
758 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
759 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
760 }else{
761 assert(imm>0&&imm<65536);
762 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
763 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
764 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
765 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
766 }
767}
768
769static void emit_xorimm(int rs,int imm,int rt)
770{
771 u_int armval;
772 if(imm==0) {
773 if(rs!=rt) emit_mov(rs,rt);
774 }else if(genimm(imm,&armval)) {
775 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
776 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
777 }else{
778 assert(imm>0&&imm<65536);
779 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
780 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
781 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
782 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
783 }
784}
785
786static void emit_shlimm(int rs,u_int imm,int rt)
787{
788 assert(imm>0);
789 assert(imm<32);
790 //if(imm==1) ...
791 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
792 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
793}
794
795static void emit_lsls_imm(int rs,int imm,int rt)
796{
797 assert(imm>0);
798 assert(imm<32);
799 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
800 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
801}
802
803static unused void emit_lslpls_imm(int rs,int imm,int rt)
804{
805 assert(imm>0);
806 assert(imm<32);
807 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
808 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
809}
810
811static void emit_shrimm(int rs,u_int imm,int rt)
812{
813 assert(imm>0);
814 assert(imm<32);
815 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
816 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
817}
818
819static void emit_sarimm(int rs,u_int imm,int rt)
820{
821 assert(imm>0);
822 assert(imm<32);
823 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
824 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
825}
826
827static void emit_rorimm(int rs,u_int imm,int rt)
828{
829 assert(imm>0);
830 assert(imm<32);
831 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
832 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
833}
834
// Sign-extend the low 16 bits of rs into rt: sxth when ARMv6 is available,
// otherwise a shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
845
// Sign-extend the low 8 bits of rs into rt: sxtb when ARMv6 is available,
// otherwise a shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
856
857static void emit_shl(u_int rs,u_int shift,u_int rt)
858{
859 assert(rs<16);
860 assert(rt<16);
861 assert(shift<16);
862 //if(imm==1) ...
863 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
864 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
865}
866
867static void emit_shr(u_int rs,u_int shift,u_int rt)
868{
869 assert(rs<16);
870 assert(rt<16);
871 assert(shift<16);
872 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
873 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
874}
875
876static void emit_sar(u_int rs,u_int shift,u_int rt)
877{
878 assert(rs<16);
879 assert(rt<16);
880 assert(shift<16);
881 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
882 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
883}
884
885static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
886{
887 assert(rs<16);
888 assert(rt<16);
889 assert(shift<16);
890 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
891 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
892}
893
894static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
895{
896 assert(rs<16);
897 assert(rt<16);
898 assert(shift<16);
899 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
900 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
901}
902
903static void emit_cmpimm(int rs,int imm)
904{
905 u_int armval;
906 if(genimm(imm,&armval)) {
907 assem_debug("cmp %s,#%d\n",regname[rs],imm);
908 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
909 }else if(genimm(-imm,&armval)) {
910 assem_debug("cmn %s,#%d\n",regname[rs],imm);
911 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
912 }else if(imm>0) {
913 assert(imm<65536);
914 emit_movimm(imm,HOST_TEMPREG);
915 assem_debug("cmp %s,r14\n",regname[rs]);
916 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
917 }else{
918 assert(imm>-65536);
919 emit_movimm(-imm,HOST_TEMPREG);
920 assem_debug("cmn %s,r14\n",regname[rs]);
921 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
922 }
923}
924
925static void emit_cmovne_imm(int imm,int rt)
926{
927 assem_debug("movne %s,#%d\n",regname[rt],imm);
928 u_int armval;
929 genimm_checked(imm,&armval);
930 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
931}
932
933static void emit_cmovl_imm(int imm,int rt)
934{
935 assem_debug("movlt %s,#%d\n",regname[rt],imm);
936 u_int armval;
937 genimm_checked(imm,&armval);
938 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
939}
940
941static void emit_cmovb_imm(int imm,int rt)
942{
943 assem_debug("movcc %s,#%d\n",regname[rt],imm);
944 u_int armval;
945 genimm_checked(imm,&armval);
946 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
947}
948
949static void emit_cmovae_imm(int imm,int rt)
950{
951 assem_debug("movcs %s,#%d\n",regname[rt],imm);
952 u_int armval;
953 genimm_checked(imm,&armval);
954 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
955}
956
957static void emit_cmovs_imm(int imm,int rt)
958{
959 assem_debug("movmi %s,#%d\n",regname[rt],imm);
960 u_int armval;
961 genimm_checked(imm,&armval);
962 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
963}
964
965static void emit_cmovne_reg(int rs,int rt)
966{
967 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
968 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
969}
970
971static void emit_cmovl_reg(int rs,int rt)
972{
973 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
974 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
975}
976
977static void emit_cmovb_reg(int rs,int rt)
978{
979 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
980 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
981}
982
983static void emit_cmovs_reg(int rs,int rt)
984{
985 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
986 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
987}
988
// rt = (rs < imm) using cmp + movlt.  When rt aliases rs the zeroing of rt
// has to wait until after the compare.
static void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
996
// rt = (rs < imm) using cmp + movcc.  When rt aliases rs the zeroing of rt
// has to wait until after the compare.
static void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1004
1005static void emit_cmp(int rs,int rt)
1006{
1007 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1008 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1009}
1010
// rt = 1, then replaced by 0 under "lt" after comparing rs with 1
// (sequence: cmp rs,#1 ; mov rt,#1 ; movlt rt,#0)
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1018
// rt = 1 when rs is non-zero: movs rt,rs (or tst rs,rs when rt aliases rs)
// followed by movne rt,#1
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1026
// rt = (rs1 < rs2) using cmp + movlt.  When rt aliases either source the
// zeroing of rt is deferred until after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1035
// rt = (rs1 < rs2) using cmp + movcc.  When rt aliases either source the
// zeroing of rt is deferred until after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1044
1045static int can_jump_or_call(const void *a)
1046{
1047 intptr_t offset = (u_char *)a - out - 8;
1048 return (-33554432 <= offset && offset < 33554432);
1049}
1050
1051static void emit_call(const void *a_)
1052{
1053 int a = (int)a_;
1054 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1055 u_int offset=genjmp(a);
1056 output_w32(0xeb000000|offset);
1057}
1058
1059static void emit_jmp(const void *a_)
1060{
1061 int a = (int)a_;
1062 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1063 u_int offset=genjmp(a);
1064 output_w32(0xea000000|offset);
1065}
1066
// bne <a_>
static void emit_jne(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bne %x\n", target);
  output_w32(0x1a000000 | genjmp(target));
}
1074
// beq <a_>
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;

  assem_debug("beq %x\n", target);
  output_w32(0x0a000000 | genjmp(target));
}
1082
// bmi <a_>
static void emit_js(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bmi %x\n", target);
  output_w32(0x4a000000 | genjmp(target));
}
1090
// bpl <a_>
static void emit_jns(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bpl %x\n", target);
  output_w32(0x5a000000 | genjmp(target));
}
1098
// blt <a_>
static void emit_jl(const void *a_)
{
  const int target = (int)a_;

  assem_debug("blt %x\n", target);
  output_w32(0xba000000 | genjmp(target));
}
1106
// bge <a_>
static void emit_jge(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bge %x\n", target);
  output_w32(0xaa000000 | genjmp(target));
}
1114
// Emit "bvc" to a_.
static void emit_jno(const void *a_)
{
  const int dest = (int)a_;

  assem_debug("bvc %x\n", dest);
  output_w32(0x7a000000 | genjmp(dest));
}
1122
// Emit "bcs" to a_.
static void emit_jc(const void *a_)
{
  const int dest = (int)a_;

  assem_debug("bcs %x\n", dest);
  output_w32(0x2a000000 | genjmp(dest));
}
1130
// Emit "bcc" to a_.
static void emit_jcc(const void *a_)
{
  const int dest = (int)a_;

  assem_debug("bcc %x\n", dest);
  output_w32(0x3a000000 | genjmp(dest));
}
1138
1139static unused void emit_callreg(u_int r)
1140{
1141 assert(r<15);
1142 assem_debug("blx %s\n",regname[r]);
1143 output_w32(0xe12fff30|r);
1144}
1145
1146static void emit_jmpreg(u_int r)
1147{
1148 assem_debug("mov pc,%s\n",regname[r]);
1149 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1150}
1151
// Emit a return: jump to the address held in register 14.
static void emit_ret(void)
{
  enum { RETURN_ADDR_REG = 14 };

  emit_jmpreg(RETURN_ADDR_REG);
}
1156
1157static void emit_readword_indexed(int offset, int rs, int rt)
1158{
1159 assert(offset>-4096&&offset<4096);
1160 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1161 if(offset>=0) {
1162 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1163 }else{
1164 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1165 }
1166}
1167
1168static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1169{
1170 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1171 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1172}
1173#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1174
1175static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1176{
1177 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1178 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1179}
1180
1181static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1182{
1183 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1184 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1185}
1186
1187static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1188{
1189 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1190 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1191}
1192
1193static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1194{
1195 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1196 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1197}
1198
1199static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1200{
1201 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1202 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1203}
1204
1205static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1206{
1207 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1208 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1209}
1210
1211static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1212{
1213 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1214 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1215}
1216
1217static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1218{
1219 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1220 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1221}
1222
1223static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1224{
1225 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1226 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1227}
1228
1229static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1230{
1231 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1232 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1233}
1234
1235static void emit_str_dualindexed(int rs1, int rs2, int rt)
1236{
1237 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1238 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1239}
1240
1241static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1242{
1243 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1244 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1245}
1246
1247static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1248{
1249 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1250 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1251}
1252
1253static void emit_movsbl_indexed(int offset, int rs, int rt)
1254{
1255 assert(offset>-256&&offset<256);
1256 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1257 if(offset>=0) {
1258 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1259 }else{
1260 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1261 }
1262}
1263
1264static void emit_movswl_indexed(int offset, int rs, int rt)
1265{
1266 assert(offset>-256&&offset<256);
1267 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1268 if(offset>=0) {
1269 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1270 }else{
1271 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1272 }
1273}
1274
1275static void emit_movzbl_indexed(int offset, int rs, int rt)
1276{
1277 assert(offset>-4096&&offset<4096);
1278 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1279 if(offset>=0) {
1280 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1281 }else{
1282 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1283 }
1284}
1285
1286static void emit_movzwl_indexed(int offset, int rs, int rt)
1287{
1288 assert(offset>-256&&offset<256);
1289 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1290 if(offset>=0) {
1291 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1292 }else{
1293 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1294 }
1295}
1296
1297static void emit_ldrd(int offset, int rs, int rt)
1298{
1299 assert(offset>-256&&offset<256);
1300 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1301 if(offset>=0) {
1302 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1303 }else{
1304 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1305 }
1306}
1307
1308static void emit_readword(void *addr, int rt)
1309{
1310 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1311 assert(offset<4096);
1312 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1313 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1314}
1315#define emit_readptr emit_readword
1316
1317static void emit_writeword_indexed(int rt, int offset, int rs)
1318{
1319 assert(offset>-4096&&offset<4096);
1320 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1321 if(offset>=0) {
1322 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1323 }else{
1324 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1325 }
1326}
1327
1328static void emit_writehword_indexed(int rt, int offset, int rs)
1329{
1330 assert(offset>-256&&offset<256);
1331 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1332 if(offset>=0) {
1333 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1334 }else{
1335 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1336 }
1337}
1338
1339static void emit_writebyte_indexed(int rt, int offset, int rs)
1340{
1341 assert(offset>-4096&&offset<4096);
1342 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1343 if(offset>=0) {
1344 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1345 }else{
1346 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1347 }
1348}
1349
1350static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1351{
1352 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1353 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1354}
1355
1356static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1357{
1358 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1359 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1360}
1361
1362static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1363{
1364 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1365 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1366}
1367
1368static void emit_writeword(int rt, void *addr)
1369{
1370 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1371 assert(offset<4096);
1372 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1373 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1374}
1375
1376static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1377{
1378 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1379 assert(rs1<16);
1380 assert(rs2<16);
1381 assert(hi<16);
1382 assert(lo<16);
1383 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1384}
1385
1386static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1387{
1388 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1389 assert(rs1<16);
1390 assert(rs2<16);
1391 assert(hi<16);
1392 assert(lo<16);
1393 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1394}
1395
1396static void emit_clz(int rs,int rt)
1397{
1398 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1399 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1400}
1401
1402static void emit_subcs(int rs1,int rs2,int rt)
1403{
1404 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1405 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1406}
1407
1408static void emit_shrcc_imm(int rs,u_int imm,int rt)
1409{
1410 assert(imm>0);
1411 assert(imm<32);
1412 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1413 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1414}
1415
1416static void emit_shrne_imm(int rs,u_int imm,int rt)
1417{
1418 assert(imm>0);
1419 assert(imm<32);
1420 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1421 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1422}
1423
1424static void emit_negmi(int rs, int rt)
1425{
1426 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1427 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1428}
1429
1430static void emit_negsmi(int rs, int rt)
1431{
1432 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1433 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1434}
1435
1436static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1437{
1438 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1439 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1440}
1441
1442static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1443{
1444 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1445 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1446}
1447
1448static void emit_teq(int rs, int rt)
1449{
1450 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1451 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1452}
1453
1454static unused void emit_rsbimm(int rs, int imm, int rt)
1455{
1456 u_int armval;
1457 genimm_checked(imm,&armval);
1458 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1459 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1460}
1461
1462// Conditionally select one of two immediates, optimizing for small code size
1463// This will only be called if HAVE_CMOV_IMM is defined
1464static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1465{
1466 u_int armval;
1467 if(genimm(imm2-imm1,&armval)) {
1468 emit_movimm(imm1,rt);
1469 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1470 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1471 }else if(genimm(imm1-imm2,&armval)) {
1472 emit_movimm(imm1,rt);
1473 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1474 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1475 }
1476 else {
1477 #ifndef HAVE_ARMV7
1478 emit_movimm(imm1,rt);
1479 add_literal((int)out,imm2);
1480 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1481 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1482 #else
1483 emit_movw(imm1&0x0000FFFF,rt);
1484 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1485 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1486 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1487 }
1488 emit_movt(imm1&0xFFFF0000,rt);
1489 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1490 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1491 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1492 }
1493 #endif
1494 }
1495}
1496
1497// special case for checking invalid_code
1498static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1499{
1500 assert(imm<128&&imm>=0);
1501 assert(r>=0&&r<16);
1502 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1503 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1504 emit_cmpimm(HOST_TEMPREG,imm);
1505}
1506
// Emit "blne" to address a.
static void emit_callne(int a)
{
  assem_debug("blne %x\n", a);
  output_w32(0x1b000000 | genjmp(a));
}
1513
1514// Used to preload hash table entries
1515static unused void emit_prefetchreg(int r)
1516{
1517 assem_debug("pld %s\n",regname[r]);
1518 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1519}
1520
1521// Special case for mini_ht
1522static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1523{
1524 assert(offset<4096);
1525 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1526 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1527}
1528
1529static void emit_orrne_imm(int rs,int imm,int rt)
1530{
1531 u_int armval;
1532 genimm_checked(imm,&armval);
1533 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1534 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1535}
1536
1537static unused void emit_addpl_imm(int rs,int imm,int rt)
1538{
1539 u_int armval;
1540 genimm_checked(imm,&armval);
1541 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1542 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1543}
1544
1545static void emit_jno_unlikely(int a)
1546{
1547 //emit_jno(a);
1548 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1549 output_w32(0x72800000|rd_rn_rm(15,15,0));
1550}
1551
1552static void save_regs_all(u_int reglist)
1553{
1554 int i;
1555 if(!reglist) return;
1556 assem_debug("stmia fp,{");
1557 for(i=0;i<16;i++)
1558 if(reglist&(1<<i))
1559 assem_debug("r%d,",i);
1560 assem_debug("}\n");
1561 output_w32(0xe88b0000|reglist);
1562}
1563
1564static void restore_regs_all(u_int reglist)
1565{
1566 int i;
1567 if(!reglist) return;
1568 assem_debug("ldmia fp,{");
1569 for(i=0;i<16;i++)
1570 if(reglist&(1<<i))
1571 assem_debug("r%d,",i);
1572 assem_debug("}\n");
1573 output_w32(0xe89b0000|reglist);
1574}
1575
1576// Save registers before function call
1577static void save_regs(u_int reglist)
1578{
1579 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1580 save_regs_all(reglist);
1581}
1582
1583// Restore registers after function call
1584static void restore_regs(u_int reglist)
1585{
1586 reglist&=CALLER_SAVE_REGS;
1587 restore_regs_all(reglist);
1588}
1589
1590/* Stubs/epilogue */
1591
1592static void literal_pool(int n)
1593{
1594 if(!literalcount) return;
1595 if(n) {
1596 if((int)out-literals[0][0]<4096-n) return;
1597 }
1598 u_int *ptr;
1599 int i;
1600 for(i=0;i<literalcount;i++)
1601 {
1602 u_int l_addr=(u_int)out;
1603 int j;
1604 for(j=0;j<i;j++) {
1605 if(literals[j][1]==literals[i][1]) {
1606 //printf("dup %08x\n",literals[i][1]);
1607 l_addr=literals[j][0];
1608 break;
1609 }
1610 }
1611 ptr=(u_int *)literals[i][0];
1612 u_int offset=l_addr-(u_int)ptr-8;
1613 assert(offset<4096);
1614 assert(!(offset&3));
1615 *ptr|=offset;
1616 if(l_addr==(u_int)out) {
1617 literals[i][0]=l_addr; // remember for dupes
1618 output_w32(literals[i][1]);
1619 }
1620 }
1621 literalcount=0;
1622}
1623
1624static void literal_pool_jumpover(int n)
1625{
1626 if(!literalcount) return;
1627 if(n) {
1628 if((int)out-literals[0][0]<4096-n) return;
1629 }
1630 void *jaddr = out;
1631 emit_jmp(0);
1632 literal_pool(0);
1633 set_jump_target(jaddr, out);
1634}
1635
1636// parsed by get_pointer, find_extjump_insn
1637static void emit_extjump2(u_char *addr, u_int target, void *linker)
1638{
1639 u_char *ptr=(u_char *)addr;
1640 assert((ptr[3]&0x0e)==0xa);
1641 (void)ptr;
1642
1643 emit_loadlp(target,0);
1644 emit_loadlp((u_int)addr,1);
1645 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
1646 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1647//DEBUG >
1648#ifdef DEBUG_CYCLE_COUNT
1649 emit_readword(&last_count,ECX);
1650 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1651 emit_readword(&next_interupt,ECX);
1652 emit_writeword(HOST_CCREG,&Count);
1653 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1654 emit_writeword(ECX,&last_count);
1655#endif
1656//DEBUG <
1657 emit_far_jump(linker);
1658}
1659
1660static void check_extjump2(void *src)
1661{
1662 u_int *ptr = src;
1663 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1664 (void)ptr;
1665}
1666
1667// put rt_val into rt, potentially making use of rs with value rs_val
1668static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1669{
1670 u_int armval;
1671 int diff;
1672 if(genimm(rt_val,&armval)) {
1673 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1674 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1675 return;
1676 }
1677 if(genimm(~rt_val,&armval)) {
1678 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1679 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1680 return;
1681 }
1682 diff=rt_val-rs_val;
1683 if(genimm(diff,&armval)) {
1684 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1685 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1686 return;
1687 }else if(genimm(-diff,&armval)) {
1688 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1689 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1690 return;
1691 }
1692 emit_movimm(rt_val,rt);
1693}
1694
// return 1 if emit_movimm_from() can do its job cheaply, i.e. the values
// are equal or their difference (in either direction), after dropping
// trailing pairs of zero bits, is below 0x100
static int is_similar_value(u_int v1,u_int v2)
{
  if (v1 == v2)
    return 1;

  const int diff = v2 - v1;
  u_int xs = diff;

  while (xs != 0 && (xs & 3) == 0)
    xs >>= 2;
  if (xs < 0x100)
    return 1;

  xs = -diff;
  while (xs != 0 && (xs & 3) == 0)
    xs >>= 2;
  return xs < 0x100;
}
1710
1711static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1712{
1713 switch(type) {
1714 case LOADB_STUB: emit_signextend8(rs,rt); break;
1715 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1716 case LOADH_STUB: emit_signextend16(rs,rt); break;
1717 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1718 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1719 default: assert(0);
1720 }
1721}
1722
1723#include "pcsxmem.h"
1724#include "pcsxmem_inline.c"
1725
1726static void do_readstub(int n)
1727{
1728 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1729 literal_pool(256);
1730 set_jump_target(stubs[n].addr, out);
1731 enum stub_type type=stubs[n].type;
1732 int i=stubs[n].a;
1733 int rs=stubs[n].b;
1734 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1735 u_int reglist=stubs[n].e;
1736 const signed char *i_regmap=i_regs->regmap;
1737 int rt;
1738 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1739 rt=get_reg(i_regmap,FTEMP);
1740 }else{
1741 rt=get_reg(i_regmap,dops[i].rt1);
1742 }
1743 assert(rs>=0);
1744 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1745 void *restore_jump = NULL;
1746 reglist|=(1<<rs);
1747 for(r=0;r<=12;r++) {
1748 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1749 temp=r; break;
1750 }
1751 }
1752 if(rt>=0&&dops[i].rt1!=0)
1753 reglist&=~(1<<rt);
1754 if(temp==-1) {
1755 save_regs(reglist);
1756 regs_saved=1;
1757 temp=(rs==0)?2:0;
1758 }
1759 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1760 temp2=1;
1761 emit_readword(&mem_rtab,temp);
1762 emit_shrimm(rs,12,temp2);
1763 emit_readword_dualindexedx4(temp,temp2,temp2);
1764 emit_lsls_imm(temp2,1,temp2);
1765 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1766 switch(type) {
1767 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1768 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1769 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1770 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1771 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1772 default: assert(0);
1773 }
1774 }
1775 if(regs_saved) {
1776 restore_jump=out;
1777 emit_jcc(0); // jump to reg restore
1778 }
1779 else
1780 emit_jcc(stubs[n].retaddr); // return address
1781
1782 if(!regs_saved)
1783 save_regs(reglist);
1784 void *handler=NULL;
1785 if(type==LOADB_STUB||type==LOADBU_STUB)
1786 handler=jump_handler_read8;
1787 if(type==LOADH_STUB||type==LOADHU_STUB)
1788 handler=jump_handler_read16;
1789 if(type==LOADW_STUB)
1790 handler=jump_handler_read32;
1791 assert(handler);
1792 pass_args(rs,temp2);
1793 int cc=get_reg(i_regmap,CCREG);
1794 if(cc<0)
1795 emit_loadreg(CCREG,2);
1796 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1797 emit_far_call(handler);
1798 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1799 mov_loadtype_adj(type,0,rt);
1800 }
1801 if(restore_jump)
1802 set_jump_target(restore_jump, out);
1803 restore_regs(reglist);
1804 emit_jmp(stubs[n].retaddr); // return address
1805}
1806
1807static void inline_readstub(enum stub_type type, int i, u_int addr,
1808 const signed char regmap[], int target, int adj, u_int reglist)
1809{
1810 int rs=get_reg(regmap,target);
1811 int rt=get_reg(regmap,target);
1812 if(rs<0) rs=get_reg(regmap,-1);
1813 assert(rs>=0);
1814 u_int is_dynamic;
1815 uintptr_t host_addr = 0;
1816 void *handler;
1817 int cc=get_reg(regmap,CCREG);
1818 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1819 return;
1820 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1821 if (handler == NULL) {
1822 if(rt<0||dops[i].rt1==0)
1823 return;
1824 if(addr!=host_addr)
1825 emit_movimm_from(addr,rs,host_addr,rs);
1826 switch(type) {
1827 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1828 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1829 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1830 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1831 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1832 default: assert(0);
1833 }
1834 return;
1835 }
1836 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1837 if(is_dynamic) {
1838 if(type==LOADB_STUB||type==LOADBU_STUB)
1839 handler=jump_handler_read8;
1840 if(type==LOADH_STUB||type==LOADHU_STUB)
1841 handler=jump_handler_read16;
1842 if(type==LOADW_STUB)
1843 handler=jump_handler_read32;
1844 }
1845
1846 // call a memhandler
1847 if(rt>=0&&dops[i].rt1!=0)
1848 reglist&=~(1<<rt);
1849 save_regs(reglist);
1850 if(target==0)
1851 emit_movimm(addr,0);
1852 else if(rs!=0)
1853 emit_mov(rs,0);
1854 if(cc<0)
1855 emit_loadreg(CCREG,2);
1856 if(is_dynamic) {
1857 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1858 emit_addimm(cc<0?2:cc,adj,2);
1859 }
1860 else {
1861 emit_readword(&last_count,3);
1862 emit_addimm(cc<0?2:cc,adj,2);
1863 emit_add(2,3,2);
1864 emit_writeword(2,&Count);
1865 }
1866
1867 emit_far_call(handler);
1868
1869 if(rt>=0&&dops[i].rt1!=0) {
1870 switch(type) {
1871 case LOADB_STUB: emit_signextend8(0,rt); break;
1872 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1873 case LOADH_STUB: emit_signextend16(0,rt); break;
1874 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1875 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1876 default: assert(0);
1877 }
1878 }
1879 restore_regs(reglist);
1880}
1881
1882static void do_writestub(int n)
1883{
1884 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1885 literal_pool(256);
1886 set_jump_target(stubs[n].addr, out);
1887 enum stub_type type=stubs[n].type;
1888 int i=stubs[n].a;
1889 int rs=stubs[n].b;
1890 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1891 u_int reglist=stubs[n].e;
1892 const signed char *i_regmap=i_regs->regmap;
1893 int rt,r;
1894 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1895 rt=get_reg(i_regmap,r=FTEMP);
1896 }else{
1897 rt=get_reg(i_regmap,r=dops[i].rs2);
1898 }
1899 assert(rs>=0);
1900 assert(rt>=0);
1901 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1902 void *restore_jump = NULL;
1903 int reglist2=reglist|(1<<rs)|(1<<rt);
1904 for(rtmp=0;rtmp<=12;rtmp++) {
1905 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1906 temp=rtmp; break;
1907 }
1908 }
1909 if(temp==-1) {
1910 save_regs(reglist);
1911 regs_saved=1;
1912 for(rtmp=0;rtmp<=3;rtmp++)
1913 if(rtmp!=rs&&rtmp!=rt)
1914 {temp=rtmp;break;}
1915 }
1916 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1917 temp2=3;
1918 emit_readword(&mem_wtab,temp);
1919 emit_shrimm(rs,12,temp2);
1920 emit_readword_dualindexedx4(temp,temp2,temp2);
1921 emit_lsls_imm(temp2,1,temp2);
1922 switch(type) {
1923 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1924 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1925 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1926 default: assert(0);
1927 }
1928 if(regs_saved) {
1929 restore_jump=out;
1930 emit_jcc(0); // jump to reg restore
1931 }
1932 else
1933 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1934
1935 if(!regs_saved)
1936 save_regs(reglist);
1937 void *handler=NULL;
1938 switch(type) {
1939 case STOREB_STUB: handler=jump_handler_write8; break;
1940 case STOREH_STUB: handler=jump_handler_write16; break;
1941 case STOREW_STUB: handler=jump_handler_write32; break;
1942 default: assert(0);
1943 }
1944 assert(handler);
1945 pass_args(rs,rt);
1946 if(temp2!=3)
1947 emit_mov(temp2,3);
1948 int cc=get_reg(i_regmap,CCREG);
1949 if(cc<0)
1950 emit_loadreg(CCREG,2);
1951 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1952 // returns new cycle_count
1953 emit_far_call(handler);
1954 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1955 if(cc<0)
1956 emit_storereg(CCREG,2);
1957 if(restore_jump)
1958 set_jump_target(restore_jump, out);
1959 restore_regs(reglist);
1960 emit_jmp(stubs[n].retaddr);
1961}
1962
1963static void inline_writestub(enum stub_type type, int i, u_int addr,
1964 const signed char regmap[], int target, int adj, u_int reglist)
1965{
1966 int rs=get_reg(regmap,-1);
1967 int rt=get_reg(regmap,target);
1968 assert(rs>=0);
1969 assert(rt>=0);
1970 uintptr_t host_addr = 0;
1971 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1972 if (handler == NULL) {
1973 if(addr!=host_addr)
1974 emit_movimm_from(addr,rs,host_addr,rs);
1975 switch(type) {
1976 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1977 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1978 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1979 default: assert(0);
1980 }
1981 return;
1982 }
1983
1984 // call a memhandler
1985 save_regs(reglist);
1986 pass_args(rs,rt);
1987 int cc=get_reg(regmap,CCREG);
1988 if(cc<0)
1989 emit_loadreg(CCREG,2);
1990 emit_addimm(cc<0?2:cc,adj,2);
1991 emit_movimm((u_int)handler,3);
1992 // returns new cycle_count
1993 emit_far_call(jump_handler_write_h);
1994 emit_addimm(0,-adj,cc<0?2:cc);
1995 if(cc<0)
1996 emit_storereg(CCREG,2);
1997 restore_regs(reglist);
1998}
1999
2000// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
2001static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
2002{
2003 #ifndef HAVE_ARMV7
2004 emit_loadlp((int)source, 1);
2005 emit_loadlp((int)copy, 2);
2006 emit_loadlp(source_len, 3);
2007 #else
2008 emit_movw(((u_int)source)&0x0000FFFF, 1);
2009 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2010 emit_movt(((u_int)source)&0xFFFF0000, 1);
2011 emit_movt(((u_int)copy)&0xFFFF0000, 2);
2012 emit_movw(source_len, 3);
2013 #endif
2014 emit_movimm(arg0, 0);
2015}
2016
2017static void *do_dirty_stub(int i, u_int source_len)
2018{
2019 assem_debug("do_dirty_stub %x\n",start+i*4);
2020 do_dirty_stub_emit_args(start + i*4, source_len);
2021 emit_far_call(verify_code);
2022 void *entry = out;
2023 load_regs_entry(i);
2024 if (entry == out)
2025 entry = instr_addr[i];
2026 emit_jmp(instr_addr[i]);
2027 return entry;
2028}
2029
2030static void do_dirty_stub_ds(u_int source_len)
2031{
2032 do_dirty_stub_emit_args(start + 1, source_len);
2033 emit_far_call(verify_code_ds);
2034}
2035
2036/* Special assem */
2037
2038static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
2039{
2040 save_regs_all(reglist);
2041 cop2_do_stall_check(op, i, i_regs, 0);
2042#ifdef PCNT
2043 emit_movimm(op, 0);
2044 emit_far_call(pcnt_gte_start);
2045#endif
2046 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
2047}
2048
2049static void c2op_epilogue(u_int op,u_int reglist)
2050{
2051#ifdef PCNT
2052 emit_movimm(op,0);
2053 emit_far_call(pcnt_gte_end);
2054#endif
2055 restore_regs_all(reglist);
2056}
2057
2058static void c2op_call_MACtoIR(int lm,int need_flags)
2059{
2060 if(need_flags)
2061 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2062 else
2063 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2064}
2065
2066static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2067{
2068 emit_far_call(func);
2069 // func is C code and trashes r0
2070 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2071 if(need_flags||need_ir)
2072 c2op_call_MACtoIR(lm,need_flags);
2073 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2074}
2075
2076static void c2op_assemble(int i, const struct regstat *i_regs)
2077{
2078 u_int c2op = source[i] & 0x3f;
2079 u_int reglist_full = get_host_reglist(i_regs->regmap);
2080 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2081 int need_flags, need_ir;
2082
2083 if (gte_handlers[c2op]!=NULL) {
2084 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2085 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2086 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2087 source[i],gte_unneeded[i+1],need_flags,need_ir);
2088 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
2089 need_flags=0;
2090 int shift = (source[i] >> 19) & 1;
2091 int lm = (source[i] >> 10) & 1;
2092 switch(c2op) {
2093#ifndef DRC_DBG
2094 case GTE_MVMVA: {
2095#ifdef HAVE_ARMV5
2096 int v = (source[i] >> 15) & 3;
2097 int cv = (source[i] >> 13) & 3;
2098 int mx = (source[i] >> 17) & 3;
2099 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2100 c2op_prologue(c2op,i,i_regs,reglist);
2101 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2102 if(v<3)
2103 emit_ldrd(v*8,0,4);
2104 else {
2105 emit_movzwl_indexed(9*4,0,4); // gteIR
2106 emit_movzwl_indexed(10*4,0,6);
2107 emit_movzwl_indexed(11*4,0,5);
2108 emit_orrshl_imm(6,16,4);
2109 }
2110 if(mx<3)
2111 emit_addimm(0,32*4+mx*8*4,6);
2112 else
2113 emit_readword(&zeromem_ptr,6);
2114 if(cv<3)
2115 emit_addimm(0,32*4+(cv*8+5)*4,7);
2116 else
2117 emit_readword(&zeromem_ptr,7);
2118#ifdef __ARM_NEON__
2119 emit_movimm(source[i],1); // opcode
2120 emit_far_call(gteMVMVA_part_neon);
2121 if(need_flags) {
2122 emit_movimm(lm,1);
2123 emit_far_call(gteMACtoIR_flags_neon);
2124 }
2125#else
2126 if(cv==3&&shift)
2127 emit_far_call(gteMVMVA_part_cv3sh12_arm);
2128 else {
2129 emit_movimm(shift,1);
2130 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
2131 }
2132 if(need_flags||need_ir)
2133 c2op_call_MACtoIR(lm,need_flags);
2134#endif
2135#else /* if not HAVE_ARMV5 */
2136 c2op_prologue(c2op,i,i_regs,reglist);
2137 emit_movimm(source[i],1); // opcode
2138 emit_writeword(1,&psxRegs.code);
2139 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2140#endif
2141 break;
2142 }
2143 case GTE_OP:
2144 c2op_prologue(c2op,i,i_regs,reglist);
2145 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2146 if(need_flags||need_ir) {
2147 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2148 c2op_call_MACtoIR(lm,need_flags);
2149 }
2150 break;
2151 case GTE_DPCS:
2152 c2op_prologue(c2op,i,i_regs,reglist);
2153 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2154 break;
2155 case GTE_INTPL:
2156 c2op_prologue(c2op,i,i_regs,reglist);
2157 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2158 break;
2159 case GTE_SQR:
2160 c2op_prologue(c2op,i,i_regs,reglist);
2161 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2162 if(need_flags||need_ir) {
2163 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2164 c2op_call_MACtoIR(lm,need_flags);
2165 }
2166 break;
2167 case GTE_DCPL:
2168 c2op_prologue(c2op,i,i_regs,reglist);
2169 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2170 break;
2171 case GTE_GPF:
2172 c2op_prologue(c2op,i,i_regs,reglist);
2173 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2174 break;
2175 case GTE_GPL:
2176 c2op_prologue(c2op,i,i_regs,reglist);
2177 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2178 break;
2179#endif
2180 default:
2181 c2op_prologue(c2op,i,i_regs,reglist);
2182#ifdef DRC_DBG
2183 emit_movimm(source[i],1); // opcode
2184 emit_writeword(1,&psxRegs.code);
2185#endif
2186 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2187 break;
2188 }
2189 c2op_epilogue(c2op,reglist);
2190 }
2191}
2192
/*
 * Emit code that turns the value written to GTE control register 31 into
 * the value that is actually stored.
 *
 * sl:   host register holding the value being written
 * temp: host register receiving the processed value
 *
 * Intended result, as pseudo-code:
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 * NOTE(review): the shift pair below only clears bits 0-11; bit 31 of the
 * input is not masked out as the pseudo-code suggests - confirm whether
 * that is intentional.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  enum { DISCARDED_LOW_BITS = 12 };

  // Drop the low 12 bits: shift them out and shift back.
  emit_shrimm(sl, DISCARDED_LOW_BITS, temp);
  emit_shlimm(temp, DISCARDED_LOW_BITS, temp);

  // 0x7f87e000 is tested in three pieces (0x7f000000, 0x00870000,
  // 0x0000e000); presumably each "testeq" only runs while the previous
  // test found no bits set, so the final condition is "any bit set".
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  // Set bit 31 when any of the tested bits was set.
  emit_orrne_imm(temp, 0x80000000, temp);
}
2204
2205static void do_mfc2_31_one(u_int copr,signed char temp)
2206{
2207 emit_readword(&reg_cop2d[copr],temp);
2208 emit_lsls_imm(temp,16,temp);
2209 emit_cmovs_imm(0,temp);
2210 emit_cmpimm(temp,0xf80<<16);
2211 emit_andimm(temp,0xf80<<16,temp);
2212 emit_cmovae_imm(0xf80<<16,temp);
2213}
2214
2215static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2216{
2217 if (temp < 0) {
2218 host_tempreg_acquire();
2219 temp = HOST_TEMPREG;
2220 }
2221 do_mfc2_31_one(9,temp);
2222 emit_shrimm(temp,7+16,tl);
2223 do_mfc2_31_one(10,temp);
2224 emit_orrshr_imm(temp,2+16,tl);
2225 do_mfc2_31_one(11,temp);
2226 emit_orrshr_imm(temp,-3+16,tl);
2227 emit_writeword(tl,&reg_cop2d[29]);
2228 if (temp == HOST_TEMPREG)
2229 host_tempreg_release();
2230}
2231
2232static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
2233{
2234 // case 0x18: MULT
2235 // case 0x19: MULTU
2236 // case 0x1A: DIV
2237 // case 0x1B: DIVU
2238 // case 0x1C: DMULT
2239 // case 0x1D: DMULTU
2240 // case 0x1E: DDIV
2241 // case 0x1F: DDIVU
2242 if(dops[i].rs1&&dops[i].rs2)
2243 {
2244 if((dops[i].opcode2&4)==0) // 32-bit
2245 {
2246 if(dops[i].opcode2==0x18) // MULT
2247 {
2248 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2249 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2250 signed char hi=get_reg(i_regs->regmap,HIREG);
2251 signed char lo=get_reg(i_regs->regmap,LOREG);
2252 assert(m1>=0);
2253 assert(m2>=0);
2254 assert(hi>=0);
2255 assert(lo>=0);
2256 emit_smull(m1,m2,hi,lo);
2257 }
2258 if(dops[i].opcode2==0x19) // MULTU
2259 {
2260 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2261 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2262 signed char hi=get_reg(i_regs->regmap,HIREG);
2263 signed char lo=get_reg(i_regs->regmap,LOREG);
2264 assert(m1>=0);
2265 assert(m2>=0);
2266 assert(hi>=0);
2267 assert(lo>=0);
2268 emit_umull(m1,m2,hi,lo);
2269 }
2270 if(dops[i].opcode2==0x1A) // DIV
2271 {
2272 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2273 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2274 assert(d1>=0);
2275 assert(d2>=0);
2276 signed char quotient=get_reg(i_regs->regmap,LOREG);
2277 signed char remainder=get_reg(i_regs->regmap,HIREG);
2278 assert(quotient>=0);
2279 assert(remainder>=0);
2280 emit_movs(d1,remainder);
2281 emit_movimm(0xffffffff,quotient);
2282 emit_negmi(quotient,quotient); // .. quotient and ..
2283 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2284 emit_movs(d2,HOST_TEMPREG);
2285 emit_jeq(out+52); // Division by zero
2286 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2287#ifdef HAVE_ARMV5
2288 emit_clz(HOST_TEMPREG,quotient);
2289 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2290#else
2291 emit_movimm(0,quotient);
2292 emit_addpl_imm(quotient,1,quotient);
2293 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2294 emit_jns(out-2*4);
2295#endif
2296 emit_orimm(quotient,1<<31,quotient);
2297 emit_shr(quotient,quotient,quotient);
2298 emit_cmp(remainder,HOST_TEMPREG);
2299 emit_subcs(remainder,HOST_TEMPREG,remainder);
2300 emit_adcs(quotient,quotient,quotient);
2301 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2302 emit_jcc(out-16); // -4
2303 emit_teq(d1,d2);
2304 emit_negmi(quotient,quotient);
2305 emit_test(d1,d1);
2306 emit_negmi(remainder,remainder);
2307 }
2308 if(dops[i].opcode2==0x1B) // DIVU
2309 {
2310 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2311 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2312 assert(d1>=0);
2313 assert(d2>=0);
2314 signed char quotient=get_reg(i_regs->regmap,LOREG);
2315 signed char remainder=get_reg(i_regs->regmap,HIREG);
2316 assert(quotient>=0);
2317 assert(remainder>=0);
2318 emit_mov(d1,remainder);
2319 emit_movimm(0xffffffff,quotient); // div0 case
2320 emit_test(d2,d2);
2321 emit_jeq(out+40); // Division by zero
2322#ifdef HAVE_ARMV5
2323 emit_clz(d2,HOST_TEMPREG);
2324 emit_movimm(1<<31,quotient);
2325 emit_shl(d2,HOST_TEMPREG,d2);
2326#else
2327 emit_movimm(0,HOST_TEMPREG);
2328 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2329 emit_lslpls_imm(d2,1,d2);
2330 emit_jns(out-2*4);
2331 emit_movimm(1<<31,quotient);
2332#endif
2333 emit_shr(quotient,HOST_TEMPREG,quotient);
2334 emit_cmp(remainder,d2);
2335 emit_subcs(remainder,d2,remainder);
2336 emit_adcs(quotient,quotient,quotient);
2337 emit_shrcc_imm(d2,1,d2);
2338 emit_jcc(out-16); // -4
2339 }
2340 }
2341 else // 64-bit
2342 assert(0);
2343 }
2344 else
2345 {
2346 // Multiply by zero is zero.
2347 // MIPS does not have a divide by zero exception.
2348 // The result is undefined, we return zero.
2349 signed char hr=get_reg(i_regs->regmap,HIREG);
2350 signed char lr=get_reg(i_regs->regmap,LOREG);
2351 if(hr>=0) emit_zeroreg(hr);
2352 if(lr>=0) emit_zeroreg(lr);
2353 }
2354}
2355#define multdiv_assemble multdiv_assemble_arm
2356
2357static void do_jump_vaddr(int rs)
2358{
2359 emit_far_jump(jump_vaddr_reg[rs]);
2360}
2361
/*
 * Intentionally emits nothing on ARM.  On x86 this step loads the value
 * 0xf8 into the register; on ARM the hash is computed with a single
 * instruction later on (see do_rhash).
 */
static void do_preload_rhash(int r)
{
  (void)r; // no preload needed on this architecture
}
2366
2367static void do_preload_rhtbl(int ht) {
2368 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2369}
2370
/*
 * Emit code that computes the mini hash table offset for the address in
 * host register rs: rh = rs & 0xf8.
 */
static void do_rhash(int rs,int rh)
{
  enum { MINI_HT_OFFSET_MASK = 0xf8 };

  emit_andimm(rs, MINI_HT_OFFSET_MASK, rh);
}
2374
2375static void do_miniht_load(int ht,int rh) {
2376 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2377 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2378}
2379
/*
 * Emit the mini hash table dispatch: compare the loaded key (rh) with
 * the target address (rs); on a match load the word at [ht+4] straight
 * into register 15 (the PC), otherwise fall through to the generic
 * jump_vaddr path.
 *
 * With CORTEX_A8_BRANCH_PREDICTION_HACK the address is first moved to
 * r7 so that the fallback always uses the r7 handler.
 */
static void do_miniht_jump(int rs,int rh,int ht)
{
  int lookup_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  if (lookup_reg != 7) {
    emit_mov(lookup_reg, 7);
    lookup_reg = 7;
  }
#endif
  do_jump_vaddr(lookup_reg);
}
2390
2391static void do_miniht_insert(u_int return_address,int rt,int temp) {
2392 #ifndef HAVE_ARMV7
2393 emit_movimm(return_address,rt); // PC into link register
2394 add_to_linker(out,return_address,1);
2395 emit_pcreladdr(temp);
2396 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2397 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2398 #else
2399 emit_movw(return_address&0x0000FFFF,rt);
2400 add_to_linker(out,return_address,1);
2401 emit_pcreladdr(temp);
2402 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2403 emit_movt(return_address&0xFFFF0000,rt);
2404 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2405 #endif
2406}
2407
2408// CPU-architecture-specific initialization
2409static void arch_init(void)
2410{
2411 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2412 struct tramp_insns *ops = ndrc->tramp.ops;
2413 size_t i;
2414 assert(!(diff & 3));
2415 assert(diff < 0x1000);
2416 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2417 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2418 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2419 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2420}
2421
2422// vim:shiftwidth=2:expandtab