drc: fix some mistake done during arm64 porting
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#define unused __attribute__((unused))
31
32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
// Entry points that are only declared here; their definitions are not part
// of this section of the file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN entry for each host register number.
// Slots 11, 13, 14 and 15 have no entry and stay null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
72
73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
104static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
105
106/* Linker */
107
// Patch the instruction at addr so that it refers to target_.
// The top byte of the existing instruction word selects the patch form:
//   0xe2      - data-processing immediate (the form emit_pcreladdr emits);
//               the low 12 bits receive the distance, which must be < 1024
//   0x72      - generated by emit_jno_unlikely; becomes one of the two
//               immediate forms when the distance fits, else a "bvc" branch
//   otherwise - must be a branch; only its 24-bit offset field is replaced
// Distances are measured from addr+8.
static void set_jump_target(void *addr, void *target_)
{
  u_int target = (u_int)target_;
  u_char *bytes = addr;
  u_int *insn = (u_int *)bytes;
  u_int disp = target - (u_int)insn - 8;
  u_char top = bytes[3];

  if (top == 0xe2) {
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    // imm8 = disp/4 with rotation field 0xF
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    return;
  }

  if (top == 0x72) {
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    }
    else if (disp < 4096 && !(disp & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      // imm8 = disp/16 with rotation field 0xE
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    }
    else {
      // too far for an immediate: rewrite the whole word as a branch
      *insn = (0x7A000000) | ((disp << 6) >> 8);
    }
    return;
  }

  assert((top & 0x0e) == 0xa);
  // (disp<<6)>>8 keeps bits 2..25 of disp as the 24-bit word offset
  *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
}
139
140// This optionally copies the instruction from the target of the branch into
141// the space before the branch. Works, but the difference in speed is
142// usually insignificant.
143#if 0
144static void set_jump_target_fillslot(int addr,u_int target,int copy)
145{
146 u_char *ptr=(u_char *)addr;
147 u_int *ptr2=(u_int *)ptr;
148 assert(!copy||ptr2[-1]==0xe28dd000);
149 if(ptr[3]==0xe2) {
150 assert(!copy);
151 assert((target-(u_int)ptr2-8)<4096);
152 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
153 }
154 else {
155 assert((ptr[3]&0x0e)==0xa);
156 u_int target_insn=*(u_int *)target;
157 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
158 copy=0;
159 }
160 if((target_insn&0x0c100000)==0x04100000) { // Load
161 copy=0;
162 }
163 if(target_insn&0x08000000) {
164 copy=0;
165 }
166 if(copy) {
167 ptr2[-1]=target_insn;
168 target+=4;
169 }
170 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
171 }
172}
173#endif
174
175/* Literal pool */
176static void add_literal(int addr,int val)
177{
178 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
179 literals[literalcount][0]=addr;
180 literals[literalcount][1]=val;
181 literalcount++;
182}
183
184// from a pointer to external jump stub (which was produced by emit_extjump2)
185// find where the jumping insn is
186static void *find_extjump_insn(void *stub)
187{
188 int *ptr=(int *)(stub+4);
189 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
190 u_int offset=*ptr&0xfff;
191 void **l_ptr=(void *)ptr+offset+8;
192 return *l_ptr;
193}
194
195// find where external branch is liked to using addr of it's stub:
196// get address that insn one after stub loads (dyna_linker arg1),
197// treat it as a pointer to branch insn,
198// return addr where that branch jumps to
199static void *get_pointer(void *stub)
200{
201 //printf("get_pointer(%x)\n",(int)stub);
202 int *i_ptr=find_extjump_insn(stub);
203 assert((*i_ptr&0x0f000000)==0x0a000000); // b
204 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
205}
206
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
static void *get_clean_addr(void *addr)
{
#ifndef HAVE_ARMV7
  signed int *insn = (signed int *)addr + 4;
#else
  signed int *insn = (signed int *)addr + 6;
#endif

  // the call may sit one instruction further on
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn += 1;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn += 1;

  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;

  // an unconditional branch follows the call: return its destination
  return (char *)insn + ((*insn << 8) >> 6) + 8;
}
225
226static int verify_dirty(const u_int *ptr)
227{
228 #ifndef HAVE_ARMV7
229 u_int offset;
230 // get from literal pool
231 assert((*ptr&0xFFFF0000)==0xe59f0000);
232 offset=*ptr&0xfff;
233 u_int source=*(u_int*)((void *)ptr+offset+8);
234 ptr++;
235 assert((*ptr&0xFFFF0000)==0xe59f0000);
236 offset=*ptr&0xfff;
237 u_int copy=*(u_int*)((void *)ptr+offset+8);
238 ptr++;
239 assert((*ptr&0xFFFF0000)==0xe59f0000);
240 offset=*ptr&0xfff;
241 u_int len=*(u_int*)((void *)ptr+offset+8);
242 ptr++;
243 ptr++;
244 #else
245 // ARMv7 movw/movt
246 assert((*ptr&0xFFF00000)==0xe3000000);
247 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
248 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
249 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
250 ptr+=6;
251 #endif
252 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
253 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
254 //printf("verify_dirty: %x %x %x\n",source,copy,len);
255 return !memcmp((void *)source,(void *)copy,len);
256}
257
258// This doesn't necessarily find all clean entry points, just
259// guarantees that it's not dirty
260static int isclean(void *addr)
261{
262 #ifndef HAVE_ARMV7
263 u_int *ptr=((u_int *)addr)+4;
264 #else
265 u_int *ptr=((u_int *)addr)+6;
266 #endif
267 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
268 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
270 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
271 return 1;
272}
273
274// get source that block at addr was compiled from (host pointers)
275static void get_bounds(void *addr, u_char **start, u_char **end)
276{
277 u_int *ptr = addr;
278 #ifndef HAVE_ARMV7
279 u_int offset;
280 // get from literal pool
281 assert((*ptr&0xFFFF0000)==0xe59f0000);
282 offset=*ptr&0xfff;
283 u_int source=*(u_int*)((void *)ptr+offset+8);
284 ptr++;
285 //assert((*ptr&0xFFFF0000)==0xe59f0000);
286 //offset=*ptr&0xfff;
287 //u_int copy=*(u_int*)((void *)ptr+offset+8);
288 ptr++;
289 assert((*ptr&0xFFFF0000)==0xe59f0000);
290 offset=*ptr&0xfff;
291 u_int len=*(u_int*)((void *)ptr+offset+8);
292 ptr++;
293 ptr++;
294 #else
295 // ARMv7 movw/movt
296 assert((*ptr&0xFFF00000)==0xe3000000);
297 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
298 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
299 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
300 ptr+=6;
301 #endif
302 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
303 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
304 *start=(u_char *)source;
305 *end=(u_char *)source+len;
306}
307
308// Allocate a specific ARM register.
309static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
310{
311 int n;
312 int dirty=0;
313
314 // see if it's already allocated (and dealloc it)
315 for(n=0;n<HOST_REGS;n++)
316 {
317 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
318 dirty=(cur->dirty>>n)&1;
319 cur->regmap[n]=-1;
320 }
321 }
322
323 cur->regmap[hr]=reg;
324 cur->dirty&=~(1<<hr);
325 cur->dirty|=dirty<<hr;
326 cur->isconst&=~(1<<hr);
327}
328
329// Alloc cycle count into dedicated register
330static void alloc_cc(struct regstat *cur,int i)
331{
332 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
333}
334
335/* Assembler */
336
337static unused char regname[16][4] = {
338 "r0",
339 "r1",
340 "r2",
341 "r3",
342 "r4",
343 "r5",
344 "r6",
345 "r7",
346 "r8",
347 "r9",
348 "r10",
349 "fp",
350 "r12",
351 "sp",
352 "lr",
353 "pc"};
354
355static void output_w32(u_int word)
356{
357 *((u_int *)out)=word;
358 out+=4;
359}
360
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
368
// Build the rn/rd fields plus an 8-bit immediate shifted left by "shift".
// shift must be even; it is stored as the rotate-right amount
// (32-shift) in bits 7-11, which places rotate/2 in bits 8-11.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  u_int rotate = (32 - shift) & 30;
  return (rn << 16) | (rd << 12) | (rotate << 7) | imm;
}
377
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success store the 12-bit operand field (rotation in bits 8-11, value in
// bits 0-7) in *encoded and return 1. Otherwise return 0 with *encoded == 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded = 0;
  if (imm == 0)
    return 1;

  // rotate imm right two bits at a time until it fits in 8 bits
  for (rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
393
394static void genimm_checked(u_int imm,u_int *encoded)
395{
396 u_int ret=genimm(imm,encoded);
397 assert(ret);
398 (void)ret;
399}
400
401static u_int genjmp(u_int addr)
402{
403 if (addr < 3) return 0; // a branch that will be patched later
404 int offset = addr-(int)out-8;
405 if (offset < -33554432 || offset >= 33554432) {
406 SysPrintf("genjmp: out of range: %08x\n", offset);
407 abort();
408 return 0;
409 }
410 return ((u_int)offset>>2)&0xffffff;
411}
412
413static unused void emit_breakpoint(void)
414{
415 assem_debug("bkpt #0\n");
416 //output_w32(0xe1200070);
417 output_w32(0xe7f001f0);
418}
419
420static void emit_mov(int rs,int rt)
421{
422 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
423 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
424}
425
426static void emit_movs(int rs,int rt)
427{
428 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
430}
431
432static void emit_add(int rs1,int rs2,int rt)
433{
434 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
435 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
436}
437
438static void emit_adds(int rs1,int rs2,int rt)
439{
440 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
442}
443#define emit_adds_ptr emit_adds
444
445static void emit_adcs(int rs1,int rs2,int rt)
446{
447 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
448 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
449}
450
451static void emit_neg(int rs, int rt)
452{
453 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
454 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
455}
456
457static void emit_sub(int rs1,int rs2,int rt)
458{
459 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
460 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
461}
462
463static void emit_zeroreg(int rt)
464{
465 assem_debug("mov %s,#0\n",regname[rt]);
466 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
467}
468
469static void emit_loadlp(u_int imm,u_int rt)
470{
471 add_literal((int)out,imm);
472 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
473 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
474}
475
476static void emit_movw(u_int imm,u_int rt)
477{
478 assert(imm<65536);
479 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
480 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
481}
482
483static void emit_movt(u_int imm,u_int rt)
484{
485 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
486 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
487}
488
489static void emit_movimm(u_int imm,u_int rt)
490{
491 u_int armval;
492 if(genimm(imm,&armval)) {
493 assem_debug("mov %s,#%d\n",regname[rt],imm);
494 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
495 }else if(genimm(~imm,&armval)) {
496 assem_debug("mvn %s,#%d\n",regname[rt],imm);
497 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
498 }else if(imm<65536) {
499 #ifndef HAVE_ARMV7
500 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
501 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
502 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
503 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
504 #else
505 emit_movw(imm,rt);
506 #endif
507 }else{
508 #ifndef HAVE_ARMV7
509 emit_loadlp(imm,rt);
510 #else
511 emit_movw(imm&0x0000FFFF,rt);
512 emit_movt(imm&0xFFFF0000,rt);
513 #endif
514 }
515}
516
517static void emit_pcreladdr(u_int rt)
518{
519 assem_debug("add %s,pc,#?\n",regname[rt]);
520 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
521}
522
523static void emit_loadreg(int r, int hr)
524{
525 if(r&64) {
526 SysPrintf("64bit load in 32bit mode!\n");
527 assert(0);
528 return;
529 }
530 if((r&63)==0)
531 emit_zeroreg(hr);
532 else {
533 int addr = (int)&psxRegs.GPR.r[r];
534 switch (r) {
535 //case HIREG: addr = &hi; break;
536 //case LOREG: addr = &lo; break;
537 case CCREG: addr = (int)&cycle_count; break;
538 case CSREG: addr = (int)&Status; break;
539 case INVCP: addr = (int)&invc_ptr; break;
540 case ROREG: addr = (int)&ram_offset; break;
541 default: assert(r < 34); break;
542 }
543 u_int offset = addr-(u_int)&dynarec_local;
544 assert(offset<4096);
545 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
546 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
547 }
548}
549
550static void emit_storereg(int r, int hr)
551{
552 if(r&64) {
553 SysPrintf("64bit store in 32bit mode!\n");
554 assert(0);
555 return;
556 }
557 int addr = (int)&psxRegs.GPR.r[r];
558 switch (r) {
559 //case HIREG: addr = &hi; break;
560 //case LOREG: addr = &lo; break;
561 case CCREG: addr = (int)&cycle_count; break;
562 default: assert(r < 34); break;
563 }
564 u_int offset = addr-(u_int)&dynarec_local;
565 assert(offset<4096);
566 assem_debug("str %s,fp+%d\n",regname[hr],offset);
567 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
568}
569
570static void emit_test(int rs, int rt)
571{
572 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
573 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
574}
575
576static void emit_testimm(int rs,int imm)
577{
578 u_int armval;
579 assem_debug("tst %s,#%d\n",regname[rs],imm);
580 genimm_checked(imm,&armval);
581 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
582}
583
584static void emit_testeqimm(int rs,int imm)
585{
586 u_int armval;
587 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
588 genimm_checked(imm,&armval);
589 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
590}
591
592static void emit_not(int rs,int rt)
593{
594 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
595 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
596}
597
598static void emit_and(u_int rs1,u_int rs2,u_int rt)
599{
600 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
601 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
602}
603
604static void emit_or(u_int rs1,u_int rs2,u_int rt)
605{
606 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
607 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
608}
609
610static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
611{
612 assert(rs<16);
613 assert(rt<16);
614 assert(imm<32);
615 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
616 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
617}
618
619static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
620{
621 assert(rs<16);
622 assert(rt<16);
623 assert(imm<32);
624 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
625 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
626}
627
628static void emit_xor(u_int rs1,u_int rs2,u_int rt)
629{
630 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
631 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
632}
633
634static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
635{
636 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
637 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
638}
639
640static void emit_addimm(u_int rs,int imm,u_int rt)
641{
642 assert(rs<16);
643 assert(rt<16);
644 if(imm!=0) {
645 u_int armval;
646 if(genimm(imm,&armval)) {
647 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
648 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
649 }else if(genimm(-imm,&armval)) {
650 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
651 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
652 #ifdef HAVE_ARMV7
653 }else if(rt!=rs&&(u_int)imm<65536) {
654 emit_movw(imm&0x0000ffff,rt);
655 emit_add(rs,rt,rt);
656 }else if(rt!=rs&&(u_int)-imm<65536) {
657 emit_movw(-imm&0x0000ffff,rt);
658 emit_sub(rs,rt,rt);
659 #endif
660 }else if((u_int)-imm<65536) {
661 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
662 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
663 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
664 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
665 }else {
666 do {
667 int shift = (ffs(imm) - 1) & ~1;
668 int imm8 = imm & (0xff << shift);
669 genimm_checked(imm8,&armval);
670 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
671 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
672 rs = rt;
673 imm &= ~imm8;
674 }
675 while (imm != 0);
676 }
677 }
678 else if(rs!=rt) emit_mov(rs,rt);
679}
680
681static void emit_addimm_and_set_flags(int imm,int rt)
682{
683 assert(imm>-65536&&imm<65536);
684 u_int armval;
685 if(genimm(imm,&armval)) {
686 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
687 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
688 }else if(genimm(-imm,&armval)) {
689 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
690 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
691 }else if(imm<0) {
692 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
693 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
694 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
695 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
696 }else{
697 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
698 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
699 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
700 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
701 }
702}
703
704static void emit_addnop(u_int r)
705{
706 assert(r<16);
707 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
708 output_w32(0xe2800000|rd_rn_rm(r,r,0));
709}
710
711static void emit_andimm(int rs,int imm,int rt)
712{
713 u_int armval;
714 if(imm==0) {
715 emit_zeroreg(rt);
716 }else if(genimm(imm,&armval)) {
717 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
718 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
719 }else if(genimm(~imm,&armval)) {
720 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
722 }else if(imm==65535) {
723 #ifndef HAVE_ARMV6
724 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
725 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
726 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
727 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
728 #else
729 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
730 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
731 #endif
732 }else{
733 assert(imm>0&&imm<65535);
734 #ifndef HAVE_ARMV7
735 assem_debug("mov r14,#%d\n",imm&0xFF00);
736 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
737 assem_debug("add r14,r14,#%d\n",imm&0xFF);
738 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
739 #else
740 emit_movw(imm,HOST_TEMPREG);
741 #endif
742 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
743 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
744 }
745}
746
747static void emit_orimm(int rs,int imm,int rt)
748{
749 u_int armval;
750 if(imm==0) {
751 if(rs!=rt) emit_mov(rs,rt);
752 }else if(genimm(imm,&armval)) {
753 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
754 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
755 }else{
756 assert(imm>0&&imm<65536);
757 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
758 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
759 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
760 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
761 }
762}
763
764static void emit_xorimm(int rs,int imm,int rt)
765{
766 u_int armval;
767 if(imm==0) {
768 if(rs!=rt) emit_mov(rs,rt);
769 }else if(genimm(imm,&armval)) {
770 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
771 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
772 }else{
773 assert(imm>0&&imm<65536);
774 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
775 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
776 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
777 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
778 }
779}
780
781static void emit_shlimm(int rs,u_int imm,int rt)
782{
783 assert(imm>0);
784 assert(imm<32);
785 //if(imm==1) ...
786 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
787 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
788}
789
790static void emit_lsls_imm(int rs,int imm,int rt)
791{
792 assert(imm>0);
793 assert(imm<32);
794 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
795 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
796}
797
798static unused void emit_lslpls_imm(int rs,int imm,int rt)
799{
800 assert(imm>0);
801 assert(imm<32);
802 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
803 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
804}
805
806static void emit_shrimm(int rs,u_int imm,int rt)
807{
808 assert(imm>0);
809 assert(imm<32);
810 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
811 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
812}
813
814static void emit_sarimm(int rs,u_int imm,int rt)
815{
816 assert(imm>0);
817 assert(imm<32);
818 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
819 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
820}
821
822static void emit_rorimm(int rs,u_int imm,int rt)
823{
824 assert(imm>0);
825 assert(imm<32);
826 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
827 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
828}
829
// Sign-extend the low 16 bits of rs into rt:
// one sxth on ARMv6+, a shift-left/arithmetic-shift-right pair otherwise.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
840
// Sign-extend the low 8 bits of rs into rt:
// one sxtb on ARMv6+, a shift-left/arithmetic-shift-right pair otherwise.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
851
852static void emit_shl(u_int rs,u_int shift,u_int rt)
853{
854 assert(rs<16);
855 assert(rt<16);
856 assert(shift<16);
857 //if(imm==1) ...
858 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
859 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
860}
861
862static void emit_shr(u_int rs,u_int shift,u_int rt)
863{
864 assert(rs<16);
865 assert(rt<16);
866 assert(shift<16);
867 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
868 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
869}
870
871static void emit_sar(u_int rs,u_int shift,u_int rt)
872{
873 assert(rs<16);
874 assert(rt<16);
875 assert(shift<16);
876 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
877 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
878}
879
880static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
881{
882 assert(rs<16);
883 assert(rt<16);
884 assert(shift<16);
885 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
886 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
887}
888
889static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
890{
891 assert(rs<16);
892 assert(rt<16);
893 assert(shift<16);
894 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
895 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
896}
897
898static void emit_cmpimm(int rs,int imm)
899{
900 u_int armval;
901 if(genimm(imm,&armval)) {
902 assem_debug("cmp %s,#%d\n",regname[rs],imm);
903 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
904 }else if(genimm(-imm,&armval)) {
905 assem_debug("cmn %s,#%d\n",regname[rs],imm);
906 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
907 }else if(imm>0) {
908 assert(imm<65536);
909 emit_movimm(imm,HOST_TEMPREG);
910 assem_debug("cmp %s,r14\n",regname[rs]);
911 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
912 }else{
913 assert(imm>-65536);
914 emit_movimm(-imm,HOST_TEMPREG);
915 assem_debug("cmn %s,r14\n",regname[rs]);
916 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
917 }
918}
919
920static void emit_cmovne_imm(int imm,int rt)
921{
922 assem_debug("movne %s,#%d\n",regname[rt],imm);
923 u_int armval;
924 genimm_checked(imm,&armval);
925 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
926}
927
928static void emit_cmovl_imm(int imm,int rt)
929{
930 assem_debug("movlt %s,#%d\n",regname[rt],imm);
931 u_int armval;
932 genimm_checked(imm,&armval);
933 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
934}
935
936static void emit_cmovb_imm(int imm,int rt)
937{
938 assem_debug("movcc %s,#%d\n",regname[rt],imm);
939 u_int armval;
940 genimm_checked(imm,&armval);
941 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
942}
943
944static void emit_cmovae_imm(int imm,int rt)
945{
946 assem_debug("movcs %s,#%d\n",regname[rt],imm);
947 u_int armval;
948 genimm_checked(imm,&armval);
949 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
950}
951
952static void emit_cmovs_imm(int imm,int rt)
953{
954 assem_debug("movmi %s,#%d\n",regname[rt],imm);
955 u_int armval;
956 genimm_checked(imm,&armval);
957 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
958}
959
960static void emit_cmovne_reg(int rs,int rt)
961{
962 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
963 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
964}
965
966static void emit_cmovl_reg(int rs,int rt)
967{
968 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
969 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
970}
971
972static void emit_cmovb_reg(int rs,int rt)
973{
974 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
975 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
976}
977
978static void emit_cmovs_reg(int rs,int rt)
979{
980 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
981 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
982}
983
// Emit code for rt = (rs < imm), signed: zero rt, compare, then "movlt rt,#1".
// When rs and rt are the same register, the zeroing is done after the
// compare so that rs is still intact when it is compared.
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
991
// Emit code for rt = (rs < imm), unsigned: zero rt, compare, then
// "movcc rt,#1". When rs and rt are the same register, the zeroing is done
// after the compare so that rs is still intact when it is compared.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
999
1000static void emit_cmp(int rs,int rt)
1001{
1002 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1003 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1004}
1005
// Emit code for rt = (rs > 0), signed:
// compare rs with 1, preset rt to 1, then "movlt rt,#0".
static void emit_set_gz32(int rs, int rt)
{
  const int threshold = 1;

  emit_cmpimm(rs, threshold);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1013
// Emit code for rt = (rs != 0).
// With distinct registers "movs rt, rs" copies the value and sets the flags;
// in place a "tst rs, rs" sets them. "movne rt,#1" then replaces any nonzero
// value with 1.
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1021
// Emit code for rt = (rs1 < rs2), signed: zero rt, compare, then
// "movlt rt,#1". When rt is also one of the sources, the zeroing is done
// after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int overlaps = (rs1 == rt || rs2 == rt);

  if (!overlaps)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (overlaps)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1030
// Emit code for rt = (rs1 < rs2), unsigned: zero rt, compare, then
// "movcc rt,#1". When rt is also one of the sources, the zeroing is done
// after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int overlaps = (rs1 == rt || rs2 == rt);

  if (!overlaps)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (overlaps)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1039
1040static int can_jump_or_call(const void *a)
1041{
1042 intptr_t offset = (u_char *)a - out - 8;
1043 return (-33554432 <= offset && offset < 33554432);
1044}
1045
1046static void emit_call(const void *a_)
1047{
1048 int a = (int)a_;
1049 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1050 u_int offset=genjmp(a);
1051 output_w32(0xeb000000|offset);
1052}
1053
1054static void emit_jmp(const void *a_)
1055{
1056 int a = (int)a_;
1057 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1058 u_int offset=genjmp(a);
1059 output_w32(0xea000000|offset);
1060}
1061
1062static void emit_jne(const void *a_)
1063{
1064 int a = (int)a_;
1065 assem_debug("bne %x\n",a);
1066 u_int offset=genjmp(a);
1067 output_w32(0x1a000000|offset);
1068}
1069
1070static void emit_jeq(const void *a_)
1071{
1072 int a = (int)a_;
1073 assem_debug("beq %x\n",a);
1074 u_int offset=genjmp(a);
1075 output_w32(0x0a000000|offset);
1076}
1077
1078static void emit_js(const void *a_)
1079{
1080 int a = (int)a_;
1081 assem_debug("bmi %x\n",a);
1082 u_int offset=genjmp(a);
1083 output_w32(0x4a000000|offset);
1084}
1085
1086static void emit_jns(const void *a_)
1087{
1088 int a = (int)a_;
1089 assem_debug("bpl %x\n",a);
1090 u_int offset=genjmp(a);
1091 output_w32(0x5a000000|offset);
1092}
1093
1094static void emit_jl(const void *a_)
1095{
1096 int a = (int)a_;
1097 assem_debug("blt %x\n",a);
1098 u_int offset=genjmp(a);
1099 output_w32(0xba000000|offset);
1100}
1101
1102static void emit_jge(const void *a_)
1103{
1104 int a = (int)a_;
1105 assem_debug("bge %x\n",a);
1106 u_int offset=genjmp(a);
1107 output_w32(0xaa000000|offset);
1108}
1109
1110static void emit_jno(const void *a_)
1111{
1112 int a = (int)a_;
1113 assem_debug("bvc %x\n",a);
1114 u_int offset=genjmp(a);
1115 output_w32(0x7a000000|offset);
1116}
1117
1118static void emit_jc(const void *a_)
1119{
1120 int a = (int)a_;
1121 assem_debug("bcs %x\n",a);
1122 u_int offset=genjmp(a);
1123 output_w32(0x2a000000|offset);
1124}
1125
1126static void emit_jcc(const void *a_)
1127{
1128 int a = (int)a_;
1129 assem_debug("bcc %x\n",a);
1130 u_int offset=genjmp(a);
1131 output_w32(0x3a000000|offset);
1132}
1133
1134static unused void emit_callreg(u_int r)
1135{
1136 assert(r<15);
1137 assem_debug("blx %s\n",regname[r]);
1138 output_w32(0xe12fff30|r);
1139}
1140
1141static void emit_jmpreg(u_int r)
1142{
1143 assem_debug("mov pc,%s\n",regname[r]);
1144 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1145}
1146
1147static void emit_ret(void)
1148{
1149 emit_jmpreg(14);
1150}
1151
1152static void emit_readword_indexed(int offset, int rs, int rt)
1153{
1154 assert(offset>-4096&&offset<4096);
1155 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1156 if(offset>=0) {
1157 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1158 }else{
1159 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1160 }
1161}
1162
1163static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1164{
1165 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1167}
1168#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1169
1170static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1171{
1172 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1173 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1174}
1175
1176static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1177{
1178 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1179 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1180}
1181
1182static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1183{
1184 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1185 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1186}
1187
1188static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1189{
1190 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1191 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1192}
1193
1194static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1195{
1196 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1197 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1198}
1199
1200static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1201{
1202 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1203 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1204}
1205
1206static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1207{
1208 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1209 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1210}
1211
1212static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1213{
1214 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1215 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1216}
1217
1218static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1219{
1220 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1221 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1222}
1223
1224static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1225{
1226 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1227 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1228}
1229
1230static void emit_str_dualindexed(int rs1, int rs2, int rt)
1231{
1232 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1233 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1234}
1235
1236static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1237{
1238 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1239 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1240}
1241
1242static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1243{
1244 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1245 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1246}
1247
1248static void emit_movsbl_indexed(int offset, int rs, int rt)
1249{
1250 assert(offset>-256&&offset<256);
1251 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1252 if(offset>=0) {
1253 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1254 }else{
1255 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1256 }
1257}
1258
1259static void emit_movswl_indexed(int offset, int rs, int rt)
1260{
1261 assert(offset>-256&&offset<256);
1262 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1263 if(offset>=0) {
1264 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1265 }else{
1266 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1267 }
1268}
1269
1270static void emit_movzbl_indexed(int offset, int rs, int rt)
1271{
1272 assert(offset>-4096&&offset<4096);
1273 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1274 if(offset>=0) {
1275 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1276 }else{
1277 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1278 }
1279}
1280
1281static void emit_movzwl_indexed(int offset, int rs, int rt)
1282{
1283 assert(offset>-256&&offset<256);
1284 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1285 if(offset>=0) {
1286 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1287 }else{
1288 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1289 }
1290}
1291
1292static void emit_ldrd(int offset, int rs, int rt)
1293{
1294 assert(offset>-256&&offset<256);
1295 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1296 if(offset>=0) {
1297 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1298 }else{
1299 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1300 }
1301}
1302
1303static void emit_readword(void *addr, int rt)
1304{
1305 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1306 assert(offset<4096);
1307 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1308 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1309}
1310#define emit_readptr emit_readword
1311
1312static void emit_writeword_indexed(int rt, int offset, int rs)
1313{
1314 assert(offset>-4096&&offset<4096);
1315 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1316 if(offset>=0) {
1317 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1318 }else{
1319 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1320 }
1321}
1322
1323static void emit_writehword_indexed(int rt, int offset, int rs)
1324{
1325 assert(offset>-256&&offset<256);
1326 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1327 if(offset>=0) {
1328 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1329 }else{
1330 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1331 }
1332}
1333
1334static void emit_writebyte_indexed(int rt, int offset, int rs)
1335{
1336 assert(offset>-4096&&offset<4096);
1337 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1338 if(offset>=0) {
1339 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1340 }else{
1341 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1342 }
1343}
1344
1345static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1346{
1347 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1348 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1349}
1350
1351static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1352{
1353 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1354 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1355}
1356
1357static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1358{
1359 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1360 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1361}
1362
1363static void emit_writeword(int rt, void *addr)
1364{
1365 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1366 assert(offset<4096);
1367 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1368 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1369}
1370
1371static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1372{
1373 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1374 assert(rs1<16);
1375 assert(rs2<16);
1376 assert(hi<16);
1377 assert(lo<16);
1378 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1379}
1380
1381static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1382{
1383 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1384 assert(rs1<16);
1385 assert(rs2<16);
1386 assert(hi<16);
1387 assert(lo<16);
1388 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1389}
1390
1391static void emit_clz(int rs,int rt)
1392{
1393 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1394 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1395}
1396
1397static void emit_subcs(int rs1,int rs2,int rt)
1398{
1399 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1400 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1401}
1402
1403static void emit_shrcc_imm(int rs,u_int imm,int rt)
1404{
1405 assert(imm>0);
1406 assert(imm<32);
1407 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1408 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1409}
1410
1411static void emit_shrne_imm(int rs,u_int imm,int rt)
1412{
1413 assert(imm>0);
1414 assert(imm<32);
1415 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1416 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1417}
1418
1419static void emit_negmi(int rs, int rt)
1420{
1421 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1422 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1423}
1424
1425static void emit_negsmi(int rs, int rt)
1426{
1427 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1428 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1429}
1430
1431static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1432{
1433 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1434 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1435}
1436
1437static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1438{
1439 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1440 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1441}
1442
1443static void emit_teq(int rs, int rt)
1444{
1445 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1446 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1447}
1448
1449static unused void emit_rsbimm(int rs, int imm, int rt)
1450{
1451 u_int armval;
1452 genimm_checked(imm,&armval);
1453 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1454 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1455}
1456
1457// Conditionally select one of two immediates, optimizing for small code size
1458// This will only be called if HAVE_CMOV_IMM is defined
1459static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1460{
1461 u_int armval;
1462 if(genimm(imm2-imm1,&armval)) {
1463 emit_movimm(imm1,rt);
1464 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1465 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1466 }else if(genimm(imm1-imm2,&armval)) {
1467 emit_movimm(imm1,rt);
1468 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1469 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1470 }
1471 else {
1472 #ifndef HAVE_ARMV7
1473 emit_movimm(imm1,rt);
1474 add_literal((int)out,imm2);
1475 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1476 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1477 #else
1478 emit_movw(imm1&0x0000FFFF,rt);
1479 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1480 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1481 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1482 }
1483 emit_movt(imm1&0xFFFF0000,rt);
1484 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1485 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1486 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1487 }
1488 #endif
1489 }
1490}
1491
1492// special case for checking invalid_code
1493static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1494{
1495 assert(imm<128&&imm>=0);
1496 assert(r>=0&&r<16);
1497 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1498 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1499 emit_cmpimm(HOST_TEMPREG,imm);
1500}
1501
1502static void emit_callne(int a)
1503{
1504 assem_debug("blne %x\n",a);
1505 u_int offset=genjmp(a);
1506 output_w32(0x1b000000|offset);
1507}
1508
1509// Used to preload hash table entries
1510static unused void emit_prefetchreg(int r)
1511{
1512 assem_debug("pld %s\n",regname[r]);
1513 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1514}
1515
1516// Special case for mini_ht
1517static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1518{
1519 assert(offset<4096);
1520 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1521 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1522}
1523
1524static void emit_orrne_imm(int rs,int imm,int rt)
1525{
1526 u_int armval;
1527 genimm_checked(imm,&armval);
1528 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1529 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1530}
1531
1532static unused void emit_addpl_imm(int rs,int imm,int rt)
1533{
1534 u_int armval;
1535 genimm_checked(imm,&armval);
1536 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1537 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1538}
1539
1540static void emit_jno_unlikely(int a)
1541{
1542 //emit_jno(a);
1543 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1544 output_w32(0x72800000|rd_rn_rm(15,15,0));
1545}
1546
1547static void save_regs_all(u_int reglist)
1548{
1549 int i;
1550 if(!reglist) return;
1551 assem_debug("stmia fp,{");
1552 for(i=0;i<16;i++)
1553 if(reglist&(1<<i))
1554 assem_debug("r%d,",i);
1555 assem_debug("}\n");
1556 output_w32(0xe88b0000|reglist);
1557}
1558
1559static void restore_regs_all(u_int reglist)
1560{
1561 int i;
1562 if(!reglist) return;
1563 assem_debug("ldmia fp,{");
1564 for(i=0;i<16;i++)
1565 if(reglist&(1<<i))
1566 assem_debug("r%d,",i);
1567 assem_debug("}\n");
1568 output_w32(0xe89b0000|reglist);
1569}
1570
1571// Save registers before function call
1572static void save_regs(u_int reglist)
1573{
1574 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1575 save_regs_all(reglist);
1576}
1577
1578// Restore registers after function call
1579static void restore_regs(u_int reglist)
1580{
1581 reglist&=CALLER_SAVE_REGS;
1582 restore_regs_all(reglist);
1583}
1584
1585/* Stubs/epilogue */
1586
1587static void literal_pool(int n)
1588{
1589 if(!literalcount) return;
1590 if(n) {
1591 if((int)out-literals[0][0]<4096-n) return;
1592 }
1593 u_int *ptr;
1594 int i;
1595 for(i=0;i<literalcount;i++)
1596 {
1597 u_int l_addr=(u_int)out;
1598 int j;
1599 for(j=0;j<i;j++) {
1600 if(literals[j][1]==literals[i][1]) {
1601 //printf("dup %08x\n",literals[i][1]);
1602 l_addr=literals[j][0];
1603 break;
1604 }
1605 }
1606 ptr=(u_int *)literals[i][0];
1607 u_int offset=l_addr-(u_int)ptr-8;
1608 assert(offset<4096);
1609 assert(!(offset&3));
1610 *ptr|=offset;
1611 if(l_addr==(u_int)out) {
1612 literals[i][0]=l_addr; // remember for dupes
1613 output_w32(literals[i][1]);
1614 }
1615 }
1616 literalcount=0;
1617}
1618
1619static void literal_pool_jumpover(int n)
1620{
1621 if(!literalcount) return;
1622 if(n) {
1623 if((int)out-literals[0][0]<4096-n) return;
1624 }
1625 void *jaddr = out;
1626 emit_jmp(0);
1627 literal_pool(0);
1628 set_jump_target(jaddr, out);
1629}
1630
1631// parsed by get_pointer, find_extjump_insn
1632static void emit_extjump2(u_char *addr, u_int target, void *linker)
1633{
1634 u_char *ptr=(u_char *)addr;
1635 assert((ptr[3]&0x0e)==0xa);
1636 (void)ptr;
1637
1638 emit_loadlp(target,0);
1639 emit_loadlp((u_int)addr,1);
1640 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
1641 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1642//DEBUG >
1643#ifdef DEBUG_CYCLE_COUNT
1644 emit_readword(&last_count,ECX);
1645 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1646 emit_readword(&next_interupt,ECX);
1647 emit_writeword(HOST_CCREG,&Count);
1648 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1649 emit_writeword(ECX,&last_count);
1650#endif
1651//DEBUG <
1652 emit_far_jump(linker);
1653}
1654
1655static void check_extjump2(void *src)
1656{
1657 u_int *ptr = src;
1658 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1659 (void)ptr;
1660}
1661
1662// put rt_val into rt, potentially making use of rs with value rs_val
1663static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1664{
1665 u_int armval;
1666 int diff;
1667 if(genimm(rt_val,&armval)) {
1668 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1669 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1670 return;
1671 }
1672 if(genimm(~rt_val,&armval)) {
1673 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1674 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1675 return;
1676 }
1677 diff=rt_val-rs_val;
1678 if(genimm(diff,&armval)) {
1679 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1680 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1681 return;
1682 }else if(genimm(-diff,&armval)) {
1683 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1684 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1685 return;
1686 }
1687 emit_movimm(rt_val,rt);
1688}
1689
// Return 1 if emit_movimm_from() can do its job cheaply for these two
// values, 0 otherwise. "Cheap" here means v1 == v2, or the difference (in
// either direction) shrinks below 0x100 once its trailing pairs of zero
// bits are shifted out.
static int is_similar_value(u_int v1,u_int v2)
{
  if (v1 == v2)
    return 1;

  int diff = v2-v1;
  u_int xs;

  // forward difference
  xs = diff;
  while (xs != 0 && (xs&3) == 0)
    xs >>= 2;
  if (xs < 0x100)
    return 1;

  // reverse difference
  xs = -diff;
  while (xs != 0 && (xs&3) == 0)
    xs >>= 2;
  if (xs < 0x100)
    return 1;

  return 0;
}
1705
1706static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1707{
1708 switch(type) {
1709 case LOADB_STUB: emit_signextend8(rs,rt); break;
1710 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1711 case LOADH_STUB: emit_signextend16(rs,rt); break;
1712 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1713 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1714 default: assert(0);
1715 }
1716}
1717
1718#include "pcsxmem.h"
1719#include "pcsxmem_inline.c"
1720
1721static void do_readstub(int n)
1722{
1723 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1724 literal_pool(256);
1725 set_jump_target(stubs[n].addr, out);
1726 enum stub_type type=stubs[n].type;
1727 int i=stubs[n].a;
1728 int rs=stubs[n].b;
1729 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1730 u_int reglist=stubs[n].e;
1731 const signed char *i_regmap=i_regs->regmap;
1732 int rt;
1733 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1734 rt=get_reg(i_regmap,FTEMP);
1735 }else{
1736 rt=get_reg(i_regmap,dops[i].rt1);
1737 }
1738 assert(rs>=0);
1739 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1740 void *restore_jump = NULL;
1741 reglist|=(1<<rs);
1742 for(r=0;r<=12;r++) {
1743 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1744 temp=r; break;
1745 }
1746 }
1747 if(rt>=0&&dops[i].rt1!=0)
1748 reglist&=~(1<<rt);
1749 if(temp==-1) {
1750 save_regs(reglist);
1751 regs_saved=1;
1752 temp=(rs==0)?2:0;
1753 }
1754 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1755 temp2=1;
1756 emit_readword(&mem_rtab,temp);
1757 emit_shrimm(rs,12,temp2);
1758 emit_readword_dualindexedx4(temp,temp2,temp2);
1759 emit_lsls_imm(temp2,1,temp2);
1760 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1761 switch(type) {
1762 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1763 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1764 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1765 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1766 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1767 default: assert(0);
1768 }
1769 }
1770 if(regs_saved) {
1771 restore_jump=out;
1772 emit_jcc(0); // jump to reg restore
1773 }
1774 else
1775 emit_jcc(stubs[n].retaddr); // return address
1776
1777 if(!regs_saved)
1778 save_regs(reglist);
1779 void *handler=NULL;
1780 if(type==LOADB_STUB||type==LOADBU_STUB)
1781 handler=jump_handler_read8;
1782 if(type==LOADH_STUB||type==LOADHU_STUB)
1783 handler=jump_handler_read16;
1784 if(type==LOADW_STUB)
1785 handler=jump_handler_read32;
1786 assert(handler);
1787 pass_args(rs,temp2);
1788 int cc=get_reg(i_regmap,CCREG);
1789 if(cc<0)
1790 emit_loadreg(CCREG,2);
1791 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1792 emit_far_call(handler);
1793 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1794 mov_loadtype_adj(type,0,rt);
1795 }
1796 if(restore_jump)
1797 set_jump_target(restore_jump, out);
1798 restore_regs(reglist);
1799 emit_jmp(stubs[n].retaddr); // return address
1800}
1801
1802static void inline_readstub(enum stub_type type, int i, u_int addr,
1803 const signed char regmap[], int target, int adj, u_int reglist)
1804{
1805 int rs=get_reg(regmap,target);
1806 int rt=get_reg(regmap,target);
1807 if(rs<0) rs=get_reg(regmap,-1);
1808 assert(rs>=0);
1809 u_int is_dynamic;
1810 uintptr_t host_addr = 0;
1811 void *handler;
1812 int cc=get_reg(regmap,CCREG);
1813 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1814 return;
1815 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1816 if (handler == NULL) {
1817 if(rt<0||dops[i].rt1==0)
1818 return;
1819 if(addr!=host_addr)
1820 emit_movimm_from(addr,rs,host_addr,rs);
1821 switch(type) {
1822 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1823 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1824 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1825 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1826 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1827 default: assert(0);
1828 }
1829 return;
1830 }
1831 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1832 if(is_dynamic) {
1833 if(type==LOADB_STUB||type==LOADBU_STUB)
1834 handler=jump_handler_read8;
1835 if(type==LOADH_STUB||type==LOADHU_STUB)
1836 handler=jump_handler_read16;
1837 if(type==LOADW_STUB)
1838 handler=jump_handler_read32;
1839 }
1840
1841 // call a memhandler
1842 if(rt>=0&&dops[i].rt1!=0)
1843 reglist&=~(1<<rt);
1844 save_regs(reglist);
1845 if(target==0)
1846 emit_movimm(addr,0);
1847 else if(rs!=0)
1848 emit_mov(rs,0);
1849 if(cc<0)
1850 emit_loadreg(CCREG,2);
1851 if(is_dynamic) {
1852 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1853 emit_addimm(cc<0?2:cc,adj,2);
1854 }
1855 else {
1856 emit_readword(&last_count,3);
1857 emit_addimm(cc<0?2:cc,adj,2);
1858 emit_add(2,3,2);
1859 emit_writeword(2,&Count);
1860 }
1861
1862 emit_far_call(handler);
1863
1864 if(rt>=0&&dops[i].rt1!=0) {
1865 switch(type) {
1866 case LOADB_STUB: emit_signextend8(0,rt); break;
1867 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1868 case LOADH_STUB: emit_signextend16(0,rt); break;
1869 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1870 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1871 default: assert(0);
1872 }
1873 }
1874 restore_regs(reglist);
1875}
1876
1877static void do_writestub(int n)
1878{
1879 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1880 literal_pool(256);
1881 set_jump_target(stubs[n].addr, out);
1882 enum stub_type type=stubs[n].type;
1883 int i=stubs[n].a;
1884 int rs=stubs[n].b;
1885 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1886 u_int reglist=stubs[n].e;
1887 const signed char *i_regmap=i_regs->regmap;
1888 int rt,r;
1889 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1890 rt=get_reg(i_regmap,r=FTEMP);
1891 }else{
1892 rt=get_reg(i_regmap,r=dops[i].rs2);
1893 }
1894 assert(rs>=0);
1895 assert(rt>=0);
1896 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1897 void *restore_jump = NULL;
1898 int reglist2=reglist|(1<<rs)|(1<<rt);
1899 for(rtmp=0;rtmp<=12;rtmp++) {
1900 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1901 temp=rtmp; break;
1902 }
1903 }
1904 if(temp==-1) {
1905 save_regs(reglist);
1906 regs_saved=1;
1907 for(rtmp=0;rtmp<=3;rtmp++)
1908 if(rtmp!=rs&&rtmp!=rt)
1909 {temp=rtmp;break;}
1910 }
1911 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1912 temp2=3;
1913 emit_readword(&mem_wtab,temp);
1914 emit_shrimm(rs,12,temp2);
1915 emit_readword_dualindexedx4(temp,temp2,temp2);
1916 emit_lsls_imm(temp2,1,temp2);
1917 switch(type) {
1918 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1919 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1920 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1921 default: assert(0);
1922 }
1923 if(regs_saved) {
1924 restore_jump=out;
1925 emit_jcc(0); // jump to reg restore
1926 }
1927 else
1928 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1929
1930 if(!regs_saved)
1931 save_regs(reglist);
1932 void *handler=NULL;
1933 switch(type) {
1934 case STOREB_STUB: handler=jump_handler_write8; break;
1935 case STOREH_STUB: handler=jump_handler_write16; break;
1936 case STOREW_STUB: handler=jump_handler_write32; break;
1937 default: assert(0);
1938 }
1939 assert(handler);
1940 pass_args(rs,rt);
1941 if(temp2!=3)
1942 emit_mov(temp2,3);
1943 int cc=get_reg(i_regmap,CCREG);
1944 if(cc<0)
1945 emit_loadreg(CCREG,2);
1946 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1947 // returns new cycle_count
1948 emit_far_call(handler);
1949 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1950 if(cc<0)
1951 emit_storereg(CCREG,2);
1952 if(restore_jump)
1953 set_jump_target(restore_jump, out);
1954 restore_regs(reglist);
1955 emit_jmp(stubs[n].retaddr);
1956}
1957
1958static void inline_writestub(enum stub_type type, int i, u_int addr,
1959 const signed char regmap[], int target, int adj, u_int reglist)
1960{
1961 int rs=get_reg(regmap,-1);
1962 int rt=get_reg(regmap,target);
1963 assert(rs>=0);
1964 assert(rt>=0);
1965 uintptr_t host_addr = 0;
1966 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1967 if (handler == NULL) {
1968 if(addr!=host_addr)
1969 emit_movimm_from(addr,rs,host_addr,rs);
1970 switch(type) {
1971 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1972 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1973 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1974 default: assert(0);
1975 }
1976 return;
1977 }
1978
1979 // call a memhandler
1980 save_regs(reglist);
1981 pass_args(rs,rt);
1982 int cc=get_reg(regmap,CCREG);
1983 if(cc<0)
1984 emit_loadreg(CCREG,2);
1985 emit_addimm(cc<0?2:cc,adj,2);
1986 emit_movimm((u_int)handler,3);
1987 // returns new cycle_count
1988 emit_far_call(jump_handler_write_h);
1989 emit_addimm(0,-adj,cc<0?2:cc);
1990 if(cc<0)
1991 emit_storereg(CCREG,2);
1992 restore_regs(reglist);
1993}
1994
1995// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1996static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
1997{
1998 #ifndef HAVE_ARMV7
1999 emit_loadlp((int)source, 1);
2000 emit_loadlp((int)copy, 2);
2001 emit_loadlp(source_len, 3);
2002 #else
2003 emit_movw(((u_int)source)&0x0000FFFF, 1);
2004 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2005 emit_movt(((u_int)source)&0xFFFF0000, 1);
2006 emit_movt(((u_int)copy)&0xFFFF0000, 2);
2007 emit_movw(source_len, 3);
2008 #endif
2009 emit_movimm(arg0, 0);
2010}
2011
2012static void *do_dirty_stub(int i, u_int source_len)
2013{
2014 assem_debug("do_dirty_stub %x\n",start+i*4);
2015 do_dirty_stub_emit_args(start + i*4, source_len);
2016 emit_far_call(verify_code);
2017 void *entry = out;
2018 load_regs_entry(i);
2019 if (entry == out)
2020 entry = instr_addr[i];
2021 emit_jmp(instr_addr[i]);
2022 return entry;
2023}
2024
2025static void do_dirty_stub_ds(u_int source_len)
2026{
2027 do_dirty_stub_emit_args(start + 1, source_len);
2028 emit_far_call(verify_code_ds);
2029}
2030
2031/* Special assem */
2032
2033static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
2034{
2035 save_regs_all(reglist);
2036 cop2_do_stall_check(op, i, i_regs, 0);
2037#ifdef PCNT
2038 emit_movimm(op, 0);
2039 emit_far_call(pcnt_gte_start);
2040#endif
2041 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
2042}
2043
2044static void c2op_epilogue(u_int op,u_int reglist)
2045{
2046#ifdef PCNT
2047 emit_movimm(op,0);
2048 emit_far_call(pcnt_gte_end);
2049#endif
2050 restore_regs_all(reglist);
2051}
2052
2053static void c2op_call_MACtoIR(int lm,int need_flags)
2054{
2055 if(need_flags)
2056 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2057 else
2058 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2059}
2060
2061static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2062{
2063 emit_far_call(func);
2064 // func is C code and trashes r0
2065 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2066 if(need_flags||need_ir)
2067 c2op_call_MACtoIR(lm,need_flags);
2068 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2069}
2070
2071static void c2op_assemble(int i, const struct regstat *i_regs)
2072{
2073 u_int c2op = source[i] & 0x3f;
2074 u_int reglist_full = get_host_reglist(i_regs->regmap);
2075 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2076 int need_flags, need_ir;
2077
2078 if (gte_handlers[c2op]!=NULL) {
2079 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2080 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2081 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2082 source[i],gte_unneeded[i+1],need_flags,need_ir);
2083 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
2084 need_flags=0;
2085 int shift = (source[i] >> 19) & 1;
2086 int lm = (source[i] >> 10) & 1;
2087 switch(c2op) {
2088#ifndef DRC_DBG
2089 case GTE_MVMVA: {
2090#ifdef HAVE_ARMV5
2091 int v = (source[i] >> 15) & 3;
2092 int cv = (source[i] >> 13) & 3;
2093 int mx = (source[i] >> 17) & 3;
2094 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2095 c2op_prologue(c2op,i,i_regs,reglist);
2096 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2097 if(v<3)
2098 emit_ldrd(v*8,0,4);
2099 else {
2100 emit_movzwl_indexed(9*4,0,4); // gteIR
2101 emit_movzwl_indexed(10*4,0,6);
2102 emit_movzwl_indexed(11*4,0,5);
2103 emit_orrshl_imm(6,16,4);
2104 }
2105 if(mx<3)
2106 emit_addimm(0,32*4+mx*8*4,6);
2107 else
2108 emit_readword(&zeromem_ptr,6);
2109 if(cv<3)
2110 emit_addimm(0,32*4+(cv*8+5)*4,7);
2111 else
2112 emit_readword(&zeromem_ptr,7);
2113#ifdef __ARM_NEON__
2114 emit_movimm(source[i],1); // opcode
2115 emit_far_call(gteMVMVA_part_neon);
2116 if(need_flags) {
2117 emit_movimm(lm,1);
2118 emit_far_call(gteMACtoIR_flags_neon);
2119 }
2120#else
2121 if(cv==3&&shift)
2122 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
2123 else {
2124 emit_movimm(shift,1);
2125 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2126 }
2127 if(need_flags||need_ir)
2128 c2op_call_MACtoIR(lm,need_flags);
2129#endif
2130#else /* if not HAVE_ARMV5 */
2131 c2op_prologue(c2op,i,i_regs,reglist);
2132 emit_movimm(source[i],1); // opcode
2133 emit_writeword(1,&psxRegs.code);
2134 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2135#endif
2136 break;
2137 }
2138 case GTE_OP:
2139 c2op_prologue(c2op,i,i_regs,reglist);
2140 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2141 if(need_flags||need_ir) {
2142 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2143 c2op_call_MACtoIR(lm,need_flags);
2144 }
2145 break;
2146 case GTE_DPCS:
2147 c2op_prologue(c2op,i,i_regs,reglist);
2148 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2149 break;
2150 case GTE_INTPL:
2151 c2op_prologue(c2op,i,i_regs,reglist);
2152 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2153 break;
2154 case GTE_SQR:
2155 c2op_prologue(c2op,i,i_regs,reglist);
2156 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2157 if(need_flags||need_ir) {
2158 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2159 c2op_call_MACtoIR(lm,need_flags);
2160 }
2161 break;
2162 case GTE_DCPL:
2163 c2op_prologue(c2op,i,i_regs,reglist);
2164 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2165 break;
2166 case GTE_GPF:
2167 c2op_prologue(c2op,i,i_regs,reglist);
2168 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2169 break;
2170 case GTE_GPL:
2171 c2op_prologue(c2op,i,i_regs,reglist);
2172 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2173 break;
2174#endif
2175 default:
2176 c2op_prologue(c2op,i,i_regs,reglist);
2177#ifdef DRC_DBG
2178 emit_movimm(source[i],1); // opcode
2179 emit_writeword(1,&psxRegs.code);
2180#endif
2181 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2182 break;
2183 }
2184 c2op_epilogue(c2op,reglist);
2185 }
2186}
2187
/*
 * Emit code that sanitizes a value written to GTE control register 31.
 *
 * Reference semantics:
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 * sl   - host register holding the source value (left unmodified)
 * temp - host register receiving the result
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // Drop the low 12 bits with a shift-right / shift-left pair.
  emit_shrimm(sl,12,temp);
  emit_shlimm(temp,12,temp);
  // The shift pair leaves bit 31 of the source intact; clear it so that the
  // result really is (value & 0x7ffff000) and bit 31 is derived only from
  // the mask test below.
  emit_andimm(temp,0x7fffffff,temp);
  // 0x7f87e000 is tested in three pieces. Each following test only runs
  // while the previous ones found no bit set (EQ), so the final flags are
  // NE iff any bit of the full mask is set.
  emit_testimm(temp,0x7f000000);
  emit_testeqimm(temp,0x00870000);
  emit_testeqimm(temp,0x0000e000);
  emit_orrne_imm(temp,0x80000000,temp);
}
2199
2200static void do_mfc2_31_one(u_int copr,signed char temp)
2201{
2202 emit_readword(&reg_cop2d[copr],temp);
2203 emit_lsls_imm(temp,16,temp);
2204 emit_cmovs_imm(0,temp);
2205 emit_cmpimm(temp,0xf80<<16);
2206 emit_andimm(temp,0xf80<<16,temp);
2207 emit_cmovae_imm(0xf80<<16,temp);
2208}
2209
2210static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2211{
2212 if (temp < 0) {
2213 host_tempreg_acquire();
2214 temp = HOST_TEMPREG;
2215 }
2216 do_mfc2_31_one(9,temp);
2217 emit_shrimm(temp,7+16,tl);
2218 do_mfc2_31_one(10,temp);
2219 emit_orrshr_imm(temp,2+16,tl);
2220 do_mfc2_31_one(11,temp);
2221 emit_orrshr_imm(temp,-3+16,tl);
2222 emit_writeword(tl,&reg_cop2d[29]);
2223 if (temp == HOST_TEMPREG)
2224 host_tempreg_release();
2225}
2226
2227static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
2228{
2229 // case 0x18: MULT
2230 // case 0x19: MULTU
2231 // case 0x1A: DIV
2232 // case 0x1B: DIVU
2233 // case 0x1C: DMULT
2234 // case 0x1D: DMULTU
2235 // case 0x1E: DDIV
2236 // case 0x1F: DDIVU
2237 if(dops[i].rs1&&dops[i].rs2)
2238 {
2239 if((dops[i].opcode2&4)==0) // 32-bit
2240 {
2241 if(dops[i].opcode2==0x18) // MULT
2242 {
2243 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2244 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2245 signed char hi=get_reg(i_regs->regmap,HIREG);
2246 signed char lo=get_reg(i_regs->regmap,LOREG);
2247 assert(m1>=0);
2248 assert(m2>=0);
2249 assert(hi>=0);
2250 assert(lo>=0);
2251 emit_smull(m1,m2,hi,lo);
2252 }
2253 if(dops[i].opcode2==0x19) // MULTU
2254 {
2255 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2256 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2257 signed char hi=get_reg(i_regs->regmap,HIREG);
2258 signed char lo=get_reg(i_regs->regmap,LOREG);
2259 assert(m1>=0);
2260 assert(m2>=0);
2261 assert(hi>=0);
2262 assert(lo>=0);
2263 emit_umull(m1,m2,hi,lo);
2264 }
2265 if(dops[i].opcode2==0x1A) // DIV
2266 {
2267 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2268 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2269 assert(d1>=0);
2270 assert(d2>=0);
2271 signed char quotient=get_reg(i_regs->regmap,LOREG);
2272 signed char remainder=get_reg(i_regs->regmap,HIREG);
2273 assert(quotient>=0);
2274 assert(remainder>=0);
2275 emit_movs(d1,remainder);
2276 emit_movimm(0xffffffff,quotient);
2277 emit_negmi(quotient,quotient); // .. quotient and ..
2278 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2279 emit_movs(d2,HOST_TEMPREG);
2280 emit_jeq(out+52); // Division by zero
2281 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2282#ifdef HAVE_ARMV5
2283 emit_clz(HOST_TEMPREG,quotient);
2284 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2285#else
2286 emit_movimm(0,quotient);
2287 emit_addpl_imm(quotient,1,quotient);
2288 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2289 emit_jns(out-2*4);
2290#endif
2291 emit_orimm(quotient,1<<31,quotient);
2292 emit_shr(quotient,quotient,quotient);
2293 emit_cmp(remainder,HOST_TEMPREG);
2294 emit_subcs(remainder,HOST_TEMPREG,remainder);
2295 emit_adcs(quotient,quotient,quotient);
2296 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2297 emit_jcc(out-16); // -4
2298 emit_teq(d1,d2);
2299 emit_negmi(quotient,quotient);
2300 emit_test(d1,d1);
2301 emit_negmi(remainder,remainder);
2302 }
2303 if(dops[i].opcode2==0x1B) // DIVU
2304 {
2305 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2306 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2307 assert(d1>=0);
2308 assert(d2>=0);
2309 signed char quotient=get_reg(i_regs->regmap,LOREG);
2310 signed char remainder=get_reg(i_regs->regmap,HIREG);
2311 assert(quotient>=0);
2312 assert(remainder>=0);
2313 emit_mov(d1,remainder);
2314 emit_movimm(0xffffffff,quotient); // div0 case
2315 emit_test(d2,d2);
2316 emit_jeq(out+40); // Division by zero
2317#ifdef HAVE_ARMV5
2318 emit_clz(d2,HOST_TEMPREG);
2319 emit_movimm(1<<31,quotient);
2320 emit_shl(d2,HOST_TEMPREG,d2);
2321#else
2322 emit_movimm(0,HOST_TEMPREG);
2323 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2324 emit_lslpls_imm(d2,1,d2);
2325 emit_jns(out-2*4);
2326 emit_movimm(1<<31,quotient);
2327#endif
2328 emit_shr(quotient,HOST_TEMPREG,quotient);
2329 emit_cmp(remainder,d2);
2330 emit_subcs(remainder,d2,remainder);
2331 emit_adcs(quotient,quotient,quotient);
2332 emit_shrcc_imm(d2,1,d2);
2333 emit_jcc(out-16); // -4
2334 }
2335 }
2336 else // 64-bit
2337 assert(0);
2338 }
2339 else
2340 {
2341 // Multiply by zero is zero.
2342 // MIPS does not have a divide by zero exception.
2343 // The result is undefined, we return zero.
2344 signed char hr=get_reg(i_regs->regmap,HIREG);
2345 signed char lr=get_reg(i_regs->regmap,LOREG);
2346 if(hr>=0) emit_zeroreg(hr);
2347 if(lr>=0) emit_zeroreg(lr);
2348 }
2349}
2350#define multdiv_assemble multdiv_assemble_arm
2351
2352static void do_jump_vaddr(int rs)
2353{
2354 emit_far_jump(jump_vaddr_reg[rs]);
2355}
2356
/*
 * Nothing to emit on ARM: the hash is computed by a single instruction in
 * do_rhash(). On x86 this step loads the value 0xf8 into the register.
 */
static void do_preload_rhash(int r) {
  (void)r; // intentionally unused
}
2361
2362static void do_preload_rhtbl(int ht) {
2363 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2364}
2365
/*
 * Emit code that computes the mini hash table offset for the address in
 * `rs`: rh = rs & 0xf8.
 */
static void do_rhash(int rs,int rh) {
  const int hash_mask = 0xf8;

  emit_andimm(rs,hash_mask,rh);
}
2369
2370static void do_miniht_load(int ht,int rh) {
2371 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2372 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2373}
2374
/*
 * Emit the mini hash table dispatch: compare the loaded entry (`rh`) with
 * the target address (`rs`); on a match load register 15 (pc) from
 * [ht+4], otherwise fall through to the jump_vaddr stub for `rs`.
 */
static void do_miniht_jump(int rs,int rh,int ht) {
  int jump_reg = rs;

  emit_cmp(rh,rs);
  emit_ldreq_indexed(ht,4,15);
  #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // With this hack the slow path always goes through r7.
  if(jump_reg!=7)
    emit_mov(jump_reg,7);
  jump_reg=7;
  #endif
  do_jump_vaddr(jump_reg);
}
2385
2386static void do_miniht_insert(u_int return_address,int rt,int temp) {
2387 #ifndef HAVE_ARMV7
2388 emit_movimm(return_address,rt); // PC into link register
2389 add_to_linker(out,return_address,1);
2390 emit_pcreladdr(temp);
2391 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2392 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2393 #else
2394 emit_movw(return_address&0x0000FFFF,rt);
2395 add_to_linker(out,return_address,1);
2396 emit_pcreladdr(temp);
2397 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2398 emit_movt(return_address&0xFFFF0000,rt);
2399 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2400 #endif
2401}
2402
2403// CPU-architecture-specific initialization
2404static void arch_init(void)
2405{
2406 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2407 struct tramp_insns *ops = ndrc->tramp.ops;
2408 size_t i;
2409 assert(!(diff & 3));
2410 assert(diff < 0x1000);
2411 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2412 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2413 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2414 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2415}
2416
2417// vim:shiftwidth=2:expandtab