drc: prefer callee-saved regs on alloc
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#define unused __attribute__((unused))
31
32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
// Entry points referenced by the code generator; only declared here.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN routine for each of the 16 ARM register numbers.
// Registers without a routine (r11, r13, r14, r15) have a null entry.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [11] = 0,
  [12] = jump_vaddr_r12,
  [13] = 0,
  [14] = 0,
  [15] = 0
};
72
73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
104static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
105
106/* Linker */
107
108static void set_jump_target(void *addr, void *target_)
109{
110 u_int target = (u_int)target_;
111 u_char *ptr = addr;
112 u_int *ptr2=(u_int *)ptr;
113 if(ptr[3]==0xe2) {
114 assert((target-(u_int)ptr2-8)<1024);
115 assert(((uintptr_t)addr&3)==0);
116 assert((target&3)==0);
117 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
118 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
119 }
120 else if(ptr[3]==0x72) {
121 // generated by emit_jno_unlikely
122 if((target-(u_int)ptr2-8)<1024) {
123 assert(((uintptr_t)addr&3)==0);
124 assert((target&3)==0);
125 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
126 }
127 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
128 assert(((uintptr_t)addr&3)==0);
129 assert((target&3)==0);
130 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
131 }
132 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
133 }
134 else {
135 assert((ptr[3]&0x0e)==0xa);
136 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
137 }
138}
139
// Disabled variant of set_jump_target. It can optionally copy the
// instruction found at the branch target into the slot just before the
// branch and then branch one instruction further. Works, but the
// difference in speed is usually insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes = (u_char *)addr;
  u_int *insn = (u_int *)bytes;
  assert(!copy || insn[-1] == 0xe28dd000);
  if (bytes[3] == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }
  assert((bytes[3] & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0             // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000 // Load
      || (target_insn & 0x08000000))
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
174
175/* Literal pool */
176static void add_literal(int addr,int val)
177{
178 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
179 literals[literalcount][0]=addr;
180 literals[literalcount][1]=val;
181 literalcount++;
182}
183
184// from a pointer to external jump stub (which was produced by emit_extjump2)
185// find where the jumping insn is
186static void *find_extjump_insn(void *stub)
187{
188 int *ptr=(int *)(stub+4);
189 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
190 u_int offset=*ptr&0xfff;
191 void **l_ptr=(void *)ptr+offset+8;
192 return *l_ptr;
193}
194
195// find where external branch is liked to using addr of it's stub:
196// get address that insn one after stub loads (dyna_linker arg1),
197// treat it as a pointer to branch insn,
198// return addr where that branch jumps to
199static void *get_pointer(void *stub)
200{
201 //printf("get_pointer(%x)\n",(int)stub);
202 int *i_ptr=find_extjump_insn(stub);
203 assert((*i_ptr&0x0f000000)==0x0a000000); // b
204 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
205}
206
207// Find the "clean" entry point from a "dirty" entry point
208// by skipping past the call to verify_code
209static void *get_clean_addr(void *addr)
210{
211 signed int *ptr = addr;
212 #ifndef HAVE_ARMV7
213 ptr+=4;
214 #else
215 ptr+=6;
216 #endif
217 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
218 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
219 ptr++;
220 if((*ptr&0xFF000000)==0xea000000) {
221 return (char *)ptr+((*ptr<<8)>>6)+8; // follow jump
222 }
223 return ptr;
224}
225
226static int verify_dirty(const u_int *ptr)
227{
228 #ifndef HAVE_ARMV7
229 u_int offset;
230 // get from literal pool
231 assert((*ptr&0xFFFF0000)==0xe59f0000);
232 offset=*ptr&0xfff;
233 u_int source=*(u_int*)((void *)ptr+offset+8);
234 ptr++;
235 assert((*ptr&0xFFFF0000)==0xe59f0000);
236 offset=*ptr&0xfff;
237 u_int copy=*(u_int*)((void *)ptr+offset+8);
238 ptr++;
239 assert((*ptr&0xFFFF0000)==0xe59f0000);
240 offset=*ptr&0xfff;
241 u_int len=*(u_int*)((void *)ptr+offset+8);
242 ptr++;
243 ptr++;
244 #else
245 // ARMv7 movw/movt
246 assert((*ptr&0xFFF00000)==0xe3000000);
247 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
248 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
249 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
250 ptr+=6;
251 #endif
252 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
253 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
254 //printf("verify_dirty: %x %x %x\n",source,copy,len);
255 return !memcmp((void *)source,(void *)copy,len);
256}
257
258// This doesn't necessarily find all clean entry points, just
259// guarantees that it's not dirty
260static int isclean(void *addr)
261{
262 #ifndef HAVE_ARMV7
263 u_int *ptr=((u_int *)addr)+4;
264 #else
265 u_int *ptr=((u_int *)addr)+6;
266 #endif
267 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
268 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
270 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
271 return 1;
272}
273
274// get source that block at addr was compiled from (host pointers)
275static void get_bounds(void *addr, u_char **start, u_char **end)
276{
277 u_int *ptr = addr;
278 #ifndef HAVE_ARMV7
279 u_int offset;
280 // get from literal pool
281 assert((*ptr&0xFFFF0000)==0xe59f0000);
282 offset=*ptr&0xfff;
283 u_int source=*(u_int*)((void *)ptr+offset+8);
284 ptr++;
285 //assert((*ptr&0xFFFF0000)==0xe59f0000);
286 //offset=*ptr&0xfff;
287 //u_int copy=*(u_int*)((void *)ptr+offset+8);
288 ptr++;
289 assert((*ptr&0xFFFF0000)==0xe59f0000);
290 offset=*ptr&0xfff;
291 u_int len=*(u_int*)((void *)ptr+offset+8);
292 ptr++;
293 ptr++;
294 #else
295 // ARMv7 movw/movt
296 assert((*ptr&0xFFF00000)==0xe3000000);
297 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
298 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
299 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
300 ptr+=6;
301 #endif
302 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
303 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
304 *start=(u_char *)source;
305 *end=(u_char *)source+len;
306}
307
308// Allocate a specific ARM register.
309static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
310{
311 int n;
312 int dirty=0;
313
314 // see if it's already allocated (and dealloc it)
315 for(n=0;n<HOST_REGS;n++)
316 {
317 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
318 dirty=(cur->dirty>>n)&1;
319 cur->regmap[n]=-1;
320 }
321 }
322
323 cur->regmap[hr]=reg;
324 cur->dirty&=~(1<<hr);
325 cur->dirty|=dirty<<hr;
326 cur->isconst&=~(1<<hr);
327}
328
329// Alloc cycle count into dedicated register
330static void alloc_cc(struct regstat *cur,int i)
331{
332 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
333}
334
335/* Assembler */
336
337static unused char regname[16][4] = {
338 "r0",
339 "r1",
340 "r2",
341 "r3",
342 "r4",
343 "r5",
344 "r6",
345 "r7",
346 "r8",
347 "r9",
348 "r10",
349 "fp",
350 "r12",
351 "sp",
352 "lr",
353 "pc"};
354
355static void output_w32(u_int word)
356{
357 *((u_int *)out)=word;
358 out+=4;
359}
360
361static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
362{
363 assert(rd<16);
364 assert(rn<16);
365 assert(rm<16);
366 return((rn<<16)|(rd<<12)|rm);
367}
368
369static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
370{
371 assert(rd<16);
372 assert(rn<16);
373 assert(imm<256);
374 assert((shift&1)==0);
375 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
376}
377
// Try to express imm as an 8-bit value with an even rotation.
// On success stores the 12-bit operand field (rotation in bits 7-11,
// value in bits 0-7) in *encoded and returns 1. Returns 0, with *encoded
// set to 0, when no such encoding exists.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;
  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
393
394static void genimm_checked(u_int imm,u_int *encoded)
395{
396 u_int ret=genimm(imm,encoded);
397 assert(ret);
398 (void)ret;
399}
400
401static u_int genjmp(u_int addr)
402{
403 if (addr < 3) return 0; // a branch that will be patched later
404 int offset = addr-(int)out-8;
405 if (offset < -33554432 || offset >= 33554432) {
406 SysPrintf("genjmp: out of range: %08x\n", offset);
407 abort();
408 return 0;
409 }
410 return ((u_int)offset>>2)&0xffffff;
411}
412
413static unused void emit_breakpoint(void)
414{
415 assem_debug("bkpt #0\n");
416 //output_w32(0xe1200070);
417 output_w32(0xe7f001f0);
418}
419
420static void emit_mov(int rs,int rt)
421{
422 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
423 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
424}
425
426static void emit_movs(int rs,int rt)
427{
428 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
430}
431
432static void emit_add(int rs1,int rs2,int rt)
433{
434 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
435 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
436}
437
438static void emit_adds(int rs1,int rs2,int rt)
439{
440 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
442}
443#define emit_adds_ptr emit_adds
444
445static void emit_adcs(int rs1,int rs2,int rt)
446{
447 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
448 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
449}
450
451static void emit_neg(int rs, int rt)
452{
453 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
454 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
455}
456
457static void emit_sub(int rs1,int rs2,int rt)
458{
459 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
460 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
461}
462
463static void emit_zeroreg(int rt)
464{
465 assem_debug("mov %s,#0\n",regname[rt]);
466 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
467}
468
469static void emit_loadlp(u_int imm,u_int rt)
470{
471 add_literal((int)out,imm);
472 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
473 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
474}
475
476static void emit_movw(u_int imm,u_int rt)
477{
478 assert(imm<65536);
479 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
480 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
481}
482
483static void emit_movt(u_int imm,u_int rt)
484{
485 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
486 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
487}
488
489static void emit_movimm(u_int imm,u_int rt)
490{
491 u_int armval;
492 if(genimm(imm,&armval)) {
493 assem_debug("mov %s,#%d\n",regname[rt],imm);
494 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
495 }else if(genimm(~imm,&armval)) {
496 assem_debug("mvn %s,#%d\n",regname[rt],imm);
497 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
498 }else if(imm<65536) {
499 #ifndef HAVE_ARMV7
500 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
501 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
502 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
503 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
504 #else
505 emit_movw(imm,rt);
506 #endif
507 }else{
508 #ifndef HAVE_ARMV7
509 emit_loadlp(imm,rt);
510 #else
511 emit_movw(imm&0x0000FFFF,rt);
512 emit_movt(imm&0xFFFF0000,rt);
513 #endif
514 }
515}
516
517static void emit_pcreladdr(u_int rt)
518{
519 assem_debug("add %s,pc,#?\n",regname[rt]);
520 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
521}
522
523static void emit_loadreg(int r, int hr)
524{
525 if(r&64) {
526 SysPrintf("64bit load in 32bit mode!\n");
527 assert(0);
528 return;
529 }
530 if((r&63)==0)
531 emit_zeroreg(hr);
532 else {
533 int addr = (int)&psxRegs.GPR.r[r];
534 switch (r) {
535 //case HIREG: addr = &hi; break;
536 //case LOREG: addr = &lo; break;
537 case CCREG: addr = (int)&cycle_count; break;
538 case CSREG: addr = (int)&Status; break;
539 case INVCP: addr = (int)&invc_ptr; break;
540 case ROREG: addr = (int)&ram_offset; break;
541 default: assert(r < 34); break;
542 }
543 u_int offset = addr-(u_int)&dynarec_local;
544 assert(offset<4096);
545 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
546 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
547 }
548}
549
550static void emit_storereg(int r, int hr)
551{
552 if(r&64) {
553 SysPrintf("64bit store in 32bit mode!\n");
554 assert(0);
555 return;
556 }
557 int addr = (int)&psxRegs.GPR.r[r];
558 switch (r) {
559 //case HIREG: addr = &hi; break;
560 //case LOREG: addr = &lo; break;
561 case CCREG: addr = (int)&cycle_count; break;
562 default: assert(r < 34); break;
563 }
564 u_int offset = addr-(u_int)&dynarec_local;
565 assert(offset<4096);
566 assem_debug("str %s,fp+%d\n",regname[hr],offset);
567 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
568}
569
570static void emit_test(int rs, int rt)
571{
572 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
573 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
574}
575
576static void emit_testimm(int rs,int imm)
577{
578 u_int armval;
579 assem_debug("tst %s,#%d\n",regname[rs],imm);
580 genimm_checked(imm,&armval);
581 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
582}
583
584static void emit_testeqimm(int rs,int imm)
585{
586 u_int armval;
587 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
588 genimm_checked(imm,&armval);
589 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
590}
591
592static void emit_not(int rs,int rt)
593{
594 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
595 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
596}
597
598static void emit_and(u_int rs1,u_int rs2,u_int rt)
599{
600 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
601 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
602}
603
604static void emit_or(u_int rs1,u_int rs2,u_int rt)
605{
606 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
607 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
608}
609
610static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
611{
612 assert(rs<16);
613 assert(rt<16);
614 assert(imm<32);
615 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
616 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
617}
618
619static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
620{
621 assert(rs<16);
622 assert(rt<16);
623 assert(imm<32);
624 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
625 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
626}
627
628static void emit_xor(u_int rs1,u_int rs2,u_int rt)
629{
630 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
631 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
632}
633
634static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
635{
636 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
637 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
638}
639
640static void emit_addimm(u_int rs,int imm,u_int rt)
641{
642 assert(rs<16);
643 assert(rt<16);
644 if(imm!=0) {
645 u_int armval;
646 if(genimm(imm,&armval)) {
647 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
648 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
649 }else if(genimm(-imm,&armval)) {
650 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
651 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
652 #ifdef HAVE_ARMV7
653 }else if(rt!=rs&&(u_int)imm<65536) {
654 emit_movw(imm&0x0000ffff,rt);
655 emit_add(rs,rt,rt);
656 }else if(rt!=rs&&(u_int)-imm<65536) {
657 emit_movw(-imm&0x0000ffff,rt);
658 emit_sub(rs,rt,rt);
659 #endif
660 }else if((u_int)-imm<65536) {
661 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
662 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
663 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
664 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
665 }else {
666 do {
667 int shift = (ffs(imm) - 1) & ~1;
668 int imm8 = imm & (0xff << shift);
669 genimm_checked(imm8,&armval);
670 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
671 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
672 rs = rt;
673 imm &= ~imm8;
674 }
675 while (imm != 0);
676 }
677 }
678 else if(rs!=rt) emit_mov(rs,rt);
679}
680
681static void emit_addimm_and_set_flags(int imm,int rt)
682{
683 assert(imm>-65536&&imm<65536);
684 u_int armval;
685 if(genimm(imm,&armval)) {
686 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
687 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
688 }else if(genimm(-imm,&armval)) {
689 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
690 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
691 }else if(imm<0) {
692 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
693 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
694 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
695 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
696 }else{
697 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
698 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
699 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
700 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
701 }
702}
703
704static void emit_addnop(u_int r)
705{
706 assert(r<16);
707 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
708 output_w32(0xe2800000|rd_rn_rm(r,r,0));
709}
710
711static void emit_andimm(int rs,int imm,int rt)
712{
713 u_int armval;
714 if(imm==0) {
715 emit_zeroreg(rt);
716 }else if(genimm(imm,&armval)) {
717 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
718 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
719 }else if(genimm(~imm,&armval)) {
720 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
722 }else if(imm==65535) {
723 #ifndef HAVE_ARMV6
724 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
725 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
726 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
727 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
728 #else
729 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
730 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
731 #endif
732 }else{
733 assert(imm>0&&imm<65535);
734 #ifndef HAVE_ARMV7
735 assem_debug("mov r14,#%d\n",imm&0xFF00);
736 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
737 assem_debug("add r14,r14,#%d\n",imm&0xFF);
738 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
739 #else
740 emit_movw(imm,HOST_TEMPREG);
741 #endif
742 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
743 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
744 }
745}
746
747static void emit_orimm(int rs,int imm,int rt)
748{
749 u_int armval;
750 if(imm==0) {
751 if(rs!=rt) emit_mov(rs,rt);
752 }else if(genimm(imm,&armval)) {
753 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
754 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
755 }else{
756 assert(imm>0&&imm<65536);
757 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
758 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
759 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
760 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
761 }
762}
763
764static void emit_xorimm(int rs,int imm,int rt)
765{
766 u_int armval;
767 if(imm==0) {
768 if(rs!=rt) emit_mov(rs,rt);
769 }else if(genimm(imm,&armval)) {
770 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
771 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
772 }else{
773 assert(imm>0&&imm<65536);
774 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
775 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
776 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
777 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
778 }
779}
780
781static void emit_shlimm(int rs,u_int imm,int rt)
782{
783 assert(imm>0);
784 assert(imm<32);
785 //if(imm==1) ...
786 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
787 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
788}
789
790static void emit_lsls_imm(int rs,int imm,int rt)
791{
792 assert(imm>0);
793 assert(imm<32);
794 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
795 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
796}
797
798static unused void emit_lslpls_imm(int rs,int imm,int rt)
799{
800 assert(imm>0);
801 assert(imm<32);
802 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
803 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
804}
805
806static void emit_shrimm(int rs,u_int imm,int rt)
807{
808 assert(imm>0);
809 assert(imm<32);
810 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
811 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
812}
813
814static void emit_sarimm(int rs,u_int imm,int rt)
815{
816 assert(imm>0);
817 assert(imm<32);
818 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
819 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
820}
821
822static void emit_rorimm(int rs,u_int imm,int rt)
823{
824 assert(imm>0);
825 assert(imm<32);
826 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
827 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
828}
829
// Sign-extend the low 16 bits of rs into rt: sxth on ARMv6 and later,
// otherwise a left shift by 16 followed by an arithmetic right shift.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
  #endif
}
840
// Sign-extend the low 8 bits of rs into rt: sxtb on ARMv6 and later,
// otherwise a left shift by 24 followed by an arithmetic right shift.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
  #endif
}
851
852static void emit_shl(u_int rs,u_int shift,u_int rt)
853{
854 assert(rs<16);
855 assert(rt<16);
856 assert(shift<16);
857 //if(imm==1) ...
858 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
859 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
860}
861
862static void emit_shr(u_int rs,u_int shift,u_int rt)
863{
864 assert(rs<16);
865 assert(rt<16);
866 assert(shift<16);
867 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
868 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
869}
870
871static void emit_sar(u_int rs,u_int shift,u_int rt)
872{
873 assert(rs<16);
874 assert(rt<16);
875 assert(shift<16);
876 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
877 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
878}
879
880static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
881{
882 assert(rs<16);
883 assert(rt<16);
884 assert(shift<16);
885 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
886 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
887}
888
889static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
890{
891 assert(rs<16);
892 assert(rt<16);
893 assert(shift<16);
894 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
895 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
896}
897
898static void emit_cmpimm(int rs,int imm)
899{
900 u_int armval;
901 if(genimm(imm,&armval)) {
902 assem_debug("cmp %s,#%d\n",regname[rs],imm);
903 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
904 }else if(genimm(-imm,&armval)) {
905 assem_debug("cmn %s,#%d\n",regname[rs],imm);
906 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
907 }else if(imm>0) {
908 assert(imm<65536);
909 emit_movimm(imm,HOST_TEMPREG);
910 assem_debug("cmp %s,r14\n",regname[rs]);
911 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
912 }else{
913 assert(imm>-65536);
914 emit_movimm(-imm,HOST_TEMPREG);
915 assem_debug("cmn %s,r14\n",regname[rs]);
916 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
917 }
918}
919
920static void emit_cmovne_imm(int imm,int rt)
921{
922 assem_debug("movne %s,#%d\n",regname[rt],imm);
923 u_int armval;
924 genimm_checked(imm,&armval);
925 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
926}
927
928static void emit_cmovl_imm(int imm,int rt)
929{
930 assem_debug("movlt %s,#%d\n",regname[rt],imm);
931 u_int armval;
932 genimm_checked(imm,&armval);
933 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
934}
935
936static void emit_cmovb_imm(int imm,int rt)
937{
938 assem_debug("movcc %s,#%d\n",regname[rt],imm);
939 u_int armval;
940 genimm_checked(imm,&armval);
941 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
942}
943
944static void emit_cmovae_imm(int imm,int rt)
945{
946 assem_debug("movcs %s,#%d\n",regname[rt],imm);
947 u_int armval;
948 genimm_checked(imm,&armval);
949 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
950}
951
952static void emit_cmovne_reg(int rs,int rt)
953{
954 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
955 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
956}
957
958static void emit_cmovl_reg(int rs,int rt)
959{
960 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
961 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
962}
963
964static void emit_cmovb_reg(int rs,int rt)
965{
966 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
967 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
968}
969
970static void emit_cmovs_reg(int rs,int rt)
971{
972 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
973 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
974}
975
// rt = (rs < imm) ? 1 : 0, using a compare followed by movlt.
// When rt is the same register as rs, rt is zeroed only after the
// compare so that the source value is still intact when it is read.
static void emit_slti32(int rs,int imm,int rt)
{
  const int same_reg = (rs == rt);
  if (!same_reg)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (same_reg)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
983
// rt = 1 if the compare of rs with imm leaves the "cc" condition true,
// else 0 (compare followed by movcc). When rt is the same register as rs,
// rt is zeroed only after the compare.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int same_reg = (rs == rt);
  if (!same_reg)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (same_reg)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
991
992static void emit_cmp(int rs,int rt)
993{
994 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
995 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
996}
997
// rt = (rs > 0) ? 1 : 0.
// Emitted as: cmp rs,#1 ; mov rt,#1 ; movlt rt,#0.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1005
// rt = (rs != 0) ? 1 : 0.
// The flags come from movs (which also copies rs to rt) when the
// registers differ, or from tst rs,rs when they are the same; a movne
// then writes 1.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1013
// rt = (rs1 < rs2) ? 1 : 0, using cmp followed by movlt.
// When rt is also one of the sources it is zeroed only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source = (rs1 == rt || rs2 == rt);
  if (!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_source)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1022
// rt = 1 if cmp rs1,rs2 leaves the "cc" condition true, else 0
// (cmp followed by movcc). When rt is also one of the sources it is
// zeroed only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source = (rs1 == rt || rs2 == rt);
  if (!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_source)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1031
1032static int can_jump_or_call(const void *a)
1033{
1034 intptr_t offset = (u_char *)a - out - 8;
1035 return (-33554432 <= offset && offset < 33554432);
1036}
1037
1038static void emit_call(const void *a_)
1039{
1040 int a = (int)a_;
1041 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1042 u_int offset=genjmp(a);
1043 output_w32(0xeb000000|offset);
1044}
1045
1046static void emit_jmp(const void *a_)
1047{
1048 int a = (int)a_;
1049 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1050 u_int offset=genjmp(a);
1051 output_w32(0xea000000|offset);
1052}
1053
// bne a_
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n", target);
  output_w32(0x1a000000 | genjmp(target));
}
1061
// beq a_
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n", target);
  output_w32(0x0a000000 | genjmp(target));
}
1069
// bmi a_
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n", target);
  output_w32(0x4a000000 | genjmp(target));
}
1077
// bpl a_
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n", target);
  output_w32(0x5a000000 | genjmp(target));
}
1085
// blt a_
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n", target);
  output_w32(0xba000000 | genjmp(target));
}
1093
// bge a_
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n", target);
  output_w32(0xaa000000 | genjmp(target));
}
1101
// bvc a_
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n", target);
  output_w32(0x7a000000 | genjmp(target));
}
1109
// bcs a_
static void emit_jc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcs %x\n", target);
  output_w32(0x2a000000 | genjmp(target));
}
1117
1118static void emit_jcc(const void *a_)
1119{
1120 int a = (int)a_;
1121 assem_debug("bcc %x\n",a);
1122 u_int offset=genjmp(a);
1123 output_w32(0x3a000000|offset);
1124}
1125
1126static unused void emit_callreg(u_int r)
1127{
1128 assert(r<15);
1129 assem_debug("blx %s\n",regname[r]);
1130 output_w32(0xe12fff30|r);
1131}
1132
1133static void emit_jmpreg(u_int r)
1134{
1135 assem_debug("mov pc,%s\n",regname[r]);
1136 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1137}
1138
1139static void emit_ret(void)
1140{
1141 emit_jmpreg(14);
1142}
1143
1144static void emit_readword_indexed(int offset, int rs, int rt)
1145{
1146 assert(offset>-4096&&offset<4096);
1147 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1148 if(offset>=0) {
1149 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1150 }else{
1151 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1152 }
1153}
1154
1155static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1156{
1157 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1158 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1159}
1160#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1161
1162static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1163{
1164 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1165 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1166}
1167
1168static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1169{
1170 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1171 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1172}
1173
1174static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1175{
1176 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1177 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1178}
1179
1180static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1181{
1182 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1183 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1184}
1185
1186static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1187{
1188 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1189 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1190}
1191
1192static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1193{
1194 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1195 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1196}
1197
1198static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1199{
1200 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1201 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1202}
1203
1204static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1205{
1206 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1207 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1208}
1209
1210static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1211{
1212 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1213 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1214}
1215
1216static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1217{
1218 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1219 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1220}
1221
1222static void emit_str_dualindexed(int rs1, int rs2, int rt)
1223{
1224 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1225 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1226}
1227
1228static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1229{
1230 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1231 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1232}
1233
1234static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1235{
1236 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1237 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1238}
1239
1240static void emit_movsbl_indexed(int offset, int rs, int rt)
1241{
1242 assert(offset>-256&&offset<256);
1243 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1244 if(offset>=0) {
1245 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1246 }else{
1247 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1248 }
1249}
1250
1251static void emit_movswl_indexed(int offset, int rs, int rt)
1252{
1253 assert(offset>-256&&offset<256);
1254 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1255 if(offset>=0) {
1256 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1257 }else{
1258 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1259 }
1260}
1261
1262static void emit_movzbl_indexed(int offset, int rs, int rt)
1263{
1264 assert(offset>-4096&&offset<4096);
1265 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1266 if(offset>=0) {
1267 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1268 }else{
1269 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1270 }
1271}
1272
1273static void emit_movzwl_indexed(int offset, int rs, int rt)
1274{
1275 assert(offset>-256&&offset<256);
1276 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1277 if(offset>=0) {
1278 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1279 }else{
1280 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1281 }
1282}
1283
1284static void emit_ldrd(int offset, int rs, int rt)
1285{
1286 assert(offset>-256&&offset<256);
1287 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1288 if(offset>=0) {
1289 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1290 }else{
1291 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1292 }
1293}
1294
1295static void emit_readword(void *addr, int rt)
1296{
1297 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1298 assert(offset<4096);
1299 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1300 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1301}
1302#define emit_readptr emit_readword
1303
1304static void emit_writeword_indexed(int rt, int offset, int rs)
1305{
1306 assert(offset>-4096&&offset<4096);
1307 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1308 if(offset>=0) {
1309 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1310 }else{
1311 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1312 }
1313}
1314
1315static void emit_writehword_indexed(int rt, int offset, int rs)
1316{
1317 assert(offset>-256&&offset<256);
1318 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1319 if(offset>=0) {
1320 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1321 }else{
1322 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1323 }
1324}
1325
1326static void emit_writebyte_indexed(int rt, int offset, int rs)
1327{
1328 assert(offset>-4096&&offset<4096);
1329 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1330 if(offset>=0) {
1331 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1332 }else{
1333 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1334 }
1335}
1336
1337static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1338{
1339 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1340 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1341}
1342
1343static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1344{
1345 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1346 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1347}
1348
1349static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1350{
1351 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1352 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1353}
1354
1355static void emit_writeword(int rt, void *addr)
1356{
1357 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1358 assert(offset<4096);
1359 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1360 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1361}
1362
1363static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1364{
1365 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1366 assert(rs1<16);
1367 assert(rs2<16);
1368 assert(hi<16);
1369 assert(lo<16);
1370 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1371}
1372
1373static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1374{
1375 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1376 assert(rs1<16);
1377 assert(rs2<16);
1378 assert(hi<16);
1379 assert(lo<16);
1380 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1381}
1382
1383static void emit_clz(int rs,int rt)
1384{
1385 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1386 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1387}
1388
1389static void emit_subcs(int rs1,int rs2,int rt)
1390{
1391 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1392 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1393}
1394
1395static void emit_shrcc_imm(int rs,u_int imm,int rt)
1396{
1397 assert(imm>0);
1398 assert(imm<32);
1399 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1400 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1401}
1402
1403static void emit_shrne_imm(int rs,u_int imm,int rt)
1404{
1405 assert(imm>0);
1406 assert(imm<32);
1407 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1408 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1409}
1410
1411static void emit_negmi(int rs, int rt)
1412{
1413 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1414 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1415}
1416
1417static void emit_negsmi(int rs, int rt)
1418{
1419 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1420 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1421}
1422
1423static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1424{
1425 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1426 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1427}
1428
1429static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1430{
1431 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1432 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1433}
1434
1435static void emit_teq(int rs, int rt)
1436{
1437 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1438 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1439}
1440
1441static unused void emit_rsbimm(int rs, int imm, int rt)
1442{
1443 u_int armval;
1444 genimm_checked(imm,&armval);
1445 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1446 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1447}
1448
1449// Conditionally select one of two immediates, optimizing for small code size
1450// This will only be called if HAVE_CMOV_IMM is defined
// Result in rt: imm1 is loaded unconditionally, then an `ne`-conditional
// instruction turns it into imm2. Strategies, tried in order:
//  1. imm2-imm1 encodable by genimm(): mov imm1, then addne.
//  2. imm1-imm2 encodable by genimm(): mov imm1, then subne.
//  3. otherwise: without HAVE_ARMV7, mov imm1 then ldrne from a literal
//     registered via add_literal(); with HAVE_ARMV7, movw/movt of imm1, each
//     followed by a movwne/movtne of imm2 only where that half differs.
1451static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1452{
1453 u_int armval;
1454 if(genimm(imm2-imm1,&armval)) {
1455 emit_movimm(imm1,rt);
1456 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1457 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1458 }else if(genimm(imm1-imm2,&armval)) {
1459 emit_movimm(imm1,rt);
1460 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1461 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1462 }
1463 else {
1464 #ifndef HAVE_ARMV7
1465 emit_movimm(imm1,rt);
 // Register the literal against the address of the ldrne emitted next;
 // its offset field is left zero here.
1466 add_literal((int)out,imm2);
1467 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1468 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1469 #else
1470 emit_movw(imm1&0x0000FFFF,rt);
1471 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1472 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
 // 16-bit immediate split as imm12 (bits 11:0) + imm4 (bits 19:16)
1473 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1474 }
1475 emit_movt(imm1&0xFFFF0000,rt);
1476 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1477 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
 // same split, applied to the upper 16 bits of imm2
1478 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1479 }
1480 #endif
1481 }
1482}
1483
1484// special case for checking invalid_code
1485static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1486{
1487 assert(imm<128&&imm>=0);
1488 assert(r>=0&&r<16);
1489 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1490 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1491 emit_cmpimm(HOST_TEMPREG,imm);
1492}
1493
1494static void emit_callne(int a)
1495{
1496 assem_debug("blne %x\n",a);
1497 u_int offset=genjmp(a);
1498 output_w32(0x1b000000|offset);
1499}
1500
1501// Used to preload hash table entries
1502static unused void emit_prefetchreg(int r)
1503{
1504 assem_debug("pld %s\n",regname[r]);
1505 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1506}
1507
1508// Special case for mini_ht
1509static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1510{
1511 assert(offset<4096);
1512 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1513 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1514}
1515
1516static void emit_orrne_imm(int rs,int imm,int rt)
1517{
1518 u_int armval;
1519 genimm_checked(imm,&armval);
1520 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1521 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1522}
1523
1524static void emit_andne_imm(int rs,int imm,int rt)
1525{
1526 u_int armval;
1527 genimm_checked(imm,&armval);
1528 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1529 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1530}
1531
1532static unused void emit_addpl_imm(int rs,int imm,int rt)
1533{
1534 u_int armval;
1535 genimm_checked(imm,&armval);
1536 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1537 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1538}
1539
1540static void emit_jno_unlikely(int a)
1541{
1542 //emit_jno(a);
1543 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1544 output_w32(0x72800000|rd_rn_rm(15,15,0));
1545}
1546
1547static void save_regs_all(u_int reglist)
1548{
1549 int i;
1550 if(!reglist) return;
1551 assem_debug("stmia fp,{");
1552 for(i=0;i<16;i++)
1553 if(reglist&(1<<i))
1554 assem_debug("r%d,",i);
1555 assem_debug("}\n");
1556 output_w32(0xe88b0000|reglist);
1557}
1558
1559static void restore_regs_all(u_int reglist)
1560{
1561 int i;
1562 if(!reglist) return;
1563 assem_debug("ldmia fp,{");
1564 for(i=0;i<16;i++)
1565 if(reglist&(1<<i))
1566 assem_debug("r%d,",i);
1567 assem_debug("}\n");
1568 output_w32(0xe89b0000|reglist);
1569}
1570
1571// Save registers before function call
1572static void save_regs(u_int reglist)
1573{
1574 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1575 save_regs_all(reglist);
1576}
1577
1578// Restore registers after function call
1579static void restore_regs(u_int reglist)
1580{
1581 reglist&=CALLER_SAVE_REGS;
1582 restore_regs_all(reglist);
1583}
1584
1585/* Stubs/epilogue */
1586
// Flush the pending literal pool at the current output position.
// For every recorded literal, literals[i][0] is the address of the
// instruction that loads it and literals[i][1] is the value. Each load
// instruction gets the pc-relative offset (literal address - instruction
// address - 8) OR-ed into it, and each distinct value is emitted once.
// n: when non-zero, the flush is skipped while the distance from the first
//    recorded instruction to `out` is still below 4096-n; n==0 forces it.
1587static void literal_pool(int n)
1588{
1589 if(!literalcount) return;
1590 if(n) {
1591 if((int)out-literals[0][0]<4096-n) return;
1592 }
1593 u_int *ptr;
1594 int i;
1595 for(i=0;i<literalcount;i++)
1596 {
 // Default: this literal is emitted at the current output position...
1597 u_int l_addr=(u_int)out;
1598 int j;
 // ...unless an earlier entry carries the same value; entries that were
 // emitted have had their [0] slot replaced by the literal's address.
1599 for(j=0;j<i;j++) {
1600 if(literals[j][1]==literals[i][1]) {
1601 //printf("dup %08x\n",literals[i][1]);
1602 l_addr=literals[j][0];
1603 break;
1604 }
1605 }
1606 ptr=(u_int *)literals[i][0];
1607 u_int offset=l_addr-(u_int)ptr-8;
 // Offset must be below 4096 and word-aligned
1608 assert(offset<4096);
1609 assert(!(offset&3));
 // Patch the offset into the already-emitted load instruction
1610 *ptr|=offset;
1611 if(l_addr==(u_int)out) {
1612 literals[i][0]=l_addr; // remember for dupes
1613 output_w32(literals[i][1]);
1614 }
1615 }
1616 literalcount=0;
1617}
1618
1619static void literal_pool_jumpover(int n)
1620{
1621 if(!literalcount) return;
1622 if(n) {
1623 if((int)out-literals[0][0]<4096-n) return;
1624 }
1625 void *jaddr = out;
1626 emit_jmp(0);
1627 literal_pool(0);
1628 set_jump_target(jaddr, out);
1629}
1630
1631// parsed by get_pointer, find_extjump_insn
// Emit an external-jump stub: load `target` into register 0 and `addr` into
// register 1 via emit_loadlp(), then far-jump to `linker`.
// addr must point at a branch instruction (checked via its top byte) inside
// the translation cache.
1632static void emit_extjump2(u_char *addr, u_int target, void *linker)
1633{
1634 u_char *ptr=(u_char *)addr;
 // Most significant byte of the instruction at addr must have 0xa in bits 3:1
1635 assert((ptr[3]&0x0e)==0xa);
 // keeps ptr "used" when assert() is compiled out
1636 (void)ptr;
1637
1638 emit_loadlp(target,0);
1639 emit_loadlp((u_int)addr,1);
1640 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
1641 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1642//DEBUG >
1643#ifdef DEBUG_CYCLE_COUNT
1644 emit_readword(&last_count,ECX);
1645 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1646 emit_readword(&next_interupt,ECX);
1647 emit_writeword(HOST_CCREG,&Count);
1648 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1649 emit_writeword(ECX,&last_count);
1650#endif
1651//DEBUG <
1652 emit_far_jump(linker);
1653}
1654
1655static void check_extjump2(void *src)
1656{
1657 u_int *ptr = src;
1658 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1659 (void)ptr;
1660}
1661
1662// put rt_val into rt, potentially making use of rs with value rs_val
1663static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1664{
1665 u_int armval;
1666 int diff;
1667 if(genimm(rt_val,&armval)) {
1668 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1669 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1670 return;
1671 }
1672 if(genimm(~rt_val,&armval)) {
1673 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1674 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1675 return;
1676 }
1677 diff=rt_val-rs_val;
1678 if(genimm(diff,&armval)) {
1679 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1680 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1681 return;
1682 }else if(genimm(-diff,&armval)) {
1683 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1684 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1685 return;
1686 }
1687 emit_movimm(rt_val,rt);
1688}
1689
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. when v1 == v2
// or when the difference (in either direction), after dropping trailing
// pairs of zero bits, fits in 8 bits. Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  if (v1 == v2) {
    return 1;
  }

  const u_int delta = v2 - v1;
  const u_int candidates[2] = { delta, 0u - delta };

  for (int k = 0; k < 2; k++) {
    u_int bits = candidates[k];
    while (bits != 0 && (bits & 3) == 0) {
      bits >>= 2;
    }
    if (bits < 0x100) {
      return 1;
    }
  }
  return 0;
}
1705
1706static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1707{
1708 switch(type) {
1709 case LOADB_STUB: emit_signextend8(rs,rt); break;
1710 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1711 case LOADH_STUB: emit_signextend16(rs,rt); break;
1712 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1713 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1714 default: assert(0);
1715 }
1716}
1717
1718#include "pcsxmem.h"
1719#include "pcsxmem_inline.c"
1720
// Emit the out-of-line slow path for memory-read stub n.
// Fields read from stubs[n]: addr (jump to be pointed at this stub), type
// (load width/signedness), a (instruction index), b (host register holding
// the address), c (struct regstat pointer), d (cycle adjustment), e (live
// host register list), retaddr (where to continue afterwards).
// Emitted code: look the address up in mem_rtab, attempt a carry-clear
// conditional direct load, and otherwise call the matching
// jump_handler_read* routine, then jump back to retaddr.
1721static void do_readstub(int n)
1722{
1723 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1724 literal_pool(256);
1725 set_jump_target(stubs[n].addr, out);
1726 enum stub_type type=stubs[n].type;
1727 int i=stubs[n].a;
1728 int rs=stubs[n].b;
1729 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1730 u_int reglist=stubs[n].e;
1731 const signed char *i_regmap=i_regs->regmap;
1732 int rt;
 // Destination host register: FTEMP's for C1LS/C2LS/LOADLR, else rt1's
1733 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1734 rt=get_reg(i_regmap,FTEMP);
1735 }else{
1736 rt=get_reg(i_regmap,dops[i].rt1);
1737 }
1738 assert(rs>=0);
1739 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1740 void *restore_jump = NULL;
1741 reglist|=(1<<rs);
 // Pick the first register among r0-r9,r12 (mask 0x13ff) not in reglist
1742 for(r=0;r<=12;r++) {
1743 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1744 temp=r; break;
1745 }
1746 }
 // The destination is about to be overwritten, so it need not be preserved
1747 if(rt>=0&&dops[i].rt1!=0)
1748 reglist&=~(1<<rt);
 // No free register: save registers up front and take r0 (or r2 if rs is r0)
1749 if(temp==-1) {
1750 save_regs(reglist);
1751 regs_saved=1;
1752 temp=(rs==0)?2:0;
1753 }
 // Use r1 as the second temp when it is saved or not live, and not taken
 // by temp or rs
1754 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1755 temp2=1;
 // temp2 = mem_rtab[rs >> 12], then shifted left by one with flags updated
1756 emit_readword(&mem_rtab,temp);
1757 emit_shrimm(rs,12,temp2);
1758 emit_readword_dualindexedx4(temp,temp2,temp2);
1759 emit_lsls_imm(temp2,1,temp2);
 // The `cc` load and branch below execute only when that shift left carry
 // clear. NOTE(review): presumably a set top bit in the table entry marks
 // a handler rather than a direct mapping - confirm against mem_rtab setup.
1760 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1761 switch(type) {
1762 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1763 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1764 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1765 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1766 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1767 default: assert(0);
1768 }
1769 }
1770 if(regs_saved) {
1771 restore_jump=out;
1772 emit_jcc(0); // jump to reg restore
1773 }
1774 else
1775 emit_jcc(stubs[n].retaddr); // return address
1776
 // Handler path
1777 if(!regs_saved)
1778 save_regs(reglist);
1779 void *handler=NULL;
1780 if(type==LOADB_STUB||type==LOADBU_STUB)
1781 handler=jump_handler_read8;
1782 if(type==LOADH_STUB||type==LOADHU_STUB)
1783 handler=jump_handler_read16;
1784 if(type==LOADW_STUB)
1785 handler=jump_handler_read32;
1786 assert(handler);
1787 pass_args(rs,temp2);
 // r2 = cycle count (from the mapped CCREG register, or loaded into r2)
 // plus the adjustment for this stub
1788 int cc=get_reg(i_regmap,CCREG);
1789 if(cc<0)
1790 emit_loadreg(CCREG,2);
1791 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1792 emit_far_call(handler);
 // Result is taken from r0 and extended/masked according to the load type
1793 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1794 mov_loadtype_adj(type,0,rt);
1795 }
1796 if(restore_jump)
1797 set_jump_target(restore_jump, out);
1798 restore_regs(reglist);
1799 emit_jmp(stubs[n].retaddr); // return address
1800}
1801
// Emit an inline read for an address known at compile time (addr).
// Order of attempts: pcsx_direct_read(); a plain load when
// get_direct_memhandler() returns NULL (rewriting rs to host_addr first if
// it differs from addr); otherwise a call to a memory handler with the live
// caller-save registers saved around it.
// target selects the guest register whose host mapping supplies both rs and
// rt; when it is unmapped, rs falls back to the register mapped to -1.
1802static void inline_readstub(enum stub_type type, int i, u_int addr,
1803 const signed char regmap[], int target, int adj, u_int reglist)
1804{
1805 int rs=get_reg(regmap,target);
1806 int rt=get_reg(regmap,target);
1807 if(rs<0) rs=get_reg(regmap,-1);
1808 assert(rs>=0);
1809 u_int is_dynamic;
1810 uintptr_t host_addr = 0;
1811 void *handler;
1812 int cc=get_reg(regmap,CCREG);
1813 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
1814 return;
1815 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1816 if (handler == NULL) {
 // No handler: load straight from memory. Nothing to do when the result
 // is discarded (no host register, or rt1 is register 0).
1817 if(rt<0||dops[i].rt1==0)
1818 return;
1819 if(addr!=host_addr)
1820 emit_movimm_from(addr,rs,host_addr,rs);
1821 switch(type) {
1822 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1823 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1824 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1825 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1826 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1827 default: assert(0);
1828 }
1829 return;
1830 }
 // For "dynamic" handlers, go through the generic jump_handler_read*
 // entry points instead of the handler returned above
1831 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1832 if(is_dynamic) {
1833 if(type==LOADB_STUB||type==LOADBU_STUB)
1834 handler=jump_handler_read8;
1835 if(type==LOADH_STUB||type==LOADHU_STUB)
1836 handler=jump_handler_read16;
1837 if(type==LOADW_STUB)
1838 handler=jump_handler_read32;
1839 }
1840
1841 // call a memhandler
1842 if(rt>=0&&dops[i].rt1!=0)
1843 reglist&=~(1<<rt);
1844 save_regs(reglist);
 // r0 = address
1845 if(target==0)
1846 emit_movimm(addr,0);
1847 else if(rs!=0)
1848 emit_mov(rs,0);
1849 if(cc<0)
1850 emit_loadreg(CCREG,2);
1851 if(is_dynamic) {
 // r1 = mem_rtab entry for this page shifted left by one, r2 = adjusted
 // cycle count
1852 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1853 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1854 }
1855 else {
 // Store last_count + adjusted cycle count into Count before the call
1856 emit_readword(&last_count,3);
1857 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1858 emit_add(2,3,2);
1859 emit_writeword(2,&Count);
1860 }
1861
1862 emit_far_call(handler);
1863
 // Result is taken from r0 and extended/masked according to the load type
1864 if(rt>=0&&dops[i].rt1!=0) {
1865 switch(type) {
1866 case LOADB_STUB: emit_signextend8(0,rt); break;
1867 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1868 case LOADH_STUB: emit_signextend16(0,rt); break;
1869 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1870 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1871 default: assert(0);
1872 }
1873 }
1874 restore_regs(reglist);
1875}
1876
// Emit the out-of-line slow path for memory-write stub n.
// Fields read from stubs[n]: addr (jump to be pointed at this stub), type
// (store width), a (instruction index), b (host register holding the
// address), c (struct regstat pointer), d (cycle adjustment), e (live host
// register list), retaddr (where to continue afterwards).
// Emitted code: look the address up in mem_wtab, attempt a carry-clear
// conditional direct store, and otherwise call the matching
// jump_handler_write* routine, then jump back to retaddr.
1877static void do_writestub(int n)
1878{
1879 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1880 literal_pool(256);
1881 set_jump_target(stubs[n].addr, out);
1882 enum stub_type type=stubs[n].type;
1883 int i=stubs[n].a;
1884 int rs=stubs[n].b;
1885 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1886 u_int reglist=stubs[n].e;
1887 const signed char *i_regmap=i_regs->regmap;
1888 int rt,r;
 // Source value register: FTEMP's for C1LS/C2LS, else rs2's
1889 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1890 rt=get_reg(i_regmap,r=FTEMP);
1891 }else{
1892 rt=get_reg(i_regmap,r=dops[i].rs2);
1893 }
1894 assert(rs>=0);
1895 assert(rt>=0);
1896 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1897 void *restore_jump = NULL;
1898 int reglist2=reglist|(1<<rs)|(1<<rt);
 // Pick the first register among r0-r9,r12 (mask 0x13ff) that is neither
 // live nor rs/rt
1899 for(rtmp=0;rtmp<=12;rtmp++) {
1900 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1901 temp=rtmp; break;
1902 }
1903 }
 // No free register: save registers up front and take the first of r0-r3
 // that is not rs or rt
1904 if(temp==-1) {
1905 save_regs(reglist);
1906 regs_saved=1;
1907 for(rtmp=0;rtmp<=3;rtmp++)
1908 if(rtmp!=rs&&rtmp!=rt)
1909 {temp=rtmp;break;}
1910 }
 // Prefer r3 as the second temp when it is saved or not live, and not
 // taken by temp, rs or rt; otherwise it is moved into r3 before the call
1911 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1912 temp2=3;
 // temp2 = mem_wtab[rs >> 12], then shifted left by one with flags updated
1913 emit_readword(&mem_wtab,temp);
1914 emit_shrimm(rs,12,temp2);
1915 emit_readword_dualindexedx4(temp,temp2,temp2);
1916 emit_lsls_imm(temp2,1,temp2);
 // The `cc` store and branch below execute only when that shift left carry
 // clear. NOTE(review): presumably a set top bit in the table entry marks
 // a handler rather than a direct mapping - confirm against mem_wtab setup.
1917 switch(type) {
1918 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1919 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1920 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1921 default: assert(0);
1922 }
1923 if(regs_saved) {
1924 restore_jump=out;
1925 emit_jcc(0); // jump to reg restore
1926 }
1927 else
1928 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1929
 // Handler path
1930 if(!regs_saved)
1931 save_regs(reglist);
1932 void *handler=NULL;
1933 switch(type) {
1934 case STOREB_STUB: handler=jump_handler_write8; break;
1935 case STOREH_STUB: handler=jump_handler_write16; break;
1936 case STOREW_STUB: handler=jump_handler_write32; break;
1937 default: assert(0);
1938 }
1939 assert(handler);
1940 pass_args(rs,rt);
1941 if(temp2!=3)
1942 emit_mov(temp2,3);
 // r2 = cycle count (from the mapped CCREG register, or loaded into r2)
 // plus the adjustment for this stub
1943 int cc=get_reg(i_regmap,CCREG);
1944 if(cc<0)
1945 emit_loadreg(CCREG,2);
1946 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1947 // returns new cycle_count
1948 emit_far_call(handler);
 // Undo the adjustment on the returned count (r0) and put it back where
 // the cycle count lives
1949 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
1950 if(cc<0)
1951 emit_storereg(CCREG,2);
1952 if(restore_jump)
1953 set_jump_target(restore_jump, out);
1954 restore_regs(reglist);
1955 emit_jmp(stubs[n].retaddr);
1956}
1957
// Emit an inline write for an address known at compile time (addr).
// When get_direct_memhandler() returns NULL, a plain store is emitted
// (rewriting rs to host_addr first if it differs from addr). Otherwise the
// handler is called through jump_handler_write_h with the live caller-save
// registers saved around the call.
// rs is the host register mapped to -1; rt is the one mapped to `target`.
1958static void inline_writestub(enum stub_type type, int i, u_int addr,
1959 const signed char regmap[], int target, int adj, u_int reglist)
1960{
1961 int rs=get_reg(regmap,-1);
1962 int rt=get_reg(regmap,target);
1963 assert(rs>=0);
1964 assert(rt>=0);
1965 uintptr_t host_addr = 0;
1966 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1967 if (handler == NULL) {
1968 if(addr!=host_addr)
1969 emit_movimm_from(addr,rs,host_addr,rs);
1970 switch(type) {
1971 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1972 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1973 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1974 default: assert(0);
1975 }
1976 return;
1977 }
1978
1979 // call a memhandler
1980 save_regs(reglist);
1981 pass_args(rs,rt);
 // r2 = adjusted cycle count, r3 = handler address
1982 int cc=get_reg(regmap,CCREG);
1983 if(cc<0)
1984 emit_loadreg(CCREG,2);
1985 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1986 emit_movimm((u_int)handler,3);
1987 // returns new cycle_count
1988 emit_far_call(jump_handler_write_h);
 // Undo the adjustment on the returned count (r0) and put it back where
 // the cycle count lives
1989 emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc);
1990 if(cc<0)
1991 emit_storereg(CCREG,2);
1992 restore_regs(reglist);
1993}
1994
1995// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
// Emit the argument setup shared by the dirty stubs:
//   r1 = source, r2 = copy, r3 = source_len, r0 = arg0.
// Without HAVE_ARMV7 the first three are loaded via emit_loadlp(); with it
// they are built from movw/movt pairs (movw only for source_len).
1996static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
1997{
1998 #ifndef HAVE_ARMV7
1999 emit_loadlp((int)source, 1);
2000 emit_loadlp((int)copy, 2);
2001 emit_loadlp(source_len, 3);
2002 #else
2003 emit_movw(((u_int)source)&0x0000FFFF, 1);
2004 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2005 emit_movt(((u_int)source)&0xFFFF0000, 1);
2006 emit_movt(((u_int)copy)&0xFFFF0000, 2);
2007 emit_movw(source_len, 3);
2008 #endif
2009 emit_movimm(arg0, 0);
2010}
2011
2012static void *do_dirty_stub(int i, u_int source_len)
2013{
2014 assem_debug("do_dirty_stub %x\n",start+i*4);
2015 do_dirty_stub_emit_args(start + i*4, source_len);
2016 emit_far_call(verify_code);
2017 void *entry = out;
2018 load_regs_entry(i);
2019 if (entry == out)
2020 entry = instr_addr[i];
2021 emit_jmp(instr_addr[i]);
2022 return entry;
2023}
2024
2025static void do_dirty_stub_ds(u_int source_len)
2026{
2027 do_dirty_stub_emit_args(start + 1, source_len);
2028 emit_far_call(verify_code_ds);
2029}
2030
2031/* Special assem */
2032
2033static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
2034{
2035 save_regs_all(reglist);
2036 cop2_do_stall_check(op, i, i_regs, 0);
2037#ifdef PCNT
2038 emit_movimm(op, 0);
2039 emit_far_call(pcnt_gte_start);
2040#endif
2041 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
2042}
2043
2044static void c2op_epilogue(u_int op,u_int reglist)
2045{
2046#ifdef PCNT
2047 emit_movimm(op,0);
2048 emit_far_call(pcnt_gte_end);
2049#endif
2050 restore_regs_all(reglist);
2051}
2052
2053static void c2op_call_MACtoIR(int lm,int need_flags)
2054{
2055 if(need_flags)
2056 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2057 else
2058 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2059}
2060
2061static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2062{
2063 emit_far_call(func);
2064 // func is C code and trashes r0
2065 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2066 if(need_flags||need_ir)
2067 c2op_call_MACtoIR(lm,need_flags);
2068 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2069}
2070
// Assemble the GTE (cop2) operation at instruction index i.
// The operation number is the low 6 bits of source[i]; nothing is emitted
// when gte_handlers[] has no entry for it. Every path is bracketed by
// c2op_prologue()/c2op_epilogue(), which save and restore `reglist`.
// Unless DRC_DBG is defined, several operations get specialised call
// sequences; everything else calls the generic handler (the _nf table when
// flags are not needed).
2071static void c2op_assemble(int i, const struct regstat *i_regs)
2072{
2073 u_int c2op = source[i] & 0x3f;
2074 u_int reglist_full = get_host_reglist(i_regs->regmap);
2075 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2076 int need_flags, need_ir;
2077
2078 if (gte_handlers[c2op]!=NULL) {
 // Flags are needed unless bit 63 of gte_unneeded[i+1] is set; IR is
 // needed unless all bits of mask 0xe00 are set
2079 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2080 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2081 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2082 source[i],gte_unneeded[i+1],need_flags,need_ir);
2083 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
2084 need_flags=0;
 // Opcode fields: bit 19 = shift, bit 10 = lm
2085 int shift = (source[i] >> 19) & 1;
2086 int lm = (source[i] >> 10) & 1;
2087 switch(c2op) {
2088#ifndef DRC_DBG
2089 case GTE_MVMVA: {
2090#ifdef HAVE_ARMV5
 // Opcode fields: bits 16:15 = v, bits 14:13 = cv, bits 18:17 = mx
2091 int v = (source[i] >> 15) & 3;
2092 int cv = (source[i] >> 13) & 3;
2093 int mx = (source[i] >> 17) & 3;
2094 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2095 c2op_prologue(c2op,i,i_regs,reglist);
2096 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2097 if(v<3)
2098 emit_ldrd(v*8,0,4);
2099 else {
2100 emit_movzwl_indexed(9*4,0,4); // gteIR
2101 emit_movzwl_indexed(10*4,0,6);
2102 emit_movzwl_indexed(11*4,0,5);
2103 emit_orrshl_imm(6,16,4);
2104 }
 // mx==3 / cv==3 use the pointer read from zeromem_ptr instead of an
 // offset from r0
2105 if(mx<3)
2106 emit_addimm(0,32*4+mx*8*4,6);
2107 else
2108 emit_readword(&zeromem_ptr,6);
2109 if(cv<3)
2110 emit_addimm(0,32*4+(cv*8+5)*4,7);
2111 else
2112 emit_readword(&zeromem_ptr,7);
2113#ifdef __ARM_NEON__
2114 emit_movimm(source[i],1); // opcode
2115 emit_far_call(gteMVMVA_part_neon);
2116 if(need_flags) {
2117 emit_movimm(lm,1);
2118 emit_far_call(gteMACtoIR_flags_neon);
2119 }
2120#else
2121 if(cv==3&&shift)
2122 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
2123 else {
2124 emit_movimm(shift,1);
2125 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2126 }
2127 if(need_flags||need_ir)
2128 c2op_call_MACtoIR(lm,need_flags);
2129#endif
2130#else /* if not HAVE_ARMV5 */
2131 c2op_prologue(c2op,i,i_regs,reglist);
2132 emit_movimm(source[i],1); // opcode
2133 emit_writeword(1,&psxRegs.code);
2134 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2135#endif
2136 break;
2137 }
2138 case GTE_OP:
2139 c2op_prologue(c2op,i,i_regs,reglist);
2140 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2141 if(need_flags||need_ir) {
 // reload r0 with the cop2 register base before the MACtoIR call
2142 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2143 c2op_call_MACtoIR(lm,need_flags);
2144 }
2145 break;
2146 case GTE_DPCS:
2147 c2op_prologue(c2op,i,i_regs,reglist);
2148 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2149 break;
2150 case GTE_INTPL:
2151 c2op_prologue(c2op,i,i_regs,reglist);
2152 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2153 break;
2154 case GTE_SQR:
2155 c2op_prologue(c2op,i,i_regs,reglist);
2156 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2157 if(need_flags||need_ir) {
 // reload r0 with the cop2 register base before the MACtoIR call
2158 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2159 c2op_call_MACtoIR(lm,need_flags);
2160 }
2161 break;
2162 case GTE_DCPL:
2163 c2op_prologue(c2op,i,i_regs,reglist);
2164 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2165 break;
2166 case GTE_GPF:
2167 c2op_prologue(c2op,i,i_regs,reglist);
2168 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2169 break;
2170 case GTE_GPL:
2171 c2op_prologue(c2op,i,i_regs,reglist);
2172 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2173 break;
2174#endif
2175 default:
 // Generic path: call the handler from the table; under DRC_DBG the
 // opcode is first stored to psxRegs.code
2176 c2op_prologue(c2op,i,i_regs,reglist);
2177#ifdef DRC_DBG
2178 emit_movimm(source[i],1); // opcode
2179 emit_writeword(1,&psxRegs.code);
2180#endif
2181 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2182 break;
2183 }
2184 c2op_epilogue(c2op,reglist);
2185 }
2186}
2187
// Emit host code that takes the value in host register sl and leaves the
// adjusted value in host register temp. The intended computation is:
//   value = value & 0x7ffff000;
//   if (value & 0x7f87e000) value |= 0x80000000;
// The three test masks below OR together to 0x7f87e000.
// NOTE(review): a shift right by 12 followed by a shift left by 12 only
// clears the low 12 bits, so an input bit 31 appears to survive, unlike
// the 0x7ffff000 mask in the pseudo-code above - confirm this is intended.
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  const int low_bits = 12;

  // temp = sl with the low 12 bits dropped
  emit_shrimm(sl, low_bits, temp);
  emit_shlimm(temp, low_bits, temp);

  // Chain of tests: each "testeq" is only meant to run while the
  // previous test found no bits set (going by the mnemonics).
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  // Any tested bit set -> also set bit 31.
  emit_orrne_imm(temp, 0x80000000, temp);
}
2199
2200static void do_mfc2_31_one(u_int copr,signed char temp)
2201{
2202 emit_readword(&reg_cop2d[copr],temp);
2203 emit_testimm(temp,0x8000); // do we need this?
2204 emit_andne_imm(temp,0,temp);
2205 emit_cmpimm(temp,0xf80);
2206 emit_andimm(temp,0xf80,temp);
2207 emit_cmovae_imm(0xf80,temp);
2208}
2209
2210static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2211{
2212 if (temp < 0) {
2213 host_tempreg_acquire();
2214 temp = HOST_TEMPREG;
2215 }
2216 do_mfc2_31_one(9,temp);
2217 emit_shrimm(temp,7,tl);
2218 do_mfc2_31_one(10,temp);
2219 emit_orrshr_imm(temp,2,tl);
2220 do_mfc2_31_one(11,temp);
2221 emit_orrshl_imm(temp,3,tl);
2222 emit_writeword(tl,&reg_cop2d[29]);
2223 if (temp == HOST_TEMPREG)
2224 host_tempreg_release();
2225}
2226
2227static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2228{
2229 // case 0x18: MULT
2230 // case 0x19: MULTU
2231 // case 0x1A: DIV
2232 // case 0x1B: DIVU
2233 // case 0x1C: DMULT
2234 // case 0x1D: DMULTU
2235 // case 0x1E: DDIV
2236 // case 0x1F: DDIVU
2237 if(dops[i].rs1&&dops[i].rs2)
2238 {
2239 if((dops[i].opcode2&4)==0) // 32-bit
2240 {
2241 if(dops[i].opcode2==0x18) // MULT
2242 {
2243 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2244 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2245 signed char hi=get_reg(i_regs->regmap,HIREG);
2246 signed char lo=get_reg(i_regs->regmap,LOREG);
2247 assert(m1>=0);
2248 assert(m2>=0);
2249 assert(hi>=0);
2250 assert(lo>=0);
2251 emit_smull(m1,m2,hi,lo);
2252 }
2253 if(dops[i].opcode2==0x19) // MULTU
2254 {
2255 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2256 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2257 signed char hi=get_reg(i_regs->regmap,HIREG);
2258 signed char lo=get_reg(i_regs->regmap,LOREG);
2259 assert(m1>=0);
2260 assert(m2>=0);
2261 assert(hi>=0);
2262 assert(lo>=0);
2263 emit_umull(m1,m2,hi,lo);
2264 }
2265 if(dops[i].opcode2==0x1A) // DIV
2266 {
2267 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2268 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2269 assert(d1>=0);
2270 assert(d2>=0);
2271 signed char quotient=get_reg(i_regs->regmap,LOREG);
2272 signed char remainder=get_reg(i_regs->regmap,HIREG);
2273 assert(quotient>=0);
2274 assert(remainder>=0);
2275 emit_movs(d1,remainder);
2276 emit_movimm(0xffffffff,quotient);
2277 emit_negmi(quotient,quotient); // .. quotient and ..
2278 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2279 emit_movs(d2,HOST_TEMPREG);
2280 emit_jeq(out+52); // Division by zero
2281 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2282#ifdef HAVE_ARMV5
2283 emit_clz(HOST_TEMPREG,quotient);
2284 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2285#else
2286 emit_movimm(0,quotient);
2287 emit_addpl_imm(quotient,1,quotient);
2288 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2289 emit_jns(out-2*4);
2290#endif
2291 emit_orimm(quotient,1<<31,quotient);
2292 emit_shr(quotient,quotient,quotient);
2293 emit_cmp(remainder,HOST_TEMPREG);
2294 emit_subcs(remainder,HOST_TEMPREG,remainder);
2295 emit_adcs(quotient,quotient,quotient);
2296 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2297 emit_jcc(out-16); // -4
2298 emit_teq(d1,d2);
2299 emit_negmi(quotient,quotient);
2300 emit_test(d1,d1);
2301 emit_negmi(remainder,remainder);
2302 }
2303 if(dops[i].opcode2==0x1B) // DIVU
2304 {
2305 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2306 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2307 assert(d1>=0);
2308 assert(d2>=0);
2309 signed char quotient=get_reg(i_regs->regmap,LOREG);
2310 signed char remainder=get_reg(i_regs->regmap,HIREG);
2311 assert(quotient>=0);
2312 assert(remainder>=0);
2313 emit_mov(d1,remainder);
2314 emit_movimm(0xffffffff,quotient); // div0 case
2315 emit_test(d2,d2);
2316 emit_jeq(out+40); // Division by zero
2317#ifdef HAVE_ARMV5
2318 emit_clz(d2,HOST_TEMPREG);
2319 emit_movimm(1<<31,quotient);
2320 emit_shl(d2,HOST_TEMPREG,d2);
2321#else
2322 emit_movimm(0,HOST_TEMPREG);
2323 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2324 emit_lslpls_imm(d2,1,d2);
2325 emit_jns(out-2*4);
2326 emit_movimm(1<<31,quotient);
2327#endif
2328 emit_shr(quotient,HOST_TEMPREG,quotient);
2329 emit_cmp(remainder,d2);
2330 emit_subcs(remainder,d2,remainder);
2331 emit_adcs(quotient,quotient,quotient);
2332 emit_shrcc_imm(d2,1,d2);
2333 emit_jcc(out-16); // -4
2334 }
2335 }
2336 else // 64-bit
2337 assert(0);
2338 }
2339 else
2340 {
2341 // Multiply by zero is zero.
2342 // MIPS does not have a divide by zero exception.
2343 // The result is undefined, we return zero.
2344 signed char hr=get_reg(i_regs->regmap,HIREG);
2345 signed char lr=get_reg(i_regs->regmap,LOREG);
2346 if(hr>=0) emit_zeroreg(hr);
2347 if(lr>=0) emit_zeroreg(lr);
2348 }
2349}
2350#define multdiv_assemble multdiv_assemble_arm
2351
2352static void do_jump_vaddr(int rs)
2353{
2354 emit_far_jump(jump_vaddr_reg[rs]);
2355}
2356
// Intentionally emits nothing on ARM.
// On x86 this step puts the value 0xf8 into the register; on ARM the
// hash is done with a single instruction (see do_rhash), so there is
// nothing to preload.
static void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
2361
2362static void do_preload_rhtbl(int ht) {
2363 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2364}
2365
// Emit the single-instruction hash: rh = rs & 0xf8.
static void do_rhash(int rs,int rh) {
  const int hash_mask = 0xf8;

  emit_andimm(rs, hash_mask, rh);
}
2369
2370static void do_miniht_load(int ht,int rh) {
2371 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2372 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2373}
2374
// Emit the mini hash table dispatch: compare rh with rs, emit a
// conditional (eq) load from [ht + 4] into register 15, then fall back
// to the generic jump_vaddr handler for rs.
// With CORTEX_A8_BRANCH_PREDICTION_HACK defined the fallback always goes
// through register 7, copying rs there first when necessary.
static void do_miniht_jump(int rs,int rh,int ht) {
  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  if (rs != 7) {
    emit_mov(rs, 7);
    rs = 7;
  }
#endif
  do_jump_vaddr(rs);
}
2385
2386static void do_miniht_insert(u_int return_address,int rt,int temp) {
2387 #ifndef HAVE_ARMV7
2388 emit_movimm(return_address,rt); // PC into link register
2389 add_to_linker(out,return_address,1);
2390 emit_pcreladdr(temp);
2391 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2392 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2393 #else
2394 emit_movw(return_address&0x0000FFFF,rt);
2395 add_to_linker(out,return_address,1);
2396 emit_pcreladdr(temp);
2397 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2398 emit_movt(return_address&0xFFFF0000,rt);
2399 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2400 #endif
2401}
2402
2403// CPU-architecture-specific initialization
2404static void arch_init(void)
2405{
2406 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2407 struct tramp_insns *ops = ndrc->tramp.ops;
2408 size_t i;
2409 assert(!(diff & 3));
2410 assert(diff < 0x1000);
2411 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2412 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2413 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2414 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2415}
2416
2417// vim:shiftwidth=2:expandtab