drc: try to make some passes not as slow, part 2
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#define unused __attribute__((unused))
31
32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
// Entry points that are defined outside this section of the file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

// One jump_vaddr entry point per host register that has one.
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN indexed by register number N.
// Slots 11, 13, 14 and 15 have no entry point and are left null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
72
73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
104/* Linker */
105
106static void set_jump_target(void *addr, void *target_)
107{
108 u_int target = (u_int)target_;
109 u_char *ptr = addr;
110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
113 assert(((uintptr_t)addr&3)==0);
114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
116 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
121 assert(((uintptr_t)addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
126 assert(((uintptr_t)addr&3)==0);
127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
// Variant of set_jump_target that can optionally copy the instruction found
// at the branch target into the slot just before the branch. Works, but the
// difference in speed is usually insignificant, so it stays compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  const u_char kind = ((u_char *)addr)[3];

  assert(!copy || insn[-1] == 0xe28dd000);
  if (kind == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((kind & 0x0e) == 0xa);
  const u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0)          // ALU, no immediate, no flags
    copy = 0;
  if ((target_insn & 0x0c100000) == 0x04100000) // Load
    copy = 0;
  if (target_insn & 0x08000000)
    copy = 0;
  if (copy) {
    // move the target instruction into the slot and branch past it
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
172
173/* Literal pool */
174static void add_literal(int addr,int val)
175{
176 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
177 literals[literalcount][0]=addr;
178 literals[literalcount][1]=val;
179 literalcount++;
180}
181
182// from a pointer to external jump stub (which was produced by emit_extjump2)
183// find where the jumping insn is
184static void *find_extjump_insn(void *stub)
185{
186 int *ptr=(int *)(stub+4);
187 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
188 u_int offset=*ptr&0xfff;
189 void **l_ptr=(void *)ptr+offset+8;
190 return *l_ptr;
191}
192
193// find where external branch is liked to using addr of it's stub:
194// get address that insn one after stub loads (dyna_linker arg1),
195// treat it as a pointer to branch insn,
196// return addr where that branch jumps to
197static void *get_pointer(void *stub)
198{
199 //printf("get_pointer(%x)\n",(int)stub);
200 int *i_ptr=find_extjump_insn(stub);
201 assert((*i_ptr&0x0f000000)==0x0a000000); // b
202 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
203}
204
// Find the "clean" entry point from a "dirty" entry point by skipping past
// the call to verify_code. The bl is expected 6 words in (ARMv7) or 4 words
// in (otherwise), or one word later. If the word after the bl is an
// unconditional branch, its destination is returned instead.
static void *get_clean_addr(void *addr)
{
  int *p = addr;
#ifdef HAVE_ARMV7
  p += 6;
#else
  p += 4;
#endif
  if ((*p & 0xFF000000) != 0xeb000000)
    p++;
  assert((*p & 0xFF000000) == 0xeb000000); // bl instruction
  p++;

  if ((*p & 0xFF000000) != 0xea000000)
    return p;
  return (char *)p + ((*p << 8) >> 6) + 8; // follow jump
}
223
224static int verify_dirty(const u_int *ptr)
225{
226 #ifndef HAVE_ARMV7
227 u_int offset;
228 // get from literal pool
229 assert((*ptr&0xFFFF0000)==0xe59f0000);
230 offset=*ptr&0xfff;
231 u_int source=*(u_int*)((void *)ptr+offset+8);
232 ptr++;
233 assert((*ptr&0xFFFF0000)==0xe59f0000);
234 offset=*ptr&0xfff;
235 u_int copy=*(u_int*)((void *)ptr+offset+8);
236 ptr++;
237 assert((*ptr&0xFFFF0000)==0xe59f0000);
238 offset=*ptr&0xfff;
239 u_int len=*(u_int*)((void *)ptr+offset+8);
240 ptr++;
241 ptr++;
242 #else
243 // ARMv7 movw/movt
244 assert((*ptr&0xFFF00000)==0xe3000000);
245 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
246 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
247 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
248 ptr+=6;
249 #endif
250 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
251 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258static int isclean(void *addr)
259{
260 #ifndef HAVE_ARMV7
261 u_int *ptr=((u_int *)addr)+4;
262 #else
263 u_int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
269 return 1;
270}
271
272// get source that block at addr was compiled from (host pointers)
273static void get_bounds(void *addr, u_char **start, u_char **end)
274{
275 u_int *ptr = addr;
276 #ifndef HAVE_ARMV7
277 u_int offset;
278 // get from literal pool
279 assert((*ptr&0xFFFF0000)==0xe59f0000);
280 offset=*ptr&0xfff;
281 u_int source=*(u_int*)((void *)ptr+offset+8);
282 ptr++;
283 //assert((*ptr&0xFFFF0000)==0xe59f0000);
284 //offset=*ptr&0xfff;
285 //u_int copy=*(u_int*)((void *)ptr+offset+8);
286 ptr++;
287 assert((*ptr&0xFFFF0000)==0xe59f0000);
288 offset=*ptr&0xfff;
289 u_int len=*(u_int*)((void *)ptr+offset+8);
290 ptr++;
291 ptr++;
292 #else
293 // ARMv7 movw/movt
294 assert((*ptr&0xFFF00000)==0xe3000000);
295 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
296 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
297 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
298 ptr+=6;
299 #endif
300 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
301 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
302 *start=(u_char *)source;
303 *end=(u_char *)source+len;
304}
305
306// Allocate a specific ARM register.
307static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
308{
309 int n;
310 int dirty=0;
311
312 // see if it's already allocated (and dealloc it)
313 for(n=0;n<HOST_REGS;n++)
314 {
315 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
316 dirty=(cur->dirty>>n)&1;
317 cur->regmap[n]=-1;
318 }
319 }
320
321 cur->regmap[hr]=reg;
322 cur->dirty&=~(1<<hr);
323 cur->dirty|=dirty<<hr;
324 cur->isconst&=~(1<<hr);
325}
326
327// Alloc cycle count into dedicated register
328static void alloc_cc(struct regstat *cur,int i)
329{
330 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
331}
332
333/* Assembler */
334
335static unused char regname[16][4] = {
336 "r0",
337 "r1",
338 "r2",
339 "r3",
340 "r4",
341 "r5",
342 "r6",
343 "r7",
344 "r8",
345 "r9",
346 "r10",
347 "fp",
348 "r12",
349 "sp",
350 "lr",
351 "pc"};
352
353static void output_w32(u_int word)
354{
355 *((u_int *)out)=word;
356 out+=4;
357}
358
// Pack three register numbers into their instruction fields:
// rn at bit 16, rd at bit 12, rm at bit 0. Each must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);

  const u_int rn_field = rn << 16;
  const u_int rd_field = rd << 12;
  return rn_field | rd_field | rm;
}
366
// Pack rd (bit 12), rn (bit 16) and an 8-bit immediate that is to be shifted
// left by an even amount `shift`; the shift is stored as ((32-shift)&30)
// starting at bit 7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);

  const u_int rotate = (32 - shift) & 30;
  return (rn << 16) | (rd << 12) | (rotate << 7) | imm;
}
375
// Try to express imm as an 8-bit value rotated by an even amount.
// On success stores the 12-bit field (rotation at bit 7 upward, value in the
// low 8 bits) in *encoded and returns 1. On failure returns 0 and leaves
// *encoded at 0. An imm of 0 succeeds with *encoded == 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded = 0;
  if (imm == 0)
    return 1;

  for (rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30); // rotate right by two bits
  }
  return 0;
}
391
392static void genimm_checked(u_int imm,u_int *encoded)
393{
394 u_int ret=genimm(imm,encoded);
395 assert(ret);
396 (void)ret;
397}
398
399static u_int genjmp(u_int addr)
400{
401 if (addr < 3) return 0; // a branch that will be patched later
402 int offset = addr-(int)out-8;
403 if (offset < -33554432 || offset >= 33554432) {
404 SysPrintf("genjmp: out of range: %08x\n", offset);
405 abort();
406 return 0;
407 }
408 return ((u_int)offset>>2)&0xffffff;
409}
410
411static unused void emit_breakpoint(void)
412{
413 assem_debug("bkpt #0\n");
414 //output_w32(0xe1200070);
415 output_w32(0xe7f001f0);
416}
417
418static void emit_mov(int rs,int rt)
419{
420 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
421 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
422}
423
424static void emit_movs(int rs,int rt)
425{
426 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
427 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
428}
429
430static void emit_add(int rs1,int rs2,int rt)
431{
432 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
433 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
434}
435
436static void emit_adds(int rs1,int rs2,int rt)
437{
438 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
439 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
440}
441#define emit_adds_ptr emit_adds
442
443static void emit_adcs(int rs1,int rs2,int rt)
444{
445 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
446 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
447}
448
449static void emit_neg(int rs, int rt)
450{
451 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
452 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
453}
454
455static void emit_sub(int rs1,int rs2,int rt)
456{
457 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
458 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
459}
460
461static void emit_zeroreg(int rt)
462{
463 assem_debug("mov %s,#0\n",regname[rt]);
464 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
465}
466
467static void emit_loadlp(u_int imm,u_int rt)
468{
469 add_literal((int)out,imm);
470 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
471 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
472}
473
474#ifdef HAVE_ARMV7
475static void emit_movw(u_int imm,u_int rt)
476{
477 assert(imm<65536);
478 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
479 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
480}
481
482static void emit_movt(u_int imm,u_int rt)
483{
484 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
485 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
486}
487#endif
488
489static void emit_movimm(u_int imm,u_int rt)
490{
491 u_int armval;
492 if(genimm(imm,&armval)) {
493 assem_debug("mov %s,#%d\n",regname[rt],imm);
494 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
495 }else if(genimm(~imm,&armval)) {
496 assem_debug("mvn %s,#%d\n",regname[rt],imm);
497 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
498 }else if(imm<65536) {
499 #ifndef HAVE_ARMV7
500 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
501 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
502 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
503 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
504 #else
505 emit_movw(imm,rt);
506 #endif
507 }else{
508 #ifndef HAVE_ARMV7
509 emit_loadlp(imm,rt);
510 #else
511 emit_movw(imm&0x0000FFFF,rt);
512 emit_movt(imm&0xFFFF0000,rt);
513 #endif
514 }
515}
516
517static void emit_pcreladdr(u_int rt)
518{
519 assem_debug("add %s,pc,#?\n",regname[rt]);
520 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
521}
522
523static void emit_loadreg(int r, int hr)
524{
525 assert(hr != EXCLUDE_REG);
526 if (r == 0)
527 emit_zeroreg(hr);
528 else {
529 void *addr;
530 switch (r) {
531 //case HIREG: addr = &hi; break;
532 //case LOREG: addr = &lo; break;
533 case CCREG: addr = &cycle_count; break;
534 case CSREG: addr = &Status; break;
535 case INVCP: addr = &invc_ptr; break;
536 case ROREG: addr = &ram_offset; break;
537 default:
538 assert(r < 34);
539 addr = &psxRegs.GPR.r[r];
540 break;
541 }
542 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
543 assert(offset<4096);
544 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
545 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
546 }
547}
548
549static void emit_storereg(int r, int hr)
550{
551 assert(hr != EXCLUDE_REG);
552 int addr = (int)&psxRegs.GPR.r[r];
553 switch (r) {
554 //case HIREG: addr = &hi; break;
555 //case LOREG: addr = &lo; break;
556 case CCREG: addr = (int)&cycle_count; break;
557 default: assert(r < 34); break;
558 }
559 u_int offset = addr-(u_int)&dynarec_local;
560 assert(offset<4096);
561 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
562 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
563}
564
565static void emit_test(int rs, int rt)
566{
567 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
568 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
569}
570
571static void emit_testimm(int rs,int imm)
572{
573 u_int armval;
574 assem_debug("tst %s,#%d\n",regname[rs],imm);
575 genimm_checked(imm,&armval);
576 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
577}
578
579static void emit_testeqimm(int rs,int imm)
580{
581 u_int armval;
582 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
583 genimm_checked(imm,&armval);
584 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
585}
586
587static void emit_not(int rs,int rt)
588{
589 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
590 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
591}
592
593static void emit_and(u_int rs1,u_int rs2,u_int rt)
594{
595 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
596 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
597}
598
599static void emit_or(u_int rs1,u_int rs2,u_int rt)
600{
601 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
602 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
603}
604
605static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
606{
607 assert(rs<16);
608 assert(rt<16);
609 assert(imm<32);
610 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
611 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
612}
613
614static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
615{
616 assert(rs<16);
617 assert(rt<16);
618 assert(imm<32);
619 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
620 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
621}
622
623static void emit_xor(u_int rs1,u_int rs2,u_int rt)
624{
625 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
626 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
627}
628
629static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
630{
631 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
632 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
633}
634
635static void emit_addimm(u_int rs,int imm,u_int rt)
636{
637 assert(rs<16);
638 assert(rt<16);
639 if(imm!=0) {
640 u_int armval;
641 if(genimm(imm,&armval)) {
642 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
643 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
644 }else if(genimm(-imm,&armval)) {
645 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
646 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
647 #ifdef HAVE_ARMV7
648 }else if(rt!=rs&&(u_int)imm<65536) {
649 emit_movw(imm&0x0000ffff,rt);
650 emit_add(rs,rt,rt);
651 }else if(rt!=rs&&(u_int)-imm<65536) {
652 emit_movw(-imm&0x0000ffff,rt);
653 emit_sub(rs,rt,rt);
654 #endif
655 }else if((u_int)-imm<65536) {
656 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
657 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
658 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
659 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
660 }else {
661 do {
662 int shift = (ffs(imm) - 1) & ~1;
663 int imm8 = imm & (0xff << shift);
664 genimm_checked(imm8,&armval);
665 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
666 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
667 rs = rt;
668 imm &= ~imm8;
669 }
670 while (imm != 0);
671 }
672 }
673 else if(rs!=rt) emit_mov(rs,rt);
674}
675
676static void emit_addimm_and_set_flags(int imm,int rt)
677{
678 assert(imm>-65536&&imm<65536);
679 u_int armval;
680 if(genimm(imm,&armval)) {
681 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
682 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
683 }else if(genimm(-imm,&armval)) {
684 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
685 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
686 }else if(imm<0) {
687 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
688 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
689 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
690 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
691 }else{
692 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
693 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
694 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
695 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
696 }
697}
698
699static void emit_addnop(u_int r)
700{
701 assert(r<16);
702 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
703 output_w32(0xe2800000|rd_rn_rm(r,r,0));
704}
705
706static void emit_andimm(int rs,int imm,int rt)
707{
708 u_int armval;
709 if(imm==0) {
710 emit_zeroreg(rt);
711 }else if(genimm(imm,&armval)) {
712 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
713 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
714 }else if(genimm(~imm,&armval)) {
715 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
716 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
717 }else if(imm==65535) {
718 #ifndef HAVE_ARMV6
719 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
720 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
721 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
722 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
723 #else
724 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
725 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
726 #endif
727 }else{
728 assert(imm>0&&imm<65535);
729 #ifndef HAVE_ARMV7
730 assem_debug("mov r14,#%d\n",imm&0xFF00);
731 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
732 assem_debug("add r14,r14,#%d\n",imm&0xFF);
733 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
734 #else
735 emit_movw(imm,HOST_TEMPREG);
736 #endif
737 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
738 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
739 }
740}
741
742static void emit_orimm(int rs,int imm,int rt)
743{
744 u_int armval;
745 if(imm==0) {
746 if(rs!=rt) emit_mov(rs,rt);
747 }else if(genimm(imm,&armval)) {
748 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
749 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
750 }else{
751 assert(imm>0&&imm<65536);
752 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
753 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
754 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
755 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
756 }
757}
758
759static void emit_xorimm(int rs,int imm,int rt)
760{
761 u_int armval;
762 if(imm==0) {
763 if(rs!=rt) emit_mov(rs,rt);
764 }else if(genimm(imm,&armval)) {
765 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
766 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
767 }else{
768 assert(imm>0&&imm<65536);
769 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
770 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
771 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
772 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
773 }
774}
775
776static void emit_shlimm(int rs,u_int imm,int rt)
777{
778 assert(imm>0);
779 assert(imm<32);
780 //if(imm==1) ...
781 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
782 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
783}
784
785static void emit_lsls_imm(int rs,int imm,int rt)
786{
787 assert(imm>0);
788 assert(imm<32);
789 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
790 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
791}
792
793static unused void emit_lslpls_imm(int rs,int imm,int rt)
794{
795 assert(imm>0);
796 assert(imm<32);
797 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
798 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
799}
800
801static void emit_shrimm(int rs,u_int imm,int rt)
802{
803 assert(imm>0);
804 assert(imm<32);
805 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
806 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
807}
808
809static void emit_sarimm(int rs,u_int imm,int rt)
810{
811 assert(imm>0);
812 assert(imm<32);
813 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
814 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
815}
816
817static void emit_rorimm(int rs,u_int imm,int rt)
818{
819 assert(imm>0);
820 assert(imm<32);
821 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
822 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
823}
824
// Sign-extend the low 16 bits of rs into rt: a single sxth on ARMv6 and up,
// otherwise a shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
835
// Sign-extend the low 8 bits of rs into rt: a single sxtb on ARMv6 and up,
// otherwise a shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
846
847static void emit_shl(u_int rs,u_int shift,u_int rt)
848{
849 assert(rs<16);
850 assert(rt<16);
851 assert(shift<16);
852 //if(imm==1) ...
853 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
854 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
855}
856
857static void emit_shr(u_int rs,u_int shift,u_int rt)
858{
859 assert(rs<16);
860 assert(rt<16);
861 assert(shift<16);
862 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
863 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
864}
865
866static void emit_sar(u_int rs,u_int shift,u_int rt)
867{
868 assert(rs<16);
869 assert(rt<16);
870 assert(shift<16);
871 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
872 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
873}
874
875static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
876{
877 assert(rs<16);
878 assert(rt<16);
879 assert(shift<16);
880 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
881 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
882}
883
884static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
885{
886 assert(rs<16);
887 assert(rt<16);
888 assert(shift<16);
889 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
890 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
891}
892
893static void emit_cmpimm(int rs,int imm)
894{
895 u_int armval;
896 if(genimm(imm,&armval)) {
897 assem_debug("cmp %s,#%d\n",regname[rs],imm);
898 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
899 }else if(genimm(-imm,&armval)) {
900 assem_debug("cmn %s,#%d\n",regname[rs],imm);
901 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
902 }else if(imm>0) {
903 assert(imm<65536);
904 emit_movimm(imm,HOST_TEMPREG);
905 assem_debug("cmp %s,r14\n",regname[rs]);
906 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
907 }else{
908 assert(imm>-65536);
909 emit_movimm(-imm,HOST_TEMPREG);
910 assem_debug("cmn %s,r14\n",regname[rs]);
911 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
912 }
913}
914
915static void emit_cmovne_imm(int imm,int rt)
916{
917 assem_debug("movne %s,#%d\n",regname[rt],imm);
918 u_int armval;
919 genimm_checked(imm,&armval);
920 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
921}
922
923static void emit_cmovl_imm(int imm,int rt)
924{
925 assem_debug("movlt %s,#%d\n",regname[rt],imm);
926 u_int armval;
927 genimm_checked(imm,&armval);
928 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
929}
930
931static void emit_cmovb_imm(int imm,int rt)
932{
933 assem_debug("movcc %s,#%d\n",regname[rt],imm);
934 u_int armval;
935 genimm_checked(imm,&armval);
936 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
937}
938
939static void emit_cmovae_imm(int imm,int rt)
940{
941 assem_debug("movcs %s,#%d\n",regname[rt],imm);
942 u_int armval;
943 genimm_checked(imm,&armval);
944 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
945}
946
947static void emit_cmovs_imm(int imm,int rt)
948{
949 assem_debug("movmi %s,#%d\n",regname[rt],imm);
950 u_int armval;
951 genimm_checked(imm,&armval);
952 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
953}
954
955static void emit_cmovne_reg(int rs,int rt)
956{
957 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
958 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
959}
960
961static void emit_cmovl_reg(int rs,int rt)
962{
963 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
964 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
965}
966
967static void emit_cmovb_reg(int rs,int rt)
968{
969 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
970 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
971}
972
973static void emit_cmovs_reg(int rs,int rt)
974{
975 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
976 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
977}
978
// Set rt to 1 when the comparison of rs with imm satisfies "lt", else to 0.
// When rt aliases rs the zeroing is delayed until after the compare so that
// the compared value is not destroyed first.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
986
// Set rt to 1 when the comparison of rs with imm satisfies "cc", else to 0.
// When rt aliases rs the zeroing is delayed until after the compare so that
// the compared value is not destroyed first.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
994
995static void emit_cmp(int rs,int rt)
996{
997 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
998 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
999}
1000
// Emit: compare rs with 1, load 1 into rt, then replace it with 0 on "lt".
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);      // flags come from here
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1008
// Emit code that leaves 1 in rt when rs is non-zero. The flags come from a
// movs into rt when the registers differ, or from tst rs,rs when they are
// the same register; movne then writes the 1.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1016
// Set rt to 1 when cmp rs1,rs2 satisfies "lt", else to 0. When rt is one of
// the operands the zeroing is delayed until after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand = (rs1 == rt || rs2 == rt);

  if (!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1025
// Set rt to 1 when cmp rs1,rs2 satisfies "cc", else to 0. When rt is one of
// the operands the zeroing is delayed until after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand = (rs1 == rt || rs2 == rt);

  if (!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1034
1035static int can_jump_or_call(const void *a)
1036{
1037 intptr_t offset = (u_char *)a - out - 8;
1038 return (-33554432 <= offset && offset < 33554432);
1039}
1040
1041static void emit_call(const void *a_)
1042{
1043 int a = (int)a_;
1044 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1045 u_int offset=genjmp(a);
1046 output_w32(0xeb000000|offset);
1047}
1048
1049static void emit_jmp(const void *a_)
1050{
1051 int a = (int)a_;
1052 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1053 u_int offset=genjmp(a);
1054 output_w32(0xea000000|offset);
1055}
1056
1057static void emit_jne(const void *a_)
1058{
1059 int a = (int)a_;
1060 assem_debug("bne %x\n",a);
1061 u_int offset=genjmp(a);
1062 output_w32(0x1a000000|offset);
1063}
1064
1065static void emit_jeq(const void *a_)
1066{
1067 int a = (int)a_;
1068 assem_debug("beq %x\n",a);
1069 u_int offset=genjmp(a);
1070 output_w32(0x0a000000|offset);
1071}
1072
1073static void emit_js(const void *a_)
1074{
1075 int a = (int)a_;
1076 assem_debug("bmi %x\n",a);
1077 u_int offset=genjmp(a);
1078 output_w32(0x4a000000|offset);
1079}
1080
1081static void emit_jns(const void *a_)
1082{
1083 int a = (int)a_;
1084 assem_debug("bpl %x\n",a);
1085 u_int offset=genjmp(a);
1086 output_w32(0x5a000000|offset);
1087}
1088
1089static void emit_jl(const void *a_)
1090{
1091 int a = (int)a_;
1092 assem_debug("blt %x\n",a);
1093 u_int offset=genjmp(a);
1094 output_w32(0xba000000|offset);
1095}
1096
1097static void emit_jge(const void *a_)
1098{
1099 int a = (int)a_;
1100 assem_debug("bge %x\n",a);
1101 u_int offset=genjmp(a);
1102 output_w32(0xaa000000|offset);
1103}
1104
// Emit "bvc" to a_.
static void emit_jno(const void *a_)
{
  const int dest = (int)a_;

  assem_debug("bvc %x\n",dest);
  output_w32(0x7a000000 | genjmp(dest));
}
1112
// Emit "bcs" to a_.
static void emit_jc(const void *a_)
{
  const int dest = (int)a_;

  assem_debug("bcs %x\n",dest);
  output_w32(0x2a000000 | genjmp(dest));
}
1120
// Emit "bcc" to a_.
static void emit_jcc(const void *a_)
{
  const int dest = (int)a_;

  assem_debug("bcc %x\n",dest);
  output_w32(0x3a000000 | genjmp(dest));
}
1128
1129static unused void emit_callreg(u_int r)
1130{
1131 assert(r<15);
1132 assem_debug("blx %s\n",regname[r]);
1133 output_w32(0xe12fff30|r);
1134}
1135
1136static void emit_jmpreg(u_int r)
1137{
1138 assem_debug("mov pc,%s\n",regname[r]);
1139 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1140}
1141
1142static void emit_ret(void)
1143{
1144 emit_jmpreg(14);
1145}
1146
1147static void emit_readword_indexed(int offset, int rs, int rt)
1148{
1149 assert(offset>-4096&&offset<4096);
1150 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1151 if(offset>=0) {
1152 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1153 }else{
1154 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1155 }
1156}
1157
1158static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1159{
1160 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1161 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1162}
1163#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1164
1165static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1166{
1167 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1168 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1169}
1170
1171static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1172{
1173 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1174 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1175}
1176
1177static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1178{
1179 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1180 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1181}
1182
1183static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1184{
1185 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1186 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1187}
1188
1189static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1190{
1191 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1192 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1193}
1194
1195static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1196{
1197 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1198 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1199}
1200
1201static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1202{
1203 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1204 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1205}
1206
1207static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1208{
1209 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1210 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1211}
1212
1213static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1214{
1215 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1216 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1217}
1218
1219static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1220{
1221 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1222 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1223}
1224
1225static void emit_str_dualindexed(int rs1, int rs2, int rt)
1226{
1227 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1228 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1229}
1230
1231static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1232{
1233 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1234 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1235}
1236
1237static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1238{
1239 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1240 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1241}
1242
1243static void emit_movsbl_indexed(int offset, int rs, int rt)
1244{
1245 assert(offset>-256&&offset<256);
1246 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1247 if(offset>=0) {
1248 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1249 }else{
1250 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1251 }
1252}
1253
1254static void emit_movswl_indexed(int offset, int rs, int rt)
1255{
1256 assert(offset>-256&&offset<256);
1257 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1258 if(offset>=0) {
1259 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1260 }else{
1261 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1262 }
1263}
1264
1265static void emit_movzbl_indexed(int offset, int rs, int rt)
1266{
1267 assert(offset>-4096&&offset<4096);
1268 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1269 if(offset>=0) {
1270 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1271 }else{
1272 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1273 }
1274}
1275
1276static void emit_movzwl_indexed(int offset, int rs, int rt)
1277{
1278 assert(offset>-256&&offset<256);
1279 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1280 if(offset>=0) {
1281 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1282 }else{
1283 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1284 }
1285}
1286
1287static void emit_ldrd(int offset, int rs, int rt)
1288{
1289 assert(offset>-256&&offset<256);
1290 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1291 if(offset>=0) {
1292 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1293 }else{
1294 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1295 }
1296}
1297
1298static void emit_readword(void *addr, int rt)
1299{
1300 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1301 assert(offset<4096);
1302 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1303 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1304}
1305#define emit_readptr emit_readword
1306
1307static void emit_writeword_indexed(int rt, int offset, int rs)
1308{
1309 assert(offset>-4096&&offset<4096);
1310 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1311 if(offset>=0) {
1312 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1313 }else{
1314 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1315 }
1316}
1317
1318static void emit_writehword_indexed(int rt, int offset, int rs)
1319{
1320 assert(offset>-256&&offset<256);
1321 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1322 if(offset>=0) {
1323 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1324 }else{
1325 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1326 }
1327}
1328
1329static void emit_writebyte_indexed(int rt, int offset, int rs)
1330{
1331 assert(offset>-4096&&offset<4096);
1332 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1333 if(offset>=0) {
1334 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1335 }else{
1336 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1337 }
1338}
1339
1340static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1341{
1342 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1343 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1344}
1345
1346static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1347{
1348 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1349 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1350}
1351
1352static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1353{
1354 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1355 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1356}
1357
1358static void emit_writeword(int rt, void *addr)
1359{
1360 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1361 assert(offset<4096);
1362 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1363 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1364}
1365
1366static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1367{
1368 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1369 assert(rs1<16);
1370 assert(rs2<16);
1371 assert(hi<16);
1372 assert(lo<16);
1373 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1374}
1375
1376static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1377{
1378 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1379 assert(rs1<16);
1380 assert(rs2<16);
1381 assert(hi<16);
1382 assert(lo<16);
1383 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1384}
1385
1386static void emit_clz(int rs,int rt)
1387{
1388 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1389 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1390}
1391
1392static void emit_subcs(int rs1,int rs2,int rt)
1393{
1394 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1395 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1396}
1397
1398static void emit_shrcc_imm(int rs,u_int imm,int rt)
1399{
1400 assert(imm>0);
1401 assert(imm<32);
1402 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1403 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1404}
1405
1406static void emit_shrne_imm(int rs,u_int imm,int rt)
1407{
1408 assert(imm>0);
1409 assert(imm<32);
1410 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1411 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1412}
1413
1414static void emit_negmi(int rs, int rt)
1415{
1416 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1417 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1418}
1419
1420static void emit_negsmi(int rs, int rt)
1421{
1422 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1423 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1424}
1425
1426static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1427{
1428 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1429 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1430}
1431
1432static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1433{
1434 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1435 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1436}
1437
1438static void emit_teq(int rs, int rt)
1439{
1440 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1441 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1442}
1443
1444static unused void emit_rsbimm(int rs, int imm, int rt)
1445{
1446 u_int armval;
1447 genimm_checked(imm,&armval);
1448 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1449 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1450}
1451
1452// Conditionally select one of two immediates, optimizing for small code size
1453// This will only be called if HAVE_CMOV_IMM is defined
1454static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1455{
1456 u_int armval;
1457 if(genimm(imm2-imm1,&armval)) {
1458 emit_movimm(imm1,rt);
1459 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1460 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1461 }else if(genimm(imm1-imm2,&armval)) {
1462 emit_movimm(imm1,rt);
1463 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1464 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1465 }
1466 else {
1467 #ifndef HAVE_ARMV7
1468 emit_movimm(imm1,rt);
1469 add_literal((int)out,imm2);
1470 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1471 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1472 #else
1473 emit_movw(imm1&0x0000FFFF,rt);
1474 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1475 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1476 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1477 }
1478 emit_movt(imm1&0xFFFF0000,rt);
1479 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1480 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1481 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1482 }
1483 #endif
1484 }
1485}
1486
1487// special case for checking invalid_code
1488static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1489{
1490 assert(imm<128&&imm>=0);
1491 assert(r>=0&&r<16);
1492 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1493 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1494 emit_cmpimm(HOST_TEMPREG,imm);
1495}
1496
// Emit "blne" to address a.
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000 | genjmp(a));
}
1503
1504// Used to preload hash table entries
1505static unused void emit_prefetchreg(int r)
1506{
1507 assem_debug("pld %s\n",regname[r]);
1508 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1509}
1510
1511// Special case for mini_ht
1512static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1513{
1514 assert(offset<4096);
1515 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1516 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1517}
1518
1519static void emit_orrne_imm(int rs,int imm,int rt)
1520{
1521 u_int armval;
1522 genimm_checked(imm,&armval);
1523 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1524 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1525}
1526
1527static unused void emit_addpl_imm(int rs,int imm,int rt)
1528{
1529 u_int armval;
1530 genimm_checked(imm,&armval);
1531 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1532 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1533}
1534
1535static void emit_jno_unlikely(int a)
1536{
1537 //emit_jno(a);
1538 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1539 output_w32(0x72800000|rd_rn_rm(15,15,0));
1540}
1541
1542static void save_regs_all(u_int reglist)
1543{
1544 int i;
1545 if(!reglist) return;
1546 assem_debug("stmia fp,{");
1547 for(i=0;i<16;i++)
1548 if(reglist&(1<<i))
1549 assem_debug("r%d,",i);
1550 assem_debug("}\n");
1551 output_w32(0xe88b0000|reglist);
1552}
1553
1554static void restore_regs_all(u_int reglist)
1555{
1556 int i;
1557 if(!reglist) return;
1558 assem_debug("ldmia fp,{");
1559 for(i=0;i<16;i++)
1560 if(reglist&(1<<i))
1561 assem_debug("r%d,",i);
1562 assem_debug("}\n");
1563 output_w32(0xe89b0000|reglist);
1564}
1565
1566// Save registers before function call
1567static void save_regs(u_int reglist)
1568{
1569 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1570 save_regs_all(reglist);
1571}
1572
1573// Restore registers after function call
1574static void restore_regs(u_int reglist)
1575{
1576 reglist&=CALLER_SAVE_REGS;
1577 restore_regs_all(reglist);
1578}
1579
1580/* Stubs/epilogue */
1581
1582static void literal_pool(int n)
1583{
1584 if(!literalcount) return;
1585 if(n) {
1586 if((int)out-literals[0][0]<4096-n) return;
1587 }
1588 u_int *ptr;
1589 int i;
1590 for(i=0;i<literalcount;i++)
1591 {
1592 u_int l_addr=(u_int)out;
1593 int j;
1594 for(j=0;j<i;j++) {
1595 if(literals[j][1]==literals[i][1]) {
1596 //printf("dup %08x\n",literals[i][1]);
1597 l_addr=literals[j][0];
1598 break;
1599 }
1600 }
1601 ptr=(u_int *)literals[i][0];
1602 u_int offset=l_addr-(u_int)ptr-8;
1603 assert(offset<4096);
1604 assert(!(offset&3));
1605 *ptr|=offset;
1606 if(l_addr==(u_int)out) {
1607 literals[i][0]=l_addr; // remember for dupes
1608 output_w32(literals[i][1]);
1609 }
1610 }
1611 literalcount=0;
1612}
1613
1614static void literal_pool_jumpover(int n)
1615{
1616 if(!literalcount) return;
1617 if(n) {
1618 if((int)out-literals[0][0]<4096-n) return;
1619 }
1620 void *jaddr = out;
1621 emit_jmp(0);
1622 literal_pool(0);
1623 set_jump_target(jaddr, out);
1624}
1625
1626// parsed by get_pointer, find_extjump_insn
1627static void emit_extjump2(u_char *addr, u_int target, void *linker)
1628{
1629 u_char *ptr=(u_char *)addr;
1630 assert((ptr[3]&0x0e)==0xa);
1631 (void)ptr;
1632
1633 emit_loadlp(target,0);
1634 emit_loadlp((u_int)addr,1);
1635 assert(ndrc->translation_cache <= addr &&
1636 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
1637 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1638//DEBUG >
1639#ifdef DEBUG_CYCLE_COUNT
1640 emit_readword(&last_count,ECX);
1641 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1642 emit_readword(&next_interupt,ECX);
1643 emit_writeword(HOST_CCREG,&Count);
1644 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1645 emit_writeword(ECX,&last_count);
1646#endif
1647//DEBUG <
1648 emit_far_jump(linker);
1649}
1650
1651static void check_extjump2(void *src)
1652{
1653 u_int *ptr = src;
1654 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1655 (void)ptr;
1656}
1657
1658// put rt_val into rt, potentially making use of rs with value rs_val
1659static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1660{
1661 u_int armval;
1662 int diff;
1663 if(genimm(rt_val,&armval)) {
1664 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1665 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1666 return;
1667 }
1668 if(genimm(~rt_val,&armval)) {
1669 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1670 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1671 return;
1672 }
1673 diff=rt_val-rs_val;
1674 if(genimm(diff,&armval)) {
1675 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1676 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1677 return;
1678 }else if(genimm(-diff,&armval)) {
1679 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1680 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1681 return;
1682 }
1683 emit_movimm(rt_val,rt);
1684}
1685
// return 1 if emit_movimm_from can do its job cheaply, i.e. the values are
// equal or their difference (either sign), with trailing zero bit pairs
// stripped, fits in 8 bits
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if(v1==v2)
    return 1;

  delta=v2-v1;

  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100;
}
1701
1702static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1703{
1704 switch(type) {
1705 case LOADB_STUB: emit_signextend8(rs,rt); break;
1706 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1707 case LOADH_STUB: emit_signextend16(rs,rt); break;
1708 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1709 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1710 default: assert(0);
1711 }
1712}
1713
1714#include "pcsxmem.h"
1715#include "pcsxmem_inline.c"
1716
// Emit the out-of-line code for load stub n.
// The stub looks the address up through mem_rtab and emits a CC-conditional
// load followed by a "bcc" back to the return address; the fall-through
// path calls jump_handler_read8/16/32 and moves the size-adjusted result
// from r0 into the destination register before jumping back.
1717 static void do_readstub(int n)
1718 {
1719 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1720 literal_pool(256);
// the stub's entry branch is pointed at the code emitted below
1721 set_jump_target(stubs[n].addr, out);
1722 enum stub_type type=stubs[n].type;
1723 int i=stubs[n].a;
1724 int rs=stubs[n].b;
1725 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1726 u_int reglist=stubs[n].e;
1727 const signed char *i_regmap=i_regs->regmap;
1728 int rt;
// destination: FTEMP for C1LS/C2LS/LOADLR, otherwise the instruction's rt1
1729 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1730 rt=get_reg(i_regmap,FTEMP);
1731 }else{
1732 rt=get_reg(i_regmap,dops[i].rt1);
1733 }
1734 assert(rs>=0);
1735 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1736 void *restore_jump = NULL;
1737 reglist|=(1<<rs);
// look for a register in r0-r9,r12 (mask 0x13ff) that is not in reglist
1738 for(r=0;r<=12;r++) {
1739 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1740 temp=r; break;
1741 }
1742 }
// the destination register does not need to be preserved
1743 if(rt>=0&&dops[i].rt1!=0)
1744 reglist&=~(1<<rt);
// nothing free: save the registers up front and take r0 (r2 if rs is r0)
1745 if(temp==-1) {
1746 save_regs(reglist);
1747 regs_saved=1;
1748 temp=(rs==0)?2:0;
1749 }
// use r1 as the second temporary when it is saved or not in reglist and
// does not clash with temp or rs
1750 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1751 temp2=1;
// temp2 = mem_rtab[rs>>12], then shifted left by one
// NOTE(review): emit_lsls_imm presumably sets the flags tested by the
// CC-conditional instructions below - confirm
1752 emit_readword(&mem_rtab,temp);
1753 emit_shrimm(rs,12,temp2);
1754 emit_readword_dualindexedx4(temp,temp2,temp2);
1755 emit_lsls_imm(temp2,1,temp2);
1756 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1757 switch(type) {
1758 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1759 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1760 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1761 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1762 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1763 default: assert(0);
1764 }
1765 }
1766 if(regs_saved) {
1767 restore_jump=out;
1768 emit_jcc(0); // jump to reg restore
1769 }
1770 else
1771 emit_jcc(stubs[n].retaddr); // return address
1772
// fall-through path: call the read handler matching the access size
1773 if(!regs_saved)
1774 save_regs(reglist);
1775 void *handler=NULL;
1776 if(type==LOADB_STUB||type==LOADBU_STUB)
1777 handler=jump_handler_read8;
1778 if(type==LOADH_STUB||type==LOADHU_STUB)
1779 handler=jump_handler_read16;
1780 if(type==LOADW_STUB)
1781 handler=jump_handler_read32;
1782 assert(handler);
1783 pass_args(rs,temp2);
// r2 = cycle count register (loaded into r2 if unmapped) + stubs[n].d
1784 int cc=get_reg(i_regmap,CCREG);
1785 if(cc<0)
1786 emit_loadreg(CCREG,2);
1787 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1788 emit_far_call(handler);
// the handler's result is taken from r0
1789 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1790 mov_loadtype_adj(type,0,rt);
1791 }
1792 if(restore_jump)
1793 set_jump_target(restore_jump, out);
1794 restore_regs(reglist);
1795 emit_jmp(stubs[n].retaddr); // return address
1796 }
1797
1798static void inline_readstub(enum stub_type type, int i, u_int addr,
1799 const signed char regmap[], int target, int adj, u_int reglist)
1800{
1801 int rs=get_reg(regmap,target);
1802 int rt=get_reg(regmap,target);
1803 if(rs<0) rs=get_reg_temp(regmap);
1804 assert(rs>=0);
1805 u_int is_dynamic;
1806 uintptr_t host_addr = 0;
1807 void *handler;
1808 int cc=get_reg(regmap,CCREG);
1809 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1810 return;
1811 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1812 if (handler == NULL) {
1813 if(rt<0||dops[i].rt1==0)
1814 return;
1815 if(addr!=host_addr)
1816 emit_movimm_from(addr,rs,host_addr,rs);
1817 switch(type) {
1818 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1819 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1820 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1821 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1822 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1823 default: assert(0);
1824 }
1825 return;
1826 }
1827 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1828 if(is_dynamic) {
1829 if(type==LOADB_STUB||type==LOADBU_STUB)
1830 handler=jump_handler_read8;
1831 if(type==LOADH_STUB||type==LOADHU_STUB)
1832 handler=jump_handler_read16;
1833 if(type==LOADW_STUB)
1834 handler=jump_handler_read32;
1835 }
1836
1837 // call a memhandler
1838 if(rt>=0&&dops[i].rt1!=0)
1839 reglist&=~(1<<rt);
1840 save_regs(reglist);
1841 if(target==0)
1842 emit_movimm(addr,0);
1843 else if(rs!=0)
1844 emit_mov(rs,0);
1845 if(cc<0)
1846 emit_loadreg(CCREG,2);
1847 if(is_dynamic) {
1848 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1849 emit_addimm(cc<0?2:cc,adj,2);
1850 }
1851 else {
1852 emit_readword(&last_count,3);
1853 emit_addimm(cc<0?2:cc,adj,2);
1854 emit_add(2,3,2);
1855 emit_writeword(2,&Count);
1856 }
1857
1858 emit_far_call(handler);
1859
1860 if(rt>=0&&dops[i].rt1!=0) {
1861 switch(type) {
1862 case LOADB_STUB: emit_signextend8(0,rt); break;
1863 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1864 case LOADH_STUB: emit_signextend16(0,rt); break;
1865 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1866 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1867 default: assert(0);
1868 }
1869 }
1870 restore_regs(reglist);
1871}
1872
// Emit the out-of-line code for store stub n.
// The stub looks the address up through mem_wtab and emits a CC-conditional
// store followed by a "bcc" back to the return address; the fall-through
// path calls jump_handler_write8/16/32 and updates the cycle count from
// the value the handler returns in r0.
1873 static void do_writestub(int n)
1874 {
1875 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1876 literal_pool(256);
// the stub's entry branch is pointed at the code emitted below
1877 set_jump_target(stubs[n].addr, out);
1878 enum stub_type type=stubs[n].type;
1879 int i=stubs[n].a;
1880 int rs=stubs[n].b;
1881 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1882 u_int reglist=stubs[n].e;
1883 const signed char *i_regmap=i_regs->regmap;
1884 int rt,r;
// value to store: FTEMP for C1LS/C2LS, otherwise the instruction's rs2
1885 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1886 rt=get_reg(i_regmap,r=FTEMP);
1887 }else{
1888 rt=get_reg(i_regmap,r=dops[i].rs2);
1889 }
1890 assert(rs>=0);
1891 assert(rt>=0);
1892 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1893 void *restore_jump = NULL;
1894 int reglist2=reglist|(1<<rs)|(1<<rt);
// look for a register in r0-r9,r12 (mask 0x13ff) not in reglist2
1895 for(rtmp=0;rtmp<=12;rtmp++) {
1896 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1897 temp=rtmp; break;
1898 }
1899 }
// nothing free: save the registers up front and take the first of r0-r3
// that is neither rs nor rt
1900 if(temp==-1) {
1901 save_regs(reglist);
1902 regs_saved=1;
1903 for(rtmp=0;rtmp<=3;rtmp++)
1904 if(rtmp!=rs&&rtmp!=rt)
1905 {temp=rtmp;break;}
1906 }
// use r3 as the second temporary when it is saved or not in reglist2 and
// does not clash with temp, rs or rt
1907 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1908 temp2=3;
// temp2 = mem_wtab[rs>>12], then shifted left by one
// NOTE(review): emit_lsls_imm presumably sets the flags tested by the
// CC-conditional instructions below - confirm
1909 emit_readword(&mem_wtab,temp);
1910 emit_shrimm(rs,12,temp2);
1911 emit_readword_dualindexedx4(temp,temp2,temp2);
1912 emit_lsls_imm(temp2,1,temp2);
1913 switch(type) {
1914 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1915 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1916 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1917 default: assert(0);
1918 }
1919 if(regs_saved) {
1920 restore_jump=out;
1921 emit_jcc(0); // jump to reg restore
1922 }
1923 else
1924 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1925
// fall-through path: call the write handler matching the access size
1926 if(!regs_saved)
1927 save_regs(reglist);
1928 void *handler=NULL;
1929 switch(type) {
1930 case STOREB_STUB: handler=jump_handler_write8; break;
1931 case STOREH_STUB: handler=jump_handler_write16; break;
1932 case STOREW_STUB: handler=jump_handler_write32; break;
1933 default: assert(0);
1934 }
1935 assert(handler);
1936 pass_args(rs,rt);
// the shifted table entry is passed in r3
1937 if(temp2!=3)
1938 emit_mov(temp2,3);
// r2 = cycle count register (loaded into r2 if unmapped) + stubs[n].d
1939 int cc=get_reg(i_regmap,CCREG);
1940 if(cc<0)
1941 emit_loadreg(CCREG,2);
1942 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1943 // returns new cycle_count
1944 emit_far_call(handler);
// undo the stubs[n].d adjustment on the value returned in r0
1945 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1946 if(cc<0)
1947 emit_storereg(CCREG,2);
1948 if(restore_jump)
1949 set_jump_target(restore_jump, out);
1950 restore_regs(reglist);
1951 emit_jmp(stubs[n].retaddr);
1952 }
1953
1954static void inline_writestub(enum stub_type type, int i, u_int addr,
1955 const signed char regmap[], int target, int adj, u_int reglist)
1956{
1957 int rs=get_reg_temp(regmap);
1958 int rt=get_reg(regmap,target);
1959 assert(rs>=0);
1960 assert(rt>=0);
1961 uintptr_t host_addr = 0;
1962 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1963 if (handler == NULL) {
1964 if(addr!=host_addr)
1965 emit_movimm_from(addr,rs,host_addr,rs);
1966 switch(type) {
1967 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1968 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1969 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1970 default: assert(0);
1971 }
1972 return;
1973 }
1974
1975 // call a memhandler
1976 save_regs(reglist);
1977 pass_args(rs,rt);
1978 int cc=get_reg(regmap,CCREG);
1979 if(cc<0)
1980 emit_loadreg(CCREG,2);
1981 emit_addimm(cc<0?2:cc,adj,2);
1982 emit_movimm((u_int)handler,3);
1983 // returns new cycle_count
1984 emit_far_call(jump_handler_write_h);
1985 emit_addimm(0,-adj,cc<0?2:cc);
1986 if(cc<0)
1987 emit_storereg(CCREG,2);
1988 restore_regs(reglist);
1989}
1990
1991// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1992static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
1993{
1994 #ifndef HAVE_ARMV7
1995 emit_loadlp((int)source, 1);
1996 emit_loadlp((int)copy, 2);
1997 emit_loadlp(source_len, 3);
1998 #else
1999 emit_movw(((u_int)source)&0x0000FFFF, 1);
2000 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2001 emit_movt(((u_int)source)&0xFFFF0000, 1);
2002 emit_movt(((u_int)copy)&0xFFFF0000, 2);
2003 emit_movw(source_len, 3);
2004 #endif
2005 emit_movimm(arg0, 0);
2006}
2007
2008static void *do_dirty_stub(int i, u_int source_len)
2009{
2010 assem_debug("do_dirty_stub %x\n",start+i*4);
2011 do_dirty_stub_emit_args(start + i*4, source_len);
2012 emit_far_call(verify_code);
2013 void *entry = out;
2014 load_regs_entry(i);
2015 if (entry == out)
2016 entry = instr_addr[i];
2017 emit_jmp(instr_addr[i]);
2018 return entry;
2019}
2020
2021static void do_dirty_stub_ds(u_int source_len)
2022{
2023 do_dirty_stub_emit_args(start + 1, source_len);
2024 emit_far_call(verify_code_ds);
2025}
2026
2027/* Special assem */
2028
2029static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
2030{
2031 save_regs_all(reglist);
2032 cop2_do_stall_check(op, i, i_regs, 0);
2033#ifdef PCNT
2034 emit_movimm(op, 0);
2035 emit_far_call(pcnt_gte_start);
2036#endif
2037 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
2038}
2039
2040static void c2op_epilogue(u_int op,u_int reglist)
2041{
2042#ifdef PCNT
2043 emit_movimm(op,0);
2044 emit_far_call(pcnt_gte_end);
2045#endif
2046 restore_regs_all(reglist);
2047}
2048
2049static void c2op_call_MACtoIR(int lm,int need_flags)
2050{
2051 if(need_flags)
2052 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2053 else
2054 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2055}
2056
2057static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2058{
2059 emit_far_call(func);
2060 // func is C code and trashes r0
2061 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2062 if(need_flags||need_ir)
2063 c2op_call_MACtoIR(lm,need_flags);
2064 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2065}
2066
// Emit code for the GTE operation encoded in source[i].
// Nothing is emitted when gte_handlers[] has no entry for the opcode.
// Unless DRC_DBG is defined, a set of opcodes gets specialised call
// sequences; everything else calls gte_handlers[c2op] (or the "_nf"
// variant when the flags are not needed).
2067 static void c2op_assemble(int i, const struct regstat *i_regs)
2068 {
// opcode = low 6 bits of the instruction word
2069 u_int c2op = source[i] & 0x3f;
2070 u_int reglist_full = get_host_reglist(i_regs->regmap);
2071 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2072 int need_flags, need_ir;
2073
2074 if (gte_handlers[c2op]!=NULL) {
// flags are needed unless bit 63 of the unneeded mask is set; IR is
// needed unless all of bits 0xe00 are set
2075 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2076 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2077 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2078 source[i],gte_unneeded[i+1],need_flags,need_ir);
2079 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
2080 need_flags=0;
// instruction fields: bit 19 = shift, bit 10 = lm
2081 int shift = (source[i] >> 19) & 1;
2082 int lm = (source[i] >> 10) & 1;
2083 switch(c2op) {
2084 #ifndef DRC_DBG
2085 case GTE_MVMVA: {
2086 #ifdef HAVE_ARMV5
// instruction fields: bits 16:15 = v, 14:13 = cv, 18:17 = mx
2087 int v = (source[i] >> 15) & 3;
2088 int cv = (source[i] >> 13) & 3;
2089 int mx = (source[i] >> 17) & 3;
2090 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2091 c2op_prologue(c2op,i,i_regs,reglist);
2092 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2093 if(v<3)
2094 emit_ldrd(v*8,0,4);
2095 else {
// v==3: halfwords from cop2 regs 9 and 10 are packed into r4
// (r6 is scratch here), reg 11 goes to r5
2096 emit_movzwl_indexed(9*4,0,4); // gteIR
2097 emit_movzwl_indexed(10*4,0,6);
2098 emit_movzwl_indexed(11*4,0,5);
2099 emit_orrshl_imm(6,16,4);
2100 }
2101 if(mx<3)
2102 emit_addimm(0,32*4+mx*8*4,6);
2103 else
2104 emit_readword(&zeromem_ptr,6);
2105 if(cv<3)
2106 emit_addimm(0,32*4+(cv*8+5)*4,7);
2107 else
2108 emit_readword(&zeromem_ptr,7);
2109 #ifdef __ARM_NEON__
2110 emit_movimm(source[i],1); // opcode
2111 emit_far_call(gteMVMVA_part_neon);
2112 if(need_flags) {
2113 emit_movimm(lm,1);
2114 emit_far_call(gteMACtoIR_flags_neon);
2115 }
2116 #else
2117 if(cv==3&&shift)
2118 emit_far_call(gteMVMVA_part_cv3sh12_arm);
2119 else {
2120 emit_movimm(shift,1);
2121 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
2122 }
2123 if(need_flags||need_ir)
2124 c2op_call_MACtoIR(lm,need_flags);
2125 #endif
2126 #else /* if not HAVE_ARMV5 */
2127 c2op_prologue(c2op,i,i_regs,reglist);
2128 emit_movimm(source[i],1); // opcode
2129 emit_writeword(1,&psxRegs.code);
2130 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2131 #endif
2132 break;
2133 }
2134 case GTE_OP:
2135 c2op_prologue(c2op,i,i_regs,reglist);
2136 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2137 if(need_flags||need_ir) {
// r0 is set to the cop2 data register base again before the next call
2138 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2139 c2op_call_MACtoIR(lm,need_flags);
2140 }
2141 break;
2142 case GTE_DPCS:
2143 c2op_prologue(c2op,i,i_regs,reglist);
2144 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2145 break;
2146 case GTE_INTPL:
2147 c2op_prologue(c2op,i,i_regs,reglist);
2148 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2149 break;
2150 case GTE_SQR:
2151 c2op_prologue(c2op,i,i_regs,reglist);
2152 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2153 if(need_flags||need_ir) {
2154 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2155 c2op_call_MACtoIR(lm,need_flags);
2156 }
2157 break;
2158 case GTE_DCPL:
2159 c2op_prologue(c2op,i,i_regs,reglist);
2160 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2161 break;
2162 case GTE_GPF:
2163 c2op_prologue(c2op,i,i_regs,reglist);
2164 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2165 break;
2166 case GTE_GPL:
2167 c2op_prologue(c2op,i,i_regs,reglist);
2168 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2169 break;
2170 #endif
// generic path: call the handler from the table
2171 default:
2172 c2op_prologue(c2op,i,i_regs,reglist);
2173 #ifdef DRC_DBG
2174 emit_movimm(source[i],1); // opcode
2175 emit_writeword(1,&psxRegs.code);
2176 #endif
2177 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2178 break;
2179 }
2180 c2op_epilogue(c2op,reglist);
2181 }
2182 }
2183
/*
 * Emit code that derives the value stored by a CTC2 write to GTE control
 * register 31 (judging by the function name - confirm against the caller).
 *
 * Reference semantics, as noted by the original author:
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 * sl   - host register holding the value being written
 * temp - host register that receives the result
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // the 0x7f87e000 mask from the reference code, split into three immediates
  const int mask_hi = 0x7f000000;
  const int mask_mid = 0x00870000;
  const int mask_lo = 0x0000e000;
  const int dropped_bits = 12;

  // shift right and back left to clear the low 12 bits
  // NOTE(review): if emit_shrimm is a logical shift this keeps bit 31 of sl,
  // whereas the reference '& 0x7ffff000' clears it - confirm this is intended
  emit_shrimm(sl, dropped_bits, temp);
  emit_shlimm(temp, dropped_bits, temp);

  // chained tests: presumably each "testeq" only runs while the previous
  // test found no bits, so the final "ne" condition means "any bit was set"
  emit_testimm(temp, mask_hi);
  emit_testeqimm(temp, mask_mid);
  emit_testeqimm(temp, mask_lo);
  emit_orrne_imm(temp, 0x80000000, temp);
}
2195
2196static void do_mfc2_31_one(u_int copr,signed char temp)
2197{
2198 emit_readword(&reg_cop2d[copr],temp);
2199 emit_lsls_imm(temp,16,temp);
2200 emit_cmovs_imm(0,temp);
2201 emit_cmpimm(temp,0xf80<<16);
2202 emit_andimm(temp,0xf80<<16,temp);
2203 emit_cmovae_imm(0xf80<<16,temp);
2204}
2205
2206static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2207{
2208 if (temp < 0) {
2209 host_tempreg_acquire();
2210 temp = HOST_TEMPREG;
2211 }
2212 do_mfc2_31_one(9,temp);
2213 emit_shrimm(temp,7+16,tl);
2214 do_mfc2_31_one(10,temp);
2215 emit_orrshr_imm(temp,2+16,tl);
2216 do_mfc2_31_one(11,temp);
2217 emit_orrshr_imm(temp,-3+16,tl);
2218 emit_writeword(tl,&reg_cop2d[29]);
2219 if (temp == HOST_TEMPREG)
2220 host_tempreg_release();
2221}
2222
2223static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
2224{
2225 // case 0x18: MULT
2226 // case 0x19: MULTU
2227 // case 0x1A: DIV
2228 // case 0x1B: DIVU
2229 // case 0x1C: DMULT
2230 // case 0x1D: DMULTU
2231 // case 0x1E: DDIV
2232 // case 0x1F: DDIVU
2233 if(dops[i].rs1&&dops[i].rs2)
2234 {
2235 if((dops[i].opcode2&4)==0) // 32-bit
2236 {
2237 if(dops[i].opcode2==0x18) // MULT
2238 {
2239 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2240 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2241 signed char hi=get_reg(i_regs->regmap,HIREG);
2242 signed char lo=get_reg(i_regs->regmap,LOREG);
2243 assert(m1>=0);
2244 assert(m2>=0);
2245 assert(hi>=0);
2246 assert(lo>=0);
2247 emit_smull(m1,m2,hi,lo);
2248 }
2249 if(dops[i].opcode2==0x19) // MULTU
2250 {
2251 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2252 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2253 signed char hi=get_reg(i_regs->regmap,HIREG);
2254 signed char lo=get_reg(i_regs->regmap,LOREG);
2255 assert(m1>=0);
2256 assert(m2>=0);
2257 assert(hi>=0);
2258 assert(lo>=0);
2259 emit_umull(m1,m2,hi,lo);
2260 }
2261 if(dops[i].opcode2==0x1A) // DIV
2262 {
2263 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2264 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2265 assert(d1>=0);
2266 assert(d2>=0);
2267 signed char quotient=get_reg(i_regs->regmap,LOREG);
2268 signed char remainder=get_reg(i_regs->regmap,HIREG);
2269 assert(quotient>=0);
2270 assert(remainder>=0);
2271 emit_movs(d1,remainder);
2272 emit_movimm(0xffffffff,quotient);
2273 emit_negmi(quotient,quotient); // .. quotient and ..
2274 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2275 emit_movs(d2,HOST_TEMPREG);
2276 emit_jeq(out+52); // Division by zero
2277 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2278#ifdef HAVE_ARMV5
2279 emit_clz(HOST_TEMPREG,quotient);
2280 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2281#else
2282 emit_movimm(0,quotient);
2283 emit_addpl_imm(quotient,1,quotient);
2284 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2285 emit_jns(out-2*4);
2286#endif
2287 emit_orimm(quotient,1<<31,quotient);
2288 emit_shr(quotient,quotient,quotient);
2289 emit_cmp(remainder,HOST_TEMPREG);
2290 emit_subcs(remainder,HOST_TEMPREG,remainder);
2291 emit_adcs(quotient,quotient,quotient);
2292 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2293 emit_jcc(out-16); // -4
2294 emit_teq(d1,d2);
2295 emit_negmi(quotient,quotient);
2296 emit_test(d1,d1);
2297 emit_negmi(remainder,remainder);
2298 }
2299 if(dops[i].opcode2==0x1B) // DIVU
2300 {
2301 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2302 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2303 assert(d1>=0);
2304 assert(d2>=0);
2305 signed char quotient=get_reg(i_regs->regmap,LOREG);
2306 signed char remainder=get_reg(i_regs->regmap,HIREG);
2307 assert(quotient>=0);
2308 assert(remainder>=0);
2309 emit_mov(d1,remainder);
2310 emit_movimm(0xffffffff,quotient); // div0 case
2311 emit_test(d2,d2);
2312 emit_jeq(out+40); // Division by zero
2313#ifdef HAVE_ARMV5
2314 emit_clz(d2,HOST_TEMPREG);
2315 emit_movimm(1<<31,quotient);
2316 emit_shl(d2,HOST_TEMPREG,d2);
2317#else
2318 emit_movimm(0,HOST_TEMPREG);
2319 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2320 emit_lslpls_imm(d2,1,d2);
2321 emit_jns(out-2*4);
2322 emit_movimm(1<<31,quotient);
2323#endif
2324 emit_shr(quotient,HOST_TEMPREG,quotient);
2325 emit_cmp(remainder,d2);
2326 emit_subcs(remainder,d2,remainder);
2327 emit_adcs(quotient,quotient,quotient);
2328 emit_shrcc_imm(d2,1,d2);
2329 emit_jcc(out-16); // -4
2330 }
2331 }
2332 else // 64-bit
2333 assert(0);
2334 }
2335 else
2336 {
2337 // Multiply by zero is zero.
2338 // MIPS does not have a divide by zero exception.
2339 // The result is undefined, we return zero.
2340 signed char hr=get_reg(i_regs->regmap,HIREG);
2341 signed char lr=get_reg(i_regs->regmap,LOREG);
2342 if(hr>=0) emit_zeroreg(hr);
2343 if(lr>=0) emit_zeroreg(lr);
2344 }
2345}
2346#define multdiv_assemble multdiv_assemble_arm
2347
2348static void do_jump_vaddr(int rs)
2349{
2350 emit_far_jump(jump_vaddr_reg[rs]);
2351}
2352
/*
 * Intentionally emits nothing on ARM.  On x86 this step loads the value
 * 0xf8 into the register; on ARM the hash is a single instruction, emitted
 * by do_rhash().
 */
static void do_preload_rhash(int r) {
  (void)r; // nothing to preload
}
2357
2358static void do_preload_rhtbl(int ht) {
2359 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2360}
2361
/*
 * Emit the mini hash table hash: rh = rs & 0xf8.
 * The mask matches the (address & 0xFF) >> 3 slot selection used by
 * do_miniht_insert(), kept as a multiple of 8 (presumably the byte offset
 * of the two-word slot - confirm against the mini_ht declaration).
 */
static void do_rhash(int rs,int rh) {
  const int hash_mask = 0xf8;

  emit_andimm(rs, hash_mask, rh);
}
2365
2366static void do_miniht_load(int ht,int rh) {
2367 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2368 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2369}
2370
/*
 * Emit the mini hash table dispatch: compare the word loaded by
 * do_miniht_load() (rh) with the target address (rs) and, on a match, load
 * register 15 (pc) from offset 4 of the slot at 'ht'.  On a mismatch,
 * execution continues into the jump emitted by do_jump_vaddr().
 */
static void do_miniht_jump(int rs,int rh,int ht) {
  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // with this hack the fallback always goes through r7
  if (rs != 7) {
    emit_mov(rs, 7);
    rs = 7;
  }
#endif
  do_jump_vaddr(rs);
}
2381
2382static void do_miniht_insert(u_int return_address,int rt,int temp) {
2383 #ifndef HAVE_ARMV7
2384 emit_movimm(return_address,rt); // PC into link register
2385 add_to_linker(out,return_address,1);
2386 emit_pcreladdr(temp);
2387 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2388 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2389 #else
2390 emit_movw(return_address&0x0000FFFF,rt);
2391 add_to_linker(out,return_address,1);
2392 emit_pcreladdr(temp);
2393 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2394 emit_movt(return_address&0xFFFF0000,rt);
2395 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2396 #endif
2397}
2398
2399// CPU-architecture-specific initialization
2400static void arch_init(void)
2401{
2402 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2403 struct tramp_insns *ops = ndrc->tramp.ops;
2404 size_t i;
2405 assert(!(diff & 3));
2406 assert(diff < 0x1000);
2407 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2408 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2409 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2410 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2411}
2412
2413// vim:shiftwidth=2:expandtab