drc: get rid of RAM_FIXED, revive ROREG
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#ifndef __MACH__
31#define CALLER_SAVE_REGS 0x100f
32#else
33#define CALLER_SAVE_REGS 0x120f
34#endif
35
36#define unused __attribute__((unused))
37
38#ifdef DRC_DBG
39#pragma GCC diagnostic ignored "-Wunused-function"
40#pragma GCC diagnostic ignored "-Wunused-variable"
41#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
42#endif
43
// Entry points that are only declared here; their definitions are not in this file section.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN entry point indexed by host register number N.
// Slots 11, 13, 14 and 15 (fp, sp, lr, pc in regname[]) have no entry and stay 0.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
78
79void invalidate_addr_r0();
80void invalidate_addr_r1();
81void invalidate_addr_r2();
82void invalidate_addr_r3();
83void invalidate_addr_r4();
84void invalidate_addr_r5();
85void invalidate_addr_r6();
86void invalidate_addr_r7();
87void invalidate_addr_r8();
88void invalidate_addr_r9();
89void invalidate_addr_r10();
90void invalidate_addr_r12();
91
92const u_int invalidate_addr_reg[16] = {
93 (int)invalidate_addr_r0,
94 (int)invalidate_addr_r1,
95 (int)invalidate_addr_r2,
96 (int)invalidate_addr_r3,
97 (int)invalidate_addr_r4,
98 (int)invalidate_addr_r5,
99 (int)invalidate_addr_r6,
100 (int)invalidate_addr_r7,
101 (int)invalidate_addr_r8,
102 (int)invalidate_addr_r9,
103 (int)invalidate_addr_r10,
104 0,
105 (int)invalidate_addr_r12,
106 0,
107 0,
108 0};
109
// Array of 2^(TARGET_SIZE_2-17) words.
// NOTE(review): presumably a bitmap of translation-cache regions awaiting an
// instruction-cache flush; its users are not visible in this section - confirm.
static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
111
112/* Linker */
113
114static void set_jump_target(void *addr, void *target_)
115{
116 u_int target = (u_int)target_;
117 u_char *ptr = addr;
118 u_int *ptr2=(u_int *)ptr;
119 if(ptr[3]==0xe2) {
120 assert((target-(u_int)ptr2-8)<1024);
121 assert(((uintptr_t)addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
125 }
126 else if(ptr[3]==0x72) {
127 // generated by emit_jno_unlikely
128 if((target-(u_int)ptr2-8)<1024) {
129 assert(((uintptr_t)addr&3)==0);
130 assert((target&3)==0);
131 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
132 }
133 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
134 assert(((uintptr_t)addr&3)==0);
135 assert((target&3)==0);
136 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
137 }
138 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
139 }
140 else {
141 assert((ptr[3]&0x0e)==0xa);
142 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
143 }
144}
145
146// This optionally copies the instruction from the target of the branch into
147// the space before the branch. Works, but the difference in speed is
148// usually insignificant.
#if 0
// Compiled out. Variant of set_jump_target that, when copy is nonzero, may
// move the instruction found at target into the word just before the branch
// (ptr2[-1], which must currently hold 0xe28dd000) and then branch to
// target+4 instead.
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *ptr=(u_char *)addr;
  u_int *ptr2=(u_int *)ptr;
  assert(!copy||ptr2[-1]==0xe28dd000);
  if(ptr[3]==0xe2) {
    // immediate form: the raw byte distance goes into the low 12 bits
    assert(!copy);
    assert((target-(u_int)ptr2-8)<4096);
    *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
  }
  else {
    assert((ptr[3]&0x0e)==0xa);
    u_int target_insn=*(u_int *)target;
    // the three tests below each veto the copy for a class of target insn
    if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
      copy=0;
    }
    if((target_insn&0x0c100000)==0x04100000) { // Load
      copy=0;
    }
    if(target_insn&0x08000000) {
      copy=0;
    }
    if(copy) {
      ptr2[-1]=target_insn;
      target+=4; // the copied instruction is skipped at the destination
    }
    *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
  }
}
#endif
180
181/* Literal pool */
182static void add_literal(int addr,int val)
183{
184 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
185 literals[literalcount][0]=addr;
186 literals[literalcount][1]=val;
187 literalcount++;
188}
189
190// from a pointer to external jump stub (which was produced by emit_extjump2)
191// find where the jumping insn is
192static void *find_extjump_insn(void *stub)
193{
194 int *ptr=(int *)(stub+4);
195 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
196 u_int offset=*ptr&0xfff;
197 void **l_ptr=(void *)ptr+offset+8;
198 return *l_ptr;
199}
200
201// find where external branch is linked to using addr of its stub:
202// get address that insn one after stub loads (dyna_linker arg1),
203// treat it as a pointer to branch insn,
204// return addr where that branch jumps to
205static void *get_pointer(void *stub)
206{
207 //printf("get_pointer(%x)\n",(int)stub);
208 int *i_ptr=find_extjump_insn(stub);
209 assert((*i_ptr&0x0f000000)==0x0a000000); // b
210 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
211}
212
213// Find the "clean" entry point from a "dirty" entry point
214// by skipping past the call to verify_code
// Skip the verification prologue at addr (4 words without HAVE_ARMV7,
// 6 with it, one optional extra word, then a bl) and return the address
// after the bl. If that word is an unconditional b, its destination is
// returned instead.
static void *get_clean_addr(void *addr)
{
#ifdef HAVE_ARMV7
  signed int *insn = (signed int *)addr + 6;
#else
  signed int *insn = (signed int *)addr + 4;
#endif

  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;

  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;

  // follow jump
  return (char *)insn + ((*insn << 8) >> 6) + 8;
}
231
232static int verify_dirty(const u_int *ptr)
233{
234 #ifndef HAVE_ARMV7
235 u_int offset;
236 // get from literal pool
237 assert((*ptr&0xFFFF0000)==0xe59f0000);
238 offset=*ptr&0xfff;
239 u_int source=*(u_int*)((void *)ptr+offset+8);
240 ptr++;
241 assert((*ptr&0xFFFF0000)==0xe59f0000);
242 offset=*ptr&0xfff;
243 u_int copy=*(u_int*)((void *)ptr+offset+8);
244 ptr++;
245 assert((*ptr&0xFFFF0000)==0xe59f0000);
246 offset=*ptr&0xfff;
247 u_int len=*(u_int*)((void *)ptr+offset+8);
248 ptr++;
249 ptr++;
250 #else
251 // ARMv7 movw/movt
252 assert((*ptr&0xFFF00000)==0xe3000000);
253 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
254 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
255 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
256 ptr+=6;
257 #endif
258 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
259 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
260 //printf("verify_dirty: %x %x %x\n",source,copy,len);
261 return !memcmp((void *)source,(void *)copy,len);
262}
263
264// This doesn't necessarily find all clean entry points, just
265// guarantees that it's not dirty
266static int isclean(void *addr)
267{
268 #ifndef HAVE_ARMV7
269 u_int *ptr=((u_int *)addr)+4;
270 #else
271 u_int *ptr=((u_int *)addr)+6;
272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
277 return 1;
278}
279
280// get source that block at addr was compiled from (host pointers)
281static void get_bounds(void *addr, u_char **start, u_char **end)
282{
283 u_int *ptr = addr;
284 #ifndef HAVE_ARMV7
285 u_int offset;
286 // get from literal pool
287 assert((*ptr&0xFFFF0000)==0xe59f0000);
288 offset=*ptr&0xfff;
289 u_int source=*(u_int*)((void *)ptr+offset+8);
290 ptr++;
291 //assert((*ptr&0xFFFF0000)==0xe59f0000);
292 //offset=*ptr&0xfff;
293 //u_int copy=*(u_int*)((void *)ptr+offset+8);
294 ptr++;
295 assert((*ptr&0xFFFF0000)==0xe59f0000);
296 offset=*ptr&0xfff;
297 u_int len=*(u_int*)((void *)ptr+offset+8);
298 ptr++;
299 ptr++;
300 #else
301 // ARMv7 movw/movt
302 assert((*ptr&0xFFF00000)==0xe3000000);
303 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
304 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
305 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
306 ptr+=6;
307 #endif
308 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
309 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
310 *start=(u_char *)source;
311 *end=(u_char *)source+len;
312}
313
314// Allocate a specific ARM register.
315static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
316{
317 int n;
318 int dirty=0;
319
320 // see if it's already allocated (and dealloc it)
321 for(n=0;n<HOST_REGS;n++)
322 {
323 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
324 dirty=(cur->dirty>>n)&1;
325 cur->regmap[n]=-1;
326 }
327 }
328
329 cur->regmap[hr]=reg;
330 cur->dirty&=~(1<<hr);
331 cur->dirty|=dirty<<hr;
332 cur->isconst&=~(1<<hr);
333}
334
335// Alloc cycle count into dedicated register
336static void alloc_cc(struct regstat *cur,int i)
337{
338 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
339}
340
341/* Assembler */
342
343static unused char regname[16][4] = {
344 "r0",
345 "r1",
346 "r2",
347 "r3",
348 "r4",
349 "r5",
350 "r6",
351 "r7",
352 "r8",
353 "r9",
354 "r10",
355 "fp",
356 "r12",
357 "sp",
358 "lr",
359 "pc"};
360
361static void output_w32(u_int word)
362{
363 *((u_int *)out)=word;
364 out+=4;
365}
366
// Pack three register numbers into instruction fields:
// rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
374
// Pack rd, rn and an 8-bit immediate that is to be shifted left by an even
// amount. The shift is stored as the rotate value (32-shift)&30 starting
// at bit 7, which is the same layout genimm() produces.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  const u_int rotate = (32 - shift) & 30;
  return (rn << 16) | (rd << 12) | (rotate << 7) | imm;
}
383
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success *encoded receives (rotate<<7)|value8 and 1 is returned.
// On failure 0 is returned and *encoded is left at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  u_int rot;

  *encoded = 0;
  if (imm == 0)
    return 1;

  // Each step rotates imm right by two bits; rot tracks the rotate value
  // that would undo the steps taken so far (32 is stored as 0 via &30).
  for (rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
399
400static void genimm_checked(u_int imm,u_int *encoded)
401{
402 u_int ret=genimm(imm,encoded);
403 assert(ret);
404 (void)ret;
405}
406
407static u_int genjmp(u_int addr)
408{
409 if (addr < 3) return 0; // a branch that will be patched later
410 int offset = addr-(int)out-8;
411 if (offset < -33554432 || offset >= 33554432) {
412 SysPrintf("genjmp: out of range: %08x\n", offset);
413 abort();
414 return 0;
415 }
416 return ((u_int)offset>>2)&0xffffff;
417}
418
419static unused void emit_breakpoint(void)
420{
421 assem_debug("bkpt #0\n");
422 //output_w32(0xe1200070);
423 output_w32(0xe7f001f0);
424}
425
426static void emit_mov(int rs,int rt)
427{
428 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
430}
431
432static void emit_movs(int rs,int rt)
433{
434 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
435 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
436}
437
438static void emit_add(int rs1,int rs2,int rt)
439{
440 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
442}
443
444static void emit_adds(int rs1,int rs2,int rt)
445{
446 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
447 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
448}
449#define emit_adds_ptr emit_adds
450
451static void emit_adcs(int rs1,int rs2,int rt)
452{
453 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
454 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
455}
456
457static void emit_neg(int rs, int rt)
458{
459 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
460 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
461}
462
463static void emit_sub(int rs1,int rs2,int rt)
464{
465 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
466 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
467}
468
469static void emit_zeroreg(int rt)
470{
471 assem_debug("mov %s,#0\n",regname[rt]);
472 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
473}
474
475static void emit_loadlp(u_int imm,u_int rt)
476{
477 add_literal((int)out,imm);
478 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
479 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
480}
481
482static void emit_movw(u_int imm,u_int rt)
483{
484 assert(imm<65536);
485 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
486 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
487}
488
489static void emit_movt(u_int imm,u_int rt)
490{
491 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
492 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
493}
494
495static void emit_movimm(u_int imm,u_int rt)
496{
497 u_int armval;
498 if(genimm(imm,&armval)) {
499 assem_debug("mov %s,#%d\n",regname[rt],imm);
500 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
501 }else if(genimm(~imm,&armval)) {
502 assem_debug("mvn %s,#%d\n",regname[rt],imm);
503 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
504 }else if(imm<65536) {
505 #ifndef HAVE_ARMV7
506 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
507 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
508 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
509 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
510 #else
511 emit_movw(imm,rt);
512 #endif
513 }else{
514 #ifndef HAVE_ARMV7
515 emit_loadlp(imm,rt);
516 #else
517 emit_movw(imm&0x0000FFFF,rt);
518 emit_movt(imm&0xFFFF0000,rt);
519 #endif
520 }
521}
522
523static void emit_pcreladdr(u_int rt)
524{
525 assem_debug("add %s,pc,#?\n",regname[rt]);
526 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
527}
528
529static void emit_loadreg(int r, int hr)
530{
531 if(r&64) {
532 SysPrintf("64bit load in 32bit mode!\n");
533 assert(0);
534 return;
535 }
536 if((r&63)==0)
537 emit_zeroreg(hr);
538 else {
539 int addr = (int)&psxRegs.GPR.r[r];
540 switch (r) {
541 //case HIREG: addr = &hi; break;
542 //case LOREG: addr = &lo; break;
543 case CCREG: addr = (int)&cycle_count; break;
544 case CSREG: addr = (int)&Status; break;
545 case INVCP: addr = (int)&invc_ptr; break;
546 case ROREG: addr = (int)&ram_offset; break;
547 default: assert(r < 34); break;
548 }
549 u_int offset = addr-(u_int)&dynarec_local;
550 assert(offset<4096);
551 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
552 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
553 }
554}
555
556static void emit_storereg(int r, int hr)
557{
558 if(r&64) {
559 SysPrintf("64bit store in 32bit mode!\n");
560 assert(0);
561 return;
562 }
563 int addr = (int)&psxRegs.GPR.r[r];
564 switch (r) {
565 //case HIREG: addr = &hi; break;
566 //case LOREG: addr = &lo; break;
567 case CCREG: addr = (int)&cycle_count; break;
568 default: assert(r < 34); break;
569 }
570 u_int offset = addr-(u_int)&dynarec_local;
571 assert(offset<4096);
572 assem_debug("str %s,fp+%d\n",regname[hr],offset);
573 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
574}
575
576static void emit_test(int rs, int rt)
577{
578 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
579 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
580}
581
582static void emit_testimm(int rs,int imm)
583{
584 u_int armval;
585 assem_debug("tst %s,#%d\n",regname[rs],imm);
586 genimm_checked(imm,&armval);
587 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
588}
589
590static void emit_testeqimm(int rs,int imm)
591{
592 u_int armval;
593 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
594 genimm_checked(imm,&armval);
595 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
596}
597
598static void emit_not(int rs,int rt)
599{
600 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
601 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
602}
603
604static void emit_and(u_int rs1,u_int rs2,u_int rt)
605{
606 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
607 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
608}
609
610static void emit_or(u_int rs1,u_int rs2,u_int rt)
611{
612 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
613 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
614}
615
616static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
617{
618 assert(rs<16);
619 assert(rt<16);
620 assert(imm<32);
621 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
622 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
623}
624
625static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
626{
627 assert(rs<16);
628 assert(rt<16);
629 assert(imm<32);
630 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
631 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
632}
633
634static void emit_xor(u_int rs1,u_int rs2,u_int rt)
635{
636 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
637 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
638}
639
640static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
641{
642 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
643 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
644}
645
646static void emit_addimm(u_int rs,int imm,u_int rt)
647{
648 assert(rs<16);
649 assert(rt<16);
650 if(imm!=0) {
651 u_int armval;
652 if(genimm(imm,&armval)) {
653 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
654 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
655 }else if(genimm(-imm,&armval)) {
656 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
657 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
658 #ifdef HAVE_ARMV7
659 }else if(rt!=rs&&(u_int)imm<65536) {
660 emit_movw(imm&0x0000ffff,rt);
661 emit_add(rs,rt,rt);
662 }else if(rt!=rs&&(u_int)-imm<65536) {
663 emit_movw(-imm&0x0000ffff,rt);
664 emit_sub(rs,rt,rt);
665 #endif
666 }else if((u_int)-imm<65536) {
667 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
668 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
669 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
670 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
671 }else {
672 do {
673 int shift = (ffs(imm) - 1) & ~1;
674 int imm8 = imm & (0xff << shift);
675 genimm_checked(imm8,&armval);
676 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
677 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
678 rs = rt;
679 imm &= ~imm8;
680 }
681 while (imm != 0);
682 }
683 }
684 else if(rs!=rt) emit_mov(rs,rt);
685}
686
687static void emit_addimm_and_set_flags(int imm,int rt)
688{
689 assert(imm>-65536&&imm<65536);
690 u_int armval;
691 if(genimm(imm,&armval)) {
692 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
693 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
694 }else if(genimm(-imm,&armval)) {
695 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
696 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
697 }else if(imm<0) {
698 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
699 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
700 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
701 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
702 }else{
703 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
704 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
705 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
706 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
707 }
708}
709
710static void emit_addnop(u_int r)
711{
712 assert(r<16);
713 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
714 output_w32(0xe2800000|rd_rn_rm(r,r,0));
715}
716
717static void emit_andimm(int rs,int imm,int rt)
718{
719 u_int armval;
720 if(imm==0) {
721 emit_zeroreg(rt);
722 }else if(genimm(imm,&armval)) {
723 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
724 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
725 }else if(genimm(~imm,&armval)) {
726 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
727 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
728 }else if(imm==65535) {
729 #ifndef HAVE_ARMV6
730 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
731 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
732 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
733 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
734 #else
735 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
736 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
737 #endif
738 }else{
739 assert(imm>0&&imm<65535);
740 #ifndef HAVE_ARMV7
741 assem_debug("mov r14,#%d\n",imm&0xFF00);
742 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
743 assem_debug("add r14,r14,#%d\n",imm&0xFF);
744 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
745 #else
746 emit_movw(imm,HOST_TEMPREG);
747 #endif
748 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
749 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
750 }
751}
752
753static void emit_orimm(int rs,int imm,int rt)
754{
755 u_int armval;
756 if(imm==0) {
757 if(rs!=rt) emit_mov(rs,rt);
758 }else if(genimm(imm,&armval)) {
759 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
760 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
761 }else{
762 assert(imm>0&&imm<65536);
763 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
764 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
765 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
766 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
767 }
768}
769
770static void emit_xorimm(int rs,int imm,int rt)
771{
772 u_int armval;
773 if(imm==0) {
774 if(rs!=rt) emit_mov(rs,rt);
775 }else if(genimm(imm,&armval)) {
776 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
777 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
778 }else{
779 assert(imm>0&&imm<65536);
780 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
781 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
782 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
783 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
784 }
785}
786
787static void emit_shlimm(int rs,u_int imm,int rt)
788{
789 assert(imm>0);
790 assert(imm<32);
791 //if(imm==1) ...
792 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
793 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
794}
795
796static void emit_lsls_imm(int rs,int imm,int rt)
797{
798 assert(imm>0);
799 assert(imm<32);
800 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
801 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
802}
803
804static unused void emit_lslpls_imm(int rs,int imm,int rt)
805{
806 assert(imm>0);
807 assert(imm<32);
808 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
809 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
810}
811
812static void emit_shrimm(int rs,u_int imm,int rt)
813{
814 assert(imm>0);
815 assert(imm<32);
816 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
817 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
818}
819
820static void emit_sarimm(int rs,u_int imm,int rt)
821{
822 assert(imm>0);
823 assert(imm<32);
824 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
825 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
826}
827
828static void emit_rorimm(int rs,u_int imm,int rt)
829{
830 assert(imm>0);
831 assert(imm<32);
832 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
833 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
834}
835
// rt = sign-extended low 16 bits of rs:
// sxth with HAVE_ARMV6, otherwise lsl #16 followed by asr #16.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
846
// rt = sign-extended low 8 bits of rs:
// sxtb with HAVE_ARMV6, otherwise lsl #24 followed by asr #24.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
857
858static void emit_shl(u_int rs,u_int shift,u_int rt)
859{
860 assert(rs<16);
861 assert(rt<16);
862 assert(shift<16);
863 //if(imm==1) ...
864 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
865 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
866}
867
868static void emit_shr(u_int rs,u_int shift,u_int rt)
869{
870 assert(rs<16);
871 assert(rt<16);
872 assert(shift<16);
873 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
874 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
875}
876
877static void emit_sar(u_int rs,u_int shift,u_int rt)
878{
879 assert(rs<16);
880 assert(rt<16);
881 assert(shift<16);
882 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
883 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
884}
885
886static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
887{
888 assert(rs<16);
889 assert(rt<16);
890 assert(shift<16);
891 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
892 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
893}
894
895static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
896{
897 assert(rs<16);
898 assert(rt<16);
899 assert(shift<16);
900 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
901 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
902}
903
904static void emit_cmpimm(int rs,int imm)
905{
906 u_int armval;
907 if(genimm(imm,&armval)) {
908 assem_debug("cmp %s,#%d\n",regname[rs],imm);
909 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
910 }else if(genimm(-imm,&armval)) {
911 assem_debug("cmn %s,#%d\n",regname[rs],imm);
912 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
913 }else if(imm>0) {
914 assert(imm<65536);
915 emit_movimm(imm,HOST_TEMPREG);
916 assem_debug("cmp %s,r14\n",regname[rs]);
917 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
918 }else{
919 assert(imm>-65536);
920 emit_movimm(-imm,HOST_TEMPREG);
921 assem_debug("cmn %s,r14\n",regname[rs]);
922 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
923 }
924}
925
926static void emit_cmovne_imm(int imm,int rt)
927{
928 assem_debug("movne %s,#%d\n",regname[rt],imm);
929 u_int armval;
930 genimm_checked(imm,&armval);
931 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
932}
933
934static void emit_cmovl_imm(int imm,int rt)
935{
936 assem_debug("movlt %s,#%d\n",regname[rt],imm);
937 u_int armval;
938 genimm_checked(imm,&armval);
939 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
940}
941
942static void emit_cmovb_imm(int imm,int rt)
943{
944 assem_debug("movcc %s,#%d\n",regname[rt],imm);
945 u_int armval;
946 genimm_checked(imm,&armval);
947 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
948}
949
950static void emit_cmovae_imm(int imm,int rt)
951{
952 assem_debug("movcs %s,#%d\n",regname[rt],imm);
953 u_int armval;
954 genimm_checked(imm,&armval);
955 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
956}
957
958static void emit_cmovne_reg(int rs,int rt)
959{
960 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
961 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
962}
963
964static void emit_cmovl_reg(int rs,int rt)
965{
966 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
967 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
968}
969
970static void emit_cmovb_reg(int rs,int rt)
971{
972 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
973 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
974}
975
976static void emit_cmovs_reg(int rs,int rt)
977{
978 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
979 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
980}
981
// rt = 1 if rs < imm (movlt condition), else 0.
// When rt is the same register as rs it can only be zeroed after the compare.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
989
// rt = 1 if rs is below imm (movcc condition), else 0.
// When rt is the same register as rs it can only be zeroed after the compare.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
997
998static void emit_cmp(int rs,int rt)
999{
1000 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1001 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1002}
1003
// rt = 1 if rs > 0, else 0:
// compare rs with 1, preset rt to 1 and overwrite it with 0 on "less than".
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1011
// rt = 1 if rs != 0, else 0.
// With distinct registers a movs copies rs into rt; when they are the same
// register a tst is used. Either way a movne then replaces rt with 1.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1019
// rt = 1 if rs1 < rs2 (movlt condition), else 0.
// If rt is also one of the operands it is zeroed only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1028
// rt = 1 if rs1 is below rs2 (movcc condition), else 0.
// If rt is also one of the operands it is zeroed only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1037
1038static int can_jump_or_call(const void *a)
1039{
1040 intptr_t offset = (u_char *)a - out - 8;
1041 return (-33554432 <= offset && offset < 33554432);
1042}
1043
1044static void emit_call(const void *a_)
1045{
1046 int a = (int)a_;
1047 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1048 u_int offset=genjmp(a);
1049 output_w32(0xeb000000|offset);
1050}
1051
1052static void emit_jmp(const void *a_)
1053{
1054 int a = (int)a_;
1055 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1056 u_int offset=genjmp(a);
1057 output_w32(0xea000000|offset);
1058}
1059
// bne a_
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n",target);
  output_w32(0x1a000000 | genjmp(target));
}
1067
// beq a_
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n",target);
  output_w32(0x0a000000 | genjmp(target));
}
1075
// bmi a_
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000 | genjmp(target));
}
1083
// bpl a_
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000 | genjmp(target));
}
1091
// blt a_
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n",target);
  output_w32(0xba000000 | genjmp(target));
}
1099
// bge a_
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n",target);
  output_w32(0xaa000000 | genjmp(target));
}
1107
// bvc a_
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000 | genjmp(target));
}
1115
1116static void emit_jc(const void *a_)
1117{
1118 int a = (int)a_;
1119 assem_debug("bcs %x\n",a);
1120 u_int offset=genjmp(a);
1121 output_w32(0x2a000000|offset);
1122}
1123
1124static void emit_jcc(const void *a_)
1125{
1126 int a = (int)a_;
1127 assem_debug("bcc %x\n",a);
1128 u_int offset=genjmp(a);
1129 output_w32(0x3a000000|offset);
1130}
1131
1132static unused void emit_callreg(u_int r)
1133{
1134 assert(r<15);
1135 assem_debug("blx %s\n",regname[r]);
1136 output_w32(0xe12fff30|r);
1137}
1138
1139static void emit_jmpreg(u_int r)
1140{
1141 assem_debug("mov pc,%s\n",regname[r]);
1142 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1143}
1144
1145static void emit_ret(void)
1146{
1147 emit_jmpreg(14);
1148}
1149
1150static void emit_readword_indexed(int offset, int rs, int rt)
1151{
1152 assert(offset>-4096&&offset<4096);
1153 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1154 if(offset>=0) {
1155 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1156 }else{
1157 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1158 }
1159}
1160
1161static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1162{
1163 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1164 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1165}
1166#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1167
1168static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1169{
1170 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1171 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1172}
1173
1174static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1175{
1176 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1177 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1178}
1179
1180static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1181{
1182 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1183 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1184}
1185
1186static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1187{
1188 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1189 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1190}
1191
1192static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1193{
1194 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1195 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1196}
1197
1198static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1199{
1200 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1201 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1202}
1203
1204static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1205{
1206 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1207 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1208}
1209
1210static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1211{
1212 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1213 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1214}
1215
1216static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1217{
1218 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1219 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1220}
1221
1222static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1223{
1224 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1225 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1226}
1227
1228static void emit_str_dualindexed(int rs1, int rs2, int rt)
1229{
1230 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1231 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1232}
1233
1234static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1235{
1236 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1237 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1238}
1239
1240static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1241{
1242 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1243 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1244}
1245
1246static void emit_movsbl_indexed(int offset, int rs, int rt)
1247{
1248 assert(offset>-256&&offset<256);
1249 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1250 if(offset>=0) {
1251 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1252 }else{
1253 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1254 }
1255}
1256
1257static void emit_movswl_indexed(int offset, int rs, int rt)
1258{
1259 assert(offset>-256&&offset<256);
1260 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1261 if(offset>=0) {
1262 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1263 }else{
1264 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1265 }
1266}
1267
1268static void emit_movzbl_indexed(int offset, int rs, int rt)
1269{
1270 assert(offset>-4096&&offset<4096);
1271 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1272 if(offset>=0) {
1273 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1274 }else{
1275 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1276 }
1277}
1278
1279static void emit_movzwl_indexed(int offset, int rs, int rt)
1280{
1281 assert(offset>-256&&offset<256);
1282 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1283 if(offset>=0) {
1284 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1285 }else{
1286 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1287 }
1288}
1289
1290static void emit_ldrd(int offset, int rs, int rt)
1291{
1292 assert(offset>-256&&offset<256);
1293 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1294 if(offset>=0) {
1295 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1296 }else{
1297 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1298 }
1299}
1300
1301static void emit_readword(void *addr, int rt)
1302{
1303 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1304 assert(offset<4096);
1305 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1306 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1307}
1308#define emit_readptr emit_readword
1309
1310static void emit_writeword_indexed(int rt, int offset, int rs)
1311{
1312 assert(offset>-4096&&offset<4096);
1313 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1314 if(offset>=0) {
1315 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1316 }else{
1317 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1318 }
1319}
1320
1321static void emit_writehword_indexed(int rt, int offset, int rs)
1322{
1323 assert(offset>-256&&offset<256);
1324 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1325 if(offset>=0) {
1326 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1327 }else{
1328 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1329 }
1330}
1331
1332static void emit_writebyte_indexed(int rt, int offset, int rs)
1333{
1334 assert(offset>-4096&&offset<4096);
1335 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1336 if(offset>=0) {
1337 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1338 }else{
1339 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1340 }
1341}
1342
1343static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1344{
1345 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1346 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1347}
1348
1349static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1350{
1351 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1352 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1353}
1354
1355static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1356{
1357 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1358 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1359}
1360
1361static void emit_writeword(int rt, void *addr)
1362{
1363 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1364 assert(offset<4096);
1365 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1366 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1367}
1368
1369static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1370{
1371 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1372 assert(rs1<16);
1373 assert(rs2<16);
1374 assert(hi<16);
1375 assert(lo<16);
1376 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1377}
1378
1379static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1380{
1381 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1382 assert(rs1<16);
1383 assert(rs2<16);
1384 assert(hi<16);
1385 assert(lo<16);
1386 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1387}
1388
1389static void emit_clz(int rs,int rt)
1390{
1391 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1392 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1393}
1394
1395static void emit_subcs(int rs1,int rs2,int rt)
1396{
1397 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1398 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1399}
1400
1401static void emit_shrcc_imm(int rs,u_int imm,int rt)
1402{
1403 assert(imm>0);
1404 assert(imm<32);
1405 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1406 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1407}
1408
1409static void emit_shrne_imm(int rs,u_int imm,int rt)
1410{
1411 assert(imm>0);
1412 assert(imm<32);
1413 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1414 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1415}
1416
1417static void emit_negmi(int rs, int rt)
1418{
1419 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1420 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1421}
1422
1423static void emit_negsmi(int rs, int rt)
1424{
1425 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1426 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1427}
1428
1429static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1430{
1431 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1432 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1433}
1434
1435static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1436{
1437 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1438 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1439}
1440
1441static void emit_teq(int rs, int rt)
1442{
1443 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1444 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1445}
1446
1447static unused void emit_rsbimm(int rs, int imm, int rt)
1448{
1449 u_int armval;
1450 genimm_checked(imm,&armval);
1451 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1452 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1453}
1454
1455// Conditionally select one of two immediates, optimizing for small code size
1456// This will only be called if HAVE_CMOV_IMM is defined
1457static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1458{
1459 u_int armval;
1460 if(genimm(imm2-imm1,&armval)) {
1461 emit_movimm(imm1,rt);
1462 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1463 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1464 }else if(genimm(imm1-imm2,&armval)) {
1465 emit_movimm(imm1,rt);
1466 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1467 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1468 }
1469 else {
1470 #ifndef HAVE_ARMV7
1471 emit_movimm(imm1,rt);
1472 add_literal((int)out,imm2);
1473 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1474 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1475 #else
1476 emit_movw(imm1&0x0000FFFF,rt);
1477 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1478 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1479 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1480 }
1481 emit_movt(imm1&0xFFFF0000,rt);
1482 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1483 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1484 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1485 }
1486 #endif
1487 }
1488}
1489
1490// special case for checking invalid_code
1491static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1492{
1493 assert(imm<128&&imm>=0);
1494 assert(r>=0&&r<16);
1495 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1496 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1497 emit_cmpimm(HOST_TEMPREG,imm);
1498}
1499
1500static void emit_callne(int a)
1501{
1502 assem_debug("blne %x\n",a);
1503 u_int offset=genjmp(a);
1504 output_w32(0x1b000000|offset);
1505}
1506
1507// Used to preload hash table entries
1508static unused void emit_prefetchreg(int r)
1509{
1510 assem_debug("pld %s\n",regname[r]);
1511 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1512}
1513
1514// Special case for mini_ht
1515static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1516{
1517 assert(offset<4096);
1518 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1519 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1520}
1521
1522static void emit_orrne_imm(int rs,int imm,int rt)
1523{
1524 u_int armval;
1525 genimm_checked(imm,&armval);
1526 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1527 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1528}
1529
1530static void emit_andne_imm(int rs,int imm,int rt)
1531{
1532 u_int armval;
1533 genimm_checked(imm,&armval);
1534 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1535 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1536}
1537
1538static unused void emit_addpl_imm(int rs,int imm,int rt)
1539{
1540 u_int armval;
1541 genimm_checked(imm,&armval);
1542 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1543 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1544}
1545
1546static void emit_jno_unlikely(int a)
1547{
1548 //emit_jno(a);
1549 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1550 output_w32(0x72800000|rd_rn_rm(15,15,0));
1551}
1552
1553static void save_regs_all(u_int reglist)
1554{
1555 int i;
1556 if(!reglist) return;
1557 assem_debug("stmia fp,{");
1558 for(i=0;i<16;i++)
1559 if(reglist&(1<<i))
1560 assem_debug("r%d,",i);
1561 assem_debug("}\n");
1562 output_w32(0xe88b0000|reglist);
1563}
1564
1565static void restore_regs_all(u_int reglist)
1566{
1567 int i;
1568 if(!reglist) return;
1569 assem_debug("ldmia fp,{");
1570 for(i=0;i<16;i++)
1571 if(reglist&(1<<i))
1572 assem_debug("r%d,",i);
1573 assem_debug("}\n");
1574 output_w32(0xe89b0000|reglist);
1575}
1576
1577// Save registers before function call
1578static void save_regs(u_int reglist)
1579{
1580 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1581 save_regs_all(reglist);
1582}
1583
1584// Restore registers after function call
1585static void restore_regs(u_int reglist)
1586{
1587 reglist&=CALLER_SAVE_REGS;
1588 restore_regs_all(reglist);
1589}
1590
1591/* Stubs/epilogue */
1592
1593static void literal_pool(int n)
1594{
1595 if(!literalcount) return;
1596 if(n) {
1597 if((int)out-literals[0][0]<4096-n) return;
1598 }
1599 u_int *ptr;
1600 int i;
1601 for(i=0;i<literalcount;i++)
1602 {
1603 u_int l_addr=(u_int)out;
1604 int j;
1605 for(j=0;j<i;j++) {
1606 if(literals[j][1]==literals[i][1]) {
1607 //printf("dup %08x\n",literals[i][1]);
1608 l_addr=literals[j][0];
1609 break;
1610 }
1611 }
1612 ptr=(u_int *)literals[i][0];
1613 u_int offset=l_addr-(u_int)ptr-8;
1614 assert(offset<4096);
1615 assert(!(offset&3));
1616 *ptr|=offset;
1617 if(l_addr==(u_int)out) {
1618 literals[i][0]=l_addr; // remember for dupes
1619 output_w32(literals[i][1]);
1620 }
1621 }
1622 literalcount=0;
1623}
1624
1625static void literal_pool_jumpover(int n)
1626{
1627 if(!literalcount) return;
1628 if(n) {
1629 if((int)out-literals[0][0]<4096-n) return;
1630 }
1631 void *jaddr = out;
1632 emit_jmp(0);
1633 literal_pool(0);
1634 set_jump_target(jaddr, out);
1635}
1636
1637// parsed by get_pointer, find_extjump_insn
1638static void emit_extjump2(u_char *addr, u_int target, void *linker)
1639{
1640 u_char *ptr=(u_char *)addr;
1641 assert((ptr[3]&0x0e)==0xa);
1642 (void)ptr;
1643
1644 emit_loadlp(target,0);
1645 emit_loadlp((u_int)addr,1);
1646 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
1647 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1648//DEBUG >
1649#ifdef DEBUG_CYCLE_COUNT
1650 emit_readword(&last_count,ECX);
1651 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1652 emit_readword(&next_interupt,ECX);
1653 emit_writeword(HOST_CCREG,&Count);
1654 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1655 emit_writeword(ECX,&last_count);
1656#endif
1657//DEBUG <
1658 emit_far_jump(linker);
1659}
1660
1661static void check_extjump2(void *src)
1662{
1663 u_int *ptr = src;
1664 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1665 (void)ptr;
1666}
1667
1668// put rt_val into rt, potentially making use of rs with value rs_val
1669static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1670{
1671 u_int armval;
1672 int diff;
1673 if(genimm(rt_val,&armval)) {
1674 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1675 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1676 return;
1677 }
1678 if(genimm(~rt_val,&armval)) {
1679 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1680 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1681 return;
1682 }
1683 diff=rt_val-rs_val;
1684 if(genimm(diff,&armval)) {
1685 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1686 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1687 return;
1688 }else if(genimm(-diff,&armval)) {
1689 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1690 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1691 return;
1692 }
1693 emit_movimm(rt_val,rt);
1694}
1695
// return 1 if emit_movimm_from() can do its job cheaply, i.e. the values
// are equal, or their difference (or its negation) fits in 8 bits once
// trailing pairs of zero bits have been shifted out.
static int is_similar_value(u_int v1,u_int v2)
{
  int delta;
  u_int bits;

  if (v1 == v2)
    return 1;
  delta = v2 - v1;

  bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return bits < 0x100;
}
1711
1712static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1713{
1714 switch(type) {
1715 case LOADB_STUB: emit_signextend8(rs,rt); break;
1716 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1717 case LOADH_STUB: emit_signextend16(rs,rt); break;
1718 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1719 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1720 default: assert(0);
1721 }
1722}
1723
1724#include "pcsxmem.h"
1725#include "pcsxmem_inline.c"
1726
1727static void do_readstub(int n)
1728{
1729 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1730 literal_pool(256);
1731 set_jump_target(stubs[n].addr, out);
1732 enum stub_type type=stubs[n].type;
1733 int i=stubs[n].a;
1734 int rs=stubs[n].b;
1735 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1736 u_int reglist=stubs[n].e;
1737 const signed char *i_regmap=i_regs->regmap;
1738 int rt;
1739 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1740 rt=get_reg(i_regmap,FTEMP);
1741 }else{
1742 rt=get_reg(i_regmap,dops[i].rt1);
1743 }
1744 assert(rs>=0);
1745 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1746 void *restore_jump = NULL;
1747 reglist|=(1<<rs);
1748 for(r=0;r<=12;r++) {
1749 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1750 temp=r; break;
1751 }
1752 }
1753 if(rt>=0&&dops[i].rt1!=0)
1754 reglist&=~(1<<rt);
1755 if(temp==-1) {
1756 save_regs(reglist);
1757 regs_saved=1;
1758 temp=(rs==0)?2:0;
1759 }
1760 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1761 temp2=1;
1762 emit_readword(&mem_rtab,temp);
1763 emit_shrimm(rs,12,temp2);
1764 emit_readword_dualindexedx4(temp,temp2,temp2);
1765 emit_lsls_imm(temp2,1,temp2);
1766 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1767 switch(type) {
1768 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1769 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1770 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1771 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1772 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1773 default: assert(0);
1774 }
1775 }
1776 if(regs_saved) {
1777 restore_jump=out;
1778 emit_jcc(0); // jump to reg restore
1779 }
1780 else
1781 emit_jcc(stubs[n].retaddr); // return address
1782
1783 if(!regs_saved)
1784 save_regs(reglist);
1785 void *handler=NULL;
1786 if(type==LOADB_STUB||type==LOADBU_STUB)
1787 handler=jump_handler_read8;
1788 if(type==LOADH_STUB||type==LOADHU_STUB)
1789 handler=jump_handler_read16;
1790 if(type==LOADW_STUB)
1791 handler=jump_handler_read32;
1792 assert(handler);
1793 pass_args(rs,temp2);
1794 int cc=get_reg(i_regmap,CCREG);
1795 if(cc<0)
1796 emit_loadreg(CCREG,2);
1797 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1798 emit_far_call(handler);
1799 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1800 mov_loadtype_adj(type,0,rt);
1801 }
1802 if(restore_jump)
1803 set_jump_target(restore_jump, out);
1804 restore_regs(reglist);
1805 emit_jmp(stubs[n].retaddr); // return address
1806}
1807
1808static void inline_readstub(enum stub_type type, int i, u_int addr,
1809 const signed char regmap[], int target, int adj, u_int reglist)
1810{
1811 int rs=get_reg(regmap,target);
1812 int rt=get_reg(regmap,target);
1813 if(rs<0) rs=get_reg(regmap,-1);
1814 assert(rs>=0);
1815 u_int is_dynamic;
1816 uintptr_t host_addr = 0;
1817 void *handler;
1818 int cc=get_reg(regmap,CCREG);
1819 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
1820 return;
1821 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1822 if (handler == NULL) {
1823 if(rt<0||dops[i].rt1==0)
1824 return;
1825 if(addr!=host_addr)
1826 emit_movimm_from(addr,rs,host_addr,rs);
1827 switch(type) {
1828 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1829 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1830 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1831 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1832 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1833 default: assert(0);
1834 }
1835 return;
1836 }
1837 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1838 if(is_dynamic) {
1839 if(type==LOADB_STUB||type==LOADBU_STUB)
1840 handler=jump_handler_read8;
1841 if(type==LOADH_STUB||type==LOADHU_STUB)
1842 handler=jump_handler_read16;
1843 if(type==LOADW_STUB)
1844 handler=jump_handler_read32;
1845 }
1846
1847 // call a memhandler
1848 if(rt>=0&&dops[i].rt1!=0)
1849 reglist&=~(1<<rt);
1850 save_regs(reglist);
1851 if(target==0)
1852 emit_movimm(addr,0);
1853 else if(rs!=0)
1854 emit_mov(rs,0);
1855 if(cc<0)
1856 emit_loadreg(CCREG,2);
1857 if(is_dynamic) {
1858 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1859 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1860 }
1861 else {
1862 emit_readword(&last_count,3);
1863 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1864 emit_add(2,3,2);
1865 emit_writeword(2,&Count);
1866 }
1867
1868 emit_far_call(handler);
1869
1870 if(rt>=0&&dops[i].rt1!=0) {
1871 switch(type) {
1872 case LOADB_STUB: emit_signextend8(0,rt); break;
1873 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1874 case LOADH_STUB: emit_signextend16(0,rt); break;
1875 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1876 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1877 default: assert(0);
1878 }
1879 }
1880 restore_regs(reglist);
1881}
1882
1883static void do_writestub(int n)
1884{
1885 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1886 literal_pool(256);
1887 set_jump_target(stubs[n].addr, out);
1888 enum stub_type type=stubs[n].type;
1889 int i=stubs[n].a;
1890 int rs=stubs[n].b;
1891 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1892 u_int reglist=stubs[n].e;
1893 const signed char *i_regmap=i_regs->regmap;
1894 int rt,r;
1895 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1896 rt=get_reg(i_regmap,r=FTEMP);
1897 }else{
1898 rt=get_reg(i_regmap,r=dops[i].rs2);
1899 }
1900 assert(rs>=0);
1901 assert(rt>=0);
1902 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1903 void *restore_jump = NULL;
1904 int reglist2=reglist|(1<<rs)|(1<<rt);
1905 for(rtmp=0;rtmp<=12;rtmp++) {
1906 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1907 temp=rtmp; break;
1908 }
1909 }
1910 if(temp==-1) {
1911 save_regs(reglist);
1912 regs_saved=1;
1913 for(rtmp=0;rtmp<=3;rtmp++)
1914 if(rtmp!=rs&&rtmp!=rt)
1915 {temp=rtmp;break;}
1916 }
1917 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1918 temp2=3;
1919 emit_readword(&mem_wtab,temp);
1920 emit_shrimm(rs,12,temp2);
1921 emit_readword_dualindexedx4(temp,temp2,temp2);
1922 emit_lsls_imm(temp2,1,temp2);
1923 switch(type) {
1924 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1925 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1926 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1927 default: assert(0);
1928 }
1929 if(regs_saved) {
1930 restore_jump=out;
1931 emit_jcc(0); // jump to reg restore
1932 }
1933 else
1934 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1935
1936 if(!regs_saved)
1937 save_regs(reglist);
1938 void *handler=NULL;
1939 switch(type) {
1940 case STOREB_STUB: handler=jump_handler_write8; break;
1941 case STOREH_STUB: handler=jump_handler_write16; break;
1942 case STOREW_STUB: handler=jump_handler_write32; break;
1943 default: assert(0);
1944 }
1945 assert(handler);
1946 pass_args(rs,rt);
1947 if(temp2!=3)
1948 emit_mov(temp2,3);
1949 int cc=get_reg(i_regmap,CCREG);
1950 if(cc<0)
1951 emit_loadreg(CCREG,2);
1952 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1953 // returns new cycle_count
1954 emit_far_call(handler);
1955 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
1956 if(cc<0)
1957 emit_storereg(CCREG,2);
1958 if(restore_jump)
1959 set_jump_target(restore_jump, out);
1960 restore_regs(reglist);
1961 emit_jmp(stubs[n].retaddr);
1962}
1963
1964static void inline_writestub(enum stub_type type, int i, u_int addr,
1965 const signed char regmap[], int target, int adj, u_int reglist)
1966{
1967 int rs=get_reg(regmap,-1);
1968 int rt=get_reg(regmap,target);
1969 assert(rs>=0);
1970 assert(rt>=0);
1971 uintptr_t host_addr = 0;
1972 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1973 if (handler == NULL) {
1974 if(addr!=host_addr)
1975 emit_movimm_from(addr,rs,host_addr,rs);
1976 switch(type) {
1977 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1978 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1979 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1980 default: assert(0);
1981 }
1982 return;
1983 }
1984
1985 // call a memhandler
1986 save_regs(reglist);
1987 pass_args(rs,rt);
1988 int cc=get_reg(regmap,CCREG);
1989 if(cc<0)
1990 emit_loadreg(CCREG,2);
1991 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1992 emit_movimm((u_int)handler,3);
1993 // returns new cycle_count
1994 emit_far_call(jump_handler_write_h);
1995 emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc);
1996 if(cc<0)
1997 emit_storereg(CCREG,2);
1998 restore_regs(reglist);
1999}
2000
2001// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
2002static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
2003{
2004 #ifndef HAVE_ARMV7
2005 emit_loadlp((int)source, 1);
2006 emit_loadlp((int)copy, 2);
2007 emit_loadlp(source_len, 3);
2008 #else
2009 emit_movw(((u_int)source)&0x0000FFFF, 1);
2010 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2011 emit_movt(((u_int)source)&0xFFFF0000, 1);
2012 emit_movt(((u_int)copy)&0xFFFF0000, 2);
2013 emit_movw(source_len, 3);
2014 #endif
2015 emit_movimm(arg0, 0);
2016}
2017
2018static void *do_dirty_stub(int i, u_int source_len)
2019{
2020 assem_debug("do_dirty_stub %x\n",start+i*4);
2021 do_dirty_stub_emit_args(start + i*4, source_len);
2022 emit_far_call(verify_code);
2023 void *entry = out;
2024 load_regs_entry(i);
2025 if (entry == out)
2026 entry = instr_addr[i];
2027 emit_jmp(instr_addr[i]);
2028 return entry;
2029}
2030
2031static void do_dirty_stub_ds(u_int source_len)
2032{
2033 do_dirty_stub_emit_args(start + 1, source_len);
2034 emit_far_call(verify_code_ds);
2035}
2036
2037/* Special assem */
2038
2039static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
2040{
2041 save_regs_all(reglist);
2042 cop2_do_stall_check(op, i, i_regs, 0);
2043#ifdef PCNT
2044 emit_movimm(op, 0);
2045 emit_far_call(pcnt_gte_start);
2046#endif
2047 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
2048}
2049
2050static void c2op_epilogue(u_int op,u_int reglist)
2051{
2052#ifdef PCNT
2053 emit_movimm(op,0);
2054 emit_far_call(pcnt_gte_end);
2055#endif
2056 restore_regs_all(reglist);
2057}
2058
2059static void c2op_call_MACtoIR(int lm,int need_flags)
2060{
2061 if(need_flags)
2062 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2063 else
2064 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2065}
2066
2067static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2068{
2069 emit_far_call(func);
2070 // func is C code and trashes r0
2071 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2072 if(need_flags||need_ir)
2073 c2op_call_MACtoIR(lm,need_flags);
2074 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2075}
2076
2077static void c2op_assemble(int i, const struct regstat *i_regs)
2078{
2079 u_int c2op = source[i] & 0x3f;
2080 u_int reglist_full = get_host_reglist(i_regs->regmap);
2081 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2082 int need_flags, need_ir;
2083
2084 if (gte_handlers[c2op]!=NULL) {
2085 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2086 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2087 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2088 source[i],gte_unneeded[i+1],need_flags,need_ir);
2089 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
2090 need_flags=0;
2091 int shift = (source[i] >> 19) & 1;
2092 int lm = (source[i] >> 10) & 1;
2093 switch(c2op) {
2094#ifndef DRC_DBG
2095 case GTE_MVMVA: {
2096#ifdef HAVE_ARMV5
2097 int v = (source[i] >> 15) & 3;
2098 int cv = (source[i] >> 13) & 3;
2099 int mx = (source[i] >> 17) & 3;
2100 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2101 c2op_prologue(c2op,i,i_regs,reglist);
2102 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2103 if(v<3)
2104 emit_ldrd(v*8,0,4);
2105 else {
2106 emit_movzwl_indexed(9*4,0,4); // gteIR
2107 emit_movzwl_indexed(10*4,0,6);
2108 emit_movzwl_indexed(11*4,0,5);
2109 emit_orrshl_imm(6,16,4);
2110 }
2111 if(mx<3)
2112 emit_addimm(0,32*4+mx*8*4,6);
2113 else
2114 emit_readword(&zeromem_ptr,6);
2115 if(cv<3)
2116 emit_addimm(0,32*4+(cv*8+5)*4,7);
2117 else
2118 emit_readword(&zeromem_ptr,7);
2119#ifdef __ARM_NEON__
2120 emit_movimm(source[i],1); // opcode
2121 emit_far_call(gteMVMVA_part_neon);
2122 if(need_flags) {
2123 emit_movimm(lm,1);
2124 emit_far_call(gteMACtoIR_flags_neon);
2125 }
2126#else
2127 if(cv==3&&shift)
2128 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
2129 else {
2130 emit_movimm(shift,1);
2131 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2132 }
2133 if(need_flags||need_ir)
2134 c2op_call_MACtoIR(lm,need_flags);
2135#endif
2136#else /* if not HAVE_ARMV5 */
2137 c2op_prologue(c2op,i,i_regs,reglist);
2138 emit_movimm(source[i],1); // opcode
2139 emit_writeword(1,&psxRegs.code);
2140 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2141#endif
2142 break;
2143 }
2144 case GTE_OP:
2145 c2op_prologue(c2op,i,i_regs,reglist);
2146 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2147 if(need_flags||need_ir) {
2148 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2149 c2op_call_MACtoIR(lm,need_flags);
2150 }
2151 break;
2152 case GTE_DPCS:
2153 c2op_prologue(c2op,i,i_regs,reglist);
2154 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2155 break;
2156 case GTE_INTPL:
2157 c2op_prologue(c2op,i,i_regs,reglist);
2158 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2159 break;
2160 case GTE_SQR:
2161 c2op_prologue(c2op,i,i_regs,reglist);
2162 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2163 if(need_flags||need_ir) {
2164 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2165 c2op_call_MACtoIR(lm,need_flags);
2166 }
2167 break;
2168 case GTE_DCPL:
2169 c2op_prologue(c2op,i,i_regs,reglist);
2170 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2171 break;
2172 case GTE_GPF:
2173 c2op_prologue(c2op,i,i_regs,reglist);
2174 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2175 break;
2176 case GTE_GPL:
2177 c2op_prologue(c2op,i,i_regs,reglist);
2178 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2179 break;
2180#endif
2181 default:
2182 c2op_prologue(c2op,i,i_regs,reglist);
2183#ifdef DRC_DBG
2184 emit_movimm(source[i],1); // opcode
2185 emit_writeword(1,&psxRegs.code);
2186#endif
2187 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2188 break;
2189 }
2190 c2op_epilogue(c2op,reglist);
2191 }
2192}
2193
/*
 * Emit code computing the value to store for a CTC2 write to control
 * register 31 (per the function name): host register sl holds the input,
 * host register temp receives the result.
 *
 * Intended semantics, as stated by the original author:
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 * NOTE(review): the shift-right/shift-left pair only drops the low 12
 * bits; unlike the "& 0x7ffff000" above it does not appear to clear
 * bit 31 of the input - confirm whether that is intended.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  const int low_bits = 12;

  // temp = sl with the low 12 bits dropped
  emit_shrimm(sl, low_bits, temp);
  emit_shlimm(temp, low_bits, temp);

  // The three masks together make up 0x7f87e000. Presumably each "eq"
  // test only executes while the previous one found no bit set, so the
  // final flags say whether any masked bit is set.
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  // Set the top bit when any of the tested bits was set.
  emit_orrne_imm(temp, 0x80000000, temp);
}
2205
2206static void do_mfc2_31_one(u_int copr,signed char temp)
2207{
2208 emit_readword(&reg_cop2d[copr],temp);
2209 emit_testimm(temp,0x8000); // do we need this?
2210 emit_andne_imm(temp,0,temp);
2211 emit_cmpimm(temp,0xf80);
2212 emit_andimm(temp,0xf80,temp);
2213 emit_cmovae_imm(0xf80,temp);
2214}
2215
2216static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2217{
2218 if (temp < 0) {
2219 host_tempreg_acquire();
2220 temp = HOST_TEMPREG;
2221 }
2222 do_mfc2_31_one(9,temp);
2223 emit_shrimm(temp,7,tl);
2224 do_mfc2_31_one(10,temp);
2225 emit_orrshr_imm(temp,2,tl);
2226 do_mfc2_31_one(11,temp);
2227 emit_orrshl_imm(temp,3,tl);
2228 emit_writeword(tl,&reg_cop2d[29]);
2229 if (temp == HOST_TEMPREG)
2230 host_tempreg_release();
2231}
2232
2233static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2234{
2235 // case 0x18: MULT
2236 // case 0x19: MULTU
2237 // case 0x1A: DIV
2238 // case 0x1B: DIVU
2239 // case 0x1C: DMULT
2240 // case 0x1D: DMULTU
2241 // case 0x1E: DDIV
2242 // case 0x1F: DDIVU
2243 if(dops[i].rs1&&dops[i].rs2)
2244 {
2245 if((dops[i].opcode2&4)==0) // 32-bit
2246 {
2247 if(dops[i].opcode2==0x18) // MULT
2248 {
2249 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2250 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2251 signed char hi=get_reg(i_regs->regmap,HIREG);
2252 signed char lo=get_reg(i_regs->regmap,LOREG);
2253 assert(m1>=0);
2254 assert(m2>=0);
2255 assert(hi>=0);
2256 assert(lo>=0);
2257 emit_smull(m1,m2,hi,lo);
2258 }
2259 if(dops[i].opcode2==0x19) // MULTU
2260 {
2261 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2262 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2263 signed char hi=get_reg(i_regs->regmap,HIREG);
2264 signed char lo=get_reg(i_regs->regmap,LOREG);
2265 assert(m1>=0);
2266 assert(m2>=0);
2267 assert(hi>=0);
2268 assert(lo>=0);
2269 emit_umull(m1,m2,hi,lo);
2270 }
2271 if(dops[i].opcode2==0x1A) // DIV
2272 {
2273 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2274 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2275 assert(d1>=0);
2276 assert(d2>=0);
2277 signed char quotient=get_reg(i_regs->regmap,LOREG);
2278 signed char remainder=get_reg(i_regs->regmap,HIREG);
2279 assert(quotient>=0);
2280 assert(remainder>=0);
2281 emit_movs(d1,remainder);
2282 emit_movimm(0xffffffff,quotient);
2283 emit_negmi(quotient,quotient); // .. quotient and ..
2284 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2285 emit_movs(d2,HOST_TEMPREG);
2286 emit_jeq(out+52); // Division by zero
2287 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2288#ifdef HAVE_ARMV5
2289 emit_clz(HOST_TEMPREG,quotient);
2290 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2291#else
2292 emit_movimm(0,quotient);
2293 emit_addpl_imm(quotient,1,quotient);
2294 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2295 emit_jns(out-2*4);
2296#endif
2297 emit_orimm(quotient,1<<31,quotient);
2298 emit_shr(quotient,quotient,quotient);
2299 emit_cmp(remainder,HOST_TEMPREG);
2300 emit_subcs(remainder,HOST_TEMPREG,remainder);
2301 emit_adcs(quotient,quotient,quotient);
2302 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2303 emit_jcc(out-16); // -4
2304 emit_teq(d1,d2);
2305 emit_negmi(quotient,quotient);
2306 emit_test(d1,d1);
2307 emit_negmi(remainder,remainder);
2308 }
2309 if(dops[i].opcode2==0x1B) // DIVU
2310 {
2311 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2312 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2313 assert(d1>=0);
2314 assert(d2>=0);
2315 signed char quotient=get_reg(i_regs->regmap,LOREG);
2316 signed char remainder=get_reg(i_regs->regmap,HIREG);
2317 assert(quotient>=0);
2318 assert(remainder>=0);
2319 emit_mov(d1,remainder);
2320 emit_movimm(0xffffffff,quotient); // div0 case
2321 emit_test(d2,d2);
2322 emit_jeq(out+40); // Division by zero
2323#ifdef HAVE_ARMV5
2324 emit_clz(d2,HOST_TEMPREG);
2325 emit_movimm(1<<31,quotient);
2326 emit_shl(d2,HOST_TEMPREG,d2);
2327#else
2328 emit_movimm(0,HOST_TEMPREG);
2329 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2330 emit_lslpls_imm(d2,1,d2);
2331 emit_jns(out-2*4);
2332 emit_movimm(1<<31,quotient);
2333#endif
2334 emit_shr(quotient,HOST_TEMPREG,quotient);
2335 emit_cmp(remainder,d2);
2336 emit_subcs(remainder,d2,remainder);
2337 emit_adcs(quotient,quotient,quotient);
2338 emit_shrcc_imm(d2,1,d2);
2339 emit_jcc(out-16); // -4
2340 }
2341 }
2342 else // 64-bit
2343 assert(0);
2344 }
2345 else
2346 {
2347 // Multiply by zero is zero.
2348 // MIPS does not have a divide by zero exception.
2349 // The result is undefined, we return zero.
2350 signed char hr=get_reg(i_regs->regmap,HIREG);
2351 signed char lr=get_reg(i_regs->regmap,LOREG);
2352 if(hr>=0) emit_zeroreg(hr);
2353 if(lr>=0) emit_zeroreg(lr);
2354 }
2355}
2356#define multdiv_assemble multdiv_assemble_arm
2357
2358static void do_jump_vaddr(int rs)
2359{
2360 emit_far_jump(jump_vaddr_reg[rs]);
2361}
2362
/*
 * Intentionally emits nothing on ARM.
 * On x86 this step loads the value 0xf8 into register r; on ARM the
 * hash is computed with a single instruction in do_rhash(), so no
 * preload is needed.
 */
static void do_preload_rhash(int r)
{
}
2367
2368static void do_preload_rhtbl(int ht) {
2369 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2370}
2371
/*
 * Emit the hash computation: rh = rs & 0xf8.
 */
static void do_rhash(int rs,int rh)
{
  const int hash_mask = 0xf8;

  emit_andimm(rs, hash_mask, rh);
}
2375
2376static void do_miniht_load(int ht,int rh) {
2377 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2378 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2379}
2380
/*
 * Emit the mini hash table dispatch: compare rh with rs and, when they
 * are equal, load register 15 (the ARM pc) from [ht+4]; otherwise fall
 * through to a jump via do_jump_vaddr().
 *
 * With CORTEX_A8_BRANCH_PREDICTION_HACK the fall-through path always
 * goes through register 7, copying rs there first when needed.
 */
static void do_miniht_jump(int rs,int rh,int ht)
{
  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
  #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  if (rs != 7) {
    emit_mov(rs, 7);
  }
  rs = 7;
  #endif
  do_jump_vaddr(rs);
}
2391
2392static void do_miniht_insert(u_int return_address,int rt,int temp) {
2393 #ifndef HAVE_ARMV7
2394 emit_movimm(return_address,rt); // PC into link register
2395 add_to_linker(out,return_address,1);
2396 emit_pcreladdr(temp);
2397 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2398 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2399 #else
2400 emit_movw(return_address&0x0000FFFF,rt);
2401 add_to_linker(out,return_address,1);
2402 emit_pcreladdr(temp);
2403 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2404 emit_movt(return_address&0xFFFF0000,rt);
2405 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2406 #endif
2407}
2408
2409// CPU-architecture-specific initialization
2410static void arch_init(void)
2411{
2412 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2413 struct tramp_insns *ops = ndrc->tramp.ops;
2414 size_t i;
2415 assert(!(diff & 3));
2416 assert(diff < 0x1000);
2417 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2418 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2419 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2420 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2421}
2422
2423// vim:shiftwidth=2:expandtab