drc: add a hack for f1 games
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#ifndef __MACH__
31#define CALLER_SAVE_REGS 0x100f
32#else
33#define CALLER_SAVE_REGS 0x120f
34#endif
35
36#define unused __attribute__((unused))
37
38#ifdef DRC_DBG
39#pragma GCC diagnostic ignored "-Wunused-function"
40#pragma GCC diagnostic ignored "-Wunused-variable"
41#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
42#endif
43
44void indirect_jump_indexed();
45void indirect_jump();
46void do_interrupt();
47void jump_vaddr_r0();
48void jump_vaddr_r1();
49void jump_vaddr_r2();
50void jump_vaddr_r3();
51void jump_vaddr_r4();
52void jump_vaddr_r5();
53void jump_vaddr_r6();
54void jump_vaddr_r7();
55void jump_vaddr_r8();
56void jump_vaddr_r9();
57void jump_vaddr_r10();
58void jump_vaddr_r12();
59
60void * const jump_vaddr_reg[16] = {
61 jump_vaddr_r0,
62 jump_vaddr_r1,
63 jump_vaddr_r2,
64 jump_vaddr_r3,
65 jump_vaddr_r4,
66 jump_vaddr_r5,
67 jump_vaddr_r6,
68 jump_vaddr_r7,
69 jump_vaddr_r8,
70 jump_vaddr_r9,
71 jump_vaddr_r10,
72 0,
73 jump_vaddr_r12,
74 0,
75 0,
76 0
77};
78
79void invalidate_addr_r0();
80void invalidate_addr_r1();
81void invalidate_addr_r2();
82void invalidate_addr_r3();
83void invalidate_addr_r4();
84void invalidate_addr_r5();
85void invalidate_addr_r6();
86void invalidate_addr_r7();
87void invalidate_addr_r8();
88void invalidate_addr_r9();
89void invalidate_addr_r10();
90void invalidate_addr_r12();
91
92const u_int invalidate_addr_reg[16] = {
93 (int)invalidate_addr_r0,
94 (int)invalidate_addr_r1,
95 (int)invalidate_addr_r2,
96 (int)invalidate_addr_r3,
97 (int)invalidate_addr_r4,
98 (int)invalidate_addr_r5,
99 (int)invalidate_addr_r6,
100 (int)invalidate_addr_r7,
101 (int)invalidate_addr_r8,
102 (int)invalidate_addr_r9,
103 (int)invalidate_addr_r10,
104 0,
105 (int)invalidate_addr_r12,
106 0,
107 0,
108 0};
109
110static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
111
112/* Linker */
113
114static void set_jump_target(void *addr, void *target_)
115{
116 u_int target = (u_int)target_;
117 u_char *ptr = addr;
118 u_int *ptr2=(u_int *)ptr;
119 if(ptr[3]==0xe2) {
120 assert((target-(u_int)ptr2-8)<1024);
121 assert(((uintptr_t)addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
125 }
126 else if(ptr[3]==0x72) {
127 // generated by emit_jno_unlikely
128 if((target-(u_int)ptr2-8)<1024) {
129 assert(((uintptr_t)addr&3)==0);
130 assert((target&3)==0);
131 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
132 }
133 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
134 assert(((uintptr_t)addr&3)==0);
135 assert((target&3)==0);
136 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
137 }
138 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
139 }
140 else {
141 assert((ptr[3]&0x0e)==0xa);
142 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
143 }
144}
145
146// This optionally copies the instruction from the target of the branch into
147// the space before the branch. Works, but the difference in speed is
148// usually insignificant.
149#if 0
150static void set_jump_target_fillslot(int addr,u_int target,int copy)
151{
152 u_char *ptr=(u_char *)addr;
153 u_int *ptr2=(u_int *)ptr;
154 assert(!copy||ptr2[-1]==0xe28dd000);
155 if(ptr[3]==0xe2) {
156 assert(!copy);
157 assert((target-(u_int)ptr2-8)<4096);
158 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
159 }
160 else {
161 assert((ptr[3]&0x0e)==0xa);
162 u_int target_insn=*(u_int *)target;
163 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
164 copy=0;
165 }
166 if((target_insn&0x0c100000)==0x04100000) { // Load
167 copy=0;
168 }
169 if(target_insn&0x08000000) {
170 copy=0;
171 }
172 if(copy) {
173 ptr2[-1]=target_insn;
174 target+=4;
175 }
176 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
177 }
178}
179#endif
180
181/* Literal pool */
182static void add_literal(int addr,int val)
183{
184 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
185 literals[literalcount][0]=addr;
186 literals[literalcount][1]=val;
187 literalcount++;
188}
189
190// from a pointer to external jump stub (which was produced by emit_extjump2)
191// find where the jumping insn is
192static void *find_extjump_insn(void *stub)
193{
194 int *ptr=(int *)(stub+4);
195 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
196 u_int offset=*ptr&0xfff;
197 void **l_ptr=(void *)ptr+offset+8;
198 return *l_ptr;
199}
200
201// find where external branch is liked to using addr of it's stub:
202// get address that insn one after stub loads (dyna_linker arg1),
203// treat it as a pointer to branch insn,
204// return addr where that branch jumps to
205static void *get_pointer(void *stub)
206{
207 //printf("get_pointer(%x)\n",(int)stub);
208 int *i_ptr=find_extjump_insn(stub);
209 assert((*i_ptr&0x0f000000)==0x0a000000); // b
210 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
211}
212
// Map a "dirty" entry point to its "clean" one by stepping over the
// call to verify_code. If the instruction after the bl is an
// unconditional branch, the branch target is returned instead.
static void *get_clean_addr(void *addr)
{
#ifdef HAVE_ARMV7
  signed int *insn = (signed int *)addr + 6;
#else
  signed int *insn = (signed int *)addr + 4;
#endif

  // the bl may sit one word further on
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;

  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;

  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
231
232static int verify_dirty(const u_int *ptr)
233{
234 #ifndef HAVE_ARMV7
235 u_int offset;
236 // get from literal pool
237 assert((*ptr&0xFFFF0000)==0xe59f0000);
238 offset=*ptr&0xfff;
239 u_int source=*(u_int*)((void *)ptr+offset+8);
240 ptr++;
241 assert((*ptr&0xFFFF0000)==0xe59f0000);
242 offset=*ptr&0xfff;
243 u_int copy=*(u_int*)((void *)ptr+offset+8);
244 ptr++;
245 assert((*ptr&0xFFFF0000)==0xe59f0000);
246 offset=*ptr&0xfff;
247 u_int len=*(u_int*)((void *)ptr+offset+8);
248 ptr++;
249 ptr++;
250 #else
251 // ARMv7 movw/movt
252 assert((*ptr&0xFFF00000)==0xe3000000);
253 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
254 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
255 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
256 ptr+=6;
257 #endif
258 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
259 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
260 //printf("verify_dirty: %x %x %x\n",source,copy,len);
261 return !memcmp((void *)source,(void *)copy,len);
262}
263
264// This doesn't necessarily find all clean entry points, just
265// guarantees that it's not dirty
266static int isclean(void *addr)
267{
268 #ifndef HAVE_ARMV7
269 u_int *ptr=((u_int *)addr)+4;
270 #else
271 u_int *ptr=((u_int *)addr)+6;
272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
277 return 1;
278}
279
280// get source that block at addr was compiled from (host pointers)
281static void get_bounds(void *addr, u_char **start, u_char **end)
282{
283 u_int *ptr = addr;
284 #ifndef HAVE_ARMV7
285 u_int offset;
286 // get from literal pool
287 assert((*ptr&0xFFFF0000)==0xe59f0000);
288 offset=*ptr&0xfff;
289 u_int source=*(u_int*)((void *)ptr+offset+8);
290 ptr++;
291 //assert((*ptr&0xFFFF0000)==0xe59f0000);
292 //offset=*ptr&0xfff;
293 //u_int copy=*(u_int*)((void *)ptr+offset+8);
294 ptr++;
295 assert((*ptr&0xFFFF0000)==0xe59f0000);
296 offset=*ptr&0xfff;
297 u_int len=*(u_int*)((void *)ptr+offset+8);
298 ptr++;
299 ptr++;
300 #else
301 // ARMv7 movw/movt
302 assert((*ptr&0xFFF00000)==0xe3000000);
303 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
304 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
305 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
306 ptr+=6;
307 #endif
308 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
309 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
310 *start=(u_char *)source;
311 *end=(u_char *)source+len;
312}
313
314// Allocate a specific ARM register.
315static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
316{
317 int n;
318 int dirty=0;
319
320 // see if it's already allocated (and dealloc it)
321 for(n=0;n<HOST_REGS;n++)
322 {
323 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
324 dirty=(cur->dirty>>n)&1;
325 cur->regmap[n]=-1;
326 }
327 }
328
329 cur->regmap[hr]=reg;
330 cur->dirty&=~(1<<hr);
331 cur->dirty|=dirty<<hr;
332 cur->isconst&=~(1<<hr);
333}
334
335// Alloc cycle count into dedicated register
336static void alloc_cc(struct regstat *cur,int i)
337{
338 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
339}
340
341/* Assembler */
342
343static unused char regname[16][4] = {
344 "r0",
345 "r1",
346 "r2",
347 "r3",
348 "r4",
349 "r5",
350 "r6",
351 "r7",
352 "r8",
353 "r9",
354 "r10",
355 "fp",
356 "r12",
357 "sp",
358 "lr",
359 "pc"};
360
361static void output_w32(u_int word)
362{
363 *((u_int *)out)=word;
364 out+=4;
365}
366
// Pack three register numbers into instruction fields:
// rn at bit 16, rd at bit 12, rm at bit 0. Each must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  const u_int rn_field = rn << 16;
  const u_int rd_field = rd << 12;
  return rn_field | rd_field | rm;
}
374
// Pack rd, rn, an 8-bit immediate and an even left-shift amount into
// instruction fields. The shift is stored as ((32-shift)&30)<<7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  const u_int regs = (rn << 16) | (rd << 12);
  const u_int rot = ((32 - shift) & 30) << 7;
  return regs | rot | imm;
}
383
// Try to express 'imm' as an 8-bit value plus an even rotation.
// On success stores the 12-bit field (rotation in bits 8-11, value in
// bits 0-7) in *encoded and returns 1. On failure returns 0 with
// *encoded left at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try the next rotation
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
399
400static void genimm_checked(u_int imm,u_int *encoded)
401{
402 u_int ret=genimm(imm,encoded);
403 assert(ret);
404 (void)ret;
405}
406
407static u_int genjmp(u_int addr)
408{
409 if (addr < 3) return 0; // a branch that will be patched later
410 int offset = addr-(int)out-8;
411 if (offset < -33554432 || offset >= 33554432) {
412 SysPrintf("genjmp: out of range: %08x\n", offset);
413 abort();
414 return 0;
415 }
416 return ((u_int)offset>>2)&0xffffff;
417}
418
419static unused void emit_breakpoint(void)
420{
421 assem_debug("bkpt #0\n");
422 //output_w32(0xe1200070);
423 output_w32(0xe7f001f0);
424}
425
426static void emit_mov(int rs,int rt)
427{
428 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
430}
431
432static void emit_movs(int rs,int rt)
433{
434 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
435 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
436}
437
438static void emit_add(int rs1,int rs2,int rt)
439{
440 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
442}
443
444static void emit_adds(int rs1,int rs2,int rt)
445{
446 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
447 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
448}
449#define emit_adds_ptr emit_adds
450
451static void emit_adcs(int rs1,int rs2,int rt)
452{
453 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
454 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
455}
456
457static void emit_neg(int rs, int rt)
458{
459 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
460 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
461}
462
463static void emit_sub(int rs1,int rs2,int rt)
464{
465 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
466 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
467}
468
469static void emit_zeroreg(int rt)
470{
471 assem_debug("mov %s,#0\n",regname[rt]);
472 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
473}
474
475static void emit_loadlp(u_int imm,u_int rt)
476{
477 add_literal((int)out,imm);
478 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
479 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
480}
481
482static void emit_movw(u_int imm,u_int rt)
483{
484 assert(imm<65536);
485 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
486 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
487}
488
489static void emit_movt(u_int imm,u_int rt)
490{
491 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
492 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
493}
494
495static void emit_movimm(u_int imm,u_int rt)
496{
497 u_int armval;
498 if(genimm(imm,&armval)) {
499 assem_debug("mov %s,#%d\n",regname[rt],imm);
500 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
501 }else if(genimm(~imm,&armval)) {
502 assem_debug("mvn %s,#%d\n",regname[rt],imm);
503 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
504 }else if(imm<65536) {
505 #ifndef HAVE_ARMV7
506 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
507 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
508 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
509 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
510 #else
511 emit_movw(imm,rt);
512 #endif
513 }else{
514 #ifndef HAVE_ARMV7
515 emit_loadlp(imm,rt);
516 #else
517 emit_movw(imm&0x0000FFFF,rt);
518 emit_movt(imm&0xFFFF0000,rt);
519 #endif
520 }
521}
522
523static void emit_pcreladdr(u_int rt)
524{
525 assem_debug("add %s,pc,#?\n",regname[rt]);
526 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
527}
528
529static void emit_loadreg(int r, int hr)
530{
531 if(r&64) {
532 SysPrintf("64bit load in 32bit mode!\n");
533 assert(0);
534 return;
535 }
536 if((r&63)==0)
537 emit_zeroreg(hr);
538 else {
539 int addr = (int)&psxRegs.GPR.r[r];
540 switch (r) {
541 //case HIREG: addr = &hi; break;
542 //case LOREG: addr = &lo; break;
543 case CCREG: addr = (int)&cycle_count; break;
544 case CSREG: addr = (int)&Status; break;
545 case INVCP: addr = (int)&invc_ptr; break;
546 default: assert(r < 34); break;
547 }
548 u_int offset = addr-(u_int)&dynarec_local;
549 assert(offset<4096);
550 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
551 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
552 }
553}
554
555static void emit_storereg(int r, int hr)
556{
557 if(r&64) {
558 SysPrintf("64bit store in 32bit mode!\n");
559 assert(0);
560 return;
561 }
562 int addr = (int)&psxRegs.GPR.r[r];
563 switch (r) {
564 //case HIREG: addr = &hi; break;
565 //case LOREG: addr = &lo; break;
566 case CCREG: addr = (int)&cycle_count; break;
567 default: assert(r < 34); break;
568 }
569 u_int offset = addr-(u_int)&dynarec_local;
570 assert(offset<4096);
571 assem_debug("str %s,fp+%d\n",regname[hr],offset);
572 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
573}
574
575static void emit_test(int rs, int rt)
576{
577 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
578 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
579}
580
581static void emit_testimm(int rs,int imm)
582{
583 u_int armval;
584 assem_debug("tst %s,#%d\n",regname[rs],imm);
585 genimm_checked(imm,&armval);
586 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
587}
588
589static void emit_testeqimm(int rs,int imm)
590{
591 u_int armval;
592 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
593 genimm_checked(imm,&armval);
594 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
595}
596
597static void emit_not(int rs,int rt)
598{
599 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
600 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
601}
602
603static void emit_and(u_int rs1,u_int rs2,u_int rt)
604{
605 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
606 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
607}
608
609static void emit_or(u_int rs1,u_int rs2,u_int rt)
610{
611 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
612 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
613}
614
615static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
616{
617 assert(rs<16);
618 assert(rt<16);
619 assert(imm<32);
620 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
621 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
622}
623
624static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
625{
626 assert(rs<16);
627 assert(rt<16);
628 assert(imm<32);
629 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
630 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
631}
632
633static void emit_xor(u_int rs1,u_int rs2,u_int rt)
634{
635 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
636 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
637}
638
639static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
640{
641 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
642 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
643}
644
645static void emit_addimm(u_int rs,int imm,u_int rt)
646{
647 assert(rs<16);
648 assert(rt<16);
649 if(imm!=0) {
650 u_int armval;
651 if(genimm(imm,&armval)) {
652 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
653 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
654 }else if(genimm(-imm,&armval)) {
655 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
656 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
657 #ifdef HAVE_ARMV7
658 }else if(rt!=rs&&(u_int)imm<65536) {
659 emit_movw(imm&0x0000ffff,rt);
660 emit_add(rs,rt,rt);
661 }else if(rt!=rs&&(u_int)-imm<65536) {
662 emit_movw(-imm&0x0000ffff,rt);
663 emit_sub(rs,rt,rt);
664 #endif
665 }else if((u_int)-imm<65536) {
666 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
667 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
668 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
669 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
670 }else {
671 do {
672 int shift = (ffs(imm) - 1) & ~1;
673 int imm8 = imm & (0xff << shift);
674 genimm_checked(imm8,&armval);
675 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
676 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
677 rs = rt;
678 imm &= ~imm8;
679 }
680 while (imm != 0);
681 }
682 }
683 else if(rs!=rt) emit_mov(rs,rt);
684}
685
686static void emit_addimm_and_set_flags(int imm,int rt)
687{
688 assert(imm>-65536&&imm<65536);
689 u_int armval;
690 if(genimm(imm,&armval)) {
691 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
692 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
693 }else if(genimm(-imm,&armval)) {
694 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
695 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
696 }else if(imm<0) {
697 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
698 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
699 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
700 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
701 }else{
702 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
703 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
704 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
705 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
706 }
707}
708
709static void emit_addimm_no_flags(u_int imm,u_int rt)
710{
711 emit_addimm(rt,imm,rt);
712}
713
714static void emit_addnop(u_int r)
715{
716 assert(r<16);
717 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
718 output_w32(0xe2800000|rd_rn_rm(r,r,0));
719}
720
721static void emit_andimm(int rs,int imm,int rt)
722{
723 u_int armval;
724 if(imm==0) {
725 emit_zeroreg(rt);
726 }else if(genimm(imm,&armval)) {
727 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
728 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
729 }else if(genimm(~imm,&armval)) {
730 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
731 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
732 }else if(imm==65535) {
733 #ifndef HAVE_ARMV6
734 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
735 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
736 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
737 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
738 #else
739 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
740 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
741 #endif
742 }else{
743 assert(imm>0&&imm<65535);
744 #ifndef HAVE_ARMV7
745 assem_debug("mov r14,#%d\n",imm&0xFF00);
746 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
747 assem_debug("add r14,r14,#%d\n",imm&0xFF);
748 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
749 #else
750 emit_movw(imm,HOST_TEMPREG);
751 #endif
752 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
753 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
754 }
755}
756
757static void emit_orimm(int rs,int imm,int rt)
758{
759 u_int armval;
760 if(imm==0) {
761 if(rs!=rt) emit_mov(rs,rt);
762 }else if(genimm(imm,&armval)) {
763 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
764 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
765 }else{
766 assert(imm>0&&imm<65536);
767 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
768 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
769 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
770 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
771 }
772}
773
774static void emit_xorimm(int rs,int imm,int rt)
775{
776 u_int armval;
777 if(imm==0) {
778 if(rs!=rt) emit_mov(rs,rt);
779 }else if(genimm(imm,&armval)) {
780 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
781 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
782 }else{
783 assert(imm>0&&imm<65536);
784 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
785 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
786 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
787 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
788 }
789}
790
791static void emit_shlimm(int rs,u_int imm,int rt)
792{
793 assert(imm>0);
794 assert(imm<32);
795 //if(imm==1) ...
796 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
797 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
798}
799
800static void emit_lsls_imm(int rs,int imm,int rt)
801{
802 assert(imm>0);
803 assert(imm<32);
804 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
805 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
806}
807
808static unused void emit_lslpls_imm(int rs,int imm,int rt)
809{
810 assert(imm>0);
811 assert(imm<32);
812 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
813 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
814}
815
816static void emit_shrimm(int rs,u_int imm,int rt)
817{
818 assert(imm>0);
819 assert(imm<32);
820 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
821 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
822}
823
824static void emit_sarimm(int rs,u_int imm,int rt)
825{
826 assert(imm>0);
827 assert(imm<32);
828 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
829 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
830}
831
832static void emit_rorimm(int rs,u_int imm,int rt)
833{
834 assert(imm>0);
835 assert(imm<32);
836 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
837 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
838}
839
// Sign-extend the low 16 bits of rs into rt: one sxth on ARMv6 and
// later, otherwise a shift left by 16 followed by an arithmetic shift
// right by 16.
static void emit_signextend16(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
850
// Sign-extend the low 8 bits of rs into rt: one sxtb on ARMv6 and
// later, otherwise a shift left by 24 followed by an arithmetic shift
// right by 24.
static void emit_signextend8(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
861
862static void emit_shl(u_int rs,u_int shift,u_int rt)
863{
864 assert(rs<16);
865 assert(rt<16);
866 assert(shift<16);
867 //if(imm==1) ...
868 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
869 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
870}
871
872static void emit_shr(u_int rs,u_int shift,u_int rt)
873{
874 assert(rs<16);
875 assert(rt<16);
876 assert(shift<16);
877 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
878 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
879}
880
881static void emit_sar(u_int rs,u_int shift,u_int rt)
882{
883 assert(rs<16);
884 assert(rt<16);
885 assert(shift<16);
886 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
887 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
888}
889
890static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
891{
892 assert(rs<16);
893 assert(rt<16);
894 assert(shift<16);
895 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
896 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
897}
898
899static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
900{
901 assert(rs<16);
902 assert(rt<16);
903 assert(shift<16);
904 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
905 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
906}
907
908static void emit_cmpimm(int rs,int imm)
909{
910 u_int armval;
911 if(genimm(imm,&armval)) {
912 assem_debug("cmp %s,#%d\n",regname[rs],imm);
913 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
914 }else if(genimm(-imm,&armval)) {
915 assem_debug("cmn %s,#%d\n",regname[rs],imm);
916 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
917 }else if(imm>0) {
918 assert(imm<65536);
919 emit_movimm(imm,HOST_TEMPREG);
920 assem_debug("cmp %s,r14\n",regname[rs]);
921 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
922 }else{
923 assert(imm>-65536);
924 emit_movimm(-imm,HOST_TEMPREG);
925 assem_debug("cmn %s,r14\n",regname[rs]);
926 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
927 }
928}
929
930static void emit_cmovne_imm(int imm,int rt)
931{
932 assem_debug("movne %s,#%d\n",regname[rt],imm);
933 u_int armval;
934 genimm_checked(imm,&armval);
935 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
936}
937
938static void emit_cmovl_imm(int imm,int rt)
939{
940 assem_debug("movlt %s,#%d\n",regname[rt],imm);
941 u_int armval;
942 genimm_checked(imm,&armval);
943 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
944}
945
946static void emit_cmovb_imm(int imm,int rt)
947{
948 assem_debug("movcc %s,#%d\n",regname[rt],imm);
949 u_int armval;
950 genimm_checked(imm,&armval);
951 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
952}
953
954static void emit_cmovae_imm(int imm,int rt)
955{
956 assem_debug("movcs %s,#%d\n",regname[rt],imm);
957 u_int armval;
958 genimm_checked(imm,&armval);
959 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
960}
961
962static void emit_cmovne_reg(int rs,int rt)
963{
964 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
965 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
966}
967
968static void emit_cmovl_reg(int rs,int rt)
969{
970 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
971 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
972}
973
974static void emit_cmovb_reg(int rs,int rt)
975{
976 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
977 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
978}
979
980static void emit_cmovs_reg(int rs,int rt)
981{
982 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
983 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
984}
985
// rt = (rs < imm) ? 1 : 0 via cmp + movlt. When rs and rt are the same
// register, rt is cleared only after the comparison has read it.
static void emit_slti32(int rs, int imm, int rt)
{
  const int same_reg = (rs == rt);

  if (!same_reg)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (same_reg)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
993
// Like emit_slti32, but the final conditional move is movcc instead of
// movlt. When rs and rt are the same register, rt is cleared only after
// the comparison has read it.
static void emit_sltiu32(int rs, int imm, int rt)
{
  const int same_reg = (rs == rt);

  if (!same_reg)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (same_reg)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1001
1002static void emit_cmp(int rs,int rt)
1003{
1004 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1005 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1006}
1007
// rt = (rs > 0) ? 1 : 0: compare rs with 1, preset rt to 1, then
// overwrite it with 0 when the comparison was "less than".
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1015
// Set rt to 1 when rs is nonzero. Flags come from "movs rt, rs" when the
// registers differ, or from "tst rs, rs" when they are the same; a
// "movne rt, #1" follows.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1023
// rt = (rs1 < rs2) ? 1 : 0 via cmp + movlt. When rt is also one of the
// sources, it is cleared only after the comparison has read it.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int overlaps = (rs1 == rt || rs2 == rt);

  if (!overlaps)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (overlaps)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1032
// Like emit_set_if_less32, but the final conditional move is movcc
// instead of movlt. When rt is also one of the sources, it is cleared
// only after the comparison has read it.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int overlaps = (rs1 == rt || rs2 == rt);

  if (!overlaps)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (overlaps)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1041
1042static int can_jump_or_call(const void *a)
1043{
1044 intptr_t offset = (u_char *)a - out - 8;
1045 return (-33554432 <= offset && offset < 33554432);
1046}
1047
1048static void emit_call(const void *a_)
1049{
1050 int a = (int)a_;
1051 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1052 u_int offset=genjmp(a);
1053 output_w32(0xeb000000|offset);
1054}
1055
1056static void emit_jmp(const void *a_)
1057{
1058 int a = (int)a_;
1059 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1060 u_int offset=genjmp(a);
1061 output_w32(0xea000000|offset);
1062}
1063
// Emit "bne" to the address a_.
static void emit_jne(const void *a_)
{
  const int a = (int)a_;
  assem_debug("bne %x\n", a);
  output_w32(0x1a000000 | genjmp(a));
}
1071
// Emit "beq" to the address a_.
static void emit_jeq(const void *a_)
{
  const int a = (int)a_;
  assem_debug("beq %x\n", a);
  output_w32(0x0a000000 | genjmp(a));
}
1079
// Emit "bmi" to the address a_.
static void emit_js(const void *a_)
{
  const int a = (int)a_;
  assem_debug("bmi %x\n", a);
  output_w32(0x4a000000 | genjmp(a));
}
1087
// Emit "bpl" to the address a_.
static void emit_jns(const void *a_)
{
  const int a = (int)a_;
  assem_debug("bpl %x\n", a);
  output_w32(0x5a000000 | genjmp(a));
}
1095
// Emit "blt" to the address a_.
static void emit_jl(const void *a_)
{
  const int a = (int)a_;
  assem_debug("blt %x\n", a);
  output_w32(0xba000000 | genjmp(a));
}
1103
// Emit "bge" to the address a_.
static void emit_jge(const void *a_)
{
  const int a = (int)a_;
  assem_debug("bge %x\n", a);
  output_w32(0xaa000000 | genjmp(a));
}
1111
// Emit "bvc" to the address a_.
static void emit_jno(const void *a_)
{
  const int a = (int)a_;
  assem_debug("bvc %x\n", a);
  output_w32(0x7a000000 | genjmp(a));
}
1119
1120static void emit_jc(const void *a_)
1121{
1122 int a = (int)a_;
1123 assem_debug("bcs %x\n",a);
1124 u_int offset=genjmp(a);
1125 output_w32(0x2a000000|offset);
1126}
1127
1128static void emit_jcc(const void *a_)
1129{
1130 int a = (int)a_;
1131 assem_debug("bcc %x\n",a);
1132 u_int offset=genjmp(a);
1133 output_w32(0x3a000000|offset);
1134}
1135
1136static unused void emit_callreg(u_int r)
1137{
1138 assert(r<15);
1139 assem_debug("blx %s\n",regname[r]);
1140 output_w32(0xe12fff30|r);
1141}
1142
1143static void emit_jmpreg(u_int r)
1144{
1145 assem_debug("mov pc,%s\n",regname[r]);
1146 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1147}
1148
1149static void emit_ret(void)
1150{
1151 emit_jmpreg(14);
1152}
1153
1154static void emit_readword_indexed(int offset, int rs, int rt)
1155{
1156 assert(offset>-4096&&offset<4096);
1157 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1158 if(offset>=0) {
1159 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1160 }else{
1161 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1162 }
1163}
1164
1165static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1166{
1167 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1168 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1169}
1170#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1171
1172static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1173{
1174 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1175 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1176}
1177
1178static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1179{
1180 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1181 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1182}
1183
1184static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1185{
1186 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1187 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1188}
1189
1190static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1191{
1192 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1193 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1194}
1195
1196static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1197{
1198 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1199 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1200}
1201
1202static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1203{
1204 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1205 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1206}
1207
1208static void emit_movsbl_indexed(int offset, int rs, int rt)
1209{
1210 assert(offset>-256&&offset<256);
1211 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1212 if(offset>=0) {
1213 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1214 }else{
1215 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1216 }
1217}
1218
1219static void emit_movswl_indexed(int offset, int rs, int rt)
1220{
1221 assert(offset>-256&&offset<256);
1222 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1223 if(offset>=0) {
1224 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1225 }else{
1226 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1227 }
1228}
1229
1230static void emit_movzbl_indexed(int offset, int rs, int rt)
1231{
1232 assert(offset>-4096&&offset<4096);
1233 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1234 if(offset>=0) {
1235 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1236 }else{
1237 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1238 }
1239}
1240
1241static void emit_movzwl_indexed(int offset, int rs, int rt)
1242{
1243 assert(offset>-256&&offset<256);
1244 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1245 if(offset>=0) {
1246 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1247 }else{
1248 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1249 }
1250}
1251
1252static void emit_ldrd(int offset, int rs, int rt)
1253{
1254 assert(offset>-256&&offset<256);
1255 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1256 if(offset>=0) {
1257 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1258 }else{
1259 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1260 }
1261}
1262
1263static void emit_readword(void *addr, int rt)
1264{
1265 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1266 assert(offset<4096);
1267 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1268 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1269}
1270#define emit_readptr emit_readword
1271
1272static void emit_writeword_indexed(int rt, int offset, int rs)
1273{
1274 assert(offset>-4096&&offset<4096);
1275 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1276 if(offset>=0) {
1277 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1278 }else{
1279 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1280 }
1281}
1282
1283static void emit_writehword_indexed(int rt, int offset, int rs)
1284{
1285 assert(offset>-256&&offset<256);
1286 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1287 if(offset>=0) {
1288 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1289 }else{
1290 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1291 }
1292}
1293
1294static void emit_writebyte_indexed(int rt, int offset, int rs)
1295{
1296 assert(offset>-4096&&offset<4096);
1297 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1298 if(offset>=0) {
1299 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1300 }else{
1301 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1302 }
1303}
1304
1305static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1306{
1307 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1308 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1309}
1310
1311static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1312{
1313 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1314 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1315}
1316
1317static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1318{
1319 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1320 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1321}
1322
1323static void emit_writeword(int rt, void *addr)
1324{
1325 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1326 assert(offset<4096);
1327 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1328 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1329}
1330
1331static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1332{
1333 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1334 assert(rs1<16);
1335 assert(rs2<16);
1336 assert(hi<16);
1337 assert(lo<16);
1338 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1339}
1340
1341static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1342{
1343 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1344 assert(rs1<16);
1345 assert(rs2<16);
1346 assert(hi<16);
1347 assert(lo<16);
1348 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1349}
1350
1351static void emit_clz(int rs,int rt)
1352{
1353 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1354 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1355}
1356
1357static void emit_subcs(int rs1,int rs2,int rt)
1358{
1359 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1360 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1361}
1362
1363static void emit_shrcc_imm(int rs,u_int imm,int rt)
1364{
1365 assert(imm>0);
1366 assert(imm<32);
1367 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1369}
1370
1371static void emit_shrne_imm(int rs,u_int imm,int rt)
1372{
1373 assert(imm>0);
1374 assert(imm<32);
1375 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1376 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1377}
1378
1379static void emit_negmi(int rs, int rt)
1380{
1381 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1382 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1383}
1384
1385static void emit_negsmi(int rs, int rt)
1386{
1387 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1388 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1389}
1390
1391static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1392{
1393 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1394 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1395}
1396
1397static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1398{
1399 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1400 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1401}
1402
1403static void emit_teq(int rs, int rt)
1404{
1405 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1406 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1407}
1408
1409static unused void emit_rsbimm(int rs, int imm, int rt)
1410{
1411 u_int armval;
1412 genimm_checked(imm,&armval);
1413 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1414 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1415}
1416
1417// Conditionally select one of two immediates, optimizing for small code size
1418// This will only be called if HAVE_CMOV_IMM is defined
1419static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1420{
1421 u_int armval;
1422 if(genimm(imm2-imm1,&armval)) {
1423 emit_movimm(imm1,rt);
1424 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1425 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1426 }else if(genimm(imm1-imm2,&armval)) {
1427 emit_movimm(imm1,rt);
1428 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1429 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1430 }
1431 else {
1432 #ifndef HAVE_ARMV7
1433 emit_movimm(imm1,rt);
1434 add_literal((int)out,imm2);
1435 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1436 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1437 #else
1438 emit_movw(imm1&0x0000FFFF,rt);
1439 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1440 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1441 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1442 }
1443 emit_movt(imm1&0xFFFF0000,rt);
1444 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1445 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1446 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1447 }
1448 #endif
1449 }
1450}
1451
1452// special case for checking invalid_code
1453static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1454{
1455 assert(imm<128&&imm>=0);
1456 assert(r>=0&&r<16);
1457 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1458 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1459 emit_cmpimm(HOST_TEMPREG,imm);
1460}
1461
1462static void emit_callne(int a)
1463{
1464 assem_debug("blne %x\n",a);
1465 u_int offset=genjmp(a);
1466 output_w32(0x1b000000|offset);
1467}
1468
1469// Used to preload hash table entries
1470static unused void emit_prefetchreg(int r)
1471{
1472 assem_debug("pld %s\n",regname[r]);
1473 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1474}
1475
1476// Special case for mini_ht
1477static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1478{
1479 assert(offset<4096);
1480 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1481 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1482}
1483
1484static void emit_orrne_imm(int rs,int imm,int rt)
1485{
1486 u_int armval;
1487 genimm_checked(imm,&armval);
1488 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1489 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1490}
1491
1492static void emit_andne_imm(int rs,int imm,int rt)
1493{
1494 u_int armval;
1495 genimm_checked(imm,&armval);
1496 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1497 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1498}
1499
1500static unused void emit_addpl_imm(int rs,int imm,int rt)
1501{
1502 u_int armval;
1503 genimm_checked(imm,&armval);
1504 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1505 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1506}
1507
1508static void emit_jno_unlikely(int a)
1509{
1510 //emit_jno(a);
1511 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1512 output_w32(0x72800000|rd_rn_rm(15,15,0));
1513}
1514
1515static void save_regs_all(u_int reglist)
1516{
1517 int i;
1518 if(!reglist) return;
1519 assem_debug("stmia fp,{");
1520 for(i=0;i<16;i++)
1521 if(reglist&(1<<i))
1522 assem_debug("r%d,",i);
1523 assem_debug("}\n");
1524 output_w32(0xe88b0000|reglist);
1525}
1526
1527static void restore_regs_all(u_int reglist)
1528{
1529 int i;
1530 if(!reglist) return;
1531 assem_debug("ldmia fp,{");
1532 for(i=0;i<16;i++)
1533 if(reglist&(1<<i))
1534 assem_debug("r%d,",i);
1535 assem_debug("}\n");
1536 output_w32(0xe89b0000|reglist);
1537}
1538
1539// Save registers before function call
1540static void save_regs(u_int reglist)
1541{
1542 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1543 save_regs_all(reglist);
1544}
1545
1546// Restore registers after function call
1547static void restore_regs(u_int reglist)
1548{
1549 reglist&=CALLER_SAVE_REGS;
1550 restore_regs_all(reglist);
1551}
1552
1553/* Stubs/epilogue */
1554
1555static void literal_pool(int n)
1556{
1557 if(!literalcount) return;
1558 if(n) {
1559 if((int)out-literals[0][0]<4096-n) return;
1560 }
1561 u_int *ptr;
1562 int i;
1563 for(i=0;i<literalcount;i++)
1564 {
1565 u_int l_addr=(u_int)out;
1566 int j;
1567 for(j=0;j<i;j++) {
1568 if(literals[j][1]==literals[i][1]) {
1569 //printf("dup %08x\n",literals[i][1]);
1570 l_addr=literals[j][0];
1571 break;
1572 }
1573 }
1574 ptr=(u_int *)literals[i][0];
1575 u_int offset=l_addr-(u_int)ptr-8;
1576 assert(offset<4096);
1577 assert(!(offset&3));
1578 *ptr|=offset;
1579 if(l_addr==(u_int)out) {
1580 literals[i][0]=l_addr; // remember for dupes
1581 output_w32(literals[i][1]);
1582 }
1583 }
1584 literalcount=0;
1585}
1586
1587static void literal_pool_jumpover(int n)
1588{
1589 if(!literalcount) return;
1590 if(n) {
1591 if((int)out-literals[0][0]<4096-n) return;
1592 }
1593 void *jaddr = out;
1594 emit_jmp(0);
1595 literal_pool(0);
1596 set_jump_target(jaddr, out);
1597}
1598
1599// parsed by get_pointer, find_extjump_insn
1600static void emit_extjump2(u_char *addr, u_int target, void *linker)
1601{
1602 u_char *ptr=(u_char *)addr;
1603 assert((ptr[3]&0x0e)==0xa);
1604 (void)ptr;
1605
1606 emit_loadlp(target,0);
1607 emit_loadlp((u_int)addr,1);
1608 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
1609 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1610//DEBUG >
1611#ifdef DEBUG_CYCLE_COUNT
1612 emit_readword(&last_count,ECX);
1613 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1614 emit_readword(&next_interupt,ECX);
1615 emit_writeword(HOST_CCREG,&Count);
1616 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1617 emit_writeword(ECX,&last_count);
1618#endif
1619//DEBUG <
1620 emit_far_jump(linker);
1621}
1622
1623static void check_extjump2(void *src)
1624{
1625 u_int *ptr = src;
1626 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1627 (void)ptr;
1628}
1629
1630// put rt_val into rt, potentially making use of rs with value rs_val
1631static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1632{
1633 u_int armval;
1634 int diff;
1635 if(genimm(rt_val,&armval)) {
1636 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1637 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1638 return;
1639 }
1640 if(genimm(~rt_val,&armval)) {
1641 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1642 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1643 return;
1644 }
1645 diff=rt_val-rs_val;
1646 if(genimm(diff,&armval)) {
1647 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1648 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1649 return;
1650 }else if(genimm(-diff,&armval)) {
1651 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1652 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1653 return;
1654 }
1655 emit_movimm(rt_val,rt);
1656}
1657
// Return 1 if emit_movimm_from() can do its job cheaply for this pair, i.e.
// the values are equal or their difference (in either direction), after
// dropping trailing pairs of zero bits, is below 0x100.  Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  int delta;
  u_int bits;

  if (v1 == v2)
    return 1;
  delta = v2 - v1;

  // forward difference
  bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  // backward difference
  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return bits < 0x100;
}
1673
1674static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1675{
1676 switch(type) {
1677 case LOADB_STUB: emit_signextend8(rs,rt); break;
1678 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1679 case LOADH_STUB: emit_signextend16(rs,rt); break;
1680 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1681 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1682 default: assert(0);
1683 }
1684}
1685
1686#include "pcsxmem.h"
1687#include "pcsxmem_inline.c"
1688
1689static void do_readstub(int n)
1690{
1691 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1692 literal_pool(256);
1693 set_jump_target(stubs[n].addr, out);
1694 enum stub_type type=stubs[n].type;
1695 int i=stubs[n].a;
1696 int rs=stubs[n].b;
1697 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1698 u_int reglist=stubs[n].e;
1699 const signed char *i_regmap=i_regs->regmap;
1700 int rt;
1701 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1702 rt=get_reg(i_regmap,FTEMP);
1703 }else{
1704 rt=get_reg(i_regmap,dops[i].rt1);
1705 }
1706 assert(rs>=0);
1707 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1708 void *restore_jump = NULL;
1709 reglist|=(1<<rs);
1710 for(r=0;r<=12;r++) {
1711 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1712 temp=r; break;
1713 }
1714 }
1715 if(rt>=0&&dops[i].rt1!=0)
1716 reglist&=~(1<<rt);
1717 if(temp==-1) {
1718 save_regs(reglist);
1719 regs_saved=1;
1720 temp=(rs==0)?2:0;
1721 }
1722 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1723 temp2=1;
1724 emit_readword(&mem_rtab,temp);
1725 emit_shrimm(rs,12,temp2);
1726 emit_readword_dualindexedx4(temp,temp2,temp2);
1727 emit_lsls_imm(temp2,1,temp2);
1728 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1729 switch(type) {
1730 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1731 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1732 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1733 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1734 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1735 default: assert(0);
1736 }
1737 }
1738 if(regs_saved) {
1739 restore_jump=out;
1740 emit_jcc(0); // jump to reg restore
1741 }
1742 else
1743 emit_jcc(stubs[n].retaddr); // return address
1744
1745 if(!regs_saved)
1746 save_regs(reglist);
1747 void *handler=NULL;
1748 if(type==LOADB_STUB||type==LOADBU_STUB)
1749 handler=jump_handler_read8;
1750 if(type==LOADH_STUB||type==LOADHU_STUB)
1751 handler=jump_handler_read16;
1752 if(type==LOADW_STUB)
1753 handler=jump_handler_read32;
1754 assert(handler);
1755 pass_args(rs,temp2);
1756 int cc=get_reg(i_regmap,CCREG);
1757 if(cc<0)
1758 emit_loadreg(CCREG,2);
1759 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1760 emit_far_call(handler);
1761 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1762 mov_loadtype_adj(type,0,rt);
1763 }
1764 if(restore_jump)
1765 set_jump_target(restore_jump, out);
1766 restore_regs(reglist);
1767 emit_jmp(stubs[n].retaddr); // return address
1768}
1769
1770static void inline_readstub(enum stub_type type, int i, u_int addr,
1771 const signed char regmap[], int target, int adj, u_int reglist)
1772{
1773 int rs=get_reg(regmap,target);
1774 int rt=get_reg(regmap,target);
1775 if(rs<0) rs=get_reg(regmap,-1);
1776 assert(rs>=0);
1777 u_int is_dynamic;
1778 uintptr_t host_addr = 0;
1779 void *handler;
1780 int cc=get_reg(regmap,CCREG);
1781 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
1782 return;
1783 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1784 if (handler == NULL) {
1785 if(rt<0||dops[i].rt1==0)
1786 return;
1787 if(addr!=host_addr)
1788 emit_movimm_from(addr,rs,host_addr,rs);
1789 switch(type) {
1790 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1791 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1792 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1793 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1794 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1795 default: assert(0);
1796 }
1797 return;
1798 }
1799 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1800 if(is_dynamic) {
1801 if(type==LOADB_STUB||type==LOADBU_STUB)
1802 handler=jump_handler_read8;
1803 if(type==LOADH_STUB||type==LOADHU_STUB)
1804 handler=jump_handler_read16;
1805 if(type==LOADW_STUB)
1806 handler=jump_handler_read32;
1807 }
1808
1809 // call a memhandler
1810 if(rt>=0&&dops[i].rt1!=0)
1811 reglist&=~(1<<rt);
1812 save_regs(reglist);
1813 if(target==0)
1814 emit_movimm(addr,0);
1815 else if(rs!=0)
1816 emit_mov(rs,0);
1817 if(cc<0)
1818 emit_loadreg(CCREG,2);
1819 if(is_dynamic) {
1820 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1821 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1822 }
1823 else {
1824 emit_readword(&last_count,3);
1825 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1826 emit_add(2,3,2);
1827 emit_writeword(2,&Count);
1828 }
1829
1830 emit_far_call(handler);
1831
1832 if(rt>=0&&dops[i].rt1!=0) {
1833 switch(type) {
1834 case LOADB_STUB: emit_signextend8(0,rt); break;
1835 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1836 case LOADH_STUB: emit_signextend16(0,rt); break;
1837 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1838 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1839 default: assert(0);
1840 }
1841 }
1842 restore_regs(reglist);
1843}
1844
1845static void do_writestub(int n)
1846{
1847 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1848 literal_pool(256);
1849 set_jump_target(stubs[n].addr, out);
1850 enum stub_type type=stubs[n].type;
1851 int i=stubs[n].a;
1852 int rs=stubs[n].b;
1853 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1854 u_int reglist=stubs[n].e;
1855 const signed char *i_regmap=i_regs->regmap;
1856 int rt,r;
1857 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1858 rt=get_reg(i_regmap,r=FTEMP);
1859 }else{
1860 rt=get_reg(i_regmap,r=dops[i].rs2);
1861 }
1862 assert(rs>=0);
1863 assert(rt>=0);
1864 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1865 void *restore_jump = NULL;
1866 int reglist2=reglist|(1<<rs)|(1<<rt);
1867 for(rtmp=0;rtmp<=12;rtmp++) {
1868 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1869 temp=rtmp; break;
1870 }
1871 }
1872 if(temp==-1) {
1873 save_regs(reglist);
1874 regs_saved=1;
1875 for(rtmp=0;rtmp<=3;rtmp++)
1876 if(rtmp!=rs&&rtmp!=rt)
1877 {temp=rtmp;break;}
1878 }
1879 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1880 temp2=3;
1881 emit_readword(&mem_wtab,temp);
1882 emit_shrimm(rs,12,temp2);
1883 emit_readword_dualindexedx4(temp,temp2,temp2);
1884 emit_lsls_imm(temp2,1,temp2);
1885 switch(type) {
1886 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1887 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1888 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1889 default: assert(0);
1890 }
1891 if(regs_saved) {
1892 restore_jump=out;
1893 emit_jcc(0); // jump to reg restore
1894 }
1895 else
1896 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1897
1898 if(!regs_saved)
1899 save_regs(reglist);
1900 void *handler=NULL;
1901 switch(type) {
1902 case STOREB_STUB: handler=jump_handler_write8; break;
1903 case STOREH_STUB: handler=jump_handler_write16; break;
1904 case STOREW_STUB: handler=jump_handler_write32; break;
1905 default: assert(0);
1906 }
1907 assert(handler);
1908 pass_args(rs,rt);
1909 if(temp2!=3)
1910 emit_mov(temp2,3);
1911 int cc=get_reg(i_regmap,CCREG);
1912 if(cc<0)
1913 emit_loadreg(CCREG,2);
1914 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1915 // returns new cycle_count
1916 emit_far_call(handler);
1917 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
1918 if(cc<0)
1919 emit_storereg(CCREG,2);
1920 if(restore_jump)
1921 set_jump_target(restore_jump, out);
1922 restore_regs(reglist);
1923 emit_jmp(stubs[n].retaddr);
1924}
1925
1926static void inline_writestub(enum stub_type type, int i, u_int addr,
1927 const signed char regmap[], int target, int adj, u_int reglist)
1928{
1929 int rs=get_reg(regmap,-1);
1930 int rt=get_reg(regmap,target);
1931 assert(rs>=0);
1932 assert(rt>=0);
1933 uintptr_t host_addr = 0;
1934 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1935 if (handler == NULL) {
1936 if(addr!=host_addr)
1937 emit_movimm_from(addr,rs,host_addr,rs);
1938 switch(type) {
1939 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1940 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1941 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1942 default: assert(0);
1943 }
1944 return;
1945 }
1946
1947 // call a memhandler
1948 save_regs(reglist);
1949 pass_args(rs,rt);
1950 int cc=get_reg(regmap,CCREG);
1951 if(cc<0)
1952 emit_loadreg(CCREG,2);
1953 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1954 emit_movimm((u_int)handler,3);
1955 // returns new cycle_count
1956 emit_far_call(jump_handler_write_h);
1957 emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc);
1958 if(cc<0)
1959 emit_storereg(CCREG,2);
1960 restore_regs(reglist);
1961}
1962
1963// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1964static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
1965{
1966 #ifndef HAVE_ARMV7
1967 emit_loadlp((int)source, 1);
1968 emit_loadlp((int)copy, 2);
1969 emit_loadlp(source_len, 3);
1970 #else
1971 emit_movw(((u_int)source)&0x0000FFFF, 1);
1972 emit_movw(((u_int)copy)&0x0000FFFF, 2);
1973 emit_movt(((u_int)source)&0xFFFF0000, 1);
1974 emit_movt(((u_int)copy)&0xFFFF0000, 2);
1975 emit_movw(source_len, 3);
1976 #endif
1977 emit_movimm(arg0, 0);
1978}
1979
1980static void *do_dirty_stub(int i, u_int source_len)
1981{
1982 assem_debug("do_dirty_stub %x\n",start+i*4);
1983 do_dirty_stub_emit_args(start + i*4, source_len);
1984 emit_far_call(verify_code);
1985 void *entry = out;
1986 load_regs_entry(i);
1987 if (entry == out)
1988 entry = instr_addr[i];
1989 emit_jmp(instr_addr[i]);
1990 return entry;
1991}
1992
1993static void do_dirty_stub_ds(u_int source_len)
1994{
1995 do_dirty_stub_emit_args(start + 1, source_len);
1996 emit_far_call(verify_code_ds);
1997}
1998
1999/* Special assem */
2000
2001static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
2002{
2003 save_regs_all(reglist);
2004 cop2_do_stall_check(op, i, i_regs, 0);
2005#ifdef PCNT
2006 emit_movimm(op, 0);
2007 emit_far_call(pcnt_gte_start);
2008#endif
2009 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
2010}
2011
2012static void c2op_epilogue(u_int op,u_int reglist)
2013{
2014#ifdef PCNT
2015 emit_movimm(op,0);
2016 emit_far_call(pcnt_gte_end);
2017#endif
2018 restore_regs_all(reglist);
2019}
2020
2021static void c2op_call_MACtoIR(int lm,int need_flags)
2022{
2023 if(need_flags)
2024 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2025 else
2026 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2027}
2028
2029static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2030{
2031 emit_far_call(func);
2032 // func is C code and trashes r0
2033 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2034 if(need_flags||need_ir)
2035 c2op_call_MACtoIR(lm,need_flags);
2036 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2037}
2038
2039static void c2op_assemble(int i, const struct regstat *i_regs)
2040{
2041 u_int c2op = source[i] & 0x3f;
2042 u_int reglist_full = get_host_reglist(i_regs->regmap);
2043 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2044 int need_flags, need_ir;
2045
2046 if (gte_handlers[c2op]!=NULL) {
2047 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2048 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2049 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2050 source[i],gte_unneeded[i+1],need_flags,need_ir);
2051 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
2052 need_flags=0;
2053 int shift = (source[i] >> 19) & 1;
2054 int lm = (source[i] >> 10) & 1;
2055 switch(c2op) {
2056#ifndef DRC_DBG
2057 case GTE_MVMVA: {
2058#ifdef HAVE_ARMV5
2059 int v = (source[i] >> 15) & 3;
2060 int cv = (source[i] >> 13) & 3;
2061 int mx = (source[i] >> 17) & 3;
2062 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2063 c2op_prologue(c2op,i,i_regs,reglist);
2064 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2065 if(v<3)
2066 emit_ldrd(v*8,0,4);
2067 else {
2068 emit_movzwl_indexed(9*4,0,4); // gteIR
2069 emit_movzwl_indexed(10*4,0,6);
2070 emit_movzwl_indexed(11*4,0,5);
2071 emit_orrshl_imm(6,16,4);
2072 }
2073 if(mx<3)
2074 emit_addimm(0,32*4+mx*8*4,6);
2075 else
2076 emit_readword(&zeromem_ptr,6);
2077 if(cv<3)
2078 emit_addimm(0,32*4+(cv*8+5)*4,7);
2079 else
2080 emit_readword(&zeromem_ptr,7);
2081#ifdef __ARM_NEON__
2082 emit_movimm(source[i],1); // opcode
2083 emit_far_call(gteMVMVA_part_neon);
2084 if(need_flags) {
2085 emit_movimm(lm,1);
2086 emit_far_call(gteMACtoIR_flags_neon);
2087 }
2088#else
2089 if(cv==3&&shift)
2090 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
2091 else {
2092 emit_movimm(shift,1);
2093 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2094 }
2095 if(need_flags||need_ir)
2096 c2op_call_MACtoIR(lm,need_flags);
2097#endif
2098#else /* if not HAVE_ARMV5 */
2099 c2op_prologue(c2op,i,i_regs,reglist);
2100 emit_movimm(source[i],1); // opcode
2101 emit_writeword(1,&psxRegs.code);
2102 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2103#endif
2104 break;
2105 }
2106 case GTE_OP:
2107 c2op_prologue(c2op,i,i_regs,reglist);
2108 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2109 if(need_flags||need_ir) {
2110 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2111 c2op_call_MACtoIR(lm,need_flags);
2112 }
2113 break;
2114 case GTE_DPCS:
2115 c2op_prologue(c2op,i,i_regs,reglist);
2116 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2117 break;
2118 case GTE_INTPL:
2119 c2op_prologue(c2op,i,i_regs,reglist);
2120 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2121 break;
2122 case GTE_SQR:
2123 c2op_prologue(c2op,i,i_regs,reglist);
2124 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2125 if(need_flags||need_ir) {
2126 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2127 c2op_call_MACtoIR(lm,need_flags);
2128 }
2129 break;
2130 case GTE_DCPL:
2131 c2op_prologue(c2op,i,i_regs,reglist);
2132 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2133 break;
2134 case GTE_GPF:
2135 c2op_prologue(c2op,i,i_regs,reglist);
2136 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2137 break;
2138 case GTE_GPL:
2139 c2op_prologue(c2op,i,i_regs,reglist);
2140 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2141 break;
2142#endif
2143 default:
2144 c2op_prologue(c2op,i,i_regs,reglist);
2145#ifdef DRC_DBG
2146 emit_movimm(source[i],1); // opcode
2147 emit_writeword(1,&psxRegs.code);
2148#endif
2149 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2150 break;
2151 }
2152 c2op_epilogue(c2op,reglist);
2153 }
2154}
2155
/*
 * Emit code that normalizes a value being written to GTE control
 * register 31 (the FLAG register).
 *
 *   sl   - host register holding the value to be written
 *   temp - host register that receives the normalized value
 *
 * The emitted code implements:
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 * so bit 31 of the result is derived only from the masked error bits and
 * is never taken over from the written value.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // temp = sl & 0x7ffff000, built from shifts:
  emit_shlimm(sl,1,temp);      // shift bit 31 of the written value out
  emit_shrimm(temp,13,temp);   // shift bits 11..0 out as well
  emit_shlimm(temp,12,temp);   // move bits 30..12 back into place
  // if (temp & 0x7f87e000) temp |= 0x80000000;
  // The mask is tested in three pieces; judging by the emitter names the
  // "eq" tests only run while no bit has been found so far, and the final
  // orr only runs when some piece matched.
  emit_testimm(temp,0x7f000000);
  emit_testeqimm(temp,0x00870000);
  emit_testeqimm(temp,0x0000e000);
  emit_orrne_imm(temp,0x80000000,temp);
}
2167
/*
 * Emit code that reads GTE data register 'copr' into host register 'temp'
 * and reduces it to a 5-bit field held in bits 11..7 (mask 0xf80).
 * The order of the emitted instructions matters: the flags set by the
 * test/compare are consumed by the conditional instruction that follows.
 * Judging by the emitter names, the result is:
 *   - 0        when bit 15 of the register is set,
 *   - 0xf80    when the value is >= 0xf80 (unsigned compare),
 *   - value & 0xf80 otherwise.
 */
2168static void do_mfc2_31_one(u_int copr,signed char temp)
2169{
2170 emit_readword(&reg_cop2d[copr],temp);
2171 emit_testimm(temp,0x8000); // do we need this?
2172 emit_andne_imm(temp,0,temp); // bit 15 set -> force the value to 0
2173 emit_cmpimm(temp,0xf80); // compare first; its flags are used by the cmovae below
2174 emit_andimm(temp,0xf80,temp); // keep bits 11..7 (presumably leaves the flags untouched)
2175 emit_cmovae_imm(0xf80,temp); // value was >= 0xf80 -> saturate to 0xf80
2176}
2177
/*
 * Emit code that rebuilds GTE data register 29 from data registers 9, 10
 * and 11 and leaves the result in host register 'tl'.
 *
 *   tl   - host register receiving the packed value
 *   temp - scratch host register; when negative, HOST_TEMPREG is acquired
 *          and used instead
 *
 * Each source register is reduced by do_mfc2_31_one() to a field in bits
 * 11..7, then shifted into its slot: register 9 -> bits 4..0,
 * register 10 -> bits 9..5, register 11 -> bits 14..10. The packed value
 * is also written back to reg_cop2d[29].
 */
2178static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2179{
2180 if (temp < 0) {
2181 host_tempreg_acquire();
2182 temp = HOST_TEMPREG;
2183 }
2184 do_mfc2_31_one(9,temp);
2185 emit_shrimm(temp,7,tl); // tl = field >> 7 (bits 4..0)
2186 do_mfc2_31_one(10,temp);
2187 emit_orrshr_imm(temp,2,tl); // presumably tl |= field >> 2 (bits 9..5)
2188 do_mfc2_31_one(11,temp);
2189 emit_orrshl_imm(temp,3,tl); // presumably tl |= field << 3 (bits 14..10)
2190 emit_writeword(tl,&reg_cop2d[29]);
// NOTE(review): the release is keyed on the register number, so it also
// runs when a caller passes HOST_TEMPREG explicitly - confirm callers
// never do that.
2191 if (temp == HOST_TEMPREG)
2192 host_tempreg_release();
2193}
2194
2195static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2196{
2197 // case 0x18: MULT
2198 // case 0x19: MULTU
2199 // case 0x1A: DIV
2200 // case 0x1B: DIVU
2201 // case 0x1C: DMULT
2202 // case 0x1D: DMULTU
2203 // case 0x1E: DDIV
2204 // case 0x1F: DDIVU
2205 if(dops[i].rs1&&dops[i].rs2)
2206 {
2207 if((dops[i].opcode2&4)==0) // 32-bit
2208 {
2209 if(dops[i].opcode2==0x18) // MULT
2210 {
2211 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2212 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2213 signed char hi=get_reg(i_regs->regmap,HIREG);
2214 signed char lo=get_reg(i_regs->regmap,LOREG);
2215 assert(m1>=0);
2216 assert(m2>=0);
2217 assert(hi>=0);
2218 assert(lo>=0);
2219 emit_smull(m1,m2,hi,lo);
2220 }
2221 if(dops[i].opcode2==0x19) // MULTU
2222 {
2223 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2224 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2225 signed char hi=get_reg(i_regs->regmap,HIREG);
2226 signed char lo=get_reg(i_regs->regmap,LOREG);
2227 assert(m1>=0);
2228 assert(m2>=0);
2229 assert(hi>=0);
2230 assert(lo>=0);
2231 emit_umull(m1,m2,hi,lo);
2232 }
2233 if(dops[i].opcode2==0x1A) // DIV
2234 {
2235 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2236 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2237 assert(d1>=0);
2238 assert(d2>=0);
2239 signed char quotient=get_reg(i_regs->regmap,LOREG);
2240 signed char remainder=get_reg(i_regs->regmap,HIREG);
2241 assert(quotient>=0);
2242 assert(remainder>=0);
2243 emit_movs(d1,remainder);
2244 emit_movimm(0xffffffff,quotient);
2245 emit_negmi(quotient,quotient); // .. quotient and ..
2246 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2247 emit_movs(d2,HOST_TEMPREG);
2248 emit_jeq(out+52); // Division by zero
2249 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2250#ifdef HAVE_ARMV5
2251 emit_clz(HOST_TEMPREG,quotient);
2252 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2253#else
2254 emit_movimm(0,quotient);
2255 emit_addpl_imm(quotient,1,quotient);
2256 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2257 emit_jns(out-2*4);
2258#endif
2259 emit_orimm(quotient,1<<31,quotient);
2260 emit_shr(quotient,quotient,quotient);
2261 emit_cmp(remainder,HOST_TEMPREG);
2262 emit_subcs(remainder,HOST_TEMPREG,remainder);
2263 emit_adcs(quotient,quotient,quotient);
2264 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2265 emit_jcc(out-16); // -4
2266 emit_teq(d1,d2);
2267 emit_negmi(quotient,quotient);
2268 emit_test(d1,d1);
2269 emit_negmi(remainder,remainder);
2270 }
2271 if(dops[i].opcode2==0x1B) // DIVU
2272 {
2273 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2274 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2275 assert(d1>=0);
2276 assert(d2>=0);
2277 signed char quotient=get_reg(i_regs->regmap,LOREG);
2278 signed char remainder=get_reg(i_regs->regmap,HIREG);
2279 assert(quotient>=0);
2280 assert(remainder>=0);
2281 emit_mov(d1,remainder);
2282 emit_movimm(0xffffffff,quotient); // div0 case
2283 emit_test(d2,d2);
2284 emit_jeq(out+40); // Division by zero
2285#ifdef HAVE_ARMV5
2286 emit_clz(d2,HOST_TEMPREG);
2287 emit_movimm(1<<31,quotient);
2288 emit_shl(d2,HOST_TEMPREG,d2);
2289#else
2290 emit_movimm(0,HOST_TEMPREG);
2291 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2292 emit_lslpls_imm(d2,1,d2);
2293 emit_jns(out-2*4);
2294 emit_movimm(1<<31,quotient);
2295#endif
2296 emit_shr(quotient,HOST_TEMPREG,quotient);
2297 emit_cmp(remainder,d2);
2298 emit_subcs(remainder,d2,remainder);
2299 emit_adcs(quotient,quotient,quotient);
2300 emit_shrcc_imm(d2,1,d2);
2301 emit_jcc(out-16); // -4
2302 }
2303 }
2304 else // 64-bit
2305 assert(0);
2306 }
2307 else
2308 {
2309 // Multiply by zero is zero.
2310 // MIPS does not have a divide by zero exception.
2311 // The result is undefined, we return zero.
2312 signed char hr=get_reg(i_regs->regmap,HIREG);
2313 signed char lr=get_reg(i_regs->regmap,LOREG);
2314 if(hr>=0) emit_zeroreg(hr);
2315 if(lr>=0) emit_zeroreg(lr);
2316 }
2317}
2318#define multdiv_assemble multdiv_assemble_arm
2319
/*
 * Emit a far jump to the jump_vaddr_reg[] entry selected by 'rs'.
 * Presumably the table holds the jump_vaddr_rN stubs, one per host
 * register, and 'rs' is the host register holding the target address -
 * confirm against the table definition.
 */
2320static void do_jump_vaddr(int rs)
2321{
2322 emit_far_jump(jump_vaddr_reg[rs]);
2323}
2324
/*
 * Hash-mask preload hook. Intentionally emits nothing on ARM; 'r' is unused.
 */
2325static void do_preload_rhash(int r) {
2326 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2327 // register. On ARM the hash can be done with a single instruction (below)
2328}
2329
/*
 * Emit code that loads the address of mini_ht into host register 'ht',
 * computed as FP plus the offset of mini_ht from dynarec_local.
 */
2330static void do_preload_rhtbl(int ht) {
2331 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2332}
2333
/*
 * Emit code that computes the mini hash table offset for the address in
 * host register 'rs': rh = rs & 0xf8.
 */
2334static void do_rhash(int rs,int rh) {
2335 emit_andimm(rs,0xf8,rh);
2336}
2337
/*
 * Emit "ldr rh,[ht,rh]!": load the word at ht+rh into 'rh'. The "!" in
 * the debug mnemonic indicates base writeback, i.e. 'ht' is left
 * pointing at the entry that was read.
 */
2338static void do_miniht_load(int ht,int rh) {
2339 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2340 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh)); // opcode is emitted raw, no emit_* helper is used
2341}
2342
/*
 * Emit the mini hash table dispatch for an indirect jump.
 *
 *   rs - host register holding the jump target address
 *   rh - host register holding the address loaded from the table entry
 *   ht - host register pointing at that table entry
 *
 * On a match (rh == rs) the word at ht+4 is loaded straight into r15;
 * otherwise execution continues into the generic jump_vaddr path. With
 * CORTEX_A8_BRANCH_PREDICTION_HACK the target is always moved to r7 first
 * so that a single stub is used.
 */
static void do_miniht_jump(int rs,int rh,int ht)
{
  int target_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  if (target_reg != 7) {
    emit_mov(target_reg, 7);
    target_reg = 7;
  }
#endif
  do_jump_vaddr(target_reg);
}
2353
2354static void do_miniht_insert(u_int return_address,int rt,int temp) {
2355 #ifndef HAVE_ARMV7
2356 emit_movimm(return_address,rt); // PC into link register
2357 add_to_linker(out,return_address,1);
2358 emit_pcreladdr(temp);
2359 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2360 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2361 #else
2362 emit_movw(return_address&0x0000FFFF,rt);
2363 add_to_linker(out,return_address,1);
2364 emit_pcreladdr(temp);
2365 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2366 emit_movt(return_address&0xFFFF0000,rt);
2367 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2368 #endif
2369}
2370
2371// CPU-architecture-specific initialization
2372static void arch_init(void)
2373{
2374 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2375 struct tramp_insns *ops = ndrc->tramp.ops;
2376 size_t i;
2377 assert(!(diff & 3));
2378 assert(diff < 0x1000);
2379 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2380 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2381 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2382 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2383}
2384
2385// vim:shiftwidth=2:expandtab