drc: try to make gte stall handling less bloaty
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#ifndef __MACH__
31#define CALLER_SAVE_REGS 0x100f
32#else
33#define CALLER_SAVE_REGS 0x120f
34#endif
35
36#define unused __attribute__((unused))
37
38#ifdef DRC_DBG
39#pragma GCC diagnostic ignored "-Wunused-function"
40#pragma GCC diagnostic ignored "-Wunused-variable"
41#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
42#endif
43
44void indirect_jump_indexed();
45void indirect_jump();
46void do_interrupt();
47void jump_vaddr_r0();
48void jump_vaddr_r1();
49void jump_vaddr_r2();
50void jump_vaddr_r3();
51void jump_vaddr_r4();
52void jump_vaddr_r5();
53void jump_vaddr_r6();
54void jump_vaddr_r7();
55void jump_vaddr_r8();
56void jump_vaddr_r9();
57void jump_vaddr_r10();
58void jump_vaddr_r12();
59
60void * const jump_vaddr_reg[16] = {
61 jump_vaddr_r0,
62 jump_vaddr_r1,
63 jump_vaddr_r2,
64 jump_vaddr_r3,
65 jump_vaddr_r4,
66 jump_vaddr_r5,
67 jump_vaddr_r6,
68 jump_vaddr_r7,
69 jump_vaddr_r8,
70 jump_vaddr_r9,
71 jump_vaddr_r10,
72 0,
73 jump_vaddr_r12,
74 0,
75 0,
76 0
77};
78
79void invalidate_addr_r0();
80void invalidate_addr_r1();
81void invalidate_addr_r2();
82void invalidate_addr_r3();
83void invalidate_addr_r4();
84void invalidate_addr_r5();
85void invalidate_addr_r6();
86void invalidate_addr_r7();
87void invalidate_addr_r8();
88void invalidate_addr_r9();
89void invalidate_addr_r10();
90void invalidate_addr_r12();
91
92const u_int invalidate_addr_reg[16] = {
93 (int)invalidate_addr_r0,
94 (int)invalidate_addr_r1,
95 (int)invalidate_addr_r2,
96 (int)invalidate_addr_r3,
97 (int)invalidate_addr_r4,
98 (int)invalidate_addr_r5,
99 (int)invalidate_addr_r6,
100 (int)invalidate_addr_r7,
101 (int)invalidate_addr_r8,
102 (int)invalidate_addr_r9,
103 (int)invalidate_addr_r10,
104 0,
105 (int)invalidate_addr_r12,
106 0,
107 0,
108 0};
109
110static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
111
112/* Linker */
113
114static void set_jump_target(void *addr, void *target_)
115{
116 u_int target = (u_int)target_;
117 u_char *ptr = addr;
118 u_int *ptr2=(u_int *)ptr;
119 if(ptr[3]==0xe2) {
120 assert((target-(u_int)ptr2-8)<1024);
121 assert(((uintptr_t)addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
125 }
126 else if(ptr[3]==0x72) {
127 // generated by emit_jno_unlikely
128 if((target-(u_int)ptr2-8)<1024) {
129 assert(((uintptr_t)addr&3)==0);
130 assert((target&3)==0);
131 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
132 }
133 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
134 assert(((uintptr_t)addr&3)==0);
135 assert((target&3)==0);
136 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
137 }
138 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
139 }
140 else {
141 assert((ptr[3]&0x0e)==0xa);
142 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
143 }
144}
145
// Disabled variant of set_jump_target: when `copy` is set it may move the
// instruction found at the branch target into the slot just before the
// branch and retarget the branch one instruction further.
// Works, but the difference in speed is usually insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes = (u_char *)addr;
  u_int *insn = (u_int *)bytes;

  assert(!copy || insn[-1] == 0xe28dd000);
  if (bytes[3] == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((bytes[3] & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0            // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000 // Load
      || (target_insn & 0x08000000)) {
    copy = 0;
  }
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
180
181/* Literal pool */
182static void add_literal(int addr,int val)
183{
184 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
185 literals[literalcount][0]=addr;
186 literals[literalcount][1]=val;
187 literalcount++;
188}
189
190// from a pointer to external jump stub (which was produced by emit_extjump2)
191// find where the jumping insn is
192static void *find_extjump_insn(void *stub)
193{
194 int *ptr=(int *)(stub+4);
195 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
196 u_int offset=*ptr&0xfff;
197 void **l_ptr=(void *)ptr+offset+8;
198 return *l_ptr;
199}
200
201// find where external branch is liked to using addr of it's stub:
202// get address that insn one after stub loads (dyna_linker arg1),
203// treat it as a pointer to branch insn,
204// return addr where that branch jumps to
205static void *get_pointer(void *stub)
206{
207 //printf("get_pointer(%x)\n",(int)stub);
208 int *i_ptr=find_extjump_insn(stub);
209 assert((*i_ptr&0x0f000000)==0x0a000000);
210 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
211}
212
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
// If the insn right after the bl is an unconditional branch, the branch
// target is returned instead.
static void *get_clean_addr(void *addr)
{
#ifndef HAVE_ARMV7
  const int skip_words = 4;
#else
  const int skip_words = 6;
#endif
  signed int *insn = (signed int *)addr + skip_words;

  // the bl may sit one word further on
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
231
232static int verify_dirty(const u_int *ptr)
233{
234 #ifndef HAVE_ARMV7
235 u_int offset;
236 // get from literal pool
237 assert((*ptr&0xFFFF0000)==0xe59f0000);
238 offset=*ptr&0xfff;
239 u_int source=*(u_int*)((void *)ptr+offset+8);
240 ptr++;
241 assert((*ptr&0xFFFF0000)==0xe59f0000);
242 offset=*ptr&0xfff;
243 u_int copy=*(u_int*)((void *)ptr+offset+8);
244 ptr++;
245 assert((*ptr&0xFFFF0000)==0xe59f0000);
246 offset=*ptr&0xfff;
247 u_int len=*(u_int*)((void *)ptr+offset+8);
248 ptr++;
249 ptr++;
250 #else
251 // ARMv7 movw/movt
252 assert((*ptr&0xFFF00000)==0xe3000000);
253 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
254 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
255 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
256 ptr+=6;
257 #endif
258 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
259 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
260 //printf("verify_dirty: %x %x %x\n",source,copy,len);
261 return !memcmp((void *)source,(void *)copy,len);
262}
263
264// This doesn't necessarily find all clean entry points, just
265// guarantees that it's not dirty
266static int isclean(void *addr)
267{
268 #ifndef HAVE_ARMV7
269 u_int *ptr=((u_int *)addr)+4;
270 #else
271 u_int *ptr=((u_int *)addr)+6;
272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
277 return 1;
278}
279
280// get source that block at addr was compiled from (host pointers)
281static void get_bounds(void *addr, u_char **start, u_char **end)
282{
283 u_int *ptr = addr;
284 #ifndef HAVE_ARMV7
285 u_int offset;
286 // get from literal pool
287 assert((*ptr&0xFFFF0000)==0xe59f0000);
288 offset=*ptr&0xfff;
289 u_int source=*(u_int*)((void *)ptr+offset+8);
290 ptr++;
291 //assert((*ptr&0xFFFF0000)==0xe59f0000);
292 //offset=*ptr&0xfff;
293 //u_int copy=*(u_int*)((void *)ptr+offset+8);
294 ptr++;
295 assert((*ptr&0xFFFF0000)==0xe59f0000);
296 offset=*ptr&0xfff;
297 u_int len=*(u_int*)((void *)ptr+offset+8);
298 ptr++;
299 ptr++;
300 #else
301 // ARMv7 movw/movt
302 assert((*ptr&0xFFF00000)==0xe3000000);
303 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
304 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
305 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
306 ptr+=6;
307 #endif
308 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
309 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
310 *start=(u_char *)source;
311 *end=(u_char *)source+len;
312}
313
314// Allocate a specific ARM register.
315static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
316{
317 int n;
318 int dirty=0;
319
320 // see if it's already allocated (and dealloc it)
321 for(n=0;n<HOST_REGS;n++)
322 {
323 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
324 dirty=(cur->dirty>>n)&1;
325 cur->regmap[n]=-1;
326 }
327 }
328
329 cur->regmap[hr]=reg;
330 cur->dirty&=~(1<<hr);
331 cur->dirty|=dirty<<hr;
332 cur->isconst&=~(1<<hr);
333}
334
335// Alloc cycle count into dedicated register
336static void alloc_cc(struct regstat *cur,int i)
337{
338 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
339}
340
341/* Assembler */
342
343static unused char regname[16][4] = {
344 "r0",
345 "r1",
346 "r2",
347 "r3",
348 "r4",
349 "r5",
350 "r6",
351 "r7",
352 "r8",
353 "r9",
354 "r10",
355 "fp",
356 "r12",
357 "sp",
358 "lr",
359 "pc"};
360
361static void output_w32(u_int word)
362{
363 *((u_int *)out)=word;
364 out+=4;
365}
366
// Pack three register numbers into their instruction fields:
// rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);

  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
374
// Pack rd, rn and an 8-bit immediate that is to be shifted left by `shift`
// (an even amount). The shift is stored as ((32-shift)&30)<<7 next to the
// immediate.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);

  const u_int rot_field = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rot_field | imm;
}
383
// Try to express imm as an 8-bit value with an even rotation.
// On success stores the 12-bit rotation+value field in *encoded and
// returns 1; otherwise returns 0 with *encoded set to 0.
static u_int genimm(u_int imm, u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
399
400static void genimm_checked(u_int imm,u_int *encoded)
401{
402 u_int ret=genimm(imm,encoded);
403 assert(ret);
404 (void)ret;
405}
406
407static u_int genjmp(u_int addr)
408{
409 if (addr < 3) return 0; // a branch that will be patched later
410 int offset = addr-(int)out-8;
411 if (offset < -33554432 || offset >= 33554432) {
412 SysPrintf("genjmp: out of range: %08x\n", offset);
413 abort();
414 return 0;
415 }
416 return ((u_int)offset>>2)&0xffffff;
417}
418
419static unused void emit_breakpoint(void)
420{
421 assem_debug("bkpt #0\n");
422 //output_w32(0xe1200070);
423 output_w32(0xe7f001f0);
424}
425
426static void emit_mov(int rs,int rt)
427{
428 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
430}
431
432static void emit_movs(int rs,int rt)
433{
434 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
435 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
436}
437
438static void emit_add(int rs1,int rs2,int rt)
439{
440 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
442}
443
444static void emit_adcs(int rs1,int rs2,int rt)
445{
446 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
447 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
448}
449
450static void emit_neg(int rs, int rt)
451{
452 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
453 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
454}
455
456static void emit_sub(int rs1,int rs2,int rt)
457{
458 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
459 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
460}
461
462static void emit_zeroreg(int rt)
463{
464 assem_debug("mov %s,#0\n",regname[rt]);
465 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
466}
467
468static void emit_loadlp(u_int imm,u_int rt)
469{
470 add_literal((int)out,imm);
471 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
472 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
473}
474
475static void emit_movw(u_int imm,u_int rt)
476{
477 assert(imm<65536);
478 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
479 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
480}
481
482static void emit_movt(u_int imm,u_int rt)
483{
484 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
485 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
486}
487
488static void emit_movimm(u_int imm,u_int rt)
489{
490 u_int armval;
491 if(genimm(imm,&armval)) {
492 assem_debug("mov %s,#%d\n",regname[rt],imm);
493 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
494 }else if(genimm(~imm,&armval)) {
495 assem_debug("mvn %s,#%d\n",regname[rt],imm);
496 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
497 }else if(imm<65536) {
498 #ifndef HAVE_ARMV7
499 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
500 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
501 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
502 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
503 #else
504 emit_movw(imm,rt);
505 #endif
506 }else{
507 #ifndef HAVE_ARMV7
508 emit_loadlp(imm,rt);
509 #else
510 emit_movw(imm&0x0000FFFF,rt);
511 emit_movt(imm&0xFFFF0000,rt);
512 #endif
513 }
514}
515
516static void emit_pcreladdr(u_int rt)
517{
518 assem_debug("add %s,pc,#?\n",regname[rt]);
519 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
520}
521
522static void emit_loadreg(int r, int hr)
523{
524 if(r&64) {
525 SysPrintf("64bit load in 32bit mode!\n");
526 assert(0);
527 return;
528 }
529 if((r&63)==0)
530 emit_zeroreg(hr);
531 else {
532 int addr = (int)&psxRegs.GPR.r[r];
533 switch (r) {
534 //case HIREG: addr = &hi; break;
535 //case LOREG: addr = &lo; break;
536 case CCREG: addr = (int)&cycle_count; break;
537 case CSREG: addr = (int)&Status; break;
538 case INVCP: addr = (int)&invc_ptr; break;
539 default: assert(r < 34); break;
540 }
541 u_int offset = addr-(u_int)&dynarec_local;
542 assert(offset<4096);
543 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
544 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
545 }
546}
547
548static void emit_storereg(int r, int hr)
549{
550 if(r&64) {
551 SysPrintf("64bit store in 32bit mode!\n");
552 assert(0);
553 return;
554 }
555 int addr = (int)&psxRegs.GPR.r[r];
556 switch (r) {
557 //case HIREG: addr = &hi; break;
558 //case LOREG: addr = &lo; break;
559 case CCREG: addr = (int)&cycle_count; break;
560 default: assert(r < 34); break;
561 }
562 u_int offset = addr-(u_int)&dynarec_local;
563 assert(offset<4096);
564 assem_debug("str %s,fp+%d\n",regname[hr],offset);
565 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
566}
567
568static void emit_test(int rs, int rt)
569{
570 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
571 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
572}
573
574static void emit_testimm(int rs,int imm)
575{
576 u_int armval;
577 assem_debug("tst %s,#%d\n",regname[rs],imm);
578 genimm_checked(imm,&armval);
579 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
580}
581
582static void emit_testeqimm(int rs,int imm)
583{
584 u_int armval;
585 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
586 genimm_checked(imm,&armval);
587 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
588}
589
590static void emit_not(int rs,int rt)
591{
592 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
593 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
594}
595
596static void emit_and(u_int rs1,u_int rs2,u_int rt)
597{
598 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
599 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
600}
601
602static void emit_or(u_int rs1,u_int rs2,u_int rt)
603{
604 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
605 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
606}
607
608static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
609{
610 assert(rs<16);
611 assert(rt<16);
612 assert(imm<32);
613 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
614 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
615}
616
617static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
618{
619 assert(rs<16);
620 assert(rt<16);
621 assert(imm<32);
622 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
623 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
624}
625
626static void emit_xor(u_int rs1,u_int rs2,u_int rt)
627{
628 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
629 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
630}
631
632static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
633{
634 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
635 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
636}
637
638static void emit_addimm(u_int rs,int imm,u_int rt)
639{
640 assert(rs<16);
641 assert(rt<16);
642 if(imm!=0) {
643 u_int armval;
644 if(genimm(imm,&armval)) {
645 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
646 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
647 }else if(genimm(-imm,&armval)) {
648 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
649 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
650 #ifdef HAVE_ARMV7
651 }else if(rt!=rs&&(u_int)imm<65536) {
652 emit_movw(imm&0x0000ffff,rt);
653 emit_add(rs,rt,rt);
654 }else if(rt!=rs&&(u_int)-imm<65536) {
655 emit_movw(-imm&0x0000ffff,rt);
656 emit_sub(rs,rt,rt);
657 #endif
658 }else if((u_int)-imm<65536) {
659 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
660 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
661 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
662 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
663 }else {
664 do {
665 int shift = (ffs(imm) - 1) & ~1;
666 int imm8 = imm & (0xff << shift);
667 genimm_checked(imm8,&armval);
668 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
669 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
670 rs = rt;
671 imm &= ~imm8;
672 }
673 while (imm != 0);
674 }
675 }
676 else if(rs!=rt) emit_mov(rs,rt);
677}
678
679static void emit_addimm_and_set_flags(int imm,int rt)
680{
681 assert(imm>-65536&&imm<65536);
682 u_int armval;
683 if(genimm(imm,&armval)) {
684 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
685 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
686 }else if(genimm(-imm,&armval)) {
687 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
688 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
689 }else if(imm<0) {
690 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
691 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
692 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
693 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
694 }else{
695 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
696 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
697 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
698 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
699 }
700}
701
702static void emit_addimm_no_flags(u_int imm,u_int rt)
703{
704 emit_addimm(rt,imm,rt);
705}
706
707static void emit_addnop(u_int r)
708{
709 assert(r<16);
710 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
711 output_w32(0xe2800000|rd_rn_rm(r,r,0));
712}
713
714static void emit_andimm(int rs,int imm,int rt)
715{
716 u_int armval;
717 if(imm==0) {
718 emit_zeroreg(rt);
719 }else if(genimm(imm,&armval)) {
720 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
722 }else if(genimm(~imm,&armval)) {
723 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
724 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
725 }else if(imm==65535) {
726 #ifndef HAVE_ARMV6
727 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
728 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
729 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
730 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
731 #else
732 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
733 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
734 #endif
735 }else{
736 assert(imm>0&&imm<65535);
737 #ifndef HAVE_ARMV7
738 assem_debug("mov r14,#%d\n",imm&0xFF00);
739 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
740 assem_debug("add r14,r14,#%d\n",imm&0xFF);
741 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
742 #else
743 emit_movw(imm,HOST_TEMPREG);
744 #endif
745 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
746 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
747 }
748}
749
750static void emit_orimm(int rs,int imm,int rt)
751{
752 u_int armval;
753 if(imm==0) {
754 if(rs!=rt) emit_mov(rs,rt);
755 }else if(genimm(imm,&armval)) {
756 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
757 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
758 }else{
759 assert(imm>0&&imm<65536);
760 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
761 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
762 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
763 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
764 }
765}
766
767static void emit_xorimm(int rs,int imm,int rt)
768{
769 u_int armval;
770 if(imm==0) {
771 if(rs!=rt) emit_mov(rs,rt);
772 }else if(genimm(imm,&armval)) {
773 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
774 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
775 }else{
776 assert(imm>0&&imm<65536);
777 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
778 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
779 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
780 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
781 }
782}
783
784static void emit_shlimm(int rs,u_int imm,int rt)
785{
786 assert(imm>0);
787 assert(imm<32);
788 //if(imm==1) ...
789 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
790 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
791}
792
793static void emit_lsls_imm(int rs,int imm,int rt)
794{
795 assert(imm>0);
796 assert(imm<32);
797 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
798 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
799}
800
801static unused void emit_lslpls_imm(int rs,int imm,int rt)
802{
803 assert(imm>0);
804 assert(imm<32);
805 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
806 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
807}
808
809static void emit_shrimm(int rs,u_int imm,int rt)
810{
811 assert(imm>0);
812 assert(imm<32);
813 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
814 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
815}
816
817static void emit_sarimm(int rs,u_int imm,int rt)
818{
819 assert(imm>0);
820 assert(imm<32);
821 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
822 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
823}
824
825static void emit_rorimm(int rs,u_int imm,int rt)
826{
827 assert(imm>0);
828 assert(imm<32);
829 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
830 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
831}
832
// rt = sign-extended low 16 bits of rs.
// Before ARMv6 this is a shift-left / arithmetic-shift-right pair;
// otherwise a single sxth.
static void emit_signextend16(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
843
// rt = sign-extended low 8 bits of rs.
// Before ARMv6 this is a shift-left / arithmetic-shift-right pair;
// otherwise a single sxtb.
static void emit_signextend8(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
854
855static void emit_shl(u_int rs,u_int shift,u_int rt)
856{
857 assert(rs<16);
858 assert(rt<16);
859 assert(shift<16);
860 //if(imm==1) ...
861 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
862 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
863}
864
865static void emit_shr(u_int rs,u_int shift,u_int rt)
866{
867 assert(rs<16);
868 assert(rt<16);
869 assert(shift<16);
870 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
871 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
872}
873
874static void emit_sar(u_int rs,u_int shift,u_int rt)
875{
876 assert(rs<16);
877 assert(rt<16);
878 assert(shift<16);
879 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
880 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
881}
882
883static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
884{
885 assert(rs<16);
886 assert(rt<16);
887 assert(shift<16);
888 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
889 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
890}
891
892static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
893{
894 assert(rs<16);
895 assert(rt<16);
896 assert(shift<16);
897 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
898 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
899}
900
901static void emit_cmpimm(int rs,int imm)
902{
903 u_int armval;
904 if(genimm(imm,&armval)) {
905 assem_debug("cmp %s,#%d\n",regname[rs],imm);
906 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
907 }else if(genimm(-imm,&armval)) {
908 assem_debug("cmn %s,#%d\n",regname[rs],imm);
909 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
910 }else if(imm>0) {
911 assert(imm<65536);
912 emit_movimm(imm,HOST_TEMPREG);
913 assem_debug("cmp %s,r14\n",regname[rs]);
914 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
915 }else{
916 assert(imm>-65536);
917 emit_movimm(-imm,HOST_TEMPREG);
918 assem_debug("cmn %s,r14\n",regname[rs]);
919 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
920 }
921}
922
923static void emit_cmovne_imm(int imm,int rt)
924{
925 assem_debug("movne %s,#%d\n",regname[rt],imm);
926 u_int armval;
927 genimm_checked(imm,&armval);
928 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
929}
930
931static void emit_cmovl_imm(int imm,int rt)
932{
933 assem_debug("movlt %s,#%d\n",regname[rt],imm);
934 u_int armval;
935 genimm_checked(imm,&armval);
936 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
937}
938
939static void emit_cmovb_imm(int imm,int rt)
940{
941 assem_debug("movcc %s,#%d\n",regname[rt],imm);
942 u_int armval;
943 genimm_checked(imm,&armval);
944 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
945}
946
947static void emit_cmovae_imm(int imm,int rt)
948{
949 assem_debug("movcs %s,#%d\n",regname[rt],imm);
950 u_int armval;
951 genimm_checked(imm,&armval);
952 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
953}
954
955static void emit_cmovne_reg(int rs,int rt)
956{
957 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
958 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
959}
960
961static void emit_cmovl_reg(int rs,int rt)
962{
963 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
964 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
965}
966
967static void emit_cmovb_reg(int rs,int rt)
968{
969 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
970 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
971}
972
973static void emit_cmovs_reg(int rs,int rt)
974{
975 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
976 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
977}
978
// rt = (rs < imm) ? 1 : 0, signed compare.
// When rt aliases rs the zeroing is deferred until after the compare.
static void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
986
// rt = (rs < imm) ? 1 : 0, unsigned compare (movcc).
// When rt aliases rs the zeroing is deferred until after the compare.
static void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
994
995static void emit_cmp(int rs,int rt)
996{
997 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
998 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
999}
1000
// rt = (rs > 0) ? 1 : 0, signed.
// Emitted as: cmp rs,#1 ; mov rt,#1 ; movlt rt,#0
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1008
// rt = (rs != 0) ? 1 : 0.
// Sets the flags from rs (movs into rt, or tst when rt aliases rs),
// then movne rt,#1.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1016
// rt = (rs1 < rs2) ? 1 : 0, signed compare.
// When rt aliases either source the zeroing is deferred until after the
// compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1025
// rt = (rs1 < rs2) ? 1 : 0, unsigned compare (movcc).
// When rt aliases either source the zeroing is deferred until after the
// compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1034
1035static int can_jump_or_call(const void *a)
1036{
1037 intptr_t offset = (u_char *)a - out - 8;
1038 return (-33554432 <= offset && offset < 33554432);
1039}
1040
1041static void emit_call(const void *a_)
1042{
1043 int a = (int)a_;
1044 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1045 u_int offset=genjmp(a);
1046 output_w32(0xeb000000|offset);
1047}
1048
1049static void emit_jmp(const void *a_)
1050{
1051 int a = (int)a_;
1052 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1053 u_int offset=genjmp(a);
1054 output_w32(0xea000000|offset);
1055}
1056
// bne <a_>
static void emit_jne(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bne %x\n", target);
  output_w32(0x1a000000 | genjmp(target));
}
1064
// beq <a_>
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;

  assem_debug("beq %x\n", target);
  output_w32(0x0a000000 | genjmp(target));
}
1072
// bmi <a_>
static void emit_js(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bmi %x\n", target);
  output_w32(0x4a000000 | genjmp(target));
}
1080
// bpl <a_>
static void emit_jns(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bpl %x\n", target);
  output_w32(0x5a000000 | genjmp(target));
}
1088
// blt <a_>
static void emit_jl(const void *a_)
{
  const int target = (int)a_;

  assem_debug("blt %x\n", target);
  output_w32(0xba000000 | genjmp(target));
}
1096
// bge <a_>
static void emit_jge(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bge %x\n", target);
  output_w32(0xaa000000 | genjmp(target));
}
1104
// bvc <a_>
static void emit_jno(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bvc %x\n", target);
  output_w32(0x7a000000 | genjmp(target));
}
1112
// bcs <a_>
static void emit_jc(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bcs %x\n", target);
  output_w32(0x2a000000 | genjmp(target));
}
1120
// Emit `bcc` to a_; the offset field comes from genjmp().
static void emit_jcc(const void *a_)
{
  const int dest = (int)a_;

  assem_debug("bcc %x\n",dest);
  output_w32(0x3a000000 | genjmp(dest));
}
1128
1129static unused void emit_callreg(u_int r)
1130{
1131 assert(r<15);
1132 assem_debug("blx %s\n",regname[r]);
1133 output_w32(0xe12fff30|r);
1134}
1135
1136static void emit_jmpreg(u_int r)
1137{
1138 assem_debug("mov pc,%s\n",regname[r]);
1139 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1140}
1141
1142static void emit_ret(void)
1143{
1144 emit_jmpreg(14);
1145}
1146
1147static void emit_readword_indexed(int offset, int rs, int rt)
1148{
1149 assert(offset>-4096&&offset<4096);
1150 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1151 if(offset>=0) {
1152 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1153 }else{
1154 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1155 }
1156}
1157
1158static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1159{
1160 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1161 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1162}
1163
1164static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1165{
1166 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1167 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1168}
1169
1170static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1171{
1172 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1173 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1174}
1175
1176static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1177{
1178 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1179 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1180}
1181
1182static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1183{
1184 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1185 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1186}
1187
1188static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1189{
1190 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1191 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1192}
1193
1194static void emit_movsbl_indexed(int offset, int rs, int rt)
1195{
1196 assert(offset>-256&&offset<256);
1197 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1198 if(offset>=0) {
1199 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1200 }else{
1201 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1202 }
1203}
1204
1205static void emit_movswl_indexed(int offset, int rs, int rt)
1206{
1207 assert(offset>-256&&offset<256);
1208 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1209 if(offset>=0) {
1210 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1211 }else{
1212 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1213 }
1214}
1215
1216static void emit_movzbl_indexed(int offset, int rs, int rt)
1217{
1218 assert(offset>-4096&&offset<4096);
1219 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1220 if(offset>=0) {
1221 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1222 }else{
1223 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1224 }
1225}
1226
1227static void emit_movzwl_indexed(int offset, int rs, int rt)
1228{
1229 assert(offset>-256&&offset<256);
1230 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1231 if(offset>=0) {
1232 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1233 }else{
1234 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1235 }
1236}
1237
1238static void emit_ldrd(int offset, int rs, int rt)
1239{
1240 assert(offset>-256&&offset<256);
1241 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1242 if(offset>=0) {
1243 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1244 }else{
1245 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1246 }
1247}
1248
1249static void emit_readword(void *addr, int rt)
1250{
1251 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1252 assert(offset<4096);
1253 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1254 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1255}
1256
1257static void emit_writeword_indexed(int rt, int offset, int rs)
1258{
1259 assert(offset>-4096&&offset<4096);
1260 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1261 if(offset>=0) {
1262 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1263 }else{
1264 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1265 }
1266}
1267
1268static void emit_writehword_indexed(int rt, int offset, int rs)
1269{
1270 assert(offset>-256&&offset<256);
1271 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1272 if(offset>=0) {
1273 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1274 }else{
1275 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1276 }
1277}
1278
1279static void emit_writebyte_indexed(int rt, int offset, int rs)
1280{
1281 assert(offset>-4096&&offset<4096);
1282 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1283 if(offset>=0) {
1284 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1285 }else{
1286 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1287 }
1288}
1289
1290static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1291{
1292 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1293 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1294}
1295
1296static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1297{
1298 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1299 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1300}
1301
1302static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1303{
1304 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1305 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1306}
1307
1308static void emit_writeword(int rt, void *addr)
1309{
1310 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1311 assert(offset<4096);
1312 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1313 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1314}
1315
1316static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1317{
1318 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1319 assert(rs1<16);
1320 assert(rs2<16);
1321 assert(hi<16);
1322 assert(lo<16);
1323 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1324}
1325
1326static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1327{
1328 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1329 assert(rs1<16);
1330 assert(rs2<16);
1331 assert(hi<16);
1332 assert(lo<16);
1333 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1334}
1335
1336static void emit_clz(int rs,int rt)
1337{
1338 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1339 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1340}
1341
1342static void emit_subcs(int rs1,int rs2,int rt)
1343{
1344 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1345 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1346}
1347
1348static void emit_shrcc_imm(int rs,u_int imm,int rt)
1349{
1350 assert(imm>0);
1351 assert(imm<32);
1352 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1354}
1355
1356static void emit_shrne_imm(int rs,u_int imm,int rt)
1357{
1358 assert(imm>0);
1359 assert(imm<32);
1360 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1361 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1362}
1363
1364static void emit_negmi(int rs, int rt)
1365{
1366 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1367 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1368}
1369
1370static void emit_negsmi(int rs, int rt)
1371{
1372 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1373 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1374}
1375
1376static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1377{
1378 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1379 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1380}
1381
1382static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1383{
1384 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1385 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1386}
1387
1388static void emit_teq(int rs, int rt)
1389{
1390 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1391 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1392}
1393
1394static unused void emit_rsbimm(int rs, int imm, int rt)
1395{
1396 u_int armval;
1397 genimm_checked(imm,&armval);
1398 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1399 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1400}
1401
1402// Conditionally select one of two immediates, optimizing for small code size
1403// This will only be called if HAVE_CMOV_IMM is defined
1404static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1405{
1406 u_int armval;
1407 if(genimm(imm2-imm1,&armval)) {
1408 emit_movimm(imm1,rt);
1409 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1410 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1411 }else if(genimm(imm1-imm2,&armval)) {
1412 emit_movimm(imm1,rt);
1413 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1414 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1415 }
1416 else {
1417 #ifndef HAVE_ARMV7
1418 emit_movimm(imm1,rt);
1419 add_literal((int)out,imm2);
1420 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1421 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1422 #else
1423 emit_movw(imm1&0x0000FFFF,rt);
1424 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1425 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1426 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1427 }
1428 emit_movt(imm1&0xFFFF0000,rt);
1429 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1430 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1431 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1432 }
1433 #endif
1434 }
1435}
1436
1437// special case for checking invalid_code
1438static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1439{
1440 assert(imm<128&&imm>=0);
1441 assert(r>=0&&r<16);
1442 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1443 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1444 emit_cmpimm(HOST_TEMPREG,imm);
1445}
1446
// Emit `blne` to address a; the offset field comes from genjmp().
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000 | genjmp(a));
}
1453
1454// Used to preload hash table entries
1455static unused void emit_prefetchreg(int r)
1456{
1457 assem_debug("pld %s\n",regname[r]);
1458 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1459}
1460
1461// Special case for mini_ht
1462static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1463{
1464 assert(offset<4096);
1465 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1466 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1467}
1468
1469static void emit_orrne_imm(int rs,int imm,int rt)
1470{
1471 u_int armval;
1472 genimm_checked(imm,&armval);
1473 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1474 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1475}
1476
1477static void emit_andne_imm(int rs,int imm,int rt)
1478{
1479 u_int armval;
1480 genimm_checked(imm,&armval);
1481 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1482 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1483}
1484
1485static unused void emit_addpl_imm(int rs,int imm,int rt)
1486{
1487 u_int armval;
1488 genimm_checked(imm,&armval);
1489 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1490 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1491}
1492
1493static void emit_jno_unlikely(int a)
1494{
1495 //emit_jno(a);
1496 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1497 output_w32(0x72800000|rd_rn_rm(15,15,0));
1498}
1499
1500static void save_regs_all(u_int reglist)
1501{
1502 int i;
1503 if(!reglist) return;
1504 assem_debug("stmia fp,{");
1505 for(i=0;i<16;i++)
1506 if(reglist&(1<<i))
1507 assem_debug("r%d,",i);
1508 assem_debug("}\n");
1509 output_w32(0xe88b0000|reglist);
1510}
1511
1512static void restore_regs_all(u_int reglist)
1513{
1514 int i;
1515 if(!reglist) return;
1516 assem_debug("ldmia fp,{");
1517 for(i=0;i<16;i++)
1518 if(reglist&(1<<i))
1519 assem_debug("r%d,",i);
1520 assem_debug("}\n");
1521 output_w32(0xe89b0000|reglist);
1522}
1523
1524// Save registers before function call
1525static void save_regs(u_int reglist)
1526{
1527 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1528 save_regs_all(reglist);
1529}
1530
1531// Restore registers after function call
1532static void restore_regs(u_int reglist)
1533{
1534 reglist&=CALLER_SAVE_REGS;
1535 restore_regs_all(reglist);
1536}
1537
1538/* Stubs/epilogue */
1539
1540static void literal_pool(int n)
1541{
1542 if(!literalcount) return;
1543 if(n) {
1544 if((int)out-literals[0][0]<4096-n) return;
1545 }
1546 u_int *ptr;
1547 int i;
1548 for(i=0;i<literalcount;i++)
1549 {
1550 u_int l_addr=(u_int)out;
1551 int j;
1552 for(j=0;j<i;j++) {
1553 if(literals[j][1]==literals[i][1]) {
1554 //printf("dup %08x\n",literals[i][1]);
1555 l_addr=literals[j][0];
1556 break;
1557 }
1558 }
1559 ptr=(u_int *)literals[i][0];
1560 u_int offset=l_addr-(u_int)ptr-8;
1561 assert(offset<4096);
1562 assert(!(offset&3));
1563 *ptr|=offset;
1564 if(l_addr==(u_int)out) {
1565 literals[i][0]=l_addr; // remember for dupes
1566 output_w32(literals[i][1]);
1567 }
1568 }
1569 literalcount=0;
1570}
1571
1572static void literal_pool_jumpover(int n)
1573{
1574 if(!literalcount) return;
1575 if(n) {
1576 if((int)out-literals[0][0]<4096-n) return;
1577 }
1578 void *jaddr = out;
1579 emit_jmp(0);
1580 literal_pool(0);
1581 set_jump_target(jaddr, out);
1582}
1583
1584// parsed by get_pointer, find_extjump_insn
1585static void emit_extjump2(u_char *addr, u_int target, void *linker)
1586{
1587 u_char *ptr=(u_char *)addr;
1588 assert((ptr[3]&0x0e)==0xa);
1589 (void)ptr;
1590
1591 emit_loadlp(target,0);
1592 emit_loadlp((u_int)addr,1);
1593 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
1594 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1595//DEBUG >
1596#ifdef DEBUG_CYCLE_COUNT
1597 emit_readword(&last_count,ECX);
1598 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1599 emit_readword(&next_interupt,ECX);
1600 emit_writeword(HOST_CCREG,&Count);
1601 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1602 emit_writeword(ECX,&last_count);
1603#endif
1604//DEBUG <
1605 emit_far_jump(linker);
1606}
1607
1608static void check_extjump2(void *src)
1609{
1610 u_int *ptr = src;
1611 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1612 (void)ptr;
1613}
1614
1615// put rt_val into rt, potentially making use of rs with value rs_val
1616static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1617{
1618 u_int armval;
1619 int diff;
1620 if(genimm(rt_val,&armval)) {
1621 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1622 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1623 return;
1624 }
1625 if(genimm(~rt_val,&armval)) {
1626 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1627 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1628 return;
1629 }
1630 diff=rt_val-rs_val;
1631 if(genimm(diff,&armval)) {
1632 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1633 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1634 return;
1635 }else if(genimm(-diff,&armval)) {
1636 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1637 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1638 return;
1639 }
1640 emit_movimm(rt_val,rt);
1641}
1642
// return 1 if emit_movimm_from() can do its job cheaply:
// the values are equal, or their difference (or its negation), after
// dropping trailing pairs of zero bits, is below 0x100.
static int is_similar_value(u_int v1,u_int v2)
{
  int delta, pass;

  if (v1 == v2)
    return 1;

  delta = v2 - v1;
  // pass 0 checks the difference itself, pass 1 its negation
  for (pass = 0; pass < 2; pass++) {
    u_int bits = pass ? -delta : delta;

    while (bits != 0 && (bits & 3) == 0)
      bits >>= 2;
    if (bits < 0x100)
      return 1;
  }
  return 0;
}
1658
1659static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1660{
1661 switch(type) {
1662 case LOADB_STUB: emit_signextend8(rs,rt); break;
1663 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1664 case LOADH_STUB: emit_signextend16(rs,rt); break;
1665 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1666 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1667 default: assert(0);
1668 }
1669}
1670
1671#include "pcsxmem.h"
1672#include "pcsxmem_inline.c"
1673
1674static void do_readstub(int n)
1675{
1676 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1677 literal_pool(256);
1678 set_jump_target(stubs[n].addr, out);
1679 enum stub_type type=stubs[n].type;
1680 int i=stubs[n].a;
1681 int rs=stubs[n].b;
1682 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1683 u_int reglist=stubs[n].e;
1684 const signed char *i_regmap=i_regs->regmap;
1685 int rt;
1686 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1687 rt=get_reg(i_regmap,FTEMP);
1688 }else{
1689 rt=get_reg(i_regmap,rt1[i]);
1690 }
1691 assert(rs>=0);
1692 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1693 void *restore_jump = NULL;
1694 reglist|=(1<<rs);
1695 for(r=0;r<=12;r++) {
1696 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1697 temp=r; break;
1698 }
1699 }
1700 if(rt>=0&&rt1[i]!=0)
1701 reglist&=~(1<<rt);
1702 if(temp==-1) {
1703 save_regs(reglist);
1704 regs_saved=1;
1705 temp=(rs==0)?2:0;
1706 }
1707 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1708 temp2=1;
1709 emit_readword(&mem_rtab,temp);
1710 emit_shrimm(rs,12,temp2);
1711 emit_readword_dualindexedx4(temp,temp2,temp2);
1712 emit_lsls_imm(temp2,1,temp2);
1713 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1714 switch(type) {
1715 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1716 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1717 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1718 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1719 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1720 default: assert(0);
1721 }
1722 }
1723 if(regs_saved) {
1724 restore_jump=out;
1725 emit_jcc(0); // jump to reg restore
1726 }
1727 else
1728 emit_jcc(stubs[n].retaddr); // return address
1729
1730 if(!regs_saved)
1731 save_regs(reglist);
1732 void *handler=NULL;
1733 if(type==LOADB_STUB||type==LOADBU_STUB)
1734 handler=jump_handler_read8;
1735 if(type==LOADH_STUB||type==LOADHU_STUB)
1736 handler=jump_handler_read16;
1737 if(type==LOADW_STUB)
1738 handler=jump_handler_read32;
1739 assert(handler);
1740 pass_args(rs,temp2);
1741 int cc=get_reg(i_regmap,CCREG);
1742 if(cc<0)
1743 emit_loadreg(CCREG,2);
1744 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1745 emit_far_call(handler);
1746 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1747 mov_loadtype_adj(type,0,rt);
1748 }
1749 if(restore_jump)
1750 set_jump_target(restore_jump, out);
1751 restore_regs(reglist);
1752 emit_jmp(stubs[n].retaddr); // return address
1753}
1754
1755static void inline_readstub(enum stub_type type, int i, u_int addr,
1756 const signed char regmap[], int target, int adj, u_int reglist)
1757{
1758 int rs=get_reg(regmap,target);
1759 int rt=get_reg(regmap,target);
1760 if(rs<0) rs=get_reg(regmap,-1);
1761 assert(rs>=0);
1762 u_int is_dynamic;
1763 uintptr_t host_addr = 0;
1764 void *handler;
1765 int cc=get_reg(regmap,CCREG);
1766 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
1767 return;
1768 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1769 if (handler == NULL) {
1770 if(rt<0||rt1[i]==0)
1771 return;
1772 if(addr!=host_addr)
1773 emit_movimm_from(addr,rs,host_addr,rs);
1774 switch(type) {
1775 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1776 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1777 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1778 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1779 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1780 default: assert(0);
1781 }
1782 return;
1783 }
1784 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1785 if(is_dynamic) {
1786 if(type==LOADB_STUB||type==LOADBU_STUB)
1787 handler=jump_handler_read8;
1788 if(type==LOADH_STUB||type==LOADHU_STUB)
1789 handler=jump_handler_read16;
1790 if(type==LOADW_STUB)
1791 handler=jump_handler_read32;
1792 }
1793
1794 // call a memhandler
1795 if(rt>=0&&rt1[i]!=0)
1796 reglist&=~(1<<rt);
1797 save_regs(reglist);
1798 if(target==0)
1799 emit_movimm(addr,0);
1800 else if(rs!=0)
1801 emit_mov(rs,0);
1802 if(cc<0)
1803 emit_loadreg(CCREG,2);
1804 if(is_dynamic) {
1805 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1806 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1807 }
1808 else {
1809 emit_readword(&last_count,3);
1810 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1811 emit_add(2,3,2);
1812 emit_writeword(2,&Count);
1813 }
1814
1815 emit_far_call(handler);
1816
1817 if(rt>=0&&rt1[i]!=0) {
1818 switch(type) {
1819 case LOADB_STUB: emit_signextend8(0,rt); break;
1820 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1821 case LOADH_STUB: emit_signextend16(0,rt); break;
1822 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1823 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1824 default: assert(0);
1825 }
1826 }
1827 restore_regs(reglist);
1828}
1829
1830static void do_writestub(int n)
1831{
1832 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1833 literal_pool(256);
1834 set_jump_target(stubs[n].addr, out);
1835 enum stub_type type=stubs[n].type;
1836 int i=stubs[n].a;
1837 int rs=stubs[n].b;
1838 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1839 u_int reglist=stubs[n].e;
1840 const signed char *i_regmap=i_regs->regmap;
1841 int rt,r;
1842 if(itype[i]==C1LS||itype[i]==C2LS) {
1843 rt=get_reg(i_regmap,r=FTEMP);
1844 }else{
1845 rt=get_reg(i_regmap,r=rs2[i]);
1846 }
1847 assert(rs>=0);
1848 assert(rt>=0);
1849 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1850 void *restore_jump = NULL;
1851 int reglist2=reglist|(1<<rs)|(1<<rt);
1852 for(rtmp=0;rtmp<=12;rtmp++) {
1853 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1854 temp=rtmp; break;
1855 }
1856 }
1857 if(temp==-1) {
1858 save_regs(reglist);
1859 regs_saved=1;
1860 for(rtmp=0;rtmp<=3;rtmp++)
1861 if(rtmp!=rs&&rtmp!=rt)
1862 {temp=rtmp;break;}
1863 }
1864 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1865 temp2=3;
1866 emit_readword(&mem_wtab,temp);
1867 emit_shrimm(rs,12,temp2);
1868 emit_readword_dualindexedx4(temp,temp2,temp2);
1869 emit_lsls_imm(temp2,1,temp2);
1870 switch(type) {
1871 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1872 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1873 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1874 default: assert(0);
1875 }
1876 if(regs_saved) {
1877 restore_jump=out;
1878 emit_jcc(0); // jump to reg restore
1879 }
1880 else
1881 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1882
1883 if(!regs_saved)
1884 save_regs(reglist);
1885 void *handler=NULL;
1886 switch(type) {
1887 case STOREB_STUB: handler=jump_handler_write8; break;
1888 case STOREH_STUB: handler=jump_handler_write16; break;
1889 case STOREW_STUB: handler=jump_handler_write32; break;
1890 default: assert(0);
1891 }
1892 assert(handler);
1893 pass_args(rs,rt);
1894 if(temp2!=3)
1895 emit_mov(temp2,3);
1896 int cc=get_reg(i_regmap,CCREG);
1897 if(cc<0)
1898 emit_loadreg(CCREG,2);
1899 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1900 // returns new cycle_count
1901 emit_far_call(handler);
1902 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
1903 if(cc<0)
1904 emit_storereg(CCREG,2);
1905 if(restore_jump)
1906 set_jump_target(restore_jump, out);
1907 restore_regs(reglist);
1908 emit_jmp(stubs[n].retaddr);
1909}
1910
1911static void inline_writestub(enum stub_type type, int i, u_int addr,
1912 const signed char regmap[], int target, int adj, u_int reglist)
1913{
1914 int rs=get_reg(regmap,-1);
1915 int rt=get_reg(regmap,target);
1916 assert(rs>=0);
1917 assert(rt>=0);
1918 uintptr_t host_addr = 0;
1919 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1920 if (handler == NULL) {
1921 if(addr!=host_addr)
1922 emit_movimm_from(addr,rs,host_addr,rs);
1923 switch(type) {
1924 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1925 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1926 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1927 default: assert(0);
1928 }
1929 return;
1930 }
1931
1932 // call a memhandler
1933 save_regs(reglist);
1934 pass_args(rs,rt);
1935 int cc=get_reg(regmap,CCREG);
1936 if(cc<0)
1937 emit_loadreg(CCREG,2);
1938 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1939 emit_movimm((u_int)handler,3);
1940 // returns new cycle_count
1941 emit_far_call(jump_handler_write_h);
1942 emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc);
1943 if(cc<0)
1944 emit_storereg(CCREG,2);
1945 restore_regs(reglist);
1946}
1947
1948// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1949static void do_dirty_stub_emit_args(u_int arg0)
1950{
1951 #ifndef HAVE_ARMV7
1952 emit_loadlp((int)source, 1);
1953 emit_loadlp((int)copy, 2);
1954 emit_loadlp(slen*4, 3);
1955 #else
1956 emit_movw(((u_int)source)&0x0000FFFF, 1);
1957 emit_movw(((u_int)copy)&0x0000FFFF, 2);
1958 emit_movt(((u_int)source)&0xFFFF0000, 1);
1959 emit_movt(((u_int)copy)&0xFFFF0000, 2);
1960 emit_movw(slen*4, 3);
1961 #endif
1962 emit_movimm(arg0, 0);
1963}
1964
1965static void *do_dirty_stub(int i)
1966{
1967 assem_debug("do_dirty_stub %x\n",start+i*4);
1968 do_dirty_stub_emit_args(start + i*4);
1969 emit_far_call(verify_code);
1970 void *entry = out;
1971 load_regs_entry(i);
1972 if (entry == out)
1973 entry = instr_addr[i];
1974 emit_jmp(instr_addr[i]);
1975 return entry;
1976}
1977
1978static void do_dirty_stub_ds()
1979{
1980 do_dirty_stub_emit_args(start + 1);
1981 emit_far_call(verify_code_ds);
1982}
1983
1984/* Special assem */
1985
1986static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1987{
1988 save_regs_all(reglist);
1989 cop2_call_stall_check(op, i, i_regs, 0);
1990#ifdef PCNT
1991 emit_movimm(op, 0);
1992 emit_far_call(pcnt_gte_start);
1993#endif
1994 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
1995}
1996
1997static void c2op_epilogue(u_int op,u_int reglist)
1998{
1999#ifdef PCNT
2000 emit_movimm(op,0);
2001 emit_far_call(pcnt_gte_end);
2002#endif
2003 restore_regs_all(reglist);
2004}
2005
2006static void c2op_call_MACtoIR(int lm,int need_flags)
2007{
2008 if(need_flags)
2009 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2010 else
2011 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2012}
2013
2014static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2015{
2016 emit_far_call(func);
2017 // func is C code and trashes r0
2018 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2019 if(need_flags||need_ir)
2020 c2op_call_MACtoIR(lm,need_flags);
2021 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2022}
2023
2024static void c2op_assemble(int i, const struct regstat *i_regs)
2025{
2026 u_int c2op = source[i] & 0x3f;
2027 u_int reglist_full = get_host_reglist(i_regs->regmap);
2028 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2029 int need_flags, need_ir;
2030
2031 if (gte_handlers[c2op]!=NULL) {
2032 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2033 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2034 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2035 source[i],gte_unneeded[i+1],need_flags,need_ir);
2036 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
2037 need_flags=0;
2038 int shift = (source[i] >> 19) & 1;
2039 int lm = (source[i] >> 10) & 1;
2040 switch(c2op) {
2041#ifndef DRC_DBG
2042 case GTE_MVMVA: {
2043#ifdef HAVE_ARMV5
2044 int v = (source[i] >> 15) & 3;
2045 int cv = (source[i] >> 13) & 3;
2046 int mx = (source[i] >> 17) & 3;
2047 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2048 c2op_prologue(c2op,i,i_regs,reglist);
2049 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2050 if(v<3)
2051 emit_ldrd(v*8,0,4);
2052 else {
2053 emit_movzwl_indexed(9*4,0,4); // gteIR
2054 emit_movzwl_indexed(10*4,0,6);
2055 emit_movzwl_indexed(11*4,0,5);
2056 emit_orrshl_imm(6,16,4);
2057 }
2058 if(mx<3)
2059 emit_addimm(0,32*4+mx*8*4,6);
2060 else
2061 emit_readword(&zeromem_ptr,6);
2062 if(cv<3)
2063 emit_addimm(0,32*4+(cv*8+5)*4,7);
2064 else
2065 emit_readword(&zeromem_ptr,7);
2066#ifdef __ARM_NEON__
2067 emit_movimm(source[i],1); // opcode
2068 emit_far_call(gteMVMVA_part_neon);
2069 if(need_flags) {
2070 emit_movimm(lm,1);
2071 emit_far_call(gteMACtoIR_flags_neon);
2072 }
2073#else
2074 if(cv==3&&shift)
2075 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
2076 else {
2077 emit_movimm(shift,1);
2078 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2079 }
2080 if(need_flags||need_ir)
2081 c2op_call_MACtoIR(lm,need_flags);
2082#endif
2083#else /* if not HAVE_ARMV5 */
2084 c2op_prologue(c2op,i,i_regs,reglist);
2085 emit_movimm(source[i],1); // opcode
2086 emit_writeword(1,&psxRegs.code);
2087 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2088#endif
2089 break;
2090 }
2091 case GTE_OP:
2092 c2op_prologue(c2op,i,i_regs,reglist);
2093 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2094 if(need_flags||need_ir) {
2095 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2096 c2op_call_MACtoIR(lm,need_flags);
2097 }
2098 break;
2099 case GTE_DPCS:
2100 c2op_prologue(c2op,i,i_regs,reglist);
2101 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2102 break;
2103 case GTE_INTPL:
2104 c2op_prologue(c2op,i,i_regs,reglist);
2105 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2106 break;
2107 case GTE_SQR:
2108 c2op_prologue(c2op,i,i_regs,reglist);
2109 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2110 if(need_flags||need_ir) {
2111 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2112 c2op_call_MACtoIR(lm,need_flags);
2113 }
2114 break;
2115 case GTE_DCPL:
2116 c2op_prologue(c2op,i,i_regs,reglist);
2117 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2118 break;
2119 case GTE_GPF:
2120 c2op_prologue(c2op,i,i_regs,reglist);
2121 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2122 break;
2123 case GTE_GPL:
2124 c2op_prologue(c2op,i,i_regs,reglist);
2125 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2126 break;
2127#endif
2128 default:
2129 c2op_prologue(c2op,i,i_regs,reglist);
2130#ifdef DRC_DBG
2131 emit_movimm(source[i],1); // opcode
2132 emit_writeword(1,&psxRegs.code);
2133#endif
2134 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2135 break;
2136 }
2137 c2op_epilogue(c2op,reglist);
2138 }
2139}
2140
// Emit code that derives temp from sl, described by the original notes as:
//   value = value & 0x7ffff000;
//   if (value & 0x7f87e000) value |= 0x80000000;
// The low 12 bits are dropped with a shift pair; the mask is then tested in
// three pieces and 0x80000000 is ORed in under the `ne` condition.
// NOTE(review): emit_testeqimm presumably only tests when the previous test
// left `eq` set - confirm against its definition.
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // temp = sl with bits 11:0 cleared
  emit_shrimm(sl,12,temp);
  emit_shlimm(temp,12,temp);

  // 0x7f000000 | 0x00870000 | 0x0000e000 == 0x7f87e000
  emit_testimm(temp,0x7f000000);
  emit_testeqimm(temp,0x00870000);
  emit_testeqimm(temp,0x0000e000);

  emit_orrne_imm(temp,0x80000000,temp);
}
2152
2153static void do_mfc2_31_one(u_int copr,signed char temp)
2154{
2155 emit_readword(&reg_cop2d[copr],temp);
2156 emit_testimm(temp,0x8000); // do we need this?
2157 emit_andne_imm(temp,0,temp);
2158 emit_cmpimm(temp,0xf80);
2159 emit_andimm(temp,0xf80,temp);
2160 emit_cmovae_imm(0xf80,temp);
2161}
2162
2163static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2164{
2165 if (temp < 0) {
2166 host_tempreg_acquire();
2167 temp = HOST_TEMPREG;
2168 }
2169 do_mfc2_31_one(9,temp);
2170 emit_shrimm(temp,7,tl);
2171 do_mfc2_31_one(10,temp);
2172 emit_orrshr_imm(temp,2,tl);
2173 do_mfc2_31_one(11,temp);
2174 emit_orrshl_imm(temp,3,tl);
2175 emit_writeword(tl,&reg_cop2d[29]);
2176 if (temp == HOST_TEMPREG)
2177 host_tempreg_release();
2178}
2179
2180static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2181{
2182 // case 0x18: MULT
2183 // case 0x19: MULTU
2184 // case 0x1A: DIV
2185 // case 0x1B: DIVU
2186 // case 0x1C: DMULT
2187 // case 0x1D: DMULTU
2188 // case 0x1E: DDIV
2189 // case 0x1F: DDIVU
2190 if(rs1[i]&&rs2[i])
2191 {
2192 if((opcode2[i]&4)==0) // 32-bit
2193 {
2194 if(opcode2[i]==0x18) // MULT
2195 {
2196 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2197 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2198 signed char hi=get_reg(i_regs->regmap,HIREG);
2199 signed char lo=get_reg(i_regs->regmap,LOREG);
2200 assert(m1>=0);
2201 assert(m2>=0);
2202 assert(hi>=0);
2203 assert(lo>=0);
2204 emit_smull(m1,m2,hi,lo);
2205 }
2206 if(opcode2[i]==0x19) // MULTU
2207 {
2208 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2209 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2210 signed char hi=get_reg(i_regs->regmap,HIREG);
2211 signed char lo=get_reg(i_regs->regmap,LOREG);
2212 assert(m1>=0);
2213 assert(m2>=0);
2214 assert(hi>=0);
2215 assert(lo>=0);
2216 emit_umull(m1,m2,hi,lo);
2217 }
2218 if(opcode2[i]==0x1A) // DIV
2219 {
2220 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2221 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2222 assert(d1>=0);
2223 assert(d2>=0);
2224 signed char quotient=get_reg(i_regs->regmap,LOREG);
2225 signed char remainder=get_reg(i_regs->regmap,HIREG);
2226 assert(quotient>=0);
2227 assert(remainder>=0);
2228 emit_movs(d1,remainder);
2229 emit_movimm(0xffffffff,quotient);
2230 emit_negmi(quotient,quotient); // .. quotient and ..
2231 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2232 emit_movs(d2,HOST_TEMPREG);
2233 emit_jeq(out+52); // Division by zero
2234 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2235#ifdef HAVE_ARMV5
2236 emit_clz(HOST_TEMPREG,quotient);
2237 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2238#else
2239 emit_movimm(0,quotient);
2240 emit_addpl_imm(quotient,1,quotient);
2241 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2242 emit_jns(out-2*4);
2243#endif
2244 emit_orimm(quotient,1<<31,quotient);
2245 emit_shr(quotient,quotient,quotient);
2246 emit_cmp(remainder,HOST_TEMPREG);
2247 emit_subcs(remainder,HOST_TEMPREG,remainder);
2248 emit_adcs(quotient,quotient,quotient);
2249 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2250 emit_jcc(out-16); // -4
2251 emit_teq(d1,d2);
2252 emit_negmi(quotient,quotient);
2253 emit_test(d1,d1);
2254 emit_negmi(remainder,remainder);
2255 }
2256 if(opcode2[i]==0x1B) // DIVU
2257 {
2258 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2259 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2260 assert(d1>=0);
2261 assert(d2>=0);
2262 signed char quotient=get_reg(i_regs->regmap,LOREG);
2263 signed char remainder=get_reg(i_regs->regmap,HIREG);
2264 assert(quotient>=0);
2265 assert(remainder>=0);
2266 emit_mov(d1,remainder);
2267 emit_movimm(0xffffffff,quotient); // div0 case
2268 emit_test(d2,d2);
2269 emit_jeq(out+40); // Division by zero
2270#ifdef HAVE_ARMV5
2271 emit_clz(d2,HOST_TEMPREG);
2272 emit_movimm(1<<31,quotient);
2273 emit_shl(d2,HOST_TEMPREG,d2);
2274#else
2275 emit_movimm(0,HOST_TEMPREG);
2276 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2277 emit_lslpls_imm(d2,1,d2);
2278 emit_jns(out-2*4);
2279 emit_movimm(1<<31,quotient);
2280#endif
2281 emit_shr(quotient,HOST_TEMPREG,quotient);
2282 emit_cmp(remainder,d2);
2283 emit_subcs(remainder,d2,remainder);
2284 emit_adcs(quotient,quotient,quotient);
2285 emit_shrcc_imm(d2,1,d2);
2286 emit_jcc(out-16); // -4
2287 }
2288 }
2289 else // 64-bit
2290 assert(0);
2291 }
2292 else
2293 {
2294 // Multiply by zero is zero.
2295 // MIPS does not have a divide by zero exception.
2296 // The result is undefined, we return zero.
2297 signed char hr=get_reg(i_regs->regmap,HIREG);
2298 signed char lr=get_reg(i_regs->regmap,LOREG);
2299 if(hr>=0) emit_zeroreg(hr);
2300 if(lr>=0) emit_zeroreg(lr);
2301 }
2302}
2303#define multdiv_assemble multdiv_assemble_arm
2304
2305static void do_jump_vaddr(int rs)
2306{
2307 emit_far_jump(jump_vaddr_reg[rs]);
2308}
2309
// Intentionally emits nothing on ARM.
// On x86 this step loads the value 0xf8 into the register; on ARM the
// hash is computed with a single instruction instead (see do_rhash).
static void do_preload_rhash(int r)
{
  (void)r; // unused on this architecture
}
2314
2315static void do_preload_rhtbl(int ht) {
2316 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2317}
2318
// Emit the hash computation: rh = rs & 0xf8.
//   rs - host register holding the value to hash
//   rh - host register that receives the hash
static void do_rhash(int rs, int rh)
{
  const int hash_mask = 0xf8;

  emit_andimm(rs, hash_mask, rh);
}
2322
2323static void do_miniht_load(int ht,int rh) {
2324 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2325 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2326}
2327
// Emit the compare-and-dispatch sequence: compare rh with rs, do a
// conditional (eq) load from [ht+4] into register 15, otherwise fall
// through to the generic jump via do_jump_vaddr.
//   rs - host register holding the target address
//   rh - host register compared against rs
//   ht - host register used as base for the conditional load
static void do_miniht_jump(int rs, int rh, int ht)
{
  int jump_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // with the hack enabled the fallback jump always goes through r7
  if (jump_reg != 7) {
    emit_mov(jump_reg, 7);
    jump_reg = 7;
  }
#endif
  do_jump_vaddr(jump_reg);
}
2338
2339static void do_miniht_insert(u_int return_address,int rt,int temp) {
2340 #ifndef HAVE_ARMV7
2341 emit_movimm(return_address,rt); // PC into link register
2342 add_to_linker(out,return_address,1);
2343 emit_pcreladdr(temp);
2344 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2345 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2346 #else
2347 emit_movw(return_address&0x0000FFFF,rt);
2348 add_to_linker(out,return_address,1);
2349 emit_pcreladdr(temp);
2350 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2351 emit_movt(return_address&0xFFFF0000,rt);
2352 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2353 #endif
2354}
2355
2356// CPU-architecture-specific initialization
2357static void arch_init(void)
2358{
2359 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2360 struct tramp_insns *ops = ndrc->tramp.ops;
2361 size_t i;
2362 assert(!(diff & 3));
2363 assert(diff < 0x1000);
2364 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2365 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2366 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2367 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2368}
2369
2370// vim:shiftwidth=2:expandtab