drc: use a separate var for game hacks
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#ifndef __MACH__
32#define CALLER_SAVE_REGS 0x100f
33#else
34#define CALLER_SAVE_REGS 0x120f
35#endif
36
37#define unused __attribute__((unused))
38
39#ifdef DRC_DBG
40#pragma GCC diagnostic ignored "-Wunused-function"
41#pragma GCC diagnostic ignored "-Wunused-variable"
42#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
43#endif
44
/* Routines declared here only; no definition is visible in this part of the file. */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

/* jump_vaddr_rN routine indexed by register number N.
 * Slots 11, 13, 14 and 15 have no routine and stay null. */
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
79
80void invalidate_addr_r0();
81void invalidate_addr_r1();
82void invalidate_addr_r2();
83void invalidate_addr_r3();
84void invalidate_addr_r4();
85void invalidate_addr_r5();
86void invalidate_addr_r6();
87void invalidate_addr_r7();
88void invalidate_addr_r8();
89void invalidate_addr_r9();
90void invalidate_addr_r10();
91void invalidate_addr_r12();
92
93const u_int invalidate_addr_reg[16] = {
94 (int)invalidate_addr_r0,
95 (int)invalidate_addr_r1,
96 (int)invalidate_addr_r2,
97 (int)invalidate_addr_r3,
98 (int)invalidate_addr_r4,
99 (int)invalidate_addr_r5,
100 (int)invalidate_addr_r6,
101 (int)invalidate_addr_r7,
102 (int)invalidate_addr_r8,
103 (int)invalidate_addr_r9,
104 (int)invalidate_addr_r10,
105 0,
106 (int)invalidate_addr_r12,
107 0,
108 0,
109 0};
110
111static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
112
113/* Linker */
114
// Patch the instruction at addr so that it refers to target_.
// The kind of instruction is recognised from its most significant byte:
//   0xe2      - immediate form (e.g. the "add rt,pc,#?" written by
//               emit_pcreladdr); the low 12 bits receive the encoded distance
//   0x72      - generated by emit_jno_unlikely; patched as an immediate when
//               the distance can be encoded, otherwise turned into a branch
//   otherwise - must be a branch (0x?a / 0x?b); the 24-bit offset is replaced
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = (u_int *)addr;
  const u_char top = ((u_char *)addr)[3];
  const u_int target = (u_int)target_;
  // distance measured from the instruction address + 8
  const u_int rel = target - (u_int)insn - 8;

  if (top == 0xe2) {
    assert(rel < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (rel >> 2) | 0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*insn);
    return;
  }

  if (top == 0x72) {
    // generated by emit_jno_unlikely
    if (rel < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (rel >> 2) | 0xF00;
    }
    else if (rel < 4096 && !(rel & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (rel >> 4) | 0xE00;
    }
    else {
      *insn = (0x7A000000) | ((rel << 6) >> 8);
    }
    return;
  }

  assert((top & 0x0e) == 0xa);
  *insn = (*insn & 0xFF000000) | ((rel << 6) >> 8);
}
146
// Variant of set_jump_target that can additionally copy the instruction
// found at the branch target into the word just before the branch.
// Works, but the difference in speed is usually insignificant, so it is
// compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn=(u_int *)addr;
  const u_char top=((u_char *)addr)[3];

  assert(!copy||insn[-1]==0xe28dd000);
  if(top==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }

  assert((top&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  // Instructions that are never copied into the slot
  if((target_insn&0x0e100000)==0             // ALU, no immediate, no flags
     ||(target_insn&0x0c100000)==0x04100000  // Load
     ||(target_insn&0x08000000))
    copy=0;
  if(copy) {
    insn[-1]=target_insn;
    target+=4; // branch past the instruction that was copied
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
181
182/* Literal pool */
183static void add_literal(int addr,int val)
184{
185 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
186 literals[literalcount][0]=addr;
187 literals[literalcount][1]=val;
188 literalcount++;
189}
190
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the address of the jumping instruction. The stub's second word
// must be "ldr rx, [pc, #ofs]"; the pointer it would load is fetched from
// that pc-relative slot and returned.
static void *find_extjump_insn(void *stub)
{
  int *ldr=(int *)((char *)stub+4);
  assert((*ldr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
  u_int pool_ofs=*ldr&0xfff;
  // pc-relative: base is the ldr's own address + 8
  void **slot=(void **)((char *)ldr+pool_ofs+8);
  return *slot;
}
201
202// find where external branch is liked to using addr of it's stub:
203// get address that insn one after stub loads (dyna_linker arg1),
204// treat it as a pointer to branch insn,
205// return addr where that branch jumps to
206static void *get_pointer(void *stub)
207{
208 //printf("get_pointer(%x)\n",(int)stub);
209 int *i_ptr=find_extjump_insn(stub);
210 assert((*i_ptr&0x0f000000)==0x0a000000);
211 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
212}
213
// Map a "dirty" entry point to its "clean" one by stepping over the
// call to verify_code. If the word after the call is an unconditional
// branch, the branch destination is returned instead.
static void *get_clean_addr(void *addr)
{
  #ifndef HAVE_ARMV7
  const int prologue_words=4;
  #else
  const int prologue_words=6;
  #endif
  signed int *insn=(signed int *)addr+prologue_words;

  // the call may sit one word further on
  if((*insn&0xFF000000)!=0xeb000000)
    insn++;
  assert((*insn&0xFF000000)==0xeb000000); // bl instruction
  insn++;

  if((*insn&0xFF000000)!=0xea000000)
    return insn;
  return (char *)insn+((*insn<<8)>>6)+8; // follow jump
}
232
233static int verify_dirty(const u_int *ptr)
234{
235 #ifndef HAVE_ARMV7
236 u_int offset;
237 // get from literal pool
238 assert((*ptr&0xFFFF0000)==0xe59f0000);
239 offset=*ptr&0xfff;
240 u_int source=*(u_int*)((void *)ptr+offset+8);
241 ptr++;
242 assert((*ptr&0xFFFF0000)==0xe59f0000);
243 offset=*ptr&0xfff;
244 u_int copy=*(u_int*)((void *)ptr+offset+8);
245 ptr++;
246 assert((*ptr&0xFFFF0000)==0xe59f0000);
247 offset=*ptr&0xfff;
248 u_int len=*(u_int*)((void *)ptr+offset+8);
249 ptr++;
250 ptr++;
251 #else
252 // ARMv7 movw/movt
253 assert((*ptr&0xFFF00000)==0xe3000000);
254 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
255 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
256 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
257 ptr+=6;
258 #endif
259 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
260 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
261 //printf("verify_dirty: %x %x %x\n",source,copy,len);
262 return !memcmp((void *)source,(void *)copy,len);
263}
264
265// This doesn't necessarily find all clean entry points, just
266// guarantees that it's not dirty
267static int isclean(void *addr)
268{
269 #ifndef HAVE_ARMV7
270 u_int *ptr=((u_int *)addr)+4;
271 #else
272 u_int *ptr=((u_int *)addr)+6;
273 #endif
274 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
275 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
277 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
278 return 1;
279}
280
281// get source that block at addr was compiled from (host pointers)
282static void get_bounds(void *addr, u_char **start, u_char **end)
283{
284 u_int *ptr = addr;
285 #ifndef HAVE_ARMV7
286 u_int offset;
287 // get from literal pool
288 assert((*ptr&0xFFFF0000)==0xe59f0000);
289 offset=*ptr&0xfff;
290 u_int source=*(u_int*)((void *)ptr+offset+8);
291 ptr++;
292 //assert((*ptr&0xFFFF0000)==0xe59f0000);
293 //offset=*ptr&0xfff;
294 //u_int copy=*(u_int*)((void *)ptr+offset+8);
295 ptr++;
296 assert((*ptr&0xFFFF0000)==0xe59f0000);
297 offset=*ptr&0xfff;
298 u_int len=*(u_int*)((void *)ptr+offset+8);
299 ptr++;
300 ptr++;
301 #else
302 // ARMv7 movw/movt
303 assert((*ptr&0xFFF00000)==0xe3000000);
304 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
305 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
306 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
307 ptr+=6;
308 #endif
309 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
310 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
311 *start=(u_char *)source;
312 *end=(u_char *)source+len;
313}
314
315// Allocate a specific ARM register.
316static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
317{
318 int n;
319 int dirty=0;
320
321 // see if it's already allocated (and dealloc it)
322 for(n=0;n<HOST_REGS;n++)
323 {
324 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
325 dirty=(cur->dirty>>n)&1;
326 cur->regmap[n]=-1;
327 }
328 }
329
330 cur->regmap[hr]=reg;
331 cur->dirty&=~(1<<hr);
332 cur->dirty|=dirty<<hr;
333 cur->isconst&=~(1<<hr);
334}
335
336// Alloc cycle count into dedicated register
337static void alloc_cc(struct regstat *cur,int i)
338{
339 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
340}
341
342/* Assembler */
343
344static unused char regname[16][4] = {
345 "r0",
346 "r1",
347 "r2",
348 "r3",
349 "r4",
350 "r5",
351 "r6",
352 "r7",
353 "r8",
354 "r9",
355 "r10",
356 "fp",
357 "r12",
358 "sp",
359 "lr",
360 "pc"};
361
362static void output_w32(u_int word)
363{
364 *((u_int *)out)=word;
365 out+=4;
366}
367
// Pack three register numbers into instruction fields:
// rn at bits 16-19, rd at bits 12-15 and rm at bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
375
// Pack rd (bits 12-15), rn (bits 16-19) and an 8-bit immediate that is
// shifted left by an even amount `shift`. The shift is stored from bit 7
// upwards as (32-shift)&30.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rot_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rot_field|imm;
}
384
// Try to express imm as an 8-bit value combined with an even rotation.
// On success the 12-bit field is written to *encoded and 1 is returned;
// otherwise *encoded is left at 0 and 0 is returned.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  for(int rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30); // rotate right by two bits and try again
  }
  return 0;
}
400
401static void genimm_checked(u_int imm,u_int *encoded)
402{
403 u_int ret=genimm(imm,encoded);
404 assert(ret);
405 (void)ret;
406}
407
408static u_int genjmp(u_int addr)
409{
410 if (addr < 3) return 0; // a branch that will be patched later
411 int offset = addr-(int)out-8;
412 if (offset < -33554432 || offset >= 33554432) {
413 SysPrintf("genjmp: out of range: %08x\n", offset);
414 abort();
415 return 0;
416 }
417 return ((u_int)offset>>2)&0xffffff;
418}
419
420static unused void emit_breakpoint(void)
421{
422 assem_debug("bkpt #0\n");
423 //output_w32(0xe1200070);
424 output_w32(0xe7f001f0);
425}
426
427static void emit_mov(int rs,int rt)
428{
429 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
430 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
431}
432
433static void emit_movs(int rs,int rt)
434{
435 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
436 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
437}
438
439static void emit_add(int rs1,int rs2,int rt)
440{
441 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
442 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
443}
444
445static void emit_adcs(int rs1,int rs2,int rt)
446{
447 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
448 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
449}
450
451static void emit_neg(int rs, int rt)
452{
453 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
454 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
455}
456
457static void emit_sub(int rs1,int rs2,int rt)
458{
459 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
460 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
461}
462
463static void emit_zeroreg(int rt)
464{
465 assem_debug("mov %s,#0\n",regname[rt]);
466 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
467}
468
469static void emit_loadlp(u_int imm,u_int rt)
470{
471 add_literal((int)out,imm);
472 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
473 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
474}
475
476static void emit_movw(u_int imm,u_int rt)
477{
478 assert(imm<65536);
479 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
480 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
481}
482
483static void emit_movt(u_int imm,u_int rt)
484{
485 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
486 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
487}
488
489static void emit_movimm(u_int imm,u_int rt)
490{
491 u_int armval;
492 if(genimm(imm,&armval)) {
493 assem_debug("mov %s,#%d\n",regname[rt],imm);
494 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
495 }else if(genimm(~imm,&armval)) {
496 assem_debug("mvn %s,#%d\n",regname[rt],imm);
497 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
498 }else if(imm<65536) {
499 #ifndef HAVE_ARMV7
500 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
501 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
502 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
503 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
504 #else
505 emit_movw(imm,rt);
506 #endif
507 }else{
508 #ifndef HAVE_ARMV7
509 emit_loadlp(imm,rt);
510 #else
511 emit_movw(imm&0x0000FFFF,rt);
512 emit_movt(imm&0xFFFF0000,rt);
513 #endif
514 }
515}
516
517static void emit_pcreladdr(u_int rt)
518{
519 assem_debug("add %s,pc,#?\n",regname[rt]);
520 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
521}
522
523static void emit_loadreg(int r, int hr)
524{
525 if(r&64) {
526 SysPrintf("64bit load in 32bit mode!\n");
527 assert(0);
528 return;
529 }
530 if((r&63)==0)
531 emit_zeroreg(hr);
532 else {
533 int addr = (int)&psxRegs.GPR.r[r];
534 switch (r) {
535 //case HIREG: addr = &hi; break;
536 //case LOREG: addr = &lo; break;
537 case CCREG: addr = (int)&cycle_count; break;
538 case CSREG: addr = (int)&Status; break;
539 case INVCP: addr = (int)&invc_ptr; break;
540 default: assert(r < 34); break;
541 }
542 u_int offset = addr-(u_int)&dynarec_local;
543 assert(offset<4096);
544 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
545 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
546 }
547}
548
549static void emit_storereg(int r, int hr)
550{
551 if(r&64) {
552 SysPrintf("64bit store in 32bit mode!\n");
553 assert(0);
554 return;
555 }
556 int addr = (int)&psxRegs.GPR.r[r];
557 switch (r) {
558 //case HIREG: addr = &hi; break;
559 //case LOREG: addr = &lo; break;
560 case CCREG: addr = (int)&cycle_count; break;
561 default: assert(r < 34); break;
562 }
563 u_int offset = addr-(u_int)&dynarec_local;
564 assert(offset<4096);
565 assem_debug("str %s,fp+%d\n",regname[hr],offset);
566 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
567}
568
569static void emit_test(int rs, int rt)
570{
571 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
572 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
573}
574
575static void emit_testimm(int rs,int imm)
576{
577 u_int armval;
578 assem_debug("tst %s,#%d\n",regname[rs],imm);
579 genimm_checked(imm,&armval);
580 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
581}
582
583static void emit_testeqimm(int rs,int imm)
584{
585 u_int armval;
586 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
587 genimm_checked(imm,&armval);
588 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
589}
590
591static void emit_not(int rs,int rt)
592{
593 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
594 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
595}
596
597static void emit_and(u_int rs1,u_int rs2,u_int rt)
598{
599 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
600 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
601}
602
603static void emit_or(u_int rs1,u_int rs2,u_int rt)
604{
605 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
606 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
607}
608
609static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
610{
611 assert(rs<16);
612 assert(rt<16);
613 assert(imm<32);
614 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
615 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
616}
617
618static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
619{
620 assert(rs<16);
621 assert(rt<16);
622 assert(imm<32);
623 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
624 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
625}
626
627static void emit_xor(u_int rs1,u_int rs2,u_int rt)
628{
629 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
630 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
631}
632
633static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
634{
635 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
636 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
637}
638
639static void emit_addimm(u_int rs,int imm,u_int rt)
640{
641 assert(rs<16);
642 assert(rt<16);
643 if(imm!=0) {
644 u_int armval;
645 if(genimm(imm,&armval)) {
646 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
647 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
648 }else if(genimm(-imm,&armval)) {
649 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
650 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
651 #ifdef HAVE_ARMV7
652 }else if(rt!=rs&&(u_int)imm<65536) {
653 emit_movw(imm&0x0000ffff,rt);
654 emit_add(rs,rt,rt);
655 }else if(rt!=rs&&(u_int)-imm<65536) {
656 emit_movw(-imm&0x0000ffff,rt);
657 emit_sub(rs,rt,rt);
658 #endif
659 }else if((u_int)-imm<65536) {
660 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
661 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
662 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
663 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
664 }else {
665 do {
666 int shift = (ffs(imm) - 1) & ~1;
667 int imm8 = imm & (0xff << shift);
668 genimm_checked(imm8,&armval);
669 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
670 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
671 rs = rt;
672 imm &= ~imm8;
673 }
674 while (imm != 0);
675 }
676 }
677 else if(rs!=rt) emit_mov(rs,rt);
678}
679
680static void emit_addimm_and_set_flags(int imm,int rt)
681{
682 assert(imm>-65536&&imm<65536);
683 u_int armval;
684 if(genimm(imm,&armval)) {
685 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
686 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
687 }else if(genimm(-imm,&armval)) {
688 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
689 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
690 }else if(imm<0) {
691 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
692 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
693 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
694 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
695 }else{
696 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
697 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
698 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
699 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
700 }
701}
702
703static void emit_addimm_no_flags(u_int imm,u_int rt)
704{
705 emit_addimm(rt,imm,rt);
706}
707
708static void emit_addnop(u_int r)
709{
710 assert(r<16);
711 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
712 output_w32(0xe2800000|rd_rn_rm(r,r,0));
713}
714
715static void emit_andimm(int rs,int imm,int rt)
716{
717 u_int armval;
718 if(imm==0) {
719 emit_zeroreg(rt);
720 }else if(genimm(imm,&armval)) {
721 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
722 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
723 }else if(genimm(~imm,&armval)) {
724 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
725 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
726 }else if(imm==65535) {
727 #ifndef HAVE_ARMV6
728 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
729 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
730 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
731 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
732 #else
733 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
734 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
735 #endif
736 }else{
737 assert(imm>0&&imm<65535);
738 #ifndef HAVE_ARMV7
739 assem_debug("mov r14,#%d\n",imm&0xFF00);
740 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
741 assem_debug("add r14,r14,#%d\n",imm&0xFF);
742 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
743 #else
744 emit_movw(imm,HOST_TEMPREG);
745 #endif
746 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
747 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
748 }
749}
750
751static void emit_orimm(int rs,int imm,int rt)
752{
753 u_int armval;
754 if(imm==0) {
755 if(rs!=rt) emit_mov(rs,rt);
756 }else if(genimm(imm,&armval)) {
757 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
758 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
759 }else{
760 assert(imm>0&&imm<65536);
761 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
762 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
763 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
764 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
765 }
766}
767
768static void emit_xorimm(int rs,int imm,int rt)
769{
770 u_int armval;
771 if(imm==0) {
772 if(rs!=rt) emit_mov(rs,rt);
773 }else if(genimm(imm,&armval)) {
774 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
775 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
776 }else{
777 assert(imm>0&&imm<65536);
778 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
779 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
780 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
781 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
782 }
783}
784
785static void emit_shlimm(int rs,u_int imm,int rt)
786{
787 assert(imm>0);
788 assert(imm<32);
789 //if(imm==1) ...
790 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
791 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
792}
793
794static void emit_lsls_imm(int rs,int imm,int rt)
795{
796 assert(imm>0);
797 assert(imm<32);
798 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
799 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
800}
801
802static unused void emit_lslpls_imm(int rs,int imm,int rt)
803{
804 assert(imm>0);
805 assert(imm<32);
806 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
807 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
808}
809
810static void emit_shrimm(int rs,u_int imm,int rt)
811{
812 assert(imm>0);
813 assert(imm<32);
814 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
815 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
816}
817
818static void emit_sarimm(int rs,u_int imm,int rt)
819{
820 assert(imm>0);
821 assert(imm<32);
822 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
823 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
824}
825
826static void emit_rorimm(int rs,u_int imm,int rt)
827{
828 assert(imm>0);
829 assert(imm<32);
830 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
831 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
832}
833
// Sign-extend the low 16 bits of rs into rt: sxth when HAVE_ARMV6 is
// defined, otherwise a shift left by 16 followed by an arithmetic shift
// right by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
844
// Sign-extend the low 8 bits of rs into rt: sxtb when HAVE_ARMV6 is
// defined, otherwise a shift left by 24 followed by an arithmetic shift
// right by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
855
856static void emit_shl(u_int rs,u_int shift,u_int rt)
857{
858 assert(rs<16);
859 assert(rt<16);
860 assert(shift<16);
861 //if(imm==1) ...
862 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
863 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
864}
865
866static void emit_shr(u_int rs,u_int shift,u_int rt)
867{
868 assert(rs<16);
869 assert(rt<16);
870 assert(shift<16);
871 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
872 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
873}
874
875static void emit_sar(u_int rs,u_int shift,u_int rt)
876{
877 assert(rs<16);
878 assert(rt<16);
879 assert(shift<16);
880 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
881 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
882}
883
884static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
885{
886 assert(rs<16);
887 assert(rt<16);
888 assert(shift<16);
889 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
890 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
891}
892
893static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
894{
895 assert(rs<16);
896 assert(rt<16);
897 assert(shift<16);
898 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
899 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
900}
901
902static void emit_cmpimm(int rs,int imm)
903{
904 u_int armval;
905 if(genimm(imm,&armval)) {
906 assem_debug("cmp %s,#%d\n",regname[rs],imm);
907 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
908 }else if(genimm(-imm,&armval)) {
909 assem_debug("cmn %s,#%d\n",regname[rs],imm);
910 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
911 }else if(imm>0) {
912 assert(imm<65536);
913 emit_movimm(imm,HOST_TEMPREG);
914 assem_debug("cmp %s,r14\n",regname[rs]);
915 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
916 }else{
917 assert(imm>-65536);
918 emit_movimm(-imm,HOST_TEMPREG);
919 assem_debug("cmn %s,r14\n",regname[rs]);
920 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
921 }
922}
923
924static void emit_cmovne_imm(int imm,int rt)
925{
926 assem_debug("movne %s,#%d\n",regname[rt],imm);
927 u_int armval;
928 genimm_checked(imm,&armval);
929 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
930}
931
932static void emit_cmovl_imm(int imm,int rt)
933{
934 assem_debug("movlt %s,#%d\n",regname[rt],imm);
935 u_int armval;
936 genimm_checked(imm,&armval);
937 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
938}
939
940static void emit_cmovb_imm(int imm,int rt)
941{
942 assem_debug("movcc %s,#%d\n",regname[rt],imm);
943 u_int armval;
944 genimm_checked(imm,&armval);
945 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
946}
947
948static void emit_cmovae_imm(int imm,int rt)
949{
950 assem_debug("movcs %s,#%d\n",regname[rt],imm);
951 u_int armval;
952 genimm_checked(imm,&armval);
953 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
954}
955
956static void emit_cmovne_reg(int rs,int rt)
957{
958 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
959 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
960}
961
962static void emit_cmovl_reg(int rs,int rt)
963{
964 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
965 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
966}
967
968static void emit_cmovs_reg(int rs,int rt)
969{
970 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
971 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
972}
973
// rt = (rs < imm) ? 1 : 0 using a signed compare (movlt).
// When rs and rt are the same register, rt is cleared only after the
// compare has read it.
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
981
// rt = (rs < imm) ? 1 : 0 using the carry-clear condition (movcc).
// When rs and rt are the same register, rt is cleared only after the
// compare has read it.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
989
990static void emit_cmp(int rs,int rt)
991{
992 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
993 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
994}
995
// rt = 1, then rt = 0 if rs compares less than 1 (signed).
// Sequence emitted: cmp rs,#1 ; mov rt,#1 ; movlt rt,#0
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1003
// Set the flags from rs (movs into rt, or tst when rs and rt are the
// same register), then write 1 to rt when the result was non-zero.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
1011
// rt = (rs1 < rs2) ? 1 : 0 using a signed compare (movlt).
// If rt is also one of the inputs it is cleared only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1020
// rt = 1 when "cmp rs1,rs2" leaves the carry clear (movcc), else 0.
// If rt is also one of the inputs it is cleared only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1029
1030static int can_jump_or_call(const void *a)
1031{
1032 intptr_t offset = (u_char *)a - out - 8;
1033 return (-33554432 <= offset && offset < 33554432);
1034}
1035
1036static void emit_call(const void *a_)
1037{
1038 int a = (int)a_;
1039 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1040 u_int offset=genjmp(a);
1041 output_w32(0xeb000000|offset);
1042}
1043
1044static void emit_jmp(const void *a_)
1045{
1046 int a = (int)a_;
1047 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1048 u_int offset=genjmp(a);
1049 output_w32(0xea000000|offset);
1050}
1051
// bne a_
static void emit_jne(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bne %x\n",dest);
  output_w32(0x1a000000|genjmp(dest));
}
1059
// beq a_
static void emit_jeq(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("beq %x\n",dest);
  output_w32(0x0a000000|genjmp(dest));
}
1067
// bmi a_
static void emit_js(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bmi %x\n",dest);
  output_w32(0x4a000000|genjmp(dest));
}
1075
// bpl a_
static void emit_jns(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bpl %x\n",dest);
  output_w32(0x5a000000|genjmp(dest));
}
1083
// blt a_
static void emit_jl(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("blt %x\n",dest);
  output_w32(0xba000000|genjmp(dest));
}
1091
// bge a_
static void emit_jge(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bge %x\n",dest);
  output_w32(0xaa000000|genjmp(dest));
}
1099
// bvc a_
static void emit_jno(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bvc %x\n",dest);
  output_w32(0x7a000000|genjmp(dest));
}
1107
// bcs a_
static void emit_jc(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bcs %x\n",dest);
  output_w32(0x2a000000|genjmp(dest));
}
1115
// bcc a_
static void emit_jcc(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bcc %x\n",dest);
  output_w32(0x3a000000|genjmp(dest));
}
1123
1124static unused void emit_callreg(u_int r)
1125{
1126 assert(r<15);
1127 assem_debug("blx %s\n",regname[r]);
1128 output_w32(0xe12fff30|r);
1129}
1130
1131static void emit_jmpreg(u_int r)
1132{
1133 assem_debug("mov pc,%s\n",regname[r]);
1134 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1135}
1136
1137static void emit_ret(void)
1138{
1139 emit_jmpreg(14);
1140}
1141
1142static void emit_readword_indexed(int offset, int rs, int rt)
1143{
1144 assert(offset>-4096&&offset<4096);
1145 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1146 if(offset>=0) {
1147 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1148 }else{
1149 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1150 }
1151}
1152
1153static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1154{
1155 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1156 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1157}
1158
1159static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1160{
1161 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1162 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1163}
1164
1165static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1166{
1167 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1168 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1169}
1170
1171static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1172{
1173 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1174 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1175}
1176
1177static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1178{
1179 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1180 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1181}
1182
1183static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1184{
1185 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1186 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1187}
1188
1189static void emit_movsbl_indexed(int offset, int rs, int rt)
1190{
1191 assert(offset>-256&&offset<256);
1192 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1193 if(offset>=0) {
1194 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1195 }else{
1196 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1197 }
1198}
1199
1200static void emit_movswl_indexed(int offset, int rs, int rt)
1201{
1202 assert(offset>-256&&offset<256);
1203 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1204 if(offset>=0) {
1205 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1206 }else{
1207 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1208 }
1209}
1210
1211static void emit_movzbl_indexed(int offset, int rs, int rt)
1212{
1213 assert(offset>-4096&&offset<4096);
1214 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1215 if(offset>=0) {
1216 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1217 }else{
1218 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1219 }
1220}
1221
1222static void emit_movzwl_indexed(int offset, int rs, int rt)
1223{
1224 assert(offset>-256&&offset<256);
1225 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1226 if(offset>=0) {
1227 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1228 }else{
1229 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1230 }
1231}
1232
1233static void emit_ldrd(int offset, int rs, int rt)
1234{
1235 assert(offset>-256&&offset<256);
1236 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1237 if(offset>=0) {
1238 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1239 }else{
1240 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1241 }
1242}
1243
1244static void emit_readword(void *addr, int rt)
1245{
1246 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1247 assert(offset<4096);
1248 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1249 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1250}
1251
1252static void emit_writeword_indexed(int rt, int offset, int rs)
1253{
1254 assert(offset>-4096&&offset<4096);
1255 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1256 if(offset>=0) {
1257 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1258 }else{
1259 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1260 }
1261}
1262
1263static void emit_writehword_indexed(int rt, int offset, int rs)
1264{
1265 assert(offset>-256&&offset<256);
1266 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1267 if(offset>=0) {
1268 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1269 }else{
1270 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1271 }
1272}
1273
1274static void emit_writebyte_indexed(int rt, int offset, int rs)
1275{
1276 assert(offset>-4096&&offset<4096);
1277 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1278 if(offset>=0) {
1279 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1280 }else{
1281 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1282 }
1283}
1284
1285static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1286{
1287 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1288 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1289}
1290
1291static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1292{
1293 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1294 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1295}
1296
1297static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1298{
1299 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1300 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1301}
1302
1303static void emit_writeword(int rt, void *addr)
1304{
1305 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1306 assert(offset<4096);
1307 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1308 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1309}
1310
1311static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1312{
1313 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1314 assert(rs1<16);
1315 assert(rs2<16);
1316 assert(hi<16);
1317 assert(lo<16);
1318 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1319}
1320
1321static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1322{
1323 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1324 assert(rs1<16);
1325 assert(rs2<16);
1326 assert(hi<16);
1327 assert(lo<16);
1328 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1329}
1330
1331static void emit_clz(int rs,int rt)
1332{
1333 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1334 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1335}
1336
1337static void emit_subcs(int rs1,int rs2,int rt)
1338{
1339 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1340 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1341}
1342
1343static void emit_shrcc_imm(int rs,u_int imm,int rt)
1344{
1345 assert(imm>0);
1346 assert(imm<32);
1347 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1349}
1350
1351static void emit_shrne_imm(int rs,u_int imm,int rt)
1352{
1353 assert(imm>0);
1354 assert(imm<32);
1355 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1356 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1357}
1358
1359static void emit_negmi(int rs, int rt)
1360{
1361 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1362 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1363}
1364
1365static void emit_negsmi(int rs, int rt)
1366{
1367 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1368 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1369}
1370
1371static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1372{
1373 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1374 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1375}
1376
1377static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1378{
1379 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1380 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1381}
1382
1383static void emit_teq(int rs, int rt)
1384{
1385 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1386 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1387}
1388
1389static unused void emit_rsbimm(int rs, int imm, int rt)
1390{
1391 u_int armval;
1392 genimm_checked(imm,&armval);
1393 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1394 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1395}
1396
1397// Conditionally select one of two immediates, optimizing for small code size
1398// This will only be called if HAVE_CMOV_IMM is defined
1399static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1400{
1401 u_int armval;
1402 if(genimm(imm2-imm1,&armval)) {
1403 emit_movimm(imm1,rt);
1404 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1405 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1406 }else if(genimm(imm1-imm2,&armval)) {
1407 emit_movimm(imm1,rt);
1408 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1409 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1410 }
1411 else {
1412 #ifndef HAVE_ARMV7
1413 emit_movimm(imm1,rt);
1414 add_literal((int)out,imm2);
1415 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1416 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1417 #else
1418 emit_movw(imm1&0x0000FFFF,rt);
1419 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1420 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1421 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1422 }
1423 emit_movt(imm1&0xFFFF0000,rt);
1424 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1425 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1426 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1427 }
1428 #endif
1429 }
1430}
1431
1432// special case for checking invalid_code
1433static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1434{
1435 assert(imm<128&&imm>=0);
1436 assert(r>=0&&r<16);
1437 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1438 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1439 emit_cmpimm(HOST_TEMPREG,imm);
1440}
1441
1442static void emit_callne(int a)
1443{
1444 assem_debug("blne %x\n",a);
1445 u_int offset=genjmp(a);
1446 output_w32(0x1b000000|offset);
1447}
1448
1449// Used to preload hash table entries
1450static unused void emit_prefetchreg(int r)
1451{
1452 assem_debug("pld %s\n",regname[r]);
1453 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1454}
1455
1456// Special case for mini_ht
1457static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1458{
1459 assert(offset<4096);
1460 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1461 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1462}
1463
1464static void emit_orrne_imm(int rs,int imm,int rt)
1465{
1466 u_int armval;
1467 genimm_checked(imm,&armval);
1468 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1469 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1470}
1471
1472static void emit_andne_imm(int rs,int imm,int rt)
1473{
1474 u_int armval;
1475 genimm_checked(imm,&armval);
1476 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1477 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1478}
1479
1480static unused void emit_addpl_imm(int rs,int imm,int rt)
1481{
1482 u_int armval;
1483 genimm_checked(imm,&armval);
1484 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1485 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1486}
1487
1488static void emit_jno_unlikely(int a)
1489{
1490 //emit_jno(a);
1491 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1492 output_w32(0x72800000|rd_rn_rm(15,15,0));
1493}
1494
1495static void save_regs_all(u_int reglist)
1496{
1497 int i;
1498 if(!reglist) return;
1499 assem_debug("stmia fp,{");
1500 for(i=0;i<16;i++)
1501 if(reglist&(1<<i))
1502 assem_debug("r%d,",i);
1503 assem_debug("}\n");
1504 output_w32(0xe88b0000|reglist);
1505}
1506
1507static void restore_regs_all(u_int reglist)
1508{
1509 int i;
1510 if(!reglist) return;
1511 assem_debug("ldmia fp,{");
1512 for(i=0;i<16;i++)
1513 if(reglist&(1<<i))
1514 assem_debug("r%d,",i);
1515 assem_debug("}\n");
1516 output_w32(0xe89b0000|reglist);
1517}
1518
1519// Save registers before function call
1520static void save_regs(u_int reglist)
1521{
1522 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1523 save_regs_all(reglist);
1524}
1525
1526// Restore registers after function call
1527static void restore_regs(u_int reglist)
1528{
1529 reglist&=CALLER_SAVE_REGS;
1530 restore_regs_all(reglist);
1531}
1532
1533/* Stubs/epilogue */
1534
1535static void literal_pool(int n)
1536{
1537 if(!literalcount) return;
1538 if(n) {
1539 if((int)out-literals[0][0]<4096-n) return;
1540 }
1541 u_int *ptr;
1542 int i;
1543 for(i=0;i<literalcount;i++)
1544 {
1545 u_int l_addr=(u_int)out;
1546 int j;
1547 for(j=0;j<i;j++) {
1548 if(literals[j][1]==literals[i][1]) {
1549 //printf("dup %08x\n",literals[i][1]);
1550 l_addr=literals[j][0];
1551 break;
1552 }
1553 }
1554 ptr=(u_int *)literals[i][0];
1555 u_int offset=l_addr-(u_int)ptr-8;
1556 assert(offset<4096);
1557 assert(!(offset&3));
1558 *ptr|=offset;
1559 if(l_addr==(u_int)out) {
1560 literals[i][0]=l_addr; // remember for dupes
1561 output_w32(literals[i][1]);
1562 }
1563 }
1564 literalcount=0;
1565}
1566
1567static void literal_pool_jumpover(int n)
1568{
1569 if(!literalcount) return;
1570 if(n) {
1571 if((int)out-literals[0][0]<4096-n) return;
1572 }
1573 void *jaddr = out;
1574 emit_jmp(0);
1575 literal_pool(0);
1576 set_jump_target(jaddr, out);
1577}
1578
1579// parsed by get_pointer, find_extjump_insn
1580static void emit_extjump2(u_char *addr, u_int target, void *linker)
1581{
1582 u_char *ptr=(u_char *)addr;
1583 assert((ptr[3]&0x0e)==0xa);
1584 (void)ptr;
1585
1586 emit_loadlp(target,0);
1587 emit_loadlp((u_int)addr,1);
1588 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
1589 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1590//DEBUG >
1591#ifdef DEBUG_CYCLE_COUNT
1592 emit_readword(&last_count,ECX);
1593 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1594 emit_readword(&next_interupt,ECX);
1595 emit_writeword(HOST_CCREG,&Count);
1596 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1597 emit_writeword(ECX,&last_count);
1598#endif
1599//DEBUG <
1600 emit_far_jump(linker);
1601}
1602
1603static void check_extjump2(void *src)
1604{
1605 u_int *ptr = src;
1606 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1607 (void)ptr;
1608}
1609
1610// put rt_val into rt, potentially making use of rs with value rs_val
1611static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1612{
1613 u_int armval;
1614 int diff;
1615 if(genimm(rt_val,&armval)) {
1616 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1617 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1618 return;
1619 }
1620 if(genimm(~rt_val,&armval)) {
1621 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1622 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1623 return;
1624 }
1625 diff=rt_val-rs_val;
1626 if(genimm(diff,&armval)) {
1627 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1628 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1629 return;
1630 }else if(genimm(-diff,&armval)) {
1631 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1632 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1633 return;
1634 }
1635 emit_movimm(rt_val,rt);
1636}
1637
1638// return 1 if above function can do it's job cheaply
1639static int is_similar_value(u_int v1,u_int v2)
1640{
1641 u_int xs;
1642 int diff;
1643 if(v1==v2) return 1;
1644 diff=v2-v1;
1645 for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2)
1646 ;
1647 if(xs<0x100) return 1;
1648 for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2)
1649 ;
1650 if(xs<0x100) return 1;
1651 return 0;
1652}
1653
1654static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1655{
1656 switch(type) {
1657 case LOADB_STUB: emit_signextend8(rs,rt); break;
1658 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1659 case LOADH_STUB: emit_signextend16(rs,rt); break;
1660 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1661 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1662 default: assert(0);
1663 }
1664}
1665
1666#include "pcsxmem.h"
1667#include "pcsxmem_inline.c"
1668
1669static void do_readstub(int n)
1670{
1671 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1672 literal_pool(256);
1673 set_jump_target(stubs[n].addr, out);
1674 enum stub_type type=stubs[n].type;
1675 int i=stubs[n].a;
1676 int rs=stubs[n].b;
1677 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1678 u_int reglist=stubs[n].e;
1679 signed char *i_regmap=i_regs->regmap;
1680 int rt;
1681 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1682 rt=get_reg(i_regmap,FTEMP);
1683 }else{
1684 rt=get_reg(i_regmap,rt1[i]);
1685 }
1686 assert(rs>=0);
1687 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1688 void *restore_jump = NULL;
1689 reglist|=(1<<rs);
1690 for(r=0;r<=12;r++) {
1691 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1692 temp=r; break;
1693 }
1694 }
1695 if(rt>=0&&rt1[i]!=0)
1696 reglist&=~(1<<rt);
1697 if(temp==-1) {
1698 save_regs(reglist);
1699 regs_saved=1;
1700 temp=(rs==0)?2:0;
1701 }
1702 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1703 temp2=1;
1704 emit_readword(&mem_rtab,temp);
1705 emit_shrimm(rs,12,temp2);
1706 emit_readword_dualindexedx4(temp,temp2,temp2);
1707 emit_lsls_imm(temp2,1,temp2);
1708 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1709 switch(type) {
1710 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1711 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1712 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1713 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1714 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1715 default: assert(0);
1716 }
1717 }
1718 if(regs_saved) {
1719 restore_jump=out;
1720 emit_jcc(0); // jump to reg restore
1721 }
1722 else
1723 emit_jcc(stubs[n].retaddr); // return address
1724
1725 if(!regs_saved)
1726 save_regs(reglist);
1727 void *handler=NULL;
1728 if(type==LOADB_STUB||type==LOADBU_STUB)
1729 handler=jump_handler_read8;
1730 if(type==LOADH_STUB||type==LOADHU_STUB)
1731 handler=jump_handler_read16;
1732 if(type==LOADW_STUB)
1733 handler=jump_handler_read32;
1734 assert(handler);
1735 pass_args(rs,temp2);
1736 int cc=get_reg(i_regmap,CCREG);
1737 if(cc<0)
1738 emit_loadreg(CCREG,2);
1739 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1740 emit_far_call(handler);
1741 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1742 mov_loadtype_adj(type,0,rt);
1743 }
1744 if(restore_jump)
1745 set_jump_target(restore_jump, out);
1746 restore_regs(reglist);
1747 emit_jmp(stubs[n].retaddr); // return address
1748}
1749
1750static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1751{
1752 int rs=get_reg(regmap,target);
1753 int rt=get_reg(regmap,target);
1754 if(rs<0) rs=get_reg(regmap,-1);
1755 assert(rs>=0);
1756 u_int is_dynamic;
1757 uintptr_t host_addr = 0;
1758 void *handler;
1759 int cc=get_reg(regmap,CCREG);
1760 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
1761 return;
1762 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1763 if (handler == NULL) {
1764 if(rt<0||rt1[i]==0)
1765 return;
1766 if(addr!=host_addr)
1767 emit_movimm_from(addr,rs,host_addr,rs);
1768 switch(type) {
1769 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1770 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1771 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1772 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1773 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1774 default: assert(0);
1775 }
1776 return;
1777 }
1778 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1779 if(is_dynamic) {
1780 if(type==LOADB_STUB||type==LOADBU_STUB)
1781 handler=jump_handler_read8;
1782 if(type==LOADH_STUB||type==LOADHU_STUB)
1783 handler=jump_handler_read16;
1784 if(type==LOADW_STUB)
1785 handler=jump_handler_read32;
1786 }
1787
1788 // call a memhandler
1789 if(rt>=0&&rt1[i]!=0)
1790 reglist&=~(1<<rt);
1791 save_regs(reglist);
1792 if(target==0)
1793 emit_movimm(addr,0);
1794 else if(rs!=0)
1795 emit_mov(rs,0);
1796 if(cc<0)
1797 emit_loadreg(CCREG,2);
1798 if(is_dynamic) {
1799 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1800 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1801 }
1802 else {
1803 emit_readword(&last_count,3);
1804 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1805 emit_add(2,3,2);
1806 emit_writeword(2,&Count);
1807 }
1808
1809 emit_far_call(handler);
1810
1811 if(rt>=0&&rt1[i]!=0) {
1812 switch(type) {
1813 case LOADB_STUB: emit_signextend8(0,rt); break;
1814 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1815 case LOADH_STUB: emit_signextend16(0,rt); break;
1816 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1817 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1818 default: assert(0);
1819 }
1820 }
1821 restore_regs(reglist);
1822}
1823
1824static void do_writestub(int n)
1825{
1826 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1827 literal_pool(256);
1828 set_jump_target(stubs[n].addr, out);
1829 enum stub_type type=stubs[n].type;
1830 int i=stubs[n].a;
1831 int rs=stubs[n].b;
1832 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1833 u_int reglist=stubs[n].e;
1834 signed char *i_regmap=i_regs->regmap;
1835 int rt,r;
1836 if(itype[i]==C1LS||itype[i]==C2LS) {
1837 rt=get_reg(i_regmap,r=FTEMP);
1838 }else{
1839 rt=get_reg(i_regmap,r=rs2[i]);
1840 }
1841 assert(rs>=0);
1842 assert(rt>=0);
1843 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1844 void *restore_jump = NULL;
1845 int reglist2=reglist|(1<<rs)|(1<<rt);
1846 for(rtmp=0;rtmp<=12;rtmp++) {
1847 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1848 temp=rtmp; break;
1849 }
1850 }
1851 if(temp==-1) {
1852 save_regs(reglist);
1853 regs_saved=1;
1854 for(rtmp=0;rtmp<=3;rtmp++)
1855 if(rtmp!=rs&&rtmp!=rt)
1856 {temp=rtmp;break;}
1857 }
1858 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1859 temp2=3;
1860 emit_readword(&mem_wtab,temp);
1861 emit_shrimm(rs,12,temp2);
1862 emit_readword_dualindexedx4(temp,temp2,temp2);
1863 emit_lsls_imm(temp2,1,temp2);
1864 switch(type) {
1865 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1866 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1867 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1868 default: assert(0);
1869 }
1870 if(regs_saved) {
1871 restore_jump=out;
1872 emit_jcc(0); // jump to reg restore
1873 }
1874 else
1875 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1876
1877 if(!regs_saved)
1878 save_regs(reglist);
1879 void *handler=NULL;
1880 switch(type) {
1881 case STOREB_STUB: handler=jump_handler_write8; break;
1882 case STOREH_STUB: handler=jump_handler_write16; break;
1883 case STOREW_STUB: handler=jump_handler_write32; break;
1884 default: assert(0);
1885 }
1886 assert(handler);
1887 pass_args(rs,rt);
1888 if(temp2!=3)
1889 emit_mov(temp2,3);
1890 int cc=get_reg(i_regmap,CCREG);
1891 if(cc<0)
1892 emit_loadreg(CCREG,2);
1893 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1894 // returns new cycle_count
1895 emit_far_call(handler);
1896 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
1897 if(cc<0)
1898 emit_storereg(CCREG,2);
1899 if(restore_jump)
1900 set_jump_target(restore_jump, out);
1901 restore_regs(reglist);
1902 emit_jmp(stubs[n].retaddr);
1903}
1904
1905static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1906{
1907 int rs=get_reg(regmap,-1);
1908 int rt=get_reg(regmap,target);
1909 assert(rs>=0);
1910 assert(rt>=0);
1911 uintptr_t host_addr = 0;
1912 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1913 if (handler == NULL) {
1914 if(addr!=host_addr)
1915 emit_movimm_from(addr,rs,host_addr,rs);
1916 switch(type) {
1917 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1918 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1919 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1920 default: assert(0);
1921 }
1922 return;
1923 }
1924
1925 // call a memhandler
1926 save_regs(reglist);
1927 pass_args(rs,rt);
1928 int cc=get_reg(regmap,CCREG);
1929 if(cc<0)
1930 emit_loadreg(CCREG,2);
1931 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1932 emit_movimm((u_int)handler,3);
1933 // returns new cycle_count
1934 emit_far_call(jump_handler_write_h);
1935 emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc);
1936 if(cc<0)
1937 emit_storereg(CCREG,2);
1938 restore_regs(reglist);
1939}
1940
1941// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1942static void do_dirty_stub_emit_args(u_int arg0)
1943{
1944 #ifndef HAVE_ARMV7
1945 emit_loadlp((int)source, 1);
1946 emit_loadlp((int)copy, 2);
1947 emit_loadlp(slen*4, 3);
1948 #else
1949 emit_movw(((u_int)source)&0x0000FFFF, 1);
1950 emit_movw(((u_int)copy)&0x0000FFFF, 2);
1951 emit_movt(((u_int)source)&0xFFFF0000, 1);
1952 emit_movt(((u_int)copy)&0xFFFF0000, 2);
1953 emit_movw(slen*4, 3);
1954 #endif
1955 emit_movimm(arg0, 0);
1956}
1957
1958static void *do_dirty_stub(int i)
1959{
1960 assem_debug("do_dirty_stub %x\n",start+i*4);
1961 do_dirty_stub_emit_args(start + i*4);
1962 emit_far_call(verify_code);
1963 void *entry = out;
1964 load_regs_entry(i);
1965 if (entry == out)
1966 entry = instr_addr[i];
1967 emit_jmp(instr_addr[i]);
1968 return entry;
1969}
1970
1971static void do_dirty_stub_ds()
1972{
1973 do_dirty_stub_emit_args(start + 1);
1974 emit_far_call(verify_code_ds);
1975}
1976
1977/* Special assem */
1978
1979static void c2op_prologue(u_int op,u_int reglist)
1980{
1981 save_regs_all(reglist);
1982#ifdef PCNT
1983 emit_movimm(op,0);
1984 emit_far_call(pcnt_gte_start);
1985#endif
1986 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
1987}
1988
1989static void c2op_epilogue(u_int op,u_int reglist)
1990{
1991#ifdef PCNT
1992 emit_movimm(op,0);
1993 emit_far_call(pcnt_gte_end);
1994#endif
1995 restore_regs_all(reglist);
1996}
1997
1998static void c2op_call_MACtoIR(int lm,int need_flags)
1999{
2000 if(need_flags)
2001 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2002 else
2003 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2004}
2005
2006static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2007{
2008 emit_far_call(func);
2009 // func is C code and trashes r0
2010 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2011 if(need_flags||need_ir)
2012 c2op_call_MACtoIR(lm,need_flags);
2013 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2014}
2015
2016static void c2op_assemble(int i,struct regstat *i_regs)
2017{
2018 u_int c2op=source[i]&0x3f;
2019 u_int hr,reglist_full=0,reglist;
2020 int need_flags,need_ir;
2021 for(hr=0;hr<HOST_REGS;hr++) {
2022 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
2023 }
2024 reglist=reglist_full&CALLER_SAVE_REGS;
2025
2026 if (gte_handlers[c2op]!=NULL) {
2027 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2028 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2029 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2030 source[i],gte_unneeded[i+1],need_flags,need_ir);
2031 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
2032 need_flags=0;
2033 int shift = (source[i] >> 19) & 1;
2034 int lm = (source[i] >> 10) & 1;
2035 switch(c2op) {
2036#ifndef DRC_DBG
2037 case GTE_MVMVA: {
2038#ifdef HAVE_ARMV5
2039 int v = (source[i] >> 15) & 3;
2040 int cv = (source[i] >> 13) & 3;
2041 int mx = (source[i] >> 17) & 3;
2042 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2043 c2op_prologue(c2op,reglist);
2044 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2045 if(v<3)
2046 emit_ldrd(v*8,0,4);
2047 else {
2048 emit_movzwl_indexed(9*4,0,4); // gteIR
2049 emit_movzwl_indexed(10*4,0,6);
2050 emit_movzwl_indexed(11*4,0,5);
2051 emit_orrshl_imm(6,16,4);
2052 }
2053 if(mx<3)
2054 emit_addimm(0,32*4+mx*8*4,6);
2055 else
2056 emit_readword(&zeromem_ptr,6);
2057 if(cv<3)
2058 emit_addimm(0,32*4+(cv*8+5)*4,7);
2059 else
2060 emit_readword(&zeromem_ptr,7);
2061#ifdef __ARM_NEON__
2062 emit_movimm(source[i],1); // opcode
2063 emit_far_call(gteMVMVA_part_neon);
2064 if(need_flags) {
2065 emit_movimm(lm,1);
2066 emit_far_call(gteMACtoIR_flags_neon);
2067 }
2068#else
2069 if(cv==3&&shift)
2070 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
2071 else {
2072 emit_movimm(shift,1);
2073 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2074 }
2075 if(need_flags||need_ir)
2076 c2op_call_MACtoIR(lm,need_flags);
2077#endif
2078#else /* if not HAVE_ARMV5 */
2079 c2op_prologue(c2op,reglist);
2080 emit_movimm(source[i],1); // opcode
2081 emit_writeword(1,&psxRegs.code);
2082 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2083#endif
2084 break;
2085 }
2086 case GTE_OP:
2087 c2op_prologue(c2op,reglist);
2088 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2089 if(need_flags||need_ir) {
2090 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2091 c2op_call_MACtoIR(lm,need_flags);
2092 }
2093 break;
2094 case GTE_DPCS:
2095 c2op_prologue(c2op,reglist);
2096 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2097 break;
2098 case GTE_INTPL:
2099 c2op_prologue(c2op,reglist);
2100 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2101 break;
2102 case GTE_SQR:
2103 c2op_prologue(c2op,reglist);
2104 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2105 if(need_flags||need_ir) {
2106 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2107 c2op_call_MACtoIR(lm,need_flags);
2108 }
2109 break;
2110 case GTE_DCPL:
2111 c2op_prologue(c2op,reglist);
2112 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2113 break;
2114 case GTE_GPF:
2115 c2op_prologue(c2op,reglist);
2116 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2117 break;
2118 case GTE_GPL:
2119 c2op_prologue(c2op,reglist);
2120 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2121 break;
2122#endif
2123 default:
2124 c2op_prologue(c2op,reglist);
2125#ifdef DRC_DBG
2126 emit_movimm(source[i],1); // opcode
2127 emit_writeword(1,&psxRegs.code);
2128#endif
2129 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2130 break;
2131 }
2132 c2op_epilogue(c2op,reglist);
2133 }
2134}
2135
// Emit the value fix-up for a write to cop2 control register 31, taking the
// value from `sl` and leaving the result in `temp`. Equivalent C:
//   value = value & 0x7ffff000;
//   if (value & 0x7f87e000) value |= 0x80000000;
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // Clear the low 12 bits with a shift right / shift left pair.
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // Test the 0x7f87e000 mask in three chunks; each later test only runs
  // while the previous result was "equal" (no bits set).
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  emit_orrne_imm(temp, 0x80000000, temp);
}
2147
2148static void do_mfc2_31_one(u_int copr,signed char temp)
2149{
2150 emit_readword(&reg_cop2d[copr],temp);
2151 emit_testimm(temp,0x8000); // do we need this?
2152 emit_andne_imm(temp,0,temp);
2153 emit_cmpimm(temp,0xf80);
2154 emit_andimm(temp,0xf80,temp);
2155 emit_cmovae_imm(0xf80,temp);
2156}
2157
2158static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2159{
2160 if (temp < 0) {
2161 host_tempreg_acquire();
2162 temp = HOST_TEMPREG;
2163 }
2164 do_mfc2_31_one(9,temp);
2165 emit_shrimm(temp,7,tl);
2166 do_mfc2_31_one(10,temp);
2167 emit_orrshr_imm(temp,2,tl);
2168 do_mfc2_31_one(11,temp);
2169 emit_orrshl_imm(temp,3,tl);
2170 emit_writeword(tl,&reg_cop2d[29]);
2171 if (temp == HOST_TEMPREG)
2172 host_tempreg_release();
2173}
2174
2175static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2176{
2177 // case 0x18: MULT
2178 // case 0x19: MULTU
2179 // case 0x1A: DIV
2180 // case 0x1B: DIVU
2181 // case 0x1C: DMULT
2182 // case 0x1D: DMULTU
2183 // case 0x1E: DDIV
2184 // case 0x1F: DDIVU
2185 if(rs1[i]&&rs2[i])
2186 {
2187 if((opcode2[i]&4)==0) // 32-bit
2188 {
2189 if(opcode2[i]==0x18) // MULT
2190 {
2191 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2192 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2193 signed char hi=get_reg(i_regs->regmap,HIREG);
2194 signed char lo=get_reg(i_regs->regmap,LOREG);
2195 assert(m1>=0);
2196 assert(m2>=0);
2197 assert(hi>=0);
2198 assert(lo>=0);
2199 emit_smull(m1,m2,hi,lo);
2200 }
2201 if(opcode2[i]==0x19) // MULTU
2202 {
2203 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2204 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2205 signed char hi=get_reg(i_regs->regmap,HIREG);
2206 signed char lo=get_reg(i_regs->regmap,LOREG);
2207 assert(m1>=0);
2208 assert(m2>=0);
2209 assert(hi>=0);
2210 assert(lo>=0);
2211 emit_umull(m1,m2,hi,lo);
2212 }
2213 if(opcode2[i]==0x1A) // DIV
2214 {
2215 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2216 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2217 assert(d1>=0);
2218 assert(d2>=0);
2219 signed char quotient=get_reg(i_regs->regmap,LOREG);
2220 signed char remainder=get_reg(i_regs->regmap,HIREG);
2221 assert(quotient>=0);
2222 assert(remainder>=0);
2223 emit_movs(d1,remainder);
2224 emit_movimm(0xffffffff,quotient);
2225 emit_negmi(quotient,quotient); // .. quotient and ..
2226 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2227 emit_movs(d2,HOST_TEMPREG);
2228 emit_jeq(out+52); // Division by zero
2229 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2230#ifdef HAVE_ARMV5
2231 emit_clz(HOST_TEMPREG,quotient);
2232 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2233#else
2234 emit_movimm(0,quotient);
2235 emit_addpl_imm(quotient,1,quotient);
2236 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2237 emit_jns(out-2*4);
2238#endif
2239 emit_orimm(quotient,1<<31,quotient);
2240 emit_shr(quotient,quotient,quotient);
2241 emit_cmp(remainder,HOST_TEMPREG);
2242 emit_subcs(remainder,HOST_TEMPREG,remainder);
2243 emit_adcs(quotient,quotient,quotient);
2244 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2245 emit_jcc(out-16); // -4
2246 emit_teq(d1,d2);
2247 emit_negmi(quotient,quotient);
2248 emit_test(d1,d1);
2249 emit_negmi(remainder,remainder);
2250 }
2251 if(opcode2[i]==0x1B) // DIVU
2252 {
2253 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2254 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2255 assert(d1>=0);
2256 assert(d2>=0);
2257 signed char quotient=get_reg(i_regs->regmap,LOREG);
2258 signed char remainder=get_reg(i_regs->regmap,HIREG);
2259 assert(quotient>=0);
2260 assert(remainder>=0);
2261 emit_mov(d1,remainder);
2262 emit_movimm(0xffffffff,quotient); // div0 case
2263 emit_test(d2,d2);
2264 emit_jeq(out+40); // Division by zero
2265#ifdef HAVE_ARMV5
2266 emit_clz(d2,HOST_TEMPREG);
2267 emit_movimm(1<<31,quotient);
2268 emit_shl(d2,HOST_TEMPREG,d2);
2269#else
2270 emit_movimm(0,HOST_TEMPREG);
2271 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2272 emit_lslpls_imm(d2,1,d2);
2273 emit_jns(out-2*4);
2274 emit_movimm(1<<31,quotient);
2275#endif
2276 emit_shr(quotient,HOST_TEMPREG,quotient);
2277 emit_cmp(remainder,d2);
2278 emit_subcs(remainder,d2,remainder);
2279 emit_adcs(quotient,quotient,quotient);
2280 emit_shrcc_imm(d2,1,d2);
2281 emit_jcc(out-16); // -4
2282 }
2283 }
2284 else // 64-bit
2285 assert(0);
2286 }
2287 else
2288 {
2289 // Multiply by zero is zero.
2290 // MIPS does not have a divide by zero exception.
2291 // The result is undefined, we return zero.
2292 signed char hr=get_reg(i_regs->regmap,HIREG);
2293 signed char lr=get_reg(i_regs->regmap,LOREG);
2294 if(hr>=0) emit_zeroreg(hr);
2295 if(lr>=0) emit_zeroreg(lr);
2296 }
2297}
2298#define multdiv_assemble multdiv_assemble_arm
2299
2300static void do_jump_vaddr(int rs)
2301{
2302 emit_far_jump(jump_vaddr_reg[rs]);
2303}
2304
/*
 * Intentionally emits nothing on ARM.
 *
 * On x86 the equivalent routine loads the constant 0xf8 into the register;
 * on ARM the hash is computed by a single instruction (see do_rhash), so
 * no preload is required.
 */
static void do_preload_rhash(int r)
{
}
2309
2310static void do_preload_rhtbl(int ht) {
2311 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2312}
2313
/*
 * Emit the hash computation: rh = rs & 0xf8.
 */
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2317
2318static void do_miniht_load(int ht,int rh) {
2319 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2320 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2321}
2322
/*
 * Emit the dispatch on a mini hash table lookup: compare 'rh' with 'rs'
 * and, when equal, load host register 15 from [ht + 4]; otherwise continue
 * with the generic jump through do_jump_vaddr().
 */
static void do_miniht_jump(int rs,int rh,int ht)
{
  int vaddr_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);

#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // With this hack the fallback jump always goes through register 7.
  if (vaddr_reg != 7)
    emit_mov(vaddr_reg, 7);
  vaddr_reg = 7;
#endif

  do_jump_vaddr(vaddr_reg);
}
2333
2334static void do_miniht_insert(u_int return_address,int rt,int temp) {
2335 #ifndef HAVE_ARMV7
2336 emit_movimm(return_address,rt); // PC into link register
2337 add_to_linker(out,return_address,1);
2338 emit_pcreladdr(temp);
2339 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2340 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2341 #else
2342 emit_movw(return_address&0x0000FFFF,rt);
2343 add_to_linker(out,return_address,1);
2344 emit_pcreladdr(temp);
2345 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2346 emit_movt(return_address&0xFFFF0000,rt);
2347 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2348 #endif
2349}
2350
2351// CPU-architecture-specific initialization
2352static void arch_init(void)
2353{
2354 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2355 struct tramp_insns *ops = ndrc->tramp.ops;
2356 size_t i;
2357 assert(!(diff & 3));
2358 assert(diff < 0x1000);
2359 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2360 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2361 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2362 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2363}
2364
2365// vim:shiftwidth=2:expandtab