drc: something works on arm64
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
// Storage for the translation cache, selected at build time:
//  - BASE_ADDR_FIXED:   nothing is defined in this file
//  - BASE_ADDR_DYNAMIC: only a pointer is defined here
//                       (NOTE(review): presumably assigned during init elsewhere - confirm)
//  - otherwise:         a static buffer of 1<<TARGET_SIZE_2 bytes, aligned to 4096
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33u_char *translation_cache;
34#else
35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// Bit mask with bits 0-3 and 12 set; on __MACH__ bit 9 is set as well.
// NOTE(review): presumably one bit per host register (r0-r3, r12, plus r9 on
// Darwin) that must be preserved around calls - confirm against the users.
38#ifndef __MACH__
39#define CALLER_SAVE_REGS 0x100f
40#else
41#define CALLER_SAVE_REGS 0x120f
42#endif
43
44#define unused __attribute__((unused))
45
46#ifdef DRC_DBG
47#pragma GCC diagnostic ignored "-Wunused-function"
48#pragma GCC diagnostic ignored "-Wunused-variable"
49#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
50#endif
51
// Entry points referenced by the generated code. No C definition of these is
// visible in this file (NOTE(review): presumably assembly stubs - confirm).
// The jump_vaddr_rN variants exist for r0-r10 and r12 only; they are collected
// in the jump_vaddr_reg[] table that follows.
52void indirect_jump_indexed();
53void indirect_jump();
54void do_interrupt();
55void jump_vaddr_r0();
56void jump_vaddr_r1();
57void jump_vaddr_r2();
58void jump_vaddr_r3();
59void jump_vaddr_r4();
60void jump_vaddr_r5();
61void jump_vaddr_r6();
62void jump_vaddr_r7();
63void jump_vaddr_r8();
64void jump_vaddr_r9();
65void jump_vaddr_r10();
66void jump_vaddr_r12();
67
68void * const jump_vaddr_reg[16] = {
69 jump_vaddr_r0,
70 jump_vaddr_r1,
71 jump_vaddr_r2,
72 jump_vaddr_r3,
73 jump_vaddr_r4,
74 jump_vaddr_r5,
75 jump_vaddr_r6,
76 jump_vaddr_r7,
77 jump_vaddr_r8,
78 jump_vaddr_r9,
79 jump_vaddr_r10,
80 0,
81 jump_vaddr_r12,
82 0,
83 0,
84 0
85};
86
// Per-register invalidate entry points (r0-r10 and r12). No C definition is
// visible in this file (NOTE(review): presumably assembly stubs - confirm).
// Their addresses are collected in invalidate_addr_reg[] below.
87void invalidate_addr_r0();
88void invalidate_addr_r1();
89void invalidate_addr_r2();
90void invalidate_addr_r3();
91void invalidate_addr_r4();
92void invalidate_addr_r5();
93void invalidate_addr_r6();
94void invalidate_addr_r7();
95void invalidate_addr_r8();
96void invalidate_addr_r9();
97void invalidate_addr_r10();
98void invalidate_addr_r12();
99
100const u_int invalidate_addr_reg[16] = {
101 (int)invalidate_addr_r0,
102 (int)invalidate_addr_r1,
103 (int)invalidate_addr_r2,
104 (int)invalidate_addr_r3,
105 (int)invalidate_addr_r4,
106 (int)invalidate_addr_r5,
107 (int)invalidate_addr_r6,
108 (int)invalidate_addr_r7,
109 (int)invalidate_addr_r8,
110 (int)invalidate_addr_r9,
111 (int)invalidate_addr_r10,
112 0,
113 (int)invalidate_addr_r12,
114 0,
115 0,
116 0};
117
// 1<<(TARGET_SIZE_2-17) words of per-cache state.
// NOTE(review): presumably a bitmap with one bit per 4 KiB of translation
// cache (32 bits * 2^12 bytes = 2^17 bytes per word) - confirm against users.
118static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
119
120/* Linker */
121
122static void set_jump_target(void *addr, void *target_)
123{
124 u_int target = (u_int)target_;
125 u_char *ptr = addr;
126 u_int *ptr2=(u_int *)ptr;
127 if(ptr[3]==0xe2) {
128 assert((target-(u_int)ptr2-8)<1024);
129 assert(((uintptr_t)addr&3)==0);
130 assert((target&3)==0);
131 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
132 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
133 }
134 else if(ptr[3]==0x72) {
135 // generated by emit_jno_unlikely
136 if((target-(u_int)ptr2-8)<1024) {
137 assert(((uintptr_t)addr&3)==0);
138 assert((target&3)==0);
139 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
140 }
141 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
142 assert(((uintptr_t)addr&3)==0);
143 assert((target&3)==0);
144 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
145 }
146 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
147 }
148 else {
149 assert((ptr[3]&0x0e)==0xa);
150 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
151 }
152}
153
154// This optionally copies the instruction from the target of the branch into
155// the space before the branch. Works, but the difference in speed is
156// usually insignificant.
// Disabled variant of set_jump_target (compiled out by the #if 0 below).
// When 'copy' is set and the target instruction is eligible, the target's
// first instruction is written into the word before the branch and the
// branch is pointed one instruction further.
157#if 0
158static void set_jump_target_fillslot(int addr,u_int target,int copy)
159{
160 u_char *ptr=(u_char *)addr;
161 u_int *ptr2=(u_int *)ptr;
// the slot before the branch must hold 0xe28dd000 if it is to be overwritten
162 assert(!copy||ptr2[-1]==0xe28dd000);
163 if(ptr[3]==0xe2) {
164 assert(!copy);
165 assert((target-(u_int)ptr2-8)<4096);
166 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
167 }
168 else {
169 assert((ptr[3]&0x0e)==0xa);
170 u_int target_insn=*(u_int *)target;
// the checks below veto the copy for certain classes of target instruction
171 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
172 copy=0;
173 }
174 if((target_insn&0x0c100000)==0x04100000) { // Load
175 copy=0;
176 }
177 if(target_insn&0x08000000) {
178 copy=0;
179 }
180 if(copy) {
181 ptr2[-1]=target_insn;
// skip the instruction that was just copied
182 target+=4;
183 }
184 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
185 }
186}
187#endif
188
189/* Literal pool */
190static void add_literal(int addr,int val)
191{
192 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
193 literals[literalcount][0]=addr;
194 literals[literalcount][1]=val;
195 literalcount++;
196}
197
198// from a pointer to external jump stub (which was produced by emit_extjump2)
199// find where the jumping insn is
// Given a pointer to an external jump stub, return the pointer stored in the
// literal that the stub's second instruction (a pc-relative ldr) loads.
static void *find_extjump_insn(void *stub)
{
  int *ldr=(int *)(stub+4);                 // second instruction of the stub
  assert((*ldr&0x0fff0000)==0x059f0000);    // ldr rx, [pc, #ofs]
  // pc reads as the instruction address + 8
  void **literal=(void *)ldr+(*ldr&0xfff)+8;
  return *literal;
}
208
209// find where an external branch is linked to, using the addr of its stub:
210// get address that insn one after stub loads (dyna_linker arg1),
211// treat it as a pointer to branch insn,
212// return addr where that branch jumps to
213static void *get_pointer(void *stub)
214{
215 //printf("get_pointer(%x)\n",(int)stub);
216 int *i_ptr=find_extjump_insn(stub);
217 assert((*i_ptr&0x0f000000)==0x0a000000);
218 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
219}
220
221// Find the "clean" entry point from a "dirty" entry point
222// by skipping past the call to verify_code
// Return the "clean" entry point that follows a "dirty" entry stub.
// Skips the stub's setup instructions and the bl, then follows an
// unconditional branch if one comes directly after the bl.
static void *get_clean_addr(void *addr)
{
  #ifndef HAVE_ARMV7
  signed int *insn = (signed int *)addr + 4;
  #else
  signed int *insn = (signed int *)addr + 6;
  #endif
  // the bl may sit one instruction further on
  if((*insn&0xFF000000)!=0xeb000000) insn++;
  assert((*insn&0xFF000000)==0xeb000000); // bl instruction
  insn++;
  if((*insn&0xFF000000)!=0xea000000)
    return insn;
  return (char *)insn+((*insn<<8)>>6)+8; // follow jump
}
239
240static int verify_dirty(const u_int *ptr)
241{
242 #ifndef HAVE_ARMV7
243 u_int offset;
244 // get from literal pool
245 assert((*ptr&0xFFFF0000)==0xe59f0000);
246 offset=*ptr&0xfff;
247 u_int source=*(u_int*)((void *)ptr+offset+8);
248 ptr++;
249 assert((*ptr&0xFFFF0000)==0xe59f0000);
250 offset=*ptr&0xfff;
251 u_int copy=*(u_int*)((void *)ptr+offset+8);
252 ptr++;
253 assert((*ptr&0xFFFF0000)==0xe59f0000);
254 offset=*ptr&0xfff;
255 u_int len=*(u_int*)((void *)ptr+offset+8);
256 ptr++;
257 ptr++;
258 #else
259 // ARMv7 movw/movt
260 assert((*ptr&0xFFF00000)==0xe3000000);
261 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
262 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
263 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
264 ptr+=6;
265 #endif
266 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
267 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
268 //printf("verify_dirty: %x %x %x\n",source,copy,len);
269 return !memcmp((void *)source,(void *)copy,len);
270}
271
272// This doesn't necessarily find all clean entry points, just
273// guarantees that it's not dirty
274static int isclean(void *addr)
275{
276 #ifndef HAVE_ARMV7
277 u_int *ptr=((u_int *)addr)+4;
278 #else
279 u_int *ptr=((u_int *)addr)+6;
280 #endif
281 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
282 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
283 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
284 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
285 return 1;
286}
287
288// get source that block at addr was compiled from (host pointers)
289static void get_bounds(void *addr, u_char **start, u_char **end)
290{
291 u_int *ptr = addr;
292 #ifndef HAVE_ARMV7
293 u_int offset;
294 // get from literal pool
295 assert((*ptr&0xFFFF0000)==0xe59f0000);
296 offset=*ptr&0xfff;
297 u_int source=*(u_int*)((void *)ptr+offset+8);
298 ptr++;
299 //assert((*ptr&0xFFFF0000)==0xe59f0000);
300 //offset=*ptr&0xfff;
301 //u_int copy=*(u_int*)((void *)ptr+offset+8);
302 ptr++;
303 assert((*ptr&0xFFFF0000)==0xe59f0000);
304 offset=*ptr&0xfff;
305 u_int len=*(u_int*)((void *)ptr+offset+8);
306 ptr++;
307 ptr++;
308 #else
309 // ARMv7 movw/movt
310 assert((*ptr&0xFFF00000)==0xe3000000);
311 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
312 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
313 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
314 ptr+=6;
315 #endif
316 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
317 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
318 *start=(u_char *)source;
319 *end=(u_char *)source+len;
320}
321
322// Allocate a specific ARM register.
323static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
324{
325 int n;
326 int dirty=0;
327
328 // see if it's already allocated (and dealloc it)
329 for(n=0;n<HOST_REGS;n++)
330 {
331 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
332 dirty=(cur->dirty>>n)&1;
333 cur->regmap[n]=-1;
334 }
335 }
336
337 cur->regmap[hr]=reg;
338 cur->dirty&=~(1<<hr);
339 cur->dirty|=dirty<<hr;
340 cur->isconst&=~(1<<hr);
341}
342
343// Alloc cycle count into dedicated register
344static void alloc_cc(struct regstat *cur,int i)
345{
346 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
347}
348
349/* Assembler */
350
351static unused char regname[16][4] = {
352 "r0",
353 "r1",
354 "r2",
355 "r3",
356 "r4",
357 "r5",
358 "r6",
359 "r7",
360 "r8",
361 "r9",
362 "r10",
363 "fp",
364 "r12",
365 "sp",
366 "lr",
367 "pc"};
368
369static void output_w32(u_int word)
370{
371 *((u_int *)out)=word;
372 out+=4;
373}
374
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
382
// Build the operand fields for an immediate data-processing instruction
// whose operand is the 8-bit value 'imm' shifted left by 'shift' bits.
// 'shift' must be even; it is stored as a rotate-right of (32-shift).
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=(32-shift)&30;
  const u_int regs=(rn<<16)|(rd<<12);
  return regs|(rotate<<7)|imm;
}
391
// Try to express 'imm' as an ARM data-processing immediate, i.e. an 8-bit
// value rotated right by an even amount.
// On success, stores the 12-bit operand field in *encoded and returns 1.
// On failure, returns 0 and leaves *encoded at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  u_int rot;
  *encoded=0;
  if(imm==0) return 1;
  // rotate the value right two bits at a time until it fits in 8 bits;
  // 'rot' is the rotation that turns that byte back into the original
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
407
408static void genimm_checked(u_int imm,u_int *encoded)
409{
410 u_int ret=genimm(imm,encoded);
411 assert(ret);
412 (void)ret;
413}
414
415static u_int genjmp(u_int addr)
416{
417 if (addr < 3) return 0; // a branch that will be patched later
418 int offset = addr-(int)out-8;
419 if (offset < -33554432 || offset >= 33554432) {
420 SysPrintf("genjmp: out of range: %08x\n", offset);
421 abort();
422 return 0;
423 }
424 return ((u_int)offset>>2)&0xffffff;
425}
426
427static unused void emit_breakpoint(void)
428{
429 assem_debug("bkpt #0\n");
430 //output_w32(0xe1200070);
431 output_w32(0xe7f001f0);
432}
433
434static void emit_mov(int rs,int rt)
435{
436 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
437 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
438}
439
440static void emit_movs(int rs,int rt)
441{
442 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
443 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
444}
445
446static void emit_add(int rs1,int rs2,int rt)
447{
448 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
449 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
450}
451
452static void emit_adcs(int rs1,int rs2,int rt)
453{
454 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
455 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
456}
457
458static void emit_neg(int rs, int rt)
459{
460 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
461 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
462}
463
464static void emit_sub(int rs1,int rs2,int rt)
465{
466 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
467 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
468}
469
470static void emit_zeroreg(int rt)
471{
472 assem_debug("mov %s,#0\n",regname[rt]);
473 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
474}
475
476static void emit_loadlp(u_int imm,u_int rt)
477{
478 add_literal((int)out,imm);
479 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
480 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
481}
482
483static void emit_movw(u_int imm,u_int rt)
484{
485 assert(imm<65536);
486 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
487 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
488}
489
490static void emit_movt(u_int imm,u_int rt)
491{
492 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
493 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
494}
495
496static void emit_movimm(u_int imm,u_int rt)
497{
498 u_int armval;
499 if(genimm(imm,&armval)) {
500 assem_debug("mov %s,#%d\n",regname[rt],imm);
501 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
502 }else if(genimm(~imm,&armval)) {
503 assem_debug("mvn %s,#%d\n",regname[rt],imm);
504 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
505 }else if(imm<65536) {
506 #ifndef HAVE_ARMV7
507 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
508 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
509 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
510 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
511 #else
512 emit_movw(imm,rt);
513 #endif
514 }else{
515 #ifndef HAVE_ARMV7
516 emit_loadlp(imm,rt);
517 #else
518 emit_movw(imm&0x0000FFFF,rt);
519 emit_movt(imm&0xFFFF0000,rt);
520 #endif
521 }
522}
523
524static void emit_pcreladdr(u_int rt)
525{
526 assem_debug("add %s,pc,#?\n",regname[rt]);
527 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
528}
529
530static void emit_loadreg(int r, int hr)
531{
532 if(r&64) {
533 SysPrintf("64bit load in 32bit mode!\n");
534 assert(0);
535 return;
536 }
537 if((r&63)==0)
538 emit_zeroreg(hr);
539 else {
540 int addr = (int)&psxRegs.GPR.r[r];
541 switch (r) {
542 //case HIREG: addr = &hi; break;
543 //case LOREG: addr = &lo; break;
544 case CCREG: addr = (int)&cycle_count; break;
545 case CSREG: addr = (int)&Status; break;
546 case INVCP: addr = (int)&invc_ptr; break;
547 default: assert(r < 34); break;
548 }
549 u_int offset = addr-(u_int)&dynarec_local;
550 assert(offset<4096);
551 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
552 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
553 }
554}
555
556static void emit_storereg(int r, int hr)
557{
558 if(r&64) {
559 SysPrintf("64bit store in 32bit mode!\n");
560 assert(0);
561 return;
562 }
563 int addr = (int)&psxRegs.GPR.r[r];
564 switch (r) {
565 //case HIREG: addr = &hi; break;
566 //case LOREG: addr = &lo; break;
567 case CCREG: addr = (int)&cycle_count; break;
568 default: assert(r < 34); break;
569 }
570 u_int offset = addr-(u_int)&dynarec_local;
571 assert(offset<4096);
572 assem_debug("str %s,fp+%d\n",regname[hr],offset);
573 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
574}
575
576static void emit_test(int rs, int rt)
577{
578 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
579 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
580}
581
582static void emit_testimm(int rs,int imm)
583{
584 u_int armval;
585 assem_debug("tst %s,#%d\n",regname[rs],imm);
586 genimm_checked(imm,&armval);
587 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
588}
589
590static void emit_testeqimm(int rs,int imm)
591{
592 u_int armval;
593 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
594 genimm_checked(imm,&armval);
595 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
596}
597
598static void emit_not(int rs,int rt)
599{
600 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
601 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
602}
603
604static void emit_and(u_int rs1,u_int rs2,u_int rt)
605{
606 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
607 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
608}
609
610static void emit_or(u_int rs1,u_int rs2,u_int rt)
611{
612 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
613 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
614}
615
616static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
617{
618 assert(rs<16);
619 assert(rt<16);
620 assert(imm<32);
621 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
622 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
623}
624
625static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
626{
627 assert(rs<16);
628 assert(rt<16);
629 assert(imm<32);
630 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
631 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
632}
633
634static void emit_xor(u_int rs1,u_int rs2,u_int rt)
635{
636 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
637 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
638}
639
640static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
641{
642 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
643 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
644}
645
646static void emit_addimm(u_int rs,int imm,u_int rt)
647{
648 assert(rs<16);
649 assert(rt<16);
650 if(imm!=0) {
651 u_int armval;
652 if(genimm(imm,&armval)) {
653 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
654 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
655 }else if(genimm(-imm,&armval)) {
656 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
657 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
658 #ifdef HAVE_ARMV7
659 }else if(rt!=rs&&(u_int)imm<65536) {
660 emit_movw(imm&0x0000ffff,rt);
661 emit_add(rs,rt,rt);
662 }else if(rt!=rs&&(u_int)-imm<65536) {
663 emit_movw(-imm&0x0000ffff,rt);
664 emit_sub(rs,rt,rt);
665 #endif
666 }else if((u_int)-imm<65536) {
667 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
668 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
669 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
670 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
671 }else {
672 do {
673 int shift = (ffs(imm) - 1) & ~1;
674 int imm8 = imm & (0xff << shift);
675 genimm_checked(imm8,&armval);
676 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
677 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
678 rs = rt;
679 imm &= ~imm8;
680 }
681 while (imm != 0);
682 }
683 }
684 else if(rs!=rt) emit_mov(rs,rt);
685}
686
687static void emit_addimm_and_set_flags(int imm,int rt)
688{
689 assert(imm>-65536&&imm<65536);
690 u_int armval;
691 if(genimm(imm,&armval)) {
692 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
693 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
694 }else if(genimm(-imm,&armval)) {
695 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
696 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
697 }else if(imm<0) {
698 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
699 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
700 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
701 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
702 }else{
703 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
704 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
705 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
706 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
707 }
708}
709
710static void emit_addimm_no_flags(u_int imm,u_int rt)
711{
712 emit_addimm(rt,imm,rt);
713}
714
715static void emit_addnop(u_int r)
716{
717 assert(r<16);
718 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
719 output_w32(0xe2800000|rd_rn_rm(r,r,0));
720}
721
722static void emit_andimm(int rs,int imm,int rt)
723{
724 u_int armval;
725 if(imm==0) {
726 emit_zeroreg(rt);
727 }else if(genimm(imm,&armval)) {
728 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
729 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
730 }else if(genimm(~imm,&armval)) {
731 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
732 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
733 }else if(imm==65535) {
734 #ifndef HAVE_ARMV6
735 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
736 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
737 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
738 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
739 #else
740 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
741 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
742 #endif
743 }else{
744 assert(imm>0&&imm<65535);
745 #ifndef HAVE_ARMV7
746 assem_debug("mov r14,#%d\n",imm&0xFF00);
747 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
748 assem_debug("add r14,r14,#%d\n",imm&0xFF);
749 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
750 #else
751 emit_movw(imm,HOST_TEMPREG);
752 #endif
753 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
754 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
755 }
756}
757
758static void emit_orimm(int rs,int imm,int rt)
759{
760 u_int armval;
761 if(imm==0) {
762 if(rs!=rt) emit_mov(rs,rt);
763 }else if(genimm(imm,&armval)) {
764 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
765 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
766 }else{
767 assert(imm>0&&imm<65536);
768 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
769 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
770 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
771 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
772 }
773}
774
775static void emit_xorimm(int rs,int imm,int rt)
776{
777 u_int armval;
778 if(imm==0) {
779 if(rs!=rt) emit_mov(rs,rt);
780 }else if(genimm(imm,&armval)) {
781 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
782 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
783 }else{
784 assert(imm>0&&imm<65536);
785 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
786 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
787 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
788 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
789 }
790}
791
792static void emit_shlimm(int rs,u_int imm,int rt)
793{
794 assert(imm>0);
795 assert(imm<32);
796 //if(imm==1) ...
797 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
798 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
799}
800
801static void emit_lsls_imm(int rs,int imm,int rt)
802{
803 assert(imm>0);
804 assert(imm<32);
805 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
806 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
807}
808
809static unused void emit_lslpls_imm(int rs,int imm,int rt)
810{
811 assert(imm>0);
812 assert(imm<32);
813 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
814 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
815}
816
817static void emit_shrimm(int rs,u_int imm,int rt)
818{
819 assert(imm>0);
820 assert(imm<32);
821 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
822 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
823}
824
825static void emit_sarimm(int rs,u_int imm,int rt)
826{
827 assert(imm>0);
828 assert(imm<32);
829 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
830 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
831}
832
833static void emit_rorimm(int rs,u_int imm,int rt)
834{
835 assert(imm>0);
836 assert(imm<32);
837 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
838 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
839}
840
// Sign-extend the low 16 bits of rs into rt: a single sxth on ARMv6+,
// otherwise a shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
851
// Sign-extend the low 8 bits of rs into rt: a single sxtb on ARMv6+,
// otherwise a shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
862
863static void emit_shl(u_int rs,u_int shift,u_int rt)
864{
865 assert(rs<16);
866 assert(rt<16);
867 assert(shift<16);
868 //if(imm==1) ...
869 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
870 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
871}
872
873static void emit_shr(u_int rs,u_int shift,u_int rt)
874{
875 assert(rs<16);
876 assert(rt<16);
877 assert(shift<16);
878 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
879 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
880}
881
882static void emit_sar(u_int rs,u_int shift,u_int rt)
883{
884 assert(rs<16);
885 assert(rt<16);
886 assert(shift<16);
887 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
888 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
889}
890
891static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
892{
893 assert(rs<16);
894 assert(rt<16);
895 assert(shift<16);
896 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
897 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
898}
899
900static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
901{
902 assert(rs<16);
903 assert(rt<16);
904 assert(shift<16);
905 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
906 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
907}
908
909static void emit_cmpimm(int rs,int imm)
910{
911 u_int armval;
912 if(genimm(imm,&armval)) {
913 assem_debug("cmp %s,#%d\n",regname[rs],imm);
914 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
915 }else if(genimm(-imm,&armval)) {
916 assem_debug("cmn %s,#%d\n",regname[rs],imm);
917 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
918 }else if(imm>0) {
919 assert(imm<65536);
920 emit_movimm(imm,HOST_TEMPREG);
921 assem_debug("cmp %s,r14\n",regname[rs]);
922 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
923 }else{
924 assert(imm>-65536);
925 emit_movimm(-imm,HOST_TEMPREG);
926 assem_debug("cmn %s,r14\n",regname[rs]);
927 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
928 }
929}
930
931static void emit_cmovne_imm(int imm,int rt)
932{
933 assem_debug("movne %s,#%d\n",regname[rt],imm);
934 u_int armval;
935 genimm_checked(imm,&armval);
936 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
937}
938
939static void emit_cmovl_imm(int imm,int rt)
940{
941 assem_debug("movlt %s,#%d\n",regname[rt],imm);
942 u_int armval;
943 genimm_checked(imm,&armval);
944 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
945}
946
947static void emit_cmovb_imm(int imm,int rt)
948{
949 assem_debug("movcc %s,#%d\n",regname[rt],imm);
950 u_int armval;
951 genimm_checked(imm,&armval);
952 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
953}
954
955static void emit_cmovae_imm(int imm,int rt)
956{
957 assem_debug("movcs %s,#%d\n",regname[rt],imm);
958 u_int armval;
959 genimm_checked(imm,&armval);
960 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
961}
962
963static void emit_cmovne_reg(int rs,int rt)
964{
965 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
966 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
967}
968
969static void emit_cmovl_reg(int rs,int rt)
970{
971 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
972 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
973}
974
975static void emit_cmovs_reg(int rs,int rt)
976{
977 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
978 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
979}
980
// Emit code for rt = (rs < imm) using a signed comparison.
// When rt aliases rs, rt is cleared only after the compare has read it.
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
988
// Emit code for rt = (rs < imm) using an unsigned comparison (movcc).
// When rt aliases rs, rt is cleared only after the compare has read it.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
996
997static void emit_cmp(int rs,int rt)
998{
999 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1000 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1001}
1002
// Emit code for rt = (rs > 0), signed: compare rs with 1, preset rt to 1,
// then overwrite it with 0 when rs was less than 1.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
1010
// Emit code for rt = (rs != 0). The flags come from movs when rt differs
// from rs, or from tst rs,rs when they are the same register.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) {
    emit_test(rs,rs);
  } else {
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
1018
// Emit code for rt = (rs1 < rs2) using a signed comparison.
// When rt aliases an operand, rt is cleared only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1027
// Emit code for rt = (rs1 < rs2) using an unsigned comparison (movcc).
// When rt aliases an operand, rt is cleared only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1036
1037static void emit_call(const void *a_)
1038{
1039 int a = (int)a_;
1040 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1041 u_int offset=genjmp(a);
1042 output_w32(0xeb000000|offset);
1043}
1044
1045static void emit_jmp(const void *a_)
1046{
1047 int a = (int)a_;
1048 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1049 u_int offset=genjmp(a);
1050 output_w32(0xea000000|offset);
1051}
1052
// Emit "bne a_" (offset field computed by genjmp()).
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n",target);
  output_w32(0x1a000000|genjmp(target));
}
1060
// Emit "beq a_" (offset field computed by genjmp()).
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n",target);
  output_w32(0x0a000000|genjmp(target));
}
1068
// Emit "bmi a_" (offset field computed by genjmp()).
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000|genjmp(target));
}
1076
// Emit "bpl a_" (offset field computed by genjmp()).
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000|genjmp(target));
}
1084
// Emit "blt a_" (offset field computed by genjmp()).
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n",target);
  output_w32(0xba000000|genjmp(target));
}
1092
// Emit "bge a_" (offset field computed by genjmp()).
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n",target);
  output_w32(0xaa000000|genjmp(target));
}
1100
// Emit "bvc a_" (offset field computed by genjmp()).
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000|genjmp(target));
}
1108
// Emit "bcs a_" (offset field computed by genjmp()).
static void emit_jc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000|genjmp(target));
}
1116
// Emit "bcc a_" (offset field computed by genjmp()).
static void emit_jcc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcc %x\n",target);
  output_w32(0x3a000000|genjmp(target));
}
1124
1125static unused void emit_callreg(u_int r)
1126{
1127 assert(r<15);
1128 assem_debug("blx %s\n",regname[r]);
1129 output_w32(0xe12fff30|r);
1130}
1131
1132static void emit_jmpreg(u_int r)
1133{
1134 assem_debug("mov pc,%s\n",regname[r]);
1135 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1136}
1137
1138static void emit_ret(void)
1139{
1140 emit_jmpreg(14);
1141}
1142
1143static void emit_readword_indexed(int offset, int rs, int rt)
1144{
1145 assert(offset>-4096&&offset<4096);
1146 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1147 if(offset>=0) {
1148 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1149 }else{
1150 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1151 }
1152}
1153
1154static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1155{
1156 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1157 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1158}
1159
1160static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1161{
1162 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1163 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1164}
1165
1166static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1167{
1168 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1169 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1170}
1171
1172static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1173{
1174 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1175 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1176}
1177
1178static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1179{
1180 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1181 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1182}
1183
1184static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1185{
1186 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1187 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1188}
1189
1190static void emit_movsbl_indexed(int offset, int rs, int rt)
1191{
1192 assert(offset>-256&&offset<256);
1193 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1194 if(offset>=0) {
1195 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1196 }else{
1197 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1198 }
1199}
1200
1201static void emit_movswl_indexed(int offset, int rs, int rt)
1202{
1203 assert(offset>-256&&offset<256);
1204 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1205 if(offset>=0) {
1206 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1207 }else{
1208 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1209 }
1210}
1211
1212static void emit_movzbl_indexed(int offset, int rs, int rt)
1213{
1214 assert(offset>-4096&&offset<4096);
1215 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1216 if(offset>=0) {
1217 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1218 }else{
1219 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1220 }
1221}
1222
1223static void emit_movzwl_indexed(int offset, int rs, int rt)
1224{
1225 assert(offset>-256&&offset<256);
1226 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1227 if(offset>=0) {
1228 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1229 }else{
1230 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1231 }
1232}
1233
1234static void emit_ldrd(int offset, int rs, int rt)
1235{
1236 assert(offset>-256&&offset<256);
1237 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1238 if(offset>=0) {
1239 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1240 }else{
1241 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1242 }
1243}
1244
1245static void emit_readword(void *addr, int rt)
1246{
1247 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1248 assert(offset<4096);
1249 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1250 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1251}
1252
1253static void emit_writeword_indexed(int rt, int offset, int rs)
1254{
1255 assert(offset>-4096&&offset<4096);
1256 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1257 if(offset>=0) {
1258 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1259 }else{
1260 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1261 }
1262}
1263
1264static void emit_writehword_indexed(int rt, int offset, int rs)
1265{
1266 assert(offset>-256&&offset<256);
1267 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1268 if(offset>=0) {
1269 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1270 }else{
1271 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1272 }
1273}
1274
1275static void emit_writebyte_indexed(int rt, int offset, int rs)
1276{
1277 assert(offset>-4096&&offset<4096);
1278 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1279 if(offset>=0) {
1280 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1281 }else{
1282 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1283 }
1284}
1285
1286static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1287{
1288 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1289 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1290}
1291
1292static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1293{
1294 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1295 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1296}
1297
1298static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1299{
1300 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1301 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1302}
1303
1304static void emit_writeword(int rt, void *addr)
1305{
1306 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1307 assert(offset<4096);
1308 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1309 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1310}
1311
1312static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1313{
1314 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1315 assert(rs1<16);
1316 assert(rs2<16);
1317 assert(hi<16);
1318 assert(lo<16);
1319 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1320}
1321
1322static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1323{
1324 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1325 assert(rs1<16);
1326 assert(rs2<16);
1327 assert(hi<16);
1328 assert(lo<16);
1329 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1330}
1331
1332static void emit_clz(int rs,int rt)
1333{
1334 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1335 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1336}
1337
1338static void emit_subcs(int rs1,int rs2,int rt)
1339{
1340 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1341 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1342}
1343
1344static void emit_shrcc_imm(int rs,u_int imm,int rt)
1345{
1346 assert(imm>0);
1347 assert(imm<32);
1348 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1349 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1350}
1351
1352static void emit_shrne_imm(int rs,u_int imm,int rt)
1353{
1354 assert(imm>0);
1355 assert(imm<32);
1356 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1357 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1358}
1359
1360static void emit_negmi(int rs, int rt)
1361{
1362 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1363 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1364}
1365
1366static void emit_negsmi(int rs, int rt)
1367{
1368 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1369 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1370}
1371
1372static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1373{
1374 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1375 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1376}
1377
1378static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1379{
1380 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1381 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1382}
1383
1384static void emit_teq(int rs, int rt)
1385{
1386 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1387 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1388}
1389
1390static unused void emit_rsbimm(int rs, int imm, int rt)
1391{
1392 u_int armval;
1393 genimm_checked(imm,&armval);
1394 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1395 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1396}
1397
1398// Conditionally select one of two immediates, optimizing for small code size
1399// This will only be called if HAVE_CMOV_IMM is defined
1400static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1401{
1402 u_int armval;
1403 if(genimm(imm2-imm1,&armval)) {
1404 emit_movimm(imm1,rt);
1405 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1406 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1407 }else if(genimm(imm1-imm2,&armval)) {
1408 emit_movimm(imm1,rt);
1409 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1410 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1411 }
1412 else {
1413 #ifndef HAVE_ARMV7
1414 emit_movimm(imm1,rt);
1415 add_literal((int)out,imm2);
1416 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1417 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1418 #else
1419 emit_movw(imm1&0x0000FFFF,rt);
1420 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1421 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1422 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1423 }
1424 emit_movt(imm1&0xFFFF0000,rt);
1425 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1426 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1427 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1428 }
1429 #endif
1430 }
1431}
1432
1433// special case for checking invalid_code
1434static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1435{
1436 assert(imm<128&&imm>=0);
1437 assert(r>=0&&r<16);
1438 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1439 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1440 emit_cmpimm(HOST_TEMPREG,imm);
1441}
1442
1443static void emit_callne(int a)
1444{
1445 assem_debug("blne %x\n",a);
1446 u_int offset=genjmp(a);
1447 output_w32(0x1b000000|offset);
1448}
1449
1450// Used to preload hash table entries
1451static unused void emit_prefetchreg(int r)
1452{
1453 assem_debug("pld %s\n",regname[r]);
1454 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1455}
1456
1457// Special case for mini_ht
1458static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1459{
1460 assert(offset<4096);
1461 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1462 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1463}
1464
1465static void emit_orrne_imm(int rs,int imm,int rt)
1466{
1467 u_int armval;
1468 genimm_checked(imm,&armval);
1469 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1470 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1471}
1472
1473static void emit_andne_imm(int rs,int imm,int rt)
1474{
1475 u_int armval;
1476 genimm_checked(imm,&armval);
1477 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1478 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1479}
1480
1481static unused void emit_addpl_imm(int rs,int imm,int rt)
1482{
1483 u_int armval;
1484 genimm_checked(imm,&armval);
1485 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1486 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1487}
1488
1489static void emit_jno_unlikely(int a)
1490{
1491 //emit_jno(a);
1492 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1493 output_w32(0x72800000|rd_rn_rm(15,15,0));
1494}
1495
1496static void save_regs_all(u_int reglist)
1497{
1498 int i;
1499 if(!reglist) return;
1500 assem_debug("stmia fp,{");
1501 for(i=0;i<16;i++)
1502 if(reglist&(1<<i))
1503 assem_debug("r%d,",i);
1504 assem_debug("}\n");
1505 output_w32(0xe88b0000|reglist);
1506}
1507
1508static void restore_regs_all(u_int reglist)
1509{
1510 int i;
1511 if(!reglist) return;
1512 assem_debug("ldmia fp,{");
1513 for(i=0;i<16;i++)
1514 if(reglist&(1<<i))
1515 assem_debug("r%d,",i);
1516 assem_debug("}\n");
1517 output_w32(0xe89b0000|reglist);
1518}
1519
1520// Save registers before function call
1521static void save_regs(u_int reglist)
1522{
1523 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1524 save_regs_all(reglist);
1525}
1526
1527// Restore registers after function call
1528static void restore_regs(u_int reglist)
1529{
1530 reglist&=CALLER_SAVE_REGS;
1531 restore_regs_all(reglist);
1532}
1533
1534/* Stubs/epilogue */
1535
// Flush the pending literal pool at the current output position.
//
// Each pending entry is literals[k][0] = address of the instruction that
// references the literal, literals[k][1] = the literal value.
//   n - 0 forces a flush; otherwise the flush only happens once the first
//       referencing instruction is within n bytes of the 4096 distance
//       limit checked below.
// For every entry the referencing instruction is patched with the
// pc-relative offset of the literal, and the literal word is emitted
// unless an identical value was already emitted earlier in this flush.
static void literal_pool(int n)
{
  if(!literalcount) return;
  if(n) {
    // still enough room before the oldest reference goes out of range
    if((int)out-literals[0][0]<4096-n) return;
  }
  u_int *ptr;
  int i;
  for(i=0;i<literalcount;i++)
  {
    // by default the literal goes at the current output position...
    u_int l_addr=(u_int)out;
    int j;
    // ...unless an earlier entry carries the same value; then reuse its slot
    for(j=0;j<i;j++) {
      if(literals[j][1]==literals[i][1]) {
        //printf("dup %08x\n",literals[i][1]);
        l_addr=literals[j][0];
        break;
      }
    }
    ptr=(u_int *)literals[i][0];
    // the -8 accounts for the pc value as seen by the referencing instruction
    u_int offset=l_addr-(u_int)ptr-8;
    assert(offset<4096);
    assert(!(offset&3));
    // fill the offset into the already-emitted instruction
    *ptr|=offset;
    if(l_addr==(u_int)out) {
      // entry [0] now records where the literal lives instead of who uses it
      literals[i][0]=l_addr; // remember for dupes
      output_w32(literals[i][1]);
    }
  }
  literalcount=0;
}
1567
1568static void literal_pool_jumpover(int n)
1569{
1570 if(!literalcount) return;
1571 if(n) {
1572 if((int)out-literals[0][0]<4096-n) return;
1573 }
1574 void *jaddr = out;
1575 emit_jmp(0);
1576 literal_pool(0);
1577 set_jump_target(jaddr, out);
1578}
1579
1580// parsed by get_pointer, find_extjump_insn
1581static void emit_extjump2(u_char *addr, u_int target, void *linker)
1582{
1583 u_char *ptr=(u_char *)addr;
1584 assert((ptr[3]&0x0e)==0xa);
1585 (void)ptr;
1586
1587 emit_loadlp(target,0);
1588 emit_loadlp((u_int)addr,1);
1589 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
1590 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1591//DEBUG >
1592#ifdef DEBUG_CYCLE_COUNT
1593 emit_readword(&last_count,ECX);
1594 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1595 emit_readword(&next_interupt,ECX);
1596 emit_writeword(HOST_CCREG,&Count);
1597 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1598 emit_writeword(ECX,&last_count);
1599#endif
1600//DEBUG <
1601 emit_jmp(linker);
1602}
1603
1604static void check_extjump2(void *src)
1605{
1606 u_int *ptr = src;
1607 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1608 (void)ptr;
1609}
1610
1611// put rt_val into rt, potentially making use of rs with value rs_val
1612static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1613{
1614 u_int armval;
1615 int diff;
1616 if(genimm(rt_val,&armval)) {
1617 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1618 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1619 return;
1620 }
1621 if(genimm(~rt_val,&armval)) {
1622 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1623 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1624 return;
1625 }
1626 diff=rt_val-rs_val;
1627 if(genimm(diff,&armval)) {
1628 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1629 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1630 return;
1631 }else if(genimm(-diff,&armval)) {
1632 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1633 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1634 return;
1635 }
1636 emit_movimm(rt_val,rt);
1637}
1638
// return 1 if above function can do its job cheaply
// That is: the values are equal, or their difference (or its negation),
// after stripping trailing pairs of zero bits, fits in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2)
    return 1;

  const int delta=v2-v1;

  u_int bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100;
}
1654
1655static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1656{
1657 switch(type) {
1658 case LOADB_STUB: emit_signextend8(rs,rt); break;
1659 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1660 case LOADH_STUB: emit_signextend16(rs,rt); break;
1661 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1662 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1663 default: assert(0);
1664 }
1665}
1666
1667#include "pcsxmem.h"
1668#include "pcsxmem_inline.c"
1669
// Emit the out-of-line slow path for a memory load (stub number n).
//
// The stub first looks the address up in mem_rtab and, if the table entry
// allows it (carry clear after shifting the entry left by one), performs
// the load directly and returns to the main code. Otherwise it calls the
// matching jump_handler_read* routine and moves the result from r0 into
// the destination register.
//
// Stub fields used: type (load kind), a (instruction index), b (host
// register holding the address), c (struct regstat *), d (cycle
// adjustment), e (live host register mask), addr (jump to bind here),
// retaddr (where to return to).
static void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n].a*4);
  literal_pool(256);
  // the main code's branch to this stub lands here
  set_jump_target(stubs[n].addr, out);
  enum stub_type type=stubs[n].type;
  int i=stubs[n].a;
  int rs=stubs[n].b;
  struct regstat *i_regs=(struct regstat *)stubs[n].c;
  u_int reglist=stubs[n].e;
  signed char *i_regmap=i_regs->regmap;
  int rt;
  // coprocessor and unaligned loads land in FTEMP, others in the
  // instruction's own target register
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL;
  reglist|=(1<<rs);
  // look for a free scratch register among r0-r9 and r12 (mask 0x13ff)
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // the destination gets overwritten anyway, no need to preserve it
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // nothing free: save registers up front and take r0 (or r2 if the
    // address lives in r0)
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // prefer r1 over HOST_TEMPREG as second scratch when r1 is available
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // temp2 = mem_rtab[rs>>12]; the shift-left-by-one sets the flags used
  // by the conditional (carry clear) load and branch below
  emit_readword(&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    // fast path: load directly from temp2+rs
    switch(type) {
      case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
      default: assert(0);
    }
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n].retaddr); // return address

  // slow path: call the read handler
  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=jump_handler_read16;
  if(type==LOADW_STUB)
    handler=jump_handler_read32;
  assert(handler);
  pass_args(rs,temp2);
  // r2 = cycle count register + adjustment for this instruction
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
  emit_call(handler);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    // result comes back in r0
    mov_loadtype_adj(type,0,rt);
  }
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr); // return address
}
1750
// Emit an inline load for an address known at translation time.
//   type    - load kind (LOADB/LOADBU/LOADH/LOADHU/LOADW)
//   i       - instruction index
//   addr    - the constant guest address
//   regmap  - host register map
//   target  - guest register used to look up the host register(s)
//   adj     - cycle adjustment (passed through CLOCK_ADJUST(adj+1))
//   reglist - live host register mask
// Depending on what the address maps to, this emits a special-cased read
// (pcsx_direct_read), a plain load from host memory, or a call to a
// memory handler.
static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rt=get_reg(regmap,target);
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);
  u_int is_dynamic,far_call=0;
  uintptr_t host_addr = 0;
  void *handler;
  int cc=get_reg(regmap,CCREG);
  // a nonzero return means pcsx_direct_read already emitted everything needed
  if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
    return;
  handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
  if (handler == NULL) {
    // no handler: the address is backed by host memory at host_addr
    if(rt<0||rt1[i]==0)
      return;
    // rs holds addr; turn it into host_addr if they differ
    if(addr!=host_addr)
      emit_movimm_from(addr,rs,host_addr,rs);
    switch(type) {
      case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
      default: assert(0);
    }
    return;
  }
  // handlers that can change at runtime go through the generic
  // jump_handler_read* routines instead of being called directly
  is_dynamic=pcsxmem_is_handler_dynamic(addr);
  if(is_dynamic) {
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=jump_handler_read16;
    if(type==LOADW_STUB)
      handler=jump_handler_read32;
  }

  // call a memhandler
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // r0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  // check the handler against a +/-32MB (33554432 byte) branch range
  int offset=(u_char *)handler-out-8;
  if(offset<-33554432||offset>=33554432) {
    // unreachable memhandler, a plugin func perhaps
    emit_movimm((u_int)handler,12);
    far_call=1;
  }
  if(cc<0)
    emit_loadreg(CCREG,2);
  if(is_dynamic) {
    // r1 = table entry shifted left by one, r2 = adjusted cycle count
    emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
  }
  else {
    // direct handler call: bring Count up to date first
    emit_readword(&last_count,3);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
    emit_add(2,3,2);
    emit_writeword(2,&Count);
  }

  if(far_call)
    emit_callreg(12);
  else
    emit_call(handler);

  // move the result from r0 to rt with the extension the load type needs
  if(rt>=0&&rt1[i]!=0) {
    switch(type) {
      case LOADB_STUB: emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB: emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
      default: assert(0);
    }
  }
  restore_regs(reglist);
}
1833
// Emit the out-of-line slow path for a memory store (stub number n).
//
// Mirrors do_readstub(): the address is looked up in mem_wtab and, if the
// table entry allows it (carry clear after shifting the entry left by
// one), the store is done directly. Otherwise the matching
// jump_handler_write* routine is called; its return value in r0 is used
// to update the cycle count register.
//
// Stub fields used: type (store kind), a (instruction index), b (host
// register holding the address), c (struct regstat *), d (cycle
// adjustment), e (live host register mask), addr (jump to bind here),
// retaddr (where to return to).
static void do_writestub(int n)
{
  assem_debug("do_writestub %x\n",start+stubs[n].a*4);
  literal_pool(256);
  // the main code's branch to this stub lands here
  set_jump_target(stubs[n].addr, out);
  enum stub_type type=stubs[n].type;
  int i=stubs[n].a;
  int rs=stubs[n].b;
  struct regstat *i_regs=(struct regstat *)stubs[n].c;
  u_int reglist=stubs[n].e;
  signed char *i_regmap=i_regs->regmap;
  int rt,r;
  // the value to store is in FTEMP for coprocessor stores, else in rs2[i]
  if(itype[i]==C1LS||itype[i]==C2LS) {
    rt=get_reg(i_regmap,r=FTEMP);
  }else{
    rt=get_reg(i_regmap,r=rs2[i]);
  }
  assert(rs>=0);
  assert(rt>=0);
  int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL;
  int reglist2=reglist|(1<<rs)|(1<<rt);
  // look for a free scratch register among r0-r9 and r12 (mask 0x13ff)
  for(rtmp=0;rtmp<=12;rtmp++) {
    if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
      temp=rtmp; break;
    }
  }
  if(temp==-1) {
    // nothing free: save registers up front and take the first of r0-r3
    // that holds neither the address nor the value
    save_regs(reglist);
    regs_saved=1;
    for(rtmp=0;rtmp<=3;rtmp++)
      if(rtmp!=rs&&rtmp!=rt)
        {temp=rtmp;break;}
  }
  // prefer r3 over HOST_TEMPREG as second scratch when r3 is available
  if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
    temp2=3;
  // temp2 = mem_wtab[rs>>12]; the shift-left-by-one sets the flags used
  // by the conditional (carry clear) store and branch below
  emit_readword(&mem_wtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  // fast path: store directly to temp2+rs
  switch(type) {
    case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
    case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
    case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
    default: assert(0);
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n].retaddr); // return address (invcode check)

  // slow path: call the write handler
  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  switch(type) {
    case STOREB_STUB: handler=jump_handler_write8; break;
    case STOREH_STUB: handler=jump_handler_write16; break;
    case STOREW_STUB: handler=jump_handler_write32; break;
    default: assert(0);
  }
  assert(handler);
  pass_args(rs,rt);
  // r3 = shifted table entry, r2 = adjusted cycle count
  if(temp2!=3)
    emit_mov(temp2,3);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
  // returns new cycle_count
  emit_call(handler);
  // undo the adjustment on the returned count and put it back in place
  emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr);
}
1914
1915static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1916{
1917 int rs=get_reg(regmap,-1);
1918 int rt=get_reg(regmap,target);
1919 assert(rs>=0);
1920 assert(rt>=0);
1921 uintptr_t host_addr = 0;
1922 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1923 if (handler == NULL) {
1924 if(addr!=host_addr)
1925 emit_movimm_from(addr,rs,host_addr,rs);
1926 switch(type) {
1927 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1928 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1929 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1930 default: assert(0);
1931 }
1932 return;
1933 }
1934
1935 // call a memhandler
1936 save_regs(reglist);
1937 pass_args(rs,rt);
1938 int cc=get_reg(regmap,CCREG);
1939 if(cc<0)
1940 emit_loadreg(CCREG,2);
1941 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1942 emit_movimm((u_int)handler,3);
1943 // returns new cycle_count
1944 emit_call(jump_handler_write_h);
1945 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
1946 if(cc<0)
1947 emit_storereg(CCREG,2);
1948 restore_regs(reglist);
1949}
1950
// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
// Emits the argument setup for a verify_code* call:
//   r0 = arg0, r1 = source, r2 = copy, r3 = slen*4
// Because other code decodes this exact sequence, the instruction order
// and forms must not be changed here without updating those parsers.
static void do_dirty_stub_emit_args(u_int arg0)
{
  #ifndef HAVE_ARMV7
  // pre-ARMv7: constants come from the literal pool
  emit_loadlp((int)source, 1);
  emit_loadlp((int)copy, 2);
  emit_loadlp(slen*4, 3);
  #else
  // ARMv7: build the 32-bit constants with movw/movt pairs
  emit_movw(((u_int)source)&0x0000FFFF, 1);
  emit_movw(((u_int)copy)&0x0000FFFF, 2);
  emit_movt(((u_int)source)&0xFFFF0000, 1);
  emit_movt(((u_int)copy)&0xFFFF0000, 2);
  emit_movw(slen*4, 3);
  #endif
  emit_movimm(arg0, 0);
}
1967
1968static void *do_dirty_stub(int i)
1969{
1970 assem_debug("do_dirty_stub %x\n",start+i*4);
1971 do_dirty_stub_emit_args(start + i*4);
1972 emit_call(verify_code);
1973 void *entry = out;
1974 load_regs_entry(i);
1975 if (entry == out)
1976 entry = instr_addr[i];
1977 emit_jmp(instr_addr[i]);
1978 return entry;
1979}
1980
// Emit the dirty-check call for the verify_code_ds variant: same argument
// setup as do_dirty_stub(), but with start + 1 passed as the first argument.
static void do_dirty_stub_ds()
{
  do_dirty_stub_emit_args(start + 1);
  emit_call(verify_code_ds);
}
1986
1987/* Special assem */
1988
1989static void c2op_prologue(u_int op,u_int reglist)
1990{
1991 save_regs_all(reglist);
1992#ifdef PCNT
1993 emit_movimm(op,0);
1994 emit_call(pcnt_gte_start);
1995#endif
1996 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
1997}
1998
1999static void c2op_epilogue(u_int op,u_int reglist)
2000{
2001#ifdef PCNT
2002 emit_movimm(op,0);
2003 emit_call(pcnt_gte_end);
2004#endif
2005 restore_regs_all(reglist);
2006}
2007
2008static void c2op_call_MACtoIR(int lm,int need_flags)
2009{
2010 if(need_flags)
2011 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2012 else
2013 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2014}
2015
2016static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2017{
2018 emit_call(func);
2019 // func is C code and trashes r0
2020 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2021 if(need_flags||need_ir)
2022 c2op_call_MACtoIR(lm,need_flags);
2023 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2024}
2025
2026static void c2op_assemble(int i,struct regstat *i_regs)
2027{
2028 u_int c2op=source[i]&0x3f;
2029 u_int hr,reglist_full=0,reglist;
2030 int need_flags,need_ir;
2031 for(hr=0;hr<HOST_REGS;hr++) {
2032 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
2033 }
2034 reglist=reglist_full&CALLER_SAVE_REGS;
2035
2036 if (gte_handlers[c2op]!=NULL) {
2037 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2038 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2039 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2040 source[i],gte_unneeded[i+1],need_flags,need_ir);
2041 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
2042 need_flags=0;
2043 int shift = (source[i] >> 19) & 1;
2044 int lm = (source[i] >> 10) & 1;
2045 switch(c2op) {
2046#ifndef DRC_DBG
2047 case GTE_MVMVA: {
2048#ifdef HAVE_ARMV5
2049 int v = (source[i] >> 15) & 3;
2050 int cv = (source[i] >> 13) & 3;
2051 int mx = (source[i] >> 17) & 3;
2052 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2053 c2op_prologue(c2op,reglist);
2054 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2055 if(v<3)
2056 emit_ldrd(v*8,0,4);
2057 else {
2058 emit_movzwl_indexed(9*4,0,4); // gteIR
2059 emit_movzwl_indexed(10*4,0,6);
2060 emit_movzwl_indexed(11*4,0,5);
2061 emit_orrshl_imm(6,16,4);
2062 }
2063 if(mx<3)
2064 emit_addimm(0,32*4+mx*8*4,6);
2065 else
2066 emit_readword(&zeromem_ptr,6);
2067 if(cv<3)
2068 emit_addimm(0,32*4+(cv*8+5)*4,7);
2069 else
2070 emit_readword(&zeromem_ptr,7);
2071#ifdef __ARM_NEON__
2072 emit_movimm(source[i],1); // opcode
2073 emit_call(gteMVMVA_part_neon);
2074 if(need_flags) {
2075 emit_movimm(lm,1);
2076 emit_call(gteMACtoIR_flags_neon);
2077 }
2078#else
2079 if(cv==3&&shift)
2080 emit_call((int)gteMVMVA_part_cv3sh12_arm);
2081 else {
2082 emit_movimm(shift,1);
2083 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2084 }
2085 if(need_flags||need_ir)
2086 c2op_call_MACtoIR(lm,need_flags);
2087#endif
2088#else /* if not HAVE_ARMV5 */
2089 c2op_prologue(c2op,reglist);
2090 emit_movimm(source[i],1); // opcode
2091 emit_writeword(1,&psxRegs.code);
2092 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
2093#endif
2094 break;
2095 }
2096 case GTE_OP:
2097 c2op_prologue(c2op,reglist);
2098 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
2099 if(need_flags||need_ir) {
2100 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2101 c2op_call_MACtoIR(lm,need_flags);
2102 }
2103 break;
2104 case GTE_DPCS:
2105 c2op_prologue(c2op,reglist);
2106 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2107 break;
2108 case GTE_INTPL:
2109 c2op_prologue(c2op,reglist);
2110 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2111 break;
2112 case GTE_SQR:
2113 c2op_prologue(c2op,reglist);
2114 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2115 if(need_flags||need_ir) {
2116 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2117 c2op_call_MACtoIR(lm,need_flags);
2118 }
2119 break;
2120 case GTE_DCPL:
2121 c2op_prologue(c2op,reglist);
2122 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2123 break;
2124 case GTE_GPF:
2125 c2op_prologue(c2op,reglist);
2126 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2127 break;
2128 case GTE_GPL:
2129 c2op_prologue(c2op,reglist);
2130 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2131 break;
2132#endif
2133 default:
2134 c2op_prologue(c2op,reglist);
2135#ifdef DRC_DBG
2136 emit_movimm(source[i],1); // opcode
2137 emit_writeword(1,&psxRegs.code);
2138#endif
2139 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2140 break;
2141 }
2142 c2op_epilogue(c2op,reglist);
2143 }
2144}
2145
// Emit the value fix-up for a write to cop2 control register 31:
// sl holds the incoming value, temp receives the adjusted one.
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  //value = value & 0x7ffff000;
  //if (value & 0x7f87e000) value |= 0x80000000;
  // clear the low 12 bits by shifting right then left
  emit_shrimm(sl,12,temp);
  emit_shlimm(temp,12,temp);
  // 0x7f87e000 is tested as three separate immediate pieces
  // (0x7f000000, 0x00870000, 0x0000e000); each later test only runs
  // while the previous ones found no bits set
  emit_testimm(temp,0x7f000000);
  emit_testeqimm(temp,0x00870000);
  emit_testeqimm(temp,0x0000e000);
  // any tested bit set -> also set bit 31
  emit_orrne_imm(temp,0x80000000,temp);
}
2157
// Emit code that loads cop2 data register `copr` into temp and reduces it
// to a value within the 0xf80 mask: values with bit 15 set become 0, and
// values of 0xf80 or above (unsigned compare) become 0xf80.
static void do_mfc2_31_one(u_int copr,signed char temp)
{
  emit_readword(&reg_cop2d[copr],temp);
  emit_testimm(temp,0x8000); // do we need this?
  emit_andne_imm(temp,0,temp); // bit 15 set -> 0
  emit_cmpimm(temp,0xf80);
  emit_andimm(temp,0xf80,temp);
  // NOTE(review): relies on emit_andimm leaving the flags from the compare
  // above untouched -- confirm
  emit_cmovae_imm(0xf80,temp);
}
2167
2168static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2169{
2170 if (temp < 0) {
2171 host_tempreg_acquire();
2172 temp = HOST_TEMPREG;
2173 }
2174 do_mfc2_31_one(9,temp);
2175 emit_shrimm(temp,7,tl);
2176 do_mfc2_31_one(10,temp);
2177 emit_orrshr_imm(temp,2,tl);
2178 do_mfc2_31_one(11,temp);
2179 emit_orrshl_imm(temp,3,tl);
2180 emit_writeword(tl,&reg_cop2d[29]);
2181 if (temp == HOST_TEMPREG)
2182 host_tempreg_release();
2183}
2184
// Emits host code for the multiply/divide instruction at index i.
//
//   i       - instruction index; selects rs1[i], rs2[i] and opcode2[i]
//   i_regs  - register state whose regmap is used to look up the host
//             registers assigned to the sources and to HIREG/LOREG
//
// Handled opcode2 values: 0x18 MULT, 0x19 MULTU, 0x1A DIV, 0x1B DIVU.
// Any opcode2 with bit 2 set (the 64-bit forms) hits assert(0).
// When rs1[i] or rs2[i] is 0, HI and LO (if mapped) are simply zeroed.
//
// NOTE(review): the DIV/DIVU sequences branch to hard-coded offsets from the
// current output pointer (out+52, out+40, out-16, out-2*4). These appear to
// depend on the exact number of instructions emitted around them; confirm
// before adding, removing or reordering any emit_* call in those sequences.
2185 static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2186{
2187 // case 0x18: MULT
2188 // case 0x19: MULTU
2189 // case 0x1A: DIV
2190 // case 0x1B: DIVU
2191 // case 0x1C: DMULT
2192 // case 0x1D: DMULTU
2193 // case 0x1E: DDIV
2194 // case 0x1F: DDIVU
2195 if(rs1[i]&&rs2[i])
2196 {
2197 if((opcode2[i]&4)==0) // 32-bit
2198 {
2199 if(opcode2[i]==0x18) // MULT
2200 {
// Signed 32x32 multiply; all four registers must be mapped.
2201 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2202 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2203 signed char hi=get_reg(i_regs->regmap,HIREG);
2204 signed char lo=get_reg(i_regs->regmap,LOREG);
2205 assert(m1>=0);
2206 assert(m2>=0);
2207 assert(hi>=0);
2208 assert(lo>=0);
2209 emit_smull(m1,m2,hi,lo);
2210 }
2211 if(opcode2[i]==0x19) // MULTU
2212 {
// Unsigned 32x32 multiply; all four registers must be mapped.
2213 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2214 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2215 signed char hi=get_reg(i_regs->regmap,HIREG);
2216 signed char lo=get_reg(i_regs->regmap,LOREG);
2217 assert(m1>=0);
2218 assert(m2>=0);
2219 assert(hi>=0);
2220 assert(lo>=0);
2221 emit_umull(m1,m2,hi,lo);
2222 }
2223 if(opcode2[i]==0x1A) // DIV
2224 {
// Signed divide: d1 is the dividend, d2 the divisor.
// The quotient goes to the LO host register, the remainder to HI.
2225 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2226 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2227 assert(d1>=0);
2228 assert(d2>=0);
2229 signed char quotient=get_reg(i_regs->regmap,LOREG);
2230 signed char remainder=get_reg(i_regs->regmap,HIREG);
2231 assert(quotient>=0);
2232 assert(remainder>=0);
// Preload the divide-by-zero results before testing the divisor.
2233 emit_movs(d1,remainder);
2234 emit_movimm(0xffffffff,quotient);
2235 emit_negmi(quotient,quotient); // .. quotient and ..
2236 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
// The divisor is copied into HOST_TEMPREG, so d2 itself is not modified here.
2237 emit_movs(d2,HOST_TEMPREG);
2238 emit_jeq(out+52); // Division by zero
2239 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2240#ifdef HAVE_ARMV5
// With CLZ available the divisor is normalized in one step.
2241 emit_clz(HOST_TEMPREG,quotient);
2242 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2243#else
// Without CLZ: count and shift one bit at a time in a small emitted loop.
2244 emit_movimm(0,quotient);
2245 emit_addpl_imm(quotient,1,quotient);
2246 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2247 emit_jns(out-2*4);
2248#endif
2249 emit_orimm(quotient,1<<31,quotient);
2250 emit_shr(quotient,quotient,quotient);
// Emitted loop body: compare, conditional subtract, shift a bit into quotient.
2251 emit_cmp(remainder,HOST_TEMPREG);
2252 emit_subcs(remainder,HOST_TEMPREG,remainder);
2253 emit_adcs(quotient,quotient,quotient);
2254 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2255 emit_jcc(out-16); // -4
// Sign fix-up: negate quotient when the operand signs differ, and
// negate remainder when the dividend is negative.
2256 emit_teq(d1,d2);
2257 emit_negmi(quotient,quotient);
2258 emit_test(d1,d1);
2259 emit_negmi(remainder,remainder);
2260 }
2261 if(opcode2[i]==0x1B) // DIVU
2262 {
// Unsigned divide. Note that this sequence shifts d2 in place.
2263 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2264 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2265 assert(d1>=0);
2266 assert(d2>=0);
2267 signed char quotient=get_reg(i_regs->regmap,LOREG);
2268 signed char remainder=get_reg(i_regs->regmap,HIREG);
2269 assert(quotient>=0);
2270 assert(remainder>=0);
2271 emit_mov(d1,remainder);
2272 emit_movimm(0xffffffff,quotient); // div0 case
2273 emit_test(d2,d2);
2274 emit_jeq(out+40); // Division by zero
2275#ifdef HAVE_ARMV5
2276 emit_clz(d2,HOST_TEMPREG);
2277 emit_movimm(1<<31,quotient);
2278 emit_shl(d2,HOST_TEMPREG,d2);
2279#else
2280 emit_movimm(0,HOST_TEMPREG);
2281 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2282 emit_lslpls_imm(d2,1,d2);
2283 emit_jns(out-2*4);
2284 emit_movimm(1<<31,quotient);
2285#endif
2286 emit_shr(quotient,HOST_TEMPREG,quotient);
// Emitted loop body: compare, conditional subtract, shift a bit into quotient.
2287 emit_cmp(remainder,d2);
2288 emit_subcs(remainder,d2,remainder);
2289 emit_adcs(quotient,quotient,quotient);
2290 emit_shrcc_imm(d2,1,d2);
2291 emit_jcc(out-16); // -4
2292 }
2293 }
2294 else // 64-bit
2295 assert(0);
2296 }
2297 else
2298 {
2299 // Multiply by zero is zero.
2300 // MIPS does not have a divide by zero exception.
2301 // The result is undefined, we return zero.
2302 signed char hr=get_reg(i_regs->regmap,HIREG);
2303 signed char lr=get_reg(i_regs->regmap,LOREG);
2304 if(hr>=0) emit_zeroreg(hr);
2305 if(lr>=0) emit_zeroreg(lr);
2306 }
2307}
// Alias so code can refer to this backend's implementation as multdiv_assemble.
2308#define multdiv_assemble multdiv_assemble_arm
2309
2310static void do_jump_vaddr(int rs)
2311{
2312 emit_jmp(jump_vaddr_reg[rs]);
2313}
2314
/*
 * Intentionally empty on ARM.
 * On x86 the equivalent hook loads the value 0xf8 into the register; on ARM
 * the hash is computed with a single instruction (see do_rhash), so there is
 * nothing to preload.
 *
 *   r - host register number (unused here)
 */
static void do_preload_rhash(int r)
{
}
2319
2320static void do_preload_rhtbl(int ht) {
2321 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2322}
2323
/*
 * Emit the mini hash computation: rh = rs & 0xf8.
 *
 *   rs - host register holding the value to hash
 *   rh - host register that receives the hash
 */
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2327
2328static void do_miniht_load(int ht,int rh) {
2329 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2330 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2331}
2332
/*
 * Emit the mini hash table dispatch: compare rh with rs and, when equal,
 * load the PC (register 15) from [ht+4]; otherwise fall through to the
 * generic jump via do_jump_vaddr().
 *
 *   rs - host register compared against the table entry
 *   rh - host register holding the value loaded from the table
 *   ht - host register holding the table entry address
 */
static void do_miniht_jump(int rs,int rh,int ht)
{
  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  /* With this hack enabled the fallback jump always goes through r7. */
  if (rs != 7)
  {
    emit_mov(rs, 7);
  }
  rs = 7;
#endif
  do_jump_vaddr(rs);
}
2343
2344static void do_miniht_insert(u_int return_address,int rt,int temp) {
2345 #ifndef HAVE_ARMV7
2346 emit_movimm(return_address,rt); // PC into link register
2347 add_to_linker(out,return_address,1);
2348 emit_pcreladdr(temp);
2349 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2350 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2351 #else
2352 emit_movw(return_address&0x0000FFFF,rt);
2353 add_to_linker(out,return_address,1);
2354 emit_pcreladdr(temp);
2355 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2356 emit_movt(return_address&0xFFFF0000,rt);
2357 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2358 #endif
2359}
2360
2361static void mark_clear_cache(void *target)
2362{
2363 u_long offset = (u_char *)target - translation_cache;
2364 u_int mask = 1u << ((offset >> 12) & 31);
2365 if (!(needs_clear_cache[offset >> 17] & mask)) {
2366 char *start = (char *)((u_long)target & ~4095ul);
2367 start_tcache_write(start, start + 4096);
2368 needs_clear_cache[offset >> 17] |= mask;
2369 }
2370}
2371
2372// Clearing the cache is rather slow on ARM Linux, so mark the areas
2373// that need to be cleared, and then only clear these areas once.
2374static void do_clear_cache()
2375{
2376 int i,j;
2377 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
2378 {
2379 u_int bitmap=needs_clear_cache[i];
2380 if(bitmap) {
2381 u_char *start, *end;
2382 for(j=0;j<32;j++)
2383 {
2384 if(bitmap&(1<<j)) {
2385 start=translation_cache+i*131072+j*4096;
2386 end=start+4095;
2387 j++;
2388 while(j<32) {
2389 if(bitmap&(1<<j)) {
2390 end+=4096;
2391 j++;
2392 }else{
2393 end_tcache_write(start, end);
2394 break;
2395 }
2396 }
2397 }
2398 }
2399 needs_clear_cache[i]=0;
2400 }
2401 }
2402}
2403
/*
 * CPU-architecture-specific initialization hook.
 * This backend has no work to do here, so the body is empty.
 */
static void arch_init()
{
}
2407
2408// vim:shiftwidth=2:expandtab