drc: arm64 wip
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33u_char *translation_cache;
34#else
35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// Register bitmask; judging by the name, one bit per host register that is
// caller-saved (TODO confirm against the users of this macro).
// 0x100f has bits 0-3 and 12 set; the __MACH__ value additionally sets bit 9.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

// Attribute shorthand applied to definitions that may end up unreferenced.
#define unused __attribute__((unused))

// With DRC_DBG defined, suppress the unused-function/variable warnings.
#ifdef DRC_DBG
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
51
// Routines that are only declared here; their definitions are not part of
// this section of the file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// Lookup table of the jump_vaddr_rN routines, presumably indexed by host
// register number. Slots 11, 13, 14 and 15 have no routine and hold 0.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [11] = 0,
  [12] = jump_vaddr_r12,
  [13] = 0,
  [14] = 0,
  [15] = 0
};
86
87void invalidate_addr_r0();
88void invalidate_addr_r1();
89void invalidate_addr_r2();
90void invalidate_addr_r3();
91void invalidate_addr_r4();
92void invalidate_addr_r5();
93void invalidate_addr_r6();
94void invalidate_addr_r7();
95void invalidate_addr_r8();
96void invalidate_addr_r9();
97void invalidate_addr_r10();
98void invalidate_addr_r12();
99
100const u_int invalidate_addr_reg[16] = {
101 (int)invalidate_addr_r0,
102 (int)invalidate_addr_r1,
103 (int)invalidate_addr_r2,
104 (int)invalidate_addr_r3,
105 (int)invalidate_addr_r4,
106 (int)invalidate_addr_r5,
107 (int)invalidate_addr_r6,
108 (int)invalidate_addr_r7,
109 (int)invalidate_addr_r8,
110 (int)invalidate_addr_r9,
111 (int)invalidate_addr_r10,
112 0,
113 (int)invalidate_addr_r12,
114 0,
115 0,
116 0};
117
118static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
119
120/* Linker */
121
122static void set_jump_target(void *addr, void *target_)
123{
124 u_int target = (u_int)target_;
125 u_char *ptr = addr;
126 u_int *ptr2=(u_int *)ptr;
127 if(ptr[3]==0xe2) {
128 assert((target-(u_int)ptr2-8)<1024);
129 assert(((uintptr_t)addr&3)==0);
130 assert((target&3)==0);
131 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
132 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
133 }
134 else if(ptr[3]==0x72) {
135 // generated by emit_jno_unlikely
136 if((target-(u_int)ptr2-8)<1024) {
137 assert(((uintptr_t)addr&3)==0);
138 assert((target&3)==0);
139 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
140 }
141 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
142 assert(((uintptr_t)addr&3)==0);
143 assert((target&3)==0);
144 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
145 }
146 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
147 }
148 else {
149 assert((ptr[3]&0x0e)==0xa);
150 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
151 }
152}
153
// Disabled variant of set_jump_target that can additionally copy the
// instruction found at the branch target into the slot just before the
// branch. Works, but the difference in speed is usually insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  const u_char top = ((u_char *)addr)[3];

  // copying requires the preceding word to be the 0xe28dd000 placeholder
  assert(!copy || insn[-1] == 0xe28dd000);

  if (top == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((top & 0x0e) == 0xa);
  const u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0             // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000 // Load
      || (target_insn & 0x08000000)) {
    copy = 0;
  }
  if (copy) {
    // hoist the target instruction and branch past it
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
188
189/* Literal pool */
190static void add_literal(int addr,int val)
191{
192 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
193 literals[literalcount][0]=addr;
194 literals[literalcount][1]=val;
195 literalcount++;
196}
197
198// from a pointer to external jump stub (which was produced by emit_extjump2)
199// find where the jumping insn is
200static void *find_extjump_insn(void *stub)
201{
202 int *ptr=(int *)(stub+4);
203 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
204 u_int offset=*ptr&0xfff;
205 void **l_ptr=(void *)ptr+offset+8;
206 return *l_ptr;
207}
208
209// find where external branch is liked to using addr of it's stub:
210// get address that insn one after stub loads (dyna_linker arg1),
211// treat it as a pointer to branch insn,
212// return addr where that branch jumps to
213static void *get_pointer(void *stub)
214{
215 //printf("get_pointer(%x)\n",(int)stub);
216 int *i_ptr=find_extjump_insn(stub);
217 assert((*i_ptr&0x0f000000)==0x0a000000);
218 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
219}
220
// Find the "clean" entry point from a "dirty" entry point by skipping past
// the call to verify_code. If the word after the call is an unconditional
// branch, the branch destination is returned instead.
static void *get_clean_addr(void *addr)
{
  signed int *insn = addr;
  #ifdef HAVE_ARMV7
  insn += 6;
  #else
  insn += 4;
  #endif
  // the call may sit one word further on
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
239
240static int verify_dirty(u_int *ptr)
241{
242 #ifndef HAVE_ARMV7
243 u_int offset;
244 // get from literal pool
245 assert((*ptr&0xFFFF0000)==0xe59f0000);
246 offset=*ptr&0xfff;
247 u_int source=*(u_int*)((void *)ptr+offset+8);
248 ptr++;
249 assert((*ptr&0xFFFF0000)==0xe59f0000);
250 offset=*ptr&0xfff;
251 u_int copy=*(u_int*)((void *)ptr+offset+8);
252 ptr++;
253 assert((*ptr&0xFFFF0000)==0xe59f0000);
254 offset=*ptr&0xfff;
255 u_int len=*(u_int*)((void *)ptr+offset+8);
256 ptr++;
257 ptr++;
258 #else
259 // ARMv7 movw/movt
260 assert((*ptr&0xFFF00000)==0xe3000000);
261 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
262 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
263 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
264 ptr+=6;
265 #endif
266 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
267 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
268 //printf("verify_dirty: %x %x %x\n",source,copy,len);
269 return !memcmp((void *)source,(void *)copy,len);
270}
271
272// This doesn't necessarily find all clean entry points, just
273// guarantees that it's not dirty
274static int isclean(void *addr)
275{
276 #ifndef HAVE_ARMV7
277 u_int *ptr=((u_int *)addr)+4;
278 #else
279 u_int *ptr=((u_int *)addr)+6;
280 #endif
281 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
282 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
283 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
284 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
285 return 1;
286}
287
288// get source that block at addr was compiled from (host pointers)
289static void get_bounds(void *addr, u_char **start, u_char **end)
290{
291 u_int *ptr = addr;
292 #ifndef HAVE_ARMV7
293 u_int offset;
294 // get from literal pool
295 assert((*ptr&0xFFFF0000)==0xe59f0000);
296 offset=*ptr&0xfff;
297 u_int source=*(u_int*)((void *)ptr+offset+8);
298 ptr++;
299 //assert((*ptr&0xFFFF0000)==0xe59f0000);
300 //offset=*ptr&0xfff;
301 //u_int copy=*(u_int*)((void *)ptr+offset+8);
302 ptr++;
303 assert((*ptr&0xFFFF0000)==0xe59f0000);
304 offset=*ptr&0xfff;
305 u_int len=*(u_int*)((void *)ptr+offset+8);
306 ptr++;
307 ptr++;
308 #else
309 // ARMv7 movw/movt
310 assert((*ptr&0xFFF00000)==0xe3000000);
311 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
312 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
313 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
314 ptr+=6;
315 #endif
316 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
317 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
318 *start=(u_char *)source;
319 *end=(u_char *)source+len;
320}
321
322// Allocate a specific ARM register.
323static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
324{
325 int n;
326 int dirty=0;
327
328 // see if it's already allocated (and dealloc it)
329 for(n=0;n<HOST_REGS;n++)
330 {
331 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
332 dirty=(cur->dirty>>n)&1;
333 cur->regmap[n]=-1;
334 }
335 }
336
337 cur->regmap[hr]=reg;
338 cur->dirty&=~(1<<hr);
339 cur->dirty|=dirty<<hr;
340 cur->isconst&=~(1<<hr);
341}
342
343// Alloc cycle count into dedicated register
344static void alloc_cc(struct regstat *cur,int i)
345{
346 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
347}
348
349/* Assembler */
350
351static unused char regname[16][4] = {
352 "r0",
353 "r1",
354 "r2",
355 "r3",
356 "r4",
357 "r5",
358 "r6",
359 "r7",
360 "r8",
361 "r9",
362 "r10",
363 "fp",
364 "r12",
365 "sp",
366 "lr",
367 "pc"};
368
369static void output_w32(u_int word)
370{
371 *((u_int *)out)=word;
372 out+=4;
373}
374
// Pack three register numbers (each < 16) into their instruction fields:
// rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
382
// Pack rd, rn and an 8-bit immediate that is to be shifted left by an even
// amount. The left shift is expressed through the rotate field at bits 8-11,
// stored as ((32 - shift) & 30) << 7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate = ((32 - shift) & 30) << 7;
  const u_int regs = (rn << 16) | (rd << 12);
  return regs | rotate | imm;
}
391
// Try to express imm as an 8-bit value with an even rotation.
// On success returns 1 and stores the 12-bit rotate/immediate field in
// *encoded; on failure returns 0 and leaves *encoded at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;
  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
407
408static void genimm_checked(u_int imm,u_int *encoded)
409{
410 u_int ret=genimm(imm,encoded);
411 assert(ret);
412 (void)ret;
413}
414
415static u_int genjmp(u_int addr)
416{
417 if (addr < 3) return 0; // a branch that will be patched later
418 int offset = addr-(int)out-8;
419 if (offset < -33554432 || offset >= 33554432) {
420 SysPrintf("genjmp: out of range: %08x\n", offset);
421 abort();
422 return 0;
423 }
424 return ((u_int)offset>>2)&0xffffff;
425}
426
427static unused void emit_breakpoint(void)
428{
429 assem_debug("bkpt #0\n");
430 //output_w32(0xe1200070);
431 output_w32(0xe7f001f0);
432}
433
434static void emit_mov(int rs,int rt)
435{
436 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
437 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
438}
439
440static void emit_movs(int rs,int rt)
441{
442 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
443 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
444}
445
446static void emit_add(int rs1,int rs2,int rt)
447{
448 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
449 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
450}
451
452static void emit_adcs(int rs1,int rs2,int rt)
453{
454 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
455 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
456}
457
458static void emit_neg(int rs, int rt)
459{
460 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
461 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
462}
463
464static void emit_sub(int rs1,int rs2,int rt)
465{
466 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
467 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
468}
469
470static void emit_zeroreg(int rt)
471{
472 assem_debug("mov %s,#0\n",regname[rt]);
473 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
474}
475
476static void emit_loadlp(u_int imm,u_int rt)
477{
478 add_literal((int)out,imm);
479 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
480 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
481}
482
483static void emit_movw(u_int imm,u_int rt)
484{
485 assert(imm<65536);
486 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
487 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
488}
489
490static void emit_movt(u_int imm,u_int rt)
491{
492 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
493 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
494}
495
496static void emit_movimm(u_int imm,u_int rt)
497{
498 u_int armval;
499 if(genimm(imm,&armval)) {
500 assem_debug("mov %s,#%d\n",regname[rt],imm);
501 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
502 }else if(genimm(~imm,&armval)) {
503 assem_debug("mvn %s,#%d\n",regname[rt],imm);
504 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
505 }else if(imm<65536) {
506 #ifndef HAVE_ARMV7
507 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
508 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
509 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
510 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
511 #else
512 emit_movw(imm,rt);
513 #endif
514 }else{
515 #ifndef HAVE_ARMV7
516 emit_loadlp(imm,rt);
517 #else
518 emit_movw(imm&0x0000FFFF,rt);
519 emit_movt(imm&0xFFFF0000,rt);
520 #endif
521 }
522}
523
524static void emit_pcreladdr(u_int rt)
525{
526 assem_debug("add %s,pc,#?\n",regname[rt]);
527 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
528}
529
530static void emit_loadreg(int r, int hr)
531{
532 if(r&64) {
533 SysPrintf("64bit load in 32bit mode!\n");
534 assert(0);
535 return;
536 }
537 if((r&63)==0)
538 emit_zeroreg(hr);
539 else {
540 int addr = (int)&psxRegs.GPR.r[r];
541 switch (r) {
542 //case HIREG: addr = &hi; break;
543 //case LOREG: addr = &lo; break;
544 case CCREG: addr = (int)&cycle_count; break;
545 case CSREG: addr = (int)&Status; break;
546 case INVCP: addr = (int)&invc_ptr; break;
547 default: assert(r < 34); break;
548 }
549 u_int offset = addr-(u_int)&dynarec_local;
550 assert(offset<4096);
551 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
552 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
553 }
554}
555
556static void emit_storereg(int r, int hr)
557{
558 if(r&64) {
559 SysPrintf("64bit store in 32bit mode!\n");
560 assert(0);
561 return;
562 }
563 int addr = (int)&psxRegs.GPR.r[r];
564 switch (r) {
565 //case HIREG: addr = &hi; break;
566 //case LOREG: addr = &lo; break;
567 case CCREG: addr = (int)&cycle_count; break;
568 default: assert(r < 34); break;
569 }
570 u_int offset = addr-(u_int)&dynarec_local;
571 assert(offset<4096);
572 assem_debug("str %s,fp+%d\n",regname[hr],offset);
573 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
574}
575
576static void emit_test(int rs, int rt)
577{
578 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
579 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
580}
581
582static void emit_testimm(int rs,int imm)
583{
584 u_int armval;
585 assem_debug("tst %s,#%d\n",regname[rs],imm);
586 genimm_checked(imm,&armval);
587 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
588}
589
590static void emit_testeqimm(int rs,int imm)
591{
592 u_int armval;
593 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
594 genimm_checked(imm,&armval);
595 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
596}
597
598static void emit_not(int rs,int rt)
599{
600 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
601 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
602}
603
604static void emit_mvnmi(int rs,int rt)
605{
606 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
607 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
608}
609
610static void emit_and(u_int rs1,u_int rs2,u_int rt)
611{
612 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
613 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
614}
615
616static void emit_or(u_int rs1,u_int rs2,u_int rt)
617{
618 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
619 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
620}
621
622static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
623{
624 assert(rs<16);
625 assert(rt<16);
626 assert(imm<32);
627 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
628 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
629}
630
631static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
632{
633 assert(rs<16);
634 assert(rt<16);
635 assert(imm<32);
636 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
637 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
638}
639
640static void emit_xor(u_int rs1,u_int rs2,u_int rt)
641{
642 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
643 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
644}
645
646static void emit_addimm(u_int rs,int imm,u_int rt)
647{
648 assert(rs<16);
649 assert(rt<16);
650 if(imm!=0) {
651 u_int armval;
652 if(genimm(imm,&armval)) {
653 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
654 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
655 }else if(genimm(-imm,&armval)) {
656 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
657 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
658 #ifdef HAVE_ARMV7
659 }else if(rt!=rs&&(u_int)imm<65536) {
660 emit_movw(imm&0x0000ffff,rt);
661 emit_add(rs,rt,rt);
662 }else if(rt!=rs&&(u_int)-imm<65536) {
663 emit_movw(-imm&0x0000ffff,rt);
664 emit_sub(rs,rt,rt);
665 #endif
666 }else if((u_int)-imm<65536) {
667 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
668 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
669 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
670 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
671 }else {
672 do {
673 int shift = (ffs(imm) - 1) & ~1;
674 int imm8 = imm & (0xff << shift);
675 genimm_checked(imm8,&armval);
676 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
677 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
678 rs = rt;
679 imm &= ~imm8;
680 }
681 while (imm != 0);
682 }
683 }
684 else if(rs!=rt) emit_mov(rs,rt);
685}
686
687static void emit_addimm_and_set_flags(int imm,int rt)
688{
689 assert(imm>-65536&&imm<65536);
690 u_int armval;
691 if(genimm(imm,&armval)) {
692 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
693 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
694 }else if(genimm(-imm,&armval)) {
695 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
696 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
697 }else if(imm<0) {
698 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
699 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
700 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
701 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
702 }else{
703 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
704 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
705 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
706 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
707 }
708}
709
710static void emit_addimm_no_flags(u_int imm,u_int rt)
711{
712 emit_addimm(rt,imm,rt);
713}
714
715static void emit_addnop(u_int r)
716{
717 assert(r<16);
718 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
719 output_w32(0xe2800000|rd_rn_rm(r,r,0));
720}
721
722static void emit_andimm(int rs,int imm,int rt)
723{
724 u_int armval;
725 if(imm==0) {
726 emit_zeroreg(rt);
727 }else if(genimm(imm,&armval)) {
728 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
729 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
730 }else if(genimm(~imm,&armval)) {
731 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
732 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
733 }else if(imm==65535) {
734 #ifndef HAVE_ARMV6
735 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
736 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
737 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
738 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
739 #else
740 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
741 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
742 #endif
743 }else{
744 assert(imm>0&&imm<65535);
745 #ifndef HAVE_ARMV7
746 assem_debug("mov r14,#%d\n",imm&0xFF00);
747 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
748 assem_debug("add r14,r14,#%d\n",imm&0xFF);
749 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
750 #else
751 emit_movw(imm,HOST_TEMPREG);
752 #endif
753 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
754 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
755 }
756}
757
758static void emit_orimm(int rs,int imm,int rt)
759{
760 u_int armval;
761 if(imm==0) {
762 if(rs!=rt) emit_mov(rs,rt);
763 }else if(genimm(imm,&armval)) {
764 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
765 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
766 }else{
767 assert(imm>0&&imm<65536);
768 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
769 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
770 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
771 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
772 }
773}
774
775static void emit_xorimm(int rs,int imm,int rt)
776{
777 u_int armval;
778 if(imm==0) {
779 if(rs!=rt) emit_mov(rs,rt);
780 }else if(genimm(imm,&armval)) {
781 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
782 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
783 }else{
784 assert(imm>0&&imm<65536);
785 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
786 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
787 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
788 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
789 }
790}
791
792static void emit_shlimm(int rs,u_int imm,int rt)
793{
794 assert(imm>0);
795 assert(imm<32);
796 //if(imm==1) ...
797 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
798 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
799}
800
801static void emit_lsls_imm(int rs,int imm,int rt)
802{
803 assert(imm>0);
804 assert(imm<32);
805 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
806 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
807}
808
809static unused void emit_lslpls_imm(int rs,int imm,int rt)
810{
811 assert(imm>0);
812 assert(imm<32);
813 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
814 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
815}
816
817static void emit_shrimm(int rs,u_int imm,int rt)
818{
819 assert(imm>0);
820 assert(imm<32);
821 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
822 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
823}
824
825static void emit_sarimm(int rs,u_int imm,int rt)
826{
827 assert(imm>0);
828 assert(imm<32);
829 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
830 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
831}
832
833static void emit_rorimm(int rs,u_int imm,int rt)
834{
835 assert(imm>0);
836 assert(imm<32);
837 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
838 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
839}
840
// Sign-extend the low 16 bits of rs into rt: sxth on ARMv6 and later,
// otherwise a shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
  #endif
}
851
// Sign-extend the low 8 bits of rs into rt: sxtb on ARMv6 and later,
// otherwise a shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
  #endif
}
862
863static void emit_shl(u_int rs,u_int shift,u_int rt)
864{
865 assert(rs<16);
866 assert(rt<16);
867 assert(shift<16);
868 //if(imm==1) ...
869 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
870 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
871}
872
873static void emit_shr(u_int rs,u_int shift,u_int rt)
874{
875 assert(rs<16);
876 assert(rt<16);
877 assert(shift<16);
878 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
879 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
880}
881
882static void emit_sar(u_int rs,u_int shift,u_int rt)
883{
884 assert(rs<16);
885 assert(rt<16);
886 assert(shift<16);
887 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
888 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
889}
890
891static void emit_orrshl(u_int rs,u_int shift,u_int rt)
892{
893 assert(rs<16);
894 assert(rt<16);
895 assert(shift<16);
896 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
897 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
898}
899
900static void emit_orrshr(u_int rs,u_int shift,u_int rt)
901{
902 assert(rs<16);
903 assert(rt<16);
904 assert(shift<16);
905 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
906 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
907}
908
909static void emit_cmpimm(int rs,int imm)
910{
911 u_int armval;
912 if(genimm(imm,&armval)) {
913 assem_debug("cmp %s,#%d\n",regname[rs],imm);
914 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
915 }else if(genimm(-imm,&armval)) {
916 assem_debug("cmn %s,#%d\n",regname[rs],imm);
917 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
918 }else if(imm>0) {
919 assert(imm<65536);
920 emit_movimm(imm,HOST_TEMPREG);
921 assem_debug("cmp %s,r14\n",regname[rs]);
922 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
923 }else{
924 assert(imm>-65536);
925 emit_movimm(-imm,HOST_TEMPREG);
926 assem_debug("cmn %s,r14\n",regname[rs]);
927 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
928 }
929}
930
931static void emit_cmovne_imm(int imm,int rt)
932{
933 assem_debug("movne %s,#%d\n",regname[rt],imm);
934 u_int armval;
935 genimm_checked(imm,&armval);
936 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
937}
938
939static void emit_cmovl_imm(int imm,int rt)
940{
941 assem_debug("movlt %s,#%d\n",regname[rt],imm);
942 u_int armval;
943 genimm_checked(imm,&armval);
944 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
945}
946
947static void emit_cmovb_imm(int imm,int rt)
948{
949 assem_debug("movcc %s,#%d\n",regname[rt],imm);
950 u_int armval;
951 genimm_checked(imm,&armval);
952 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
953}
954
955static void emit_cmovne_reg(int rs,int rt)
956{
957 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
958 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
959}
960
961static void emit_cmovl_reg(int rs,int rt)
962{
963 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
964 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
965}
966
967static void emit_cmovs_reg(int rs,int rt)
968{
969 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
970 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
971}
972
// rt = (rs < imm) ? 1 : 0, signed comparison.
// When rt aliases rs it can only be cleared after the compare has read it.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
980
// rt = (rs < imm) ? 1 : 0, using the carry-clear condition (movcc).
// When rt aliases rs it can only be cleared after the compare has read it.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
988
989static void emit_cmp(int rs,int rt)
990{
991 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
992 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
993}
994
// rt = (rs > 0) ? 1 : 0: compare rs with 1, load 1 into rt, then replace it
// with 0 when the comparison says "less than".
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1002
// rt = (rs != 0) ? 1 : 0. The flags come from movs (which also copies rs to
// rt) or, when rs and rt are the same register, from tst.
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1010
// rt = (rs1 < rs2) ? 1 : 0, signed comparison.
// When rt aliases an input it can only be cleared after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1019
// rt = 1 when cmp rs1,rs2 leaves the carry clear (movcc), else 0.
// When rt aliases an input it can only be cleared after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1028
1029static void emit_call(const void *a_)
1030{
1031 int a = (int)a_;
1032 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1033 u_int offset=genjmp(a);
1034 output_w32(0xeb000000|offset);
1035}
1036
1037static void emit_jmp(const void *a_)
1038{
1039 int a = (int)a_;
1040 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1041 u_int offset=genjmp(a);
1042 output_w32(0xea000000|offset);
1043}
1044
// bne <a_>
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n",target);
  output_w32(0x1a000000 | genjmp(target));
}
1052
// beq <a_>
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n",target);
  output_w32(0x0a000000 | genjmp(target));
}
1060
// bmi <a_>
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000 | genjmp(target));
}
1068
// bpl <a_>
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000 | genjmp(target));
}
1076
// blt <a_>
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n",target);
  output_w32(0xba000000 | genjmp(target));
}
1084
// bge <a_>
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n",target);
  output_w32(0xaa000000 | genjmp(target));
}
1092
// bvc <a_>
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000 | genjmp(target));
}
1100
// bcs <a_>
static void emit_jc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000 | genjmp(target));
}
1108
// bcc <a_>
static void emit_jcc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcc %x\n",target);
  output_w32(0x3a000000 | genjmp(target));
}
1116
1117static void emit_callreg(u_int r)
1118{
1119 assert(r<15);
1120 assem_debug("blx %s\n",regname[r]);
1121 output_w32(0xe12fff30|r);
1122}
1123
1124static void emit_jmpreg(u_int r)
1125{
1126 assem_debug("mov pc,%s\n",regname[r]);
1127 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1128}
1129
1130static void emit_ret(void)
1131{
1132 emit_jmpreg(14);
1133}
1134
1135static void emit_readword_indexed(int offset, int rs, int rt)
1136{
1137 assert(offset>-4096&&offset<4096);
1138 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1139 if(offset>=0) {
1140 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1141 }else{
1142 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1143 }
1144}
1145
1146static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1147{
1148 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1149 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1150}
1151
1152static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1153{
1154 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1155 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1156}
1157
1158static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1159{
1160 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1161 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1162}
1163
1164static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1165{
1166 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1167 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1168}
1169
1170static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1171{
1172 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1173 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1174}
1175
1176static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1177{
1178 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1179 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1180}
1181
1182static void emit_movsbl_indexed(int offset, int rs, int rt)
1183{
1184 assert(offset>-256&&offset<256);
1185 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1186 if(offset>=0) {
1187 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1188 }else{
1189 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1190 }
1191}
1192
1193static void emit_movswl_indexed(int offset, int rs, int rt)
1194{
1195 assert(offset>-256&&offset<256);
1196 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1197 if(offset>=0) {
1198 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1199 }else{
1200 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1201 }
1202}
1203
1204static void emit_movzbl_indexed(int offset, int rs, int rt)
1205{
1206 assert(offset>-4096&&offset<4096);
1207 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1208 if(offset>=0) {
1209 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1210 }else{
1211 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1212 }
1213}
1214
1215static void emit_movzwl_indexed(int offset, int rs, int rt)
1216{
1217 assert(offset>-256&&offset<256);
1218 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1219 if(offset>=0) {
1220 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1221 }else{
1222 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1223 }
1224}
1225
1226static void emit_ldrd(int offset, int rs, int rt)
1227{
1228 assert(offset>-256&&offset<256);
1229 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1230 if(offset>=0) {
1231 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1232 }else{
1233 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1234 }
1235}
1236
1237static void emit_readword(void *addr, int rt)
1238{
1239 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1240 assert(offset<4096);
1241 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1242 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1243}
1244
1245static void emit_writeword_indexed(int rt, int offset, int rs)
1246{
1247 assert(offset>-4096&&offset<4096);
1248 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1249 if(offset>=0) {
1250 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1251 }else{
1252 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1253 }
1254}
1255
1256static void emit_writehword_indexed(int rt, int offset, int rs)
1257{
1258 assert(offset>-256&&offset<256);
1259 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1260 if(offset>=0) {
1261 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1262 }else{
1263 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1264 }
1265}
1266
1267static void emit_writebyte_indexed(int rt, int offset, int rs)
1268{
1269 assert(offset>-4096&&offset<4096);
1270 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1271 if(offset>=0) {
1272 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1273 }else{
1274 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1275 }
1276}
1277
1278static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1279{
1280 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1281 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1282}
1283
1284static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1285{
1286 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1287 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1288}
1289
1290static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1291{
1292 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1293 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1294}
1295
1296static void emit_writeword(int rt, void *addr)
1297{
1298 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1299 assert(offset<4096);
1300 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1301 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1302}
1303
1304static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1305{
1306 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1307 assert(rs1<16);
1308 assert(rs2<16);
1309 assert(hi<16);
1310 assert(lo<16);
1311 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1312}
1313
1314static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1315{
1316 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1317 assert(rs1<16);
1318 assert(rs2<16);
1319 assert(hi<16);
1320 assert(lo<16);
1321 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1322}
1323
1324static void emit_clz(int rs,int rt)
1325{
1326 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1327 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1328}
1329
1330static void emit_subcs(int rs1,int rs2,int rt)
1331{
1332 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1333 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1334}
1335
1336static void emit_shrcc_imm(int rs,u_int imm,int rt)
1337{
1338 assert(imm>0);
1339 assert(imm<32);
1340 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1341 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1342}
1343
1344static void emit_shrne_imm(int rs,u_int imm,int rt)
1345{
1346 assert(imm>0);
1347 assert(imm<32);
1348 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1349 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1350}
1351
1352static void emit_negmi(int rs, int rt)
1353{
1354 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1355 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1356}
1357
1358static void emit_negsmi(int rs, int rt)
1359{
1360 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1361 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1362}
1363
1364static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1365{
1366 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1367 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1368}
1369
1370static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1371{
1372 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1373 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1374}
1375
1376static void emit_teq(int rs, int rt)
1377{
1378 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1379 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1380}
1381
1382static void emit_rsbimm(int rs, int imm, int rt)
1383{
1384 u_int armval;
1385 genimm_checked(imm,&armval);
1386 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1387 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1388}
1389
1390// Conditionally select one of two immediates, optimizing for small code size
1391// This will only be called if HAVE_CMOV_IMM is defined
1392static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1393{
1394 u_int armval;
1395 if(genimm(imm2-imm1,&armval)) {
1396 emit_movimm(imm1,rt);
1397 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1398 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1399 }else if(genimm(imm1-imm2,&armval)) {
1400 emit_movimm(imm1,rt);
1401 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1402 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1403 }
1404 else {
1405 #ifndef HAVE_ARMV7
1406 emit_movimm(imm1,rt);
1407 add_literal((int)out,imm2);
1408 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1409 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1410 #else
1411 emit_movw(imm1&0x0000FFFF,rt);
1412 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1413 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1414 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1415 }
1416 emit_movt(imm1&0xFFFF0000,rt);
1417 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1418 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1419 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1420 }
1421 #endif
1422 }
1423}
1424
1425// special case for checking invalid_code
1426static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1427{
1428 assert(imm<128&&imm>=0);
1429 assert(r>=0&&r<16);
1430 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1431 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1432 emit_cmpimm(HOST_TEMPREG,imm);
1433}
1434
1435static void emit_callne(int a)
1436{
1437 assem_debug("blne %x\n",a);
1438 u_int offset=genjmp(a);
1439 output_w32(0x1b000000|offset);
1440}
1441
1442// Used to preload hash table entries
1443static unused void emit_prefetchreg(int r)
1444{
1445 assem_debug("pld %s\n",regname[r]);
1446 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1447}
1448
1449// Special case for mini_ht
1450static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1451{
1452 assert(offset<4096);
1453 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1454 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1455}
1456
1457static void emit_orrne_imm(int rs,int imm,int rt)
1458{
1459 u_int armval;
1460 genimm_checked(imm,&armval);
1461 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1462 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1463}
1464
1465static void emit_andne_imm(int rs,int imm,int rt)
1466{
1467 u_int armval;
1468 genimm_checked(imm,&armval);
1469 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1470 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1471}
1472
1473static unused void emit_addpl_imm(int rs,int imm,int rt)
1474{
1475 u_int armval;
1476 genimm_checked(imm,&armval);
1477 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1478 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1479}
1480
1481static void emit_jno_unlikely(int a)
1482{
1483 //emit_jno(a);
1484 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1485 output_w32(0x72800000|rd_rn_rm(15,15,0));
1486}
1487
1488static void save_regs_all(u_int reglist)
1489{
1490 int i;
1491 if(!reglist) return;
1492 assem_debug("stmia fp,{");
1493 for(i=0;i<16;i++)
1494 if(reglist&(1<<i))
1495 assem_debug("r%d,",i);
1496 assem_debug("}\n");
1497 output_w32(0xe88b0000|reglist);
1498}
1499
1500static void restore_regs_all(u_int reglist)
1501{
1502 int i;
1503 if(!reglist) return;
1504 assem_debug("ldmia fp,{");
1505 for(i=0;i<16;i++)
1506 if(reglist&(1<<i))
1507 assem_debug("r%d,",i);
1508 assem_debug("}\n");
1509 output_w32(0xe89b0000|reglist);
1510}
1511
1512// Save registers before function call
1513static void save_regs(u_int reglist)
1514{
1515 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1516 save_regs_all(reglist);
1517}
1518
1519// Restore registers after function call
1520static void restore_regs(u_int reglist)
1521{
1522 reglist&=CALLER_SAVE_REGS;
1523 restore_regs_all(reglist);
1524}
1525
1526/* Stubs/epilogue */
1527
1528static void literal_pool(int n)
1529{
1530 if(!literalcount) return;
1531 if(n) {
1532 if((int)out-literals[0][0]<4096-n) return;
1533 }
1534 u_int *ptr;
1535 int i;
1536 for(i=0;i<literalcount;i++)
1537 {
1538 u_int l_addr=(u_int)out;
1539 int j;
1540 for(j=0;j<i;j++) {
1541 if(literals[j][1]==literals[i][1]) {
1542 //printf("dup %08x\n",literals[i][1]);
1543 l_addr=literals[j][0];
1544 break;
1545 }
1546 }
1547 ptr=(u_int *)literals[i][0];
1548 u_int offset=l_addr-(u_int)ptr-8;
1549 assert(offset<4096);
1550 assert(!(offset&3));
1551 *ptr|=offset;
1552 if(l_addr==(u_int)out) {
1553 literals[i][0]=l_addr; // remember for dupes
1554 output_w32(literals[i][1]);
1555 }
1556 }
1557 literalcount=0;
1558}
1559
1560static void literal_pool_jumpover(int n)
1561{
1562 if(!literalcount) return;
1563 if(n) {
1564 if((int)out-literals[0][0]<4096-n) return;
1565 }
1566 void *jaddr = out;
1567 emit_jmp(0);
1568 literal_pool(0);
1569 set_jump_target(jaddr, out);
1570}
1571
1572// parsed by get_pointer, find_extjump_insn
1573static void emit_extjump2(u_char *addr, u_int target, void *linker)
1574{
1575 u_char *ptr=(u_char *)addr;
1576 assert((ptr[3]&0x0e)==0xa);
1577 (void)ptr;
1578
1579 emit_loadlp(target,0);
1580 emit_loadlp((u_int)addr,1);
1581 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
1582 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1583//DEBUG >
1584#ifdef DEBUG_CYCLE_COUNT
1585 emit_readword(&last_count,ECX);
1586 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1587 emit_readword(&next_interupt,ECX);
1588 emit_writeword(HOST_CCREG,&Count);
1589 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1590 emit_writeword(ECX,&last_count);
1591#endif
1592//DEBUG <
1593 emit_jmp(linker);
1594}
1595
1596static void check_extjump2(void *src)
1597{
1598 u_int *ptr = src;
1599 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1600 (void)ptr;
1601}
1602
1603// put rt_val into rt, potentially making use of rs with value rs_val
1604static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1605{
1606 u_int armval;
1607 int diff;
1608 if(genimm(rt_val,&armval)) {
1609 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1610 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1611 return;
1612 }
1613 if(genimm(~rt_val,&armval)) {
1614 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1615 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1616 return;
1617 }
1618 diff=rt_val-rs_val;
1619 if(genimm(diff,&armval)) {
1620 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1621 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1622 return;
1623 }else if(genimm(-diff,&armval)) {
1624 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1625 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1626 return;
1627 }
1628 emit_movimm(rt_val,rt);
1629}
1630
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. if v1 and
// v2 are equal or if their difference (in either direction), once its
// trailing pairs of zero bits are shifted out, fits in 8 bits.
// Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int xs;
  // Kept unsigned so that negating it below cannot overflow: negating
  // a signed INT_MIN difference would be undefined behaviour.
  u_int diff;
  if(v1==v2) return 1;
  diff=v2-v1;
  for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2)
    ;
  if(xs<0x100) return 1;
  for(xs=0u-diff;xs!=0&&(xs&3)==0;xs>>=2)
    ;
  if(xs<0x100) return 1;
  return 0;
}
1646
1647static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1648{
1649 switch(type) {
1650 case LOADB_STUB: emit_signextend8(rs,rt); break;
1651 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1652 case LOADH_STUB: emit_signextend16(rs,rt); break;
1653 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1654 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1655 default: assert(0);
1656 }
1657}
1658
1659#include "pcsxmem.h"
1660#include "pcsxmem_inline.c"
1661
1662static void do_readstub(int n)
1663{
1664 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1665 literal_pool(256);
1666 set_jump_target(stubs[n].addr, out);
1667 enum stub_type type=stubs[n].type;
1668 int i=stubs[n].a;
1669 int rs=stubs[n].b;
1670 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1671 u_int reglist=stubs[n].e;
1672 signed char *i_regmap=i_regs->regmap;
1673 int rt;
1674 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1675 rt=get_reg(i_regmap,FTEMP);
1676 }else{
1677 rt=get_reg(i_regmap,rt1[i]);
1678 }
1679 assert(rs>=0);
1680 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1681 void *restore_jump = NULL;
1682 reglist|=(1<<rs);
1683 for(r=0;r<=12;r++) {
1684 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1685 temp=r; break;
1686 }
1687 }
1688 if(rt>=0&&rt1[i]!=0)
1689 reglist&=~(1<<rt);
1690 if(temp==-1) {
1691 save_regs(reglist);
1692 regs_saved=1;
1693 temp=(rs==0)?2:0;
1694 }
1695 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1696 temp2=1;
1697 emit_readword(&mem_rtab,temp);
1698 emit_shrimm(rs,12,temp2);
1699 emit_readword_dualindexedx4(temp,temp2,temp2);
1700 emit_lsls_imm(temp2,1,temp2);
1701 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1702 switch(type) {
1703 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1704 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1705 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1706 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1707 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1708 default: assert(0);
1709 }
1710 }
1711 if(regs_saved) {
1712 restore_jump=out;
1713 emit_jcc(0); // jump to reg restore
1714 }
1715 else
1716 emit_jcc(stubs[n].retaddr); // return address
1717
1718 if(!regs_saved)
1719 save_regs(reglist);
1720 void *handler=NULL;
1721 if(type==LOADB_STUB||type==LOADBU_STUB)
1722 handler=jump_handler_read8;
1723 if(type==LOADH_STUB||type==LOADHU_STUB)
1724 handler=jump_handler_read16;
1725 if(type==LOADW_STUB)
1726 handler=jump_handler_read32;
1727 assert(handler);
1728 pass_args(rs,temp2);
1729 int cc=get_reg(i_regmap,CCREG);
1730 if(cc<0)
1731 emit_loadreg(CCREG,2);
1732 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1733 emit_call(handler);
1734 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1735 mov_loadtype_adj(type,0,rt);
1736 }
1737 if(restore_jump)
1738 set_jump_target(restore_jump, out);
1739 restore_regs(reglist);
1740 emit_jmp(stubs[n].retaddr); // return address
1741}
1742
1743static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1744{
1745 int rs=get_reg(regmap,target);
1746 int rt=get_reg(regmap,target);
1747 if(rs<0) rs=get_reg(regmap,-1);
1748 assert(rs>=0);
1749 u_int is_dynamic,far_call=0;
1750 uintptr_t host_addr = 0;
1751 void *handler;
1752 int cc=get_reg(regmap,CCREG);
1753 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1754 return;
1755 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1756 if (handler == NULL) {
1757 if(rt<0||rt1[i]==0)
1758 return;
1759 if(addr!=host_addr)
1760 emit_movimm_from(addr,rs,host_addr,rs);
1761 switch(type) {
1762 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1763 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1764 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1765 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1766 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1767 default: assert(0);
1768 }
1769 return;
1770 }
1771 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1772 if(is_dynamic) {
1773 if(type==LOADB_STUB||type==LOADBU_STUB)
1774 handler=jump_handler_read8;
1775 if(type==LOADH_STUB||type==LOADHU_STUB)
1776 handler=jump_handler_read16;
1777 if(type==LOADW_STUB)
1778 handler=jump_handler_read32;
1779 }
1780
1781 // call a memhandler
1782 if(rt>=0&&rt1[i]!=0)
1783 reglist&=~(1<<rt);
1784 save_regs(reglist);
1785 if(target==0)
1786 emit_movimm(addr,0);
1787 else if(rs!=0)
1788 emit_mov(rs,0);
1789 int offset=(u_char *)handler-out-8;
1790 if(offset<-33554432||offset>=33554432) {
1791 // unreachable memhandler, a plugin func perhaps
1792 emit_movimm((u_int)handler,12);
1793 far_call=1;
1794 }
1795 if(cc<0)
1796 emit_loadreg(CCREG,2);
1797 if(is_dynamic) {
1798 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1799 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1800 }
1801 else {
1802 emit_readword(&last_count,3);
1803 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1804 emit_add(2,3,2);
1805 emit_writeword(2,&Count);
1806 }
1807
1808 if(far_call)
1809 emit_callreg(12);
1810 else
1811 emit_call(handler);
1812
1813 if(rt>=0&&rt1[i]!=0) {
1814 switch(type) {
1815 case LOADB_STUB: emit_signextend8(0,rt); break;
1816 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1817 case LOADH_STUB: emit_signextend16(0,rt); break;
1818 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1819 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1820 default: assert(0);
1821 }
1822 }
1823 restore_regs(reglist);
1824}
1825
1826static void do_writestub(int n)
1827{
1828 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1829 literal_pool(256);
1830 set_jump_target(stubs[n].addr, out);
1831 enum stub_type type=stubs[n].type;
1832 int i=stubs[n].a;
1833 int rs=stubs[n].b;
1834 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1835 u_int reglist=stubs[n].e;
1836 signed char *i_regmap=i_regs->regmap;
1837 int rt,r;
1838 if(itype[i]==C1LS||itype[i]==C2LS) {
1839 rt=get_reg(i_regmap,r=FTEMP);
1840 }else{
1841 rt=get_reg(i_regmap,r=rs2[i]);
1842 }
1843 assert(rs>=0);
1844 assert(rt>=0);
1845 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1846 void *restore_jump = NULL;
1847 int reglist2=reglist|(1<<rs)|(1<<rt);
1848 for(rtmp=0;rtmp<=12;rtmp++) {
1849 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1850 temp=rtmp; break;
1851 }
1852 }
1853 if(temp==-1) {
1854 save_regs(reglist);
1855 regs_saved=1;
1856 for(rtmp=0;rtmp<=3;rtmp++)
1857 if(rtmp!=rs&&rtmp!=rt)
1858 {temp=rtmp;break;}
1859 }
1860 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1861 temp2=3;
1862 emit_readword(&mem_wtab,temp);
1863 emit_shrimm(rs,12,temp2);
1864 emit_readword_dualindexedx4(temp,temp2,temp2);
1865 emit_lsls_imm(temp2,1,temp2);
1866 switch(type) {
1867 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1868 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1869 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1870 default: assert(0);
1871 }
1872 if(regs_saved) {
1873 restore_jump=out;
1874 emit_jcc(0); // jump to reg restore
1875 }
1876 else
1877 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1878
1879 if(!regs_saved)
1880 save_regs(reglist);
1881 void *handler=NULL;
1882 switch(type) {
1883 case STOREB_STUB: handler=jump_handler_write8; break;
1884 case STOREH_STUB: handler=jump_handler_write16; break;
1885 case STOREW_STUB: handler=jump_handler_write32; break;
1886 default: assert(0);
1887 }
1888 assert(handler);
1889 pass_args(rs,rt);
1890 if(temp2!=3)
1891 emit_mov(temp2,3);
1892 int cc=get_reg(i_regmap,CCREG);
1893 if(cc<0)
1894 emit_loadreg(CCREG,2);
1895 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1896 // returns new cycle_count
1897 emit_call(handler);
1898 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1899 if(cc<0)
1900 emit_storereg(CCREG,2);
1901 if(restore_jump)
1902 set_jump_target(restore_jump, out);
1903 restore_regs(reglist);
1904 emit_jmp(stubs[n].retaddr);
1905}
1906
1907static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1908{
1909 int rs=get_reg(regmap,-1);
1910 int rt=get_reg(regmap,target);
1911 assert(rs>=0);
1912 assert(rt>=0);
1913 uintptr_t host_addr = 0;
1914 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1915 if (handler == NULL) {
1916 if(addr!=host_addr)
1917 emit_movimm_from(addr,rs,host_addr,rs);
1918 switch(type) {
1919 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1920 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1921 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1922 default: assert(0);
1923 }
1924 return;
1925 }
1926
1927 // call a memhandler
1928 save_regs(reglist);
1929 pass_args(rs,rt);
1930 int cc=get_reg(regmap,CCREG);
1931 if(cc<0)
1932 emit_loadreg(CCREG,2);
1933 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1934 emit_movimm((u_int)handler,3);
1935 // returns new cycle_count
1936 emit_call(jump_handler_write_h);
1937 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
1938 if(cc<0)
1939 emit_storereg(CCREG,2);
1940 restore_regs(reglist);
1941}
1942
1943static void do_unalignedwritestub(int n)
1944{
1945 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
1946 literal_pool(256);
1947 set_jump_target(stubs[n].addr, out);
1948
1949 int i=stubs[n].a;
1950 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1951 int addr=stubs[n].b;
1952 u_int reglist=stubs[n].e;
1953 signed char *i_regmap=i_regs->regmap;
1954 int temp2=get_reg(i_regmap,FTEMP);
1955 int rt;
1956 rt=get_reg(i_regmap,rs2[i]);
1957 assert(rt>=0);
1958 assert(addr>=0);
1959 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
1960 reglist|=(1<<addr);
1961 reglist&=~(1<<temp2);
1962
1963#if 1
1964 // don't bother with it and call write handler
1965 save_regs(reglist);
1966 pass_args(addr,rt);
1967 int cc=get_reg(i_regmap,CCREG);
1968 if(cc<0)
1969 emit_loadreg(CCREG,2);
1970 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1971 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
1972 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1973 if(cc<0)
1974 emit_storereg(CCREG,2);
1975 restore_regs(reglist);
1976 emit_jmp(stubs[n].retaddr); // return address
1977#else
1978 emit_andimm(addr,0xfffffffc,temp2);
1979 emit_writeword(temp2,&address);
1980
1981 save_regs(reglist);
1982 emit_shrimm(addr,16,1);
1983 int cc=get_reg(i_regmap,CCREG);
1984 if(cc<0) {
1985 emit_loadreg(CCREG,2);
1986 }
1987 emit_movimm((u_int)readmem,0);
1988 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
1989 emit_call((int)&indirect_jump_indexed);
1990 restore_regs(reglist);
1991
1992 emit_readword(&readmem_dword,temp2);
1993 int temp=addr; //hmh
1994 emit_shlimm(addr,3,temp);
1995 emit_andimm(temp,24,temp);
1996#ifdef BIG_ENDIAN_MIPS
1997 if (opcode[i]==0x2e) // SWR
1998#else
1999 if (opcode[i]==0x2a) // SWL
2000#endif
2001 emit_xorimm(temp,24,temp);
2002 emit_movimm(-1,HOST_TEMPREG);
2003 if (opcode[i]==0x2a) { // SWL
2004 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2005 emit_orrshr(rt,temp,temp2);
2006 }else{
2007 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2008 emit_orrshl(rt,temp,temp2);
2009 }
2010 emit_readword(&address,addr);
2011 emit_writeword(temp2,&word);
2012 //save_regs(reglist); // don't need to, no state changes
2013 emit_shrimm(addr,16,1);
2014 emit_movimm((u_int)writemem,0);
2015 //emit_call((int)&indirect_jump_indexed);
2016 emit_mov(15,14);
2017 emit_readword_dualindexedx4(0,1,15);
2018 emit_readword(&Count,HOST_TEMPREG);
2019 emit_readword(&next_interupt,2);
2020 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
2021 emit_writeword(2,&last_count);
2022 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2023 if(cc<0) {
2024 emit_storereg(CCREG,HOST_TEMPREG);
2025 }
2026 restore_regs(reglist);
2027 emit_jmp(stubs[n].retaddr); // return address
2028#endif
2029}
2030
2031// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
2032static void do_dirty_stub_emit_args(u_int arg0)
2033{
2034 #ifndef HAVE_ARMV7
2035 emit_loadlp((int)source, 1);
2036 emit_loadlp((int)copy, 2);
2037 emit_loadlp(slen*4, 3);
2038 #else
2039 emit_movw(((u_int)source)&0x0000FFFF, 1);
2040 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2041 emit_movt(((u_int)source)&0xFFFF0000, 1);
2042 emit_movt(((u_int)copy)&0xFFFF0000, 2);
2043 emit_movw(slen*4, 3);
2044 #endif
2045 emit_movimm(arg0, 0);
2046}
2047
2048static void *do_dirty_stub(int i)
2049{
2050 assem_debug("do_dirty_stub %x\n",start+i*4);
2051 do_dirty_stub_emit_args(start + i*4);
2052 emit_call(verify_code);
2053 void *entry = out;
2054 load_regs_entry(i);
2055 if (entry == out)
2056 entry = instr_addr[i];
2057 emit_jmp(instr_addr[i]);
2058 return entry;
2059}
2060
2061static void do_dirty_stub_ds()
2062{
2063 do_dirty_stub_emit_args(start + 1);
2064 emit_call(verify_code_ds);
2065}
2066
2067/* Special assem */
2068
2069static void shift_assemble_arm(int i,struct regstat *i_regs)
2070{
2071 if(rt1[i]) {
2072 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2073 {
2074 signed char s,t,shift;
2075 t=get_reg(i_regs->regmap,rt1[i]);
2076 s=get_reg(i_regs->regmap,rs1[i]);
2077 shift=get_reg(i_regs->regmap,rs2[i]);
2078 if(t>=0){
2079 if(rs1[i]==0)
2080 {
2081 emit_zeroreg(t);
2082 }
2083 else if(rs2[i]==0)
2084 {
2085 assert(s>=0);
2086 if(s!=t) emit_mov(s,t);
2087 }
2088 else
2089 {
2090 emit_andimm(shift,31,HOST_TEMPREG);
2091 if(opcode2[i]==4) // SLLV
2092 {
2093 emit_shl(s,HOST_TEMPREG,t);
2094 }
2095 if(opcode2[i]==6) // SRLV
2096 {
2097 emit_shr(s,HOST_TEMPREG,t);
2098 }
2099 if(opcode2[i]==7) // SRAV
2100 {
2101 emit_sar(s,HOST_TEMPREG,t);
2102 }
2103 }
2104 }
2105 } else { // DSLLV/DSRLV/DSRAV
2106 signed char sh,sl,th,tl,shift;
2107 th=get_reg(i_regs->regmap,rt1[i]|64);
2108 tl=get_reg(i_regs->regmap,rt1[i]);
2109 sh=get_reg(i_regs->regmap,rs1[i]|64);
2110 sl=get_reg(i_regs->regmap,rs1[i]);
2111 shift=get_reg(i_regs->regmap,rs2[i]);
2112 if(tl>=0){
2113 if(rs1[i]==0)
2114 {
2115 emit_zeroreg(tl);
2116 if(th>=0) emit_zeroreg(th);
2117 }
2118 else if(rs2[i]==0)
2119 {
2120 assert(sl>=0);
2121 if(sl!=tl) emit_mov(sl,tl);
2122 if(th>=0&&sh!=th) emit_mov(sh,th);
2123 }
2124 else
2125 {
2126 // FIXME: What if shift==tl ?
2127 assert(shift!=tl);
2128 int temp=get_reg(i_regs->regmap,-1);
2129 int real_th=th;
2130 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2131 assert(sl>=0);
2132 assert(sh>=0);
2133 emit_andimm(shift,31,HOST_TEMPREG);
2134 if(opcode2[i]==0x14) // DSLLV
2135 {
2136 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2137 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2138 emit_orrshr(sl,HOST_TEMPREG,th);
2139 emit_andimm(shift,31,HOST_TEMPREG);
2140 emit_testimm(shift,32);
2141 emit_shl(sl,HOST_TEMPREG,tl);
2142 if(th>=0) emit_cmovne_reg(tl,th);
2143 emit_cmovne_imm(0,tl);
2144 }
2145 if(opcode2[i]==0x16) // DSRLV
2146 {
2147 assert(th>=0);
2148 emit_shr(sl,HOST_TEMPREG,tl);
2149 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2150 emit_orrshl(sh,HOST_TEMPREG,tl);
2151 emit_andimm(shift,31,HOST_TEMPREG);
2152 emit_testimm(shift,32);
2153 emit_shr(sh,HOST_TEMPREG,th);
2154 emit_cmovne_reg(th,tl);
2155 if(real_th>=0) emit_cmovne_imm(0,th);
2156 }
2157 if(opcode2[i]==0x17) // DSRAV
2158 {
2159 assert(th>=0);
2160 emit_shr(sl,HOST_TEMPREG,tl);
2161 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2162 if(real_th>=0) {
2163 assert(temp>=0);
2164 emit_sarimm(th,31,temp);
2165 }
2166 emit_orrshl(sh,HOST_TEMPREG,tl);
2167 emit_andimm(shift,31,HOST_TEMPREG);
2168 emit_testimm(shift,32);
2169 emit_sar(sh,HOST_TEMPREG,th);
2170 emit_cmovne_reg(th,tl);
2171 if(real_th>=0) emit_cmovne_reg(temp,th);
2172 }
2173 }
2174 }
2175 }
2176 }
2177}
2178#define shift_assemble shift_assemble_arm
2179
2180static void loadlr_assemble_arm(int i,struct regstat *i_regs)
2181{
2182 int s,tl,temp,temp2,addr;
2183 int offset;
2184 void *jaddr=0;
2185 int memtarget=0,c=0;
2186 int fastio_reg_override=-1;
2187 u_int hr,reglist=0;
2188 tl=get_reg(i_regs->regmap,rt1[i]);
2189 s=get_reg(i_regs->regmap,rs1[i]);
2190 temp=get_reg(i_regs->regmap,-1);
2191 temp2=get_reg(i_regs->regmap,FTEMP);
2192 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
2193 assert(addr<0);
2194 offset=imm[i];
2195 for(hr=0;hr<HOST_REGS;hr++) {
2196 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2197 }
2198 reglist|=1<<temp;
2199 if(offset||s<0||c) addr=temp2;
2200 else addr=s;
2201 if(s>=0) {
2202 c=(i_regs->wasconst>>s)&1;
2203 if(c) {
2204 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
2205 }
2206 }
2207 if(!c) {
2208 emit_shlimm(addr,3,temp);
2209 if (opcode[i]==0x22||opcode[i]==0x26) {
2210 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
2211 }else{
2212 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
2213 }
2214 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastio_reg_override);
2215 }
2216 else {
2217 if(ram_offset&&memtarget) {
2218 host_tempreg_acquire();
2219 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
2220 fastio_reg_override=HOST_TEMPREG;
2221 }
2222 if (opcode[i]==0x22||opcode[i]==0x26) {
2223 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
2224 }else{
2225 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
2226 }
2227 }
2228 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
2229 if(!c||memtarget) {
2230 int a=temp2;
2231 if(fastio_reg_override>=0) a=fastio_reg_override;
2232 emit_readword_indexed(0,a,temp2);
2233 if(fastio_reg_override==HOST_TEMPREG) host_tempreg_release();
2234 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
2235 }
2236 else
2237 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
2238 if(rt1[i]) {
2239 assert(tl>=0);
2240 emit_andimm(temp,24,temp);
2241#ifdef BIG_ENDIAN_MIPS
2242 if (opcode[i]==0x26) // LWR
2243#else
2244 if (opcode[i]==0x22) // LWL
2245#endif
2246 emit_xorimm(temp,24,temp);
2247 emit_movimm(-1,HOST_TEMPREG);
2248 if (opcode[i]==0x26) {
2249 emit_shr(temp2,temp,temp2);
2250 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
2251 }else{
2252 emit_shl(temp2,temp,temp2);
2253 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
2254 }
2255 emit_or(temp2,tl,tl);
2256 }
2257 //emit_storereg(rt1[i],tl); // DEBUG
2258 }
2259 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2260 assert(0);
2261 }
2262}
2263#define loadlr_assemble loadlr_assemble_arm
2264
2265static void c2op_prologue(u_int op,u_int reglist)
2266{
2267 save_regs_all(reglist);
2268#ifdef PCNT
2269 emit_movimm(op,0);
2270 emit_call((int)pcnt_gte_start);
2271#endif
2272 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
2273}
2274
2275static void c2op_epilogue(u_int op,u_int reglist)
2276{
2277#ifdef PCNT
2278 emit_movimm(op,0);
2279 emit_call((int)pcnt_gte_end);
2280#endif
2281 restore_regs_all(reglist);
2282}
2283
2284static void c2op_call_MACtoIR(int lm,int need_flags)
2285{
2286 if(need_flags)
2287 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2288 else
2289 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2290}
2291
2292static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2293{
2294 emit_call(func);
2295 // func is C code and trashes r0
2296 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2297 if(need_flags||need_ir)
2298 c2op_call_MACtoIR(lm,need_flags);
2299 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2300}
2301
2302static void c2op_assemble(int i,struct regstat *i_regs)
2303{
2304 u_int c2op=source[i]&0x3f;
2305 u_int hr,reglist_full=0,reglist;
2306 int need_flags,need_ir;
2307 for(hr=0;hr<HOST_REGS;hr++) {
2308 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
2309 }
2310 reglist=reglist_full&CALLER_SAVE_REGS;
2311
2312 if (gte_handlers[c2op]!=NULL) {
2313 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2314 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2315 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2316 source[i],gte_unneeded[i+1],need_flags,need_ir);
2317 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
2318 need_flags=0;
2319 int shift = (source[i] >> 19) & 1;
2320 int lm = (source[i] >> 10) & 1;
2321 switch(c2op) {
2322#ifndef DRC_DBG
2323 case GTE_MVMVA: {
2324#ifdef HAVE_ARMV5
2325 int v = (source[i] >> 15) & 3;
2326 int cv = (source[i] >> 13) & 3;
2327 int mx = (source[i] >> 17) & 3;
2328 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2329 c2op_prologue(c2op,reglist);
2330 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2331 if(v<3)
2332 emit_ldrd(v*8,0,4);
2333 else {
2334 emit_movzwl_indexed(9*4,0,4); // gteIR
2335 emit_movzwl_indexed(10*4,0,6);
2336 emit_movzwl_indexed(11*4,0,5);
2337 emit_orrshl_imm(6,16,4);
2338 }
2339 if(mx<3)
2340 emit_addimm(0,32*4+mx*8*4,6);
2341 else
2342 emit_readword(&zeromem_ptr,6);
2343 if(cv<3)
2344 emit_addimm(0,32*4+(cv*8+5)*4,7);
2345 else
2346 emit_readword(&zeromem_ptr,7);
2347#ifdef __ARM_NEON__
2348 emit_movimm(source[i],1); // opcode
2349 emit_call(gteMVMVA_part_neon);
2350 if(need_flags) {
2351 emit_movimm(lm,1);
2352 emit_call(gteMACtoIR_flags_neon);
2353 }
2354#else
2355 if(cv==3&&shift)
2356 emit_call((int)gteMVMVA_part_cv3sh12_arm);
2357 else {
2358 emit_movimm(shift,1);
2359 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2360 }
2361 if(need_flags||need_ir)
2362 c2op_call_MACtoIR(lm,need_flags);
2363#endif
2364#else /* if not HAVE_ARMV5 */
2365 c2op_prologue(c2op,reglist);
2366 emit_movimm(source[i],1); // opcode
2367 emit_writeword(1,&psxRegs.code);
2368 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
2369#endif
2370 break;
2371 }
2372 case GTE_OP:
2373 c2op_prologue(c2op,reglist);
2374 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
2375 if(need_flags||need_ir) {
2376 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2377 c2op_call_MACtoIR(lm,need_flags);
2378 }
2379 break;
2380 case GTE_DPCS:
2381 c2op_prologue(c2op,reglist);
2382 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2383 break;
2384 case GTE_INTPL:
2385 c2op_prologue(c2op,reglist);
2386 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2387 break;
2388 case GTE_SQR:
2389 c2op_prologue(c2op,reglist);
2390 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2391 if(need_flags||need_ir) {
2392 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2393 c2op_call_MACtoIR(lm,need_flags);
2394 }
2395 break;
2396 case GTE_DCPL:
2397 c2op_prologue(c2op,reglist);
2398 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2399 break;
2400 case GTE_GPF:
2401 c2op_prologue(c2op,reglist);
2402 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2403 break;
2404 case GTE_GPL:
2405 c2op_prologue(c2op,reglist);
2406 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2407 break;
2408#endif
2409 default:
2410 c2op_prologue(c2op,reglist);
2411#ifdef DRC_DBG
2412 emit_movimm(source[i],1); // opcode
2413 emit_writeword(1,&psxRegs.code);
2414#endif
2415 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2416 break;
2417 }
2418 c2op_epilogue(c2op,reglist);
2419 }
2420}
2421
2422static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2423{
2424 // case 0x18: MULT
2425 // case 0x19: MULTU
2426 // case 0x1A: DIV
2427 // case 0x1B: DIVU
2428 // case 0x1C: DMULT
2429 // case 0x1D: DMULTU
2430 // case 0x1E: DDIV
2431 // case 0x1F: DDIVU
2432 if(rs1[i]&&rs2[i])
2433 {
2434 if((opcode2[i]&4)==0) // 32-bit
2435 {
2436 if(opcode2[i]==0x18) // MULT
2437 {
2438 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2439 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2440 signed char hi=get_reg(i_regs->regmap,HIREG);
2441 signed char lo=get_reg(i_regs->regmap,LOREG);
2442 assert(m1>=0);
2443 assert(m2>=0);
2444 assert(hi>=0);
2445 assert(lo>=0);
2446 emit_smull(m1,m2,hi,lo);
2447 }
2448 if(opcode2[i]==0x19) // MULTU
2449 {
2450 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2451 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2452 signed char hi=get_reg(i_regs->regmap,HIREG);
2453 signed char lo=get_reg(i_regs->regmap,LOREG);
2454 assert(m1>=0);
2455 assert(m2>=0);
2456 assert(hi>=0);
2457 assert(lo>=0);
2458 emit_umull(m1,m2,hi,lo);
2459 }
2460 if(opcode2[i]==0x1A) // DIV
2461 {
2462 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2463 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2464 assert(d1>=0);
2465 assert(d2>=0);
2466 signed char quotient=get_reg(i_regs->regmap,LOREG);
2467 signed char remainder=get_reg(i_regs->regmap,HIREG);
2468 assert(quotient>=0);
2469 assert(remainder>=0);
2470 emit_movs(d1,remainder);
2471 emit_movimm(0xffffffff,quotient);
2472 emit_negmi(quotient,quotient); // .. quotient and ..
2473 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2474 emit_movs(d2,HOST_TEMPREG);
2475 emit_jeq(out+52); // Division by zero
2476 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2477#ifdef HAVE_ARMV5
2478 emit_clz(HOST_TEMPREG,quotient);
2479 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2480#else
2481 emit_movimm(0,quotient);
2482 emit_addpl_imm(quotient,1,quotient);
2483 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2484 emit_jns(out-2*4);
2485#endif
2486 emit_orimm(quotient,1<<31,quotient);
2487 emit_shr(quotient,quotient,quotient);
2488 emit_cmp(remainder,HOST_TEMPREG);
2489 emit_subcs(remainder,HOST_TEMPREG,remainder);
2490 emit_adcs(quotient,quotient,quotient);
2491 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2492 emit_jcc(out-16); // -4
2493 emit_teq(d1,d2);
2494 emit_negmi(quotient,quotient);
2495 emit_test(d1,d1);
2496 emit_negmi(remainder,remainder);
2497 }
2498 if(opcode2[i]==0x1B) // DIVU
2499 {
2500 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2501 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2502 assert(d1>=0);
2503 assert(d2>=0);
2504 signed char quotient=get_reg(i_regs->regmap,LOREG);
2505 signed char remainder=get_reg(i_regs->regmap,HIREG);
2506 assert(quotient>=0);
2507 assert(remainder>=0);
2508 emit_mov(d1,remainder);
2509 emit_movimm(0xffffffff,quotient); // div0 case
2510 emit_test(d2,d2);
2511 emit_jeq(out+40); // Division by zero
2512#ifdef HAVE_ARMV5
2513 emit_clz(d2,HOST_TEMPREG);
2514 emit_movimm(1<<31,quotient);
2515 emit_shl(d2,HOST_TEMPREG,d2);
2516#else
2517 emit_movimm(0,HOST_TEMPREG);
2518 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2519 emit_lslpls_imm(d2,1,d2);
2520 emit_jns(out-2*4);
2521 emit_movimm(1<<31,quotient);
2522#endif
2523 emit_shr(quotient,HOST_TEMPREG,quotient);
2524 emit_cmp(remainder,d2);
2525 emit_subcs(remainder,d2,remainder);
2526 emit_adcs(quotient,quotient,quotient);
2527 emit_shrcc_imm(d2,1,d2);
2528 emit_jcc(out-16); // -4
2529 }
2530 }
2531 else // 64-bit
2532 assert(0);
2533 }
2534 else
2535 {
2536 // Multiply by zero is zero.
2537 // MIPS does not have a divide by zero exception.
2538 // The result is undefined, we return zero.
2539 signed char hr=get_reg(i_regs->regmap,HIREG);
2540 signed char lr=get_reg(i_regs->regmap,LOREG);
2541 if(hr>=0) emit_zeroreg(hr);
2542 if(lr>=0) emit_zeroreg(lr);
2543 }
2544}
2545#define multdiv_assemble multdiv_assemble_arm
2546
2547static void do_jump_vaddr(int rs)
2548{
2549 emit_jmp(jump_vaddr_reg[rs]);
2550}
2551
/*
 * Intentionally emits nothing on ARM.
 *
 * On x86 this step puts the value 0xf8 into the register; on ARM the
 * hash can be done with a single instruction (see do_rhash), so no
 * preload is needed.
 */
static void do_preload_rhash(int r)
{
  (void)r; // unused on this architecture
}
2556
2557static void do_preload_rhtbl(int ht) {
2558 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2559}
2560
/*
 * Emit the hash computation for the mini hash table:
 * rh = rs & 0xf8.
 */
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs,0xf8,rh);
}
2564
2565static void do_miniht_load(int ht,int rh) {
2566 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2567 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2568}
2569
/*
 * Emit the mini hash table dispatch.
 *
 * rs  host register holding the value to look up
 * rh  host register holding the value loaded from the table
 * ht  host register pointing at the table entry
 *
 * Compares rh with rs and, on equality, loads the word at offset 4 of
 * ht into register 15 (the ARM program counter). Otherwise execution
 * continues into the generic jump_vaddr path.
 */
static void do_miniht_jump(int rs,int rh,int ht)
{
  int lookup_reg=rs;

  emit_cmp(rh,rs);
  emit_ldreq_indexed(ht,4,15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // With this hack the fallback always goes through register 7.
  if(lookup_reg!=7)
    emit_mov(lookup_reg,7);
  lookup_reg=7;
#endif
  do_jump_vaddr(lookup_reg);
}
2580
2581static void do_miniht_insert(u_int return_address,int rt,int temp) {
2582 #ifndef HAVE_ARMV7
2583 emit_movimm(return_address,rt); // PC into link register
2584 add_to_linker(out,return_address,1);
2585 emit_pcreladdr(temp);
2586 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2587 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2588 #else
2589 emit_movw(return_address&0x0000FFFF,rt);
2590 add_to_linker(out,return_address,1);
2591 emit_pcreladdr(temp);
2592 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2593 emit_movt(return_address&0xFFFF0000,rt);
2594 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2595 #endif
2596}
2597
2598static void mark_clear_cache(void *target)
2599{
2600 u_long offset = (u_char *)target - translation_cache;
2601 u_int mask = 1u << ((offset >> 12) & 31);
2602 if (!(needs_clear_cache[offset >> 17] & mask)) {
2603 char *start = (char *)((u_long)target & ~4095ul);
2604 start_tcache_write(start, start + 4096);
2605 needs_clear_cache[offset >> 17] |= mask;
2606 }
2607}
2608
2609// Clearing the cache is rather slow on ARM Linux, so mark the areas
2610// that need to be cleared, and then only clear these areas once.
2611static void do_clear_cache()
2612{
2613 int i,j;
2614 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
2615 {
2616 u_int bitmap=needs_clear_cache[i];
2617 if(bitmap) {
2618 u_char *start, *end;
2619 for(j=0;j<32;j++)
2620 {
2621 if(bitmap&(1<<j)) {
2622 start=translation_cache+i*131072+j*4096;
2623 end=start+4095;
2624 j++;
2625 while(j<32) {
2626 if(bitmap&(1<<j)) {
2627 end+=4096;
2628 j++;
2629 }else{
2630 end_tcache_write(start, end);
2631 break;
2632 }
2633 }
2634 }
2635 }
2636 needs_clear_cache[i]=0;
2637 }
2638 }
2639}
2640
// CPU-architecture-specific initialization.
// Nothing needs to be set up for this backend.
static void arch_init()
{
}
2644
2645// vim:shiftwidth=2:expandtab