drc: arm64 wip
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33u_char *translation_cache;
34#else
35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// NOTE(review): presumably a bitmask of ARM registers (bit n = rn) that are
// saved around calls; the Mach-O value additionally has bit 9 set.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

// Shorthand for marking a definition as possibly unused.
#define unused __attribute__((unused))

// With DRC_DBG defined, silence "unused" diagnostics for this file.
#if defined(DRC_DBG)
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
51
// Entry points that are defined outside this file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN handlers indexed by host register number.
// Slots 11 (fp), 13 (sp), 14 (lr) and 15 (pc) have no handler and stay null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [11] = 0,
  [12] = jump_vaddr_r12,
  [13] = 0,
  [14] = 0,
  [15] = 0,
};
86
87void invalidate_addr_r0();
88void invalidate_addr_r1();
89void invalidate_addr_r2();
90void invalidate_addr_r3();
91void invalidate_addr_r4();
92void invalidate_addr_r5();
93void invalidate_addr_r6();
94void invalidate_addr_r7();
95void invalidate_addr_r8();
96void invalidate_addr_r9();
97void invalidate_addr_r10();
98void invalidate_addr_r12();
99
100const u_int invalidate_addr_reg[16] = {
101 (int)invalidate_addr_r0,
102 (int)invalidate_addr_r1,
103 (int)invalidate_addr_r2,
104 (int)invalidate_addr_r3,
105 (int)invalidate_addr_r4,
106 (int)invalidate_addr_r5,
107 (int)invalidate_addr_r6,
108 (int)invalidate_addr_r7,
109 (int)invalidate_addr_r8,
110 (int)invalidate_addr_r9,
111 (int)invalidate_addr_r10,
112 0,
113 (int)invalidate_addr_r12,
114 0,
115 0,
116 0};
117
118static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
119
120/* Linker */
121
// Patch the instruction at addr so that it refers to target_.
// The instruction kind is recognised from its most significant byte:
//   0xe2      - immediate data-processing insn: the low 12 bits are replaced
//               by the pc-relative distance (must be < 1024, word aligned)
//   0x72      - generated by emit_jno_unlikely: encoded as an immediate when
//               the distance fits, otherwise rewritten into a bvc branch
//   otherwise - a branch (asserted): the 24-bit offset field is replaced
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = (u_int *)addr;
  const u_char top = ((u_char *)addr)[3];
  const u_int dest = (u_int)target_;
  // distance from the pc value seen by the instruction (its address + 8)
  const u_int disp = dest - (u_int)insn - 8;

  if (top == 0xe2) {
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((dest & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    return;
  }

  if (top != 0x72) {
    assert((top & 0x0e) == 0xa);
    *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
    return;
  }

  // generated by emit_jno_unlikely
  if (disp < 1024) {
    assert(((uintptr_t)addr & 3) == 0);
    assert((dest & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
  }
  else if (disp < 4096 && !(disp & 15)) {
    assert(((uintptr_t)addr & 3) == 0);
    assert((dest & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
  }
  else {
    *insn = (0x7A000000) | ((disp << 6) >> 8);
  }
}
153
// This optionally copies the instruction from the target of the branch into
// the space before the branch. It works, but the difference in speed is
// usually insignificant, so the code is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  u_char *bytes = (u_char *)addr;

  assert(!copy || insn[-1] == 0xe28dd000);
  if (bytes[3] == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((bytes[3] & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0)          // ALU, no immediate, no flags
    copy = 0;
  if ((target_insn & 0x0c100000) == 0x04100000) // Load
    copy = 0;
  if (target_insn & 0x08000000)
    copy = 0;
  if (copy) {
    // place the target's first insn into the slot and branch past it
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
188
189/* Literal pool */
190static void add_literal(int addr,int val)
191{
192 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
193 literals[literalcount][0]=addr;
194 literals[literalcount][1]=val;
195 literalcount++;
196}
197
198// from a pointer to external jump stub (which was produced by emit_extjump2)
199// find where the jumping insn is
200static void *find_extjump_insn(void *stub)
201{
202 int *ptr=(int *)(stub+4);
203 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
204 u_int offset=*ptr&0xfff;
205 void **l_ptr=(void *)ptr+offset+8;
206 return *l_ptr;
207}
208
209// find where external branch is liked to using addr of it's stub:
210// get address that insn one after stub loads (dyna_linker arg1),
211// treat it as a pointer to branch insn,
212// return addr where that branch jumps to
213static void *get_pointer(void *stub)
214{
215 //printf("get_pointer(%x)\n",(int)stub);
216 int *i_ptr=find_extjump_insn(stub);
217 assert((*i_ptr&0x0f000000)==0x0a000000);
218 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
219}
220
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code. If the instruction after
// the call is an unconditional branch, its destination is returned.
static void *get_clean_addr(void *addr)
{
#ifdef HAVE_ARMV7
  signed int *insn = (signed int *)addr + 6;
#else
  signed int *insn = (signed int *)addr + 4;
#endif
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
239
240static int verify_dirty(u_int *ptr)
241{
242 #ifndef HAVE_ARMV7
243 u_int offset;
244 // get from literal pool
245 assert((*ptr&0xFFFF0000)==0xe59f0000);
246 offset=*ptr&0xfff;
247 u_int source=*(u_int*)((void *)ptr+offset+8);
248 ptr++;
249 assert((*ptr&0xFFFF0000)==0xe59f0000);
250 offset=*ptr&0xfff;
251 u_int copy=*(u_int*)((void *)ptr+offset+8);
252 ptr++;
253 assert((*ptr&0xFFFF0000)==0xe59f0000);
254 offset=*ptr&0xfff;
255 u_int len=*(u_int*)((void *)ptr+offset+8);
256 ptr++;
257 ptr++;
258 #else
259 // ARMv7 movw/movt
260 assert((*ptr&0xFFF00000)==0xe3000000);
261 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
262 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
263 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
264 ptr+=6;
265 #endif
266 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
267 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
268 //printf("verify_dirty: %x %x %x\n",source,copy,len);
269 return !memcmp((void *)source,(void *)copy,len);
270}
271
272// This doesn't necessarily find all clean entry points, just
273// guarantees that it's not dirty
274static int isclean(void *addr)
275{
276 #ifndef HAVE_ARMV7
277 u_int *ptr=((u_int *)addr)+4;
278 #else
279 u_int *ptr=((u_int *)addr)+6;
280 #endif
281 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
282 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
283 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
284 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
285 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
286 return 1;
287}
288
289// get source that block at addr was compiled from (host pointers)
290static void get_bounds(void *addr, u_char **start, u_char **end)
291{
292 u_int *ptr = addr;
293 #ifndef HAVE_ARMV7
294 u_int offset;
295 // get from literal pool
296 assert((*ptr&0xFFFF0000)==0xe59f0000);
297 offset=*ptr&0xfff;
298 u_int source=*(u_int*)((void *)ptr+offset+8);
299 ptr++;
300 //assert((*ptr&0xFFFF0000)==0xe59f0000);
301 //offset=*ptr&0xfff;
302 //u_int copy=*(u_int*)((void *)ptr+offset+8);
303 ptr++;
304 assert((*ptr&0xFFFF0000)==0xe59f0000);
305 offset=*ptr&0xfff;
306 u_int len=*(u_int*)((void *)ptr+offset+8);
307 ptr++;
308 ptr++;
309 #else
310 // ARMv7 movw/movt
311 assert((*ptr&0xFFF00000)==0xe3000000);
312 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
313 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
314 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
315 ptr+=6;
316 #endif
317 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
318 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
319 *start=(u_char *)source;
320 *end=(u_char *)source+len;
321}
322
323// Allocate a specific ARM register.
324static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
325{
326 int n;
327 int dirty=0;
328
329 // see if it's already allocated (and dealloc it)
330 for(n=0;n<HOST_REGS;n++)
331 {
332 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
333 dirty=(cur->dirty>>n)&1;
334 cur->regmap[n]=-1;
335 }
336 }
337
338 cur->regmap[hr]=reg;
339 cur->dirty&=~(1<<hr);
340 cur->dirty|=dirty<<hr;
341 cur->isconst&=~(1<<hr);
342}
343
344// Alloc cycle count into dedicated register
345static void alloc_cc(struct regstat *cur,int i)
346{
347 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
348}
349
350/* Assembler */
351
352static unused char regname[16][4] = {
353 "r0",
354 "r1",
355 "r2",
356 "r3",
357 "r4",
358 "r5",
359 "r6",
360 "r7",
361 "r8",
362 "r9",
363 "r10",
364 "fp",
365 "r12",
366 "sp",
367 "lr",
368 "pc"};
369
370static void output_w32(u_int word)
371{
372 *((u_int *)out)=word;
373 out+=4;
374}
375
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15 and rm in bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
383
// Build the operand fields of an immediate data-processing instruction:
// rn, rd, an 8-bit immediate and the rotate field that corresponds to
// shifting that immediate left by `shift` bits (shift must be even).
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  // a left shift by s is encoded as a right rotation by 32 - s
  const u_int rotate = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rotate | imm;
}
392
// Try to express imm as an ARM immediate operand (an 8-bit value rotated
// right by an even amount). On success the 12-bit operand field is stored
// in *encoded and 1 is returned; otherwise *encoded is 0 and 0 is returned.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded = 0;
  if (imm == 0)
    return 1;
  for (rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30);   // rotate right by two bits
  }
  return 0;
}
408
409static void genimm_checked(u_int imm,u_int *encoded)
410{
411 u_int ret=genimm(imm,encoded);
412 assert(ret);
413 (void)ret;
414}
415
416static u_int genjmp(u_int addr)
417{
418 int offset=addr-(int)out-8;
419 if(offset<-33554432||offset>=33554432) {
420 if (addr>2) {
421 SysPrintf("genjmp: out of range: %08x\n", offset);
422 exit(1);
423 }
424 return 0;
425 }
426 return ((u_int)offset>>2)&0xffffff;
427}
428
429static void emit_mov(int rs,int rt)
430{
431 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
432 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
433}
434
435static void emit_movs(int rs,int rt)
436{
437 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
438 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
439}
440
441static void emit_add(int rs1,int rs2,int rt)
442{
443 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
444 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
445}
446
447static void emit_adds(int rs1,int rs2,int rt)
448{
449 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
450 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
451}
452
453static void emit_adcs(int rs1,int rs2,int rt)
454{
455 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
456 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
457}
458
459static void emit_neg(int rs, int rt)
460{
461 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
462 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
463}
464
465static void emit_sub(int rs1,int rs2,int rt)
466{
467 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
468 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
469}
470
471static void emit_zeroreg(int rt)
472{
473 assem_debug("mov %s,#0\n",regname[rt]);
474 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
475}
476
477static void emit_loadlp(u_int imm,u_int rt)
478{
479 add_literal((int)out,imm);
480 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
481 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
482}
483
484static void emit_movw(u_int imm,u_int rt)
485{
486 assert(imm<65536);
487 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
488 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
489}
490
491static void emit_movt(u_int imm,u_int rt)
492{
493 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
494 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
495}
496
497static void emit_movimm(u_int imm,u_int rt)
498{
499 u_int armval;
500 if(genimm(imm,&armval)) {
501 assem_debug("mov %s,#%d\n",regname[rt],imm);
502 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
503 }else if(genimm(~imm,&armval)) {
504 assem_debug("mvn %s,#%d\n",regname[rt],imm);
505 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
506 }else if(imm<65536) {
507 #ifndef HAVE_ARMV7
508 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
509 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
510 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
511 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
512 #else
513 emit_movw(imm,rt);
514 #endif
515 }else{
516 #ifndef HAVE_ARMV7
517 emit_loadlp(imm,rt);
518 #else
519 emit_movw(imm&0x0000FFFF,rt);
520 emit_movt(imm&0xFFFF0000,rt);
521 #endif
522 }
523}
524
525static void emit_pcreladdr(u_int rt)
526{
527 assem_debug("add %s,pc,#?\n",regname[rt]);
528 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
529}
530
531static void emit_loadreg(int r, int hr)
532{
533 if(r&64) {
534 SysPrintf("64bit load in 32bit mode!\n");
535 assert(0);
536 return;
537 }
538 if((r&63)==0)
539 emit_zeroreg(hr);
540 else {
541 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
542 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
543 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
544 if(r==CCREG) addr=(int)&cycle_count;
545 if(r==CSREG) addr=(int)&Status;
546 if(r==INVCP) addr=(int)&invc_ptr;
547 u_int offset = addr-(u_int)&dynarec_local;
548 assert(offset<4096);
549 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
550 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
551 }
552}
553
554static void emit_storereg(int r, int hr)
555{
556 if(r&64) {
557 SysPrintf("64bit store in 32bit mode!\n");
558 assert(0);
559 return;
560 }
561 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
562 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
563 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
564 if(r==CCREG) addr=(int)&cycle_count;
565 u_int offset = addr-(u_int)&dynarec_local;
566 assert(offset<4096);
567 assem_debug("str %s,fp+%d\n",regname[hr],offset);
568 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
569}
570
571static void emit_test(int rs, int rt)
572{
573 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
574 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
575}
576
577static void emit_testimm(int rs,int imm)
578{
579 u_int armval;
580 assem_debug("tst %s,#%d\n",regname[rs],imm);
581 genimm_checked(imm,&armval);
582 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
583}
584
585static void emit_testeqimm(int rs,int imm)
586{
587 u_int armval;
588 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
589 genimm_checked(imm,&armval);
590 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
591}
592
593static void emit_not(int rs,int rt)
594{
595 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
596 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
597}
598
599static void emit_mvnmi(int rs,int rt)
600{
601 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
602 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
603}
604
605static void emit_and(u_int rs1,u_int rs2,u_int rt)
606{
607 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
608 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
609}
610
611static void emit_or(u_int rs1,u_int rs2,u_int rt)
612{
613 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
614 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
615}
616
617static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
618{
619 assert(rs<16);
620 assert(rt<16);
621 assert(imm<32);
622 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
623 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
624}
625
626static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
627{
628 assert(rs<16);
629 assert(rt<16);
630 assert(imm<32);
631 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
632 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
633}
634
635static void emit_xor(u_int rs1,u_int rs2,u_int rt)
636{
637 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
638 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
639}
640
641static void emit_addimm(u_int rs,int imm,u_int rt)
642{
643 assert(rs<16);
644 assert(rt<16);
645 if(imm!=0) {
646 u_int armval;
647 if(genimm(imm,&armval)) {
648 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
649 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
650 }else if(genimm(-imm,&armval)) {
651 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
652 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
653 #ifdef HAVE_ARMV7
654 }else if(rt!=rs&&(u_int)imm<65536) {
655 emit_movw(imm&0x0000ffff,rt);
656 emit_add(rs,rt,rt);
657 }else if(rt!=rs&&(u_int)-imm<65536) {
658 emit_movw(-imm&0x0000ffff,rt);
659 emit_sub(rs,rt,rt);
660 #endif
661 }else if((u_int)-imm<65536) {
662 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
663 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
664 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
665 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
666 }else {
667 do {
668 int shift = (ffs(imm) - 1) & ~1;
669 int imm8 = imm & (0xff << shift);
670 genimm_checked(imm8,&armval);
671 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
672 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
673 rs = rt;
674 imm &= ~imm8;
675 }
676 while (imm != 0);
677 }
678 }
679 else if(rs!=rt) emit_mov(rs,rt);
680}
681
682static void emit_addimm_and_set_flags(int imm,int rt)
683{
684 assert(imm>-65536&&imm<65536);
685 u_int armval;
686 if(genimm(imm,&armval)) {
687 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
688 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
689 }else if(genimm(-imm,&armval)) {
690 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
691 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
692 }else if(imm<0) {
693 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
694 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
695 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
696 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
697 }else{
698 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
699 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
700 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
701 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
702 }
703}
704
705static void emit_addimm_no_flags(u_int imm,u_int rt)
706{
707 emit_addimm(rt,imm,rt);
708}
709
710static void emit_addnop(u_int r)
711{
712 assert(r<16);
713 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
714 output_w32(0xe2800000|rd_rn_rm(r,r,0));
715}
716
717static void emit_adcimm(u_int rs,int imm,u_int rt)
718{
719 u_int armval;
720 genimm_checked(imm,&armval);
721 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
722 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
723}
724
725static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
726{
727 // TODO: if(genimm(imm,&armval)) ...
728 // else
729 emit_movimm(imm,HOST_TEMPREG);
730 emit_adds(HOST_TEMPREG,rsl,rtl);
731 emit_adcimm(rsh,0,rth);
732}
733
734static void emit_andimm(int rs,int imm,int rt)
735{
736 u_int armval;
737 if(imm==0) {
738 emit_zeroreg(rt);
739 }else if(genimm(imm,&armval)) {
740 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
741 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
742 }else if(genimm(~imm,&armval)) {
743 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
744 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
745 }else if(imm==65535) {
746 #ifndef HAVE_ARMV6
747 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
748 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
749 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
750 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
751 #else
752 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
753 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
754 #endif
755 }else{
756 assert(imm>0&&imm<65535);
757 #ifndef HAVE_ARMV7
758 assem_debug("mov r14,#%d\n",imm&0xFF00);
759 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
760 assem_debug("add r14,r14,#%d\n",imm&0xFF);
761 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
762 #else
763 emit_movw(imm,HOST_TEMPREG);
764 #endif
765 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
766 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
767 }
768}
769
770static void emit_orimm(int rs,int imm,int rt)
771{
772 u_int armval;
773 if(imm==0) {
774 if(rs!=rt) emit_mov(rs,rt);
775 }else if(genimm(imm,&armval)) {
776 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
777 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
778 }else{
779 assert(imm>0&&imm<65536);
780 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
781 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
782 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
783 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
784 }
785}
786
787static void emit_xorimm(int rs,int imm,int rt)
788{
789 u_int armval;
790 if(imm==0) {
791 if(rs!=rt) emit_mov(rs,rt);
792 }else if(genimm(imm,&armval)) {
793 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
794 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
795 }else{
796 assert(imm>0&&imm<65536);
797 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
798 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
799 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
800 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
801 }
802}
803
804static void emit_shlimm(int rs,u_int imm,int rt)
805{
806 assert(imm>0);
807 assert(imm<32);
808 //if(imm==1) ...
809 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
810 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
811}
812
813static void emit_lsls_imm(int rs,int imm,int rt)
814{
815 assert(imm>0);
816 assert(imm<32);
817 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
818 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
819}
820
821static unused void emit_lslpls_imm(int rs,int imm,int rt)
822{
823 assert(imm>0);
824 assert(imm<32);
825 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
826 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
827}
828
829static void emit_shrimm(int rs,u_int imm,int rt)
830{
831 assert(imm>0);
832 assert(imm<32);
833 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
834 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
835}
836
837static void emit_sarimm(int rs,u_int imm,int rt)
838{
839 assert(imm>0);
840 assert(imm<32);
841 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
842 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
843}
844
845static void emit_rorimm(int rs,u_int imm,int rt)
846{
847 assert(imm>0);
848 assert(imm<32);
849 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
850 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
851}
852
// Sign-extend the low 16 bits of rs into rt: a single sxth on ARMv6 and
// later, otherwise a shift left by 16 followed by an arithmetic shift back.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
863
// Sign-extend the low 8 bits of rs into rt: a single sxtb on ARMv6 and
// later, otherwise a shift left by 24 followed by an arithmetic shift back.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
874
875static void emit_shl(u_int rs,u_int shift,u_int rt)
876{
877 assert(rs<16);
878 assert(rt<16);
879 assert(shift<16);
880 //if(imm==1) ...
881 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
882 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
883}
884
885static void emit_shr(u_int rs,u_int shift,u_int rt)
886{
887 assert(rs<16);
888 assert(rt<16);
889 assert(shift<16);
890 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
891 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
892}
893
894static void emit_sar(u_int rs,u_int shift,u_int rt)
895{
896 assert(rs<16);
897 assert(rt<16);
898 assert(shift<16);
899 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
900 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
901}
902
903static void emit_orrshl(u_int rs,u_int shift,u_int rt)
904{
905 assert(rs<16);
906 assert(rt<16);
907 assert(shift<16);
908 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
909 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
910}
911
912static void emit_orrshr(u_int rs,u_int shift,u_int rt)
913{
914 assert(rs<16);
915 assert(rt<16);
916 assert(shift<16);
917 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
918 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
919}
920
921static void emit_cmpimm(int rs,int imm)
922{
923 u_int armval;
924 if(genimm(imm,&armval)) {
925 assem_debug("cmp %s,#%d\n",regname[rs],imm);
926 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
927 }else if(genimm(-imm,&armval)) {
928 assem_debug("cmn %s,#%d\n",regname[rs],imm);
929 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
930 }else if(imm>0) {
931 assert(imm<65536);
932 emit_movimm(imm,HOST_TEMPREG);
933 assem_debug("cmp %s,r14\n",regname[rs]);
934 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
935 }else{
936 assert(imm>-65536);
937 emit_movimm(-imm,HOST_TEMPREG);
938 assem_debug("cmn %s,r14\n",regname[rs]);
939 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
940 }
941}
942
943static void emit_cmovne_imm(int imm,int rt)
944{
945 assem_debug("movne %s,#%d\n",regname[rt],imm);
946 u_int armval;
947 genimm_checked(imm,&armval);
948 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
949}
950
951static void emit_cmovl_imm(int imm,int rt)
952{
953 assem_debug("movlt %s,#%d\n",regname[rt],imm);
954 u_int armval;
955 genimm_checked(imm,&armval);
956 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
957}
958
959static void emit_cmovb_imm(int imm,int rt)
960{
961 assem_debug("movcc %s,#%d\n",regname[rt],imm);
962 u_int armval;
963 genimm_checked(imm,&armval);
964 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
965}
966
967static void emit_cmovne_reg(int rs,int rt)
968{
969 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
970 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
971}
972
973static void emit_cmovl_reg(int rs,int rt)
974{
975 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
976 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
977}
978
979static void emit_cmovs_reg(int rs,int rt)
980{
981 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
982 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
983}
984
// rt = (rs < imm) ? 1 : 0, signed comparison.
// When rs and rt are the same register the zeroing is postponed until
// after the compare so the operand is not clobbered.
static void emit_slti32(int rs,int imm,int rt)
{
  const int same_reg = (rs == rt);

  if (!same_reg)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (same_reg)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
992
// rt = (rs < imm) ? 1 : 0, unsigned comparison (uses movcc).
// When rs and rt are the same register the zeroing is postponed until
// after the compare so the operand is not clobbered.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int same_reg = (rs == rt);

  if (!same_reg)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (same_reg)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1000
1001static void emit_cmp(int rs,int rt)
1002{
1003 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1004 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1005}
1006
// rt = (rs > 0) ? 1 : 0, signed: compare rs with 1, preset rt to 1 and
// clear it again when rs is less than 1.
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1014
// rt = (rs != 0) ? 1 : 0. A flag-setting move (or a tst when rs and rt are
// the same register) establishes the Z flag, then rt is set to 1 when the
// value was non-zero.
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1022
// rt = (rs1 < rs2) ? 1 : 0, signed comparison.
// If rt is also one of the operands, it is zeroed only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_operand = (rs1 == rt) || (rs2 == rt);

  if (!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1031
// rt = (rs1 < rs2) ? 1 : 0, unsigned comparison (uses movcc).
// If rt is also one of the operands, it is zeroed only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_operand = (rs1 == rt) || (rs2 == rt);

  if (!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1040
1041static void emit_call(const void *a_)
1042{
1043 int a = (int)a_;
1044 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1045 u_int offset=genjmp(a);
1046 output_w32(0xeb000000|offset);
1047}
1048
1049static void emit_jmp(const void *a_)
1050{
1051 int a = (int)a_;
1052 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1053 u_int offset=genjmp(a);
1054 output_w32(0xea000000|offset);
1055}
1056
1057static void emit_jne(const void *a_)
1058{
1059 int a = (int)a_;
1060 assem_debug("bne %x\n",a);
1061 u_int offset=genjmp(a);
1062 output_w32(0x1a000000|offset);
1063}
1064
1065static void emit_jeq(int a)
1066{
1067 assem_debug("beq %x\n",a);
1068 u_int offset=genjmp(a);
1069 output_w32(0x0a000000|offset);
1070}
1071
1072static void emit_js(int a)
1073{
1074 assem_debug("bmi %x\n",a);
1075 u_int offset=genjmp(a);
1076 output_w32(0x4a000000|offset);
1077}
1078
1079static void emit_jns(int a)
1080{
1081 assem_debug("bpl %x\n",a);
1082 u_int offset=genjmp(a);
1083 output_w32(0x5a000000|offset);
1084}
1085
1086static void emit_jl(int a)
1087{
1088 assem_debug("blt %x\n",a);
1089 u_int offset=genjmp(a);
1090 output_w32(0xba000000|offset);
1091}
1092
1093static void emit_jge(int a)
1094{
1095 assem_debug("bge %x\n",a);
1096 u_int offset=genjmp(a);
1097 output_w32(0xaa000000|offset);
1098}
1099
1100static void emit_jno(int a)
1101{
1102 assem_debug("bvc %x\n",a);
1103 u_int offset=genjmp(a);
1104 output_w32(0x7a000000|offset);
1105}
1106
1107static void emit_jc(int a)
1108{
1109 assem_debug("bcs %x\n",a);
1110 u_int offset=genjmp(a);
1111 output_w32(0x2a000000|offset);
1112}
1113
1114static void emit_jcc(void *a_)
1115{
1116 int a = (int)a_;
1117 assem_debug("bcc %x\n",a);
1118 u_int offset=genjmp(a);
1119 output_w32(0x3a000000|offset);
1120}
1121
1122static void emit_callreg(u_int r)
1123{
1124 assert(r<15);
1125 assem_debug("blx %s\n",regname[r]);
1126 output_w32(0xe12fff30|r);
1127}
1128
1129static void emit_jmpreg(u_int r)
1130{
1131 assem_debug("mov pc,%s\n",regname[r]);
1132 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1133}
1134
1135static void emit_ret(void)
1136{
1137 emit_jmpreg(14);
1138}
1139
1140static void emit_readword_indexed(int offset, int rs, int rt)
1141{
1142 assert(offset>-4096&&offset<4096);
1143 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1144 if(offset>=0) {
1145 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1146 }else{
1147 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1148 }
1149}
1150
1151static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1152{
1153 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1154 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1155}
1156
1157static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1158{
1159 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1160 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1161}
1162
1163static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1164{
1165 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1167}
1168
1169static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1170{
1171 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1173}
1174
1175static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1176{
1177 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1178 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1179}
1180
1181static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1182{
1183 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1184 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1185}
1186
1187static void emit_movsbl_indexed(int offset, int rs, int rt)
1188{
1189 assert(offset>-256&&offset<256);
1190 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1191 if(offset>=0) {
1192 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1193 }else{
1194 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1195 }
1196}
1197
1198static void emit_movswl_indexed(int offset, int rs, int rt)
1199{
1200 assert(offset>-256&&offset<256);
1201 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1202 if(offset>=0) {
1203 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1204 }else{
1205 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1206 }
1207}
1208
1209static void emit_movzbl_indexed(int offset, int rs, int rt)
1210{
1211 assert(offset>-4096&&offset<4096);
1212 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1213 if(offset>=0) {
1214 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1215 }else{
1216 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1217 }
1218}
1219
1220static void emit_movzwl_indexed(int offset, int rs, int rt)
1221{
1222 assert(offset>-256&&offset<256);
1223 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1224 if(offset>=0) {
1225 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1226 }else{
1227 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1228 }
1229}
1230
1231static void emit_ldrd(int offset, int rs, int rt)
1232{
1233 assert(offset>-256&&offset<256);
1234 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1235 if(offset>=0) {
1236 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1237 }else{
1238 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1239 }
1240}
1241
1242static void emit_readword(void *addr, int rt)
1243{
1244 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1245 assert(offset<4096);
1246 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1247 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1248}
1249
1250static void emit_writeword_indexed(int rt, int offset, int rs)
1251{
1252 assert(offset>-4096&&offset<4096);
1253 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1254 if(offset>=0) {
1255 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1256 }else{
1257 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1258 }
1259}
1260
1261static void emit_writehword_indexed(int rt, int offset, int rs)
1262{
1263 assert(offset>-256&&offset<256);
1264 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1265 if(offset>=0) {
1266 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1267 }else{
1268 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1269 }
1270}
1271
1272static void emit_writebyte_indexed(int rt, int offset, int rs)
1273{
1274 assert(offset>-4096&&offset<4096);
1275 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1276 if(offset>=0) {
1277 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1278 }else{
1279 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1280 }
1281}
1282
1283static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1284{
1285 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1286 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1287}
1288
1289static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1290{
1291 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1292 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1293}
1294
1295static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1296{
1297 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1298 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1299}
1300
1301static void emit_writeword(int rt, void *addr)
1302{
1303 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1304 assert(offset<4096);
1305 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1306 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1307}
1308
1309static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1310{
1311 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1312 assert(rs1<16);
1313 assert(rs2<16);
1314 assert(hi<16);
1315 assert(lo<16);
1316 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1317}
1318
1319static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1320{
1321 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1322 assert(rs1<16);
1323 assert(rs2<16);
1324 assert(hi<16);
1325 assert(lo<16);
1326 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1327}
1328
1329static void emit_clz(int rs,int rt)
1330{
1331 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1332 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1333}
1334
1335static void emit_subcs(int rs1,int rs2,int rt)
1336{
1337 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1338 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1339}
1340
1341static void emit_shrcc_imm(int rs,u_int imm,int rt)
1342{
1343 assert(imm>0);
1344 assert(imm<32);
1345 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1346 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1347}
1348
1349static void emit_shrne_imm(int rs,u_int imm,int rt)
1350{
1351 assert(imm>0);
1352 assert(imm<32);
1353 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1354 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1355}
1356
1357static void emit_negmi(int rs, int rt)
1358{
1359 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1360 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1361}
1362
1363static void emit_negsmi(int rs, int rt)
1364{
1365 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1366 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1367}
1368
1369static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1370{
1371 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1372 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1373}
1374
1375static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1376{
1377 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1378 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1379}
1380
1381static void emit_teq(int rs, int rt)
1382{
1383 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1384 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1385}
1386
1387static void emit_rsbimm(int rs, int imm, int rt)
1388{
1389 u_int armval;
1390 genimm_checked(imm,&armval);
1391 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1392 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1393}
1394
1395// Load 2 immediates optimizing for small code size
1396static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
1397{
1398 emit_movimm(imm1,rt1);
1399 u_int armval;
1400 if(genimm(imm2-imm1,&armval)) {
1401 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
1402 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
1403 }else if(genimm(imm1-imm2,&armval)) {
1404 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
1405 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
1406 }
1407 else emit_movimm(imm2,rt2);
1408}
1409
1410// Conditionally select one of two immediates, optimizing for small code size
1411// This will only be called if HAVE_CMOV_IMM is defined
1412static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1413{
1414 u_int armval;
1415 if(genimm(imm2-imm1,&armval)) {
1416 emit_movimm(imm1,rt);
1417 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1418 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1419 }else if(genimm(imm1-imm2,&armval)) {
1420 emit_movimm(imm1,rt);
1421 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1422 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1423 }
1424 else {
1425 #ifndef HAVE_ARMV7
1426 emit_movimm(imm1,rt);
1427 add_literal((int)out,imm2);
1428 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1429 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1430 #else
1431 emit_movw(imm1&0x0000FFFF,rt);
1432 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1433 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1434 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1435 }
1436 emit_movt(imm1&0xFFFF0000,rt);
1437 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1438 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1439 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1440 }
1441 #endif
1442 }
1443}
1444
1445// special case for checking invalid_code
1446static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1447{
1448 assert(imm<128&&imm>=0);
1449 assert(r>=0&&r<16);
1450 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1451 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1452 emit_cmpimm(HOST_TEMPREG,imm);
1453}
1454
1455static void emit_callne(int a)
1456{
1457 assem_debug("blne %x\n",a);
1458 u_int offset=genjmp(a);
1459 output_w32(0x1b000000|offset);
1460}
1461
1462// Used to preload hash table entries
1463static unused void emit_prefetchreg(int r)
1464{
1465 assem_debug("pld %s\n",regname[r]);
1466 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1467}
1468
1469// Special case for mini_ht
1470static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1471{
1472 assert(offset<4096);
1473 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1474 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1475}
1476
1477static void emit_orrne_imm(int rs,int imm,int rt)
1478{
1479 u_int armval;
1480 genimm_checked(imm,&armval);
1481 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1482 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1483}
1484
1485static void emit_andne_imm(int rs,int imm,int rt)
1486{
1487 u_int armval;
1488 genimm_checked(imm,&armval);
1489 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1490 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1491}
1492
1493static unused void emit_addpl_imm(int rs,int imm,int rt)
1494{
1495 u_int armval;
1496 genimm_checked(imm,&armval);
1497 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1498 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1499}
1500
1501static void emit_jno_unlikely(int a)
1502{
1503 //emit_jno(a);
1504 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1505 output_w32(0x72800000|rd_rn_rm(15,15,0));
1506}
1507
1508static void save_regs_all(u_int reglist)
1509{
1510 int i;
1511 if(!reglist) return;
1512 assem_debug("stmia fp,{");
1513 for(i=0;i<16;i++)
1514 if(reglist&(1<<i))
1515 assem_debug("r%d,",i);
1516 assem_debug("}\n");
1517 output_w32(0xe88b0000|reglist);
1518}
1519
1520static void restore_regs_all(u_int reglist)
1521{
1522 int i;
1523 if(!reglist) return;
1524 assem_debug("ldmia fp,{");
1525 for(i=0;i<16;i++)
1526 if(reglist&(1<<i))
1527 assem_debug("r%d,",i);
1528 assem_debug("}\n");
1529 output_w32(0xe89b0000|reglist);
1530}
1531
1532// Save registers before function call
1533static void save_regs(u_int reglist)
1534{
1535 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1536 save_regs_all(reglist);
1537}
1538
1539// Restore registers after function call
1540static void restore_regs(u_int reglist)
1541{
1542 reglist&=CALLER_SAVE_REGS;
1543 restore_regs_all(reglist);
1544}
1545
1546/* Stubs/epilogue */
1547
1548static void literal_pool(int n)
1549{
1550 if(!literalcount) return;
1551 if(n) {
1552 if((int)out-literals[0][0]<4096-n) return;
1553 }
1554 u_int *ptr;
1555 int i;
1556 for(i=0;i<literalcount;i++)
1557 {
1558 u_int l_addr=(u_int)out;
1559 int j;
1560 for(j=0;j<i;j++) {
1561 if(literals[j][1]==literals[i][1]) {
1562 //printf("dup %08x\n",literals[i][1]);
1563 l_addr=literals[j][0];
1564 break;
1565 }
1566 }
1567 ptr=(u_int *)literals[i][0];
1568 u_int offset=l_addr-(u_int)ptr-8;
1569 assert(offset<4096);
1570 assert(!(offset&3));
1571 *ptr|=offset;
1572 if(l_addr==(u_int)out) {
1573 literals[i][0]=l_addr; // remember for dupes
1574 output_w32(literals[i][1]);
1575 }
1576 }
1577 literalcount=0;
1578}
1579
1580static void literal_pool_jumpover(int n)
1581{
1582 if(!literalcount) return;
1583 if(n) {
1584 if((int)out-literals[0][0]<4096-n) return;
1585 }
1586 void *jaddr = out;
1587 emit_jmp(0);
1588 literal_pool(0);
1589 set_jump_target(jaddr, out);
1590}
1591
1592static void emit_extjump2(u_char *addr, int target, void *linker)
1593{
1594 u_char *ptr=(u_char *)addr;
1595 assert((ptr[3]&0x0e)==0xa);
1596 (void)ptr;
1597
1598 emit_loadlp(target,0);
1599 emit_loadlp((u_int)addr,1);
1600 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
1601 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1602//DEBUG >
1603#ifdef DEBUG_CYCLE_COUNT
1604 emit_readword(&last_count,ECX);
1605 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1606 emit_readword(&next_interupt,ECX);
1607 emit_writeword(HOST_CCREG,&Count);
1608 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1609 emit_writeword(ECX,&last_count);
1610#endif
1611//DEBUG <
1612 emit_jmp(linker);
1613}
1614
1615static void emit_extjump(void *addr, int target)
1616{
1617 emit_extjump2(addr, target, dyna_linker);
1618}
1619
1620static void emit_extjump_ds(void *addr, int target)
1621{
1622 emit_extjump2(addr, target, dyna_linker_ds);
1623}
1624
1625// put rt_val into rt, potentially making use of rs with value rs_val
1626static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1627{
1628 u_int armval;
1629 int diff;
1630 if(genimm(rt_val,&armval)) {
1631 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1632 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1633 return;
1634 }
1635 if(genimm(~rt_val,&armval)) {
1636 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1637 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1638 return;
1639 }
1640 diff=rt_val-rs_val;
1641 if(genimm(diff,&armval)) {
1642 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1643 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1644 return;
1645 }else if(genimm(-diff,&armval)) {
1646 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1647 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1648 return;
1649 }
1650 emit_movimm(rt_val,rt);
1651}
1652
// Return 1 if emit_movimm_from() can do its job cheaply for (v1,v2):
// the values are equal, or v2-v1 or v1-v2, once trailing pairs of zero
// bits are dropped, is below 0x100.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2)
    return 1;

  u_int delta=v2-v1;
  u_int probe=delta;
  while(probe!=0&&(probe&3)==0)
    probe>>=2;
  if(probe<0x100)
    return 1;

  probe=0u-delta;
  while(probe!=0&&(probe&3)==0)
    probe>>=2;
  return probe<0x100 ? 1 : 0;
}
1668
1669static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1670{
1671 switch(type) {
1672 case LOADB_STUB: emit_signextend8(rs,rt); break;
1673 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1674 case LOADH_STUB: emit_signextend16(rs,rt); break;
1675 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1676 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1677 default: assert(0);
1678 }
1679}
1680
1681#include "pcsxmem.h"
1682#include "pcsxmem_inline.c"
1683
1684static void do_readstub(int n)
1685{
1686 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1687 literal_pool(256);
1688 set_jump_target(stubs[n].addr, out);
1689 enum stub_type type=stubs[n].type;
1690 int i=stubs[n].a;
1691 int rs=stubs[n].b;
1692 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1693 u_int reglist=stubs[n].e;
1694 signed char *i_regmap=i_regs->regmap;
1695 int rt;
1696 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1697 rt=get_reg(i_regmap,FTEMP);
1698 }else{
1699 rt=get_reg(i_regmap,rt1[i]);
1700 }
1701 assert(rs>=0);
1702 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1703 void *restore_jump = NULL;
1704 reglist|=(1<<rs);
1705 for(r=0;r<=12;r++) {
1706 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1707 temp=r; break;
1708 }
1709 }
1710 if(rt>=0&&rt1[i]!=0)
1711 reglist&=~(1<<rt);
1712 if(temp==-1) {
1713 save_regs(reglist);
1714 regs_saved=1;
1715 temp=(rs==0)?2:0;
1716 }
1717 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1718 temp2=1;
1719 emit_readword(&mem_rtab,temp);
1720 emit_shrimm(rs,12,temp2);
1721 emit_readword_dualindexedx4(temp,temp2,temp2);
1722 emit_lsls_imm(temp2,1,temp2);
1723 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1724 switch(type) {
1725 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1726 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1727 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1728 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1729 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1730 default: assert(0);
1731 }
1732 }
1733 if(regs_saved) {
1734 restore_jump=out;
1735 emit_jcc(0); // jump to reg restore
1736 }
1737 else
1738 emit_jcc(stubs[n].retaddr); // return address
1739
1740 if(!regs_saved)
1741 save_regs(reglist);
1742 void *handler=NULL;
1743 if(type==LOADB_STUB||type==LOADBU_STUB)
1744 handler=jump_handler_read8;
1745 if(type==LOADH_STUB||type==LOADHU_STUB)
1746 handler=jump_handler_read16;
1747 if(type==LOADW_STUB)
1748 handler=jump_handler_read32;
1749 assert(handler);
1750 pass_args(rs,temp2);
1751 int cc=get_reg(i_regmap,CCREG);
1752 if(cc<0)
1753 emit_loadreg(CCREG,2);
1754 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1755 emit_call(handler);
1756 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1757 mov_loadtype_adj(type,0,rt);
1758 }
1759 if(restore_jump)
1760 set_jump_target(restore_jump, out);
1761 restore_regs(reglist);
1762 emit_jmp(stubs[n].retaddr); // return address
1763}
1764
1765static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1766{
1767 int rs=get_reg(regmap,target);
1768 int rt=get_reg(regmap,target);
1769 if(rs<0) rs=get_reg(regmap,-1);
1770 assert(rs>=0);
1771 u_int is_dynamic,far_call=0;
1772 uintptr_t host_addr = 0;
1773 void *handler;
1774 int cc=get_reg(regmap,CCREG);
1775 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1776 return;
1777 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1778 if (handler == NULL) {
1779 if(rt<0||rt1[i]==0)
1780 return;
1781 if(addr!=host_addr)
1782 emit_movimm_from(addr,rs,host_addr,rs);
1783 switch(type) {
1784 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1785 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1786 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1787 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1788 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1789 default: assert(0);
1790 }
1791 return;
1792 }
1793 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1794 if(is_dynamic) {
1795 if(type==LOADB_STUB||type==LOADBU_STUB)
1796 handler=jump_handler_read8;
1797 if(type==LOADH_STUB||type==LOADHU_STUB)
1798 handler=jump_handler_read16;
1799 if(type==LOADW_STUB)
1800 handler=jump_handler_read32;
1801 }
1802
1803 // call a memhandler
1804 if(rt>=0&&rt1[i]!=0)
1805 reglist&=~(1<<rt);
1806 save_regs(reglist);
1807 if(target==0)
1808 emit_movimm(addr,0);
1809 else if(rs!=0)
1810 emit_mov(rs,0);
1811 int offset=(u_char *)handler-out-8;
1812 if(offset<-33554432||offset>=33554432) {
1813 // unreachable memhandler, a plugin func perhaps
1814 emit_movimm((u_int)handler,12);
1815 far_call=1;
1816 }
1817 if(cc<0)
1818 emit_loadreg(CCREG,2);
1819 if(is_dynamic) {
1820 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1821 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1822 }
1823 else {
1824 emit_readword(&last_count,3);
1825 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1826 emit_add(2,3,2);
1827 emit_writeword(2,&Count);
1828 }
1829
1830 if(far_call)
1831 emit_callreg(12);
1832 else
1833 emit_call(handler);
1834
1835 if(rt>=0&&rt1[i]!=0) {
1836 switch(type) {
1837 case LOADB_STUB: emit_signextend8(0,rt); break;
1838 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1839 case LOADH_STUB: emit_signextend16(0,rt); break;
1840 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1841 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1842 default: assert(0);
1843 }
1844 }
1845 restore_regs(reglist);
1846}
1847
1848static void do_writestub(int n)
1849{
1850 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1851 literal_pool(256);
1852 set_jump_target(stubs[n].addr, out);
1853 enum stub_type type=stubs[n].type;
1854 int i=stubs[n].a;
1855 int rs=stubs[n].b;
1856 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1857 u_int reglist=stubs[n].e;
1858 signed char *i_regmap=i_regs->regmap;
1859 int rt,r;
1860 if(itype[i]==C1LS||itype[i]==C2LS) {
1861 rt=get_reg(i_regmap,r=FTEMP);
1862 }else{
1863 rt=get_reg(i_regmap,r=rs2[i]);
1864 }
1865 assert(rs>=0);
1866 assert(rt>=0);
1867 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1868 void *restore_jump = NULL;
1869 int reglist2=reglist|(1<<rs)|(1<<rt);
1870 for(rtmp=0;rtmp<=12;rtmp++) {
1871 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1872 temp=rtmp; break;
1873 }
1874 }
1875 if(temp==-1) {
1876 save_regs(reglist);
1877 regs_saved=1;
1878 for(rtmp=0;rtmp<=3;rtmp++)
1879 if(rtmp!=rs&&rtmp!=rt)
1880 {temp=rtmp;break;}
1881 }
1882 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1883 temp2=3;
1884 emit_readword(&mem_wtab,temp);
1885 emit_shrimm(rs,12,temp2);
1886 emit_readword_dualindexedx4(temp,temp2,temp2);
1887 emit_lsls_imm(temp2,1,temp2);
1888 switch(type) {
1889 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1890 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1891 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1892 default: assert(0);
1893 }
1894 if(regs_saved) {
1895 restore_jump=out;
1896 emit_jcc(0); // jump to reg restore
1897 }
1898 else
1899 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1900
1901 if(!regs_saved)
1902 save_regs(reglist);
1903 void *handler=NULL;
1904 switch(type) {
1905 case STOREB_STUB: handler=jump_handler_write8; break;
1906 case STOREH_STUB: handler=jump_handler_write16; break;
1907 case STOREW_STUB: handler=jump_handler_write32; break;
1908 default: assert(0);
1909 }
1910 assert(handler);
1911 pass_args(rs,rt);
1912 if(temp2!=3)
1913 emit_mov(temp2,3);
1914 int cc=get_reg(i_regmap,CCREG);
1915 if(cc<0)
1916 emit_loadreg(CCREG,2);
1917 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1918 // returns new cycle_count
1919 emit_call(handler);
1920 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1921 if(cc<0)
1922 emit_storereg(CCREG,2);
1923 if(restore_jump)
1924 set_jump_target(restore_jump, out);
1925 restore_regs(reglist);
1926 emit_jmp(stubs[n].retaddr);
1927}
1928
1929static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1930{
1931 int rs=get_reg(regmap,-1);
1932 int rt=get_reg(regmap,target);
1933 assert(rs>=0);
1934 assert(rt>=0);
1935 uintptr_t host_addr = 0;
1936 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1937 if (handler == NULL) {
1938 if(addr!=host_addr)
1939 emit_movimm_from(addr,rs,host_addr,rs);
1940 switch(type) {
1941 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1942 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1943 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1944 default: assert(0);
1945 }
1946 return;
1947 }
1948
1949 // call a memhandler
1950 save_regs(reglist);
1951 pass_args(rs,rt);
1952 int cc=get_reg(regmap,CCREG);
1953 if(cc<0)
1954 emit_loadreg(CCREG,2);
1955 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1956 emit_movimm((u_int)handler,3);
1957 // returns new cycle_count
1958 emit_call(jump_handler_write_h);
1959 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
1960 if(cc<0)
1961 emit_storereg(CCREG,2);
1962 restore_regs(reglist);
1963}
1964
1965static void do_unalignedwritestub(int n)
1966{
1967 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
1968 literal_pool(256);
1969 set_jump_target(stubs[n].addr, out);
1970
1971 int i=stubs[n].a;
1972 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1973 int addr=stubs[n].b;
1974 u_int reglist=stubs[n].e;
1975 signed char *i_regmap=i_regs->regmap;
1976 int temp2=get_reg(i_regmap,FTEMP);
1977 int rt;
1978 rt=get_reg(i_regmap,rs2[i]);
1979 assert(rt>=0);
1980 assert(addr>=0);
1981 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
1982 reglist|=(1<<addr);
1983 reglist&=~(1<<temp2);
1984
1985#if 1
1986 // don't bother with it and call write handler
1987 save_regs(reglist);
1988 pass_args(addr,rt);
1989 int cc=get_reg(i_regmap,CCREG);
1990 if(cc<0)
1991 emit_loadreg(CCREG,2);
1992 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1993 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
1994 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1995 if(cc<0)
1996 emit_storereg(CCREG,2);
1997 restore_regs(reglist);
1998 emit_jmp(stubs[n].retaddr); // return address
1999#else
2000 emit_andimm(addr,0xfffffffc,temp2);
2001 emit_writeword(temp2,&address);
2002
2003 save_regs(reglist);
2004 emit_shrimm(addr,16,1);
2005 int cc=get_reg(i_regmap,CCREG);
2006 if(cc<0) {
2007 emit_loadreg(CCREG,2);
2008 }
2009 emit_movimm((u_int)readmem,0);
2010 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
2011 emit_call((int)&indirect_jump_indexed);
2012 restore_regs(reglist);
2013
2014 emit_readword(&readmem_dword,temp2);
2015 int temp=addr; //hmh
2016 emit_shlimm(addr,3,temp);
2017 emit_andimm(temp,24,temp);
2018#ifdef BIG_ENDIAN_MIPS
2019 if (opcode[i]==0x2e) // SWR
2020#else
2021 if (opcode[i]==0x2a) // SWL
2022#endif
2023 emit_xorimm(temp,24,temp);
2024 emit_movimm(-1,HOST_TEMPREG);
2025 if (opcode[i]==0x2a) { // SWL
2026 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2027 emit_orrshr(rt,temp,temp2);
2028 }else{
2029 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2030 emit_orrshl(rt,temp,temp2);
2031 }
2032 emit_readword(&address,addr);
2033 emit_writeword(temp2,&word);
2034 //save_regs(reglist); // don't need to, no state changes
2035 emit_shrimm(addr,16,1);
2036 emit_movimm((u_int)writemem,0);
2037 //emit_call((int)&indirect_jump_indexed);
2038 emit_mov(15,14);
2039 emit_readword_dualindexedx4(0,1,15);
2040 emit_readword(&Count,HOST_TEMPREG);
2041 emit_readword(&next_interupt,2);
2042 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
2043 emit_writeword(2,&last_count);
2044 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2045 if(cc<0) {
2046 emit_storereg(CCREG,HOST_TEMPREG);
2047 }
2048 restore_regs(reglist);
2049 emit_jmp(stubs[n].retaddr); // return address
2050#endif
2051}
2052
2053static void do_invstub(int n)
2054{
2055 literal_pool(20);
2056 u_int reglist=stubs[n].a;
2057 set_jump_target(stubs[n].addr, out);
2058 save_regs(reglist);
2059 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2060 emit_call(&invalidate_addr);
2061 restore_regs(reglist);
2062 emit_jmp(stubs[n].retaddr); // return address
2063}
2064
2065void *do_dirty_stub(int i)
2066{
2067 assem_debug("do_dirty_stub %x\n",start+i*4);
2068 u_int addr=(u_int)source;
2069 // Careful about the code output here, verify_dirty needs to parse it.
2070 #ifndef HAVE_ARMV7
2071 emit_loadlp(addr,1);
2072 emit_loadlp((int)copy,2);
2073 emit_loadlp(slen*4,3);
2074 #else
2075 emit_movw(addr&0x0000FFFF,1);
2076 emit_movw(((u_int)copy)&0x0000FFFF,2);
2077 emit_movt(addr&0xFFFF0000,1);
2078 emit_movt(((u_int)copy)&0xFFFF0000,2);
2079 emit_movw(slen*4,3);
2080 #endif
2081 emit_movimm(start+i*4,0);
2082 emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
2083 void *entry = out;
2084 load_regs_entry(i);
2085 if (entry == out)
2086 entry = instr_addr[i];
2087 emit_jmp(instr_addr[i]);
2088 return entry;
2089}
2090
2091static void do_dirty_stub_ds()
2092{
2093 // Careful about the code output here, verify_dirty needs to parse it.
2094 #ifndef HAVE_ARMV7
2095 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2096 emit_loadlp((int)copy,2);
2097 emit_loadlp(slen*4,3);
2098 #else
2099 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2100 emit_movw(((u_int)copy)&0x0000FFFF,2);
2101 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2102 emit_movt(((u_int)copy)&0xFFFF0000,2);
2103 emit_movw(slen*4,3);
2104 #endif
2105 emit_movimm(start+1,0);
2106 emit_call(&verify_code_ds);
2107}
2108
2109/* Special assem */
2110
2111static void shift_assemble_arm(int i,struct regstat *i_regs)
2112{
2113 if(rt1[i]) {
2114 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2115 {
2116 signed char s,t,shift;
2117 t=get_reg(i_regs->regmap,rt1[i]);
2118 s=get_reg(i_regs->regmap,rs1[i]);
2119 shift=get_reg(i_regs->regmap,rs2[i]);
2120 if(t>=0){
2121 if(rs1[i]==0)
2122 {
2123 emit_zeroreg(t);
2124 }
2125 else if(rs2[i]==0)
2126 {
2127 assert(s>=0);
2128 if(s!=t) emit_mov(s,t);
2129 }
2130 else
2131 {
2132 emit_andimm(shift,31,HOST_TEMPREG);
2133 if(opcode2[i]==4) // SLLV
2134 {
2135 emit_shl(s,HOST_TEMPREG,t);
2136 }
2137 if(opcode2[i]==6) // SRLV
2138 {
2139 emit_shr(s,HOST_TEMPREG,t);
2140 }
2141 if(opcode2[i]==7) // SRAV
2142 {
2143 emit_sar(s,HOST_TEMPREG,t);
2144 }
2145 }
2146 }
2147 } else { // DSLLV/DSRLV/DSRAV
2148 signed char sh,sl,th,tl,shift;
2149 th=get_reg(i_regs->regmap,rt1[i]|64);
2150 tl=get_reg(i_regs->regmap,rt1[i]);
2151 sh=get_reg(i_regs->regmap,rs1[i]|64);
2152 sl=get_reg(i_regs->regmap,rs1[i]);
2153 shift=get_reg(i_regs->regmap,rs2[i]);
2154 if(tl>=0){
2155 if(rs1[i]==0)
2156 {
2157 emit_zeroreg(tl);
2158 if(th>=0) emit_zeroreg(th);
2159 }
2160 else if(rs2[i]==0)
2161 {
2162 assert(sl>=0);
2163 if(sl!=tl) emit_mov(sl,tl);
2164 if(th>=0&&sh!=th) emit_mov(sh,th);
2165 }
2166 else
2167 {
2168 // FIXME: What if shift==tl ?
2169 assert(shift!=tl);
2170 int temp=get_reg(i_regs->regmap,-1);
2171 int real_th=th;
2172 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2173 assert(sl>=0);
2174 assert(sh>=0);
2175 emit_andimm(shift,31,HOST_TEMPREG);
2176 if(opcode2[i]==0x14) // DSLLV
2177 {
2178 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2179 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2180 emit_orrshr(sl,HOST_TEMPREG,th);
2181 emit_andimm(shift,31,HOST_TEMPREG);
2182 emit_testimm(shift,32);
2183 emit_shl(sl,HOST_TEMPREG,tl);
2184 if(th>=0) emit_cmovne_reg(tl,th);
2185 emit_cmovne_imm(0,tl);
2186 }
2187 if(opcode2[i]==0x16) // DSRLV
2188 {
2189 assert(th>=0);
2190 emit_shr(sl,HOST_TEMPREG,tl);
2191 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2192 emit_orrshl(sh,HOST_TEMPREG,tl);
2193 emit_andimm(shift,31,HOST_TEMPREG);
2194 emit_testimm(shift,32);
2195 emit_shr(sh,HOST_TEMPREG,th);
2196 emit_cmovne_reg(th,tl);
2197 if(real_th>=0) emit_cmovne_imm(0,th);
2198 }
2199 if(opcode2[i]==0x17) // DSRAV
2200 {
2201 assert(th>=0);
2202 emit_shr(sl,HOST_TEMPREG,tl);
2203 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2204 if(real_th>=0) {
2205 assert(temp>=0);
2206 emit_sarimm(th,31,temp);
2207 }
2208 emit_orrshl(sh,HOST_TEMPREG,tl);
2209 emit_andimm(shift,31,HOST_TEMPREG);
2210 emit_testimm(shift,32);
2211 emit_sar(sh,HOST_TEMPREG,th);
2212 emit_cmovne_reg(th,tl);
2213 if(real_th>=0) emit_cmovne_reg(temp,th);
2214 }
2215 }
2216 }
2217 }
2218 }
2219}
2220#define shift_assemble shift_assemble_arm
2221
/*
 * Emit ARM code for the unaligned loads LWL (opcode 0x22) and LWR (0x26).
 * The 64-bit LDL/LDR forms (0x1A/0x1B) are not implemented and assert.
 *
 * Outline of the emitted code for LWL/LWR:
 *   - temp  receives the address shifted left by 3 (later masked to 0/8/16/24)
 *   - temp2 receives the word-aligned address, then the loaded word
 *   - the loaded word is shifted, the bytes it replaces are cleared in the
 *     target register with a shifted all-ones mask, and the two are OR-ed.
 *
 * i       index of the instruction being assembled
 * i_regs  register state (regmap and wasconst are read)
 */
static void loadlr_assemble_arm(int i,struct regstat *i_regs)
{
  int s,tl,temp,temp2,addr;
  int offset;
  void *jaddr=0;
  int memtarget=0,c=0;
  int fastload_reg_override=0;
  u_int hr,reglist=0;
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,-1);
  temp2=get_reg(i_regs->regmap,FTEMP);
  addr=get_reg(i_regs->regmap,AGEN1+(i&1));
  assert(addr<0);
  offset=imm[i];
  // Build the bitmask of host registers that currently hold a mapping.
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  reglist|=1<<temp;
  // NOTE(review): c is still 0 here (it is only computed below), so the
  // "||c" term never contributes to this choice.
  if(offset||s<0||c) addr=temp2;
  else addr=s;
  if(s>=0) {
    // c: the base register was a known constant on entry to this instruction.
    c=(i_regs->wasconst>>s)&1;
    if(c) {
      // memtarget: the constant address lies below 0x80000000+RAM_SIZE
      // (signed comparison).
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
    }
  }
  if(!c) {
    // Address only known at run time: derive shift count and aligned
    // address, then emit the fast-path check (jaddr is handed to the stub
    // registration below).
    emit_shlimm(addr,3,temp);
    if (opcode[i]==0x22||opcode[i]==0x26) {
      emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
    }else{
      emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
    }
    jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
  }
  else {
    // Constant address: the shift count is known at assembly time.
    if(ram_offset&&memtarget) {
      // presumably temp2 already holds the constant address at this point
      // (loaded by earlier address generation) -- TODO confirm
      emit_addimm(temp2,ram_offset,HOST_TEMPREG);
      fastload_reg_override=HOST_TEMPREG;
    }
    if (opcode[i]==0x22||opcode[i]==0x26) {
      emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
    }else{
      emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
    }
  }
  if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
    if(!c||memtarget) {
      // Direct word read, using the override base register when one was set.
      int a=temp2;
      if(fastload_reg_override) a=fastload_reg_override;
      emit_readword_indexed(0,a,temp2);
      if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
    }
    else
      // Constant address that is not a memory target: inline the read stub.
      inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(tl>=0);
      emit_andimm(temp,24,temp);
      // One of the two instructions needs the complementary shift count;
      // which one depends on the emulated endianness.
#ifdef BIG_ENDIAN_MIPS
      if (opcode[i]==0x26) // LWR
#else
      if (opcode[i]==0x22) // LWL
#endif
        emit_xorimm(temp,24,temp);
      // HOST_TEMPREG = all ones; shifted by temp it becomes the mask of
      // bits in tl that are replaced by the loaded data.
      emit_movimm(-1,HOST_TEMPREG);
      if (opcode[i]==0x26) {
        emit_shr(temp2,temp,temp2);
        emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
      }else{
        emit_shl(temp2,temp,temp2);
        emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
      }
      emit_or(temp2,tl,tl);
    }
    //emit_storereg(rt1[i],tl); // DEBUG
  }
  if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
    // 64-bit unaligned loads are not supported by this backend.
    assert(0);
  }
}
#define loadlr_assemble loadlr_assemble_arm
2304
2305static void c2op_prologue(u_int op,u_int reglist)
2306{
2307 save_regs_all(reglist);
2308#ifdef PCNT
2309 emit_movimm(op,0);
2310 emit_call((int)pcnt_gte_start);
2311#endif
2312 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
2313}
2314
2315static void c2op_epilogue(u_int op,u_int reglist)
2316{
2317#ifdef PCNT
2318 emit_movimm(op,0);
2319 emit_call((int)pcnt_gte_end);
2320#endif
2321 restore_regs_all(reglist);
2322}
2323
2324static void c2op_call_MACtoIR(int lm,int need_flags)
2325{
2326 if(need_flags)
2327 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2328 else
2329 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2330}
2331
2332static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2333{
2334 emit_call(func);
2335 // func is C code and trashes r0
2336 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2337 if(need_flags||need_ir)
2338 c2op_call_MACtoIR(lm,need_flags);
2339 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2340}
2341
/*
 * Emit ARM code for a GTE (cop2) operation.
 *
 * The operation number is the low 6 bits of the instruction word. Nothing is
 * emitted when gte_handlers[] has no entry for it. Otherwise the sequence is
 * c2op_prologue, an operation-specific body, c2op_epilogue.
 *
 * Unless DRC_DBG is defined, several operations are assembled from dedicated
 * "part" helpers followed by separate MAC-to-IR / MAC-to-RGB steps, so the
 * flag and IR work can be skipped when liveness says it is unneeded. Every
 * other operation calls the generic handler table.
 *
 * i       index of the instruction being assembled
 * i_regs  register state; regmap determines which host registers are saved
 */
static void c2op_assemble(int i,struct regstat *i_regs)
{
  u_int c2op=source[i]&0x3f;
  u_int hr,reglist_full=0,reglist;
  int need_flags,need_ir;
  // Collect every host register that currently holds a mapping ...
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
  }
  // ... and by default save only those in the caller-saved set.
  reglist=reglist_full&CALLER_SAVE_REGS;

  if (gte_handlers[c2op]!=NULL) {
    // Flags are needed unless bit 63 of the unneeded mask is set.
    need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
    // IR results are needed unless all three bits of 0xe00 are marked unneeded.
    need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
    assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
      source[i],gte_unneeded[i+1],need_flags,need_ir);
    if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
      need_flags=0;
    // Instruction fields: bit 19 and bit 10 of the opcode word.
    int shift = (source[i] >> 19) & 1;
    int lm = (source[i] >> 10) & 1;
    switch(c2op) {
#ifndef DRC_DBG
      case GTE_MVMVA: {
#ifdef HAVE_ARMV5
        // Operand selectors taken from the instruction word.
        int v = (source[i] >> 15) & 3;
        int cv = (source[i] >> 13) & 3;
        int mx = (source[i] >> 17) & 3;
        reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
        c2op_prologue(c2op,reglist);
        /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
        if(v<3)
          emit_ldrd(v*8,0,4);
        else {
          // v==3: build the packed vector from three halfword loads
          // at word offsets 9, 10 and 11 from r0.
          emit_movzwl_indexed(9*4,0,4); // gteIR
          emit_movzwl_indexed(10*4,0,6);
          emit_movzwl_indexed(11*4,0,5);
          emit_orrshl_imm(6,16,4);
        }
        // Selector value 3 for mx/cv loads the pointer stored in
        // zeromem_ptr instead of an offset from r0.
        if(mx<3)
          emit_addimm(0,32*4+mx*8*4,6);
        else
          emit_readword(&zeromem_ptr,6);
        if(cv<3)
          emit_addimm(0,32*4+(cv*8+5)*4,7);
        else
          emit_readword(&zeromem_ptr,7);
#ifdef __ARM_NEON__
        emit_movimm(source[i],1); // opcode
        emit_call(gteMVMVA_part_neon);
        if(need_flags) {
          emit_movimm(lm,1);
          emit_call(gteMACtoIR_flags_neon);
        }
#else
        if(cv==3&&shift)
          emit_call((int)gteMVMVA_part_cv3sh12_arm);
        else {
          emit_movimm(shift,1);
          emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
        }
        if(need_flags||need_ir)
          c2op_call_MACtoIR(lm,need_flags);
#endif
#else /* if not HAVE_ARMV5 */
        // Generic path: store the opcode word in psxRegs.code and call
        // the handler from the table.
        c2op_prologue(c2op,reglist);
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,&psxRegs.code);
        emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
#endif
        break;
      }
      case GTE_OP:
        c2op_prologue(c2op,reglist);
        emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
        if(need_flags||need_ir) {
          // Reload r0 with the cop2 register base before the MAC-to-IR step.
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DPCS:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_INTPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_SQR:
        c2op_prologue(c2op,reglist);
        emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
        if(need_flags||need_ir) {
          // Reload r0 with the cop2 register base before the MAC-to-IR step.
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DCPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
        break;
      case GTE_GPF:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_GPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
        break;
#endif
      default:
        // Everything else (and every operation in DRC_DBG builds) goes
        // through the generic handler table.
        c2op_prologue(c2op,reglist);
#ifdef DRC_DBG
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,&psxRegs.code);
#endif
        emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
        break;
    }
    c2op_epilogue(c2op,reglist);
  }
}
2461
/*
 * Emit ARM code for MULT/MULTU/DIV/DIVU. The 64-bit forms (opcode2 with
 * bit 2 set) are not supported and assert.
 *
 * When either source is register 0, HI and LO are simply zeroed (if mapped).
 *
 * The division sequences contain branches with hard-coded byte offsets
 * relative to "out", so the number and order of instructions emitted after
 * each such branch must not change without adjusting those offsets.
 *
 * i       index of the instruction being assembled
 * i_regs  register state whose regmap is used to look up host registers
 */
static void multdiv_assemble_arm(int i,struct regstat *i_regs)
{
  // case 0x18: MULT
  // case 0x19: MULTU
  // case 0x1A: DIV
  // case 0x1B: DIVU
  // case 0x1C: DMULT
  // case 0x1D: DMULTU
  // case 0x1E: DDIV
  // case 0x1F: DDIVU
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      if(opcode2[i]==0x18) // MULT
      {
        signed char m1=get_reg(i_regs->regmap,rs1[i]);
        signed char m2=get_reg(i_regs->regmap,rs2[i]);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);
        // Signed 32x32 multiply with the 64-bit result split across hi/lo.
        emit_smull(m1,m2,hi,lo);
      }
      if(opcode2[i]==0x19) // MULTU
      {
        signed char m1=get_reg(i_regs->regmap,rs1[i]);
        signed char m2=get_reg(i_regs->regmap,rs2[i]);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);
        // Unsigned counterpart of the above.
        emit_umull(m1,m2,hi,lo);
      }
      if(opcode2[i]==0x1A) // DIV
      {
        signed char d1=get_reg(i_regs->regmap,rs1[i]);
        signed char d2=get_reg(i_regs->regmap,rs2[i]);
        assert(d1>=0);
        assert(d2>=0);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(quotient>=0);
        assert(remainder>=0);
        // Copy the dividend and set flags from it; the two conditional
        // negations below act only when it is negative.
        emit_movs(d1,remainder);
        emit_movimm(0xffffffff,quotient);
        emit_negmi(quotient,quotient); // .. quotient and ..
        emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
        emit_movs(d2,HOST_TEMPREG);
        // Fixed forward branch over the division loop when the divisor is 0.
        emit_jeq((int)out+52); // Division by zero
        emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
#ifdef HAVE_ARMV5
        // Normalize the divisor using count-leading-zeros.
        emit_clz(HOST_TEMPREG,quotient);
        emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
#else
        // No CLZ available: shift left one bit at a time, counting the
        // steps; the jns loops back over the previous two instructions.
        emit_movimm(0,quotient);
        emit_addpl_imm(quotient,1,quotient);
        emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
        emit_jns((int)out-2*4);
#endif
        emit_orimm(quotient,1<<31,quotient);
        emit_shr(quotient,quotient,quotient);
        // Subtract-and-shift loop; the jcc branches back to the cmp.
        emit_cmp(remainder,HOST_TEMPREG);
        emit_subcs(remainder,HOST_TEMPREG,remainder);
        emit_adcs(quotient,quotient,quotient);
        emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
        emit_jcc(out-16); // -4
        // Sign fix-up: negate the quotient when the operand signs differ,
        // and the remainder when the dividend is negative.
        emit_teq(d1,d2);
        emit_negmi(quotient,quotient);
        emit_test(d1,d1);
        emit_negmi(remainder,remainder);
      }
      if(opcode2[i]==0x1B) // DIVU
      {
        signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
        signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
        assert(d1>=0);
        assert(d2>=0);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(quotient>=0);
        assert(remainder>=0);
        emit_mov(d1,remainder);
        emit_movimm(0xffffffff,quotient); // div0 case
        emit_test(d2,d2);
        // Fixed forward branch over the division loop when the divisor is 0.
        emit_jeq((int)out+40); // Division by zero
#ifdef HAVE_ARMV5
        // Normalize the divisor in place (d2 is modified by this sequence).
        emit_clz(d2,HOST_TEMPREG);
        emit_movimm(1<<31,quotient);
        emit_shl(d2,HOST_TEMPREG,d2);
#else
        // No CLZ available: shift left one bit at a time, counting the
        // steps; the jns loops back over the previous two instructions.
        emit_movimm(0,HOST_TEMPREG);
        emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
        emit_lslpls_imm(d2,1,d2);
        emit_jns((int)out-2*4);
        emit_movimm(1<<31,quotient);
#endif
        emit_shr(quotient,HOST_TEMPREG,quotient);
        // Subtract-and-shift loop; the jcc branches back to the cmp.
        emit_cmp(remainder,d2);
        emit_subcs(remainder,d2,remainder);
        emit_adcs(quotient,quotient,quotient);
        emit_shrcc_imm(d2,1,d2);
        emit_jcc(out-16); // -4
      }
    }
    else // 64-bit
      assert(0);
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    signed char hr=get_reg(i_regs->regmap,HIREG);
    signed char lr=get_reg(i_regs->regmap,LOREG);
    if(hr>=0) emit_zeroreg(hr);
    if(lr>=0) emit_zeroreg(lr);
  }
}
#define multdiv_assemble multdiv_assemble_arm
2586
/*
 * Intentionally empty on ARM. On x86 this step puts the value 0xf8 into the
 * register; on ARM the hash is produced by a single instruction in do_rhash.
 */
static void do_preload_rhash(int r) {
  (void)r; // no hash constant needs to be preloaded
}
2591
2592static void do_preload_rhtbl(int ht) {
2593 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2594}
2595
/*
 * Emit the mini hash table hash: rh = rs & 0xf8.
 */
static void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
2599
2600static void do_miniht_load(int ht,int rh) {
2601 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2602 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2603}
2604
2605static void do_miniht_jump(int rs,int rh,int ht) {
2606 emit_cmp(rh,rs);
2607 emit_ldreq_indexed(ht,4,15);
2608 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
2609 emit_mov(rs,7);
2610 emit_jmp(jump_vaddr_reg[7]);
2611 #else
2612 emit_jmp(jump_vaddr_reg[rs]);
2613 #endif
2614}
2615
2616static void do_miniht_insert(u_int return_address,int rt,int temp) {
2617 #ifndef HAVE_ARMV7
2618 emit_movimm(return_address,rt); // PC into link register
2619 add_to_linker(out,return_address,1);
2620 emit_pcreladdr(temp);
2621 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2622 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2623 #else
2624 emit_movw(return_address&0x0000FFFF,rt);
2625 add_to_linker(out,return_address,1);
2626 emit_pcreladdr(temp);
2627 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2628 emit_movt(return_address&0xFFFF0000,rt);
2629 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2630 #endif
2631}
2632
2633static void mark_clear_cache(void *target)
2634{
2635 u_long offset = (u_char *)target - translation_cache;
2636 u_int mask = 1u << ((offset >> 12) & 31);
2637 if (!(needs_clear_cache[offset >> 17] & mask)) {
2638 char *start = (char *)((u_long)target & ~4095ul);
2639 start_tcache_write(start, start + 4096);
2640 needs_clear_cache[offset >> 17] |= mask;
2641 }
2642}
2643
2644// Clearing the cache is rather slow on ARM Linux, so mark the areas
2645// that need to be cleared, and then only clear these areas once.
2646static void do_clear_cache()
2647{
2648 int i,j;
2649 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
2650 {
2651 u_int bitmap=needs_clear_cache[i];
2652 if(bitmap) {
2653 u_char *start, *end;
2654 for(j=0;j<32;j++)
2655 {
2656 if(bitmap&(1<<j)) {
2657 start=translation_cache+i*131072+j*4096;
2658 end=start+4095;
2659 j++;
2660 while(j<32) {
2661 if(bitmap&(1<<j)) {
2662 end+=4096;
2663 j++;
2664 }else{
2665 end_tcache_write(start, end);
2666 break;
2667 }
2668 }
2669 }
2670 }
2671 needs_clear_cache[i]=0;
2672 }
2673 }
2674}
2675
// CPU-architecture-specific initialization.
// The ARM backend has nothing to set up here, so the body is empty.
static void arch_init()
{
}
2679
2680// vim:shiftwidth=2:expandtab