drc: some more general cleanup
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33u_char *translation_cache;
34#else
35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
/*
 * NOTE(review): presumably a bitmask of host registers to save around calls
 * (bits 0-3 and 12; Mach builds additionally set bit 9) - confirm at the
 * use sites, which are outside this view.
 */
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

/* Marks a definition that may legitimately go unreferenced. */
#define unused __attribute__((unused))

/* Debug builds: silence the unused-* warnings. */
#ifdef DRC_DBG
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
51
/* Entry points that are defined outside this file. */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

/*
 * jump_vaddr_reg[n] is the jump_vaddr_rN entry point for host register n.
 * Slots 11, 13, 14 and 15 have no entry point and stay 0.
 */
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
86
87void invalidate_addr_r0();
88void invalidate_addr_r1();
89void invalidate_addr_r2();
90void invalidate_addr_r3();
91void invalidate_addr_r4();
92void invalidate_addr_r5();
93void invalidate_addr_r6();
94void invalidate_addr_r7();
95void invalidate_addr_r8();
96void invalidate_addr_r9();
97void invalidate_addr_r10();
98void invalidate_addr_r12();
99
100const u_int invalidate_addr_reg[16] = {
101 (int)invalidate_addr_r0,
102 (int)invalidate_addr_r1,
103 (int)invalidate_addr_r2,
104 (int)invalidate_addr_r3,
105 (int)invalidate_addr_r4,
106 (int)invalidate_addr_r5,
107 (int)invalidate_addr_r6,
108 (int)invalidate_addr_r7,
109 (int)invalidate_addr_r8,
110 (int)invalidate_addr_r9,
111 (int)invalidate_addr_r10,
112 0,
113 (int)invalidate_addr_r12,
114 0,
115 0,
116 0};
117
118static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
119
120/* Linker */
121
/*
 * Patch the 32-bit instruction at `addr` so that it refers to `target_`.
 *
 * The byte at offset 3 of the instruction selects the patch form:
 *   0xe2  - low 12 bits are rewritten with (distance >> 2) | 0xF00;
 *           the distance (target - addr - 8) must be below 1024.
 *   0x72  - "generated by emit_jno_unlikely": uses the 0xF00 form when the
 *           distance is below 1024, the 0xE00 form (distance >> 4) when it
 *           is below 4096 and a multiple of 16, and otherwise replaces the
 *           whole word with 0x7A000000 | 24-bit offset.
 *   other - must satisfy (byte & 0x0e) == 0xa; the low 24 bits are replaced
 *           with the offset and the top byte is kept.
 */
static void set_jump_target(void *addr, void *target_)
{
  u_char *bytes = addr;
  u_int *insn = (u_int *)bytes;
  u_int target = (u_int)target_;
  // distance from the instruction to the target, minus 8
  u_int dist = target - (u_int)insn - 8;
  u_char kind = bytes[3];

  if (kind == 0xe2) {
    assert(dist < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (dist >> 2) | 0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*insn);
    return;
  }

  if (kind == 0x72) {
    // generated by emit_jno_unlikely
    if (dist < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (dist >> 2) | 0xF00;
    }
    else if (dist < 4096 && !(dist & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (dist >> 4) | 0xE00;
    }
    else {
      *insn = (0x7A000000) | ((dist << 6) >> 8);
    }
    return;
  }

  assert((kind & 0x0e) == 0xa);
  *insn = (*insn & 0xFF000000) | ((dist << 6) >> 8);
}
153
154// This optionally copies the instruction from the target of the branch into
155// the space before the branch. Works, but the difference in speed is
156// usually insignificant.
#if 0
/* Disabled variant of set_jump_target: when `copy` is set and the target
 * instruction qualifies, it is copied into the word before the branch and
 * the branch is pointed one instruction past the target. */
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes=(u_char *)addr;
  u_int *insn=(u_int *)bytes;

  assert(!copy||insn[-1]==0xe28dd000);
  if(bytes[3]==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }

  assert((bytes[3]&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  if((target_insn&0x0e100000)==0            // ALU, no immediate, no flags
     ||(target_insn&0x0c100000)==0x04100000 // Load
     ||(target_insn&0x08000000))
    copy=0;
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
188
189/* Literal pool */
190static void add_literal(int addr,int val)
191{
192 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
193 literals[literalcount][0]=addr;
194 literals[literalcount][1]=val;
195 literalcount++;
196}
197
198// from a pointer to external jump stub (which was produced by emit_extjump2)
199// find where the jumping insn is
// The second word of the stub must be "ldr rx, [pc, #ofs]"; return the
// pointer stored in the literal that this ldr addresses.
static void *find_extjump_insn(void *stub)
{
  u_char *ldr_at=(u_char *)stub+4;
  int ldr=*(int *)ldr_at;
  assert((ldr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
  u_int imm12=ldr&0xfff;
  // pc reads as the instruction address + 8
  void **literal=(void **)(ldr_at+imm12+8);
  return *literal;
}
208
// find where an external branch is linked to, using the address of its stub:
// get the address that the insn one after the stub loads (dyna_linker arg1),
// treat it as a pointer to the branch insn,
// return the address that branch jumps to
213static void *get_pointer(void *stub)
214{
215 //printf("get_pointer(%x)\n",(int)stub);
216 int *i_ptr=find_extjump_insn(stub);
217 assert((*i_ptr&0x0f000000)==0x0a000000);
218 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
219}
220
221// Find the "clean" entry point from a "dirty" entry point
222// by skipping past the call to verify_code
// Skip the prologue of a "dirty" entry point: 4 words (6 with HAVE_ARMV7),
// optionally one more, then a mandatory "bl". If the instruction after the
// bl is an unconditional branch (0xea......), return its target; otherwise
// return the address of that instruction.
static void *get_clean_addr(void *addr)
{
  u_int *insn = addr;
  #ifdef HAVE_ARMV7
  insn+=6;
  #else
  insn+=4;
  #endif
  if((*insn&0xFF000000)!=0xeb000000) insn++;
  assert((*insn&0xFF000000)==0xeb000000); // bl instruction
  insn++;
  if((*insn&0xFF000000)==0xea000000) {
    // Shift left as unsigned (left shifting a negative int is undefined),
    // then right as signed to sign-extend the 24-bit offset and scale by 4.
    int disp=(int)(*insn<<8)>>6;
    return (char *)insn+disp+8; // follow jump
  }
  return insn;
}
239
// Decode the (source, copy, len) triple from the block prologue at ptr and
// return non-zero when the len bytes at source and copy are identical.
// Without HAVE_ARMV7 the three values come from pc-relative literal loads;
// with it they are reassembled from movw/movt immediates.
static int verify_dirty(u_int *ptr)
{
  u_int source, copy, len;
  #ifdef HAVE_ARMV7
  // ARMv7 movw/movt
  assert((*ptr&0xFFF00000)==0xe3000000);
  source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
  copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
  len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
  ptr+=6;
  #else
  // get from literal pool: three consecutive "ldr rX,[pc,#ofs]"
  u_int value[3];
  for(int k=0;k<3;k++) {
    assert((ptr[k]&0xFFFF0000)==0xe59f0000);
    u_int offset=ptr[k]&0xfff;
    value[k]=*(u_int *)((u_char *)&ptr[k]+offset+8);
  }
  source=value[0];
  copy=value[1];
  len=value[2];
  ptr+=4; // the three loads plus one more word
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  //printf("verify_dirty: %x %x %x\n",source,copy,len);
  return !memcmp((void *)source,(void *)copy,len);
}
271
272// This doesn't necessarily find all clean entry points, just
273// guarantees that it's not dirty
274static int isclean(void *addr)
275{
276 #ifndef HAVE_ARMV7
277 u_int *ptr=((u_int *)addr)+4;
278 #else
279 u_int *ptr=((u_int *)addr)+6;
280 #endif
281 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
282 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
283 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
284 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
285 return 1;
286}
287
288// get source that block at addr was compiled from (host pointers)
// Decode the source address and length from the block prologue at addr and
// return them as the half-open range [*start, *end).
// The "copy" value that sits between them in the prologue is skipped.
static void get_bounds(void *addr, u_char **start, u_char **end)
{
  u_int *insn = addr;
  u_int source, len;
  #ifdef HAVE_ARMV7
  // ARMv7 movw/movt
  assert((*insn&0xFFF00000)==0xe3000000);
  source=(insn[0]&0xFFF)+((insn[0]>>4)&0xF000)+((insn[2]<<16)&0xFFF0000)+((insn[2]<<12)&0xF0000000);
  len=(insn[4]&0xFFF)+((insn[4]>>4)&0xF000);
  insn+=6;
  #else
  // get from literal pool (words 0 and 2 are "ldr rX,[pc,#ofs]")
  u_int offset;
  assert((insn[0]&0xFFFF0000)==0xe59f0000);
  offset=insn[0]&0xfff;
  source=*(u_int *)((u_char *)&insn[0]+offset+8);
  assert((insn[2]&0xFFFF0000)==0xe59f0000);
  offset=insn[2]&0xfff;
  len=*(u_int *)((u_char *)&insn[2]+offset+8);
  insn+=4;
  #endif
  if((*insn&0xFF000000)!=0xeb000000) insn++;
  assert((*insn&0xFF000000)==0xeb000000); // bl instruction
  *start=(u_char *)source;
  *end=(u_char *)source+len;
}
321
322// Allocate a specific ARM register.
323static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
324{
325 int n;
326 int dirty=0;
327
328 // see if it's already allocated (and dealloc it)
329 for(n=0;n<HOST_REGS;n++)
330 {
331 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
332 dirty=(cur->dirty>>n)&1;
333 cur->regmap[n]=-1;
334 }
335 }
336
337 cur->regmap[hr]=reg;
338 cur->dirty&=~(1<<hr);
339 cur->dirty|=dirty<<hr;
340 cur->isconst&=~(1<<hr);
341}
342
343// Alloc cycle count into dedicated register
344static void alloc_cc(struct regstat *cur,int i)
345{
346 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
347}
348
349/* Assembler */
350
351static unused char regname[16][4] = {
352 "r0",
353 "r1",
354 "r2",
355 "r3",
356 "r4",
357 "r5",
358 "r6",
359 "r7",
360 "r8",
361 "r9",
362 "r10",
363 "fp",
364 "r12",
365 "sp",
366 "lr",
367 "pc"};
368
369static void output_w32(u_int word)
370{
371 *((u_int *)out)=word;
372 out+=4;
373}
374
// Pack three register numbers (each < 16) into instruction fields:
// rn at bits 19..16, rd at bits 15..12, rm at bits 3..0.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
382
// Pack rd, rn and an 8-bit immediate that is shifted left by `shift`
// (which must be even). The shift is stored as ((32-shift)&30) at bit 7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate=(32-shift)&30;
  u_int fields=imm;
  fields|=rotate<<7;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
391
// Try to express imm as an 8-bit value with an even rotation.
// On success store the 12-bit operand field (rotation at bit 7, value in
// the low 8 bits) in *encoded and return 1.
// On failure return 0 with *encoded == 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  // rotate right two bits at a time until the value fits in 8 bits
  for(int rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
407
408static void genimm_checked(u_int imm,u_int *encoded)
409{
410 u_int ret=genimm(imm,encoded);
411 assert(ret);
412 (void)ret;
413}
414
415static u_int genjmp(u_int addr)
416{
417 if (addr < 3) return 0; // a branch that will be patched later
418 int offset = addr-(int)out-8;
419 if (offset < -33554432 || offset >= 33554432) {
420 SysPrintf("genjmp: out of range: %08x\n", offset);
421 abort();
422 return 0;
423 }
424 return ((u_int)offset>>2)&0xffffff;
425}
426
427static void emit_mov(int rs,int rt)
428{
429 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
430 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
431}
432
433static void emit_movs(int rs,int rt)
434{
435 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
436 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
437}
438
439static void emit_add(int rs1,int rs2,int rt)
440{
441 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
442 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
443}
444
445static void emit_adcs(int rs1,int rs2,int rt)
446{
447 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
448 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
449}
450
451static void emit_neg(int rs, int rt)
452{
453 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
454 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
455}
456
457static void emit_sub(int rs1,int rs2,int rt)
458{
459 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
460 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
461}
462
463static void emit_zeroreg(int rt)
464{
465 assem_debug("mov %s,#0\n",regname[rt]);
466 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
467}
468
469static void emit_loadlp(u_int imm,u_int rt)
470{
471 add_literal((int)out,imm);
472 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
473 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
474}
475
476static void emit_movw(u_int imm,u_int rt)
477{
478 assert(imm<65536);
479 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
480 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
481}
482
483static void emit_movt(u_int imm,u_int rt)
484{
485 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
486 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
487}
488
489static void emit_movimm(u_int imm,u_int rt)
490{
491 u_int armval;
492 if(genimm(imm,&armval)) {
493 assem_debug("mov %s,#%d\n",regname[rt],imm);
494 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
495 }else if(genimm(~imm,&armval)) {
496 assem_debug("mvn %s,#%d\n",regname[rt],imm);
497 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
498 }else if(imm<65536) {
499 #ifndef HAVE_ARMV7
500 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
501 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
502 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
503 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
504 #else
505 emit_movw(imm,rt);
506 #endif
507 }else{
508 #ifndef HAVE_ARMV7
509 emit_loadlp(imm,rt);
510 #else
511 emit_movw(imm&0x0000FFFF,rt);
512 emit_movt(imm&0xFFFF0000,rt);
513 #endif
514 }
515}
516
517static void emit_pcreladdr(u_int rt)
518{
519 assem_debug("add %s,pc,#?\n",regname[rt]);
520 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
521}
522
523static void emit_loadreg(int r, int hr)
524{
525 if(r&64) {
526 SysPrintf("64bit load in 32bit mode!\n");
527 assert(0);
528 return;
529 }
530 if((r&63)==0)
531 emit_zeroreg(hr);
532 else {
533 int addr = (int)&psxRegs.GPR.r[r];
534 switch (r) {
535 //case HIREG: addr = &hi; break;
536 //case LOREG: addr = &lo; break;
537 case CCREG: addr = (int)&cycle_count; break;
538 case CSREG: addr = (int)&Status; break;
539 case INVCP: addr = (int)&invc_ptr; break;
540 default: assert(r < 34); break;
541 }
542 u_int offset = addr-(u_int)&dynarec_local;
543 assert(offset<4096);
544 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
545 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
546 }
547}
548
549static void emit_storereg(int r, int hr)
550{
551 if(r&64) {
552 SysPrintf("64bit store in 32bit mode!\n");
553 assert(0);
554 return;
555 }
556 int addr = (int)&psxRegs.GPR.r[r];
557 switch (r) {
558 //case HIREG: addr = &hi; break;
559 //case LOREG: addr = &lo; break;
560 case CCREG: addr = (int)&cycle_count; break;
561 default: assert(r < 34); break;
562 }
563 u_int offset = addr-(u_int)&dynarec_local;
564 assert(offset<4096);
565 assem_debug("str %s,fp+%d\n",regname[hr],offset);
566 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
567}
568
569static void emit_test(int rs, int rt)
570{
571 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
572 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
573}
574
575static void emit_testimm(int rs,int imm)
576{
577 u_int armval;
578 assem_debug("tst %s,#%d\n",regname[rs],imm);
579 genimm_checked(imm,&armval);
580 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
581}
582
583static void emit_testeqimm(int rs,int imm)
584{
585 u_int armval;
586 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
587 genimm_checked(imm,&armval);
588 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
589}
590
591static void emit_not(int rs,int rt)
592{
593 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
594 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
595}
596
597static void emit_mvnmi(int rs,int rt)
598{
599 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
600 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
601}
602
603static void emit_and(u_int rs1,u_int rs2,u_int rt)
604{
605 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
606 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
607}
608
609static void emit_or(u_int rs1,u_int rs2,u_int rt)
610{
611 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
612 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
613}
614
615static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
616{
617 assert(rs<16);
618 assert(rt<16);
619 assert(imm<32);
620 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
621 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
622}
623
624static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
625{
626 assert(rs<16);
627 assert(rt<16);
628 assert(imm<32);
629 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
630 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
631}
632
633static void emit_xor(u_int rs1,u_int rs2,u_int rt)
634{
635 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
636 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
637}
638
639static void emit_addimm(u_int rs,int imm,u_int rt)
640{
641 assert(rs<16);
642 assert(rt<16);
643 if(imm!=0) {
644 u_int armval;
645 if(genimm(imm,&armval)) {
646 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
647 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
648 }else if(genimm(-imm,&armval)) {
649 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
650 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
651 #ifdef HAVE_ARMV7
652 }else if(rt!=rs&&(u_int)imm<65536) {
653 emit_movw(imm&0x0000ffff,rt);
654 emit_add(rs,rt,rt);
655 }else if(rt!=rs&&(u_int)-imm<65536) {
656 emit_movw(-imm&0x0000ffff,rt);
657 emit_sub(rs,rt,rt);
658 #endif
659 }else if((u_int)-imm<65536) {
660 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
661 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
662 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
663 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
664 }else {
665 do {
666 int shift = (ffs(imm) - 1) & ~1;
667 int imm8 = imm & (0xff << shift);
668 genimm_checked(imm8,&armval);
669 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
670 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
671 rs = rt;
672 imm &= ~imm8;
673 }
674 while (imm != 0);
675 }
676 }
677 else if(rs!=rt) emit_mov(rs,rt);
678}
679
680static void emit_addimm_and_set_flags(int imm,int rt)
681{
682 assert(imm>-65536&&imm<65536);
683 u_int armval;
684 if(genimm(imm,&armval)) {
685 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
686 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
687 }else if(genimm(-imm,&armval)) {
688 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
689 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
690 }else if(imm<0) {
691 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
692 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
693 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
694 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
695 }else{
696 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
697 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
698 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
699 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
700 }
701}
702
703static void emit_addimm_no_flags(u_int imm,u_int rt)
704{
705 emit_addimm(rt,imm,rt);
706}
707
708static void emit_addnop(u_int r)
709{
710 assert(r<16);
711 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
712 output_w32(0xe2800000|rd_rn_rm(r,r,0));
713}
714
715static void emit_andimm(int rs,int imm,int rt)
716{
717 u_int armval;
718 if(imm==0) {
719 emit_zeroreg(rt);
720 }else if(genimm(imm,&armval)) {
721 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
722 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
723 }else if(genimm(~imm,&armval)) {
724 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
725 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
726 }else if(imm==65535) {
727 #ifndef HAVE_ARMV6
728 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
729 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
730 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
731 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
732 #else
733 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
734 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
735 #endif
736 }else{
737 assert(imm>0&&imm<65535);
738 #ifndef HAVE_ARMV7
739 assem_debug("mov r14,#%d\n",imm&0xFF00);
740 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
741 assem_debug("add r14,r14,#%d\n",imm&0xFF);
742 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
743 #else
744 emit_movw(imm,HOST_TEMPREG);
745 #endif
746 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
747 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
748 }
749}
750
751static void emit_orimm(int rs,int imm,int rt)
752{
753 u_int armval;
754 if(imm==0) {
755 if(rs!=rt) emit_mov(rs,rt);
756 }else if(genimm(imm,&armval)) {
757 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
758 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
759 }else{
760 assert(imm>0&&imm<65536);
761 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
762 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
763 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
764 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
765 }
766}
767
768static void emit_xorimm(int rs,int imm,int rt)
769{
770 u_int armval;
771 if(imm==0) {
772 if(rs!=rt) emit_mov(rs,rt);
773 }else if(genimm(imm,&armval)) {
774 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
775 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
776 }else{
777 assert(imm>0&&imm<65536);
778 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
779 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
780 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
781 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
782 }
783}
784
785static void emit_shlimm(int rs,u_int imm,int rt)
786{
787 assert(imm>0);
788 assert(imm<32);
789 //if(imm==1) ...
790 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
791 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
792}
793
794static void emit_lsls_imm(int rs,int imm,int rt)
795{
796 assert(imm>0);
797 assert(imm<32);
798 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
799 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
800}
801
802static unused void emit_lslpls_imm(int rs,int imm,int rt)
803{
804 assert(imm>0);
805 assert(imm<32);
806 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
807 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
808}
809
810static void emit_shrimm(int rs,u_int imm,int rt)
811{
812 assert(imm>0);
813 assert(imm<32);
814 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
815 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
816}
817
818static void emit_sarimm(int rs,u_int imm,int rt)
819{
820 assert(imm>0);
821 assert(imm<32);
822 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
823 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
824}
825
826static void emit_rorimm(int rs,u_int imm,int rt)
827{
828 assert(imm>0);
829 assert(imm<32);
830 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
831 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
832}
833
// Sign-extend the low 16 bits of rs into rt: "sxth" with HAVE_ARMV6,
// otherwise a shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
844
// Sign-extend the low 8 bits of rs into rt: "sxtb" with HAVE_ARMV6,
// otherwise a shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
855
856static void emit_shl(u_int rs,u_int shift,u_int rt)
857{
858 assert(rs<16);
859 assert(rt<16);
860 assert(shift<16);
861 //if(imm==1) ...
862 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
863 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
864}
865
866static void emit_shr(u_int rs,u_int shift,u_int rt)
867{
868 assert(rs<16);
869 assert(rt<16);
870 assert(shift<16);
871 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
872 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
873}
874
875static void emit_sar(u_int rs,u_int shift,u_int rt)
876{
877 assert(rs<16);
878 assert(rt<16);
879 assert(shift<16);
880 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
881 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
882}
883
884static void emit_orrshl(u_int rs,u_int shift,u_int rt)
885{
886 assert(rs<16);
887 assert(rt<16);
888 assert(shift<16);
889 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
890 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
891}
892
893static void emit_orrshr(u_int rs,u_int shift,u_int rt)
894{
895 assert(rs<16);
896 assert(rt<16);
897 assert(shift<16);
898 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
899 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
900}
901
902static void emit_cmpimm(int rs,int imm)
903{
904 u_int armval;
905 if(genimm(imm,&armval)) {
906 assem_debug("cmp %s,#%d\n",regname[rs],imm);
907 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
908 }else if(genimm(-imm,&armval)) {
909 assem_debug("cmn %s,#%d\n",regname[rs],imm);
910 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
911 }else if(imm>0) {
912 assert(imm<65536);
913 emit_movimm(imm,HOST_TEMPREG);
914 assem_debug("cmp %s,r14\n",regname[rs]);
915 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
916 }else{
917 assert(imm>-65536);
918 emit_movimm(-imm,HOST_TEMPREG);
919 assem_debug("cmn %s,r14\n",regname[rs]);
920 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
921 }
922}
923
924static void emit_cmovne_imm(int imm,int rt)
925{
926 assem_debug("movne %s,#%d\n",regname[rt],imm);
927 u_int armval;
928 genimm_checked(imm,&armval);
929 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
930}
931
932static void emit_cmovl_imm(int imm,int rt)
933{
934 assem_debug("movlt %s,#%d\n",regname[rt],imm);
935 u_int armval;
936 genimm_checked(imm,&armval);
937 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
938}
939
940static void emit_cmovb_imm(int imm,int rt)
941{
942 assem_debug("movcc %s,#%d\n",regname[rt],imm);
943 u_int armval;
944 genimm_checked(imm,&armval);
945 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
946}
947
948static void emit_cmovne_reg(int rs,int rt)
949{
950 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
951 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
952}
953
954static void emit_cmovl_reg(int rs,int rt)
955{
956 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
957 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
958}
959
960static void emit_cmovs_reg(int rs,int rt)
961{
962 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
963 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
964}
965
// Emit: compare rs with imm, then rt = 1 under the lt condition, else 0.
// When rt aliases rs the zeroing is done after the compare so that the
// compare still sees the original value.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
973
// Emit: compare rs with imm, then rt = 1 under the cc condition, else 0.
// When rt aliases rs the zeroing is done after the compare so that the
// compare still sees the original value.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
981
982static void emit_cmp(int rs,int rt)
983{
984 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
985 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
986}
987
// Emit: compare rs with 1, load 1 into rt, then replace it with 0 under
// the lt condition.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
995
// Emit: set the flags from rs (copying it into rt when the registers
// differ), then load 1 into rt under the ne condition.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
1003
// Emit: compare rs1 with rs2, then rt = 1 under the lt condition, else 0.
// When rt aliases an operand it is zeroed after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1012
// Emit: compare rs1 with rs2, then rt = 1 under the cc condition, else 0.
// When rt aliases an operand it is zeroed after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1021
1022static void emit_call(const void *a_)
1023{
1024 int a = (int)a_;
1025 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1026 u_int offset=genjmp(a);
1027 output_w32(0xeb000000|offset);
1028}
1029
1030static void emit_jmp(const void *a_)
1031{
1032 int a = (int)a_;
1033 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1034 u_int offset=genjmp(a);
1035 output_w32(0xea000000|offset);
1036}
1037
1038static void emit_jne(const void *a_)
1039{
1040 int a = (int)a_;
1041 assem_debug("bne %x\n",a);
1042 u_int offset=genjmp(a);
1043 output_w32(0x1a000000|offset);
1044}
1045
1046static void emit_jeq(const void *a_)
1047{
1048 int a = (int)a_;
1049 assem_debug("beq %x\n",a);
1050 u_int offset=genjmp(a);
1051 output_w32(0x0a000000|offset);
1052}
1053
1054static void emit_js(const void *a_)
1055{
1056 int a = (int)a_;
1057 assem_debug("bmi %x\n",a);
1058 u_int offset=genjmp(a);
1059 output_w32(0x4a000000|offset);
1060}
1061
1062static void emit_jns(const void *a_)
1063{
1064 int a = (int)a_;
1065 assem_debug("bpl %x\n",a);
1066 u_int offset=genjmp(a);
1067 output_w32(0x5a000000|offset);
1068}
1069
1070static void emit_jl(const void *a_)
1071{
1072 int a = (int)a_;
1073 assem_debug("blt %x\n",a);
1074 u_int offset=genjmp(a);
1075 output_w32(0xba000000|offset);
1076}
1077
1078static void emit_jge(const void *a_)
1079{
1080 int a = (int)a_;
1081 assem_debug("bge %x\n",a);
1082 u_int offset=genjmp(a);
1083 output_w32(0xaa000000|offset);
1084}
1085
1086static void emit_jno(const void *a_)
1087{
1088 int a = (int)a_;
1089 assem_debug("bvc %x\n",a);
1090 u_int offset=genjmp(a);
1091 output_w32(0x7a000000|offset);
1092}
1093
1094static void emit_jc(const void *a_)
1095{
1096 int a = (int)a_;
1097 assem_debug("bcs %x\n",a);
1098 u_int offset=genjmp(a);
1099 output_w32(0x2a000000|offset);
1100}
1101
1102static void emit_jcc(const void *a_)
1103{
1104 int a = (int)a_;
1105 assem_debug("bcc %x\n",a);
1106 u_int offset=genjmp(a);
1107 output_w32(0x3a000000|offset);
1108}
1109
1110static void emit_callreg(u_int r)
1111{
1112 assert(r<15);
1113 assem_debug("blx %s\n",regname[r]);
1114 output_w32(0xe12fff30|r);
1115}
1116
1117static void emit_jmpreg(u_int r)
1118{
1119 assem_debug("mov pc,%s\n",regname[r]);
1120 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1121}
1122
1123static void emit_ret(void)
1124{
1125 emit_jmpreg(14);
1126}
1127
1128static void emit_readword_indexed(int offset, int rs, int rt)
1129{
1130 assert(offset>-4096&&offset<4096);
1131 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1132 if(offset>=0) {
1133 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1134 }else{
1135 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1136 }
1137}
1138
1139static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1140{
1141 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1142 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1143}
1144
1145static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1146{
1147 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1148 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1149}
1150
1151static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1152{
1153 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1154 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1155}
1156
1157static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1158{
1159 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1160 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1161}
1162
1163static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1164{
1165 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1167}
1168
1169static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1170{
1171 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1173}
1174
1175static void emit_movsbl_indexed(int offset, int rs, int rt)
1176{
1177 assert(offset>-256&&offset<256);
1178 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1179 if(offset>=0) {
1180 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1181 }else{
1182 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1183 }
1184}
1185
1186static void emit_movswl_indexed(int offset, int rs, int rt)
1187{
1188 assert(offset>-256&&offset<256);
1189 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1190 if(offset>=0) {
1191 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1192 }else{
1193 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1194 }
1195}
1196
1197static void emit_movzbl_indexed(int offset, int rs, int rt)
1198{
1199 assert(offset>-4096&&offset<4096);
1200 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1201 if(offset>=0) {
1202 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1203 }else{
1204 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1205 }
1206}
1207
1208static void emit_movzwl_indexed(int offset, int rs, int rt)
1209{
1210 assert(offset>-256&&offset<256);
1211 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1212 if(offset>=0) {
1213 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1214 }else{
1215 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1216 }
1217}
1218
1219static void emit_ldrd(int offset, int rs, int rt)
1220{
1221 assert(offset>-256&&offset<256);
1222 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1223 if(offset>=0) {
1224 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1225 }else{
1226 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1227 }
1228}
1229
1230static void emit_readword(void *addr, int rt)
1231{
1232 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1233 assert(offset<4096);
1234 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1235 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1236}
1237
1238static void emit_writeword_indexed(int rt, int offset, int rs)
1239{
1240 assert(offset>-4096&&offset<4096);
1241 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1242 if(offset>=0) {
1243 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1244 }else{
1245 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1246 }
1247}
1248
1249static void emit_writehword_indexed(int rt, int offset, int rs)
1250{
1251 assert(offset>-256&&offset<256);
1252 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1253 if(offset>=0) {
1254 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1255 }else{
1256 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1257 }
1258}
1259
1260static void emit_writebyte_indexed(int rt, int offset, int rs)
1261{
1262 assert(offset>-4096&&offset<4096);
1263 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1264 if(offset>=0) {
1265 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1266 }else{
1267 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1268 }
1269}
1270
1271static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1272{
1273 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1274 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1275}
1276
1277static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1278{
1279 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1280 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1281}
1282
1283static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1284{
1285 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1286 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1287}
1288
1289static void emit_writeword(int rt, void *addr)
1290{
1291 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1292 assert(offset<4096);
1293 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1294 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1295}
1296
1297static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1298{
1299 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1300 assert(rs1<16);
1301 assert(rs2<16);
1302 assert(hi<16);
1303 assert(lo<16);
1304 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1305}
1306
1307static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1308{
1309 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1310 assert(rs1<16);
1311 assert(rs2<16);
1312 assert(hi<16);
1313 assert(lo<16);
1314 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1315}
1316
1317static void emit_clz(int rs,int rt)
1318{
1319 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1320 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1321}
1322
1323static void emit_subcs(int rs1,int rs2,int rt)
1324{
1325 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1326 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1327}
1328
1329static void emit_shrcc_imm(int rs,u_int imm,int rt)
1330{
1331 assert(imm>0);
1332 assert(imm<32);
1333 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1334 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1335}
1336
1337static void emit_shrne_imm(int rs,u_int imm,int rt)
1338{
1339 assert(imm>0);
1340 assert(imm<32);
1341 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1342 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1343}
1344
1345static void emit_negmi(int rs, int rt)
1346{
1347 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1348 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1349}
1350
1351static void emit_negsmi(int rs, int rt)
1352{
1353 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1354 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1355}
1356
1357static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1358{
1359 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1360 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1361}
1362
1363static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1364{
1365 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1366 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1367}
1368
1369static void emit_teq(int rs, int rt)
1370{
1371 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1372 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1373}
1374
1375static void emit_rsbimm(int rs, int imm, int rt)
1376{
1377 u_int armval;
1378 genimm_checked(imm,&armval);
1379 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1380 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1381}
1382
1383// Load 2 immediates optimizing for small code size
1384static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
1385{
1386 emit_movimm(imm1,rt1);
1387 u_int armval;
1388 if(genimm(imm2-imm1,&armval)) {
1389 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
1390 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
1391 }else if(genimm(imm1-imm2,&armval)) {
1392 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
1393 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
1394 }
1395 else emit_movimm(imm2,rt2);
1396}
1397
1398// Conditionally select one of two immediates, optimizing for small code size
1399// This will only be called if HAVE_CMOV_IMM is defined
1400static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1401{
1402 u_int armval;
1403 if(genimm(imm2-imm1,&armval)) {
1404 emit_movimm(imm1,rt);
1405 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1406 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1407 }else if(genimm(imm1-imm2,&armval)) {
1408 emit_movimm(imm1,rt);
1409 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1410 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1411 }
1412 else {
1413 #ifndef HAVE_ARMV7
1414 emit_movimm(imm1,rt);
1415 add_literal((int)out,imm2);
1416 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1417 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1418 #else
1419 emit_movw(imm1&0x0000FFFF,rt);
1420 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1421 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1422 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1423 }
1424 emit_movt(imm1&0xFFFF0000,rt);
1425 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1426 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1427 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1428 }
1429 #endif
1430 }
1431}
1432
1433// special case for checking invalid_code
1434static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1435{
1436 assert(imm<128&&imm>=0);
1437 assert(r>=0&&r<16);
1438 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1439 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1440 emit_cmpimm(HOST_TEMPREG,imm);
1441}
1442
1443static void emit_callne(int a)
1444{
1445 assem_debug("blne %x\n",a);
1446 u_int offset=genjmp(a);
1447 output_w32(0x1b000000|offset);
1448}
1449
1450// Used to preload hash table entries
1451static unused void emit_prefetchreg(int r)
1452{
1453 assem_debug("pld %s\n",regname[r]);
1454 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1455}
1456
1457// Special case for mini_ht
1458static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1459{
1460 assert(offset<4096);
1461 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1462 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1463}
1464
1465static void emit_orrne_imm(int rs,int imm,int rt)
1466{
1467 u_int armval;
1468 genimm_checked(imm,&armval);
1469 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1470 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1471}
1472
1473static void emit_andne_imm(int rs,int imm,int rt)
1474{
1475 u_int armval;
1476 genimm_checked(imm,&armval);
1477 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1478 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1479}
1480
1481static unused void emit_addpl_imm(int rs,int imm,int rt)
1482{
1483 u_int armval;
1484 genimm_checked(imm,&armval);
1485 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1486 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1487}
1488
1489static void emit_jno_unlikely(int a)
1490{
1491 //emit_jno(a);
1492 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1493 output_w32(0x72800000|rd_rn_rm(15,15,0));
1494}
1495
1496static void save_regs_all(u_int reglist)
1497{
1498 int i;
1499 if(!reglist) return;
1500 assem_debug("stmia fp,{");
1501 for(i=0;i<16;i++)
1502 if(reglist&(1<<i))
1503 assem_debug("r%d,",i);
1504 assem_debug("}\n");
1505 output_w32(0xe88b0000|reglist);
1506}
1507
1508static void restore_regs_all(u_int reglist)
1509{
1510 int i;
1511 if(!reglist) return;
1512 assem_debug("ldmia fp,{");
1513 for(i=0;i<16;i++)
1514 if(reglist&(1<<i))
1515 assem_debug("r%d,",i);
1516 assem_debug("}\n");
1517 output_w32(0xe89b0000|reglist);
1518}
1519
1520// Save registers before function call
1521static void save_regs(u_int reglist)
1522{
1523 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1524 save_regs_all(reglist);
1525}
1526
1527// Restore registers after function call
1528static void restore_regs(u_int reglist)
1529{
1530 reglist&=CALLER_SAVE_REGS;
1531 restore_regs_all(reglist);
1532}
1533
1534/* Stubs/epilogue */
1535
1536static void literal_pool(int n)
1537{
1538 if(!literalcount) return;
1539 if(n) {
1540 if((int)out-literals[0][0]<4096-n) return;
1541 }
1542 u_int *ptr;
1543 int i;
1544 for(i=0;i<literalcount;i++)
1545 {
1546 u_int l_addr=(u_int)out;
1547 int j;
1548 for(j=0;j<i;j++) {
1549 if(literals[j][1]==literals[i][1]) {
1550 //printf("dup %08x\n",literals[i][1]);
1551 l_addr=literals[j][0];
1552 break;
1553 }
1554 }
1555 ptr=(u_int *)literals[i][0];
1556 u_int offset=l_addr-(u_int)ptr-8;
1557 assert(offset<4096);
1558 assert(!(offset&3));
1559 *ptr|=offset;
1560 if(l_addr==(u_int)out) {
1561 literals[i][0]=l_addr; // remember for dupes
1562 output_w32(literals[i][1]);
1563 }
1564 }
1565 literalcount=0;
1566}
1567
1568static void literal_pool_jumpover(int n)
1569{
1570 if(!literalcount) return;
1571 if(n) {
1572 if((int)out-literals[0][0]<4096-n) return;
1573 }
1574 void *jaddr = out;
1575 emit_jmp(0);
1576 literal_pool(0);
1577 set_jump_target(jaddr, out);
1578}
1579
1580// parsed by get_pointer, find_extjump_insn
1581static void emit_extjump2(u_char *addr, u_int target, void *linker)
1582{
1583 u_char *ptr=(u_char *)addr;
1584 assert((ptr[3]&0x0e)==0xa);
1585 (void)ptr;
1586
1587 emit_loadlp(target,0);
1588 emit_loadlp((u_int)addr,1);
1589 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
1590 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1591//DEBUG >
1592#ifdef DEBUG_CYCLE_COUNT
1593 emit_readword(&last_count,ECX);
1594 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1595 emit_readword(&next_interupt,ECX);
1596 emit_writeword(HOST_CCREG,&Count);
1597 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1598 emit_writeword(ECX,&last_count);
1599#endif
1600//DEBUG <
1601 emit_jmp(linker);
1602}
1603
1604// put rt_val into rt, potentially making use of rs with value rs_val
1605static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1606{
1607 u_int armval;
1608 int diff;
1609 if(genimm(rt_val,&armval)) {
1610 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1611 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1612 return;
1613 }
1614 if(genimm(~rt_val,&armval)) {
1615 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1616 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1617 return;
1618 }
1619 diff=rt_val-rs_val;
1620 if(genimm(diff,&armval)) {
1621 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1622 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1623 return;
1624 }else if(genimm(-diff,&armval)) {
1625 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1626 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1627 return;
1628 }
1629 emit_movimm(rt_val,rt);
1630}
1631
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. the values
// are equal or their difference (in either direction), after stripping
// trailing pairs of zero bits, fits in 8 bits. Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2) return 1;
  const int diff=v2-v1;
  u_int bits=diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;
  // same test for the negated difference
  bits=-diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100?1:0;
}
1647
1648static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1649{
1650 switch(type) {
1651 case LOADB_STUB: emit_signextend8(rs,rt); break;
1652 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1653 case LOADH_STUB: emit_signextend16(rs,rt); break;
1654 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1655 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1656 default: assert(0);
1657 }
1658}
1659
1660#include "pcsxmem.h"
1661#include "pcsxmem_inline.c"
1662
1663static void do_readstub(int n)
1664{
1665 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1666 literal_pool(256);
1667 set_jump_target(stubs[n].addr, out);
1668 enum stub_type type=stubs[n].type;
1669 int i=stubs[n].a;
1670 int rs=stubs[n].b;
1671 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1672 u_int reglist=stubs[n].e;
1673 signed char *i_regmap=i_regs->regmap;
1674 int rt;
1675 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1676 rt=get_reg(i_regmap,FTEMP);
1677 }else{
1678 rt=get_reg(i_regmap,rt1[i]);
1679 }
1680 assert(rs>=0);
1681 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1682 void *restore_jump = NULL;
1683 reglist|=(1<<rs);
1684 for(r=0;r<=12;r++) {
1685 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1686 temp=r; break;
1687 }
1688 }
1689 if(rt>=0&&rt1[i]!=0)
1690 reglist&=~(1<<rt);
1691 if(temp==-1) {
1692 save_regs(reglist);
1693 regs_saved=1;
1694 temp=(rs==0)?2:0;
1695 }
1696 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1697 temp2=1;
1698 emit_readword(&mem_rtab,temp);
1699 emit_shrimm(rs,12,temp2);
1700 emit_readword_dualindexedx4(temp,temp2,temp2);
1701 emit_lsls_imm(temp2,1,temp2);
1702 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1703 switch(type) {
1704 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1705 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1706 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1707 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1708 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1709 default: assert(0);
1710 }
1711 }
1712 if(regs_saved) {
1713 restore_jump=out;
1714 emit_jcc(0); // jump to reg restore
1715 }
1716 else
1717 emit_jcc(stubs[n].retaddr); // return address
1718
1719 if(!regs_saved)
1720 save_regs(reglist);
1721 void *handler=NULL;
1722 if(type==LOADB_STUB||type==LOADBU_STUB)
1723 handler=jump_handler_read8;
1724 if(type==LOADH_STUB||type==LOADHU_STUB)
1725 handler=jump_handler_read16;
1726 if(type==LOADW_STUB)
1727 handler=jump_handler_read32;
1728 assert(handler);
1729 pass_args(rs,temp2);
1730 int cc=get_reg(i_regmap,CCREG);
1731 if(cc<0)
1732 emit_loadreg(CCREG,2);
1733 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1734 emit_call(handler);
1735 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1736 mov_loadtype_adj(type,0,rt);
1737 }
1738 if(restore_jump)
1739 set_jump_target(restore_jump, out);
1740 restore_regs(reglist);
1741 emit_jmp(stubs[n].retaddr); // return address
1742}
1743
1744static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1745{
1746 int rs=get_reg(regmap,target);
1747 int rt=get_reg(regmap,target);
1748 if(rs<0) rs=get_reg(regmap,-1);
1749 assert(rs>=0);
1750 u_int is_dynamic,far_call=0;
1751 uintptr_t host_addr = 0;
1752 void *handler;
1753 int cc=get_reg(regmap,CCREG);
1754 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1755 return;
1756 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1757 if (handler == NULL) {
1758 if(rt<0||rt1[i]==0)
1759 return;
1760 if(addr!=host_addr)
1761 emit_movimm_from(addr,rs,host_addr,rs);
1762 switch(type) {
1763 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1764 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1765 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1766 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1767 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1768 default: assert(0);
1769 }
1770 return;
1771 }
1772 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1773 if(is_dynamic) {
1774 if(type==LOADB_STUB||type==LOADBU_STUB)
1775 handler=jump_handler_read8;
1776 if(type==LOADH_STUB||type==LOADHU_STUB)
1777 handler=jump_handler_read16;
1778 if(type==LOADW_STUB)
1779 handler=jump_handler_read32;
1780 }
1781
1782 // call a memhandler
1783 if(rt>=0&&rt1[i]!=0)
1784 reglist&=~(1<<rt);
1785 save_regs(reglist);
1786 if(target==0)
1787 emit_movimm(addr,0);
1788 else if(rs!=0)
1789 emit_mov(rs,0);
1790 int offset=(u_char *)handler-out-8;
1791 if(offset<-33554432||offset>=33554432) {
1792 // unreachable memhandler, a plugin func perhaps
1793 emit_movimm((u_int)handler,12);
1794 far_call=1;
1795 }
1796 if(cc<0)
1797 emit_loadreg(CCREG,2);
1798 if(is_dynamic) {
1799 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1800 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1801 }
1802 else {
1803 emit_readword(&last_count,3);
1804 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1805 emit_add(2,3,2);
1806 emit_writeword(2,&Count);
1807 }
1808
1809 if(far_call)
1810 emit_callreg(12);
1811 else
1812 emit_call(handler);
1813
1814 if(rt>=0&&rt1[i]!=0) {
1815 switch(type) {
1816 case LOADB_STUB: emit_signextend8(0,rt); break;
1817 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1818 case LOADH_STUB: emit_signextend16(0,rt); break;
1819 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1820 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1821 default: assert(0);
1822 }
1823 }
1824 restore_regs(reglist);
1825}
1826
1827static void do_writestub(int n)
1828{
1829 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1830 literal_pool(256);
1831 set_jump_target(stubs[n].addr, out);
1832 enum stub_type type=stubs[n].type;
1833 int i=stubs[n].a;
1834 int rs=stubs[n].b;
1835 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1836 u_int reglist=stubs[n].e;
1837 signed char *i_regmap=i_regs->regmap;
1838 int rt,r;
1839 if(itype[i]==C1LS||itype[i]==C2LS) {
1840 rt=get_reg(i_regmap,r=FTEMP);
1841 }else{
1842 rt=get_reg(i_regmap,r=rs2[i]);
1843 }
1844 assert(rs>=0);
1845 assert(rt>=0);
1846 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1847 void *restore_jump = NULL;
1848 int reglist2=reglist|(1<<rs)|(1<<rt);
1849 for(rtmp=0;rtmp<=12;rtmp++) {
1850 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1851 temp=rtmp; break;
1852 }
1853 }
1854 if(temp==-1) {
1855 save_regs(reglist);
1856 regs_saved=1;
1857 for(rtmp=0;rtmp<=3;rtmp++)
1858 if(rtmp!=rs&&rtmp!=rt)
1859 {temp=rtmp;break;}
1860 }
1861 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1862 temp2=3;
1863 emit_readword(&mem_wtab,temp);
1864 emit_shrimm(rs,12,temp2);
1865 emit_readword_dualindexedx4(temp,temp2,temp2);
1866 emit_lsls_imm(temp2,1,temp2);
1867 switch(type) {
1868 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1869 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1870 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1871 default: assert(0);
1872 }
1873 if(regs_saved) {
1874 restore_jump=out;
1875 emit_jcc(0); // jump to reg restore
1876 }
1877 else
1878 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1879
1880 if(!regs_saved)
1881 save_regs(reglist);
1882 void *handler=NULL;
1883 switch(type) {
1884 case STOREB_STUB: handler=jump_handler_write8; break;
1885 case STOREH_STUB: handler=jump_handler_write16; break;
1886 case STOREW_STUB: handler=jump_handler_write32; break;
1887 default: assert(0);
1888 }
1889 assert(handler);
1890 pass_args(rs,rt);
1891 if(temp2!=3)
1892 emit_mov(temp2,3);
1893 int cc=get_reg(i_regmap,CCREG);
1894 if(cc<0)
1895 emit_loadreg(CCREG,2);
1896 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1897 // returns new cycle_count
1898 emit_call(handler);
1899 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1900 if(cc<0)
1901 emit_storereg(CCREG,2);
1902 if(restore_jump)
1903 set_jump_target(restore_jump, out);
1904 restore_regs(reglist);
1905 emit_jmp(stubs[n].retaddr);
1906}
1907
1908static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1909{
1910 int rs=get_reg(regmap,-1);
1911 int rt=get_reg(regmap,target);
1912 assert(rs>=0);
1913 assert(rt>=0);
1914 uintptr_t host_addr = 0;
1915 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1916 if (handler == NULL) {
1917 if(addr!=host_addr)
1918 emit_movimm_from(addr,rs,host_addr,rs);
1919 switch(type) {
1920 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1921 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1922 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1923 default: assert(0);
1924 }
1925 return;
1926 }
1927
1928 // call a memhandler
1929 save_regs(reglist);
1930 pass_args(rs,rt);
1931 int cc=get_reg(regmap,CCREG);
1932 if(cc<0)
1933 emit_loadreg(CCREG,2);
1934 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1935 emit_movimm((u_int)handler,3);
1936 // returns new cycle_count
1937 emit_call(jump_handler_write_h);
1938 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
1939 if(cc<0)
1940 emit_storereg(CCREG,2);
1941 restore_regs(reglist);
1942}
1943
1944static void do_unalignedwritestub(int n)
1945{
1946 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
1947 literal_pool(256);
1948 set_jump_target(stubs[n].addr, out);
1949
1950 int i=stubs[n].a;
1951 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1952 int addr=stubs[n].b;
1953 u_int reglist=stubs[n].e;
1954 signed char *i_regmap=i_regs->regmap;
1955 int temp2=get_reg(i_regmap,FTEMP);
1956 int rt;
1957 rt=get_reg(i_regmap,rs2[i]);
1958 assert(rt>=0);
1959 assert(addr>=0);
1960 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
1961 reglist|=(1<<addr);
1962 reglist&=~(1<<temp2);
1963
1964#if 1
1965 // don't bother with it and call write handler
1966 save_regs(reglist);
1967 pass_args(addr,rt);
1968 int cc=get_reg(i_regmap,CCREG);
1969 if(cc<0)
1970 emit_loadreg(CCREG,2);
1971 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1972 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
1973 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1974 if(cc<0)
1975 emit_storereg(CCREG,2);
1976 restore_regs(reglist);
1977 emit_jmp(stubs[n].retaddr); // return address
1978#else
1979 emit_andimm(addr,0xfffffffc,temp2);
1980 emit_writeword(temp2,&address);
1981
1982 save_regs(reglist);
1983 emit_shrimm(addr,16,1);
1984 int cc=get_reg(i_regmap,CCREG);
1985 if(cc<0) {
1986 emit_loadreg(CCREG,2);
1987 }
1988 emit_movimm((u_int)readmem,0);
1989 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
1990 emit_call((int)&indirect_jump_indexed);
1991 restore_regs(reglist);
1992
1993 emit_readword(&readmem_dword,temp2);
1994 int temp=addr; //hmh
1995 emit_shlimm(addr,3,temp);
1996 emit_andimm(temp,24,temp);
1997#ifdef BIG_ENDIAN_MIPS
1998 if (opcode[i]==0x2e) // SWR
1999#else
2000 if (opcode[i]==0x2a) // SWL
2001#endif
2002 emit_xorimm(temp,24,temp);
2003 emit_movimm(-1,HOST_TEMPREG);
2004 if (opcode[i]==0x2a) { // SWL
2005 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2006 emit_orrshr(rt,temp,temp2);
2007 }else{
2008 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2009 emit_orrshl(rt,temp,temp2);
2010 }
2011 emit_readword(&address,addr);
2012 emit_writeword(temp2,&word);
2013 //save_regs(reglist); // don't need to, no state changes
2014 emit_shrimm(addr,16,1);
2015 emit_movimm((u_int)writemem,0);
2016 //emit_call((int)&indirect_jump_indexed);
2017 emit_mov(15,14);
2018 emit_readword_dualindexedx4(0,1,15);
2019 emit_readword(&Count,HOST_TEMPREG);
2020 emit_readword(&next_interupt,2);
2021 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
2022 emit_writeword(2,&last_count);
2023 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2024 if(cc<0) {
2025 emit_storereg(CCREG,HOST_TEMPREG);
2026 }
2027 restore_regs(reglist);
2028 emit_jmp(stubs[n].retaddr); // return address
2029#endif
2030}
2031
2032static void do_invstub(int n)
2033{
2034 literal_pool(20);
2035 u_int reglist=stubs[n].a;
2036 set_jump_target(stubs[n].addr, out);
2037 save_regs(reglist);
2038 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2039 emit_call(&invalidate_addr);
2040 restore_regs(reglist);
2041 emit_jmp(stubs[n].retaddr); // return address
2042}
2043
2044// this output is parsed by verify_dirty, get_bounds
2045static void do_dirty_stub_emit_args(u_int arg0)
2046{
2047 #ifndef HAVE_ARMV7
2048 emit_loadlp((int)source, 1);
2049 emit_loadlp((int)copy, 2);
2050 emit_loadlp(slen*4, 3);
2051 #else
2052 emit_movw(((u_int)source)&0x0000FFFF, 1);
2053 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2054 emit_movt(((u_int)source)&0xFFFF0000, 1);
2055 emit_movt(((u_int)copy)&0xFFFF0000, 2);
2056 emit_movw(slen*4, 3);
2057 #endif
2058 emit_movimm(arg0, 0);
2059}
2060
2061static void *do_dirty_stub(int i)
2062{
2063 assem_debug("do_dirty_stub %x\n",start+i*4);
2064 do_dirty_stub_emit_args(start + i*4);
2065 emit_call(verify_code);
2066 void *entry = out;
2067 load_regs_entry(i);
2068 if (entry == out)
2069 entry = instr_addr[i];
2070 emit_jmp(instr_addr[i]);
2071 return entry;
2072}
2073
2074static void do_dirty_stub_ds()
2075{
2076 do_dirty_stub_emit_args(start + 1);
2077 emit_call(verify_code_ds);
2078}
2079
2080/* Special assem */
2081
2082static void shift_assemble_arm(int i,struct regstat *i_regs)
2083{
2084 if(rt1[i]) {
2085 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2086 {
2087 signed char s,t,shift;
2088 t=get_reg(i_regs->regmap,rt1[i]);
2089 s=get_reg(i_regs->regmap,rs1[i]);
2090 shift=get_reg(i_regs->regmap,rs2[i]);
2091 if(t>=0){
2092 if(rs1[i]==0)
2093 {
2094 emit_zeroreg(t);
2095 }
2096 else if(rs2[i]==0)
2097 {
2098 assert(s>=0);
2099 if(s!=t) emit_mov(s,t);
2100 }
2101 else
2102 {
2103 emit_andimm(shift,31,HOST_TEMPREG);
2104 if(opcode2[i]==4) // SLLV
2105 {
2106 emit_shl(s,HOST_TEMPREG,t);
2107 }
2108 if(opcode2[i]==6) // SRLV
2109 {
2110 emit_shr(s,HOST_TEMPREG,t);
2111 }
2112 if(opcode2[i]==7) // SRAV
2113 {
2114 emit_sar(s,HOST_TEMPREG,t);
2115 }
2116 }
2117 }
2118 } else { // DSLLV/DSRLV/DSRAV
2119 signed char sh,sl,th,tl,shift;
2120 th=get_reg(i_regs->regmap,rt1[i]|64);
2121 tl=get_reg(i_regs->regmap,rt1[i]);
2122 sh=get_reg(i_regs->regmap,rs1[i]|64);
2123 sl=get_reg(i_regs->regmap,rs1[i]);
2124 shift=get_reg(i_regs->regmap,rs2[i]);
2125 if(tl>=0){
2126 if(rs1[i]==0)
2127 {
2128 emit_zeroreg(tl);
2129 if(th>=0) emit_zeroreg(th);
2130 }
2131 else if(rs2[i]==0)
2132 {
2133 assert(sl>=0);
2134 if(sl!=tl) emit_mov(sl,tl);
2135 if(th>=0&&sh!=th) emit_mov(sh,th);
2136 }
2137 else
2138 {
2139 // FIXME: What if shift==tl ?
2140 assert(shift!=tl);
2141 int temp=get_reg(i_regs->regmap,-1);
2142 int real_th=th;
2143 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2144 assert(sl>=0);
2145 assert(sh>=0);
2146 emit_andimm(shift,31,HOST_TEMPREG);
2147 if(opcode2[i]==0x14) // DSLLV
2148 {
2149 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2150 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2151 emit_orrshr(sl,HOST_TEMPREG,th);
2152 emit_andimm(shift,31,HOST_TEMPREG);
2153 emit_testimm(shift,32);
2154 emit_shl(sl,HOST_TEMPREG,tl);
2155 if(th>=0) emit_cmovne_reg(tl,th);
2156 emit_cmovne_imm(0,tl);
2157 }
2158 if(opcode2[i]==0x16) // DSRLV
2159 {
2160 assert(th>=0);
2161 emit_shr(sl,HOST_TEMPREG,tl);
2162 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2163 emit_orrshl(sh,HOST_TEMPREG,tl);
2164 emit_andimm(shift,31,HOST_TEMPREG);
2165 emit_testimm(shift,32);
2166 emit_shr(sh,HOST_TEMPREG,th);
2167 emit_cmovne_reg(th,tl);
2168 if(real_th>=0) emit_cmovne_imm(0,th);
2169 }
2170 if(opcode2[i]==0x17) // DSRAV
2171 {
2172 assert(th>=0);
2173 emit_shr(sl,HOST_TEMPREG,tl);
2174 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2175 if(real_th>=0) {
2176 assert(temp>=0);
2177 emit_sarimm(th,31,temp);
2178 }
2179 emit_orrshl(sh,HOST_TEMPREG,tl);
2180 emit_andimm(shift,31,HOST_TEMPREG);
2181 emit_testimm(shift,32);
2182 emit_sar(sh,HOST_TEMPREG,th);
2183 emit_cmovne_reg(th,tl);
2184 if(real_th>=0) emit_cmovne_reg(temp,th);
2185 }
2186 }
2187 }
2188 }
2189 }
2190}
2191#define shift_assemble shift_assemble_arm
2192
2193static void loadlr_assemble_arm(int i,struct regstat *i_regs)
2194{
2195 int s,tl,temp,temp2,addr;
2196 int offset;
2197 void *jaddr=0;
2198 int memtarget=0,c=0;
2199 int fastload_reg_override=0;
2200 u_int hr,reglist=0;
2201 tl=get_reg(i_regs->regmap,rt1[i]);
2202 s=get_reg(i_regs->regmap,rs1[i]);
2203 temp=get_reg(i_regs->regmap,-1);
2204 temp2=get_reg(i_regs->regmap,FTEMP);
2205 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
2206 assert(addr<0);
2207 offset=imm[i];
2208 for(hr=0;hr<HOST_REGS;hr++) {
2209 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2210 }
2211 reglist|=1<<temp;
2212 if(offset||s<0||c) addr=temp2;
2213 else addr=s;
2214 if(s>=0) {
2215 c=(i_regs->wasconst>>s)&1;
2216 if(c) {
2217 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
2218 }
2219 }
2220 if(!c) {
2221 emit_shlimm(addr,3,temp);
2222 if (opcode[i]==0x22||opcode[i]==0x26) {
2223 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
2224 }else{
2225 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
2226 }
2227 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
2228 }
2229 else {
2230 if(ram_offset&&memtarget) {
2231 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
2232 fastload_reg_override=HOST_TEMPREG;
2233 }
2234 if (opcode[i]==0x22||opcode[i]==0x26) {
2235 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
2236 }else{
2237 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
2238 }
2239 }
2240 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
2241 if(!c||memtarget) {
2242 int a=temp2;
2243 if(fastload_reg_override) a=fastload_reg_override;
2244 emit_readword_indexed(0,a,temp2);
2245 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
2246 }
2247 else
2248 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
2249 if(rt1[i]) {
2250 assert(tl>=0);
2251 emit_andimm(temp,24,temp);
2252#ifdef BIG_ENDIAN_MIPS
2253 if (opcode[i]==0x26) // LWR
2254#else
2255 if (opcode[i]==0x22) // LWL
2256#endif
2257 emit_xorimm(temp,24,temp);
2258 emit_movimm(-1,HOST_TEMPREG);
2259 if (opcode[i]==0x26) {
2260 emit_shr(temp2,temp,temp2);
2261 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
2262 }else{
2263 emit_shl(temp2,temp,temp2);
2264 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
2265 }
2266 emit_or(temp2,tl,tl);
2267 }
2268 //emit_storereg(rt1[i],tl); // DEBUG
2269 }
2270 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2271 assert(0);
2272 }
2273}
2274#define loadlr_assemble loadlr_assemble_arm
2275
2276static void c2op_prologue(u_int op,u_int reglist)
2277{
2278 save_regs_all(reglist);
2279#ifdef PCNT
2280 emit_movimm(op,0);
2281 emit_call((int)pcnt_gte_start);
2282#endif
2283 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
2284}
2285
2286static void c2op_epilogue(u_int op,u_int reglist)
2287{
2288#ifdef PCNT
2289 emit_movimm(op,0);
2290 emit_call((int)pcnt_gte_end);
2291#endif
2292 restore_regs_all(reglist);
2293}
2294
2295static void c2op_call_MACtoIR(int lm,int need_flags)
2296{
2297 if(need_flags)
2298 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2299 else
2300 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2301}
2302
2303static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2304{
2305 emit_call(func);
2306 // func is C code and trashes r0
2307 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2308 if(need_flags||need_ir)
2309 c2op_call_MACtoIR(lm,need_flags);
2310 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2311}
2312
2313static void c2op_assemble(int i,struct regstat *i_regs)
2314{
2315 u_int c2op=source[i]&0x3f;
2316 u_int hr,reglist_full=0,reglist;
2317 int need_flags,need_ir;
2318 for(hr=0;hr<HOST_REGS;hr++) {
2319 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
2320 }
2321 reglist=reglist_full&CALLER_SAVE_REGS;
2322
2323 if (gte_handlers[c2op]!=NULL) {
2324 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2325 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2326 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2327 source[i],gte_unneeded[i+1],need_flags,need_ir);
2328 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
2329 need_flags=0;
2330 int shift = (source[i] >> 19) & 1;
2331 int lm = (source[i] >> 10) & 1;
2332 switch(c2op) {
2333#ifndef DRC_DBG
2334 case GTE_MVMVA: {
2335#ifdef HAVE_ARMV5
2336 int v = (source[i] >> 15) & 3;
2337 int cv = (source[i] >> 13) & 3;
2338 int mx = (source[i] >> 17) & 3;
2339 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2340 c2op_prologue(c2op,reglist);
2341 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2342 if(v<3)
2343 emit_ldrd(v*8,0,4);
2344 else {
2345 emit_movzwl_indexed(9*4,0,4); // gteIR
2346 emit_movzwl_indexed(10*4,0,6);
2347 emit_movzwl_indexed(11*4,0,5);
2348 emit_orrshl_imm(6,16,4);
2349 }
2350 if(mx<3)
2351 emit_addimm(0,32*4+mx*8*4,6);
2352 else
2353 emit_readword(&zeromem_ptr,6);
2354 if(cv<3)
2355 emit_addimm(0,32*4+(cv*8+5)*4,7);
2356 else
2357 emit_readword(&zeromem_ptr,7);
2358#ifdef __ARM_NEON__
2359 emit_movimm(source[i],1); // opcode
2360 emit_call(gteMVMVA_part_neon);
2361 if(need_flags) {
2362 emit_movimm(lm,1);
2363 emit_call(gteMACtoIR_flags_neon);
2364 }
2365#else
2366 if(cv==3&&shift)
2367 emit_call((int)gteMVMVA_part_cv3sh12_arm);
2368 else {
2369 emit_movimm(shift,1);
2370 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2371 }
2372 if(need_flags||need_ir)
2373 c2op_call_MACtoIR(lm,need_flags);
2374#endif
2375#else /* if not HAVE_ARMV5 */
2376 c2op_prologue(c2op,reglist);
2377 emit_movimm(source[i],1); // opcode
2378 emit_writeword(1,&psxRegs.code);
2379 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
2380#endif
2381 break;
2382 }
2383 case GTE_OP:
2384 c2op_prologue(c2op,reglist);
2385 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
2386 if(need_flags||need_ir) {
2387 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2388 c2op_call_MACtoIR(lm,need_flags);
2389 }
2390 break;
2391 case GTE_DPCS:
2392 c2op_prologue(c2op,reglist);
2393 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2394 break;
2395 case GTE_INTPL:
2396 c2op_prologue(c2op,reglist);
2397 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2398 break;
2399 case GTE_SQR:
2400 c2op_prologue(c2op,reglist);
2401 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2402 if(need_flags||need_ir) {
2403 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2404 c2op_call_MACtoIR(lm,need_flags);
2405 }
2406 break;
2407 case GTE_DCPL:
2408 c2op_prologue(c2op,reglist);
2409 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2410 break;
2411 case GTE_GPF:
2412 c2op_prologue(c2op,reglist);
2413 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2414 break;
2415 case GTE_GPL:
2416 c2op_prologue(c2op,reglist);
2417 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2418 break;
2419#endif
2420 default:
2421 c2op_prologue(c2op,reglist);
2422#ifdef DRC_DBG
2423 emit_movimm(source[i],1); // opcode
2424 emit_writeword(1,&psxRegs.code);
2425#endif
2426 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2427 break;
2428 }
2429 c2op_epilogue(c2op,reglist);
2430 }
2431}
2432
2433static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2434{
2435 // case 0x18: MULT
2436 // case 0x19: MULTU
2437 // case 0x1A: DIV
2438 // case 0x1B: DIVU
2439 // case 0x1C: DMULT
2440 // case 0x1D: DMULTU
2441 // case 0x1E: DDIV
2442 // case 0x1F: DDIVU
2443 if(rs1[i]&&rs2[i])
2444 {
2445 if((opcode2[i]&4)==0) // 32-bit
2446 {
2447 if(opcode2[i]==0x18) // MULT
2448 {
2449 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2450 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2451 signed char hi=get_reg(i_regs->regmap,HIREG);
2452 signed char lo=get_reg(i_regs->regmap,LOREG);
2453 assert(m1>=0);
2454 assert(m2>=0);
2455 assert(hi>=0);
2456 assert(lo>=0);
2457 emit_smull(m1,m2,hi,lo);
2458 }
2459 if(opcode2[i]==0x19) // MULTU
2460 {
2461 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2462 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2463 signed char hi=get_reg(i_regs->regmap,HIREG);
2464 signed char lo=get_reg(i_regs->regmap,LOREG);
2465 assert(m1>=0);
2466 assert(m2>=0);
2467 assert(hi>=0);
2468 assert(lo>=0);
2469 emit_umull(m1,m2,hi,lo);
2470 }
2471 if(opcode2[i]==0x1A) // DIV
2472 {
2473 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2474 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2475 assert(d1>=0);
2476 assert(d2>=0);
2477 signed char quotient=get_reg(i_regs->regmap,LOREG);
2478 signed char remainder=get_reg(i_regs->regmap,HIREG);
2479 assert(quotient>=0);
2480 assert(remainder>=0);
2481 emit_movs(d1,remainder);
2482 emit_movimm(0xffffffff,quotient);
2483 emit_negmi(quotient,quotient); // .. quotient and ..
2484 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2485 emit_movs(d2,HOST_TEMPREG);
2486 emit_jeq(out+52); // Division by zero
2487 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2488#ifdef HAVE_ARMV5
2489 emit_clz(HOST_TEMPREG,quotient);
2490 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2491#else
2492 emit_movimm(0,quotient);
2493 emit_addpl_imm(quotient,1,quotient);
2494 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2495 emit_jns(out-2*4);
2496#endif
2497 emit_orimm(quotient,1<<31,quotient);
2498 emit_shr(quotient,quotient,quotient);
2499 emit_cmp(remainder,HOST_TEMPREG);
2500 emit_subcs(remainder,HOST_TEMPREG,remainder);
2501 emit_adcs(quotient,quotient,quotient);
2502 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2503 emit_jcc(out-16); // -4
2504 emit_teq(d1,d2);
2505 emit_negmi(quotient,quotient);
2506 emit_test(d1,d1);
2507 emit_negmi(remainder,remainder);
2508 }
2509 if(opcode2[i]==0x1B) // DIVU
2510 {
2511 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2512 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2513 assert(d1>=0);
2514 assert(d2>=0);
2515 signed char quotient=get_reg(i_regs->regmap,LOREG);
2516 signed char remainder=get_reg(i_regs->regmap,HIREG);
2517 assert(quotient>=0);
2518 assert(remainder>=0);
2519 emit_mov(d1,remainder);
2520 emit_movimm(0xffffffff,quotient); // div0 case
2521 emit_test(d2,d2);
2522 emit_jeq(out+40); // Division by zero
2523#ifdef HAVE_ARMV5
2524 emit_clz(d2,HOST_TEMPREG);
2525 emit_movimm(1<<31,quotient);
2526 emit_shl(d2,HOST_TEMPREG,d2);
2527#else
2528 emit_movimm(0,HOST_TEMPREG);
2529 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2530 emit_lslpls_imm(d2,1,d2);
2531 emit_jns(out-2*4);
2532 emit_movimm(1<<31,quotient);
2533#endif
2534 emit_shr(quotient,HOST_TEMPREG,quotient);
2535 emit_cmp(remainder,d2);
2536 emit_subcs(remainder,d2,remainder);
2537 emit_adcs(quotient,quotient,quotient);
2538 emit_shrcc_imm(d2,1,d2);
2539 emit_jcc(out-16); // -4
2540 }
2541 }
2542 else // 64-bit
2543 assert(0);
2544 }
2545 else
2546 {
2547 // Multiply by zero is zero.
2548 // MIPS does not have a divide by zero exception.
2549 // The result is undefined, we return zero.
2550 signed char hr=get_reg(i_regs->regmap,HIREG);
2551 signed char lr=get_reg(i_regs->regmap,LOREG);
2552 if(hr>=0) emit_zeroreg(hr);
2553 if(lr>=0) emit_zeroreg(lr);
2554 }
2555}
2556#define multdiv_assemble multdiv_assemble_arm
2557
/*
 * Intentionally emits nothing.  On x86 the counterpart loads the hash mask
 * 0xf8 into register r; on ARM the hash is computed with a single
 * instruction (see do_rhash), so no preload is needed.
 */
static void do_preload_rhash(int r)
{
  (void)r;
}
2562
2563static void do_preload_rhtbl(int ht) {
2564 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2565}
2566
/*
 * Emit the mini hash computation: rh = rs & 0xf8.
 *
 * rs  host register holding the value to hash
 * rh  host register receiving the masked value
 */
static void do_rhash(int rs,int rh)
{
  const int hash_mask=0xf8;

  emit_andimm(rs,hash_mask,rh);
}
2570
2571static void do_miniht_load(int ht,int rh) {
2572 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2573 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2574}
2575
2576static void do_miniht_jump(int rs,int rh,int ht) {
2577 emit_cmp(rh,rs);
2578 emit_ldreq_indexed(ht,4,15);
2579 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
2580 emit_mov(rs,7);
2581 emit_jmp(jump_vaddr_reg[7]);
2582 #else
2583 emit_jmp(jump_vaddr_reg[rs]);
2584 #endif
2585}
2586
2587static void do_miniht_insert(u_int return_address,int rt,int temp) {
2588 #ifndef HAVE_ARMV7
2589 emit_movimm(return_address,rt); // PC into link register
2590 add_to_linker(out,return_address,1);
2591 emit_pcreladdr(temp);
2592 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2593 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2594 #else
2595 emit_movw(return_address&0x0000FFFF,rt);
2596 add_to_linker(out,return_address,1);
2597 emit_pcreladdr(temp);
2598 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2599 emit_movt(return_address&0xFFFF0000,rt);
2600 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2601 #endif
2602}
2603
2604static void mark_clear_cache(void *target)
2605{
2606 u_long offset = (u_char *)target - translation_cache;
2607 u_int mask = 1u << ((offset >> 12) & 31);
2608 if (!(needs_clear_cache[offset >> 17] & mask)) {
2609 char *start = (char *)((u_long)target & ~4095ul);
2610 start_tcache_write(start, start + 4096);
2611 needs_clear_cache[offset >> 17] |= mask;
2612 }
2613}
2614
2615// Clearing the cache is rather slow on ARM Linux, so mark the areas
2616// that need to be cleared, and then only clear these areas once.
2617static void do_clear_cache()
2618{
2619 int i,j;
2620 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
2621 {
2622 u_int bitmap=needs_clear_cache[i];
2623 if(bitmap) {
2624 u_char *start, *end;
2625 for(j=0;j<32;j++)
2626 {
2627 if(bitmap&(1<<j)) {
2628 start=translation_cache+i*131072+j*4096;
2629 end=start+4095;
2630 j++;
2631 while(j<32) {
2632 if(bitmap&(1<<j)) {
2633 end+=4096;
2634 j++;
2635 }else{
2636 end_tcache_write(start, end);
2637 break;
2638 }
2639 }
2640 }
2641 }
2642 needs_clear_cache[i]=0;
2643 }
2644 }
2645}
2646
// CPU-architecture-specific initialization.
// Nothing is done here for ARM; the function body is empty.
static void arch_init()
{
}
2650
2651// vim:shiftwidth=2:expandtab