drc: move some stuff out of assem_arm for reuse
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33u_char *translation_cache;
34#else
35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
/*
 * Bitmask named for the caller-saved registers: bits 0-3 and 12 are always
 * set, and bit 9 is added when building for __MACH__.
 */
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

/* Shorthand for marking a definition as possibly unused. */
#define unused __attribute__((unused))
45
/* With DRC_DBG defined, silence GCC's unused-entity diagnostics. */
#if defined(DRC_DBG)
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
51
52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
58extern void *dynarec_local;
59extern u_int mini_ht[32][2];
60
/* Routines implemented outside this file. */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

/* One jump_vaddr entry point per usable host register. */
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

/*
 * jump_vaddr_rN indexed by host register number N.
 * Registers 11, 13, 14 and 15 have no entry point and map to 0.
 */
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [11] = 0,
  [12] = jump_vaddr_r12,
  [13] = 0,
  [14] = 0,
  [15] = 0
};
95
96void invalidate_addr_r0();
97void invalidate_addr_r1();
98void invalidate_addr_r2();
99void invalidate_addr_r3();
100void invalidate_addr_r4();
101void invalidate_addr_r5();
102void invalidate_addr_r6();
103void invalidate_addr_r7();
104void invalidate_addr_r8();
105void invalidate_addr_r9();
106void invalidate_addr_r10();
107void invalidate_addr_r12();
108
109const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
121 0,
122 (int)invalidate_addr_r12,
123 0,
124 0,
125 0};
126
127static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
128
129/* Linker */
130
131static void set_jump_target(void *addr, void *target_)
132{
133 u_int target = (u_int)target_;
134 u_char *ptr = addr;
135 u_int *ptr2=(u_int *)ptr;
136 if(ptr[3]==0xe2) {
137 assert((target-(u_int)ptr2-8)<1024);
138 assert(((uintptr_t)addr&3)==0);
139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
141 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
142 }
143 else if(ptr[3]==0x72) {
144 // generated by emit_jno_unlikely
145 if((target-(u_int)ptr2-8)<1024) {
146 assert(((uintptr_t)addr&3)==0);
147 assert((target&3)==0);
148 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
149 }
150 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
151 assert(((uintptr_t)addr&3)==0);
152 assert((target&3)==0);
153 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
154 }
155 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
156 }
157 else {
158 assert((ptr[3]&0x0e)==0xa);
159 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
160 }
161}
162
// Variant of set_jump_target that can additionally copy the instruction found
// at the branch target into the slot just before the branch. It works, but
// the difference in speed is usually insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes = (u_char *)addr;
  u_int *insn = (u_int *)bytes;

  assert(!copy || insn[-1] == 0xe28dd000);

  if (bytes[3] == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((bytes[3] & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;

  // The target instruction is not copied in any of these cases.
  if ((target_insn & 0x0e100000) == 0             // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000 // Load
      || (target_insn & 0x08000000))
    copy = 0;

  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
197
198/* Literal pool */
199static void add_literal(int addr,int val)
200{
201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
204 literalcount++;
205}
206
207// from a pointer to external jump stub (which was produced by emit_extjump2)
208// find where the jumping insn is
209static void *find_extjump_insn(void *stub)
210{
211 int *ptr=(int *)(stub+4);
212 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
213 u_int offset=*ptr&0xfff;
214 void **l_ptr=(void *)ptr+offset+8;
215 return *l_ptr;
216}
217
218// find where external branch is liked to using addr of it's stub:
219// get address that insn one after stub loads (dyna_linker arg1),
220// treat it as a pointer to branch insn,
221// return addr where that branch jumps to
222static void *get_pointer(void *stub)
223{
224 //printf("get_pointer(%x)\n",(int)stub);
225 int *i_ptr=find_extjump_insn(stub);
226 assert((*i_ptr&0x0f000000)==0x0a000000);
227 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
228}
229
// Find the "clean" entry point that corresponds to a "dirty" entry point by
// stepping over the argument setup and the bl (the call to verify_code).
// If the instruction after the bl is an unconditional branch, its
// destination is returned instead.
static void *get_clean_addr(void *addr)
{
#ifdef HAVE_ARMV7
  signed int *insn = (signed int *)addr + 6;
#else
  signed int *insn = (signed int *)addr + 4;
#endif

  // The bl may sit one word later than expected.
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;

  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;

  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
248
249static int verify_dirty(u_int *ptr)
250{
251 #ifndef HAVE_ARMV7
252 u_int offset;
253 // get from literal pool
254 assert((*ptr&0xFFFF0000)==0xe59f0000);
255 offset=*ptr&0xfff;
256 u_int source=*(u_int*)((void *)ptr+offset+8);
257 ptr++;
258 assert((*ptr&0xFFFF0000)==0xe59f0000);
259 offset=*ptr&0xfff;
260 u_int copy=*(u_int*)((void *)ptr+offset+8);
261 ptr++;
262 assert((*ptr&0xFFFF0000)==0xe59f0000);
263 offset=*ptr&0xfff;
264 u_int len=*(u_int*)((void *)ptr+offset+8);
265 ptr++;
266 ptr++;
267 #else
268 // ARMv7 movw/movt
269 assert((*ptr&0xFFF00000)==0xe3000000);
270 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
271 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
272 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
273 ptr+=6;
274 #endif
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
277 //printf("verify_dirty: %x %x %x\n",source,copy,len);
278 return !memcmp((void *)source,(void *)copy,len);
279}
280
281// This doesn't necessarily find all clean entry points, just
282// guarantees that it's not dirty
283static int isclean(void *addr)
284{
285 #ifndef HAVE_ARMV7
286 u_int *ptr=((u_int *)addr)+4;
287 #else
288 u_int *ptr=((u_int *)addr)+6;
289 #endif
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
295 return 1;
296}
297
298// get source that block at addr was compiled from (host pointers)
299static void get_bounds(void *addr, u_char **start, u_char **end)
300{
301 u_int *ptr = addr;
302 #ifndef HAVE_ARMV7
303 u_int offset;
304 // get from literal pool
305 assert((*ptr&0xFFFF0000)==0xe59f0000);
306 offset=*ptr&0xfff;
307 u_int source=*(u_int*)((void *)ptr+offset+8);
308 ptr++;
309 //assert((*ptr&0xFFFF0000)==0xe59f0000);
310 //offset=*ptr&0xfff;
311 //u_int copy=*(u_int*)((void *)ptr+offset+8);
312 ptr++;
313 assert((*ptr&0xFFFF0000)==0xe59f0000);
314 offset=*ptr&0xfff;
315 u_int len=*(u_int*)((void *)ptr+offset+8);
316 ptr++;
317 ptr++;
318 #else
319 // ARMv7 movw/movt
320 assert((*ptr&0xFFF00000)==0xe3000000);
321 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
322 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
323 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
324 ptr+=6;
325 #endif
326 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
327 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
328 *start=(u_char *)source;
329 *end=(u_char *)source+len;
330}
331
332// Allocate a specific ARM register.
333static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
334{
335 int n;
336 int dirty=0;
337
338 // see if it's already allocated (and dealloc it)
339 for(n=0;n<HOST_REGS;n++)
340 {
341 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
342 dirty=(cur->dirty>>n)&1;
343 cur->regmap[n]=-1;
344 }
345 }
346
347 cur->regmap[hr]=reg;
348 cur->dirty&=~(1<<hr);
349 cur->dirty|=dirty<<hr;
350 cur->isconst&=~(1<<hr);
351}
352
353// Alloc cycle count into dedicated register
354static void alloc_cc(struct regstat *cur,int i)
355{
356 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
357}
358
359/* Assembler */
360
361static unused char regname[16][4] = {
362 "r0",
363 "r1",
364 "r2",
365 "r3",
366 "r4",
367 "r5",
368 "r6",
369 "r7",
370 "r8",
371 "r9",
372 "r10",
373 "fp",
374 "r12",
375 "sp",
376 "lr",
377 "pc"};
378
379static void output_w32(u_int word)
380{
381 *((u_int *)out)=word;
382 out+=4;
383}
384
/*
 * Pack three register numbers into their instruction fields:
 * rn at bit 16, rd at bit 12 and rm at bit 0. Each must be below 16.
 */
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);

  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
392
/*
 * Pack rd, rn and an 8-bit immediate that is shifted left by an even amount.
 * The shift is stored as the rotation field ((32 - shift) & 30) at bit 7,
 * rn at bit 16, rd at bit 12 and imm at bit 0.
 */
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);

  u_int rotate = (32 - shift) & 30;
  return (rn << 16) | (rd << 12) | (rotate << 7) | imm;
}
401
/*
 * Try to express imm as an 8-bit value with an even rotation.
 * On success returns 1 and stores the rotation field (bits 8-11) together
 * with the 8-bit value (bits 0-7) in *encoded. Returns 0, leaving *encoded
 * at 0, when no such form exists.
 */
static u_int genimm(u_int imm, u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate the candidate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
417
418static void genimm_checked(u_int imm,u_int *encoded)
419{
420 u_int ret=genimm(imm,encoded);
421 assert(ret);
422 (void)ret;
423}
424
425static u_int genjmp(u_int addr)
426{
427 int offset=addr-(int)out-8;
428 if(offset<-33554432||offset>=33554432) {
429 if (addr>2) {
430 SysPrintf("genjmp: out of range: %08x\n", offset);
431 exit(1);
432 }
433 return 0;
434 }
435 return ((u_int)offset>>2)&0xffffff;
436}
437
438static void emit_mov(int rs,int rt)
439{
440 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
441 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
442}
443
444static void emit_movs(int rs,int rt)
445{
446 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
447 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
448}
449
450static void emit_add(int rs1,int rs2,int rt)
451{
452 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
453 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
454}
455
456static void emit_adds(int rs1,int rs2,int rt)
457{
458 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
459 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
460}
461
462static void emit_adcs(int rs1,int rs2,int rt)
463{
464 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
465 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
466}
467
468static void emit_neg(int rs, int rt)
469{
470 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
471 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
472}
473
474static void emit_sub(int rs1,int rs2,int rt)
475{
476 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
477 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
478}
479
480static void emit_zeroreg(int rt)
481{
482 assem_debug("mov %s,#0\n",regname[rt]);
483 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
484}
485
486static void emit_loadlp(u_int imm,u_int rt)
487{
488 add_literal((int)out,imm);
489 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
490 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
491}
492
493static void emit_movw(u_int imm,u_int rt)
494{
495 assert(imm<65536);
496 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
497 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
498}
499
500static void emit_movt(u_int imm,u_int rt)
501{
502 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
503 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
504}
505
506static void emit_movimm(u_int imm,u_int rt)
507{
508 u_int armval;
509 if(genimm(imm,&armval)) {
510 assem_debug("mov %s,#%d\n",regname[rt],imm);
511 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
512 }else if(genimm(~imm,&armval)) {
513 assem_debug("mvn %s,#%d\n",regname[rt],imm);
514 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
515 }else if(imm<65536) {
516 #ifndef HAVE_ARMV7
517 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
518 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
519 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
520 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
521 #else
522 emit_movw(imm,rt);
523 #endif
524 }else{
525 #ifndef HAVE_ARMV7
526 emit_loadlp(imm,rt);
527 #else
528 emit_movw(imm&0x0000FFFF,rt);
529 emit_movt(imm&0xFFFF0000,rt);
530 #endif
531 }
532}
533
534static void emit_pcreladdr(u_int rt)
535{
536 assem_debug("add %s,pc,#?\n",regname[rt]);
537 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
538}
539
540static void emit_loadreg(int r, int hr)
541{
542 if(r&64) {
543 SysPrintf("64bit load in 32bit mode!\n");
544 assert(0);
545 return;
546 }
547 if((r&63)==0)
548 emit_zeroreg(hr);
549 else {
550 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
551 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
552 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
553 if(r==CCREG) addr=(int)&cycle_count;
554 if(r==CSREG) addr=(int)&Status;
555 if(r==INVCP) addr=(int)&invc_ptr;
556 u_int offset = addr-(u_int)&dynarec_local;
557 assert(offset<4096);
558 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
559 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
560 }
561}
562
563static void emit_storereg(int r, int hr)
564{
565 if(r&64) {
566 SysPrintf("64bit store in 32bit mode!\n");
567 assert(0);
568 return;
569 }
570 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
571 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
572 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
573 if(r==CCREG) addr=(int)&cycle_count;
574 u_int offset = addr-(u_int)&dynarec_local;
575 assert(offset<4096);
576 assem_debug("str %s,fp+%d\n",regname[hr],offset);
577 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
578}
579
580static void emit_test(int rs, int rt)
581{
582 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
583 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
584}
585
586static void emit_testimm(int rs,int imm)
587{
588 u_int armval;
589 assem_debug("tst %s,#%d\n",regname[rs],imm);
590 genimm_checked(imm,&armval);
591 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
592}
593
594static void emit_testeqimm(int rs,int imm)
595{
596 u_int armval;
597 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
598 genimm_checked(imm,&armval);
599 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
600}
601
602static void emit_not(int rs,int rt)
603{
604 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
605 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
606}
607
608static void emit_mvnmi(int rs,int rt)
609{
610 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
611 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
612}
613
614static void emit_and(u_int rs1,u_int rs2,u_int rt)
615{
616 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
617 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
618}
619
620static void emit_or(u_int rs1,u_int rs2,u_int rt)
621{
622 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
623 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
624}
625
626static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
627{
628 assert(rs<16);
629 assert(rt<16);
630 assert(imm<32);
631 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
632 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
633}
634
635static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
636{
637 assert(rs<16);
638 assert(rt<16);
639 assert(imm<32);
640 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
641 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
642}
643
644static void emit_xor(u_int rs1,u_int rs2,u_int rt)
645{
646 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
647 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
648}
649
650static void emit_addimm(u_int rs,int imm,u_int rt)
651{
652 assert(rs<16);
653 assert(rt<16);
654 if(imm!=0) {
655 u_int armval;
656 if(genimm(imm,&armval)) {
657 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
658 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
659 }else if(genimm(-imm,&armval)) {
660 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
661 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
662 #ifdef HAVE_ARMV7
663 }else if(rt!=rs&&(u_int)imm<65536) {
664 emit_movw(imm&0x0000ffff,rt);
665 emit_add(rs,rt,rt);
666 }else if(rt!=rs&&(u_int)-imm<65536) {
667 emit_movw(-imm&0x0000ffff,rt);
668 emit_sub(rs,rt,rt);
669 #endif
670 }else if((u_int)-imm<65536) {
671 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
672 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
673 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
674 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
675 }else {
676 do {
677 int shift = (ffs(imm) - 1) & ~1;
678 int imm8 = imm & (0xff << shift);
679 genimm_checked(imm8,&armval);
680 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
681 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
682 rs = rt;
683 imm &= ~imm8;
684 }
685 while (imm != 0);
686 }
687 }
688 else if(rs!=rt) emit_mov(rs,rt);
689}
690
691static void emit_addimm_and_set_flags(int imm,int rt)
692{
693 assert(imm>-65536&&imm<65536);
694 u_int armval;
695 if(genimm(imm,&armval)) {
696 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
697 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
698 }else if(genimm(-imm,&armval)) {
699 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
700 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
701 }else if(imm<0) {
702 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
703 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
704 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
705 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
706 }else{
707 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
708 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
709 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
710 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
711 }
712}
713
714static void emit_addimm_no_flags(u_int imm,u_int rt)
715{
716 emit_addimm(rt,imm,rt);
717}
718
719static void emit_addnop(u_int r)
720{
721 assert(r<16);
722 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
723 output_w32(0xe2800000|rd_rn_rm(r,r,0));
724}
725
726static void emit_adcimm(u_int rs,int imm,u_int rt)
727{
728 u_int armval;
729 genimm_checked(imm,&armval);
730 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
731 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
732}
733
734static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
735{
736 // TODO: if(genimm(imm,&armval)) ...
737 // else
738 emit_movimm(imm,HOST_TEMPREG);
739 emit_adds(HOST_TEMPREG,rsl,rtl);
740 emit_adcimm(rsh,0,rth);
741}
742
743static void emit_andimm(int rs,int imm,int rt)
744{
745 u_int armval;
746 if(imm==0) {
747 emit_zeroreg(rt);
748 }else if(genimm(imm,&armval)) {
749 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
750 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
751 }else if(genimm(~imm,&armval)) {
752 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
753 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
754 }else if(imm==65535) {
755 #ifndef HAVE_ARMV6
756 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
757 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
758 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
759 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
760 #else
761 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
762 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
763 #endif
764 }else{
765 assert(imm>0&&imm<65535);
766 #ifndef HAVE_ARMV7
767 assem_debug("mov r14,#%d\n",imm&0xFF00);
768 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
769 assem_debug("add r14,r14,#%d\n",imm&0xFF);
770 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
771 #else
772 emit_movw(imm,HOST_TEMPREG);
773 #endif
774 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
775 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
776 }
777}
778
779static void emit_orimm(int rs,int imm,int rt)
780{
781 u_int armval;
782 if(imm==0) {
783 if(rs!=rt) emit_mov(rs,rt);
784 }else if(genimm(imm,&armval)) {
785 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
786 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
787 }else{
788 assert(imm>0&&imm<65536);
789 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
790 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
791 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
792 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
793 }
794}
795
796static void emit_xorimm(int rs,int imm,int rt)
797{
798 u_int armval;
799 if(imm==0) {
800 if(rs!=rt) emit_mov(rs,rt);
801 }else if(genimm(imm,&armval)) {
802 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
803 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
804 }else{
805 assert(imm>0&&imm<65536);
806 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
807 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
808 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
809 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
810 }
811}
812
813static void emit_shlimm(int rs,u_int imm,int rt)
814{
815 assert(imm>0);
816 assert(imm<32);
817 //if(imm==1) ...
818 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
819 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
820}
821
822static void emit_lsls_imm(int rs,int imm,int rt)
823{
824 assert(imm>0);
825 assert(imm<32);
826 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
827 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
828}
829
830static unused void emit_lslpls_imm(int rs,int imm,int rt)
831{
832 assert(imm>0);
833 assert(imm<32);
834 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
835 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
836}
837
838static void emit_shrimm(int rs,u_int imm,int rt)
839{
840 assert(imm>0);
841 assert(imm<32);
842 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
843 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
844}
845
846static void emit_sarimm(int rs,u_int imm,int rt)
847{
848 assert(imm>0);
849 assert(imm<32);
850 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
851 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
852}
853
854static void emit_rorimm(int rs,u_int imm,int rt)
855{
856 assert(imm>0);
857 assert(imm<32);
858 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
859 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
860}
861
/*
 * Sign-extend the low 16 bits of rs into rt: a single sxth on ARMv6 and
 * later, otherwise a left shift by 16 followed by an arithmetic right shift.
 */
static void emit_signextend16(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
872
/*
 * Sign-extend the low 8 bits of rs into rt: a single sxtb on ARMv6 and
 * later, otherwise a left shift by 24 followed by an arithmetic right shift.
 */
static void emit_signextend8(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
883
884static void emit_shl(u_int rs,u_int shift,u_int rt)
885{
886 assert(rs<16);
887 assert(rt<16);
888 assert(shift<16);
889 //if(imm==1) ...
890 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
891 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
892}
893
894static void emit_shr(u_int rs,u_int shift,u_int rt)
895{
896 assert(rs<16);
897 assert(rt<16);
898 assert(shift<16);
899 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
900 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
901}
902
903static void emit_sar(u_int rs,u_int shift,u_int rt)
904{
905 assert(rs<16);
906 assert(rt<16);
907 assert(shift<16);
908 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
909 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
910}
911
912static void emit_orrshl(u_int rs,u_int shift,u_int rt)
913{
914 assert(rs<16);
915 assert(rt<16);
916 assert(shift<16);
917 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
918 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
919}
920
921static void emit_orrshr(u_int rs,u_int shift,u_int rt)
922{
923 assert(rs<16);
924 assert(rt<16);
925 assert(shift<16);
926 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
927 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
928}
929
930static void emit_cmpimm(int rs,int imm)
931{
932 u_int armval;
933 if(genimm(imm,&armval)) {
934 assem_debug("cmp %s,#%d\n",regname[rs],imm);
935 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
936 }else if(genimm(-imm,&armval)) {
937 assem_debug("cmn %s,#%d\n",regname[rs],imm);
938 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
939 }else if(imm>0) {
940 assert(imm<65536);
941 emit_movimm(imm,HOST_TEMPREG);
942 assem_debug("cmp %s,r14\n",regname[rs]);
943 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
944 }else{
945 assert(imm>-65536);
946 emit_movimm(-imm,HOST_TEMPREG);
947 assem_debug("cmn %s,r14\n",regname[rs]);
948 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
949 }
950}
951
952static void emit_cmovne_imm(int imm,int rt)
953{
954 assem_debug("movne %s,#%d\n",regname[rt],imm);
955 u_int armval;
956 genimm_checked(imm,&armval);
957 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
958}
959
960static void emit_cmovl_imm(int imm,int rt)
961{
962 assem_debug("movlt %s,#%d\n",regname[rt],imm);
963 u_int armval;
964 genimm_checked(imm,&armval);
965 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
966}
967
968static void emit_cmovb_imm(int imm,int rt)
969{
970 assem_debug("movcc %s,#%d\n",regname[rt],imm);
971 u_int armval;
972 genimm_checked(imm,&armval);
973 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
974}
975
976static void emit_cmovne_reg(int rs,int rt)
977{
978 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
979 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
980}
981
982static void emit_cmovl_reg(int rs,int rt)
983{
984 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
985 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
986}
987
988static void emit_cmovs_reg(int rs,int rt)
989{
990 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
991 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
992}
993
/*
 * Emit code that leaves 1 in rt when the compare of rs with imm satisfies
 * "lt", and 0 otherwise. When rs and rt are the same register, rt is zeroed
 * only after the compare so that the source is still intact.
 */
static void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1001
/*
 * Emit code that leaves 1 in rt when the compare of rs with imm satisfies
 * "cc", and 0 otherwise. When rs and rt are the same register, rt is zeroed
 * only after the compare so that the source is still intact.
 */
static void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1009
1010static void emit_cmp(int rs,int rt)
1011{
1012 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1013 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1014}
1015
/*
 * Emit code that compares rs with 1, loads 1 into rt, and then replaces it
 * with 0 under the "lt" condition.
 */
static void emit_set_gz32(int rs, int rt)
{
  const int threshold = 1;

  emit_cmpimm(rs, threshold);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1023
/*
 * Emit code that sets the flags from rs (movs into rt, or tst when rs and rt
 * are the same register) and then moves 1 into rt under the "ne" condition.
 */
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);

  emit_cmovne_imm(1, rt);
}
1031
/*
 * Emit code that leaves 1 in rt when the compare of rs1 with rs2 satisfies
 * "lt", and 0 otherwise. If rt is also one of the sources it is zeroed only
 * after the compare.
 */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1040
/*
 * Emit code that leaves 1 in rt when the compare of rs1 with rs2 satisfies
 * "cc", and 0 otherwise. If rt is also one of the sources it is zeroed only
 * after the compare.
 */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1049
1050static void emit_call(const void *a_)
1051{
1052 int a = (int)a_;
1053 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1054 u_int offset=genjmp(a);
1055 output_w32(0xeb000000|offset);
1056}
1057
1058static void emit_jmp(const void *a_)
1059{
1060 int a = (int)a_;
1061 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1062 u_int offset=genjmp(a);
1063 output_w32(0xea000000|offset);
1064}
1065
1066static void emit_jne(const void *a_)
1067{
1068 int a = (int)a_;
1069 assem_debug("bne %x\n",a);
1070 u_int offset=genjmp(a);
1071 output_w32(0x1a000000|offset);
1072}
1073
1074static void emit_jeq(int a)
1075{
1076 assem_debug("beq %x\n",a);
1077 u_int offset=genjmp(a);
1078 output_w32(0x0a000000|offset);
1079}
1080
1081static void emit_js(int a)
1082{
1083 assem_debug("bmi %x\n",a);
1084 u_int offset=genjmp(a);
1085 output_w32(0x4a000000|offset);
1086}
1087
1088static void emit_jns(int a)
1089{
1090 assem_debug("bpl %x\n",a);
1091 u_int offset=genjmp(a);
1092 output_w32(0x5a000000|offset);
1093}
1094
1095static void emit_jl(int a)
1096{
1097 assem_debug("blt %x\n",a);
1098 u_int offset=genjmp(a);
1099 output_w32(0xba000000|offset);
1100}
1101
1102static void emit_jge(int a)
1103{
1104 assem_debug("bge %x\n",a);
1105 u_int offset=genjmp(a);
1106 output_w32(0xaa000000|offset);
1107}
1108
1109static void emit_jno(int a)
1110{
1111 assem_debug("bvc %x\n",a);
1112 u_int offset=genjmp(a);
1113 output_w32(0x7a000000|offset);
1114}
1115
1116static void emit_jc(int a)
1117{
1118 assem_debug("bcs %x\n",a);
1119 u_int offset=genjmp(a);
1120 output_w32(0x2a000000|offset);
1121}
1122
1123static void emit_jcc(void *a_)
1124{
1125 int a = (int)a_;
1126 assem_debug("bcc %x\n",a);
1127 u_int offset=genjmp(a);
1128 output_w32(0x3a000000|offset);
1129}
1130
1131static void emit_callreg(u_int r)
1132{
1133 assert(r<15);
1134 assem_debug("blx %s\n",regname[r]);
1135 output_w32(0xe12fff30|r);
1136}
1137
1138static void emit_jmpreg(u_int r)
1139{
1140 assem_debug("mov pc,%s\n",regname[r]);
1141 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1142}
1143
1144static void emit_readword_indexed(int offset, int rs, int rt)
1145{
1146 assert(offset>-4096&&offset<4096);
1147 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1148 if(offset>=0) {
1149 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1150 }else{
1151 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1152 }
1153}
1154
1155static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1156{
1157 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1158 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1159}
1160
1161static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1162{
1163 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1164 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1165}
1166
1167static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1168{
1169 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1170 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1171}
1172
1173static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1174{
1175 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1176 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1177}
1178
1179static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1180{
1181 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1182 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1183}
1184
1185static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1186{
1187 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1188 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1189}
1190
1191static void emit_movsbl_indexed(int offset, int rs, int rt)
1192{
1193 assert(offset>-256&&offset<256);
1194 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1195 if(offset>=0) {
1196 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1197 }else{
1198 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1199 }
1200}
1201
1202static void emit_movswl_indexed(int offset, int rs, int rt)
1203{
1204 assert(offset>-256&&offset<256);
1205 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1206 if(offset>=0) {
1207 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1208 }else{
1209 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1210 }
1211}
1212
1213static void emit_movzbl_indexed(int offset, int rs, int rt)
1214{
1215 assert(offset>-4096&&offset<4096);
1216 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1217 if(offset>=0) {
1218 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1219 }else{
1220 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1221 }
1222}
1223
1224static void emit_movzwl_indexed(int offset, int rs, int rt)
1225{
1226 assert(offset>-256&&offset<256);
1227 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1228 if(offset>=0) {
1229 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1230 }else{
1231 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1232 }
1233}
1234
1235static void emit_ldrd(int offset, int rs, int rt)
1236{
1237 assert(offset>-256&&offset<256);
1238 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1239 if(offset>=0) {
1240 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1241 }else{
1242 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1243 }
1244}
1245
1246static void emit_readword(void *addr, int rt)
1247{
1248 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1249 assert(offset<4096);
1250 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1251 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1252}
1253
1254static void emit_writeword_indexed(int rt, int offset, int rs)
1255{
1256 assert(offset>-4096&&offset<4096);
1257 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1258 if(offset>=0) {
1259 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1260 }else{
1261 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1262 }
1263}
1264
1265static void emit_writehword_indexed(int rt, int offset, int rs)
1266{
1267 assert(offset>-256&&offset<256);
1268 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1269 if(offset>=0) {
1270 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1271 }else{
1272 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1273 }
1274}
1275
1276static void emit_writebyte_indexed(int rt, int offset, int rs)
1277{
1278 assert(offset>-4096&&offset<4096);
1279 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1280 if(offset>=0) {
1281 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1282 }else{
1283 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1284 }
1285}
1286
1287static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1288{
1289 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1290 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1291}
1292
1293static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1294{
1295 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1296 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1297}
1298
1299static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1300{
1301 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1302 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1303}
1304
1305static void emit_writeword(int rt, void *addr)
1306{
1307 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1308 assert(offset<4096);
1309 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1310 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1311}
1312
1313static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1314{
1315 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1316 assert(rs1<16);
1317 assert(rs2<16);
1318 assert(hi<16);
1319 assert(lo<16);
1320 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1321}
1322
1323static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1324{
1325 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1326 assert(rs1<16);
1327 assert(rs2<16);
1328 assert(hi<16);
1329 assert(lo<16);
1330 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1331}
1332
1333static void emit_clz(int rs,int rt)
1334{
1335 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1336 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1337}
1338
1339static void emit_subcs(int rs1,int rs2,int rt)
1340{
1341 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1342 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1343}
1344
1345static void emit_shrcc_imm(int rs,u_int imm,int rt)
1346{
1347 assert(imm>0);
1348 assert(imm<32);
1349 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1350 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1351}
1352
1353static void emit_shrne_imm(int rs,u_int imm,int rt)
1354{
1355 assert(imm>0);
1356 assert(imm<32);
1357 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1358 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1359}
1360
1361static void emit_negmi(int rs, int rt)
1362{
1363 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1364 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1365}
1366
1367static void emit_negsmi(int rs, int rt)
1368{
1369 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1370 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1371}
1372
1373static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1374{
1375 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1376 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1377}
1378
1379static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1380{
1381 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1382 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1383}
1384
1385static void emit_teq(int rs, int rt)
1386{
1387 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1388 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1389}
1390
1391static void emit_rsbimm(int rs, int imm, int rt)
1392{
1393 u_int armval;
1394 genimm_checked(imm,&armval);
1395 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1396 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1397}
1398
1399// Load 2 immediates optimizing for small code size
1400static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
1401{
1402 emit_movimm(imm1,rt1);
1403 u_int armval;
1404 if(genimm(imm2-imm1,&armval)) {
1405 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
1406 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
1407 }else if(genimm(imm1-imm2,&armval)) {
1408 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
1409 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
1410 }
1411 else emit_movimm(imm2,rt2);
1412}
1413
1414// Conditionally select one of two immediates, optimizing for small code size
1415// This will only be called if HAVE_CMOV_IMM is defined
1416static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1417{
1418 u_int armval;
1419 if(genimm(imm2-imm1,&armval)) {
1420 emit_movimm(imm1,rt);
1421 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1422 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1423 }else if(genimm(imm1-imm2,&armval)) {
1424 emit_movimm(imm1,rt);
1425 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1426 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1427 }
1428 else {
1429 #ifndef HAVE_ARMV7
1430 emit_movimm(imm1,rt);
1431 add_literal((int)out,imm2);
1432 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1433 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1434 #else
1435 emit_movw(imm1&0x0000FFFF,rt);
1436 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1437 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1438 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1439 }
1440 emit_movt(imm1&0xFFFF0000,rt);
1441 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1442 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1443 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1444 }
1445 #endif
1446 }
1447}
1448
1449// special case for checking invalid_code
1450static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1451{
1452 assert(imm<128&&imm>=0);
1453 assert(r>=0&&r<16);
1454 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1455 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1456 emit_cmpimm(HOST_TEMPREG,imm);
1457}
1458
1459static void emit_callne(int a)
1460{
1461 assem_debug("blne %x\n",a);
1462 u_int offset=genjmp(a);
1463 output_w32(0x1b000000|offset);
1464}
1465
1466// Used to preload hash table entries
1467static unused void emit_prefetchreg(int r)
1468{
1469 assem_debug("pld %s\n",regname[r]);
1470 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1471}
1472
1473// Special case for mini_ht
1474static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1475{
1476 assert(offset<4096);
1477 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1478 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1479}
1480
1481static void emit_orrne_imm(int rs,int imm,int rt)
1482{
1483 u_int armval;
1484 genimm_checked(imm,&armval);
1485 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1486 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1487}
1488
1489static void emit_andne_imm(int rs,int imm,int rt)
1490{
1491 u_int armval;
1492 genimm_checked(imm,&armval);
1493 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1494 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1495}
1496
1497static unused void emit_addpl_imm(int rs,int imm,int rt)
1498{
1499 u_int armval;
1500 genimm_checked(imm,&armval);
1501 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1502 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1503}
1504
1505static void emit_jno_unlikely(int a)
1506{
1507 //emit_jno(a);
1508 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1509 output_w32(0x72800000|rd_rn_rm(15,15,0));
1510}
1511
1512static void save_regs_all(u_int reglist)
1513{
1514 int i;
1515 if(!reglist) return;
1516 assem_debug("stmia fp,{");
1517 for(i=0;i<16;i++)
1518 if(reglist&(1<<i))
1519 assem_debug("r%d,",i);
1520 assem_debug("}\n");
1521 output_w32(0xe88b0000|reglist);
1522}
1523
1524static void restore_regs_all(u_int reglist)
1525{
1526 int i;
1527 if(!reglist) return;
1528 assem_debug("ldmia fp,{");
1529 for(i=0;i<16;i++)
1530 if(reglist&(1<<i))
1531 assem_debug("r%d,",i);
1532 assem_debug("}\n");
1533 output_w32(0xe89b0000|reglist);
1534}
1535
1536// Save registers before function call
1537static void save_regs(u_int reglist)
1538{
1539 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1540 save_regs_all(reglist);
1541}
1542
1543// Restore registers after function call
1544static void restore_regs(u_int reglist)
1545{
1546 reglist&=CALLER_SAVE_REGS;
1547 restore_regs_all(reglist);
1548}
1549
1550/* Stubs/epilogue */
1551
1552static void literal_pool(int n)
1553{
1554 if(!literalcount) return;
1555 if(n) {
1556 if((int)out-literals[0][0]<4096-n) return;
1557 }
1558 u_int *ptr;
1559 int i;
1560 for(i=0;i<literalcount;i++)
1561 {
1562 u_int l_addr=(u_int)out;
1563 int j;
1564 for(j=0;j<i;j++) {
1565 if(literals[j][1]==literals[i][1]) {
1566 //printf("dup %08x\n",literals[i][1]);
1567 l_addr=literals[j][0];
1568 break;
1569 }
1570 }
1571 ptr=(u_int *)literals[i][0];
1572 u_int offset=l_addr-(u_int)ptr-8;
1573 assert(offset<4096);
1574 assert(!(offset&3));
1575 *ptr|=offset;
1576 if(l_addr==(u_int)out) {
1577 literals[i][0]=l_addr; // remember for dupes
1578 output_w32(literals[i][1]);
1579 }
1580 }
1581 literalcount=0;
1582}
1583
1584static void literal_pool_jumpover(int n)
1585{
1586 if(!literalcount) return;
1587 if(n) {
1588 if((int)out-literals[0][0]<4096-n) return;
1589 }
1590 void *jaddr = out;
1591 emit_jmp(0);
1592 literal_pool(0);
1593 set_jump_target(jaddr, out);
1594}
1595
1596static void emit_extjump2(u_char *addr, int target, void *linker)
1597{
1598 u_char *ptr=(u_char *)addr;
1599 assert((ptr[3]&0x0e)==0xa);
1600 (void)ptr;
1601
1602 emit_loadlp(target,0);
1603 emit_loadlp((u_int)addr,1);
1604 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
1605 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1606//DEBUG >
1607#ifdef DEBUG_CYCLE_COUNT
1608 emit_readword(&last_count,ECX);
1609 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1610 emit_readword(&next_interupt,ECX);
1611 emit_writeword(HOST_CCREG,&Count);
1612 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1613 emit_writeword(ECX,&last_count);
1614#endif
1615//DEBUG <
1616 emit_jmp(linker);
1617}
1618
1619static void emit_extjump(void *addr, int target)
1620{
1621 emit_extjump2(addr, target, dyna_linker);
1622}
1623
1624static void emit_extjump_ds(void *addr, int target)
1625{
1626 emit_extjump2(addr, target, dyna_linker_ds);
1627}
1628
1629// put rt_val into rt, potentially making use of rs with value rs_val
1630static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1631{
1632 u_int armval;
1633 int diff;
1634 if(genimm(rt_val,&armval)) {
1635 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1636 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1637 return;
1638 }
1639 if(genimm(~rt_val,&armval)) {
1640 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1641 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1642 return;
1643 }
1644 diff=rt_val-rs_val;
1645 if(genimm(diff,&armval)) {
1646 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1647 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1648 return;
1649 }else if(genimm(-diff,&armval)) {
1650 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1651 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1652 return;
1653 }
1654 emit_movimm(rt_val,rt);
1655}
1656
// Return 1 if v2 can be derived cheaply from v1: the values are equal, or
// their difference (or its negation) reduces to fewer than 9 significant bits
// after dropping trailing zero bits two at a time.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int diff;

  if(v1==v2)
    return 1;
  diff=v2-v1;

  // Positive direction: drop pairs of trailing zero bits.
  bits=diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  // Negative direction.
  bits=-diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return (bits<0x100)?1:0;
}
1672
// Move host register a0 into r0 and a1 into r1 for a call. A negative
// register number means "no argument" except in the a1==0 cases, where a1 is
// always moved. May trash r2 (used as scratch when the two must be swapped).
static void pass_args(int a0, int a1)
{
  if(a1==0&&a0==1) {
    // The arguments sit in each other's slot: swap through r2.
    emit_mov(a0,2);
    emit_mov(a1,1);
    emit_mov(2,0);
    return;
  }
  if(a1==0&&a0!=0) {
    // r0 currently holds a1: move it out before r0 is overwritten.
    emit_mov(a1,1);
    if(a0>=0)
      emit_mov(a0,0);
    return;
  }
  if(a0>=0&&a0!=0)
    emit_mov(a0,0);
  if(a1>=0&&a1!=1)
    emit_mov(a1,1);
}
1689
1690static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1691{
1692 switch(type) {
1693 case LOADB_STUB: emit_signextend8(rs,rt); break;
1694 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1695 case LOADH_STUB: emit_signextend16(rs,rt); break;
1696 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1697 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1698 default: assert(0);
1699 }
1700}
1701
1702#include "pcsxmem.h"
1703#include "pcsxmem_inline.c"
1704
1705static void do_readstub(int n)
1706{
1707 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1708 literal_pool(256);
1709 set_jump_target(stubs[n].addr, out);
1710 enum stub_type type=stubs[n].type;
1711 int i=stubs[n].a;
1712 int rs=stubs[n].b;
1713 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1714 u_int reglist=stubs[n].e;
1715 signed char *i_regmap=i_regs->regmap;
1716 int rt;
1717 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1718 rt=get_reg(i_regmap,FTEMP);
1719 }else{
1720 rt=get_reg(i_regmap,rt1[i]);
1721 }
1722 assert(rs>=0);
1723 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1724 void *restore_jump = NULL;
1725 reglist|=(1<<rs);
1726 for(r=0;r<=12;r++) {
1727 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1728 temp=r; break;
1729 }
1730 }
1731 if(rt>=0&&rt1[i]!=0)
1732 reglist&=~(1<<rt);
1733 if(temp==-1) {
1734 save_regs(reglist);
1735 regs_saved=1;
1736 temp=(rs==0)?2:0;
1737 }
1738 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1739 temp2=1;
1740 emit_readword(&mem_rtab,temp);
1741 emit_shrimm(rs,12,temp2);
1742 emit_readword_dualindexedx4(temp,temp2,temp2);
1743 emit_lsls_imm(temp2,1,temp2);
1744 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1745 switch(type) {
1746 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1747 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1748 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1749 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1750 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1751 default: assert(0);
1752 }
1753 }
1754 if(regs_saved) {
1755 restore_jump=out;
1756 emit_jcc(0); // jump to reg restore
1757 }
1758 else
1759 emit_jcc(stubs[n].retaddr); // return address
1760
1761 if(!regs_saved)
1762 save_regs(reglist);
1763 void *handler=NULL;
1764 if(type==LOADB_STUB||type==LOADBU_STUB)
1765 handler=jump_handler_read8;
1766 if(type==LOADH_STUB||type==LOADHU_STUB)
1767 handler=jump_handler_read16;
1768 if(type==LOADW_STUB)
1769 handler=jump_handler_read32;
1770 assert(handler);
1771 pass_args(rs,temp2);
1772 int cc=get_reg(i_regmap,CCREG);
1773 if(cc<0)
1774 emit_loadreg(CCREG,2);
1775 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1776 emit_call(handler);
1777 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1778 mov_loadtype_adj(type,0,rt);
1779 }
1780 if(restore_jump)
1781 set_jump_target(restore_jump, out);
1782 restore_regs(reglist);
1783 emit_jmp(stubs[n].retaddr); // return address
1784}
1785
1786// return memhandler, or get directly accessable address and return 0
1787static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
1788{
1789 u_int l1,l2=0;
1790 l1=((u_int *)table)[addr>>12];
1791 if((l1&(1<<31))==0) {
1792 u_int v=l1<<1;
1793 *addr_host=v+addr;
1794 return NULL;
1795 }
1796 else {
1797 l1<<=1;
1798 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
1799 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
1800 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
1801 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
1802 else
1803 l2=((u_int *)l1)[(addr&0xfff)/4];
1804 if((l2&(1<<31))==0) {
1805 u_int v=l2<<1;
1806 *addr_host=v+(addr&0xfff);
1807 return NULL;
1808 }
1809 return (void *)(l2<<1);
1810 }
1811}
1812
1813static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1814{
1815 int rs=get_reg(regmap,target);
1816 int rt=get_reg(regmap,target);
1817 if(rs<0) rs=get_reg(regmap,-1);
1818 assert(rs>=0);
1819 u_int host_addr=0,is_dynamic,far_call=0;
1820 void *handler;
1821 int cc=get_reg(regmap,CCREG);
1822 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1823 return;
1824 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1825 if (handler == NULL) {
1826 if(rt<0||rt1[i]==0)
1827 return;
1828 if(addr!=host_addr)
1829 emit_movimm_from(addr,rs,host_addr,rs);
1830 switch(type) {
1831 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1832 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1833 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1834 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1835 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1836 default: assert(0);
1837 }
1838 return;
1839 }
1840 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1841 if(is_dynamic) {
1842 if(type==LOADB_STUB||type==LOADBU_STUB)
1843 handler=jump_handler_read8;
1844 if(type==LOADH_STUB||type==LOADHU_STUB)
1845 handler=jump_handler_read16;
1846 if(type==LOADW_STUB)
1847 handler=jump_handler_read32;
1848 }
1849
1850 // call a memhandler
1851 if(rt>=0&&rt1[i]!=0)
1852 reglist&=~(1<<rt);
1853 save_regs(reglist);
1854 if(target==0)
1855 emit_movimm(addr,0);
1856 else if(rs!=0)
1857 emit_mov(rs,0);
1858 int offset=(u_char *)handler-out-8;
1859 if(offset<-33554432||offset>=33554432) {
1860 // unreachable memhandler, a plugin func perhaps
1861 emit_movimm((u_int)handler,12);
1862 far_call=1;
1863 }
1864 if(cc<0)
1865 emit_loadreg(CCREG,2);
1866 if(is_dynamic) {
1867 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1868 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1869 }
1870 else {
1871 emit_readword(&last_count,3);
1872 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1873 emit_add(2,3,2);
1874 emit_writeword(2,&Count);
1875 }
1876
1877 if(far_call)
1878 emit_callreg(12);
1879 else
1880 emit_call(handler);
1881
1882 if(rt>=0&&rt1[i]!=0) {
1883 switch(type) {
1884 case LOADB_STUB: emit_signextend8(0,rt); break;
1885 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1886 case LOADH_STUB: emit_signextend16(0,rt); break;
1887 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1888 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1889 default: assert(0);
1890 }
1891 }
1892 restore_regs(reglist);
1893}
1894
1895static void do_writestub(int n)
1896{
1897 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1898 literal_pool(256);
1899 set_jump_target(stubs[n].addr, out);
1900 enum stub_type type=stubs[n].type;
1901 int i=stubs[n].a;
1902 int rs=stubs[n].b;
1903 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1904 u_int reglist=stubs[n].e;
1905 signed char *i_regmap=i_regs->regmap;
1906 int rt,r;
1907 if(itype[i]==C1LS||itype[i]==C2LS) {
1908 rt=get_reg(i_regmap,r=FTEMP);
1909 }else{
1910 rt=get_reg(i_regmap,r=rs2[i]);
1911 }
1912 assert(rs>=0);
1913 assert(rt>=0);
1914 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1915 void *restore_jump = NULL;
1916 int reglist2=reglist|(1<<rs)|(1<<rt);
1917 for(rtmp=0;rtmp<=12;rtmp++) {
1918 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1919 temp=rtmp; break;
1920 }
1921 }
1922 if(temp==-1) {
1923 save_regs(reglist);
1924 regs_saved=1;
1925 for(rtmp=0;rtmp<=3;rtmp++)
1926 if(rtmp!=rs&&rtmp!=rt)
1927 {temp=rtmp;break;}
1928 }
1929 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1930 temp2=3;
1931 emit_readword(&mem_wtab,temp);
1932 emit_shrimm(rs,12,temp2);
1933 emit_readword_dualindexedx4(temp,temp2,temp2);
1934 emit_lsls_imm(temp2,1,temp2);
1935 switch(type) {
1936 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1937 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1938 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1939 default: assert(0);
1940 }
1941 if(regs_saved) {
1942 restore_jump=out;
1943 emit_jcc(0); // jump to reg restore
1944 }
1945 else
1946 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1947
1948 if(!regs_saved)
1949 save_regs(reglist);
1950 void *handler=NULL;
1951 switch(type) {
1952 case STOREB_STUB: handler=jump_handler_write8; break;
1953 case STOREH_STUB: handler=jump_handler_write16; break;
1954 case STOREW_STUB: handler=jump_handler_write32; break;
1955 default: assert(0);
1956 }
1957 assert(handler);
1958 pass_args(rs,rt);
1959 if(temp2!=3)
1960 emit_mov(temp2,3);
1961 int cc=get_reg(i_regmap,CCREG);
1962 if(cc<0)
1963 emit_loadreg(CCREG,2);
1964 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1965 // returns new cycle_count
1966 emit_call(handler);
1967 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1968 if(cc<0)
1969 emit_storereg(CCREG,2);
1970 if(restore_jump)
1971 set_jump_target(restore_jump, out);
1972 restore_regs(reglist);
1973 emit_jmp(stubs[n].retaddr);
1974}
1975
1976static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1977{
1978 int rs=get_reg(regmap,-1);
1979 int rt=get_reg(regmap,target);
1980 assert(rs>=0);
1981 assert(rt>=0);
1982 u_int host_addr=0;
1983 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1984 if (handler == NULL) {
1985 if(addr!=host_addr)
1986 emit_movimm_from(addr,rs,host_addr,rs);
1987 switch(type) {
1988 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1989 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1990 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1991 default: assert(0);
1992 }
1993 return;
1994 }
1995
1996 // call a memhandler
1997 save_regs(reglist);
1998 pass_args(rs,rt);
1999 int cc=get_reg(regmap,CCREG);
2000 if(cc<0)
2001 emit_loadreg(CCREG,2);
2002 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2003 emit_movimm((u_int)handler,3);
2004 // returns new cycle_count
2005 emit_call(jump_handler_write_h);
2006 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2007 if(cc<0)
2008 emit_storereg(CCREG,2);
2009 restore_regs(reglist);
2010}
2011
2012static void do_unalignedwritestub(int n)
2013{
2014 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
2015 literal_pool(256);
2016 set_jump_target(stubs[n].addr, out);
2017
2018 int i=stubs[n].a;
2019 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2020 int addr=stubs[n].b;
2021 u_int reglist=stubs[n].e;
2022 signed char *i_regmap=i_regs->regmap;
2023 int temp2=get_reg(i_regmap,FTEMP);
2024 int rt;
2025 rt=get_reg(i_regmap,rs2[i]);
2026 assert(rt>=0);
2027 assert(addr>=0);
2028 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2029 reglist|=(1<<addr);
2030 reglist&=~(1<<temp2);
2031
2032#if 1
2033 // don't bother with it and call write handler
2034 save_regs(reglist);
2035 pass_args(addr,rt);
2036 int cc=get_reg(i_regmap,CCREG);
2037 if(cc<0)
2038 emit_loadreg(CCREG,2);
2039 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2040 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2041 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2042 if(cc<0)
2043 emit_storereg(CCREG,2);
2044 restore_regs(reglist);
2045 emit_jmp(stubs[n].retaddr); // return address
2046#else
2047 emit_andimm(addr,0xfffffffc,temp2);
2048 emit_writeword(temp2,&address);
2049
2050 save_regs(reglist);
2051 emit_shrimm(addr,16,1);
2052 int cc=get_reg(i_regmap,CCREG);
2053 if(cc<0) {
2054 emit_loadreg(CCREG,2);
2055 }
2056 emit_movimm((u_int)readmem,0);
2057 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
2058 emit_call((int)&indirect_jump_indexed);
2059 restore_regs(reglist);
2060
2061 emit_readword(&readmem_dword,temp2);
2062 int temp=addr; //hmh
2063 emit_shlimm(addr,3,temp);
2064 emit_andimm(temp,24,temp);
2065#ifdef BIG_ENDIAN_MIPS
2066 if (opcode[i]==0x2e) // SWR
2067#else
2068 if (opcode[i]==0x2a) // SWL
2069#endif
2070 emit_xorimm(temp,24,temp);
2071 emit_movimm(-1,HOST_TEMPREG);
2072 if (opcode[i]==0x2a) { // SWL
2073 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2074 emit_orrshr(rt,temp,temp2);
2075 }else{
2076 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2077 emit_orrshl(rt,temp,temp2);
2078 }
2079 emit_readword(&address,addr);
2080 emit_writeword(temp2,&word);
2081 //save_regs(reglist); // don't need to, no state changes
2082 emit_shrimm(addr,16,1);
2083 emit_movimm((u_int)writemem,0);
2084 //emit_call((int)&indirect_jump_indexed);
2085 emit_mov(15,14);
2086 emit_readword_dualindexedx4(0,1,15);
2087 emit_readword(&Count,HOST_TEMPREG);
2088 emit_readword(&next_interupt,2);
2089 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
2090 emit_writeword(2,&last_count);
2091 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2092 if(cc<0) {
2093 emit_storereg(CCREG,HOST_TEMPREG);
2094 }
2095 restore_regs(reglist);
2096 emit_jmp(stubs[n].retaddr); // return address
2097#endif
2098}
2099
2100static void do_invstub(int n)
2101{
2102 literal_pool(20);
2103 u_int reglist=stubs[n].a;
2104 set_jump_target(stubs[n].addr, out);
2105 save_regs(reglist);
2106 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2107 emit_call(&invalidate_addr);
2108 restore_regs(reglist);
2109 emit_jmp(stubs[n].retaddr); // return address
2110}
2111
2112void *do_dirty_stub(int i)
2113{
2114 assem_debug("do_dirty_stub %x\n",start+i*4);
2115 u_int addr=(u_int)source;
2116 // Careful about the code output here, verify_dirty needs to parse it.
2117 #ifndef HAVE_ARMV7
2118 emit_loadlp(addr,1);
2119 emit_loadlp((int)copy,2);
2120 emit_loadlp(slen*4,3);
2121 #else
2122 emit_movw(addr&0x0000FFFF,1);
2123 emit_movw(((u_int)copy)&0x0000FFFF,2);
2124 emit_movt(addr&0xFFFF0000,1);
2125 emit_movt(((u_int)copy)&0xFFFF0000,2);
2126 emit_movw(slen*4,3);
2127 #endif
2128 emit_movimm(start+i*4,0);
2129 emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
2130 void *entry = out;
2131 load_regs_entry(i);
2132 if (entry == out)
2133 entry = instr_addr[i];
2134 emit_jmp(instr_addr[i]);
2135 return entry;
2136}
2137
2138static void do_dirty_stub_ds()
2139{
2140 // Careful about the code output here, verify_dirty needs to parse it.
2141 #ifndef HAVE_ARMV7
2142 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2143 emit_loadlp((int)copy,2);
2144 emit_loadlp(slen*4,3);
2145 #else
2146 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2147 emit_movw(((u_int)copy)&0x0000FFFF,2);
2148 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2149 emit_movt(((u_int)copy)&0xFFFF0000,2);
2150 emit_movw(slen*4,3);
2151 #endif
2152 emit_movimm(start+1,0);
2153 emit_call(&verify_code_ds);
2154}
2155
2156/* Special assem */
2157
2158static void shift_assemble_arm(int i,struct regstat *i_regs)
2159{
2160 if(rt1[i]) {
2161 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2162 {
2163 signed char s,t,shift;
2164 t=get_reg(i_regs->regmap,rt1[i]);
2165 s=get_reg(i_regs->regmap,rs1[i]);
2166 shift=get_reg(i_regs->regmap,rs2[i]);
2167 if(t>=0){
2168 if(rs1[i]==0)
2169 {
2170 emit_zeroreg(t);
2171 }
2172 else if(rs2[i]==0)
2173 {
2174 assert(s>=0);
2175 if(s!=t) emit_mov(s,t);
2176 }
2177 else
2178 {
2179 emit_andimm(shift,31,HOST_TEMPREG);
2180 if(opcode2[i]==4) // SLLV
2181 {
2182 emit_shl(s,HOST_TEMPREG,t);
2183 }
2184 if(opcode2[i]==6) // SRLV
2185 {
2186 emit_shr(s,HOST_TEMPREG,t);
2187 }
2188 if(opcode2[i]==7) // SRAV
2189 {
2190 emit_sar(s,HOST_TEMPREG,t);
2191 }
2192 }
2193 }
2194 } else { // DSLLV/DSRLV/DSRAV
2195 signed char sh,sl,th,tl,shift;
2196 th=get_reg(i_regs->regmap,rt1[i]|64);
2197 tl=get_reg(i_regs->regmap,rt1[i]);
2198 sh=get_reg(i_regs->regmap,rs1[i]|64);
2199 sl=get_reg(i_regs->regmap,rs1[i]);
2200 shift=get_reg(i_regs->regmap,rs2[i]);
2201 if(tl>=0){
2202 if(rs1[i]==0)
2203 {
2204 emit_zeroreg(tl);
2205 if(th>=0) emit_zeroreg(th);
2206 }
2207 else if(rs2[i]==0)
2208 {
2209 assert(sl>=0);
2210 if(sl!=tl) emit_mov(sl,tl);
2211 if(th>=0&&sh!=th) emit_mov(sh,th);
2212 }
2213 else
2214 {
2215 // FIXME: What if shift==tl ?
2216 assert(shift!=tl);
2217 int temp=get_reg(i_regs->regmap,-1);
2218 int real_th=th;
2219 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2220 assert(sl>=0);
2221 assert(sh>=0);
2222 emit_andimm(shift,31,HOST_TEMPREG);
2223 if(opcode2[i]==0x14) // DSLLV
2224 {
2225 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2226 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2227 emit_orrshr(sl,HOST_TEMPREG,th);
2228 emit_andimm(shift,31,HOST_TEMPREG);
2229 emit_testimm(shift,32);
2230 emit_shl(sl,HOST_TEMPREG,tl);
2231 if(th>=0) emit_cmovne_reg(tl,th);
2232 emit_cmovne_imm(0,tl);
2233 }
2234 if(opcode2[i]==0x16) // DSRLV
2235 {
2236 assert(th>=0);
2237 emit_shr(sl,HOST_TEMPREG,tl);
2238 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2239 emit_orrshl(sh,HOST_TEMPREG,tl);
2240 emit_andimm(shift,31,HOST_TEMPREG);
2241 emit_testimm(shift,32);
2242 emit_shr(sh,HOST_TEMPREG,th);
2243 emit_cmovne_reg(th,tl);
2244 if(real_th>=0) emit_cmovne_imm(0,th);
2245 }
2246 if(opcode2[i]==0x17) // DSRAV
2247 {
2248 assert(th>=0);
2249 emit_shr(sl,HOST_TEMPREG,tl);
2250 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2251 if(real_th>=0) {
2252 assert(temp>=0);
2253 emit_sarimm(th,31,temp);
2254 }
2255 emit_orrshl(sh,HOST_TEMPREG,tl);
2256 emit_andimm(shift,31,HOST_TEMPREG);
2257 emit_testimm(shift,32);
2258 emit_sar(sh,HOST_TEMPREG,th);
2259 emit_cmovne_reg(th,tl);
2260 if(real_th>=0) emit_cmovne_reg(temp,th);
2261 }
2262 }
2263 }
2264 }
2265 }
2266}
2267#define shift_assemble shift_assemble_arm
2268
2269static void loadlr_assemble_arm(int i,struct regstat *i_regs)
2270{
2271 int s,tl,temp,temp2,addr;
2272 int offset;
2273 void *jaddr=0;
2274 int memtarget=0,c=0;
2275 int fastload_reg_override=0;
2276 u_int hr,reglist=0;
2277 tl=get_reg(i_regs->regmap,rt1[i]);
2278 s=get_reg(i_regs->regmap,rs1[i]);
2279 temp=get_reg(i_regs->regmap,-1);
2280 temp2=get_reg(i_regs->regmap,FTEMP);
2281 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
2282 assert(addr<0);
2283 offset=imm[i];
2284 for(hr=0;hr<HOST_REGS;hr++) {
2285 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2286 }
2287 reglist|=1<<temp;
2288 if(offset||s<0||c) addr=temp2;
2289 else addr=s;
2290 if(s>=0) {
2291 c=(i_regs->wasconst>>s)&1;
2292 if(c) {
2293 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
2294 }
2295 }
2296 if(!c) {
2297 emit_shlimm(addr,3,temp);
2298 if (opcode[i]==0x22||opcode[i]==0x26) {
2299 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
2300 }else{
2301 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
2302 }
2303 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
2304 }
2305 else {
2306 if(ram_offset&&memtarget) {
2307 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
2308 fastload_reg_override=HOST_TEMPREG;
2309 }
2310 if (opcode[i]==0x22||opcode[i]==0x26) {
2311 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
2312 }else{
2313 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
2314 }
2315 }
2316 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
2317 if(!c||memtarget) {
2318 int a=temp2;
2319 if(fastload_reg_override) a=fastload_reg_override;
2320 emit_readword_indexed(0,a,temp2);
2321 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
2322 }
2323 else
2324 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
2325 if(rt1[i]) {
2326 assert(tl>=0);
2327 emit_andimm(temp,24,temp);
2328#ifdef BIG_ENDIAN_MIPS
2329 if (opcode[i]==0x26) // LWR
2330#else
2331 if (opcode[i]==0x22) // LWL
2332#endif
2333 emit_xorimm(temp,24,temp);
2334 emit_movimm(-1,HOST_TEMPREG);
2335 if (opcode[i]==0x26) {
2336 emit_shr(temp2,temp,temp2);
2337 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
2338 }else{
2339 emit_shl(temp2,temp,temp2);
2340 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
2341 }
2342 emit_or(temp2,tl,tl);
2343 }
2344 //emit_storereg(rt1[i],tl); // DEBUG
2345 }
2346 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2347 assert(0);
2348 }
2349}
2350#define loadlr_assemble loadlr_assemble_arm
2351
2352static void c2op_prologue(u_int op,u_int reglist)
2353{
2354 save_regs_all(reglist);
2355#ifdef PCNT
2356 emit_movimm(op,0);
2357 emit_call((int)pcnt_gte_start);
2358#endif
2359 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
2360}
2361
2362static void c2op_epilogue(u_int op,u_int reglist)
2363{
2364#ifdef PCNT
2365 emit_movimm(op,0);
2366 emit_call((int)pcnt_gte_end);
2367#endif
2368 restore_regs_all(reglist);
2369}
2370
2371static void c2op_call_MACtoIR(int lm,int need_flags)
2372{
2373 if(need_flags)
2374 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2375 else
2376 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2377}
2378
2379static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2380{
2381 emit_call(func);
2382 // func is C code and trashes r0
2383 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2384 if(need_flags||need_ir)
2385 c2op_call_MACtoIR(lm,need_flags);
2386 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2387}
2388
2389static void c2op_assemble(int i,struct regstat *i_regs)
2390{
2391 u_int c2op=source[i]&0x3f;
2392 u_int hr,reglist_full=0,reglist;
2393 int need_flags,need_ir;
2394 for(hr=0;hr<HOST_REGS;hr++) {
2395 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
2396 }
2397 reglist=reglist_full&CALLER_SAVE_REGS;
2398
2399 if (gte_handlers[c2op]!=NULL) {
2400 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2401 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2402 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2403 source[i],gte_unneeded[i+1],need_flags,need_ir);
2404 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
2405 need_flags=0;
2406 int shift = (source[i] >> 19) & 1;
2407 int lm = (source[i] >> 10) & 1;
2408 switch(c2op) {
2409#ifndef DRC_DBG
2410 case GTE_MVMVA: {
2411#ifdef HAVE_ARMV5
2412 int v = (source[i] >> 15) & 3;
2413 int cv = (source[i] >> 13) & 3;
2414 int mx = (source[i] >> 17) & 3;
2415 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2416 c2op_prologue(c2op,reglist);
2417 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2418 if(v<3)
2419 emit_ldrd(v*8,0,4);
2420 else {
2421 emit_movzwl_indexed(9*4,0,4); // gteIR
2422 emit_movzwl_indexed(10*4,0,6);
2423 emit_movzwl_indexed(11*4,0,5);
2424 emit_orrshl_imm(6,16,4);
2425 }
2426 if(mx<3)
2427 emit_addimm(0,32*4+mx*8*4,6);
2428 else
2429 emit_readword(&zeromem_ptr,6);
2430 if(cv<3)
2431 emit_addimm(0,32*4+(cv*8+5)*4,7);
2432 else
2433 emit_readword(&zeromem_ptr,7);
2434#ifdef __ARM_NEON__
2435 emit_movimm(source[i],1); // opcode
2436 emit_call(gteMVMVA_part_neon);
2437 if(need_flags) {
2438 emit_movimm(lm,1);
2439 emit_call(gteMACtoIR_flags_neon);
2440 }
2441#else
2442 if(cv==3&&shift)
2443 emit_call((int)gteMVMVA_part_cv3sh12_arm);
2444 else {
2445 emit_movimm(shift,1);
2446 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2447 }
2448 if(need_flags||need_ir)
2449 c2op_call_MACtoIR(lm,need_flags);
2450#endif
2451#else /* if not HAVE_ARMV5 */
2452 c2op_prologue(c2op,reglist);
2453 emit_movimm(source[i],1); // opcode
2454 emit_writeword(1,&psxRegs.code);
2455 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
2456#endif
2457 break;
2458 }
2459 case GTE_OP:
2460 c2op_prologue(c2op,reglist);
2461 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
2462 if(need_flags||need_ir) {
2463 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2464 c2op_call_MACtoIR(lm,need_flags);
2465 }
2466 break;
2467 case GTE_DPCS:
2468 c2op_prologue(c2op,reglist);
2469 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2470 break;
2471 case GTE_INTPL:
2472 c2op_prologue(c2op,reglist);
2473 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2474 break;
2475 case GTE_SQR:
2476 c2op_prologue(c2op,reglist);
2477 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2478 if(need_flags||need_ir) {
2479 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2480 c2op_call_MACtoIR(lm,need_flags);
2481 }
2482 break;
2483 case GTE_DCPL:
2484 c2op_prologue(c2op,reglist);
2485 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2486 break;
2487 case GTE_GPF:
2488 c2op_prologue(c2op,reglist);
2489 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2490 break;
2491 case GTE_GPL:
2492 c2op_prologue(c2op,reglist);
2493 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2494 break;
2495#endif
2496 default:
2497 c2op_prologue(c2op,reglist);
2498#ifdef DRC_DBG
2499 emit_movimm(source[i],1); // opcode
2500 emit_writeword(1,&psxRegs.code);
2501#endif
2502 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2503 break;
2504 }
2505 c2op_epilogue(c2op,reglist);
2506 }
2507}
2508
2509static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2510{
2511 // case 0x18: MULT
2512 // case 0x19: MULTU
2513 // case 0x1A: DIV
2514 // case 0x1B: DIVU
2515 // case 0x1C: DMULT
2516 // case 0x1D: DMULTU
2517 // case 0x1E: DDIV
2518 // case 0x1F: DDIVU
2519 if(rs1[i]&&rs2[i])
2520 {
2521 if((opcode2[i]&4)==0) // 32-bit
2522 {
2523 if(opcode2[i]==0x18) // MULT
2524 {
2525 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2526 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2527 signed char hi=get_reg(i_regs->regmap,HIREG);
2528 signed char lo=get_reg(i_regs->regmap,LOREG);
2529 assert(m1>=0);
2530 assert(m2>=0);
2531 assert(hi>=0);
2532 assert(lo>=0);
2533 emit_smull(m1,m2,hi,lo);
2534 }
2535 if(opcode2[i]==0x19) // MULTU
2536 {
2537 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2538 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2539 signed char hi=get_reg(i_regs->regmap,HIREG);
2540 signed char lo=get_reg(i_regs->regmap,LOREG);
2541 assert(m1>=0);
2542 assert(m2>=0);
2543 assert(hi>=0);
2544 assert(lo>=0);
2545 emit_umull(m1,m2,hi,lo);
2546 }
2547 if(opcode2[i]==0x1A) // DIV
2548 {
2549 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2550 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2551 assert(d1>=0);
2552 assert(d2>=0);
2553 signed char quotient=get_reg(i_regs->regmap,LOREG);
2554 signed char remainder=get_reg(i_regs->regmap,HIREG);
2555 assert(quotient>=0);
2556 assert(remainder>=0);
2557 emit_movs(d1,remainder);
2558 emit_movimm(0xffffffff,quotient);
2559 emit_negmi(quotient,quotient); // .. quotient and ..
2560 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2561 emit_movs(d2,HOST_TEMPREG);
2562 emit_jeq((int)out+52); // Division by zero
2563 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2564#ifdef HAVE_ARMV5
2565 emit_clz(HOST_TEMPREG,quotient);
2566 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2567#else
2568 emit_movimm(0,quotient);
2569 emit_addpl_imm(quotient,1,quotient);
2570 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2571 emit_jns((int)out-2*4);
2572#endif
2573 emit_orimm(quotient,1<<31,quotient);
2574 emit_shr(quotient,quotient,quotient);
2575 emit_cmp(remainder,HOST_TEMPREG);
2576 emit_subcs(remainder,HOST_TEMPREG,remainder);
2577 emit_adcs(quotient,quotient,quotient);
2578 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2579 emit_jcc(out-16); // -4
2580 emit_teq(d1,d2);
2581 emit_negmi(quotient,quotient);
2582 emit_test(d1,d1);
2583 emit_negmi(remainder,remainder);
2584 }
2585 if(opcode2[i]==0x1B) // DIVU
2586 {
2587 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2588 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2589 assert(d1>=0);
2590 assert(d2>=0);
2591 signed char quotient=get_reg(i_regs->regmap,LOREG);
2592 signed char remainder=get_reg(i_regs->regmap,HIREG);
2593 assert(quotient>=0);
2594 assert(remainder>=0);
2595 emit_mov(d1,remainder);
2596 emit_movimm(0xffffffff,quotient); // div0 case
2597 emit_test(d2,d2);
2598 emit_jeq((int)out+40); // Division by zero
2599#ifdef HAVE_ARMV5
2600 emit_clz(d2,HOST_TEMPREG);
2601 emit_movimm(1<<31,quotient);
2602 emit_shl(d2,HOST_TEMPREG,d2);
2603#else
2604 emit_movimm(0,HOST_TEMPREG);
2605 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2606 emit_lslpls_imm(d2,1,d2);
2607 emit_jns((int)out-2*4);
2608 emit_movimm(1<<31,quotient);
2609#endif
2610 emit_shr(quotient,HOST_TEMPREG,quotient);
2611 emit_cmp(remainder,d2);
2612 emit_subcs(remainder,d2,remainder);
2613 emit_adcs(quotient,quotient,quotient);
2614 emit_shrcc_imm(d2,1,d2);
2615 emit_jcc(out-16); // -4
2616 }
2617 }
2618 else // 64-bit
2619 assert(0);
2620 }
2621 else
2622 {
2623 // Multiply by zero is zero.
2624 // MIPS does not have a divide by zero exception.
2625 // The result is undefined, we return zero.
2626 signed char hr=get_reg(i_regs->regmap,HIREG);
2627 signed char lr=get_reg(i_regs->regmap,LOREG);
2628 if(hr>=0) emit_zeroreg(hr);
2629 if(lr>=0) emit_zeroreg(lr);
2630 }
2631}
2632#define multdiv_assemble multdiv_assemble_arm
2633
// Intentionally emits nothing on ARM: the x86 backend preloads the value
// 0xf8 into a register here, while on ARM the hash is computed by a single
// instruction in do_rhash().
static void do_preload_rhash(int r) {
  (void)r;
}
2638
2639static void do_preload_rhtbl(int ht) {
2640 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2641}
2642
// Compute the mini hash table index for the address in rs: rh = rs & 0xf8.
static void do_rhash(int rs,int rh) {
  const int hash_mask = 0xf8;
  emit_andimm(rs, hash_mask, rh);
}
2646
2647static void do_miniht_load(int ht,int rh) {
2648 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2649 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2650}
2651
2652static void do_miniht_jump(int rs,int rh,int ht) {
2653 emit_cmp(rh,rs);
2654 emit_ldreq_indexed(ht,4,15);
2655 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
2656 emit_mov(rs,7);
2657 emit_jmp(jump_vaddr_reg[7]);
2658 #else
2659 emit_jmp(jump_vaddr_reg[rs]);
2660 #endif
2661}
2662
2663static void do_miniht_insert(u_int return_address,int rt,int temp) {
2664 #ifndef HAVE_ARMV7
2665 emit_movimm(return_address,rt); // PC into link register
2666 add_to_linker(out,return_address,1);
2667 emit_pcreladdr(temp);
2668 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2669 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2670 #else
2671 emit_movw(return_address&0x0000FFFF,rt);
2672 add_to_linker(out,return_address,1);
2673 emit_pcreladdr(temp);
2674 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2675 emit_movt(return_address&0xFFFF0000,rt);
2676 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2677 #endif
2678}
2679
2680static void mark_clear_cache(void *target)
2681{
2682 u_long offset = (u_char *)target - translation_cache;
2683 u_int mask = 1u << ((offset >> 12) & 31);
2684 if (!(needs_clear_cache[offset >> 17] & mask)) {
2685 char *start = (char *)((u_long)target & ~4095ul);
2686 start_tcache_write(start, start + 4096);
2687 needs_clear_cache[offset >> 17] |= mask;
2688 }
2689}
2690
2691// Clearing the cache is rather slow on ARM Linux, so mark the areas
2692// that need to be cleared, and then only clear these areas once.
2693static void do_clear_cache()
2694{
2695 int i,j;
2696 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
2697 {
2698 u_int bitmap=needs_clear_cache[i];
2699 if(bitmap) {
2700 u_char *start, *end;
2701 for(j=0;j<32;j++)
2702 {
2703 if(bitmap&(1<<j)) {
2704 start=translation_cache+i*131072+j*4096;
2705 end=start+4095;
2706 j++;
2707 while(j<32) {
2708 if(bitmap&(1<<j)) {
2709 end+=4096;
2710 j++;
2711 }else{
2712 end_tcache_write(start, end);
2713 break;
2714 }
2715 }
2716 }
2717 }
2718 needs_clear_cache[i]=0;
2719 }
2720 }
2721}
2722
// CPU-architecture-specific initialization.
// Nothing needs to be set up for the ARM backend.
static void arch_init()
{
}
2726
2727// vim:shiftwidth=2:expandtab