drc: starting arm64 support
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
// Storage for the translation cache, selected at build time:
//  - BASE_ADDR_FIXED:   nothing is defined here;
//  - BASE_ADDR_DYNAMIC: only a pointer is defined (presumably assigned
//                       elsewhere - not visible in this file section);
//  - otherwise:         a static buffer of 1 << TARGET_SIZE_2 bytes,
//                       aligned to 4096 bytes.
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33u_char *translation_cache;
34#else
35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// NOTE(review): looks like a bitmask of ARM registers (bit n = rn), i.e.
// 0x100f = r0-r3,r12 and 0x120f = the same plus r9 on __MACH__ builds;
// the users of this constant are not visible here - confirm.
38#ifndef __MACH__
39#define CALLER_SAVE_REGS 0x100f
40#else
41#define CALLER_SAVE_REGS 0x120f
42#endif
43
// Shorthand for the GCC attribute that silences unused-definition warnings.
44#define unused __attribute__((unused))
45
46#ifdef DRC_DBG
47#pragma GCC diagnostic ignored "-Wunused-function"
48#pragma GCC diagnostic ignored "-Wunused-variable"
49#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
50#endif
51
52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
58extern u_int mini_ht[32][2];
59
60void indirect_jump_indexed();
61void indirect_jump();
62void do_interrupt();
63void jump_vaddr_r0();
64void jump_vaddr_r1();
65void jump_vaddr_r2();
66void jump_vaddr_r3();
67void jump_vaddr_r4();
68void jump_vaddr_r5();
69void jump_vaddr_r6();
70void jump_vaddr_r7();
71void jump_vaddr_r8();
72void jump_vaddr_r9();
73void jump_vaddr_r10();
74void jump_vaddr_r12();
75
76void * const jump_vaddr_reg[16] = {
77 jump_vaddr_r0,
78 jump_vaddr_r1,
79 jump_vaddr_r2,
80 jump_vaddr_r3,
81 jump_vaddr_r4,
82 jump_vaddr_r5,
83 jump_vaddr_r6,
84 jump_vaddr_r7,
85 jump_vaddr_r8,
86 jump_vaddr_r9,
87 jump_vaddr_r10,
88 0,
89 jump_vaddr_r12,
90 0,
91 0,
92 0
93};
94
95void invalidate_addr_r0();
96void invalidate_addr_r1();
97void invalidate_addr_r2();
98void invalidate_addr_r3();
99void invalidate_addr_r4();
100void invalidate_addr_r5();
101void invalidate_addr_r6();
102void invalidate_addr_r7();
103void invalidate_addr_r8();
104void invalidate_addr_r9();
105void invalidate_addr_r10();
106void invalidate_addr_r12();
107
108const u_int invalidate_addr_reg[16] = {
109 (int)invalidate_addr_r0,
110 (int)invalidate_addr_r1,
111 (int)invalidate_addr_r2,
112 (int)invalidate_addr_r3,
113 (int)invalidate_addr_r4,
114 (int)invalidate_addr_r5,
115 (int)invalidate_addr_r6,
116 (int)invalidate_addr_r7,
117 (int)invalidate_addr_r8,
118 (int)invalidate_addr_r9,
119 (int)invalidate_addr_r10,
120 0,
121 (int)invalidate_addr_r12,
122 0,
123 0,
124 0};
125
// One u_int for every 1 << 17 bytes of a 1 << TARGET_SIZE_2 byte region.
// NOTE(review): presumably a bitmap (one bit per 4 KB of translation cache)
// marking ranges that need an instruction cache flush - users are not
// visible here, confirm.
126static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
127
128/* Linker */
129
// Patch the 32-bit instruction at addr so that it refers to target_.
// The instruction kind is recognised from byte 3 of the word:
//   0xe2  - the low 12 bits are rewritten with (offset >> 2) | 0xF00;
//           the offset must be below 1024,
//   0x72  - instruction generated by emit_jno_unlikely: short offsets are
//           patched in place, anything else is replaced by a 0x7A branch,
//   other - must be a branch; its 24-bit offset field is rewritten.
// The offset is always target - addr - 8.
static void set_jump_target(void *addr, void *target_)
{
  u_char *bytes = addr;
  u_int *insn = (u_int *)bytes;
  u_int target = (u_int)target_;
  u_int rel = target - (u_int)insn - 8;
  u_int kind = bytes[3];

  if (kind == 0xe2) {
    assert(rel < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (rel >> 2) | 0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*insn);
    return;
  }

  if (kind == 0x72) {
    // generated by emit_jno_unlikely
    if (rel < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (rel >> 2) | 0xF00;
    }
    else if (rel < 4096 && !(rel & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (rel >> 4) | 0xE00;
    }
    else {
      *insn = (0x7A000000) | ((rel << 6) >> 8);
    }
    return;
  }

  assert((kind & 0x0e) == 0xa);
  *insn = (*insn & 0xFF000000) | ((rel << 6) >> 8);
}
161
162// This optionally copies the instruction from the target of the branch into
163// the space before the branch. Works, but the difference in speed is
164// usually insignificant.
// NOTE: the whole function is compiled out by the #if 0 below. It takes
// (int addr, u_int target, int copy), unlike the live
// set_jump_target(void *addr, void *target_).
165#if 0
166static void set_jump_target_fillslot(int addr,u_int target,int copy)
167{
168 u_char *ptr=(u_char *)addr;
169 u_int *ptr2=(u_int *)ptr;
 // when a copy is requested, the word before the branch must be 0xe28dd000
170 assert(!copy||ptr2[-1]==0xe28dd000);
171 if(ptr[3]==0xe2) {
 // 0xe2 instructions only get their low 12 bits patched; copying is not allowed
172 assert(!copy);
173 assert((target-(u_int)ptr2-8)<4096);
174 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
175 }
176 else {
177 assert((ptr[3]&0x0e)==0xa);
 // inspect the instruction at the branch target; the checks below cancel the copy
178 u_int target_insn=*(u_int *)target;
179 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
180 copy=0;
181 }
182 if((target_insn&0x0c100000)==0x04100000) { // Load
183 copy=0;
184 }
185 if(target_insn&0x08000000) {
186 copy=0;
187 }
188 if(copy) {
 // place the target instruction before the branch and branch past it
189 ptr2[-1]=target_insn;
190 target+=4;
191 }
192 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
193 }
194}
195#endif
196
197/* Literal pool */
198static void add_literal(int addr,int val)
199{
200 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
201 literals[literalcount][0]=addr;
202 literals[literalcount][1]=val;
203 literalcount++;
204}
205
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the pointer stored in the literal that the stub's second
// instruction (a pc-relative ldr) loads: the location of the jumping insn.
static void *find_extjump_insn(void *stub)
{
  int *ldr_insn = (int *)(stub + 4);
  assert((*ldr_insn & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
  u_int lit_offset = *ldr_insn & 0xfff;
  // pc reads as the instruction address + 8
  void **literal = (void *)ldr_insn + lit_offset + 8;
  return *literal;
}
216
217// find where external branch is liked to using addr of it's stub:
218// get address that insn one after stub loads (dyna_linker arg1),
219// treat it as a pointer to branch insn,
220// return addr where that branch jumps to
221static void *get_pointer(void *stub)
222{
223 //printf("get_pointer(%x)\n",(int)stub);
224 int *i_ptr=find_extjump_insn(stub);
225 assert((*i_ptr&0x0f000000)==0x0a000000);
226 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
227}
228
// Find the "clean" entry point from a "dirty" entry point by skipping past
// the call to verify_code. If the word after the bl is an unconditional
// branch (0xea......), its destination is returned instead.
static void *get_clean_addr(void *addr)
{
  signed int *insn = addr;
#ifndef HAVE_ARMV7
  insn += 4;
#else
  insn += 6;
#endif
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
247
// Decode the source address, copy address and length loaded by the
// instructions at ptr, and return nonzero when the `len` bytes at `source`
// and at `copy` compare equal.
248static int verify_dirty(u_int *ptr)
249{
250 #ifndef HAVE_ARMV7
251 u_int offset;
252 // get from literal pool
 // three consecutive "ldr rX,[pc,#ofs]" instructions; each literal lives at
 // instruction address + 8 + ofs
253 assert((*ptr&0xFFFF0000)==0xe59f0000);
254 offset=*ptr&0xfff;
255 u_int source=*(u_int*)((void *)ptr+offset+8);
256 ptr++;
257 assert((*ptr&0xFFFF0000)==0xe59f0000);
258 offset=*ptr&0xfff;
259 u_int copy=*(u_int*)((void *)ptr+offset+8);
260 ptr++;
261 assert((*ptr&0xFFFF0000)==0xe59f0000);
262 offset=*ptr&0xfff;
263 u_int len=*(u_int*)((void *)ptr+offset+8);
 // step over the third ldr and one further word
264 ptr++;
265 ptr++;
266 #else
267 // ARMv7 movw/movt
 // ptr[0]/ptr[2] hold the low/high halves of source, ptr[1]/ptr[3] those of
 // copy, and ptr[4] the 16-bit length; the immediate is split into a 12-bit
 // and a 4-bit field in each instruction
268 assert((*ptr&0xFFF00000)==0xe3000000);
269 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
270 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
271 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
272 ptr+=6;
273 #endif
 // the bl is either at this word or at the next one
274 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
275 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
276 //printf("verify_dirty: %x %x %x\n",source,copy,len);
277 return !memcmp((void *)source,(void *)copy,len);
278}
279
280// This doesn't necessarily find all clean entry points, just
281// guarantees that it's not dirty
282static int isclean(void *addr)
283{
284 #ifndef HAVE_ARMV7
285 u_int *ptr=((u_int *)addr)+4;
286 #else
287 u_int *ptr=((u_int *)addr)+6;
288 #endif
289 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
290 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
291 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
294 return 1;
295}
296
297// get source that block at addr was compiled from (host pointers)
// Writes the start of the source range to *start and start + len to *end.
// The values are decoded from the same instruction sequence that
// verify_dirty reads; the "copy" operand is skipped here.
298static void get_bounds(void *addr, u_char **start, u_char **end)
299{
300 u_int *ptr = addr;
301 #ifndef HAVE_ARMV7
302 u_int offset;
303 // get from literal pool
 // each literal lives at instruction address + 8 + ofs
304 assert((*ptr&0xFFFF0000)==0xe59f0000);
305 offset=*ptr&0xfff;
306 u_int source=*(u_int*)((void *)ptr+offset+8);
307 ptr++;
308 //assert((*ptr&0xFFFF0000)==0xe59f0000);
309 //offset=*ptr&0xfff;
310 //u_int copy=*(u_int*)((void *)ptr+offset+8);
311 ptr++;
312 assert((*ptr&0xFFFF0000)==0xe59f0000);
313 offset=*ptr&0xfff;
314 u_int len=*(u_int*)((void *)ptr+offset+8);
 // step over the third ldr and one further word
315 ptr++;
316 ptr++;
317 #else
318 // ARMv7 movw/movt
 // ptr[0]/ptr[2] hold the low/high halves of source, ptr[4] the 16-bit length
319 assert((*ptr&0xFFF00000)==0xe3000000);
320 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
321 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
322 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
323 ptr+=6;
324 #endif
 // sanity check only: a bl must follow, either at this word or the next
325 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
326 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
327 *start=(u_char *)source;
328 *end=(u_char *)source+len;
329}
330
331// Allocate a specific ARM register.
332static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
333{
334 int n;
335 int dirty=0;
336
337 // see if it's already allocated (and dealloc it)
338 for(n=0;n<HOST_REGS;n++)
339 {
340 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
341 dirty=(cur->dirty>>n)&1;
342 cur->regmap[n]=-1;
343 }
344 }
345
346 cur->regmap[hr]=reg;
347 cur->dirty&=~(1<<hr);
348 cur->dirty|=dirty<<hr;
349 cur->isconst&=~(1<<hr);
350}
351
352// Alloc cycle count into dedicated register
353static void alloc_cc(struct regstat *cur,int i)
354{
355 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
356}
357
358/* Assembler */
359
360static unused char regname[16][4] = {
361 "r0",
362 "r1",
363 "r2",
364 "r3",
365 "r4",
366 "r5",
367 "r6",
368 "r7",
369 "r8",
370 "r9",
371 "r10",
372 "fp",
373 "r12",
374 "sp",
375 "lr",
376 "pc"};
377
378static void output_w32(u_int word)
379{
380 *((u_int *)out)=word;
381 out+=4;
382}
383
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// All three register numbers must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);
  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
391
// Build the rd/rn fields plus an 8-bit immediate that is shifted left by
// `shift` bits (shift must be even): the rotation field is derived from
// 32 - shift and placed above the immediate.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);
  u_int rotation = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rotation | imm;
}
400
// Try to express imm as an 8-bit value rotated by an even amount.
// On success the 12-bit operand (rotation field plus 8-bit value) is stored
// in *encoded and 1 is returned; otherwise *encoded is 0 and 0 is returned.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;
  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
416
417static void genimm_checked(u_int imm,u_int *encoded)
418{
419 u_int ret=genimm(imm,encoded);
420 assert(ret);
421 (void)ret;
422}
423
424static u_int genjmp(u_int addr)
425{
426 int offset=addr-(int)out-8;
427 if(offset<-33554432||offset>=33554432) {
428 if (addr>2) {
429 SysPrintf("genjmp: out of range: %08x\n", offset);
430 exit(1);
431 }
432 return 0;
433 }
434 return ((u_int)offset>>2)&0xffffff;
435}
436
437static void emit_mov(int rs,int rt)
438{
439 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
440 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
441}
442
443static void emit_movs(int rs,int rt)
444{
445 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
446 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
447}
448
449static void emit_add(int rs1,int rs2,int rt)
450{
451 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
452 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
453}
454
455static void emit_adds(int rs1,int rs2,int rt)
456{
457 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
458 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
459}
460
461static void emit_adcs(int rs1,int rs2,int rt)
462{
463 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
464 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
465}
466
467static void emit_neg(int rs, int rt)
468{
469 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
470 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
471}
472
473static void emit_sub(int rs1,int rs2,int rt)
474{
475 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
476 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
477}
478
479static void emit_zeroreg(int rt)
480{
481 assem_debug("mov %s,#0\n",regname[rt]);
482 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
483}
484
485static void emit_loadlp(u_int imm,u_int rt)
486{
487 add_literal((int)out,imm);
488 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
489 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
490}
491
492static void emit_movw(u_int imm,u_int rt)
493{
494 assert(imm<65536);
495 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
496 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
497}
498
499static void emit_movt(u_int imm,u_int rt)
500{
501 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
502 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
503}
504
505static void emit_movimm(u_int imm,u_int rt)
506{
507 u_int armval;
508 if(genimm(imm,&armval)) {
509 assem_debug("mov %s,#%d\n",regname[rt],imm);
510 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
511 }else if(genimm(~imm,&armval)) {
512 assem_debug("mvn %s,#%d\n",regname[rt],imm);
513 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
514 }else if(imm<65536) {
515 #ifndef HAVE_ARMV7
516 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
517 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
518 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
519 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
520 #else
521 emit_movw(imm,rt);
522 #endif
523 }else{
524 #ifndef HAVE_ARMV7
525 emit_loadlp(imm,rt);
526 #else
527 emit_movw(imm&0x0000FFFF,rt);
528 emit_movt(imm&0xFFFF0000,rt);
529 #endif
530 }
531}
532
533static void emit_pcreladdr(u_int rt)
534{
535 assem_debug("add %s,pc,#?\n",regname[rt]);
536 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
537}
538
539static void emit_loadreg(int r, int hr)
540{
541 if(r&64) {
542 SysPrintf("64bit load in 32bit mode!\n");
543 assert(0);
544 return;
545 }
546 if((r&63)==0)
547 emit_zeroreg(hr);
548 else {
549 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
550 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
551 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
552 if(r==CCREG) addr=(int)&cycle_count;
553 if(r==CSREG) addr=(int)&Status;
554 if(r==INVCP) addr=(int)&invc_ptr;
555 u_int offset = addr-(u_int)&dynarec_local;
556 assert(offset<4096);
557 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
558 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
559 }
560}
561
562static void emit_storereg(int r, int hr)
563{
564 if(r&64) {
565 SysPrintf("64bit store in 32bit mode!\n");
566 assert(0);
567 return;
568 }
569 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
570 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
571 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
572 if(r==CCREG) addr=(int)&cycle_count;
573 u_int offset = addr-(u_int)&dynarec_local;
574 assert(offset<4096);
575 assem_debug("str %s,fp+%d\n",regname[hr],offset);
576 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
577}
578
579static void emit_test(int rs, int rt)
580{
581 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
582 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
583}
584
585static void emit_testimm(int rs,int imm)
586{
587 u_int armval;
588 assem_debug("tst %s,#%d\n",regname[rs],imm);
589 genimm_checked(imm,&armval);
590 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
591}
592
593static void emit_testeqimm(int rs,int imm)
594{
595 u_int armval;
596 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
597 genimm_checked(imm,&armval);
598 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
599}
600
601static void emit_not(int rs,int rt)
602{
603 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
604 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
605}
606
607static void emit_mvnmi(int rs,int rt)
608{
609 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
610 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
611}
612
613static void emit_and(u_int rs1,u_int rs2,u_int rt)
614{
615 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
616 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
617}
618
619static void emit_or(u_int rs1,u_int rs2,u_int rt)
620{
621 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
622 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
623}
624
625static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
626{
627 assert(rs<16);
628 assert(rt<16);
629 assert(imm<32);
630 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
631 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
632}
633
634static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
635{
636 assert(rs<16);
637 assert(rt<16);
638 assert(imm<32);
639 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
640 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
641}
642
643static void emit_xor(u_int rs1,u_int rs2,u_int rt)
644{
645 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
646 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
647}
648
649static void emit_addimm(u_int rs,int imm,u_int rt)
650{
651 assert(rs<16);
652 assert(rt<16);
653 if(imm!=0) {
654 u_int armval;
655 if(genimm(imm,&armval)) {
656 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
657 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
658 }else if(genimm(-imm,&armval)) {
659 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
660 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
661 #ifdef HAVE_ARMV7
662 }else if(rt!=rs&&(u_int)imm<65536) {
663 emit_movw(imm&0x0000ffff,rt);
664 emit_add(rs,rt,rt);
665 }else if(rt!=rs&&(u_int)-imm<65536) {
666 emit_movw(-imm&0x0000ffff,rt);
667 emit_sub(rs,rt,rt);
668 #endif
669 }else if((u_int)-imm<65536) {
670 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
671 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
672 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
673 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
674 }else {
675 do {
676 int shift = (ffs(imm) - 1) & ~1;
677 int imm8 = imm & (0xff << shift);
678 genimm_checked(imm8,&armval);
679 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
680 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
681 rs = rt;
682 imm &= ~imm8;
683 }
684 while (imm != 0);
685 }
686 }
687 else if(rs!=rt) emit_mov(rs,rt);
688}
689
690static void emit_addimm_and_set_flags(int imm,int rt)
691{
692 assert(imm>-65536&&imm<65536);
693 u_int armval;
694 if(genimm(imm,&armval)) {
695 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
696 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
697 }else if(genimm(-imm,&armval)) {
698 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
699 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
700 }else if(imm<0) {
701 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
702 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
703 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
704 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
705 }else{
706 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
707 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
708 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
709 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
710 }
711}
712
713static void emit_addimm_no_flags(u_int imm,u_int rt)
714{
715 emit_addimm(rt,imm,rt);
716}
717
718static void emit_addnop(u_int r)
719{
720 assert(r<16);
721 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
722 output_w32(0xe2800000|rd_rn_rm(r,r,0));
723}
724
725static void emit_adcimm(u_int rs,int imm,u_int rt)
726{
727 u_int armval;
728 genimm_checked(imm,&armval);
729 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
730 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
731}
732
733static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
734{
735 // TODO: if(genimm(imm,&armval)) ...
736 // else
737 emit_movimm(imm,HOST_TEMPREG);
738 emit_adds(HOST_TEMPREG,rsl,rtl);
739 emit_adcimm(rsh,0,rth);
740}
741
742static void emit_andimm(int rs,int imm,int rt)
743{
744 u_int armval;
745 if(imm==0) {
746 emit_zeroreg(rt);
747 }else if(genimm(imm,&armval)) {
748 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
749 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
750 }else if(genimm(~imm,&armval)) {
751 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
752 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
753 }else if(imm==65535) {
754 #ifndef HAVE_ARMV6
755 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
756 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
757 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
758 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
759 #else
760 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
761 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
762 #endif
763 }else{
764 assert(imm>0&&imm<65535);
765 #ifndef HAVE_ARMV7
766 assem_debug("mov r14,#%d\n",imm&0xFF00);
767 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
768 assem_debug("add r14,r14,#%d\n",imm&0xFF);
769 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
770 #else
771 emit_movw(imm,HOST_TEMPREG);
772 #endif
773 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
774 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
775 }
776}
777
778static void emit_orimm(int rs,int imm,int rt)
779{
780 u_int armval;
781 if(imm==0) {
782 if(rs!=rt) emit_mov(rs,rt);
783 }else if(genimm(imm,&armval)) {
784 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
785 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
786 }else{
787 assert(imm>0&&imm<65536);
788 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
789 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
790 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
791 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
792 }
793}
794
795static void emit_xorimm(int rs,int imm,int rt)
796{
797 u_int armval;
798 if(imm==0) {
799 if(rs!=rt) emit_mov(rs,rt);
800 }else if(genimm(imm,&armval)) {
801 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
802 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
803 }else{
804 assert(imm>0&&imm<65536);
805 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
806 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
807 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
808 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
809 }
810}
811
812static void emit_shlimm(int rs,u_int imm,int rt)
813{
814 assert(imm>0);
815 assert(imm<32);
816 //if(imm==1) ...
817 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
818 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
819}
820
821static void emit_lsls_imm(int rs,int imm,int rt)
822{
823 assert(imm>0);
824 assert(imm<32);
825 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
826 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
827}
828
829static unused void emit_lslpls_imm(int rs,int imm,int rt)
830{
831 assert(imm>0);
832 assert(imm<32);
833 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
834 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
835}
836
837static void emit_shrimm(int rs,u_int imm,int rt)
838{
839 assert(imm>0);
840 assert(imm<32);
841 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
842 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
843}
844
845static void emit_sarimm(int rs,u_int imm,int rt)
846{
847 assert(imm>0);
848 assert(imm<32);
849 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
850 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
851}
852
853static void emit_rorimm(int rs,u_int imm,int rt)
854{
855 assert(imm>0);
856 assert(imm<32);
857 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
858 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
859}
860
// Sign-extend the low 16 bits of rs into rt: a single sxth with
// HAVE_ARMV6, otherwise a shift left by 16 followed by an arithmetic
// shift right by 16.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
871
// Sign-extend the low 8 bits of rs into rt: a single sxtb with
// HAVE_ARMV6, otherwise a shift left by 24 followed by an arithmetic
// shift right by 24.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
882
883static void emit_shl(u_int rs,u_int shift,u_int rt)
884{
885 assert(rs<16);
886 assert(rt<16);
887 assert(shift<16);
888 //if(imm==1) ...
889 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
890 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
891}
892
893static void emit_shr(u_int rs,u_int shift,u_int rt)
894{
895 assert(rs<16);
896 assert(rt<16);
897 assert(shift<16);
898 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
899 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
900}
901
902static void emit_sar(u_int rs,u_int shift,u_int rt)
903{
904 assert(rs<16);
905 assert(rt<16);
906 assert(shift<16);
907 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
908 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
909}
910
911static void emit_orrshl(u_int rs,u_int shift,u_int rt)
912{
913 assert(rs<16);
914 assert(rt<16);
915 assert(shift<16);
916 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
917 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
918}
919
920static void emit_orrshr(u_int rs,u_int shift,u_int rt)
921{
922 assert(rs<16);
923 assert(rt<16);
924 assert(shift<16);
925 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
926 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
927}
928
929static void emit_cmpimm(int rs,int imm)
930{
931 u_int armval;
932 if(genimm(imm,&armval)) {
933 assem_debug("cmp %s,#%d\n",regname[rs],imm);
934 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
935 }else if(genimm(-imm,&armval)) {
936 assem_debug("cmn %s,#%d\n",regname[rs],imm);
937 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
938 }else if(imm>0) {
939 assert(imm<65536);
940 emit_movimm(imm,HOST_TEMPREG);
941 assem_debug("cmp %s,r14\n",regname[rs]);
942 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
943 }else{
944 assert(imm>-65536);
945 emit_movimm(-imm,HOST_TEMPREG);
946 assem_debug("cmn %s,r14\n",regname[rs]);
947 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
948 }
949}
950
951static void emit_cmovne_imm(int imm,int rt)
952{
953 assem_debug("movne %s,#%d\n",regname[rt],imm);
954 u_int armval;
955 genimm_checked(imm,&armval);
956 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
957}
958
959static void emit_cmovl_imm(int imm,int rt)
960{
961 assem_debug("movlt %s,#%d\n",regname[rt],imm);
962 u_int armval;
963 genimm_checked(imm,&armval);
964 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
965}
966
967static void emit_cmovb_imm(int imm,int rt)
968{
969 assem_debug("movcc %s,#%d\n",regname[rt],imm);
970 u_int armval;
971 genimm_checked(imm,&armval);
972 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
973}
974
975static void emit_cmovne_reg(int rs,int rt)
976{
977 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
978 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
979}
980
981static void emit_cmovl_reg(int rs,int rt)
982{
983 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
984 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
985}
986
987static void emit_cmovs_reg(int rs,int rt)
988{
989 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
990 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
991}
992
// Set rt to 1 when rs < imm (signed compare, movlt), else 0.
// When rs and rt are the same register, rt is only cleared after the
// compare has read it.
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1000
// Set rt to 1 when rs < imm (unsigned compare, movcc), else 0.
// When rs and rt are the same register, rt is only cleared after the
// compare has read it.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1008
1009static void emit_cmp(int rs,int rt)
1010{
1011 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1012 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1013}
1014
// Set rt to 1 when rs > 0 (signed), else 0: compare rs with 1, load 1,
// then replace it with 0 on "less than".
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1022
// Set rt to 1 when rs is non-zero; a zero value stays zero. The flags come
// from "movs rt, rs" (distinct registers) or "tst rs, rs" (same register).
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1030
// Set rt to 1 when rs1 < rs2 (signed compare, movlt), else 0.
// When rt is also one of the sources, it is only cleared after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source = (rs1 == rt || rs2 == rt);

  if (!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_source)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1039
// Set rt to 1 when rs1 < rs2 (unsigned compare, movcc), else 0.
// When rt is also one of the sources, it is only cleared after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source = (rs1 == rt || rs2 == rt);

  if (!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_source)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1048
1049static void emit_call(const void *a_)
1050{
1051 int a = (int)a_;
1052 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1053 u_int offset=genjmp(a);
1054 output_w32(0xeb000000|offset);
1055}
1056
1057static void emit_jmp(const void *a_)
1058{
1059 int a = (int)a_;
1060 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1061 u_int offset=genjmp(a);
1062 output_w32(0xea000000|offset);
1063}
1064
// Emit "bne a_".
static void emit_jne(const void *a_)
{
  int a = (int)a_;
  assem_debug("bne %x\n", a);
  output_w32(0x1a000000 | genjmp(a));
}
1072
// Emit "beq a".
static void emit_jeq(int a)
{
  assem_debug("beq %x\n", a);
  output_w32(0x0a000000 | genjmp(a));
}
1079
// Emit "bmi a" (branch when the sign flag is set).
static void emit_js(int a)
{
  assem_debug("bmi %x\n", a);
  output_w32(0x4a000000 | genjmp(a));
}
1086
// Emit "bpl a" (branch when the sign flag is clear).
static void emit_jns(int a)
{
  assem_debug("bpl %x\n", a);
  output_w32(0x5a000000 | genjmp(a));
}
1093
// Emit "blt a".
static void emit_jl(int a)
{
  assem_debug("blt %x\n", a);
  output_w32(0xba000000 | genjmp(a));
}
1100
// Emit "bge a".
static void emit_jge(int a)
{
  assem_debug("bge %x\n", a);
  output_w32(0xaa000000 | genjmp(a));
}
1107
// Emit "bvc a" (branch when the overflow flag is clear).
static void emit_jno(int a)
{
  assem_debug("bvc %x\n", a);
  output_w32(0x7a000000 | genjmp(a));
}
1114
// Emit "bcs a" (branch when the carry flag is set).
static void emit_jc(int a)
{
  assem_debug("bcs %x\n", a);
  output_w32(0x2a000000 | genjmp(a));
}
1121
1122static void emit_jcc(void *a_)
1123{
1124 int a = (int)a_;
1125 assem_debug("bcc %x\n",a);
1126 u_int offset=genjmp(a);
1127 output_w32(0x3a000000|offset);
1128}
1129
1130static void emit_callreg(u_int r)
1131{
1132 assert(r<15);
1133 assem_debug("blx %s\n",regname[r]);
1134 output_w32(0xe12fff30|r);
1135}
1136
1137static void emit_jmpreg(u_int r)
1138{
1139 assem_debug("mov pc,%s\n",regname[r]);
1140 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1141}
1142
/* Emit a return: a register jump through host register 14. */
static void emit_ret(void)
{
  enum { RETURN_ADDR_REG = 14 };

  emit_jmpreg(RETURN_ADDR_REG);
}
1147
1148static void emit_readword_indexed(int offset, int rs, int rt)
1149{
1150 assert(offset>-4096&&offset<4096);
1151 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1152 if(offset>=0) {
1153 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1154 }else{
1155 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1156 }
1157}
1158
1159static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1160{
1161 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1162 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1163}
1164
1165static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1166{
1167 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1168 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1169}
1170
1171static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1172{
1173 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1174 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1175}
1176
1177static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1178{
1179 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1180 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1181}
1182
1183static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1184{
1185 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1186 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1187}
1188
1189static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1190{
1191 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1192 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1193}
1194
1195static void emit_movsbl_indexed(int offset, int rs, int rt)
1196{
1197 assert(offset>-256&&offset<256);
1198 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1199 if(offset>=0) {
1200 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1201 }else{
1202 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1203 }
1204}
1205
1206static void emit_movswl_indexed(int offset, int rs, int rt)
1207{
1208 assert(offset>-256&&offset<256);
1209 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1210 if(offset>=0) {
1211 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1212 }else{
1213 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1214 }
1215}
1216
1217static void emit_movzbl_indexed(int offset, int rs, int rt)
1218{
1219 assert(offset>-4096&&offset<4096);
1220 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1221 if(offset>=0) {
1222 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1223 }else{
1224 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1225 }
1226}
1227
1228static void emit_movzwl_indexed(int offset, int rs, int rt)
1229{
1230 assert(offset>-256&&offset<256);
1231 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1232 if(offset>=0) {
1233 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1234 }else{
1235 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1236 }
1237}
1238
1239static void emit_ldrd(int offset, int rs, int rt)
1240{
1241 assert(offset>-256&&offset<256);
1242 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1243 if(offset>=0) {
1244 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1245 }else{
1246 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1247 }
1248}
1249
1250static void emit_readword(void *addr, int rt)
1251{
1252 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1253 assert(offset<4096);
1254 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1255 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1256}
1257
1258static void emit_writeword_indexed(int rt, int offset, int rs)
1259{
1260 assert(offset>-4096&&offset<4096);
1261 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1262 if(offset>=0) {
1263 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1264 }else{
1265 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1266 }
1267}
1268
1269static void emit_writehword_indexed(int rt, int offset, int rs)
1270{
1271 assert(offset>-256&&offset<256);
1272 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1273 if(offset>=0) {
1274 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1275 }else{
1276 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1277 }
1278}
1279
1280static void emit_writebyte_indexed(int rt, int offset, int rs)
1281{
1282 assert(offset>-4096&&offset<4096);
1283 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1284 if(offset>=0) {
1285 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1286 }else{
1287 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1288 }
1289}
1290
1291static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1292{
1293 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1294 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1295}
1296
1297static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1298{
1299 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1300 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1301}
1302
1303static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1304{
1305 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1306 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1307}
1308
1309static void emit_writeword(int rt, void *addr)
1310{
1311 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1312 assert(offset<4096);
1313 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1314 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1315}
1316
1317static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1318{
1319 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1320 assert(rs1<16);
1321 assert(rs2<16);
1322 assert(hi<16);
1323 assert(lo<16);
1324 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1325}
1326
1327static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1328{
1329 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1330 assert(rs1<16);
1331 assert(rs2<16);
1332 assert(hi<16);
1333 assert(lo<16);
1334 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1335}
1336
1337static void emit_clz(int rs,int rt)
1338{
1339 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1340 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1341}
1342
1343static void emit_subcs(int rs1,int rs2,int rt)
1344{
1345 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1346 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1347}
1348
1349static void emit_shrcc_imm(int rs,u_int imm,int rt)
1350{
1351 assert(imm>0);
1352 assert(imm<32);
1353 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1354 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1355}
1356
1357static void emit_shrne_imm(int rs,u_int imm,int rt)
1358{
1359 assert(imm>0);
1360 assert(imm<32);
1361 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1362 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1363}
1364
1365static void emit_negmi(int rs, int rt)
1366{
1367 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1368 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1369}
1370
1371static void emit_negsmi(int rs, int rt)
1372{
1373 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1374 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1375}
1376
1377static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1378{
1379 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1380 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1381}
1382
1383static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1384{
1385 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1386 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1387}
1388
1389static void emit_teq(int rs, int rt)
1390{
1391 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1392 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1393}
1394
1395static void emit_rsbimm(int rs, int imm, int rt)
1396{
1397 u_int armval;
1398 genimm_checked(imm,&armval);
1399 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1400 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1401}
1402
1403// Load 2 immediates optimizing for small code size
1404static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
1405{
1406 emit_movimm(imm1,rt1);
1407 u_int armval;
1408 if(genimm(imm2-imm1,&armval)) {
1409 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
1410 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
1411 }else if(genimm(imm1-imm2,&armval)) {
1412 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
1413 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
1414 }
1415 else emit_movimm(imm2,rt2);
1416}
1417
1418// Conditionally select one of two immediates, optimizing for small code size
1419// This will only be called if HAVE_CMOV_IMM is defined
1420static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1421{
1422 u_int armval;
1423 if(genimm(imm2-imm1,&armval)) {
1424 emit_movimm(imm1,rt);
1425 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1426 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1427 }else if(genimm(imm1-imm2,&armval)) {
1428 emit_movimm(imm1,rt);
1429 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1430 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1431 }
1432 else {
1433 #ifndef HAVE_ARMV7
1434 emit_movimm(imm1,rt);
1435 add_literal((int)out,imm2);
1436 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1437 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1438 #else
1439 emit_movw(imm1&0x0000FFFF,rt);
1440 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1441 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1442 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1443 }
1444 emit_movt(imm1&0xFFFF0000,rt);
1445 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1446 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1447 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1448 }
1449 #endif
1450 }
1451}
1452
1453// special case for checking invalid_code
1454static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1455{
1456 assert(imm<128&&imm>=0);
1457 assert(r>=0&&r<16);
1458 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1459 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1460 emit_cmpimm(HOST_TEMPREG,imm);
1461}
1462
1463static void emit_callne(int a)
1464{
1465 assem_debug("blne %x\n",a);
1466 u_int offset=genjmp(a);
1467 output_w32(0x1b000000|offset);
1468}
1469
1470// Used to preload hash table entries
1471static unused void emit_prefetchreg(int r)
1472{
1473 assem_debug("pld %s\n",regname[r]);
1474 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1475}
1476
1477// Special case for mini_ht
1478static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1479{
1480 assert(offset<4096);
1481 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1482 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1483}
1484
1485static void emit_orrne_imm(int rs,int imm,int rt)
1486{
1487 u_int armval;
1488 genimm_checked(imm,&armval);
1489 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1490 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1491}
1492
1493static void emit_andne_imm(int rs,int imm,int rt)
1494{
1495 u_int armval;
1496 genimm_checked(imm,&armval);
1497 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1498 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1499}
1500
1501static unused void emit_addpl_imm(int rs,int imm,int rt)
1502{
1503 u_int armval;
1504 genimm_checked(imm,&armval);
1505 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1506 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1507}
1508
1509static void emit_jno_unlikely(int a)
1510{
1511 //emit_jno(a);
1512 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1513 output_w32(0x72800000|rd_rn_rm(15,15,0));
1514}
1515
1516static void save_regs_all(u_int reglist)
1517{
1518 int i;
1519 if(!reglist) return;
1520 assem_debug("stmia fp,{");
1521 for(i=0;i<16;i++)
1522 if(reglist&(1<<i))
1523 assem_debug("r%d,",i);
1524 assem_debug("}\n");
1525 output_w32(0xe88b0000|reglist);
1526}
1527
1528static void restore_regs_all(u_int reglist)
1529{
1530 int i;
1531 if(!reglist) return;
1532 assem_debug("ldmia fp,{");
1533 for(i=0;i<16;i++)
1534 if(reglist&(1<<i))
1535 assem_debug("r%d,",i);
1536 assem_debug("}\n");
1537 output_w32(0xe89b0000|reglist);
1538}
1539
1540// Save registers before function call
1541static void save_regs(u_int reglist)
1542{
1543 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1544 save_regs_all(reglist);
1545}
1546
1547// Restore registers after function call
1548static void restore_regs(u_int reglist)
1549{
1550 reglist&=CALLER_SAVE_REGS;
1551 restore_regs_all(reglist);
1552}
1553
1554/* Stubs/epilogue */
1555
1556static void literal_pool(int n)
1557{
1558 if(!literalcount) return;
1559 if(n) {
1560 if((int)out-literals[0][0]<4096-n) return;
1561 }
1562 u_int *ptr;
1563 int i;
1564 for(i=0;i<literalcount;i++)
1565 {
1566 u_int l_addr=(u_int)out;
1567 int j;
1568 for(j=0;j<i;j++) {
1569 if(literals[j][1]==literals[i][1]) {
1570 //printf("dup %08x\n",literals[i][1]);
1571 l_addr=literals[j][0];
1572 break;
1573 }
1574 }
1575 ptr=(u_int *)literals[i][0];
1576 u_int offset=l_addr-(u_int)ptr-8;
1577 assert(offset<4096);
1578 assert(!(offset&3));
1579 *ptr|=offset;
1580 if(l_addr==(u_int)out) {
1581 literals[i][0]=l_addr; // remember for dupes
1582 output_w32(literals[i][1]);
1583 }
1584 }
1585 literalcount=0;
1586}
1587
1588static void literal_pool_jumpover(int n)
1589{
1590 if(!literalcount) return;
1591 if(n) {
1592 if((int)out-literals[0][0]<4096-n) return;
1593 }
1594 void *jaddr = out;
1595 emit_jmp(0);
1596 literal_pool(0);
1597 set_jump_target(jaddr, out);
1598}
1599
1600static void emit_extjump2(u_char *addr, int target, void *linker)
1601{
1602 u_char *ptr=(u_char *)addr;
1603 assert((ptr[3]&0x0e)==0xa);
1604 (void)ptr;
1605
1606 emit_loadlp(target,0);
1607 emit_loadlp((u_int)addr,1);
1608 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
1609 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1610//DEBUG >
1611#ifdef DEBUG_CYCLE_COUNT
1612 emit_readword(&last_count,ECX);
1613 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1614 emit_readword(&next_interupt,ECX);
1615 emit_writeword(HOST_CCREG,&Count);
1616 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1617 emit_writeword(ECX,&last_count);
1618#endif
1619//DEBUG <
1620 emit_jmp(linker);
1621}
1622
1623static void emit_extjump(void *addr, int target)
1624{
1625 emit_extjump2(addr, target, dyna_linker);
1626}
1627
1628static void emit_extjump_ds(void *addr, int target)
1629{
1630 emit_extjump2(addr, target, dyna_linker_ds);
1631}
1632
1633// put rt_val into rt, potentially making use of rs with value rs_val
1634static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1635{
1636 u_int armval;
1637 int diff;
1638 if(genimm(rt_val,&armval)) {
1639 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1640 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1641 return;
1642 }
1643 if(genimm(~rt_val,&armval)) {
1644 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1645 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1646 return;
1647 }
1648 diff=rt_val-rs_val;
1649 if(genimm(diff,&armval)) {
1650 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1651 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1652 return;
1653 }else if(genimm(-diff,&armval)) {
1654 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1655 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1656 return;
1657 }
1658 emit_movimm(rt_val,rt);
1659}
1660
// Return 1 if emit_movimm_from() can derive v2 from v1 cheaply: the values
// are equal, or their difference (in either direction) shrinks to fewer
// than 9 significant bits once trailing pairs of zero bits are dropped.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if (v1 == v2)
    return 1;

  delta = v2 - v1;

  bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return (bits < 0x100) ? 1 : 0;
}
1676
// Move host registers a0 and a1 into r0 and r1 for a call.
// A negative register number means "nothing to pass" for that argument.
// trashes r2 (used as scratch when the two sources must be swapped)
static void pass_args(int a0, int a1)
{
  if (a0 == 1 && a1 == 0) {
    // must swap
    emit_mov(a0, 2);
    emit_mov(a1, 1);
    emit_mov(2, 0);
    return;
  }

  if (a0 != 0 && a1 == 0) {
    // r0 holds the second argument: move it out before r0 is overwritten
    emit_mov(a1, 1);
    if (a0 >= 0)
      emit_mov(a0, 0);
    return;
  }

  if (a0 > 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
1693
1694static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1695{
1696 switch(type) {
1697 case LOADB_STUB: emit_signextend8(rs,rt); break;
1698 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1699 case LOADH_STUB: emit_signextend16(rs,rt); break;
1700 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1701 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1702 default: assert(0);
1703 }
1704}
1705
1706#include "pcsxmem.h"
1707#include "pcsxmem_inline.c"
1708
1709static void do_readstub(int n)
1710{
1711 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1712 literal_pool(256);
1713 set_jump_target(stubs[n].addr, out);
1714 enum stub_type type=stubs[n].type;
1715 int i=stubs[n].a;
1716 int rs=stubs[n].b;
1717 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1718 u_int reglist=stubs[n].e;
1719 signed char *i_regmap=i_regs->regmap;
1720 int rt;
1721 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1722 rt=get_reg(i_regmap,FTEMP);
1723 }else{
1724 rt=get_reg(i_regmap,rt1[i]);
1725 }
1726 assert(rs>=0);
1727 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1728 void *restore_jump = NULL;
1729 reglist|=(1<<rs);
1730 for(r=0;r<=12;r++) {
1731 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1732 temp=r; break;
1733 }
1734 }
1735 if(rt>=0&&rt1[i]!=0)
1736 reglist&=~(1<<rt);
1737 if(temp==-1) {
1738 save_regs(reglist);
1739 regs_saved=1;
1740 temp=(rs==0)?2:0;
1741 }
1742 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1743 temp2=1;
1744 emit_readword(&mem_rtab,temp);
1745 emit_shrimm(rs,12,temp2);
1746 emit_readword_dualindexedx4(temp,temp2,temp2);
1747 emit_lsls_imm(temp2,1,temp2);
1748 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1749 switch(type) {
1750 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1751 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1752 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1753 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1754 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1755 default: assert(0);
1756 }
1757 }
1758 if(regs_saved) {
1759 restore_jump=out;
1760 emit_jcc(0); // jump to reg restore
1761 }
1762 else
1763 emit_jcc(stubs[n].retaddr); // return address
1764
1765 if(!regs_saved)
1766 save_regs(reglist);
1767 void *handler=NULL;
1768 if(type==LOADB_STUB||type==LOADBU_STUB)
1769 handler=jump_handler_read8;
1770 if(type==LOADH_STUB||type==LOADHU_STUB)
1771 handler=jump_handler_read16;
1772 if(type==LOADW_STUB)
1773 handler=jump_handler_read32;
1774 assert(handler);
1775 pass_args(rs,temp2);
1776 int cc=get_reg(i_regmap,CCREG);
1777 if(cc<0)
1778 emit_loadreg(CCREG,2);
1779 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1780 emit_call(handler);
1781 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1782 mov_loadtype_adj(type,0,rt);
1783 }
1784 if(restore_jump)
1785 set_jump_target(restore_jump, out);
1786 restore_regs(reglist);
1787 emit_jmp(stubs[n].retaddr); // return address
1788}
1789
1790// return memhandler, or get directly accessable address and return 0
1791static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
1792{
1793 u_int l1,l2=0;
1794 l1=((u_int *)table)[addr>>12];
1795 if((l1&(1<<31))==0) {
1796 u_int v=l1<<1;
1797 *addr_host=v+addr;
1798 return NULL;
1799 }
1800 else {
1801 l1<<=1;
1802 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
1803 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
1804 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
1805 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
1806 else
1807 l2=((u_int *)l1)[(addr&0xfff)/4];
1808 if((l2&(1<<31))==0) {
1809 u_int v=l2<<1;
1810 *addr_host=v+(addr&0xfff);
1811 return NULL;
1812 }
1813 return (void *)(l2<<1);
1814 }
1815}
1816
1817static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1818{
1819 int rs=get_reg(regmap,target);
1820 int rt=get_reg(regmap,target);
1821 if(rs<0) rs=get_reg(regmap,-1);
1822 assert(rs>=0);
1823 u_int host_addr=0,is_dynamic,far_call=0;
1824 void *handler;
1825 int cc=get_reg(regmap,CCREG);
1826 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1827 return;
1828 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1829 if (handler == NULL) {
1830 if(rt<0||rt1[i]==0)
1831 return;
1832 if(addr!=host_addr)
1833 emit_movimm_from(addr,rs,host_addr,rs);
1834 switch(type) {
1835 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1836 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1837 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1838 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1839 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1840 default: assert(0);
1841 }
1842 return;
1843 }
1844 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1845 if(is_dynamic) {
1846 if(type==LOADB_STUB||type==LOADBU_STUB)
1847 handler=jump_handler_read8;
1848 if(type==LOADH_STUB||type==LOADHU_STUB)
1849 handler=jump_handler_read16;
1850 if(type==LOADW_STUB)
1851 handler=jump_handler_read32;
1852 }
1853
1854 // call a memhandler
1855 if(rt>=0&&rt1[i]!=0)
1856 reglist&=~(1<<rt);
1857 save_regs(reglist);
1858 if(target==0)
1859 emit_movimm(addr,0);
1860 else if(rs!=0)
1861 emit_mov(rs,0);
1862 int offset=(u_char *)handler-out-8;
1863 if(offset<-33554432||offset>=33554432) {
1864 // unreachable memhandler, a plugin func perhaps
1865 emit_movimm((u_int)handler,12);
1866 far_call=1;
1867 }
1868 if(cc<0)
1869 emit_loadreg(CCREG,2);
1870 if(is_dynamic) {
1871 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1872 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1873 }
1874 else {
1875 emit_readword(&last_count,3);
1876 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1877 emit_add(2,3,2);
1878 emit_writeword(2,&Count);
1879 }
1880
1881 if(far_call)
1882 emit_callreg(12);
1883 else
1884 emit_call(handler);
1885
1886 if(rt>=0&&rt1[i]!=0) {
1887 switch(type) {
1888 case LOADB_STUB: emit_signextend8(0,rt); break;
1889 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1890 case LOADH_STUB: emit_signextend16(0,rt); break;
1891 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1892 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1893 default: assert(0);
1894 }
1895 }
1896 restore_regs(reglist);
1897}
1898
1899static void do_writestub(int n)
1900{
1901 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1902 literal_pool(256);
1903 set_jump_target(stubs[n].addr, out);
1904 enum stub_type type=stubs[n].type;
1905 int i=stubs[n].a;
1906 int rs=stubs[n].b;
1907 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1908 u_int reglist=stubs[n].e;
1909 signed char *i_regmap=i_regs->regmap;
1910 int rt,r;
1911 if(itype[i]==C1LS||itype[i]==C2LS) {
1912 rt=get_reg(i_regmap,r=FTEMP);
1913 }else{
1914 rt=get_reg(i_regmap,r=rs2[i]);
1915 }
1916 assert(rs>=0);
1917 assert(rt>=0);
1918 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1919 void *restore_jump = NULL;
1920 int reglist2=reglist|(1<<rs)|(1<<rt);
1921 for(rtmp=0;rtmp<=12;rtmp++) {
1922 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1923 temp=rtmp; break;
1924 }
1925 }
1926 if(temp==-1) {
1927 save_regs(reglist);
1928 regs_saved=1;
1929 for(rtmp=0;rtmp<=3;rtmp++)
1930 if(rtmp!=rs&&rtmp!=rt)
1931 {temp=rtmp;break;}
1932 }
1933 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1934 temp2=3;
1935 emit_readword(&mem_wtab,temp);
1936 emit_shrimm(rs,12,temp2);
1937 emit_readword_dualindexedx4(temp,temp2,temp2);
1938 emit_lsls_imm(temp2,1,temp2);
1939 switch(type) {
1940 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1941 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1942 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1943 default: assert(0);
1944 }
1945 if(regs_saved) {
1946 restore_jump=out;
1947 emit_jcc(0); // jump to reg restore
1948 }
1949 else
1950 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1951
1952 if(!regs_saved)
1953 save_regs(reglist);
1954 void *handler=NULL;
1955 switch(type) {
1956 case STOREB_STUB: handler=jump_handler_write8; break;
1957 case STOREH_STUB: handler=jump_handler_write16; break;
1958 case STOREW_STUB: handler=jump_handler_write32; break;
1959 default: assert(0);
1960 }
1961 assert(handler);
1962 pass_args(rs,rt);
1963 if(temp2!=3)
1964 emit_mov(temp2,3);
1965 int cc=get_reg(i_regmap,CCREG);
1966 if(cc<0)
1967 emit_loadreg(CCREG,2);
1968 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1969 // returns new cycle_count
1970 emit_call(handler);
1971 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1972 if(cc<0)
1973 emit_storereg(CCREG,2);
1974 if(restore_jump)
1975 set_jump_target(restore_jump, out);
1976 restore_regs(reglist);
1977 emit_jmp(stubs[n].retaddr);
1978}
1979
1980static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1981{
1982 int rs=get_reg(regmap,-1);
1983 int rt=get_reg(regmap,target);
1984 assert(rs>=0);
1985 assert(rt>=0);
1986 u_int host_addr=0;
1987 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1988 if (handler == NULL) {
1989 if(addr!=host_addr)
1990 emit_movimm_from(addr,rs,host_addr,rs);
1991 switch(type) {
1992 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1993 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1994 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1995 default: assert(0);
1996 }
1997 return;
1998 }
1999
2000 // call a memhandler
2001 save_regs(reglist);
2002 pass_args(rs,rt);
2003 int cc=get_reg(regmap,CCREG);
2004 if(cc<0)
2005 emit_loadreg(CCREG,2);
2006 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2007 emit_movimm((u_int)handler,3);
2008 // returns new cycle_count
2009 emit_call(jump_handler_write_h);
2010 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2011 if(cc<0)
2012 emit_storereg(CCREG,2);
2013 restore_regs(reglist);
2014}
2015
2016static void do_unalignedwritestub(int n)
2017{
2018 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
2019 literal_pool(256);
2020 set_jump_target(stubs[n].addr, out);
2021
2022 int i=stubs[n].a;
2023 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2024 int addr=stubs[n].b;
2025 u_int reglist=stubs[n].e;
2026 signed char *i_regmap=i_regs->regmap;
2027 int temp2=get_reg(i_regmap,FTEMP);
2028 int rt;
2029 rt=get_reg(i_regmap,rs2[i]);
2030 assert(rt>=0);
2031 assert(addr>=0);
2032 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2033 reglist|=(1<<addr);
2034 reglist&=~(1<<temp2);
2035
2036#if 1
2037 // don't bother with it and call write handler
2038 save_regs(reglist);
2039 pass_args(addr,rt);
2040 int cc=get_reg(i_regmap,CCREG);
2041 if(cc<0)
2042 emit_loadreg(CCREG,2);
2043 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2044 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2045 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2046 if(cc<0)
2047 emit_storereg(CCREG,2);
2048 restore_regs(reglist);
2049 emit_jmp(stubs[n].retaddr); // return address
2050#else
2051 emit_andimm(addr,0xfffffffc,temp2);
2052 emit_writeword(temp2,&address);
2053
2054 save_regs(reglist);
2055 emit_shrimm(addr,16,1);
2056 int cc=get_reg(i_regmap,CCREG);
2057 if(cc<0) {
2058 emit_loadreg(CCREG,2);
2059 }
2060 emit_movimm((u_int)readmem,0);
2061 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
2062 emit_call((int)&indirect_jump_indexed);
2063 restore_regs(reglist);
2064
2065 emit_readword(&readmem_dword,temp2);
2066 int temp=addr; //hmh
2067 emit_shlimm(addr,3,temp);
2068 emit_andimm(temp,24,temp);
2069#ifdef BIG_ENDIAN_MIPS
2070 if (opcode[i]==0x2e) // SWR
2071#else
2072 if (opcode[i]==0x2a) // SWL
2073#endif
2074 emit_xorimm(temp,24,temp);
2075 emit_movimm(-1,HOST_TEMPREG);
2076 if (opcode[i]==0x2a) { // SWL
2077 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2078 emit_orrshr(rt,temp,temp2);
2079 }else{
2080 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2081 emit_orrshl(rt,temp,temp2);
2082 }
2083 emit_readword(&address,addr);
2084 emit_writeword(temp2,&word);
2085 //save_regs(reglist); // don't need to, no state changes
2086 emit_shrimm(addr,16,1);
2087 emit_movimm((u_int)writemem,0);
2088 //emit_call((int)&indirect_jump_indexed);
2089 emit_mov(15,14);
2090 emit_readword_dualindexedx4(0,1,15);
2091 emit_readword(&Count,HOST_TEMPREG);
2092 emit_readword(&next_interupt,2);
2093 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
2094 emit_writeword(2,&last_count);
2095 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2096 if(cc<0) {
2097 emit_storereg(CCREG,HOST_TEMPREG);
2098 }
2099 restore_regs(reglist);
2100 emit_jmp(stubs[n].retaddr); // return address
2101#endif
2102}
2103
2104static void do_invstub(int n)
2105{
2106 literal_pool(20);
2107 u_int reglist=stubs[n].a;
2108 set_jump_target(stubs[n].addr, out);
2109 save_regs(reglist);
2110 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2111 emit_call(&invalidate_addr);
2112 restore_regs(reglist);
2113 emit_jmp(stubs[n].retaddr); // return address
2114}
2115
2116void *do_dirty_stub(int i)
2117{
2118 assem_debug("do_dirty_stub %x\n",start+i*4);
2119 u_int addr=(u_int)source;
2120 // Careful about the code output here, verify_dirty needs to parse it.
2121 #ifndef HAVE_ARMV7
2122 emit_loadlp(addr,1);
2123 emit_loadlp((int)copy,2);
2124 emit_loadlp(slen*4,3);
2125 #else
2126 emit_movw(addr&0x0000FFFF,1);
2127 emit_movw(((u_int)copy)&0x0000FFFF,2);
2128 emit_movt(addr&0xFFFF0000,1);
2129 emit_movt(((u_int)copy)&0xFFFF0000,2);
2130 emit_movw(slen*4,3);
2131 #endif
2132 emit_movimm(start+i*4,0);
2133 emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
2134 void *entry = out;
2135 load_regs_entry(i);
2136 if (entry == out)
2137 entry = instr_addr[i];
2138 emit_jmp(instr_addr[i]);
2139 return entry;
2140}
2141
2142static void do_dirty_stub_ds()
2143{
2144 // Careful about the code output here, verify_dirty needs to parse it.
2145 #ifndef HAVE_ARMV7
2146 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2147 emit_loadlp((int)copy,2);
2148 emit_loadlp(slen*4,3);
2149 #else
2150 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2151 emit_movw(((u_int)copy)&0x0000FFFF,2);
2152 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2153 emit_movt(((u_int)copy)&0xFFFF0000,2);
2154 emit_movw(slen*4,3);
2155 #endif
2156 emit_movimm(start+1,0);
2157 emit_call(&verify_code_ds);
2158}
2159
2160/* Special assem */
2161
2162static void shift_assemble_arm(int i,struct regstat *i_regs)
2163{
2164 if(rt1[i]) {
2165 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2166 {
2167 signed char s,t,shift;
2168 t=get_reg(i_regs->regmap,rt1[i]);
2169 s=get_reg(i_regs->regmap,rs1[i]);
2170 shift=get_reg(i_regs->regmap,rs2[i]);
2171 if(t>=0){
2172 if(rs1[i]==0)
2173 {
2174 emit_zeroreg(t);
2175 }
2176 else if(rs2[i]==0)
2177 {
2178 assert(s>=0);
2179 if(s!=t) emit_mov(s,t);
2180 }
2181 else
2182 {
2183 emit_andimm(shift,31,HOST_TEMPREG);
2184 if(opcode2[i]==4) // SLLV
2185 {
2186 emit_shl(s,HOST_TEMPREG,t);
2187 }
2188 if(opcode2[i]==6) // SRLV
2189 {
2190 emit_shr(s,HOST_TEMPREG,t);
2191 }
2192 if(opcode2[i]==7) // SRAV
2193 {
2194 emit_sar(s,HOST_TEMPREG,t);
2195 }
2196 }
2197 }
2198 } else { // DSLLV/DSRLV/DSRAV
2199 signed char sh,sl,th,tl,shift;
2200 th=get_reg(i_regs->regmap,rt1[i]|64);
2201 tl=get_reg(i_regs->regmap,rt1[i]);
2202 sh=get_reg(i_regs->regmap,rs1[i]|64);
2203 sl=get_reg(i_regs->regmap,rs1[i]);
2204 shift=get_reg(i_regs->regmap,rs2[i]);
2205 if(tl>=0){
2206 if(rs1[i]==0)
2207 {
2208 emit_zeroreg(tl);
2209 if(th>=0) emit_zeroreg(th);
2210 }
2211 else if(rs2[i]==0)
2212 {
2213 assert(sl>=0);
2214 if(sl!=tl) emit_mov(sl,tl);
2215 if(th>=0&&sh!=th) emit_mov(sh,th);
2216 }
2217 else
2218 {
2219 // FIXME: What if shift==tl ?
2220 assert(shift!=tl);
2221 int temp=get_reg(i_regs->regmap,-1);
2222 int real_th=th;
2223 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2224 assert(sl>=0);
2225 assert(sh>=0);
2226 emit_andimm(shift,31,HOST_TEMPREG);
2227 if(opcode2[i]==0x14) // DSLLV
2228 {
2229 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2230 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2231 emit_orrshr(sl,HOST_TEMPREG,th);
2232 emit_andimm(shift,31,HOST_TEMPREG);
2233 emit_testimm(shift,32);
2234 emit_shl(sl,HOST_TEMPREG,tl);
2235 if(th>=0) emit_cmovne_reg(tl,th);
2236 emit_cmovne_imm(0,tl);
2237 }
2238 if(opcode2[i]==0x16) // DSRLV
2239 {
2240 assert(th>=0);
2241 emit_shr(sl,HOST_TEMPREG,tl);
2242 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2243 emit_orrshl(sh,HOST_TEMPREG,tl);
2244 emit_andimm(shift,31,HOST_TEMPREG);
2245 emit_testimm(shift,32);
2246 emit_shr(sh,HOST_TEMPREG,th);
2247 emit_cmovne_reg(th,tl);
2248 if(real_th>=0) emit_cmovne_imm(0,th);
2249 }
2250 if(opcode2[i]==0x17) // DSRAV
2251 {
2252 assert(th>=0);
2253 emit_shr(sl,HOST_TEMPREG,tl);
2254 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2255 if(real_th>=0) {
2256 assert(temp>=0);
2257 emit_sarimm(th,31,temp);
2258 }
2259 emit_orrshl(sh,HOST_TEMPREG,tl);
2260 emit_andimm(shift,31,HOST_TEMPREG);
2261 emit_testimm(shift,32);
2262 emit_sar(sh,HOST_TEMPREG,th);
2263 emit_cmovne_reg(th,tl);
2264 if(real_th>=0) emit_cmovne_reg(temp,th);
2265 }
2266 }
2267 }
2268 }
2269 }
2270}
2271#define shift_assemble shift_assemble_arm
2272
2273static void loadlr_assemble_arm(int i,struct regstat *i_regs)
2274{
2275 int s,tl,temp,temp2,addr;
2276 int offset;
2277 void *jaddr=0;
2278 int memtarget=0,c=0;
2279 int fastload_reg_override=0;
2280 u_int hr,reglist=0;
2281 tl=get_reg(i_regs->regmap,rt1[i]);
2282 s=get_reg(i_regs->regmap,rs1[i]);
2283 temp=get_reg(i_regs->regmap,-1);
2284 temp2=get_reg(i_regs->regmap,FTEMP);
2285 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
2286 assert(addr<0);
2287 offset=imm[i];
2288 for(hr=0;hr<HOST_REGS;hr++) {
2289 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2290 }
2291 reglist|=1<<temp;
2292 if(offset||s<0||c) addr=temp2;
2293 else addr=s;
2294 if(s>=0) {
2295 c=(i_regs->wasconst>>s)&1;
2296 if(c) {
2297 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
2298 }
2299 }
2300 if(!c) {
2301 emit_shlimm(addr,3,temp);
2302 if (opcode[i]==0x22||opcode[i]==0x26) {
2303 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
2304 }else{
2305 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
2306 }
2307 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
2308 }
2309 else {
2310 if(ram_offset&&memtarget) {
2311 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
2312 fastload_reg_override=HOST_TEMPREG;
2313 }
2314 if (opcode[i]==0x22||opcode[i]==0x26) {
2315 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
2316 }else{
2317 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
2318 }
2319 }
2320 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
2321 if(!c||memtarget) {
2322 int a=temp2;
2323 if(fastload_reg_override) a=fastload_reg_override;
2324 emit_readword_indexed(0,a,temp2);
2325 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
2326 }
2327 else
2328 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
2329 if(rt1[i]) {
2330 assert(tl>=0);
2331 emit_andimm(temp,24,temp);
2332#ifdef BIG_ENDIAN_MIPS
2333 if (opcode[i]==0x26) // LWR
2334#else
2335 if (opcode[i]==0x22) // LWL
2336#endif
2337 emit_xorimm(temp,24,temp);
2338 emit_movimm(-1,HOST_TEMPREG);
2339 if (opcode[i]==0x26) {
2340 emit_shr(temp2,temp,temp2);
2341 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
2342 }else{
2343 emit_shl(temp2,temp,temp2);
2344 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
2345 }
2346 emit_or(temp2,tl,tl);
2347 }
2348 //emit_storereg(rt1[i],tl); // DEBUG
2349 }
2350 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2351 assert(0);
2352 }
2353}
2354#define loadlr_assemble loadlr_assemble_arm
2355
2356static void c2op_prologue(u_int op,u_int reglist)
2357{
2358 save_regs_all(reglist);
2359#ifdef PCNT
2360 emit_movimm(op,0);
2361 emit_call((int)pcnt_gte_start);
2362#endif
2363 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
2364}
2365
2366static void c2op_epilogue(u_int op,u_int reglist)
2367{
2368#ifdef PCNT
2369 emit_movimm(op,0);
2370 emit_call((int)pcnt_gte_end);
2371#endif
2372 restore_regs_all(reglist);
2373}
2374
2375static void c2op_call_MACtoIR(int lm,int need_flags)
2376{
2377 if(need_flags)
2378 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2379 else
2380 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2381}
2382
2383static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2384{
2385 emit_call(func);
2386 // func is C code and trashes r0
2387 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2388 if(need_flags||need_ir)
2389 c2op_call_MACtoIR(lm,need_flags);
2390 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2391}
2392
2393static void c2op_assemble(int i,struct regstat *i_regs)
2394{
2395 u_int c2op=source[i]&0x3f;
2396 u_int hr,reglist_full=0,reglist;
2397 int need_flags,need_ir;
2398 for(hr=0;hr<HOST_REGS;hr++) {
2399 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
2400 }
2401 reglist=reglist_full&CALLER_SAVE_REGS;
2402
2403 if (gte_handlers[c2op]!=NULL) {
2404 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2405 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2406 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2407 source[i],gte_unneeded[i+1],need_flags,need_ir);
2408 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
2409 need_flags=0;
2410 int shift = (source[i] >> 19) & 1;
2411 int lm = (source[i] >> 10) & 1;
2412 switch(c2op) {
2413#ifndef DRC_DBG
2414 case GTE_MVMVA: {
2415#ifdef HAVE_ARMV5
2416 int v = (source[i] >> 15) & 3;
2417 int cv = (source[i] >> 13) & 3;
2418 int mx = (source[i] >> 17) & 3;
2419 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2420 c2op_prologue(c2op,reglist);
2421 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2422 if(v<3)
2423 emit_ldrd(v*8,0,4);
2424 else {
2425 emit_movzwl_indexed(9*4,0,4); // gteIR
2426 emit_movzwl_indexed(10*4,0,6);
2427 emit_movzwl_indexed(11*4,0,5);
2428 emit_orrshl_imm(6,16,4);
2429 }
2430 if(mx<3)
2431 emit_addimm(0,32*4+mx*8*4,6);
2432 else
2433 emit_readword(&zeromem_ptr,6);
2434 if(cv<3)
2435 emit_addimm(0,32*4+(cv*8+5)*4,7);
2436 else
2437 emit_readword(&zeromem_ptr,7);
2438#ifdef __ARM_NEON__
2439 emit_movimm(source[i],1); // opcode
2440 emit_call(gteMVMVA_part_neon);
2441 if(need_flags) {
2442 emit_movimm(lm,1);
2443 emit_call(gteMACtoIR_flags_neon);
2444 }
2445#else
2446 if(cv==3&&shift)
2447 emit_call((int)gteMVMVA_part_cv3sh12_arm);
2448 else {
2449 emit_movimm(shift,1);
2450 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2451 }
2452 if(need_flags||need_ir)
2453 c2op_call_MACtoIR(lm,need_flags);
2454#endif
2455#else /* if not HAVE_ARMV5 */
2456 c2op_prologue(c2op,reglist);
2457 emit_movimm(source[i],1); // opcode
2458 emit_writeword(1,&psxRegs.code);
2459 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
2460#endif
2461 break;
2462 }
2463 case GTE_OP:
2464 c2op_prologue(c2op,reglist);
2465 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
2466 if(need_flags||need_ir) {
2467 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2468 c2op_call_MACtoIR(lm,need_flags);
2469 }
2470 break;
2471 case GTE_DPCS:
2472 c2op_prologue(c2op,reglist);
2473 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2474 break;
2475 case GTE_INTPL:
2476 c2op_prologue(c2op,reglist);
2477 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2478 break;
2479 case GTE_SQR:
2480 c2op_prologue(c2op,reglist);
2481 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2482 if(need_flags||need_ir) {
2483 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2484 c2op_call_MACtoIR(lm,need_flags);
2485 }
2486 break;
2487 case GTE_DCPL:
2488 c2op_prologue(c2op,reglist);
2489 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2490 break;
2491 case GTE_GPF:
2492 c2op_prologue(c2op,reglist);
2493 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2494 break;
2495 case GTE_GPL:
2496 c2op_prologue(c2op,reglist);
2497 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2498 break;
2499#endif
2500 default:
2501 c2op_prologue(c2op,reglist);
2502#ifdef DRC_DBG
2503 emit_movimm(source[i],1); // opcode
2504 emit_writeword(1,&psxRegs.code);
2505#endif
2506 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2507 break;
2508 }
2509 c2op_epilogue(c2op,reglist);
2510 }
2511}
2512
2513static void multdiv_assemble_arm(int i,struct regstat *i_regs)
2514{
2515 // case 0x18: MULT
2516 // case 0x19: MULTU
2517 // case 0x1A: DIV
2518 // case 0x1B: DIVU
2519 // case 0x1C: DMULT
2520 // case 0x1D: DMULTU
2521 // case 0x1E: DDIV
2522 // case 0x1F: DDIVU
2523 if(rs1[i]&&rs2[i])
2524 {
2525 if((opcode2[i]&4)==0) // 32-bit
2526 {
2527 if(opcode2[i]==0x18) // MULT
2528 {
2529 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2530 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2531 signed char hi=get_reg(i_regs->regmap,HIREG);
2532 signed char lo=get_reg(i_regs->regmap,LOREG);
2533 assert(m1>=0);
2534 assert(m2>=0);
2535 assert(hi>=0);
2536 assert(lo>=0);
2537 emit_smull(m1,m2,hi,lo);
2538 }
2539 if(opcode2[i]==0x19) // MULTU
2540 {
2541 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2542 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2543 signed char hi=get_reg(i_regs->regmap,HIREG);
2544 signed char lo=get_reg(i_regs->regmap,LOREG);
2545 assert(m1>=0);
2546 assert(m2>=0);
2547 assert(hi>=0);
2548 assert(lo>=0);
2549 emit_umull(m1,m2,hi,lo);
2550 }
2551 if(opcode2[i]==0x1A) // DIV
2552 {
2553 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2554 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2555 assert(d1>=0);
2556 assert(d2>=0);
2557 signed char quotient=get_reg(i_regs->regmap,LOREG);
2558 signed char remainder=get_reg(i_regs->regmap,HIREG);
2559 assert(quotient>=0);
2560 assert(remainder>=0);
2561 emit_movs(d1,remainder);
2562 emit_movimm(0xffffffff,quotient);
2563 emit_negmi(quotient,quotient); // .. quotient and ..
2564 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2565 emit_movs(d2,HOST_TEMPREG);
2566 emit_jeq((int)out+52); // Division by zero
2567 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2568#ifdef HAVE_ARMV5
2569 emit_clz(HOST_TEMPREG,quotient);
2570 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2571#else
2572 emit_movimm(0,quotient);
2573 emit_addpl_imm(quotient,1,quotient);
2574 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2575 emit_jns((int)out-2*4);
2576#endif
2577 emit_orimm(quotient,1<<31,quotient);
2578 emit_shr(quotient,quotient,quotient);
2579 emit_cmp(remainder,HOST_TEMPREG);
2580 emit_subcs(remainder,HOST_TEMPREG,remainder);
2581 emit_adcs(quotient,quotient,quotient);
2582 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2583 emit_jcc(out-16); // -4
2584 emit_teq(d1,d2);
2585 emit_negmi(quotient,quotient);
2586 emit_test(d1,d1);
2587 emit_negmi(remainder,remainder);
2588 }
2589 if(opcode2[i]==0x1B) // DIVU
2590 {
2591 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2592 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2593 assert(d1>=0);
2594 assert(d2>=0);
2595 signed char quotient=get_reg(i_regs->regmap,LOREG);
2596 signed char remainder=get_reg(i_regs->regmap,HIREG);
2597 assert(quotient>=0);
2598 assert(remainder>=0);
2599 emit_mov(d1,remainder);
2600 emit_movimm(0xffffffff,quotient); // div0 case
2601 emit_test(d2,d2);
2602 emit_jeq((int)out+40); // Division by zero
2603#ifdef HAVE_ARMV5
2604 emit_clz(d2,HOST_TEMPREG);
2605 emit_movimm(1<<31,quotient);
2606 emit_shl(d2,HOST_TEMPREG,d2);
2607#else
2608 emit_movimm(0,HOST_TEMPREG);
2609 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2610 emit_lslpls_imm(d2,1,d2);
2611 emit_jns((int)out-2*4);
2612 emit_movimm(1<<31,quotient);
2613#endif
2614 emit_shr(quotient,HOST_TEMPREG,quotient);
2615 emit_cmp(remainder,d2);
2616 emit_subcs(remainder,d2,remainder);
2617 emit_adcs(quotient,quotient,quotient);
2618 emit_shrcc_imm(d2,1,d2);
2619 emit_jcc(out-16); // -4
2620 }
2621 }
2622 else // 64-bit
2623 assert(0);
2624 }
2625 else
2626 {
2627 // Multiply by zero is zero.
2628 // MIPS does not have a divide by zero exception.
2629 // The result is undefined, we return zero.
2630 signed char hr=get_reg(i_regs->regmap,HIREG);
2631 signed char lr=get_reg(i_regs->regmap,LOREG);
2632 if(hr>=0) emit_zeroreg(hr);
2633 if(lr>=0) emit_zeroreg(lr);
2634 }
2635}
2636#define multdiv_assemble multdiv_assemble_arm
2637
// Intentionally empty on ARM.  The x86 backend uses this hook to load the
// hash mask 0xf8 into register r; on ARM the hash is computed with a single
// instruction in do_rhash(), so nothing has to be preloaded.
static void do_preload_rhash(int r) {
  (void)r;
}
2642
2643static void do_preload_rhtbl(int ht) {
2644 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2645}
2646
// Emit the mini hash table hash: rh = rs & 0xf8.
static void do_rhash(int rs,int rh) {
  emit_andimm(rs, 0xf8, rh);
}
2650
2651static void do_miniht_load(int ht,int rh) {
2652 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2653 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2654}
2655
2656static void do_miniht_jump(int rs,int rh,int ht) {
2657 emit_cmp(rh,rs);
2658 emit_ldreq_indexed(ht,4,15);
2659 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
2660 emit_mov(rs,7);
2661 emit_jmp(jump_vaddr_reg[7]);
2662 #else
2663 emit_jmp(jump_vaddr_reg[rs]);
2664 #endif
2665}
2666
2667static void do_miniht_insert(u_int return_address,int rt,int temp) {
2668 #ifndef HAVE_ARMV7
2669 emit_movimm(return_address,rt); // PC into link register
2670 add_to_linker(out,return_address,1);
2671 emit_pcreladdr(temp);
2672 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2673 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2674 #else
2675 emit_movw(return_address&0x0000FFFF,rt);
2676 add_to_linker(out,return_address,1);
2677 emit_pcreladdr(temp);
2678 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2679 emit_movt(return_address&0xFFFF0000,rt);
2680 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2681 #endif
2682}
2683
2684static void mark_clear_cache(void *target)
2685{
2686 u_long offset = (u_char *)target - translation_cache;
2687 u_int mask = 1u << ((offset >> 12) & 31);
2688 if (!(needs_clear_cache[offset >> 17] & mask)) {
2689 char *start = (char *)((u_long)target & ~4095ul);
2690 start_tcache_write(start, start + 4096);
2691 needs_clear_cache[offset >> 17] |= mask;
2692 }
2693}
2694
2695// Clearing the cache is rather slow on ARM Linux, so mark the areas
2696// that need to be cleared, and then only clear these areas once.
2697static void do_clear_cache()
2698{
2699 int i,j;
2700 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
2701 {
2702 u_int bitmap=needs_clear_cache[i];
2703 if(bitmap) {
2704 u_char *start, *end;
2705 for(j=0;j<32;j++)
2706 {
2707 if(bitmap&(1<<j)) {
2708 start=translation_cache+i*131072+j*4096;
2709 end=start+4095;
2710 j++;
2711 while(j<32) {
2712 if(bitmap&(1<<j)) {
2713 end+=4096;
2714 j++;
2715 }else{
2716 end_tcache_write(start, end);
2717 break;
2718 }
2719 }
2720 }
2721 }
2722 needs_clear_cache[i]=0;
2723 }
2724 }
2725}
2726
// CPU-architecture-specific initialization.
// Nothing is required for this backend.
static void arch_init()
{
}
2730
2731// vim:shiftwidth=2:expandtab