drc: get rid of SPAN
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#define unused __attribute__((unused))
31
32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
// Routines declared here; their definitions are not in this part of the file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN routine indexed by register number N.
// Indices 11 and 13-15 have no routine and stay 0.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
72
73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
104/* Linker */
105
106static void set_jump_target(void *addr, void *target_)
107{
108 u_int target = (u_int)target_;
109 u_char *ptr = addr;
110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
113 assert(((uintptr_t)addr&3)==0);
114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
116 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
121 assert(((uintptr_t)addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
126 assert(((uintptr_t)addr&3)==0);
127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
// This optionally copies the instruction from the target of the branch into
// the space before the branch.  Works, but the difference in speed is
// usually insignificant, so the function is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes = (u_char *)addr;
  u_int *insn = (u_int *)bytes;
  assert(!copy || insn[-1] == 0xe28dd000);

  if (bytes[3] == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((bytes[3] & 0x0e) == 0xa);
  const u_int target_insn = *(u_int *)target;
  // Do not copy: ALU op with no immediate and no flags, a load,
  // or any instruction with bit 27 set.
  if ((target_insn & 0x0e100000) == 0
      || (target_insn & 0x0c100000) == 0x04100000
      || (target_insn & 0x08000000))
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
172
173/* Literal pool */
174static void add_literal(int addr,int val)
175{
176 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
177 literals[literalcount][0]=addr;
178 literals[literalcount][1]=val;
179 literalcount++;
180}
181
182// from a pointer to external jump stub (which was produced by emit_extjump2)
183// find where the jumping insn is
184static void *find_extjump_insn(void *stub)
185{
186 int *ptr=(int *)(stub+4);
187 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
188 u_int offset=*ptr&0xfff;
189 void **l_ptr=(void *)ptr+offset+8;
190 return *l_ptr;
191}
192
193// find where external branch is liked to using addr of it's stub:
194// get address that insn one after stub loads (dyna_linker arg1),
195// treat it as a pointer to branch insn,
196// return addr where that branch jumps to
197static void *get_pointer(void *stub)
198{
199 //printf("get_pointer(%x)\n",(int)stub);
200 int *i_ptr=find_extjump_insn(stub);
201 assert((*i_ptr&0x0f000000)==0x0a000000); // b
202 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
203}
204
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.  If the word after the
// call is an unconditional branch, its destination is returned instead.
static void *get_clean_addr(void *addr)
{
#ifdef HAVE_ARMV7
  enum { words_before_call = 6 };
#else
  enum { words_before_call = 4 };
#endif
  signed int *insn = (signed int *)addr + words_before_call;

  // the call may sit one word further on
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;

  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
223
224static int verify_dirty(const u_int *ptr)
225{
226 #ifndef HAVE_ARMV7
227 u_int offset;
228 // get from literal pool
229 assert((*ptr&0xFFFF0000)==0xe59f0000);
230 offset=*ptr&0xfff;
231 u_int source=*(u_int*)((void *)ptr+offset+8);
232 ptr++;
233 assert((*ptr&0xFFFF0000)==0xe59f0000);
234 offset=*ptr&0xfff;
235 u_int copy=*(u_int*)((void *)ptr+offset+8);
236 ptr++;
237 assert((*ptr&0xFFFF0000)==0xe59f0000);
238 offset=*ptr&0xfff;
239 u_int len=*(u_int*)((void *)ptr+offset+8);
240 ptr++;
241 ptr++;
242 #else
243 // ARMv7 movw/movt
244 assert((*ptr&0xFFF00000)==0xe3000000);
245 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
246 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
247 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
248 ptr+=6;
249 #endif
250 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
251 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258static int isclean(void *addr)
259{
260 #ifndef HAVE_ARMV7
261 u_int *ptr=((u_int *)addr)+4;
262 #else
263 u_int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 return 1;
269}
270
271// get source that block at addr was compiled from (host pointers)
272static void get_bounds(void *addr, u_char **start, u_char **end)
273{
274 u_int *ptr = addr;
275 #ifndef HAVE_ARMV7
276 u_int offset;
277 // get from literal pool
278 assert((*ptr&0xFFFF0000)==0xe59f0000);
279 offset=*ptr&0xfff;
280 u_int source=*(u_int*)((void *)ptr+offset+8);
281 ptr++;
282 //assert((*ptr&0xFFFF0000)==0xe59f0000);
283 //offset=*ptr&0xfff;
284 //u_int copy=*(u_int*)((void *)ptr+offset+8);
285 ptr++;
286 assert((*ptr&0xFFFF0000)==0xe59f0000);
287 offset=*ptr&0xfff;
288 u_int len=*(u_int*)((void *)ptr+offset+8);
289 ptr++;
290 ptr++;
291 #else
292 // ARMv7 movw/movt
293 assert((*ptr&0xFFF00000)==0xe3000000);
294 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
295 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
296 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
297 ptr+=6;
298 #endif
299 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
300 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
301 *start=(u_char *)source;
302 *end=(u_char *)source+len;
303}
304
305// Allocate a specific ARM register.
306static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
307{
308 int n;
309 int dirty=0;
310
311 // see if it's already allocated (and dealloc it)
312 for(n=0;n<HOST_REGS;n++)
313 {
314 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
315 dirty=(cur->dirty>>n)&1;
316 cur->regmap[n]=-1;
317 }
318 }
319
320 cur->regmap[hr]=reg;
321 cur->dirty&=~(1<<hr);
322 cur->dirty|=dirty<<hr;
323 cur->isconst&=~(1<<hr);
324}
325
326// Alloc cycle count into dedicated register
327static void alloc_cc(struct regstat *cur,int i)
328{
329 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
330}
331
332/* Assembler */
333
334static unused char regname[16][4] = {
335 "r0",
336 "r1",
337 "r2",
338 "r3",
339 "r4",
340 "r5",
341 "r6",
342 "r7",
343 "r8",
344 "r9",
345 "r10",
346 "fp",
347 "r12",
348 "sp",
349 "lr",
350 "pc"};
351
352static void output_w32(u_int word)
353{
354 *((u_int *)out)=word;
355 out+=4;
356}
357
// Pack three register numbers into instruction fields:
// rn at bit 16, rd at bit 12, rm at bit 0.  Each must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  const u_int rn_field = rn << 16;
  const u_int rd_field = rd << 12;
  return rn_field | rd_field | rm;
}
365
// Pack rd, rn and an 8-bit immediate shifted left by an even amount:
// rn at bit 16, rd at bit 12, imm in the low 8 bits, and ((32-shift)&30)
// placed at bit 7 upwards as the rotate field.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate_field = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rotate_field | imm;
}
374
// Try to express imm as an 8-bit value with an even rotation.
// On success stores (rotate bits | 8-bit value) in *encoded and returns 1.
// On failure returns 0 and leaves *encoded at 0.  imm == 0 always succeeds
// with an encoding of 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;
  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
390
391static void genimm_checked(u_int imm,u_int *encoded)
392{
393 u_int ret=genimm(imm,encoded);
394 assert(ret);
395 (void)ret;
396}
397
398static u_int genjmp(u_int addr)
399{
400 if (addr < 3) return 0; // a branch that will be patched later
401 int offset = addr-(int)out-8;
402 if (offset < -33554432 || offset >= 33554432) {
403 SysPrintf("genjmp: out of range: %08x\n", offset);
404 abort();
405 return 0;
406 }
407 return ((u_int)offset>>2)&0xffffff;
408}
409
410static unused void emit_breakpoint(void)
411{
412 assem_debug("bkpt #0\n");
413 //output_w32(0xe1200070);
414 output_w32(0xe7f001f0);
415}
416
417static void emit_mov(int rs,int rt)
418{
419 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
420 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
421}
422
423static void emit_movs(int rs,int rt)
424{
425 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
426 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
427}
428
429static void emit_add(int rs1,int rs2,int rt)
430{
431 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
432 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
433}
434
435static void emit_adds(int rs1,int rs2,int rt)
436{
437 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
438 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
439}
440#define emit_adds_ptr emit_adds
441
442static void emit_adcs(int rs1,int rs2,int rt)
443{
444 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
445 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
446}
447
448static void emit_neg(int rs, int rt)
449{
450 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
451 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
452}
453
454static void emit_sub(int rs1,int rs2,int rt)
455{
456 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
457 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
458}
459
460static void emit_zeroreg(int rt)
461{
462 assem_debug("mov %s,#0\n",regname[rt]);
463 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
464}
465
466static void emit_loadlp(u_int imm,u_int rt)
467{
468 add_literal((int)out,imm);
469 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
470 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
471}
472
473#ifdef HAVE_ARMV7
474static void emit_movw(u_int imm,u_int rt)
475{
476 assert(imm<65536);
477 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
478 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
479}
480
481static void emit_movt(u_int imm,u_int rt)
482{
483 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
484 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
485}
486#endif
487
488static void emit_movimm(u_int imm,u_int rt)
489{
490 u_int armval;
491 if(genimm(imm,&armval)) {
492 assem_debug("mov %s,#%d\n",regname[rt],imm);
493 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
494 }else if(genimm(~imm,&armval)) {
495 assem_debug("mvn %s,#%d\n",regname[rt],imm);
496 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
497 }else if(imm<65536) {
498 #ifndef HAVE_ARMV7
499 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
500 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
501 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
502 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
503 #else
504 emit_movw(imm,rt);
505 #endif
506 }else{
507 #ifndef HAVE_ARMV7
508 emit_loadlp(imm,rt);
509 #else
510 emit_movw(imm&0x0000FFFF,rt);
511 emit_movt(imm&0xFFFF0000,rt);
512 #endif
513 }
514}
515
516static void emit_pcreladdr(u_int rt)
517{
518 assem_debug("add %s,pc,#?\n",regname[rt]);
519 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
520}
521
522static void emit_loadreg(int r, int hr)
523{
524 assert(hr != EXCLUDE_REG);
525 if (r == 0)
526 emit_zeroreg(hr);
527 else {
528 void *addr;
529 switch (r) {
530 //case HIREG: addr = &hi; break;
531 //case LOREG: addr = &lo; break;
532 case CCREG: addr = &cycle_count; break;
533 case CSREG: addr = &Status; break;
534 case INVCP: addr = &invc_ptr; break;
535 case ROREG: addr = &ram_offset; break;
536 default:
537 assert(r < 34);
538 addr = &psxRegs.GPR.r[r];
539 break;
540 }
541 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
542 assert(offset<4096);
543 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
544 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
545 }
546}
547
548static void emit_storereg(int r, int hr)
549{
550 assert(hr != EXCLUDE_REG);
551 int addr = (int)&psxRegs.GPR.r[r];
552 switch (r) {
553 //case HIREG: addr = &hi; break;
554 //case LOREG: addr = &lo; break;
555 case CCREG: addr = (int)&cycle_count; break;
556 default: assert(r < 34); break;
557 }
558 u_int offset = addr-(u_int)&dynarec_local;
559 assert(offset<4096);
560 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
561 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
562}
563
564static void emit_test(int rs, int rt)
565{
566 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
567 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
568}
569
570static void emit_testimm(int rs,int imm)
571{
572 u_int armval;
573 assem_debug("tst %s,#%d\n",regname[rs],imm);
574 genimm_checked(imm,&armval);
575 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
576}
577
578static void emit_testeqimm(int rs,int imm)
579{
580 u_int armval;
581 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
582 genimm_checked(imm,&armval);
583 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
584}
585
586static void emit_not(int rs,int rt)
587{
588 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
589 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
590}
591
592static void emit_and(u_int rs1,u_int rs2,u_int rt)
593{
594 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
595 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
596}
597
598static void emit_or(u_int rs1,u_int rs2,u_int rt)
599{
600 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
601 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
602}
603
604static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
605{
606 assert(rs<16);
607 assert(rt<16);
608 assert(imm<32);
609 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
610 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
611}
612
613static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
614{
615 assert(rs<16);
616 assert(rt<16);
617 assert(imm<32);
618 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
619 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
620}
621
622static void emit_xor(u_int rs1,u_int rs2,u_int rt)
623{
624 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
625 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
626}
627
628static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
629{
630 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
631 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
632}
633
634static void emit_addimm(u_int rs,int imm,u_int rt)
635{
636 assert(rs<16);
637 assert(rt<16);
638 if(imm!=0) {
639 u_int armval;
640 if(genimm(imm,&armval)) {
641 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
642 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
643 }else if(genimm(-imm,&armval)) {
644 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
645 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
646 #ifdef HAVE_ARMV7
647 }else if(rt!=rs&&(u_int)imm<65536) {
648 emit_movw(imm&0x0000ffff,rt);
649 emit_add(rs,rt,rt);
650 }else if(rt!=rs&&(u_int)-imm<65536) {
651 emit_movw(-imm&0x0000ffff,rt);
652 emit_sub(rs,rt,rt);
653 #endif
654 }else if((u_int)-imm<65536) {
655 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
656 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
657 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
658 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
659 }else {
660 do {
661 int shift = (ffs(imm) - 1) & ~1;
662 int imm8 = imm & (0xff << shift);
663 genimm_checked(imm8,&armval);
664 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
665 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
666 rs = rt;
667 imm &= ~imm8;
668 }
669 while (imm != 0);
670 }
671 }
672 else if(rs!=rt) emit_mov(rs,rt);
673}
674
675static void emit_addimm_and_set_flags(int imm,int rt)
676{
677 assert(imm>-65536&&imm<65536);
678 u_int armval;
679 if(genimm(imm,&armval)) {
680 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
681 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
682 }else if(genimm(-imm,&armval)) {
683 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
684 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
685 }else if(imm<0) {
686 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
687 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
688 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
689 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
690 }else{
691 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
692 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
693 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
694 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
695 }
696}
697
698static void emit_addnop(u_int r)
699{
700 assert(r<16);
701 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
702 output_w32(0xe2800000|rd_rn_rm(r,r,0));
703}
704
705static void emit_andimm(int rs,int imm,int rt)
706{
707 u_int armval;
708 if(imm==0) {
709 emit_zeroreg(rt);
710 }else if(genimm(imm,&armval)) {
711 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
712 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
713 }else if(genimm(~imm,&armval)) {
714 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
716 }else if(imm==65535) {
717 #ifndef HAVE_ARMV6
718 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
719 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
720 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
721 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
722 #else
723 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
724 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
725 #endif
726 }else{
727 assert(imm>0&&imm<65535);
728 #ifndef HAVE_ARMV7
729 assem_debug("mov r14,#%d\n",imm&0xFF00);
730 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
731 assem_debug("add r14,r14,#%d\n",imm&0xFF);
732 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
733 #else
734 emit_movw(imm,HOST_TEMPREG);
735 #endif
736 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
737 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
738 }
739}
740
741static void emit_orimm(int rs,int imm,int rt)
742{
743 u_int armval;
744 if(imm==0) {
745 if(rs!=rt) emit_mov(rs,rt);
746 }else if(genimm(imm,&armval)) {
747 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
748 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
749 }else{
750 assert(imm>0&&imm<65536);
751 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
752 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
753 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
754 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
755 }
756}
757
758static void emit_xorimm(int rs,int imm,int rt)
759{
760 u_int armval;
761 if(imm==0) {
762 if(rs!=rt) emit_mov(rs,rt);
763 }else if(genimm(imm,&armval)) {
764 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
765 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
766 }else{
767 assert(imm>0&&imm<65536);
768 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
769 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
770 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
771 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
772 }
773}
774
775static void emit_shlimm(int rs,u_int imm,int rt)
776{
777 assert(imm>0);
778 assert(imm<32);
779 //if(imm==1) ...
780 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
781 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
782}
783
784static void emit_lsls_imm(int rs,int imm,int rt)
785{
786 assert(imm>0);
787 assert(imm<32);
788 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
789 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
790}
791
792static unused void emit_lslpls_imm(int rs,int imm,int rt)
793{
794 assert(imm>0);
795 assert(imm<32);
796 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
797 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
798}
799
800static void emit_shrimm(int rs,u_int imm,int rt)
801{
802 assert(imm>0);
803 assert(imm<32);
804 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
805 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
806}
807
808static void emit_sarimm(int rs,u_int imm,int rt)
809{
810 assert(imm>0);
811 assert(imm<32);
812 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
813 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
814}
815
816static void emit_rorimm(int rs,u_int imm,int rt)
817{
818 assert(imm>0);
819 assert(imm<32);
820 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
821 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
822}
823
// Sign-extend the low 16 bits of rs into rt: sxth when HAVE_ARMV6 is
// defined, otherwise a shift left by 16 followed by an arithmetic shift
// right by 16.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
834
// Sign-extend the low 8 bits of rs into rt: sxtb when HAVE_ARMV6 is
// defined, otherwise a shift left by 24 followed by an arithmetic shift
// right by 24.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
845
846static void emit_shl(u_int rs,u_int shift,u_int rt)
847{
848 assert(rs<16);
849 assert(rt<16);
850 assert(shift<16);
851 //if(imm==1) ...
852 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
853 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
854}
855
856static void emit_shr(u_int rs,u_int shift,u_int rt)
857{
858 assert(rs<16);
859 assert(rt<16);
860 assert(shift<16);
861 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
862 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
863}
864
865static void emit_sar(u_int rs,u_int shift,u_int rt)
866{
867 assert(rs<16);
868 assert(rt<16);
869 assert(shift<16);
870 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
871 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
872}
873
874static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
875{
876 assert(rs<16);
877 assert(rt<16);
878 assert(shift<16);
879 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
880 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
881}
882
883static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
884{
885 assert(rs<16);
886 assert(rt<16);
887 assert(shift<16);
888 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
889 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
890}
891
892static void emit_cmpimm(int rs,int imm)
893{
894 u_int armval;
895 if(genimm(imm,&armval)) {
896 assem_debug("cmp %s,#%d\n",regname[rs],imm);
897 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
898 }else if(genimm(-imm,&armval)) {
899 assem_debug("cmn %s,#%d\n",regname[rs],imm);
900 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
901 }else if(imm>0) {
902 assert(imm<65536);
903 emit_movimm(imm,HOST_TEMPREG);
904 assem_debug("cmp %s,r14\n",regname[rs]);
905 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
906 }else{
907 assert(imm>-65536);
908 emit_movimm(-imm,HOST_TEMPREG);
909 assem_debug("cmn %s,r14\n",regname[rs]);
910 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
911 }
912}
913
914static void emit_cmovne_imm(int imm,int rt)
915{
916 assem_debug("movne %s,#%d\n",regname[rt],imm);
917 u_int armval;
918 genimm_checked(imm,&armval);
919 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
920}
921
922static void emit_cmovl_imm(int imm,int rt)
923{
924 assem_debug("movlt %s,#%d\n",regname[rt],imm);
925 u_int armval;
926 genimm_checked(imm,&armval);
927 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
928}
929
930static void emit_cmovb_imm(int imm,int rt)
931{
932 assem_debug("movcc %s,#%d\n",regname[rt],imm);
933 u_int armval;
934 genimm_checked(imm,&armval);
935 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
936}
937
938static void emit_cmovae_imm(int imm,int rt)
939{
940 assem_debug("movcs %s,#%d\n",regname[rt],imm);
941 u_int armval;
942 genimm_checked(imm,&armval);
943 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
944}
945
946static void emit_cmovs_imm(int imm,int rt)
947{
948 assem_debug("movmi %s,#%d\n",regname[rt],imm);
949 u_int armval;
950 genimm_checked(imm,&armval);
951 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
952}
953
954static void emit_cmovne_reg(int rs,int rt)
955{
956 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
957 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
958}
959
960static void emit_cmovl_reg(int rs,int rt)
961{
962 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
963 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
964}
965
966static void emit_cmovb_reg(int rs,int rt)
967{
968 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
969 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
970}
971
972static void emit_cmovs_reg(int rs,int rt)
973{
974 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
975 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
976}
977
// Emit rt = (rs < imm) ? 1 : 0 via cmp + movlt.
// When rt aliases rs the zeroing is done after the compare so that the
// operand is still intact when it is compared.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
985
// Emit rt = 1 when the compare of rs with imm leaves the CC condition
// true, else 0 (cmp + movcc).  When rt aliases rs the zeroing is done after
// the compare.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
993
994static void emit_cmp(int rs,int rt)
995{
996 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
997 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
998}
999
// Emit rt = (rs > 0) ? 1 : 0: compare rs with 1, load 1, then replace it
// with 0 when rs was less than 1.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);   // compare first: rt may alias rs
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1007
// Emit rt = (rs != 0) ? 1 : 0.  The flags come from "movs rt,rs" when the
// registers differ, or from "tst rs,rs" when they are the same; a movne
// then writes 1.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1015
// Emit rt = (rs1 < rs2) ? 1 : 0 via cmp + movlt.
// When rt aliases either source the zeroing is done after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1024
// Emit rt = 1 when "cmp rs1,rs2" leaves the CC condition true, else 0
// (cmp + movcc).  When rt aliases either source the zeroing is done after
// the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1033
1034static int can_jump_or_call(const void *a)
1035{
1036 intptr_t offset = (u_char *)a - out - 8;
1037 return (-33554432 <= offset && offset < 33554432);
1038}
1039
1040static void emit_call(const void *a_)
1041{
1042 int a = (int)a_;
1043 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1044 u_int offset=genjmp(a);
1045 output_w32(0xeb000000|offset);
1046}
1047
1048static void emit_jmp(const void *a_)
1049{
1050 int a = (int)a_;
1051 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1052 u_int offset=genjmp(a);
1053 output_w32(0xea000000|offset);
1054}
1055
// Emit "bne" to address a_; the offset field comes from genjmp().
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n",target);
  output_w32(0x1a000000 | genjmp(target));
}
1063
// Emit "beq" to address a_; the offset field comes from genjmp().
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n",target);
  output_w32(0x0a000000 | genjmp(target));
}
1071
// Emit "bmi" to address a_; the offset field comes from genjmp().
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000 | genjmp(target));
}
1079
// Emit "bpl" to address a_; the offset field comes from genjmp().
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000 | genjmp(target));
}
1087
// Emit "blt" to address a_; the offset field comes from genjmp().
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n",target);
  output_w32(0xba000000 | genjmp(target));
}
1095
// Emit "bge" to address a_; the offset field comes from genjmp().
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n",target);
  output_w32(0xaa000000 | genjmp(target));
}
1103
// Emit "bvc" to address a_; the offset field comes from genjmp().
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000 | genjmp(target));
}
1111
// Emit "bcs" to address a_; the offset field comes from genjmp().
static void emit_jc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000 | genjmp(target));
}
1119
// Emit "bcc" (branch if carry clear) to the host address a_.
static void emit_jcc(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bcc %x\n",target);
  output_w32(0x3a000000 | genjmp(target));
}
1127
1128static unused void emit_callreg(u_int r)
1129{
1130 assert(r<15);
1131 assem_debug("blx %s\n",regname[r]);
1132 output_w32(0xe12fff30|r);
1133}
1134
1135static void emit_jmpreg(u_int r)
1136{
1137 assem_debug("mov pc,%s\n",regname[r]);
1138 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1139}
1140
1141static void emit_ret(void)
1142{
1143 emit_jmpreg(14);
1144}
1145
1146static void emit_readword_indexed(int offset, int rs, int rt)
1147{
1148 assert(offset>-4096&&offset<4096);
1149 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1150 if(offset>=0) {
1151 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1152 }else{
1153 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1154 }
1155}
1156
1157static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1158{
1159 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1160 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1161}
1162#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1163
1164static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1165{
1166 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1167 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1168}
1169
1170static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1171{
1172 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1173 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1174}
1175
1176static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1177{
1178 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1179 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1180}
1181
1182static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1183{
1184 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1185 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1186}
1187
1188static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1189{
1190 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1191 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1192}
1193
1194static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1195{
1196 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1197 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1198}
1199
1200static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1201{
1202 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1203 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1204}
1205
1206static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1207{
1208 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1209 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1210}
1211
1212static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1213{
1214 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1215 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1216}
1217
1218static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1219{
1220 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1221 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1222}
1223
1224static void emit_str_dualindexed(int rs1, int rs2, int rt)
1225{
1226 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1227 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1228}
1229
1230static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1231{
1232 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1233 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1234}
1235
1236static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1237{
1238 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1239 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1240}
1241
1242static void emit_movsbl_indexed(int offset, int rs, int rt)
1243{
1244 assert(offset>-256&&offset<256);
1245 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1246 if(offset>=0) {
1247 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1248 }else{
1249 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1250 }
1251}
1252
1253static void emit_movswl_indexed(int offset, int rs, int rt)
1254{
1255 assert(offset>-256&&offset<256);
1256 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1257 if(offset>=0) {
1258 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1259 }else{
1260 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1261 }
1262}
1263
1264static void emit_movzbl_indexed(int offset, int rs, int rt)
1265{
1266 assert(offset>-4096&&offset<4096);
1267 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1268 if(offset>=0) {
1269 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1270 }else{
1271 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1272 }
1273}
1274
1275static void emit_movzwl_indexed(int offset, int rs, int rt)
1276{
1277 assert(offset>-256&&offset<256);
1278 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1279 if(offset>=0) {
1280 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1281 }else{
1282 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1283 }
1284}
1285
1286static void emit_ldrd(int offset, int rs, int rt)
1287{
1288 assert(offset>-256&&offset<256);
1289 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1290 if(offset>=0) {
1291 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1292 }else{
1293 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1294 }
1295}
1296
1297static void emit_readword(void *addr, int rt)
1298{
1299 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1300 assert(offset<4096);
1301 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1302 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1303}
1304#define emit_readptr emit_readword
1305
1306static void emit_writeword_indexed(int rt, int offset, int rs)
1307{
1308 assert(offset>-4096&&offset<4096);
1309 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1310 if(offset>=0) {
1311 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1312 }else{
1313 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1314 }
1315}
1316
1317static void emit_writehword_indexed(int rt, int offset, int rs)
1318{
1319 assert(offset>-256&&offset<256);
1320 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1321 if(offset>=0) {
1322 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1323 }else{
1324 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1325 }
1326}
1327
1328static void emit_writebyte_indexed(int rt, int offset, int rs)
1329{
1330 assert(offset>-4096&&offset<4096);
1331 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1332 if(offset>=0) {
1333 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1334 }else{
1335 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1336 }
1337}
1338
1339static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1340{
1341 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1342 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1343}
1344
1345static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1346{
1347 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1348 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1349}
1350
1351static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1352{
1353 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1354 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1355}
1356
1357static void emit_writeword(int rt, void *addr)
1358{
1359 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1360 assert(offset<4096);
1361 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1362 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1363}
1364
1365static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1366{
1367 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1368 assert(rs1<16);
1369 assert(rs2<16);
1370 assert(hi<16);
1371 assert(lo<16);
1372 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1373}
1374
1375static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1376{
1377 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1378 assert(rs1<16);
1379 assert(rs2<16);
1380 assert(hi<16);
1381 assert(lo<16);
1382 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1383}
1384
1385static void emit_clz(int rs,int rt)
1386{
1387 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1388 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1389}
1390
1391static void emit_subcs(int rs1,int rs2,int rt)
1392{
1393 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1394 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1395}
1396
1397static void emit_shrcc_imm(int rs,u_int imm,int rt)
1398{
1399 assert(imm>0);
1400 assert(imm<32);
1401 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1402 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1403}
1404
1405static void emit_shrne_imm(int rs,u_int imm,int rt)
1406{
1407 assert(imm>0);
1408 assert(imm<32);
1409 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1410 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1411}
1412
1413static void emit_negmi(int rs, int rt)
1414{
1415 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1416 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1417}
1418
1419static void emit_negsmi(int rs, int rt)
1420{
1421 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1422 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1423}
1424
1425static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1426{
1427 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1428 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1429}
1430
1431static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1432{
1433 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1434 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1435}
1436
1437static void emit_teq(int rs, int rt)
1438{
1439 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1440 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1441}
1442
1443static unused void emit_rsbimm(int rs, int imm, int rt)
1444{
1445 u_int armval;
1446 genimm_checked(imm,&armval);
1447 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1448 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1449}
1450
1451// Conditionally select one of two immediates, optimizing for small code size
1452// This will only be called if HAVE_CMOV_IMM is defined
1453static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1454{
1455 u_int armval;
1456 if(genimm(imm2-imm1,&armval)) {
1457 emit_movimm(imm1,rt);
1458 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1459 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1460 }else if(genimm(imm1-imm2,&armval)) {
1461 emit_movimm(imm1,rt);
1462 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1463 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1464 }
1465 else {
1466 #ifndef HAVE_ARMV7
1467 emit_movimm(imm1,rt);
1468 add_literal((int)out,imm2);
1469 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1470 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1471 #else
1472 emit_movw(imm1&0x0000FFFF,rt);
1473 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1474 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1475 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1476 }
1477 emit_movt(imm1&0xFFFF0000,rt);
1478 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1479 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1480 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1481 }
1482 #endif
1483 }
1484}
1485
1486// special case for checking invalid_code
1487static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1488{
1489 assert(imm<128&&imm>=0);
1490 assert(r>=0&&r<16);
1491 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1492 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1493 emit_cmpimm(HOST_TEMPREG,imm);
1494}
1495
1496static void emit_callne(int a)
1497{
1498 assem_debug("blne %x\n",a);
1499 u_int offset=genjmp(a);
1500 output_w32(0x1b000000|offset);
1501}
1502
1503// Used to preload hash table entries
1504static unused void emit_prefetchreg(int r)
1505{
1506 assem_debug("pld %s\n",regname[r]);
1507 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1508}
1509
1510// Special case for mini_ht
1511static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1512{
1513 assert(offset<4096);
1514 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1515 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1516}
1517
1518static void emit_orrne_imm(int rs,int imm,int rt)
1519{
1520 u_int armval;
1521 genimm_checked(imm,&armval);
1522 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1523 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1524}
1525
1526static unused void emit_addpl_imm(int rs,int imm,int rt)
1527{
1528 u_int armval;
1529 genimm_checked(imm,&armval);
1530 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1531 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1532}
1533
1534static void emit_jno_unlikely(int a)
1535{
1536 //emit_jno(a);
1537 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1538 output_w32(0x72800000|rd_rn_rm(15,15,0));
1539}
1540
1541static void save_regs_all(u_int reglist)
1542{
1543 int i;
1544 if(!reglist) return;
1545 assem_debug("stmia fp,{");
1546 for(i=0;i<16;i++)
1547 if(reglist&(1<<i))
1548 assem_debug("r%d,",i);
1549 assem_debug("}\n");
1550 output_w32(0xe88b0000|reglist);
1551}
1552
1553static void restore_regs_all(u_int reglist)
1554{
1555 int i;
1556 if(!reglist) return;
1557 assem_debug("ldmia fp,{");
1558 for(i=0;i<16;i++)
1559 if(reglist&(1<<i))
1560 assem_debug("r%d,",i);
1561 assem_debug("}\n");
1562 output_w32(0xe89b0000|reglist);
1563}
1564
1565// Save registers before function call
1566static void save_regs(u_int reglist)
1567{
1568 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1569 save_regs_all(reglist);
1570}
1571
1572// Restore registers after function call
1573static void restore_regs(u_int reglist)
1574{
1575 reglist&=CALLER_SAVE_REGS;
1576 restore_regs_all(reglist);
1577}
1578
1579/* Stubs/epilogue */
1580
1581static void literal_pool(int n)
1582{
1583 if(!literalcount) return;
1584 if(n) {
1585 if((int)out-literals[0][0]<4096-n) return;
1586 }
1587 u_int *ptr;
1588 int i;
1589 for(i=0;i<literalcount;i++)
1590 {
1591 u_int l_addr=(u_int)out;
1592 int j;
1593 for(j=0;j<i;j++) {
1594 if(literals[j][1]==literals[i][1]) {
1595 //printf("dup %08x\n",literals[i][1]);
1596 l_addr=literals[j][0];
1597 break;
1598 }
1599 }
1600 ptr=(u_int *)literals[i][0];
1601 u_int offset=l_addr-(u_int)ptr-8;
1602 assert(offset<4096);
1603 assert(!(offset&3));
1604 *ptr|=offset;
1605 if(l_addr==(u_int)out) {
1606 literals[i][0]=l_addr; // remember for dupes
1607 output_w32(literals[i][1]);
1608 }
1609 }
1610 literalcount=0;
1611}
1612
1613static void literal_pool_jumpover(int n)
1614{
1615 if(!literalcount) return;
1616 if(n) {
1617 if((int)out-literals[0][0]<4096-n) return;
1618 }
1619 void *jaddr = out;
1620 emit_jmp(0);
1621 literal_pool(0);
1622 set_jump_target(jaddr, out);
1623}
1624
1625// parsed by get_pointer, find_extjump_insn
1626static void emit_extjump2(u_char *addr, u_int target, void *linker)
1627{
1628 u_char *ptr=(u_char *)addr;
1629 assert((ptr[3]&0x0e)==0xa);
1630 (void)ptr;
1631
1632 emit_loadlp(target,0);
1633 emit_loadlp((u_int)addr,1);
1634 assert(ndrc->translation_cache <= addr &&
1635 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
1636 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1637//DEBUG >
1638#ifdef DEBUG_CYCLE_COUNT
1639 emit_readword(&last_count,ECX);
1640 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1641 emit_readword(&next_interupt,ECX);
1642 emit_writeword(HOST_CCREG,&Count);
1643 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1644 emit_writeword(ECX,&last_count);
1645#endif
1646//DEBUG <
1647 emit_far_jump(linker);
1648}
1649
1650static void check_extjump2(void *src)
1651{
1652 u_int *ptr = src;
1653 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1654 (void)ptr;
1655}
1656
1657// put rt_val into rt, potentially making use of rs with value rs_val
1658static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1659{
1660 u_int armval;
1661 int diff;
1662 if(genimm(rt_val,&armval)) {
1663 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1664 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1665 return;
1666 }
1667 if(genimm(~rt_val,&armval)) {
1668 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1669 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1670 return;
1671 }
1672 diff=rt_val-rs_val;
1673 if(genimm(diff,&armval)) {
1674 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1675 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1676 return;
1677 }else if(genimm(-diff,&armval)) {
1678 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1679 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1680 return;
1681 }
1682 emit_movimm(rt_val,rt);
1683}
1684
// return 1 if emit_movimm_from can do its job cheaply, i.e. when v2-v1 or
// v1-v2, after stripping trailing pairs of zero bits, fits in 8 bits
static int is_similar_value(u_int v1,u_int v2)
{
  int diff;
  u_int bits;

  if (v1 == v2)
    return 1;
  diff = v2-v1;

  bits = diff;
  while (bits != 0 && (bits&3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  bits = -diff;
  while (bits != 0 && (bits&3) == 0)
    bits >>= 2;
  return bits < 0x100 ? 1 : 0;
}
1700
1701static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1702{
1703 switch(type) {
1704 case LOADB_STUB: emit_signextend8(rs,rt); break;
1705 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1706 case LOADH_STUB: emit_signextend16(rs,rt); break;
1707 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1708 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1709 default: assert(0);
1710 }
1711}
1712
1713#include "pcsxmem.h"
1714#include "pcsxmem_inline.c"
1715
1716static void do_readstub(int n)
1717{
1718 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1719 literal_pool(256);
1720 set_jump_target(stubs[n].addr, out);
1721 enum stub_type type=stubs[n].type;
1722 int i=stubs[n].a;
1723 int rs=stubs[n].b;
1724 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1725 u_int reglist=stubs[n].e;
1726 const signed char *i_regmap=i_regs->regmap;
1727 int rt;
1728 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1729 rt=get_reg(i_regmap,FTEMP);
1730 }else{
1731 rt=get_reg(i_regmap,dops[i].rt1);
1732 }
1733 assert(rs>=0);
1734 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1735 void *restore_jump = NULL;
1736 reglist|=(1<<rs);
1737 for(r=0;r<=12;r++) {
1738 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1739 temp=r; break;
1740 }
1741 }
1742 if(rt>=0&&dops[i].rt1!=0)
1743 reglist&=~(1<<rt);
1744 if(temp==-1) {
1745 save_regs(reglist);
1746 regs_saved=1;
1747 temp=(rs==0)?2:0;
1748 }
1749 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1750 temp2=1;
1751 emit_readword(&mem_rtab,temp);
1752 emit_shrimm(rs,12,temp2);
1753 emit_readword_dualindexedx4(temp,temp2,temp2);
1754 emit_lsls_imm(temp2,1,temp2);
1755 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1756 switch(type) {
1757 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1758 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1759 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1760 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1761 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1762 default: assert(0);
1763 }
1764 }
1765 if(regs_saved) {
1766 restore_jump=out;
1767 emit_jcc(0); // jump to reg restore
1768 }
1769 else
1770 emit_jcc(stubs[n].retaddr); // return address
1771
1772 if(!regs_saved)
1773 save_regs(reglist);
1774 void *handler=NULL;
1775 if(type==LOADB_STUB||type==LOADBU_STUB)
1776 handler=jump_handler_read8;
1777 if(type==LOADH_STUB||type==LOADHU_STUB)
1778 handler=jump_handler_read16;
1779 if(type==LOADW_STUB)
1780 handler=jump_handler_read32;
1781 assert(handler);
1782 pass_args(rs,temp2);
1783 int cc=get_reg(i_regmap,CCREG);
1784 if(cc<0)
1785 emit_loadreg(CCREG,2);
1786 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1787 emit_far_call(handler);
1788 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1789 mov_loadtype_adj(type,0,rt);
1790 }
1791 if(restore_jump)
1792 set_jump_target(restore_jump, out);
1793 restore_regs(reglist);
1794 emit_jmp(stubs[n].retaddr); // return address
1795}
1796
1797static void inline_readstub(enum stub_type type, int i, u_int addr,
1798 const signed char regmap[], int target, int adj, u_int reglist)
1799{
1800 int rs=get_reg(regmap,target);
1801 int rt=get_reg(regmap,target);
1802 if(rs<0) rs=get_reg_temp(regmap);
1803 assert(rs>=0);
1804 u_int is_dynamic;
1805 uintptr_t host_addr = 0;
1806 void *handler;
1807 int cc=get_reg(regmap,CCREG);
1808 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1809 return;
1810 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1811 if (handler == NULL) {
1812 if(rt<0||dops[i].rt1==0)
1813 return;
1814 if(addr!=host_addr)
1815 emit_movimm_from(addr,rs,host_addr,rs);
1816 switch(type) {
1817 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1818 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1819 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1820 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1821 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1822 default: assert(0);
1823 }
1824 return;
1825 }
1826 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1827 if(is_dynamic) {
1828 if(type==LOADB_STUB||type==LOADBU_STUB)
1829 handler=jump_handler_read8;
1830 if(type==LOADH_STUB||type==LOADHU_STUB)
1831 handler=jump_handler_read16;
1832 if(type==LOADW_STUB)
1833 handler=jump_handler_read32;
1834 }
1835
1836 // call a memhandler
1837 if(rt>=0&&dops[i].rt1!=0)
1838 reglist&=~(1<<rt);
1839 save_regs(reglist);
1840 if(target==0)
1841 emit_movimm(addr,0);
1842 else if(rs!=0)
1843 emit_mov(rs,0);
1844 if(cc<0)
1845 emit_loadreg(CCREG,2);
1846 if(is_dynamic) {
1847 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1848 emit_addimm(cc<0?2:cc,adj,2);
1849 }
1850 else {
1851 emit_readword(&last_count,3);
1852 emit_addimm(cc<0?2:cc,adj,2);
1853 emit_add(2,3,2);
1854 emit_writeword(2,&Count);
1855 }
1856
1857 emit_far_call(handler);
1858
1859 if(rt>=0&&dops[i].rt1!=0) {
1860 switch(type) {
1861 case LOADB_STUB: emit_signextend8(0,rt); break;
1862 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1863 case LOADH_STUB: emit_signextend16(0,rt); break;
1864 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1865 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1866 default: assert(0);
1867 }
1868 }
1869 restore_regs(reglist);
1870}
1871
1872static void do_writestub(int n)
1873{
1874 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1875 literal_pool(256);
1876 set_jump_target(stubs[n].addr, out);
1877 enum stub_type type=stubs[n].type;
1878 int i=stubs[n].a;
1879 int rs=stubs[n].b;
1880 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1881 u_int reglist=stubs[n].e;
1882 const signed char *i_regmap=i_regs->regmap;
1883 int rt,r;
1884 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1885 rt=get_reg(i_regmap,r=FTEMP);
1886 }else{
1887 rt=get_reg(i_regmap,r=dops[i].rs2);
1888 }
1889 assert(rs>=0);
1890 assert(rt>=0);
1891 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1892 void *restore_jump = NULL;
1893 int reglist2=reglist|(1<<rs)|(1<<rt);
1894 for(rtmp=0;rtmp<=12;rtmp++) {
1895 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1896 temp=rtmp; break;
1897 }
1898 }
1899 if(temp==-1) {
1900 save_regs(reglist);
1901 regs_saved=1;
1902 for(rtmp=0;rtmp<=3;rtmp++)
1903 if(rtmp!=rs&&rtmp!=rt)
1904 {temp=rtmp;break;}
1905 }
1906 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1907 temp2=3;
1908 emit_readword(&mem_wtab,temp);
1909 emit_shrimm(rs,12,temp2);
1910 emit_readword_dualindexedx4(temp,temp2,temp2);
1911 emit_lsls_imm(temp2,1,temp2);
1912 switch(type) {
1913 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1914 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1915 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1916 default: assert(0);
1917 }
1918 if(regs_saved) {
1919 restore_jump=out;
1920 emit_jcc(0); // jump to reg restore
1921 }
1922 else
1923 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1924
1925 if(!regs_saved)
1926 save_regs(reglist);
1927 void *handler=NULL;
1928 switch(type) {
1929 case STOREB_STUB: handler=jump_handler_write8; break;
1930 case STOREH_STUB: handler=jump_handler_write16; break;
1931 case STOREW_STUB: handler=jump_handler_write32; break;
1932 default: assert(0);
1933 }
1934 assert(handler);
1935 pass_args(rs,rt);
1936 if(temp2!=3)
1937 emit_mov(temp2,3);
1938 int cc=get_reg(i_regmap,CCREG);
1939 if(cc<0)
1940 emit_loadreg(CCREG,2);
1941 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1942 // returns new cycle_count
1943 emit_far_call(handler);
1944 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1945 if(cc<0)
1946 emit_storereg(CCREG,2);
1947 if(restore_jump)
1948 set_jump_target(restore_jump, out);
1949 restore_regs(reglist);
1950 emit_jmp(stubs[n].retaddr);
1951}
1952
1953static void inline_writestub(enum stub_type type, int i, u_int addr,
1954 const signed char regmap[], int target, int adj, u_int reglist)
1955{
1956 int rs=get_reg_temp(regmap);
1957 int rt=get_reg(regmap,target);
1958 assert(rs>=0);
1959 assert(rt>=0);
1960 uintptr_t host_addr = 0;
1961 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1962 if (handler == NULL) {
1963 if(addr!=host_addr)
1964 emit_movimm_from(addr,rs,host_addr,rs);
1965 switch(type) {
1966 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1967 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1968 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1969 default: assert(0);
1970 }
1971 return;
1972 }
1973
1974 // call a memhandler
1975 save_regs(reglist);
1976 pass_args(rs,rt);
1977 int cc=get_reg(regmap,CCREG);
1978 if(cc<0)
1979 emit_loadreg(CCREG,2);
1980 emit_addimm(cc<0?2:cc,adj,2);
1981 emit_movimm((u_int)handler,3);
1982 // returns new cycle_count
1983 emit_far_call(jump_handler_write_h);
1984 emit_addimm(0,-adj,cc<0?2:cc);
1985 if(cc<0)
1986 emit_storereg(CCREG,2);
1987 restore_regs(reglist);
1988}
1989
1990// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1991static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
1992{
1993 #ifndef HAVE_ARMV7
1994 emit_loadlp((int)source, 1);
1995 emit_loadlp((int)copy, 2);
1996 emit_loadlp(source_len, 3);
1997 #else
1998 emit_movw(((u_int)source)&0x0000FFFF, 1);
1999 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2000 emit_movt(((u_int)source)&0xFFFF0000, 1);
2001 emit_movt(((u_int)copy)&0xFFFF0000, 2);
2002 emit_movw(source_len, 3);
2003 #endif
2004 emit_movimm(arg0, 0);
2005}
2006
2007static void *do_dirty_stub(int i, u_int source_len)
2008{
2009 assem_debug("do_dirty_stub %x\n",start+i*4);
2010 do_dirty_stub_emit_args(start + i*4, source_len);
2011 emit_far_call(verify_code);
2012 void *entry = out;
2013 load_regs_entry(i);
2014 if (entry == out)
2015 entry = instr_addr[i];
2016 emit_jmp(instr_addr[i]);
2017 return entry;
2018}
2019
2020/* Special assem */
2021
2022static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
2023{
2024 save_regs_all(reglist);
2025 cop2_do_stall_check(op, i, i_regs, 0);
2026#ifdef PCNT
2027 emit_movimm(op, 0);
2028 emit_far_call(pcnt_gte_start);
2029#endif
2030 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
2031}
2032
2033static void c2op_epilogue(u_int op,u_int reglist)
2034{
2035#ifdef PCNT
2036 emit_movimm(op,0);
2037 emit_far_call(pcnt_gte_end);
2038#endif
2039 restore_regs_all(reglist);
2040}
2041
2042static void c2op_call_MACtoIR(int lm,int need_flags)
2043{
2044 if(need_flags)
2045 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2046 else
2047 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2048}
2049
2050static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2051{
2052 emit_far_call(func);
2053 // func is C code and trashes r0
2054 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2055 if(need_flags||need_ir)
2056 c2op_call_MACtoIR(lm,need_flags);
2057 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2058}
2059
2060static void c2op_assemble(int i, const struct regstat *i_regs)
2061{
2062 u_int c2op = source[i] & 0x3f;
2063 u_int reglist_full = get_host_reglist(i_regs->regmap);
2064 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2065 int need_flags, need_ir;
2066
2067 if (gte_handlers[c2op]!=NULL) {
2068 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2069 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2070 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2071 source[i],gte_unneeded[i+1],need_flags,need_ir);
2072 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
2073 need_flags=0;
2074 int shift = (source[i] >> 19) & 1;
2075 int lm = (source[i] >> 10) & 1;
2076 switch(c2op) {
2077#ifndef DRC_DBG
2078 case GTE_MVMVA: {
2079#ifdef HAVE_ARMV5
2080 int v = (source[i] >> 15) & 3;
2081 int cv = (source[i] >> 13) & 3;
2082 int mx = (source[i] >> 17) & 3;
2083 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2084 c2op_prologue(c2op,i,i_regs,reglist);
2085 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2086 if(v<3)
2087 emit_ldrd(v*8,0,4);
2088 else {
2089 emit_movzwl_indexed(9*4,0,4); // gteIR
2090 emit_movzwl_indexed(10*4,0,6);
2091 emit_movzwl_indexed(11*4,0,5);
2092 emit_orrshl_imm(6,16,4);
2093 }
2094 if(mx<3)
2095 emit_addimm(0,32*4+mx*8*4,6);
2096 else
2097 emit_readword(&zeromem_ptr,6);
2098 if(cv<3)
2099 emit_addimm(0,32*4+(cv*8+5)*4,7);
2100 else
2101 emit_readword(&zeromem_ptr,7);
2102#ifdef __ARM_NEON__
2103 emit_movimm(source[i],1); // opcode
2104 emit_far_call(gteMVMVA_part_neon);
2105 if(need_flags) {
2106 emit_movimm(lm,1);
2107 emit_far_call(gteMACtoIR_flags_neon);
2108 }
2109#else
2110 if(cv==3&&shift)
2111 emit_far_call(gteMVMVA_part_cv3sh12_arm);
2112 else {
2113 emit_movimm(shift,1);
2114 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
2115 }
2116 if(need_flags||need_ir)
2117 c2op_call_MACtoIR(lm,need_flags);
2118#endif
2119#else /* if not HAVE_ARMV5 */
2120 c2op_prologue(c2op,i,i_regs,reglist);
2121 emit_movimm(source[i],1); // opcode
2122 emit_writeword(1,&psxRegs.code);
2123 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2124#endif
2125 break;
2126 }
2127 case GTE_OP:
2128 c2op_prologue(c2op,i,i_regs,reglist);
2129 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2130 if(need_flags||need_ir) {
2131 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2132 c2op_call_MACtoIR(lm,need_flags);
2133 }
2134 break;
2135 case GTE_DPCS:
2136 c2op_prologue(c2op,i,i_regs,reglist);
2137 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2138 break;
2139 case GTE_INTPL:
2140 c2op_prologue(c2op,i,i_regs,reglist);
2141 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2142 break;
2143 case GTE_SQR:
2144 c2op_prologue(c2op,i,i_regs,reglist);
2145 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2146 if(need_flags||need_ir) {
2147 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2148 c2op_call_MACtoIR(lm,need_flags);
2149 }
2150 break;
2151 case GTE_DCPL:
2152 c2op_prologue(c2op,i,i_regs,reglist);
2153 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2154 break;
2155 case GTE_GPF:
2156 c2op_prologue(c2op,i,i_regs,reglist);
2157 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2158 break;
2159 case GTE_GPL:
2160 c2op_prologue(c2op,i,i_regs,reglist);
2161 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2162 break;
2163#endif
2164 default:
2165 c2op_prologue(c2op,i,i_regs,reglist);
2166#ifdef DRC_DBG
2167 emit_movimm(source[i],1); // opcode
2168 emit_writeword(1,&psxRegs.code);
2169#endif
2170 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2171 break;
2172 }
2173 c2op_epilogue(c2op,reglist);
2174 }
2175}
2176
/*
 * Emit code that derives the value to store for a write to GTE control
 * register 31 from the host register `sl`, leaving the result in `temp`.
 *
 * Reference formula the sequence is modelled on:
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 * NOTE(review): the shift pair below only clears the low 12 bits; unlike
 * the reference formula it does not appear to clear bit 31 of the input
 * (assuming emit_shrimm/emit_shlimm are plain logical shifts) - confirm.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // Error-bit groups checked after the first one.  Each is emitted as a
  // "testeq", i.e. it is chained onto the outcome of the previous test.
  static const int more_error_bits[] = { 0x00870000, 0x0000e000 };
  unsigned int k;

  // temp = sl with the low 12 bits dropped
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // 0x7f000000 | 0x00870000 | 0x0000e000 == 0x7f87e000, split into three
  // separately emitted immediates
  emit_testimm(temp, 0x7f000000);
  for (k = 0; k < sizeof(more_error_bits) / sizeof(more_error_bits[0]); k++)
    emit_testeqimm(temp, more_error_bits[k]);

  // set the summary bit when any tested bit was found
  emit_orrne_imm(temp, 0x80000000, temp);
}
2188
2189static void do_mfc2_31_one(u_int copr,signed char temp)
2190{
2191 emit_readword(&reg_cop2d[copr],temp);
2192 emit_lsls_imm(temp,16,temp);
2193 emit_cmovs_imm(0,temp);
2194 emit_cmpimm(temp,0xf80<<16);
2195 emit_andimm(temp,0xf80<<16,temp);
2196 emit_cmovae_imm(0xf80<<16,temp);
2197}
2198
2199static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2200{
2201 if (temp < 0) {
2202 host_tempreg_acquire();
2203 temp = HOST_TEMPREG;
2204 }
2205 do_mfc2_31_one(9,temp);
2206 emit_shrimm(temp,7+16,tl);
2207 do_mfc2_31_one(10,temp);
2208 emit_orrshr_imm(temp,2+16,tl);
2209 do_mfc2_31_one(11,temp);
2210 emit_orrshr_imm(temp,-3+16,tl);
2211 emit_writeword(tl,&reg_cop2d[29]);
2212 if (temp == HOST_TEMPREG)
2213 host_tempreg_release();
2214}
2215
2216static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
2217{
2218 // case 0x18: MULT
2219 // case 0x19: MULTU
2220 // case 0x1A: DIV
2221 // case 0x1B: DIVU
2222 // case 0x1C: DMULT
2223 // case 0x1D: DMULTU
2224 // case 0x1E: DDIV
2225 // case 0x1F: DDIVU
2226 if(dops[i].rs1&&dops[i].rs2)
2227 {
2228 if((dops[i].opcode2&4)==0) // 32-bit
2229 {
2230 if(dops[i].opcode2==0x18) // MULT
2231 {
2232 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2233 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2234 signed char hi=get_reg(i_regs->regmap,HIREG);
2235 signed char lo=get_reg(i_regs->regmap,LOREG);
2236 assert(m1>=0);
2237 assert(m2>=0);
2238 assert(hi>=0);
2239 assert(lo>=0);
2240 emit_smull(m1,m2,hi,lo);
2241 }
2242 if(dops[i].opcode2==0x19) // MULTU
2243 {
2244 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2245 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2246 signed char hi=get_reg(i_regs->regmap,HIREG);
2247 signed char lo=get_reg(i_regs->regmap,LOREG);
2248 assert(m1>=0);
2249 assert(m2>=0);
2250 assert(hi>=0);
2251 assert(lo>=0);
2252 emit_umull(m1,m2,hi,lo);
2253 }
2254 if(dops[i].opcode2==0x1A) // DIV
2255 {
2256 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2257 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2258 assert(d1>=0);
2259 assert(d2>=0);
2260 signed char quotient=get_reg(i_regs->regmap,LOREG);
2261 signed char remainder=get_reg(i_regs->regmap,HIREG);
2262 assert(quotient>=0);
2263 assert(remainder>=0);
2264 emit_movs(d1,remainder);
2265 emit_movimm(0xffffffff,quotient);
2266 emit_negmi(quotient,quotient); // .. quotient and ..
2267 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2268 emit_movs(d2,HOST_TEMPREG);
2269 emit_jeq(out+52); // Division by zero
2270 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2271#ifdef HAVE_ARMV5
2272 emit_clz(HOST_TEMPREG,quotient);
2273 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2274#else
2275 emit_movimm(0,quotient);
2276 emit_addpl_imm(quotient,1,quotient);
2277 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2278 emit_jns(out-2*4);
2279#endif
2280 emit_orimm(quotient,1<<31,quotient);
2281 emit_shr(quotient,quotient,quotient);
2282 emit_cmp(remainder,HOST_TEMPREG);
2283 emit_subcs(remainder,HOST_TEMPREG,remainder);
2284 emit_adcs(quotient,quotient,quotient);
2285 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2286 emit_jcc(out-16); // -4
2287 emit_teq(d1,d2);
2288 emit_negmi(quotient,quotient);
2289 emit_test(d1,d1);
2290 emit_negmi(remainder,remainder);
2291 }
2292 if(dops[i].opcode2==0x1B) // DIVU
2293 {
2294 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2295 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2296 assert(d1>=0);
2297 assert(d2>=0);
2298 signed char quotient=get_reg(i_regs->regmap,LOREG);
2299 signed char remainder=get_reg(i_regs->regmap,HIREG);
2300 assert(quotient>=0);
2301 assert(remainder>=0);
2302 emit_mov(d1,remainder);
2303 emit_movimm(0xffffffff,quotient); // div0 case
2304 emit_test(d2,d2);
2305 emit_jeq(out+40); // Division by zero
2306#ifdef HAVE_ARMV5
2307 emit_clz(d2,HOST_TEMPREG);
2308 emit_movimm(1<<31,quotient);
2309 emit_shl(d2,HOST_TEMPREG,d2);
2310#else
2311 emit_movimm(0,HOST_TEMPREG);
2312 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2313 emit_lslpls_imm(d2,1,d2);
2314 emit_jns(out-2*4);
2315 emit_movimm(1<<31,quotient);
2316#endif
2317 emit_shr(quotient,HOST_TEMPREG,quotient);
2318 emit_cmp(remainder,d2);
2319 emit_subcs(remainder,d2,remainder);
2320 emit_adcs(quotient,quotient,quotient);
2321 emit_shrcc_imm(d2,1,d2);
2322 emit_jcc(out-16); // -4
2323 }
2324 }
2325 else // 64-bit
2326 assert(0);
2327 }
2328 else
2329 {
2330 // Multiply by zero is zero.
2331 // MIPS does not have a divide by zero exception.
2332 // The result is undefined, we return zero.
2333 signed char hr=get_reg(i_regs->regmap,HIREG);
2334 signed char lr=get_reg(i_regs->regmap,LOREG);
2335 if(hr>=0) emit_zeroreg(hr);
2336 if(lr>=0) emit_zeroreg(lr);
2337 }
2338}
2339#define multdiv_assemble multdiv_assemble_arm
2340
2341static void do_jump_vaddr(int rs)
2342{
2343 emit_far_jump(jump_vaddr_reg[rs]);
2344}
2345
/*
 * Intentionally emits nothing on ARM.  On x86 this step puts the value
 * 0xf8 into the register; on ARM the hash can be done with a single
 * instruction (see do_rhash), so no preload is needed.
 */
static void do_preload_rhash(int r) {
  (void)r; // register number is not needed here
}
2350
2351static void do_preload_rhtbl(int ht) {
2352 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2353}
2354
/*
 * Emit the hash step: rh = rs & 0xf8.
 */
static void do_rhash(int rs,int rh) {
  enum { RHASH_MASK = 0xf8 };

  emit_andimm(rs, RHASH_MASK, rh);
}
2358
2359static void do_miniht_load(int ht,int rh) {
2360 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2361 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2362}
2363
/*
 * Emit the tail of a mini hash table lookup: compare rh with rs and, on a
 * match, load host register 15 (pc) from [ht+4]; otherwise fall through to
 * the generic jump_vaddr path for the register holding the address.
 *
 * With CORTEX_A8_BRANCH_PREDICTION_HACK the address is first moved to r7
 * so that the fallback always goes through the r7 handler.
 */
static void do_miniht_jump(int rs,int rh,int ht) {
  int vaddr_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  if (vaddr_reg != 7) {
    emit_mov(vaddr_reg, 7);
    vaddr_reg = 7;
  }
#endif
  do_jump_vaddr(vaddr_reg);
}
2374
2375static void do_miniht_insert(u_int return_address,int rt,int temp) {
2376 #ifndef HAVE_ARMV7
2377 emit_movimm(return_address,rt); // PC into link register
2378 add_to_linker(out,return_address,1);
2379 emit_pcreladdr(temp);
2380 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2381 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2382 #else
2383 emit_movw(return_address&0x0000FFFF,rt);
2384 add_to_linker(out,return_address,1);
2385 emit_pcreladdr(temp);
2386 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2387 emit_movt(return_address&0xFFFF0000,rt);
2388 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2389 #endif
2390}
2391
2392// CPU-architecture-specific initialization
2393static void arch_init(void)
2394{
2395 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2396 struct tramp_insns *ops = ndrc->tramp.ops;
2397 size_t i;
2398 assert(!(diff & 3));
2399 assert(diff < 0x1000);
2400 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2401 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2402 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2403 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2404}
2405
2406// vim:shiftwidth=2:expandtab