drc: try to support w^x platforms like iOS
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
// Statically allocated buffer of (1 << TARGET_SIZE_2) bytes, aligned to 4096.
// It is only defined in this file when BASE_ADDR_FIXED evaluates to zero.
// NOTE(review): presumably the buffer generated code is written into -
// confirm against the users of translation_cache / BASE_ADDR.
31#if !BASE_ADDR_FIXED
32char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
33#endif
34
35#ifndef __MACH__
36#define CALLER_SAVE_REGS 0x100f
37#else
38#define CALLER_SAVE_REGS 0x120f
39#endif
40
41#define unused __attribute__((unused))
42
// Globals defined outside this file.
// NOTE(review): presumably provided by the linkage assembly / dynarec core -
// confirm where each one is defined.
// emit_loadreg()/emit_storereg() in this file address guest state as an
// offset from &dynarec_local (asserted to be < 4096) relative to fp;
// cycle_count is one of the variables accessed that way (for CCREG).
43extern int cycle_count;
44extern int last_count;
45extern int pcaddr;
46extern int pending_exception;
47extern int branch_target;
48extern uint64_t readmem_dword;
49extern void *dynarec_local;
50extern u_int mini_ht[32][2];
51
52void indirect_jump_indexed();
53void indirect_jump();
54void do_interrupt();
55void jump_vaddr_r0();
56void jump_vaddr_r1();
57void jump_vaddr_r2();
58void jump_vaddr_r3();
59void jump_vaddr_r4();
60void jump_vaddr_r5();
61void jump_vaddr_r6();
62void jump_vaddr_r7();
63void jump_vaddr_r8();
64void jump_vaddr_r9();
65void jump_vaddr_r10();
66void jump_vaddr_r12();
67
68const u_int jump_vaddr_reg[16] = {
69 (int)jump_vaddr_r0,
70 (int)jump_vaddr_r1,
71 (int)jump_vaddr_r2,
72 (int)jump_vaddr_r3,
73 (int)jump_vaddr_r4,
74 (int)jump_vaddr_r5,
75 (int)jump_vaddr_r6,
76 (int)jump_vaddr_r7,
77 (int)jump_vaddr_r8,
78 (int)jump_vaddr_r9,
79 (int)jump_vaddr_r10,
80 0,
81 (int)jump_vaddr_r12,
82 0,
83 0,
84 0};
85
86void invalidate_addr_r0();
87void invalidate_addr_r1();
88void invalidate_addr_r2();
89void invalidate_addr_r3();
90void invalidate_addr_r4();
91void invalidate_addr_r5();
92void invalidate_addr_r6();
93void invalidate_addr_r7();
94void invalidate_addr_r8();
95void invalidate_addr_r9();
96void invalidate_addr_r10();
97void invalidate_addr_r12();
98
99const u_int invalidate_addr_reg[16] = {
100 (int)invalidate_addr_r0,
101 (int)invalidate_addr_r1,
102 (int)invalidate_addr_r2,
103 (int)invalidate_addr_r3,
104 (int)invalidate_addr_r4,
105 (int)invalidate_addr_r5,
106 (int)invalidate_addr_r6,
107 (int)invalidate_addr_r7,
108 (int)invalidate_addr_r8,
109 (int)invalidate_addr_r9,
110 (int)invalidate_addr_r10,
111 0,
112 (int)invalidate_addr_r12,
113 0,
114 0,
115 0};
116
// Array of 1<<(TARGET_SIZE_2-17) words, zero-initialised (static storage).
// NOTE(review): presumably a bitmap of translation-cache regions whose
// instruction cache still has to be flushed - confirm against its users.
117static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
118
119/* Linker */
120
121static void set_jump_target(int addr,u_int target)
122{
123 u_char *ptr=(u_char *)addr;
124 u_int *ptr2=(u_int *)ptr;
125 if(ptr[3]==0xe2) {
126 assert((target-(u_int)ptr2-8)<1024);
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
131 }
132 else if(ptr[3]==0x72) {
133 // generated by emit_jno_unlikely
134 if((target-(u_int)ptr2-8)<1024) {
135 assert((addr&3)==0);
136 assert((target&3)==0);
137 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
138 }
139 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
140 assert((addr&3)==0);
141 assert((target&3)==0);
142 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
143 }
144 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146 else {
147 assert((ptr[3]&0x0e)==0xa);
148 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
149 }
150}
151
152// This optionally copies the instruction from the target of the branch into
153// the space before the branch. Works, but the difference in speed is
154// usually insignificant.
// The whole function is compiled out by the "#if 0" below.
155#if 0
156static void set_jump_target_fillslot(int addr,u_int target,int copy)
157{
158 u_char *ptr=(u_char *)addr;
159 u_int *ptr2=(u_int *)ptr;
  // When copying is requested, the word before the branch must be 0xe28dd000
160 assert(!copy||ptr2[-1]==0xe28dd000);
161 if(ptr[3]==0xe2) {
  // Immediate form: copying is not supported, distance must fit in 12 bits
162 assert(!copy);
163 assert((target-(u_int)ptr2-8)<4096);
164 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
165 }
166 else {
167 assert((ptr[3]&0x0e)==0xa);
168 u_int target_insn=*(u_int *)target;
  // The checks below cancel the copy for certain classes of target instruction
169 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
170 copy=0;
171 }
172 if((target_insn&0x0c100000)==0x04100000) { // Load
173 copy=0;
174 }
175 if(target_insn&0x08000000) {
176 copy=0;
177 }
178 if(copy) {
  // Place the target's first instruction in the slot and branch past it
179 ptr2[-1]=target_insn;
180 target+=4;
181 }
182 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
183 }
184}
185#endif
186
187/* Literal pool */
188static void add_literal(int addr,int val)
189{
190 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
191 literals[literalcount][0]=addr;
192 literals[literalcount][1]=val;
193 literalcount++;
194}
195
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the address of the jumping instruction.
// The second word of the stub must be "ldr rx, [pc, #ofs]"; the pointer that
// this ldr would load (located at ldr address + ofs + 8) is the result.
static void *find_extjump_insn(void *stub)
{
  char *ldr_at=(char *)stub+4;
  const int ldr=*(int *)ldr_at;
  assert((ldr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]

  const u_int offset=ldr&0xfff;
  void **literal=(void **)(ldr_at+offset+8);
  return *literal;
}
206
// Find where an external branch is linked to, using the address of its stub:
// locate the branch instruction via find_extjump_insn() (the address that
// the stub loads as dyna_linker arg1), check that it is a branch, and
// return the address that branch jumps to.
static int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *branch=find_extjump_insn(stub);
  assert((*branch&0x0f000000)==0x0a000000);

  // Sign-extend the 24-bit word offset and scale it to bytes
  const int disp=(*branch<<8)>>6;
  return (int)branch+disp+8;
}
218
219// Find the "clean" entry point from a "dirty" entry point
220// by skipping past the call to verify_code
221static u_int get_clean_addr(int addr)
222{
223 int *ptr=(int *)addr;
224 #ifndef HAVE_ARMV7
225 ptr+=4;
226 #else
227 ptr+=6;
228 #endif
229 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
230 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
231 ptr++;
232 if((*ptr&0xFF000000)==0xea000000) {
233 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
234 }
235 return (u_int)ptr;
236}
237
238static int verify_dirty(u_int *ptr)
239{
240 #ifndef HAVE_ARMV7
241 // get from literal pool
242 assert((*ptr&0xFFFF0000)==0xe59f0000);
243 u_int offset=*ptr&0xfff;
244 u_int *l_ptr=(void *)ptr+offset+8;
245 u_int source=l_ptr[0];
246 u_int copy=l_ptr[1];
247 u_int len=l_ptr[2];
248 ptr+=4;
249 #else
250 // ARMv7 movw/movt
251 assert((*ptr&0xFFF00000)==0xe3000000);
252 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
253 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
254 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
255 ptr+=6;
256 #endif
257 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
258 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
259 //printf("verify_dirty: %x %x %x\n",source,copy,len);
260 return !memcmp((void *)source,(void *)copy,len);
261}
262
263// This doesn't necessarily find all clean entry points, just
264// guarantees that it's not dirty
265static int isclean(int addr)
266{
267 #ifndef HAVE_ARMV7
268 u_int *ptr=((u_int *)addr)+4;
269 #else
270 u_int *ptr=((u_int *)addr)+6;
271 #endif
272 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
273 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
274 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
277 return 1;
278}
279
280// get source that block at addr was compiled from (host pointers)
281static void get_bounds(int addr,u_int *start,u_int *end)
282{
283 u_int *ptr=(u_int *)addr;
284 #ifndef HAVE_ARMV7
285 // get from literal pool
286 assert((*ptr&0xFFFF0000)==0xe59f0000);
287 u_int offset=*ptr&0xfff;
288 u_int *l_ptr=(void *)ptr+offset+8;
289 u_int source=l_ptr[0];
290 //u_int copy=l_ptr[1];
291 u_int len=l_ptr[2];
292 ptr+=4;
293 #else
294 // ARMv7 movw/movt
295 assert((*ptr&0xFFF00000)==0xe3000000);
296 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
297 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
298 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
299 ptr+=6;
300 #endif
301 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
302 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
303 *start=source;
304 *end=source+len;
305}
306
307/* Register allocation */
308
// Allocate a host register for guest register `reg` at instruction index i,
// recording the mapping in cur->regmap. Whenever a host register is handed
// out, its bit in cur->dirty and cur->isconst is cleared.
// Does nothing if reg is flagged in cur->u (unneeded) or is already mapped.
// Calls exit(1) if no register could be found.
309// Note: registers are allocated clean (unmodified state)
310// if you intend to modify the register, you must call dirty_reg().
311static void alloc_reg(struct regstat *cur,int i,signed char reg)
312{
313 int r,hr;
  // Default choice: low 3 bits of the guest register number, with fixed
  // choices for CCREG and for PTEMP/FTEMP
314 int preferred_reg = (reg&7);
315 if(reg==CCREG) preferred_reg=HOST_CCREG;
316 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
317
318 // Don't allocate unused registers
319 if((cur->u>>reg)&1) return;
320
321 // see if it's already allocated
322 for(hr=0;hr<HOST_REGS;hr++)
323 {
324 if(cur->regmap[hr]==reg) return;
325 }
326
327 // Keep the same mapping if the register was already allocated in a loop
328 preferred_reg = loop_reg(i,reg,preferred_reg);
329
330 // Try to allocate the preferred register
331 if(cur->regmap[preferred_reg]==-1) {
332 cur->regmap[preferred_reg]=reg;
333 cur->dirty&=~(1<<preferred_reg);
334 cur->isconst&=~(1<<preferred_reg);
335 return;
336 }
  // The preferred register is occupied: take it anyway if its occupant is
  // unneeded (cur->u for values below 64, cur->uu for values 64 and above)
337 r=cur->regmap[preferred_reg];
338 if(r<64&&((cur->u>>r)&1)) {
339 cur->regmap[preferred_reg]=reg;
340 cur->dirty&=~(1<<preferred_reg);
341 cur->isconst&=~(1<<preferred_reg);
342 return;
343 }
344 if(r>=64&&((cur->uu>>(r&63))&1)) {
345 cur->regmap[preferred_reg]=reg;
346 cur->dirty&=~(1<<preferred_reg);
347 cur->isconst&=~(1<<preferred_reg);
348 return;
349 }
350
351 // Clear any unneeded registers
352 // We try to keep the mapping consistent, if possible, because it
353 // makes branches easier (especially loops). So we try to allocate
354 // first (see above) before removing old mappings. If this is not
355 // possible then go ahead and clear out the registers that are no
356 // longer needed.
  // Only the first such register (lowest host index) is released.
357 for(hr=0;hr<HOST_REGS;hr++)
358 {
359 r=cur->regmap[hr];
360 if(r>=0) {
361 if(r<64) {
362 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
363 }
364 else
365 {
366 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
367 }
368 }
369 }
370 // Try to allocate any available register, but prefer
371 // registers that have not been used recently.
  // "Recently" = mapped by the previous instruction (i-1) to one of its
  // rs1/rs2/rt1/rt2 operands.
372 if(i>0) {
373 for(hr=0;hr<HOST_REGS;hr++) {
374 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
375 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
376 cur->regmap[hr]=reg;
377 cur->dirty&=~(1<<hr);
378 cur->isconst&=~(1<<hr);
379 return;
380 }
381 }
382 }
383 }
384 // Try to allocate any available register
385 for(hr=0;hr<HOST_REGS;hr++) {
386 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
387 cur->regmap[hr]=reg;
388 cur->dirty&=~(1<<hr);
389 cur->isconst&=~(1<<hr);
390 return;
391 }
392 }
393
394 // Ok, now we have to evict someone
395 // Pick a register we hopefully won't need soon
  // hsn[] starts at 10 for every guest register and is then filled in by
  // lsn(). Candidates with the highest hsn value are evicted first.
  // NOTE(review): presumably lsn() stores how soon each register is needed
  // (smaller = sooner) - confirm against lsn().
396 u_char hsn[MAXREG+1];
397 memset(hsn,10,sizeof(hsn));
398 int j;
399 lsn(hsn,i,&preferred_reg);
400 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
401 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
402 if(i>0) {
403 // Don't evict the cycle count at entry points, otherwise the entry
404 // stub will have to write it.
405 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
406 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
  // First pass: only hsn values 10..3, and never a register used by the
  // previous instruction
407 for(j=10;j>=3;j--)
408 {
409 // Alloc preferred register if available
410 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
411 for(hr=0;hr<HOST_REGS;hr++) {
412 // Evict both parts of a 64-bit register
413 if((cur->regmap[hr]&63)==r) {
414 cur->regmap[hr]=-1;
415 cur->dirty&=~(1<<hr);
416 cur->isconst&=~(1<<hr);
417 }
418 }
419 cur->regmap[preferred_reg]=reg;
420 return;
421 }
422 for(r=1;r<=MAXREG;r++)
423 {
424 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
  // Upper halves (r+64) are tried before lower halves (r); HOST_CCREG is
  // only taken when j is below hsn[CCREG]
425 for(hr=0;hr<HOST_REGS;hr++) {
426 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
427 if(cur->regmap[hr]==r+64) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 for(hr=0;hr<HOST_REGS;hr++) {
436 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
437 if(cur->regmap[hr]==r) {
438 cur->regmap[hr]=reg;
439 cur->dirty&=~(1<<hr);
440 cur->isconst&=~(1<<hr);
441 return;
442 }
443 }
444 }
445 }
446 }
447 }
448 }
  // Last resort: evict anything, again highest hsn first, without the
  // restrictions applied in the first pass
449 for(j=10;j>=0;j--)
450 {
451 for(r=1;r<=MAXREG;r++)
452 {
453 if(hsn[r]==j) {
454 for(hr=0;hr<HOST_REGS;hr++) {
455 if(cur->regmap[hr]==r+64) {
456 cur->regmap[hr]=reg;
457 cur->dirty&=~(1<<hr);
458 cur->isconst&=~(1<<hr);
459 return;
460 }
461 }
462 for(hr=0;hr<HOST_REGS;hr++) {
463 if(cur->regmap[hr]==r) {
464 cur->regmap[hr]=reg;
465 cur->dirty&=~(1<<hr);
466 cur->isconst&=~(1<<hr);
467 return;
468 }
469 }
470 }
471 }
472 }
473 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
474}
475
// Allocate host registers for both halves of guest register `reg`.
// The lower half is allocated through alloc_reg(); the upper half is
// recorded in cur->regmap as reg|64 (reg+64 when searching).
// The upper half is skipped if reg is flagged in cur->uu or is already
// mapped. The search order mirrors alloc_reg(). Calls exit(1) on failure.
476static void alloc_reg64(struct regstat *cur,int i,signed char reg)
477{
  // Default host register for the upper half: 8 or 9
478 int preferred_reg = 8+(reg&1);
479 int r,hr;
480
481 // allocate the lower 32 bits
482 alloc_reg(cur,i,reg);
483
484 // Don't allocate unused registers
485 if((cur->uu>>reg)&1) return;
486
487 // see if the upper half is already allocated
488 for(hr=0;hr<HOST_REGS;hr++)
489 {
490 if(cur->regmap[hr]==reg+64) return;
491 }
492
493 // Keep the same mapping if the register was already allocated in a loop
494 preferred_reg = loop_reg(i,reg,preferred_reg);
495
496 // Try to allocate the preferred register
497 if(cur->regmap[preferred_reg]==-1) {
498 cur->regmap[preferred_reg]=reg|64;
499 cur->dirty&=~(1<<preferred_reg);
500 cur->isconst&=~(1<<preferred_reg);
501 return;
502 }
  // Occupied: take it anyway if the occupant is unneeded
503 r=cur->regmap[preferred_reg];
504 if(r<64&&((cur->u>>r)&1)) {
505 cur->regmap[preferred_reg]=reg|64;
506 cur->dirty&=~(1<<preferred_reg);
507 cur->isconst&=~(1<<preferred_reg);
508 return;
509 }
510 if(r>=64&&((cur->uu>>(r&63))&1)) {
511 cur->regmap[preferred_reg]=reg|64;
512 cur->dirty&=~(1<<preferred_reg);
513 cur->isconst&=~(1<<preferred_reg);
514 return;
515 }
516
517 // Clear any unneeded registers
518 // We try to keep the mapping consistent, if possible, because it
519 // makes branches easier (especially loops). So we try to allocate
520 // first (see above) before removing old mappings. If this is not
521 // possible then go ahead and clear out the registers that are no
522 // longer needed.
  // Scans from the highest host register down and releases only the first
  // match.
523 for(hr=HOST_REGS-1;hr>=0;hr--)
524 {
525 r=cur->regmap[hr];
526 if(r>=0) {
527 if(r<64) {
528 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
529 }
530 else
531 {
532 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
533 }
534 }
535 }
536 // Try to allocate any available register, but prefer
537 // registers that have not been used recently.
538 if(i>0) {
539 for(hr=0;hr<HOST_REGS;hr++) {
540 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
541 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
542 cur->regmap[hr]=reg|64;
543 cur->dirty&=~(1<<hr);
544 cur->isconst&=~(1<<hr);
545 return;
546 }
547 }
548 }
549 }
550 // Try to allocate any available register
551 for(hr=0;hr<HOST_REGS;hr++) {
552 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
553 cur->regmap[hr]=reg|64;
554 cur->dirty&=~(1<<hr);
555 cur->isconst&=~(1<<hr);
556 return;
557 }
558 }
559
560 // Ok, now we have to evict someone
561 // Pick a register we hopefully won't need soon
  // hsn[] starts at 10 everywhere and is filled in by lsn(); candidates with
  // the highest hsn value are evicted first.
562 u_char hsn[MAXREG+1];
563 memset(hsn,10,sizeof(hsn));
564 int j;
565 lsn(hsn,i,&preferred_reg);
566 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
567 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
568 if(i>0) {
569 // Don't evict the cycle count at entry points, otherwise the entry
570 // stub will have to write it.
571 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
572 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
  // First pass: hsn values 10..3 only, skipping operands of instruction i-1
573 for(j=10;j>=3;j--)
574 {
575 // Alloc preferred register if available
576 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
577 for(hr=0;hr<HOST_REGS;hr++) {
578 // Evict both parts of a 64-bit register
579 if((cur->regmap[hr]&63)==r) {
580 cur->regmap[hr]=-1;
581 cur->dirty&=~(1<<hr);
582 cur->isconst&=~(1<<hr);
583 }
584 }
585 cur->regmap[preferred_reg]=reg|64;
586 return;
587 }
588 for(r=1;r<=MAXREG;r++)
589 {
590 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
591 for(hr=0;hr<HOST_REGS;hr++) {
592 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
593 if(cur->regmap[hr]==r+64) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 for(hr=0;hr<HOST_REGS;hr++) {
602 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
603 if(cur->regmap[hr]==r) {
604 cur->regmap[hr]=reg|64;
605 cur->dirty&=~(1<<hr);
606 cur->isconst&=~(1<<hr);
607 return;
608 }
609 }
610 }
611 }
612 }
613 }
614 }
  // Last resort: evict anything, highest hsn first, without restrictions
615 for(j=10;j>=0;j--)
616 {
617 for(r=1;r<=MAXREG;r++)
618 {
619 if(hsn[r]==j) {
620 for(hr=0;hr<HOST_REGS;hr++) {
621 if(cur->regmap[hr]==r+64) {
622 cur->regmap[hr]=reg|64;
623 cur->dirty&=~(1<<hr);
624 cur->isconst&=~(1<<hr);
625 return;
626 }
627 }
628 for(hr=0;hr<HOST_REGS;hr++) {
629 if(cur->regmap[hr]==r) {
630 cur->regmap[hr]=reg|64;
631 cur->dirty&=~(1<<hr);
632 cur->isconst&=~(1<<hr);
633 return;
634 }
635 }
636 }
637 }
638 }
639 SysPrintf("This shouldn't happen");exit(1);
640}
641
642// Allocate a temporary register. This is done without regard to
643// dirty status or whether the register we request is on the unneeded list
644// Note: This will only allocate one register, even if called multiple times
// The chosen host register is recorded in cur->regmap with its dirty and
// isconst bits cleared. Calls exit(1) if nothing could be found.
645static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
646{
647 int r,hr;
  // No preference here; the variable is only handed to lsn() below
648 int preferred_reg = -1;
649
650 // see if it's already allocated
651 for(hr=0;hr<HOST_REGS;hr++)
652 {
653 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
654 }
655
656 // Try to allocate any available register
  // (searched from the highest host register downwards)
657 for(hr=HOST_REGS-1;hr>=0;hr--) {
658 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
659 cur->regmap[hr]=reg;
660 cur->dirty&=~(1<<hr);
661 cur->isconst&=~(1<<hr);
662 return;
663 }
664 }
665
666 // Find an unneeded register
  // The occupant must be unneeded in cur->u / cur->uu and, unless i==0, also
  // in unneeded_reg[i-1] / unneeded_reg_upper[i-1].
667 for(hr=HOST_REGS-1;hr>=0;hr--)
668 {
669 r=cur->regmap[hr];
670 if(r>=0) {
671 if(r<64) {
672 if((cur->u>>r)&1) {
673 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
674 cur->regmap[hr]=reg;
675 cur->dirty&=~(1<<hr);
676 cur->isconst&=~(1<<hr);
677 return;
678 }
679 }
680 }
681 else
682 {
683 if((cur->uu>>(r&63))&1) {
684 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
685 cur->regmap[hr]=reg;
686 cur->dirty&=~(1<<hr);
687 cur->isconst&=~(1<<hr);
688 return;
689 }
690 }
691 }
692 }
693 }
694
695 // Ok, now we have to evict someone
696 // Pick a register we hopefully won't need soon
697 // TODO: we might want to follow unconditional jumps here
698 // TODO: get rid of dupe code and make this into a function
  // hsn[] starts at 10 everywhere and is filled in by lsn(); candidates with
  // the highest hsn value are evicted first.
699 u_char hsn[MAXREG+1];
700 memset(hsn,10,sizeof(hsn));
701 int j;
702 lsn(hsn,i,&preferred_reg);
703 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
704 if(i>0) {
705 // Don't evict the cycle count at entry points, otherwise the entry
706 // stub will have to write it.
707 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
708 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
  // First pass: hsn values 10..3 only, skipping operands of instruction
  // i-1; HOST_CCREG is only taken while hsn[CCREG] is above 2
709 for(j=10;j>=3;j--)
710 {
711 for(r=1;r<=MAXREG;r++)
712 {
713 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
714 for(hr=0;hr<HOST_REGS;hr++) {
715 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
716 if(cur->regmap[hr]==r+64) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 for(hr=0;hr<HOST_REGS;hr++) {
725 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
726 if(cur->regmap[hr]==r) {
727 cur->regmap[hr]=reg;
728 cur->dirty&=~(1<<hr);
729 cur->isconst&=~(1<<hr);
730 return;
731 }
732 }
733 }
734 }
735 }
736 }
737 }
  // Last resort: evict anything, highest hsn first, without restrictions
738 for(j=10;j>=0;j--)
739 {
740 for(r=1;r<=MAXREG;r++)
741 {
742 if(hsn[r]==j) {
743 for(hr=0;hr<HOST_REGS;hr++) {
744 if(cur->regmap[hr]==r+64) {
745 cur->regmap[hr]=reg;
746 cur->dirty&=~(1<<hr);
747 cur->isconst&=~(1<<hr);
748 return;
749 }
750 }
751 for(hr=0;hr<HOST_REGS;hr++) {
752 if(cur->regmap[hr]==r) {
753 cur->regmap[hr]=reg;
754 cur->dirty&=~(1<<hr);
755 cur->isconst&=~(1<<hr);
756 return;
757 }
758 }
759 }
760 }
761 }
762 SysPrintf("This shouldn't happen");exit(1);
763}
764
765// Allocate a specific ARM register.
766static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
767{
768 int n;
769 int dirty=0;
770
771 // see if it's already allocated (and dealloc it)
772 for(n=0;n<HOST_REGS;n++)
773 {
774 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
775 dirty=(cur->dirty>>n)&1;
776 cur->regmap[n]=-1;
777 }
778 }
779
780 cur->regmap[hr]=reg;
781 cur->dirty&=~(1<<hr);
782 cur->dirty|=dirty<<hr;
783 cur->isconst&=~(1<<hr);
784}
785
786// Alloc cycle count into dedicated register
787static void alloc_cc(struct regstat *cur,int i)
788{
789 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
790}
791
792/* Special alloc */
793
794
795/* Assembler */
796
797static unused char regname[16][4] = {
798 "r0",
799 "r1",
800 "r2",
801 "r3",
802 "r4",
803 "r5",
804 "r6",
805 "r7",
806 "r8",
807 "r9",
808 "r10",
809 "fp",
810 "r12",
811 "sp",
812 "lr",
813 "pc"};
814
815static void output_w32(u_int word)
816{
817 *((u_int *)out)=word;
818 out+=4;
819}
820
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15 and rm in bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  const u_int rn_field=rn<<16;
  const u_int rd_field=rd<<12;
  return rn_field|rd_field|rm;
}
828
// Build the fields of an immediate-operand instruction word:
// rn in bits 16-19, rd in bits 12-15, an 8-bit immediate in bits 0-7 and a
// rotation derived from shift in bits 8-11. shift is the (even) left-shift
// applied to the immediate; it is encoded as ((32-shift)&30)<<7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  const u_int rot_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rot_field|imm;
}
837
// Try to express imm as an 8-bit value rotated by an even amount.
// On success the 12-bit operand field (rotation in bits 8-11, value in bits
// 0-7) is stored in *encoded and 1 is returned. On failure 0 is returned
// and *encoded is left at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded=0;
  if(imm==0) return 1;

  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
853
854static void genimm_checked(u_int imm,u_int *encoded)
855{
856 u_int ret=genimm(imm,encoded);
857 assert(ret);
858 (void)ret;
859}
860
861static u_int genjmp(u_int addr)
862{
863 int offset=addr-(int)out-8;
864 if(offset<-33554432||offset>=33554432) {
865 if (addr>2) {
866 SysPrintf("genjmp: out of range: %08x\n", offset);
867 exit(1);
868 }
869 return 0;
870 }
871 return ((u_int)offset>>2)&0xffffff;
872}
873
874static void emit_mov(int rs,int rt)
875{
876 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
877 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
878}
879
880static void emit_movs(int rs,int rt)
881{
882 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
883 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
884}
885
886static void emit_add(int rs1,int rs2,int rt)
887{
888 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
889 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
890}
891
892static void emit_adds(int rs1,int rs2,int rt)
893{
894 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
895 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
896}
897
898static void emit_adcs(int rs1,int rs2,int rt)
899{
900 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
901 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
902}
903
904static void emit_sbc(int rs1,int rs2,int rt)
905{
906 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
907 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
908}
909
910static void emit_sbcs(int rs1,int rs2,int rt)
911{
912 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
913 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
914}
915
916static void emit_neg(int rs, int rt)
917{
918 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
919 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
920}
921
922static void emit_negs(int rs, int rt)
923{
924 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
925 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
926}
927
928static void emit_sub(int rs1,int rs2,int rt)
929{
930 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
932}
933
934static void emit_subs(int rs1,int rs2,int rt)
935{
936 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
937 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
938}
939
940static void emit_zeroreg(int rt)
941{
942 assem_debug("mov %s,#0\n",regname[rt]);
943 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
944}
945
946static void emit_loadlp(u_int imm,u_int rt)
947{
948 add_literal((int)out,imm);
949 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
950 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
951}
952
953static void emit_movw(u_int imm,u_int rt)
954{
955 assert(imm<65536);
956 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
957 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
958}
959
960static void emit_movt(u_int imm,u_int rt)
961{
962 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
963 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
964}
965
966static void emit_movimm(u_int imm,u_int rt)
967{
968 u_int armval;
969 if(genimm(imm,&armval)) {
970 assem_debug("mov %s,#%d\n",regname[rt],imm);
971 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
972 }else if(genimm(~imm,&armval)) {
973 assem_debug("mvn %s,#%d\n",regname[rt],imm);
974 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
975 }else if(imm<65536) {
976 #ifndef HAVE_ARMV7
977 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
978 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
979 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
980 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
981 #else
982 emit_movw(imm,rt);
983 #endif
984 }else{
985 #ifndef HAVE_ARMV7
986 emit_loadlp(imm,rt);
987 #else
988 emit_movw(imm&0x0000FFFF,rt);
989 emit_movt(imm&0xFFFF0000,rt);
990 #endif
991 }
992}
993
994static void emit_pcreladdr(u_int rt)
995{
996 assem_debug("add %s,pc,#?\n",regname[rt]);
997 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
998}
999
1000static void emit_loadreg(int r, int hr)
1001{
1002 if(r&64) {
1003 SysPrintf("64bit load in 32bit mode!\n");
1004 assert(0);
1005 return;
1006 }
1007 if((r&63)==0)
1008 emit_zeroreg(hr);
1009 else {
1010 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1011 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1012 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1013 if(r==CCREG) addr=(int)&cycle_count;
1014 if(r==CSREG) addr=(int)&Status;
1015 if(r==FSREG) addr=(int)&FCR31;
1016 if(r==INVCP) addr=(int)&invc_ptr;
1017 u_int offset = addr-(u_int)&dynarec_local;
1018 assert(offset<4096);
1019 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1020 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1021 }
1022}
1023
1024static void emit_storereg(int r, int hr)
1025{
1026 if(r&64) {
1027 SysPrintf("64bit store in 32bit mode!\n");
1028 assert(0);
1029 return;
1030 }
1031 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1032 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1033 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1034 if(r==CCREG) addr=(int)&cycle_count;
1035 if(r==FSREG) addr=(int)&FCR31;
1036 u_int offset = addr-(u_int)&dynarec_local;
1037 assert(offset<4096);
1038 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1039 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1040}
1041
1042static void emit_test(int rs, int rt)
1043{
1044 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1045 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1046}
1047
1048static void emit_testimm(int rs,int imm)
1049{
1050 u_int armval;
1051 assem_debug("tst %s,#%d\n",regname[rs],imm);
1052 genimm_checked(imm,&armval);
1053 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1054}
1055
1056static void emit_testeqimm(int rs,int imm)
1057{
1058 u_int armval;
1059 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1060 genimm_checked(imm,&armval);
1061 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1062}
1063
1064static void emit_not(int rs,int rt)
1065{
1066 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1067 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1068}
1069
1070static void emit_mvnmi(int rs,int rt)
1071{
1072 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1073 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1074}
1075
1076static void emit_and(u_int rs1,u_int rs2,u_int rt)
1077{
1078 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1079 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1080}
1081
1082static void emit_or(u_int rs1,u_int rs2,u_int rt)
1083{
1084 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1085 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1086}
1087
1088static void emit_or_and_set_flags(int rs1,int rs2,int rt)
1089{
1090 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1091 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1092}
1093
1094static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1095{
1096 assert(rs<16);
1097 assert(rt<16);
1098 assert(imm<32);
1099 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1100 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1101}
1102
1103static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1104{
1105 assert(rs<16);
1106 assert(rt<16);
1107 assert(imm<32);
1108 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1109 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1110}
1111
1112static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1113{
1114 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1115 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1116}
1117
1118static void emit_addimm(u_int rs,int imm,u_int rt)
1119{
1120 assert(rs<16);
1121 assert(rt<16);
1122 if(imm!=0) {
1123 u_int armval;
1124 if(genimm(imm,&armval)) {
1125 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1126 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1127 }else if(genimm(-imm,&armval)) {
1128 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
1129 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1130 #ifdef HAVE_ARMV7
1131 }else if(rt!=rs&&(u_int)imm<65536) {
1132 emit_movw(imm&0x0000ffff,rt);
1133 emit_add(rs,rt,rt);
1134 }else if(rt!=rs&&(u_int)-imm<65536) {
1135 emit_movw(-imm&0x0000ffff,rt);
1136 emit_sub(rs,rt,rt);
1137 #endif
1138 }else if((u_int)-imm<65536) {
1139 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1140 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1141 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1142 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1143 }else {
1144 do {
1145 int shift = (ffs(imm) - 1) & ~1;
1146 int imm8 = imm & (0xff << shift);
1147 genimm_checked(imm8,&armval);
1148 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1149 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1150 rs = rt;
1151 imm &= ~imm8;
1152 }
1153 while (imm != 0);
1154 }
1155 }
1156 else if(rs!=rt) emit_mov(rs,rt);
1157}
1158
1159static void emit_addimm_and_set_flags(int imm,int rt)
1160{
1161 assert(imm>-65536&&imm<65536);
1162 u_int armval;
1163 if(genimm(imm,&armval)) {
1164 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1165 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1166 }else if(genimm(-imm,&armval)) {
1167 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1168 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1169 }else if(imm<0) {
1170 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1171 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1172 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1173 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1174 }else{
1175 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1176 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1177 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1178 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1179 }
1180}
1181
1182static void emit_addimm_no_flags(u_int imm,u_int rt)
1183{
1184 emit_addimm(rt,imm,rt);
1185}
1186
1187static void emit_addnop(u_int r)
1188{
1189 assert(r<16);
1190 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1191 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1192}
1193
1194static void emit_adcimm(u_int rs,int imm,u_int rt)
1195{
1196 u_int armval;
1197 genimm_checked(imm,&armval);
1198 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1199 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1200}
1201
1202static void emit_rscimm(int rs,int imm,u_int rt)
1203{
1204 assert(0);
1205 u_int armval;
1206 genimm_checked(imm,&armval);
1207 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1208 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1209}
1210
1211static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1212{
1213 // TODO: if(genimm(imm,&armval)) ...
1214 // else
1215 emit_movimm(imm,HOST_TEMPREG);
1216 emit_adds(HOST_TEMPREG,rsl,rtl);
1217 emit_adcimm(rsh,0,rth);
1218}
1219
1220static void emit_andimm(int rs,int imm,int rt)
1221{
1222 u_int armval;
1223 if(imm==0) {
1224 emit_zeroreg(rt);
1225 }else if(genimm(imm,&armval)) {
1226 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1227 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1228 }else if(genimm(~imm,&armval)) {
1229 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1230 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1231 }else if(imm==65535) {
1232 #ifndef HAVE_ARMV6
1233 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1234 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1235 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1236 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1237 #else
1238 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1239 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1240 #endif
1241 }else{
1242 assert(imm>0&&imm<65535);
1243 #ifndef HAVE_ARMV7
1244 assem_debug("mov r14,#%d\n",imm&0xFF00);
1245 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1246 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1247 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1248 #else
1249 emit_movw(imm,HOST_TEMPREG);
1250 #endif
1251 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1252 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1253 }
1254}
1255
1256static void emit_orimm(int rs,int imm,int rt)
1257{
1258 u_int armval;
1259 if(imm==0) {
1260 if(rs!=rt) emit_mov(rs,rt);
1261 }else if(genimm(imm,&armval)) {
1262 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1263 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1264 }else{
1265 assert(imm>0&&imm<65536);
1266 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1267 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1268 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1269 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1270 }
1271}
1272
1273static void emit_xorimm(int rs,int imm,int rt)
1274{
1275 u_int armval;
1276 if(imm==0) {
1277 if(rs!=rt) emit_mov(rs,rt);
1278 }else if(genimm(imm,&armval)) {
1279 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1280 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1281 }else{
1282 assert(imm>0&&imm<65536);
1283 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1284 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1285 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1286 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1287 }
1288}
1289
1290static void emit_shlimm(int rs,u_int imm,int rt)
1291{
1292 assert(imm>0);
1293 assert(imm<32);
1294 //if(imm==1) ...
1295 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1296 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1297}
1298
1299static void emit_lsls_imm(int rs,int imm,int rt)
1300{
1301 assert(imm>0);
1302 assert(imm<32);
1303 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1304 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1305}
1306
1307static unused void emit_lslpls_imm(int rs,int imm,int rt)
1308{
1309 assert(imm>0);
1310 assert(imm<32);
1311 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1312 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1313}
1314
1315static void emit_shrimm(int rs,u_int imm,int rt)
1316{
1317 assert(imm>0);
1318 assert(imm<32);
1319 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1320 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1321}
1322
1323static void emit_sarimm(int rs,u_int imm,int rt)
1324{
1325 assert(imm>0);
1326 assert(imm<32);
1327 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1328 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1329}
1330
1331static void emit_rorimm(int rs,u_int imm,int rt)
1332{
1333 assert(imm>0);
1334 assert(imm<32);
1335 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1336 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1337}
1338
1339static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1340{
1341 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1342 assert(imm>0);
1343 assert(imm<32);
1344 //if(imm==1) ...
1345 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1346 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1347 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1348 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1349}
1350
1351static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1352{
1353 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1354 assert(imm>0);
1355 assert(imm<32);
1356 //if(imm==1) ...
1357 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1358 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1359 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1360 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1361}
1362
// Sign-extend the low 16 bits of rs into rt.
// With HAVE_ARMV6 a single sxth is emitted; otherwise a shift-left /
// arithmetic-shift-right pair by 16.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  const u_int regs=rd_rn_rm(rt,0,rs);
  output_w32(0xe6bf0070|regs);
#else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
#endif
}
1373
// Sign-extend the low 8 bits of rs into rt.
// With HAVE_ARMV6 a single sxtb is emitted; otherwise a shift-left /
// arithmetic-shift-right pair by 24.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  const u_int regs=rd_rn_rm(rt,0,rs);
  output_w32(0xe6af0070|regs);
#else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
#endif
}
1384
1385static void emit_shl(u_int rs,u_int shift,u_int rt)
1386{
1387 assert(rs<16);
1388 assert(rt<16);
1389 assert(shift<16);
1390 //if(imm==1) ...
1391 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1392 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1393}
1394
1395static void emit_shr(u_int rs,u_int shift,u_int rt)
1396{
1397 assert(rs<16);
1398 assert(rt<16);
1399 assert(shift<16);
1400 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1401 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1402}
1403
1404static void emit_sar(u_int rs,u_int shift,u_int rt)
1405{
1406 assert(rs<16);
1407 assert(rt<16);
1408 assert(shift<16);
1409 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1410 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1411}
1412
1413static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1414{
1415 assert(rs<16);
1416 assert(rt<16);
1417 assert(shift<16);
1418 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1419 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1420}
1421
1422static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1423{
1424 assert(rs<16);
1425 assert(rt<16);
1426 assert(shift<16);
1427 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1428 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1429}
1430
1431static void emit_cmpimm(int rs,int imm)
1432{
1433 u_int armval;
1434 if(genimm(imm,&armval)) {
1435 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1436 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1437 }else if(genimm(-imm,&armval)) {
1438 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1439 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1440 }else if(imm>0) {
1441 assert(imm<65536);
1442 emit_movimm(imm,HOST_TEMPREG);
1443 assem_debug("cmp %s,r14\n",regname[rs]);
1444 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1445 }else{
1446 assert(imm>-65536);
1447 emit_movimm(-imm,HOST_TEMPREG);
1448 assem_debug("cmn %s,r14\n",regname[rs]);
1449 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1450 }
1451}
1452
1453static void emit_cmovne_imm(int imm,int rt)
1454{
1455 assem_debug("movne %s,#%d\n",regname[rt],imm);
1456 u_int armval;
1457 genimm_checked(imm,&armval);
1458 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1459}
1460
1461static void emit_cmovl_imm(int imm,int rt)
1462{
1463 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1464 u_int armval;
1465 genimm_checked(imm,&armval);
1466 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1467}
1468
1469static void emit_cmovb_imm(int imm,int rt)
1470{
1471 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1472 u_int armval;
1473 genimm_checked(imm,&armval);
1474 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1475}
1476
1477static void emit_cmovs_imm(int imm,int rt)
1478{
1479 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1480 u_int armval;
1481 genimm_checked(imm,&armval);
1482 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1483}
1484
1485static void emit_cmove_reg(int rs,int rt)
1486{
1487 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1488 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1489}
1490
1491static void emit_cmovne_reg(int rs,int rt)
1492{
1493 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1494 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1495}
1496
1497static void emit_cmovl_reg(int rs,int rt)
1498{
1499 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1500 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1501}
1502
1503static void emit_cmovs_reg(int rs,int rt)
1504{
1505 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1506 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1507}
1508
// Set rt to 1 if rs < imm (movlt after the compare), else 0.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  // rt can be cleared up front only when it is not also the source
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  // otherwise clear it after the compare has read rs
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1516
// Set rt to 1 if rs is below imm (movcc after the compare), else 0.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  // rt can be cleared up front only when it is not also the source
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  // otherwise clear it after the compare has read rs
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1524
// Set rt from a less-than comparison of the register pair rsh:rsl against
// an immediate. rt must not be rsh. The low word is compared first through
// emit_slti32, then the result is corrected from the high word.
// NOTE(review): the low-word compare goes through the signed emit_slti32;
// confirm that is intended when the high word is zero.
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // negative immediate: high word is checked against -1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  // non-negative immediate: high word is tested against zero / sign
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
1542
// Set rt from a below comparison of the register pair rsh:rsl against an
// immediate. rt must not be rsh. The low word is compared first through
// emit_sltiu32, then the result is overridden when the high word differs
// from the expected upper half (0 for imm>=0, -1 for imm<0).
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1558
1559static void emit_cmp(int rs,int rt)
1560{
1561 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1562 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1563}
1564
// Set rt to 1 if rs > 0, else 0.
static void emit_set_gz32(int rs, int rt)
{
  // compare against 1: "less than 1" means "not greater than zero"
  emit_cmpimm(rs,1);
  // default result is 1 ...
  emit_movimm(1,rt);
  // ... replaced by 0 on LT
  emit_cmovl_imm(0,rt);
}
1572
// Set rt to 1 if rs is non-zero; a zero value is left as it is.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) {
    // value already in place: only the flags are needed
    emit_test(rs,rs);
  } else {
    // copy and set flags in one go
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
1580
// Set rt from a greater-than-zero check of the register pair rsh:rsl.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  // start from the low-word answer
  emit_set_gz32(rsl,rt);
  // then let the high word override it: non-zero gives 1, negative gives 0
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
1589
// Set rt to 1 if either half of the pair rsh:rsl is non-zero.
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  // orrs merges both halves into rt and sets the flags from the result
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
1596
// Set rt to 1 if rs1 < rs2 (movlt after cmp), else 0.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  // rt can be cleared up front only when it is not one of the operands
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  // otherwise clear it after the compare has read the operands
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1605
// Set rt to 1 if rs1 is below rs2 (movcc after cmp), else 0.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  // rt can be cleared up front only when it is not one of the operands
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  // otherwise clear it after the compare has read the operands
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1614
1615static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1616{
1617 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1618 assert(u1!=rt);
1619 assert(u2!=rt);
1620 emit_cmp(l1,l2);
1621 emit_movimm(0,rt);
1622 emit_sbcs(u1,u2,HOST_TEMPREG);
1623 emit_cmovl_imm(1,rt);
1624}
1625
1626static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1627{
1628 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1629 assert(u1!=rt);
1630 assert(u2!=rt);
1631 emit_cmp(l1,l2);
1632 emit_movimm(0,rt);
1633 emit_sbcs(u1,u2,HOST_TEMPREG);
1634 emit_cmovb_imm(1,rt);
1635}
1636
1637static void emit_call(int a)
1638{
1639 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1640 u_int offset=genjmp(a);
1641 output_w32(0xeb000000|offset);
1642}
1643
1644static void emit_jmp(int a)
1645{
1646 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1647 u_int offset=genjmp(a);
1648 output_w32(0xea000000|offset);
1649}
1650
1651static void emit_jne(int a)
1652{
1653 assem_debug("bne %x\n",a);
1654 u_int offset=genjmp(a);
1655 output_w32(0x1a000000|offset);
1656}
1657
1658static void emit_jeq(int a)
1659{
1660 assem_debug("beq %x\n",a);
1661 u_int offset=genjmp(a);
1662 output_w32(0x0a000000|offset);
1663}
1664
1665static void emit_js(int a)
1666{
1667 assem_debug("bmi %x\n",a);
1668 u_int offset=genjmp(a);
1669 output_w32(0x4a000000|offset);
1670}
1671
1672static void emit_jns(int a)
1673{
1674 assem_debug("bpl %x\n",a);
1675 u_int offset=genjmp(a);
1676 output_w32(0x5a000000|offset);
1677}
1678
1679static void emit_jl(int a)
1680{
1681 assem_debug("blt %x\n",a);
1682 u_int offset=genjmp(a);
1683 output_w32(0xba000000|offset);
1684}
1685
1686static void emit_jge(int a)
1687{
1688 assem_debug("bge %x\n",a);
1689 u_int offset=genjmp(a);
1690 output_w32(0xaa000000|offset);
1691}
1692
1693static void emit_jno(int a)
1694{
1695 assem_debug("bvc %x\n",a);
1696 u_int offset=genjmp(a);
1697 output_w32(0x7a000000|offset);
1698}
1699
1700static void emit_jc(int a)
1701{
1702 assem_debug("bcs %x\n",a);
1703 u_int offset=genjmp(a);
1704 output_w32(0x2a000000|offset);
1705}
1706
1707static void emit_jcc(int a)
1708{
1709 assem_debug("bcc %x\n",a);
1710 u_int offset=genjmp(a);
1711 output_w32(0x3a000000|offset);
1712}
1713
1714static void emit_callreg(u_int r)
1715{
1716 assert(r<15);
1717 assem_debug("blx %s\n",regname[r]);
1718 output_w32(0xe12fff30|r);
1719}
1720
1721static void emit_jmpreg(u_int r)
1722{
1723 assem_debug("mov pc,%s\n",regname[r]);
1724 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1725}
1726
1727static void emit_readword_indexed(int offset, int rs, int rt)
1728{
1729 assert(offset>-4096&&offset<4096);
1730 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1731 if(offset>=0) {
1732 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1733 }else{
1734 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1735 }
1736}
1737
1738static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1739{
1740 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1741 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1742}
1743
1744static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1745{
1746 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1747 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1748}
1749
1750static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1751{
1752 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1753 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1754}
1755
1756static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1757{
1758 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1759 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1760}
1761
1762static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1763{
1764 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1765 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1766}
1767
1768static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1769{
1770 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1771 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1772}
1773
// Word load that optionally goes through a map register.
// map<0: plain rs+addr load. Otherwise addr must be 0 and the load is
// indexed by map scaled by 4.
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
1782
// Two-word load into rh (skipped when rh<0) and rl, optionally through a
// map register. Without a map the words come from addr and addr+4. With a
// map, rh must not be rs, and map is incremented by 1 between the loads.
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  // advance the (x4-scaled) index by one word for the second load
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1795
1796static void emit_movsbl_indexed(int offset, int rs, int rt)
1797{
1798 assert(offset>-256&&offset<256);
1799 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1800 if(offset>=0) {
1801 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1802 }else{
1803 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1804 }
1805}
1806
1807static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1808{
1809 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1810 else {
1811 if(addr==0) {
1812 emit_shlimm(map,2,map);
1813 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1814 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1815 }else{
1816 assert(addr>-256&&addr<256);
1817 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1818 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1819 emit_movsbl_indexed(addr, rt, rt);
1820 }
1821 }
1822}
1823
1824static void emit_movswl_indexed(int offset, int rs, int rt)
1825{
1826 assert(offset>-256&&offset<256);
1827 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1828 if(offset>=0) {
1829 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1830 }else{
1831 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1832 }
1833}
1834
1835static void emit_movzbl_indexed(int offset, int rs, int rt)
1836{
1837 assert(offset>-4096&&offset<4096);
1838 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1839 if(offset>=0) {
1840 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1841 }else{
1842 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1843 }
1844}
1845
1846static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1847{
1848 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1849 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1850}
1851
// Unsigned byte load that optionally goes through a map register.
// map<0: plain rs+addr load. Otherwise the load is indexed by map*4, with a
// non-zero addr first folded into rt (rt = rs + addr).
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1864
1865static void emit_movzwl_indexed(int offset, int rs, int rt)
1866{
1867 assert(offset>-256&&offset<256);
1868 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1869 if(offset>=0) {
1870 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1871 }else{
1872 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1873 }
1874}
1875
1876static void emit_ldrd(int offset, int rs, int rt)
1877{
1878 assert(offset>-256&&offset<256);
1879 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1880 if(offset>=0) {
1881 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1882 }else{
1883 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1884 }
1885}
1886
1887static void emit_readword(int addr, int rt)
1888{
1889 u_int offset = addr-(u_int)&dynarec_local;
1890 assert(offset<4096);
1891 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1892 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1893}
1894
1895static unused void emit_movsbl(int addr, int rt)
1896{
1897 u_int offset = addr-(u_int)&dynarec_local;
1898 assert(offset<256);
1899 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1900 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1901}
1902
1903static unused void emit_movswl(int addr, int rt)
1904{
1905 u_int offset = addr-(u_int)&dynarec_local;
1906 assert(offset<256);
1907 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1908 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1909}
1910
1911static unused void emit_movzbl(int addr, int rt)
1912{
1913 u_int offset = addr-(u_int)&dynarec_local;
1914 assert(offset<4096);
1915 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1916 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1917}
1918
1919static unused void emit_movzwl(int addr, int rt)
1920{
1921 u_int offset = addr-(u_int)&dynarec_local;
1922 assert(offset<256);
1923 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1924 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1925}
1926
1927static void emit_writeword_indexed(int rt, int offset, int rs)
1928{
1929 assert(offset>-4096&&offset<4096);
1930 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1931 if(offset>=0) {
1932 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1933 }else{
1934 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1935 }
1936}
1937
1938static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1939{
1940 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1941 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1942}
1943
// Word store that optionally goes through a map register.
// map<0: plain rs+addr store. Otherwise addr must be 0 and the store is
// indexed by map scaled by 4. The temp argument is not used here.
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
1952
// Two-word store of rh and rl, optionally through a map register.
// Without a map: rh (if >=0) goes to addr and rl to addr+4.
// With a map: rh is required. When temp is a register other than rs it
// receives map+1 and indexes the second store; otherwise rs itself is
// advanced by 4 between the two stores.
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1969
1970static void emit_writehword_indexed(int rt, int offset, int rs)
1971{
1972 assert(offset>-256&&offset<256);
1973 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1974 if(offset>=0) {
1975 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1976 }else{
1977 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1978 }
1979}
1980
1981static void emit_writebyte_indexed(int rt, int offset, int rs)
1982{
1983 assert(offset>-4096&&offset<4096);
1984 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1985 if(offset>=0) {
1986 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1987 }else{
1988 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1989 }
1990}
1991
1992static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1993{
1994 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1995 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1996}
1997
// Byte store that optionally goes through a map register.
// map<0: plain rs+addr store. Otherwise the store is indexed by map*4, with
// a non-zero addr first folded into temp (temp = rs + addr).
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
2010
2011static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2012{
2013 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2014 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2015}
2016
2017static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2018{
2019 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2020 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2021}
2022
2023static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2024{
2025 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2026 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2027}
2028
2029static void emit_writeword(int rt, int addr)
2030{
2031 u_int offset = addr-(u_int)&dynarec_local;
2032 assert(offset<4096);
2033 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2034 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2035}
2036
2037static unused void emit_writehword(int rt, int addr)
2038{
2039 u_int offset = addr-(u_int)&dynarec_local;
2040 assert(offset<256);
2041 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2042 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2043}
2044
2045static unused void emit_writebyte(int rt, int addr)
2046{
2047 u_int offset = addr-(u_int)&dynarec_local;
2048 assert(offset<4096);
2049 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2050 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2051}
2052
2053static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2054{
2055 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2056 assert(rs1<16);
2057 assert(rs2<16);
2058 assert(hi<16);
2059 assert(lo<16);
2060 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2061}
2062
2063static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2064{
2065 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2066 assert(rs1<16);
2067 assert(rs2<16);
2068 assert(hi<16);
2069 assert(lo<16);
2070 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2071}
2072
2073static void emit_clz(int rs,int rt)
2074{
2075 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2076 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2077}
2078
2079static void emit_subcs(int rs1,int rs2,int rt)
2080{
2081 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2082 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2083}
2084
2085static void emit_shrcc_imm(int rs,u_int imm,int rt)
2086{
2087 assert(imm>0);
2088 assert(imm<32);
2089 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2090 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2091}
2092
2093static void emit_shrne_imm(int rs,u_int imm,int rt)
2094{
2095 assert(imm>0);
2096 assert(imm<32);
2097 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2098 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2099}
2100
2101static void emit_negmi(int rs, int rt)
2102{
2103 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2104 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2105}
2106
2107static void emit_negsmi(int rs, int rt)
2108{
2109 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2110 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2111}
2112
2113static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2114{
2115 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2116 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2117}
2118
2119static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2120{
2121 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2122 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2123}
2124
2125static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2126{
2127 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2128 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2129}
2130
2131static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2132{
2133 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2134 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2135}
2136
2137static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2138{
2139 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2140 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2141}
2142
2143static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2144{
2145 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2146 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2147}
2148
2149static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2150{
2151 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2152 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2153}
2154
2155static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2156{
2157 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2158 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2159}
2160
2161static void emit_teq(int rs, int rt)
2162{
2163 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2164 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2165}
2166
2167static void emit_rsbimm(int rs, int imm, int rt)
2168{
2169 u_int armval;
2170 genimm_checked(imm,&armval);
2171 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2172 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2173}
2174
2175// Load 2 immediates optimizing for small code size
2176static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2177{
2178 emit_movimm(imm1,rt1);
2179 u_int armval;
2180 if(genimm(imm2-imm1,&armval)) {
2181 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2182 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2183 }else if(genimm(imm1-imm2,&armval)) {
2184 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2185 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2186 }
2187 else emit_movimm(imm2,rt2);
2188}
2189
2190// Conditionally select one of two immediates, optimizing for small code size
2191// This will only be called if HAVE_CMOV_IMM is defined
2192static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2193{
2194 u_int armval;
2195 if(genimm(imm2-imm1,&armval)) {
2196 emit_movimm(imm1,rt);
2197 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2198 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2199 }else if(genimm(imm1-imm2,&armval)) {
2200 emit_movimm(imm1,rt);
2201 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2202 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2203 }
2204 else {
2205 #ifndef HAVE_ARMV7
2206 emit_movimm(imm1,rt);
2207 add_literal((int)out,imm2);
2208 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2209 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2210 #else
2211 emit_movw(imm1&0x0000FFFF,rt);
2212 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2213 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2214 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2215 }
2216 emit_movt(imm1&0xFFFF0000,rt);
2217 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2218 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2219 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2220 }
2221 #endif
2222 }
2223}
2224
2225// special case for checking invalid_code
2226static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2227{
2228 assert(imm<128&&imm>=0);
2229 assert(r>=0&&r<16);
2230 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2231 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2232 emit_cmpimm(HOST_TEMPREG,imm);
2233}
2234
2235static void emit_callne(int a)
2236{
2237 assem_debug("blne %x\n",a);
2238 u_int offset=genjmp(a);
2239 output_w32(0x1b000000|offset);
2240}
2241
2242// Used to preload hash table entries
2243static unused void emit_prefetchreg(int r)
2244{
2245 assem_debug("pld %s\n",regname[r]);
2246 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2247}
2248
2249// Special case for mini_ht
2250static void emit_ldreq_indexed(int rs, u_int offset, int rt)
2251{
2252 assert(offset<4096);
2253 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2254 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2255}
2256
2257static unused void emit_bicne_imm(int rs,int imm,int rt)
2258{
2259 u_int armval;
2260 genimm_checked(imm,&armval);
2261 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2262 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2263}
2264
2265static unused void emit_biccs_imm(int rs,int imm,int rt)
2266{
2267 u_int armval;
2268 genimm_checked(imm,&armval);
2269 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2270 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2271}
2272
2273static unused void emit_bicvc_imm(int rs,int imm,int rt)
2274{
2275 u_int armval;
2276 genimm_checked(imm,&armval);
2277 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2278 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2279}
2280
2281static unused void emit_bichi_imm(int rs,int imm,int rt)
2282{
2283 u_int armval;
2284 genimm_checked(imm,&armval);
2285 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2286 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2287}
2288
2289static unused void emit_orrvs_imm(int rs,int imm,int rt)
2290{
2291 u_int armval;
2292 genimm_checked(imm,&armval);
2293 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2294 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2295}
2296
2297static void emit_orrne_imm(int rs,int imm,int rt)
2298{
2299 u_int armval;
2300 genimm_checked(imm,&armval);
2301 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2302 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2303}
2304
2305static void emit_andne_imm(int rs,int imm,int rt)
2306{
2307 u_int armval;
2308 genimm_checked(imm,&armval);
2309 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2310 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2311}
2312
2313static unused void emit_addpl_imm(int rs,int imm,int rt)
2314{
2315 u_int armval;
2316 genimm_checked(imm,&armval);
2317 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2318 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2319}
2320
2321static void emit_jno_unlikely(int a)
2322{
2323 //emit_jno(a);
2324 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2325 output_w32(0x72800000|rd_rn_rm(15,15,0));
2326}
2327
2328static void save_regs_all(u_int reglist)
2329{
2330 int i;
2331 if(!reglist) return;
2332 assem_debug("stmia fp,{");
2333 for(i=0;i<16;i++)
2334 if(reglist&(1<<i))
2335 assem_debug("r%d,",i);
2336 assem_debug("}\n");
2337 output_w32(0xe88b0000|reglist);
2338}
2339
2340static void restore_regs_all(u_int reglist)
2341{
2342 int i;
2343 if(!reglist) return;
2344 assem_debug("ldmia fp,{");
2345 for(i=0;i<16;i++)
2346 if(reglist&(1<<i))
2347 assem_debug("r%d,",i);
2348 assem_debug("}\n");
2349 output_w32(0xe89b0000|reglist);
2350}
2351
2352// Save registers before function call
2353static void save_regs(u_int reglist)
2354{
2355 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
2356 save_regs_all(reglist);
2357}
2358
2359// Restore registers after function call
2360static void restore_regs(u_int reglist)
2361{
2362 reglist&=CALLER_SAVE_REGS;
2363 restore_regs_all(reglist);
2364}
2365
2366/* Stubs/epilogue */
2367
2368static void literal_pool(int n)
2369{
2370 if(!literalcount) return;
2371 if(n) {
2372 if((int)out-literals[0][0]<4096-n) return;
2373 }
2374 u_int *ptr;
2375 int i;
2376 for(i=0;i<literalcount;i++)
2377 {
2378 u_int l_addr=(u_int)out;
2379 int j;
2380 for(j=0;j<i;j++) {
2381 if(literals[j][1]==literals[i][1]) {
2382 //printf("dup %08x\n",literals[i][1]);
2383 l_addr=literals[j][0];
2384 break;
2385 }
2386 }
2387 ptr=(u_int *)literals[i][0];
2388 u_int offset=l_addr-(u_int)ptr-8;
2389 assert(offset<4096);
2390 assert(!(offset&3));
2391 *ptr|=offset;
2392 if(l_addr==(u_int)out) {
2393 literals[i][0]=l_addr; // remember for dupes
2394 output_w32(literals[i][1]);
2395 }
2396 }
2397 literalcount=0;
2398}
2399
2400static void literal_pool_jumpover(int n)
2401{
2402 if(!literalcount) return;
2403 if(n) {
2404 if((int)out-literals[0][0]<4096-n) return;
2405 }
2406 int jaddr=(int)out;
2407 emit_jmp(0);
2408 literal_pool(0);
2409 set_jump_target(jaddr,(int)out);
2410}
2411
2412static void emit_extjump2(u_int addr, int target, int linker)
2413{
2414 u_char *ptr=(u_char *)addr;
2415 assert((ptr[3]&0x0e)==0xa);
2416 (void)ptr;
2417
2418 emit_loadlp(target,0);
2419 emit_loadlp(addr,1);
2420 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2421 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2422//DEBUG >
2423#ifdef DEBUG_CYCLE_COUNT
2424 emit_readword((int)&last_count,ECX);
2425 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2426 emit_readword((int)&next_interupt,ECX);
2427 emit_writeword(HOST_CCREG,(int)&Count);
2428 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2429 emit_writeword(ECX,(int)&last_count);
2430#endif
2431//DEBUG <
2432 emit_jmp(linker);
2433}
2434
2435static void emit_extjump(int addr, int target)
2436{
2437 emit_extjump2(addr, target, (int)dyna_linker);
2438}
2439
2440static void emit_extjump_ds(int addr, int target)
2441{
2442 emit_extjump2(addr, target, (int)dyna_linker_ds);
2443}
2444
2445// put rt_val into rt, potentially making use of rs with value rs_val
2446static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2447{
2448 u_int armval;
2449 int diff;
2450 if(genimm(rt_val,&armval)) {
2451 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2452 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2453 return;
2454 }
2455 if(genimm(~rt_val,&armval)) {
2456 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2457 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2458 return;
2459 }
2460 diff=rt_val-rs_val;
2461 if(genimm(diff,&armval)) {
2462 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2463 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2464 return;
2465 }else if(genimm(-diff,&armval)) {
2466 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2467 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2468 return;
2469 }
2470 emit_movimm(rt_val,rt);
2471}
2472
// return 1 if emit_movimm_from() can do its job cheaply, i.e. v2 can be
// derived from v1 with a single add or sub.
//
// The difference (or its negation) qualifies when, after dropping
// trailing pairs of zero bits, what remains is below 0x100.
// Equal values always qualify.
//
// All arithmetic is done on unsigned values so that negating a difference
// of 0x80000000 is well defined (negating INT_MIN as a signed int is not).
static int is_similar_value(u_int v1,u_int v2)
{
  u_int diff;
  u_int xs;
  if(v1==v2) return 1;
  diff=v2-v1;
  // v2 = v1 + diff
  for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2)
    ;
  if(xs<0x100) return 1;
  // v2 = v1 - (-diff)
  for(xs=0u-diff;xs!=0&&(xs&3)==0;xs>>=2)
    ;
  if(xs<0x100) return 1;
  return 0;
}
2488
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "nothing to move" for that argument.
// trashes r2 (used as scratch when the two values have to be swapped)
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap: the arguments sit in each other's destination
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // second argument lives in r0: move it out before r0 is overwritten
      emit_mov(a1,1);
      if(a0>=0) emit_mov(a0,0);
      return;
    }
  }
  // no conflict: move each argument unless it is already in place
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2505
2506static void mov_loadtype_adj(int type,int rs,int rt)
2507{
2508 switch(type) {
2509 case LOADB_STUB: emit_signextend8(rs,rt); break;
2510 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2511 case LOADH_STUB: emit_signextend16(rs,rt); break;
2512 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2513 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2514 default: assert(0);
2515 }
2516}
2517
2518#include "pcsxmem.h"
2519#include "pcsxmem_inline.c"
2520
2521static void do_readstub(int n)
2522{
2523 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2524 literal_pool(256);
2525 set_jump_target(stubs[n][1],(int)out);
2526 int type=stubs[n][0];
2527 int i=stubs[n][3];
2528 int rs=stubs[n][4];
2529 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2530 u_int reglist=stubs[n][7];
2531 signed char *i_regmap=i_regs->regmap;
2532 int rt;
2533 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2534 rt=get_reg(i_regmap,FTEMP);
2535 }else{
2536 rt=get_reg(i_regmap,rt1[i]);
2537 }
2538 assert(rs>=0);
2539 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2540 reglist|=(1<<rs);
2541 for(r=0;r<=12;r++) {
2542 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2543 temp=r; break;
2544 }
2545 }
2546 if(rt>=0&&rt1[i]!=0)
2547 reglist&=~(1<<rt);
2548 if(temp==-1) {
2549 save_regs(reglist);
2550 regs_saved=1;
2551 temp=(rs==0)?2:0;
2552 }
2553 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2554 temp2=1;
2555 emit_readword((int)&mem_rtab,temp);
2556 emit_shrimm(rs,12,temp2);
2557 emit_readword_dualindexedx4(temp,temp2,temp2);
2558 emit_lsls_imm(temp2,1,temp2);
2559 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2560 switch(type) {
2561 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2562 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2563 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2564 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2565 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2566 }
2567 }
2568 if(regs_saved) {
2569 restore_jump=(int)out;
2570 emit_jcc(0); // jump to reg restore
2571 }
2572 else
2573 emit_jcc(stubs[n][2]); // return address
2574
2575 if(!regs_saved)
2576 save_regs(reglist);
2577 int handler=0;
2578 if(type==LOADB_STUB||type==LOADBU_STUB)
2579 handler=(int)jump_handler_read8;
2580 if(type==LOADH_STUB||type==LOADHU_STUB)
2581 handler=(int)jump_handler_read16;
2582 if(type==LOADW_STUB)
2583 handler=(int)jump_handler_read32;
2584 assert(handler!=0);
2585 pass_args(rs,temp2);
2586 int cc=get_reg(i_regmap,CCREG);
2587 if(cc<0)
2588 emit_loadreg(CCREG,2);
2589 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
2590 emit_call(handler);
2591 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2592 mov_loadtype_adj(type,0,rt);
2593 }
2594 if(restore_jump)
2595 set_jump_target(restore_jump,(int)out);
2596 restore_regs(reglist);
2597 emit_jmp(stubs[n][2]); // return address
2598}
2599
2600// return memhandler, or get directly accessable address and return 0
2601static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2602{
2603 u_int l1,l2=0;
2604 l1=((u_int *)table)[addr>>12];
2605 if((l1&(1<<31))==0) {
2606 u_int v=l1<<1;
2607 *addr_host=v+addr;
2608 return 0;
2609 }
2610 else {
2611 l1<<=1;
2612 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2613 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2614 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
2615 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2616 else
2617 l2=((u_int *)l1)[(addr&0xfff)/4];
2618 if((l2&(1<<31))==0) {
2619 u_int v=l2<<1;
2620 *addr_host=v+(addr&0xfff);
2621 return 0;
2622 }
2623 return l2<<1;
2624 }
2625}
2626
2627static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2628{
2629 int rs=get_reg(regmap,target);
2630 int rt=get_reg(regmap,target);
2631 if(rs<0) rs=get_reg(regmap,-1);
2632 assert(rs>=0);
2633 u_int handler,host_addr=0,is_dynamic,far_call=0;
2634 int cc=get_reg(regmap,CCREG);
2635 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2636 return;
2637 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2638 if (handler==0) {
2639 if(rt<0||rt1[i]==0)
2640 return;
2641 if(addr!=host_addr)
2642 emit_movimm_from(addr,rs,host_addr,rs);
2643 switch(type) {
2644 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2645 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2646 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2647 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2648 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2649 default: assert(0);
2650 }
2651 return;
2652 }
2653 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2654 if(is_dynamic) {
2655 if(type==LOADB_STUB||type==LOADBU_STUB)
2656 handler=(int)jump_handler_read8;
2657 if(type==LOADH_STUB||type==LOADHU_STUB)
2658 handler=(int)jump_handler_read16;
2659 if(type==LOADW_STUB)
2660 handler=(int)jump_handler_read32;
2661 }
2662
2663 // call a memhandler
2664 if(rt>=0&&rt1[i]!=0)
2665 reglist&=~(1<<rt);
2666 save_regs(reglist);
2667 if(target==0)
2668 emit_movimm(addr,0);
2669 else if(rs!=0)
2670 emit_mov(rs,0);
2671 int offset=(int)handler-(int)out-8;
2672 if(offset<-33554432||offset>=33554432) {
2673 // unreachable memhandler, a plugin func perhaps
2674 emit_movimm(handler,12);
2675 far_call=1;
2676 }
2677 if(cc<0)
2678 emit_loadreg(CCREG,2);
2679 if(is_dynamic) {
2680 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2681 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2682 }
2683 else {
2684 emit_readword((int)&last_count,3);
2685 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2686 emit_add(2,3,2);
2687 emit_writeword(2,(int)&Count);
2688 }
2689
2690 if(far_call)
2691 emit_callreg(12);
2692 else
2693 emit_call(handler);
2694
2695 if(rt>=0&&rt1[i]!=0) {
2696 switch(type) {
2697 case LOADB_STUB: emit_signextend8(0,rt); break;
2698 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2699 case LOADH_STUB: emit_signextend16(0,rt); break;
2700 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2701 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2702 default: assert(0);
2703 }
2704 }
2705 restore_regs(reglist);
2706}
2707
2708static void do_writestub(int n)
2709{
2710 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2711 literal_pool(256);
2712 set_jump_target(stubs[n][1],(int)out);
2713 int type=stubs[n][0];
2714 int i=stubs[n][3];
2715 int rs=stubs[n][4];
2716 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2717 u_int reglist=stubs[n][7];
2718 signed char *i_regmap=i_regs->regmap;
2719 int rt,r;
2720 if(itype[i]==C1LS||itype[i]==C2LS) {
2721 rt=get_reg(i_regmap,r=FTEMP);
2722 }else{
2723 rt=get_reg(i_regmap,r=rs2[i]);
2724 }
2725 assert(rs>=0);
2726 assert(rt>=0);
2727 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
2728 int reglist2=reglist|(1<<rs)|(1<<rt);
2729 for(rtmp=0;rtmp<=12;rtmp++) {
2730 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2731 temp=rtmp; break;
2732 }
2733 }
2734 if(temp==-1) {
2735 save_regs(reglist);
2736 regs_saved=1;
2737 for(rtmp=0;rtmp<=3;rtmp++)
2738 if(rtmp!=rs&&rtmp!=rt)
2739 {temp=rtmp;break;}
2740 }
2741 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2742 temp2=3;
2743 emit_readword((int)&mem_wtab,temp);
2744 emit_shrimm(rs,12,temp2);
2745 emit_readword_dualindexedx4(temp,temp2,temp2);
2746 emit_lsls_imm(temp2,1,temp2);
2747 switch(type) {
2748 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2749 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2750 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2751 default: assert(0);
2752 }
2753 if(regs_saved) {
2754 restore_jump=(int)out;
2755 emit_jcc(0); // jump to reg restore
2756 }
2757 else
2758 emit_jcc(stubs[n][2]); // return address (invcode check)
2759
2760 if(!regs_saved)
2761 save_regs(reglist);
2762 int handler=0;
2763 switch(type) {
2764 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2765 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2766 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2767 }
2768 assert(handler!=0);
2769 pass_args(rs,rt);
2770 if(temp2!=3)
2771 emit_mov(temp2,3);
2772 int cc=get_reg(i_regmap,CCREG);
2773 if(cc<0)
2774 emit_loadreg(CCREG,2);
2775 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
2776 // returns new cycle_count
2777 emit_call(handler);
2778 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
2779 if(cc<0)
2780 emit_storereg(CCREG,2);
2781 if(restore_jump)
2782 set_jump_target(restore_jump,(int)out);
2783 restore_regs(reglist);
2784 ra=stubs[n][2];
2785 emit_jmp(ra);
2786}
2787
2788static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2789{
2790 int rs=get_reg(regmap,-1);
2791 int rt=get_reg(regmap,target);
2792 assert(rs>=0);
2793 assert(rt>=0);
2794 u_int handler,host_addr=0;
2795 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2796 if (handler==0) {
2797 if(addr!=host_addr)
2798 emit_movimm_from(addr,rs,host_addr,rs);
2799 switch(type) {
2800 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2801 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2802 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2803 default: assert(0);
2804 }
2805 return;
2806 }
2807
2808 // call a memhandler
2809 save_regs(reglist);
2810 pass_args(rs,rt);
2811 int cc=get_reg(regmap,CCREG);
2812 if(cc<0)
2813 emit_loadreg(CCREG,2);
2814 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2815 emit_movimm(handler,3);
2816 // returns new cycle_count
2817 emit_call((int)jump_handler_write_h);
2818 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2819 if(cc<0)
2820 emit_storereg(CCREG,2);
2821 restore_regs(reglist);
2822}
2823
2824static void do_unalignedwritestub(int n)
2825{
2826 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2827 literal_pool(256);
2828 set_jump_target(stubs[n][1],(int)out);
2829
2830 int i=stubs[n][3];
2831 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2832 int addr=stubs[n][5];
2833 u_int reglist=stubs[n][7];
2834 signed char *i_regmap=i_regs->regmap;
2835 int temp2=get_reg(i_regmap,FTEMP);
2836 int rt;
2837 rt=get_reg(i_regmap,rs2[i]);
2838 assert(rt>=0);
2839 assert(addr>=0);
2840 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2841 reglist|=(1<<addr);
2842 reglist&=~(1<<temp2);
2843
2844#if 1
2845 // don't bother with it and call write handler
2846 save_regs(reglist);
2847 pass_args(addr,rt);
2848 int cc=get_reg(i_regmap,CCREG);
2849 if(cc<0)
2850 emit_loadreg(CCREG,2);
2851 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
2852 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2853 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
2854 if(cc<0)
2855 emit_storereg(CCREG,2);
2856 restore_regs(reglist);
2857 emit_jmp(stubs[n][2]); // return address
2858#else
2859 emit_andimm(addr,0xfffffffc,temp2);
2860 emit_writeword(temp2,(int)&address);
2861
2862 save_regs(reglist);
2863 emit_shrimm(addr,16,1);
2864 int cc=get_reg(i_regmap,CCREG);
2865 if(cc<0) {
2866 emit_loadreg(CCREG,2);
2867 }
2868 emit_movimm((u_int)readmem,0);
2869 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2870 emit_call((int)&indirect_jump_indexed);
2871 restore_regs(reglist);
2872
2873 emit_readword((int)&readmem_dword,temp2);
2874 int temp=addr; //hmh
2875 emit_shlimm(addr,3,temp);
2876 emit_andimm(temp,24,temp);
2877#ifdef BIG_ENDIAN_MIPS
2878 if (opcode[i]==0x2e) // SWR
2879#else
2880 if (opcode[i]==0x2a) // SWL
2881#endif
2882 emit_xorimm(temp,24,temp);
2883 emit_movimm(-1,HOST_TEMPREG);
2884 if (opcode[i]==0x2a) { // SWL
2885 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2886 emit_orrshr(rt,temp,temp2);
2887 }else{
2888 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2889 emit_orrshl(rt,temp,temp2);
2890 }
2891 emit_readword((int)&address,addr);
2892 emit_writeword(temp2,(int)&word);
2893 //save_regs(reglist); // don't need to, no state changes
2894 emit_shrimm(addr,16,1);
2895 emit_movimm((u_int)writemem,0);
2896 //emit_call((int)&indirect_jump_indexed);
2897 emit_mov(15,14);
2898 emit_readword_dualindexedx4(0,1,15);
2899 emit_readword((int)&Count,HOST_TEMPREG);
2900 emit_readword((int)&next_interupt,2);
2901 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2902 emit_writeword(2,(int)&last_count);
2903 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2904 if(cc<0) {
2905 emit_storereg(CCREG,HOST_TEMPREG);
2906 }
2907 restore_regs(reglist);
2908 emit_jmp(stubs[n][2]); // return address
2909#endif
2910}
2911
2912static void do_invstub(int n)
2913{
2914 literal_pool(20);
2915 u_int reglist=stubs[n][3];
2916 set_jump_target(stubs[n][1],(int)out);
2917 save_regs(reglist);
2918 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2919 emit_call((int)&invalidate_addr);
2920 restore_regs(reglist);
2921 emit_jmp(stubs[n][2]); // return address
2922}
2923
2924int do_dirty_stub(int i)
2925{
2926 assem_debug("do_dirty_stub %x\n",start+i*4);
2927 u_int addr=(u_int)source;
2928 // Careful about the code output here, verify_dirty needs to parse it.
2929 #ifndef HAVE_ARMV7
2930 emit_loadlp(addr,1);
2931 emit_loadlp((int)copy,2);
2932 emit_loadlp(slen*4,3);
2933 #else
2934 emit_movw(addr&0x0000FFFF,1);
2935 emit_movw(((u_int)copy)&0x0000FFFF,2);
2936 emit_movt(addr&0xFFFF0000,1);
2937 emit_movt(((u_int)copy)&0xFFFF0000,2);
2938 emit_movw(slen*4,3);
2939 #endif
2940 emit_movimm(start+i*4,0);
2941 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2942 int entry=(int)out;
2943 load_regs_entry(i);
2944 if(entry==(int)out) entry=instr_addr[i];
2945 emit_jmp(instr_addr[i]);
2946 return entry;
2947}
2948
2949static void do_dirty_stub_ds()
2950{
2951 // Careful about the code output here, verify_dirty needs to parse it.
2952 #ifndef HAVE_ARMV7
2953 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2954 emit_loadlp((int)copy,2);
2955 emit_loadlp(slen*4,3);
2956 #else
2957 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2958 emit_movw(((u_int)copy)&0x0000FFFF,2);
2959 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2960 emit_movt(((u_int)copy)&0xFFFF0000,2);
2961 emit_movw(slen*4,3);
2962 #endif
2963 emit_movimm(start+1,0);
2964 emit_call((int)&verify_code_ds);
2965}
2966
2967static void do_cop1stub(int n)
2968{
2969 literal_pool(256);
2970 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2971 set_jump_target(stubs[n][1],(int)out);
2972 int i=stubs[n][3];
2973// int rs=stubs[n][4];
2974 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2975 int ds=stubs[n][6];
2976 if(!ds) {
2977 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2978 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2979 }
2980 //else {printf("fp exception in delay slot\n");}
2981 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2982 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2983 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2984 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2985 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2986}
2987
2988/* Special assem */
2989
2990static void shift_assemble_arm(int i,struct regstat *i_regs)
2991{
2992 if(rt1[i]) {
2993 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2994 {
2995 signed char s,t,shift;
2996 t=get_reg(i_regs->regmap,rt1[i]);
2997 s=get_reg(i_regs->regmap,rs1[i]);
2998 shift=get_reg(i_regs->regmap,rs2[i]);
2999 if(t>=0){
3000 if(rs1[i]==0)
3001 {
3002 emit_zeroreg(t);
3003 }
3004 else if(rs2[i]==0)
3005 {
3006 assert(s>=0);
3007 if(s!=t) emit_mov(s,t);
3008 }
3009 else
3010 {
3011 emit_andimm(shift,31,HOST_TEMPREG);
3012 if(opcode2[i]==4) // SLLV
3013 {
3014 emit_shl(s,HOST_TEMPREG,t);
3015 }
3016 if(opcode2[i]==6) // SRLV
3017 {
3018 emit_shr(s,HOST_TEMPREG,t);
3019 }
3020 if(opcode2[i]==7) // SRAV
3021 {
3022 emit_sar(s,HOST_TEMPREG,t);
3023 }
3024 }
3025 }
3026 } else { // DSLLV/DSRLV/DSRAV
3027 signed char sh,sl,th,tl,shift;
3028 th=get_reg(i_regs->regmap,rt1[i]|64);
3029 tl=get_reg(i_regs->regmap,rt1[i]);
3030 sh=get_reg(i_regs->regmap,rs1[i]|64);
3031 sl=get_reg(i_regs->regmap,rs1[i]);
3032 shift=get_reg(i_regs->regmap,rs2[i]);
3033 if(tl>=0){
3034 if(rs1[i]==0)
3035 {
3036 emit_zeroreg(tl);
3037 if(th>=0) emit_zeroreg(th);
3038 }
3039 else if(rs2[i]==0)
3040 {
3041 assert(sl>=0);
3042 if(sl!=tl) emit_mov(sl,tl);
3043 if(th>=0&&sh!=th) emit_mov(sh,th);
3044 }
3045 else
3046 {
3047 // FIXME: What if shift==tl ?
3048 assert(shift!=tl);
3049 int temp=get_reg(i_regs->regmap,-1);
3050 int real_th=th;
3051 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3052 assert(sl>=0);
3053 assert(sh>=0);
3054 emit_andimm(shift,31,HOST_TEMPREG);
3055 if(opcode2[i]==0x14) // DSLLV
3056 {
3057 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3058 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3059 emit_orrshr(sl,HOST_TEMPREG,th);
3060 emit_andimm(shift,31,HOST_TEMPREG);
3061 emit_testimm(shift,32);
3062 emit_shl(sl,HOST_TEMPREG,tl);
3063 if(th>=0) emit_cmovne_reg(tl,th);
3064 emit_cmovne_imm(0,tl);
3065 }
3066 if(opcode2[i]==0x16) // DSRLV
3067 {
3068 assert(th>=0);
3069 emit_shr(sl,HOST_TEMPREG,tl);
3070 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3071 emit_orrshl(sh,HOST_TEMPREG,tl);
3072 emit_andimm(shift,31,HOST_TEMPREG);
3073 emit_testimm(shift,32);
3074 emit_shr(sh,HOST_TEMPREG,th);
3075 emit_cmovne_reg(th,tl);
3076 if(real_th>=0) emit_cmovne_imm(0,th);
3077 }
3078 if(opcode2[i]==0x17) // DSRAV
3079 {
3080 assert(th>=0);
3081 emit_shr(sl,HOST_TEMPREG,tl);
3082 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3083 if(real_th>=0) {
3084 assert(temp>=0);
3085 emit_sarimm(th,31,temp);
3086 }
3087 emit_orrshl(sh,HOST_TEMPREG,tl);
3088 emit_andimm(shift,31,HOST_TEMPREG);
3089 emit_testimm(shift,32);
3090 emit_sar(sh,HOST_TEMPREG,th);
3091 emit_cmovne_reg(th,tl);
3092 if(real_th>=0) emit_cmovne_reg(temp,th);
3093 }
3094 }
3095 }
3096 }
3097 }
3098}
3099
3100static void speculate_mov(int rs,int rt)
3101{
3102 if(rt!=0) {
3103 smrv_strong_next|=1<<rt;
3104 smrv[rt]=smrv[rs];
3105 }
3106}
3107
3108static void speculate_mov_weak(int rs,int rt)
3109{
3110 if(rt!=0) {
3111 smrv_weak_next|=1<<rt;
3112 smrv[rt]=smrv[rs];
3113 }
3114}
3115
3116static void speculate_register_values(int i)
3117{
3118 if(i==0) {
3119 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3120 // gp,sp are likely to stay the same throughout the block
3121 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3122 smrv_weak_next=~smrv_strong_next;
3123 //printf(" llr %08x\n", smrv[4]);
3124 }
3125 smrv_strong=smrv_strong_next;
3126 smrv_weak=smrv_weak_next;
3127 switch(itype[i]) {
3128 case ALU:
3129 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3130 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3131 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3132 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3133 else {
3134 smrv_strong_next&=~(1<<rt1[i]);
3135 smrv_weak_next&=~(1<<rt1[i]);
3136 }
3137 break;
3138 case SHIFTIMM:
3139 smrv_strong_next&=~(1<<rt1[i]);
3140 smrv_weak_next&=~(1<<rt1[i]);
3141 // fallthrough
3142 case IMM16:
3143 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3144 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3145 if(hr>=0) {
3146 if(get_final_value(hr,i,&value))
3147 smrv[rt1[i]]=value;
3148 else smrv[rt1[i]]=constmap[i][hr];
3149 smrv_strong_next|=1<<rt1[i];
3150 }
3151 }
3152 else {
3153 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3154 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3155 }
3156 break;
3157 case LOAD:
3158 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3159 // special case for BIOS
3160 smrv[rt1[i]]=0xa0000000;
3161 smrv_strong_next|=1<<rt1[i];
3162 break;
3163 }
3164 // fallthrough
3165 case SHIFT:
3166 case LOADLR:
3167 case MOV:
3168 smrv_strong_next&=~(1<<rt1[i]);
3169 smrv_weak_next&=~(1<<rt1[i]);
3170 break;
3171 case COP0:
3172 case COP2:
3173 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3174 smrv_strong_next&=~(1<<rt1[i]);
3175 smrv_weak_next&=~(1<<rt1[i]);
3176 }
3177 break;
3178 case C2LS:
3179 if (opcode[i]==0x32) { // LWC2
3180 smrv_strong_next&=~(1<<rt1[i]);
3181 smrv_weak_next&=~(1<<rt1[i]);
3182 }
3183 break;
3184 }
3185#if 0
3186 int r=4;
3187 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3188 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3189#endif
3190}
3191
// Memory region classes returned by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default class; gets the plain RAM range check
  MTYPE_8020 = 1, // 0x80200000..0x807fffff
  MTYPE_0000 = 2, // addresses below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000..0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000..0x1f800fff
};
3199
3200static int get_ptr_mem_type(u_int a)
3201{
3202 if(a < 0x00200000) {
3203 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3204 // return wrong, must use memhandler for BIOS self-test to pass
3205 // 007 does similar stuff from a00 mirror, weird stuff
3206 return MTYPE_8000;
3207 return MTYPE_0000;
3208 }
3209 if(0x1f800000 <= a && a < 0x1f801000)
3210 return MTYPE_1F80;
3211 if(0x80200000 <= a && a < 0x80800000)
3212 return MTYPE_8020;
3213 if(0xa0000000 <= a && a < 0xa0200000)
3214 return MTYPE_A000;
3215 return MTYPE_8000;
3216}
3217
3218static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3219{
3220 int jaddr=0,type=0;
3221 int mr=rs1[i];
3222 if(((smrv_strong|smrv_weak)>>mr)&1) {
3223 type=get_ptr_mem_type(smrv[mr]);
3224 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3225 }
3226 else {
3227 // use the mirror we are running on
3228 type=get_ptr_mem_type(start);
3229 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3230 }
3231
3232 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3233 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3234 addr=*addr_reg_override=HOST_TEMPREG;
3235 type=0;
3236 }
3237 else if(type==MTYPE_0000) { // RAM 0 mirror
3238 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3239 addr=*addr_reg_override=HOST_TEMPREG;
3240 type=0;
3241 }
3242 else if(type==MTYPE_A000) { // RAM A mirror
3243 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3244 addr=*addr_reg_override=HOST_TEMPREG;
3245 type=0;
3246 }
3247 else if(type==MTYPE_1F80) { // scratchpad
3248 if (psxH == (void *)0x1f800000) {
3249 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3250 emit_cmpimm(HOST_TEMPREG,0x1000);
3251 jaddr=(int)out;
3252 emit_jc(0);
3253 }
3254 else {
3255 // do usual RAM check, jump will go to the right handler
3256 type=0;
3257 }
3258 }
3259
3260 if(type==0)
3261 {
3262 emit_cmpimm(addr,RAM_SIZE);
3263 jaddr=(int)out;
3264 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3265 // Hint to branch predictor that the branch is unlikely to be taken
3266 if(rs1[i]>=28)
3267 emit_jno_unlikely(0);
3268 else
3269 #endif
3270 emit_jno(0);
3271 if(ram_offset!=0) {
3272 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3273 addr=*addr_reg_override=HOST_TEMPREG;
3274 }
3275 }
3276
3277 return jaddr;
3278}
3279
3280#define shift_assemble shift_assemble_arm
3281
3282static void loadlr_assemble_arm(int i,struct regstat *i_regs)
3283{
3284 int s,th,tl,temp,temp2,addr,map=-1;
3285 int offset;
3286 int jaddr=0;
3287 int memtarget=0,c=0;
3288 int fastload_reg_override=0;
3289 u_int hr,reglist=0;
3290 th=get_reg(i_regs->regmap,rt1[i]|64);
3291 tl=get_reg(i_regs->regmap,rt1[i]);
3292 s=get_reg(i_regs->regmap,rs1[i]);
3293 temp=get_reg(i_regs->regmap,-1);
3294 temp2=get_reg(i_regs->regmap,FTEMP);
3295 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3296 assert(addr<0);
3297 offset=imm[i];
3298 for(hr=0;hr<HOST_REGS;hr++) {
3299 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3300 }
3301 reglist|=1<<temp;
3302 if(offset||s<0||c) addr=temp2;
3303 else addr=s;
3304 if(s>=0) {
3305 c=(i_regs->wasconst>>s)&1;
3306 if(c) {
3307 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3308 }
3309 }
3310 if(!c) {
3311 #ifdef RAM_OFFSET
3312 map=get_reg(i_regs->regmap,ROREG);
3313 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3314 #endif
3315 emit_shlimm(addr,3,temp);
3316 if (opcode[i]==0x22||opcode[i]==0x26) {
3317 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3318 }else{
3319 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3320 }
3321 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3322 }
3323 else {
3324 if(ram_offset&&memtarget) {
3325 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3326 fastload_reg_override=HOST_TEMPREG;
3327 }
3328 if (opcode[i]==0x22||opcode[i]==0x26) {
3329 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3330 }else{
3331 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3332 }
3333 }
3334 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3335 if(!c||memtarget) {
3336 int a=temp2;
3337 if(fastload_reg_override) a=fastload_reg_override;
3338 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3339 emit_readword_indexed_tlb(0,a,map,temp2);
3340 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3341 }
3342 else
3343 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3344 if(rt1[i]) {
3345 assert(tl>=0);
3346 emit_andimm(temp,24,temp);
3347#ifdef BIG_ENDIAN_MIPS
3348 if (opcode[i]==0x26) // LWR
3349#else
3350 if (opcode[i]==0x22) // LWL
3351#endif
3352 emit_xorimm(temp,24,temp);
3353 emit_movimm(-1,HOST_TEMPREG);
3354 if (opcode[i]==0x26) {
3355 emit_shr(temp2,temp,temp2);
3356 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3357 }else{
3358 emit_shl(temp2,temp,temp2);
3359 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3360 }
3361 emit_or(temp2,tl,tl);
3362 }
3363 //emit_storereg(rt1[i],tl); // DEBUG
3364 }
3365 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3366 // FIXME: little endian, fastload_reg_override
3367 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3368 if(!c||memtarget) {
3369 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3370 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3371 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3372 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3373 }
3374 else
3375 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3376 if(rt1[i]) {
3377 assert(th>=0);
3378 assert(tl>=0);
3379 emit_testimm(temp,32);
3380 emit_andimm(temp,24,temp);
3381 if (opcode[i]==0x1A) { // LDL
3382 emit_rsbimm(temp,32,HOST_TEMPREG);
3383 emit_shl(temp2h,temp,temp2h);
3384 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3385 emit_movimm(-1,HOST_TEMPREG);
3386 emit_shl(temp2,temp,temp2);
3387 emit_cmove_reg(temp2h,th);
3388 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3389 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3390 emit_orreq(temp2,tl,tl);
3391 emit_orrne(temp2,th,th);
3392 }
3393 if (opcode[i]==0x1B) { // LDR
3394 emit_xorimm(temp,24,temp);
3395 emit_rsbimm(temp,32,HOST_TEMPREG);
3396 emit_shr(temp2,temp,temp2);
3397 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3398 emit_movimm(-1,HOST_TEMPREG);
3399 emit_shr(temp2h,temp,temp2h);
3400 emit_cmovne_reg(temp2,tl);
3401 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3402 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3403 emit_orrne(temp2h,th,th);
3404 emit_orreq(temp2h,tl,tl);
3405 }
3406 }
3407 }
3408}
3409#define loadlr_assemble loadlr_assemble_arm
3410
3411static void cop0_assemble(int i,struct regstat *i_regs)
3412{
3413 if(opcode2[i]==0) // MFC0
3414 {
3415 signed char t=get_reg(i_regs->regmap,rt1[i]);
3416 char copr=(source[i]>>11)&0x1f;
3417 //assert(t>=0); // Why does this happen? OOT is weird
3418 if(t>=0&&rt1[i]!=0) {
3419 emit_readword((int)&reg_cop0+copr*4,t);
3420 }
3421 }
3422 else if(opcode2[i]==4) // MTC0
3423 {
3424 signed char s=get_reg(i_regs->regmap,rs1[i]);
3425 char copr=(source[i]>>11)&0x1f;
3426 assert(s>=0);
3427 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3428 if(copr==9||copr==11||copr==12||copr==13) {
3429 emit_readword((int)&last_count,HOST_TEMPREG);
3430 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3431 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3432 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3433 emit_writeword(HOST_CCREG,(int)&Count);
3434 }
3435 // What a mess. The status register (12) can enable interrupts,
3436 // so needs a special case to handle a pending interrupt.
3437 // The interrupt must be taken immediately, because a subsequent
3438 // instruction might disable interrupts again.
3439 if(copr==12||copr==13) {
3440 if (is_delayslot) {
3441 // burn cycles to cause cc_interrupt, which will
3442 // reschedule next_interupt. Relies on CCREG from above.
3443 assem_debug("MTC0 DS %d\n", copr);
3444 emit_writeword(HOST_CCREG,(int)&last_count);
3445 emit_movimm(0,HOST_CCREG);
3446 emit_storereg(CCREG,HOST_CCREG);
3447 emit_loadreg(rs1[i],1);
3448 emit_movimm(copr,0);
3449 emit_call((int)pcsx_mtc0_ds);
3450 emit_loadreg(rs1[i],s);
3451 return;
3452 }
3453 emit_movimm(start+i*4+4,HOST_TEMPREG);
3454 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3455 emit_movimm(0,HOST_TEMPREG);
3456 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
3457 }
3458 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3459 //else
3460 if(s==HOST_CCREG)
3461 emit_loadreg(rs1[i],1);
3462 else if(s!=1)
3463 emit_mov(s,1);
3464 emit_movimm(copr,0);
3465 emit_call((int)pcsx_mtc0);
3466 if(copr==9||copr==11||copr==12||copr==13) {
3467 emit_readword((int)&Count,HOST_CCREG);
3468 emit_readword((int)&next_interupt,HOST_TEMPREG);
3469 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3470 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3471 emit_writeword(HOST_TEMPREG,(int)&last_count);
3472 emit_storereg(CCREG,HOST_CCREG);
3473 }
3474 if(copr==12||copr==13) {
3475 assert(!is_delayslot);
3476 emit_readword((int)&pending_exception,14);
3477 emit_test(14,14);
3478 emit_jne((int)&do_interrupt);
3479 }
3480 emit_loadreg(rs1[i],s);
3481 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3482 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3483 cop1_usable=0;
3484 }
3485 else
3486 {
3487 assert(opcode2[i]==0x10);
3488 if((source[i]&0x3f)==0x10) // RFE
3489 {
3490 emit_readword((int)&Status,0);
3491 emit_andimm(0,0x3c,1);
3492 emit_andimm(0,~0xf,0);
3493 emit_orrshr_imm(1,2,0);
3494 emit_writeword(0,(int)&Status);
3495 }
3496 }
3497}
3498
3499static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3500{
3501 switch (copr) {
3502 case 1:
3503 case 3:
3504 case 5:
3505 case 8:
3506 case 9:
3507 case 10:
3508 case 11:
3509 emit_readword((int)&reg_cop2d[copr],tl);
3510 emit_signextend16(tl,tl);
3511 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3512 break;
3513 case 7:
3514 case 16:
3515 case 17:
3516 case 18:
3517 case 19:
3518 emit_readword((int)&reg_cop2d[copr],tl);
3519 emit_andimm(tl,0xffff,tl);
3520 emit_writeword(tl,(int)&reg_cop2d[copr]);
3521 break;
3522 case 15:
3523 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3524 emit_writeword(tl,(int)&reg_cop2d[copr]);
3525 break;
3526 case 28:
3527 case 29:
3528 emit_readword((int)&reg_cop2d[9],temp);
3529 emit_testimm(temp,0x8000); // do we need this?
3530 emit_andimm(temp,0xf80,temp);
3531 emit_andne_imm(temp,0,temp);
3532 emit_shrimm(temp,7,tl);
3533 emit_readword((int)&reg_cop2d[10],temp);
3534 emit_testimm(temp,0x8000);
3535 emit_andimm(temp,0xf80,temp);
3536 emit_andne_imm(temp,0,temp);
3537 emit_orrshr_imm(temp,2,tl);
3538 emit_readword((int)&reg_cop2d[11],temp);
3539 emit_testimm(temp,0x8000);
3540 emit_andimm(temp,0xf80,temp);
3541 emit_andne_imm(temp,0,temp);
3542 emit_orrshl_imm(temp,3,tl);
3543 emit_writeword(tl,(int)&reg_cop2d[copr]);
3544 break;
3545 default:
3546 emit_readword((int)&reg_cop2d[copr],tl);
3547 break;
3548 }
3549}
3550
3551static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3552{
3553 switch (copr) {
3554 case 15:
3555 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3556 emit_writeword(sl,(int)&reg_cop2d[copr]);
3557 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3558 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3559 emit_writeword(sl,(int)&reg_cop2d[14]);
3560 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3561 break;
3562 case 28:
3563 emit_andimm(sl,0x001f,temp);
3564 emit_shlimm(temp,7,temp);
3565 emit_writeword(temp,(int)&reg_cop2d[9]);
3566 emit_andimm(sl,0x03e0,temp);
3567 emit_shlimm(temp,2,temp);
3568 emit_writeword(temp,(int)&reg_cop2d[10]);
3569 emit_andimm(sl,0x7c00,temp);
3570 emit_shrimm(temp,3,temp);
3571 emit_writeword(temp,(int)&reg_cop2d[11]);
3572 emit_writeword(sl,(int)&reg_cop2d[28]);
3573 break;
3574 case 30:
3575 emit_movs(sl,temp);
3576 emit_mvnmi(temp,temp);
3577#ifdef HAVE_ARMV5
3578 emit_clz(temp,temp);
3579#else
3580 emit_movs(temp,HOST_TEMPREG);
3581 emit_movimm(0,temp);
3582 emit_jeq((int)out+4*4);
3583 emit_addpl_imm(temp,1,temp);
3584 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3585 emit_jns((int)out-2*4);
3586#endif
3587 emit_writeword(sl,(int)&reg_cop2d[30]);
3588 emit_writeword(temp,(int)&reg_cop2d[31]);
3589 break;
3590 case 31:
3591 break;
3592 default:
3593 emit_writeword(sl,(int)&reg_cop2d[copr]);
3594 break;
3595 }
3596}
3597
3598static void cop2_assemble(int i,struct regstat *i_regs)
3599{
3600 u_int copr=(source[i]>>11)&0x1f;
3601 signed char temp=get_reg(i_regs->regmap,-1);
3602 if (opcode2[i]==0) { // MFC2
3603 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3604 if(tl>=0&&rt1[i]!=0)
3605 cop2_get_dreg(copr,tl,temp);
3606 }
3607 else if (opcode2[i]==4) { // MTC2
3608 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3609 cop2_put_dreg(copr,sl,temp);
3610 }
3611 else if (opcode2[i]==2) // CFC2
3612 {
3613 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3614 if(tl>=0&&rt1[i]!=0)
3615 emit_readword((int)&reg_cop2c[copr],tl);
3616 }
3617 else if (opcode2[i]==6) // CTC2
3618 {
3619 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3620 switch(copr) {
3621 case 4:
3622 case 12:
3623 case 20:
3624 case 26:
3625 case 27:
3626 case 29:
3627 case 30:
3628 emit_signextend16(sl,temp);
3629 break;
3630 case 31:
3631 //value = value & 0x7ffff000;
3632 //if (value & 0x7f87e000) value |= 0x80000000;
3633 emit_shrimm(sl,12,temp);
3634 emit_shlimm(temp,12,temp);
3635 emit_testimm(temp,0x7f000000);
3636 emit_testeqimm(temp,0x00870000);
3637 emit_testeqimm(temp,0x0000e000);
3638 emit_orrne_imm(temp,0x80000000,temp);
3639 break;
3640 default:
3641 temp=sl;
3642 break;
3643 }
3644 emit_writeword(temp,(int)&reg_cop2c[copr]);
3645 assert(sl>=0);
3646 }
3647}
3648
3649static void c2op_prologue(u_int op,u_int reglist)
3650{
3651 save_regs_all(reglist);
3652#ifdef PCNT
3653 emit_movimm(op,0);
3654 emit_call((int)pcnt_gte_start);
3655#endif
3656 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3657}
3658
3659static void c2op_epilogue(u_int op,u_int reglist)
3660{
3661#ifdef PCNT
3662 emit_movimm(op,0);
3663 emit_call((int)pcnt_gte_end);
3664#endif
3665 restore_regs_all(reglist);
3666}
3667
3668static void c2op_call_MACtoIR(int lm,int need_flags)
3669{
3670 if(need_flags)
3671 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3672 else
3673 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3674}
3675
3676static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3677{
3678 emit_call((int)func);
3679 // func is C code and trashes r0
3680 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3681 if(need_flags||need_ir)
3682 c2op_call_MACtoIR(lm,need_flags);
3683 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3684}
3685
3686static void c2op_assemble(int i,struct regstat *i_regs)
3687{
3688 u_int c2op=source[i]&0x3f;
3689 u_int hr,reglist_full=0,reglist;
3690 int need_flags,need_ir;
3691 for(hr=0;hr<HOST_REGS;hr++) {
3692 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
3693 }
3694 reglist=reglist_full&CALLER_SAVE_REGS;
3695
3696 if (gte_handlers[c2op]!=NULL) {
3697 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
3698 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
3699 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3700 source[i],gte_unneeded[i+1],need_flags,need_ir);
3701 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3702 need_flags=0;
3703 int shift = (source[i] >> 19) & 1;
3704 int lm = (source[i] >> 10) & 1;
3705 switch(c2op) {
3706#ifndef DRC_DBG
3707 case GTE_MVMVA: {
3708#ifdef HAVE_ARMV5
3709 int v = (source[i] >> 15) & 3;
3710 int cv = (source[i] >> 13) & 3;
3711 int mx = (source[i] >> 17) & 3;
3712 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
3713 c2op_prologue(c2op,reglist);
3714 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3715 if(v<3)
3716 emit_ldrd(v*8,0,4);
3717 else {
3718 emit_movzwl_indexed(9*4,0,4); // gteIR
3719 emit_movzwl_indexed(10*4,0,6);
3720 emit_movzwl_indexed(11*4,0,5);
3721 emit_orrshl_imm(6,16,4);
3722 }
3723 if(mx<3)
3724 emit_addimm(0,32*4+mx*8*4,6);
3725 else
3726 emit_readword((int)&zeromem_ptr,6);
3727 if(cv<3)
3728 emit_addimm(0,32*4+(cv*8+5)*4,7);
3729 else
3730 emit_readword((int)&zeromem_ptr,7);
3731#ifdef __ARM_NEON__
3732 emit_movimm(source[i],1); // opcode
3733 emit_call((int)gteMVMVA_part_neon);
3734 if(need_flags) {
3735 emit_movimm(lm,1);
3736 emit_call((int)gteMACtoIR_flags_neon);
3737 }
3738#else
3739 if(cv==3&&shift)
3740 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3741 else {
3742 emit_movimm(shift,1);
3743 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3744 }
3745 if(need_flags||need_ir)
3746 c2op_call_MACtoIR(lm,need_flags);
3747#endif
3748#else /* if not HAVE_ARMV5 */
3749 c2op_prologue(c2op,reglist);
3750 emit_movimm(source[i],1); // opcode
3751 emit_writeword(1,(int)&psxRegs.code);
3752 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3753#endif
3754 break;
3755 }
3756 case GTE_OP:
3757 c2op_prologue(c2op,reglist);
3758 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3759 if(need_flags||need_ir) {
3760 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3761 c2op_call_MACtoIR(lm,need_flags);
3762 }
3763 break;
3764 case GTE_DPCS:
3765 c2op_prologue(c2op,reglist);
3766 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3767 break;
3768 case GTE_INTPL:
3769 c2op_prologue(c2op,reglist);
3770 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3771 break;
3772 case GTE_SQR:
3773 c2op_prologue(c2op,reglist);
3774 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3775 if(need_flags||need_ir) {
3776 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3777 c2op_call_MACtoIR(lm,need_flags);
3778 }
3779 break;
3780 case GTE_DCPL:
3781 c2op_prologue(c2op,reglist);
3782 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3783 break;
3784 case GTE_GPF:
3785 c2op_prologue(c2op,reglist);
3786 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3787 break;
3788 case GTE_GPL:
3789 c2op_prologue(c2op,reglist);
3790 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3791 break;
3792#endif
3793 default:
3794 c2op_prologue(c2op,reglist);
3795#ifdef DRC_DBG
3796 emit_movimm(source[i],1); // opcode
3797 emit_writeword(1,(int)&psxRegs.code);
3798#endif
3799 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3800 break;
3801 }
3802 c2op_epilogue(c2op,reglist);
3803 }
3804}
3805
3806static void cop1_unusable(int i,struct regstat *i_regs)
3807{
3808 // XXX: should just just do the exception instead
3809 if(!cop1_usable) {
3810 int jaddr=(int)out;
3811 emit_jmp(0);
3812 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3813 cop1_usable=1;
3814 }
3815}
3816
/* COP1 register moves are not implemented here: only the cop1_unusable() path is emitted. */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3821
/* Floating-point conversions are not implemented here: only the cop1_unusable() path is emitted. */
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
#define fconv_assemble fconv_assemble_arm
3827
/* Floating-point compares are not implemented here: only the cop1_unusable() path is emitted. */
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3832
/* Floating-point arithmetic is not implemented here: only the cop1_unusable() path is emitted. */
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3837
3838static void multdiv_assemble_arm(int i,struct regstat *i_regs)
3839{
3840 // case 0x18: MULT
3841 // case 0x19: MULTU
3842 // case 0x1A: DIV
3843 // case 0x1B: DIVU
3844 // case 0x1C: DMULT
3845 // case 0x1D: DMULTU
3846 // case 0x1E: DDIV
3847 // case 0x1F: DDIVU
3848 if(rs1[i]&&rs2[i])
3849 {
3850 if((opcode2[i]&4)==0) // 32-bit
3851 {
3852 if(opcode2[i]==0x18) // MULT
3853 {
3854 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3855 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3856 signed char hi=get_reg(i_regs->regmap,HIREG);
3857 signed char lo=get_reg(i_regs->regmap,LOREG);
3858 assert(m1>=0);
3859 assert(m2>=0);
3860 assert(hi>=0);
3861 assert(lo>=0);
3862 emit_smull(m1,m2,hi,lo);
3863 }
3864 if(opcode2[i]==0x19) // MULTU
3865 {
3866 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3867 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3868 signed char hi=get_reg(i_regs->regmap,HIREG);
3869 signed char lo=get_reg(i_regs->regmap,LOREG);
3870 assert(m1>=0);
3871 assert(m2>=0);
3872 assert(hi>=0);
3873 assert(lo>=0);
3874 emit_umull(m1,m2,hi,lo);
3875 }
3876 if(opcode2[i]==0x1A) // DIV
3877 {
3878 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3879 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3880 assert(d1>=0);
3881 assert(d2>=0);
3882 signed char quotient=get_reg(i_regs->regmap,LOREG);
3883 signed char remainder=get_reg(i_regs->regmap,HIREG);
3884 assert(quotient>=0);
3885 assert(remainder>=0);
3886 emit_movs(d1,remainder);
3887 emit_movimm(0xffffffff,quotient);
3888 emit_negmi(quotient,quotient); // .. quotient and ..
3889 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
3890 emit_movs(d2,HOST_TEMPREG);
3891 emit_jeq((int)out+52); // Division by zero
3892 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
3893#ifdef HAVE_ARMV5
3894 emit_clz(HOST_TEMPREG,quotient);
3895 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
3896#else
3897 emit_movimm(0,quotient);
3898 emit_addpl_imm(quotient,1,quotient);
3899 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3900 emit_jns((int)out-2*4);
3901#endif
3902 emit_orimm(quotient,1<<31,quotient);
3903 emit_shr(quotient,quotient,quotient);
3904 emit_cmp(remainder,HOST_TEMPREG);
3905 emit_subcs(remainder,HOST_TEMPREG,remainder);
3906 emit_adcs(quotient,quotient,quotient);
3907 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3908 emit_jcc((int)out-16); // -4
3909 emit_teq(d1,d2);
3910 emit_negmi(quotient,quotient);
3911 emit_test(d1,d1);
3912 emit_negmi(remainder,remainder);
3913 }
3914 if(opcode2[i]==0x1B) // DIVU
3915 {
3916 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3917 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3918 assert(d1>=0);
3919 assert(d2>=0);
3920 signed char quotient=get_reg(i_regs->regmap,LOREG);
3921 signed char remainder=get_reg(i_regs->regmap,HIREG);
3922 assert(quotient>=0);
3923 assert(remainder>=0);
3924 emit_mov(d1,remainder);
3925 emit_movimm(0xffffffff,quotient); // div0 case
3926 emit_test(d2,d2);
3927 emit_jeq((int)out+40); // Division by zero
3928#ifdef HAVE_ARMV5
3929 emit_clz(d2,HOST_TEMPREG);
3930 emit_movimm(1<<31,quotient);
3931 emit_shl(d2,HOST_TEMPREG,d2);
3932#else
3933 emit_movimm(0,HOST_TEMPREG);
3934 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3935 emit_lslpls_imm(d2,1,d2);
3936 emit_jns((int)out-2*4);
3937 emit_movimm(1<<31,quotient);
3938#endif
3939 emit_shr(quotient,HOST_TEMPREG,quotient);
3940 emit_cmp(remainder,d2);
3941 emit_subcs(remainder,d2,remainder);
3942 emit_adcs(quotient,quotient,quotient);
3943 emit_shrcc_imm(d2,1,d2);
3944 emit_jcc((int)out-16); // -4
3945 }
3946 }
3947 else // 64-bit
3948 assert(0);
3949 }
3950 else
3951 {
3952 // Multiply by zero is zero.
3953 // MIPS does not have a divide by zero exception.
3954 // The result is undefined, we return zero.
3955 signed char hr=get_reg(i_regs->regmap,HIREG);
3956 signed char lr=get_reg(i_regs->regmap,LOREG);
3957 if(hr>=0) emit_zeroreg(hr);
3958 if(lr>=0) emit_zeroreg(lr);
3959 }
3960}
3961#define multdiv_assemble multdiv_assemble_arm
3962
/*
 * Intentionally empty on ARM. On x86 this loads the value 0xf8 into the
 * register; on ARM the hash is computed with a single instruction in
 * do_rhash(), so there is nothing to preload.
 */
static void do_preload_rhash(int r) {
  (void)r;
}
3967
3968static void do_preload_rhtbl(int ht) {
3969 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3970}
3971
/*
 * Emit the mini hash table hash: rh = rs & 0xf8.
 */
static void do_rhash(int rs,int rh) {
  const int hash_mask = 0xf8;
  emit_andimm(rs, hash_mask, rh);
}
3975
3976static void do_miniht_load(int ht,int rh) {
3977 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3978 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3979}
3980
3981static void do_miniht_jump(int rs,int rh,int ht) {
3982 emit_cmp(rh,rs);
3983 emit_ldreq_indexed(ht,4,15);
3984 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3985 emit_mov(rs,7);
3986 emit_jmp(jump_vaddr_reg[7]);
3987 #else
3988 emit_jmp(jump_vaddr_reg[rs]);
3989 #endif
3990}
3991
3992static void do_miniht_insert(u_int return_address,int rt,int temp) {
3993 #ifndef HAVE_ARMV7
3994 emit_movimm(return_address,rt); // PC into link register
3995 add_to_linker((int)out,return_address,1);
3996 emit_pcreladdr(temp);
3997 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
3998 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
3999 #else
4000 emit_movw(return_address&0x0000FFFF,rt);
4001 add_to_linker((int)out,return_address,1);
4002 emit_pcreladdr(temp);
4003 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4004 emit_movt(return_address&0xFFFF0000,rt);
4005 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4006 #endif
4007}
4008
4009static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4010{
4011 //if(dirty_pre==dirty) return;
4012 int hr,reg;
4013 for(hr=0;hr<HOST_REGS;hr++) {
4014 if(hr!=EXCLUDE_REG) {
4015 reg=pre[hr];
4016 if(((~u)>>(reg&63))&1) {
4017 if(reg>0) {
4018 if(((dirty_pre&~dirty)>>hr)&1) {
4019 if(reg>0&&reg<34) {
4020 emit_storereg(reg,hr);
4021 if( ((is32_pre&~uu)>>reg)&1 ) {
4022 emit_sarimm(hr,31,HOST_TEMPREG);
4023 emit_storereg(reg|64,HOST_TEMPREG);
4024 }
4025 }
4026 else if(reg>=64) {
4027 emit_storereg(reg,hr);
4028 }
4029 }
4030 }
4031 }
4032 }
4033 }
4034}
4035
4036
4037/* using strd could possibly help but you'd have to allocate registers in pairs
4038static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4039{
4040 int hr;
4041 int wrote=-1;
4042 for(hr=HOST_REGS-1;hr>=0;hr--) {
4043 if(hr!=EXCLUDE_REG) {
4044 if(pre[hr]!=entry[hr]) {
4045 if(pre[hr]>=0) {
4046 if((dirty>>hr)&1) {
4047 if(get_reg(entry,pre[hr])<0) {
4048 if(pre[hr]<64) {
4049 if(!((u>>pre[hr])&1)) {
4050 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4051 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4052 emit_sarimm(hr,31,hr+1);
4053 emit_strdreg(pre[hr],hr);
4054 }
4055 else
4056 emit_storereg(pre[hr],hr);
4057 }else{
4058 emit_storereg(pre[hr],hr);
4059 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4060 emit_sarimm(hr,31,hr);
4061 emit_storereg(pre[hr]|64,hr);
4062 }
4063 }
4064 }
4065 }else{
4066 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4067 emit_storereg(pre[hr],hr);
4068 }
4069 }
4070 wrote=hr;
4071 }
4072 }
4073 }
4074 }
4075 }
4076 }
4077 for(hr=0;hr<HOST_REGS;hr++) {
4078 if(hr!=EXCLUDE_REG) {
4079 if(pre[hr]!=entry[hr]) {
4080 if(pre[hr]>=0) {
4081 int nr;
4082 if((nr=get_reg(entry,pre[hr]))>=0) {
4083 emit_mov(hr,nr);
4084 }
4085 }
4086 }
4087 }
4088 }
4089}
4090#define wb_invalidate wb_invalidate_arm
4091*/
4092
4093static void mark_clear_cache(void *target)
4094{
4095 u_long offset = (char *)target - (char *)BASE_ADDR;
4096 u_int mask = 1u << ((offset >> 12) & 31);
4097 if (!(needs_clear_cache[offset >> 17] & mask)) {
4098 char *start = (char *)((u_long)target & ~4095ul);
4099 start_tcache_write(start, start + 4096);
4100 needs_clear_cache[offset >> 17] |= mask;
4101 }
4102}
4103
4104// Clearing the cache is rather slow on ARM Linux, so mark the areas
4105// that need to be cleared, and then only clear these areas once.
4106static void do_clear_cache()
4107{
4108 int i,j;
4109 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4110 {
4111 u_int bitmap=needs_clear_cache[i];
4112 if(bitmap) {
4113 u_int start,end;
4114 for(j=0;j<32;j++)
4115 {
4116 if(bitmap&(1<<j)) {
4117 start=(u_int)BASE_ADDR+i*131072+j*4096;
4118 end=start+4095;
4119 j++;
4120 while(j<32) {
4121 if(bitmap&(1<<j)) {
4122 end+=4096;
4123 j++;
4124 }else{
4125 end_tcache_write((void *)start,(void *)end);
4126 break;
4127 }
4128 }
4129 }
4130 }
4131 needs_clear_cache[i]=0;
4132 }
4133 }
4134}
4135
// CPU-architecture-specific initialization.
// Nothing needs to be set up for this backend, so the body is empty.
static void arch_init() {
  return;
}
4139
4140// vim:shiftwidth=2:expandtab