drc: some vita and 3ds support
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33char *translation_cache;
34#else
35char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// CALLER_SAVE_REGS has bits 0-3 and 12 set; __MACH__ builds additionally set bit 9.
// NOTE(review): presumably a host register bitmask (r0-r3, r12, plus r9 on Mach) - confirm at use sites.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

// Shorthand for marking a definition as possibly unused.
#define unused __attribute__((unused))
45
46extern int cycle_count;
47extern int last_count;
48extern int pcaddr;
49extern int pending_exception;
50extern int branch_target;
51extern uint64_t readmem_dword;
52extern void *dynarec_local;
53extern u_int mini_ht[32][2];
54
55void indirect_jump_indexed();
56void indirect_jump();
57void do_interrupt();
58void jump_vaddr_r0();
59void jump_vaddr_r1();
60void jump_vaddr_r2();
61void jump_vaddr_r3();
62void jump_vaddr_r4();
63void jump_vaddr_r5();
64void jump_vaddr_r6();
65void jump_vaddr_r7();
66void jump_vaddr_r8();
67void jump_vaddr_r9();
68void jump_vaddr_r10();
69void jump_vaddr_r12();
70
71const u_int jump_vaddr_reg[16] = {
72 (int)jump_vaddr_r0,
73 (int)jump_vaddr_r1,
74 (int)jump_vaddr_r2,
75 (int)jump_vaddr_r3,
76 (int)jump_vaddr_r4,
77 (int)jump_vaddr_r5,
78 (int)jump_vaddr_r6,
79 (int)jump_vaddr_r7,
80 (int)jump_vaddr_r8,
81 (int)jump_vaddr_r9,
82 (int)jump_vaddr_r10,
83 0,
84 (int)jump_vaddr_r12,
85 0,
86 0,
87 0};
88
89void invalidate_addr_r0();
90void invalidate_addr_r1();
91void invalidate_addr_r2();
92void invalidate_addr_r3();
93void invalidate_addr_r4();
94void invalidate_addr_r5();
95void invalidate_addr_r6();
96void invalidate_addr_r7();
97void invalidate_addr_r8();
98void invalidate_addr_r9();
99void invalidate_addr_r10();
100void invalidate_addr_r12();
101
102const u_int invalidate_addr_reg[16] = {
103 (int)invalidate_addr_r0,
104 (int)invalidate_addr_r1,
105 (int)invalidate_addr_r2,
106 (int)invalidate_addr_r3,
107 (int)invalidate_addr_r4,
108 (int)invalidate_addr_r5,
109 (int)invalidate_addr_r6,
110 (int)invalidate_addr_r7,
111 (int)invalidate_addr_r8,
112 (int)invalidate_addr_r9,
113 (int)invalidate_addr_r10,
114 0,
115 (int)invalidate_addr_r12,
116 0,
117 0,
118 0};
119
120static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
121
122/* Linker */
123
124static void set_jump_target(int addr,u_int target)
125{
126 u_char *ptr=(u_char *)addr;
127 u_int *ptr2=(u_int *)ptr;
128 if(ptr[3]==0xe2) {
129 assert((target-(u_int)ptr2-8)<1024);
130 assert((addr&3)==0);
131 assert((target&3)==0);
132 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
133 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
134 }
135 else if(ptr[3]==0x72) {
136 // generated by emit_jno_unlikely
137 if((target-(u_int)ptr2-8)<1024) {
138 assert((addr&3)==0);
139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
141 }
142 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
143 assert((addr&3)==0);
144 assert((target&3)==0);
145 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
146 }
147 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
148 }
149 else {
150 assert((ptr[3]&0x0e)==0xa);
151 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
152 }
153}
154
155// This optionally copies the instruction from the target of the branch into
156// the space before the branch. Works, but the difference in speed is
157// usually insignificant.
#if 0
// Disabled variant of set_jump_target: when copy is nonzero and the first
// instruction at target qualifies, it is copied into the word before the
// branch and the branch is pointed one instruction past target.
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn=(u_int *)addr;
  assert(!copy||insn[-1]==0xe28dd000);
  const u_char top=((u_char *)addr)[3];
  if(top==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }
  assert((top&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  if((target_insn&0x0e100000)==0            // ALU, no immediate, no flags
     ||(target_insn&0x0c100000)==0x04100000 // Load
     ||(target_insn&0x08000000)) {
    copy=0;
  }
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
189
190/* Literal pool */
191static void add_literal(int addr,int val)
192{
193 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
194 literals[literalcount][0]=addr;
195 literals[literalcount][1]=val;
196 literalcount++;
197}
198
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the address of the jumping instruction. That address is read from
// the literal referenced by the stub's second instruction, which must be a
// pc-relative ldr.
static void *find_extjump_insn(void *stub)
{
  const char *ldr_at=(const char *)stub+4;
  const int ldr=*(const int *)ldr_at;
  assert((ldr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
  // literal lives at pc (= instruction address + 8) plus the 12-bit offset
  void *const *literal=(void *const *)(ldr_at+(ldr&0xfff)+8);
  return *literal;
}
209
// Find where an external branch is linked to, using the address of its stub:
// take the address loaded by the stub's second instruction (dyna_linker arg1),
// treat it as a pointer to the branch instruction,
// and return the address that branch jumps to.
static int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *branch_at=find_extjump_insn(stub);
  const int branch=*branch_at;
  assert((branch&0x0f000000)==0x0a000000);
  // sign-extend the 24-bit word offset and convert it to bytes
  const int disp=(branch<<8)>>6;
  return (int)branch_at+disp+8;
}
221
222// Find the "clean" entry point from a "dirty" entry point
223// by skipping past the call to verify_code
224static u_int get_clean_addr(int addr)
225{
226 int *ptr=(int *)addr;
227 #ifndef HAVE_ARMV7
228 ptr+=4;
229 #else
230 ptr+=6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
234 ptr++;
235 if((*ptr&0xFF000000)==0xea000000) {
236 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
237 }
238 return (u_int)ptr;
239}
240
241static int verify_dirty(u_int *ptr)
242{
243 #ifndef HAVE_ARMV7
244 // get from literal pool
245 assert((*ptr&0xFFFF0000)==0xe59f0000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
262 //printf("verify_dirty: %x %x %x\n",source,copy,len);
263 return !memcmp((void *)source,(void *)copy,len);
264}
265
266// This doesn't necessarily find all clean entry points, just
267// guarantees that it's not dirty
268static int isclean(int addr)
269{
270 #ifndef HAVE_ARMV7
271 u_int *ptr=((u_int *)addr)+4;
272 #else
273 u_int *ptr=((u_int *)addr)+6;
274 #endif
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
277 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
278 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
279 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
280 return 1;
281}
282
283// get source that block at addr was compiled from (host pointers)
284static void get_bounds(int addr,u_int *start,u_int *end)
285{
286 u_int *ptr=(u_int *)addr;
287 #ifndef HAVE_ARMV7
288 // get from literal pool
289 assert((*ptr&0xFFFF0000)==0xe59f0000);
290 u_int offset=*ptr&0xfff;
291 u_int *l_ptr=(void *)ptr+offset+8;
292 u_int source=l_ptr[0];
293 //u_int copy=l_ptr[1];
294 u_int len=l_ptr[2];
295 ptr+=4;
296 #else
297 // ARMv7 movw/movt
298 assert((*ptr&0xFFF00000)==0xe3000000);
299 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
300 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
301 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
302 ptr+=6;
303 #endif
304 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
305 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
306 *start=source;
307 *end=source+len;
308}
309
310/* Register allocation */
311
312// Note: registers are allocated clean (unmodified state)
313// if you intend to modify the register, you must call dirty_reg().
314static void alloc_reg(struct regstat *cur,int i,signed char reg)
315{
316 int r,hr;
317 int preferred_reg = (reg&7);
318 if(reg==CCREG) preferred_reg=HOST_CCREG;
319 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
320
321 // Don't allocate unused registers
322 if((cur->u>>reg)&1) return;
323
324 // see if it's already allocated
325 for(hr=0;hr<HOST_REGS;hr++)
326 {
327 if(cur->regmap[hr]==reg) return;
328 }
329
330 // Keep the same mapping if the register was already allocated in a loop
331 preferred_reg = loop_reg(i,reg,preferred_reg);
332
333 // Try to allocate the preferred register
334 if(cur->regmap[preferred_reg]==-1) {
335 cur->regmap[preferred_reg]=reg;
336 cur->dirty&=~(1<<preferred_reg);
337 cur->isconst&=~(1<<preferred_reg);
338 return;
339 }
340 r=cur->regmap[preferred_reg];
341 if(r<64&&((cur->u>>r)&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347 if(r>=64&&((cur->uu>>(r&63))&1)) {
348 cur->regmap[preferred_reg]=reg;
349 cur->dirty&=~(1<<preferred_reg);
350 cur->isconst&=~(1<<preferred_reg);
351 return;
352 }
353
354 // Clear any unneeded registers
355 // We try to keep the mapping consistent, if possible, because it
356 // makes branches easier (especially loops). So we try to allocate
357 // first (see above) before removing old mappings. If this is not
358 // possible then go ahead and clear out the registers that are no
359 // longer needed.
360 for(hr=0;hr<HOST_REGS;hr++)
361 {
362 r=cur->regmap[hr];
363 if(r>=0) {
364 if(r<64) {
365 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
366 }
367 else
368 {
369 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
370 }
371 }
372 }
373 // Try to allocate any available register, but prefer
374 // registers that have not been used recently.
375 if(i>0) {
376 for(hr=0;hr<HOST_REGS;hr++) {
377 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
378 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
379 cur->regmap[hr]=reg;
380 cur->dirty&=~(1<<hr);
381 cur->isconst&=~(1<<hr);
382 return;
383 }
384 }
385 }
386 }
387 // Try to allocate any available register
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
390 cur->regmap[hr]=reg;
391 cur->dirty&=~(1<<hr);
392 cur->isconst&=~(1<<hr);
393 return;
394 }
395 }
396
397 // Ok, now we have to evict someone
398 // Pick a register we hopefully won't need soon
399 u_char hsn[MAXREG+1];
400 memset(hsn,10,sizeof(hsn));
401 int j;
402 lsn(hsn,i,&preferred_reg);
403 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
404 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
405 if(i>0) {
406 // Don't evict the cycle count at entry points, otherwise the entry
407 // stub will have to write it.
408 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
409 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
410 for(j=10;j>=3;j--)
411 {
412 // Alloc preferred register if available
413 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
414 for(hr=0;hr<HOST_REGS;hr++) {
415 // Evict both parts of a 64-bit register
416 if((cur->regmap[hr]&63)==r) {
417 cur->regmap[hr]=-1;
418 cur->dirty&=~(1<<hr);
419 cur->isconst&=~(1<<hr);
420 }
421 }
422 cur->regmap[preferred_reg]=reg;
423 return;
424 }
425 for(r=1;r<=MAXREG;r++)
426 {
427 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
428 for(hr=0;hr<HOST_REGS;hr++) {
429 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
430 if(cur->regmap[hr]==r+64) {
431 cur->regmap[hr]=reg;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 return;
435 }
436 }
437 }
438 for(hr=0;hr<HOST_REGS;hr++) {
439 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
440 if(cur->regmap[hr]==r) {
441 cur->regmap[hr]=reg;
442 cur->dirty&=~(1<<hr);
443 cur->isconst&=~(1<<hr);
444 return;
445 }
446 }
447 }
448 }
449 }
450 }
451 }
452 for(j=10;j>=0;j--)
453 {
454 for(r=1;r<=MAXREG;r++)
455 {
456 if(hsn[r]==j) {
457 for(hr=0;hr<HOST_REGS;hr++) {
458 if(cur->regmap[hr]==r+64) {
459 cur->regmap[hr]=reg;
460 cur->dirty&=~(1<<hr);
461 cur->isconst&=~(1<<hr);
462 return;
463 }
464 }
465 for(hr=0;hr<HOST_REGS;hr++) {
466 if(cur->regmap[hr]==r) {
467 cur->regmap[hr]=reg;
468 cur->dirty&=~(1<<hr);
469 cur->isconst&=~(1<<hr);
470 return;
471 }
472 }
473 }
474 }
475 }
476 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
477}
478
479static void alloc_reg64(struct regstat *cur,int i,signed char reg)
480{
481 int preferred_reg = 8+(reg&1);
482 int r,hr;
483
484 // allocate the lower 32 bits
485 alloc_reg(cur,i,reg);
486
487 // Don't allocate unused registers
488 if((cur->uu>>reg)&1) return;
489
490 // see if the upper half is already allocated
491 for(hr=0;hr<HOST_REGS;hr++)
492 {
493 if(cur->regmap[hr]==reg+64) return;
494 }
495
496 // Keep the same mapping if the register was already allocated in a loop
497 preferred_reg = loop_reg(i,reg,preferred_reg);
498
499 // Try to allocate the preferred register
500 if(cur->regmap[preferred_reg]==-1) {
501 cur->regmap[preferred_reg]=reg|64;
502 cur->dirty&=~(1<<preferred_reg);
503 cur->isconst&=~(1<<preferred_reg);
504 return;
505 }
506 r=cur->regmap[preferred_reg];
507 if(r<64&&((cur->u>>r)&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513 if(r>=64&&((cur->uu>>(r&63))&1)) {
514 cur->regmap[preferred_reg]=reg|64;
515 cur->dirty&=~(1<<preferred_reg);
516 cur->isconst&=~(1<<preferred_reg);
517 return;
518 }
519
520 // Clear any unneeded registers
521 // We try to keep the mapping consistent, if possible, because it
522 // makes branches easier (especially loops). So we try to allocate
523 // first (see above) before removing old mappings. If this is not
524 // possible then go ahead and clear out the registers that are no
525 // longer needed.
526 for(hr=HOST_REGS-1;hr>=0;hr--)
527 {
528 r=cur->regmap[hr];
529 if(r>=0) {
530 if(r<64) {
531 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
532 }
533 else
534 {
535 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
536 }
537 }
538 }
539 // Try to allocate any available register, but prefer
540 // registers that have not been used recently.
541 if(i>0) {
542 for(hr=0;hr<HOST_REGS;hr++) {
543 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
544 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
545 cur->regmap[hr]=reg|64;
546 cur->dirty&=~(1<<hr);
547 cur->isconst&=~(1<<hr);
548 return;
549 }
550 }
551 }
552 }
553 // Try to allocate any available register
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
556 cur->regmap[hr]=reg|64;
557 cur->dirty&=~(1<<hr);
558 cur->isconst&=~(1<<hr);
559 return;
560 }
561 }
562
563 // Ok, now we have to evict someone
564 // Pick a register we hopefully won't need soon
565 u_char hsn[MAXREG+1];
566 memset(hsn,10,sizeof(hsn));
567 int j;
568 lsn(hsn,i,&preferred_reg);
569 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
570 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
571 if(i>0) {
572 // Don't evict the cycle count at entry points, otherwise the entry
573 // stub will have to write it.
574 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
575 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
576 for(j=10;j>=3;j--)
577 {
578 // Alloc preferred register if available
579 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
580 for(hr=0;hr<HOST_REGS;hr++) {
581 // Evict both parts of a 64-bit register
582 if((cur->regmap[hr]&63)==r) {
583 cur->regmap[hr]=-1;
584 cur->dirty&=~(1<<hr);
585 cur->isconst&=~(1<<hr);
586 }
587 }
588 cur->regmap[preferred_reg]=reg|64;
589 return;
590 }
591 for(r=1;r<=MAXREG;r++)
592 {
593 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
594 for(hr=0;hr<HOST_REGS;hr++) {
595 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
596 if(cur->regmap[hr]==r+64) {
597 cur->regmap[hr]=reg|64;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 return;
601 }
602 }
603 }
604 for(hr=0;hr<HOST_REGS;hr++) {
605 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
606 if(cur->regmap[hr]==r) {
607 cur->regmap[hr]=reg|64;
608 cur->dirty&=~(1<<hr);
609 cur->isconst&=~(1<<hr);
610 return;
611 }
612 }
613 }
614 }
615 }
616 }
617 }
618 for(j=10;j>=0;j--)
619 {
620 for(r=1;r<=MAXREG;r++)
621 {
622 if(hsn[r]==j) {
623 for(hr=0;hr<HOST_REGS;hr++) {
624 if(cur->regmap[hr]==r+64) {
625 cur->regmap[hr]=reg|64;
626 cur->dirty&=~(1<<hr);
627 cur->isconst&=~(1<<hr);
628 return;
629 }
630 }
631 for(hr=0;hr<HOST_REGS;hr++) {
632 if(cur->regmap[hr]==r) {
633 cur->regmap[hr]=reg|64;
634 cur->dirty&=~(1<<hr);
635 cur->isconst&=~(1<<hr);
636 return;
637 }
638 }
639 }
640 }
641 }
642 SysPrintf("This shouldn't happen");exit(1);
643}
644
645// Allocate a temporary register. This is done without regard to
646// dirty status or whether the register we request is on the unneeded list
647// Note: This will only allocate one register, even if called multiple times
648static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
649{
650 int r,hr;
651 int preferred_reg = -1;
652
653 // see if it's already allocated
654 for(hr=0;hr<HOST_REGS;hr++)
655 {
656 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
657 }
658
659 // Try to allocate any available register
660 for(hr=HOST_REGS-1;hr>=0;hr--) {
661 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
662 cur->regmap[hr]=reg;
663 cur->dirty&=~(1<<hr);
664 cur->isconst&=~(1<<hr);
665 return;
666 }
667 }
668
669 // Find an unneeded register
670 for(hr=HOST_REGS-1;hr>=0;hr--)
671 {
672 r=cur->regmap[hr];
673 if(r>=0) {
674 if(r<64) {
675 if((cur->u>>r)&1) {
676 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
677 cur->regmap[hr]=reg;
678 cur->dirty&=~(1<<hr);
679 cur->isconst&=~(1<<hr);
680 return;
681 }
682 }
683 }
684 else
685 {
686 if((cur->uu>>(r&63))&1) {
687 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
688 cur->regmap[hr]=reg;
689 cur->dirty&=~(1<<hr);
690 cur->isconst&=~(1<<hr);
691 return;
692 }
693 }
694 }
695 }
696 }
697
698 // Ok, now we have to evict someone
699 // Pick a register we hopefully won't need soon
700 // TODO: we might want to follow unconditional jumps here
701 // TODO: get rid of dupe code and make this into a function
702 u_char hsn[MAXREG+1];
703 memset(hsn,10,sizeof(hsn));
704 int j;
705 lsn(hsn,i,&preferred_reg);
706 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
707 if(i>0) {
708 // Don't evict the cycle count at entry points, otherwise the entry
709 // stub will have to write it.
710 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
711 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
712 for(j=10;j>=3;j--)
713 {
714 for(r=1;r<=MAXREG;r++)
715 {
716 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
717 for(hr=0;hr<HOST_REGS;hr++) {
718 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
719 if(cur->regmap[hr]==r+64) {
720 cur->regmap[hr]=reg;
721 cur->dirty&=~(1<<hr);
722 cur->isconst&=~(1<<hr);
723 return;
724 }
725 }
726 }
727 for(hr=0;hr<HOST_REGS;hr++) {
728 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
729 if(cur->regmap[hr]==r) {
730 cur->regmap[hr]=reg;
731 cur->dirty&=~(1<<hr);
732 cur->isconst&=~(1<<hr);
733 return;
734 }
735 }
736 }
737 }
738 }
739 }
740 }
741 for(j=10;j>=0;j--)
742 {
743 for(r=1;r<=MAXREG;r++)
744 {
745 if(hsn[r]==j) {
746 for(hr=0;hr<HOST_REGS;hr++) {
747 if(cur->regmap[hr]==r+64) {
748 cur->regmap[hr]=reg;
749 cur->dirty&=~(1<<hr);
750 cur->isconst&=~(1<<hr);
751 return;
752 }
753 }
754 for(hr=0;hr<HOST_REGS;hr++) {
755 if(cur->regmap[hr]==r) {
756 cur->regmap[hr]=reg;
757 cur->dirty&=~(1<<hr);
758 cur->isconst&=~(1<<hr);
759 return;
760 }
761 }
762 }
763 }
764 }
765 SysPrintf("This shouldn't happen");exit(1);
766}
767
768// Allocate a specific ARM register.
769static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
770{
771 int n;
772 int dirty=0;
773
774 // see if it's already allocated (and dealloc it)
775 for(n=0;n<HOST_REGS;n++)
776 {
777 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
778 dirty=(cur->dirty>>n)&1;
779 cur->regmap[n]=-1;
780 }
781 }
782
783 cur->regmap[hr]=reg;
784 cur->dirty&=~(1<<hr);
785 cur->dirty|=dirty<<hr;
786 cur->isconst&=~(1<<hr);
787}
788
789// Alloc cycle count into dedicated register
790static void alloc_cc(struct regstat *cur,int i)
791{
792 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
793}
794
795/* Special alloc */
796
797
798/* Assembler */
799
800static unused char regname[16][4] = {
801 "r0",
802 "r1",
803 "r2",
804 "r3",
805 "r4",
806 "r5",
807 "r6",
808 "r7",
809 "r8",
810 "r9",
811 "r10",
812 "fp",
813 "r12",
814 "sp",
815 "lr",
816 "pc"};
817
818static void output_w32(u_int word)
819{
820 *((u_int *)out)=word;
821 out+=4;
822}
823
// Pack three register numbers (each below 16) into instruction bit fields:
// rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  const u_int rn_field=rn<<16;
  const u_int rd_field=rd<<12;
  return rn_field|rd_field|rm;
}
831
// Pack rd, rn and an 8-bit immediate that is to be shifted left by `shift`
// bits (shift must be even). The shift is stored as (32-shift)&30 at
// bits 8 and up, which is the same encoding genimm() produces.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
840
// Try to express imm as an 8-bit value rotated by an even amount.
// On success stores the 12-bit operand field (rotate in bits 8-11, value in
// bits 0-7) in *encoded and returns 1. Otherwise returns 0 with *encoded
// set to 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try the next rotation
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
856
857static void genimm_checked(u_int imm,u_int *encoded)
858{
859 u_int ret=genimm(imm,encoded);
860 assert(ret);
861 (void)ret;
862}
863
864static u_int genjmp(u_int addr)
865{
866 int offset=addr-(int)out-8;
867 if(offset<-33554432||offset>=33554432) {
868 if (addr>2) {
869 SysPrintf("genjmp: out of range: %08x\n", offset);
870 exit(1);
871 }
872 return 0;
873 }
874 return ((u_int)offset>>2)&0xffffff;
875}
876
877static void emit_mov(int rs,int rt)
878{
879 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
880 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
881}
882
883static void emit_movs(int rs,int rt)
884{
885 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
886 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
887}
888
889static void emit_add(int rs1,int rs2,int rt)
890{
891 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
892 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
893}
894
895static void emit_adds(int rs1,int rs2,int rt)
896{
897 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
898 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
899}
900
901static void emit_adcs(int rs1,int rs2,int rt)
902{
903 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
904 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
905}
906
907static void emit_sbc(int rs1,int rs2,int rt)
908{
909 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
910 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
911}
912
913static void emit_sbcs(int rs1,int rs2,int rt)
914{
915 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
916 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
917}
918
919static void emit_neg(int rs, int rt)
920{
921 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
922 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
923}
924
925static void emit_negs(int rs, int rt)
926{
927 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
928 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
929}
930
931static void emit_sub(int rs1,int rs2,int rt)
932{
933 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
934 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
935}
936
937static void emit_subs(int rs1,int rs2,int rt)
938{
939 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
940 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
941}
942
943static void emit_zeroreg(int rt)
944{
945 assem_debug("mov %s,#0\n",regname[rt]);
946 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
947}
948
949static void emit_loadlp(u_int imm,u_int rt)
950{
951 add_literal((int)out,imm);
952 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
953 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
954}
955
956static void emit_movw(u_int imm,u_int rt)
957{
958 assert(imm<65536);
959 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
960 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
961}
962
963static void emit_movt(u_int imm,u_int rt)
964{
965 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
966 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
967}
968
969static void emit_movimm(u_int imm,u_int rt)
970{
971 u_int armval;
972 if(genimm(imm,&armval)) {
973 assem_debug("mov %s,#%d\n",regname[rt],imm);
974 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
975 }else if(genimm(~imm,&armval)) {
976 assem_debug("mvn %s,#%d\n",regname[rt],imm);
977 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
978 }else if(imm<65536) {
979 #ifndef HAVE_ARMV7
980 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
981 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
982 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
983 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
984 #else
985 emit_movw(imm,rt);
986 #endif
987 }else{
988 #ifndef HAVE_ARMV7
989 emit_loadlp(imm,rt);
990 #else
991 emit_movw(imm&0x0000FFFF,rt);
992 emit_movt(imm&0xFFFF0000,rt);
993 #endif
994 }
995}
996
997static void emit_pcreladdr(u_int rt)
998{
999 assem_debug("add %s,pc,#?\n",regname[rt]);
1000 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1001}
1002
1003static void emit_loadreg(int r, int hr)
1004{
1005 if(r&64) {
1006 SysPrintf("64bit load in 32bit mode!\n");
1007 assert(0);
1008 return;
1009 }
1010 if((r&63)==0)
1011 emit_zeroreg(hr);
1012 else {
1013 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1014 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1015 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1016 if(r==CCREG) addr=(int)&cycle_count;
1017 if(r==CSREG) addr=(int)&Status;
1018 if(r==FSREG) addr=(int)&FCR31;
1019 if(r==INVCP) addr=(int)&invc_ptr;
1020 u_int offset = addr-(u_int)&dynarec_local;
1021 assert(offset<4096);
1022 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1023 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1024 }
1025}
1026
1027static void emit_storereg(int r, int hr)
1028{
1029 if(r&64) {
1030 SysPrintf("64bit store in 32bit mode!\n");
1031 assert(0);
1032 return;
1033 }
1034 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1035 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1036 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1037 if(r==CCREG) addr=(int)&cycle_count;
1038 if(r==FSREG) addr=(int)&FCR31;
1039 u_int offset = addr-(u_int)&dynarec_local;
1040 assert(offset<4096);
1041 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1042 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1043}
1044
1045static void emit_test(int rs, int rt)
1046{
1047 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1048 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1049}
1050
1051static void emit_testimm(int rs,int imm)
1052{
1053 u_int armval;
1054 assem_debug("tst %s,#%d\n",regname[rs],imm);
1055 genimm_checked(imm,&armval);
1056 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1057}
1058
1059static void emit_testeqimm(int rs,int imm)
1060{
1061 u_int armval;
1062 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1063 genimm_checked(imm,&armval);
1064 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1065}
1066
1067static void emit_not(int rs,int rt)
1068{
1069 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1070 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1071}
1072
1073static void emit_mvnmi(int rs,int rt)
1074{
1075 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1076 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1077}
1078
1079static void emit_and(u_int rs1,u_int rs2,u_int rt)
1080{
1081 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1082 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1083}
1084
1085static void emit_or(u_int rs1,u_int rs2,u_int rt)
1086{
1087 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1088 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1089}
1090
1091static void emit_or_and_set_flags(int rs1,int rs2,int rt)
1092{
1093 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1095}
1096
1097static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1098{
1099 assert(rs<16);
1100 assert(rt<16);
1101 assert(imm<32);
1102 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1103 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1104}
1105
1106static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1107{
1108 assert(rs<16);
1109 assert(rt<16);
1110 assert(imm<32);
1111 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1112 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1113}
1114
1115static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1116{
1117 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1118 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1119}
1120
1121static void emit_addimm(u_int rs,int imm,u_int rt)
1122{
1123 assert(rs<16);
1124 assert(rt<16);
1125 if(imm!=0) {
1126 u_int armval;
1127 if(genimm(imm,&armval)) {
1128 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1129 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1130 }else if(genimm(-imm,&armval)) {
1131 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
1132 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1133 #ifdef HAVE_ARMV7
1134 }else if(rt!=rs&&(u_int)imm<65536) {
1135 emit_movw(imm&0x0000ffff,rt);
1136 emit_add(rs,rt,rt);
1137 }else if(rt!=rs&&(u_int)-imm<65536) {
1138 emit_movw(-imm&0x0000ffff,rt);
1139 emit_sub(rs,rt,rt);
1140 #endif
1141 }else if((u_int)-imm<65536) {
1142 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1143 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1144 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1145 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1146 }else {
1147 do {
1148 int shift = (ffs(imm) - 1) & ~1;
1149 int imm8 = imm & (0xff << shift);
1150 genimm_checked(imm8,&armval);
1151 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1152 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1153 rs = rt;
1154 imm &= ~imm8;
1155 }
1156 while (imm != 0);
1157 }
1158 }
1159 else if(rs!=rt) emit_mov(rs,rt);
1160}
1161
1162static void emit_addimm_and_set_flags(int imm,int rt)
1163{
1164 assert(imm>-65536&&imm<65536);
1165 u_int armval;
1166 if(genimm(imm,&armval)) {
1167 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1168 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1169 }else if(genimm(-imm,&armval)) {
1170 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1171 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1172 }else if(imm<0) {
1173 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1174 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1175 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1176 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1177 }else{
1178 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1179 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1180 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1181 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1182 }
1183}
1184
1185static void emit_addimm_no_flags(u_int imm,u_int rt)
1186{
1187 emit_addimm(rt,imm,rt);
1188}
1189
1190static void emit_addnop(u_int r)
1191{
1192 assert(r<16);
1193 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1194 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1195}
1196
1197static void emit_adcimm(u_int rs,int imm,u_int rt)
1198{
1199 u_int armval;
1200 genimm_checked(imm,&armval);
1201 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1202 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1203}
1204
1205static void emit_rscimm(int rs,int imm,u_int rt)
1206{
1207 assert(0);
1208 u_int armval;
1209 genimm_checked(imm,&armval);
1210 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1211 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1212}
1213
// Adds imm to the register pair rsh:rsl and writes the result to rth:rtl.
// imm is first loaded into HOST_TEMPREG, added to the low word with
// emit_adds(), and the high word is then produced by emit_adcimm() with an
// immediate of 0.
// NOTE(review): the high word always adds 0, so a negative imm does not
// appear to be sign-extended here - confirm against callers.
static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
{
  // TODO: if(genimm(imm,&armval)) ...
  // else
  emit_movimm(imm,HOST_TEMPREG);
  emit_adds(HOST_TEMPREG,rsl,rtl);
  emit_adcimm(rsh,0,rth);
}
1222
1223static void emit_andimm(int rs,int imm,int rt)
1224{
1225 u_int armval;
1226 if(imm==0) {
1227 emit_zeroreg(rt);
1228 }else if(genimm(imm,&armval)) {
1229 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1230 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1231 }else if(genimm(~imm,&armval)) {
1232 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1234 }else if(imm==65535) {
1235 #ifndef HAVE_ARMV6
1236 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1237 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1238 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1239 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1240 #else
1241 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1242 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1243 #endif
1244 }else{
1245 assert(imm>0&&imm<65535);
1246 #ifndef HAVE_ARMV7
1247 assem_debug("mov r14,#%d\n",imm&0xFF00);
1248 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1249 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1250 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1251 #else
1252 emit_movw(imm,HOST_TEMPREG);
1253 #endif
1254 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1255 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1256 }
1257}
1258
1259static void emit_orimm(int rs,int imm,int rt)
1260{
1261 u_int armval;
1262 if(imm==0) {
1263 if(rs!=rt) emit_mov(rs,rt);
1264 }else if(genimm(imm,&armval)) {
1265 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1266 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1267 }else{
1268 assert(imm>0&&imm<65536);
1269 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1270 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1271 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1272 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1273 }
1274}
1275
1276static void emit_xorimm(int rs,int imm,int rt)
1277{
1278 u_int armval;
1279 if(imm==0) {
1280 if(rs!=rt) emit_mov(rs,rt);
1281 }else if(genimm(imm,&armval)) {
1282 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1283 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1284 }else{
1285 assert(imm>0&&imm<65536);
1286 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1287 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1288 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1289 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1290 }
1291}
1292
1293static void emit_shlimm(int rs,u_int imm,int rt)
1294{
1295 assert(imm>0);
1296 assert(imm<32);
1297 //if(imm==1) ...
1298 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1299 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1300}
1301
1302static void emit_lsls_imm(int rs,int imm,int rt)
1303{
1304 assert(imm>0);
1305 assert(imm<32);
1306 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1307 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1308}
1309
1310static unused void emit_lslpls_imm(int rs,int imm,int rt)
1311{
1312 assert(imm>0);
1313 assert(imm<32);
1314 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1315 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1316}
1317
1318static void emit_shrimm(int rs,u_int imm,int rt)
1319{
1320 assert(imm>0);
1321 assert(imm<32);
1322 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1323 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1324}
1325
1326static void emit_sarimm(int rs,u_int imm,int rt)
1327{
1328 assert(imm>0);
1329 assert(imm<32);
1330 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1331 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1332}
1333
1334static void emit_rorimm(int rs,u_int imm,int rt)
1335{
1336 assert(imm>0);
1337 assert(imm<32);
1338 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1339 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1340}
1341
1342static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1343{
1344 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1345 assert(imm>0);
1346 assert(imm<32);
1347 //if(imm==1) ...
1348 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1349 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1350 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1351 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1352}
1353
1354static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1355{
1356 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1357 assert(imm>0);
1358 assert(imm<32);
1359 //if(imm==1) ...
1360 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1361 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1362 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1363 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1364}
1365
// Sign-extends the low 16 bits of rs into rt: a single sxth when
// HAVE_ARMV6 is defined, otherwise a shift left by 16 followed by an
// arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1376
// Sign-extends the low 8 bits of rs into rt: a single sxtb when
// HAVE_ARMV6 is defined, otherwise a shift left by 24 followed by an
// arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1387
1388static void emit_shl(u_int rs,u_int shift,u_int rt)
1389{
1390 assert(rs<16);
1391 assert(rt<16);
1392 assert(shift<16);
1393 //if(imm==1) ...
1394 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1395 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1396}
1397
1398static void emit_shr(u_int rs,u_int shift,u_int rt)
1399{
1400 assert(rs<16);
1401 assert(rt<16);
1402 assert(shift<16);
1403 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1404 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1405}
1406
1407static void emit_sar(u_int rs,u_int shift,u_int rt)
1408{
1409 assert(rs<16);
1410 assert(rt<16);
1411 assert(shift<16);
1412 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1413 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1414}
1415
1416static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1417{
1418 assert(rs<16);
1419 assert(rt<16);
1420 assert(shift<16);
1421 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1422 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1423}
1424
1425static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1426{
1427 assert(rs<16);
1428 assert(rt<16);
1429 assert(shift<16);
1430 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1431 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1432}
1433
1434static void emit_cmpimm(int rs,int imm)
1435{
1436 u_int armval;
1437 if(genimm(imm,&armval)) {
1438 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1439 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1440 }else if(genimm(-imm,&armval)) {
1441 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1442 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1443 }else if(imm>0) {
1444 assert(imm<65536);
1445 emit_movimm(imm,HOST_TEMPREG);
1446 assem_debug("cmp %s,r14\n",regname[rs]);
1447 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1448 }else{
1449 assert(imm>-65536);
1450 emit_movimm(-imm,HOST_TEMPREG);
1451 assem_debug("cmn %s,r14\n",regname[rs]);
1452 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1453 }
1454}
1455
1456static void emit_cmovne_imm(int imm,int rt)
1457{
1458 assem_debug("movne %s,#%d\n",regname[rt],imm);
1459 u_int armval;
1460 genimm_checked(imm,&armval);
1461 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1462}
1463
1464static void emit_cmovl_imm(int imm,int rt)
1465{
1466 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1467 u_int armval;
1468 genimm_checked(imm,&armval);
1469 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1470}
1471
1472static void emit_cmovb_imm(int imm,int rt)
1473{
1474 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1475 u_int armval;
1476 genimm_checked(imm,&armval);
1477 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1478}
1479
1480static void emit_cmovs_imm(int imm,int rt)
1481{
1482 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1483 u_int armval;
1484 genimm_checked(imm,&armval);
1485 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1486}
1487
1488static void emit_cmove_reg(int rs,int rt)
1489{
1490 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1491 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1492}
1493
1494static void emit_cmovne_reg(int rs,int rt)
1495{
1496 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1497 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1498}
1499
1500static void emit_cmovl_reg(int rs,int rt)
1501{
1502 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1503 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1504}
1505
1506static void emit_cmovs_reg(int rs,int rt)
1507{
1508 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1509 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1510}
1511
// Emits code setting rt to 1 when rs<imm ("lt" condition) and to 0 otherwise.
// When rt aliases rs the zeroing is postponed until after the compare so
// the source value is still intact when it is read.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1519
// Emits code setting rt to 1 when the compare of rs with imm yields the
// "cc" condition and to 0 otherwise.
// When rt aliases rs the zeroing is postponed until after the compare so
// the source value is still intact when it is read.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1527
// Emits code setting rt to 1 when the signed 64-bit value rsh:rsl is less
// than the sign-extended immediate imm, and to 0 otherwise.
// rt must not be the same register as rsh (asserted).
//
// The low words only decide the result when the high words are equal, and
// in that case they must be compared as unsigned quantities; a signed
// compare gives the wrong answer whenever rsl has bit 31 set (for example
// 0x00000000_80000000 < 5 would come out true).  Hence emit_sltiu32().
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm>=0)
  {
    // High word of the immediate is 0: any non-zero rsh overrides the
    // low-word result, and a negative rsh means "less".
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
  else
  {
    // High word of the immediate is -1: any other rsh overrides the
    // low-word result, and rsh below -1 means "less".
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
}
1545
// Emits the 64-bit counterpart of emit_sltiu32(): the low word rsl is
// compared with imm first, then the result in rt is overridden according to
// the high word rsh.  rt must not be the same register as rsh (asserted).
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // Negative immediate: rsh is compared with -1 and rt forced to 1 on "ne".
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // Non-negative immediate: rt forced to 0 when rsh is non-zero.
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1561
1562static void emit_cmp(int rs,int rt)
1563{
1564 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1565 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1566}
1567
// Emits code setting rt to 1 when rs is greater than zero and to 0
// otherwise: rs is compared with 1, rt is loaded with 1, then replaced by
// 0 on the "lt" condition.  The compare comes first, so rs may be the same
// register as rt.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
1575
// Emits code setting rt to 1 when rs is non-zero.  When rs is zero, rt ends
// up holding the copied (zero) value, or is left untouched if rt is rs.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
1583
// Emits code setting rt to 1 when the signed 64-bit value rsh:rsl is
// greater than zero, and to 0 otherwise.
//
// With a zero high word the value is positive exactly when the low word is
// non-zero, so the low word is tested with emit_set_nz32(); a signed
// "greater than zero" test on rsl would report 0 for values such as
// 0x00000000_80000000.  The high word then overrides: non-zero forces 1,
// negative forces 0.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_nz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
1592
// Emits code setting rt to 1 when the pair rsh:rsl is non-zero: the two
// halves are ORed into rt with flags set, and rt is replaced by 1 on "ne".
// When both halves are zero rt holds the (zero) OR result.
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
1599
// Emits code setting rt to 1 when "cmp rs1,rs2" yields the "lt" condition
// and to 0 otherwise.  If rt is one of the sources, the zeroing is
// postponed until after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1608
// Emits code setting rt to 1 when "cmp rs1,rs2" yields the "cc" condition
// and to 0 otherwise.  If rt is one of the sources, the zeroing is
// postponed until after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1617
// Emits code setting rt to 1 when the 64-bit value u1:l1 is less than
// u2:l2 ("lt" condition) and to 0 otherwise.  The low words are compared
// first, then emit_sbcs() on the high words continues the subtraction into
// HOST_TEMPREG, whose value is discarded.
// rt must differ from both high-word registers (asserted).
static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
{
  //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
  assert(u1!=rt);
  assert(u2!=rt);
  emit_cmp(l1,l2);
  emit_movimm(0,rt);
  emit_sbcs(u1,u2,HOST_TEMPREG);
  emit_cmovl_imm(1,rt);
}
1628
// Same sequence as the signed 64-bit "set if less" helper, but the final
// conditional move uses the "cc" condition: rt becomes 1 when the 64-bit
// subtraction of u2:l2 from u1:l1 leaves carry clear, and 0 otherwise.
// HOST_TEMPREG receives the discarded high-word difference.
// rt must differ from both high-word registers (asserted).
static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
{
  //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
  assert(u1!=rt);
  assert(u2!=rt);
  emit_cmp(l1,l2);
  emit_movimm(0,rt);
  emit_sbcs(u1,u2,HOST_TEMPREG);
  emit_cmovb_imm(1,rt);
}
1639
1640static void emit_call(int a)
1641{
1642 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1643 u_int offset=genjmp(a);
1644 output_w32(0xeb000000|offset);
1645}
1646
1647static void emit_jmp(int a)
1648{
1649 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1650 u_int offset=genjmp(a);
1651 output_w32(0xea000000|offset);
1652}
1653
// Emits "bne" to address a; the offset field comes from genjmp().
static void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
1660
// Emits "beq" to address a; the offset field comes from genjmp().
static void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
1667
// Emits "bmi" to address a; the offset field comes from genjmp().
static void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
1674
// Emits "bpl" to address a; the offset field comes from genjmp().
static void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
1681
// Emits "blt" to address a; the offset field comes from genjmp().
static void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
1688
// Emits "bge" to address a; the offset field comes from genjmp().
static void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
1695
// Emits "bvc" to address a; the offset field comes from genjmp().
static void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
1702
// Emits "bcs" to address a; the offset field comes from genjmp().
static void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
1709
// Emits "bcc" to address a; the offset field comes from genjmp().
static void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1716
1717static void emit_callreg(u_int r)
1718{
1719 assert(r<15);
1720 assem_debug("blx %s\n",regname[r]);
1721 output_w32(0xe12fff30|r);
1722}
1723
1724static void emit_jmpreg(u_int r)
1725{
1726 assem_debug("mov pc,%s\n",regname[r]);
1727 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1728}
1729
1730static void emit_readword_indexed(int offset, int rs, int rt)
1731{
1732 assert(offset>-4096&&offset<4096);
1733 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1734 if(offset>=0) {
1735 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1736 }else{
1737 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1738 }
1739}
1740
1741static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1742{
1743 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1744 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1745}
1746
1747static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1748{
1749 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1750 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1751}
1752
1753static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1754{
1755 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1756 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1757}
1758
1759static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1760{
1761 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1762 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1763}
1764
1765static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1766{
1767 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1768 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1769}
1770
1771static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1772{
1773 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1774 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1775}
1776
// Word load with optional map register.  A negative map selects the plain
// rs+addr form; otherwise addr must be 0 (asserted) and the load is indexed
// by map shifted left by 2.
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
1785
// Loads two consecutive words into rh (first word, skipped when rh<0) and
// rl (second word).  With a negative map the words are read at addr and
// addr+4 from rs.  Otherwise the map register is incremented by 1 between
// the two mapped loads (the map index is scaled by 4 in the load), and rh
// must not be rs (asserted).
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1798
1799static void emit_movsbl_indexed(int offset, int rs, int rt)
1800{
1801 assert(offset>-256&&offset<256);
1802 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1803 if(offset>=0) {
1804 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1805 }else{
1806 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1807 }
1808}
1809
1810static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1811{
1812 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1813 else {
1814 if(addr==0) {
1815 emit_shlimm(map,2,map);
1816 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1817 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1818 }else{
1819 assert(addr>-256&&addr<256);
1820 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1821 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1822 emit_movsbl_indexed(addr, rt, rt);
1823 }
1824 }
1825}
1826
1827static void emit_movswl_indexed(int offset, int rs, int rt)
1828{
1829 assert(offset>-256&&offset<256);
1830 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1831 if(offset>=0) {
1832 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1833 }else{
1834 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1835 }
1836}
1837
1838static void emit_movzbl_indexed(int offset, int rs, int rt)
1839{
1840 assert(offset>-4096&&offset<4096);
1841 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1842 if(offset>=0) {
1843 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1844 }else{
1845 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1846 }
1847}
1848
1849static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1850{
1851 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1852 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1853}
1854
// Unsigned byte load with optional map register.  A negative map selects
// the plain rs+addr form.  Otherwise the load is indexed by map<<2, with a
// non-zero addr first added to rs into rt, which then serves as the base.
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1867
1868static void emit_movzwl_indexed(int offset, int rs, int rt)
1869{
1870 assert(offset>-256&&offset<256);
1871 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1872 if(offset>=0) {
1873 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1874 }else{
1875 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1876 }
1877}
1878
1879static void emit_ldrd(int offset, int rs, int rt)
1880{
1881 assert(offset>-256&&offset<256);
1882 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1883 if(offset>=0) {
1884 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1885 }else{
1886 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1887 }
1888}
1889
1890static void emit_readword(int addr, int rt)
1891{
1892 u_int offset = addr-(u_int)&dynarec_local;
1893 assert(offset<4096);
1894 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1895 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1896}
1897
1898static unused void emit_movsbl(int addr, int rt)
1899{
1900 u_int offset = addr-(u_int)&dynarec_local;
1901 assert(offset<256);
1902 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1903 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1904}
1905
1906static unused void emit_movswl(int addr, int rt)
1907{
1908 u_int offset = addr-(u_int)&dynarec_local;
1909 assert(offset<256);
1910 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1911 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1912}
1913
1914static unused void emit_movzbl(int addr, int rt)
1915{
1916 u_int offset = addr-(u_int)&dynarec_local;
1917 assert(offset<4096);
1918 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1919 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1920}
1921
1922static unused void emit_movzwl(int addr, int rt)
1923{
1924 u_int offset = addr-(u_int)&dynarec_local;
1925 assert(offset<256);
1926 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1927 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1928}
1929
1930static void emit_writeword_indexed(int rt, int offset, int rs)
1931{
1932 assert(offset>-4096&&offset<4096);
1933 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1934 if(offset>=0) {
1935 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1936 }else{
1937 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1938 }
1939}
1940
1941static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1942{
1943 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1944 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1945}
1946
// Word store with optional map register.  A negative map selects the plain
// rs+addr form; otherwise addr must be 0 (asserted) and the store is indexed
// by map shifted left by 2.  The temp argument is not used here.
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
1955
// Stores rh (first word) and rl (second word) to consecutive words.
// With a negative map the words go to addr and addr+4 from rs, and the
// first store is skipped when rh<0.  With a map register rh must be valid
// (asserted) and the second word is reached either through temp=map+1
// (when temp is not rs) or by advancing rs itself by 4.
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1972
1973static void emit_writehword_indexed(int rt, int offset, int rs)
1974{
1975 assert(offset>-256&&offset<256);
1976 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1977 if(offset>=0) {
1978 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1979 }else{
1980 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1981 }
1982}
1983
1984static void emit_writebyte_indexed(int rt, int offset, int rs)
1985{
1986 assert(offset>-4096&&offset<4096);
1987 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1988 if(offset>=0) {
1989 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1990 }else{
1991 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1992 }
1993}
1994
1995static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1996{
1997 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1998 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1999}
2000
// Byte store with optional map register.  A negative map selects the plain
// rs+addr form.  Otherwise the store is indexed by map<<2, with a non-zero
// addr first added to rs into temp, which then serves as the base.
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
2013
2014static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2015{
2016 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2017 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2018}
2019
2020static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2021{
2022 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2023 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2024}
2025
2026static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2027{
2028 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2029 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2030}
2031
2032static void emit_writeword(int rt, int addr)
2033{
2034 u_int offset = addr-(u_int)&dynarec_local;
2035 assert(offset<4096);
2036 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2037 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2038}
2039
2040static unused void emit_writehword(int rt, int addr)
2041{
2042 u_int offset = addr-(u_int)&dynarec_local;
2043 assert(offset<256);
2044 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2045 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2046}
2047
2048static unused void emit_writebyte(int rt, int addr)
2049{
2050 u_int offset = addr-(u_int)&dynarec_local;
2051 assert(offset<4096);
2052 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2053 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2054}
2055
2056static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2057{
2058 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2059 assert(rs1<16);
2060 assert(rs2<16);
2061 assert(hi<16);
2062 assert(lo<16);
2063 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2064}
2065
2066static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2067{
2068 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2069 assert(rs1<16);
2070 assert(rs2<16);
2071 assert(hi<16);
2072 assert(lo<16);
2073 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2074}
2075
2076static void emit_clz(int rs,int rt)
2077{
2078 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2079 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2080}
2081
2082static void emit_subcs(int rs1,int rs2,int rt)
2083{
2084 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2085 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2086}
2087
2088static void emit_shrcc_imm(int rs,u_int imm,int rt)
2089{
2090 assert(imm>0);
2091 assert(imm<32);
2092 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2093 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2094}
2095
2096static void emit_shrne_imm(int rs,u_int imm,int rt)
2097{
2098 assert(imm>0);
2099 assert(imm<32);
2100 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2101 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2102}
2103
2104static void emit_negmi(int rs, int rt)
2105{
2106 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2107 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2108}
2109
2110static void emit_negsmi(int rs, int rt)
2111{
2112 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2113 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2114}
2115
2116static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2117{
2118 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2119 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2120}
2121
2122static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2123{
2124 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2125 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2126}
2127
2128static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2129{
2130 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2131 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2132}
2133
2134static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2135{
2136 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2137 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2138}
2139
2140static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2141{
2142 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2143 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2144}
2145
2146static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2147{
2148 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2149 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2150}
2151
2152static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2153{
2154 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2155 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2156}
2157
2158static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2159{
2160 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2161 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2162}
2163
2164static void emit_teq(int rs, int rt)
2165{
2166 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2167 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2168}
2169
2170static void emit_rsbimm(int rs, int imm, int rt)
2171{
2172 u_int armval;
2173 genimm_checked(imm,&armval);
2174 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2175 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2176}
2177
2178// Load 2 immediates optimizing for small code size
2179static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2180{
2181 emit_movimm(imm1,rt1);
2182 u_int armval;
2183 if(genimm(imm2-imm1,&armval)) {
2184 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2185 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2186 }else if(genimm(imm1-imm2,&armval)) {
2187 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2188 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2189 }
2190 else emit_movimm(imm2,rt2);
2191}
2192
2193// Conditionally select one of two immediates, optimizing for small code size
2194// This will only be called if HAVE_CMOV_IMM is defined
2195static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2196{
2197 u_int armval;
2198 if(genimm(imm2-imm1,&armval)) {
2199 emit_movimm(imm1,rt);
2200 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2201 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2202 }else if(genimm(imm1-imm2,&armval)) {
2203 emit_movimm(imm1,rt);
2204 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2205 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2206 }
2207 else {
2208 #ifndef HAVE_ARMV7
2209 emit_movimm(imm1,rt);
2210 add_literal((int)out,imm2);
2211 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2212 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2213 #else
2214 emit_movw(imm1&0x0000FFFF,rt);
2215 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2216 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2217 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2218 }
2219 emit_movt(imm1&0xFFFF0000,rt);
2220 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2221 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2222 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2223 }
2224 #endif
2225 }
2226}
2227
2228// special case for checking invalid_code
2229static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2230{
2231 assert(imm<128&&imm>=0);
2232 assert(r>=0&&r<16);
2233 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2234 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2235 emit_cmpimm(HOST_TEMPREG,imm);
2236}
2237
// Emit "blne a": branch-with-link to address a, taken only on NE.
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000|genjmp(a));
}
2244
2245// Used to preload hash table entries
2246static unused void emit_prefetchreg(int r)
2247{
2248 assem_debug("pld %s\n",regname[r]);
2249 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2250}
2251
2252// Special case for mini_ht
2253static void emit_ldreq_indexed(int rs, u_int offset, int rt)
2254{
2255 assert(offset<4096);
2256 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2257 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2258}
2259
2260static unused void emit_bicne_imm(int rs,int imm,int rt)
2261{
2262 u_int armval;
2263 genimm_checked(imm,&armval);
2264 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2265 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2266}
2267
2268static unused void emit_biccs_imm(int rs,int imm,int rt)
2269{
2270 u_int armval;
2271 genimm_checked(imm,&armval);
2272 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2273 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2274}
2275
2276static unused void emit_bicvc_imm(int rs,int imm,int rt)
2277{
2278 u_int armval;
2279 genimm_checked(imm,&armval);
2280 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2281 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2282}
2283
2284static unused void emit_bichi_imm(int rs,int imm,int rt)
2285{
2286 u_int armval;
2287 genimm_checked(imm,&armval);
2288 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2289 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2290}
2291
2292static unused void emit_orrvs_imm(int rs,int imm,int rt)
2293{
2294 u_int armval;
2295 genimm_checked(imm,&armval);
2296 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2297 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2298}
2299
2300static void emit_orrne_imm(int rs,int imm,int rt)
2301{
2302 u_int armval;
2303 genimm_checked(imm,&armval);
2304 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2305 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2306}
2307
2308static void emit_andne_imm(int rs,int imm,int rt)
2309{
2310 u_int armval;
2311 genimm_checked(imm,&armval);
2312 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2313 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2314}
2315
2316static unused void emit_addpl_imm(int rs,int imm,int rt)
2317{
2318 u_int armval;
2319 genimm_checked(imm,&armval);
2320 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2321 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2322}
2323
2324static void emit_jno_unlikely(int a)
2325{
2326 //emit_jno(a);
2327 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2328 output_w32(0x72800000|rd_rn_rm(15,15,0));
2329}
2330
2331static void save_regs_all(u_int reglist)
2332{
2333 int i;
2334 if(!reglist) return;
2335 assem_debug("stmia fp,{");
2336 for(i=0;i<16;i++)
2337 if(reglist&(1<<i))
2338 assem_debug("r%d,",i);
2339 assem_debug("}\n");
2340 output_w32(0xe88b0000|reglist);
2341}
2342
2343static void restore_regs_all(u_int reglist)
2344{
2345 int i;
2346 if(!reglist) return;
2347 assem_debug("ldmia fp,{");
2348 for(i=0;i<16;i++)
2349 if(reglist&(1<<i))
2350 assem_debug("r%d,",i);
2351 assem_debug("}\n");
2352 output_w32(0xe89b0000|reglist);
2353}
2354
2355// Save registers before function call
2356static void save_regs(u_int reglist)
2357{
2358 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
2359 save_regs_all(reglist);
2360}
2361
2362// Restore registers after function call
2363static void restore_regs(u_int reglist)
2364{
2365 reglist&=CALLER_SAVE_REGS;
2366 restore_regs_all(reglist);
2367}
2368
2369/* Stubs/epilogue */
2370
2371static void literal_pool(int n)
2372{
2373 if(!literalcount) return;
2374 if(n) {
2375 if((int)out-literals[0][0]<4096-n) return;
2376 }
2377 u_int *ptr;
2378 int i;
2379 for(i=0;i<literalcount;i++)
2380 {
2381 u_int l_addr=(u_int)out;
2382 int j;
2383 for(j=0;j<i;j++) {
2384 if(literals[j][1]==literals[i][1]) {
2385 //printf("dup %08x\n",literals[i][1]);
2386 l_addr=literals[j][0];
2387 break;
2388 }
2389 }
2390 ptr=(u_int *)literals[i][0];
2391 u_int offset=l_addr-(u_int)ptr-8;
2392 assert(offset<4096);
2393 assert(!(offset&3));
2394 *ptr|=offset;
2395 if(l_addr==(u_int)out) {
2396 literals[i][0]=l_addr; // remember for dupes
2397 output_w32(literals[i][1]);
2398 }
2399 }
2400 literalcount=0;
2401}
2402
2403static void literal_pool_jumpover(int n)
2404{
2405 if(!literalcount) return;
2406 if(n) {
2407 if((int)out-literals[0][0]<4096-n) return;
2408 }
2409 int jaddr=(int)out;
2410 emit_jmp(0);
2411 literal_pool(0);
2412 set_jump_target(jaddr,(int)out);
2413}
2414
2415static void emit_extjump2(u_int addr, int target, int linker)
2416{
2417 u_char *ptr=(u_char *)addr;
2418 assert((ptr[3]&0x0e)==0xa);
2419 (void)ptr;
2420
2421 emit_loadlp(target,0);
2422 emit_loadlp(addr,1);
2423 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2424 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2425//DEBUG >
2426#ifdef DEBUG_CYCLE_COUNT
2427 emit_readword((int)&last_count,ECX);
2428 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2429 emit_readword((int)&next_interupt,ECX);
2430 emit_writeword(HOST_CCREG,(int)&Count);
2431 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2432 emit_writeword(ECX,(int)&last_count);
2433#endif
2434//DEBUG <
2435 emit_jmp(linker);
2436}
2437
2438static void emit_extjump(int addr, int target)
2439{
2440 emit_extjump2(addr, target, (int)dyna_linker);
2441}
2442
2443static void emit_extjump_ds(int addr, int target)
2444{
2445 emit_extjump2(addr, target, (int)dyna_linker_ds);
2446}
2447
2448// put rt_val into rt, potentially making use of rs with value rs_val
2449static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2450{
2451 u_int armval;
2452 int diff;
2453 if(genimm(rt_val,&armval)) {
2454 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2455 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2456 return;
2457 }
2458 if(genimm(~rt_val,&armval)) {
2459 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2460 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2461 return;
2462 }
2463 diff=rt_val-rs_val;
2464 if(genimm(diff,&armval)) {
2465 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2466 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2467 return;
2468 }else if(genimm(-diff,&armval)) {
2469 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2470 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2471 return;
2472 }
2473 emit_movimm(rt_val,rt);
2474}
2475
// Return 1 if emit_movimm_from() can do its job cheaply for these two
// values: they are equal, or their difference (in either direction)
// shrinks below 0x100 once trailing pairs of zero bits are dropped.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2) return 1;
  int delta=v2-v1;
  u_int bits;

  // v2 above v1
  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;

  // v2 below v1
  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100;
}
2491
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "nothing to move" for that argument.
// trashes r2 (used as scratch when the two registers must be swapped)
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // r0 holds the second argument: move it out before overwriting r0
      emit_mov(a1,1);
      if(a0>=0) emit_mov(a0,0);
      return;
    }
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2508
2509static void mov_loadtype_adj(int type,int rs,int rt)
2510{
2511 switch(type) {
2512 case LOADB_STUB: emit_signextend8(rs,rt); break;
2513 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2514 case LOADH_STUB: emit_signextend16(rs,rt); break;
2515 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2516 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2517 default: assert(0);
2518 }
2519}
2520
2521#include "pcsxmem.h"
2522#include "pcsxmem_inline.c"
2523
// Emit the out-of-line slow path for a memory read.
// Stub record fields used here:
//   stubs[n][0] load stub type      stubs[n][1] branch to patch to this stub
//   stubs[n][2] return address      stubs[n][3] instruction index
//   stubs[n][4] address register    stubs[n][5] struct regstat pointer
//   stubs[n][6] cycle adjustment    stubs[n][7] live host register list
// The stub first consults mem_rtab (indexed by address>>12) and tries a
// conditional direct load; otherwise it calls jump_handler_read8/16/32.
static void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int rt;
  // Destination: FTEMP for coprocessor and unaligned loads, else the
  // host register mapped to the instruction's target register
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
  reglist|=(1<<rs);
  // Look for a free scratch register among r0-r9 and r12 (mask 0x13ff)
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // The destination gets overwritten, so it need not be preserved
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // Nothing free: save registers now and use r0 (or r2 if rs is r0)
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // Prefer r1 as the second scratch when it is available
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // temp2 = mem_rtab[rs>>12], then shifted left by one with flags set
  emit_readword((int)&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  // Conditional ("cc") direct load from temp2+rs
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    switch(type) {
      case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
    }
  }
  if(regs_saved) {
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address

  // Handler path: the table entry was not directly usable
  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=(int)jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=(int)jump_handler_read16;
  if(type==LOADW_STUB)
    handler=(int)jump_handler_read32;
  assert(handler!=0);
  // r0 = address, r1 = shifted table entry, r2 = adjusted cycle count
  pass_args(rs,temp2);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  emit_call(handler);
  // Move the value returned in r0 into rt with the right extension
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    mov_loadtype_adj(type,0,rt);
  }
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
}
2602
2603// return memhandler, or get directly accessable address and return 0
2604static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2605{
2606 u_int l1,l2=0;
2607 l1=((u_int *)table)[addr>>12];
2608 if((l1&(1<<31))==0) {
2609 u_int v=l1<<1;
2610 *addr_host=v+addr;
2611 return 0;
2612 }
2613 else {
2614 l1<<=1;
2615 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2616 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2617 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
2618 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2619 else
2620 l2=((u_int *)l1)[(addr&0xfff)/4];
2621 if((l2&(1<<31))==0) {
2622 u_int v=l2<<1;
2623 *addr_host=v+(addr&0xfff);
2624 return 0;
2625 }
2626 return l2<<1;
2627 }
2628}
2629
2630static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2631{
2632 int rs=get_reg(regmap,target);
2633 int rt=get_reg(regmap,target);
2634 if(rs<0) rs=get_reg(regmap,-1);
2635 assert(rs>=0);
2636 u_int handler,host_addr=0,is_dynamic,far_call=0;
2637 int cc=get_reg(regmap,CCREG);
2638 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2639 return;
2640 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2641 if (handler==0) {
2642 if(rt<0||rt1[i]==0)
2643 return;
2644 if(addr!=host_addr)
2645 emit_movimm_from(addr,rs,host_addr,rs);
2646 switch(type) {
2647 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2648 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2649 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2650 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2651 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2652 default: assert(0);
2653 }
2654 return;
2655 }
2656 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2657 if(is_dynamic) {
2658 if(type==LOADB_STUB||type==LOADBU_STUB)
2659 handler=(int)jump_handler_read8;
2660 if(type==LOADH_STUB||type==LOADHU_STUB)
2661 handler=(int)jump_handler_read16;
2662 if(type==LOADW_STUB)
2663 handler=(int)jump_handler_read32;
2664 }
2665
2666 // call a memhandler
2667 if(rt>=0&&rt1[i]!=0)
2668 reglist&=~(1<<rt);
2669 save_regs(reglist);
2670 if(target==0)
2671 emit_movimm(addr,0);
2672 else if(rs!=0)
2673 emit_mov(rs,0);
2674 int offset=(int)handler-(int)out-8;
2675 if(offset<-33554432||offset>=33554432) {
2676 // unreachable memhandler, a plugin func perhaps
2677 emit_movimm(handler,12);
2678 far_call=1;
2679 }
2680 if(cc<0)
2681 emit_loadreg(CCREG,2);
2682 if(is_dynamic) {
2683 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2684 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2685 }
2686 else {
2687 emit_readword((int)&last_count,3);
2688 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2689 emit_add(2,3,2);
2690 emit_writeword(2,(int)&Count);
2691 }
2692
2693 if(far_call)
2694 emit_callreg(12);
2695 else
2696 emit_call(handler);
2697
2698 if(rt>=0&&rt1[i]!=0) {
2699 switch(type) {
2700 case LOADB_STUB: emit_signextend8(0,rt); break;
2701 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2702 case LOADH_STUB: emit_signextend16(0,rt); break;
2703 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2704 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2705 default: assert(0);
2706 }
2707 }
2708 restore_regs(reglist);
2709}
2710
// Emit the out-of-line slow path for a memory write.
// Stub record fields used here:
//   stubs[n][0] store stub type     stubs[n][1] branch to patch to this stub
//   stubs[n][2] return address      stubs[n][3] instruction index
//   stubs[n][4] address register    stubs[n][5] struct regstat pointer
//   stubs[n][6] cycle adjustment    stubs[n][7] live host register list
// The stub first consults mem_wtab (indexed by address>>12) and tries a
// conditional direct store; otherwise it calls jump_handler_write8/16/32.
static void do_writestub(int n)
{
  assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int rt,r;
  // Value to store: FTEMP for coprocessor stores, else the host register
  // mapped to rs2
  if(itype[i]==C1LS||itype[i]==C2LS) {
    rt=get_reg(i_regmap,r=FTEMP);
  }else{
    rt=get_reg(i_regmap,r=rs2[i]);
  }
  assert(rs>=0);
  assert(rt>=0);
  int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
  int reglist2=reglist|(1<<rs)|(1<<rt);
  // Look for a free scratch register among r0-r9 and r12 (mask 0x13ff)
  for(rtmp=0;rtmp<=12;rtmp++) {
    if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
      temp=rtmp; break;
    }
  }
  if(temp==-1) {
    // Nothing free: save registers now and take the first of r0-r3 that
    // is neither the address nor the value register
    save_regs(reglist);
    regs_saved=1;
    for(rtmp=0;rtmp<=3;rtmp++)
      if(rtmp!=rs&&rtmp!=rt)
        {temp=rtmp;break;}
  }
  // Prefer r3 as the second scratch when it is available
  if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
    temp2=3;
  // temp2 = mem_wtab[rs>>12], then shifted left by one with flags set
  emit_readword((int)&mem_wtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  // Conditional ("cc") direct store to temp2+rs
  switch(type) {
    case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
    case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
    case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
    default: assert(0);
  }
  if(regs_saved) {
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address (invcode check)

  // Handler path: the table entry was not directly usable
  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  switch(type) {
    case STOREB_STUB: handler=(int)jump_handler_write8; break;
    case STOREH_STUB: handler=(int)jump_handler_write16; break;
    case STOREW_STUB: handler=(int)jump_handler_write32; break;
  }
  assert(handler!=0);
  // r0 = address, r1 = value, r2 = adjusted cycle count, r3 = table entry
  pass_args(rs,rt);
  if(temp2!=3)
    emit_mov(temp2,3);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  // returns new cycle_count
  emit_call(handler);
  // Undo the adjustment on the count returned in r0
  emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  ra=stubs[n][2];
  emit_jmp(ra);
}
2790
2791static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2792{
2793 int rs=get_reg(regmap,-1);
2794 int rt=get_reg(regmap,target);
2795 assert(rs>=0);
2796 assert(rt>=0);
2797 u_int handler,host_addr=0;
2798 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2799 if (handler==0) {
2800 if(addr!=host_addr)
2801 emit_movimm_from(addr,rs,host_addr,rs);
2802 switch(type) {
2803 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2804 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2805 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2806 default: assert(0);
2807 }
2808 return;
2809 }
2810
2811 // call a memhandler
2812 save_regs(reglist);
2813 pass_args(rs,rt);
2814 int cc=get_reg(regmap,CCREG);
2815 if(cc<0)
2816 emit_loadreg(CCREG,2);
2817 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2818 emit_movimm(handler,3);
2819 // returns new cycle_count
2820 emit_call((int)jump_handler_write_h);
2821 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2822 if(cc<0)
2823 emit_storereg(CCREG,2);
2824 restore_regs(reglist);
2825}
2826
// Emit the out-of-line stub for an unaligned store (SWL/SWR only).
// Stub record fields used here (layout differs from the read/write stubs):
//   stubs[n][1] branch to patch to this stub   stubs[n][2] return address
//   stubs[n][3] instruction index              stubs[n][4] struct regstat pointer
//   stubs[n][5] address register               stubs[n][6] cycle adjustment
//   stubs[n][7] live host register list
// The active (#if 1) variant simply calls jump_handle_swl/jump_handle_swr;
// the #else branch is an older read-modify-write implementation that is
// compiled out.
static void do_unalignedwritestub(int n)
{
  assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);

  int i=stubs[n][3];
  struct regstat *i_regs=(struct regstat *)stubs[n][4];
  int addr=stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int temp2=get_reg(i_regmap,FTEMP);
  int rt;
  rt=get_reg(i_regmap,rs2[i]);
  assert(rt>=0);
  assert(addr>=0);
  assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
  // Keep the address register, but FTEMP does not need preserving
  // NOTE(review): temp2 is not checked for <0 before the shift - confirm
  // FTEMP is always allocated for these instructions
  reglist|=(1<<addr);
  reglist&=~(1<<temp2);

#if 1
  // don't bother with it and call write handler
  save_regs(reglist);
  // r0 = address, r1 = value, r2 = adjusted cycle count
  pass_args(addr,rt);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
  // Undo the adjustment on the count returned in r0
  emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
#else
  emit_andimm(addr,0xfffffffc,temp2);
  emit_writeword(temp2,(int)&address);

  save_regs(reglist);
  emit_shrimm(addr,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm((u_int)readmem,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
  emit_call((int)&indirect_jump_indexed);
  restore_regs(reglist);

  emit_readword((int)&readmem_dword,temp2);
  int temp=addr; //hmh
  emit_shlimm(addr,3,temp);
  emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
  if (opcode[i]==0x2e) // SWR
#else
  if (opcode[i]==0x2a) // SWL
#endif
    emit_xorimm(temp,24,temp);
  emit_movimm(-1,HOST_TEMPREG);
  if (opcode[i]==0x2a) { // SWL
    emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
    emit_orrshr(rt,temp,temp2);
  }else{
    emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
    emit_orrshl(rt,temp,temp2);
  }
  emit_readword((int)&address,addr);
  emit_writeword(temp2,(int)&word);
  //save_regs(reglist); // don't need to, no state changes
  emit_shrimm(addr,16,1);
  emit_movimm((u_int)writemem,0);
  //emit_call((int)&indirect_jump_indexed);
  emit_mov(15,14);
  emit_readword_dualindexedx4(0,1,15);
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
#endif
}
2914
2915static void do_invstub(int n)
2916{
2917 literal_pool(20);
2918 u_int reglist=stubs[n][3];
2919 set_jump_target(stubs[n][1],(int)out);
2920 save_regs(reglist);
2921 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2922 emit_call((int)&invalidate_addr);
2923 restore_regs(reglist);
2924 emit_jmp(stubs[n][2]); // return address
2925}
2926
2927int do_dirty_stub(int i)
2928{
2929 assem_debug("do_dirty_stub %x\n",start+i*4);
2930 u_int addr=(u_int)source;
2931 // Careful about the code output here, verify_dirty needs to parse it.
2932 #ifndef HAVE_ARMV7
2933 emit_loadlp(addr,1);
2934 emit_loadlp((int)copy,2);
2935 emit_loadlp(slen*4,3);
2936 #else
2937 emit_movw(addr&0x0000FFFF,1);
2938 emit_movw(((u_int)copy)&0x0000FFFF,2);
2939 emit_movt(addr&0xFFFF0000,1);
2940 emit_movt(((u_int)copy)&0xFFFF0000,2);
2941 emit_movw(slen*4,3);
2942 #endif
2943 emit_movimm(start+i*4,0);
2944 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2945 int entry=(int)out;
2946 load_regs_entry(i);
2947 if(entry==(int)out) entry=instr_addr[i];
2948 emit_jmp(instr_addr[i]);
2949 return entry;
2950}
2951
2952static void do_dirty_stub_ds()
2953{
2954 // Careful about the code output here, verify_dirty needs to parse it.
2955 #ifndef HAVE_ARMV7
2956 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2957 emit_loadlp((int)copy,2);
2958 emit_loadlp(slen*4,3);
2959 #else
2960 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2961 emit_movw(((u_int)copy)&0x0000FFFF,2);
2962 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2963 emit_movt(((u_int)copy)&0xFFFF0000,2);
2964 emit_movw(slen*4,3);
2965 #endif
2966 emit_movimm(start+1,0);
2967 emit_call((int)&verify_code_ds);
2968}
2969
2970static void do_cop1stub(int n)
2971{
2972 literal_pool(256);
2973 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2974 set_jump_target(stubs[n][1],(int)out);
2975 int i=stubs[n][3];
2976// int rs=stubs[n][4];
2977 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2978 int ds=stubs[n][6];
2979 if(!ds) {
2980 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2981 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2982 }
2983 //else {printf("fp exception in delay slot\n");}
2984 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2985 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2986 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2987 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2988 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2989}
2990
2991/* Special assem */
2992
2993static void shift_assemble_arm(int i,struct regstat *i_regs)
2994{
2995 if(rt1[i]) {
2996 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2997 {
2998 signed char s,t,shift;
2999 t=get_reg(i_regs->regmap,rt1[i]);
3000 s=get_reg(i_regs->regmap,rs1[i]);
3001 shift=get_reg(i_regs->regmap,rs2[i]);
3002 if(t>=0){
3003 if(rs1[i]==0)
3004 {
3005 emit_zeroreg(t);
3006 }
3007 else if(rs2[i]==0)
3008 {
3009 assert(s>=0);
3010 if(s!=t) emit_mov(s,t);
3011 }
3012 else
3013 {
3014 emit_andimm(shift,31,HOST_TEMPREG);
3015 if(opcode2[i]==4) // SLLV
3016 {
3017 emit_shl(s,HOST_TEMPREG,t);
3018 }
3019 if(opcode2[i]==6) // SRLV
3020 {
3021 emit_shr(s,HOST_TEMPREG,t);
3022 }
3023 if(opcode2[i]==7) // SRAV
3024 {
3025 emit_sar(s,HOST_TEMPREG,t);
3026 }
3027 }
3028 }
3029 } else { // DSLLV/DSRLV/DSRAV
3030 signed char sh,sl,th,tl,shift;
3031 th=get_reg(i_regs->regmap,rt1[i]|64);
3032 tl=get_reg(i_regs->regmap,rt1[i]);
3033 sh=get_reg(i_regs->regmap,rs1[i]|64);
3034 sl=get_reg(i_regs->regmap,rs1[i]);
3035 shift=get_reg(i_regs->regmap,rs2[i]);
3036 if(tl>=0){
3037 if(rs1[i]==0)
3038 {
3039 emit_zeroreg(tl);
3040 if(th>=0) emit_zeroreg(th);
3041 }
3042 else if(rs2[i]==0)
3043 {
3044 assert(sl>=0);
3045 if(sl!=tl) emit_mov(sl,tl);
3046 if(th>=0&&sh!=th) emit_mov(sh,th);
3047 }
3048 else
3049 {
3050 // FIXME: What if shift==tl ?
3051 assert(shift!=tl);
3052 int temp=get_reg(i_regs->regmap,-1);
3053 int real_th=th;
3054 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3055 assert(sl>=0);
3056 assert(sh>=0);
3057 emit_andimm(shift,31,HOST_TEMPREG);
3058 if(opcode2[i]==0x14) // DSLLV
3059 {
3060 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3061 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3062 emit_orrshr(sl,HOST_TEMPREG,th);
3063 emit_andimm(shift,31,HOST_TEMPREG);
3064 emit_testimm(shift,32);
3065 emit_shl(sl,HOST_TEMPREG,tl);
3066 if(th>=0) emit_cmovne_reg(tl,th);
3067 emit_cmovne_imm(0,tl);
3068 }
3069 if(opcode2[i]==0x16) // DSRLV
3070 {
3071 assert(th>=0);
3072 emit_shr(sl,HOST_TEMPREG,tl);
3073 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3074 emit_orrshl(sh,HOST_TEMPREG,tl);
3075 emit_andimm(shift,31,HOST_TEMPREG);
3076 emit_testimm(shift,32);
3077 emit_shr(sh,HOST_TEMPREG,th);
3078 emit_cmovne_reg(th,tl);
3079 if(real_th>=0) emit_cmovne_imm(0,th);
3080 }
3081 if(opcode2[i]==0x17) // DSRAV
3082 {
3083 assert(th>=0);
3084 emit_shr(sl,HOST_TEMPREG,tl);
3085 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3086 if(real_th>=0) {
3087 assert(temp>=0);
3088 emit_sarimm(th,31,temp);
3089 }
3090 emit_orrshl(sh,HOST_TEMPREG,tl);
3091 emit_andimm(shift,31,HOST_TEMPREG);
3092 emit_testimm(shift,32);
3093 emit_sar(sh,HOST_TEMPREG,th);
3094 emit_cmovne_reg(th,tl);
3095 if(real_th>=0) emit_cmovne_reg(temp,th);
3096 }
3097 }
3098 }
3099 }
3100 }
3101}
3102
3103static void speculate_mov(int rs,int rt)
3104{
3105 if(rt!=0) {
3106 smrv_strong_next|=1<<rt;
3107 smrv[rt]=smrv[rs];
3108 }
3109}
3110
3111static void speculate_mov_weak(int rs,int rt)
3112{
3113 if(rt!=0) {
3114 smrv_weak_next|=1<<rt;
3115 smrv[rt]=smrv[rs];
3116 }
3117}
3118
// Update the speculated register values (smrv[]) and their confidence
// masks for instruction i.  smrv_strong/smrv_weak describe the state
// before the instruction; smrv_strong_next/smrv_weak_next are prepared
// for the following one.
static void speculate_register_values(int i)
{
  if(i==0) {
    // Block start: seed the guesses from the current emulated registers
    memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
    // gp,sp are likely to stay the same throughout the block
    // (registers 28, 29 and 30 are marked strong; all others weak)
    smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
    smrv_weak_next=~smrv_strong_next;
    //printf(" llr %08x\n", smrv[4]);
  }
  smrv_strong=smrv_strong_next;
  smrv_weak=smrv_weak_next;
  switch(itype[i]) {
    case ALU:
      // Result inherits the guess of a tracked source, strong sources
      // first; with no tracked source the target becomes unknown
      if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
      else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
      else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
      else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
      else {
        smrv_strong_next&=~(1<<rt1[i]);
        smrv_weak_next&=~(1<<rt1[i]);
      }
      break;
    case SHIFTIMM:
      smrv_strong_next&=~(1<<rt1[i]);
      smrv_weak_next&=~(1<<rt1[i]);
      // fallthrough
    case IMM16:
      if(rt1[i]&&is_const(&regs[i],rt1[i])) {
        // Target is a known constant: record its value as a strong guess
        int value,hr=get_reg(regs[i].regmap,rt1[i]);
        if(hr>=0) {
          if(get_final_value(hr,i,&value))
               smrv[rt1[i]]=value;
          else smrv[rt1[i]]=constmap[i][hr];
          smrv_strong_next|=1<<rt1[i];
        }
      }
      else {
        if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
        else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
      }
      break;
    case LOAD:
      if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
        // special case for BIOS
        smrv[rt1[i]]=0xa0000000;
        smrv_strong_next|=1<<rt1[i];
        break;
      }
      // fallthrough
    case SHIFT:
    case LOADLR:
    case MOV:
      // Result cannot be predicted: drop any guess for the target
      smrv_strong_next&=~(1<<rt1[i]);
      smrv_weak_next&=~(1<<rt1[i]);
      break;
    case COP0:
    case COP2:
      if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
        smrv_strong_next&=~(1<<rt1[i]);
        smrv_weak_next&=~(1<<rt1[i]);
      }
      break;
    case C2LS:
      if (opcode[i]==0x32) { // LWC2
        smrv_strong_next&=~(1<<rt1[i]);
        smrv_weak_next&=~(1<<rt1[i]);
      }
      break;
  }
#if 0
  int r=4;
  printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
    ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
#endif
}
3194
// Memory region classes returned by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default: anything not matched by the ranges below
  MTYPE_8020, // 0x80200000..0x807fffff (RAM 80200000+ mirror)
  MTYPE_0000, // below 0x00200000 (RAM 0 mirror)
  MTYPE_A000, // 0xa0000000..0xa01fffff (RAM A mirror)
  MTYPE_1F80, // 0x1f800000..0x1f800fff (scratchpad)
};
3202
3203static int get_ptr_mem_type(u_int a)
3204{
3205 if(a < 0x00200000) {
3206 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3207 // return wrong, must use memhandler for BIOS self-test to pass
3208 // 007 does similar stuff from a00 mirror, weird stuff
3209 return MTYPE_8000;
3210 return MTYPE_0000;
3211 }
3212 if(0x1f800000 <= a && a < 0x1f801000)
3213 return MTYPE_1F80;
3214 if(0x80200000 <= a && a < 0x80800000)
3215 return MTYPE_8020;
3216 if(0xa0000000 <= a && a < 0xa0200000)
3217 return MTYPE_A000;
3218 return MTYPE_8000;
3219}
3220
3221static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3222{
3223 int jaddr=0,type=0;
3224 int mr=rs1[i];
3225 if(((smrv_strong|smrv_weak)>>mr)&1) {
3226 type=get_ptr_mem_type(smrv[mr]);
3227 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3228 }
3229 else {
3230 // use the mirror we are running on
3231 type=get_ptr_mem_type(start);
3232 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3233 }
3234
3235 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3236 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3237 addr=*addr_reg_override=HOST_TEMPREG;
3238 type=0;
3239 }
3240 else if(type==MTYPE_0000) { // RAM 0 mirror
3241 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3242 addr=*addr_reg_override=HOST_TEMPREG;
3243 type=0;
3244 }
3245 else if(type==MTYPE_A000) { // RAM A mirror
3246 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3247 addr=*addr_reg_override=HOST_TEMPREG;
3248 type=0;
3249 }
3250 else if(type==MTYPE_1F80) { // scratchpad
3251 if (psxH == (void *)0x1f800000) {
3252 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3253 emit_cmpimm(HOST_TEMPREG,0x1000);
3254 jaddr=(int)out;
3255 emit_jc(0);
3256 }
3257 else {
3258 // do usual RAM check, jump will go to the right handler
3259 type=0;
3260 }
3261 }
3262
3263 if(type==0)
3264 {
3265 emit_cmpimm(addr,RAM_SIZE);
3266 jaddr=(int)out;
3267 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3268 // Hint to branch predictor that the branch is unlikely to be taken
3269 if(rs1[i]>=28)
3270 emit_jno_unlikely(0);
3271 else
3272 #endif
3273 emit_jno(0);
3274 if(ram_offset!=0) {
3275 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3276 addr=*addr_reg_override=HOST_TEMPREG;
3277 }
3278 }
3279
3280 return jaddr;
3281}
3282
3283#define shift_assemble shift_assemble_arm
3284
3285static void loadlr_assemble_arm(int i,struct regstat *i_regs)
3286{
3287 int s,th,tl,temp,temp2,addr,map=-1;
3288 int offset;
3289 int jaddr=0;
3290 int memtarget=0,c=0;
3291 int fastload_reg_override=0;
3292 u_int hr,reglist=0;
3293 th=get_reg(i_regs->regmap,rt1[i]|64);
3294 tl=get_reg(i_regs->regmap,rt1[i]);
3295 s=get_reg(i_regs->regmap,rs1[i]);
3296 temp=get_reg(i_regs->regmap,-1);
3297 temp2=get_reg(i_regs->regmap,FTEMP);
3298 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3299 assert(addr<0);
3300 offset=imm[i];
3301 for(hr=0;hr<HOST_REGS;hr++) {
3302 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3303 }
3304 reglist|=1<<temp;
3305 if(offset||s<0||c) addr=temp2;
3306 else addr=s;
3307 if(s>=0) {
3308 c=(i_regs->wasconst>>s)&1;
3309 if(c) {
3310 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3311 }
3312 }
3313 if(!c) {
3314 #ifdef RAM_OFFSET
3315 map=get_reg(i_regs->regmap,ROREG);
3316 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3317 #endif
3318 emit_shlimm(addr,3,temp);
3319 if (opcode[i]==0x22||opcode[i]==0x26) {
3320 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3321 }else{
3322 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3323 }
3324 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3325 }
3326 else {
3327 if(ram_offset&&memtarget) {
3328 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3329 fastload_reg_override=HOST_TEMPREG;
3330 }
3331 if (opcode[i]==0x22||opcode[i]==0x26) {
3332 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3333 }else{
3334 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3335 }
3336 }
3337 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3338 if(!c||memtarget) {
3339 int a=temp2;
3340 if(fastload_reg_override) a=fastload_reg_override;
3341 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3342 emit_readword_indexed_tlb(0,a,map,temp2);
3343 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3344 }
3345 else
3346 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3347 if(rt1[i]) {
3348 assert(tl>=0);
3349 emit_andimm(temp,24,temp);
3350#ifdef BIG_ENDIAN_MIPS
3351 if (opcode[i]==0x26) // LWR
3352#else
3353 if (opcode[i]==0x22) // LWL
3354#endif
3355 emit_xorimm(temp,24,temp);
3356 emit_movimm(-1,HOST_TEMPREG);
3357 if (opcode[i]==0x26) {
3358 emit_shr(temp2,temp,temp2);
3359 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3360 }else{
3361 emit_shl(temp2,temp,temp2);
3362 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3363 }
3364 emit_or(temp2,tl,tl);
3365 }
3366 //emit_storereg(rt1[i],tl); // DEBUG
3367 }
3368 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3369 // FIXME: little endian, fastload_reg_override
3370 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3371 if(!c||memtarget) {
3372 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3373 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3374 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3375 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3376 }
3377 else
3378 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3379 if(rt1[i]) {
3380 assert(th>=0);
3381 assert(tl>=0);
3382 emit_testimm(temp,32);
3383 emit_andimm(temp,24,temp);
3384 if (opcode[i]==0x1A) { // LDL
3385 emit_rsbimm(temp,32,HOST_TEMPREG);
3386 emit_shl(temp2h,temp,temp2h);
3387 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3388 emit_movimm(-1,HOST_TEMPREG);
3389 emit_shl(temp2,temp,temp2);
3390 emit_cmove_reg(temp2h,th);
3391 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3392 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3393 emit_orreq(temp2,tl,tl);
3394 emit_orrne(temp2,th,th);
3395 }
3396 if (opcode[i]==0x1B) { // LDR
3397 emit_xorimm(temp,24,temp);
3398 emit_rsbimm(temp,32,HOST_TEMPREG);
3399 emit_shr(temp2,temp,temp2);
3400 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3401 emit_movimm(-1,HOST_TEMPREG);
3402 emit_shr(temp2h,temp,temp2h);
3403 emit_cmovne_reg(temp2,tl);
3404 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3405 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3406 emit_orrne(temp2h,th,th);
3407 emit_orreq(temp2h,tl,tl);
3408 }
3409 }
3410 }
3411}
3412#define loadlr_assemble loadlr_assemble_arm
3413
3414static void cop0_assemble(int i,struct regstat *i_regs)
3415{
3416 if(opcode2[i]==0) // MFC0
3417 {
3418 signed char t=get_reg(i_regs->regmap,rt1[i]);
3419 char copr=(source[i]>>11)&0x1f;
3420 //assert(t>=0); // Why does this happen? OOT is weird
3421 if(t>=0&&rt1[i]!=0) {
3422 emit_readword((int)&reg_cop0+copr*4,t);
3423 }
3424 }
3425 else if(opcode2[i]==4) // MTC0
3426 {
3427 signed char s=get_reg(i_regs->regmap,rs1[i]);
3428 char copr=(source[i]>>11)&0x1f;
3429 assert(s>=0);
3430 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3431 if(copr==9||copr==11||copr==12||copr==13) {
3432 emit_readword((int)&last_count,HOST_TEMPREG);
3433 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3434 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3435 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3436 emit_writeword(HOST_CCREG,(int)&Count);
3437 }
3438 // What a mess. The status register (12) can enable interrupts,
3439 // so needs a special case to handle a pending interrupt.
3440 // The interrupt must be taken immediately, because a subsequent
3441 // instruction might disable interrupts again.
3442 if(copr==12||copr==13) {
3443 if (is_delayslot) {
3444 // burn cycles to cause cc_interrupt, which will
3445 // reschedule next_interupt. Relies on CCREG from above.
3446 assem_debug("MTC0 DS %d\n", copr);
3447 emit_writeword(HOST_CCREG,(int)&last_count);
3448 emit_movimm(0,HOST_CCREG);
3449 emit_storereg(CCREG,HOST_CCREG);
3450 emit_loadreg(rs1[i],1);
3451 emit_movimm(copr,0);
3452 emit_call((int)pcsx_mtc0_ds);
3453 emit_loadreg(rs1[i],s);
3454 return;
3455 }
3456 emit_movimm(start+i*4+4,HOST_TEMPREG);
3457 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3458 emit_movimm(0,HOST_TEMPREG);
3459 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
3460 }
3461 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3462 //else
3463 if(s==HOST_CCREG)
3464 emit_loadreg(rs1[i],1);
3465 else if(s!=1)
3466 emit_mov(s,1);
3467 emit_movimm(copr,0);
3468 emit_call((int)pcsx_mtc0);
3469 if(copr==9||copr==11||copr==12||copr==13) {
3470 emit_readword((int)&Count,HOST_CCREG);
3471 emit_readword((int)&next_interupt,HOST_TEMPREG);
3472 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3473 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3474 emit_writeword(HOST_TEMPREG,(int)&last_count);
3475 emit_storereg(CCREG,HOST_CCREG);
3476 }
3477 if(copr==12||copr==13) {
3478 assert(!is_delayslot);
3479 emit_readword((int)&pending_exception,14);
3480 emit_test(14,14);
3481 emit_jne((int)&do_interrupt);
3482 }
3483 emit_loadreg(rs1[i],s);
3484 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3485 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3486 cop1_usable=0;
3487 }
3488 else
3489 {
3490 assert(opcode2[i]==0x10);
3491 if((source[i]&0x3f)==0x10) // RFE
3492 {
3493 emit_readword((int)&Status,0);
3494 emit_andimm(0,0x3c,1);
3495 emit_andimm(0,~0xf,0);
3496 emit_orrshr_imm(1,2,0);
3497 emit_writeword(0,(int)&Status);
3498 }
3499 }
3500}
3501
3502static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3503{
3504 switch (copr) {
3505 case 1:
3506 case 3:
3507 case 5:
3508 case 8:
3509 case 9:
3510 case 10:
3511 case 11:
3512 emit_readword((int)&reg_cop2d[copr],tl);
3513 emit_signextend16(tl,tl);
3514 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3515 break;
3516 case 7:
3517 case 16:
3518 case 17:
3519 case 18:
3520 case 19:
3521 emit_readword((int)&reg_cop2d[copr],tl);
3522 emit_andimm(tl,0xffff,tl);
3523 emit_writeword(tl,(int)&reg_cop2d[copr]);
3524 break;
3525 case 15:
3526 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3527 emit_writeword(tl,(int)&reg_cop2d[copr]);
3528 break;
3529 case 28:
3530 case 29:
3531 emit_readword((int)&reg_cop2d[9],temp);
3532 emit_testimm(temp,0x8000); // do we need this?
3533 emit_andimm(temp,0xf80,temp);
3534 emit_andne_imm(temp,0,temp);
3535 emit_shrimm(temp,7,tl);
3536 emit_readword((int)&reg_cop2d[10],temp);
3537 emit_testimm(temp,0x8000);
3538 emit_andimm(temp,0xf80,temp);
3539 emit_andne_imm(temp,0,temp);
3540 emit_orrshr_imm(temp,2,tl);
3541 emit_readword((int)&reg_cop2d[11],temp);
3542 emit_testimm(temp,0x8000);
3543 emit_andimm(temp,0xf80,temp);
3544 emit_andne_imm(temp,0,temp);
3545 emit_orrshl_imm(temp,3,tl);
3546 emit_writeword(tl,(int)&reg_cop2d[copr]);
3547 break;
3548 default:
3549 emit_readword((int)&reg_cop2d[copr],tl);
3550 break;
3551 }
3552}
3553
3554static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3555{
3556 switch (copr) {
3557 case 15:
3558 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3559 emit_writeword(sl,(int)&reg_cop2d[copr]);
3560 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3561 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3562 emit_writeword(sl,(int)&reg_cop2d[14]);
3563 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3564 break;
3565 case 28:
3566 emit_andimm(sl,0x001f,temp);
3567 emit_shlimm(temp,7,temp);
3568 emit_writeword(temp,(int)&reg_cop2d[9]);
3569 emit_andimm(sl,0x03e0,temp);
3570 emit_shlimm(temp,2,temp);
3571 emit_writeword(temp,(int)&reg_cop2d[10]);
3572 emit_andimm(sl,0x7c00,temp);
3573 emit_shrimm(temp,3,temp);
3574 emit_writeword(temp,(int)&reg_cop2d[11]);
3575 emit_writeword(sl,(int)&reg_cop2d[28]);
3576 break;
3577 case 30:
3578 emit_movs(sl,temp);
3579 emit_mvnmi(temp,temp);
3580#ifdef HAVE_ARMV5
3581 emit_clz(temp,temp);
3582#else
3583 emit_movs(temp,HOST_TEMPREG);
3584 emit_movimm(0,temp);
3585 emit_jeq((int)out+4*4);
3586 emit_addpl_imm(temp,1,temp);
3587 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3588 emit_jns((int)out-2*4);
3589#endif
3590 emit_writeword(sl,(int)&reg_cop2d[30]);
3591 emit_writeword(temp,(int)&reg_cop2d[31]);
3592 break;
3593 case 31:
3594 break;
3595 default:
3596 emit_writeword(sl,(int)&reg_cop2d[copr]);
3597 break;
3598 }
3599}
3600
3601static void cop2_assemble(int i,struct regstat *i_regs)
3602{
3603 u_int copr=(source[i]>>11)&0x1f;
3604 signed char temp=get_reg(i_regs->regmap,-1);
3605 if (opcode2[i]==0) { // MFC2
3606 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3607 if(tl>=0&&rt1[i]!=0)
3608 cop2_get_dreg(copr,tl,temp);
3609 }
3610 else if (opcode2[i]==4) { // MTC2
3611 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3612 cop2_put_dreg(copr,sl,temp);
3613 }
3614 else if (opcode2[i]==2) // CFC2
3615 {
3616 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3617 if(tl>=0&&rt1[i]!=0)
3618 emit_readword((int)&reg_cop2c[copr],tl);
3619 }
3620 else if (opcode2[i]==6) // CTC2
3621 {
3622 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3623 switch(copr) {
3624 case 4:
3625 case 12:
3626 case 20:
3627 case 26:
3628 case 27:
3629 case 29:
3630 case 30:
3631 emit_signextend16(sl,temp);
3632 break;
3633 case 31:
3634 //value = value & 0x7ffff000;
3635 //if (value & 0x7f87e000) value |= 0x80000000;
3636 emit_shrimm(sl,12,temp);
3637 emit_shlimm(temp,12,temp);
3638 emit_testimm(temp,0x7f000000);
3639 emit_testeqimm(temp,0x00870000);
3640 emit_testeqimm(temp,0x0000e000);
3641 emit_orrne_imm(temp,0x80000000,temp);
3642 break;
3643 default:
3644 temp=sl;
3645 break;
3646 }
3647 emit_writeword(temp,(int)&reg_cop2c[copr]);
3648 assert(sl>=0);
3649 }
3650}
3651
3652static void c2op_prologue(u_int op,u_int reglist)
3653{
3654 save_regs_all(reglist);
3655#ifdef PCNT
3656 emit_movimm(op,0);
3657 emit_call((int)pcnt_gte_start);
3658#endif
3659 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3660}
3661
3662static void c2op_epilogue(u_int op,u_int reglist)
3663{
3664#ifdef PCNT
3665 emit_movimm(op,0);
3666 emit_call((int)pcnt_gte_end);
3667#endif
3668 restore_regs_all(reglist);
3669}
3670
3671static void c2op_call_MACtoIR(int lm,int need_flags)
3672{
3673 if(need_flags)
3674 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3675 else
3676 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3677}
3678
3679static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3680{
3681 emit_call((int)func);
3682 // func is C code and trashes r0
3683 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3684 if(need_flags||need_ir)
3685 c2op_call_MACtoIR(lm,need_flags);
3686 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3687}
3688
3689static void c2op_assemble(int i,struct regstat *i_regs)
3690{
3691 u_int c2op=source[i]&0x3f;
3692 u_int hr,reglist_full=0,reglist;
3693 int need_flags,need_ir;
3694 for(hr=0;hr<HOST_REGS;hr++) {
3695 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
3696 }
3697 reglist=reglist_full&CALLER_SAVE_REGS;
3698
3699 if (gte_handlers[c2op]!=NULL) {
3700 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
3701 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
3702 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3703 source[i],gte_unneeded[i+1],need_flags,need_ir);
3704 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3705 need_flags=0;
3706 int shift = (source[i] >> 19) & 1;
3707 int lm = (source[i] >> 10) & 1;
3708 switch(c2op) {
3709#ifndef DRC_DBG
3710 case GTE_MVMVA: {
3711#ifdef HAVE_ARMV5
3712 int v = (source[i] >> 15) & 3;
3713 int cv = (source[i] >> 13) & 3;
3714 int mx = (source[i] >> 17) & 3;
3715 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
3716 c2op_prologue(c2op,reglist);
3717 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3718 if(v<3)
3719 emit_ldrd(v*8,0,4);
3720 else {
3721 emit_movzwl_indexed(9*4,0,4); // gteIR
3722 emit_movzwl_indexed(10*4,0,6);
3723 emit_movzwl_indexed(11*4,0,5);
3724 emit_orrshl_imm(6,16,4);
3725 }
3726 if(mx<3)
3727 emit_addimm(0,32*4+mx*8*4,6);
3728 else
3729 emit_readword((int)&zeromem_ptr,6);
3730 if(cv<3)
3731 emit_addimm(0,32*4+(cv*8+5)*4,7);
3732 else
3733 emit_readword((int)&zeromem_ptr,7);
3734#ifdef __ARM_NEON__
3735 emit_movimm(source[i],1); // opcode
3736 emit_call((int)gteMVMVA_part_neon);
3737 if(need_flags) {
3738 emit_movimm(lm,1);
3739 emit_call((int)gteMACtoIR_flags_neon);
3740 }
3741#else
3742 if(cv==3&&shift)
3743 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3744 else {
3745 emit_movimm(shift,1);
3746 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3747 }
3748 if(need_flags||need_ir)
3749 c2op_call_MACtoIR(lm,need_flags);
3750#endif
3751#else /* if not HAVE_ARMV5 */
3752 c2op_prologue(c2op,reglist);
3753 emit_movimm(source[i],1); // opcode
3754 emit_writeword(1,(int)&psxRegs.code);
3755 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3756#endif
3757 break;
3758 }
3759 case GTE_OP:
3760 c2op_prologue(c2op,reglist);
3761 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3762 if(need_flags||need_ir) {
3763 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3764 c2op_call_MACtoIR(lm,need_flags);
3765 }
3766 break;
3767 case GTE_DPCS:
3768 c2op_prologue(c2op,reglist);
3769 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3770 break;
3771 case GTE_INTPL:
3772 c2op_prologue(c2op,reglist);
3773 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3774 break;
3775 case GTE_SQR:
3776 c2op_prologue(c2op,reglist);
3777 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3778 if(need_flags||need_ir) {
3779 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3780 c2op_call_MACtoIR(lm,need_flags);
3781 }
3782 break;
3783 case GTE_DCPL:
3784 c2op_prologue(c2op,reglist);
3785 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3786 break;
3787 case GTE_GPF:
3788 c2op_prologue(c2op,reglist);
3789 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3790 break;
3791 case GTE_GPL:
3792 c2op_prologue(c2op,reglist);
3793 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3794 break;
3795#endif
3796 default:
3797 c2op_prologue(c2op,reglist);
3798#ifdef DRC_DBG
3799 emit_movimm(source[i],1); // opcode
3800 emit_writeword(1,(int)&psxRegs.code);
3801#endif
3802 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3803 break;
3804 }
3805 c2op_epilogue(c2op,reglist);
3806 }
3807}
3808
3809static void cop1_unusable(int i,struct regstat *i_regs)
3810{
3811 // XXX: should just just do the exception instead
3812 if(!cop1_usable) {
3813 int jaddr=(int)out;
3814 emit_jmp(0);
3815 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3816 cop1_usable=1;
3817 }
3818}
3819
/* cop1 instruction: only the "coprocessor unusable" check is emitted. */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3824
/* FP conversion instruction: only the "coprocessor unusable" check is emitted. */
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
#define fconv_assemble fconv_assemble_arm
3830
/* FP compare instruction: only the "coprocessor unusable" check is emitted. */
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3835
/* FP arithmetic instruction: only the "coprocessor unusable" check is emitted. */
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3840
3841static void multdiv_assemble_arm(int i,struct regstat *i_regs)
3842{
3843 // case 0x18: MULT
3844 // case 0x19: MULTU
3845 // case 0x1A: DIV
3846 // case 0x1B: DIVU
3847 // case 0x1C: DMULT
3848 // case 0x1D: DMULTU
3849 // case 0x1E: DDIV
3850 // case 0x1F: DDIVU
3851 if(rs1[i]&&rs2[i])
3852 {
3853 if((opcode2[i]&4)==0) // 32-bit
3854 {
3855 if(opcode2[i]==0x18) // MULT
3856 {
3857 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3858 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3859 signed char hi=get_reg(i_regs->regmap,HIREG);
3860 signed char lo=get_reg(i_regs->regmap,LOREG);
3861 assert(m1>=0);
3862 assert(m2>=0);
3863 assert(hi>=0);
3864 assert(lo>=0);
3865 emit_smull(m1,m2,hi,lo);
3866 }
3867 if(opcode2[i]==0x19) // MULTU
3868 {
3869 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3870 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3871 signed char hi=get_reg(i_regs->regmap,HIREG);
3872 signed char lo=get_reg(i_regs->regmap,LOREG);
3873 assert(m1>=0);
3874 assert(m2>=0);
3875 assert(hi>=0);
3876 assert(lo>=0);
3877 emit_umull(m1,m2,hi,lo);
3878 }
3879 if(opcode2[i]==0x1A) // DIV
3880 {
3881 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3882 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3883 assert(d1>=0);
3884 assert(d2>=0);
3885 signed char quotient=get_reg(i_regs->regmap,LOREG);
3886 signed char remainder=get_reg(i_regs->regmap,HIREG);
3887 assert(quotient>=0);
3888 assert(remainder>=0);
3889 emit_movs(d1,remainder);
3890 emit_movimm(0xffffffff,quotient);
3891 emit_negmi(quotient,quotient); // .. quotient and ..
3892 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
3893 emit_movs(d2,HOST_TEMPREG);
3894 emit_jeq((int)out+52); // Division by zero
3895 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
3896#ifdef HAVE_ARMV5
3897 emit_clz(HOST_TEMPREG,quotient);
3898 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
3899#else
3900 emit_movimm(0,quotient);
3901 emit_addpl_imm(quotient,1,quotient);
3902 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3903 emit_jns((int)out-2*4);
3904#endif
3905 emit_orimm(quotient,1<<31,quotient);
3906 emit_shr(quotient,quotient,quotient);
3907 emit_cmp(remainder,HOST_TEMPREG);
3908 emit_subcs(remainder,HOST_TEMPREG,remainder);
3909 emit_adcs(quotient,quotient,quotient);
3910 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3911 emit_jcc((int)out-16); // -4
3912 emit_teq(d1,d2);
3913 emit_negmi(quotient,quotient);
3914 emit_test(d1,d1);
3915 emit_negmi(remainder,remainder);
3916 }
3917 if(opcode2[i]==0x1B) // DIVU
3918 {
3919 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3920 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3921 assert(d1>=0);
3922 assert(d2>=0);
3923 signed char quotient=get_reg(i_regs->regmap,LOREG);
3924 signed char remainder=get_reg(i_regs->regmap,HIREG);
3925 assert(quotient>=0);
3926 assert(remainder>=0);
3927 emit_mov(d1,remainder);
3928 emit_movimm(0xffffffff,quotient); // div0 case
3929 emit_test(d2,d2);
3930 emit_jeq((int)out+40); // Division by zero
3931#ifdef HAVE_ARMV5
3932 emit_clz(d2,HOST_TEMPREG);
3933 emit_movimm(1<<31,quotient);
3934 emit_shl(d2,HOST_TEMPREG,d2);
3935#else
3936 emit_movimm(0,HOST_TEMPREG);
3937 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3938 emit_lslpls_imm(d2,1,d2);
3939 emit_jns((int)out-2*4);
3940 emit_movimm(1<<31,quotient);
3941#endif
3942 emit_shr(quotient,HOST_TEMPREG,quotient);
3943 emit_cmp(remainder,d2);
3944 emit_subcs(remainder,d2,remainder);
3945 emit_adcs(quotient,quotient,quotient);
3946 emit_shrcc_imm(d2,1,d2);
3947 emit_jcc((int)out-16); // -4
3948 }
3949 }
3950 else // 64-bit
3951 assert(0);
3952 }
3953 else
3954 {
3955 // Multiply by zero is zero.
3956 // MIPS does not have a divide by zero exception.
3957 // The result is undefined, we return zero.
3958 signed char hr=get_reg(i_regs->regmap,HIREG);
3959 signed char lr=get_reg(i_regs->regmap,LOREG);
3960 if(hr>=0) emit_zeroreg(hr);
3961 if(lr>=0) emit_zeroreg(lr);
3962 }
3963}
3964#define multdiv_assemble multdiv_assemble_arm
3965
/*
 * Intentionally empty on ARM.  On x86 this step puts the value 0xf8 into
 * the register; on ARM the hash is done with a single instruction in
 * do_rhash(), so nothing needs to be preloaded.
 */
static void do_preload_rhash(int r)
{
  (void)r;
}
3970
3971static void do_preload_rhtbl(int ht) {
3972 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3973}
3974
/*
 * Emit the mini hash table hash: rh = rs & 0xf8, which is the byte offset
 * of the 8-byte mini_ht entry selected by bits 7..3 of rs.
 */
static void do_rhash(int rs,int rh)
{
  const int hash_mask=0xf8;

  emit_andimm(rs,hash_mask,rh);
}
3978
3979static void do_miniht_load(int ht,int rh) {
3980 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3981 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3982}
3983
3984static void do_miniht_jump(int rs,int rh,int ht) {
3985 emit_cmp(rh,rs);
3986 emit_ldreq_indexed(ht,4,15);
3987 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3988 emit_mov(rs,7);
3989 emit_jmp(jump_vaddr_reg[7]);
3990 #else
3991 emit_jmp(jump_vaddr_reg[rs]);
3992 #endif
3993}
3994
3995static void do_miniht_insert(u_int return_address,int rt,int temp) {
3996 #ifndef HAVE_ARMV7
3997 emit_movimm(return_address,rt); // PC into link register
3998 add_to_linker((int)out,return_address,1);
3999 emit_pcreladdr(temp);
4000 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4001 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4002 #else
4003 emit_movw(return_address&0x0000FFFF,rt);
4004 add_to_linker((int)out,return_address,1);
4005 emit_pcreladdr(temp);
4006 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4007 emit_movt(return_address&0xFFFF0000,rt);
4008 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4009 #endif
4010}
4011
4012static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4013{
4014 //if(dirty_pre==dirty) return;
4015 int hr,reg;
4016 for(hr=0;hr<HOST_REGS;hr++) {
4017 if(hr!=EXCLUDE_REG) {
4018 reg=pre[hr];
4019 if(((~u)>>(reg&63))&1) {
4020 if(reg>0) {
4021 if(((dirty_pre&~dirty)>>hr)&1) {
4022 if(reg>0&&reg<34) {
4023 emit_storereg(reg,hr);
4024 if( ((is32_pre&~uu)>>reg)&1 ) {
4025 emit_sarimm(hr,31,HOST_TEMPREG);
4026 emit_storereg(reg|64,HOST_TEMPREG);
4027 }
4028 }
4029 else if(reg>=64) {
4030 emit_storereg(reg,hr);
4031 }
4032 }
4033 }
4034 }
4035 }
4036 }
4037}
4038
4039
4040/* using strd could possibly help but you'd have to allocate registers in pairs
4041static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4042{
4043 int hr;
4044 int wrote=-1;
4045 for(hr=HOST_REGS-1;hr>=0;hr--) {
4046 if(hr!=EXCLUDE_REG) {
4047 if(pre[hr]!=entry[hr]) {
4048 if(pre[hr]>=0) {
4049 if((dirty>>hr)&1) {
4050 if(get_reg(entry,pre[hr])<0) {
4051 if(pre[hr]<64) {
4052 if(!((u>>pre[hr])&1)) {
4053 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4054 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4055 emit_sarimm(hr,31,hr+1);
4056 emit_strdreg(pre[hr],hr);
4057 }
4058 else
4059 emit_storereg(pre[hr],hr);
4060 }else{
4061 emit_storereg(pre[hr],hr);
4062 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4063 emit_sarimm(hr,31,hr);
4064 emit_storereg(pre[hr]|64,hr);
4065 }
4066 }
4067 }
4068 }else{
4069 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4070 emit_storereg(pre[hr],hr);
4071 }
4072 }
4073 wrote=hr;
4074 }
4075 }
4076 }
4077 }
4078 }
4079 }
4080 for(hr=0;hr<HOST_REGS;hr++) {
4081 if(hr!=EXCLUDE_REG) {
4082 if(pre[hr]!=entry[hr]) {
4083 if(pre[hr]>=0) {
4084 int nr;
4085 if((nr=get_reg(entry,pre[hr]))>=0) {
4086 emit_mov(hr,nr);
4087 }
4088 }
4089 }
4090 }
4091 }
4092}
4093#define wb_invalidate wb_invalidate_arm
4094*/
4095
4096static void mark_clear_cache(void *target)
4097{
4098 u_long offset = (char *)target - (char *)BASE_ADDR;
4099 u_int mask = 1u << ((offset >> 12) & 31);
4100 if (!(needs_clear_cache[offset >> 17] & mask)) {
4101 char *start = (char *)((u_long)target & ~4095ul);
4102 start_tcache_write(start, start + 4096);
4103 needs_clear_cache[offset >> 17] |= mask;
4104 }
4105}
4106
4107// Clearing the cache is rather slow on ARM Linux, so mark the areas
4108// that need to be cleared, and then only clear these areas once.
4109static void do_clear_cache()
4110{
4111 int i,j;
4112 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4113 {
4114 u_int bitmap=needs_clear_cache[i];
4115 if(bitmap) {
4116 u_int start,end;
4117 for(j=0;j<32;j++)
4118 {
4119 if(bitmap&(1<<j)) {
4120 start=(u_int)BASE_ADDR+i*131072+j*4096;
4121 end=start+4095;
4122 j++;
4123 while(j<32) {
4124 if(bitmap&(1<<j)) {
4125 end+=4096;
4126 j++;
4127 }else{
4128 end_tcache_write((void *)start,(void *)end);
4129 break;
4130 }
4131 }
4132 }
4133 }
4134 needs_clear_cache[i]=0;
4135 }
4136 }
4137}
4138
// CPU-architecture-specific initialization.
// Nothing needs to be set up for this backend.
static void arch_init()
{
}
4142
4143// vim:shiftwidth=2:expandtab