drc: drop heaps of dead code
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
a327ad27 31#if !BASE_ADDR_FIXED
bdeade46 32char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
33#endif
34
4d646738 35#ifndef __MACH__
36#define CALLER_SAVE_REGS 0x100f
37#else
38#define CALLER_SAVE_REGS 0x120f
39#endif
40
57871462 41extern int cycle_count;
42extern int last_count;
43extern int pcaddr;
44extern int pending_exception;
45extern int branch_target;
46extern uint64_t readmem_dword;
57871462 47extern void *dynarec_local;
48extern u_int memory_map[1048576];
49extern u_int mini_ht[32][2];
50extern u_int rounding_modes[4];
51
52void indirect_jump_indexed();
53void indirect_jump();
54void do_interrupt();
55void jump_vaddr_r0();
56void jump_vaddr_r1();
57void jump_vaddr_r2();
58void jump_vaddr_r3();
59void jump_vaddr_r4();
60void jump_vaddr_r5();
61void jump_vaddr_r6();
62void jump_vaddr_r7();
63void jump_vaddr_r8();
64void jump_vaddr_r9();
65void jump_vaddr_r10();
66void jump_vaddr_r12();
67
68const u_int jump_vaddr_reg[16] = {
69 (int)jump_vaddr_r0,
70 (int)jump_vaddr_r1,
71 (int)jump_vaddr_r2,
72 (int)jump_vaddr_r3,
73 (int)jump_vaddr_r4,
74 (int)jump_vaddr_r5,
75 (int)jump_vaddr_r6,
76 (int)jump_vaddr_r7,
77 (int)jump_vaddr_r8,
78 (int)jump_vaddr_r9,
79 (int)jump_vaddr_r10,
80 0,
81 (int)jump_vaddr_r12,
82 0,
83 0,
84 0};
85
0bbd1454 86void invalidate_addr_r0();
87void invalidate_addr_r1();
88void invalidate_addr_r2();
89void invalidate_addr_r3();
90void invalidate_addr_r4();
91void invalidate_addr_r5();
92void invalidate_addr_r6();
93void invalidate_addr_r7();
94void invalidate_addr_r8();
95void invalidate_addr_r9();
96void invalidate_addr_r10();
97void invalidate_addr_r12();
98
99const u_int invalidate_addr_reg[16] = {
100 (int)invalidate_addr_r0,
101 (int)invalidate_addr_r1,
102 (int)invalidate_addr_r2,
103 (int)invalidate_addr_r3,
104 (int)invalidate_addr_r4,
105 (int)invalidate_addr_r5,
106 (int)invalidate_addr_r6,
107 (int)invalidate_addr_r7,
108 (int)invalidate_addr_r8,
109 (int)invalidate_addr_r9,
110 (int)invalidate_addr_r10,
111 0,
112 (int)invalidate_addr_r12,
113 0,
114 0,
115 0};
116
dd3a91a1 117unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
118
57871462 119/* Linker */
120
121void set_jump_target(int addr,u_int target)
122{
123 u_char *ptr=(u_char *)addr;
124 u_int *ptr2=(u_int *)ptr;
125 if(ptr[3]==0xe2) {
126 assert((target-(u_int)ptr2-8)<1024);
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
131 }
132 else if(ptr[3]==0x72) {
133 // generated by emit_jno_unlikely
134 if((target-(u_int)ptr2-8)<1024) {
135 assert((addr&3)==0);
136 assert((target&3)==0);
137 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
138 }
139 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
140 assert((addr&3)==0);
141 assert((target&3)==0);
142 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
143 }
144 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146 else {
147 assert((ptr[3]&0x0e)==0xa);
148 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
149 }
150}
151
152// This optionally copies the instruction from the target of the branch into
153// the space before the branch. Works, but the difference in speed is
154// usually insignificant.
155void set_jump_target_fillslot(int addr,u_int target,int copy)
156{
157 u_char *ptr=(u_char *)addr;
158 u_int *ptr2=(u_int *)ptr;
159 assert(!copy||ptr2[-1]==0xe28dd000);
160 if(ptr[3]==0xe2) {
161 assert(!copy);
162 assert((target-(u_int)ptr2-8)<4096);
163 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
164 }
165 else {
166 assert((ptr[3]&0x0e)==0xa);
167 u_int target_insn=*(u_int *)target;
168 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
169 copy=0;
170 }
171 if((target_insn&0x0c100000)==0x04100000) { // Load
172 copy=0;
173 }
174 if(target_insn&0x08000000) {
175 copy=0;
176 }
177 if(copy) {
178 ptr2[-1]=target_insn;
179 target+=4;
180 }
181 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
182 }
183}
184
185/* Literal pool */
186add_literal(int addr,int val)
187{
15776b68 188 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 189 literals[literalcount][0]=addr;
190 literals[literalcount][1]=val;
191 literalcount++;
192}
193
f76eeef9 194void *kill_pointer(void *stub)
57871462 195{
196 int *ptr=(int *)(stub+4);
197 assert((*ptr&0x0ff00000)==0x05900000);
198 u_int offset=*ptr&0xfff;
199 int **l_ptr=(void *)ptr+offset+8;
200 int *i_ptr=*l_ptr;
201 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 202 return i_ptr;
57871462 203}
204
f968d35d 205// find where external branch is liked to using addr of it's stub:
206// get address that insn one after stub loads (dyna_linker arg1),
207// treat it as a pointer to branch insn,
208// return addr where that branch jumps to
57871462 209int get_pointer(void *stub)
210{
211 //printf("get_pointer(%x)\n",(int)stub);
212 int *ptr=(int *)(stub+4);
f968d35d 213 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 214 u_int offset=*ptr&0xfff;
215 int **l_ptr=(void *)ptr+offset+8;
216 int *i_ptr=*l_ptr;
217 assert((*i_ptr&0x0f000000)==0x0a000000);
218 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
219}
220
221// Find the "clean" entry point from a "dirty" entry point
222// by skipping past the call to verify_code
223u_int get_clean_addr(int addr)
224{
225 int *ptr=(int *)addr;
665f33e1 226 #ifndef HAVE_ARMV7
57871462 227 ptr+=4;
228 #else
229 ptr+=6;
230 #endif
231 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
232 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
233 ptr++;
234 if((*ptr&0xFF000000)==0xea000000) {
235 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
236 }
237 return (u_int)ptr;
238}
239
240int verify_dirty(int addr)
241{
242 u_int *ptr=(u_int *)addr;
665f33e1 243 #ifndef HAVE_ARMV7
57871462 244 // get from literal pool
15776b68 245 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 262 //printf("verify_dirty: %x %x %x\n",source,copy,len);
263 return !memcmp((void *)source,(void *)copy,len);
264}
265
266// This doesn't necessarily find all clean entry points, just
267// guarantees that it's not dirty
268int isclean(int addr)
269{
665f33e1 270 #ifndef HAVE_ARMV7
57871462 271 int *ptr=((u_int *)addr)+4;
272 #else
273 int *ptr=((u_int *)addr)+6;
274 #endif
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
277 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
278 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
279 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
280 return 1;
281}
282
4a35de07 283// get source that block at addr was compiled from (host pointers)
57871462 284void get_bounds(int addr,u_int *start,u_int *end)
285{
286 u_int *ptr=(u_int *)addr;
665f33e1 287 #ifndef HAVE_ARMV7
57871462 288 // get from literal pool
15776b68 289 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 290 u_int offset=*ptr&0xfff;
291 u_int *l_ptr=(void *)ptr+offset+8;
292 u_int source=l_ptr[0];
293 //u_int copy=l_ptr[1];
294 u_int len=l_ptr[2];
295 ptr+=4;
296 #else
297 // ARMv7 movw/movt
298 assert((*ptr&0xFFF00000)==0xe3000000);
299 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
300 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
301 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
302 ptr+=6;
303 #endif
304 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
305 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 306 *start=source;
307 *end=source+len;
308}
309
310/* Register allocation */
311
312// Note: registers are allocated clean (unmodified state)
313// if you intend to modify the register, you must call dirty_reg().
314void alloc_reg(struct regstat *cur,int i,signed char reg)
315{
316 int r,hr;
317 int preferred_reg = (reg&7);
318 if(reg==CCREG) preferred_reg=HOST_CCREG;
319 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
320
321 // Don't allocate unused registers
322 if((cur->u>>reg)&1) return;
323
324 // see if it's already allocated
325 for(hr=0;hr<HOST_REGS;hr++)
326 {
327 if(cur->regmap[hr]==reg) return;
328 }
329
330 // Keep the same mapping if the register was already allocated in a loop
331 preferred_reg = loop_reg(i,reg,preferred_reg);
332
333 // Try to allocate the preferred register
334 if(cur->regmap[preferred_reg]==-1) {
335 cur->regmap[preferred_reg]=reg;
336 cur->dirty&=~(1<<preferred_reg);
337 cur->isconst&=~(1<<preferred_reg);
338 return;
339 }
340 r=cur->regmap[preferred_reg];
341 if(r<64&&((cur->u>>r)&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347 if(r>=64&&((cur->uu>>(r&63))&1)) {
348 cur->regmap[preferred_reg]=reg;
349 cur->dirty&=~(1<<preferred_reg);
350 cur->isconst&=~(1<<preferred_reg);
351 return;
352 }
353
354 // Clear any unneeded registers
355 // We try to keep the mapping consistent, if possible, because it
356 // makes branches easier (especially loops). So we try to allocate
357 // first (see above) before removing old mappings. If this is not
358 // possible then go ahead and clear out the registers that are no
359 // longer needed.
360 for(hr=0;hr<HOST_REGS;hr++)
361 {
362 r=cur->regmap[hr];
363 if(r>=0) {
364 if(r<64) {
365 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
366 }
367 else
368 {
369 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
370 }
371 }
372 }
373 // Try to allocate any available register, but prefer
374 // registers that have not been used recently.
375 if(i>0) {
376 for(hr=0;hr<HOST_REGS;hr++) {
377 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
378 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
379 cur->regmap[hr]=reg;
380 cur->dirty&=~(1<<hr);
381 cur->isconst&=~(1<<hr);
382 return;
383 }
384 }
385 }
386 }
387 // Try to allocate any available register
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
390 cur->regmap[hr]=reg;
391 cur->dirty&=~(1<<hr);
392 cur->isconst&=~(1<<hr);
393 return;
394 }
395 }
396
397 // Ok, now we have to evict someone
398 // Pick a register we hopefully won't need soon
399 u_char hsn[MAXREG+1];
400 memset(hsn,10,sizeof(hsn));
401 int j;
402 lsn(hsn,i,&preferred_reg);
403 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
404 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
405 if(i>0) {
406 // Don't evict the cycle count at entry points, otherwise the entry
407 // stub will have to write it.
408 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
409 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
410 for(j=10;j>=3;j--)
411 {
412 // Alloc preferred register if available
413 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
414 for(hr=0;hr<HOST_REGS;hr++) {
415 // Evict both parts of a 64-bit register
416 if((cur->regmap[hr]&63)==r) {
417 cur->regmap[hr]=-1;
418 cur->dirty&=~(1<<hr);
419 cur->isconst&=~(1<<hr);
420 }
421 }
422 cur->regmap[preferred_reg]=reg;
423 return;
424 }
425 for(r=1;r<=MAXREG;r++)
426 {
427 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
428 for(hr=0;hr<HOST_REGS;hr++) {
429 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
430 if(cur->regmap[hr]==r+64) {
431 cur->regmap[hr]=reg;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 return;
435 }
436 }
437 }
438 for(hr=0;hr<HOST_REGS;hr++) {
439 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
440 if(cur->regmap[hr]==r) {
441 cur->regmap[hr]=reg;
442 cur->dirty&=~(1<<hr);
443 cur->isconst&=~(1<<hr);
444 return;
445 }
446 }
447 }
448 }
449 }
450 }
451 }
452 for(j=10;j>=0;j--)
453 {
454 for(r=1;r<=MAXREG;r++)
455 {
456 if(hsn[r]==j) {
457 for(hr=0;hr<HOST_REGS;hr++) {
458 if(cur->regmap[hr]==r+64) {
459 cur->regmap[hr]=reg;
460 cur->dirty&=~(1<<hr);
461 cur->isconst&=~(1<<hr);
462 return;
463 }
464 }
465 for(hr=0;hr<HOST_REGS;hr++) {
466 if(cur->regmap[hr]==r) {
467 cur->regmap[hr]=reg;
468 cur->dirty&=~(1<<hr);
469 cur->isconst&=~(1<<hr);
470 return;
471 }
472 }
473 }
474 }
475 }
c43b5311 476 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 477}
478
479void alloc_reg64(struct regstat *cur,int i,signed char reg)
480{
481 int preferred_reg = 8+(reg&1);
482 int r,hr;
483
484 // allocate the lower 32 bits
485 alloc_reg(cur,i,reg);
486
487 // Don't allocate unused registers
488 if((cur->uu>>reg)&1) return;
489
490 // see if the upper half is already allocated
491 for(hr=0;hr<HOST_REGS;hr++)
492 {
493 if(cur->regmap[hr]==reg+64) return;
494 }
495
496 // Keep the same mapping if the register was already allocated in a loop
497 preferred_reg = loop_reg(i,reg,preferred_reg);
498
499 // Try to allocate the preferred register
500 if(cur->regmap[preferred_reg]==-1) {
501 cur->regmap[preferred_reg]=reg|64;
502 cur->dirty&=~(1<<preferred_reg);
503 cur->isconst&=~(1<<preferred_reg);
504 return;
505 }
506 r=cur->regmap[preferred_reg];
507 if(r<64&&((cur->u>>r)&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513 if(r>=64&&((cur->uu>>(r&63))&1)) {
514 cur->regmap[preferred_reg]=reg|64;
515 cur->dirty&=~(1<<preferred_reg);
516 cur->isconst&=~(1<<preferred_reg);
517 return;
518 }
519
520 // Clear any unneeded registers
521 // We try to keep the mapping consistent, if possible, because it
522 // makes branches easier (especially loops). So we try to allocate
523 // first (see above) before removing old mappings. If this is not
524 // possible then go ahead and clear out the registers that are no
525 // longer needed.
526 for(hr=HOST_REGS-1;hr>=0;hr--)
527 {
528 r=cur->regmap[hr];
529 if(r>=0) {
530 if(r<64) {
531 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
532 }
533 else
534 {
535 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
536 }
537 }
538 }
539 // Try to allocate any available register, but prefer
540 // registers that have not been used recently.
541 if(i>0) {
542 for(hr=0;hr<HOST_REGS;hr++) {
543 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
544 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
545 cur->regmap[hr]=reg|64;
546 cur->dirty&=~(1<<hr);
547 cur->isconst&=~(1<<hr);
548 return;
549 }
550 }
551 }
552 }
553 // Try to allocate any available register
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
556 cur->regmap[hr]=reg|64;
557 cur->dirty&=~(1<<hr);
558 cur->isconst&=~(1<<hr);
559 return;
560 }
561 }
562
563 // Ok, now we have to evict someone
564 // Pick a register we hopefully won't need soon
565 u_char hsn[MAXREG+1];
566 memset(hsn,10,sizeof(hsn));
567 int j;
568 lsn(hsn,i,&preferred_reg);
569 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
570 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
571 if(i>0) {
572 // Don't evict the cycle count at entry points, otherwise the entry
573 // stub will have to write it.
574 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
575 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
576 for(j=10;j>=3;j--)
577 {
578 // Alloc preferred register if available
579 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
580 for(hr=0;hr<HOST_REGS;hr++) {
581 // Evict both parts of a 64-bit register
582 if((cur->regmap[hr]&63)==r) {
583 cur->regmap[hr]=-1;
584 cur->dirty&=~(1<<hr);
585 cur->isconst&=~(1<<hr);
586 }
587 }
588 cur->regmap[preferred_reg]=reg|64;
589 return;
590 }
591 for(r=1;r<=MAXREG;r++)
592 {
593 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
594 for(hr=0;hr<HOST_REGS;hr++) {
595 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
596 if(cur->regmap[hr]==r+64) {
597 cur->regmap[hr]=reg|64;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 return;
601 }
602 }
603 }
604 for(hr=0;hr<HOST_REGS;hr++) {
605 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
606 if(cur->regmap[hr]==r) {
607 cur->regmap[hr]=reg|64;
608 cur->dirty&=~(1<<hr);
609 cur->isconst&=~(1<<hr);
610 return;
611 }
612 }
613 }
614 }
615 }
616 }
617 }
618 for(j=10;j>=0;j--)
619 {
620 for(r=1;r<=MAXREG;r++)
621 {
622 if(hsn[r]==j) {
623 for(hr=0;hr<HOST_REGS;hr++) {
624 if(cur->regmap[hr]==r+64) {
625 cur->regmap[hr]=reg|64;
626 cur->dirty&=~(1<<hr);
627 cur->isconst&=~(1<<hr);
628 return;
629 }
630 }
631 for(hr=0;hr<HOST_REGS;hr++) {
632 if(cur->regmap[hr]==r) {
633 cur->regmap[hr]=reg|64;
634 cur->dirty&=~(1<<hr);
635 cur->isconst&=~(1<<hr);
636 return;
637 }
638 }
639 }
640 }
641 }
c43b5311 642 SysPrintf("This shouldn't happen");exit(1);
57871462 643}
644
645// Allocate a temporary register. This is done without regard to
646// dirty status or whether the register we request is on the unneeded list
647// Note: This will only allocate one register, even if called multiple times
648void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
649{
650 int r,hr;
651 int preferred_reg = -1;
652
653 // see if it's already allocated
654 for(hr=0;hr<HOST_REGS;hr++)
655 {
656 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
657 }
658
659 // Try to allocate any available register
660 for(hr=HOST_REGS-1;hr>=0;hr--) {
661 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
662 cur->regmap[hr]=reg;
663 cur->dirty&=~(1<<hr);
664 cur->isconst&=~(1<<hr);
665 return;
666 }
667 }
668
669 // Find an unneeded register
670 for(hr=HOST_REGS-1;hr>=0;hr--)
671 {
672 r=cur->regmap[hr];
673 if(r>=0) {
674 if(r<64) {
675 if((cur->u>>r)&1) {
676 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
677 cur->regmap[hr]=reg;
678 cur->dirty&=~(1<<hr);
679 cur->isconst&=~(1<<hr);
680 return;
681 }
682 }
683 }
684 else
685 {
686 if((cur->uu>>(r&63))&1) {
687 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
688 cur->regmap[hr]=reg;
689 cur->dirty&=~(1<<hr);
690 cur->isconst&=~(1<<hr);
691 return;
692 }
693 }
694 }
695 }
696 }
697
698 // Ok, now we have to evict someone
699 // Pick a register we hopefully won't need soon
700 // TODO: we might want to follow unconditional jumps here
701 // TODO: get rid of dupe code and make this into a function
702 u_char hsn[MAXREG+1];
703 memset(hsn,10,sizeof(hsn));
704 int j;
705 lsn(hsn,i,&preferred_reg);
706 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
707 if(i>0) {
708 // Don't evict the cycle count at entry points, otherwise the entry
709 // stub will have to write it.
710 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
711 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
712 for(j=10;j>=3;j--)
713 {
714 for(r=1;r<=MAXREG;r++)
715 {
716 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
717 for(hr=0;hr<HOST_REGS;hr++) {
718 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
719 if(cur->regmap[hr]==r+64) {
720 cur->regmap[hr]=reg;
721 cur->dirty&=~(1<<hr);
722 cur->isconst&=~(1<<hr);
723 return;
724 }
725 }
726 }
727 for(hr=0;hr<HOST_REGS;hr++) {
728 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
729 if(cur->regmap[hr]==r) {
730 cur->regmap[hr]=reg;
731 cur->dirty&=~(1<<hr);
732 cur->isconst&=~(1<<hr);
733 return;
734 }
735 }
736 }
737 }
738 }
739 }
740 }
741 for(j=10;j>=0;j--)
742 {
743 for(r=1;r<=MAXREG;r++)
744 {
745 if(hsn[r]==j) {
746 for(hr=0;hr<HOST_REGS;hr++) {
747 if(cur->regmap[hr]==r+64) {
748 cur->regmap[hr]=reg;
749 cur->dirty&=~(1<<hr);
750 cur->isconst&=~(1<<hr);
751 return;
752 }
753 }
754 for(hr=0;hr<HOST_REGS;hr++) {
755 if(cur->regmap[hr]==r) {
756 cur->regmap[hr]=reg;
757 cur->dirty&=~(1<<hr);
758 cur->isconst&=~(1<<hr);
759 return;
760 }
761 }
762 }
763 }
764 }
c43b5311 765 SysPrintf("This shouldn't happen");exit(1);
57871462 766}
767// Allocate a specific ARM register.
768void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
769{
770 int n;
f776eb14 771 int dirty=0;
57871462 772
773 // see if it's already allocated (and dealloc it)
774 for(n=0;n<HOST_REGS;n++)
775 {
f776eb14 776 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
777 dirty=(cur->dirty>>n)&1;
778 cur->regmap[n]=-1;
779 }
57871462 780 }
781
782 cur->regmap[hr]=reg;
783 cur->dirty&=~(1<<hr);
f776eb14 784 cur->dirty|=dirty<<hr;
57871462 785 cur->isconst&=~(1<<hr);
786}
787
788// Alloc cycle count into dedicated register
789alloc_cc(struct regstat *cur,int i)
790{
791 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
792}
793
794/* Special alloc */
795
796
797/* Assembler */
798
// Printable names of the 16 ARM core registers, indexed by register number;
// used by the assem_debug() disassembly lines in this file.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
816
817void output_byte(u_char byte)
818{
819 *(out++)=byte;
820}
821void output_modrm(u_char mod,u_char rm,u_char ext)
822{
823 assert(mod<4);
824 assert(rm<8);
825 assert(ext<8);
826 u_char byte=(mod<<6)|(ext<<3)|rm;
827 *(out++)=byte;
828}
829void output_sib(u_char scale,u_char index,u_char base)
830{
831 assert(scale<4);
832 assert(index<8);
833 assert(base<8);
834 u_char byte=(scale<<6)|(index<<3)|base;
835 *(out++)=byte;
836}
837void output_w32(u_int word)
838{
839 *((u_int *)out)=word;
840 out+=4;
841}
// Build the register fields of an instruction word: rn in bits 16-19,
// rd in bits 12-15, rm in bits 0-3. Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build rn/rd fields plus an 8-bit immediate placed at an even left shift:
// the rotate field written into bits 8-11 is ((32-shift)&30)/2.
// imm must be below 256 and shift must be even.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to encode 'imm' as an 8-bit value with an even rotation.
// On success returns 1 and stores the 12-bit field (rotate in bits 8-11,
// value in bits 0-7) in *encoded; on failure returns 0 with *encoded == 0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try the next rotation
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 872void genimm_checked(u_int imm,u_int *encoded)
873{
874 u_int ret=genimm(imm,encoded);
875 assert(ret);
876}
57871462 877u_int genjmp(u_int addr)
878{
879 int offset=addr-(int)out-8;
e80343e2 880 if(offset<-33554432||offset>=33554432) {
881 if (addr>2) {
c43b5311 882 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 883 exit(1);
884 }
885 return 0;
886 }
57871462 887 return ((u_int)offset>>2)&0xffffff;
888}
889
890void emit_mov(int rs,int rt)
891{
892 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
893 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
894}
895
896void emit_movs(int rs,int rt)
897{
898 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
899 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
900}
901
902void emit_add(int rs1,int rs2,int rt)
903{
904 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
905 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
906}
907
908void emit_adds(int rs1,int rs2,int rt)
909{
910 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
911 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
912}
913
914void emit_adcs(int rs1,int rs2,int rt)
915{
916 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
917 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
918}
919
920void emit_sbc(int rs1,int rs2,int rt)
921{
922 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
923 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
924}
925
926void emit_sbcs(int rs1,int rs2,int rt)
927{
928 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
929 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
930}
931
932void emit_neg(int rs, int rt)
933{
934 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
935 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
936}
937
938void emit_negs(int rs, int rt)
939{
940 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
941 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
942}
943
944void emit_sub(int rs1,int rs2,int rt)
945{
946 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
947 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
948}
949
950void emit_subs(int rs1,int rs2,int rt)
951{
952 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
953 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
954}
955
956void emit_zeroreg(int rt)
957{
958 assem_debug("mov %s,#0\n",regname[rt]);
959 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
960}
961
790ee18e 962void emit_loadlp(u_int imm,u_int rt)
963{
964 add_literal((int)out,imm);
965 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
966 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
967}
968void emit_movw(u_int imm,u_int rt)
969{
970 assert(imm<65536);
971 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
972 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
973}
974void emit_movt(u_int imm,u_int rt)
975{
976 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
977 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
978}
979void emit_movimm(u_int imm,u_int rt)
980{
981 u_int armval;
982 if(genimm(imm,&armval)) {
983 assem_debug("mov %s,#%d\n",regname[rt],imm);
984 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
985 }else if(genimm(~imm,&armval)) {
986 assem_debug("mvn %s,#%d\n",regname[rt],imm);
987 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
988 }else if(imm<65536) {
665f33e1 989 #ifndef HAVE_ARMV7
790ee18e 990 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
991 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
992 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
993 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
994 #else
995 emit_movw(imm,rt);
996 #endif
997 }else{
665f33e1 998 #ifndef HAVE_ARMV7
790ee18e 999 emit_loadlp(imm,rt);
1000 #else
1001 emit_movw(imm&0x0000FFFF,rt);
1002 emit_movt(imm&0xFFFF0000,rt);
1003 #endif
1004 }
1005}
1006void emit_pcreladdr(u_int rt)
1007{
1008 assem_debug("add %s,pc,#?\n",regname[rt]);
1009 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1010}
1011
57871462 1012void emit_loadreg(int r, int hr)
1013{
3d624f89 1014 if(r&64) {
c43b5311 1015 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1016 assert(0);
1017 return;
3d624f89 1018 }
57871462 1019 if((r&63)==0)
1020 emit_zeroreg(hr);
1021 else {
3d624f89 1022 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1023 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1024 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1025 if(r==CCREG) addr=(int)&cycle_count;
1026 if(r==CSREG) addr=(int)&Status;
1027 if(r==FSREG) addr=(int)&FCR31;
1028 if(r==INVCP) addr=(int)&invc_ptr;
1029 u_int offset = addr-(u_int)&dynarec_local;
1030 assert(offset<4096);
1031 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1032 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1033 }
1034}
1035void emit_storereg(int r, int hr)
1036{
3d624f89 1037 if(r&64) {
c43b5311 1038 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1039 assert(0);
1040 return;
3d624f89 1041 }
3d624f89 1042 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1043 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1044 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1045 if(r==CCREG) addr=(int)&cycle_count;
1046 if(r==FSREG) addr=(int)&FCR31;
1047 u_int offset = addr-(u_int)&dynarec_local;
1048 assert(offset<4096);
1049 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1050 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1051}
1052
1053void emit_test(int rs, int rt)
1054{
1055 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1056 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1057}
1058
1059void emit_testimm(int rs,int imm)
1060{
1061 u_int armval;
5a05d80c 1062 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1063 genimm_checked(imm,&armval);
57871462 1064 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1065}
1066
b9b61529 1067void emit_testeqimm(int rs,int imm)
1068{
1069 u_int armval;
1070 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1071 genimm_checked(imm,&armval);
b9b61529 1072 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1073}
1074
57871462 1075void emit_not(int rs,int rt)
1076{
1077 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1078 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1079}
1080
b9b61529 1081void emit_mvnmi(int rs,int rt)
1082{
1083 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1084 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1085}
1086
57871462 1087void emit_and(u_int rs1,u_int rs2,u_int rt)
1088{
1089 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1090 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1091}
1092
1093void emit_or(u_int rs1,u_int rs2,u_int rt)
1094{
1095 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1096 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1097}
1098void emit_or_and_set_flags(int rs1,int rs2,int rt)
1099{
1100 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1101 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1102}
1103
f70d384d 1104void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1105{
1106 assert(rs<16);
1107 assert(rt<16);
1108 assert(imm<32);
1109 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1110 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1111}
1112
576bbd8f 1113void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1114{
1115 assert(rs<16);
1116 assert(rt<16);
1117 assert(imm<32);
1118 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1119 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1120}
1121
57871462 1122void emit_xor(u_int rs1,u_int rs2,u_int rt)
1123{
1124 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1125 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1126}
1127
57871462 1128void emit_addimm(u_int rs,int imm,u_int rt)
1129{
1130 assert(rs<16);
1131 assert(rt<16);
1132 if(imm!=0) {
57871462 1133 u_int armval;
1134 if(genimm(imm,&armval)) {
1135 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1136 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1137 }else if(genimm(-imm,&armval)) {
8a0a8423 1138 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1139 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1140 #ifdef HAVE_ARMV7
1141 }else if(rt!=rs&&(u_int)imm<65536) {
1142 emit_movw(imm&0x0000ffff,rt);
1143 emit_add(rs,rt,rt);
1144 }else if(rt!=rs&&(u_int)-imm<65536) {
1145 emit_movw(-imm&0x0000ffff,rt);
1146 emit_sub(rs,rt,rt);
1147 #endif
1148 }else if((u_int)-imm<65536) {
57871462 1149 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1150 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1151 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1152 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1153 }else {
1154 do {
1155 int shift = (ffs(imm) - 1) & ~1;
1156 int imm8 = imm & (0xff << shift);
1157 genimm_checked(imm8,&armval);
1158 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1159 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1160 rs = rt;
1161 imm &= ~imm8;
1162 }
1163 while (imm != 0);
57871462 1164 }
1165 }
1166 else if(rs!=rt) emit_mov(rs,rt);
1167}
1168
1169void emit_addimm_and_set_flags(int imm,int rt)
1170{
1171 assert(imm>-65536&&imm<65536);
1172 u_int armval;
1173 if(genimm(imm,&armval)) {
1174 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1175 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1176 }else if(genimm(-imm,&armval)) {
1177 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1178 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1179 }else if(imm<0) {
1180 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1181 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1182 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1183 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1184 }else{
1185 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1186 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1187 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1188 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1189 }
1190}
1191void emit_addimm_no_flags(u_int imm,u_int rt)
1192{
1193 emit_addimm(rt,imm,rt);
1194}
1195
1196void emit_addnop(u_int r)
1197{
1198 assert(r<16);
1199 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1200 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1201}
1202
1203void emit_adcimm(u_int rs,int imm,u_int rt)
1204{
1205 u_int armval;
cfbd3c6e 1206 genimm_checked(imm,&armval);
57871462 1207 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1208 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1209}
1210/*void emit_sbcimm(int imm,u_int rt)
1211{
1212 u_int armval;
cfbd3c6e 1213 genimm_checked(imm,&armval);
57871462 1214 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1215 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1216}*/
1217void emit_sbbimm(int imm,u_int rt)
1218{
1219 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1220 assert(rt<8);
1221 if(imm<128&&imm>=-128) {
1222 output_byte(0x83);
1223 output_modrm(3,rt,3);
1224 output_byte(imm);
1225 }
1226 else
1227 {
1228 output_byte(0x81);
1229 output_modrm(3,rt,3);
1230 output_w32(imm);
1231 }
1232}
1233void emit_rscimm(int rs,int imm,u_int rt)
1234{
1235 assert(0);
1236 u_int armval;
cfbd3c6e 1237 genimm_checked(imm,&armval);
57871462 1238 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1239 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1240}
1241
1242void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1243{
1244 // TODO: if(genimm(imm,&armval)) ...
1245 // else
1246 emit_movimm(imm,HOST_TEMPREG);
1247 emit_adds(HOST_TEMPREG,rsl,rtl);
1248 emit_adcimm(rsh,0,rth);
1249}
1250
1251void emit_sbb(int rs1,int rs2)
1252{
1253 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1254 output_byte(0x19);
1255 output_modrm(3,rs1,rs2);
1256}
1257
1258void emit_andimm(int rs,int imm,int rt)
1259{
1260 u_int armval;
790ee18e 1261 if(imm==0) {
1262 emit_zeroreg(rt);
1263 }else if(genimm(imm,&armval)) {
57871462 1264 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1265 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1266 }else if(genimm(~imm,&armval)) {
1267 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1268 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1269 }else if(imm==65535) {
332a4533 1270 #ifndef HAVE_ARMV6
57871462 1271 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1272 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1273 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1274 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1275 #else
1276 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1277 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1278 #endif
1279 }else{
1280 assert(imm>0&&imm<65535);
665f33e1 1281 #ifndef HAVE_ARMV7
57871462 1282 assem_debug("mov r14,#%d\n",imm&0xFF00);
1283 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1284 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1285 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1286 #else
1287 emit_movw(imm,HOST_TEMPREG);
1288 #endif
1289 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1290 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1291 }
1292}
1293
1294void emit_orimm(int rs,int imm,int rt)
1295{
1296 u_int armval;
790ee18e 1297 if(imm==0) {
1298 if(rs!=rt) emit_mov(rs,rt);
1299 }else if(genimm(imm,&armval)) {
57871462 1300 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1301 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1302 }else{
1303 assert(imm>0&&imm<65536);
1304 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1305 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1306 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1307 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1308 }
1309}
1310
1311void emit_xorimm(int rs,int imm,int rt)
1312{
57871462 1313 u_int armval;
790ee18e 1314 if(imm==0) {
1315 if(rs!=rt) emit_mov(rs,rt);
1316 }else if(genimm(imm,&armval)) {
57871462 1317 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1318 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1319 }else{
514ed0d9 1320 assert(imm>0&&imm<65536);
57871462 1321 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1322 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1323 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1324 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1325 }
1326}
1327
1328void emit_shlimm(int rs,u_int imm,int rt)
1329{
1330 assert(imm>0);
1331 assert(imm<32);
1332 //if(imm==1) ...
1333 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1334 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1335}
1336
c6c3b1b3 1337void emit_lsls_imm(int rs,int imm,int rt)
1338{
1339 assert(imm>0);
1340 assert(imm<32);
1341 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1342 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1343}
1344
665f33e1 1345void emit_lslpls_imm(int rs,int imm,int rt)
1346{
1347 assert(imm>0);
1348 assert(imm<32);
1349 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1350 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1351}
1352
57871462 1353void emit_shrimm(int rs,u_int imm,int rt)
1354{
1355 assert(imm>0);
1356 assert(imm<32);
1357 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1358 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1359}
1360
1361void emit_sarimm(int rs,u_int imm,int rt)
1362{
1363 assert(imm>0);
1364 assert(imm<32);
1365 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1366 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1367}
1368
1369void emit_rorimm(int rs,u_int imm,int rt)
1370{
1371 assert(imm>0);
1372 assert(imm<32);
1373 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1374 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1375}
1376
1377void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1378{
1379 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1380 assert(imm>0);
1381 assert(imm<32);
1382 //if(imm==1) ...
1383 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1384 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1385 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1386 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1387}
1388
1389void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1390{
1391 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1392 assert(imm>0);
1393 assert(imm<32);
1394 //if(imm==1) ...
1395 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1396 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1397 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1398 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1399}
1400
/* Sign-extend the low 16 bits of rs into rt: a single sxth when
 * HAVE_ARMV6 is defined, otherwise shift left 16 then arithmetic right 16. */
void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
#else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
#endif
}
1411
/* Sign-extend the low 8 bits of rs into rt: a single sxtb when
 * HAVE_ARMV6 is defined, otherwise shift left 24 then arithmetic right 24. */
void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
#else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
#endif
}
1422
57871462 1423void emit_shl(u_int rs,u_int shift,u_int rt)
1424{
1425 assert(rs<16);
1426 assert(rt<16);
1427 assert(shift<16);
1428 //if(imm==1) ...
1429 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1430 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1431}
1432void emit_shr(u_int rs,u_int shift,u_int rt)
1433{
1434 assert(rs<16);
1435 assert(rt<16);
1436 assert(shift<16);
1437 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1438 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1439}
1440void emit_sar(u_int rs,u_int shift,u_int rt)
1441{
1442 assert(rs<16);
1443 assert(rt<16);
1444 assert(shift<16);
1445 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1446 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1447}
1448void emit_shlcl(int r)
1449{
1450 assem_debug("shl %%%s,%%cl\n",regname[r]);
1451 assert(0);
1452}
1453void emit_shrcl(int r)
1454{
1455 assem_debug("shr %%%s,%%cl\n",regname[r]);
1456 assert(0);
1457}
1458void emit_sarcl(int r)
1459{
1460 assem_debug("sar %%%s,%%cl\n",regname[r]);
1461 assert(0);
1462}
1463
1464void emit_shldcl(int r1,int r2)
1465{
1466 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1467 assert(0);
1468}
1469void emit_shrdcl(int r1,int r2)
1470{
1471 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1472 assert(0);
1473}
1474void emit_orrshl(u_int rs,u_int shift,u_int rt)
1475{
1476 assert(rs<16);
1477 assert(rt<16);
1478 assert(shift<16);
1479 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1480 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1481}
1482void emit_orrshr(u_int rs,u_int shift,u_int rt)
1483{
1484 assert(rs<16);
1485 assert(rt<16);
1486 assert(shift<16);
1487 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1488 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1489}
1490
1491void emit_cmpimm(int rs,int imm)
1492{
1493 u_int armval;
1494 if(genimm(imm,&armval)) {
5a05d80c 1495 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1496 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1497 }else if(genimm(-imm,&armval)) {
5a05d80c 1498 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1499 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1500 }else if(imm>0) {
1501 assert(imm<65536);
57871462 1502 emit_movimm(imm,HOST_TEMPREG);
57871462 1503 assem_debug("cmp %s,r14\n",regname[rs]);
1504 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1505 }else{
1506 assert(imm>-65536);
57871462 1507 emit_movimm(-imm,HOST_TEMPREG);
57871462 1508 assem_debug("cmn %s,r14\n",regname[rs]);
1509 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1510 }
1511}
1512
1513void emit_cmovne(u_int *addr,int rt)
1514{
1515 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1516 assert(0);
1517}
1518void emit_cmovl(u_int *addr,int rt)
1519{
1520 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1521 assert(0);
1522}
1523void emit_cmovs(u_int *addr,int rt)
1524{
1525 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1526 assert(0);
1527}
1528void emit_cmovne_imm(int imm,int rt)
1529{
1530 assem_debug("movne %s,#%d\n",regname[rt],imm);
1531 u_int armval;
cfbd3c6e 1532 genimm_checked(imm,&armval);
57871462 1533 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1534}
1535void emit_cmovl_imm(int imm,int rt)
1536{
1537 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1538 u_int armval;
cfbd3c6e 1539 genimm_checked(imm,&armval);
57871462 1540 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1541}
1542void emit_cmovb_imm(int imm,int rt)
1543{
1544 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1545 u_int armval;
cfbd3c6e 1546 genimm_checked(imm,&armval);
57871462 1547 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1548}
1549void emit_cmovs_imm(int imm,int rt)
1550{
1551 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1552 u_int armval;
cfbd3c6e 1553 genimm_checked(imm,&armval);
57871462 1554 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1555}
1556void emit_cmove_reg(int rs,int rt)
1557{
1558 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1559 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1560}
1561void emit_cmovne_reg(int rs,int rt)
1562{
1563 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1564 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1565}
1566void emit_cmovl_reg(int rs,int rt)
1567{
1568 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1569 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1570}
1571void emit_cmovs_reg(int rs,int rt)
1572{
1573 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1574 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1575}
1576
/*
 * Set rt to 1 if rs < imm (signed), else 0.
 * When rt aliases rs, rt can only be cleared after the compare has read it;
 * otherwise it is cleared up front.
 */
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);
  if (!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/*
 * Set rt to 1 if rs < imm (unsigned, via the CC condition), else 0.
 * When rt aliases rs, rt can only be cleared after the compare has read it;
 * otherwise it is cleared up front.
 */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);
  if (!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
/*
 * Set rt to 1 if the 64-bit value rsh:rsl is below imm (signed), else 0.
 * The low words are compared first; the result is then overridden based on
 * how the high word compares with the high word of the sign-extended
 * immediate (0 for imm >= 0, -1 for imm < 0). rsh must differ from rt.
 */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if (imm < 0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  } else {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
/*
 * Set rt to 1 if the 64-bit value rsh:rsl is below imm (unsigned), else 0.
 * The low words are compared first; a high word that differs from the high
 * word of the sign-extended immediate then forces the result (0 when
 * imm >= 0, 1 when imm < 0). rsh must differ from rt.
 */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if (imm < 0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  } else {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1623
1624void emit_cmp(int rs,int rt)
1625{
1626 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1627 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1628}
/* Set rt to 1 if rs > 0 (signed), else 0: compare rs with 1, preload 1 and
 * replace it with 0 under the LT condition. */
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
/* Set rt to 1 if rs is non-zero, else 0. rs is copied into rt with a
 * flag-setting move (or just tested when rt aliases rs), then rt is
 * replaced by 1 under the NE condition. */
void emit_set_nz32(int rs, int rt)
{
  if (rs == rt) {
    emit_test(rs, rs);
  } else {
    emit_movs(rs, rt);
  }
  emit_cmovne_imm(1, rt);
}
/*
 * Set rt to 1 if the signed 64-bit value rsh:rsl is greater than zero,
 * else 0.
 *
 * The low word only matters when the high word is zero, and then the
 * question is simply "is rsl non-zero". That needs an unsigned compare:
 * a signed one would report 0 for a low word with bit 31 set. The high
 * word then overrides the result: non-zero gives 1, negative gives 0.
 */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_cmpimm(rsl,1);
  emit_movimm(1,rt);
  emit_cmovb_imm(0,rt);   /* movcc: rsl < 1 unsigned, i.e. rsl == 0 */
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
/* Set rt to 1 if the 64-bit value rsh:rsl is non-zero, else 0: OR both
 * halves into rt with flags, then load 1 under the NE condition. */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
/*
 * Set rt to 1 if rs1 < rs2 (signed), else 0.
 * If rt aliases either source it is cleared only after the compare;
 * otherwise it is cleared up front.
 */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/*
 * Set rt to 1 if rs1 < rs2 (unsigned, via the CC condition), else 0.
 * If rt aliases either source it is cleared only after the compare;
 * otherwise it is cleared up front.
 */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1673void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1674{
1675 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1676 assert(u1!=rt);
1677 assert(u2!=rt);
1678 emit_cmp(l1,l2);
1679 emit_movimm(0,rt);
1680 emit_sbcs(u1,u2,HOST_TEMPREG);
1681 emit_cmovl_imm(1,rt);
1682}
1683void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1684{
1685 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1686 assert(u1!=rt);
1687 assert(u2!=rt);
1688 emit_cmp(l1,l2);
1689 emit_movimm(0,rt);
1690 emit_sbcs(u1,u2,HOST_TEMPREG);
1691 emit_cmovb_imm(1,rt);
1692}
1693
1694void emit_call(int a)
1695{
1696 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1697 u_int offset=genjmp(a);
1698 output_w32(0xeb000000|offset);
1699}
1700void emit_jmp(int a)
1701{
1702 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1703 u_int offset=genjmp(a);
1704 output_w32(0xea000000|offset);
1705}
/* Emit "bne a": branch to a under the NE condition. */
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000 | genjmp(a));
}
/* Emit "beq a": branch to a under the EQ condition. */
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000 | genjmp(a));
}
/* Emit "bmi a": branch to a under the MI (negative) condition. */
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000 | genjmp(a));
}
/* Emit "bpl a": branch to a under the PL condition. */
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000 | genjmp(a));
}
/* Emit "blt a": branch to a under the LT condition. */
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000 | genjmp(a));
}
/* Emit "bge a": branch to a under the GE condition. */
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000 | genjmp(a));
}
/* Emit "bvc a": branch to a under the VC (no overflow) condition. */
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000 | genjmp(a));
}
/* Emit "bcs a": branch to a under the CS (carry set) condition. */
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000 | genjmp(a));
}
/* Emit "bcc a": branch to a under the CC (carry clear) condition. */
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000 | genjmp(a));
}
1760
/* x86-style stub (push immediate): not implemented, traps via assert. */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
/* x86-style stub (pusha): not implemented, traps via assert. */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/* x86-style stub (popa): not implemented, traps via assert. */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1776void emit_pushreg(u_int r)
1777{
1778 assem_debug("push %%%s\n",regname[r]);
1779 assert(0);
1780}
1781void emit_popreg(u_int r)
1782{
1783 assem_debug("pop %%%s\n",regname[r]);
1784 assert(0);
1785}
1786void emit_callreg(u_int r)
1787{
c6c3b1b3 1788 assert(r<15);
1789 assem_debug("blx %s\n",regname[r]);
1790 output_w32(0xe12fff30|r);
57871462 1791}
1792void emit_jmpreg(u_int r)
1793{
1794 assem_debug("mov pc,%s\n",regname[r]);
1795 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1796}
1797
1798void emit_readword_indexed(int offset, int rs, int rt)
1799{
1800 assert(offset>-4096&&offset<4096);
1801 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1802 if(offset>=0) {
1803 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1804 }else{
1805 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1806 }
1807}
1808void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1809{
1810 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1811 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1812}
c6c3b1b3 1813void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1814{
1815 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1816 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1817}
1818void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1819{
1820 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1821 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1822}
1823void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1824{
1825 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1826 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1827}
1828void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1829{
1830 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1831 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1832}
1833void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1834{
1835 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1836 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1837}
/*
 * Word load with an optional map register.
 * map < 0: plain "ldr rt,[rs,#addr]".
 * map >= 0: addr must be 0 and the load uses rs + map*4.
 */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/*
 * Load a 64-bit value as two words: rh from addr (skipped when rh < 0) and
 * rl from addr+4.
 * With a map register (map >= 0) the second word is reached by incrementing
 * map itself, so map is modified; rh must not alias rs in that case.
 */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);
  if (map < 0) {
    if (want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if (want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1858void emit_movsbl_indexed(int offset, int rs, int rt)
1859{
1860 assert(offset>-256&&offset<256);
1861 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1862 if(offset>=0) {
1863 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1864 }else{
1865 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1866 }
1867}
1868void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1869{
1870 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1871 else {
1872 if(addr==0) {
1873 emit_shlimm(map,2,map);
1874 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1875 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1876 }else{
1877 assert(addr>-256&&addr<256);
1878 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1879 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1880 emit_movsbl_indexed(addr, rt, rt);
1881 }
1882 }
1883}
1884void emit_movswl_indexed(int offset, int rs, int rt)
1885{
1886 assert(offset>-256&&offset<256);
1887 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1888 if(offset>=0) {
1889 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1890 }else{
1891 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1892 }
1893}
1894void emit_movzbl_indexed(int offset, int rs, int rt)
1895{
1896 assert(offset>-4096&&offset<4096);
1897 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1898 if(offset>=0) {
1899 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1900 }else{
1901 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1902 }
1903}
1904void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1905{
1906 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1907 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1908}
/*
 * Zero-extending byte load with an optional map register.
 * map < 0: plain offset load from rs.
 * map >= 0: load from base + map*4, where the base is rs itself when
 *   addr == 0, or rs + addr computed into rt otherwise.
 */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
  } else {
    emit_movzbl_dualindexedx4(rs, map, rt);
  }
}
1921void emit_movzwl_indexed(int offset, int rs, int rt)
1922{
1923 assert(offset>-256&&offset<256);
1924 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1925 if(offset>=0) {
1926 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1927 }else{
1928 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1929 }
1930}
054175e9 1931static void emit_ldrd(int offset, int rs, int rt)
1932{
1933 assert(offset>-256&&offset<256);
1934 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1935 if(offset>=0) {
1936 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1937 }else{
1938 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1939 }
1940}
57871462 1941void emit_readword(int addr, int rt)
1942{
1943 u_int offset = addr-(u_int)&dynarec_local;
1944 assert(offset<4096);
1945 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1946 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1947}
1948void emit_movsbl(int addr, int rt)
1949{
1950 u_int offset = addr-(u_int)&dynarec_local;
1951 assert(offset<256);
1952 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1953 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1954}
1955void emit_movswl(int addr, int rt)
1956{
1957 u_int offset = addr-(u_int)&dynarec_local;
1958 assert(offset<256);
1959 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1960 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1961}
1962void emit_movzbl(int addr, int rt)
1963{
1964 u_int offset = addr-(u_int)&dynarec_local;
1965 assert(offset<4096);
1966 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1967 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1968}
1969void emit_movzwl(int addr, int rt)
1970{
1971 u_int offset = addr-(u_int)&dynarec_local;
1972 assert(offset<256);
1973 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1974 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1975}
1976void emit_movzwl_reg(int rs, int rt)
1977{
1978 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1979 assert(0);
1980}
1981
1982void emit_xchg(int rs, int rt)
1983{
1984 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1985 assert(0);
1986}
1987void emit_writeword_indexed(int rt, int offset, int rs)
1988{
1989 assert(offset>-4096&&offset<4096);
1990 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1991 if(offset>=0) {
1992 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1993 }else{
1994 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1995 }
1996}
1997void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1998{
1999 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2000 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2001}
/*
 * Word store with an optional map register.
 * map < 0: plain "str rt,[rs,#addr]".
 * map >= 0: addr must be 0 and the store goes to rs + map*4.
 * The temp argument is not used by this function.
 */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/*
 * Store a 64-bit value as two words: rh at addr and rl at addr+4.
 * map < 0: two plain offset stores (the rh store is skipped when rh < 0).
 * map >= 0: rh is required. The second word is reached either through
 *   temp = map + 1 (when temp differs from rs), or by advancing rs itself
 *   by 4 when temp aliases rs.
 */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if (temp == rs) {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  } else {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }
}
2026void emit_writehword_indexed(int rt, int offset, int rs)
2027{
2028 assert(offset>-256&&offset<256);
2029 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2030 if(offset>=0) {
2031 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2032 }else{
2033 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2034 }
2035}
2036void emit_writebyte_indexed(int rt, int offset, int rs)
2037{
2038 assert(offset>-4096&&offset<4096);
2039 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2040 if(offset>=0) {
2041 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2042 }else{
2043 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2044 }
2045}
2046void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2047{
2048 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2049 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2050}
/*
 * Byte store with an optional map register.
 * map < 0: plain offset store through rs.
 * map >= 0: store to base + map*4, where the base is rs itself when
 *   addr == 0, or rs + addr computed into temp otherwise.
 */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
  } else {
    emit_writebyte_dualindexedx4(rt, rs, map);
  }
}
b96d3df7 2063void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2064{
2065 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2066 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2067}
2068void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2069{
2070 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2071 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2072}
2073void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2074{
2075 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2076 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2077}
57871462 2078void emit_writeword(int rt, int addr)
2079{
2080 u_int offset = addr-(u_int)&dynarec_local;
2081 assert(offset<4096);
2082 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2083 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2084}
2085void emit_writehword(int rt, int addr)
2086{
2087 u_int offset = addr-(u_int)&dynarec_local;
2088 assert(offset<256);
2089 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2090 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2091}
2092void emit_writebyte(int rt, int addr)
2093{
2094 u_int offset = addr-(u_int)&dynarec_local;
2095 assert(offset<4096);
74426039 2096 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2097 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2098}
/* x86-style stub (store immediate word to memory): not implemented, traps
 * via assert. */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
/* x86-style stub (store immediate byte to memory): not implemented, traps
 * via assert. */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
2109
2110void emit_mul(int rs)
2111{
2112 assem_debug("mul %%%s\n",regname[rs]);
2113 assert(0);
2114}
2115void emit_imul(int rs)
2116{
2117 assem_debug("imul %%%s\n",regname[rs]);
2118 assert(0);
2119}
2120void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2121{
2122 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2123 assert(rs1<16);
2124 assert(rs2<16);
2125 assert(hi<16);
2126 assert(lo<16);
2127 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2128}
2129void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2130{
2131 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2132 assert(rs1<16);
2133 assert(rs2<16);
2134 assert(hi<16);
2135 assert(lo<16);
2136 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2137}
2138
2139void emit_div(int rs)
2140{
2141 assem_debug("div %%%s\n",regname[rs]);
2142 assert(0);
2143}
2144void emit_idiv(int rs)
2145{
2146 assem_debug("idiv %%%s\n",regname[rs]);
2147 assert(0);
2148}
/* Not implemented in this backend: logs the request, then asserts. */
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2154
2155void emit_clz(int rs,int rt)
2156{
2157 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2158 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2159}
2160
2161void emit_subcs(int rs1,int rs2,int rt)
2162{
2163 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2164 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2165}
2166
2167void emit_shrcc_imm(int rs,u_int imm,int rt)
2168{
2169 assert(imm>0);
2170 assert(imm<32);
2171 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2172 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2173}
2174
b1be1eee 2175void emit_shrne_imm(int rs,u_int imm,int rt)
2176{
2177 assert(imm>0);
2178 assert(imm<32);
2179 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2180 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2181}
2182
57871462 2183void emit_negmi(int rs, int rt)
2184{
2185 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2186 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2187}
2188
2189void emit_negsmi(int rs, int rt)
2190{
2191 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2192 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2193}
2194
2195void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2196{
2197 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2198 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2199}
2200
2201void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2202{
2203 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2204 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2205}
2206
2207void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2208{
2209 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2210 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2211}
2212
2213void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2214{
2215 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2216 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2217}
2218
2219void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2220{
2221 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2222 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2223}
2224
2225void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2226{
2227 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2228 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2229}
2230
2231void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2232{
2233 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2234 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2235}
2236
2237void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2238{
2239 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2240 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2241}
2242
2243void emit_teq(int rs, int rt)
2244{
2245 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2246 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2247}
2248
2249void emit_rsbimm(int rs, int imm, int rt)
2250{
2251 u_int armval;
cfbd3c6e 2252 genimm_checked(imm,&armval);
57871462 2253 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2254 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2255}
2256
2257// Load 2 immediates optimizing for small code size
2258void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2259{
2260 emit_movimm(imm1,rt1);
2261 u_int armval;
2262 if(genimm(imm2-imm1,&armval)) {
2263 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2264 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2265 }else if(genimm(imm1-imm2,&armval)) {
2266 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2267 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2268 }
2269 else emit_movimm(imm2,rt2);
2270}
2271
2272// Conditionally select one of two immediates, optimizing for small code size
2273// This will only be called if HAVE_CMOV_IMM is defined
2274void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2275{
2276 u_int armval;
2277 if(genimm(imm2-imm1,&armval)) {
2278 emit_movimm(imm1,rt);
2279 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2280 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2281 }else if(genimm(imm1-imm2,&armval)) {
2282 emit_movimm(imm1,rt);
2283 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2284 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2285 }
2286 else {
665f33e1 2287 #ifndef HAVE_ARMV7
57871462 2288 emit_movimm(imm1,rt);
2289 add_literal((int)out,imm2);
2290 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2291 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2292 #else
2293 emit_movw(imm1&0x0000FFFF,rt);
2294 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2295 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2296 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2297 }
2298 emit_movt(imm1&0xFFFF0000,rt);
2299 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2300 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2301 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2302 }
2303 #endif
2304 }
2305}
2306
// Special case for checking invalid_code (absolute-address form).
// Not implemented in this backend: always asserts.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2312
2313// special case for checking invalid_code
2314void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2315{
2316 assert(imm<128&&imm>=0);
2317 assert(r>=0&&r<16);
2318 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2319 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2320 emit_cmpimm(HOST_TEMPREG,imm);
2321}
2322
2323// special case for tlb mapping
2324void emit_addsr12(int rs1,int rs2,int rt)
2325{
2326 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2327 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2328}
2329
0bbd1454 2330void emit_callne(int a)
2331{
2332 assem_debug("blne %x\n",a);
2333 u_int offset=genjmp(a);
2334 output_w32(0x1b000000|offset);
2335}
2336
// Used to preload hash table entries.
// A prefetch of an absolute address is not implemented for this backend:
// emitting the x86 byte sequence (0F 18 /1 abs32) here would inject seven
// non-ARM bytes and leave the output pointer misaligned. Behave like the
// other unimplemented emitters instead: log the request and assert.
// Use emit_prefetchreg() for a register-based "pld".
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  assert(0);
}
2346void emit_prefetchreg(int r)
2347{
2348 assem_debug("pld %s\n",regname[r]);
2349 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2350}
2351
2352// Special case for mini_ht
2353void emit_ldreq_indexed(int rs, u_int offset, int rt)
2354{
2355 assert(offset<4096);
2356 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2357 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2358}
2359
2360void emit_flds(int r,int sr)
2361{
2362 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2363 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2364}
2365
2366void emit_vldr(int r,int vr)
2367{
2368 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2369 output_w32(0xed900b00|(vr<<12)|(r<<16));
2370}
2371
2372void emit_fsts(int sr,int r)
2373{
2374 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2375 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2376}
2377
2378void emit_vstr(int vr,int r)
2379{
2380 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2381 output_w32(0xed800b00|(vr<<12)|(r<<16));
2382}
2383
/* Emit "ftosizs s<d>,s<s>": single-precision float to signed integer
 * conversion (the "z" form), single register to single register. */
void emit_ftosizs(int s,int d)
{
  assem_debug("ftosizs s%d,s%d\n", d, s);
  output_w32(0xeebd0ac0
             | ((d & 14) << 11) | ((d & 1) << 22)    /* Sd */
             | ((s & 14) >> 1) | ((s & 1) << 5));    /* Sm */
}
2389
/* Emit "ftosizd s<d>,d<s>": double-precision float to signed integer
 * conversion; the source double index is limited to d0-d7 by the mask. */
void emit_ftosizd(int s,int d)
{
  assem_debug("ftosizd s%d,d%d\n", d, s);
  output_w32(0xeebd0bc0
             | ((d & 14) << 11) | ((d & 1) << 22)    /* Sd */
             | (s & 7));                             /* Dm */
}
2395
/* Emit "fsitos s<d>,s<s>": signed integer to single-precision float. */
void emit_fsitos(int s,int d)
{
  assem_debug("fsitos s%d,s%d\n", d, s);
  output_w32(0xeeb80ac0
             | ((d & 14) << 11) | ((d & 1) << 22)    /* Sd */
             | ((s & 14) >> 1) | ((s & 1) << 5));    /* Sm */
}
2401
/* Emit "fsitod d<d>,s<s>": signed integer to double-precision float; the
 * destination double index is limited to d0-d7 by the mask. */
void emit_fsitod(int s,int d)
{
  assem_debug("fsitod d%d,s%d\n", d, s);
  output_w32(0xeeb80bc0
             | ((d & 7) << 12)                       /* Dd */
             | ((s & 14) >> 1) | ((s & 1) << 5));    /* Sm */
}
2407
/* Emit "fcvtds d<d>,s<s>": widen a single-precision value to double. */
void emit_fcvtds(int s,int d)
{
  assem_debug("fcvtds d%d,s%d\n", d, s);
  output_w32(0xeeb70ac0
             | ((d & 7) << 12)                       /* Dd */
             | ((s & 14) >> 1) | ((s & 1) << 5));    /* Sm */
}
2413
/* Emit "fcvtsd s<d>,d<s>": narrow a double-precision value to single. */
void emit_fcvtsd(int s,int d)
{
  assem_debug("fcvtsd s%d,d%d\n", d, s);
  output_w32(0xeeb70bc0
             | ((d & 14) << 11) | ((d & 1) << 22)    /* Sd */
             | (s & 7));                             /* Dm */
}
2419
/* Emit "fsqrts s<d>,s<s>": single-precision square root.
 * Both operands are encoded as single-precision registers, so the debug
 * mnemonic names both with the "s" prefix. */
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2425
/* Emit "fsqrtd d<d>,d<s>": double-precision square root.
 * Both operands are encoded as double-precision registers (d0-d7 after
 * masking), so the debug mnemonic names both with the "d" prefix. */
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2431
/* Emit "fabss s<d>,s<s>": single-precision absolute value.
 * Both operands are encoded as single-precision registers, so the debug
 * mnemonic names both with the "s" prefix. */
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2437
/* Emit "fabsd d<d>,d<s>": double-precision absolute value.
 * Both operands are encoded as double-precision registers (d0-d7 after
 * masking), so the debug mnemonic names both with the "d" prefix. */
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2443
/* Emit "fnegs s<d>,s<s>": single-precision negate.
 * Both operands are encoded as single-precision registers, so the debug
 * mnemonic names both with the "s" prefix. */
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2449
/* Emit "fnegd d<d>,d<s>": double-precision negate.
 * Both operands are encoded as double-precision registers (d0-d7 after
 * masking), so the debug mnemonic names both with the "d" prefix. */
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2455
/* Emit "fadds s<d>,s<s1>,s<s2>": single-precision add. */
void emit_fadds(int s1,int s2,int d)
{
  assem_debug("fadds s%d,s%d,s%d\n", d, s1, s2);
  output_w32(0xee300a00
             | ((d & 14) << 11) | ((d & 1) << 22)      /* Sd */
             | ((s1 & 14) << 15) | ((s1 & 1) << 7)     /* Sn */
             | ((s2 & 14) >> 1) | ((s2 & 1) << 5));    /* Sm */
}
2461
/* Emit "faddd d<d>,d<s1>,d<s2>": double-precision add (indices masked to 0-7). */
void emit_faddd(int s1,int s2,int d)
{
  assem_debug("faddd d%d,d%d,d%d\n", d, s1, s2);
  output_w32(0xee300b00 | ((d & 7) << 12) | ((s1 & 7) << 16) | (s2 & 7));
}
2467
/* Emit "fsubs s<d>,s<s1>,s<s2>": single-precision subtract. */
void emit_fsubs(int s1,int s2,int d)
{
  assem_debug("fsubs s%d,s%d,s%d\n", d, s1, s2);
  output_w32(0xee300a40
             | ((d & 14) << 11) | ((d & 1) << 22)      /* Sd */
             | ((s1 & 14) << 15) | ((s1 & 1) << 7)     /* Sn */
             | ((s2 & 14) >> 1) | ((s2 & 1) << 5));    /* Sm */
}
2473
/* Emit "fsubd d<d>,d<s1>,d<s2>": double-precision subtract (indices masked to 0-7). */
void emit_fsubd(int s1,int s2,int d)
{
  assem_debug("fsubd d%d,d%d,d%d\n", d, s1, s2);
  output_w32(0xee300b40 | ((d & 7) << 12) | ((s1 & 7) << 16) | (s2 & 7));
}
2479
/* Emit "fmuls s<d>,s<s1>,s<s2>": single-precision multiply. */
void emit_fmuls(int s1,int s2,int d)
{
  assem_debug("fmuls s%d,s%d,s%d\n", d, s1, s2);
  output_w32(0xee200a00
             | ((d & 14) << 11) | ((d & 1) << 22)      /* Sd */
             | ((s1 & 14) << 15) | ((s1 & 1) << 7)     /* Sn */
             | ((s2 & 14) >> 1) | ((s2 & 1) << 5));    /* Sm */
}
2485
/* Emit "fmuld d<d>,d<s1>,d<s2>": double-precision multiply (indices masked to 0-7). */
void emit_fmuld(int s1,int s2,int d)
{
  assem_debug("fmuld d%d,d%d,d%d\n", d, s1, s2);
  output_w32(0xee200b00 | ((d & 7) << 12) | ((s1 & 7) << 16) | (s2 & 7));
}
2491
/* Emit "fdivs s<d>,s<s1>,s<s2>": single-precision divide. */
void emit_fdivs(int s1,int s2,int d)
{
  assem_debug("fdivs s%d,s%d,s%d\n", d, s1, s2);
  output_w32(0xee800a00
             | ((d & 14) << 11) | ((d & 1) << 22)      /* Sd */
             | ((s1 & 14) << 15) | ((s1 & 1) << 7)     /* Sn */
             | ((s2 & 14) >> 1) | ((s2 & 1) << 5));    /* Sm */
}
2497
/* Emit "fdivd d<d>,d<s1>,d<s2>": double-precision divide (indices masked to 0-7). */
void emit_fdivd(int s1,int s2,int d)
{
  assem_debug("fdivd d%d,d%d,d%d\n", d, s1, s2);
  output_w32(0xee800b00 | ((d & 7) << 12) | ((s1 & 7) << 16) | (s2 & 7));
}
2503
2504void emit_fcmps(int x,int y)
2505{
2506 assem_debug("fcmps s14, s15\n");
2507 output_w32(0xeeb47a67);
2508}
2509
2510void emit_fcmpd(int x,int y)
2511{
2512 assem_debug("fcmpd d6, d7\n");
2513 output_w32(0xeeb46b47);
2514}
2515
2516void emit_fmstat()
2517{
2518 assem_debug("fmstat\n");
2519 output_w32(0xeef1fa10);
2520}
2521
2522void emit_bicne_imm(int rs,int imm,int rt)
2523{
2524 u_int armval;
cfbd3c6e 2525 genimm_checked(imm,&armval);
57871462 2526 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2527 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2528}
2529
2530void emit_biccs_imm(int rs,int imm,int rt)
2531{
2532 u_int armval;
cfbd3c6e 2533 genimm_checked(imm,&armval);
57871462 2534 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2535 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2536}
2537
2538void emit_bicvc_imm(int rs,int imm,int rt)
2539{
2540 u_int armval;
cfbd3c6e 2541 genimm_checked(imm,&armval);
57871462 2542 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2543 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2544}
2545
2546void emit_bichi_imm(int rs,int imm,int rt)
2547{
2548 u_int armval;
cfbd3c6e 2549 genimm_checked(imm,&armval);
57871462 2550 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2551 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2552}
2553
2554void emit_orrvs_imm(int rs,int imm,int rt)
2555{
2556 u_int armval;
cfbd3c6e 2557 genimm_checked(imm,&armval);
57871462 2558 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2559 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2560}
2561
b9b61529 2562void emit_orrne_imm(int rs,int imm,int rt)
2563{
2564 u_int armval;
cfbd3c6e 2565 genimm_checked(imm,&armval);
b9b61529 2566 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2567 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2568}
2569
2570void emit_andne_imm(int rs,int imm,int rt)
2571{
2572 u_int armval;
cfbd3c6e 2573 genimm_checked(imm,&armval);
b9b61529 2574 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2575 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2576}
2577
665f33e1 2578void emit_addpl_imm(int rs,int imm,int rt)
2579{
2580 u_int armval;
2581 genimm_checked(imm,&armval);
2582 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2583 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2584}
2585
57871462 2586void emit_jno_unlikely(int a)
2587{
2588 //emit_jno(a);
2589 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2590 output_w32(0x72800000|rd_rn_rm(15,15,0));
2591}
2592
054175e9 2593static void save_regs_all(u_int reglist)
57871462 2594{
054175e9 2595 int i;
57871462 2596 if(!reglist) return;
2597 assem_debug("stmia fp,{");
054175e9 2598 for(i=0;i<16;i++)
2599 if(reglist&(1<<i))
2600 assem_debug("r%d,",i);
57871462 2601 assem_debug("}\n");
2602 output_w32(0xe88b0000|reglist);
2603}
054175e9 2604static void restore_regs_all(u_int reglist)
57871462 2605{
054175e9 2606 int i;
57871462 2607 if(!reglist) return;
2608 assem_debug("ldmia fp,{");
054175e9 2609 for(i=0;i<16;i++)
2610 if(reglist&(1<<i))
2611 assem_debug("r%d,",i);
57871462 2612 assem_debug("}\n");
2613 output_w32(0xe89b0000|reglist);
2614}
054175e9 2615// Save registers before function call
2616static void save_regs(u_int reglist)
2617{
4d646738 2618 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2619 save_regs_all(reglist);
2620}
2621// Restore registers after function call
2622static void restore_regs(u_int reglist)
2623{
4d646738 2624 reglist&=CALLER_SAVE_REGS;
054175e9 2625 restore_regs_all(reglist);
2626}
57871462 2627
2628// Write back consts using r14 so we don't disturb the other registers
2629void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2630{
2631 int hr;
2632 for(hr=0;hr<HOST_REGS;hr++) {
2633 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2634 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2635 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2636 int value=constmap[i][hr];
2637 if(value==0) {
2638 emit_zeroreg(HOST_TEMPREG);
2639 }
2640 else {
2641 emit_movimm(value,HOST_TEMPREG);
2642 }
2643 emit_storereg(i_regmap[hr],HOST_TEMPREG);
57871462 2644 }
2645 }
2646 }
2647 }
2648}
2649
2650/* Stubs/epilogue */
2651
2652void literal_pool(int n)
2653{
2654 if(!literalcount) return;
2655 if(n) {
2656 if((int)out-literals[0][0]<4096-n) return;
2657 }
2658 u_int *ptr;
2659 int i;
2660 for(i=0;i<literalcount;i++)
2661 {
77750690 2662 u_int l_addr=(u_int)out;
2663 int j;
2664 for(j=0;j<i;j++) {
2665 if(literals[j][1]==literals[i][1]) {
2666 //printf("dup %08x\n",literals[i][1]);
2667 l_addr=literals[j][0];
2668 break;
2669 }
2670 }
57871462 2671 ptr=(u_int *)literals[i][0];
77750690 2672 u_int offset=l_addr-(u_int)ptr-8;
57871462 2673 assert(offset<4096);
2674 assert(!(offset&3));
2675 *ptr|=offset;
77750690 2676 if(l_addr==(u_int)out) {
2677 literals[i][0]=l_addr; // remember for dupes
2678 output_w32(literals[i][1]);
2679 }
57871462 2680 }
2681 literalcount=0;
2682}
2683
2684void literal_pool_jumpover(int n)
2685{
2686 if(!literalcount) return;
2687 if(n) {
2688 if((int)out-literals[0][0]<4096-n) return;
2689 }
2690 int jaddr=(int)out;
2691 emit_jmp(0);
2692 literal_pool(0);
2693 set_jump_target(jaddr,(int)out);
2694}
2695
c67af2ac 2696emit_extjump2(u_int addr, int target, int linker)
57871462 2697{
2698 u_char *ptr=(u_char *)addr;
2699 assert((ptr[3]&0x0e)==0xa);
2700 emit_loadlp(target,0);
2701 emit_loadlp(addr,1);
24385cae 2702 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2703 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2704//DEBUG >
2705#ifdef DEBUG_CYCLE_COUNT
2706 emit_readword((int)&last_count,ECX);
2707 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2708 emit_readword((int)&next_interupt,ECX);
2709 emit_writeword(HOST_CCREG,(int)&Count);
2710 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2711 emit_writeword(ECX,(int)&last_count);
2712#endif
2713//DEBUG <
2714 emit_jmp(linker);
2715}
2716
2717emit_extjump(int addr, int target)
2718{
2719 emit_extjump2(addr, target, (int)dyna_linker);
2720}
2721emit_extjump_ds(int addr, int target)
2722{
2723 emit_extjump2(addr, target, (int)dyna_linker_ds);
2724}
2725
13e35c04 2726// put rt_val into rt, potentially making use of rs with value rs_val
2727static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2728{
8575a877 2729 u_int armval;
2730 int diff;
2731 if(genimm(rt_val,&armval)) {
2732 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2733 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2734 return;
2735 }
2736 if(genimm(~rt_val,&armval)) {
2737 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2738 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2739 return;
2740 }
2741 diff=rt_val-rs_val;
2742 if(genimm(diff,&armval)) {
2743 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2744 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2745 return;
2746 }else if(genimm(-diff,&armval)) {
2747 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2748 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2749 return;
2750 }
2751 emit_movimm(rt_val,rt);
2752}
2753
// Return 1 if the function above can do its job cheaply, i.e. v2 is reachable
// from v1 with one add or sub: the difference (or its negation), after
// stripping trailing pairs of zero bits, must fit in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if (v1 == v2)
    return 1;
  delta = v2 - v1;

  // Positive direction (add).
  bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  // Negative direction (sub).
  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  return 0;
}
cbbab9cd 2769
// Move host registers a0/a1 into the argument registers r0/r1.
// A negative register number means "no argument to move". Trashes r2 when
// the two values have to be swapped.
static void pass_args(int a0, int a1)
{
  if (a1 == 0 && a0 == 1) {
    // r0 and r1 hold each other's value: must swap through r2
    emit_mov(a0, 2);
    emit_mov(a1, 1);
    emit_mov(2, 0);
  }
  else if (a1 == 0 && a0 != 0) {
    // second argument lives in r0: move it out before r0 is overwritten
    emit_mov(a1, 1);
    if (a0 >= 0)
      emit_mov(a0, 0);
  }
  else {
    if (a0 >= 0 && a0 != 0)
      emit_mov(a0, 0);
    if (a1 >= 0 && a1 != 1)
      emit_mov(a1, 1);
  }
}
2786
b1be1eee 2787static void mov_loadtype_adj(int type,int rs,int rt)
2788{
2789 switch(type) {
2790 case LOADB_STUB: emit_signextend8(rs,rt); break;
2791 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2792 case LOADH_STUB: emit_signextend16(rs,rt); break;
2793 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2794 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2795 default: assert(0);
2796 }
2797}
2798
b1be1eee 2799#include "pcsxmem.h"
2800#include "pcsxmem_inline.c"
b1be1eee 2801
57871462 2802do_readstub(int n)
2803{
2804 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2805 literal_pool(256);
2806 set_jump_target(stubs[n][1],(int)out);
2807 int type=stubs[n][0];
2808 int i=stubs[n][3];
2809 int rs=stubs[n][4];
2810 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2811 u_int reglist=stubs[n][7];
2812 signed char *i_regmap=i_regs->regmap;
2813 int addr=get_reg(i_regmap,AGEN1+(i&1));
2814 int rth,rt;
2815 int ds;
b9b61529 2816 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2817 rth=get_reg(i_regmap,FTEMP|64);
2818 rt=get_reg(i_regmap,FTEMP);
2819 }else{
2820 rth=get_reg(i_regmap,rt1[i]|64);
2821 rt=get_reg(i_regmap,rt1[i]);
2822 }
2823 assert(rs>=0);
c6c3b1b3 2824 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2825 reglist|=(1<<rs);
2826 for(r=0;r<=12;r++) {
2827 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2828 temp=r; break;
2829 }
2830 }
db829eeb 2831 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2832 reglist&=~(1<<rt);
2833 if(temp==-1) {
2834 save_regs(reglist);
2835 regs_saved=1;
2836 temp=(rs==0)?2:0;
2837 }
2838 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2839 temp2=1;
2840 emit_readword((int)&mem_rtab,temp);
2841 emit_shrimm(rs,12,temp2);
2842 emit_readword_dualindexedx4(temp,temp2,temp2);
2843 emit_lsls_imm(temp2,1,temp2);
2844 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2845 switch(type) {
2846 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2847 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2848 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2849 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2850 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2851 }
2852 }
2853 if(regs_saved) {
2854 restore_jump=(int)out;
2855 emit_jcc(0); // jump to reg restore
2856 }
2857 else
2858 emit_jcc(stubs[n][2]); // return address
2859
2860 if(!regs_saved)
2861 save_regs(reglist);
2862 int handler=0;
2863 if(type==LOADB_STUB||type==LOADBU_STUB)
2864 handler=(int)jump_handler_read8;
2865 if(type==LOADH_STUB||type==LOADHU_STUB)
2866 handler=(int)jump_handler_read16;
2867 if(type==LOADW_STUB)
2868 handler=(int)jump_handler_read32;
2869 assert(handler!=0);
b96d3df7 2870 pass_args(rs,temp2);
c6c3b1b3 2871 int cc=get_reg(i_regmap,CCREG);
2872 if(cc<0)
2873 emit_loadreg(CCREG,2);
2573466a 2874 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2875 emit_call(handler);
2876 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2877 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2878 }
2879 if(restore_jump)
2880 set_jump_target(restore_jump,(int)out);
2881 restore_regs(reglist);
2882 emit_jmp(stubs[n][2]); // return address
57871462 2883}
2884
c6c3b1b3 2885// return memhandler, or get directly accessable address and return 0
2886u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2887{
2888 u_int l1,l2=0;
2889 l1=((u_int *)table)[addr>>12];
2890 if((l1&(1<<31))==0) {
2891 u_int v=l1<<1;
2892 *addr_host=v+addr;
2893 return 0;
2894 }
2895 else {
2896 l1<<=1;
2897 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2898 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2899 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2900 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2901 else
2902 l2=((u_int *)l1)[(addr&0xfff)/4];
2903 if((l2&(1<<31))==0) {
2904 u_int v=l2<<1;
2905 *addr_host=v+(addr&0xfff);
2906 return 0;
2907 }
2908 return l2<<1;
2909 }
2910}
c6c3b1b3 2911
57871462 2912inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2913{
2914 int rs=get_reg(regmap,target);
2915 int rth=get_reg(regmap,target|64);
2916 int rt=get_reg(regmap,target);
535d208a 2917 if(rs<0) rs=get_reg(regmap,-1);
57871462 2918 assert(rs>=0);
b1be1eee 2919 u_int handler,host_addr=0,is_dynamic,far_call=0;
2920 int cc=get_reg(regmap,CCREG);
2921 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2922 return;
c6c3b1b3 2923 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2924 if (handler==0) {
db829eeb 2925 if(rt<0||rt1[i]==0)
c6c3b1b3 2926 return;
13e35c04 2927 if(addr!=host_addr)
2928 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2929 switch(type) {
2930 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2931 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2932 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2933 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2934 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2935 default: assert(0);
2936 }
2937 return;
2938 }
b1be1eee 2939 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2940 if(is_dynamic) {
2941 if(type==LOADB_STUB||type==LOADBU_STUB)
2942 handler=(int)jump_handler_read8;
2943 if(type==LOADH_STUB||type==LOADHU_STUB)
2944 handler=(int)jump_handler_read16;
2945 if(type==LOADW_STUB)
2946 handler=(int)jump_handler_read32;
2947 }
c6c3b1b3 2948
2949 // call a memhandler
db829eeb 2950 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2951 reglist&=~(1<<rt);
2952 save_regs(reglist);
2953 if(target==0)
2954 emit_movimm(addr,0);
2955 else if(rs!=0)
2956 emit_mov(rs,0);
c6c3b1b3 2957 int offset=(int)handler-(int)out-8;
2958 if(offset<-33554432||offset>=33554432) {
2959 // unreachable memhandler, a plugin func perhaps
b1be1eee 2960 emit_movimm(handler,12);
2961 far_call=1;
2962 }
2963 if(cc<0)
2964 emit_loadreg(CCREG,2);
2965 if(is_dynamic) {
2966 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2967 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2968 }
b1be1eee 2969 else {
2970 emit_readword((int)&last_count,3);
2971 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2972 emit_add(2,3,2);
2973 emit_writeword(2,(int)&Count);
2974 }
2975
2976 if(far_call)
2977 emit_callreg(12);
c6c3b1b3 2978 else
2979 emit_call(handler);
b1be1eee 2980
db829eeb 2981 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2982 switch(type) {
2983 case LOADB_STUB: emit_signextend8(0,rt); break;
2984 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2985 case LOADH_STUB: emit_signextend16(0,rt); break;
2986 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2987 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2988 default: assert(0);
2989 }
2990 }
2991 restore_regs(reglist);
57871462 2992}
2993
2994do_writestub(int n)
2995{
2996 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2997 literal_pool(256);
2998 set_jump_target(stubs[n][1],(int)out);
2999 int type=stubs[n][0];
3000 int i=stubs[n][3];
3001 int rs=stubs[n][4];
3002 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3003 u_int reglist=stubs[n][7];
3004 signed char *i_regmap=i_regs->regmap;
3005 int addr=get_reg(i_regmap,AGEN1+(i&1));
3006 int rth,rt,r;
3007 int ds;
b9b61529 3008 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3009 rth=get_reg(i_regmap,FTEMP|64);
3010 rt=get_reg(i_regmap,r=FTEMP);
3011 }else{
3012 rth=get_reg(i_regmap,rs2[i]|64);
3013 rt=get_reg(i_regmap,r=rs2[i]);
3014 }
3015 assert(rs>=0);
3016 assert(rt>=0);
b96d3df7 3017 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3018 int reglist2=reglist|(1<<rs)|(1<<rt);
3019 for(rtmp=0;rtmp<=12;rtmp++) {
3020 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3021 temp=rtmp; break;
3022 }
3023 }
3024 if(temp==-1) {
3025 save_regs(reglist);
3026 regs_saved=1;
3027 for(rtmp=0;rtmp<=3;rtmp++)
3028 if(rtmp!=rs&&rtmp!=rt)
3029 {temp=rtmp;break;}
3030 }
3031 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3032 temp2=3;
3033 emit_readword((int)&mem_wtab,temp);
3034 emit_shrimm(rs,12,temp2);
3035 emit_readword_dualindexedx4(temp,temp2,temp2);
3036 emit_lsls_imm(temp2,1,temp2);
3037 switch(type) {
3038 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3039 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3040 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3041 default: assert(0);
3042 }
3043 if(regs_saved) {
3044 restore_jump=(int)out;
3045 emit_jcc(0); // jump to reg restore
3046 }
3047 else
3048 emit_jcc(stubs[n][2]); // return address (invcode check)
3049
3050 if(!regs_saved)
3051 save_regs(reglist);
3052 int handler=0;
3053 switch(type) {
3054 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3055 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3056 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3057 }
3058 assert(handler!=0);
3059 pass_args(rs,rt);
3060 if(temp2!=3)
3061 emit_mov(temp2,3);
3062 int cc=get_reg(i_regmap,CCREG);
3063 if(cc<0)
3064 emit_loadreg(CCREG,2);
2573466a 3065 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3066 // returns new cycle_count
3067 emit_call(handler);
2573466a 3068 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3069 if(cc<0)
3070 emit_storereg(CCREG,2);
3071 if(restore_jump)
3072 set_jump_target(restore_jump,(int)out);
3073 restore_regs(reglist);
3074 ra=stubs[n][2];
b96d3df7 3075 emit_jmp(ra);
57871462 3076}
3077
3078inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3079{
3080 int rs=get_reg(regmap,-1);
3081 int rth=get_reg(regmap,target|64);
3082 int rt=get_reg(regmap,target);
3083 assert(rs>=0);
3084 assert(rt>=0);
b96d3df7 3085 u_int handler,host_addr=0;
b96d3df7 3086 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3087 if (handler==0) {
13e35c04 3088 if(addr!=host_addr)
3089 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3090 switch(type) {
3091 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3092 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3093 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3094 default: assert(0);
3095 }
3096 return;
3097 }
3098
3099 // call a memhandler
3100 save_regs(reglist);
13e35c04 3101 pass_args(rs,rt);
b96d3df7 3102 int cc=get_reg(regmap,CCREG);
3103 if(cc<0)
3104 emit_loadreg(CCREG,2);
2573466a 3105 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3106 emit_movimm(handler,3);
3107 // returns new cycle_count
3108 emit_call((int)jump_handler_write_h);
2573466a 3109 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3110 if(cc<0)
3111 emit_storereg(CCREG,2);
3112 restore_regs(reglist);
57871462 3113}
3114
3115do_unalignedwritestub(int n)
3116{
b7918751 3117 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3118 literal_pool(256);
57871462 3119 set_jump_target(stubs[n][1],(int)out);
b7918751 3120
3121 int i=stubs[n][3];
3122 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3123 int addr=stubs[n][5];
3124 u_int reglist=stubs[n][7];
3125 signed char *i_regmap=i_regs->regmap;
3126 int temp2=get_reg(i_regmap,FTEMP);
3127 int rt;
3128 int ds, real_rs;
3129 rt=get_reg(i_regmap,rs2[i]);
3130 assert(rt>=0);
3131 assert(addr>=0);
3132 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3133 reglist|=(1<<addr);
3134 reglist&=~(1<<temp2);
3135
b96d3df7 3136#if 1
3137 // don't bother with it and call write handler
3138 save_regs(reglist);
3139 pass_args(addr,rt);
3140 int cc=get_reg(i_regmap,CCREG);
3141 if(cc<0)
3142 emit_loadreg(CCREG,2);
2573466a 3143 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3144 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3145 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3146 if(cc<0)
3147 emit_storereg(CCREG,2);
3148 restore_regs(reglist);
3149 emit_jmp(stubs[n][2]); // return address
3150#else
b7918751 3151 emit_andimm(addr,0xfffffffc,temp2);
3152 emit_writeword(temp2,(int)&address);
3153
3154 save_regs(reglist);
b7918751 3155 emit_shrimm(addr,16,1);
3156 int cc=get_reg(i_regmap,CCREG);
3157 if(cc<0) {
3158 emit_loadreg(CCREG,2);
3159 }
3160 emit_movimm((u_int)readmem,0);
3161 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
b7918751 3162 emit_call((int)&indirect_jump_indexed);
3163 restore_regs(reglist);
3164
3165 emit_readword((int)&readmem_dword,temp2);
3166 int temp=addr; //hmh
3167 emit_shlimm(addr,3,temp);
3168 emit_andimm(temp,24,temp);
3169#ifdef BIG_ENDIAN_MIPS
3170 if (opcode[i]==0x2e) // SWR
3171#else
3172 if (opcode[i]==0x2a) // SWL
3173#endif
3174 emit_xorimm(temp,24,temp);
3175 emit_movimm(-1,HOST_TEMPREG);
55439448 3176 if (opcode[i]==0x2a) { // SWL
b7918751 3177 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3178 emit_orrshr(rt,temp,temp2);
3179 }else{
3180 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3181 emit_orrshl(rt,temp,temp2);
3182 }
3183 emit_readword((int)&address,addr);
3184 emit_writeword(temp2,(int)&word);
3185 //save_regs(reglist); // don't need to, no state changes
3186 emit_shrimm(addr,16,1);
3187 emit_movimm((u_int)writemem,0);
3188 //emit_call((int)&indirect_jump_indexed);
3189 emit_mov(15,14);
3190 emit_readword_dualindexedx4(0,1,15);
3191 emit_readword((int)&Count,HOST_TEMPREG);
3192 emit_readword((int)&next_interupt,2);
3193 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3194 emit_writeword(2,(int)&last_count);
3195 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3196 if(cc<0) {
3197 emit_storereg(CCREG,HOST_TEMPREG);
3198 }
3199 restore_regs(reglist);
57871462 3200 emit_jmp(stubs[n][2]); // return address
b96d3df7 3201#endif
57871462 3202}
3203
/*
 * Debug helper: prints seven of the passed values in hex, followed in
 * parentheses by the int stored immediately below edi in memory.
 * esp is accepted for signature compatibility but is never printed.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int *slot=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,slot[-1]);
}
3208
3209do_invstub(int n)
3210{
3211 literal_pool(20);
3212 u_int reglist=stubs[n][3];
3213 set_jump_target(stubs[n][1],(int)out);
3214 save_regs(reglist);
3215 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3216 emit_call((int)&invalidate_addr);
3217 restore_regs(reglist);
3218 emit_jmp(stubs[n][2]); // return address
3219}
3220
3221int do_dirty_stub(int i)
3222{
3223 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 3224 u_int addr=(u_int)source;
57871462 3225 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3226 #ifndef HAVE_ARMV7
ac545b3a 3227 emit_loadlp(addr,1);
57871462 3228 emit_loadlp((int)copy,2);
3229 emit_loadlp(slen*4,3);
3230 #else
ac545b3a 3231 emit_movw(addr&0x0000FFFF,1);
57871462 3232 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3233 emit_movt(addr&0xFFFF0000,1);
57871462 3234 emit_movt(((u_int)copy)&0xFFFF0000,2);
3235 emit_movw(slen*4,3);
3236 #endif
3237 emit_movimm(start+i*4,0);
3238 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3239 int entry=(int)out;
3240 load_regs_entry(i);
3241 if(entry==(int)out) entry=instr_addr[i];
3242 emit_jmp(instr_addr[i]);
3243 return entry;
3244}
3245
3246void do_dirty_stub_ds()
3247{
3248 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3249 #ifndef HAVE_ARMV7
57871462 3250 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3251 emit_loadlp((int)copy,2);
3252 emit_loadlp(slen*4,3);
3253 #else
3254 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3255 emit_movw(((u_int)copy)&0x0000FFFF,2);
3256 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3257 emit_movt(((u_int)copy)&0xFFFF0000,2);
3258 emit_movw(slen*4,3);
3259 #endif
3260 emit_movimm(start+1,0);
3261 emit_call((int)&verify_code_ds);
3262}
3263
3264do_cop1stub(int n)
3265{
3266 literal_pool(256);
3267 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3268 set_jump_target(stubs[n][1],(int)out);
3269 int i=stubs[n][3];
3d624f89 3270// int rs=stubs[n][4];
57871462 3271 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3272 int ds=stubs[n][6];
3273 if(!ds) {
3274 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3275 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3276 }
3277 //else {printf("fp exception in delay slot\n");}
3278 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3279 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3280 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3281 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3282 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3283}
3284
/*
 * TLB placeholders: each one accepts any argument list, ignores it and
 * returns 0.
 */
static int do_tlb_r(int a, ...)
{
  (void)a;
  return 0;
}

static int do_tlb_r_branch(int a, ...)
{
  (void)a;
  return 0;
}

static int gen_tlb_addr_r(int a, ...)
{
  (void)a;
  return 0;
}

static int do_tlb_w(int a, ...)
{
  (void)a;
  return 0;
}

static int do_tlb_w_branch(int a, ...)
{
  (void)a;
  return 0;
}

static int gen_tlb_addr_w(int a, ...)
{
  (void)a;
  return 0;
}
63cb0298 3291
57871462 3292/* Special assem */
3293
3294void shift_assemble_arm(int i,struct regstat *i_regs)
3295{
3296 if(rt1[i]) {
3297 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3298 {
3299 signed char s,t,shift;
3300 t=get_reg(i_regs->regmap,rt1[i]);
3301 s=get_reg(i_regs->regmap,rs1[i]);
3302 shift=get_reg(i_regs->regmap,rs2[i]);
3303 if(t>=0){
3304 if(rs1[i]==0)
3305 {
3306 emit_zeroreg(t);
3307 }
3308 else if(rs2[i]==0)
3309 {
3310 assert(s>=0);
3311 if(s!=t) emit_mov(s,t);
3312 }
3313 else
3314 {
3315 emit_andimm(shift,31,HOST_TEMPREG);
3316 if(opcode2[i]==4) // SLLV
3317 {
3318 emit_shl(s,HOST_TEMPREG,t);
3319 }
3320 if(opcode2[i]==6) // SRLV
3321 {
3322 emit_shr(s,HOST_TEMPREG,t);
3323 }
3324 if(opcode2[i]==7) // SRAV
3325 {
3326 emit_sar(s,HOST_TEMPREG,t);
3327 }
3328 }
3329 }
3330 } else { // DSLLV/DSRLV/DSRAV
3331 signed char sh,sl,th,tl,shift;
3332 th=get_reg(i_regs->regmap,rt1[i]|64);
3333 tl=get_reg(i_regs->regmap,rt1[i]);
3334 sh=get_reg(i_regs->regmap,rs1[i]|64);
3335 sl=get_reg(i_regs->regmap,rs1[i]);
3336 shift=get_reg(i_regs->regmap,rs2[i]);
3337 if(tl>=0){
3338 if(rs1[i]==0)
3339 {
3340 emit_zeroreg(tl);
3341 if(th>=0) emit_zeroreg(th);
3342 }
3343 else if(rs2[i]==0)
3344 {
3345 assert(sl>=0);
3346 if(sl!=tl) emit_mov(sl,tl);
3347 if(th>=0&&sh!=th) emit_mov(sh,th);
3348 }
3349 else
3350 {
3351 // FIXME: What if shift==tl ?
3352 assert(shift!=tl);
3353 int temp=get_reg(i_regs->regmap,-1);
3354 int real_th=th;
3355 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3356 assert(sl>=0);
3357 assert(sh>=0);
3358 emit_andimm(shift,31,HOST_TEMPREG);
3359 if(opcode2[i]==0x14) // DSLLV
3360 {
3361 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3362 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3363 emit_orrshr(sl,HOST_TEMPREG,th);
3364 emit_andimm(shift,31,HOST_TEMPREG);
3365 emit_testimm(shift,32);
3366 emit_shl(sl,HOST_TEMPREG,tl);
3367 if(th>=0) emit_cmovne_reg(tl,th);
3368 emit_cmovne_imm(0,tl);
3369 }
3370 if(opcode2[i]==0x16) // DSRLV
3371 {
3372 assert(th>=0);
3373 emit_shr(sl,HOST_TEMPREG,tl);
3374 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3375 emit_orrshl(sh,HOST_TEMPREG,tl);
3376 emit_andimm(shift,31,HOST_TEMPREG);
3377 emit_testimm(shift,32);
3378 emit_shr(sh,HOST_TEMPREG,th);
3379 emit_cmovne_reg(th,tl);
3380 if(real_th>=0) emit_cmovne_imm(0,th);
3381 }
3382 if(opcode2[i]==0x17) // DSRAV
3383 {
3384 assert(th>=0);
3385 emit_shr(sl,HOST_TEMPREG,tl);
3386 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3387 if(real_th>=0) {
3388 assert(temp>=0);
3389 emit_sarimm(th,31,temp);
3390 }
3391 emit_orrshl(sh,HOST_TEMPREG,tl);
3392 emit_andimm(shift,31,HOST_TEMPREG);
3393 emit_testimm(shift,32);
3394 emit_sar(sh,HOST_TEMPREG,th);
3395 emit_cmovne_reg(th,tl);
3396 if(real_th>=0) emit_cmovne_reg(temp,th);
3397 }
3398 }
3399 }
3400 }
3401 }
3402}
ffb0b9e0 3403
ffb0b9e0 3404static void speculate_mov(int rs,int rt)
3405{
3406 if(rt!=0) {
3407 smrv_strong_next|=1<<rt;
3408 smrv[rt]=smrv[rs];
3409 }
3410}
3411
3412static void speculate_mov_weak(int rs,int rt)
3413{
3414 if(rt!=0) {
3415 smrv_weak_next|=1<<rt;
3416 smrv[rt]=smrv[rs];
3417 }
3418}
3419
3420static void speculate_register_values(int i)
3421{
3422 if(i==0) {
3423 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3424 // gp,sp are likely to stay the same throughout the block
3425 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3426 smrv_weak_next=~smrv_strong_next;
3427 //printf(" llr %08x\n", smrv[4]);
3428 }
3429 smrv_strong=smrv_strong_next;
3430 smrv_weak=smrv_weak_next;
3431 switch(itype[i]) {
3432 case ALU:
3433 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3434 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3435 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3436 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3437 else {
3438 smrv_strong_next&=~(1<<rt1[i]);
3439 smrv_weak_next&=~(1<<rt1[i]);
3440 }
3441 break;
3442 case SHIFTIMM:
3443 smrv_strong_next&=~(1<<rt1[i]);
3444 smrv_weak_next&=~(1<<rt1[i]);
3445 // fallthrough
3446 case IMM16:
3447 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3448 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3449 if(hr>=0) {
3450 if(get_final_value(hr,i,&value))
3451 smrv[rt1[i]]=value;
3452 else smrv[rt1[i]]=constmap[i][hr];
3453 smrv_strong_next|=1<<rt1[i];
3454 }
3455 }
3456 else {
3457 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3458 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3459 }
3460 break;
3461 case LOAD:
3462 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3463 // special case for BIOS
3464 smrv[rt1[i]]=0xa0000000;
3465 smrv_strong_next|=1<<rt1[i];
3466 break;
3467 }
3468 // fallthrough
3469 case SHIFT:
3470 case LOADLR:
3471 case MOV:
3472 smrv_strong_next&=~(1<<rt1[i]);
3473 smrv_weak_next&=~(1<<rt1[i]);
3474 break;
3475 case COP0:
3476 case COP2:
3477 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3478 smrv_strong_next&=~(1<<rt1[i]);
3479 smrv_weak_next&=~(1<<rt1[i]);
3480 }
3481 break;
3482 case C2LS:
3483 if (opcode[i]==0x32) { // LWC2
3484 smrv_strong_next&=~(1<<rt1[i]);
3485 smrv_weak_next&=~(1<<rt1[i]);
3486 }
3487 break;
3488 }
3489#if 0
3490 int r=4;
3491 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3492 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3493#endif
3494}
3495
/* Memory region classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0,
  MTYPE_8020 = 1,
  MTYPE_0000 = 2,
  MTYPE_A000 = 3,
  MTYPE_1F80 = 4,
};
3503
3504static int get_ptr_mem_type(u_int a)
3505{
3506 if(a < 0x00200000) {
3507 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3508 // return wrong, must use memhandler for BIOS self-test to pass
3509 // 007 does similar stuff from a00 mirror, weird stuff
3510 return MTYPE_8000;
3511 return MTYPE_0000;
3512 }
3513 if(0x1f800000 <= a && a < 0x1f801000)
3514 return MTYPE_1F80;
3515 if(0x80200000 <= a && a < 0x80800000)
3516 return MTYPE_8020;
3517 if(0xa0000000 <= a && a < 0xa0200000)
3518 return MTYPE_A000;
3519 return MTYPE_8000;
3520}
ffb0b9e0 3521
3522static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3523{
3524 int jaddr,type=0;
ffb0b9e0 3525 int mr=rs1[i];
3526 if(((smrv_strong|smrv_weak)>>mr)&1) {
3527 type=get_ptr_mem_type(smrv[mr]);
3528 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3529 }
3530 else {
3531 // use the mirror we are running on
3532 type=get_ptr_mem_type(start);
3533 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3534 }
3535
3536 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3537 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3538 addr=*addr_reg_override=HOST_TEMPREG;
3539 type=0;
3540 }
3541 else if(type==MTYPE_0000) { // RAM 0 mirror
3542 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3543 addr=*addr_reg_override=HOST_TEMPREG;
3544 type=0;
3545 }
3546 else if(type==MTYPE_A000) { // RAM A mirror
3547 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3548 addr=*addr_reg_override=HOST_TEMPREG;
3549 type=0;
3550 }
3551 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3552 if (psxH == (void *)0x1f800000) {
3553 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3554 emit_cmpimm(HOST_TEMPREG,0x1000);
3555 jaddr=(int)out;
3556 emit_jc(0);
3557 }
3558 else {
3559 // do usual RAM check, jump will go to the right handler
3560 type=0;
3561 }
ffb0b9e0 3562 }
ffb0b9e0 3563
3564 if(type==0)
3565 {
3566 emit_cmpimm(addr,RAM_SIZE);
3567 jaddr=(int)out;
3568 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3569 // Hint to branch predictor that the branch is unlikely to be taken
3570 if(rs1[i]>=28)
3571 emit_jno_unlikely(0);
3572 else
3573 #endif
3574 emit_jno(0);
a327ad27 3575 if(ram_offset!=0) {
3576 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3577 addr=*addr_reg_override=HOST_TEMPREG;
3578 }
ffb0b9e0 3579 }
3580
3581 return jaddr;
3582}
3583
57871462 3584#define shift_assemble shift_assemble_arm
3585
3586void loadlr_assemble_arm(int i,struct regstat *i_regs)
3587{
3588 int s,th,tl,temp,temp2,addr,map=-1;
3589 int offset;
3590 int jaddr=0;
af4ee1fe 3591 int memtarget=0,c=0;
ffb0b9e0 3592 int fastload_reg_override=0;
57871462 3593 u_int hr,reglist=0;
3594 th=get_reg(i_regs->regmap,rt1[i]|64);
3595 tl=get_reg(i_regs->regmap,rt1[i]);
3596 s=get_reg(i_regs->regmap,rs1[i]);
3597 temp=get_reg(i_regs->regmap,-1);
3598 temp2=get_reg(i_regs->regmap,FTEMP);
3599 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3600 assert(addr<0);
3601 offset=imm[i];
3602 for(hr=0;hr<HOST_REGS;hr++) {
3603 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3604 }
3605 reglist|=1<<temp;
3606 if(offset||s<0||c) addr=temp2;
3607 else addr=s;
3608 if(s>=0) {
3609 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3610 if(c) {
3611 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3612 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3613 }
57871462 3614 }
535d208a 3615 if(!using_tlb) {
3616 if(!c) {
3617 #ifdef RAM_OFFSET
3618 map=get_reg(i_regs->regmap,ROREG);
3619 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3620 #endif
3621 emit_shlimm(addr,3,temp);
3622 if (opcode[i]==0x22||opcode[i]==0x26) {
3623 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3624 }else{
535d208a 3625 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3626 }
ffb0b9e0 3627 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 3628 }
3629 else {
a327ad27 3630 if(ram_offset&&memtarget) {
3631 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3632 fastload_reg_override=HOST_TEMPREG;
3633 }
535d208a 3634 if (opcode[i]==0x22||opcode[i]==0x26) {
3635 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3636 }else{
3637 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3638 }
57871462 3639 }
535d208a 3640 }else{ // using tlb
3641 int a;
3642 if(c) {
3643 a=-1;
3644 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3645 a=0xFFFFFFFC; // LWL/LWR
3646 }else{
3647 a=0xFFFFFFF8; // LDL/LDR
3648 }
3649 map=get_reg(i_regs->regmap,TLREG);
3650 assert(map>=0);
ea3d2e6e 3651 reglist&=~(1<<map);
535d208a 3652 if(c) {
3653 if (opcode[i]==0x22||opcode[i]==0x26) {
3654 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3655 }else{
3656 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3657 }
535d208a 3658 }
535d208a 3659 }
3660 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3661 if(!c||memtarget) {
ffb0b9e0 3662 int a=temp2;
3663 if(fastload_reg_override) a=fastload_reg_override;
535d208a 3664 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 3665 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 3666 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3667 }
3668 else
3669 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3670 if(rt1[i]) {
3671 assert(tl>=0);
57871462 3672 emit_andimm(temp,24,temp);
2002a1db 3673#ifdef BIG_ENDIAN_MIPS
3674 if (opcode[i]==0x26) // LWR
3675#else
3676 if (opcode[i]==0x22) // LWL
3677#endif
3678 emit_xorimm(temp,24,temp);
57871462 3679 emit_movimm(-1,HOST_TEMPREG);
3680 if (opcode[i]==0x26) {
3681 emit_shr(temp2,temp,temp2);
3682 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3683 }else{
3684 emit_shl(temp2,temp,temp2);
3685 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3686 }
3687 emit_or(temp2,tl,tl);
57871462 3688 }
535d208a 3689 //emit_storereg(rt1[i],tl); // DEBUG
3690 }
3691 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 3692 // FIXME: little endian, fastload_reg_override
535d208a 3693 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3694 if(!c||memtarget) {
3695 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3696 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3697 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3698 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3699 }
3700 else
3701 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3702 if(rt1[i]) {
3703 assert(th>=0);
3704 assert(tl>=0);
57871462 3705 emit_testimm(temp,32);
3706 emit_andimm(temp,24,temp);
3707 if (opcode[i]==0x1A) { // LDL
3708 emit_rsbimm(temp,32,HOST_TEMPREG);
3709 emit_shl(temp2h,temp,temp2h);
3710 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3711 emit_movimm(-1,HOST_TEMPREG);
3712 emit_shl(temp2,temp,temp2);
3713 emit_cmove_reg(temp2h,th);
3714 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3715 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3716 emit_orreq(temp2,tl,tl);
3717 emit_orrne(temp2,th,th);
3718 }
3719 if (opcode[i]==0x1B) { // LDR
3720 emit_xorimm(temp,24,temp);
3721 emit_rsbimm(temp,32,HOST_TEMPREG);
3722 emit_shr(temp2,temp,temp2);
3723 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3724 emit_movimm(-1,HOST_TEMPREG);
3725 emit_shr(temp2h,temp,temp2h);
3726 emit_cmovne_reg(temp2,tl);
3727 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3728 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3729 emit_orrne(temp2h,th,th);
3730 emit_orreq(temp2h,tl,tl);
3731 }
3732 }
3733 }
3734}
3735#define loadlr_assemble loadlr_assemble_arm
3736
3737void cop0_assemble(int i,struct regstat *i_regs)
3738{
3739 if(opcode2[i]==0) // MFC0
3740 {
3741 signed char t=get_reg(i_regs->regmap,rt1[i]);
3742 char copr=(source[i]>>11)&0x1f;
3743 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3744 if(t>=0&&rt1[i]!=0) {
7139f3c8 3745 emit_readword((int)&reg_cop0+copr*4,t);
57871462 3746 }
3747 }
3748 else if(opcode2[i]==4) // MTC0
3749 {
3750 signed char s=get_reg(i_regs->regmap,rs1[i]);
3751 char copr=(source[i]>>11)&0x1f;
3752 assert(s>=0);
63cb0298 3753 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 3754 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 3755 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 3756 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 3757 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 3758 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 3759 emit_writeword(HOST_CCREG,(int)&Count);
3760 }
3761 // What a mess. The status register (12) can enable interrupts,
3762 // so needs a special case to handle a pending interrupt.
3763 // The interrupt must be taken immediately, because a subsequent
3764 // instruction might disable interrupts again.
7139f3c8 3765 if(copr==12||copr==13) {
fca1aef2 3766 if (is_delayslot) {
3767 // burn cycles to cause cc_interrupt, which will
3768 // reschedule next_interupt. Relies on CCREG from above.
3769 assem_debug("MTC0 DS %d\n", copr);
3770 emit_writeword(HOST_CCREG,(int)&last_count);
3771 emit_movimm(0,HOST_CCREG);
3772 emit_storereg(CCREG,HOST_CCREG);
caeefe31 3773 emit_loadreg(rs1[i],1);
fca1aef2 3774 emit_movimm(copr,0);
3775 emit_call((int)pcsx_mtc0_ds);
042c7287 3776 emit_loadreg(rs1[i],s);
fca1aef2 3777 return;
3778 }
63cb0298 3779 emit_movimm(start+i*4+4,HOST_TEMPREG);
3780 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3781 emit_movimm(0,HOST_TEMPREG);
3782 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 3783 }
3784 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3785 //else
caeefe31 3786 if(s==HOST_CCREG)
3787 emit_loadreg(rs1[i],1);
3788 else if(s!=1)
63cb0298 3789 emit_mov(s,1);
fca1aef2 3790 emit_movimm(copr,0);
3791 emit_call((int)pcsx_mtc0);
7139f3c8 3792 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3793 emit_readword((int)&Count,HOST_CCREG);
042c7287 3794 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 3795 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 3796 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3797 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 3798 emit_storereg(CCREG,HOST_CCREG);
3799 }
7139f3c8 3800 if(copr==12||copr==13) {
57871462 3801 assert(!is_delayslot);
3802 emit_readword((int)&pending_exception,14);
042c7287 3803 emit_test(14,14);
3804 emit_jne((int)&do_interrupt);
57871462 3805 }
3806 emit_loadreg(rs1[i],s);
3807 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3808 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 3809 cop1_usable=0;
3810 }
3811 else
3812 {
3813 assert(opcode2[i]==0x10);
576bbd8f 3814 if((source[i]&0x3f)==0x10) // RFE
3815 {
3816 emit_readword((int)&Status,0);
3817 emit_andimm(0,0x3c,1);
3818 emit_andimm(0,~0xf,0);
3819 emit_orrshr_imm(1,2,0);
3820 emit_writeword(0,(int)&Status);
3821 }
57871462 3822 }
3823}
3824
b9b61529 3825static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3826{
3827 switch (copr) {
3828 case 1:
3829 case 3:
3830 case 5:
3831 case 8:
3832 case 9:
3833 case 10:
3834 case 11:
3835 emit_readword((int)&reg_cop2d[copr],tl);
3836 emit_signextend16(tl,tl);
3837 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3838 break;
3839 case 7:
3840 case 16:
3841 case 17:
3842 case 18:
3843 case 19:
3844 emit_readword((int)&reg_cop2d[copr],tl);
3845 emit_andimm(tl,0xffff,tl);
3846 emit_writeword(tl,(int)&reg_cop2d[copr]);
3847 break;
3848 case 15:
3849 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3850 emit_writeword(tl,(int)&reg_cop2d[copr]);
3851 break;
3852 case 28:
b9b61529 3853 case 29:
3854 emit_readword((int)&reg_cop2d[9],temp);
3855 emit_testimm(temp,0x8000); // do we need this?
3856 emit_andimm(temp,0xf80,temp);
3857 emit_andne_imm(temp,0,temp);
f70d384d 3858 emit_shrimm(temp,7,tl);
b9b61529 3859 emit_readword((int)&reg_cop2d[10],temp);
3860 emit_testimm(temp,0x8000);
3861 emit_andimm(temp,0xf80,temp);
3862 emit_andne_imm(temp,0,temp);
f70d384d 3863 emit_orrshr_imm(temp,2,tl);
b9b61529 3864 emit_readword((int)&reg_cop2d[11],temp);
3865 emit_testimm(temp,0x8000);
3866 emit_andimm(temp,0xf80,temp);
3867 emit_andne_imm(temp,0,temp);
f70d384d 3868 emit_orrshl_imm(temp,3,tl);
b9b61529 3869 emit_writeword(tl,(int)&reg_cop2d[copr]);
3870 break;
3871 default:
3872 emit_readword((int)&reg_cop2d[copr],tl);
3873 break;
3874 }
3875}
3876
3877static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3878{
3879 switch (copr) {
3880 case 15:
3881 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3882 emit_writeword(sl,(int)&reg_cop2d[copr]);
3883 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3884 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3885 emit_writeword(sl,(int)&reg_cop2d[14]);
3886 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3887 break;
3888 case 28:
3889 emit_andimm(sl,0x001f,temp);
f70d384d 3890 emit_shlimm(temp,7,temp);
b9b61529 3891 emit_writeword(temp,(int)&reg_cop2d[9]);
3892 emit_andimm(sl,0x03e0,temp);
f70d384d 3893 emit_shlimm(temp,2,temp);
b9b61529 3894 emit_writeword(temp,(int)&reg_cop2d[10]);
3895 emit_andimm(sl,0x7c00,temp);
f70d384d 3896 emit_shrimm(temp,3,temp);
b9b61529 3897 emit_writeword(temp,(int)&reg_cop2d[11]);
3898 emit_writeword(sl,(int)&reg_cop2d[28]);
3899 break;
3900 case 30:
3901 emit_movs(sl,temp);
3902 emit_mvnmi(temp,temp);
665f33e1 3903#ifdef HAVE_ARMV5
b9b61529 3904 emit_clz(temp,temp);
665f33e1 3905#else
3906 emit_movs(temp,HOST_TEMPREG);
3907 emit_movimm(0,temp);
3908 emit_jeq((int)out+4*4);
3909 emit_addpl_imm(temp,1,temp);
3910 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3911 emit_jns((int)out-2*4);
3912#endif
b9b61529 3913 emit_writeword(sl,(int)&reg_cop2d[30]);
3914 emit_writeword(temp,(int)&reg_cop2d[31]);
3915 break;
b9b61529 3916 case 31:
3917 break;
3918 default:
3919 emit_writeword(sl,(int)&reg_cop2d[copr]);
3920 break;
3921 }
3922}
3923
3924void cop2_assemble(int i,struct regstat *i_regs)
3925{
3926 u_int copr=(source[i]>>11)&0x1f;
3927 signed char temp=get_reg(i_regs->regmap,-1);
3928 if (opcode2[i]==0) { // MFC2
3929 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3930 if(tl>=0&&rt1[i]!=0)
b9b61529 3931 cop2_get_dreg(copr,tl,temp);
3932 }
3933 else if (opcode2[i]==4) { // MTC2
3934 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3935 cop2_put_dreg(copr,sl,temp);
3936 }
3937 else if (opcode2[i]==2) // CFC2
3938 {
3939 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3940 if(tl>=0&&rt1[i]!=0)
b9b61529 3941 emit_readword((int)&reg_cop2c[copr],tl);
3942 }
3943 else if (opcode2[i]==6) // CTC2
3944 {
3945 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3946 switch(copr) {
3947 case 4:
3948 case 12:
3949 case 20:
3950 case 26:
3951 case 27:
3952 case 29:
3953 case 30:
3954 emit_signextend16(sl,temp);
3955 break;
3956 case 31:
3957 //value = value & 0x7ffff000;
3958 //if (value & 0x7f87e000) value |= 0x80000000;
3959 emit_shrimm(sl,12,temp);
3960 emit_shlimm(temp,12,temp);
3961 emit_testimm(temp,0x7f000000);
3962 emit_testeqimm(temp,0x00870000);
3963 emit_testeqimm(temp,0x0000e000);
3964 emit_orrne_imm(temp,0x80000000,temp);
3965 break;
3966 default:
3967 temp=sl;
3968 break;
3969 }
3970 emit_writeword(temp,(int)&reg_cop2c[copr]);
3971 assert(sl>=0);
3972 }
3973}
3974
054175e9 3975static void c2op_prologue(u_int op,u_int reglist)
3976{
3977 save_regs_all(reglist);
82ed88eb 3978#ifdef PCNT
3979 emit_movimm(op,0);
3980 emit_call((int)pcnt_gte_start);
3981#endif
054175e9 3982 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3983}
3984
3985static void c2op_epilogue(u_int op,u_int reglist)
3986{
82ed88eb 3987#ifdef PCNT
3988 emit_movimm(op,0);
3989 emit_call((int)pcnt_gte_end);
3990#endif
054175e9 3991 restore_regs_all(reglist);
3992}
3993
6c0eefaf 3994static void c2op_call_MACtoIR(int lm,int need_flags)
3995{
3996 if(need_flags)
3997 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3998 else
3999 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
4000}
4001
4002static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
4003{
4004 emit_call((int)func);
4005 // func is C code and trashes r0
4006 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4007 if(need_flags||need_ir)
4008 c2op_call_MACtoIR(lm,need_flags);
4009 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
4010}
4011
054175e9 4012static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4013{
4014 signed char temp=get_reg(i_regs->regmap,-1);
4015 u_int c2op=source[i]&0x3f;
6c0eefaf 4016 u_int hr,reglist_full=0,reglist;
054175e9 4017 int need_flags,need_ir;
b9b61529 4018 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 4019 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 4020 }
4d646738 4021 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 4022
4023 if (gte_handlers[c2op]!=NULL) {
bedfea38 4024 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4025 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 4026 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
4027 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 4028 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
4029 need_flags=0;
6c0eefaf 4030 int shift = (source[i] >> 19) & 1;
4031 int lm = (source[i] >> 10) & 1;
054175e9 4032 switch(c2op) {
19776aef 4033#ifndef DRC_DBG
054175e9 4034 case GTE_MVMVA: {
82336ba3 4035#ifdef HAVE_ARMV5
054175e9 4036 int v = (source[i] >> 15) & 3;
4037 int cv = (source[i] >> 13) & 3;
4038 int mx = (source[i] >> 17) & 3;
4d646738 4039 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 4040 c2op_prologue(c2op,reglist);
4041 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4042 if(v<3)
4043 emit_ldrd(v*8,0,4);
4044 else {
4045 emit_movzwl_indexed(9*4,0,4); // gteIR
4046 emit_movzwl_indexed(10*4,0,6);
4047 emit_movzwl_indexed(11*4,0,5);
4048 emit_orrshl_imm(6,16,4);
4049 }
4050 if(mx<3)
4051 emit_addimm(0,32*4+mx*8*4,6);
4052 else
4053 emit_readword((int)&zeromem_ptr,6);
4054 if(cv<3)
4055 emit_addimm(0,32*4+(cv*8+5)*4,7);
4056 else
4057 emit_readword((int)&zeromem_ptr,7);
4058#ifdef __ARM_NEON__
4059 emit_movimm(source[i],1); // opcode
4060 emit_call((int)gteMVMVA_part_neon);
4061 if(need_flags) {
4062 emit_movimm(lm,1);
4063 emit_call((int)gteMACtoIR_flags_neon);
4064 }
4065#else
4066 if(cv==3&&shift)
4067 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4068 else {
4069 emit_movimm(shift,1);
4070 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4071 }
6c0eefaf 4072 if(need_flags||need_ir)
4073 c2op_call_MACtoIR(lm,need_flags);
82336ba3 4074#endif
4075#else /* if not HAVE_ARMV5 */
4076 c2op_prologue(c2op,reglist);
4077 emit_movimm(source[i],1); // opcode
4078 emit_writeword(1,(int)&psxRegs.code);
4079 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 4080#endif
4081 break;
4082 }
6c0eefaf 4083 case GTE_OP:
4084 c2op_prologue(c2op,reglist);
4085 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
4086 if(need_flags||need_ir) {
4087 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4088 c2op_call_MACtoIR(lm,need_flags);
4089 }
4090 break;
4091 case GTE_DPCS:
4092 c2op_prologue(c2op,reglist);
4093 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
4094 break;
4095 case GTE_INTPL:
4096 c2op_prologue(c2op,reglist);
4097 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
4098 break;
4099 case GTE_SQR:
4100 c2op_prologue(c2op,reglist);
4101 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
4102 if(need_flags||need_ir) {
4103 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4104 c2op_call_MACtoIR(lm,need_flags);
4105 }
4106 break;
4107 case GTE_DCPL:
4108 c2op_prologue(c2op,reglist);
4109 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
4110 break;
4111 case GTE_GPF:
4112 c2op_prologue(c2op,reglist);
4113 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
4114 break;
4115 case GTE_GPL:
4116 c2op_prologue(c2op,reglist);
4117 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
4118 break;
19776aef 4119#endif
054175e9 4120 default:
054175e9 4121 c2op_prologue(c2op,reglist);
19776aef 4122#ifdef DRC_DBG
4123 emit_movimm(source[i],1); // opcode
4124 emit_writeword(1,(int)&psxRegs.code);
4125#endif
054175e9 4126 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4127 break;
4128 }
4129 c2op_epilogue(c2op,reglist);
4130 }
b9b61529 4131}
4132
4133void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4134{
4135 // XXX: should just just do the exception instead
4136 if(!cop1_usable) {
4137 int jaddr=(int)out;
4138 emit_jmp(0);
4139 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4140 cop1_usable=1;
4141 }
4142}
4143
/* COP1 register transfers: only the coprocessor-unusable check is emitted. */
void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
4148
/* FP conversions: only the coprocessor-unusable check is emitted. */
void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
4153#define fconv_assemble fconv_assemble_arm
4154
/* FP compares: only the coprocessor-unusable check is emitted. */
void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
4159
/* FP arithmetic: only the coprocessor-unusable check is emitted. */
void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
4164
4165void multdiv_assemble_arm(int i,struct regstat *i_regs)
4166{
4167 // case 0x18: MULT
4168 // case 0x19: MULTU
4169 // case 0x1A: DIV
4170 // case 0x1B: DIVU
4171 // case 0x1C: DMULT
4172 // case 0x1D: DMULTU
4173 // case 0x1E: DDIV
4174 // case 0x1F: DDIVU
4175 if(rs1[i]&&rs2[i])
4176 {
4177 if((opcode2[i]&4)==0) // 32-bit
4178 {
4179 if(opcode2[i]==0x18) // MULT
4180 {
4181 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4182 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4183 signed char hi=get_reg(i_regs->regmap,HIREG);
4184 signed char lo=get_reg(i_regs->regmap,LOREG);
4185 assert(m1>=0);
4186 assert(m2>=0);
4187 assert(hi>=0);
4188 assert(lo>=0);
4189 emit_smull(m1,m2,hi,lo);
4190 }
4191 if(opcode2[i]==0x19) // MULTU
4192 {
4193 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4194 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4195 signed char hi=get_reg(i_regs->regmap,HIREG);
4196 signed char lo=get_reg(i_regs->regmap,LOREG);
4197 assert(m1>=0);
4198 assert(m2>=0);
4199 assert(hi>=0);
4200 assert(lo>=0);
4201 emit_umull(m1,m2,hi,lo);
4202 }
4203 if(opcode2[i]==0x1A) // DIV
4204 {
4205 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4206 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4207 assert(d1>=0);
4208 assert(d2>=0);
4209 signed char quotient=get_reg(i_regs->regmap,LOREG);
4210 signed char remainder=get_reg(i_regs->regmap,HIREG);
4211 assert(quotient>=0);
4212 assert(remainder>=0);
4213 emit_movs(d1,remainder);
44a80f6a 4214 emit_movimm(0xffffffff,quotient);
4215 emit_negmi(quotient,quotient); // .. quotient and ..
4216 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 4217 emit_movs(d2,HOST_TEMPREG);
4218 emit_jeq((int)out+52); // Division by zero
82336ba3 4219 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 4220#ifdef HAVE_ARMV5
57871462 4221 emit_clz(HOST_TEMPREG,quotient);
4222 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 4223#else
4224 emit_movimm(0,quotient);
4225 emit_addpl_imm(quotient,1,quotient);
4226 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
4227 emit_jns((int)out-2*4);
4228#endif
57871462 4229 emit_orimm(quotient,1<<31,quotient);
4230 emit_shr(quotient,quotient,quotient);
4231 emit_cmp(remainder,HOST_TEMPREG);
4232 emit_subcs(remainder,HOST_TEMPREG,remainder);
4233 emit_adcs(quotient,quotient,quotient);
4234 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4235 emit_jcc((int)out-16); // -4
4236 emit_teq(d1,d2);
4237 emit_negmi(quotient,quotient);
4238 emit_test(d1,d1);
4239 emit_negmi(remainder,remainder);
4240 }
4241 if(opcode2[i]==0x1B) // DIVU
4242 {
4243 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4244 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4245 assert(d1>=0);
4246 assert(d2>=0);
4247 signed char quotient=get_reg(i_regs->regmap,LOREG);
4248 signed char remainder=get_reg(i_regs->regmap,HIREG);
4249 assert(quotient>=0);
4250 assert(remainder>=0);
44a80f6a 4251 emit_mov(d1,remainder);
4252 emit_movimm(0xffffffff,quotient); // div0 case
57871462 4253 emit_test(d2,d2);
44a80f6a 4254 emit_jeq((int)out+40); // Division by zero
665f33e1 4255#ifdef HAVE_ARMV5
57871462 4256 emit_clz(d2,HOST_TEMPREG);
4257 emit_movimm(1<<31,quotient);
4258 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 4259#else
4260 emit_movimm(0,HOST_TEMPREG);
82336ba3 4261 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
4262 emit_lslpls_imm(d2,1,d2);
665f33e1 4263 emit_jns((int)out-2*4);
4264 emit_movimm(1<<31,quotient);
4265#endif
57871462 4266 emit_shr(quotient,HOST_TEMPREG,quotient);
4267 emit_cmp(remainder,d2);
4268 emit_subcs(remainder,d2,remainder);
4269 emit_adcs(quotient,quotient,quotient);
4270 emit_shrcc_imm(d2,1,d2);
4271 emit_jcc((int)out-16); // -4
4272 }
4273 }
4274 else // 64-bit
71e490c5 4275 assert(0);
57871462 4276 }
4277 else
4278 {
4279 // Multiply by zero is zero.
4280 // MIPS does not have a divide by zero exception.
4281 // The result is undefined, we return zero.
4282 signed char hr=get_reg(i_regs->regmap,HIREG);
4283 signed char lr=get_reg(i_regs->regmap,LOREG);
4284 if(hr>=0) emit_zeroreg(hr);
4285 if(lr>=0) emit_zeroreg(lr);
4286 }
4287}
4288#define multdiv_assemble multdiv_assemble_arm
4289
// Intentionally a no-op on ARM. On x86 this step puts the value 0xf8 into
// the register; on ARM the hash is computed with a single instruction in
// do_rhash(), so nothing needs to be preloaded.
void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
4294
4295void do_preload_rhtbl(int ht) {
4296 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4297}
4298
// Emit the mini hash table hash: rh = rs & 0xf8.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4302
4303void do_miniht_load(int ht,int rh) {
4304 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4305 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4306}
4307
4308void do_miniht_jump(int rs,int rh,int ht) {
4309 emit_cmp(rh,rs);
4310 emit_ldreq_indexed(ht,4,15);
4311 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4312 emit_mov(rs,7);
4313 emit_jmp(jump_vaddr_reg[7]);
4314 #else
4315 emit_jmp(jump_vaddr_reg[rs]);
4316 #endif
4317}
4318
4319void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 4320 #ifndef HAVE_ARMV7
57871462 4321 emit_movimm(return_address,rt); // PC into link register
4322 add_to_linker((int)out,return_address,1);
4323 emit_pcreladdr(temp);
4324 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4325 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4326 #else
4327 emit_movw(return_address&0x0000FFFF,rt);
4328 add_to_linker((int)out,return_address,1);
4329 emit_pcreladdr(temp);
4330 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4331 emit_movt(return_address&0xFFFF0000,rt);
4332 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4333 #endif
4334}
4335
57871462 4336void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4337{
4338 //if(dirty_pre==dirty) return;
4339 int hr,reg,new_hr;
4340 for(hr=0;hr<HOST_REGS;hr++) {
4341 if(hr!=EXCLUDE_REG) {
4342 reg=pre[hr];
4343 if(((~u)>>(reg&63))&1) {
f776eb14 4344 if(reg>0) {
57871462 4345 if(((dirty_pre&~dirty)>>hr)&1) {
4346 if(reg>0&&reg<34) {
4347 emit_storereg(reg,hr);
4348 if( ((is32_pre&~uu)>>reg)&1 ) {
4349 emit_sarimm(hr,31,HOST_TEMPREG);
4350 emit_storereg(reg|64,HOST_TEMPREG);
4351 }
4352 }
4353 else if(reg>=64) {
4354 emit_storereg(reg,hr);
4355 }
4356 }
4357 }
57871462 4358 }
4359 }
4360 }
4361}
4362
4363
4364/* using strd could possibly help but you'd have to allocate registers in pairs
4365void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4366{
4367 int hr;
4368 int wrote=-1;
4369 for(hr=HOST_REGS-1;hr>=0;hr--) {
4370 if(hr!=EXCLUDE_REG) {
4371 if(pre[hr]!=entry[hr]) {
4372 if(pre[hr]>=0) {
4373 if((dirty>>hr)&1) {
4374 if(get_reg(entry,pre[hr])<0) {
4375 if(pre[hr]<64) {
4376 if(!((u>>pre[hr])&1)) {
4377 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4378 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4379 emit_sarimm(hr,31,hr+1);
4380 emit_strdreg(pre[hr],hr);
4381 }
4382 else
4383 emit_storereg(pre[hr],hr);
4384 }else{
4385 emit_storereg(pre[hr],hr);
4386 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4387 emit_sarimm(hr,31,hr);
4388 emit_storereg(pre[hr]|64,hr);
4389 }
4390 }
4391 }
4392 }else{
4393 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4394 emit_storereg(pre[hr],hr);
4395 }
4396 }
4397 wrote=hr;
4398 }
4399 }
4400 }
4401 }
4402 }
4403 }
4404 for(hr=0;hr<HOST_REGS;hr++) {
4405 if(hr!=EXCLUDE_REG) {
4406 if(pre[hr]!=entry[hr]) {
4407 if(pre[hr]>=0) {
4408 int nr;
4409 if((nr=get_reg(entry,pre[hr]))>=0) {
4410 emit_mov(hr,nr);
4411 }
4412 }
4413 }
4414 }
4415 }
4416}
4417#define wb_invalidate wb_invalidate_arm
4418*/
4419
dd3a91a1 4420// Clearing the cache is rather slow on ARM Linux, so mark the areas
4421// that need to be cleared, and then only clear these areas once.
4422void do_clear_cache()
4423{
4424 int i,j;
4425 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4426 {
4427 u_int bitmap=needs_clear_cache[i];
4428 if(bitmap) {
4429 u_int start,end;
4430 for(j=0;j<32;j++)
4431 {
4432 if(bitmap&(1<<j)) {
bdeade46 4433 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 4434 end=start+4095;
4435 j++;
4436 while(j<32) {
4437 if(bitmap&(1<<j)) {
4438 end+=4096;
4439 j++;
4440 }else{
4441 __clear_cache((void *)start,(void *)end);
4442 break;
4443 }
4444 }
4445 }
4446 }
4447 needs_clear_cache[i]=0;
4448 }
4449 }
4450}
4451
// CPU-architecture-specific initialization.
// Nothing needs to be set up for ARM, so this is intentionally empty.
static void arch_init()
{
}
b9b61529 4455
4456// vim:shiftwidth=2:expandtab