drc: drop less obvious dead code
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
a327ad27 31#if !BASE_ADDR_FIXED
bdeade46 32char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
33#endif
34
// Bit mask with bits 0-3 and 12 set; Mach-O builds additionally set bit 9.
// Presumably one bit per caller-saved host register -- confirm at use sites.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif
40
57871462 41extern int cycle_count;
42extern int last_count;
43extern int pcaddr;
44extern int pending_exception;
45extern int branch_target;
46extern uint64_t readmem_dword;
57871462 47extern void *dynarec_local;
57871462 48extern u_int mini_ht[32][2];
57871462 49
50void indirect_jump_indexed();
51void indirect_jump();
52void do_interrupt();
53void jump_vaddr_r0();
54void jump_vaddr_r1();
55void jump_vaddr_r2();
56void jump_vaddr_r3();
57void jump_vaddr_r4();
58void jump_vaddr_r5();
59void jump_vaddr_r6();
60void jump_vaddr_r7();
61void jump_vaddr_r8();
62void jump_vaddr_r9();
63void jump_vaddr_r10();
64void jump_vaddr_r12();
65
66const u_int jump_vaddr_reg[16] = {
67 (int)jump_vaddr_r0,
68 (int)jump_vaddr_r1,
69 (int)jump_vaddr_r2,
70 (int)jump_vaddr_r3,
71 (int)jump_vaddr_r4,
72 (int)jump_vaddr_r5,
73 (int)jump_vaddr_r6,
74 (int)jump_vaddr_r7,
75 (int)jump_vaddr_r8,
76 (int)jump_vaddr_r9,
77 (int)jump_vaddr_r10,
78 0,
79 (int)jump_vaddr_r12,
80 0,
81 0,
82 0};
83
0bbd1454 84void invalidate_addr_r0();
85void invalidate_addr_r1();
86void invalidate_addr_r2();
87void invalidate_addr_r3();
88void invalidate_addr_r4();
89void invalidate_addr_r5();
90void invalidate_addr_r6();
91void invalidate_addr_r7();
92void invalidate_addr_r8();
93void invalidate_addr_r9();
94void invalidate_addr_r10();
95void invalidate_addr_r12();
96
97const u_int invalidate_addr_reg[16] = {
98 (int)invalidate_addr_r0,
99 (int)invalidate_addr_r1,
100 (int)invalidate_addr_r2,
101 (int)invalidate_addr_r3,
102 (int)invalidate_addr_r4,
103 (int)invalidate_addr_r5,
104 (int)invalidate_addr_r6,
105 (int)invalidate_addr_r7,
106 (int)invalidate_addr_r8,
107 (int)invalidate_addr_r9,
108 (int)invalidate_addr_r10,
109 0,
110 (int)invalidate_addr_r12,
111 0,
112 0,
113 0};
114
dd3a91a1 115unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
116
57871462 117/* Linker */
118
119void set_jump_target(int addr,u_int target)
120{
121 u_char *ptr=(u_char *)addr;
122 u_int *ptr2=(u_int *)ptr;
123 if(ptr[3]==0xe2) {
124 assert((target-(u_int)ptr2-8)<1024);
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
128 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
129 }
130 else if(ptr[3]==0x72) {
131 // generated by emit_jno_unlikely
132 if((target-(u_int)ptr2-8)<1024) {
133 assert((addr&3)==0);
134 assert((target&3)==0);
135 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
136 }
137 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
138 assert((addr&3)==0);
139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
141 }
142 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
143 }
144 else {
145 assert((ptr[3]&0x0e)==0xa);
146 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
147 }
148}
149
150// This optionally copies the instruction from the target of the branch into
151// the space before the branch. Works, but the difference in speed is
152// usually insignificant.
153void set_jump_target_fillslot(int addr,u_int target,int copy)
154{
155 u_char *ptr=(u_char *)addr;
156 u_int *ptr2=(u_int *)ptr;
157 assert(!copy||ptr2[-1]==0xe28dd000);
158 if(ptr[3]==0xe2) {
159 assert(!copy);
160 assert((target-(u_int)ptr2-8)<4096);
161 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
162 }
163 else {
164 assert((ptr[3]&0x0e)==0xa);
165 u_int target_insn=*(u_int *)target;
166 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
167 copy=0;
168 }
169 if((target_insn&0x0c100000)==0x04100000) { // Load
170 copy=0;
171 }
172 if(target_insn&0x08000000) {
173 copy=0;
174 }
175 if(copy) {
176 ptr2[-1]=target_insn;
177 target+=4;
178 }
179 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
180 }
181}
182
183/* Literal pool */
184add_literal(int addr,int val)
185{
15776b68 186 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 187 literals[literalcount][0]=addr;
188 literals[literalcount][1]=val;
189 literalcount++;
190}
191
f76eeef9 192void *kill_pointer(void *stub)
57871462 193{
194 int *ptr=(int *)(stub+4);
195 assert((*ptr&0x0ff00000)==0x05900000);
196 u_int offset=*ptr&0xfff;
197 int **l_ptr=(void *)ptr+offset+8;
198 int *i_ptr=*l_ptr;
199 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 200 return i_ptr;
57871462 201}
202
f968d35d 203// find where external branch is liked to using addr of it's stub:
204// get address that insn one after stub loads (dyna_linker arg1),
205// treat it as a pointer to branch insn,
206// return addr where that branch jumps to
57871462 207int get_pointer(void *stub)
208{
209 //printf("get_pointer(%x)\n",(int)stub);
210 int *ptr=(int *)(stub+4);
f968d35d 211 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 212 u_int offset=*ptr&0xfff;
213 int **l_ptr=(void *)ptr+offset+8;
214 int *i_ptr=*l_ptr;
215 assert((*i_ptr&0x0f000000)==0x0a000000);
216 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
217}
218
219// Find the "clean" entry point from a "dirty" entry point
220// by skipping past the call to verify_code
221u_int get_clean_addr(int addr)
222{
223 int *ptr=(int *)addr;
665f33e1 224 #ifndef HAVE_ARMV7
57871462 225 ptr+=4;
226 #else
227 ptr+=6;
228 #endif
229 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
230 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
231 ptr++;
232 if((*ptr&0xFF000000)==0xea000000) {
233 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
234 }
235 return (u_int)ptr;
236}
237
238int verify_dirty(int addr)
239{
240 u_int *ptr=(u_int *)addr;
665f33e1 241 #ifndef HAVE_ARMV7
57871462 242 // get from literal pool
15776b68 243 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 244 u_int offset=*ptr&0xfff;
245 u_int *l_ptr=(void *)ptr+offset+8;
246 u_int source=l_ptr[0];
247 u_int copy=l_ptr[1];
248 u_int len=l_ptr[2];
249 ptr+=4;
250 #else
251 // ARMv7 movw/movt
252 assert((*ptr&0xFFF00000)==0xe3000000);
253 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
254 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
255 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
256 ptr+=6;
257 #endif
258 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
259 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 260 //printf("verify_dirty: %x %x %x\n",source,copy,len);
261 return !memcmp((void *)source,(void *)copy,len);
262}
263
264// This doesn't necessarily find all clean entry points, just
265// guarantees that it's not dirty
266int isclean(int addr)
267{
665f33e1 268 #ifndef HAVE_ARMV7
57871462 269 int *ptr=((u_int *)addr)+4;
270 #else
271 int *ptr=((u_int *)addr)+6;
272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
277 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
278 return 1;
279}
280
4a35de07 281// get source that block at addr was compiled from (host pointers)
57871462 282void get_bounds(int addr,u_int *start,u_int *end)
283{
284 u_int *ptr=(u_int *)addr;
665f33e1 285 #ifndef HAVE_ARMV7
57871462 286 // get from literal pool
15776b68 287 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 288 u_int offset=*ptr&0xfff;
289 u_int *l_ptr=(void *)ptr+offset+8;
290 u_int source=l_ptr[0];
291 //u_int copy=l_ptr[1];
292 u_int len=l_ptr[2];
293 ptr+=4;
294 #else
295 // ARMv7 movw/movt
296 assert((*ptr&0xFFF00000)==0xe3000000);
297 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
298 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
299 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
300 ptr+=6;
301 #endif
302 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
303 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 304 *start=source;
305 *end=source+len;
306}
307
308/* Register allocation */
309
310// Note: registers are allocated clean (unmodified state)
311// if you intend to modify the register, you must call dirty_reg().
312void alloc_reg(struct regstat *cur,int i,signed char reg)
313{
314 int r,hr;
315 int preferred_reg = (reg&7);
316 if(reg==CCREG) preferred_reg=HOST_CCREG;
317 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
318
319 // Don't allocate unused registers
320 if((cur->u>>reg)&1) return;
321
322 // see if it's already allocated
323 for(hr=0;hr<HOST_REGS;hr++)
324 {
325 if(cur->regmap[hr]==reg) return;
326 }
327
328 // Keep the same mapping if the register was already allocated in a loop
329 preferred_reg = loop_reg(i,reg,preferred_reg);
330
331 // Try to allocate the preferred register
332 if(cur->regmap[preferred_reg]==-1) {
333 cur->regmap[preferred_reg]=reg;
334 cur->dirty&=~(1<<preferred_reg);
335 cur->isconst&=~(1<<preferred_reg);
336 return;
337 }
338 r=cur->regmap[preferred_reg];
339 if(r<64&&((cur->u>>r)&1)) {
340 cur->regmap[preferred_reg]=reg;
341 cur->dirty&=~(1<<preferred_reg);
342 cur->isconst&=~(1<<preferred_reg);
343 return;
344 }
345 if(r>=64&&((cur->uu>>(r&63))&1)) {
346 cur->regmap[preferred_reg]=reg;
347 cur->dirty&=~(1<<preferred_reg);
348 cur->isconst&=~(1<<preferred_reg);
349 return;
350 }
351
352 // Clear any unneeded registers
353 // We try to keep the mapping consistent, if possible, because it
354 // makes branches easier (especially loops). So we try to allocate
355 // first (see above) before removing old mappings. If this is not
356 // possible then go ahead and clear out the registers that are no
357 // longer needed.
358 for(hr=0;hr<HOST_REGS;hr++)
359 {
360 r=cur->regmap[hr];
361 if(r>=0) {
362 if(r<64) {
363 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
364 }
365 else
366 {
367 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
368 }
369 }
370 }
371 // Try to allocate any available register, but prefer
372 // registers that have not been used recently.
373 if(i>0) {
374 for(hr=0;hr<HOST_REGS;hr++) {
375 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
376 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
377 cur->regmap[hr]=reg;
378 cur->dirty&=~(1<<hr);
379 cur->isconst&=~(1<<hr);
380 return;
381 }
382 }
383 }
384 }
385 // Try to allocate any available register
386 for(hr=0;hr<HOST_REGS;hr++) {
387 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
388 cur->regmap[hr]=reg;
389 cur->dirty&=~(1<<hr);
390 cur->isconst&=~(1<<hr);
391 return;
392 }
393 }
394
395 // Ok, now we have to evict someone
396 // Pick a register we hopefully won't need soon
397 u_char hsn[MAXREG+1];
398 memset(hsn,10,sizeof(hsn));
399 int j;
400 lsn(hsn,i,&preferred_reg);
401 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
402 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
403 if(i>0) {
404 // Don't evict the cycle count at entry points, otherwise the entry
405 // stub will have to write it.
406 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
407 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
408 for(j=10;j>=3;j--)
409 {
410 // Alloc preferred register if available
411 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
412 for(hr=0;hr<HOST_REGS;hr++) {
413 // Evict both parts of a 64-bit register
414 if((cur->regmap[hr]&63)==r) {
415 cur->regmap[hr]=-1;
416 cur->dirty&=~(1<<hr);
417 cur->isconst&=~(1<<hr);
418 }
419 }
420 cur->regmap[preferred_reg]=reg;
421 return;
422 }
423 for(r=1;r<=MAXREG;r++)
424 {
425 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
428 if(cur->regmap[hr]==r+64) {
429 cur->regmap[hr]=reg;
430 cur->dirty&=~(1<<hr);
431 cur->isconst&=~(1<<hr);
432 return;
433 }
434 }
435 }
436 for(hr=0;hr<HOST_REGS;hr++) {
437 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
438 if(cur->regmap[hr]==r) {
439 cur->regmap[hr]=reg;
440 cur->dirty&=~(1<<hr);
441 cur->isconst&=~(1<<hr);
442 return;
443 }
444 }
445 }
446 }
447 }
448 }
449 }
450 for(j=10;j>=0;j--)
451 {
452 for(r=1;r<=MAXREG;r++)
453 {
454 if(hsn[r]==j) {
455 for(hr=0;hr<HOST_REGS;hr++) {
456 if(cur->regmap[hr]==r+64) {
457 cur->regmap[hr]=reg;
458 cur->dirty&=~(1<<hr);
459 cur->isconst&=~(1<<hr);
460 return;
461 }
462 }
463 for(hr=0;hr<HOST_REGS;hr++) {
464 if(cur->regmap[hr]==r) {
465 cur->regmap[hr]=reg;
466 cur->dirty&=~(1<<hr);
467 cur->isconst&=~(1<<hr);
468 return;
469 }
470 }
471 }
472 }
473 }
c43b5311 474 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 475}
476
477void alloc_reg64(struct regstat *cur,int i,signed char reg)
478{
479 int preferred_reg = 8+(reg&1);
480 int r,hr;
481
482 // allocate the lower 32 bits
483 alloc_reg(cur,i,reg);
484
485 // Don't allocate unused registers
486 if((cur->uu>>reg)&1) return;
487
488 // see if the upper half is already allocated
489 for(hr=0;hr<HOST_REGS;hr++)
490 {
491 if(cur->regmap[hr]==reg+64) return;
492 }
493
494 // Keep the same mapping if the register was already allocated in a loop
495 preferred_reg = loop_reg(i,reg,preferred_reg);
496
497 // Try to allocate the preferred register
498 if(cur->regmap[preferred_reg]==-1) {
499 cur->regmap[preferred_reg]=reg|64;
500 cur->dirty&=~(1<<preferred_reg);
501 cur->isconst&=~(1<<preferred_reg);
502 return;
503 }
504 r=cur->regmap[preferred_reg];
505 if(r<64&&((cur->u>>r)&1)) {
506 cur->regmap[preferred_reg]=reg|64;
507 cur->dirty&=~(1<<preferred_reg);
508 cur->isconst&=~(1<<preferred_reg);
509 return;
510 }
511 if(r>=64&&((cur->uu>>(r&63))&1)) {
512 cur->regmap[preferred_reg]=reg|64;
513 cur->dirty&=~(1<<preferred_reg);
514 cur->isconst&=~(1<<preferred_reg);
515 return;
516 }
517
518 // Clear any unneeded registers
519 // We try to keep the mapping consistent, if possible, because it
520 // makes branches easier (especially loops). So we try to allocate
521 // first (see above) before removing old mappings. If this is not
522 // possible then go ahead and clear out the registers that are no
523 // longer needed.
524 for(hr=HOST_REGS-1;hr>=0;hr--)
525 {
526 r=cur->regmap[hr];
527 if(r>=0) {
528 if(r<64) {
529 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
530 }
531 else
532 {
533 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
534 }
535 }
536 }
537 // Try to allocate any available register, but prefer
538 // registers that have not been used recently.
539 if(i>0) {
540 for(hr=0;hr<HOST_REGS;hr++) {
541 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
542 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
543 cur->regmap[hr]=reg|64;
544 cur->dirty&=~(1<<hr);
545 cur->isconst&=~(1<<hr);
546 return;
547 }
548 }
549 }
550 }
551 // Try to allocate any available register
552 for(hr=0;hr<HOST_REGS;hr++) {
553 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
554 cur->regmap[hr]=reg|64;
555 cur->dirty&=~(1<<hr);
556 cur->isconst&=~(1<<hr);
557 return;
558 }
559 }
560
561 // Ok, now we have to evict someone
562 // Pick a register we hopefully won't need soon
563 u_char hsn[MAXREG+1];
564 memset(hsn,10,sizeof(hsn));
565 int j;
566 lsn(hsn,i,&preferred_reg);
567 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
568 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
569 if(i>0) {
570 // Don't evict the cycle count at entry points, otherwise the entry
571 // stub will have to write it.
572 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
573 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
574 for(j=10;j>=3;j--)
575 {
576 // Alloc preferred register if available
577 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
578 for(hr=0;hr<HOST_REGS;hr++) {
579 // Evict both parts of a 64-bit register
580 if((cur->regmap[hr]&63)==r) {
581 cur->regmap[hr]=-1;
582 cur->dirty&=~(1<<hr);
583 cur->isconst&=~(1<<hr);
584 }
585 }
586 cur->regmap[preferred_reg]=reg|64;
587 return;
588 }
589 for(r=1;r<=MAXREG;r++)
590 {
591 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
594 if(cur->regmap[hr]==r+64) {
595 cur->regmap[hr]=reg|64;
596 cur->dirty&=~(1<<hr);
597 cur->isconst&=~(1<<hr);
598 return;
599 }
600 }
601 }
602 for(hr=0;hr<HOST_REGS;hr++) {
603 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
604 if(cur->regmap[hr]==r) {
605 cur->regmap[hr]=reg|64;
606 cur->dirty&=~(1<<hr);
607 cur->isconst&=~(1<<hr);
608 return;
609 }
610 }
611 }
612 }
613 }
614 }
615 }
616 for(j=10;j>=0;j--)
617 {
618 for(r=1;r<=MAXREG;r++)
619 {
620 if(hsn[r]==j) {
621 for(hr=0;hr<HOST_REGS;hr++) {
622 if(cur->regmap[hr]==r+64) {
623 cur->regmap[hr]=reg|64;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629 for(hr=0;hr<HOST_REGS;hr++) {
630 if(cur->regmap[hr]==r) {
631 cur->regmap[hr]=reg|64;
632 cur->dirty&=~(1<<hr);
633 cur->isconst&=~(1<<hr);
634 return;
635 }
636 }
637 }
638 }
639 }
c43b5311 640 SysPrintf("This shouldn't happen");exit(1);
57871462 641}
642
643// Allocate a temporary register. This is done without regard to
644// dirty status or whether the register we request is on the unneeded list
645// Note: This will only allocate one register, even if called multiple times
646void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
647{
648 int r,hr;
649 int preferred_reg = -1;
650
651 // see if it's already allocated
652 for(hr=0;hr<HOST_REGS;hr++)
653 {
654 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
655 }
656
657 // Try to allocate any available register
658 for(hr=HOST_REGS-1;hr>=0;hr--) {
659 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
660 cur->regmap[hr]=reg;
661 cur->dirty&=~(1<<hr);
662 cur->isconst&=~(1<<hr);
663 return;
664 }
665 }
666
667 // Find an unneeded register
668 for(hr=HOST_REGS-1;hr>=0;hr--)
669 {
670 r=cur->regmap[hr];
671 if(r>=0) {
672 if(r<64) {
673 if((cur->u>>r)&1) {
674 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
675 cur->regmap[hr]=reg;
676 cur->dirty&=~(1<<hr);
677 cur->isconst&=~(1<<hr);
678 return;
679 }
680 }
681 }
682 else
683 {
684 if((cur->uu>>(r&63))&1) {
685 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
686 cur->regmap[hr]=reg;
687 cur->dirty&=~(1<<hr);
688 cur->isconst&=~(1<<hr);
689 return;
690 }
691 }
692 }
693 }
694 }
695
696 // Ok, now we have to evict someone
697 // Pick a register we hopefully won't need soon
698 // TODO: we might want to follow unconditional jumps here
699 // TODO: get rid of dupe code and make this into a function
700 u_char hsn[MAXREG+1];
701 memset(hsn,10,sizeof(hsn));
702 int j;
703 lsn(hsn,i,&preferred_reg);
704 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
705 if(i>0) {
706 // Don't evict the cycle count at entry points, otherwise the entry
707 // stub will have to write it.
708 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
709 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
710 for(j=10;j>=3;j--)
711 {
712 for(r=1;r<=MAXREG;r++)
713 {
714 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
717 if(cur->regmap[hr]==r+64) {
718 cur->regmap[hr]=reg;
719 cur->dirty&=~(1<<hr);
720 cur->isconst&=~(1<<hr);
721 return;
722 }
723 }
724 }
725 for(hr=0;hr<HOST_REGS;hr++) {
726 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
727 if(cur->regmap[hr]==r) {
728 cur->regmap[hr]=reg;
729 cur->dirty&=~(1<<hr);
730 cur->isconst&=~(1<<hr);
731 return;
732 }
733 }
734 }
735 }
736 }
737 }
738 }
739 for(j=10;j>=0;j--)
740 {
741 for(r=1;r<=MAXREG;r++)
742 {
743 if(hsn[r]==j) {
744 for(hr=0;hr<HOST_REGS;hr++) {
745 if(cur->regmap[hr]==r+64) {
746 cur->regmap[hr]=reg;
747 cur->dirty&=~(1<<hr);
748 cur->isconst&=~(1<<hr);
749 return;
750 }
751 }
752 for(hr=0;hr<HOST_REGS;hr++) {
753 if(cur->regmap[hr]==r) {
754 cur->regmap[hr]=reg;
755 cur->dirty&=~(1<<hr);
756 cur->isconst&=~(1<<hr);
757 return;
758 }
759 }
760 }
761 }
762 }
c43b5311 763 SysPrintf("This shouldn't happen");exit(1);
57871462 764}
765// Allocate a specific ARM register.
766void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
767{
768 int n;
f776eb14 769 int dirty=0;
57871462 770
771 // see if it's already allocated (and dealloc it)
772 for(n=0;n<HOST_REGS;n++)
773 {
f776eb14 774 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
775 dirty=(cur->dirty>>n)&1;
776 cur->regmap[n]=-1;
777 }
57871462 778 }
779
780 cur->regmap[hr]=reg;
781 cur->dirty&=~(1<<hr);
f776eb14 782 cur->dirty|=dirty<<hr;
57871462 783 cur->isconst&=~(1<<hr);
784}
785
786// Alloc cycle count into dedicated register
787alloc_cc(struct regstat *cur,int i)
788{
789 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
790}
791
792/* Special alloc */
793
794
795/* Assembler */
796
// Printable names of host registers 0-15, used by the assem_debug() traces.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"};
814
57871462 815void output_w32(u_int word)
816{
817 *((u_int *)out)=word;
818 out+=4;
819}
// Build the register fields of an instruction word: rn in bits 19-16,
// rd in bits 15-12 and rm in bits 3-0.  Each must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  const u_int rn_field=rn<<16;
  const u_int rd_field=rd<<12;
  return(rn_field|rd_field|rm);
}
// Build the operand fields for an immediate-form instruction: rn in bits
// 19-16, rd in bits 15-12, an 8-bit immediate in bits 7-0, and the field
// ((32-shift)&30)<<7 placed above it.  shift must be even, imm below 256.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate_field=((32-shift)&30)<<7;
  return((rn<<16)|(rd<<12)|rotate_field|imm);
}
// Try to express imm as an 8-bit value with an even rotation.
// On success stores the 12-bit encoding (rotate field in bits 11-8, value in
// bits 7-0) in *encoded and returns 1.  Returns 0, with *encoded set to 0,
// when no such encoding exists.
u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  // rot counts down 32,30,...,2 while imm is rotated right 2 bits per step
  for(int rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 850void genimm_checked(u_int imm,u_int *encoded)
851{
852 u_int ret=genimm(imm,encoded);
853 assert(ret);
854}
57871462 855u_int genjmp(u_int addr)
856{
857 int offset=addr-(int)out-8;
e80343e2 858 if(offset<-33554432||offset>=33554432) {
859 if (addr>2) {
c43b5311 860 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 861 exit(1);
862 }
863 return 0;
864 }
57871462 865 return ((u_int)offset>>2)&0xffffff;
866}
867
868void emit_mov(int rs,int rt)
869{
870 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
871 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
872}
873
874void emit_movs(int rs,int rt)
875{
876 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
877 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
878}
879
880void emit_add(int rs1,int rs2,int rt)
881{
882 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
883 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
884}
885
886void emit_adds(int rs1,int rs2,int rt)
887{
888 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
889 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
890}
891
892void emit_adcs(int rs1,int rs2,int rt)
893{
894 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
895 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
896}
897
898void emit_sbc(int rs1,int rs2,int rt)
899{
900 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
901 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
902}
903
904void emit_sbcs(int rs1,int rs2,int rt)
905{
906 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
907 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
908}
909
910void emit_neg(int rs, int rt)
911{
912 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
913 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
914}
915
916void emit_negs(int rs, int rt)
917{
918 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
919 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
920}
921
922void emit_sub(int rs1,int rs2,int rt)
923{
924 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
925 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
926}
927
928void emit_subs(int rs1,int rs2,int rt)
929{
930 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
932}
933
934void emit_zeroreg(int rt)
935{
936 assem_debug("mov %s,#0\n",regname[rt]);
937 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
938}
939
790ee18e 940void emit_loadlp(u_int imm,u_int rt)
941{
942 add_literal((int)out,imm);
943 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
944 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
945}
946void emit_movw(u_int imm,u_int rt)
947{
948 assert(imm<65536);
949 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
950 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
951}
952void emit_movt(u_int imm,u_int rt)
953{
954 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
955 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
956}
957void emit_movimm(u_int imm,u_int rt)
958{
959 u_int armval;
960 if(genimm(imm,&armval)) {
961 assem_debug("mov %s,#%d\n",regname[rt],imm);
962 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
963 }else if(genimm(~imm,&armval)) {
964 assem_debug("mvn %s,#%d\n",regname[rt],imm);
965 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
966 }else if(imm<65536) {
665f33e1 967 #ifndef HAVE_ARMV7
790ee18e 968 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
969 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
970 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
971 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
972 #else
973 emit_movw(imm,rt);
974 #endif
975 }else{
665f33e1 976 #ifndef HAVE_ARMV7
790ee18e 977 emit_loadlp(imm,rt);
978 #else
979 emit_movw(imm&0x0000FFFF,rt);
980 emit_movt(imm&0xFFFF0000,rt);
981 #endif
982 }
983}
984void emit_pcreladdr(u_int rt)
985{
986 assem_debug("add %s,pc,#?\n",regname[rt]);
987 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
988}
989
57871462 990void emit_loadreg(int r, int hr)
991{
3d624f89 992 if(r&64) {
c43b5311 993 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 994 assert(0);
995 return;
3d624f89 996 }
57871462 997 if((r&63)==0)
998 emit_zeroreg(hr);
999 else {
3d624f89 1000 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1001 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1002 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1003 if(r==CCREG) addr=(int)&cycle_count;
1004 if(r==CSREG) addr=(int)&Status;
1005 if(r==FSREG) addr=(int)&FCR31;
1006 if(r==INVCP) addr=(int)&invc_ptr;
1007 u_int offset = addr-(u_int)&dynarec_local;
1008 assert(offset<4096);
1009 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1010 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1011 }
1012}
1013void emit_storereg(int r, int hr)
1014{
3d624f89 1015 if(r&64) {
c43b5311 1016 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1017 assert(0);
1018 return;
3d624f89 1019 }
3d624f89 1020 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1021 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1022 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1023 if(r==CCREG) addr=(int)&cycle_count;
1024 if(r==FSREG) addr=(int)&FCR31;
1025 u_int offset = addr-(u_int)&dynarec_local;
1026 assert(offset<4096);
1027 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1028 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1029}
1030
1031void emit_test(int rs, int rt)
1032{
1033 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1034 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1035}
1036
1037void emit_testimm(int rs,int imm)
1038{
1039 u_int armval;
5a05d80c 1040 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1041 genimm_checked(imm,&armval);
57871462 1042 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1043}
1044
b9b61529 1045void emit_testeqimm(int rs,int imm)
1046{
1047 u_int armval;
1048 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1049 genimm_checked(imm,&armval);
b9b61529 1050 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1051}
1052
57871462 1053void emit_not(int rs,int rt)
1054{
1055 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1056 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1057}
1058
b9b61529 1059void emit_mvnmi(int rs,int rt)
1060{
1061 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1062 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1063}
1064
57871462 1065void emit_and(u_int rs1,u_int rs2,u_int rt)
1066{
1067 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1068 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1069}
1070
1071void emit_or(u_int rs1,u_int rs2,u_int rt)
1072{
1073 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1074 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1075}
1076void emit_or_and_set_flags(int rs1,int rs2,int rt)
1077{
1078 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1079 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1080}
1081
f70d384d 1082void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1083{
1084 assert(rs<16);
1085 assert(rt<16);
1086 assert(imm<32);
1087 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1088 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1089}
1090
576bbd8f 1091void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1092{
1093 assert(rs<16);
1094 assert(rt<16);
1095 assert(imm<32);
1096 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1097 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1098}
1099
57871462 1100void emit_xor(u_int rs1,u_int rs2,u_int rt)
1101{
1102 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1103 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1104}
1105
57871462 1106void emit_addimm(u_int rs,int imm,u_int rt)
1107{
1108 assert(rs<16);
1109 assert(rt<16);
1110 if(imm!=0) {
57871462 1111 u_int armval;
1112 if(genimm(imm,&armval)) {
1113 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1114 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1115 }else if(genimm(-imm,&armval)) {
8a0a8423 1116 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1117 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1118 #ifdef HAVE_ARMV7
1119 }else if(rt!=rs&&(u_int)imm<65536) {
1120 emit_movw(imm&0x0000ffff,rt);
1121 emit_add(rs,rt,rt);
1122 }else if(rt!=rs&&(u_int)-imm<65536) {
1123 emit_movw(-imm&0x0000ffff,rt);
1124 emit_sub(rs,rt,rt);
1125 #endif
1126 }else if((u_int)-imm<65536) {
57871462 1127 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1128 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1129 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1130 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1131 }else {
1132 do {
1133 int shift = (ffs(imm) - 1) & ~1;
1134 int imm8 = imm & (0xff << shift);
1135 genimm_checked(imm8,&armval);
1136 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1137 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1138 rs = rt;
1139 imm &= ~imm8;
1140 }
1141 while (imm != 0);
57871462 1142 }
1143 }
1144 else if(rs!=rt) emit_mov(rs,rt);
1145}
1146
1147void emit_addimm_and_set_flags(int imm,int rt)
1148{
1149 assert(imm>-65536&&imm<65536);
1150 u_int armval;
1151 if(genimm(imm,&armval)) {
1152 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1153 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1154 }else if(genimm(-imm,&armval)) {
1155 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1156 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1157 }else if(imm<0) {
1158 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1159 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1160 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1161 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1162 }else{
1163 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1164 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1165 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1166 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1167 }
1168}
1169void emit_addimm_no_flags(u_int imm,u_int rt)
1170{
1171 emit_addimm(rt,imm,rt);
1172}
1173
1174void emit_addnop(u_int r)
1175{
1176 assert(r<16);
1177 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1178 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1179}
1180
1181void emit_adcimm(u_int rs,int imm,u_int rt)
1182{
1183 u_int armval;
cfbd3c6e 1184 genimm_checked(imm,&armval);
57871462 1185 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1186 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1187}
1edfcc68 1188
57871462 1189void emit_rscimm(int rs,int imm,u_int rt)
1190{
1191 assert(0);
1192 u_int armval;
cfbd3c6e 1193 genimm_checked(imm,&armval);
57871462 1194 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1195 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1196}
1197
1198void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1199{
1200 // TODO: if(genimm(imm,&armval)) ...
1201 // else
1202 emit_movimm(imm,HOST_TEMPREG);
1203 emit_adds(HOST_TEMPREG,rsl,rtl);
1204 emit_adcimm(rsh,0,rth);
1205}
1206
57871462 1207void emit_andimm(int rs,int imm,int rt)
1208{
1209 u_int armval;
790ee18e 1210 if(imm==0) {
1211 emit_zeroreg(rt);
1212 }else if(genimm(imm,&armval)) {
57871462 1213 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1214 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1215 }else if(genimm(~imm,&armval)) {
1216 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1217 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1218 }else if(imm==65535) {
332a4533 1219 #ifndef HAVE_ARMV6
57871462 1220 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1221 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1222 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1223 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1224 #else
1225 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1226 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1227 #endif
1228 }else{
1229 assert(imm>0&&imm<65535);
665f33e1 1230 #ifndef HAVE_ARMV7
57871462 1231 assem_debug("mov r14,#%d\n",imm&0xFF00);
1232 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1233 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1234 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1235 #else
1236 emit_movw(imm,HOST_TEMPREG);
1237 #endif
1238 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1239 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1240 }
1241}
1242
1243void emit_orimm(int rs,int imm,int rt)
1244{
1245 u_int armval;
790ee18e 1246 if(imm==0) {
1247 if(rs!=rt) emit_mov(rs,rt);
1248 }else if(genimm(imm,&armval)) {
57871462 1249 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1250 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1251 }else{
1252 assert(imm>0&&imm<65536);
1253 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1254 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1255 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1256 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1257 }
1258}
1259
1260void emit_xorimm(int rs,int imm,int rt)
1261{
57871462 1262 u_int armval;
790ee18e 1263 if(imm==0) {
1264 if(rs!=rt) emit_mov(rs,rt);
1265 }else if(genimm(imm,&armval)) {
57871462 1266 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1267 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1268 }else{
514ed0d9 1269 assert(imm>0&&imm<65536);
57871462 1270 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1271 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1272 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1273 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1274 }
1275}
1276
1277void emit_shlimm(int rs,u_int imm,int rt)
1278{
1279 assert(imm>0);
1280 assert(imm<32);
1281 //if(imm==1) ...
1282 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1283 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1284}
1285
c6c3b1b3 1286void emit_lsls_imm(int rs,int imm,int rt)
1287{
1288 assert(imm>0);
1289 assert(imm<32);
1290 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1291 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1292}
1293
665f33e1 1294void emit_lslpls_imm(int rs,int imm,int rt)
1295{
1296 assert(imm>0);
1297 assert(imm<32);
1298 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1299 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1300}
1301
57871462 1302void emit_shrimm(int rs,u_int imm,int rt)
1303{
1304 assert(imm>0);
1305 assert(imm<32);
1306 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1307 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1308}
1309
1310void emit_sarimm(int rs,u_int imm,int rt)
1311{
1312 assert(imm>0);
1313 assert(imm<32);
1314 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1315 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1316}
1317
1318void emit_rorimm(int rs,u_int imm,int rt)
1319{
1320 assert(imm>0);
1321 assert(imm<32);
1322 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1323 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1324}
1325
1326void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1327{
1328 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1329 assert(imm>0);
1330 assert(imm<32);
1331 //if(imm==1) ...
1332 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1333 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1334 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1335 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1336}
1337
1338void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1339{
1340 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1341 assert(imm>0);
1342 assert(imm<32);
1343 //if(imm==1) ...
1344 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1345 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1346 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1347 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1348}
1349
// Sign-extend the low 16 bits of rs into rt: sxth when HAVE_ARMV6 is
// defined, otherwise a shift-left/arithmetic-shift-right pair.
void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1360
// Sign-extend the low 8 bits of rs into rt: sxtb when HAVE_ARMV6 is
// defined, otherwise a shift-left/arithmetic-shift-right pair.
void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1371
57871462 1372void emit_shl(u_int rs,u_int shift,u_int rt)
1373{
1374 assert(rs<16);
1375 assert(rt<16);
1376 assert(shift<16);
1377 //if(imm==1) ...
1378 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1379 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1380}
1381void emit_shr(u_int rs,u_int shift,u_int rt)
1382{
1383 assert(rs<16);
1384 assert(rt<16);
1385 assert(shift<16);
1386 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1387 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1388}
1389void emit_sar(u_int rs,u_int shift,u_int rt)
1390{
1391 assert(rs<16);
1392 assert(rt<16);
1393 assert(shift<16);
1394 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1395 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1396}
57871462 1397
57871462 1398void emit_orrshl(u_int rs,u_int shift,u_int rt)
1399{
1400 assert(rs<16);
1401 assert(rt<16);
1402 assert(shift<16);
1403 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1404 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1405}
1406void emit_orrshr(u_int rs,u_int shift,u_int rt)
1407{
1408 assert(rs<16);
1409 assert(rt<16);
1410 assert(shift<16);
1411 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1412 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1413}
1414
1415void emit_cmpimm(int rs,int imm)
1416{
1417 u_int armval;
1418 if(genimm(imm,&armval)) {
5a05d80c 1419 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1420 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1421 }else if(genimm(-imm,&armval)) {
5a05d80c 1422 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1423 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1424 }else if(imm>0) {
1425 assert(imm<65536);
57871462 1426 emit_movimm(imm,HOST_TEMPREG);
57871462 1427 assem_debug("cmp %s,r14\n",regname[rs]);
1428 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1429 }else{
1430 assert(imm>-65536);
57871462 1431 emit_movimm(-imm,HOST_TEMPREG);
57871462 1432 assem_debug("cmn %s,r14\n",regname[rs]);
1433 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1434 }
1435}
1436
57871462 1437void emit_cmovne_imm(int imm,int rt)
1438{
1439 assem_debug("movne %s,#%d\n",regname[rt],imm);
1440 u_int armval;
cfbd3c6e 1441 genimm_checked(imm,&armval);
57871462 1442 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1443}
1444void emit_cmovl_imm(int imm,int rt)
1445{
1446 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1447 u_int armval;
cfbd3c6e 1448 genimm_checked(imm,&armval);
57871462 1449 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1450}
1451void emit_cmovb_imm(int imm,int rt)
1452{
1453 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1454 u_int armval;
cfbd3c6e 1455 genimm_checked(imm,&armval);
57871462 1456 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1457}
1458void emit_cmovs_imm(int imm,int rt)
1459{
1460 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1461 u_int armval;
cfbd3c6e 1462 genimm_checked(imm,&armval);
57871462 1463 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1464}
1465void emit_cmove_reg(int rs,int rt)
1466{
1467 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1468 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1469}
1470void emit_cmovne_reg(int rs,int rt)
1471{
1472 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1473 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1474}
1475void emit_cmovl_reg(int rs,int rt)
1476{
1477 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1478 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1479}
1480void emit_cmovs_reg(int rs,int rt)
1481{
1482 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1483 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1484}
1485
// Set rt to 1 when rs is less than imm (movlt after the compare), else 0.
// When rt aliases rs the zeroing is deferred until after the compare so
// the compared value is not destroyed first.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Like emit_slti32() but selects the result with movcc instead of movlt.
// When rt aliases rs the zeroing is deferred until after the compare.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set-less-than-immediate on the register pair rsh:rsl, result in rt.
// The low-word result is computed first and then overridden according to
// the high word; rsh must not be the same register as rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // high word is compared against -1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  // non-negative immediate: high word is tested against zero
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned set-less-than-immediate on the register pair rsh:rsl, result
// in rt. The low-word result is computed first and then overridden
// according to the high word; rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // high word is compared against -1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // non-negative immediate: a non-zero high word forces the result to 0
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1532
1533void emit_cmp(int rs,int rt)
1534{
1535 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1536 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1537}
// Set rt to 1 when rs is greater than zero, else 0:
// compare rs with 1, preload 1, then movlt 0.
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Set rt to 1 when rs is non-zero. Flags come from a flag-setting move of
// rs into rt, or from a test of rs when both are the same register.
void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// Greater-than-zero test on the register pair rsh:rsl, result in rt.
// The low-word result is refined by testing the high word: movne 1,
// then movmi 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// Non-zero test on the register pair rsh:rsl: orrs both halves into rt,
// then movne 1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// Set rt to 1 when rs1 is less than rs2 (movlt after the compare), else 0.
// When rt aliases either source the zeroing is deferred until after the
// compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Like emit_set_if_less32() but selects the result with movcc.
// When rt aliases either source the zeroing is deferred until after the
// compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1582void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1583{
1584 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1585 assert(u1!=rt);
1586 assert(u2!=rt);
1587 emit_cmp(l1,l2);
1588 emit_movimm(0,rt);
1589 emit_sbcs(u1,u2,HOST_TEMPREG);
1590 emit_cmovl_imm(1,rt);
1591}
1592void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1593{
1594 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1595 assert(u1!=rt);
1596 assert(u2!=rt);
1597 emit_cmp(l1,l2);
1598 emit_movimm(0,rt);
1599 emit_sbcs(u1,u2,HOST_TEMPREG);
1600 emit_cmovb_imm(1,rt);
1601}
1602
1603void emit_call(int a)
1604{
1605 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1606 u_int offset=genjmp(a);
1607 output_w32(0xeb000000|offset);
1608}
1609void emit_jmp(int a)
1610{
1611 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1612 u_int offset=genjmp(a);
1613 output_w32(0xea000000|offset);
1614}
1615void emit_jne(int a)
1616{
1617 assem_debug("bne %x\n",a);
1618 u_int offset=genjmp(a);
1619 output_w32(0x1a000000|offset);
1620}
1621void emit_jeq(int a)
1622{
1623 assem_debug("beq %x\n",a);
1624 u_int offset=genjmp(a);
1625 output_w32(0x0a000000|offset);
1626}
1627void emit_js(int a)
1628{
1629 assem_debug("bmi %x\n",a);
1630 u_int offset=genjmp(a);
1631 output_w32(0x4a000000|offset);
1632}
1633void emit_jns(int a)
1634{
1635 assem_debug("bpl %x\n",a);
1636 u_int offset=genjmp(a);
1637 output_w32(0x5a000000|offset);
1638}
1639void emit_jl(int a)
1640{
1641 assem_debug("blt %x\n",a);
1642 u_int offset=genjmp(a);
1643 output_w32(0xba000000|offset);
1644}
1645void emit_jge(int a)
1646{
1647 assem_debug("bge %x\n",a);
1648 u_int offset=genjmp(a);
1649 output_w32(0xaa000000|offset);
1650}
1651void emit_jno(int a)
1652{
1653 assem_debug("bvc %x\n",a);
1654 u_int offset=genjmp(a);
1655 output_w32(0x7a000000|offset);
1656}
1657void emit_jc(int a)
1658{
1659 assem_debug("bcs %x\n",a);
1660 u_int offset=genjmp(a);
1661 output_w32(0x2a000000|offset);
1662}
1663void emit_jcc(int a)
1664{
1665 assem_debug("bcc %x\n",a);
1666 u_int offset=genjmp(a);
1667 output_w32(0x3a000000|offset);
1668}
1669
57871462 1670void emit_callreg(u_int r)
1671{
c6c3b1b3 1672 assert(r<15);
1673 assem_debug("blx %s\n",regname[r]);
1674 output_w32(0xe12fff30|r);
57871462 1675}
1676void emit_jmpreg(u_int r)
1677{
1678 assem_debug("mov pc,%s\n",regname[r]);
1679 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1680}
1681
1682void emit_readword_indexed(int offset, int rs, int rt)
1683{
1684 assert(offset>-4096&&offset<4096);
1685 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1686 if(offset>=0) {
1687 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1688 }else{
1689 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1690 }
1691}
1692void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1693{
1694 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1695 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1696}
c6c3b1b3 1697void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1698{
1699 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1700 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1701}
1702void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1703{
1704 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1705 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1706}
1707void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1708{
1709 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1710 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1711}
1712void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1713{
1714 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1715 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1716}
1717void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1718{
1719 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1720 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1721}
// Word load with an optional map register. A negative map selects the
// plain offset form; otherwise addr must be 0 and the scaled
// register-indexed form is used.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Two-word load into rh (skipped when rh is negative) and rl.
// Without a map register the words are read at addr and addr+4. With one,
// map is incremented between the two loads (so it is modified), and rh
// must not be the same register as rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1742void emit_movsbl_indexed(int offset, int rs, int rt)
1743{
1744 assert(offset>-256&&offset<256);
1745 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1746 if(offset>=0) {
1747 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1748 }else{
1749 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1750 }
1751}
1752void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1753{
1754 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1755 else {
1756 if(addr==0) {
1757 emit_shlimm(map,2,map);
1758 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1759 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1760 }else{
1761 assert(addr>-256&&addr<256);
1762 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1763 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1764 emit_movsbl_indexed(addr, rt, rt);
1765 }
1766 }
1767}
1768void emit_movswl_indexed(int offset, int rs, int rt)
1769{
1770 assert(offset>-256&&offset<256);
1771 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1772 if(offset>=0) {
1773 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1774 }else{
1775 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1776 }
1777}
1778void emit_movzbl_indexed(int offset, int rs, int rt)
1779{
1780 assert(offset>-4096&&offset<4096);
1781 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1782 if(offset>=0) {
1783 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1784 }else{
1785 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1786 }
1787}
1788void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1789{
1790 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1791 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1792}
// Unsigned byte load with an optional map register.
// A negative map selects the plain offset form. Otherwise the scaled
// register-indexed form is used, with a non-zero addr first added to rs
// and the sum kept in rt as the base.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1805void emit_movzwl_indexed(int offset, int rs, int rt)
1806{
1807 assert(offset>-256&&offset<256);
1808 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1809 if(offset>=0) {
1810 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1811 }else{
1812 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1813 }
1814}
054175e9 1815static void emit_ldrd(int offset, int rs, int rt)
1816{
1817 assert(offset>-256&&offset<256);
1818 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1819 if(offset>=0) {
1820 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1821 }else{
1822 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1823 }
1824}
57871462 1825void emit_readword(int addr, int rt)
1826{
1827 u_int offset = addr-(u_int)&dynarec_local;
1828 assert(offset<4096);
1829 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1830 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1831}
1832void emit_movsbl(int addr, int rt)
1833{
1834 u_int offset = addr-(u_int)&dynarec_local;
1835 assert(offset<256);
1836 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1837 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1838}
1839void emit_movswl(int addr, int rt)
1840{
1841 u_int offset = addr-(u_int)&dynarec_local;
1842 assert(offset<256);
1843 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1844 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1845}
1846void emit_movzbl(int addr, int rt)
1847{
1848 u_int offset = addr-(u_int)&dynarec_local;
1849 assert(offset<4096);
1850 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1851 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1852}
1853void emit_movzwl(int addr, int rt)
1854{
1855 u_int offset = addr-(u_int)&dynarec_local;
1856 assert(offset<256);
1857 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1858 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1859}
57871462 1860
57871462 1861void emit_writeword_indexed(int rt, int offset, int rs)
1862{
1863 assert(offset>-4096&&offset<4096);
1864 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1865 if(offset>=0) {
1866 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1867 }else{
1868 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1869 }
1870}
1871void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1872{
1873 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1874 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1875}
// Word store with an optional map register. A negative map selects the
// plain offset form; otherwise addr must be 0 and the scaled
// register-indexed form is used. temp is not used by this function.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Two-word store of rh and rl.
// Without a map register the words go to addr (skipped when rh is
// negative) and addr+4. With one, rh must be valid; the second word is
// addressed through temp = map+1 when temp differs from rs, otherwise rs
// itself is advanced by 4.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp = (temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1900void emit_writehword_indexed(int rt, int offset, int rs)
1901{
1902 assert(offset>-256&&offset<256);
1903 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1904 if(offset>=0) {
1905 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1906 }else{
1907 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1908 }
1909}
1910void emit_writebyte_indexed(int rt, int offset, int rs)
1911{
1912 assert(offset>-4096&&offset<4096);
1913 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1914 if(offset>=0) {
1915 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1916 }else{
1917 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1918 }
1919}
1920void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1921{
1922 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1923 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1924}
// Byte store with an optional map register.
// A negative map selects the plain offset form. Otherwise the scaled
// register-indexed form is used, with a non-zero addr first added to rs
// and the sum placed in temp as the base.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
b96d3df7 1937void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1938{
1939 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1940 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1941}
1942void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1943{
1944 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1945 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1946}
1947void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1948{
1949 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1950 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1951}
57871462 1952void emit_writeword(int rt, int addr)
1953{
1954 u_int offset = addr-(u_int)&dynarec_local;
1955 assert(offset<4096);
1956 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1957 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1958}
1959void emit_writehword(int rt, int addr)
1960{
1961 u_int offset = addr-(u_int)&dynarec_local;
1962 assert(offset<256);
1963 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1964 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1965}
1966void emit_writebyte(int rt, int addr)
1967{
1968 u_int offset = addr-(u_int)&dynarec_local;
1969 assert(offset<4096);
74426039 1970 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 1971 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1972}
57871462 1973
57871462 1974void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1975{
1976 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1977 assert(rs1<16);
1978 assert(rs2<16);
1979 assert(hi<16);
1980 assert(lo<16);
1981 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1982}
1983void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1984{
1985 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1986 assert(rs1<16);
1987 assert(rs2<16);
1988 assert(hi<16);
1989 assert(lo<16);
1990 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1991}
1992
57871462 1993void emit_clz(int rs,int rt)
1994{
1995 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1996 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1997}
1998
1999void emit_subcs(int rs1,int rs2,int rt)
2000{
2001 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2002 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2003}
2004
2005void emit_shrcc_imm(int rs,u_int imm,int rt)
2006{
2007 assert(imm>0);
2008 assert(imm<32);
2009 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2010 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2011}
2012
b1be1eee 2013void emit_shrne_imm(int rs,u_int imm,int rt)
2014{
2015 assert(imm>0);
2016 assert(imm<32);
2017 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2018 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2019}
2020
57871462 2021void emit_negmi(int rs, int rt)
2022{
2023 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2024 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2025}
2026
2027void emit_negsmi(int rs, int rt)
2028{
2029 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2030 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2031}
2032
2033void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2034{
2035 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2036 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2037}
2038
2039void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2040{
2041 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2042 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2043}
2044
2045void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2046{
2047 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2048 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2049}
2050
2051void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2052{
2053 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2054 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2055}
2056
2057void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2058{
2059 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2060 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2061}
2062
2063void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2064{
2065 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2066 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2067}
2068
2069void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2070{
2071 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2072 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2073}
2074
2075void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2076{
2077 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2078 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2079}
2080
2081void emit_teq(int rs, int rt)
2082{
2083 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2084 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2085}
2086
2087void emit_rsbimm(int rs, int imm, int rt)
2088{
2089 u_int armval;
cfbd3c6e 2090 genimm_checked(imm,&armval);
57871462 2091 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2092 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2093}
2094
2095// Load 2 immediates optimizing for small code size
2096void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2097{
2098 emit_movimm(imm1,rt1);
2099 u_int armval;
2100 if(genimm(imm2-imm1,&armval)) {
2101 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2102 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2103 }else if(genimm(imm1-imm2,&armval)) {
2104 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2105 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2106 }
2107 else emit_movimm(imm2,rt2);
2108}
2109
2110// Conditionally select one of two immediates, optimizing for small code size
2111// This will only be called if HAVE_CMOV_IMM is defined
2112void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2113{
2114 u_int armval;
2115 if(genimm(imm2-imm1,&armval)) {
2116 emit_movimm(imm1,rt);
2117 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2118 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2119 }else if(genimm(imm1-imm2,&armval)) {
2120 emit_movimm(imm1,rt);
2121 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2122 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2123 }
2124 else {
665f33e1 2125 #ifndef HAVE_ARMV7
57871462 2126 emit_movimm(imm1,rt);
2127 add_literal((int)out,imm2);
2128 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2129 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2130 #else
2131 emit_movw(imm1&0x0000FFFF,rt);
2132 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2133 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2134 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2135 }
2136 emit_movt(imm1&0xFFFF0000,rt);
2137 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2138 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2139 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2140 }
2141 #endif
2142 }
2143}
2144
57871462 2145// special case for checking invalid_code
2146void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2147{
2148 assert(imm<128&&imm>=0);
2149 assert(r>=0&&r<16);
2150 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2151 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2152 emit_cmpimm(HOST_TEMPREG,imm);
2153}
2154
0bbd1454 2155void emit_callne(int a)
2156{
2157 assem_debug("blne %x\n",a);
2158 u_int offset=genjmp(a);
2159 output_w32(0x1b000000|offset);
2160}
2161
57871462 2162// Used to preload hash table entries
57871462 2163void emit_prefetchreg(int r)
2164{
2165 assem_debug("pld %s\n",regname[r]);
2166 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2167}
2168
2169// Special case for mini_ht
2170void emit_ldreq_indexed(int rs, u_int offset, int rt)
2171{
2172 assert(offset<4096);
2173 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2174 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2175}
2176
57871462 2177void emit_bicne_imm(int rs,int imm,int rt)
2178{
2179 u_int armval;
cfbd3c6e 2180 genimm_checked(imm,&armval);
57871462 2181 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2182 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2183}
2184
2185void emit_biccs_imm(int rs,int imm,int rt)
2186{
2187 u_int armval;
cfbd3c6e 2188 genimm_checked(imm,&armval);
57871462 2189 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2190 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2191}
2192
2193void emit_bicvc_imm(int rs,int imm,int rt)
2194{
2195 u_int armval;
cfbd3c6e 2196 genimm_checked(imm,&armval);
57871462 2197 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2198 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2199}
2200
2201void emit_bichi_imm(int rs,int imm,int rt)
2202{
2203 u_int armval;
cfbd3c6e 2204 genimm_checked(imm,&armval);
57871462 2205 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2206 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2207}
2208
2209void emit_orrvs_imm(int rs,int imm,int rt)
2210{
2211 u_int armval;
cfbd3c6e 2212 genimm_checked(imm,&armval);
57871462 2213 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2214 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2215}
2216
b9b61529 2217void emit_orrne_imm(int rs,int imm,int rt)
2218{
2219 u_int armval;
cfbd3c6e 2220 genimm_checked(imm,&armval);
b9b61529 2221 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2222 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2223}
2224
2225void emit_andne_imm(int rs,int imm,int rt)
2226{
2227 u_int armval;
cfbd3c6e 2228 genimm_checked(imm,&armval);
b9b61529 2229 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2230 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2231}
2232
665f33e1 2233void emit_addpl_imm(int rs,int imm,int rt)
2234{
2235 u_int armval;
2236 genimm_checked(imm,&armval);
2237 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2238 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2239}
2240
57871462 2241void emit_jno_unlikely(int a)
2242{
2243 //emit_jno(a);
2244 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2245 output_w32(0x72800000|rd_rn_rm(15,15,0));
2246}
2247
054175e9 2248static void save_regs_all(u_int reglist)
57871462 2249{
054175e9 2250 int i;
57871462 2251 if(!reglist) return;
2252 assem_debug("stmia fp,{");
054175e9 2253 for(i=0;i<16;i++)
2254 if(reglist&(1<<i))
2255 assem_debug("r%d,",i);
57871462 2256 assem_debug("}\n");
2257 output_w32(0xe88b0000|reglist);
2258}
054175e9 2259static void restore_regs_all(u_int reglist)
57871462 2260{
054175e9 2261 int i;
57871462 2262 if(!reglist) return;
2263 assem_debug("ldmia fp,{");
054175e9 2264 for(i=0;i<16;i++)
2265 if(reglist&(1<<i))
2266 assem_debug("r%d,",i);
57871462 2267 assem_debug("}\n");
2268 output_w32(0xe89b0000|reglist);
2269}
054175e9 2270// Save registers before function call
2271static void save_regs(u_int reglist)
2272{
4d646738 2273 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2274 save_regs_all(reglist);
2275}
2276// Restore registers after function call
2277static void restore_regs(u_int reglist)
2278{
4d646738 2279 reglist&=CALLER_SAVE_REGS;
054175e9 2280 restore_regs_all(reglist);
2281}
57871462 2282
57871462 2283/* Stubs/epilogue */
2284
2285void literal_pool(int n)
2286{
2287 if(!literalcount) return;
2288 if(n) {
2289 if((int)out-literals[0][0]<4096-n) return;
2290 }
2291 u_int *ptr;
2292 int i;
2293 for(i=0;i<literalcount;i++)
2294 {
77750690 2295 u_int l_addr=(u_int)out;
2296 int j;
2297 for(j=0;j<i;j++) {
2298 if(literals[j][1]==literals[i][1]) {
2299 //printf("dup %08x\n",literals[i][1]);
2300 l_addr=literals[j][0];
2301 break;
2302 }
2303 }
57871462 2304 ptr=(u_int *)literals[i][0];
77750690 2305 u_int offset=l_addr-(u_int)ptr-8;
57871462 2306 assert(offset<4096);
2307 assert(!(offset&3));
2308 *ptr|=offset;
77750690 2309 if(l_addr==(u_int)out) {
2310 literals[i][0]=l_addr; // remember for dupes
2311 output_w32(literals[i][1]);
2312 }
57871462 2313 }
2314 literalcount=0;
2315}
2316
2317void literal_pool_jumpover(int n)
2318{
2319 if(!literalcount) return;
2320 if(n) {
2321 if((int)out-literals[0][0]<4096-n) return;
2322 }
2323 int jaddr=(int)out;
2324 emit_jmp(0);
2325 literal_pool(0);
2326 set_jump_target(jaddr,(int)out);
2327}
2328
c67af2ac 2329emit_extjump2(u_int addr, int target, int linker)
57871462 2330{
2331 u_char *ptr=(u_char *)addr;
2332 assert((ptr[3]&0x0e)==0xa);
2333 emit_loadlp(target,0);
2334 emit_loadlp(addr,1);
24385cae 2335 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2336 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2337//DEBUG >
2338#ifdef DEBUG_CYCLE_COUNT
2339 emit_readword((int)&last_count,ECX);
2340 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2341 emit_readword((int)&next_interupt,ECX);
2342 emit_writeword(HOST_CCREG,(int)&Count);
2343 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2344 emit_writeword(ECX,(int)&last_count);
2345#endif
2346//DEBUG <
2347 emit_jmp(linker);
2348}
2349
2350emit_extjump(int addr, int target)
2351{
2352 emit_extjump2(addr, target, (int)dyna_linker);
2353}
2354emit_extjump_ds(int addr, int target)
2355{
2356 emit_extjump2(addr, target, (int)dyna_linker_ds);
2357}
2358
13e35c04 2359// put rt_val into rt, potentially making use of rs with value rs_val
2360static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2361{
8575a877 2362 u_int armval;
2363 int diff;
2364 if(genimm(rt_val,&armval)) {
2365 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2366 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2367 return;
2368 }
2369 if(genimm(~rt_val,&armval)) {
2370 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2371 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2372 return;
2373 }
2374 diff=rt_val-rs_val;
2375 if(genimm(diff,&armval)) {
2376 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2377 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2378 return;
2379 }else if(genimm(-diff,&armval)) {
2380 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2381 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2382 return;
2383 }
2384 emit_movimm(rt_val,rt);
2385}
2386
// return 1 if the above function can do its job cheaply
// That is: the values are equal, or their distance in either direction,
// after stripping trailing pairs of zero bits, fits in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  // Both directions are computed in unsigned arithmetic so that nothing
  // overflows when the distance is 0x80000000.
  const u_int distance[2]={v2-v1,v1-v2};
  int k;

  if(v1==v2) return 1;
  for(k=0;k<2;k++) {
    u_int xs=distance[k];
    while(xs!=0&&(xs&3)==0)
      xs>>=2;
    if(xs<0x100) return 1;
  }
  return 0;
}
cbbab9cd 2402
// trashes r2
// Moves host register a0 into r0 and a1 into r1, ordering the moves so that
// neither source is overwritten before it is read. A negative register
// number means "no argument" and is skipped.
static void pass_args(int a0, int a1)
{
  if(a1==0&&a0!=0) {
    // a1 currently lives in r0, which is also the destination of a0
    if(a0==1) {
      // must swap
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
    }
    else {
      emit_mov(a1,1);
      if(a0>=0) emit_mov(a0,0);
    }
    return;
  }

  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2419
b1be1eee 2420static void mov_loadtype_adj(int type,int rs,int rt)
2421{
2422 switch(type) {
2423 case LOADB_STUB: emit_signextend8(rs,rt); break;
2424 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2425 case LOADH_STUB: emit_signextend16(rs,rt); break;
2426 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2427 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2428 default: assert(0);
2429 }
2430}
2431
b1be1eee 2432#include "pcsxmem.h"
2433#include "pcsxmem_inline.c"
b1be1eee 2434
57871462 2435do_readstub(int n)
2436{
2437 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2438 literal_pool(256);
2439 set_jump_target(stubs[n][1],(int)out);
2440 int type=stubs[n][0];
2441 int i=stubs[n][3];
2442 int rs=stubs[n][4];
2443 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2444 u_int reglist=stubs[n][7];
2445 signed char *i_regmap=i_regs->regmap;
2446 int addr=get_reg(i_regmap,AGEN1+(i&1));
2447 int rth,rt;
2448 int ds;
b9b61529 2449 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2450 rth=get_reg(i_regmap,FTEMP|64);
2451 rt=get_reg(i_regmap,FTEMP);
2452 }else{
2453 rth=get_reg(i_regmap,rt1[i]|64);
2454 rt=get_reg(i_regmap,rt1[i]);
2455 }
2456 assert(rs>=0);
c6c3b1b3 2457 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2458 reglist|=(1<<rs);
2459 for(r=0;r<=12;r++) {
2460 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2461 temp=r; break;
2462 }
2463 }
db829eeb 2464 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2465 reglist&=~(1<<rt);
2466 if(temp==-1) {
2467 save_regs(reglist);
2468 regs_saved=1;
2469 temp=(rs==0)?2:0;
2470 }
2471 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2472 temp2=1;
2473 emit_readword((int)&mem_rtab,temp);
2474 emit_shrimm(rs,12,temp2);
2475 emit_readword_dualindexedx4(temp,temp2,temp2);
2476 emit_lsls_imm(temp2,1,temp2);
2477 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2478 switch(type) {
2479 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2480 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2481 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2482 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2483 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2484 }
2485 }
2486 if(regs_saved) {
2487 restore_jump=(int)out;
2488 emit_jcc(0); // jump to reg restore
2489 }
2490 else
2491 emit_jcc(stubs[n][2]); // return address
2492
2493 if(!regs_saved)
2494 save_regs(reglist);
2495 int handler=0;
2496 if(type==LOADB_STUB||type==LOADBU_STUB)
2497 handler=(int)jump_handler_read8;
2498 if(type==LOADH_STUB||type==LOADHU_STUB)
2499 handler=(int)jump_handler_read16;
2500 if(type==LOADW_STUB)
2501 handler=(int)jump_handler_read32;
2502 assert(handler!=0);
b96d3df7 2503 pass_args(rs,temp2);
c6c3b1b3 2504 int cc=get_reg(i_regmap,CCREG);
2505 if(cc<0)
2506 emit_loadreg(CCREG,2);
2573466a 2507 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2508 emit_call(handler);
2509 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2510 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2511 }
2512 if(restore_jump)
2513 set_jump_target(restore_jump,(int)out);
2514 restore_regs(reglist);
2515 emit_jmp(stubs[n][2]); // return address
57871462 2516}
2517
c6c3b1b3 2518// return memhandler, or get directly accessable address and return 0
2519u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2520{
2521 u_int l1,l2=0;
2522 l1=((u_int *)table)[addr>>12];
2523 if((l1&(1<<31))==0) {
2524 u_int v=l1<<1;
2525 *addr_host=v+addr;
2526 return 0;
2527 }
2528 else {
2529 l1<<=1;
2530 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2531 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2532 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2533 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2534 else
2535 l2=((u_int *)l1)[(addr&0xfff)/4];
2536 if((l2&(1<<31))==0) {
2537 u_int v=l2<<1;
2538 *addr_host=v+(addr&0xfff);
2539 return 0;
2540 }
2541 return l2<<1;
2542 }
2543}
c6c3b1b3 2544
57871462 2545inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2546{
2547 int rs=get_reg(regmap,target);
2548 int rth=get_reg(regmap,target|64);
2549 int rt=get_reg(regmap,target);
535d208a 2550 if(rs<0) rs=get_reg(regmap,-1);
57871462 2551 assert(rs>=0);
b1be1eee 2552 u_int handler,host_addr=0,is_dynamic,far_call=0;
2553 int cc=get_reg(regmap,CCREG);
2554 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2555 return;
c6c3b1b3 2556 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2557 if (handler==0) {
db829eeb 2558 if(rt<0||rt1[i]==0)
c6c3b1b3 2559 return;
13e35c04 2560 if(addr!=host_addr)
2561 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2562 switch(type) {
2563 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2564 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2565 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2566 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2567 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2568 default: assert(0);
2569 }
2570 return;
2571 }
b1be1eee 2572 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2573 if(is_dynamic) {
2574 if(type==LOADB_STUB||type==LOADBU_STUB)
2575 handler=(int)jump_handler_read8;
2576 if(type==LOADH_STUB||type==LOADHU_STUB)
2577 handler=(int)jump_handler_read16;
2578 if(type==LOADW_STUB)
2579 handler=(int)jump_handler_read32;
2580 }
c6c3b1b3 2581
2582 // call a memhandler
db829eeb 2583 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2584 reglist&=~(1<<rt);
2585 save_regs(reglist);
2586 if(target==0)
2587 emit_movimm(addr,0);
2588 else if(rs!=0)
2589 emit_mov(rs,0);
c6c3b1b3 2590 int offset=(int)handler-(int)out-8;
2591 if(offset<-33554432||offset>=33554432) {
2592 // unreachable memhandler, a plugin func perhaps
b1be1eee 2593 emit_movimm(handler,12);
2594 far_call=1;
2595 }
2596 if(cc<0)
2597 emit_loadreg(CCREG,2);
2598 if(is_dynamic) {
2599 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2600 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2601 }
b1be1eee 2602 else {
2603 emit_readword((int)&last_count,3);
2604 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2605 emit_add(2,3,2);
2606 emit_writeword(2,(int)&Count);
2607 }
2608
2609 if(far_call)
2610 emit_callreg(12);
c6c3b1b3 2611 else
2612 emit_call(handler);
b1be1eee 2613
db829eeb 2614 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2615 switch(type) {
2616 case LOADB_STUB: emit_signextend8(0,rt); break;
2617 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2618 case LOADH_STUB: emit_signextend16(0,rt); break;
2619 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2620 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2621 default: assert(0);
2622 }
2623 }
2624 restore_regs(reglist);
57871462 2625}
2626
2627do_writestub(int n)
2628{
2629 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2630 literal_pool(256);
2631 set_jump_target(stubs[n][1],(int)out);
2632 int type=stubs[n][0];
2633 int i=stubs[n][3];
2634 int rs=stubs[n][4];
2635 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2636 u_int reglist=stubs[n][7];
2637 signed char *i_regmap=i_regs->regmap;
2638 int addr=get_reg(i_regmap,AGEN1+(i&1));
2639 int rth,rt,r;
2640 int ds;
b9b61529 2641 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2642 rth=get_reg(i_regmap,FTEMP|64);
2643 rt=get_reg(i_regmap,r=FTEMP);
2644 }else{
2645 rth=get_reg(i_regmap,rs2[i]|64);
2646 rt=get_reg(i_regmap,r=rs2[i]);
2647 }
2648 assert(rs>=0);
2649 assert(rt>=0);
b96d3df7 2650 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
2651 int reglist2=reglist|(1<<rs)|(1<<rt);
2652 for(rtmp=0;rtmp<=12;rtmp++) {
2653 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2654 temp=rtmp; break;
2655 }
2656 }
2657 if(temp==-1) {
2658 save_regs(reglist);
2659 regs_saved=1;
2660 for(rtmp=0;rtmp<=3;rtmp++)
2661 if(rtmp!=rs&&rtmp!=rt)
2662 {temp=rtmp;break;}
2663 }
2664 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2665 temp2=3;
2666 emit_readword((int)&mem_wtab,temp);
2667 emit_shrimm(rs,12,temp2);
2668 emit_readword_dualindexedx4(temp,temp2,temp2);
2669 emit_lsls_imm(temp2,1,temp2);
2670 switch(type) {
2671 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2672 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2673 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2674 default: assert(0);
2675 }
2676 if(regs_saved) {
2677 restore_jump=(int)out;
2678 emit_jcc(0); // jump to reg restore
2679 }
2680 else
2681 emit_jcc(stubs[n][2]); // return address (invcode check)
2682
2683 if(!regs_saved)
2684 save_regs(reglist);
2685 int handler=0;
2686 switch(type) {
2687 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2688 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2689 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2690 }
2691 assert(handler!=0);
2692 pass_args(rs,rt);
2693 if(temp2!=3)
2694 emit_mov(temp2,3);
2695 int cc=get_reg(i_regmap,CCREG);
2696 if(cc<0)
2697 emit_loadreg(CCREG,2);
2573466a 2698 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2699 // returns new cycle_count
2700 emit_call(handler);
2573466a 2701 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2702 if(cc<0)
2703 emit_storereg(CCREG,2);
2704 if(restore_jump)
2705 set_jump_target(restore_jump,(int)out);
2706 restore_regs(reglist);
2707 ra=stubs[n][2];
b96d3df7 2708 emit_jmp(ra);
57871462 2709}
2710
2711inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2712{
2713 int rs=get_reg(regmap,-1);
2714 int rth=get_reg(regmap,target|64);
2715 int rt=get_reg(regmap,target);
2716 assert(rs>=0);
2717 assert(rt>=0);
b96d3df7 2718 u_int handler,host_addr=0;
b96d3df7 2719 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2720 if (handler==0) {
13e35c04 2721 if(addr!=host_addr)
2722 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2723 switch(type) {
2724 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2725 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2726 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2727 default: assert(0);
2728 }
2729 return;
2730 }
2731
2732 // call a memhandler
2733 save_regs(reglist);
13e35c04 2734 pass_args(rs,rt);
b96d3df7 2735 int cc=get_reg(regmap,CCREG);
2736 if(cc<0)
2737 emit_loadreg(CCREG,2);
2573466a 2738 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 2739 emit_movimm(handler,3);
2740 // returns new cycle_count
2741 emit_call((int)jump_handler_write_h);
2573466a 2742 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2743 if(cc<0)
2744 emit_storereg(CCREG,2);
2745 restore_regs(reglist);
57871462 2746}
2747
2748do_unalignedwritestub(int n)
2749{
b7918751 2750 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2751 literal_pool(256);
57871462 2752 set_jump_target(stubs[n][1],(int)out);
b7918751 2753
2754 int i=stubs[n][3];
2755 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2756 int addr=stubs[n][5];
2757 u_int reglist=stubs[n][7];
2758 signed char *i_regmap=i_regs->regmap;
2759 int temp2=get_reg(i_regmap,FTEMP);
2760 int rt;
2761 int ds, real_rs;
2762 rt=get_reg(i_regmap,rs2[i]);
2763 assert(rt>=0);
2764 assert(addr>=0);
2765 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2766 reglist|=(1<<addr);
2767 reglist&=~(1<<temp2);
2768
b96d3df7 2769#if 1
2770 // don't bother with it and call write handler
2771 save_regs(reglist);
2772 pass_args(addr,rt);
2773 int cc=get_reg(i_regmap,CCREG);
2774 if(cc<0)
2775 emit_loadreg(CCREG,2);
2573466a 2776 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2777 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 2778 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2779 if(cc<0)
2780 emit_storereg(CCREG,2);
2781 restore_regs(reglist);
2782 emit_jmp(stubs[n][2]); // return address
2783#else
b7918751 2784 emit_andimm(addr,0xfffffffc,temp2);
2785 emit_writeword(temp2,(int)&address);
2786
2787 save_regs(reglist);
b7918751 2788 emit_shrimm(addr,16,1);
2789 int cc=get_reg(i_regmap,CCREG);
2790 if(cc<0) {
2791 emit_loadreg(CCREG,2);
2792 }
2793 emit_movimm((u_int)readmem,0);
2794 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
b7918751 2795 emit_call((int)&indirect_jump_indexed);
2796 restore_regs(reglist);
2797
2798 emit_readword((int)&readmem_dword,temp2);
2799 int temp=addr; //hmh
2800 emit_shlimm(addr,3,temp);
2801 emit_andimm(temp,24,temp);
2802#ifdef BIG_ENDIAN_MIPS
2803 if (opcode[i]==0x2e) // SWR
2804#else
2805 if (opcode[i]==0x2a) // SWL
2806#endif
2807 emit_xorimm(temp,24,temp);
2808 emit_movimm(-1,HOST_TEMPREG);
55439448 2809 if (opcode[i]==0x2a) { // SWL
b7918751 2810 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2811 emit_orrshr(rt,temp,temp2);
2812 }else{
2813 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2814 emit_orrshl(rt,temp,temp2);
2815 }
2816 emit_readword((int)&address,addr);
2817 emit_writeword(temp2,(int)&word);
2818 //save_regs(reglist); // don't need to, no state changes
2819 emit_shrimm(addr,16,1);
2820 emit_movimm((u_int)writemem,0);
2821 //emit_call((int)&indirect_jump_indexed);
2822 emit_mov(15,14);
2823 emit_readword_dualindexedx4(0,1,15);
2824 emit_readword((int)&Count,HOST_TEMPREG);
2825 emit_readword((int)&next_interupt,2);
2826 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2827 emit_writeword(2,(int)&last_count);
2828 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2829 if(cc<0) {
2830 emit_storereg(CCREG,HOST_TEMPREG);
2831 }
2832 restore_regs(reglist);
57871462 2833 emit_jmp(stubs[n][2]); // return address
b96d3df7 2834#endif
57871462 2835}
2836
// Debug helper: dumps the register values passed in. esp is accepted but
// not printed.
// NOTE(review): the last value is read from the word just below the first
// parameter, which depends on the stack layout -- confirm it is still
// meaningful on this target.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int *first_arg=&edi;

  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,first_arg[-1]);
}
2841
2842do_invstub(int n)
2843{
2844 literal_pool(20);
2845 u_int reglist=stubs[n][3];
2846 set_jump_target(stubs[n][1],(int)out);
2847 save_regs(reglist);
2848 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2849 emit_call((int)&invalidate_addr);
2850 restore_regs(reglist);
2851 emit_jmp(stubs[n][2]); // return address
2852}
2853
2854int do_dirty_stub(int i)
2855{
2856 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2857 u_int addr=(u_int)source;
57871462 2858 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2859 #ifndef HAVE_ARMV7
ac545b3a 2860 emit_loadlp(addr,1);
57871462 2861 emit_loadlp((int)copy,2);
2862 emit_loadlp(slen*4,3);
2863 #else
ac545b3a 2864 emit_movw(addr&0x0000FFFF,1);
57871462 2865 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2866 emit_movt(addr&0xFFFF0000,1);
57871462 2867 emit_movt(((u_int)copy)&0xFFFF0000,2);
2868 emit_movw(slen*4,3);
2869 #endif
2870 emit_movimm(start+i*4,0);
2871 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2872 int entry=(int)out;
2873 load_regs_entry(i);
2874 if(entry==(int)out) entry=instr_addr[i];
2875 emit_jmp(instr_addr[i]);
2876 return entry;
2877}
2878
2879void do_dirty_stub_ds()
2880{
2881 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2882 #ifndef HAVE_ARMV7
57871462 2883 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2884 emit_loadlp((int)copy,2);
2885 emit_loadlp(slen*4,3);
2886 #else
2887 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2888 emit_movw(((u_int)copy)&0x0000FFFF,2);
2889 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2890 emit_movt(((u_int)copy)&0xFFFF0000,2);
2891 emit_movw(slen*4,3);
2892 #endif
2893 emit_movimm(start+1,0);
2894 emit_call((int)&verify_code_ds);
2895}
2896
2897do_cop1stub(int n)
2898{
2899 literal_pool(256);
2900 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2901 set_jump_target(stubs[n][1],(int)out);
2902 int i=stubs[n][3];
3d624f89 2903// int rs=stubs[n][4];
57871462 2904 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2905 int ds=stubs[n][6];
2906 if(!ds) {
2907 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2908 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2909 }
2910 //else {printf("fp exception in delay slot\n");}
2911 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2912 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2913 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 2914 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 2915 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2916}
2917
57871462 2918/* Special assem */
2919
2920void shift_assemble_arm(int i,struct regstat *i_regs)
2921{
2922 if(rt1[i]) {
2923 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2924 {
2925 signed char s,t,shift;
2926 t=get_reg(i_regs->regmap,rt1[i]);
2927 s=get_reg(i_regs->regmap,rs1[i]);
2928 shift=get_reg(i_regs->regmap,rs2[i]);
2929 if(t>=0){
2930 if(rs1[i]==0)
2931 {
2932 emit_zeroreg(t);
2933 }
2934 else if(rs2[i]==0)
2935 {
2936 assert(s>=0);
2937 if(s!=t) emit_mov(s,t);
2938 }
2939 else
2940 {
2941 emit_andimm(shift,31,HOST_TEMPREG);
2942 if(opcode2[i]==4) // SLLV
2943 {
2944 emit_shl(s,HOST_TEMPREG,t);
2945 }
2946 if(opcode2[i]==6) // SRLV
2947 {
2948 emit_shr(s,HOST_TEMPREG,t);
2949 }
2950 if(opcode2[i]==7) // SRAV
2951 {
2952 emit_sar(s,HOST_TEMPREG,t);
2953 }
2954 }
2955 }
2956 } else { // DSLLV/DSRLV/DSRAV
2957 signed char sh,sl,th,tl,shift;
2958 th=get_reg(i_regs->regmap,rt1[i]|64);
2959 tl=get_reg(i_regs->regmap,rt1[i]);
2960 sh=get_reg(i_regs->regmap,rs1[i]|64);
2961 sl=get_reg(i_regs->regmap,rs1[i]);
2962 shift=get_reg(i_regs->regmap,rs2[i]);
2963 if(tl>=0){
2964 if(rs1[i]==0)
2965 {
2966 emit_zeroreg(tl);
2967 if(th>=0) emit_zeroreg(th);
2968 }
2969 else if(rs2[i]==0)
2970 {
2971 assert(sl>=0);
2972 if(sl!=tl) emit_mov(sl,tl);
2973 if(th>=0&&sh!=th) emit_mov(sh,th);
2974 }
2975 else
2976 {
2977 // FIXME: What if shift==tl ?
2978 assert(shift!=tl);
2979 int temp=get_reg(i_regs->regmap,-1);
2980 int real_th=th;
2981 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2982 assert(sl>=0);
2983 assert(sh>=0);
2984 emit_andimm(shift,31,HOST_TEMPREG);
2985 if(opcode2[i]==0x14) // DSLLV
2986 {
2987 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2988 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2989 emit_orrshr(sl,HOST_TEMPREG,th);
2990 emit_andimm(shift,31,HOST_TEMPREG);
2991 emit_testimm(shift,32);
2992 emit_shl(sl,HOST_TEMPREG,tl);
2993 if(th>=0) emit_cmovne_reg(tl,th);
2994 emit_cmovne_imm(0,tl);
2995 }
2996 if(opcode2[i]==0x16) // DSRLV
2997 {
2998 assert(th>=0);
2999 emit_shr(sl,HOST_TEMPREG,tl);
3000 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3001 emit_orrshl(sh,HOST_TEMPREG,tl);
3002 emit_andimm(shift,31,HOST_TEMPREG);
3003 emit_testimm(shift,32);
3004 emit_shr(sh,HOST_TEMPREG,th);
3005 emit_cmovne_reg(th,tl);
3006 if(real_th>=0) emit_cmovne_imm(0,th);
3007 }
3008 if(opcode2[i]==0x17) // DSRAV
3009 {
3010 assert(th>=0);
3011 emit_shr(sl,HOST_TEMPREG,tl);
3012 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3013 if(real_th>=0) {
3014 assert(temp>=0);
3015 emit_sarimm(th,31,temp);
3016 }
3017 emit_orrshl(sh,HOST_TEMPREG,tl);
3018 emit_andimm(shift,31,HOST_TEMPREG);
3019 emit_testimm(shift,32);
3020 emit_sar(sh,HOST_TEMPREG,th);
3021 emit_cmovne_reg(th,tl);
3022 if(real_th>=0) emit_cmovne_reg(temp,th);
3023 }
3024 }
3025 }
3026 }
3027 }
3028}
ffb0b9e0 3029
ffb0b9e0 3030static void speculate_mov(int rs,int rt)
3031{
3032 if(rt!=0) {
3033 smrv_strong_next|=1<<rt;
3034 smrv[rt]=smrv[rs];
3035 }
3036}
3037
3038static void speculate_mov_weak(int rs,int rt)
3039{
3040 if(rt!=0) {
3041 smrv_weak_next|=1<<rt;
3042 smrv[rt]=smrv[rs];
3043 }
3044}
3045
3046static void speculate_register_values(int i)
3047{
3048 if(i==0) {
3049 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3050 // gp,sp are likely to stay the same throughout the block
3051 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3052 smrv_weak_next=~smrv_strong_next;
3053 //printf(" llr %08x\n", smrv[4]);
3054 }
3055 smrv_strong=smrv_strong_next;
3056 smrv_weak=smrv_weak_next;
3057 switch(itype[i]) {
3058 case ALU:
3059 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3060 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3061 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3062 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3063 else {
3064 smrv_strong_next&=~(1<<rt1[i]);
3065 smrv_weak_next&=~(1<<rt1[i]);
3066 }
3067 break;
3068 case SHIFTIMM:
3069 smrv_strong_next&=~(1<<rt1[i]);
3070 smrv_weak_next&=~(1<<rt1[i]);
3071 // fallthrough
3072 case IMM16:
3073 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3074 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3075 if(hr>=0) {
3076 if(get_final_value(hr,i,&value))
3077 smrv[rt1[i]]=value;
3078 else smrv[rt1[i]]=constmap[i][hr];
3079 smrv_strong_next|=1<<rt1[i];
3080 }
3081 }
3082 else {
3083 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3084 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3085 }
3086 break;
3087 case LOAD:
3088 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3089 // special case for BIOS
3090 smrv[rt1[i]]=0xa0000000;
3091 smrv_strong_next|=1<<rt1[i];
3092 break;
3093 }
3094 // fallthrough
3095 case SHIFT:
3096 case LOADLR:
3097 case MOV:
3098 smrv_strong_next&=~(1<<rt1[i]);
3099 smrv_weak_next&=~(1<<rt1[i]);
3100 break;
3101 case COP0:
3102 case COP2:
3103 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3104 smrv_strong_next&=~(1<<rt1[i]);
3105 smrv_weak_next&=~(1<<rt1[i]);
3106 }
3107 break;
3108 case C2LS:
3109 if (opcode[i]==0x32) { // LWC2
3110 smrv_strong_next&=~(1<<rt1[i]);
3111 smrv_weak_next&=~(1<<rt1[i]);
3112 }
3113 break;
3114 }
3115#if 0
3116 int r=4;
3117 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3118 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3119#endif
3120}
3121
// Memory region classes returned by get_ptr_mem_type(), named after the
// address range they stand for. MTYPE_8000 is 0 and doubles as the default.
enum {
  MTYPE_8000 = 0,
  MTYPE_8020 = 1,
  MTYPE_0000 = 2,
  MTYPE_A000 = 3,
  MTYPE_1F80 = 4,
};
3129
3130static int get_ptr_mem_type(u_int a)
3131{
3132 if(a < 0x00200000) {
3133 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3134 // return wrong, must use memhandler for BIOS self-test to pass
3135 // 007 does similar stuff from a00 mirror, weird stuff
3136 return MTYPE_8000;
3137 return MTYPE_0000;
3138 }
3139 if(0x1f800000 <= a && a < 0x1f801000)
3140 return MTYPE_1F80;
3141 if(0x80200000 <= a && a < 0x80800000)
3142 return MTYPE_8020;
3143 if(0xa0000000 <= a && a < 0xa0200000)
3144 return MTYPE_A000;
3145 return MTYPE_8000;
3146}
ffb0b9e0 3147
3148static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3149{
3150 int jaddr,type=0;
ffb0b9e0 3151 int mr=rs1[i];
3152 if(((smrv_strong|smrv_weak)>>mr)&1) {
3153 type=get_ptr_mem_type(smrv[mr]);
3154 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3155 }
3156 else {
3157 // use the mirror we are running on
3158 type=get_ptr_mem_type(start);
3159 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3160 }
3161
3162 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3163 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3164 addr=*addr_reg_override=HOST_TEMPREG;
3165 type=0;
3166 }
3167 else if(type==MTYPE_0000) { // RAM 0 mirror
3168 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3169 addr=*addr_reg_override=HOST_TEMPREG;
3170 type=0;
3171 }
3172 else if(type==MTYPE_A000) { // RAM A mirror
3173 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3174 addr=*addr_reg_override=HOST_TEMPREG;
3175 type=0;
3176 }
3177 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3178 if (psxH == (void *)0x1f800000) {
3179 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3180 emit_cmpimm(HOST_TEMPREG,0x1000);
3181 jaddr=(int)out;
3182 emit_jc(0);
3183 }
3184 else {
3185 // do usual RAM check, jump will go to the right handler
3186 type=0;
3187 }
ffb0b9e0 3188 }
ffb0b9e0 3189
3190 if(type==0)
3191 {
3192 emit_cmpimm(addr,RAM_SIZE);
3193 jaddr=(int)out;
3194 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3195 // Hint to branch predictor that the branch is unlikely to be taken
3196 if(rs1[i]>=28)
3197 emit_jno_unlikely(0);
3198 else
3199 #endif
3200 emit_jno(0);
a327ad27 3201 if(ram_offset!=0) {
3202 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3203 addr=*addr_reg_override=HOST_TEMPREG;
3204 }
ffb0b9e0 3205 }
3206
3207 return jaddr;
3208}
3209
// Alias: the generic name shift_assemble expands to shift_assemble_arm.
57871462 3210#define shift_assemble shift_assemble_arm
3211
3212void loadlr_assemble_arm(int i,struct regstat *i_regs)
3213{
3214 int s,th,tl,temp,temp2,addr,map=-1;
3215 int offset;
3216 int jaddr=0;
af4ee1fe 3217 int memtarget=0,c=0;
ffb0b9e0 3218 int fastload_reg_override=0;
57871462 3219 u_int hr,reglist=0;
3220 th=get_reg(i_regs->regmap,rt1[i]|64);
3221 tl=get_reg(i_regs->regmap,rt1[i]);
3222 s=get_reg(i_regs->regmap,rs1[i]);
3223 temp=get_reg(i_regs->regmap,-1);
3224 temp2=get_reg(i_regs->regmap,FTEMP);
3225 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3226 assert(addr<0);
3227 offset=imm[i];
3228 for(hr=0;hr<HOST_REGS;hr++) {
3229 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3230 }
3231 reglist|=1<<temp;
3232 if(offset||s<0||c) addr=temp2;
3233 else addr=s;
3234 if(s>=0) {
3235 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3236 if(c) {
3237 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3238 }
57871462 3239 }
1edfcc68 3240 if(!c) {
3241 #ifdef RAM_OFFSET
3242 map=get_reg(i_regs->regmap,ROREG);
3243 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3244 #endif
3245 emit_shlimm(addr,3,temp);
3246 if (opcode[i]==0x22||opcode[i]==0x26) {
3247 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3248 }else{
3249 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
535d208a 3250 }
1edfcc68 3251 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3252 }
3253 else {
3254 if(ram_offset&&memtarget) {
3255 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3256 fastload_reg_override=HOST_TEMPREG;
57871462 3257 }
1edfcc68 3258 if (opcode[i]==0x22||opcode[i]==0x26) {
3259 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
535d208a 3260 }else{
1edfcc68 3261 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
535d208a 3262 }
535d208a 3263 }
3264 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3265 if(!c||memtarget) {
ffb0b9e0 3266 int a=temp2;
3267 if(fastload_reg_override) a=fastload_reg_override;
535d208a 3268 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 3269 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 3270 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3271 }
3272 else
3273 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3274 if(rt1[i]) {
3275 assert(tl>=0);
57871462 3276 emit_andimm(temp,24,temp);
2002a1db 3277#ifdef BIG_ENDIAN_MIPS
3278 if (opcode[i]==0x26) // LWR
3279#else
3280 if (opcode[i]==0x22) // LWL
3281#endif
3282 emit_xorimm(temp,24,temp);
57871462 3283 emit_movimm(-1,HOST_TEMPREG);
3284 if (opcode[i]==0x26) {
3285 emit_shr(temp2,temp,temp2);
3286 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3287 }else{
3288 emit_shl(temp2,temp,temp2);
3289 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3290 }
3291 emit_or(temp2,tl,tl);
57871462 3292 }
535d208a 3293 //emit_storereg(rt1[i],tl); // DEBUG
3294 }
3295 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 3296 // FIXME: little endian, fastload_reg_override
535d208a 3297 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3298 if(!c||memtarget) {
3299 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3300 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3301 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3302 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3303 }
3304 else
3305 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3306 if(rt1[i]) {
3307 assert(th>=0);
3308 assert(tl>=0);
57871462 3309 emit_testimm(temp,32);
3310 emit_andimm(temp,24,temp);
3311 if (opcode[i]==0x1A) { // LDL
3312 emit_rsbimm(temp,32,HOST_TEMPREG);
3313 emit_shl(temp2h,temp,temp2h);
3314 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3315 emit_movimm(-1,HOST_TEMPREG);
3316 emit_shl(temp2,temp,temp2);
3317 emit_cmove_reg(temp2h,th);
3318 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3319 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3320 emit_orreq(temp2,tl,tl);
3321 emit_orrne(temp2,th,th);
3322 }
3323 if (opcode[i]==0x1B) { // LDR
3324 emit_xorimm(temp,24,temp);
3325 emit_rsbimm(temp,32,HOST_TEMPREG);
3326 emit_shr(temp2,temp,temp2);
3327 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3328 emit_movimm(-1,HOST_TEMPREG);
3329 emit_shr(temp2h,temp,temp2h);
3330 emit_cmovne_reg(temp2,tl);
3331 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3332 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3333 emit_orrne(temp2h,th,th);
3334 emit_orreq(temp2h,tl,tl);
3335 }
3336 }
3337 }
3338}
3339#define loadlr_assemble loadlr_assemble_arm
3340
3341void cop0_assemble(int i,struct regstat *i_regs)
3342{
3343 if(opcode2[i]==0) // MFC0
3344 {
3345 signed char t=get_reg(i_regs->regmap,rt1[i]);
3346 char copr=(source[i]>>11)&0x1f;
3347 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3348 if(t>=0&&rt1[i]!=0) {
7139f3c8 3349 emit_readword((int)&reg_cop0+copr*4,t);
57871462 3350 }
3351 }
3352 else if(opcode2[i]==4) // MTC0
3353 {
3354 signed char s=get_reg(i_regs->regmap,rs1[i]);
3355 char copr=(source[i]>>11)&0x1f;
3356 assert(s>=0);
63cb0298 3357 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 3358 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 3359 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 3360 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 3361 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 3362 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 3363 emit_writeword(HOST_CCREG,(int)&Count);
3364 }
3365 // What a mess. The status register (12) can enable interrupts,
3366 // so needs a special case to handle a pending interrupt.
3367 // The interrupt must be taken immediately, because a subsequent
3368 // instruction might disable interrupts again.
7139f3c8 3369 if(copr==12||copr==13) {
fca1aef2 3370 if (is_delayslot) {
3371 // burn cycles to cause cc_interrupt, which will
3372 // reschedule next_interupt. Relies on CCREG from above.
3373 assem_debug("MTC0 DS %d\n", copr);
3374 emit_writeword(HOST_CCREG,(int)&last_count);
3375 emit_movimm(0,HOST_CCREG);
3376 emit_storereg(CCREG,HOST_CCREG);
caeefe31 3377 emit_loadreg(rs1[i],1);
fca1aef2 3378 emit_movimm(copr,0);
3379 emit_call((int)pcsx_mtc0_ds);
042c7287 3380 emit_loadreg(rs1[i],s);
fca1aef2 3381 return;
3382 }
63cb0298 3383 emit_movimm(start+i*4+4,HOST_TEMPREG);
3384 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3385 emit_movimm(0,HOST_TEMPREG);
3386 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 3387 }
3388 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3389 //else
caeefe31 3390 if(s==HOST_CCREG)
3391 emit_loadreg(rs1[i],1);
3392 else if(s!=1)
63cb0298 3393 emit_mov(s,1);
fca1aef2 3394 emit_movimm(copr,0);
3395 emit_call((int)pcsx_mtc0);
7139f3c8 3396 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3397 emit_readword((int)&Count,HOST_CCREG);
042c7287 3398 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 3399 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 3400 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3401 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 3402 emit_storereg(CCREG,HOST_CCREG);
3403 }
7139f3c8 3404 if(copr==12||copr==13) {
57871462 3405 assert(!is_delayslot);
3406 emit_readword((int)&pending_exception,14);
042c7287 3407 emit_test(14,14);
3408 emit_jne((int)&do_interrupt);
57871462 3409 }
3410 emit_loadreg(rs1[i],s);
3411 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3412 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 3413 cop1_usable=0;
3414 }
3415 else
3416 {
3417 assert(opcode2[i]==0x10);
576bbd8f 3418 if((source[i]&0x3f)==0x10) // RFE
3419 {
3420 emit_readword((int)&Status,0);
3421 emit_andimm(0,0x3c,1);
3422 emit_andimm(0,~0xf,0);
3423 emit_orrshr_imm(1,2,0);
3424 emit_writeword(0,(int)&Status);
3425 }
57871462 3426 }
3427}
3428
b9b61529 3429static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3430{
3431 switch (copr) {
3432 case 1:
3433 case 3:
3434 case 5:
3435 case 8:
3436 case 9:
3437 case 10:
3438 case 11:
3439 emit_readword((int)&reg_cop2d[copr],tl);
3440 emit_signextend16(tl,tl);
3441 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3442 break;
3443 case 7:
3444 case 16:
3445 case 17:
3446 case 18:
3447 case 19:
3448 emit_readword((int)&reg_cop2d[copr],tl);
3449 emit_andimm(tl,0xffff,tl);
3450 emit_writeword(tl,(int)&reg_cop2d[copr]);
3451 break;
3452 case 15:
3453 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3454 emit_writeword(tl,(int)&reg_cop2d[copr]);
3455 break;
3456 case 28:
b9b61529 3457 case 29:
3458 emit_readword((int)&reg_cop2d[9],temp);
3459 emit_testimm(temp,0x8000); // do we need this?
3460 emit_andimm(temp,0xf80,temp);
3461 emit_andne_imm(temp,0,temp);
f70d384d 3462 emit_shrimm(temp,7,tl);
b9b61529 3463 emit_readword((int)&reg_cop2d[10],temp);
3464 emit_testimm(temp,0x8000);
3465 emit_andimm(temp,0xf80,temp);
3466 emit_andne_imm(temp,0,temp);
f70d384d 3467 emit_orrshr_imm(temp,2,tl);
b9b61529 3468 emit_readword((int)&reg_cop2d[11],temp);
3469 emit_testimm(temp,0x8000);
3470 emit_andimm(temp,0xf80,temp);
3471 emit_andne_imm(temp,0,temp);
f70d384d 3472 emit_orrshl_imm(temp,3,tl);
b9b61529 3473 emit_writeword(tl,(int)&reg_cop2d[copr]);
3474 break;
3475 default:
3476 emit_readword((int)&reg_cop2d[copr],tl);
3477 break;
3478 }
3479}
3480
3481static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3482{
3483 switch (copr) {
3484 case 15:
3485 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3486 emit_writeword(sl,(int)&reg_cop2d[copr]);
3487 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3488 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3489 emit_writeword(sl,(int)&reg_cop2d[14]);
3490 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3491 break;
3492 case 28:
3493 emit_andimm(sl,0x001f,temp);
f70d384d 3494 emit_shlimm(temp,7,temp);
b9b61529 3495 emit_writeword(temp,(int)&reg_cop2d[9]);
3496 emit_andimm(sl,0x03e0,temp);
f70d384d 3497 emit_shlimm(temp,2,temp);
b9b61529 3498 emit_writeword(temp,(int)&reg_cop2d[10]);
3499 emit_andimm(sl,0x7c00,temp);
f70d384d 3500 emit_shrimm(temp,3,temp);
b9b61529 3501 emit_writeword(temp,(int)&reg_cop2d[11]);
3502 emit_writeword(sl,(int)&reg_cop2d[28]);
3503 break;
3504 case 30:
3505 emit_movs(sl,temp);
3506 emit_mvnmi(temp,temp);
665f33e1 3507#ifdef HAVE_ARMV5
b9b61529 3508 emit_clz(temp,temp);
665f33e1 3509#else
3510 emit_movs(temp,HOST_TEMPREG);
3511 emit_movimm(0,temp);
3512 emit_jeq((int)out+4*4);
3513 emit_addpl_imm(temp,1,temp);
3514 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3515 emit_jns((int)out-2*4);
3516#endif
b9b61529 3517 emit_writeword(sl,(int)&reg_cop2d[30]);
3518 emit_writeword(temp,(int)&reg_cop2d[31]);
3519 break;
b9b61529 3520 case 31:
3521 break;
3522 default:
3523 emit_writeword(sl,(int)&reg_cop2d[copr]);
3524 break;
3525 }
3526}
3527
3528void cop2_assemble(int i,struct regstat *i_regs)
3529{
3530 u_int copr=(source[i]>>11)&0x1f;
3531 signed char temp=get_reg(i_regs->regmap,-1);
3532 if (opcode2[i]==0) { // MFC2
3533 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3534 if(tl>=0&&rt1[i]!=0)
b9b61529 3535 cop2_get_dreg(copr,tl,temp);
3536 }
3537 else if (opcode2[i]==4) { // MTC2
3538 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3539 cop2_put_dreg(copr,sl,temp);
3540 }
3541 else if (opcode2[i]==2) // CFC2
3542 {
3543 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3544 if(tl>=0&&rt1[i]!=0)
b9b61529 3545 emit_readword((int)&reg_cop2c[copr],tl);
3546 }
3547 else if (opcode2[i]==6) // CTC2
3548 {
3549 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3550 switch(copr) {
3551 case 4:
3552 case 12:
3553 case 20:
3554 case 26:
3555 case 27:
3556 case 29:
3557 case 30:
3558 emit_signextend16(sl,temp);
3559 break;
3560 case 31:
3561 //value = value & 0x7ffff000;
3562 //if (value & 0x7f87e000) value |= 0x80000000;
3563 emit_shrimm(sl,12,temp);
3564 emit_shlimm(temp,12,temp);
3565 emit_testimm(temp,0x7f000000);
3566 emit_testeqimm(temp,0x00870000);
3567 emit_testeqimm(temp,0x0000e000);
3568 emit_orrne_imm(temp,0x80000000,temp);
3569 break;
3570 default:
3571 temp=sl;
3572 break;
3573 }
3574 emit_writeword(temp,(int)&reg_cop2c[copr]);
3575 assert(sl>=0);
3576 }
3577}
3578
054175e9 3579static void c2op_prologue(u_int op,u_int reglist)
3580{
3581 save_regs_all(reglist);
82ed88eb 3582#ifdef PCNT
3583 emit_movimm(op,0);
3584 emit_call((int)pcnt_gte_start);
3585#endif
054175e9 3586 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3587}
3588
3589static void c2op_epilogue(u_int op,u_int reglist)
3590{
82ed88eb 3591#ifdef PCNT
3592 emit_movimm(op,0);
3593 emit_call((int)pcnt_gte_end);
3594#endif
054175e9 3595 restore_regs_all(reglist);
3596}
3597
6c0eefaf 3598static void c2op_call_MACtoIR(int lm,int need_flags)
3599{
3600 if(need_flags)
3601 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3602 else
3603 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3604}
3605
3606static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3607{
3608 emit_call((int)func);
3609 // func is C code and trashes r0
3610 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3611 if(need_flags||need_ir)
3612 c2op_call_MACtoIR(lm,need_flags);
3613 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3614}
3615
054175e9 3616static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 3617{
3618 signed char temp=get_reg(i_regs->regmap,-1);
3619 u_int c2op=source[i]&0x3f;
6c0eefaf 3620 u_int hr,reglist_full=0,reglist;
054175e9 3621 int need_flags,need_ir;
b9b61529 3622 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 3623 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 3624 }
4d646738 3625 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 3626
3627 if (gte_handlers[c2op]!=NULL) {
bedfea38 3628 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 3629 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 3630 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3631 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 3632 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3633 need_flags=0;
6c0eefaf 3634 int shift = (source[i] >> 19) & 1;
3635 int lm = (source[i] >> 10) & 1;
054175e9 3636 switch(c2op) {
19776aef 3637#ifndef DRC_DBG
054175e9 3638 case GTE_MVMVA: {
82336ba3 3639#ifdef HAVE_ARMV5
054175e9 3640 int v = (source[i] >> 15) & 3;
3641 int cv = (source[i] >> 13) & 3;
3642 int mx = (source[i] >> 17) & 3;
4d646738 3643 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 3644 c2op_prologue(c2op,reglist);
3645 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3646 if(v<3)
3647 emit_ldrd(v*8,0,4);
3648 else {
3649 emit_movzwl_indexed(9*4,0,4); // gteIR
3650 emit_movzwl_indexed(10*4,0,6);
3651 emit_movzwl_indexed(11*4,0,5);
3652 emit_orrshl_imm(6,16,4);
3653 }
3654 if(mx<3)
3655 emit_addimm(0,32*4+mx*8*4,6);
3656 else
3657 emit_readword((int)&zeromem_ptr,6);
3658 if(cv<3)
3659 emit_addimm(0,32*4+(cv*8+5)*4,7);
3660 else
3661 emit_readword((int)&zeromem_ptr,7);
3662#ifdef __ARM_NEON__
3663 emit_movimm(source[i],1); // opcode
3664 emit_call((int)gteMVMVA_part_neon);
3665 if(need_flags) {
3666 emit_movimm(lm,1);
3667 emit_call((int)gteMACtoIR_flags_neon);
3668 }
3669#else
3670 if(cv==3&&shift)
3671 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3672 else {
3673 emit_movimm(shift,1);
3674 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3675 }
6c0eefaf 3676 if(need_flags||need_ir)
3677 c2op_call_MACtoIR(lm,need_flags);
82336ba3 3678#endif
3679#else /* if not HAVE_ARMV5 */
3680 c2op_prologue(c2op,reglist);
3681 emit_movimm(source[i],1); // opcode
3682 emit_writeword(1,(int)&psxRegs.code);
3683 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 3684#endif
3685 break;
3686 }
6c0eefaf 3687 case GTE_OP:
3688 c2op_prologue(c2op,reglist);
3689 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3690 if(need_flags||need_ir) {
3691 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3692 c2op_call_MACtoIR(lm,need_flags);
3693 }
3694 break;
3695 case GTE_DPCS:
3696 c2op_prologue(c2op,reglist);
3697 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3698 break;
3699 case GTE_INTPL:
3700 c2op_prologue(c2op,reglist);
3701 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3702 break;
3703 case GTE_SQR:
3704 c2op_prologue(c2op,reglist);
3705 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3706 if(need_flags||need_ir) {
3707 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3708 c2op_call_MACtoIR(lm,need_flags);
3709 }
3710 break;
3711 case GTE_DCPL:
3712 c2op_prologue(c2op,reglist);
3713 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3714 break;
3715 case GTE_GPF:
3716 c2op_prologue(c2op,reglist);
3717 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3718 break;
3719 case GTE_GPL:
3720 c2op_prologue(c2op,reglist);
3721 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3722 break;
19776aef 3723#endif
054175e9 3724 default:
054175e9 3725 c2op_prologue(c2op,reglist);
19776aef 3726#ifdef DRC_DBG
3727 emit_movimm(source[i],1); // opcode
3728 emit_writeword(1,(int)&psxRegs.code);
3729#endif
054175e9 3730 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3731 break;
3732 }
3733 c2op_epilogue(c2op,reglist);
3734 }
b9b61529 3735}
3736
3737void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3738{
3739 // XXX: should just just do the exception instead
3740 if(!cop1_usable) {
3741 int jaddr=(int)out;
3742 emit_jmp(0);
3743 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3744 cop1_usable=1;
3745 }
3746}
3747
/* COP1 instruction: only the "coprocessor unusable" handling is emitted. */
void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3752
/* FP conversion instruction: only the "coprocessor unusable" handling is emitted. */
void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
#define fconv_assemble fconv_assemble_arm
3758
/* FP compare instruction: only the "coprocessor unusable" handling is emitted. */
void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3763
/* FP arithmetic instruction: only the "coprocessor unusable" handling is emitted. */
void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3768
3769void multdiv_assemble_arm(int i,struct regstat *i_regs)
3770{
3771 // case 0x18: MULT
3772 // case 0x19: MULTU
3773 // case 0x1A: DIV
3774 // case 0x1B: DIVU
3775 // case 0x1C: DMULT
3776 // case 0x1D: DMULTU
3777 // case 0x1E: DDIV
3778 // case 0x1F: DDIVU
3779 if(rs1[i]&&rs2[i])
3780 {
3781 if((opcode2[i]&4)==0) // 32-bit
3782 {
3783 if(opcode2[i]==0x18) // MULT
3784 {
3785 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3786 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3787 signed char hi=get_reg(i_regs->regmap,HIREG);
3788 signed char lo=get_reg(i_regs->regmap,LOREG);
3789 assert(m1>=0);
3790 assert(m2>=0);
3791 assert(hi>=0);
3792 assert(lo>=0);
3793 emit_smull(m1,m2,hi,lo);
3794 }
3795 if(opcode2[i]==0x19) // MULTU
3796 {
3797 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3798 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3799 signed char hi=get_reg(i_regs->regmap,HIREG);
3800 signed char lo=get_reg(i_regs->regmap,LOREG);
3801 assert(m1>=0);
3802 assert(m2>=0);
3803 assert(hi>=0);
3804 assert(lo>=0);
3805 emit_umull(m1,m2,hi,lo);
3806 }
3807 if(opcode2[i]==0x1A) // DIV
3808 {
3809 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3810 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3811 assert(d1>=0);
3812 assert(d2>=0);
3813 signed char quotient=get_reg(i_regs->regmap,LOREG);
3814 signed char remainder=get_reg(i_regs->regmap,HIREG);
3815 assert(quotient>=0);
3816 assert(remainder>=0);
3817 emit_movs(d1,remainder);
44a80f6a 3818 emit_movimm(0xffffffff,quotient);
3819 emit_negmi(quotient,quotient); // .. quotient and ..
3820 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3821 emit_movs(d2,HOST_TEMPREG);
3822 emit_jeq((int)out+52); // Division by zero
82336ba3 3823 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 3824#ifdef HAVE_ARMV5
57871462 3825 emit_clz(HOST_TEMPREG,quotient);
3826 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3827#else
3828 emit_movimm(0,quotient);
3829 emit_addpl_imm(quotient,1,quotient);
3830 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3831 emit_jns((int)out-2*4);
3832#endif
57871462 3833 emit_orimm(quotient,1<<31,quotient);
3834 emit_shr(quotient,quotient,quotient);
3835 emit_cmp(remainder,HOST_TEMPREG);
3836 emit_subcs(remainder,HOST_TEMPREG,remainder);
3837 emit_adcs(quotient,quotient,quotient);
3838 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3839 emit_jcc((int)out-16); // -4
3840 emit_teq(d1,d2);
3841 emit_negmi(quotient,quotient);
3842 emit_test(d1,d1);
3843 emit_negmi(remainder,remainder);
3844 }
3845 if(opcode2[i]==0x1B) // DIVU
3846 {
3847 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3848 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3849 assert(d1>=0);
3850 assert(d2>=0);
3851 signed char quotient=get_reg(i_regs->regmap,LOREG);
3852 signed char remainder=get_reg(i_regs->regmap,HIREG);
3853 assert(quotient>=0);
3854 assert(remainder>=0);
44a80f6a 3855 emit_mov(d1,remainder);
3856 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3857 emit_test(d2,d2);
44a80f6a 3858 emit_jeq((int)out+40); // Division by zero
665f33e1 3859#ifdef HAVE_ARMV5
57871462 3860 emit_clz(d2,HOST_TEMPREG);
3861 emit_movimm(1<<31,quotient);
3862 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 3863#else
3864 emit_movimm(0,HOST_TEMPREG);
82336ba3 3865 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3866 emit_lslpls_imm(d2,1,d2);
665f33e1 3867 emit_jns((int)out-2*4);
3868 emit_movimm(1<<31,quotient);
3869#endif
57871462 3870 emit_shr(quotient,HOST_TEMPREG,quotient);
3871 emit_cmp(remainder,d2);
3872 emit_subcs(remainder,d2,remainder);
3873 emit_adcs(quotient,quotient,quotient);
3874 emit_shrcc_imm(d2,1,d2);
3875 emit_jcc((int)out-16); // -4
3876 }
3877 }
3878 else // 64-bit
71e490c5 3879 assert(0);
57871462 3880 }
3881 else
3882 {
3883 // Multiply by zero is zero.
3884 // MIPS does not have a divide by zero exception.
3885 // The result is undefined, we return zero.
3886 signed char hr=get_reg(i_regs->regmap,HIREG);
3887 signed char lr=get_reg(i_regs->regmap,LOREG);
3888 if(hr>=0) emit_zeroreg(hr);
3889 if(lr>=0) emit_zeroreg(lr);
3890 }
3891}
3892#define multdiv_assemble multdiv_assemble_arm
3893
/*
 * Intentionally empty on ARM: on x86 this puts the value 0xf8 into the
 * register, whereas on ARM the hash is done with a single instruction
 * (see do_rhash).
 */
void do_preload_rhash(int r)
{
}
3898
3899void do_preload_rhtbl(int ht) {
3900 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3901}
3902
/* Emit the hash step: rh = rs & 0xf8. */
void do_rhash(int rs,int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
3906
3907void do_miniht_load(int ht,int rh) {
3908 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3909 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3910}
3911
3912void do_miniht_jump(int rs,int rh,int ht) {
3913 emit_cmp(rh,rs);
3914 emit_ldreq_indexed(ht,4,15);
3915 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3916 emit_mov(rs,7);
3917 emit_jmp(jump_vaddr_reg[7]);
3918 #else
3919 emit_jmp(jump_vaddr_reg[rs]);
3920 #endif
3921}
3922
3923void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 3924 #ifndef HAVE_ARMV7
57871462 3925 emit_movimm(return_address,rt); // PC into link register
3926 add_to_linker((int)out,return_address,1);
3927 emit_pcreladdr(temp);
3928 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
3929 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
3930 #else
3931 emit_movw(return_address&0x0000FFFF,rt);
3932 add_to_linker((int)out,return_address,1);
3933 emit_pcreladdr(temp);
3934 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
3935 emit_movt(return_address&0xFFFF0000,rt);
3936 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
3937 #endif
3938}
3939
57871462 3940void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
3941{
3942 //if(dirty_pre==dirty) return;
3943 int hr,reg,new_hr;
3944 for(hr=0;hr<HOST_REGS;hr++) {
3945 if(hr!=EXCLUDE_REG) {
3946 reg=pre[hr];
3947 if(((~u)>>(reg&63))&1) {
f776eb14 3948 if(reg>0) {
57871462 3949 if(((dirty_pre&~dirty)>>hr)&1) {
3950 if(reg>0&&reg<34) {
3951 emit_storereg(reg,hr);
3952 if( ((is32_pre&~uu)>>reg)&1 ) {
3953 emit_sarimm(hr,31,HOST_TEMPREG);
3954 emit_storereg(reg|64,HOST_TEMPREG);
3955 }
3956 }
3957 else if(reg>=64) {
3958 emit_storereg(reg,hr);
3959 }
3960 }
3961 }
57871462 3962 }
3963 }
3964 }
3965}
3966
3967
3968/* using strd could possibly help but you'd have to allocate registers in pairs
3969void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
3970{
3971 int hr;
3972 int wrote=-1;
3973 for(hr=HOST_REGS-1;hr>=0;hr--) {
3974 if(hr!=EXCLUDE_REG) {
3975 if(pre[hr]!=entry[hr]) {
3976 if(pre[hr]>=0) {
3977 if((dirty>>hr)&1) {
3978 if(get_reg(entry,pre[hr])<0) {
3979 if(pre[hr]<64) {
3980 if(!((u>>pre[hr])&1)) {
3981 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
3982 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
3983 emit_sarimm(hr,31,hr+1);
3984 emit_strdreg(pre[hr],hr);
3985 }
3986 else
3987 emit_storereg(pre[hr],hr);
3988 }else{
3989 emit_storereg(pre[hr],hr);
3990 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
3991 emit_sarimm(hr,31,hr);
3992 emit_storereg(pre[hr]|64,hr);
3993 }
3994 }
3995 }
3996 }else{
3997 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
3998 emit_storereg(pre[hr],hr);
3999 }
4000 }
4001 wrote=hr;
4002 }
4003 }
4004 }
4005 }
4006 }
4007 }
4008 for(hr=0;hr<HOST_REGS;hr++) {
4009 if(hr!=EXCLUDE_REG) {
4010 if(pre[hr]!=entry[hr]) {
4011 if(pre[hr]>=0) {
4012 int nr;
4013 if((nr=get_reg(entry,pre[hr]))>=0) {
4014 emit_mov(hr,nr);
4015 }
4016 }
4017 }
4018 }
4019 }
4020}
4021#define wb_invalidate wb_invalidate_arm
4022*/
4023
dd3a91a1 4024// Clearing the cache is rather slow on ARM Linux, so mark the areas
4025// that need to be cleared, and then only clear these areas once.
4026void do_clear_cache()
4027{
4028 int i,j;
4029 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4030 {
4031 u_int bitmap=needs_clear_cache[i];
4032 if(bitmap) {
4033 u_int start,end;
4034 for(j=0;j<32;j++)
4035 {
4036 if(bitmap&(1<<j)) {
bdeade46 4037 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 4038 end=start+4095;
4039 j++;
4040 while(j<32) {
4041 if(bitmap&(1<<j)) {
4042 end+=4096;
4043 j++;
4044 }else{
4045 __clear_cache((void *)start,(void *)end);
4046 break;
4047 }
4048 }
4049 }
4050 }
4051 needs_clear_cache[i]=0;
4052 }
4053 }
4054}
4055
/* CPU-architecture-specific initialization; nothing to do here. */
static void arch_init()
{
}
b9b61529 4059
4060// vim:shiftwidth=2:expandtab