drc: inv: fix ram offset and mirror handling
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
6c0eefaf 23#include "../gte.h"
24#define FLAGLESS
25#include "../gte.h"
26#undef FLAGLESS
054175e9 27#include "../gte_arm.h"
28#include "../gte_neon.h"
29#include "pcnt.h"
30#endif
31
a327ad27 32#if !BASE_ADDR_FIXED
// Statically reserved buffer of 1<<TARGET_SIZE_2 bytes, aligned to 4096 bytes.
// It is only defined when BASE_ADDR_FIXED is zero or undefined.
// NOTE(review): presumably the generated code is written here in that
// configuration -- confirm against the users of this symbol.
bdeade46 33char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
34#endif
35
// Recompiler state that is defined outside this file.
// emit_loadreg()/emit_storereg() address cycle_count relative to
// dynarec_local; the other symbols are only declared here.
57871462 36extern int cycle_count;
37extern int last_count;
38extern int pcaddr;
39extern int pending_exception;
40extern int branch_target;
41extern uint64_t readmem_dword;
3d624f89 42#ifdef MUPEN64
57871462 43extern precomp_instr fake_pc;
3d624f89 44#endif
57871462 45extern void *dynarec_local;
// one entry per 4 KiB page (indexed with address>>12 in verify_dirty/get_bounds)
46extern u_int memory_map[1048576];
47extern u_int mini_ht[32][2];
48extern u_int rounding_modes[4];
49
// Routines implemented outside this file (no bodies are visible here).
50void indirect_jump_indexed();
51void indirect_jump();
52void do_interrupt();
// One jump_vaddr entry point per ARM register r0-r10 and r12.
53void jump_vaddr_r0();
54void jump_vaddr_r1();
55void jump_vaddr_r2();
56void jump_vaddr_r3();
57void jump_vaddr_r4();
58void jump_vaddr_r5();
59void jump_vaddr_r6();
60void jump_vaddr_r7();
61void jump_vaddr_r8();
62void jump_vaddr_r9();
63void jump_vaddr_r10();
64void jump_vaddr_r12();
65
// Lookup table: entry N holds the address of jump_vaddr_rN.
// Entries 11, 13, 14 and 15 are 0 because no routine is declared for
// those registers (named fp, sp, lr and pc in regname[]).
66const u_int jump_vaddr_reg[16] = {
67 (int)jump_vaddr_r0,
68 (int)jump_vaddr_r1,
69 (int)jump_vaddr_r2,
70 (int)jump_vaddr_r3,
71 (int)jump_vaddr_r4,
72 (int)jump_vaddr_r5,
73 (int)jump_vaddr_r6,
74 (int)jump_vaddr_r7,
75 (int)jump_vaddr_r8,
76 (int)jump_vaddr_r9,
77 (int)jump_vaddr_r10,
78 0,
79 (int)jump_vaddr_r12,
80 0,
81 0,
82 0};
83
// One invalidate_addr entry point per ARM register r0-r10 and r12;
// the routines themselves are implemented outside this file.
0bbd1454 84void invalidate_addr_r0();
85void invalidate_addr_r1();
86void invalidate_addr_r2();
87void invalidate_addr_r3();
88void invalidate_addr_r4();
89void invalidate_addr_r5();
90void invalidate_addr_r6();
91void invalidate_addr_r7();
92void invalidate_addr_r8();
93void invalidate_addr_r9();
94void invalidate_addr_r10();
95void invalidate_addr_r12();
96
// Lookup table: entry N holds the address of invalidate_addr_rN.
// Entries 11, 13, 14 and 15 are 0 because no routine is declared for
// those registers.
97const u_int invalidate_addr_reg[16] = {
98 (int)invalidate_addr_r0,
99 (int)invalidate_addr_r1,
100 (int)invalidate_addr_r2,
101 (int)invalidate_addr_r3,
102 (int)invalidate_addr_r4,
103 (int)invalidate_addr_r5,
104 (int)invalidate_addr_r6,
105 (int)invalidate_addr_r7,
106 (int)invalidate_addr_r8,
107 (int)invalidate_addr_r9,
108 (int)invalidate_addr_r10,
109 0,
110 (int)invalidate_addr_r12,
111 0,
112 0,
113 0};
114
57871462 115#include "fpu.h"
116
// Array of 1<<(TARGET_SIZE_2-17) words.
// NOTE(review): the sizing suggests a bitmap with one bit per 4 KiB of the
// 1<<TARGET_SIZE_2 byte translation cache (32 bits per word) -- confirm
// against the code that sets and tests these bits.
dd3a91a1 117unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
118
57871462 119/* Linker */
120
121void set_jump_target(int addr,u_int target)
122{
123 u_char *ptr=(u_char *)addr;
124 u_int *ptr2=(u_int *)ptr;
125 if(ptr[3]==0xe2) {
126 assert((target-(u_int)ptr2-8)<1024);
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
131 }
132 else if(ptr[3]==0x72) {
133 // generated by emit_jno_unlikely
134 if((target-(u_int)ptr2-8)<1024) {
135 assert((addr&3)==0);
136 assert((target&3)==0);
137 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
138 }
139 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
140 assert((addr&3)==0);
141 assert((target&3)==0);
142 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
143 }
144 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146 else {
147 assert((ptr[3]&0x0e)==0xa);
148 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
149 }
150}
151
152// This optionally copies the instruction from the target of the branch into
153// the space before the branch. Works, but the difference in speed is
154// usually insignificant.
155void set_jump_target_fillslot(int addr,u_int target,int copy)
156{
157 u_char *ptr=(u_char *)addr;
158 u_int *ptr2=(u_int *)ptr;
159 assert(!copy||ptr2[-1]==0xe28dd000);
160 if(ptr[3]==0xe2) {
161 assert(!copy);
162 assert((target-(u_int)ptr2-8)<4096);
163 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
164 }
165 else {
166 assert((ptr[3]&0x0e)==0xa);
167 u_int target_insn=*(u_int *)target;
168 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
169 copy=0;
170 }
171 if((target_insn&0x0c100000)==0x04100000) { // Load
172 copy=0;
173 }
174 if(target_insn&0x08000000) {
175 copy=0;
176 }
177 if(copy) {
178 ptr2[-1]=target_insn;
179 target+=4;
180 }
181 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
182 }
183}
184
185/* Literal pool */
186add_literal(int addr,int val)
187{
15776b68 188 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 189 literals[literalcount][0]=addr;
190 literals[literalcount][1]=val;
191 literalcount++;
192}
193
f76eeef9 194void *kill_pointer(void *stub)
57871462 195{
196 int *ptr=(int *)(stub+4);
197 assert((*ptr&0x0ff00000)==0x05900000);
198 u_int offset=*ptr&0xfff;
199 int **l_ptr=(void *)ptr+offset+8;
200 int *i_ptr=*l_ptr;
201 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 202 return i_ptr;
57871462 203}
204
f968d35d 205// find where external branch is liked to using addr of it's stub:
206// get address that insn one after stub loads (dyna_linker arg1),
207// treat it as a pointer to branch insn,
208// return addr where that branch jumps to
57871462 209int get_pointer(void *stub)
210{
211 //printf("get_pointer(%x)\n",(int)stub);
212 int *ptr=(int *)(stub+4);
f968d35d 213 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 214 u_int offset=*ptr&0xfff;
215 int **l_ptr=(void *)ptr+offset+8;
216 int *i_ptr=*l_ptr;
217 assert((*i_ptr&0x0f000000)==0x0a000000);
218 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
219}
220
221// Find the "clean" entry point from a "dirty" entry point
222// by skipping past the call to verify_code
223u_int get_clean_addr(int addr)
224{
225 int *ptr=(int *)addr;
226 #ifdef ARMv5_ONLY
227 ptr+=4;
228 #else
229 ptr+=6;
230 #endif
231 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
232 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
233 ptr++;
234 if((*ptr&0xFF000000)==0xea000000) {
235 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
236 }
237 return (u_int)ptr;
238}
239
// Check whether the source of the block whose dirty entry point is addr
// still matches the copy saved for it.
// The source address, copy address and length are decoded from the
// instructions at the entry point (literal pool on ARMv5, movw/movt pairs
// otherwise).  Returns nonzero when memcmp() finds the two ranges identical.
// With the TLB enabled and a verify_code_vm/verify_code_ds verifier, 0 is
// also returned when the first page is unmapped or the pages covering the
// source do not share one mapping.
240int verify_dirty(int addr)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
15776b68 245 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 246 u_int offset=*ptr&0xfff;
// literal address = instruction address + offset + 8
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
// reassemble the 16-bit immediates: ptr[0]/ptr[2] give source,
// ptr[1]/ptr[3] give copy, ptr[4] gives len
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
// the bl may sit one instruction further on
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 262#ifndef DISABLE_TLB
cfcba99a 263 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 264 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
265 unsigned int page=source>>12;
266 unsigned int map_value=memory_map[page];
// entries with the top bit set are treated as unmapped
267 if(map_value>=0x80000000) return 0;
// every further page touched by the source must share the same mapping
268 while(page<((source+len-1)>>12)) {
269 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
270 }
// translate source using the map entry
271 source = source+(map_value<<2);
272 }
63cb0298 273#endif
57871462 274 //printf("verify_dirty: %x %x %x\n",source,copy,len);
275 return !memcmp((void *)source,(void *)copy,len);
276}
277
278// This doesn't necessarily find all clean entry points, just
279// guarantees that it's not dirty
280int isclean(int addr)
281{
282 #ifdef ARMv5_ONLY
283 int *ptr=((u_int *)addr)+4;
284 #else
285 int *ptr=((u_int *)addr)+6;
286 #endif
287 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
288 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
289 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
290 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
291 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
292 return 1;
293}
294
4a35de07 295// get source that block at addr was compiled from (host pointers)
// The source address and length are decoded from the instructions at the
// dirty entry point addr, the same way verify_dirty() does it.
// On return *start is the source address and *end is source+len.
// With the TLB enabled and a verify_code_vm/verify_code_ds verifier the
// source is translated through memory_map[]; an unmapped page yields
// *start==0 and *end==len.
57871462 296void get_bounds(int addr,u_int *start,u_int *end)
297{
298 u_int *ptr=(u_int *)addr;
299 #ifdef ARMv5_ONLY
300 // get from literal pool
15776b68 301 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 302 u_int offset=*ptr&0xfff;
// literal address = instruction address + offset + 8
303 u_int *l_ptr=(void *)ptr+offset+8;
304 u_int source=l_ptr[0];
305 //u_int copy=l_ptr[1];
306 u_int len=l_ptr[2];
307 ptr+=4;
308 #else
309 // ARMv7 movw/movt
310 assert((*ptr&0xFFF00000)==0xe3000000);
// reassemble the 16-bit immediates: ptr[0]/ptr[2] give source, ptr[4] gives len
311 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
312 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
313 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
314 ptr+=6;
315 #endif
// the bl may sit one instruction further on
316 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
317 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 318#ifndef DISABLE_TLB
cfcba99a 319 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 320 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
// entries with the top bit set are treated as unmapped
321 if(memory_map[source>>12]>=0x80000000) source = 0;
322 else source = source+(memory_map[source>>12]<<2);
323 }
63cb0298 324#endif
57871462 325 *start=source;
326 *end=source+len;
327}
328
329/* Register allocation */
330
// Map guest register reg to a host register for instruction index i,
// recording the result in cur->regmap.  The steps are tried in this order:
//   1. nothing to do if reg is flagged unneeded in cur->u or already mapped
//   2. the preferred host register, if it is free or holds an unneeded value
//   3. drop one unneeded mapping, then take any free host register
//      (first those whose previous mapping was not rs1/rs2/rt1/rt2 of
//      instruction i-1, then any)
//   4. evict a mapping, guided by the hsn[] values filled in by lsn()
// The host register that receives reg has its dirty and isconst bits cleared.
// If no register can be found the process is terminated with exit(1).
331// Note: registers are allocated clean (unmodified state)
332// if you intend to modify the register, you must call dirty_reg().
333void alloc_reg(struct regstat *cur,int i,signed char reg)
334{
335 int r,hr;
336 int preferred_reg = (reg&7);
337 if(reg==CCREG) preferred_reg=HOST_CCREG;
338 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
339
340 // Don't allocate unused registers
341 if((cur->u>>reg)&1) return;
342
343 // see if it's already allocated
344 for(hr=0;hr<HOST_REGS;hr++)
345 {
346 if(cur->regmap[hr]==reg) return;
347 }
348
349 // Keep the same mapping if the register was already allocated in a loop
350 preferred_reg = loop_reg(i,reg,preferred_reg);
351
352 // Try to allocate the preferred register
353 if(cur->regmap[preferred_reg]==-1) {
354 cur->regmap[preferred_reg]=reg;
355 cur->dirty&=~(1<<preferred_reg);
356 cur->isconst&=~(1<<preferred_reg);
357 return;
358 }
// the preferred register is occupied: reuse it if its current value
// (lower half: r<64, upper half: r>=64) is flagged unneeded
359 r=cur->regmap[preferred_reg];
360 if(r<64&&((cur->u>>r)&1)) {
361 cur->regmap[preferred_reg]=reg;
362 cur->dirty&=~(1<<preferred_reg);
363 cur->isconst&=~(1<<preferred_reg);
364 return;
365 }
366 if(r>=64&&((cur->uu>>(r&63))&1)) {
367 cur->regmap[preferred_reg]=reg;
368 cur->dirty&=~(1<<preferred_reg);
369 cur->isconst&=~(1<<preferred_reg);
370 return;
371 }
372
373 // Clear any unneeded registers
374 // We try to keep the mapping consistent, if possible, because it
375 // makes branches easier (especially loops). So we try to allocate
376 // first (see above) before removing old mappings. If this is not
377 // possible then go ahead and clear out the registers that are no
378 // longer needed.
// (only the first unneeded mapping found is removed: note the break)
379 for(hr=0;hr<HOST_REGS;hr++)
380 {
381 r=cur->regmap[hr];
382 if(r>=0) {
383 if(r<64) {
384 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
385 }
386 else
387 {
388 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
389 }
390 }
391 }
392 // Try to allocate any available register, but prefer
393 // registers that have not been used recently.
394 if(i>0) {
395 for(hr=0;hr<HOST_REGS;hr++) {
396 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
397 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
398 cur->regmap[hr]=reg;
399 cur->dirty&=~(1<<hr);
400 cur->isconst&=~(1<<hr);
401 return;
402 }
403 }
404 }
405 }
406 // Try to allocate any available register
407 for(hr=0;hr<HOST_REGS;hr++) {
408 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
409 cur->regmap[hr]=reg;
410 cur->dirty&=~(1<<hr);
411 cur->isconst&=~(1<<hr);
412 return;
413 }
414 }
415
416 // Ok, now we have to evict someone
417 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register and is then updated by lsn();
// the loops below consider the highest values first
418 u_char hsn[MAXREG+1];
419 memset(hsn,10,sizeof(hsn));
420 int j;
421 lsn(hsn,i,&preferred_reg);
422 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
423 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
424 if(i>0) {
425 // Don't evict the cycle count at entry points, otherwise the entry
426 // stub will have to write it.
427 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
428 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// first pass: only values 10..3, skipping registers used by instruction i-1
429 for(j=10;j>=3;j--)
430 {
431 // Alloc preferred register if available
432 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
433 for(hr=0;hr<HOST_REGS;hr++) {
434 // Evict both parts of a 64-bit register
435 if((cur->regmap[hr]&63)==r) {
436 cur->regmap[hr]=-1;
437 cur->dirty&=~(1<<hr);
438 cur->isconst&=~(1<<hr);
439 }
440 }
441 cur->regmap[preferred_reg]=reg;
442 return;
443 }
444 for(r=1;r<=MAXREG;r++)
445 {
446 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// upper halves (r+64) are taken before lower halves
447 for(hr=0;hr<HOST_REGS;hr++) {
448 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
449 if(cur->regmap[hr]==r+64) {
450 cur->regmap[hr]=reg;
451 cur->dirty&=~(1<<hr);
452 cur->isconst&=~(1<<hr);
453 return;
454 }
455 }
456 }
457 for(hr=0;hr<HOST_REGS;hr++) {
458 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
459 if(cur->regmap[hr]==r) {
460 cur->regmap[hr]=reg;
461 cur->dirty&=~(1<<hr);
462 cur->isconst&=~(1<<hr);
463 return;
464 }
465 }
466 }
467 }
468 }
469 }
470 }
// last resort: any mapped guest register, values 10..0, no exclusions
471 for(j=10;j>=0;j--)
472 {
473 for(r=1;r<=MAXREG;r++)
474 {
475 if(hsn[r]==j) {
476 for(hr=0;hr<HOST_REGS;hr++) {
477 if(cur->regmap[hr]==r+64) {
478 cur->regmap[hr]=reg;
479 cur->dirty&=~(1<<hr);
480 cur->isconst&=~(1<<hr);
481 return;
482 }
483 }
484 for(hr=0;hr<HOST_REGS;hr++) {
485 if(cur->regmap[hr]==r) {
486 cur->regmap[hr]=reg;
487 cur->dirty&=~(1<<hr);
488 cur->isconst&=~(1<<hr);
489 return;
490 }
491 }
492 }
493 }
494 }
495 printf("This shouldn't happen (alloc_reg)");exit(1);
496}
497
// Allocate host registers for both halves of the 64-bit guest register reg.
// The lower half is handled by alloc_reg(); the upper half is recorded in
// cur->regmap as reg|64 (reg+64) and is skipped when cur->uu flags it as
// unneeded.  The upper half follows the same sequence as alloc_reg():
// preferred register (8+(reg&1), possibly overridden by loop_reg), then a
// free register, then eviction guided by the hsn[] values from lsn().
// The host register chosen for the upper half is marked clean and
// non-constant.
498void alloc_reg64(struct regstat *cur,int i,signed char reg)
499{
500 int preferred_reg = 8+(reg&1);
501 int r,hr;
502
503 // allocate the lower 32 bits
504 alloc_reg(cur,i,reg);
505
506 // Don't allocate unused registers
507 if((cur->uu>>reg)&1) return;
508
509 // see if the upper half is already allocated
510 for(hr=0;hr<HOST_REGS;hr++)
511 {
512 if(cur->regmap[hr]==reg+64) return;
513 }
514
515 // Keep the same mapping if the register was already allocated in a loop
516 preferred_reg = loop_reg(i,reg,preferred_reg);
517
518 // Try to allocate the preferred register
519 if(cur->regmap[preferred_reg]==-1) {
520 cur->regmap[preferred_reg]=reg|64;
521 cur->dirty&=~(1<<preferred_reg);
522 cur->isconst&=~(1<<preferred_reg);
523 return;
524 }
// the preferred register is occupied: reuse it if its value is unneeded
525 r=cur->regmap[preferred_reg];
526 if(r<64&&((cur->u>>r)&1)) {
527 cur->regmap[preferred_reg]=reg|64;
528 cur->dirty&=~(1<<preferred_reg);
529 cur->isconst&=~(1<<preferred_reg);
530 return;
531 }
532 if(r>=64&&((cur->uu>>(r&63))&1)) {
533 cur->regmap[preferred_reg]=reg|64;
534 cur->dirty&=~(1<<preferred_reg);
535 cur->isconst&=~(1<<preferred_reg);
536 return;
537 }
538
539 // Clear any unneeded registers
540 // We try to keep the mapping consistent, if possible, because it
541 // makes branches easier (especially loops). So we try to allocate
542 // first (see above) before removing old mappings. If this is not
543 // possible then go ahead and clear out the registers that are no
544 // longer needed.
// (scans from the highest host register down and removes only the first
// unneeded mapping found; alloc_reg scans upwards instead)
545 for(hr=HOST_REGS-1;hr>=0;hr--)
546 {
547 r=cur->regmap[hr];
548 if(r>=0) {
549 if(r<64) {
550 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
551 }
552 else
553 {
554 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
555 }
556 }
557 }
558 // Try to allocate any available register, but prefer
559 // registers that have not been used recently.
560 if(i>0) {
561 for(hr=0;hr<HOST_REGS;hr++) {
562 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
563 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
564 cur->regmap[hr]=reg|64;
565 cur->dirty&=~(1<<hr);
566 cur->isconst&=~(1<<hr);
567 return;
568 }
569 }
570 }
571 }
572 // Try to allocate any available register
573 for(hr=0;hr<HOST_REGS;hr++) {
574 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
575 cur->regmap[hr]=reg|64;
576 cur->dirty&=~(1<<hr);
577 cur->isconst&=~(1<<hr);
578 return;
579 }
580 }
581
582 // Ok, now we have to evict someone
583 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register and is then updated by lsn();
// the loops below consider the highest values first
584 u_char hsn[MAXREG+1];
585 memset(hsn,10,sizeof(hsn));
586 int j;
587 lsn(hsn,i,&preferred_reg);
588 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
589 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
590 if(i>0) {
591 // Don't evict the cycle count at entry points, otherwise the entry
592 // stub will have to write it.
593 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
594 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// first pass: only values 10..3, skipping registers used by instruction i-1
595 for(j=10;j>=3;j--)
596 {
597 // Alloc preferred register if available
598 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
599 for(hr=0;hr<HOST_REGS;hr++) {
600 // Evict both parts of a 64-bit register
601 if((cur->regmap[hr]&63)==r) {
602 cur->regmap[hr]=-1;
603 cur->dirty&=~(1<<hr);
604 cur->isconst&=~(1<<hr);
605 }
606 }
607 cur->regmap[preferred_reg]=reg|64;
608 return;
609 }
610 for(r=1;r<=MAXREG;r++)
611 {
612 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// upper halves (r+64) are taken before lower halves
613 for(hr=0;hr<HOST_REGS;hr++) {
614 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
615 if(cur->regmap[hr]==r+64) {
616 cur->regmap[hr]=reg|64;
617 cur->dirty&=~(1<<hr);
618 cur->isconst&=~(1<<hr);
619 return;
620 }
621 }
622 }
623 for(hr=0;hr<HOST_REGS;hr++) {
624 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
625 if(cur->regmap[hr]==r) {
626 cur->regmap[hr]=reg|64;
627 cur->dirty&=~(1<<hr);
628 cur->isconst&=~(1<<hr);
629 return;
630 }
631 }
632 }
633 }
634 }
635 }
636 }
// last resort: any mapped guest register, values 10..0, no exclusions
637 for(j=10;j>=0;j--)
638 {
639 for(r=1;r<=MAXREG;r++)
640 {
641 if(hsn[r]==j) {
642 for(hr=0;hr<HOST_REGS;hr++) {
643 if(cur->regmap[hr]==r+64) {
644 cur->regmap[hr]=reg|64;
645 cur->dirty&=~(1<<hr);
646 cur->isconst&=~(1<<hr);
647 return;
648 }
649 }
650 for(hr=0;hr<HOST_REGS;hr++) {
651 if(cur->regmap[hr]==r) {
652 cur->regmap[hr]=reg|64;
653 cur->dirty&=~(1<<hr);
654 cur->isconst&=~(1<<hr);
655 return;
656 }
657 }
658 }
659 }
660 }
661 printf("This shouldn't happen");exit(1);
662}
663
664// Allocate a temporary register. This is done without regard to
665// dirty status or whether the register we request is on the unneeded list
666// Note: This will only allocate one register, even if called multiple times
// Order of attempts: an existing mapping of reg, a free host register
// (searched from the highest index down), a register whose value is unneeded
// both now and at instruction i-1, and finally eviction guided by the hsn[]
// values from lsn().  The chosen register is marked clean and non-constant.
667void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
668{
669 int r,hr;
// no preferred host register for temporaries; lsn() still receives the pointer
670 int preferred_reg = -1;
671
672 // see if it's already allocated
673 for(hr=0;hr<HOST_REGS;hr++)
674 {
675 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
676 }
677
678 // Try to allocate any available register
679 for(hr=HOST_REGS-1;hr>=0;hr--) {
680 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687
688 // Find an unneeded register
689 for(hr=HOST_REGS-1;hr>=0;hr--)
690 {
691 r=cur->regmap[hr];
692 if(r>=0) {
693 if(r<64) {
694 if((cur->u>>r)&1) {
// must also have been unneeded at the previous instruction
695 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
696 cur->regmap[hr]=reg;
697 cur->dirty&=~(1<<hr);
698 cur->isconst&=~(1<<hr);
699 return;
700 }
701 }
702 }
703 else
704 {
705 if((cur->uu>>(r&63))&1) {
706 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
707 cur->regmap[hr]=reg;
708 cur->dirty&=~(1<<hr);
709 cur->isconst&=~(1<<hr);
710 return;
711 }
712 }
713 }
714 }
715 }
716
717 // Ok, now we have to evict someone
718 // Pick a register we hopefully won't need soon
719 // TODO: we might want to follow unconditional jumps here
720 // TODO: get rid of dupe code and make this into a function
721 u_char hsn[MAXREG+1];
722 memset(hsn,10,sizeof(hsn));
723 int j;
724 lsn(hsn,i,&preferred_reg);
725 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
726 if(i>0) {
727 // Don't evict the cycle count at entry points, otherwise the entry
728 // stub will have to write it.
729 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
730 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// first pass: only values 10..3, skipping registers used by instruction i-1
731 for(j=10;j>=3;j--)
732 {
733 for(r=1;r<=MAXREG;r++)
734 {
735 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// NOTE(review): HOST_CCREG is guarded with hsn[CCREG]>2 here, whereas
// alloc_reg/alloc_reg64 use j<hsn[CCREG] -- confirm this is intended
736 for(hr=0;hr<HOST_REGS;hr++) {
737 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
738 if(cur->regmap[hr]==r+64) {
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742 return;
743 }
744 }
745 }
746 for(hr=0;hr<HOST_REGS;hr++) {
747 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
748 if(cur->regmap[hr]==r) {
749 cur->regmap[hr]=reg;
750 cur->dirty&=~(1<<hr);
751 cur->isconst&=~(1<<hr);
752 return;
753 }
754 }
755 }
756 }
757 }
758 }
759 }
// last resort: any mapped guest register, values 10..0, no exclusions
760 for(j=10;j>=0;j--)
761 {
762 for(r=1;r<=MAXREG;r++)
763 {
764 if(hsn[r]==j) {
765 for(hr=0;hr<HOST_REGS;hr++) {
766 if(cur->regmap[hr]==r+64) {
767 cur->regmap[hr]=reg;
768 cur->dirty&=~(1<<hr);
769 cur->isconst&=~(1<<hr);
770 return;
771 }
772 }
773 for(hr=0;hr<HOST_REGS;hr++) {
774 if(cur->regmap[hr]==r) {
775 cur->regmap[hr]=reg;
776 cur->dirty&=~(1<<hr);
777 cur->isconst&=~(1<<hr);
778 return;
779 }
780 }
781 }
782 }
783 }
784 printf("This shouldn't happen");exit(1);
785}
786// Allocate a specific ARM register.
787void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
788{
789 int n;
f776eb14 790 int dirty=0;
57871462 791
792 // see if it's already allocated (and dealloc it)
793 for(n=0;n<HOST_REGS;n++)
794 {
f776eb14 795 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
796 dirty=(cur->dirty>>n)&1;
797 cur->regmap[n]=-1;
798 }
57871462 799 }
800
801 cur->regmap[hr]=reg;
802 cur->dirty&=~(1<<hr);
f776eb14 803 cur->dirty|=dirty<<hr;
57871462 804 cur->isconst&=~(1<<hr);
805}
806
807// Alloc cycle count into dedicated register
808alloc_cc(struct regstat *cur,int i)
809{
810 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
811}
812
813/* Special alloc */
814
815
816/* Assembler */
817
// Printable names of the 16 ARM registers, indexed by register number.
// They are used in the assem_debug() disassembly strings.
// r11, r13, r14 and r15 appear as fp, sp, lr and pc.
818char regname[16][4] = {
819 "r0",
820 "r1",
821 "r2",
822 "r3",
823 "r4",
824 "r5",
825 "r6",
826 "r7",
827 "r8",
828 "r9",
829 "r10",
830 "fp",
831 "r12",
832 "sp",
833 "lr",
834 "pc"};
835
836void output_byte(u_char byte)
837{
838 *(out++)=byte;
839}
840void output_modrm(u_char mod,u_char rm,u_char ext)
841{
842 assert(mod<4);
843 assert(rm<8);
844 assert(ext<8);
845 u_char byte=(mod<<6)|(ext<<3)|rm;
846 *(out++)=byte;
847}
848void output_sib(u_char scale,u_char index,u_char base)
849{
850 assert(scale<4);
851 assert(index<8);
852 assert(base<8);
853 u_char byte=(scale<<6)|(index<<3)|base;
854 *(out++)=byte;
855}
856void output_w32(u_int word)
857{
858 *((u_int *)out)=word;
859 out+=4;
860}
// Build the register fields of an ARM instruction word:
// rn in bits 19-16, rd in bits 15-12, rm in bits 3-0.
// Every register number must be below 16 (asserted).
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the operand fields of an ARM immediate instruction:
// rn in bits 19-16, rd in bits 15-12, an 8-bit immediate in bits 7-0 and,
// from bit 7 upwards, the rotation (32-shift)&30 that places the immediate
// shift bits to the left.  shift must be even; rd and rn must be below 16
// and imm below 256 (all asserted).
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate=(32-shift)&30;
  u_int fields=imm;
  fields|=rotate<<7;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Try to express imm as an ARM immediate operand (an 8-bit value with an
// even rotation).  On success the 12-bit operand field is stored in
// *encoded and 1 is returned; otherwise *encoded is 0 and 0 is returned.
u_int genimm(u_int imm,u_int *encoded)
{
  u_int rot;
  *encoded=0;
  if(imm==0) return 1;
  // rotate the value right two bits at a time until it fits in eight bits
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 891void genimm_checked(u_int imm,u_int *encoded)
892{
893 u_int ret=genimm(imm,encoded);
894 assert(ret);
895}
57871462 896u_int genjmp(u_int addr)
897{
898 int offset=addr-(int)out-8;
e80343e2 899 if(offset<-33554432||offset>=33554432) {
900 if (addr>2) {
901 printf("genjmp: out of range: %08x\n", offset);
902 exit(1);
903 }
904 return 0;
905 }
57871462 906 return ((u_int)offset>>2)&0xffffff;
907}
908
909void emit_mov(int rs,int rt)
910{
911 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
912 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
913}
914
915void emit_movs(int rs,int rt)
916{
917 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
918 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
919}
920
921void emit_add(int rs1,int rs2,int rt)
922{
923 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
924 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
925}
926
927void emit_adds(int rs1,int rs2,int rt)
928{
929 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
930 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
931}
932
933void emit_adcs(int rs1,int rs2,int rt)
934{
935 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
936 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
937}
938
939void emit_sbc(int rs1,int rs2,int rt)
940{
941 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
942 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
943}
944
945void emit_sbcs(int rs1,int rs2,int rt)
946{
947 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
948 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
949}
950
951void emit_neg(int rs, int rt)
952{
953 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
954 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
955}
956
957void emit_negs(int rs, int rt)
958{
959 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
960 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
961}
962
963void emit_sub(int rs1,int rs2,int rt)
964{
965 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
966 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
967}
968
969void emit_subs(int rs1,int rs2,int rt)
970{
971 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
972 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
973}
974
975void emit_zeroreg(int rt)
976{
977 assem_debug("mov %s,#0\n",regname[rt]);
978 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
979}
980
790ee18e 981void emit_loadlp(u_int imm,u_int rt)
982{
983 add_literal((int)out,imm);
984 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
985 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
986}
987void emit_movw(u_int imm,u_int rt)
988{
989 assert(imm<65536);
990 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
991 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
992}
993void emit_movt(u_int imm,u_int rt)
994{
995 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
996 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
997}
998void emit_movimm(u_int imm,u_int rt)
999{
1000 u_int armval;
1001 if(genimm(imm,&armval)) {
1002 assem_debug("mov %s,#%d\n",regname[rt],imm);
1003 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1004 }else if(genimm(~imm,&armval)) {
1005 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1006 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1007 }else if(imm<65536) {
1008 #ifdef ARMv5_ONLY
1009 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1010 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1011 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1012 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1013 #else
1014 emit_movw(imm,rt);
1015 #endif
1016 }else{
1017 #ifdef ARMv5_ONLY
1018 emit_loadlp(imm,rt);
1019 #else
1020 emit_movw(imm&0x0000FFFF,rt);
1021 emit_movt(imm&0xFFFF0000,rt);
1022 #endif
1023 }
1024}
1025void emit_pcreladdr(u_int rt)
1026{
1027 assem_debug("add %s,pc,#?\n",regname[rt]);
1028 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1029}
1030
57871462 1031void emit_loadreg(int r, int hr)
1032{
3d624f89 1033#ifdef FORCE32
1034 if(r&64) {
1035 printf("64bit load in 32bit mode!\n");
7f2607ea 1036 assert(0);
1037 return;
3d624f89 1038 }
1039#endif
57871462 1040 if((r&63)==0)
1041 emit_zeroreg(hr);
1042 else {
3d624f89 1043 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1044 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1045 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1046 if(r==CCREG) addr=(int)&cycle_count;
1047 if(r==CSREG) addr=(int)&Status;
1048 if(r==FSREG) addr=(int)&FCR31;
1049 if(r==INVCP) addr=(int)&invc_ptr;
1050 u_int offset = addr-(u_int)&dynarec_local;
1051 assert(offset<4096);
1052 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1053 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1054 }
1055}
1056void emit_storereg(int r, int hr)
1057{
3d624f89 1058#ifdef FORCE32
1059 if(r&64) {
1060 printf("64bit store in 32bit mode!\n");
7f2607ea 1061 assert(0);
1062 return;
3d624f89 1063 }
1064#endif
1065 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1066 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1067 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1068 if(r==CCREG) addr=(int)&cycle_count;
1069 if(r==FSREG) addr=(int)&FCR31;
1070 u_int offset = addr-(u_int)&dynarec_local;
1071 assert(offset<4096);
1072 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1073 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1074}
1075
1076void emit_test(int rs, int rt)
1077{
1078 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1079 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1080}
1081
1082void emit_testimm(int rs,int imm)
1083{
1084 u_int armval;
5a05d80c 1085 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1086 genimm_checked(imm,&armval);
57871462 1087 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1088}
1089
b9b61529 1090void emit_testeqimm(int rs,int imm)
1091{
1092 u_int armval;
1093 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1094 genimm_checked(imm,&armval);
b9b61529 1095 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1096}
1097
57871462 1098void emit_not(int rs,int rt)
1099{
1100 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1101 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1102}
1103
b9b61529 1104void emit_mvnmi(int rs,int rt)
1105{
1106 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1107 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1108}
1109
57871462 1110void emit_and(u_int rs1,u_int rs2,u_int rt)
1111{
1112 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1113 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1114}
1115
1116void emit_or(u_int rs1,u_int rs2,u_int rt)
1117{
1118 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1119 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1120}
1121void emit_or_and_set_flags(int rs1,int rs2,int rt)
1122{
1123 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1124 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1125}
1126
f70d384d 1127void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1128{
1129 assert(rs<16);
1130 assert(rt<16);
1131 assert(imm<32);
1132 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1133 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1134}
1135
576bbd8f 1136void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1137{
1138 assert(rs<16);
1139 assert(rt<16);
1140 assert(imm<32);
1141 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1142 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1143}
1144
57871462 1145void emit_xor(u_int rs1,u_int rs2,u_int rt)
1146{
1147 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1148 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1149}
1150
57871462 1151void emit_addimm(u_int rs,int imm,u_int rt)
1152{
1153 assert(rs<16);
1154 assert(rt<16);
1155 if(imm!=0) {
57871462 1156 u_int armval;
1157 if(genimm(imm,&armval)) {
1158 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1159 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1160 }else if(genimm(-imm,&armval)) {
8a0a8423 1161 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1162 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1163 }else if(imm<0) {
ffb0b9e0 1164 assert(imm>-65536);
57871462 1165 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1166 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1167 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1168 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1169 }else{
ffb0b9e0 1170 assert(imm<65536);
57871462 1171 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1172 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1173 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1174 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1175 }
1176 }
1177 else if(rs!=rt) emit_mov(rs,rt);
1178}
1179
1180void emit_addimm_and_set_flags(int imm,int rt)
1181{
1182 assert(imm>-65536&&imm<65536);
1183 u_int armval;
1184 if(genimm(imm,&armval)) {
1185 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1186 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1187 }else if(genimm(-imm,&armval)) {
1188 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1189 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1190 }else if(imm<0) {
1191 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1192 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1193 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1194 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1195 }else{
1196 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1197 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1198 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1199 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1200 }
1201}
1202void emit_addimm_no_flags(u_int imm,u_int rt)
1203{
1204 emit_addimm(rt,imm,rt);
1205}
1206
1207void emit_addnop(u_int r)
1208{
1209 assert(r<16);
1210 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1211 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1212}
1213
1214void emit_adcimm(u_int rs,int imm,u_int rt)
1215{
1216 u_int armval;
cfbd3c6e 1217 genimm_checked(imm,&armval);
57871462 1218 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1219 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1220}
1221/*void emit_sbcimm(int imm,u_int rt)
1222{
1223 u_int armval;
cfbd3c6e 1224 genimm_checked(imm,&armval);
57871462 1225 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1226 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1227}*/
1228void emit_sbbimm(int imm,u_int rt)
1229{
1230 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1231 assert(rt<8);
1232 if(imm<128&&imm>=-128) {
1233 output_byte(0x83);
1234 output_modrm(3,rt,3);
1235 output_byte(imm);
1236 }
1237 else
1238 {
1239 output_byte(0x81);
1240 output_modrm(3,rt,3);
1241 output_w32(imm);
1242 }
1243}
1244void emit_rscimm(int rs,int imm,u_int rt)
1245{
1246 assert(0);
1247 u_int armval;
cfbd3c6e 1248 genimm_checked(imm,&armval);
57871462 1249 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1250 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1251}
1252
1253void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1254{
1255 // TODO: if(genimm(imm,&armval)) ...
1256 // else
1257 emit_movimm(imm,HOST_TEMPREG);
1258 emit_adds(HOST_TEMPREG,rsl,rtl);
1259 emit_adcimm(rsh,0,rth);
1260}
1261
1262void emit_sbb(int rs1,int rs2)
1263{
1264 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1265 output_byte(0x19);
1266 output_modrm(3,rs1,rs2);
1267}
1268
1269void emit_andimm(int rs,int imm,int rt)
1270{
1271 u_int armval;
790ee18e 1272 if(imm==0) {
1273 emit_zeroreg(rt);
1274 }else if(genimm(imm,&armval)) {
57871462 1275 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1276 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1277 }else if(genimm(~imm,&armval)) {
1278 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1279 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1280 }else if(imm==65535) {
1281 #ifdef ARMv5_ONLY
1282 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1283 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1284 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1285 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1286 #else
1287 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1288 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1289 #endif
1290 }else{
1291 assert(imm>0&&imm<65535);
1292 #ifdef ARMv5_ONLY
1293 assem_debug("mov r14,#%d\n",imm&0xFF00);
1294 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1295 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1296 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1297 #else
1298 emit_movw(imm,HOST_TEMPREG);
1299 #endif
1300 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1301 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1302 }
1303}
1304
1305void emit_orimm(int rs,int imm,int rt)
1306{
1307 u_int armval;
790ee18e 1308 if(imm==0) {
1309 if(rs!=rt) emit_mov(rs,rt);
1310 }else if(genimm(imm,&armval)) {
57871462 1311 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1312 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1313 }else{
1314 assert(imm>0&&imm<65536);
1315 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1316 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1317 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1318 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1319 }
1320}
1321
1322void emit_xorimm(int rs,int imm,int rt)
1323{
57871462 1324 u_int armval;
790ee18e 1325 if(imm==0) {
1326 if(rs!=rt) emit_mov(rs,rt);
1327 }else if(genimm(imm,&armval)) {
57871462 1328 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1329 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1330 }else{
514ed0d9 1331 assert(imm>0&&imm<65536);
57871462 1332 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1333 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1334 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1335 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1336 }
1337}
1338
1339void emit_shlimm(int rs,u_int imm,int rt)
1340{
1341 assert(imm>0);
1342 assert(imm<32);
1343 //if(imm==1) ...
1344 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1345 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1346}
1347
c6c3b1b3 1348void emit_lsls_imm(int rs,int imm,int rt)
1349{
1350 assert(imm>0);
1351 assert(imm<32);
1352 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1354}
1355
57871462 1356void emit_shrimm(int rs,u_int imm,int rt)
1357{
1358 assert(imm>0);
1359 assert(imm<32);
1360 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1361 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1362}
1363
1364void emit_sarimm(int rs,u_int imm,int rt)
1365{
1366 assert(imm>0);
1367 assert(imm<32);
1368 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1369 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1370}
1371
1372void emit_rorimm(int rs,u_int imm,int rt)
1373{
1374 assert(imm>0);
1375 assert(imm<32);
1376 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1377 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1378}
1379
1380void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1381{
1382 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1383 assert(imm>0);
1384 assert(imm<32);
1385 //if(imm==1) ...
1386 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1387 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1388 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1389 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1390}
1391
1392void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1393{
1394 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1395 assert(imm>0);
1396 assert(imm<32);
1397 //if(imm==1) ...
1398 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1399 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1400 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1401 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1402}
1403
b9b61529 1404void emit_signextend16(int rs,int rt)
1405{
1406 #ifdef ARMv5_ONLY
1407 emit_shlimm(rs,16,rt);
1408 emit_sarimm(rt,16,rt);
1409 #else
1410 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1411 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1412 #endif
1413}
1414
c6c3b1b3 1415void emit_signextend8(int rs,int rt)
1416{
1417 #ifdef ARMv5_ONLY
1418 emit_shlimm(rs,24,rt);
1419 emit_sarimm(rt,24,rt);
1420 #else
1421 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1422 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1423 #endif
1424}
1425
57871462 1426void emit_shl(u_int rs,u_int shift,u_int rt)
1427{
1428 assert(rs<16);
1429 assert(rt<16);
1430 assert(shift<16);
1431 //if(imm==1) ...
1432 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1433 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1434}
1435void emit_shr(u_int rs,u_int shift,u_int rt)
1436{
1437 assert(rs<16);
1438 assert(rt<16);
1439 assert(shift<16);
1440 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1441 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1442}
1443void emit_sar(u_int rs,u_int shift,u_int rt)
1444{
1445 assert(rs<16);
1446 assert(rt<16);
1447 assert(shift<16);
1448 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1449 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1450}
1451void emit_shlcl(int r)
1452{
1453 assem_debug("shl %%%s,%%cl\n",regname[r]);
1454 assert(0);
1455}
1456void emit_shrcl(int r)
1457{
1458 assem_debug("shr %%%s,%%cl\n",regname[r]);
1459 assert(0);
1460}
1461void emit_sarcl(int r)
1462{
1463 assem_debug("sar %%%s,%%cl\n",regname[r]);
1464 assert(0);
1465}
1466
1467void emit_shldcl(int r1,int r2)
1468{
1469 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1470 assert(0);
1471}
1472void emit_shrdcl(int r1,int r2)
1473{
1474 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1475 assert(0);
1476}
1477void emit_orrshl(u_int rs,u_int shift,u_int rt)
1478{
1479 assert(rs<16);
1480 assert(rt<16);
1481 assert(shift<16);
1482 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1483 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1484}
1485void emit_orrshr(u_int rs,u_int shift,u_int rt)
1486{
1487 assert(rs<16);
1488 assert(rt<16);
1489 assert(shift<16);
1490 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1491 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1492}
1493
1494void emit_cmpimm(int rs,int imm)
1495{
1496 u_int armval;
1497 if(genimm(imm,&armval)) {
5a05d80c 1498 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1499 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1500 }else if(genimm(-imm,&armval)) {
5a05d80c 1501 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1502 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1503 }else if(imm>0) {
1504 assert(imm<65536);
1505 #ifdef ARMv5_ONLY
1506 emit_movimm(imm,HOST_TEMPREG);
1507 #else
1508 emit_movw(imm,HOST_TEMPREG);
1509 #endif
1510 assem_debug("cmp %s,r14\n",regname[rs]);
1511 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1512 }else{
1513 assert(imm>-65536);
1514 #ifdef ARMv5_ONLY
1515 emit_movimm(-imm,HOST_TEMPREG);
1516 #else
1517 emit_movw(-imm,HOST_TEMPREG);
1518 #endif
1519 assem_debug("cmn %s,r14\n",regname[rs]);
1520 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1521 }
1522}
1523
1524void emit_cmovne(u_int *addr,int rt)
1525{
1526 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1527 assert(0);
1528}
1529void emit_cmovl(u_int *addr,int rt)
1530{
1531 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1532 assert(0);
1533}
1534void emit_cmovs(u_int *addr,int rt)
1535{
1536 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1537 assert(0);
1538}
1539void emit_cmovne_imm(int imm,int rt)
1540{
1541 assem_debug("movne %s,#%d\n",regname[rt],imm);
1542 u_int armval;
cfbd3c6e 1543 genimm_checked(imm,&armval);
57871462 1544 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1545}
1546void emit_cmovl_imm(int imm,int rt)
1547{
1548 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1549 u_int armval;
cfbd3c6e 1550 genimm_checked(imm,&armval);
57871462 1551 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1552}
1553void emit_cmovb_imm(int imm,int rt)
1554{
1555 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1556 u_int armval;
cfbd3c6e 1557 genimm_checked(imm,&armval);
57871462 1558 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1559}
1560void emit_cmovs_imm(int imm,int rt)
1561{
1562 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1563 u_int armval;
cfbd3c6e 1564 genimm_checked(imm,&armval);
57871462 1565 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1566}
1567void emit_cmove_reg(int rs,int rt)
1568{
1569 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1570 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1571}
1572void emit_cmovne_reg(int rs,int rt)
1573{
1574 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1575 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1576}
1577void emit_cmovl_reg(int rs,int rt)
1578{
1579 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1580 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1581}
1582void emit_cmovs_reg(int rs,int rt)
1583{
1584 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1585 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1586}
1587
/*
 * rt = (rs < imm) using the signed "lt" condition.  When rt aliases rs the
 * zeroing is deferred until after the compare so the operand is not lost.
 */
void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/*
 * rt = (rs < imm) using the unsigned "cc" condition.  When rt aliases rs
 * the zeroing is deferred until after the compare so the operand is not lost.
 */
void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
/*
 * Signed "less than immediate" on a 64-bit value held in rsh:rsl, result
 * in rt (which must differ from rsh).  The low-word result from
 * emit_slti32() is corrected according to the high word: the high word is
 * tested against zero for non-negative imm, and against -1 for negative imm.
 */
void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
/*
 * Unsigned "less than immediate" on a 64-bit value held in rsh:rsl, result
 * in rt (which must differ from rsh).  The low-word result from
 * emit_sltiu32() is overridden when the high word differs from 0
 * (non-negative imm, result 0) or from -1 (negative imm, result 1).
 */
void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1634
1635void emit_cmp(int rs,int rt)
1636{
1637 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1638 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1639}
/* rt = 1, then cleared to 0 when the signed compare finds rs < 1. */
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);     /* flags from rs - 1 */
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);  /* movlt rt,#0 */
}
/*
 * Normalise rs to 0/1 in rt: rt receives rs (with flags set), then is
 * replaced by 1 when the value was non-zero.
 */
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt) {
    emit_test(rs, rs);
  } else {
    emit_movs(rs, rt);
  }
  emit_cmovne_imm(1, rt);
}
/*
 * 64-bit variant of emit_set_gz32 for the value in rsh:rsl.  The low-word
 * result is overridden by the high word: forced to 1 when rsh is non-zero,
 * then forced to 0 when rsh is negative.
 */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
/* rt = rsh | rsl with flags set, then replaced by 1 when non-zero. */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
/*
 * rt = (rs1 < rs2) using the signed "lt" condition.  When rt aliases an
 * operand the zeroing is deferred until after the compare.
 */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/*
 * rt = (rs1 < rs2) using the unsigned "cc" condition.  When rt aliases an
 * operand the zeroing is deferred until after the compare.
 */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1684void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1685{
1686 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1687 assert(u1!=rt);
1688 assert(u2!=rt);
1689 emit_cmp(l1,l2);
1690 emit_movimm(0,rt);
1691 emit_sbcs(u1,u2,HOST_TEMPREG);
1692 emit_cmovl_imm(1,rt);
1693}
1694void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1695{
1696 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1697 assert(u1!=rt);
1698 assert(u2!=rt);
1699 emit_cmp(l1,l2);
1700 emit_movimm(0,rt);
1701 emit_sbcs(u1,u2,HOST_TEMPREG);
1702 emit_cmovb_imm(1,rt);
1703}
1704
1705void emit_call(int a)
1706{
1707 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1708 u_int offset=genjmp(a);
1709 output_w32(0xeb000000|offset);
1710}
1711void emit_jmp(int a)
1712{
1713 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1714 u_int offset=genjmp(a);
1715 output_w32(0xea000000|offset);
1716}
1717void emit_jne(int a)
1718{
1719 assem_debug("bne %x\n",a);
1720 u_int offset=genjmp(a);
1721 output_w32(0x1a000000|offset);
1722}
1723void emit_jeq(int a)
1724{
1725 assem_debug("beq %x\n",a);
1726 u_int offset=genjmp(a);
1727 output_w32(0x0a000000|offset);
1728}
1729void emit_js(int a)
1730{
1731 assem_debug("bmi %x\n",a);
1732 u_int offset=genjmp(a);
1733 output_w32(0x4a000000|offset);
1734}
1735void emit_jns(int a)
1736{
1737 assem_debug("bpl %x\n",a);
1738 u_int offset=genjmp(a);
1739 output_w32(0x5a000000|offset);
1740}
1741void emit_jl(int a)
1742{
1743 assem_debug("blt %x\n",a);
1744 u_int offset=genjmp(a);
1745 output_w32(0xba000000|offset);
1746}
1747void emit_jge(int a)
1748{
1749 assem_debug("bge %x\n",a);
1750 u_int offset=genjmp(a);
1751 output_w32(0xaa000000|offset);
1752}
1753void emit_jno(int a)
1754{
1755 assem_debug("bvc %x\n",a);
1756 u_int offset=genjmp(a);
1757 output_w32(0x7a000000|offset);
1758}
1759void emit_jc(int a)
1760{
1761 assem_debug("bcs %x\n",a);
1762 u_int offset=genjmp(a);
1763 output_w32(0x2a000000|offset);
1764}
1765void emit_jcc(int a)
1766{
1767 assem_debug("bcc %x\n",a);
1768 u_int offset=genjmp(a);
1769 output_w32(0x3a000000|offset);
1770}
1771
/* Unimplemented stub: traces an x86-style `push $imm` and asserts. */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
/* Unimplemented stub: traces `pusha` and asserts. */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/* Unimplemented stub: traces `popa` and asserts. */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1787void emit_pushreg(u_int r)
1788{
1789 assem_debug("push %%%s\n",regname[r]);
1790 assert(0);
1791}
1792void emit_popreg(u_int r)
1793{
1794 assem_debug("pop %%%s\n",regname[r]);
1795 assert(0);
1796}
1797void emit_callreg(u_int r)
1798{
c6c3b1b3 1799 assert(r<15);
1800 assem_debug("blx %s\n",regname[r]);
1801 output_w32(0xe12fff30|r);
57871462 1802}
1803void emit_jmpreg(u_int r)
1804{
1805 assem_debug("mov pc,%s\n",regname[r]);
1806 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1807}
1808
1809void emit_readword_indexed(int offset, int rs, int rt)
1810{
1811 assert(offset>-4096&&offset<4096);
1812 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1813 if(offset>=0) {
1814 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1815 }else{
1816 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1817 }
1818}
1819void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1820{
1821 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1822 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1823}
c6c3b1b3 1824void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1825{
1826 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1827 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1828}
1829void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1830{
1831 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1832 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1833}
1834void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1835{
1836 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1837 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1838}
1839void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1840{
1841 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1842 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1843}
1844void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1845{
1846 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1847 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1848}
/*
 * Word load from rs+addr into rt.  With no map register (map < 0) a plain
 * indexed load is used; otherwise addr must be 0 and the load is indexed
 * by map shifted left by 2.
 */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/*
 * Doubleword load: the word at addr goes to rh (skipped when rh < 0) and
 * the following word to rl.  In the mapped case (map >= 0) the map
 * register is incremented by 1 between the two loads, and rh must not
 * alias rs.
 */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);
  if (map < 0) {
    if (want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }
  assert(rh != rs);
  if (want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1869void emit_movsbl_indexed(int offset, int rs, int rt)
1870{
1871 assert(offset>-256&&offset<256);
1872 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1873 if(offset>=0) {
1874 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1875 }else{
1876 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1877 }
1878}
1879void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1880{
1881 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1882 else {
1883 if(addr==0) {
1884 emit_shlimm(map,2,map);
1885 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1886 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1887 }else{
1888 assert(addr>-256&&addr<256);
1889 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1890 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1891 emit_movsbl_indexed(addr, rt, rt);
1892 }
1893 }
1894}
1895void emit_movswl_indexed(int offset, int rs, int rt)
1896{
1897 assert(offset>-256&&offset<256);
1898 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1899 if(offset>=0) {
1900 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1901 }else{
1902 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1903 }
1904}
1905void emit_movzbl_indexed(int offset, int rs, int rt)
1906{
1907 assert(offset>-4096&&offset<4096);
1908 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1909 if(offset>=0) {
1910 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1911 }else{
1912 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1913 }
1914}
1915void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1916{
1917 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1918 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1919}
/*
 * Unsigned byte load from rs+addr into rt, optionally through a map
 * register.  With a map and a non-zero addr, rt is first set to rs+addr
 * and then used as the base register.
 */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if (addr == 0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs, addr, rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1932void emit_movzwl_indexed(int offset, int rs, int rt)
1933{
1934 assert(offset>-256&&offset<256);
1935 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1936 if(offset>=0) {
1937 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1938 }else{
1939 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1940 }
1941}
054175e9 1942static void emit_ldrd(int offset, int rs, int rt)
1943{
1944 assert(offset>-256&&offset<256);
1945 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1946 if(offset>=0) {
1947 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1948 }else{
1949 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1950 }
1951}
57871462 1952void emit_readword(int addr, int rt)
1953{
1954 u_int offset = addr-(u_int)&dynarec_local;
1955 assert(offset<4096);
1956 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1957 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1958}
1959void emit_movsbl(int addr, int rt)
1960{
1961 u_int offset = addr-(u_int)&dynarec_local;
1962 assert(offset<256);
1963 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1964 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1965}
1966void emit_movswl(int addr, int rt)
1967{
1968 u_int offset = addr-(u_int)&dynarec_local;
1969 assert(offset<256);
1970 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1971 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1972}
1973void emit_movzbl(int addr, int rt)
1974{
1975 u_int offset = addr-(u_int)&dynarec_local;
1976 assert(offset<4096);
1977 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1978 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1979}
1980void emit_movzwl(int addr, int rt)
1981{
1982 u_int offset = addr-(u_int)&dynarec_local;
1983 assert(offset<256);
1984 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1985 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1986}
1987void emit_movzwl_reg(int rs, int rt)
1988{
1989 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1990 assert(0);
1991}
1992
1993void emit_xchg(int rs, int rt)
1994{
1995 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1996 assert(0);
1997}
1998void emit_writeword_indexed(int rt, int offset, int rs)
1999{
2000 assert(offset>-4096&&offset<4096);
2001 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
2002 if(offset>=0) {
2003 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
2004 }else{
2005 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2006 }
2007}
2008void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2009{
2010 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2011 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2012}
/*
 * Word store of rt to rs+addr.  With no map register (map < 0) a plain
 * indexed store is used; otherwise addr must be 0 and the store is indexed
 * by map shifted left by 2.  temp is not used by this function.
 */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/*
 * Doubleword store: rh to addr, rl to the following word.
 *
 * - map < 0: two plain indexed stores (the first skipped when rh < 0).
 * - map >= 0 (rh must be valid): when temp differs from rs, temp = map + 1
 *   serves as the map for the second word; when temp == rs, rs itself is
 *   advanced by 4 between the two stores.
 */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }
  assert(rh >= 0);
  if (temp == rs) {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  } else {
    emit_addimm(map, 1, temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }
}
2037void emit_writehword_indexed(int rt, int offset, int rs)
2038{
2039 assert(offset>-256&&offset<256);
2040 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2041 if(offset>=0) {
2042 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2043 }else{
2044 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2045 }
2046}
2047void emit_writebyte_indexed(int rt, int offset, int rs)
2048{
2049 assert(offset>-4096&&offset<4096);
2050 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2051 if(offset>=0) {
2052 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2053 }else{
2054 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2055 }
2056}
2057void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2058{
2059 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2060 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2061}
/*
 * Byte store of rt to rs+addr, optionally through a map register.  With a
 * map and a non-zero addr, temp is first set to rs+addr and then used as
 * the base register.
 */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if (addr == 0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs, addr, temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
b96d3df7 2074void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2075{
2076 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2077 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2078}
2079void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2080{
2081 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2082 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2083}
2084void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2085{
2086 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2087 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2088}
57871462 2089void emit_writeword(int rt, int addr)
2090{
2091 u_int offset = addr-(u_int)&dynarec_local;
2092 assert(offset<4096);
2093 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2094 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2095}
2096void emit_writehword(int rt, int addr)
2097{
2098 u_int offset = addr-(u_int)&dynarec_local;
2099 assert(offset<256);
2100 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2101 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2102}
2103void emit_writebyte(int rt, int addr)
2104{
2105 u_int offset = addr-(u_int)&dynarec_local;
2106 assert(offset<4096);
74426039 2107 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2108 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2109}
/* Unimplemented stub: traces an x86-style `movl $imm,addr` and asserts. */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
/* Unimplemented stub: traces an x86-style `movb $imm,addr` and asserts. */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
2120
2121void emit_mul(int rs)
2122{
2123 assem_debug("mul %%%s\n",regname[rs]);
2124 assert(0);
2125}
2126void emit_imul(int rs)
2127{
2128 assem_debug("imul %%%s\n",regname[rs]);
2129 assert(0);
2130}
2131void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2132{
2133 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2134 assert(rs1<16);
2135 assert(rs2<16);
2136 assert(hi<16);
2137 assert(lo<16);
2138 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2139}
2140void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2141{
2142 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2143 assert(rs1<16);
2144 assert(rs2<16);
2145 assert(hi<16);
2146 assert(lo<16);
2147 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2148}
2149
2150void emit_div(int rs)
2151{
2152 assem_debug("div %%%s\n",regname[rs]);
2153 assert(0);
2154}
2155void emit_idiv(int rs)
2156{
2157 assem_debug("idiv %%%s\n",regname[rs]);
2158 assert(0);
2159}
// Not implemented in this backend: logs the request and fails assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2165
2166void emit_clz(int rs,int rt)
2167{
2168 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2169 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2170}
2171
2172void emit_subcs(int rs1,int rs2,int rt)
2173{
2174 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2175 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2176}
2177
2178void emit_shrcc_imm(int rs,u_int imm,int rt)
2179{
2180 assert(imm>0);
2181 assert(imm<32);
2182 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2183 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2184}
2185
b1be1eee 2186void emit_shrne_imm(int rs,u_int imm,int rt)
2187{
2188 assert(imm>0);
2189 assert(imm<32);
2190 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2191 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2192}
2193
57871462 2194void emit_negmi(int rs, int rt)
2195{
2196 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2197 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2198}
2199
2200void emit_negsmi(int rs, int rt)
2201{
2202 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2203 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2204}
2205
2206void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2207{
2208 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2209 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2210}
2211
2212void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2213{
2214 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2215 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2216}
2217
2218void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2219{
2220 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2221 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2222}
2223
2224void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2225{
2226 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2227 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2228}
2229
2230void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2231{
2232 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2233 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2234}
2235
2236void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2237{
2238 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2239 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2240}
2241
2242void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2243{
2244 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2245 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2246}
2247
2248void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2249{
2250 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2251 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2252}
2253
2254void emit_teq(int rs, int rt)
2255{
2256 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2257 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2258}
2259
2260void emit_rsbimm(int rs, int imm, int rt)
2261{
2262 u_int armval;
cfbd3c6e 2263 genimm_checked(imm,&armval);
57871462 2264 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2265 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2266}
2267
2268// Load 2 immediates optimizing for small code size
2269void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2270{
2271 emit_movimm(imm1,rt1);
2272 u_int armval;
2273 if(genimm(imm2-imm1,&armval)) {
2274 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2275 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2276 }else if(genimm(imm1-imm2,&armval)) {
2277 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2278 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2279 }
2280 else emit_movimm(imm2,rt2);
2281}
2282
2283// Conditionally select one of two immediates, optimizing for small code size
2284// This will only be called if HAVE_CMOV_IMM is defined
2285void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2286{
2287 u_int armval;
2288 if(genimm(imm2-imm1,&armval)) {
2289 emit_movimm(imm1,rt);
2290 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2291 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2292 }else if(genimm(imm1-imm2,&armval)) {
2293 emit_movimm(imm1,rt);
2294 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2295 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2296 }
2297 else {
2298 #ifdef ARMv5_ONLY
2299 emit_movimm(imm1,rt);
2300 add_literal((int)out,imm2);
2301 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2302 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2303 #else
2304 emit_movw(imm1&0x0000FFFF,rt);
2305 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2306 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2307 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2308 }
2309 emit_movt(imm1&0xFFFF0000,rt);
2310 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2311 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2312 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2313 }
2314 #endif
2315 }
2316}
2317
// Special case for checking invalid_code (absolute-address variant).
// Not implemented in this backend: always fails assert(0).
void emit_cmpmem_indexedsr12_imm(int addr, int r, int imm)
{
  assert(0);
}
2323
2324// special case for checking invalid_code
2325void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2326{
2327 assert(imm<128&&imm>=0);
2328 assert(r>=0&&r<16);
2329 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2330 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2331 emit_cmpimm(HOST_TEMPREG,imm);
2332}
2333
2334// special case for tlb mapping
2335void emit_addsr12(int rs1,int rs2,int rt)
2336{
2337 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2338 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2339}
2340
0bbd1454 2341void emit_callne(int a)
2342{
2343 assem_debug("blne %x\n",a);
2344 u_int offset=genjmp(a);
2345 output_w32(0x1b000000|offset);
2346}
2347
// Used to preload hash table entries.
// NOTE(review): the bytes emitted here (0x0F 0x18 + modrm + 32-bit address)
// look like an x86 prefetch encoding rather than an ARM instruction -
// presumably unused on ARM; confirm before calling.
void emit_prefetch(void *addr)
{
  int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2357void emit_prefetchreg(int r)
2358{
2359 assem_debug("pld %s\n",regname[r]);
2360 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2361}
2362
2363// Special case for mini_ht
2364void emit_ldreq_indexed(int rs, u_int offset, int rt)
2365{
2366 assert(offset<4096);
2367 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2368 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2369}
2370
2371void emit_flds(int r,int sr)
2372{
2373 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2374 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2375}
2376
2377void emit_vldr(int r,int vr)
2378{
2379 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2380 output_w32(0xed900b00|(vr<<12)|(r<<16));
2381}
2382
2383void emit_fsts(int sr,int r)
2384{
2385 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2386 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2387}
2388
2389void emit_vstr(int vr,int r)
2390{
2391 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2392 output_w32(0xed800b00|(vr<<12)|(r<<16));
2393}
2394
2395void emit_ftosizs(int s,int d)
2396{
2397 assem_debug("ftosizs s%d,s%d\n",d,s);
2398 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2399}
2400
2401void emit_ftosizd(int s,int d)
2402{
2403 assem_debug("ftosizd s%d,d%d\n",d,s);
2404 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2405}
2406
2407void emit_fsitos(int s,int d)
2408{
2409 assem_debug("fsitos s%d,s%d\n",d,s);
2410 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2411}
2412
2413void emit_fsitod(int s,int d)
2414{
2415 assem_debug("fsitod d%d,s%d\n",d,s);
2416 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2417}
2418
2419void emit_fcvtds(int s,int d)
2420{
2421 assem_debug("fcvtds d%d,s%d\n",d,s);
2422 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2423}
2424
2425void emit_fcvtsd(int s,int d)
2426{
2427 assem_debug("fcvtsd s%d,d%d\n",d,s);
2428 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2429}
2430
// Emit "fsqrts s<d>,s<s>".  Both operands are encoded as single-precision
// registers (split 4+1 bit fields), so the debug text names both with the
// "s" prefix; the previous text labelled the destination "d".
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2436
// Emit "fsqrtd d<d>,d<s>".  Both operands are encoded as double-precision
// registers (3-bit fields, d0..d7), so the debug text names both with the
// "d" prefix; the previous text labelled the destination "s".
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2442
// Emit "fabss s<d>,s<s>".  Both operands are encoded as single-precision
// registers, so the debug text names both with the "s" prefix; the previous
// text labelled the destination "d".
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2448
// Emit "fabsd d<d>,d<s>".  Both operands are encoded as double-precision
// registers (d0..d7), so the debug text names both with the "d" prefix; the
// previous text labelled the destination "s".
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2454
// Emit "fnegs s<d>,s<s>".  Both operands are encoded as single-precision
// registers, so the debug text names both with the "s" prefix; the previous
// text labelled the destination "d".
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2460
// Emit "fnegd d<d>,d<s>".  Both operands are encoded as double-precision
// registers (d0..d7), so the debug text names both with the "d" prefix; the
// previous text labelled the destination "s".
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2466
2467void emit_fadds(int s1,int s2,int d)
2468{
2469 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2470 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2471}
2472
2473void emit_faddd(int s1,int s2,int d)
2474{
2475 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2476 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2477}
2478
2479void emit_fsubs(int s1,int s2,int d)
2480{
2481 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2482 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2483}
2484
2485void emit_fsubd(int s1,int s2,int d)
2486{
2487 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2488 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2489}
2490
2491void emit_fmuls(int s1,int s2,int d)
2492{
2493 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2494 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2495}
2496
2497void emit_fmuld(int s1,int s2,int d)
2498{
2499 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2500 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2501}
2502
2503void emit_fdivs(int s1,int s2,int d)
2504{
2505 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2506 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2507}
2508
2509void emit_fdivd(int s1,int s2,int d)
2510{
2511 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2512 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2513}
2514
2515void emit_fcmps(int x,int y)
2516{
2517 assem_debug("fcmps s14, s15\n");
2518 output_w32(0xeeb47a67);
2519}
2520
2521void emit_fcmpd(int x,int y)
2522{
2523 assem_debug("fcmpd d6, d7\n");
2524 output_w32(0xeeb46b47);
2525}
2526
2527void emit_fmstat()
2528{
2529 assem_debug("fmstat\n");
2530 output_w32(0xeef1fa10);
2531}
2532
2533void emit_bicne_imm(int rs,int imm,int rt)
2534{
2535 u_int armval;
cfbd3c6e 2536 genimm_checked(imm,&armval);
57871462 2537 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2538 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2539}
2540
2541void emit_biccs_imm(int rs,int imm,int rt)
2542{
2543 u_int armval;
cfbd3c6e 2544 genimm_checked(imm,&armval);
57871462 2545 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2546 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2547}
2548
2549void emit_bicvc_imm(int rs,int imm,int rt)
2550{
2551 u_int armval;
cfbd3c6e 2552 genimm_checked(imm,&armval);
57871462 2553 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2554 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2555}
2556
2557void emit_bichi_imm(int rs,int imm,int rt)
2558{
2559 u_int armval;
cfbd3c6e 2560 genimm_checked(imm,&armval);
57871462 2561 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2562 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2563}
2564
2565void emit_orrvs_imm(int rs,int imm,int rt)
2566{
2567 u_int armval;
cfbd3c6e 2568 genimm_checked(imm,&armval);
57871462 2569 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2570 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2571}
2572
b9b61529 2573void emit_orrne_imm(int rs,int imm,int rt)
2574{
2575 u_int armval;
cfbd3c6e 2576 genimm_checked(imm,&armval);
b9b61529 2577 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2578 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2579}
2580
2581void emit_andne_imm(int rs,int imm,int rt)
2582{
2583 u_int armval;
cfbd3c6e 2584 genimm_checked(imm,&armval);
b9b61529 2585 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2586 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2587}
2588
57871462 2589void emit_jno_unlikely(int a)
2590{
2591 //emit_jno(a);
2592 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2593 output_w32(0x72800000|rd_rn_rm(15,15,0));
2594}
2595
054175e9 2596static void save_regs_all(u_int reglist)
57871462 2597{
054175e9 2598 int i;
57871462 2599 if(!reglist) return;
2600 assem_debug("stmia fp,{");
054175e9 2601 for(i=0;i<16;i++)
2602 if(reglist&(1<<i))
2603 assem_debug("r%d,",i);
57871462 2604 assem_debug("}\n");
2605 output_w32(0xe88b0000|reglist);
2606}
054175e9 2607static void restore_regs_all(u_int reglist)
57871462 2608{
054175e9 2609 int i;
57871462 2610 if(!reglist) return;
2611 assem_debug("ldmia fp,{");
054175e9 2612 for(i=0;i<16;i++)
2613 if(reglist&(1<<i))
2614 assem_debug("r%d,",i);
57871462 2615 assem_debug("}\n");
2616 output_w32(0xe89b0000|reglist);
2617}
054175e9 2618// Save registers before function call
2619static void save_regs(u_int reglist)
2620{
2621 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2622 save_regs_all(reglist);
2623}
2624// Restore registers after function call
2625static void restore_regs(u_int reglist)
2626{
2627 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2628 restore_regs_all(reglist);
2629}
57871462 2630
2631// Write back consts using r14 so we don't disturb the other registers
2632void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2633{
2634 int hr;
2635 for(hr=0;hr<HOST_REGS;hr++) {
2636 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2637 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2638 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2639 int value=constmap[i][hr];
2640 if(value==0) {
2641 emit_zeroreg(HOST_TEMPREG);
2642 }
2643 else {
2644 emit_movimm(value,HOST_TEMPREG);
2645 }
2646 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2647#ifndef FORCE32
57871462 2648 if((i_is32>>i_regmap[hr])&1) {
2649 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2650 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2651 }
24385cae 2652#endif
57871462 2653 }
2654 }
2655 }
2656 }
2657}
2658
2659/* Stubs/epilogue */
2660
2661void literal_pool(int n)
2662{
2663 if(!literalcount) return;
2664 if(n) {
2665 if((int)out-literals[0][0]<4096-n) return;
2666 }
2667 u_int *ptr;
2668 int i;
2669 for(i=0;i<literalcount;i++)
2670 {
77750690 2671 u_int l_addr=(u_int)out;
2672 int j;
2673 for(j=0;j<i;j++) {
2674 if(literals[j][1]==literals[i][1]) {
2675 //printf("dup %08x\n",literals[i][1]);
2676 l_addr=literals[j][0];
2677 break;
2678 }
2679 }
57871462 2680 ptr=(u_int *)literals[i][0];
77750690 2681 u_int offset=l_addr-(u_int)ptr-8;
57871462 2682 assert(offset<4096);
2683 assert(!(offset&3));
2684 *ptr|=offset;
77750690 2685 if(l_addr==(u_int)out) {
2686 literals[i][0]=l_addr; // remember for dupes
2687 output_w32(literals[i][1]);
2688 }
57871462 2689 }
2690 literalcount=0;
2691}
2692
2693void literal_pool_jumpover(int n)
2694{
2695 if(!literalcount) return;
2696 if(n) {
2697 if((int)out-literals[0][0]<4096-n) return;
2698 }
2699 int jaddr=(int)out;
2700 emit_jmp(0);
2701 literal_pool(0);
2702 set_jump_target(jaddr,(int)out);
2703}
2704
2705emit_extjump2(int addr, int target, int linker)
2706{
2707 u_char *ptr=(u_char *)addr;
2708 assert((ptr[3]&0x0e)==0xa);
2709 emit_loadlp(target,0);
2710 emit_loadlp(addr,1);
24385cae 2711 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2712 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2713//DEBUG >
2714#ifdef DEBUG_CYCLE_COUNT
2715 emit_readword((int)&last_count,ECX);
2716 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2717 emit_readword((int)&next_interupt,ECX);
2718 emit_writeword(HOST_CCREG,(int)&Count);
2719 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2720 emit_writeword(ECX,(int)&last_count);
2721#endif
2722//DEBUG <
2723 emit_jmp(linker);
2724}
2725
2726emit_extjump(int addr, int target)
2727{
2728 emit_extjump2(addr, target, (int)dyna_linker);
2729}
2730emit_extjump_ds(int addr, int target)
2731{
2732 emit_extjump2(addr, target, (int)dyna_linker_ds);
2733}
2734
13e35c04 2735// put rt_val into rt, potentially making use of rs with value rs_val
2736static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2737{
8575a877 2738 u_int armval;
2739 int diff;
2740 if(genimm(rt_val,&armval)) {
2741 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2742 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2743 return;
2744 }
2745 if(genimm(~rt_val,&armval)) {
2746 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2747 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2748 return;
2749 }
2750 diff=rt_val-rs_val;
2751 if(genimm(diff,&armval)) {
2752 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2753 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2754 return;
2755 }else if(genimm(-diff,&armval)) {
2756 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2757 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2758 return;
2759 }
2760 emit_movimm(rt_val,rt);
2761}
2762
// Return 1 if emit_movimm_from() can derive v2 from v1 cheaply, i.e. the
// values are equal or their difference (in either direction) reduces to
// fewer than 8 significant bits after stripping trailing zero bit-pairs.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int xs;
  int diff;

  if(v1==v2)
    return 1;

  diff=v2-v1;

  // Forward difference.
  xs=diff;
  while(xs!=0&&(xs&3)==0)
    xs>>=2;
  if(xs<0x100)
    return 1;

  // Negated difference.
  xs=-diff;
  while(xs!=0&&(xs&3)==0)
    xs>>=2;
  return (xs<0x100)?1:0;
}
cbbab9cd 2778
// Move host registers a0 and a1 into r0 and r1 respectively.
// A negative register number means "no argument" and is skipped where the
// code checks for it.  Trashes r2 (used as scratch for the swap case).
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // a1 currently lives in r0: move it out before r0 is overwritten.
      emit_mov(a1,1);
      if (a0>=0) emit_mov(a0,0);
      return;
    }
  }
  // No conflict: move each argument only if it is not already in place.
  if(a0>=0&&a0!=0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2795
b1be1eee 2796static void mov_loadtype_adj(int type,int rs,int rt)
2797{
2798 switch(type) {
2799 case LOADB_STUB: emit_signextend8(rs,rt); break;
2800 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2801 case LOADH_STUB: emit_signextend16(rs,rt); break;
2802 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2803 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2804 default: assert(0);
2805 }
2806}
2807
2808#ifdef PCSX
2809#include "pcsxmem.h"
2810#include "pcsxmem_inline.c"
2811#endif
2812
// Emit the out-of-line slow path ("stub") for memory-read stub record n.
// Fields of stubs[n] read here:
//   [0] load type (LOAD*_STUB)     [1] location of the jump to patch to here
//   [2] return address             [3] instruction index i
//   [4] host register holding the address (rs)
//   [5] struct regstat pointer     [6] cycle adjustment   [7] live register list
// Two alternative implementations are selected at compile time by PCSX.
do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  // The stub starts here: redirect the recorded jump to the current output.
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt;
  int ds;
  // Coprocessor loads and LOADLR deliver their result through FTEMP;
  // everything else targets the instruction's rt1 register.
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rth=get_reg(i_regmap,rt1[i]|64);
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
#ifdef PCSX
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
  reglist|=(1<<rs);
  // Look for a scratch register among r0-r9 and r12 (mask 0x13ff) that is
  // not in the live list.
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // The destination is about to be overwritten, so it need not be preserved.
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // No free register: save first, then use r0 (or r2 if rs is r0).
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // Prefer r1 as the second temp when it is free to clobber.
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // temp2 = mem_rtab[rs>>12], then shifted left by one with flags updated.
  // The conditional ("cc") loads and jump below depend on those flags.
  emit_readword((int)&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    switch(type) {
      case LOADB_STUB:  emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB:  emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB:  emit_ldrcc_dualindexed(temp2,rs,rt); break;
    }
  }
  if(regs_saved) {
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address

  // Handler path: pick the generic read handler matching the access width.
  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=(int)jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=(int)jump_handler_read16;
  if(type==LOADW_STUB)
    handler=(int)jump_handler_read32;
  assert(handler!=0);
  // r0 = address, r1 = shifted table entry, r2 = adjusted cycle count.
  pass_args(rs,temp2);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  emit_call(handler);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    // Result comes back in r0; extend/move it into rt.
    mov_loadtype_adj(type,0,rt);
  }
  // If registers were saved before the fast path, it rejoins here.
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
#else // !PCSX
  if(addr<0) addr=rt;
  if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Select the handler table for this access width.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  // ds is non-zero when the register state passed in is not regs[i]
  // (presumably a delay-slot instruction).
  ds=i_regs!=&regs[i];
  int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
#endif
  // r0 = table base, r1 = address>>16, r2 = adjusted cycle count.
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
#ifndef PCSX
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
#endif
  //emit_readword((int)&last_count,temp);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  //emit_mov(15,14);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  // The result is picked up from readmem_dword with the width/signedness
  // that matches the stub type.
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    assert(rt>=0);
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
  emit_jmp(stubs[n][2]); // return address
#endif // !PCSX
}
2976
#ifdef PCSX
// Look up `addr` in a two-level memory table.
// Returns 0 and stores a directly accessible host address in *addr_host when
// the looked-up entry has bit 31 clear; otherwise returns the handler value
// (second-level entry shifted left by one) and leaves *addr_host untouched.
//   table     - first-level table, indexed by addr>>12
//   type      - LOAD*/STORE*_STUB; selects the byte, halfword or word
//               section of the second-level table
u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
{
  // Unsigned mask: the previous (1<<31) shifted into the sign bit of int.
  const u_int handler_flag=1u<<31;
  u_int l1,l2=0;
  l1=((u_int *)table)[addr>>12];
  if((l1&handler_flag)==0) {
    // First level maps the whole page directly.
    u_int v=l1<<1;
    *addr_host=v+addr;
    return 0;
  }
  else {
    // First level points at a per-page second-level table.
    l1<<=1;
    if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
      l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
    else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
      l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
    else
      l2=((u_int *)l1)[(addr&0xfff)/4];
    if((l2&handler_flag)==0) {
      u_int v=l2<<1;
      *addr_host=v+(addr&0xfff);
      return 0;
    }
    return l2<<1;
  }
}
#endif
3005
// Emit inline code for a memory read from the compile-time-known address
// `addr` (instruction index i).
//   type    - LOAD*_STUB access kind
//   regmap  - host register map used to find the target and cycle registers
//   target  - guest register receiving the value (0 means none)
//   adj     - cycle adjustment, passed through CLOCK_ADJUST(adj+1)
//   reglist - live host registers that must survive a handler call
// Two alternative implementations are selected at compile time by PCSX.
inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  // Fall back to a temporary register when the target is not mapped.
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);
#ifdef PCSX
  u_int handler,host_addr=0,is_dynamic,far_call=0;
  int cc=get_reg(regmap,CCREG);
  // pcsx_direct_read() returns non-zero when it has handled the access.
  if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
    return;
  handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
  if (handler==0) {
    // Directly accessible memory: load straight from host_addr.
    if(rt<0||rt1[i]==0)
      return;
    if(addr!=host_addr)
      emit_movimm_from(addr,rs,host_addr,rs);
    switch(type) {
      case LOADB_STUB:  emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB:  emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB:  emit_readword_indexed(0,rs,rt); break;
      default:          assert(0);
    }
    return;
  }
  // For a dynamic handler, call the generic width-specific entry point
  // instead of the handler found above.
  is_dynamic=pcsxmem_is_handler_dynamic(addr);
  if(is_dynamic) {
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=(int)jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=(int)jump_handler_read16;
    if(type==LOADW_STUB)
      handler=(int)jump_handler_read32;
  }

  // call a memhandler
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // r0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  // Check whether the handler is within +/-32MB of the call site.
  int offset=(int)handler-(int)out-8;
  if(offset<-33554432||offset>=33554432) {
    // unreachable memhandler, a plugin func perhaps
    emit_movimm(handler,12);
    far_call=1;
  }
  if(cc<0)
    emit_loadreg(CCREG,2);
  if(is_dynamic) {
    // r1 = shifted table entry for this page, r2 = adjusted cycle count.
    emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
  }
  else {
    // Static handler: store last_count + adjusted cycle count to Count.
    emit_readword((int)&last_count,3);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
    emit_add(2,3,2);
    emit_writeword(2,(int)&Count);
  }

  if(far_call)
    emit_callreg(12);
  else
    emit_call(handler);

  // Result comes back in r0; extend/move it into rt.
  if(rt>=0&&rt1[i]!=0) {
    switch(type) {
      case LOADB_STUB:  emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB:  emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB:  if(rt!=0) emit_mov(0,rt); break;
      default:          assert(0);
    }
  }
  restore_regs(reglist);
#else // if !PCSX
  // Select the handler table for this access width.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  if(target==0)
    emit_movimm(addr,rs);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Theoretically we can have a pagefault here, if the TLB has never
    // been enabled and the address is outside the range 80000000..BFFFFFFF
    // Write out the registers so the pagefault can be handled.  This is
    // a very rare case and likely represents a bug.
    int ds=regmap!=regs[i].regmap;
    if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
    if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
    else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
  }
#endif
  //emit_shrimm(rs,16,1);
  int cc=get_reg(regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  //emit_movimm(ftable,0);
  // The handler is looked up at compile time: r0 = ftable[addr>>16].
  emit_movimm(((u_int *)ftable)[addr>>16],0);
  //emit_readword((int)&last_count,12);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Pagefault address
    int ds=regmap!=regs[i].regmap;
    emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  }
#endif
  //emit_add(12,2,2);
  //emit_writeword(2,(int)&Count);
  //emit_call(((u_int *)ftable)[addr>>16]);
  emit_call((int)&indirect_jump);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  // The result is picked up from readmem_dword with the width/signedness
  // that matches the stub type.
  if(rt>=0) {
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
#endif // !PCSX
}
3169
3170do_writestub(int n)
3171{
3172 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3173 literal_pool(256);
3174 set_jump_target(stubs[n][1],(int)out);
3175 int type=stubs[n][0];
3176 int i=stubs[n][3];
3177 int rs=stubs[n][4];
3178 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3179 u_int reglist=stubs[n][7];
3180 signed char *i_regmap=i_regs->regmap;
3181 int addr=get_reg(i_regmap,AGEN1+(i&1));
3182 int rth,rt,r;
3183 int ds;
b9b61529 3184 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3185 rth=get_reg(i_regmap,FTEMP|64);
3186 rt=get_reg(i_regmap,r=FTEMP);
3187 }else{
3188 rth=get_reg(i_regmap,rs2[i]|64);
3189 rt=get_reg(i_regmap,r=rs2[i]);
3190 }
3191 assert(rs>=0);
3192 assert(rt>=0);
b96d3df7 3193#ifdef PCSX
3194 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3195 int reglist2=reglist|(1<<rs)|(1<<rt);
3196 for(rtmp=0;rtmp<=12;rtmp++) {
3197 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3198 temp=rtmp; break;
3199 }
3200 }
3201 if(temp==-1) {
3202 save_regs(reglist);
3203 regs_saved=1;
3204 for(rtmp=0;rtmp<=3;rtmp++)
3205 if(rtmp!=rs&&rtmp!=rt)
3206 {temp=rtmp;break;}
3207 }
3208 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3209 temp2=3;
3210 emit_readword((int)&mem_wtab,temp);
3211 emit_shrimm(rs,12,temp2);
3212 emit_readword_dualindexedx4(temp,temp2,temp2);
3213 emit_lsls_imm(temp2,1,temp2);
3214 switch(type) {
3215 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3216 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3217 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3218 default: assert(0);
3219 }
3220 if(regs_saved) {
3221 restore_jump=(int)out;
3222 emit_jcc(0); // jump to reg restore
3223 }
3224 else
3225 emit_jcc(stubs[n][2]); // return address (invcode check)
3226
3227 if(!regs_saved)
3228 save_regs(reglist);
3229 int handler=0;
3230 switch(type) {
3231 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3232 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3233 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3234 }
3235 assert(handler!=0);
3236 pass_args(rs,rt);
3237 if(temp2!=3)
3238 emit_mov(temp2,3);
3239 int cc=get_reg(i_regmap,CCREG);
3240 if(cc<0)
3241 emit_loadreg(CCREG,2);
2573466a 3242 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3243 // returns new cycle_count
3244 emit_call(handler);
2573466a 3245 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3246 if(cc<0)
3247 emit_storereg(CCREG,2);
3248 if(restore_jump)
3249 set_jump_target(restore_jump,(int)out);
3250 restore_regs(reglist);
3251 ra=stubs[n][2];
b96d3df7 3252 emit_jmp(ra);
3253#else // if !PCSX
57871462 3254 if(addr<0) addr=get_reg(i_regmap,-1);
3255 assert(addr>=0);
3256 int ftable=0;
3257 if(type==STOREB_STUB)
3258 ftable=(int)writememb;
3259 if(type==STOREH_STUB)
3260 ftable=(int)writememh;
3261 if(type==STOREW_STUB)
3262 ftable=(int)writemem;
24385cae 3263#ifndef FORCE32
57871462 3264 if(type==STORED_STUB)
3265 ftable=(int)writememd;
24385cae 3266#endif
3267 assert(ftable!=0);
57871462 3268 emit_writeword(rs,(int)&address);
3269 //emit_shrimm(rs,16,rs);
3270 //emit_movmem_indexedx4(ftable,rs,rs);
3271 if(type==STOREB_STUB)
3272 emit_writebyte(rt,(int)&byte);
3273 if(type==STOREH_STUB)
3274 emit_writehword(rt,(int)&hword);
3275 if(type==STOREW_STUB)
3276 emit_writeword(rt,(int)&word);
3277 if(type==STORED_STUB) {
3d624f89 3278#ifndef FORCE32
57871462 3279 emit_writeword(rt,(int)&dword);
3280 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3281#else
3282 printf("STORED_STUB\n");
3283#endif
57871462 3284 }
3285 //emit_pusha();
3286 save_regs(reglist);
97a238a6 3287#ifndef PCSX
57871462 3288 ds=i_regs!=&regs[i];
3289 int real_rs=get_reg(i_regmap,rs1[i]);
3290 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3291 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3292 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3293 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3294#endif
57871462 3295 emit_shrimm(rs,16,1);
3296 int cc=get_reg(i_regmap,CCREG);
3297 if(cc<0) {
3298 emit_loadreg(CCREG,2);
3299 }
3300 emit_movimm(ftable,0);
3301 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3302#ifndef PCSX
57871462 3303 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3304#endif
57871462 3305 //emit_readword((int)&last_count,temp);
3306 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3307 //emit_add(cc,temp,cc);
3308 //emit_writeword(cc,(int)&Count);
3309 emit_call((int)&indirect_jump_indexed);
3310 //emit_callreg(rs);
3311 emit_readword((int)&Count,HOST_TEMPREG);
3312 emit_readword((int)&next_interupt,2);
3313 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3314 emit_writeword(2,(int)&last_count);
3315 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3316 if(cc<0) {
3317 emit_storereg(CCREG,HOST_TEMPREG);
3318 }
3319 //emit_popa();
3320 restore_regs(reglist);
3321 //if((cc=get_reg(regmap,CCREG))>=0) {
3322 // emit_loadreg(CCREG,cc);
3323 //}
3324 emit_jmp(stubs[n][2]); // return address
b96d3df7 3325#endif // !PCSX
57871462 3326}
3327
3328inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3329{
3330 int rs=get_reg(regmap,-1);
3331 int rth=get_reg(regmap,target|64);
3332 int rt=get_reg(regmap,target);
3333 assert(rs>=0);
3334 assert(rt>=0);
cbbab9cd 3335#ifdef PCSX
b96d3df7 3336 u_int handler,host_addr=0;
b96d3df7 3337 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3338 if (handler==0) {
13e35c04 3339 if(addr!=host_addr)
3340 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3341 switch(type) {
3342 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3343 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3344 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3345 default: assert(0);
3346 }
3347 return;
3348 }
3349
3350 // call a memhandler
3351 save_regs(reglist);
13e35c04 3352 pass_args(rs,rt);
b96d3df7 3353 int cc=get_reg(regmap,CCREG);
3354 if(cc<0)
3355 emit_loadreg(CCREG,2);
2573466a 3356 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3357 emit_movimm(handler,3);
3358 // returns new cycle_count
3359 emit_call((int)jump_handler_write_h);
2573466a 3360 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3361 if(cc<0)
3362 emit_storereg(CCREG,2);
3363 restore_regs(reglist);
3364#else // if !pcsx
57871462 3365 int ftable=0;
3366 if(type==STOREB_STUB)
3367 ftable=(int)writememb;
3368 if(type==STOREH_STUB)
3369 ftable=(int)writememh;
3370 if(type==STOREW_STUB)
3371 ftable=(int)writemem;
24385cae 3372#ifndef FORCE32
57871462 3373 if(type==STORED_STUB)
3374 ftable=(int)writememd;
24385cae 3375#endif
3376 assert(ftable!=0);
57871462 3377 emit_writeword(rs,(int)&address);
3378 //emit_shrimm(rs,16,rs);
3379 //emit_movmem_indexedx4(ftable,rs,rs);
3380 if(type==STOREB_STUB)
3381 emit_writebyte(rt,(int)&byte);
3382 if(type==STOREH_STUB)
3383 emit_writehword(rt,(int)&hword);
3384 if(type==STOREW_STUB)
3385 emit_writeword(rt,(int)&word);
3386 if(type==STORED_STUB) {
3d624f89 3387#ifndef FORCE32
57871462 3388 emit_writeword(rt,(int)&dword);
3389 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3390#else
3391 printf("STORED_STUB\n");
3392#endif
57871462 3393 }
3394 //emit_pusha();
3395 save_regs(reglist);
0c1fe38b 3396#ifndef PCSX
3397 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3398 if((signed int)addr>=(signed int)0xC0000000) {
3399 // Theoretically we can have a pagefault here, if the TLB has never
3400 // been enabled and the address is outside the range 80000000..BFFFFFFF
3401 // Write out the registers so the pagefault can be handled. This is
3402 // a very rare case and likely represents a bug.
3403 int ds=regmap!=regs[i].regmap;
3404 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3405 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3406 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3407 }
3408#endif
57871462 3409 //emit_shrimm(rs,16,1);
3410 int cc=get_reg(regmap,CCREG);
3411 if(cc<0) {
3412 emit_loadreg(CCREG,2);
3413 }
3414 //emit_movimm(ftable,0);
3415 emit_movimm(((u_int *)ftable)[addr>>16],0);
3416 //emit_readword((int)&last_count,12);
2573466a 3417 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3418#ifndef PCSX
57871462 3419 if((signed int)addr>=(signed int)0xC0000000) {
3420 // Pagefault address
3421 int ds=regmap!=regs[i].regmap;
3422 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3423 }
f51dc36c 3424#endif
57871462 3425 //emit_add(12,2,2);
3426 //emit_writeword(2,(int)&Count);
3427 //emit_call(((u_int *)ftable)[addr>>16]);
3428 emit_call((int)&indirect_jump);
3429 emit_readword((int)&Count,HOST_TEMPREG);
3430 emit_readword((int)&next_interupt,2);
2573466a 3431 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3432 emit_writeword(2,(int)&last_count);
3433 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3434 if(cc<0) {
3435 emit_storereg(CCREG,HOST_TEMPREG);
3436 }
3437 //emit_popa();
3438 restore_regs(reglist);
b96d3df7 3439#endif
57871462 3440}
3441
3442do_unalignedwritestub(int n)
3443{
b7918751 3444 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3445 literal_pool(256);
57871462 3446 set_jump_target(stubs[n][1],(int)out);
b7918751 3447
3448 int i=stubs[n][3];
3449 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3450 int addr=stubs[n][5];
3451 u_int reglist=stubs[n][7];
3452 signed char *i_regmap=i_regs->regmap;
3453 int temp2=get_reg(i_regmap,FTEMP);
3454 int rt;
3455 int ds, real_rs;
3456 rt=get_reg(i_regmap,rs2[i]);
3457 assert(rt>=0);
3458 assert(addr>=0);
3459 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3460 reglist|=(1<<addr);
3461 reglist&=~(1<<temp2);
3462
b96d3df7 3463#if 1
3464 // don't bother with it and call write handler
3465 save_regs(reglist);
3466 pass_args(addr,rt);
3467 int cc=get_reg(i_regmap,CCREG);
3468 if(cc<0)
3469 emit_loadreg(CCREG,2);
2573466a 3470 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3471 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3472 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3473 if(cc<0)
3474 emit_storereg(CCREG,2);
3475 restore_regs(reglist);
3476 emit_jmp(stubs[n][2]); // return address
3477#else
b7918751 3478 emit_andimm(addr,0xfffffffc,temp2);
3479 emit_writeword(temp2,(int)&address);
3480
3481 save_regs(reglist);
97a238a6 3482#ifndef PCSX
b7918751 3483 ds=i_regs!=&regs[i];
3484 real_rs=get_reg(i_regmap,rs1[i]);
3485 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3486 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3487 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3488 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3489#endif
b7918751 3490 emit_shrimm(addr,16,1);
3491 int cc=get_reg(i_regmap,CCREG);
3492 if(cc<0) {
3493 emit_loadreg(CCREG,2);
3494 }
3495 emit_movimm((u_int)readmem,0);
3496 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3497#ifndef PCSX
3498 // pagefault address
3499 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3500#endif
b7918751 3501 emit_call((int)&indirect_jump_indexed);
3502 restore_regs(reglist);
3503
3504 emit_readword((int)&readmem_dword,temp2);
3505 int temp=addr; //hmh
3506 emit_shlimm(addr,3,temp);
3507 emit_andimm(temp,24,temp);
3508#ifdef BIG_ENDIAN_MIPS
3509 if (opcode[i]==0x2e) // SWR
3510#else
3511 if (opcode[i]==0x2a) // SWL
3512#endif
3513 emit_xorimm(temp,24,temp);
3514 emit_movimm(-1,HOST_TEMPREG);
55439448 3515 if (opcode[i]==0x2a) { // SWL
b7918751 3516 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3517 emit_orrshr(rt,temp,temp2);
3518 }else{
3519 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3520 emit_orrshl(rt,temp,temp2);
3521 }
3522 emit_readword((int)&address,addr);
3523 emit_writeword(temp2,(int)&word);
3524 //save_regs(reglist); // don't need to, no state changes
3525 emit_shrimm(addr,16,1);
3526 emit_movimm((u_int)writemem,0);
3527 //emit_call((int)&indirect_jump_indexed);
3528 emit_mov(15,14);
3529 emit_readword_dualindexedx4(0,1,15);
3530 emit_readword((int)&Count,HOST_TEMPREG);
3531 emit_readword((int)&next_interupt,2);
3532 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3533 emit_writeword(2,(int)&last_count);
3534 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3535 if(cc<0) {
3536 emit_storereg(CCREG,HOST_TEMPREG);
3537 }
3538 restore_regs(reglist);
57871462 3539 emit_jmp(stubs[n][2]); // return address
b96d3df7 3540#endif
57871462 3541}
3542
/*
 * Debug helper: dump eight register values in hex.
 *
 * Values are printed in the order a, b, c, d, ebp, esi, edi; esp is accepted
 * but not printed.  The value in parentheses is the int located immediately
 * before the edi parameter in memory.
 * NOTE(review): presumably meant to show a return address or saved word on
 * the stack - confirm against the calling convention in use.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *first_arg=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,first_arg[-1]);
}
3547
3548do_invstub(int n)
3549{
3550 literal_pool(20);
3551 u_int reglist=stubs[n][3];
3552 set_jump_target(stubs[n][1],(int)out);
3553 save_regs(reglist);
3554 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3555 emit_call((int)&invalidate_addr);
3556 restore_regs(reglist);
3557 emit_jmp(stubs[n][2]); // return address
3558}
3559
3560int do_dirty_stub(int i)
3561{
3562 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3563 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3564 #ifdef PCSX
3565 addr=(u_int)source;
3566 #endif
57871462 3567 // Careful about the code output here, verify_dirty needs to parse it.
3568 #ifdef ARMv5_ONLY
ac545b3a 3569 emit_loadlp(addr,1);
57871462 3570 emit_loadlp((int)copy,2);
3571 emit_loadlp(slen*4,3);
3572 #else
ac545b3a 3573 emit_movw(addr&0x0000FFFF,1);
57871462 3574 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3575 emit_movt(addr&0xFFFF0000,1);
57871462 3576 emit_movt(((u_int)copy)&0xFFFF0000,2);
3577 emit_movw(slen*4,3);
3578 #endif
3579 emit_movimm(start+i*4,0);
3580 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3581 int entry=(int)out;
3582 load_regs_entry(i);
3583 if(entry==(int)out) entry=instr_addr[i];
3584 emit_jmp(instr_addr[i]);
3585 return entry;
3586}
3587
3588void do_dirty_stub_ds()
3589{
3590 // Careful about the code output here, verify_dirty needs to parse it.
3591 #ifdef ARMv5_ONLY
3592 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3593 emit_loadlp((int)copy,2);
3594 emit_loadlp(slen*4,3);
3595 #else
3596 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3597 emit_movw(((u_int)copy)&0x0000FFFF,2);
3598 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3599 emit_movt(((u_int)copy)&0xFFFF0000,2);
3600 emit_movw(slen*4,3);
3601 #endif
3602 emit_movimm(start+1,0);
3603 emit_call((int)&verify_code_ds);
3604}
3605
3606do_cop1stub(int n)
3607{
3608 literal_pool(256);
3609 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3610 set_jump_target(stubs[n][1],(int)out);
3611 int i=stubs[n][3];
3d624f89 3612// int rs=stubs[n][4];
57871462 3613 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3614 int ds=stubs[n][6];
3615 if(!ds) {
3616 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3617 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3618 }
3619 //else {printf("fp exception in delay slot\n");}
3620 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3621 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3622 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3623 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3624 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3625}
3626
63cb0298 3627#ifndef DISABLE_TLB
3628
57871462 3629/* TLB */
3630
3631int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3632{
3633 if(c) {
3634 if((signed int)addr>=(signed int)0xC0000000) {
3635 // address_generation already loaded the const
3636 emit_readword_dualindexedx4(FP,map,map);
3637 }
3638 else
3639 return -1; // No mapping
3640 }
3641 else {
3642 assert(s!=map);
3643 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3644 emit_addsr12(map,s,map);
3645 // Schedule this while we wait on the load
3646 //if(x) emit_xorimm(s,x,ar);
3647 if(shift>=0) emit_shlimm(s,3,shift);
3648 if(~a) emit_andimm(s,a,ar);
3649 emit_readword_dualindexedx4(FP,map,map);
3650 }
3651 return map;
3652}
3653int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3654{
3655 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3656 emit_test(map,map);
3657 *jaddr=(int)out;
3658 emit_js(0);
3659 }
3660 return map;
3661}
3662
3663int gen_tlb_addr_r(int ar, int map) {
3664 if(map>=0) {
3665 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3666 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3667 }
3668}
3669
3670int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3671{
3672 if(c) {
3673 if(addr<0x80800000||addr>=0xC0000000) {
3674 // address_generation already loaded the const
3675 emit_readword_dualindexedx4(FP,map,map);
3676 }
3677 else
3678 return -1; // No mapping
3679 }
3680 else {
3681 assert(s!=map);
3682 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3683 emit_addsr12(map,s,map);
3684 // Schedule this while we wait on the load
3685 //if(x) emit_xorimm(s,x,ar);
3686 emit_readword_dualindexedx4(FP,map,map);
3687 }
3688 return map;
3689}
3690int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3691{
3692 if(!c||addr<0x80800000||addr>=0xC0000000) {
3693 emit_testimm(map,0x40000000);
3694 *jaddr=(int)out;
3695 emit_jne(0);
3696 }
3697}
3698
3699int gen_tlb_addr_w(int ar, int map) {
3700 if(map>=0) {
3701 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3702 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3703 }
3704}
3705
3706// Generate the address of the memory_map entry, relative to dynarec_local
3707generate_map_const(u_int addr,int reg) {
3708 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3709 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3710}
3711
63cb0298 3712#else
3713
/*
 * TLB support compiled out: every TLB helper collapses to a no-op that
 * returns 0.  The parameter lists are intentionally left unspecified so
 * that the existing call sites, which still pass arguments, keep compiling.
 */
static int do_tlb_r()
{
  return 0;
}

static int do_tlb_r_branch()
{
  return 0;
}

static int gen_tlb_addr_r()
{
  return 0;
}

static int do_tlb_w()
{
  return 0;
}

static int do_tlb_w_branch()
{
  return 0;
}

static int gen_tlb_addr_w()
{
  return 0;
}
3720
3721#endif // DISABLE_TLB
3722
57871462 3723/* Special assem */
3724
3725void shift_assemble_arm(int i,struct regstat *i_regs)
3726{
3727 if(rt1[i]) {
3728 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3729 {
3730 signed char s,t,shift;
3731 t=get_reg(i_regs->regmap,rt1[i]);
3732 s=get_reg(i_regs->regmap,rs1[i]);
3733 shift=get_reg(i_regs->regmap,rs2[i]);
3734 if(t>=0){
3735 if(rs1[i]==0)
3736 {
3737 emit_zeroreg(t);
3738 }
3739 else if(rs2[i]==0)
3740 {
3741 assert(s>=0);
3742 if(s!=t) emit_mov(s,t);
3743 }
3744 else
3745 {
3746 emit_andimm(shift,31,HOST_TEMPREG);
3747 if(opcode2[i]==4) // SLLV
3748 {
3749 emit_shl(s,HOST_TEMPREG,t);
3750 }
3751 if(opcode2[i]==6) // SRLV
3752 {
3753 emit_shr(s,HOST_TEMPREG,t);
3754 }
3755 if(opcode2[i]==7) // SRAV
3756 {
3757 emit_sar(s,HOST_TEMPREG,t);
3758 }
3759 }
3760 }
3761 } else { // DSLLV/DSRLV/DSRAV
3762 signed char sh,sl,th,tl,shift;
3763 th=get_reg(i_regs->regmap,rt1[i]|64);
3764 tl=get_reg(i_regs->regmap,rt1[i]);
3765 sh=get_reg(i_regs->regmap,rs1[i]|64);
3766 sl=get_reg(i_regs->regmap,rs1[i]);
3767 shift=get_reg(i_regs->regmap,rs2[i]);
3768 if(tl>=0){
3769 if(rs1[i]==0)
3770 {
3771 emit_zeroreg(tl);
3772 if(th>=0) emit_zeroreg(th);
3773 }
3774 else if(rs2[i]==0)
3775 {
3776 assert(sl>=0);
3777 if(sl!=tl) emit_mov(sl,tl);
3778 if(th>=0&&sh!=th) emit_mov(sh,th);
3779 }
3780 else
3781 {
3782 // FIXME: What if shift==tl ?
3783 assert(shift!=tl);
3784 int temp=get_reg(i_regs->regmap,-1);
3785 int real_th=th;
3786 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3787 assert(sl>=0);
3788 assert(sh>=0);
3789 emit_andimm(shift,31,HOST_TEMPREG);
3790 if(opcode2[i]==0x14) // DSLLV
3791 {
3792 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3793 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3794 emit_orrshr(sl,HOST_TEMPREG,th);
3795 emit_andimm(shift,31,HOST_TEMPREG);
3796 emit_testimm(shift,32);
3797 emit_shl(sl,HOST_TEMPREG,tl);
3798 if(th>=0) emit_cmovne_reg(tl,th);
3799 emit_cmovne_imm(0,tl);
3800 }
3801 if(opcode2[i]==0x16) // DSRLV
3802 {
3803 assert(th>=0);
3804 emit_shr(sl,HOST_TEMPREG,tl);
3805 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3806 emit_orrshl(sh,HOST_TEMPREG,tl);
3807 emit_andimm(shift,31,HOST_TEMPREG);
3808 emit_testimm(shift,32);
3809 emit_shr(sh,HOST_TEMPREG,th);
3810 emit_cmovne_reg(th,tl);
3811 if(real_th>=0) emit_cmovne_imm(0,th);
3812 }
3813 if(opcode2[i]==0x17) // DSRAV
3814 {
3815 assert(th>=0);
3816 emit_shr(sl,HOST_TEMPREG,tl);
3817 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3818 if(real_th>=0) {
3819 assert(temp>=0);
3820 emit_sarimm(th,31,temp);
3821 }
3822 emit_orrshl(sh,HOST_TEMPREG,tl);
3823 emit_andimm(shift,31,HOST_TEMPREG);
3824 emit_testimm(shift,32);
3825 emit_sar(sh,HOST_TEMPREG,th);
3826 emit_cmovne_reg(th,tl);
3827 if(real_th>=0) emit_cmovne_reg(temp,th);
3828 }
3829 }
3830 }
3831 }
3832 }
3833}
ffb0b9e0 3834
3835#ifdef PCSX
3836static void speculate_mov(int rs,int rt)
3837{
3838 if(rt!=0) {
3839 smrv_strong_next|=1<<rt;
3840 smrv[rt]=smrv[rs];
3841 }
3842}
3843
3844static void speculate_mov_weak(int rs,int rt)
3845{
3846 if(rt!=0) {
3847 smrv_weak_next|=1<<rt;
3848 smrv[rt]=smrv[rs];
3849 }
3850}
3851
3852static void speculate_register_values(int i)
3853{
3854 if(i==0) {
3855 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3856 // gp,sp are likely to stay the same throughout the block
3857 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3858 smrv_weak_next=~smrv_strong_next;
3859 //printf(" llr %08x\n", smrv[4]);
3860 }
3861 smrv_strong=smrv_strong_next;
3862 smrv_weak=smrv_weak_next;
3863 switch(itype[i]) {
3864 case ALU:
3865 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3866 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3867 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3868 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3869 else {
3870 smrv_strong_next&=~(1<<rt1[i]);
3871 smrv_weak_next&=~(1<<rt1[i]);
3872 }
3873 break;
3874 case SHIFTIMM:
3875 smrv_strong_next&=~(1<<rt1[i]);
3876 smrv_weak_next&=~(1<<rt1[i]);
3877 // fallthrough
3878 case IMM16:
3879 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3880 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3881 if(hr>=0) {
3882 if(get_final_value(hr,i,&value))
3883 smrv[rt1[i]]=value;
3884 else smrv[rt1[i]]=constmap[i][hr];
3885 smrv_strong_next|=1<<rt1[i];
3886 }
3887 }
3888 else {
3889 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3890 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3891 }
3892 break;
3893 case LOAD:
3894 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3895 // special case for BIOS
3896 smrv[rt1[i]]=0xa0000000;
3897 smrv_strong_next|=1<<rt1[i];
3898 break;
3899 }
3900 // fallthrough
3901 case SHIFT:
3902 case LOADLR:
3903 case MOV:
3904 smrv_strong_next&=~(1<<rt1[i]);
3905 smrv_weak_next&=~(1<<rt1[i]);
3906 break;
3907 case COP0:
3908 case COP2:
3909 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3910 smrv_strong_next&=~(1<<rt1[i]);
3911 smrv_weak_next&=~(1<<rt1[i]);
3912 }
3913 break;
3914 case C2LS:
3915 if (opcode[i]==0x32) { // LWC2
3916 smrv_strong_next&=~(1<<rt1[i]);
3917 smrv_weak_next&=~(1<<rt1[i]);
3918 }
3919 break;
3920 }
3921#if 0
3922 int r=4;
3923 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3924 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3925#endif
3926}
3927
/*
 * Memory region classes returned by get_ptr_mem_type().
 * MTYPE_8000 must stay 0: callers treat 0 as the plain RAM fast path.
 */
enum {
  MTYPE_8000 = 0, /* default class */
  MTYPE_8020 = 1, /* 0x80200000..0x807fffff */
  MTYPE_0000 = 2, /* below 0x00200000 */
  MTYPE_A000 = 3, /* 0xa0000000..0xa01fffff */
  MTYPE_1F80 = 4, /* 0x1f800000..0x1f800fff */
};
3935
3936static int get_ptr_mem_type(u_int a)
3937{
3938 if(a < 0x00200000) {
3939 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3940 // return wrong, must use memhandler for BIOS self-test to pass
3941 // 007 does similar stuff from a00 mirror, weird stuff
3942 return MTYPE_8000;
3943 return MTYPE_0000;
3944 }
3945 if(0x1f800000 <= a && a < 0x1f801000)
3946 return MTYPE_1F80;
3947 if(0x80200000 <= a && a < 0x80800000)
3948 return MTYPE_8020;
3949 if(0xa0000000 <= a && a < 0xa0200000)
3950 return MTYPE_A000;
3951 return MTYPE_8000;
3952}
3953#endif
3954
3955static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3956{
3957 int jaddr,type=0;
3958
3959#ifdef PCSX
3960 int mr=rs1[i];
3961 if(((smrv_strong|smrv_weak)>>mr)&1) {
3962 type=get_ptr_mem_type(smrv[mr]);
3963 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3964 }
3965 else {
3966 // use the mirror we are running on
3967 type=get_ptr_mem_type(start);
3968 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3969 }
3970
3971 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3972 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3973 addr=*addr_reg_override=HOST_TEMPREG;
3974 type=0;
3975 }
3976 else if(type==MTYPE_0000) { // RAM 0 mirror
3977 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3978 addr=*addr_reg_override=HOST_TEMPREG;
3979 type=0;
3980 }
3981 else if(type==MTYPE_A000) { // RAM A mirror
3982 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3983 addr=*addr_reg_override=HOST_TEMPREG;
3984 type=0;
3985 }
3986 else if(type==MTYPE_1F80) { // scratchpad
3987 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3988 emit_cmpimm(HOST_TEMPREG,0x1000);
3989 jaddr=(int)out;
3990 emit_jc(0);
3991 }
3992#endif
3993
3994 if(type==0)
3995 {
3996 emit_cmpimm(addr,RAM_SIZE);
3997 jaddr=(int)out;
3998 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3999 // Hint to branch predictor that the branch is unlikely to be taken
4000 if(rs1[i]>=28)
4001 emit_jno_unlikely(0);
4002 else
4003 #endif
4004 emit_jno(0);
a327ad27 4005 if(ram_offset!=0) {
4006 emit_addimm(addr,ram_offset,HOST_TEMPREG);
4007 addr=*addr_reg_override=HOST_TEMPREG;
4008 }
ffb0b9e0 4009 }
4010
4011 return jaddr;
4012}
4013
57871462 4014#define shift_assemble shift_assemble_arm
4015
4016void loadlr_assemble_arm(int i,struct regstat *i_regs)
4017{
4018 int s,th,tl,temp,temp2,addr,map=-1;
4019 int offset;
4020 int jaddr=0;
af4ee1fe 4021 int memtarget=0,c=0;
ffb0b9e0 4022 int fastload_reg_override=0;
57871462 4023 u_int hr,reglist=0;
4024 th=get_reg(i_regs->regmap,rt1[i]|64);
4025 tl=get_reg(i_regs->regmap,rt1[i]);
4026 s=get_reg(i_regs->regmap,rs1[i]);
4027 temp=get_reg(i_regs->regmap,-1);
4028 temp2=get_reg(i_regs->regmap,FTEMP);
4029 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
4030 assert(addr<0);
4031 offset=imm[i];
4032 for(hr=0;hr<HOST_REGS;hr++) {
4033 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4034 }
4035 reglist|=1<<temp;
4036 if(offset||s<0||c) addr=temp2;
4037 else addr=s;
4038 if(s>=0) {
4039 c=(i_regs->wasconst>>s)&1;
af4ee1fe 4040 if(c) {
4041 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
4042 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4043 }
57871462 4044 }
535d208a 4045 if(!using_tlb) {
4046 if(!c) {
4047 #ifdef RAM_OFFSET
4048 map=get_reg(i_regs->regmap,ROREG);
4049 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4050 #endif
4051 emit_shlimm(addr,3,temp);
4052 if (opcode[i]==0x22||opcode[i]==0x26) {
4053 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4054 }else{
535d208a 4055 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4056 }
ffb0b9e0 4057 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4058 }
4059 else {
a327ad27 4060 if(ram_offset&&memtarget) {
4061 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
4062 fastload_reg_override=HOST_TEMPREG;
4063 }
535d208a 4064 if (opcode[i]==0x22||opcode[i]==0x26) {
4065 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4066 }else{
4067 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4068 }
57871462 4069 }
535d208a 4070 }else{ // using tlb
4071 int a;
4072 if(c) {
4073 a=-1;
4074 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4075 a=0xFFFFFFFC; // LWL/LWR
4076 }else{
4077 a=0xFFFFFFF8; // LDL/LDR
4078 }
4079 map=get_reg(i_regs->regmap,TLREG);
4080 assert(map>=0);
ea3d2e6e 4081 reglist&=~(1<<map);
535d208a 4082 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4083 if(c) {
4084 if (opcode[i]==0x22||opcode[i]==0x26) {
4085 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4086 }else{
4087 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4088 }
535d208a 4089 }
4090 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4091 }
4092 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4093 if(!c||memtarget) {
ffb0b9e0 4094 int a=temp2;
4095 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4096 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4097 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4098 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4099 }
4100 else
4101 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4102 if(rt1[i]) {
4103 assert(tl>=0);
57871462 4104 emit_andimm(temp,24,temp);
2002a1db 4105#ifdef BIG_ENDIAN_MIPS
4106 if (opcode[i]==0x26) // LWR
4107#else
4108 if (opcode[i]==0x22) // LWL
4109#endif
4110 emit_xorimm(temp,24,temp);
57871462 4111 emit_movimm(-1,HOST_TEMPREG);
4112 if (opcode[i]==0x26) {
4113 emit_shr(temp2,temp,temp2);
4114 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4115 }else{
4116 emit_shl(temp2,temp,temp2);
4117 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4118 }
4119 emit_or(temp2,tl,tl);
57871462 4120 }
535d208a 4121 //emit_storereg(rt1[i],tl); // DEBUG
4122 }
4123 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4124 // FIXME: little endian, fastload_reg_override
535d208a 4125 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4126 if(!c||memtarget) {
4127 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4128 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4129 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4130 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4131 }
4132 else
4133 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4134 if(rt1[i]) {
4135 assert(th>=0);
4136 assert(tl>=0);
57871462 4137 emit_testimm(temp,32);
4138 emit_andimm(temp,24,temp);
4139 if (opcode[i]==0x1A) { // LDL
4140 emit_rsbimm(temp,32,HOST_TEMPREG);
4141 emit_shl(temp2h,temp,temp2h);
4142 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4143 emit_movimm(-1,HOST_TEMPREG);
4144 emit_shl(temp2,temp,temp2);
4145 emit_cmove_reg(temp2h,th);
4146 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4147 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4148 emit_orreq(temp2,tl,tl);
4149 emit_orrne(temp2,th,th);
4150 }
4151 if (opcode[i]==0x1B) { // LDR
4152 emit_xorimm(temp,24,temp);
4153 emit_rsbimm(temp,32,HOST_TEMPREG);
4154 emit_shr(temp2,temp,temp2);
4155 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4156 emit_movimm(-1,HOST_TEMPREG);
4157 emit_shr(temp2h,temp,temp2h);
4158 emit_cmovne_reg(temp2,tl);
4159 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4160 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4161 emit_orrne(temp2h,th,th);
4162 emit_orreq(temp2h,tl,tl);
4163 }
4164 }
4165 }
4166}
4167#define loadlr_assemble loadlr_assemble_arm
4168
4169void cop0_assemble(int i,struct regstat *i_regs)
4170{
4171 if(opcode2[i]==0) // MFC0
4172 {
4173 signed char t=get_reg(i_regs->regmap,rt1[i]);
4174 char copr=(source[i]>>11)&0x1f;
4175 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4176 if(t>=0&&rt1[i]!=0) {
7139f3c8 4177#ifdef MUPEN64
57871462 4178 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4179 emit_movimm((source[i]>>11)&0x1f,1);
4180 emit_writeword(0,(int)&PC);
4181 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4182 if(copr==9) {
4183 emit_readword((int)&last_count,ECX);
4184 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4185 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4186 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4187 emit_writeword(HOST_CCREG,(int)&Count);
4188 }
4189 emit_call((int)MFC0);
4190 emit_readword((int)&readmem_dword,t);
7139f3c8 4191#else
4192 emit_readword((int)&reg_cop0+copr*4,t);
4193#endif
57871462 4194 }
4195 }
4196 else if(opcode2[i]==4) // MTC0
4197 {
4198 signed char s=get_reg(i_regs->regmap,rs1[i]);
4199 char copr=(source[i]>>11)&0x1f;
4200 assert(s>=0);
63cb0298 4201#ifdef MUPEN64
57871462 4202 emit_writeword(s,(int)&readmem_dword);
4203 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4204 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4205 emit_movimm((source[i]>>11)&0x1f,1);
4206 emit_writeword(0,(int)&PC);
4207 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4208#else
4209 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4210#endif
4211 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4212 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4213 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4214 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4215 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4216 emit_writeword(HOST_CCREG,(int)&Count);
4217 }
4218 // What a mess. The status register (12) can enable interrupts,
4219 // so needs a special case to handle a pending interrupt.
4220 // The interrupt must be taken immediately, because a subsequent
4221 // instruction might disable interrupts again.
7139f3c8 4222 if(copr==12||copr==13) {
fca1aef2 4223#ifdef PCSX
4224 if (is_delayslot) {
4225 // burn cycles to cause cc_interrupt, which will
4226 // reschedule next_interupt. Relies on CCREG from above.
4227 assem_debug("MTC0 DS %d\n", copr);
4228 emit_writeword(HOST_CCREG,(int)&last_count);
4229 emit_movimm(0,HOST_CCREG);
4230 emit_storereg(CCREG,HOST_CCREG);
caeefe31 4231 emit_loadreg(rs1[i],1);
fca1aef2 4232 emit_movimm(copr,0);
4233 emit_call((int)pcsx_mtc0_ds);
042c7287 4234 emit_loadreg(rs1[i],s);
fca1aef2 4235 return;
4236 }
4237#endif
63cb0298 4238 emit_movimm(start+i*4+4,HOST_TEMPREG);
4239 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4240 emit_movimm(0,HOST_TEMPREG);
4241 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4242 }
4243 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4244 //else
fca1aef2 4245#ifdef PCSX
caeefe31 4246 if(s==HOST_CCREG)
4247 emit_loadreg(rs1[i],1);
4248 else if(s!=1)
63cb0298 4249 emit_mov(s,1);
fca1aef2 4250 emit_movimm(copr,0);
4251 emit_call((int)pcsx_mtc0);
4252#else
57871462 4253 emit_call((int)MTC0);
fca1aef2 4254#endif
7139f3c8 4255 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4256 emit_readword((int)&Count,HOST_CCREG);
042c7287 4257 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 4258 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 4259 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
4260 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 4261 emit_storereg(CCREG,HOST_CCREG);
4262 }
7139f3c8 4263 if(copr==12||copr==13) {
57871462 4264 assert(!is_delayslot);
4265 emit_readword((int)&pending_exception,14);
042c7287 4266 emit_test(14,14);
4267 emit_jne((int)&do_interrupt);
57871462 4268 }
4269 emit_loadreg(rs1[i],s);
4270 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4271 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 4272 cop1_usable=0;
4273 }
4274 else
4275 {
4276 assert(opcode2[i]==0x10);
3d624f89 4277#ifndef DISABLE_TLB
57871462 4278 if((source[i]&0x3f)==0x01) // TLBR
4279 emit_call((int)TLBR);
4280 if((source[i]&0x3f)==0x02) // TLBWI
4281 emit_call((int)TLBWI_new);
4282 if((source[i]&0x3f)==0x06) { // TLBWR
4283 // The TLB entry written by TLBWR is dependent on the count,
4284 // so update the cycle count
4285 emit_readword((int)&last_count,ECX);
4286 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4287 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4288 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4289 emit_writeword(HOST_CCREG,(int)&Count);
4290 emit_call((int)TLBWR_new);
4291 }
4292 if((source[i]&0x3f)==0x08) // TLBP
4293 emit_call((int)TLBP);
3d624f89 4294#endif
576bbd8f 4295#ifdef PCSX
4296 if((source[i]&0x3f)==0x10) // RFE
4297 {
4298 emit_readword((int)&Status,0);
4299 emit_andimm(0,0x3c,1);
4300 emit_andimm(0,~0xf,0);
4301 emit_orrshr_imm(1,2,0);
4302 emit_writeword(0,(int)&Status);
4303 }
4304#else
57871462 4305 if((source[i]&0x3f)==0x18) // ERET
4306 {
4307 int count=ccadj[i];
4308 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4309 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4310 emit_jmp((int)jump_eret);
4311 }
576bbd8f 4312#endif
57871462 4313 }
4314}
4315
b9b61529 4316static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4317{
4318 switch (copr) {
4319 case 1:
4320 case 3:
4321 case 5:
4322 case 8:
4323 case 9:
4324 case 10:
4325 case 11:
4326 emit_readword((int)&reg_cop2d[copr],tl);
4327 emit_signextend16(tl,tl);
4328 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4329 break;
4330 case 7:
4331 case 16:
4332 case 17:
4333 case 18:
4334 case 19:
4335 emit_readword((int)&reg_cop2d[copr],tl);
4336 emit_andimm(tl,0xffff,tl);
4337 emit_writeword(tl,(int)&reg_cop2d[copr]);
4338 break;
4339 case 15:
4340 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4341 emit_writeword(tl,(int)&reg_cop2d[copr]);
4342 break;
4343 case 28:
b9b61529 4344 case 29:
4345 emit_readword((int)&reg_cop2d[9],temp);
4346 emit_testimm(temp,0x8000); // do we need this?
4347 emit_andimm(temp,0xf80,temp);
4348 emit_andne_imm(temp,0,temp);
f70d384d 4349 emit_shrimm(temp,7,tl);
b9b61529 4350 emit_readword((int)&reg_cop2d[10],temp);
4351 emit_testimm(temp,0x8000);
4352 emit_andimm(temp,0xf80,temp);
4353 emit_andne_imm(temp,0,temp);
f70d384d 4354 emit_orrshr_imm(temp,2,tl);
b9b61529 4355 emit_readword((int)&reg_cop2d[11],temp);
4356 emit_testimm(temp,0x8000);
4357 emit_andimm(temp,0xf80,temp);
4358 emit_andne_imm(temp,0,temp);
f70d384d 4359 emit_orrshl_imm(temp,3,tl);
b9b61529 4360 emit_writeword(tl,(int)&reg_cop2d[copr]);
4361 break;
4362 default:
4363 emit_readword((int)&reg_cop2d[copr],tl);
4364 break;
4365 }
4366}
4367
4368static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4369{
4370 switch (copr) {
4371 case 15:
4372 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4373 emit_writeword(sl,(int)&reg_cop2d[copr]);
4374 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4375 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4376 emit_writeword(sl,(int)&reg_cop2d[14]);
4377 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4378 break;
4379 case 28:
4380 emit_andimm(sl,0x001f,temp);
f70d384d 4381 emit_shlimm(temp,7,temp);
b9b61529 4382 emit_writeword(temp,(int)&reg_cop2d[9]);
4383 emit_andimm(sl,0x03e0,temp);
f70d384d 4384 emit_shlimm(temp,2,temp);
b9b61529 4385 emit_writeword(temp,(int)&reg_cop2d[10]);
4386 emit_andimm(sl,0x7c00,temp);
f70d384d 4387 emit_shrimm(temp,3,temp);
b9b61529 4388 emit_writeword(temp,(int)&reg_cop2d[11]);
4389 emit_writeword(sl,(int)&reg_cop2d[28]);
4390 break;
4391 case 30:
4392 emit_movs(sl,temp);
4393 emit_mvnmi(temp,temp);
4394 emit_clz(temp,temp);
4395 emit_writeword(sl,(int)&reg_cop2d[30]);
4396 emit_writeword(temp,(int)&reg_cop2d[31]);
4397 break;
b9b61529 4398 case 31:
4399 break;
4400 default:
4401 emit_writeword(sl,(int)&reg_cop2d[copr]);
4402 break;
4403 }
4404}
4405
4406void cop2_assemble(int i,struct regstat *i_regs)
4407{
4408 u_int copr=(source[i]>>11)&0x1f;
4409 signed char temp=get_reg(i_regs->regmap,-1);
4410 if (opcode2[i]==0) { // MFC2
4411 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4412 if(tl>=0&&rt1[i]!=0)
b9b61529 4413 cop2_get_dreg(copr,tl,temp);
4414 }
4415 else if (opcode2[i]==4) { // MTC2
4416 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4417 cop2_put_dreg(copr,sl,temp);
4418 }
4419 else if (opcode2[i]==2) // CFC2
4420 {
4421 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4422 if(tl>=0&&rt1[i]!=0)
b9b61529 4423 emit_readword((int)&reg_cop2c[copr],tl);
4424 }
4425 else if (opcode2[i]==6) // CTC2
4426 {
4427 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4428 switch(copr) {
4429 case 4:
4430 case 12:
4431 case 20:
4432 case 26:
4433 case 27:
4434 case 29:
4435 case 30:
4436 emit_signextend16(sl,temp);
4437 break;
4438 case 31:
4439 //value = value & 0x7ffff000;
4440 //if (value & 0x7f87e000) value |= 0x80000000;
4441 emit_shrimm(sl,12,temp);
4442 emit_shlimm(temp,12,temp);
4443 emit_testimm(temp,0x7f000000);
4444 emit_testeqimm(temp,0x00870000);
4445 emit_testeqimm(temp,0x0000e000);
4446 emit_orrne_imm(temp,0x80000000,temp);
4447 break;
4448 default:
4449 temp=sl;
4450 break;
4451 }
4452 emit_writeword(temp,(int)&reg_cop2c[copr]);
4453 assert(sl>=0);
4454 }
4455}
4456
054175e9 4457static void c2op_prologue(u_int op,u_int reglist)
4458{
4459 save_regs_all(reglist);
82ed88eb 4460#ifdef PCNT
4461 emit_movimm(op,0);
4462 emit_call((int)pcnt_gte_start);
4463#endif
054175e9 4464 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4465}
4466
4467static void c2op_epilogue(u_int op,u_int reglist)
4468{
82ed88eb 4469#ifdef PCNT
4470 emit_movimm(op,0);
4471 emit_call((int)pcnt_gte_end);
4472#endif
054175e9 4473 restore_regs_all(reglist);
4474}
4475
6c0eefaf 4476static void c2op_call_MACtoIR(int lm,int need_flags)
4477{
4478 if(need_flags)
4479 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4480 else
4481 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
4482}
4483
4484static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
4485{
4486 emit_call((int)func);
4487 // func is C code and trashes r0
4488 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4489 if(need_flags||need_ir)
4490 c2op_call_MACtoIR(lm,need_flags);
4491 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
4492}
4493
054175e9 4494static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4495{
4496 signed char temp=get_reg(i_regs->regmap,-1);
4497 u_int c2op=source[i]&0x3f;
6c0eefaf 4498 u_int hr,reglist_full=0,reglist;
054175e9 4499 int need_flags,need_ir;
b9b61529 4500 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 4501 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 4502 }
6c0eefaf 4503 reglist=reglist_full&0x100f;
b9b61529 4504
4505 if (gte_handlers[c2op]!=NULL) {
bedfea38 4506 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4507 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 4508 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
4509 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 4510 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
4511 need_flags=0;
6c0eefaf 4512 int shift = (source[i] >> 19) & 1;
4513 int lm = (source[i] >> 10) & 1;
054175e9 4514 switch(c2op) {
19776aef 4515#ifndef DRC_DBG
054175e9 4516 case GTE_MVMVA: {
054175e9 4517 int v = (source[i] >> 15) & 3;
4518 int cv = (source[i] >> 13) & 3;
4519 int mx = (source[i] >> 17) & 3;
6c0eefaf 4520 reglist=reglist_full&0x10ff; // +{r4-r7}
054175e9 4521 c2op_prologue(c2op,reglist);
4522 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4523 if(v<3)
4524 emit_ldrd(v*8,0,4);
4525 else {
4526 emit_movzwl_indexed(9*4,0,4); // gteIR
4527 emit_movzwl_indexed(10*4,0,6);
4528 emit_movzwl_indexed(11*4,0,5);
4529 emit_orrshl_imm(6,16,4);
4530 }
4531 if(mx<3)
4532 emit_addimm(0,32*4+mx*8*4,6);
4533 else
4534 emit_readword((int)&zeromem_ptr,6);
4535 if(cv<3)
4536 emit_addimm(0,32*4+(cv*8+5)*4,7);
4537 else
4538 emit_readword((int)&zeromem_ptr,7);
4539#ifdef __ARM_NEON__
4540 emit_movimm(source[i],1); // opcode
4541 emit_call((int)gteMVMVA_part_neon);
4542 if(need_flags) {
4543 emit_movimm(lm,1);
4544 emit_call((int)gteMACtoIR_flags_neon);
4545 }
4546#else
4547 if(cv==3&&shift)
4548 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4549 else {
4550 emit_movimm(shift,1);
4551 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4552 }
6c0eefaf 4553 if(need_flags||need_ir)
4554 c2op_call_MACtoIR(lm,need_flags);
054175e9 4555#endif
4556 break;
4557 }
6c0eefaf 4558 case GTE_OP:
4559 c2op_prologue(c2op,reglist);
4560 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
4561 if(need_flags||need_ir) {
4562 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4563 c2op_call_MACtoIR(lm,need_flags);
4564 }
4565 break;
4566 case GTE_DPCS:
4567 c2op_prologue(c2op,reglist);
4568 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
4569 break;
4570 case GTE_INTPL:
4571 c2op_prologue(c2op,reglist);
4572 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
4573 break;
4574 case GTE_SQR:
4575 c2op_prologue(c2op,reglist);
4576 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
4577 if(need_flags||need_ir) {
4578 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4579 c2op_call_MACtoIR(lm,need_flags);
4580 }
4581 break;
4582 case GTE_DCPL:
4583 c2op_prologue(c2op,reglist);
4584 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
4585 break;
4586 case GTE_GPF:
4587 c2op_prologue(c2op,reglist);
4588 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
4589 break;
4590 case GTE_GPL:
4591 c2op_prologue(c2op,reglist);
4592 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
4593 break;
19776aef 4594#endif
054175e9 4595 default:
054175e9 4596 c2op_prologue(c2op,reglist);
19776aef 4597#ifdef DRC_DBG
4598 emit_movimm(source[i],1); // opcode
4599 emit_writeword(1,(int)&psxRegs.code);
4600#endif
054175e9 4601 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4602 break;
4603 }
4604 c2op_epilogue(c2op,reglist);
4605 }
b9b61529 4606}
4607
4608void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4609{
4610 // XXX: should just just do the exception instead
4611 if(!cop1_usable) {
4612 int jaddr=(int)out;
4613 emit_jmp(0);
4614 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4615 cop1_usable=1;
4616 }
4617}
4618
57871462 4619void cop1_assemble(int i,struct regstat *i_regs)
4620{
3d624f89 4621#ifndef DISABLE_COP1
57871462 4622 // Check cop1 unusable
4623 if(!cop1_usable) {
4624 signed char rs=get_reg(i_regs->regmap,CSREG);
4625 assert(rs>=0);
4626 emit_testimm(rs,0x20000000);
4627 int jaddr=(int)out;
4628 emit_jeq(0);
4629 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4630 cop1_usable=1;
4631 }
4632 if (opcode2[i]==0) { // MFC1
4633 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4634 if(tl>=0) {
4635 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4636 emit_readword_indexed(0,tl,tl);
4637 }
4638 }
4639 else if (opcode2[i]==1) { // DMFC1
4640 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4641 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4642 if(tl>=0) {
4643 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4644 if(th>=0) emit_readword_indexed(4,tl,th);
4645 emit_readword_indexed(0,tl,tl);
4646 }
4647 }
4648 else if (opcode2[i]==4) { // MTC1
4649 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4650 signed char temp=get_reg(i_regs->regmap,-1);
4651 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4652 emit_writeword_indexed(sl,0,temp);
4653 }
4654 else if (opcode2[i]==5) { // DMTC1
4655 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4656 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4657 signed char temp=get_reg(i_regs->regmap,-1);
4658 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4659 emit_writeword_indexed(sh,4,temp);
4660 emit_writeword_indexed(sl,0,temp);
4661 }
4662 else if (opcode2[i]==2) // CFC1
4663 {
4664 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4665 if(tl>=0) {
4666 u_int copr=(source[i]>>11)&0x1f;
4667 if(copr==0) emit_readword((int)&FCR0,tl);
4668 if(copr==31) emit_readword((int)&FCR31,tl);
4669 }
4670 }
4671 else if (opcode2[i]==6) // CTC1
4672 {
4673 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4674 u_int copr=(source[i]>>11)&0x1f;
4675 assert(sl>=0);
4676 if(copr==31)
4677 {
4678 emit_writeword(sl,(int)&FCR31);
4679 // Set the rounding mode
4680 //FIXME
4681 //char temp=get_reg(i_regs->regmap,-1);
4682 //emit_andimm(sl,3,temp);
4683 //emit_fldcw_indexed((int)&rounding_modes,temp);
4684 }
4685 }
3d624f89 4686#else
4687 cop1_unusable(i, i_regs);
4688#endif
57871462 4689}
4690
4691void fconv_assemble_arm(int i,struct regstat *i_regs)
4692{
3d624f89 4693#ifndef DISABLE_COP1
57871462 4694 signed char temp=get_reg(i_regs->regmap,-1);
4695 assert(temp>=0);
4696 // Check cop1 unusable
4697 if(!cop1_usable) {
4698 signed char rs=get_reg(i_regs->regmap,CSREG);
4699 assert(rs>=0);
4700 emit_testimm(rs,0x20000000);
4701 int jaddr=(int)out;
4702 emit_jeq(0);
4703 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4704 cop1_usable=1;
4705 }
4706
4707 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4708 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4709 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4710 emit_flds(temp,15);
4711 emit_ftosizs(15,15); // float->int, truncate
4712 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4713 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4714 emit_fsts(15,temp);
4715 return;
4716 }
4717 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4718 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4719 emit_vldr(temp,7);
4720 emit_ftosizd(7,13); // double->int, truncate
4721 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4722 emit_fsts(13,temp);
4723 return;
4724 }
4725
4726 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4727 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4728 emit_flds(temp,13);
4729 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4730 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4731 emit_fsitos(13,15);
4732 emit_fsts(15,temp);
4733 return;
4734 }
4735 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4736 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4737 emit_flds(temp,13);
4738 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4739 emit_fsitod(13,7);
4740 emit_vstr(7,temp);
4741 return;
4742 }
4743
4744 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4745 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4746 emit_flds(temp,13);
4747 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4748 emit_fcvtds(13,7);
4749 emit_vstr(7,temp);
4750 return;
4751 }
4752 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4753 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4754 emit_vldr(temp,7);
4755 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4756 emit_fcvtsd(7,13);
4757 emit_fsts(13,temp);
4758 return;
4759 }
4760 #endif
4761
4762 // C emulation code
4763
4764 u_int hr,reglist=0;
4765 for(hr=0;hr<HOST_REGS;hr++) {
4766 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4767 }
4768 save_regs(reglist);
4769
4770 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4771 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4772 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4773 emit_call((int)cvt_s_w);
4774 }
4775 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4776 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4777 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4778 emit_call((int)cvt_d_w);
4779 }
4780 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4781 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4782 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4783 emit_call((int)cvt_s_l);
4784 }
4785 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4786 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4787 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4788 emit_call((int)cvt_d_l);
4789 }
4790
4791 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4792 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4793 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4794 emit_call((int)cvt_d_s);
4795 }
4796 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4797 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4798 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4799 emit_call((int)cvt_w_s);
4800 }
4801 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4802 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4803 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4804 emit_call((int)cvt_l_s);
4805 }
4806
4807 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4808 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4809 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4810 emit_call((int)cvt_s_d);
4811 }
4812 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4813 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4814 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4815 emit_call((int)cvt_w_d);
4816 }
4817 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4818 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4819 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4820 emit_call((int)cvt_l_d);
4821 }
4822
4823 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4824 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4825 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4826 emit_call((int)round_l_s);
4827 }
4828 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4829 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4830 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4831 emit_call((int)trunc_l_s);
4832 }
4833 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4834 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4835 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4836 emit_call((int)ceil_l_s);
4837 }
4838 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4839 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4840 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4841 emit_call((int)floor_l_s);
4842 }
4843 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4844 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4845 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4846 emit_call((int)round_w_s);
4847 }
4848 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4849 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4850 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4851 emit_call((int)trunc_w_s);
4852 }
4853 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4854 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4855 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4856 emit_call((int)ceil_w_s);
4857 }
4858 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4859 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4860 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4861 emit_call((int)floor_w_s);
4862 }
4863
4864 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4865 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4866 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4867 emit_call((int)round_l_d);
4868 }
4869 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4870 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4871 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4872 emit_call((int)trunc_l_d);
4873 }
4874 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4875 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4876 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4877 emit_call((int)ceil_l_d);
4878 }
4879 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4880 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4881 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4882 emit_call((int)floor_l_d);
4883 }
4884 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4885 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4886 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4887 emit_call((int)round_w_d);
4888 }
4889 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4890 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4891 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4892 emit_call((int)trunc_w_d);
4893 }
4894 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4895 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4896 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4897 emit_call((int)ceil_w_d);
4898 }
4899 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4900 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4901 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4902 emit_call((int)floor_w_d);
4903 }
4904
4905 restore_regs(reglist);
3d624f89 4906#else
4907 cop1_unusable(i, i_regs);
4908#endif
57871462 4909}
4910#define fconv_assemble fconv_assemble_arm
4911
4912void fcomp_assemble(int i,struct regstat *i_regs)
4913{
3d624f89 4914#ifndef DISABLE_COP1
57871462 4915 signed char fs=get_reg(i_regs->regmap,FSREG);
4916 signed char temp=get_reg(i_regs->regmap,-1);
4917 assert(temp>=0);
4918 // Check cop1 unusable
4919 if(!cop1_usable) {
4920 signed char cs=get_reg(i_regs->regmap,CSREG);
4921 assert(cs>=0);
4922 emit_testimm(cs,0x20000000);
4923 int jaddr=(int)out;
4924 emit_jeq(0);
4925 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4926 cop1_usable=1;
4927 }
4928
4929 if((source[i]&0x3f)==0x30) {
4930 emit_andimm(fs,~0x800000,fs);
4931 return;
4932 }
4933
4934 if((source[i]&0x3e)==0x38) {
4935 // sf/ngle - these should throw exceptions for NaNs
4936 emit_andimm(fs,~0x800000,fs);
4937 return;
4938 }
4939
4940 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4941 if(opcode2[i]==0x10) {
4942 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4943 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4944 emit_orimm(fs,0x800000,fs);
4945 emit_flds(temp,14);
4946 emit_flds(HOST_TEMPREG,15);
4947 emit_fcmps(14,15);
4948 emit_fmstat();
4949 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4950 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4951 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4952 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4953 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4954 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4955 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4956 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4957 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4958 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4959 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4960 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4961 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4962 return;
4963 }
4964 if(opcode2[i]==0x11) {
4965 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4966 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4967 emit_orimm(fs,0x800000,fs);
4968 emit_vldr(temp,6);
4969 emit_vldr(HOST_TEMPREG,7);
4970 emit_fcmpd(6,7);
4971 emit_fmstat();
4972 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4973 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4974 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4975 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4976 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4977 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4978 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4979 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4980 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4981 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4982 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4983 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4984 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4985 return;
4986 }
4987 #endif
4988
4989 // C only
4990
4991 u_int hr,reglist=0;
4992 for(hr=0;hr<HOST_REGS;hr++) {
4993 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4994 }
4995 reglist&=~(1<<fs);
4996 save_regs(reglist);
4997 if(opcode2[i]==0x10) {
4998 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4999 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5000 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
5001 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
5002 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
5003 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
5004 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
5005 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
5006 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
5007 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
5008 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
5009 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
5010 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
5011 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
5012 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
5013 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
5014 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
5015 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
5016 }
5017 if(opcode2[i]==0x11) {
5018 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5019 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5020 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
5021 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
5022 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
5023 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
5024 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
5025 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
5026 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
5027 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
5028 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
5029 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
5030 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
5031 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
5032 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
5033 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
5034 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
5035 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
5036 }
5037 restore_regs(reglist);
5038 emit_loadreg(FSREG,fs);
3d624f89 5039#else
5040 cop1_unusable(i, i_regs);
5041#endif
57871462 5042}
5043
// Emit host code for the COP1 arithmetic instruction at index i.
// The function field source[i]&0x3f selects the operation:
//   0=add 1=sub 2=mul 3=div 4=sqrt 5=abs 6=mov 7=neg.
// opcode2[i]==0x10 selects single precision, 0x11 double precision.
// Source register numbers come from bits 11-15 and 16-20 of source[i], the
// destination from bits 6-10. Entries of reg_cop1_simple[]/reg_cop1_double[]
// are read as pointers to the actual values.
// With DISABLE_COP1 defined the function only calls cop1_unusable().
5044void float_assemble(int i,struct regstat *i_regs)
5045{
3d624f89 5046#ifndef DISABLE_COP1
 // Host register mapped to -1 serves as scratch below; it must be allocated.
57871462 5047 signed char temp=get_reg(i_regs->regmap,-1);
5048 assert(temp>=0);
5049 // Check cop1 unusable
 // Emitted once per block state: test bit 0x20000000 of the register mapped
 // to CSREG and branch to an FP_STUB when it is clear. cop1_usable is then
 // set so later instructions skip the check.
5050 if(!cop1_usable) {
5051 signed char cs=get_reg(i_regs->regmap,CSREG);
5052 assert(cs>=0);
5053 emit_testimm(cs,0x20000000);
5054 int jaddr=(int)out;
5055 emit_jeq(0);
5056 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
5057 cop1_usable=1;
5058 }
5059
 // Inline VFP path, only built for hard-VFP targets. Every function code
 // returns from inside this section, so the helper-call fallback after the
 // #endif is only reached when this section is compiled out.
5060 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
5061 if((source[i]&0x3f)==6) // mov
5062 {
 // Nothing to emit when source and destination are the same register.
5063 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5064 if(opcode2[i]==0x10) {
 // temp = pointer to source value, HOST_TEMPREG = pointer to destination;
 // copy one word through them.
5065 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5066 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
5067 emit_readword_indexed(0,temp,temp);
5068 emit_writeword_indexed(temp,0,HOST_TEMPREG);
5069 }
5070 if(opcode2[i]==0x11) {
 // Double: copy via VFP register 7.
5071 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5072 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
5073 emit_vldr(temp,7);
5074 emit_vstr(7,HOST_TEMPREG);
5075 }
5076 }
5077 return;
5078 }
5079
 // Unary operations (sqrt/abs/neg; mov was handled above).
5080 if((source[i]&0x3f)>3)
5081 {
5082 if(opcode2[i]==0x10) {
 // Load the operand into VFP single register 15, then repoint temp at the
 // destination if it differs from the source.
5083 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5084 emit_flds(temp,15);
5085 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5086 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5087 }
5088 if((source[i]&0x3f)==4) // sqrt
5089 emit_fsqrts(15,15);
5090 if((source[i]&0x3f)==5) // abs
5091 emit_fabss(15,15);
5092 if((source[i]&0x3f)==7) // neg
5093 emit_fnegs(15,15);
5094 emit_fsts(15,temp);
5095 }
5096 if(opcode2[i]==0x11) {
 // Same scheme for doubles, using VFP double register 7.
5097 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5098 emit_vldr(temp,7);
5099 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5100 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5101 }
5102 if((source[i]&0x3f)==4) // sqrt
5103 emit_fsqrtd(7,7);
5104 if((source[i]&0x3f)==5) // abs
5105 emit_fabsd(7,7);
5106 if((source[i]&0x3f)==7) // neg
5107 emit_fnegd(7,7);
5108 emit_vstr(7,temp);
5109 }
5110 return;
5111 }
 // Binary operations (add/sub/mul/div).
5112 if((source[i]&0x3f)<4)
5113 {
 // temp = pointer to the first operand (bits 11-15).
5114 if(opcode2[i]==0x10) {
5115 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5116 }
5117 if(opcode2[i]==0x11) {
5118 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5119 }
 // Two distinct source registers.
5120 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5121 if(opcode2[i]==0x10) {
 // HOST_TEMPREG = pointer to the second operand (bits 16-20).
5122 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5123 emit_flds(temp,15);
5124 emit_flds(HOST_TEMPREG,13);
 // Fetch a separate destination pointer only when the destination is
 // neither source; otherwise one of the pointers already held is reused.
5125 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5126 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5127 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5128 }
5129 }
5130 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5131 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5132 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5133 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
 // Destination equals the second source: store through its pointer.
5134 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5135 emit_fsts(15,HOST_TEMPREG);
5136 }else{
5137 emit_fsts(15,temp);
5138 }
5139 }
5140 else if(opcode2[i]==0x11) {
 // Double-precision variant of the same sequence (VFP registers 7 and 6).
5141 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5142 emit_vldr(temp,7);
5143 emit_vldr(HOST_TEMPREG,6);
5144 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5145 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5146 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5147 }
5148 }
5149 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5150 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5151 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5152 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5153 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5154 emit_vstr(7,HOST_TEMPREG);
5155 }else{
5156 emit_vstr(7,temp);
5157 }
5158 }
5159 }
 // Both sources are the same register: load once and combine it with itself.
5160 else {
5161 if(opcode2[i]==0x10) {
5162 emit_flds(temp,15);
5163 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5164 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5165 }
5166 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5167 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5168 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5169 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5170 emit_fsts(15,temp);
5171 }
5172 else if(opcode2[i]==0x11) {
5173 emit_vldr(temp,7);
5174 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5175 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5176 }
5177 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5178 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5179 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5180 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5181 emit_vstr(7,temp);
5182 }
5183 }
5184 return;
5185 }
5186 #endif
5187
 // Fallback: call a C helper. reglist collects every host register that
 // currently holds a mapping so it can be saved around the call.
5188 u_int hr,reglist=0;
5189 for(hr=0;hr<HOST_REGS;hr++) {
5190 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5191 }
 // Binary helpers receive (source1, source2, destination) pointers in
 // ARG1..ARG3; unary helpers receive (source, destination) in ARG1..ARG2.
5192 if(opcode2[i]==0x10) { // Single precision
5193 save_regs(reglist);
5194 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5195 if((source[i]&0x3f)<4) {
5196 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5197 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5198 }else{
5199 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5200 }
 // No default case: function codes above 7 emit no call here.
5201 switch(source[i]&0x3f)
5202 {
5203 case 0x00: emit_call((int)add_s);break;
5204 case 0x01: emit_call((int)sub_s);break;
5205 case 0x02: emit_call((int)mul_s);break;
5206 case 0x03: emit_call((int)div_s);break;
5207 case 0x04: emit_call((int)sqrt_s);break;
5208 case 0x05: emit_call((int)abs_s);break;
5209 case 0x06: emit_call((int)mov_s);break;
5210 case 0x07: emit_call((int)neg_s);break;
5211 }
5212 restore_regs(reglist);
5213 }
5214 if(opcode2[i]==0x11) { // Double precision
5215 save_regs(reglist);
5216 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5217 if((source[i]&0x3f)<4) {
5218 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5219 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5220 }else{
5221 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5222 }
5223 switch(source[i]&0x3f)
5224 {
5225 case 0x00: emit_call((int)add_d);break;
5226 case 0x01: emit_call((int)sub_d);break;
5227 case 0x02: emit_call((int)mul_d);break;
5228 case 0x03: emit_call((int)div_d);break;
5229 case 0x04: emit_call((int)sqrt_d);break;
5230 case 0x05: emit_call((int)abs_d);break;
5231 case 0x06: emit_call((int)mov_d);break;
5232 case 0x07: emit_call((int)neg_d);break;
5233 }
5234 restore_regs(reglist);
5235 }
3d624f89 5236#else
5237 cop1_unusable(i, i_regs);
5238#endif
57871462 5239}
5240
// Emit ARM code for the multiply/divide instruction at index i.
// opcode2[i] selects the operation (see the list below); results go to the
// host registers mapped to HIREG/LOREG.
// When either source register number is 0, HI and LO are simply zeroed.
// The 64-bit operations are only built without FORCE32; with FORCE32 defined
// reaching them hits assert(0).
5241void multdiv_assemble_arm(int i,struct regstat *i_regs)
5242{
5243 // case 0x18: MULT
5244 // case 0x19: MULTU
5245 // case 0x1A: DIV
5246 // case 0x1B: DIVU
5247 // case 0x1C: DMULT
5248 // case 0x1D: DMULTU
5249 // case 0x1E: DDIV
5250 // case 0x1F: DDIVU
5251 if(rs1[i]&&rs2[i])
5252 {
 // Bit 2 of opcode2 separates 0x18-0x1B (32-bit) from 0x1C-0x1F (64-bit).
5253 if((opcode2[i]&4)==0) // 32-bit
5254 {
5255 if(opcode2[i]==0x18) // MULT
5256 {
5257 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5258 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5259 signed char hi=get_reg(i_regs->regmap,HIREG);
5260 signed char lo=get_reg(i_regs->regmap,LOREG);
5261 assert(m1>=0);
5262 assert(m2>=0);
5263 assert(hi>=0);
5264 assert(lo>=0);
 // Signed 32x32 multiply with the 64-bit result split into hi:lo.
5265 emit_smull(m1,m2,hi,lo);
5266 }
5267 if(opcode2[i]==0x19) // MULTU
5268 {
5269 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5270 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5271 signed char hi=get_reg(i_regs->regmap,HIREG);
5272 signed char lo=get_reg(i_regs->regmap,LOREG);
5273 assert(m1>=0);
5274 assert(m2>=0);
5275 assert(hi>=0);
5276 assert(lo>=0);
 // Unsigned counterpart of the MULT case.
5277 emit_umull(m1,m2,hi,lo);
5278 }
5279 if(opcode2[i]==0x1A) // DIV
5280 {
5281 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5282 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5283 assert(d1>=0);
5284 assert(d2>=0);
5285 signed char quotient=get_reg(i_regs->regmap,LOREG);
5286 signed char remainder=get_reg(i_regs->regmap,HIREG);
5287 assert(quotient>=0);
5288 assert(remainder>=0);
 // Software signed division: work on magnitudes with a shift-and-subtract
 // loop, then fix up the signs of quotient and remainder.
 // NOTE(review): the branch targets below are raw byte offsets from `out`;
 // they appear to assume every emit_* here produces exactly one 4-byte
 // instruction. Confirm before adding, removing or reordering any emit.
5289 emit_movs(d1,remainder);
44a80f6a 5290 emit_movimm(0xffffffff,quotient);
5291 emit_negmi(quotient,quotient); // .. quotient and ..
5292 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5293 emit_movs(d2,HOST_TEMPREG);
 // +52 bytes = 13 instruction slots ahead, which lands on the final
 // emit_test/emit_negmi pair below.
5294 emit_jeq((int)out+52); // Division by zero
5295 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5296 emit_clz(HOST_TEMPREG,quotient);
5297 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5298 emit_orimm(quotient,1<<31,quotient);
5299 emit_shr(quotient,quotient,quotient);
 // Loop body: cmp / subcs / adcs / shift; the jcc jumps back to the cmp.
5300 emit_cmp(remainder,HOST_TEMPREG);
5301 emit_subcs(remainder,HOST_TEMPREG,remainder);
5302 emit_adcs(quotient,quotient,quotient);
5303 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5304 emit_jcc((int)out-16); // -4
 // Sign fix-up: negate quotient when operand signs differ, and remainder
 // when the dividend is negative.
5305 emit_teq(d1,d2);
5306 emit_negmi(quotient,quotient);
5307 emit_test(d1,d1);
5308 emit_negmi(remainder,remainder);
5309 }
5310 if(opcode2[i]==0x1B) // DIVU
5311 {
5312 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5313 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5314 assert(d1>=0);
5315 assert(d2>=0);
5316 signed char quotient=get_reg(i_regs->regmap,LOREG);
5317 signed char remainder=get_reg(i_regs->regmap,HIREG);
5318 assert(quotient>=0);
5319 assert(remainder>=0);
 // Unsigned shift-and-subtract division. For a zero divisor the preset
 // values (remainder=dividend, quotient=0xffffffff) are kept.
 // Note that the emitted code shifts the divisor's host register (d2) in place.
44a80f6a 5320 emit_mov(d1,remainder);
5321 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5322 emit_test(d2,d2);
 // +40 bytes = 10 instruction slots, i.e. past the end of this sequence
 // (same one-instruction-per-emit assumption as in DIV; NOTE(review): confirm).
44a80f6a 5323 emit_jeq((int)out+40); // Division by zero
57871462 5324 emit_clz(d2,HOST_TEMPREG);
5325 emit_movimm(1<<31,quotient);
5326 emit_shl(d2,HOST_TEMPREG,d2);
57871462 5327 emit_shr(quotient,HOST_TEMPREG,quotient);
5328 emit_cmp(remainder,d2);
5329 emit_subcs(remainder,d2,remainder);
5330 emit_adcs(quotient,quotient,quotient);
5331 emit_shrcc_imm(d2,1,d2);
5332 emit_jcc((int)out-16); // -4
5333 }
5334 }
5335 else // 64-bit
4600ba03 5336#ifndef FORCE32
57871462 5337 {
5338 if(opcode2[i]==0x1C) // DMULT
5339 {
 // This assert always fails when the branch is reached, so DMULT is
 // effectively unsupported in builds with assertions enabled.
5340 assert(opcode2[i]!=0x1C);
5341 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5342 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5343 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5344 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5345 assert(m1h>=0);
5346 assert(m2h>=0);
5347 assert(m1l>=0);
5348 assert(m2l>=0);
5349 emit_pushreg(m2h);
5350 emit_pushreg(m2l);
5351 emit_pushreg(m1h);
5352 emit_pushreg(m1l);
5353 emit_call((int)&mult64);
5354 emit_popreg(m1l);
5355 emit_popreg(m1h);
5356 emit_popreg(m2l);
5357 emit_popreg(m2h);
 // Reload whichever halves of HI/LO are currently mapped to host registers.
5358 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5359 signed char hil=get_reg(i_regs->regmap,HIREG);
5360 if(hih>=0) emit_loadreg(HIREG|64,hih);
5361 if(hil>=0) emit_loadreg(HIREG,hil);
5362 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5363 signed char lol=get_reg(i_regs->regmap,LOREG);
5364 if(loh>=0) emit_loadreg(LOREG|64,loh);
5365 if(lol>=0) emit_loadreg(LOREG,lol);
5366 }
5367 if(opcode2[i]==0x1D) // DMULTU
5368 {
5369 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5370 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5371 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5372 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5373 assert(m1h>=0);
5374 assert(m2h>=0);
5375 assert(m1l>=0);
5376 assert(m2l>=0);
 // Move the four operand halves into host registers 0-3 for the call.
 // An operand living in a lower-numbered register that has already been
 // overwritten is read back from memory at dynarec_local instead
 // (presumably where save_regs put it - TODO confirm).
5377 save_regs(0x100f);
5378 if(m1l!=0) emit_mov(m1l,0);
5379 if(m1h==0) emit_readword((int)&dynarec_local,1);
5380 else if(m1h>1) emit_mov(m1h,1);
5381 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5382 else if(m2l>2) emit_mov(m2l,2);
5383 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5384 else if(m2h>3) emit_mov(m2h,3);
5385 emit_call((int)&multu64);
5386 restore_regs(0x100f);
 // NOTE(review): the four mappings below are fetched but never used; every
 // emit_loadreg for them sits inside the disabled code that follows. DDIV and
 // DDIVU do reload HI/LO after their calls - confirm whether DMULTU should too.
5387 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5388 signed char hil=get_reg(i_regs->regmap,HIREG);
5389 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5390 signed char lol=get_reg(i_regs->regmap,LOREG);
5391 /*signed char temp=get_reg(i_regs->regmap,-1);
5392 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5393 signed char rl=get_reg(i_regs->regmap,HIREG);
5394 assert(m1h>=0);
5395 assert(m2h>=0);
5396 assert(m1l>=0);
5397 assert(m2l>=0);
5398 assert(temp>=0);
5399 //emit_mov(m1l,EAX);
5400 //emit_mul(m2l);
5401 emit_umull(rl,rh,m1l,m2l);
5402 emit_storereg(LOREG,rl);
5403 emit_mov(rh,temp);
5404 //emit_mov(m1h,EAX);
5405 //emit_mul(m2l);
5406 emit_umull(rl,rh,m1h,m2l);
5407 emit_adds(rl,temp,temp);
5408 emit_adcimm(rh,0,rh);
5409 emit_storereg(HIREG,rh);
5410 //emit_mov(m2h,EAX);
5411 //emit_mul(m1l);
5412 emit_umull(rl,rh,m1l,m2h);
5413 emit_adds(rl,temp,temp);
5414 emit_adcimm(rh,0,rh);
5415 emit_storereg(LOREG|64,temp);
5416 emit_mov(rh,temp);
5417 //emit_mov(m2h,EAX);
5418 //emit_mul(m1h);
5419 emit_umull(rl,rh,m1h,m2h);
5420 emit_adds(rl,temp,rl);
5421 emit_loadreg(HIREG,temp);
5422 emit_adcimm(rh,0,rh);
5423 emit_adds(rl,temp,rl);
5424 emit_adcimm(rh,0,rh);
5425 // DEBUG
5426 /*
5427 emit_pushreg(m2h);
5428 emit_pushreg(m2l);
5429 emit_pushreg(m1h);
5430 emit_pushreg(m1l);
5431 emit_call((int)&multu64);
5432 emit_popreg(m1l);
5433 emit_popreg(m1h);
5434 emit_popreg(m2l);
5435 emit_popreg(m2h);
5436 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5437 signed char hil=get_reg(i_regs->regmap,HIREG);
5438 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5439 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5440 */
5441 // Shouldn't be necessary
5442 //char loh=get_reg(i_regs->regmap,LOREG|64);
5443 //char lol=get_reg(i_regs->regmap,LOREG);
5444 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5445 //if(lol>=0) emit_loadreg(LOREG,lol);
5446 }
5447 if(opcode2[i]==0x1E) // DDIV
5448 {
5449 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5450 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5451 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5452 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5453 assert(d1h>=0);
5454 assert(d2h>=0);
5455 assert(d1l>=0);
5456 assert(d2l>=0);
 // Same argument marshalling as DMULTU, then call div64 and reload HI/LO.
5457 save_regs(0x100f);
5458 if(d1l!=0) emit_mov(d1l,0);
5459 if(d1h==0) emit_readword((int)&dynarec_local,1);
5460 else if(d1h>1) emit_mov(d1h,1);
5461 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5462 else if(d2l>2) emit_mov(d2l,2);
5463 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5464 else if(d2h>3) emit_mov(d2h,3);
5465 emit_call((int)&div64);
5466 restore_regs(0x100f);
5467 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5468 signed char hil=get_reg(i_regs->regmap,HIREG);
5469 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5470 signed char lol=get_reg(i_regs->regmap,LOREG);
5471 if(hih>=0) emit_loadreg(HIREG|64,hih);
5472 if(hil>=0) emit_loadreg(HIREG,hil);
5473 if(loh>=0) emit_loadreg(LOREG|64,loh);
5474 if(lol>=0) emit_loadreg(LOREG,lol);
5475 }
5476 if(opcode2[i]==0x1F) // DDIVU
5477 {
5478 //u_int hr,reglist=0;
5479 //for(hr=0;hr<HOST_REGS;hr++) {
5480 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5481 //}
5482 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5483 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5484 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5485 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5486 assert(d1h>=0);
5487 assert(d2h>=0);
5488 assert(d1l>=0);
5489 assert(d2l>=0);
 // Identical to DDIV except that divu64 is called.
5490 save_regs(0x100f);
5491 if(d1l!=0) emit_mov(d1l,0);
5492 if(d1h==0) emit_readword((int)&dynarec_local,1);
5493 else if(d1h>1) emit_mov(d1h,1);
5494 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5495 else if(d2l>2) emit_mov(d2l,2);
5496 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5497 else if(d2h>3) emit_mov(d2h,3);
5498 emit_call((int)&divu64);
5499 restore_regs(0x100f);
5500 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5501 signed char hil=get_reg(i_regs->regmap,HIREG);
5502 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5503 signed char lol=get_reg(i_regs->regmap,LOREG);
5504 if(hih>=0) emit_loadreg(HIREG|64,hih);
5505 if(hil>=0) emit_loadreg(HIREG,hil);
5506 if(loh>=0) emit_loadreg(LOREG|64,loh);
5507 if(lol>=0) emit_loadreg(LOREG,lol);
5508 }
5509 }
4600ba03 5510#else
5511 assert(0);
5512#endif
57871462 5513 }
5514 else
5515 {
5516 // Multiply by zero is zero.
5517 // MIPS does not have a divide by zero exception.
5518 // The result is undefined, we return zero.
5519 signed char hr=get_reg(i_regs->regmap,HIREG);
5520 signed char lr=get_reg(i_regs->regmap,LOREG);
5521 if(hr>=0) emit_zeroreg(hr);
5522 if(lr>=0) emit_zeroreg(lr);
5523 }
5524}
// Expose the ARM implementation under the generic name multdiv_assemble.
5525#define multdiv_assemble multdiv_assemble_arm
5526
// Intentionally emits nothing on ARM.
// The x86 backend uses this hook to load the hash mask 0xf8 into a register;
// on ARM do_rhash() applies the mask with a single instruction instead.
void do_preload_rhash(int r) {
  (void)r; // register number is irrelevant here
}
5531
5532void do_preload_rhtbl(int ht) {
5533 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5534}
5535
// Emit code computing the mini hash table offset for the address in `rs`:
// rh = rs & 0xf8. This is a byte offset selecting one of the 32 eight-byte
// entries of mini_ht.
void do_rhash(int rs,int rh) {
  enum { MINIHT_OFFSET_MASK=0xf8 };
  emit_andimm(rs,MINIHT_OFFSET_MASK,rh);
}
5539
5540void do_miniht_load(int ht,int rh) {
5541 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5542 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5543}
5544
5545void do_miniht_jump(int rs,int rh,int ht) {
5546 emit_cmp(rh,rs);
5547 emit_ldreq_indexed(ht,4,15);
5548 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5549 emit_mov(rs,7);
5550 emit_jmp(jump_vaddr_reg[7]);
5551 #else
5552 emit_jmp(jump_vaddr_reg[rs]);
5553 #endif
5554}
5555
5556void do_miniht_insert(u_int return_address,int rt,int temp) {
5557 #ifdef ARMv5_ONLY
5558 emit_movimm(return_address,rt); // PC into link register
5559 add_to_linker((int)out,return_address,1);
5560 emit_pcreladdr(temp);
5561 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5562 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5563 #else
5564 emit_movw(return_address&0x0000FFFF,rt);
5565 add_to_linker((int)out,return_address,1);
5566 emit_pcreladdr(temp);
5567 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5568 emit_movt(return_address&0xFFFF0000,rt);
5569 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5570 #endif
5571}
5572
5573// Sign-extend to 64 bits and write out upper half of a register
5574// This is useful where we have a 32-bit value in a register, and want to
5575// keep it in a 32-bit register, but can't guarantee that it won't be read
5576// as a 64-bit value later.
5577void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5578{
24385cae 5579#ifndef FORCE32
57871462 5580 if(is32_pre==is32) return;
5581 int hr,reg;
5582 for(hr=0;hr<HOST_REGS;hr++) {
5583 if(hr!=EXCLUDE_REG) {
5584 //if(pre[hr]==entry[hr]) {
5585 if((reg=pre[hr])>=0) {
5586 if((dirty>>hr)&1) {
5587 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5588 emit_sarimm(hr,31,HOST_TEMPREG);
5589 emit_storereg(reg|64,HOST_TEMPREG);
5590 }
5591 }
5592 }
5593 //}
5594 }
5595 }
24385cae 5596#endif
57871462 5597}
5598
5599void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5600{
5601 //if(dirty_pre==dirty) return;
5602 int hr,reg,new_hr;
5603 for(hr=0;hr<HOST_REGS;hr++) {
5604 if(hr!=EXCLUDE_REG) {
5605 reg=pre[hr];
5606 if(((~u)>>(reg&63))&1) {
f776eb14 5607 if(reg>0) {
57871462 5608 if(((dirty_pre&~dirty)>>hr)&1) {
5609 if(reg>0&&reg<34) {
5610 emit_storereg(reg,hr);
5611 if( ((is32_pre&~uu)>>reg)&1 ) {
5612 emit_sarimm(hr,31,HOST_TEMPREG);
5613 emit_storereg(reg|64,HOST_TEMPREG);
5614 }
5615 }
5616 else if(reg>=64) {
5617 emit_storereg(reg,hr);
5618 }
5619 }
5620 }
57871462 5621 }
5622 }
5623 }
5624}
5625
5626
5627/* using strd could possibly help but you'd have to allocate registers in pairs
5628void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5629{
5630 int hr;
5631 int wrote=-1;
5632 for(hr=HOST_REGS-1;hr>=0;hr--) {
5633 if(hr!=EXCLUDE_REG) {
5634 if(pre[hr]!=entry[hr]) {
5635 if(pre[hr]>=0) {
5636 if((dirty>>hr)&1) {
5637 if(get_reg(entry,pre[hr])<0) {
5638 if(pre[hr]<64) {
5639 if(!((u>>pre[hr])&1)) {
5640 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5641 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5642 emit_sarimm(hr,31,hr+1);
5643 emit_strdreg(pre[hr],hr);
5644 }
5645 else
5646 emit_storereg(pre[hr],hr);
5647 }else{
5648 emit_storereg(pre[hr],hr);
5649 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5650 emit_sarimm(hr,31,hr);
5651 emit_storereg(pre[hr]|64,hr);
5652 }
5653 }
5654 }
5655 }else{
5656 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5657 emit_storereg(pre[hr],hr);
5658 }
5659 }
5660 wrote=hr;
5661 }
5662 }
5663 }
5664 }
5665 }
5666 }
5667 for(hr=0;hr<HOST_REGS;hr++) {
5668 if(hr!=EXCLUDE_REG) {
5669 if(pre[hr]!=entry[hr]) {
5670 if(pre[hr]>=0) {
5671 int nr;
5672 if((nr=get_reg(entry,pre[hr]))>=0) {
5673 emit_mov(hr,nr);
5674 }
5675 }
5676 }
5677 }
5678 }
5679}
5680#define wb_invalidate wb_invalidate_arm
5681*/
5682
dd3a91a1 5683// Clearing the cache is rather slow on ARM Linux, so mark the areas
5684// that need to be cleared, and then only clear these areas once.
5685void do_clear_cache()
5686{
5687 int i,j;
5688 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5689 {
5690 u_int bitmap=needs_clear_cache[i];
5691 if(bitmap) {
5692 u_int start,end;
5693 for(j=0;j<32;j++)
5694 {
5695 if(bitmap&(1<<j)) {
bdeade46 5696 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 5697 end=start+4095;
5698 j++;
5699 while(j<32) {
5700 if(bitmap&(1<<j)) {
5701 end+=4096;
5702 j++;
5703 }else{
5704 __clear_cache((void *)start,(void *)end);
5705 break;
5706 }
5707 }
5708 }
5709 }
5710 needs_clear_cache[i]=0;
5711 }
5712 }
5713}
5714
57871462 5715// CPU-architecture-specific initialization
5716void arch_init() {
3d624f89 5717#ifndef DISABLE_COP1
57871462 5718 rounding_modes[0]=0x0<<22; // round
5719 rounding_modes[1]=0x3<<22; // trunc
5720 rounding_modes[2]=0x1<<22; // ceil
5721 rounding_modes[3]=0x2<<22; // floor
3d624f89 5722#endif
57871462 5723}
b9b61529 5724
5725// vim:shiftwidth=2:expandtab