frontend: merge updates from SDL project
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
6c0eefaf 23#include "../gte.h"
24#define FLAGLESS
25#include "../gte.h"
26#undef FLAGLESS
054175e9 27#include "../gte_arm.h"
28#include "../gte_neon.h"
29#include "pcnt.h"
30#endif
31
bdeade46 32#ifndef BASE_ADDR_FIXED
33char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
34#endif
35
57871462 36extern int cycle_count;
37extern int last_count;
38extern int pcaddr;
39extern int pending_exception;
40extern int branch_target;
41extern uint64_t readmem_dword;
3d624f89 42#ifdef MUPEN64
57871462 43extern precomp_instr fake_pc;
3d624f89 44#endif
57871462 45extern void *dynarec_local;
46extern u_int memory_map[1048576];
47extern u_int mini_ht[32][2];
48extern u_int rounding_modes[4];
49
50void indirect_jump_indexed();
51void indirect_jump();
52void do_interrupt();
53void jump_vaddr_r0();
54void jump_vaddr_r1();
55void jump_vaddr_r2();
56void jump_vaddr_r3();
57void jump_vaddr_r4();
58void jump_vaddr_r5();
59void jump_vaddr_r6();
60void jump_vaddr_r7();
61void jump_vaddr_r8();
62void jump_vaddr_r9();
63void jump_vaddr_r10();
64void jump_vaddr_r12();
65
66const u_int jump_vaddr_reg[16] = {
67 (int)jump_vaddr_r0,
68 (int)jump_vaddr_r1,
69 (int)jump_vaddr_r2,
70 (int)jump_vaddr_r3,
71 (int)jump_vaddr_r4,
72 (int)jump_vaddr_r5,
73 (int)jump_vaddr_r6,
74 (int)jump_vaddr_r7,
75 (int)jump_vaddr_r8,
76 (int)jump_vaddr_r9,
77 (int)jump_vaddr_r10,
78 0,
79 (int)jump_vaddr_r12,
80 0,
81 0,
82 0};
83
0bbd1454 84void invalidate_addr_r0();
85void invalidate_addr_r1();
86void invalidate_addr_r2();
87void invalidate_addr_r3();
88void invalidate_addr_r4();
89void invalidate_addr_r5();
90void invalidate_addr_r6();
91void invalidate_addr_r7();
92void invalidate_addr_r8();
93void invalidate_addr_r9();
94void invalidate_addr_r10();
95void invalidate_addr_r12();
96
97const u_int invalidate_addr_reg[16] = {
98 (int)invalidate_addr_r0,
99 (int)invalidate_addr_r1,
100 (int)invalidate_addr_r2,
101 (int)invalidate_addr_r3,
102 (int)invalidate_addr_r4,
103 (int)invalidate_addr_r5,
104 (int)invalidate_addr_r6,
105 (int)invalidate_addr_r7,
106 (int)invalidate_addr_r8,
107 (int)invalidate_addr_r9,
108 (int)invalidate_addr_r10,
109 0,
110 (int)invalidate_addr_r12,
111 0,
112 0,
113 0};
114
57871462 115#include "fpu.h"
116
dd3a91a1 117unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
118
57871462 119/* Linker */
120
121void set_jump_target(int addr,u_int target)
122{
123 u_char *ptr=(u_char *)addr;
124 u_int *ptr2=(u_int *)ptr;
125 if(ptr[3]==0xe2) {
126 assert((target-(u_int)ptr2-8)<1024);
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
131 }
132 else if(ptr[3]==0x72) {
133 // generated by emit_jno_unlikely
134 if((target-(u_int)ptr2-8)<1024) {
135 assert((addr&3)==0);
136 assert((target&3)==0);
137 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
138 }
139 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
140 assert((addr&3)==0);
141 assert((target&3)==0);
142 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
143 }
144 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146 else {
147 assert((ptr[3]&0x0e)==0xa);
148 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
149 }
150}
151
152// This optionally copies the instruction from the target of the branch into
153// the space before the branch. Works, but the difference in speed is
154// usually insignificant.
155void set_jump_target_fillslot(int addr,u_int target,int copy)
156{
157 u_char *ptr=(u_char *)addr;
158 u_int *ptr2=(u_int *)ptr;
159 assert(!copy||ptr2[-1]==0xe28dd000);
160 if(ptr[3]==0xe2) {
161 assert(!copy);
162 assert((target-(u_int)ptr2-8)<4096);
163 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
164 }
165 else {
166 assert((ptr[3]&0x0e)==0xa);
167 u_int target_insn=*(u_int *)target;
168 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
169 copy=0;
170 }
171 if((target_insn&0x0c100000)==0x04100000) { // Load
172 copy=0;
173 }
174 if(target_insn&0x08000000) {
175 copy=0;
176 }
177 if(copy) {
178 ptr2[-1]=target_insn;
179 target+=4;
180 }
181 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
182 }
183}
184
185/* Literal pool */
186add_literal(int addr,int val)
187{
15776b68 188 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 189 literals[literalcount][0]=addr;
190 literals[literalcount][1]=val;
191 literalcount++;
192}
193
f76eeef9 194void *kill_pointer(void *stub)
57871462 195{
196 int *ptr=(int *)(stub+4);
197 assert((*ptr&0x0ff00000)==0x05900000);
198 u_int offset=*ptr&0xfff;
199 int **l_ptr=(void *)ptr+offset+8;
200 int *i_ptr=*l_ptr;
201 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 202 return i_ptr;
57871462 203}
204
f968d35d 205// find where external branch is liked to using addr of it's stub:
206// get address that insn one after stub loads (dyna_linker arg1),
207// treat it as a pointer to branch insn,
208// return addr where that branch jumps to
57871462 209int get_pointer(void *stub)
210{
211 //printf("get_pointer(%x)\n",(int)stub);
212 int *ptr=(int *)(stub+4);
f968d35d 213 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 214 u_int offset=*ptr&0xfff;
215 int **l_ptr=(void *)ptr+offset+8;
216 int *i_ptr=*l_ptr;
217 assert((*i_ptr&0x0f000000)==0x0a000000);
218 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
219}
220
221// Find the "clean" entry point from a "dirty" entry point
222// by skipping past the call to verify_code
223u_int get_clean_addr(int addr)
224{
225 int *ptr=(int *)addr;
226 #ifdef ARMv5_ONLY
227 ptr+=4;
228 #else
229 ptr+=6;
230 #endif
231 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
232 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
233 ptr++;
234 if((*ptr&0xFF000000)==0xea000000) {
235 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
236 }
237 return (u_int)ptr;
238}
239
240int verify_dirty(int addr)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
15776b68 245 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 262#ifndef DISABLE_TLB
cfcba99a 263 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 264 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
265 unsigned int page=source>>12;
266 unsigned int map_value=memory_map[page];
267 if(map_value>=0x80000000) return 0;
268 while(page<((source+len-1)>>12)) {
269 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
270 }
271 source = source+(map_value<<2);
272 }
63cb0298 273#endif
57871462 274 //printf("verify_dirty: %x %x %x\n",source,copy,len);
275 return !memcmp((void *)source,(void *)copy,len);
276}
277
278// This doesn't necessarily find all clean entry points, just
279// guarantees that it's not dirty
280int isclean(int addr)
281{
282 #ifdef ARMv5_ONLY
283 int *ptr=((u_int *)addr)+4;
284 #else
285 int *ptr=((u_int *)addr)+6;
286 #endif
287 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
288 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
289 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
290 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
291 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
292 return 1;
293}
294
295void get_bounds(int addr,u_int *start,u_int *end)
296{
297 u_int *ptr=(u_int *)addr;
298 #ifdef ARMv5_ONLY
299 // get from literal pool
15776b68 300 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 301 u_int offset=*ptr&0xfff;
302 u_int *l_ptr=(void *)ptr+offset+8;
303 u_int source=l_ptr[0];
304 //u_int copy=l_ptr[1];
305 u_int len=l_ptr[2];
306 ptr+=4;
307 #else
308 // ARMv7 movw/movt
309 assert((*ptr&0xFFF00000)==0xe3000000);
310 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
311 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
312 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
313 ptr+=6;
314 #endif
315 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
316 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 317#ifndef DISABLE_TLB
cfcba99a 318 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 319 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
320 if(memory_map[source>>12]>=0x80000000) source = 0;
321 else source = source+(memory_map[source>>12]<<2);
322 }
63cb0298 323#endif
57871462 324 *start=source;
325 *end=source+len;
326}
327
328/* Register allocation */
329
330// Note: registers are allocated clean (unmodified state)
331// if you intend to modify the register, you must call dirty_reg().
332void alloc_reg(struct regstat *cur,int i,signed char reg)
333{
334 int r,hr;
335 int preferred_reg = (reg&7);
336 if(reg==CCREG) preferred_reg=HOST_CCREG;
337 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
338
339 // Don't allocate unused registers
340 if((cur->u>>reg)&1) return;
341
342 // see if it's already allocated
343 for(hr=0;hr<HOST_REGS;hr++)
344 {
345 if(cur->regmap[hr]==reg) return;
346 }
347
348 // Keep the same mapping if the register was already allocated in a loop
349 preferred_reg = loop_reg(i,reg,preferred_reg);
350
351 // Try to allocate the preferred register
352 if(cur->regmap[preferred_reg]==-1) {
353 cur->regmap[preferred_reg]=reg;
354 cur->dirty&=~(1<<preferred_reg);
355 cur->isconst&=~(1<<preferred_reg);
356 return;
357 }
358 r=cur->regmap[preferred_reg];
359 if(r<64&&((cur->u>>r)&1)) {
360 cur->regmap[preferred_reg]=reg;
361 cur->dirty&=~(1<<preferred_reg);
362 cur->isconst&=~(1<<preferred_reg);
363 return;
364 }
365 if(r>=64&&((cur->uu>>(r&63))&1)) {
366 cur->regmap[preferred_reg]=reg;
367 cur->dirty&=~(1<<preferred_reg);
368 cur->isconst&=~(1<<preferred_reg);
369 return;
370 }
371
372 // Clear any unneeded registers
373 // We try to keep the mapping consistent, if possible, because it
374 // makes branches easier (especially loops). So we try to allocate
375 // first (see above) before removing old mappings. If this is not
376 // possible then go ahead and clear out the registers that are no
377 // longer needed.
378 for(hr=0;hr<HOST_REGS;hr++)
379 {
380 r=cur->regmap[hr];
381 if(r>=0) {
382 if(r<64) {
383 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
384 }
385 else
386 {
387 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
388 }
389 }
390 }
391 // Try to allocate any available register, but prefer
392 // registers that have not been used recently.
393 if(i>0) {
394 for(hr=0;hr<HOST_REGS;hr++) {
395 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
396 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
397 cur->regmap[hr]=reg;
398 cur->dirty&=~(1<<hr);
399 cur->isconst&=~(1<<hr);
400 return;
401 }
402 }
403 }
404 }
405 // Try to allocate any available register
406 for(hr=0;hr<HOST_REGS;hr++) {
407 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
408 cur->regmap[hr]=reg;
409 cur->dirty&=~(1<<hr);
410 cur->isconst&=~(1<<hr);
411 return;
412 }
413 }
414
415 // Ok, now we have to evict someone
416 // Pick a register we hopefully won't need soon
417 u_char hsn[MAXREG+1];
418 memset(hsn,10,sizeof(hsn));
419 int j;
420 lsn(hsn,i,&preferred_reg);
421 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
422 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
423 if(i>0) {
424 // Don't evict the cycle count at entry points, otherwise the entry
425 // stub will have to write it.
426 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
427 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
428 for(j=10;j>=3;j--)
429 {
430 // Alloc preferred register if available
431 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
432 for(hr=0;hr<HOST_REGS;hr++) {
433 // Evict both parts of a 64-bit register
434 if((cur->regmap[hr]&63)==r) {
435 cur->regmap[hr]=-1;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 }
439 }
440 cur->regmap[preferred_reg]=reg;
441 return;
442 }
443 for(r=1;r<=MAXREG;r++)
444 {
445 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
446 for(hr=0;hr<HOST_REGS;hr++) {
447 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
448 if(cur->regmap[hr]==r+64) {
449 cur->regmap[hr]=reg;
450 cur->dirty&=~(1<<hr);
451 cur->isconst&=~(1<<hr);
452 return;
453 }
454 }
455 }
456 for(hr=0;hr<HOST_REGS;hr++) {
457 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
458 if(cur->regmap[hr]==r) {
459 cur->regmap[hr]=reg;
460 cur->dirty&=~(1<<hr);
461 cur->isconst&=~(1<<hr);
462 return;
463 }
464 }
465 }
466 }
467 }
468 }
469 }
470 for(j=10;j>=0;j--)
471 {
472 for(r=1;r<=MAXREG;r++)
473 {
474 if(hsn[r]==j) {
475 for(hr=0;hr<HOST_REGS;hr++) {
476 if(cur->regmap[hr]==r+64) {
477 cur->regmap[hr]=reg;
478 cur->dirty&=~(1<<hr);
479 cur->isconst&=~(1<<hr);
480 return;
481 }
482 }
483 for(hr=0;hr<HOST_REGS;hr++) {
484 if(cur->regmap[hr]==r) {
485 cur->regmap[hr]=reg;
486 cur->dirty&=~(1<<hr);
487 cur->isconst&=~(1<<hr);
488 return;
489 }
490 }
491 }
492 }
493 }
494 printf("This shouldn't happen (alloc_reg)");exit(1);
495}
496
497void alloc_reg64(struct regstat *cur,int i,signed char reg)
498{
499 int preferred_reg = 8+(reg&1);
500 int r,hr;
501
502 // allocate the lower 32 bits
503 alloc_reg(cur,i,reg);
504
505 // Don't allocate unused registers
506 if((cur->uu>>reg)&1) return;
507
508 // see if the upper half is already allocated
509 for(hr=0;hr<HOST_REGS;hr++)
510 {
511 if(cur->regmap[hr]==reg+64) return;
512 }
513
514 // Keep the same mapping if the register was already allocated in a loop
515 preferred_reg = loop_reg(i,reg,preferred_reg);
516
517 // Try to allocate the preferred register
518 if(cur->regmap[preferred_reg]==-1) {
519 cur->regmap[preferred_reg]=reg|64;
520 cur->dirty&=~(1<<preferred_reg);
521 cur->isconst&=~(1<<preferred_reg);
522 return;
523 }
524 r=cur->regmap[preferred_reg];
525 if(r<64&&((cur->u>>r)&1)) {
526 cur->regmap[preferred_reg]=reg|64;
527 cur->dirty&=~(1<<preferred_reg);
528 cur->isconst&=~(1<<preferred_reg);
529 return;
530 }
531 if(r>=64&&((cur->uu>>(r&63))&1)) {
532 cur->regmap[preferred_reg]=reg|64;
533 cur->dirty&=~(1<<preferred_reg);
534 cur->isconst&=~(1<<preferred_reg);
535 return;
536 }
537
538 // Clear any unneeded registers
539 // We try to keep the mapping consistent, if possible, because it
540 // makes branches easier (especially loops). So we try to allocate
541 // first (see above) before removing old mappings. If this is not
542 // possible then go ahead and clear out the registers that are no
543 // longer needed.
544 for(hr=HOST_REGS-1;hr>=0;hr--)
545 {
546 r=cur->regmap[hr];
547 if(r>=0) {
548 if(r<64) {
549 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
550 }
551 else
552 {
553 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
554 }
555 }
556 }
557 // Try to allocate any available register, but prefer
558 // registers that have not been used recently.
559 if(i>0) {
560 for(hr=0;hr<HOST_REGS;hr++) {
561 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
562 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
563 cur->regmap[hr]=reg|64;
564 cur->dirty&=~(1<<hr);
565 cur->isconst&=~(1<<hr);
566 return;
567 }
568 }
569 }
570 }
571 // Try to allocate any available register
572 for(hr=0;hr<HOST_REGS;hr++) {
573 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
574 cur->regmap[hr]=reg|64;
575 cur->dirty&=~(1<<hr);
576 cur->isconst&=~(1<<hr);
577 return;
578 }
579 }
580
581 // Ok, now we have to evict someone
582 // Pick a register we hopefully won't need soon
583 u_char hsn[MAXREG+1];
584 memset(hsn,10,sizeof(hsn));
585 int j;
586 lsn(hsn,i,&preferred_reg);
587 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
588 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
589 if(i>0) {
590 // Don't evict the cycle count at entry points, otherwise the entry
591 // stub will have to write it.
592 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
593 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
594 for(j=10;j>=3;j--)
595 {
596 // Alloc preferred register if available
597 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
598 for(hr=0;hr<HOST_REGS;hr++) {
599 // Evict both parts of a 64-bit register
600 if((cur->regmap[hr]&63)==r) {
601 cur->regmap[hr]=-1;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 }
605 }
606 cur->regmap[preferred_reg]=reg|64;
607 return;
608 }
609 for(r=1;r<=MAXREG;r++)
610 {
611 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
612 for(hr=0;hr<HOST_REGS;hr++) {
613 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
614 if(cur->regmap[hr]==r+64) {
615 cur->regmap[hr]=reg|64;
616 cur->dirty&=~(1<<hr);
617 cur->isconst&=~(1<<hr);
618 return;
619 }
620 }
621 }
622 for(hr=0;hr<HOST_REGS;hr++) {
623 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
624 if(cur->regmap[hr]==r) {
625 cur->regmap[hr]=reg|64;
626 cur->dirty&=~(1<<hr);
627 cur->isconst&=~(1<<hr);
628 return;
629 }
630 }
631 }
632 }
633 }
634 }
635 }
636 for(j=10;j>=0;j--)
637 {
638 for(r=1;r<=MAXREG;r++)
639 {
640 if(hsn[r]==j) {
641 for(hr=0;hr<HOST_REGS;hr++) {
642 if(cur->regmap[hr]==r+64) {
643 cur->regmap[hr]=reg|64;
644 cur->dirty&=~(1<<hr);
645 cur->isconst&=~(1<<hr);
646 return;
647 }
648 }
649 for(hr=0;hr<HOST_REGS;hr++) {
650 if(cur->regmap[hr]==r) {
651 cur->regmap[hr]=reg|64;
652 cur->dirty&=~(1<<hr);
653 cur->isconst&=~(1<<hr);
654 return;
655 }
656 }
657 }
658 }
659 }
660 printf("This shouldn't happen");exit(1);
661}
662
663// Allocate a temporary register. This is done without regard to
664// dirty status or whether the register we request is on the unneeded list
665// Note: This will only allocate one register, even if called multiple times
666void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
667{
668 int r,hr;
669 int preferred_reg = -1;
670
671 // see if it's already allocated
672 for(hr=0;hr<HOST_REGS;hr++)
673 {
674 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
675 }
676
677 // Try to allocate any available register
678 for(hr=HOST_REGS-1;hr>=0;hr--) {
679 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
680 cur->regmap[hr]=reg;
681 cur->dirty&=~(1<<hr);
682 cur->isconst&=~(1<<hr);
683 return;
684 }
685 }
686
687 // Find an unneeded register
688 for(hr=HOST_REGS-1;hr>=0;hr--)
689 {
690 r=cur->regmap[hr];
691 if(r>=0) {
692 if(r<64) {
693 if((cur->u>>r)&1) {
694 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
695 cur->regmap[hr]=reg;
696 cur->dirty&=~(1<<hr);
697 cur->isconst&=~(1<<hr);
698 return;
699 }
700 }
701 }
702 else
703 {
704 if((cur->uu>>(r&63))&1) {
705 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
706 cur->regmap[hr]=reg;
707 cur->dirty&=~(1<<hr);
708 cur->isconst&=~(1<<hr);
709 return;
710 }
711 }
712 }
713 }
714 }
715
716 // Ok, now we have to evict someone
717 // Pick a register we hopefully won't need soon
718 // TODO: we might want to follow unconditional jumps here
719 // TODO: get rid of dupe code and make this into a function
720 u_char hsn[MAXREG+1];
721 memset(hsn,10,sizeof(hsn));
722 int j;
723 lsn(hsn,i,&preferred_reg);
724 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
725 if(i>0) {
726 // Don't evict the cycle count at entry points, otherwise the entry
727 // stub will have to write it.
728 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
729 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
730 for(j=10;j>=3;j--)
731 {
732 for(r=1;r<=MAXREG;r++)
733 {
734 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
735 for(hr=0;hr<HOST_REGS;hr++) {
736 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
737 if(cur->regmap[hr]==r+64) {
738 cur->regmap[hr]=reg;
739 cur->dirty&=~(1<<hr);
740 cur->isconst&=~(1<<hr);
741 return;
742 }
743 }
744 }
745 for(hr=0;hr<HOST_REGS;hr++) {
746 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
747 if(cur->regmap[hr]==r) {
748 cur->regmap[hr]=reg;
749 cur->dirty&=~(1<<hr);
750 cur->isconst&=~(1<<hr);
751 return;
752 }
753 }
754 }
755 }
756 }
757 }
758 }
759 for(j=10;j>=0;j--)
760 {
761 for(r=1;r<=MAXREG;r++)
762 {
763 if(hsn[r]==j) {
764 for(hr=0;hr<HOST_REGS;hr++) {
765 if(cur->regmap[hr]==r+64) {
766 cur->regmap[hr]=reg;
767 cur->dirty&=~(1<<hr);
768 cur->isconst&=~(1<<hr);
769 return;
770 }
771 }
772 for(hr=0;hr<HOST_REGS;hr++) {
773 if(cur->regmap[hr]==r) {
774 cur->regmap[hr]=reg;
775 cur->dirty&=~(1<<hr);
776 cur->isconst&=~(1<<hr);
777 return;
778 }
779 }
780 }
781 }
782 }
783 printf("This shouldn't happen");exit(1);
784}
785// Allocate a specific ARM register.
786void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
787{
788 int n;
f776eb14 789 int dirty=0;
57871462 790
791 // see if it's already allocated (and dealloc it)
792 for(n=0;n<HOST_REGS;n++)
793 {
f776eb14 794 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
795 dirty=(cur->dirty>>n)&1;
796 cur->regmap[n]=-1;
797 }
57871462 798 }
799
800 cur->regmap[hr]=reg;
801 cur->dirty&=~(1<<hr);
f776eb14 802 cur->dirty|=dirty<<hr;
57871462 803 cur->isconst&=~(1<<hr);
804}
805
806// Alloc cycle count into dedicated register
807alloc_cc(struct regstat *cur,int i)
808{
809 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
810}
811
812/* Special alloc */
813
814
815/* Assembler */
816
// Printable names of the 16 ARM registers, indexed by register number.
// Used by the assem_debug() traces in the emitters below.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
834
835void output_byte(u_char byte)
836{
837 *(out++)=byte;
838}
839void output_modrm(u_char mod,u_char rm,u_char ext)
840{
841 assert(mod<4);
842 assert(rm<8);
843 assert(ext<8);
844 u_char byte=(mod<<6)|(ext<<3)|rm;
845 *(out++)=byte;
846}
847void output_sib(u_char scale,u_char index,u_char base)
848{
849 assert(scale<4);
850 assert(index<8);
851 assert(base<8);
852 u_char byte=(scale<<6)|(index<<3)|base;
853 *(out++)=byte;
854}
855void output_w32(u_int word)
856{
857 *((u_int *)out)=word;
858 out+=4;
859}
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15 and rm in bits 0-3.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rn<<16;
  fields|=rd<<12;
  fields|=rm;
  return fields;
}
// Build the operand fields of an instruction word with an immediate:
// rn in bits 16-19, rd in bits 12-15, an 8-bit immediate in bits 0-7, and
// ((32-shift)&30) placed at bit 7 upwards. 'shift' must be even.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express 'imm' as an 8-bit value rotated by an even amount.
// On success, stores the 12-bit field (rotation count in bits 7-11, 8-bit
// value in bits 0-7) in *encoded and returns 1. If no such form exists,
// returns 0 and leaves *encoded as 0. An 'imm' of 0 encodes as 0.
u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  for(int rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 890void genimm_checked(u_int imm,u_int *encoded)
891{
892 u_int ret=genimm(imm,encoded);
893 assert(ret);
894}
57871462 895u_int genjmp(u_int addr)
896{
897 int offset=addr-(int)out-8;
e80343e2 898 if(offset<-33554432||offset>=33554432) {
899 if (addr>2) {
900 printf("genjmp: out of range: %08x\n", offset);
901 exit(1);
902 }
903 return 0;
904 }
57871462 905 return ((u_int)offset>>2)&0xffffff;
906}
907
908void emit_mov(int rs,int rt)
909{
910 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
911 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
912}
913
914void emit_movs(int rs,int rt)
915{
916 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
917 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
918}
919
920void emit_add(int rs1,int rs2,int rt)
921{
922 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
923 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
924}
925
926void emit_adds(int rs1,int rs2,int rt)
927{
928 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
929 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
930}
931
932void emit_adcs(int rs1,int rs2,int rt)
933{
934 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
935 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
936}
937
938void emit_sbc(int rs1,int rs2,int rt)
939{
940 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
941 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
942}
943
944void emit_sbcs(int rs1,int rs2,int rt)
945{
946 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
947 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
948}
949
950void emit_neg(int rs, int rt)
951{
952 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
953 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
954}
955
956void emit_negs(int rs, int rt)
957{
958 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
959 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
960}
961
962void emit_sub(int rs1,int rs2,int rt)
963{
964 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
965 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
966}
967
968void emit_subs(int rs1,int rs2,int rt)
969{
970 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
971 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
972}
973
974void emit_zeroreg(int rt)
975{
976 assem_debug("mov %s,#0\n",regname[rt]);
977 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
978}
979
790ee18e 980void emit_loadlp(u_int imm,u_int rt)
981{
982 add_literal((int)out,imm);
983 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
984 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
985}
986void emit_movw(u_int imm,u_int rt)
987{
988 assert(imm<65536);
989 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
990 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
991}
992void emit_movt(u_int imm,u_int rt)
993{
994 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
995 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
996}
997void emit_movimm(u_int imm,u_int rt)
998{
999 u_int armval;
1000 if(genimm(imm,&armval)) {
1001 assem_debug("mov %s,#%d\n",regname[rt],imm);
1002 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1003 }else if(genimm(~imm,&armval)) {
1004 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1005 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1006 }else if(imm<65536) {
1007 #ifdef ARMv5_ONLY
1008 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1009 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1010 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1011 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1012 #else
1013 emit_movw(imm,rt);
1014 #endif
1015 }else{
1016 #ifdef ARMv5_ONLY
1017 emit_loadlp(imm,rt);
1018 #else
1019 emit_movw(imm&0x0000FFFF,rt);
1020 emit_movt(imm&0xFFFF0000,rt);
1021 #endif
1022 }
1023}
1024void emit_pcreladdr(u_int rt)
1025{
1026 assem_debug("add %s,pc,#?\n",regname[rt]);
1027 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1028}
1029
57871462 1030void emit_loadreg(int r, int hr)
1031{
3d624f89 1032#ifdef FORCE32
1033 if(r&64) {
1034 printf("64bit load in 32bit mode!\n");
7f2607ea 1035 assert(0);
1036 return;
3d624f89 1037 }
1038#endif
57871462 1039 if((r&63)==0)
1040 emit_zeroreg(hr);
1041 else {
3d624f89 1042 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1043 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1044 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1045 if(r==CCREG) addr=(int)&cycle_count;
1046 if(r==CSREG) addr=(int)&Status;
1047 if(r==FSREG) addr=(int)&FCR31;
1048 if(r==INVCP) addr=(int)&invc_ptr;
1049 u_int offset = addr-(u_int)&dynarec_local;
1050 assert(offset<4096);
1051 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1052 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1053 }
1054}
1055void emit_storereg(int r, int hr)
1056{
3d624f89 1057#ifdef FORCE32
1058 if(r&64) {
1059 printf("64bit store in 32bit mode!\n");
7f2607ea 1060 assert(0);
1061 return;
3d624f89 1062 }
1063#endif
1064 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1065 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1066 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1067 if(r==CCREG) addr=(int)&cycle_count;
1068 if(r==FSREG) addr=(int)&FCR31;
1069 u_int offset = addr-(u_int)&dynarec_local;
1070 assert(offset<4096);
1071 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1072 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1073}
1074
1075void emit_test(int rs, int rt)
1076{
1077 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1078 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1079}
1080
1081void emit_testimm(int rs,int imm)
1082{
1083 u_int armval;
5a05d80c 1084 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1085 genimm_checked(imm,&armval);
57871462 1086 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1087}
1088
b9b61529 1089void emit_testeqimm(int rs,int imm)
1090{
1091 u_int armval;
1092 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1093 genimm_checked(imm,&armval);
b9b61529 1094 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1095}
1096
57871462 1097void emit_not(int rs,int rt)
1098{
1099 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1100 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1101}
1102
b9b61529 1103void emit_mvnmi(int rs,int rt)
1104{
1105 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1106 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1107}
1108
57871462 1109void emit_and(u_int rs1,u_int rs2,u_int rt)
1110{
1111 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1112 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1113}
1114
1115void emit_or(u_int rs1,u_int rs2,u_int rt)
1116{
1117 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1118 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1119}
1120void emit_or_and_set_flags(int rs1,int rs2,int rt)
1121{
1122 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1123 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1124}
1125
f70d384d 1126void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1127{
1128 assert(rs<16);
1129 assert(rt<16);
1130 assert(imm<32);
1131 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1132 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1133}
1134
576bbd8f 1135void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1136{
1137 assert(rs<16);
1138 assert(rt<16);
1139 assert(imm<32);
1140 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1141 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1142}
1143
57871462 1144void emit_xor(u_int rs1,u_int rs2,u_int rt)
1145{
1146 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1147 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1148}
1149
57871462 1150void emit_addimm(u_int rs,int imm,u_int rt)
1151{
1152 assert(rs<16);
1153 assert(rt<16);
1154 if(imm!=0) {
57871462 1155 u_int armval;
1156 if(genimm(imm,&armval)) {
1157 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1158 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1159 }else if(genimm(-imm,&armval)) {
8a0a8423 1160 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1161 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1162 }else if(imm<0) {
ffb0b9e0 1163 assert(imm>-65536);
57871462 1164 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1165 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1166 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1167 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1168 }else{
ffb0b9e0 1169 assert(imm<65536);
57871462 1170 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1171 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1172 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1173 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1174 }
1175 }
1176 else if(rs!=rt) emit_mov(rs,rt);
1177}
1178
1179void emit_addimm_and_set_flags(int imm,int rt)
1180{
1181 assert(imm>-65536&&imm<65536);
1182 u_int armval;
1183 if(genimm(imm,&armval)) {
1184 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1185 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1186 }else if(genimm(-imm,&armval)) {
1187 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1188 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1189 }else if(imm<0) {
1190 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1191 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1192 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1193 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1194 }else{
1195 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1196 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1197 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1198 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1199 }
1200}
1201void emit_addimm_no_flags(u_int imm,u_int rt)
1202{
1203 emit_addimm(rt,imm,rt);
1204}
1205
1206void emit_addnop(u_int r)
1207{
1208 assert(r<16);
1209 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1210 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1211}
1212
1213void emit_adcimm(u_int rs,int imm,u_int rt)
1214{
1215 u_int armval;
cfbd3c6e 1216 genimm_checked(imm,&armval);
57871462 1217 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1218 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1219}
1220/*void emit_sbcimm(int imm,u_int rt)
1221{
1222 u_int armval;
cfbd3c6e 1223 genimm_checked(imm,&armval);
57871462 1224 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1225 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1226}*/
1227void emit_sbbimm(int imm,u_int rt)
1228{
1229 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1230 assert(rt<8);
1231 if(imm<128&&imm>=-128) {
1232 output_byte(0x83);
1233 output_modrm(3,rt,3);
1234 output_byte(imm);
1235 }
1236 else
1237 {
1238 output_byte(0x81);
1239 output_modrm(3,rt,3);
1240 output_w32(imm);
1241 }
1242}
1243void emit_rscimm(int rs,int imm,u_int rt)
1244{
1245 assert(0);
1246 u_int armval;
cfbd3c6e 1247 genimm_checked(imm,&armval);
57871462 1248 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1249 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1250}
1251
1252void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1253{
1254 // TODO: if(genimm(imm,&armval)) ...
1255 // else
1256 emit_movimm(imm,HOST_TEMPREG);
1257 emit_adds(HOST_TEMPREG,rsl,rtl);
1258 emit_adcimm(rsh,0,rth);
1259}
1260
1261void emit_sbb(int rs1,int rs2)
1262{
1263 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1264 output_byte(0x19);
1265 output_modrm(3,rs1,rs2);
1266}
1267
1268void emit_andimm(int rs,int imm,int rt)
1269{
1270 u_int armval;
790ee18e 1271 if(imm==0) {
1272 emit_zeroreg(rt);
1273 }else if(genimm(imm,&armval)) {
57871462 1274 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1275 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1276 }else if(genimm(~imm,&armval)) {
1277 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1278 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1279 }else if(imm==65535) {
1280 #ifdef ARMv5_ONLY
1281 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1282 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1283 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1284 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1285 #else
1286 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1287 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1288 #endif
1289 }else{
1290 assert(imm>0&&imm<65535);
1291 #ifdef ARMv5_ONLY
1292 assem_debug("mov r14,#%d\n",imm&0xFF00);
1293 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1294 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1295 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1296 #else
1297 emit_movw(imm,HOST_TEMPREG);
1298 #endif
1299 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1300 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1301 }
1302}
1303
1304void emit_orimm(int rs,int imm,int rt)
1305{
1306 u_int armval;
790ee18e 1307 if(imm==0) {
1308 if(rs!=rt) emit_mov(rs,rt);
1309 }else if(genimm(imm,&armval)) {
57871462 1310 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1311 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1312 }else{
1313 assert(imm>0&&imm<65536);
1314 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1315 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1316 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1317 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1318 }
1319}
1320
1321void emit_xorimm(int rs,int imm,int rt)
1322{
57871462 1323 u_int armval;
790ee18e 1324 if(imm==0) {
1325 if(rs!=rt) emit_mov(rs,rt);
1326 }else if(genimm(imm,&armval)) {
57871462 1327 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1328 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1329 }else{
514ed0d9 1330 assert(imm>0&&imm<65536);
57871462 1331 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1332 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1333 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1334 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1335 }
1336}
1337
1338void emit_shlimm(int rs,u_int imm,int rt)
1339{
1340 assert(imm>0);
1341 assert(imm<32);
1342 //if(imm==1) ...
1343 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1344 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1345}
1346
c6c3b1b3 1347void emit_lsls_imm(int rs,int imm,int rt)
1348{
1349 assert(imm>0);
1350 assert(imm<32);
1351 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1352 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1353}
1354
57871462 1355void emit_shrimm(int rs,u_int imm,int rt)
1356{
1357 assert(imm>0);
1358 assert(imm<32);
1359 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1361}
1362
1363void emit_sarimm(int rs,u_int imm,int rt)
1364{
1365 assert(imm>0);
1366 assert(imm<32);
1367 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1369}
1370
1371void emit_rorimm(int rs,u_int imm,int rt)
1372{
1373 assert(imm>0);
1374 assert(imm<32);
1375 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1376 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1377}
1378
1379void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1380{
1381 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1382 assert(imm>0);
1383 assert(imm<32);
1384 //if(imm==1) ...
1385 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1386 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1387 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1388 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1389}
1390
1391void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1392{
1393 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1394 assert(imm>0);
1395 assert(imm<32);
1396 //if(imm==1) ...
1397 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1398 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1399 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1400 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1401}
1402
b9b61529 1403void emit_signextend16(int rs,int rt)
1404{
1405 #ifdef ARMv5_ONLY
1406 emit_shlimm(rs,16,rt);
1407 emit_sarimm(rt,16,rt);
1408 #else
1409 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1410 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1411 #endif
1412}
1413
c6c3b1b3 1414void emit_signextend8(int rs,int rt)
1415{
1416 #ifdef ARMv5_ONLY
1417 emit_shlimm(rs,24,rt);
1418 emit_sarimm(rt,24,rt);
1419 #else
1420 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1421 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1422 #endif
1423}
1424
57871462 1425void emit_shl(u_int rs,u_int shift,u_int rt)
1426{
1427 assert(rs<16);
1428 assert(rt<16);
1429 assert(shift<16);
1430 //if(imm==1) ...
1431 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1432 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1433}
1434void emit_shr(u_int rs,u_int shift,u_int rt)
1435{
1436 assert(rs<16);
1437 assert(rt<16);
1438 assert(shift<16);
1439 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1440 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1441}
1442void emit_sar(u_int rs,u_int shift,u_int rt)
1443{
1444 assert(rs<16);
1445 assert(rt<16);
1446 assert(shift<16);
1447 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1448 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1449}
1450void emit_shlcl(int r)
1451{
1452 assem_debug("shl %%%s,%%cl\n",regname[r]);
1453 assert(0);
1454}
1455void emit_shrcl(int r)
1456{
1457 assem_debug("shr %%%s,%%cl\n",regname[r]);
1458 assert(0);
1459}
1460void emit_sarcl(int r)
1461{
1462 assem_debug("sar %%%s,%%cl\n",regname[r]);
1463 assert(0);
1464}
1465
1466void emit_shldcl(int r1,int r2)
1467{
1468 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1469 assert(0);
1470}
1471void emit_shrdcl(int r1,int r2)
1472{
1473 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1474 assert(0);
1475}
1476void emit_orrshl(u_int rs,u_int shift,u_int rt)
1477{
1478 assert(rs<16);
1479 assert(rt<16);
1480 assert(shift<16);
1481 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1482 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1483}
1484void emit_orrshr(u_int rs,u_int shift,u_int rt)
1485{
1486 assert(rs<16);
1487 assert(rt<16);
1488 assert(shift<16);
1489 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1490 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1491}
1492
1493void emit_cmpimm(int rs,int imm)
1494{
1495 u_int armval;
1496 if(genimm(imm,&armval)) {
5a05d80c 1497 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1498 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1499 }else if(genimm(-imm,&armval)) {
5a05d80c 1500 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1501 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1502 }else if(imm>0) {
1503 assert(imm<65536);
1504 #ifdef ARMv5_ONLY
1505 emit_movimm(imm,HOST_TEMPREG);
1506 #else
1507 emit_movw(imm,HOST_TEMPREG);
1508 #endif
1509 assem_debug("cmp %s,r14\n",regname[rs]);
1510 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1511 }else{
1512 assert(imm>-65536);
1513 #ifdef ARMv5_ONLY
1514 emit_movimm(-imm,HOST_TEMPREG);
1515 #else
1516 emit_movw(-imm,HOST_TEMPREG);
1517 #endif
1518 assem_debug("cmn %s,r14\n",regname[rs]);
1519 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1520 }
1521}
1522
1523void emit_cmovne(u_int *addr,int rt)
1524{
1525 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1526 assert(0);
1527}
1528void emit_cmovl(u_int *addr,int rt)
1529{
1530 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1531 assert(0);
1532}
1533void emit_cmovs(u_int *addr,int rt)
1534{
1535 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1536 assert(0);
1537}
1538void emit_cmovne_imm(int imm,int rt)
1539{
1540 assem_debug("movne %s,#%d\n",regname[rt],imm);
1541 u_int armval;
cfbd3c6e 1542 genimm_checked(imm,&armval);
57871462 1543 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1544}
1545void emit_cmovl_imm(int imm,int rt)
1546{
1547 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1548 u_int armval;
cfbd3c6e 1549 genimm_checked(imm,&armval);
57871462 1550 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1551}
1552void emit_cmovb_imm(int imm,int rt)
1553{
1554 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1555 u_int armval;
cfbd3c6e 1556 genimm_checked(imm,&armval);
57871462 1557 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1558}
1559void emit_cmovs_imm(int imm,int rt)
1560{
1561 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1562 u_int armval;
cfbd3c6e 1563 genimm_checked(imm,&armval);
57871462 1564 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1565}
1566void emit_cmove_reg(int rs,int rt)
1567{
1568 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1569 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1570}
1571void emit_cmovne_reg(int rs,int rt)
1572{
1573 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1574 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1575}
1576void emit_cmovl_reg(int rs,int rt)
1577{
1578 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1579 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1580}
1581void emit_cmovs_reg(int rs,int rt)
1582{
1583 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1584 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1585}
1586
/*
 * Set rt to 1 if rs < imm (signed, via the "lt" conditional move), else 0.
 * When rt aliases rs the zeroing is deferred until after the compare so the
 * source value is still intact when it is compared.
 */
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/*
 * Set rt to 1 if the compare of rs with imm leaves carry clear (the "cc"
 * conditional move), else 0.  When rt aliases rs the zeroing is deferred
 * until after the compare.
 */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
/*
 * Signed "set if less than immediate" for a 64-bit value held in rsh:rsl.
 * The low-word result from emit_slti32() is overridden according to the
 * high word: for imm >= 0 it is tested against zero, for imm < 0 it is
 * compared with -1.  rsh must not be the same register as rt.
 */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
    emit_cmovs_imm(1, rt);
  }
}
/*
 * "Set if less than immediate (unsigned)" for a 64-bit value in rsh:rsl.
 * The low-word result from emit_sltiu32() is overridden when the high word
 * differs from zero (imm >= 0 -> result 0) or from -1 (imm < 0 -> result 1).
 * rsh must not be the same register as rt.
 */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
  }
}
1633
1634void emit_cmp(int rs,int rt)
1635{
1636 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1637 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1638}
/* Set rt to 1 when rs > 0 (signed), else 0: compare with 1, load 1, clear on "lt". */
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
/*
 * Set rt to 1 when rs is non-zero.  Flags come from a flag-setting move
 * into rt (rs != rt) or from testing rs against itself (rs == rt); rt is
 * then overwritten with 1 on "ne".
 */
void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
/*
 * 64-bit "greater than zero" for rsh:rsl.  Starts from the low-word result
 * of emit_set_gz32(), then tests the high word: non-zero forces 1, negative
 * forces 0.
 */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
/* 64-bit "non-zero": rt = rsh | rsl with flags set, then rt = 1 on "ne". */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
/*
 * Set rt to 1 if rs1 < rs2 (signed, "lt"), else 0.  If rt aliases either
 * source it is zeroed only after the compare has been emitted.
 */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/*
 * Set rt to 1 if comparing rs1 with rs2 leaves carry clear ("cc"), else 0.
 * If rt aliases either source it is zeroed only after the compare.
 */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1683void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1684{
1685 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1686 assert(u1!=rt);
1687 assert(u2!=rt);
1688 emit_cmp(l1,l2);
1689 emit_movimm(0,rt);
1690 emit_sbcs(u1,u2,HOST_TEMPREG);
1691 emit_cmovl_imm(1,rt);
1692}
1693void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1694{
1695 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1696 assert(u1!=rt);
1697 assert(u2!=rt);
1698 emit_cmp(l1,l2);
1699 emit_movimm(0,rt);
1700 emit_sbcs(u1,u2,HOST_TEMPREG);
1701 emit_cmovb_imm(1,rt);
1702}
1703
1704void emit_call(int a)
1705{
1706 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1707 u_int offset=genjmp(a);
1708 output_w32(0xeb000000|offset);
1709}
1710void emit_jmp(int a)
1711{
1712 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1713 u_int offset=genjmp(a);
1714 output_w32(0xea000000|offset);
1715}
1716void emit_jne(int a)
1717{
1718 assem_debug("bne %x\n",a);
1719 u_int offset=genjmp(a);
1720 output_w32(0x1a000000|offset);
1721}
1722void emit_jeq(int a)
1723{
1724 assem_debug("beq %x\n",a);
1725 u_int offset=genjmp(a);
1726 output_w32(0x0a000000|offset);
1727}
1728void emit_js(int a)
1729{
1730 assem_debug("bmi %x\n",a);
1731 u_int offset=genjmp(a);
1732 output_w32(0x4a000000|offset);
1733}
1734void emit_jns(int a)
1735{
1736 assem_debug("bpl %x\n",a);
1737 u_int offset=genjmp(a);
1738 output_w32(0x5a000000|offset);
1739}
1740void emit_jl(int a)
1741{
1742 assem_debug("blt %x\n",a);
1743 u_int offset=genjmp(a);
1744 output_w32(0xba000000|offset);
1745}
1746void emit_jge(int a)
1747{
1748 assem_debug("bge %x\n",a);
1749 u_int offset=genjmp(a);
1750 output_w32(0xaa000000|offset);
1751}
1752void emit_jno(int a)
1753{
1754 assem_debug("bvc %x\n",a);
1755 u_int offset=genjmp(a);
1756 output_w32(0x7a000000|offset);
1757}
1758void emit_jc(int a)
1759{
1760 assem_debug("bcs %x\n",a);
1761 u_int offset=genjmp(a);
1762 output_w32(0x2a000000|offset);
1763}
1764void emit_jcc(int a)
1765{
1766 assem_debug("bcc %x\n",a);
1767 u_int offset=genjmp(a);
1768 output_w32(0x3a000000|offset);
1769}
1770
/* Unimplemented placeholder: logs the mnemonic, then fails assert(0). */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
/* Unimplemented placeholder: logs the mnemonic, then fails assert(0). */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/* Unimplemented placeholder: logs the mnemonic, then fails assert(0). */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1786void emit_pushreg(u_int r)
1787{
1788 assem_debug("push %%%s\n",regname[r]);
1789 assert(0);
1790}
1791void emit_popreg(u_int r)
1792{
1793 assem_debug("pop %%%s\n",regname[r]);
1794 assert(0);
1795}
1796void emit_callreg(u_int r)
1797{
c6c3b1b3 1798 assert(r<15);
1799 assem_debug("blx %s\n",regname[r]);
1800 output_w32(0xe12fff30|r);
57871462 1801}
1802void emit_jmpreg(u_int r)
1803{
1804 assem_debug("mov pc,%s\n",regname[r]);
1805 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1806}
1807
1808void emit_readword_indexed(int offset, int rs, int rt)
1809{
1810 assert(offset>-4096&&offset<4096);
1811 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1812 if(offset>=0) {
1813 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1814 }else{
1815 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1816 }
1817}
1818void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1819{
1820 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1821 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1822}
c6c3b1b3 1823void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1824{
1825 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1826 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1827}
1828void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1829{
1830 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1831 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1832}
1833void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1834{
1835 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1836 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1837}
1838void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1839{
1840 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1841 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1842}
1843void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1844{
1845 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1846 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1847}
/*
 * Word load with an optional map register.  With map < 0 this is a plain
 * offset load; otherwise addr must be 0 and the load is indexed by
 * map << 2.
 */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/*
 * Load two consecutive words into rh (skipped when rh < 0) and rl.
 * Without a map register the words are read at addr and addr+4.  With a
 * map register, the map register itself is incremented by 1 between the
 * two loads (so it is modified by the emitted code), and rh must not be rs.
 */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if (map >= 0) {
    assert(rh!=rs);
    if (rh >= 0) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map, 1, map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if (rh >= 0) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr + 4, rs, rl);
}
1868void emit_movsbl_indexed(int offset, int rs, int rt)
1869{
1870 assert(offset>-256&&offset<256);
1871 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1872 if(offset>=0) {
1873 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1874 }else{
1875 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1876 }
1877}
1878void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1879{
1880 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1881 else {
1882 if(addr==0) {
1883 emit_shlimm(map,2,map);
1884 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1885 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1886 }else{
1887 assert(addr>-256&&addr<256);
1888 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1889 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1890 emit_movsbl_indexed(addr, rt, rt);
1891 }
1892 }
1893}
1894void emit_movswl_indexed(int offset, int rs, int rt)
1895{
1896 assert(offset>-256&&offset<256);
1897 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1898 if(offset>=0) {
1899 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1900 }else{
1901 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1902 }
1903}
1904void emit_movzbl_indexed(int offset, int rs, int rt)
1905{
1906 assert(offset>-4096&&offset<4096);
1907 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1908 if(offset>=0) {
1909 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1910 }else{
1911 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1912 }
1913}
1914void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1915{
1916 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1917 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1918}
/*
 * Unsigned byte load with an optional map register.  With map < 0 this is
 * a plain offset load.  Otherwise the load is indexed by map << 2; a
 * non-zero addr is first added to rs, using rt as the intermediate.
 */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if (addr == 0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs, addr, rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1931void emit_movzwl_indexed(int offset, int rs, int rt)
1932{
1933 assert(offset>-256&&offset<256);
1934 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1935 if(offset>=0) {
1936 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1937 }else{
1938 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1939 }
1940}
054175e9 1941static void emit_ldrd(int offset, int rs, int rt)
1942{
1943 assert(offset>-256&&offset<256);
1944 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1945 if(offset>=0) {
1946 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1947 }else{
1948 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1949 }
1950}
57871462 1951void emit_readword(int addr, int rt)
1952{
1953 u_int offset = addr-(u_int)&dynarec_local;
1954 assert(offset<4096);
1955 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1956 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1957}
1958void emit_movsbl(int addr, int rt)
1959{
1960 u_int offset = addr-(u_int)&dynarec_local;
1961 assert(offset<256);
1962 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1963 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1964}
1965void emit_movswl(int addr, int rt)
1966{
1967 u_int offset = addr-(u_int)&dynarec_local;
1968 assert(offset<256);
1969 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1970 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1971}
1972void emit_movzbl(int addr, int rt)
1973{
1974 u_int offset = addr-(u_int)&dynarec_local;
1975 assert(offset<4096);
1976 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1977 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1978}
1979void emit_movzwl(int addr, int rt)
1980{
1981 u_int offset = addr-(u_int)&dynarec_local;
1982 assert(offset<256);
1983 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1984 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1985}
1986void emit_movzwl_reg(int rs, int rt)
1987{
1988 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1989 assert(0);
1990}
1991
1992void emit_xchg(int rs, int rt)
1993{
1994 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1995 assert(0);
1996}
1997void emit_writeword_indexed(int rt, int offset, int rs)
1998{
1999 assert(offset>-4096&&offset<4096);
2000 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
2001 if(offset>=0) {
2002 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
2003 }else{
2004 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2005 }
2006}
2007void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2008{
2009 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2010 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2011}
/*
 * Word store with an optional map register.  With map < 0 this is a plain
 * offset store; otherwise addr must be 0 and the store is indexed by
 * map << 2.  The temp argument is not used here.
 */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/*
 * Store the register pair rh:rl as two consecutive words.
 *  - map < 0: plain offset stores at addr (skipped when rh < 0) and addr+4.
 *  - map >= 0 (rh must be valid): the high word is stored through map.
 *    When temp differs from rs, temp = map + 1 is computed up front and
 *    used to index the low-word store; otherwise rs itself is advanced by
 *    4 and the low word is stored through map again.
 */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }
  assert(rh>=0);
  const int temp_is_free = (temp != rs);
  if (temp_is_free) emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if (temp_is_free) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2036void emit_writehword_indexed(int rt, int offset, int rs)
2037{
2038 assert(offset>-256&&offset<256);
2039 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2040 if(offset>=0) {
2041 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2042 }else{
2043 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2044 }
2045}
2046void emit_writebyte_indexed(int rt, int offset, int rs)
2047{
2048 assert(offset>-4096&&offset<4096);
2049 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2050 if(offset>=0) {
2051 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2052 }else{
2053 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2054 }
2055}
2056void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2057{
2058 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2059 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2060}
/*
 * Byte store with an optional map register.  With map < 0 this is a plain
 * offset store.  Otherwise the store is indexed by map << 2; a non-zero
 * addr is first added to rs into temp, which then serves as the base.
 */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if (addr == 0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs, addr, temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
b96d3df7 2073void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2074{
2075 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2076 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2077}
2078void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2079{
2080 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2081 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2082}
2083void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2084{
2085 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2086 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2087}
57871462 2088void emit_writeword(int rt, int addr)
2089{
2090 u_int offset = addr-(u_int)&dynarec_local;
2091 assert(offset<4096);
2092 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2093 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2094}
2095void emit_writehword(int rt, int addr)
2096{
2097 u_int offset = addr-(u_int)&dynarec_local;
2098 assert(offset<256);
2099 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2100 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2101}
2102void emit_writebyte(int rt, int addr)
2103{
2104 u_int offset = addr-(u_int)&dynarec_local;
2105 assert(offset<4096);
74426039 2106 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2107 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2108}
/* Unimplemented placeholder: logs the mnemonic, then fails assert(0). */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
/* Unimplemented placeholder: logs the mnemonic, then fails assert(0). */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
2119
2120void emit_mul(int rs)
2121{
2122 assem_debug("mul %%%s\n",regname[rs]);
2123 assert(0);
2124}
2125void emit_imul(int rs)
2126{
2127 assem_debug("imul %%%s\n",regname[rs]);
2128 assert(0);
2129}
2130void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2131{
2132 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2133 assert(rs1<16);
2134 assert(rs2<16);
2135 assert(hi<16);
2136 assert(lo<16);
2137 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2138}
2139void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2140{
2141 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2142 assert(rs1<16);
2143 assert(rs2<16);
2144 assert(hi<16);
2145 assert(lo<16);
2146 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2147}
2148
2149void emit_div(int rs)
2150{
2151 assem_debug("div %%%s\n",regname[rs]);
2152 assert(0);
2153}
2154void emit_idiv(int rs)
2155{
2156 assem_debug("idiv %%%s\n",regname[rs]);
2157 assert(0);
2158}
/* Not implemented in this backend: logs the request, then asserts. */
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2164
2165void emit_clz(int rs,int rt)
2166{
2167 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2168 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2169}
2170
2171void emit_subcs(int rs1,int rs2,int rt)
2172{
2173 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2174 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2175}
2176
2177void emit_shrcc_imm(int rs,u_int imm,int rt)
2178{
2179 assert(imm>0);
2180 assert(imm<32);
2181 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2182 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2183}
2184
b1be1eee 2185void emit_shrne_imm(int rs,u_int imm,int rt)
2186{
2187 assert(imm>0);
2188 assert(imm<32);
2189 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2190 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2191}
2192
57871462 2193void emit_negmi(int rs, int rt)
2194{
2195 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2196 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2197}
2198
2199void emit_negsmi(int rs, int rt)
2200{
2201 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2202 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2203}
2204
2205void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2206{
2207 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2208 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2209}
2210
2211void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2212{
2213 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2214 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2215}
2216
2217void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2218{
2219 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2220 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2221}
2222
2223void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2224{
2225 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2226 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2227}
2228
2229void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2230{
2231 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2232 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2233}
2234
2235void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2236{
2237 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2238 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2239}
2240
2241void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2242{
2243 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2244 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2245}
2246
2247void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2248{
2249 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2250 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2251}
2252
2253void emit_teq(int rs, int rt)
2254{
2255 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2256 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2257}
2258
2259void emit_rsbimm(int rs, int imm, int rt)
2260{
2261 u_int armval;
cfbd3c6e 2262 genimm_checked(imm,&armval);
57871462 2263 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2264 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2265}
2266
2267// Load 2 immediates optimizing for small code size
2268void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2269{
2270 emit_movimm(imm1,rt1);
2271 u_int armval;
2272 if(genimm(imm2-imm1,&armval)) {
2273 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2274 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2275 }else if(genimm(imm1-imm2,&armval)) {
2276 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2277 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2278 }
2279 else emit_movimm(imm2,rt2);
2280}
2281
2282// Conditionally select one of two immediates, optimizing for small code size
2283// This will only be called if HAVE_CMOV_IMM is defined
2284void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2285{
2286 u_int armval;
2287 if(genimm(imm2-imm1,&armval)) {
2288 emit_movimm(imm1,rt);
2289 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2290 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2291 }else if(genimm(imm1-imm2,&armval)) {
2292 emit_movimm(imm1,rt);
2293 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2294 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2295 }
2296 else {
2297 #ifdef ARMv5_ONLY
2298 emit_movimm(imm1,rt);
2299 add_literal((int)out,imm2);
2300 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2301 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2302 #else
2303 emit_movw(imm1&0x0000FFFF,rt);
2304 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2305 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2306 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2307 }
2308 emit_movt(imm1&0xFFFF0000,rt);
2309 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2310 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2311 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2312 }
2313 #endif
2314 }
2315}
2316
// special case for checking invalid_code
// Not implemented in this backend: asserts unconditionally.
void emit_cmpmem_indexedsr12_imm(int addr, int r, int imm)
{
  assert(0);
}
2322
2323// special case for checking invalid_code
2324void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2325{
2326 assert(imm<128&&imm>=0);
2327 assert(r>=0&&r<16);
2328 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2329 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2330 emit_cmpimm(HOST_TEMPREG,imm);
2331}
2332
2333// special case for tlb mapping
2334void emit_addsr12(int rs1,int rs2,int rt)
2335{
2336 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2337 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2338}
2339
0bbd1454 2340void emit_callne(int a)
2341{
2342 assem_debug("blne %x\n",a);
2343 u_int offset=genjmp(a);
2344 output_w32(0x1b000000|offset);
2345}
2346
// Used to preload hash table entries
// NOTE(review): this writes two raw bytes, a modrm byte and a 32-bit address,
// which does not look like an ARM instruction encoding; it appears to be
// carried over from another backend. Confirm whether it is ever called here.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n", (int)addr);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0, 5, 1);
  output_w32((int)addr);
}
2356void emit_prefetchreg(int r)
2357{
2358 assem_debug("pld %s\n",regname[r]);
2359 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2360}
2361
2362// Special case for mini_ht
2363void emit_ldreq_indexed(int rs, u_int offset, int rt)
2364{
2365 assert(offset<4096);
2366 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2367 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2368}
2369
2370void emit_flds(int r,int sr)
2371{
2372 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2373 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2374}
2375
2376void emit_vldr(int r,int vr)
2377{
2378 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2379 output_w32(0xed900b00|(vr<<12)|(r<<16));
2380}
2381
2382void emit_fsts(int sr,int r)
2383{
2384 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2385 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2386}
2387
2388void emit_vstr(int vr,int r)
2389{
2390 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2391 output_w32(0xed800b00|(vr<<12)|(r<<16));
2392}
2393
2394void emit_ftosizs(int s,int d)
2395{
2396 assem_debug("ftosizs s%d,s%d\n",d,s);
2397 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2398}
2399
2400void emit_ftosizd(int s,int d)
2401{
2402 assem_debug("ftosizd s%d,d%d\n",d,s);
2403 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2404}
2405
2406void emit_fsitos(int s,int d)
2407{
2408 assem_debug("fsitos s%d,s%d\n",d,s);
2409 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2410}
2411
2412void emit_fsitod(int s,int d)
2413{
2414 assem_debug("fsitod d%d,s%d\n",d,s);
2415 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2416}
2417
2418void emit_fcvtds(int s,int d)
2419{
2420 assem_debug("fcvtds d%d,s%d\n",d,s);
2421 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2422}
2423
2424void emit_fcvtsd(int s,int d)
2425{
2426 assem_debug("fcvtsd s%d,d%d\n",d,s);
2427 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2428}
2429
/*
 * Emit the single-precision square root "fsqrts s<d>,s<s>".
 * Only bits 0-3 of each register number are encoded.
 * The trace previously labelled the destination as a "d" register although
 * it is encoded like the single-precision destinations of the sibling
 * emitters; the message now says "s".
 */
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2435
/*
 * Emit the double-precision square root "fsqrtd d<d>,d<s>".
 * Only bits 0-2 of each register number are encoded.
 * The trace previously labelled the destination as an "s" register although
 * it is encoded like the double-precision destinations of the sibling
 * emitters; the message now says "d".
 */
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2441
/*
 * Emit the single-precision absolute value "fabss s<d>,s<s>".
 * Only bits 0-3 of each register number are encoded.
 * The trace previously labelled the destination as a "d" register; it is
 * encoded as a single-precision register, so the message now says "s".
 */
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2447
/*
 * Emit the double-precision absolute value "fabsd d<d>,d<s>".
 * Only bits 0-2 of each register number are encoded.
 * The trace previously labelled the destination as an "s" register; it is
 * encoded as a double-precision register, so the message now says "d".
 */
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2453
/*
 * Emit the single-precision negate "fnegs s<d>,s<s>".
 * Only bits 0-3 of each register number are encoded.
 * The trace previously labelled the destination as a "d" register; it is
 * encoded as a single-precision register, so the message now says "s".
 */
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2459
/*
 * Emit the double-precision negate "fnegd d<d>,d<s>".
 * Only bits 0-2 of each register number are encoded.
 * The trace previously labelled the destination as an "s" register; it is
 * encoded as a double-precision register, so the message now says "d".
 */
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2465
2466void emit_fadds(int s1,int s2,int d)
2467{
2468 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2469 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2470}
2471
2472void emit_faddd(int s1,int s2,int d)
2473{
2474 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2475 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2476}
2477
2478void emit_fsubs(int s1,int s2,int d)
2479{
2480 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2481 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2482}
2483
2484void emit_fsubd(int s1,int s2,int d)
2485{
2486 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2487 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2488}
2489
2490void emit_fmuls(int s1,int s2,int d)
2491{
2492 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2493 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2494}
2495
2496void emit_fmuld(int s1,int s2,int d)
2497{
2498 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2499 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2500}
2501
2502void emit_fdivs(int s1,int s2,int d)
2503{
2504 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2505 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2506}
2507
2508void emit_fdivd(int s1,int s2,int d)
2509{
2510 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2511 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2512}
2513
2514void emit_fcmps(int x,int y)
2515{
2516 assem_debug("fcmps s14, s15\n");
2517 output_w32(0xeeb47a67);
2518}
2519
2520void emit_fcmpd(int x,int y)
2521{
2522 assem_debug("fcmpd d6, d7\n");
2523 output_w32(0xeeb46b47);
2524}
2525
2526void emit_fmstat()
2527{
2528 assem_debug("fmstat\n");
2529 output_w32(0xeef1fa10);
2530}
2531
2532void emit_bicne_imm(int rs,int imm,int rt)
2533{
2534 u_int armval;
cfbd3c6e 2535 genimm_checked(imm,&armval);
57871462 2536 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2537 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2538}
2539
2540void emit_biccs_imm(int rs,int imm,int rt)
2541{
2542 u_int armval;
cfbd3c6e 2543 genimm_checked(imm,&armval);
57871462 2544 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2545 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2546}
2547
2548void emit_bicvc_imm(int rs,int imm,int rt)
2549{
2550 u_int armval;
cfbd3c6e 2551 genimm_checked(imm,&armval);
57871462 2552 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2553 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2554}
2555
2556void emit_bichi_imm(int rs,int imm,int rt)
2557{
2558 u_int armval;
cfbd3c6e 2559 genimm_checked(imm,&armval);
57871462 2560 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2561 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2562}
2563
2564void emit_orrvs_imm(int rs,int imm,int rt)
2565{
2566 u_int armval;
cfbd3c6e 2567 genimm_checked(imm,&armval);
57871462 2568 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2569 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2570}
2571
b9b61529 2572void emit_orrne_imm(int rs,int imm,int rt)
2573{
2574 u_int armval;
cfbd3c6e 2575 genimm_checked(imm,&armval);
b9b61529 2576 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2577 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2578}
2579
2580void emit_andne_imm(int rs,int imm,int rt)
2581{
2582 u_int armval;
cfbd3c6e 2583 genimm_checked(imm,&armval);
b9b61529 2584 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2585 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2586}
2587
57871462 2588void emit_jno_unlikely(int a)
2589{
2590 //emit_jno(a);
2591 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2592 output_w32(0x72800000|rd_rn_rm(15,15,0));
2593}
2594
054175e9 2595static void save_regs_all(u_int reglist)
57871462 2596{
054175e9 2597 int i;
57871462 2598 if(!reglist) return;
2599 assem_debug("stmia fp,{");
054175e9 2600 for(i=0;i<16;i++)
2601 if(reglist&(1<<i))
2602 assem_debug("r%d,",i);
57871462 2603 assem_debug("}\n");
2604 output_w32(0xe88b0000|reglist);
2605}
054175e9 2606static void restore_regs_all(u_int reglist)
57871462 2607{
054175e9 2608 int i;
57871462 2609 if(!reglist) return;
2610 assem_debug("ldmia fp,{");
054175e9 2611 for(i=0;i<16;i++)
2612 if(reglist&(1<<i))
2613 assem_debug("r%d,",i);
57871462 2614 assem_debug("}\n");
2615 output_w32(0xe89b0000|reglist);
2616}
054175e9 2617// Save registers before function call
2618static void save_regs(u_int reglist)
2619{
2620 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2621 save_regs_all(reglist);
2622}
2623// Restore registers after function call
2624static void restore_regs(u_int reglist)
2625{
2626 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2627 restore_regs_all(reglist);
2628}
57871462 2629
2630// Write back consts using r14 so we don't disturb the other registers
2631void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2632{
2633 int hr;
2634 for(hr=0;hr<HOST_REGS;hr++) {
2635 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2636 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2637 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2638 int value=constmap[i][hr];
2639 if(value==0) {
2640 emit_zeroreg(HOST_TEMPREG);
2641 }
2642 else {
2643 emit_movimm(value,HOST_TEMPREG);
2644 }
2645 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2646#ifndef FORCE32
57871462 2647 if((i_is32>>i_regmap[hr])&1) {
2648 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2649 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2650 }
24385cae 2651#endif
57871462 2652 }
2653 }
2654 }
2655 }
2656}
2657
2658/* Stubs/epilogue */
2659
2660void literal_pool(int n)
2661{
2662 if(!literalcount) return;
2663 if(n) {
2664 if((int)out-literals[0][0]<4096-n) return;
2665 }
2666 u_int *ptr;
2667 int i;
2668 for(i=0;i<literalcount;i++)
2669 {
77750690 2670 u_int l_addr=(u_int)out;
2671 int j;
2672 for(j=0;j<i;j++) {
2673 if(literals[j][1]==literals[i][1]) {
2674 //printf("dup %08x\n",literals[i][1]);
2675 l_addr=literals[j][0];
2676 break;
2677 }
2678 }
57871462 2679 ptr=(u_int *)literals[i][0];
77750690 2680 u_int offset=l_addr-(u_int)ptr-8;
57871462 2681 assert(offset<4096);
2682 assert(!(offset&3));
2683 *ptr|=offset;
77750690 2684 if(l_addr==(u_int)out) {
2685 literals[i][0]=l_addr; // remember for dupes
2686 output_w32(literals[i][1]);
2687 }
57871462 2688 }
2689 literalcount=0;
2690}
2691
2692void literal_pool_jumpover(int n)
2693{
2694 if(!literalcount) return;
2695 if(n) {
2696 if((int)out-literals[0][0]<4096-n) return;
2697 }
2698 int jaddr=(int)out;
2699 emit_jmp(0);
2700 literal_pool(0);
2701 set_jump_target(jaddr,(int)out);
2702}
2703
2704emit_extjump2(int addr, int target, int linker)
2705{
2706 u_char *ptr=(u_char *)addr;
2707 assert((ptr[3]&0x0e)==0xa);
2708 emit_loadlp(target,0);
2709 emit_loadlp(addr,1);
24385cae 2710 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2711 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2712//DEBUG >
2713#ifdef DEBUG_CYCLE_COUNT
2714 emit_readword((int)&last_count,ECX);
2715 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2716 emit_readword((int)&next_interupt,ECX);
2717 emit_writeword(HOST_CCREG,(int)&Count);
2718 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2719 emit_writeword(ECX,(int)&last_count);
2720#endif
2721//DEBUG <
2722 emit_jmp(linker);
2723}
2724
2725emit_extjump(int addr, int target)
2726{
2727 emit_extjump2(addr, target, (int)dyna_linker);
2728}
2729emit_extjump_ds(int addr, int target)
2730{
2731 emit_extjump2(addr, target, (int)dyna_linker_ds);
2732}
2733
13e35c04 2734// put rt_val into rt, potentially making use of rs with value rs_val
2735static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2736{
8575a877 2737 u_int armval;
2738 int diff;
2739 if(genimm(rt_val,&armval)) {
2740 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2741 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2742 return;
2743 }
2744 if(genimm(~rt_val,&armval)) {
2745 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2746 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2747 return;
2748 }
2749 diff=rt_val-rs_val;
2750 if(genimm(diff,&armval)) {
2751 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2752 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2753 return;
2754 }else if(genimm(-diff,&armval)) {
2755 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2756 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2757 return;
2758 }
2759 emit_movimm(rt_val,rt);
2760}
2761
// return 1 if above function can do it's job cheaply
// That is: the values are equal, or their difference (in either direction),
// after dropping trailing pairs of zero bits, fits in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  int diff;
  int pass;
  if(v1==v2) return 1;
  diff=v2-v1;
  for(pass=0;pass<2;pass++) {
    // first pass checks diff, second pass checks its negation
    u_int bits=(pass==0)?diff:-diff;
    while(bits!=0&&(bits&3)==0)
      bits>>=2;
    if(bits<0x100) return 1;
  }
  return 0;
}
cbbab9cd 2777
// trashes r2
// Moves host register a0 into r0 and a1 into r1, ordering the moves so that
// neither source is overwritten before it is read. A negative register number
// is skipped in the paths that test for it.
static void pass_args(int a0, int a1)
{
  if(a1==0&&a0==1) {
    // must swap
    emit_mov(a0,2);
    emit_mov(a1,1);
    emit_mov(2,0);
    return;
  }
  if(a1==0&&a0!=0) {
    // a1 currently lives in r0: move it out before r0 is overwritten
    emit_mov(a1,1);
    if(a0>=0) emit_mov(a0,0);
    return;
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2794
b1be1eee 2795static void mov_loadtype_adj(int type,int rs,int rt)
2796{
2797 switch(type) {
2798 case LOADB_STUB: emit_signextend8(rs,rt); break;
2799 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2800 case LOADH_STUB: emit_signextend16(rs,rt); break;
2801 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2802 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2803 default: assert(0);
2804 }
2805}
2806
2807#ifdef PCSX
2808#include "pcsxmem.h"
2809#include "pcsxmem_inline.c"
2810#endif
2811
57871462 2812do_readstub(int n)
2813{
2814 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2815 literal_pool(256);
2816 set_jump_target(stubs[n][1],(int)out);
2817 int type=stubs[n][0];
2818 int i=stubs[n][3];
2819 int rs=stubs[n][4];
2820 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2821 u_int reglist=stubs[n][7];
2822 signed char *i_regmap=i_regs->regmap;
2823 int addr=get_reg(i_regmap,AGEN1+(i&1));
2824 int rth,rt;
2825 int ds;
b9b61529 2826 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2827 rth=get_reg(i_regmap,FTEMP|64);
2828 rt=get_reg(i_regmap,FTEMP);
2829 }else{
2830 rth=get_reg(i_regmap,rt1[i]|64);
2831 rt=get_reg(i_regmap,rt1[i]);
2832 }
2833 assert(rs>=0);
c6c3b1b3 2834#ifdef PCSX
2835 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2836 reglist|=(1<<rs);
2837 for(r=0;r<=12;r++) {
2838 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2839 temp=r; break;
2840 }
2841 }
db829eeb 2842 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2843 reglist&=~(1<<rt);
2844 if(temp==-1) {
2845 save_regs(reglist);
2846 regs_saved=1;
2847 temp=(rs==0)?2:0;
2848 }
2849 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2850 temp2=1;
2851 emit_readword((int)&mem_rtab,temp);
2852 emit_shrimm(rs,12,temp2);
2853 emit_readword_dualindexedx4(temp,temp2,temp2);
2854 emit_lsls_imm(temp2,1,temp2);
2855 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2856 switch(type) {
2857 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2858 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2859 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2860 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2861 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2862 }
2863 }
2864 if(regs_saved) {
2865 restore_jump=(int)out;
2866 emit_jcc(0); // jump to reg restore
2867 }
2868 else
2869 emit_jcc(stubs[n][2]); // return address
2870
2871 if(!regs_saved)
2872 save_regs(reglist);
2873 int handler=0;
2874 if(type==LOADB_STUB||type==LOADBU_STUB)
2875 handler=(int)jump_handler_read8;
2876 if(type==LOADH_STUB||type==LOADHU_STUB)
2877 handler=(int)jump_handler_read16;
2878 if(type==LOADW_STUB)
2879 handler=(int)jump_handler_read32;
2880 assert(handler!=0);
b96d3df7 2881 pass_args(rs,temp2);
c6c3b1b3 2882 int cc=get_reg(i_regmap,CCREG);
2883 if(cc<0)
2884 emit_loadreg(CCREG,2);
2573466a 2885 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2886 emit_call(handler);
2887 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2888 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2889 }
2890 if(restore_jump)
2891 set_jump_target(restore_jump,(int)out);
2892 restore_regs(reglist);
2893 emit_jmp(stubs[n][2]); // return address
2894#else // !PCSX
57871462 2895 if(addr<0) addr=rt;
535d208a 2896 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2897 assert(addr>=0);
2898 int ftable=0;
2899 if(type==LOADB_STUB||type==LOADBU_STUB)
2900 ftable=(int)readmemb;
2901 if(type==LOADH_STUB||type==LOADHU_STUB)
2902 ftable=(int)readmemh;
2903 if(type==LOADW_STUB)
2904 ftable=(int)readmem;
24385cae 2905#ifndef FORCE32
57871462 2906 if(type==LOADD_STUB)
2907 ftable=(int)readmemd;
24385cae 2908#endif
2909 assert(ftable!=0);
57871462 2910 emit_writeword(rs,(int)&address);
2911 //emit_pusha();
2912 save_regs(reglist);
97a238a6 2913#ifndef PCSX
57871462 2914 ds=i_regs!=&regs[i];
2915 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2916 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2917 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2918 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2919 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2920#endif
57871462 2921 emit_shrimm(rs,16,1);
2922 int cc=get_reg(i_regmap,CCREG);
2923 if(cc<0) {
2924 emit_loadreg(CCREG,2);
2925 }
2926 emit_movimm(ftable,0);
2927 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2928#ifndef PCSX
57871462 2929 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2930#endif
57871462 2931 //emit_readword((int)&last_count,temp);
2932 //emit_add(cc,temp,cc);
2933 //emit_writeword(cc,(int)&Count);
2934 //emit_mov(15,14);
2935 emit_call((int)&indirect_jump_indexed);
2936 //emit_callreg(rs);
2937 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2938#ifndef PCSX
57871462 2939 // We really shouldn't need to update the count here,
2940 // but not doing so causes random crashes...
2941 emit_readword((int)&Count,HOST_TEMPREG);
2942 emit_readword((int)&next_interupt,2);
2943 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2944 emit_writeword(2,(int)&last_count);
2945 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2946 if(cc<0) {
2947 emit_storereg(CCREG,HOST_TEMPREG);
2948 }
f51dc36c 2949#endif
57871462 2950 //emit_popa();
2951 restore_regs(reglist);
2952 //if((cc=get_reg(regmap,CCREG))>=0) {
2953 // emit_loadreg(CCREG,cc);
2954 //}
f18c0f46 2955 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2956 assert(rt>=0);
2957 if(type==LOADB_STUB)
2958 emit_movsbl((int)&readmem_dword,rt);
2959 if(type==LOADBU_STUB)
2960 emit_movzbl((int)&readmem_dword,rt);
2961 if(type==LOADH_STUB)
2962 emit_movswl((int)&readmem_dword,rt);
2963 if(type==LOADHU_STUB)
2964 emit_movzwl((int)&readmem_dword,rt);
2965 if(type==LOADW_STUB)
2966 emit_readword((int)&readmem_dword,rt);
2967 if(type==LOADD_STUB) {
2968 emit_readword((int)&readmem_dword,rt);
2969 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2970 }
57871462 2971 }
2972 emit_jmp(stubs[n][2]); // return address
c6c3b1b3 2973#endif // !PCSX
57871462 2974}
2975
#ifdef PCSX
// return memhandler, or get directly accessable address and return 0
// table is indexed by addr>>12. An entry with bit 31 clear yields a directly
// accessible address (entry<<1, plus addr) stored in *addr_host. An entry with
// bit 31 set points (after <<1) to a second-level table that is indexed by
// access width; its entries follow the same bit-31 convention.
u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
{
  // Unsigned mask: 1<<31 on a signed int is undefined behavior
  const u_int handler_flag=1u<<31;
  const u_int page_off=addr&0xfff;
  u_int l1,l2=0;
  l1=((u_int *)table)[addr>>12];
  if((l1&handler_flag)==0) {
    u_int v=l1<<1;
    *addr_host=v+addr;
    return 0;
  }
  l1<<=1;
  if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
    l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + page_off];
  else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
    l2=((u_int *)l1)[0x1000/4 + page_off/2];
  else
    l2=((u_int *)l1)[page_off/4];
  if((l2&handler_flag)==0) {
    u_int v=l2<<1;
    *addr_host=v+page_off;
    return 0;
  }
  return l2<<1;
}
#endif
3004
57871462 3005inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3006{
3007 int rs=get_reg(regmap,target);
3008 int rth=get_reg(regmap,target|64);
3009 int rt=get_reg(regmap,target);
535d208a 3010 if(rs<0) rs=get_reg(regmap,-1);
57871462 3011 assert(rs>=0);
c6c3b1b3 3012#ifdef PCSX
b1be1eee 3013 u_int handler,host_addr=0,is_dynamic,far_call=0;
3014 int cc=get_reg(regmap,CCREG);
3015 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
3016 return;
c6c3b1b3 3017 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
3018 if (handler==0) {
db829eeb 3019 if(rt<0||rt1[i]==0)
c6c3b1b3 3020 return;
13e35c04 3021 if(addr!=host_addr)
3022 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 3023 switch(type) {
3024 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
3025 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
3026 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
3027 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
3028 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
3029 default: assert(0);
3030 }
3031 return;
3032 }
b1be1eee 3033 is_dynamic=pcsxmem_is_handler_dynamic(addr);
3034 if(is_dynamic) {
3035 if(type==LOADB_STUB||type==LOADBU_STUB)
3036 handler=(int)jump_handler_read8;
3037 if(type==LOADH_STUB||type==LOADHU_STUB)
3038 handler=(int)jump_handler_read16;
3039 if(type==LOADW_STUB)
3040 handler=(int)jump_handler_read32;
3041 }
c6c3b1b3 3042
3043 // call a memhandler
db829eeb 3044 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 3045 reglist&=~(1<<rt);
3046 save_regs(reglist);
3047 if(target==0)
3048 emit_movimm(addr,0);
3049 else if(rs!=0)
3050 emit_mov(rs,0);
c6c3b1b3 3051 int offset=(int)handler-(int)out-8;
3052 if(offset<-33554432||offset>=33554432) {
3053 // unreachable memhandler, a plugin func perhaps
b1be1eee 3054 emit_movimm(handler,12);
3055 far_call=1;
3056 }
3057 if(cc<0)
3058 emit_loadreg(CCREG,2);
3059 if(is_dynamic) {
3060 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
3061 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 3062 }
b1be1eee 3063 else {
3064 emit_readword((int)&last_count,3);
3065 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3066 emit_add(2,3,2);
3067 emit_writeword(2,(int)&Count);
3068 }
3069
3070 if(far_call)
3071 emit_callreg(12);
c6c3b1b3 3072 else
3073 emit_call(handler);
b1be1eee 3074
db829eeb 3075 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 3076 switch(type) {
3077 case LOADB_STUB: emit_signextend8(0,rt); break;
3078 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
3079 case LOADH_STUB: emit_signextend16(0,rt); break;
3080 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
3081 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
3082 default: assert(0);
3083 }
3084 }
3085 restore_regs(reglist);
3086#else // if !PCSX
57871462 3087 int ftable=0;
3088 if(type==LOADB_STUB||type==LOADBU_STUB)
3089 ftable=(int)readmemb;
3090 if(type==LOADH_STUB||type==LOADHU_STUB)
3091 ftable=(int)readmemh;
3092 if(type==LOADW_STUB)
3093 ftable=(int)readmem;
24385cae 3094#ifndef FORCE32
57871462 3095 if(type==LOADD_STUB)
3096 ftable=(int)readmemd;
24385cae 3097#endif
3098 assert(ftable!=0);
fd99c415 3099 if(target==0)
3100 emit_movimm(addr,rs);
57871462 3101 emit_writeword(rs,(int)&address);
3102 //emit_pusha();
3103 save_regs(reglist);
0c1fe38b 3104#ifndef PCSX
3105 if((signed int)addr>=(signed int)0xC0000000) {
3106 // Theoretically we can have a pagefault here, if the TLB has never
3107 // been enabled and the address is outside the range 80000000..BFFFFFFF
3108 // Write out the registers so the pagefault can be handled. This is
3109 // a very rare case and likely represents a bug.
3110 int ds=regmap!=regs[i].regmap;
3111 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3112 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3113 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3114 }
3115#endif
57871462 3116 //emit_shrimm(rs,16,1);
3117 int cc=get_reg(regmap,CCREG);
3118 if(cc<0) {
3119 emit_loadreg(CCREG,2);
3120 }
3121 //emit_movimm(ftable,0);
3122 emit_movimm(((u_int *)ftable)[addr>>16],0);
3123 //emit_readword((int)&last_count,12);
2573466a 3124 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3125#ifndef PCSX
57871462 3126 if((signed int)addr>=(signed int)0xC0000000) {
3127 // Pagefault address
3128 int ds=regmap!=regs[i].regmap;
3129 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3130 }
f51dc36c 3131#endif
57871462 3132 //emit_add(12,2,2);
3133 //emit_writeword(2,(int)&Count);
3134 //emit_call(((u_int *)ftable)[addr>>16]);
3135 emit_call((int)&indirect_jump);
f51dc36c 3136#ifndef PCSX
57871462 3137 // We really shouldn't need to update the count here,
3138 // but not doing so causes random crashes...
3139 emit_readword((int)&Count,HOST_TEMPREG);
3140 emit_readword((int)&next_interupt,2);
2573466a 3141 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3142 emit_writeword(2,(int)&last_count);
3143 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3144 if(cc<0) {
3145 emit_storereg(CCREG,HOST_TEMPREG);
3146 }
f51dc36c 3147#endif
57871462 3148 //emit_popa();
3149 restore_regs(reglist);
fd99c415 3150 if(rt>=0) {
3151 if(type==LOADB_STUB)
3152 emit_movsbl((int)&readmem_dword,rt);
3153 if(type==LOADBU_STUB)
3154 emit_movzbl((int)&readmem_dword,rt);
3155 if(type==LOADH_STUB)
3156 emit_movswl((int)&readmem_dword,rt);
3157 if(type==LOADHU_STUB)
3158 emit_movzwl((int)&readmem_dword,rt);
3159 if(type==LOADW_STUB)
3160 emit_readword((int)&readmem_dword,rt);
3161 if(type==LOADD_STUB) {
3162 emit_readword((int)&readmem_dword,rt);
3163 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3164 }
57871462 3165 }
c6c3b1b3 3166#endif // !PCSX
57871462 3167}
3168
3169do_writestub(int n)
3170{
3171 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3172 literal_pool(256);
3173 set_jump_target(stubs[n][1],(int)out);
3174 int type=stubs[n][0];
3175 int i=stubs[n][3];
3176 int rs=stubs[n][4];
3177 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3178 u_int reglist=stubs[n][7];
3179 signed char *i_regmap=i_regs->regmap;
3180 int addr=get_reg(i_regmap,AGEN1+(i&1));
3181 int rth,rt,r;
3182 int ds;
b9b61529 3183 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3184 rth=get_reg(i_regmap,FTEMP|64);
3185 rt=get_reg(i_regmap,r=FTEMP);
3186 }else{
3187 rth=get_reg(i_regmap,rs2[i]|64);
3188 rt=get_reg(i_regmap,r=rs2[i]);
3189 }
3190 assert(rs>=0);
3191 assert(rt>=0);
b96d3df7 3192#ifdef PCSX
3193 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3194 int reglist2=reglist|(1<<rs)|(1<<rt);
3195 for(rtmp=0;rtmp<=12;rtmp++) {
3196 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3197 temp=rtmp; break;
3198 }
3199 }
3200 if(temp==-1) {
3201 save_regs(reglist);
3202 regs_saved=1;
3203 for(rtmp=0;rtmp<=3;rtmp++)
3204 if(rtmp!=rs&&rtmp!=rt)
3205 {temp=rtmp;break;}
3206 }
3207 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3208 temp2=3;
3209 emit_readword((int)&mem_wtab,temp);
3210 emit_shrimm(rs,12,temp2);
3211 emit_readword_dualindexedx4(temp,temp2,temp2);
3212 emit_lsls_imm(temp2,1,temp2);
3213 switch(type) {
3214 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3215 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3216 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3217 default: assert(0);
3218 }
3219 if(regs_saved) {
3220 restore_jump=(int)out;
3221 emit_jcc(0); // jump to reg restore
3222 }
3223 else
3224 emit_jcc(stubs[n][2]); // return address (invcode check)
3225
3226 if(!regs_saved)
3227 save_regs(reglist);
3228 int handler=0;
3229 switch(type) {
3230 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3231 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3232 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3233 }
3234 assert(handler!=0);
3235 pass_args(rs,rt);
3236 if(temp2!=3)
3237 emit_mov(temp2,3);
3238 int cc=get_reg(i_regmap,CCREG);
3239 if(cc<0)
3240 emit_loadreg(CCREG,2);
2573466a 3241 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3242 // returns new cycle_count
3243 emit_call(handler);
2573466a 3244 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3245 if(cc<0)
3246 emit_storereg(CCREG,2);
3247 if(restore_jump)
3248 set_jump_target(restore_jump,(int)out);
3249 restore_regs(reglist);
3250 ra=stubs[n][2];
b96d3df7 3251 emit_jmp(ra);
3252#else // if !PCSX
57871462 3253 if(addr<0) addr=get_reg(i_regmap,-1);
3254 assert(addr>=0);
3255 int ftable=0;
3256 if(type==STOREB_STUB)
3257 ftable=(int)writememb;
3258 if(type==STOREH_STUB)
3259 ftable=(int)writememh;
3260 if(type==STOREW_STUB)
3261 ftable=(int)writemem;
24385cae 3262#ifndef FORCE32
57871462 3263 if(type==STORED_STUB)
3264 ftable=(int)writememd;
24385cae 3265#endif
3266 assert(ftable!=0);
57871462 3267 emit_writeword(rs,(int)&address);
3268 //emit_shrimm(rs,16,rs);
3269 //emit_movmem_indexedx4(ftable,rs,rs);
3270 if(type==STOREB_STUB)
3271 emit_writebyte(rt,(int)&byte);
3272 if(type==STOREH_STUB)
3273 emit_writehword(rt,(int)&hword);
3274 if(type==STOREW_STUB)
3275 emit_writeword(rt,(int)&word);
3276 if(type==STORED_STUB) {
3d624f89 3277#ifndef FORCE32
57871462 3278 emit_writeword(rt,(int)&dword);
3279 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3280#else
3281 printf("STORED_STUB\n");
3282#endif
57871462 3283 }
3284 //emit_pusha();
3285 save_regs(reglist);
97a238a6 3286#ifndef PCSX
57871462 3287 ds=i_regs!=&regs[i];
3288 int real_rs=get_reg(i_regmap,rs1[i]);
3289 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3290 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3291 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3292 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3293#endif
57871462 3294 emit_shrimm(rs,16,1);
3295 int cc=get_reg(i_regmap,CCREG);
3296 if(cc<0) {
3297 emit_loadreg(CCREG,2);
3298 }
3299 emit_movimm(ftable,0);
3300 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3301#ifndef PCSX
57871462 3302 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3303#endif
57871462 3304 //emit_readword((int)&last_count,temp);
3305 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3306 //emit_add(cc,temp,cc);
3307 //emit_writeword(cc,(int)&Count);
3308 emit_call((int)&indirect_jump_indexed);
3309 //emit_callreg(rs);
3310 emit_readword((int)&Count,HOST_TEMPREG);
3311 emit_readword((int)&next_interupt,2);
3312 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3313 emit_writeword(2,(int)&last_count);
3314 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3315 if(cc<0) {
3316 emit_storereg(CCREG,HOST_TEMPREG);
3317 }
3318 //emit_popa();
3319 restore_regs(reglist);
3320 //if((cc=get_reg(regmap,CCREG))>=0) {
3321 // emit_loadreg(CCREG,cc);
3322 //}
3323 emit_jmp(stubs[n][2]); // return address
b96d3df7 3324#endif // !PCSX
57871462 3325}
3326
3327inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3328{
3329 int rs=get_reg(regmap,-1);
3330 int rth=get_reg(regmap,target|64);
3331 int rt=get_reg(regmap,target);
3332 assert(rs>=0);
3333 assert(rt>=0);
cbbab9cd 3334#ifdef PCSX
b96d3df7 3335 u_int handler,host_addr=0;
b96d3df7 3336 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3337 if (handler==0) {
13e35c04 3338 if(addr!=host_addr)
3339 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3340 switch(type) {
3341 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3342 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3343 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3344 default: assert(0);
3345 }
3346 return;
3347 }
3348
3349 // call a memhandler
3350 save_regs(reglist);
13e35c04 3351 pass_args(rs,rt);
b96d3df7 3352 int cc=get_reg(regmap,CCREG);
3353 if(cc<0)
3354 emit_loadreg(CCREG,2);
2573466a 3355 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3356 emit_movimm(handler,3);
3357 // returns new cycle_count
3358 emit_call((int)jump_handler_write_h);
2573466a 3359 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3360 if(cc<0)
3361 emit_storereg(CCREG,2);
3362 restore_regs(reglist);
3363#else // if !pcsx
57871462 3364 int ftable=0;
3365 if(type==STOREB_STUB)
3366 ftable=(int)writememb;
3367 if(type==STOREH_STUB)
3368 ftable=(int)writememh;
3369 if(type==STOREW_STUB)
3370 ftable=(int)writemem;
24385cae 3371#ifndef FORCE32
57871462 3372 if(type==STORED_STUB)
3373 ftable=(int)writememd;
24385cae 3374#endif
3375 assert(ftable!=0);
57871462 3376 emit_writeword(rs,(int)&address);
3377 //emit_shrimm(rs,16,rs);
3378 //emit_movmem_indexedx4(ftable,rs,rs);
3379 if(type==STOREB_STUB)
3380 emit_writebyte(rt,(int)&byte);
3381 if(type==STOREH_STUB)
3382 emit_writehword(rt,(int)&hword);
3383 if(type==STOREW_STUB)
3384 emit_writeword(rt,(int)&word);
3385 if(type==STORED_STUB) {
3d624f89 3386#ifndef FORCE32
57871462 3387 emit_writeword(rt,(int)&dword);
3388 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3389#else
3390 printf("STORED_STUB\n");
3391#endif
57871462 3392 }
3393 //emit_pusha();
3394 save_regs(reglist);
0c1fe38b 3395#ifndef PCSX
3396 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3397 if((signed int)addr>=(signed int)0xC0000000) {
3398 // Theoretically we can have a pagefault here, if the TLB has never
3399 // been enabled and the address is outside the range 80000000..BFFFFFFF
3400 // Write out the registers so the pagefault can be handled. This is
3401 // a very rare case and likely represents a bug.
3402 int ds=regmap!=regs[i].regmap;
3403 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3404 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3405 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3406 }
3407#endif
57871462 3408 //emit_shrimm(rs,16,1);
3409 int cc=get_reg(regmap,CCREG);
3410 if(cc<0) {
3411 emit_loadreg(CCREG,2);
3412 }
3413 //emit_movimm(ftable,0);
3414 emit_movimm(((u_int *)ftable)[addr>>16],0);
3415 //emit_readword((int)&last_count,12);
2573466a 3416 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3417#ifndef PCSX
57871462 3418 if((signed int)addr>=(signed int)0xC0000000) {
3419 // Pagefault address
3420 int ds=regmap!=regs[i].regmap;
3421 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3422 }
f51dc36c 3423#endif
57871462 3424 //emit_add(12,2,2);
3425 //emit_writeword(2,(int)&Count);
3426 //emit_call(((u_int *)ftable)[addr>>16]);
3427 emit_call((int)&indirect_jump);
3428 emit_readword((int)&Count,HOST_TEMPREG);
3429 emit_readword((int)&next_interupt,2);
2573466a 3430 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3431 emit_writeword(2,(int)&last_count);
3432 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3433 if(cc<0) {
3434 emit_storereg(CCREG,HOST_TEMPREG);
3435 }
3436 //emit_popa();
3437 restore_regs(reglist);
b96d3df7 3438#endif
57871462 3439}
3440
3441do_unalignedwritestub(int n)
3442{
b7918751 3443 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3444 literal_pool(256);
57871462 3445 set_jump_target(stubs[n][1],(int)out);
b7918751 3446
3447 int i=stubs[n][3];
3448 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3449 int addr=stubs[n][5];
3450 u_int reglist=stubs[n][7];
3451 signed char *i_regmap=i_regs->regmap;
3452 int temp2=get_reg(i_regmap,FTEMP);
3453 int rt;
3454 int ds, real_rs;
3455 rt=get_reg(i_regmap,rs2[i]);
3456 assert(rt>=0);
3457 assert(addr>=0);
3458 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3459 reglist|=(1<<addr);
3460 reglist&=~(1<<temp2);
3461
b96d3df7 3462#if 1
3463 // don't bother with it and call write handler
3464 save_regs(reglist);
3465 pass_args(addr,rt);
3466 int cc=get_reg(i_regmap,CCREG);
3467 if(cc<0)
3468 emit_loadreg(CCREG,2);
2573466a 3469 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3470 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3471 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3472 if(cc<0)
3473 emit_storereg(CCREG,2);
3474 restore_regs(reglist);
3475 emit_jmp(stubs[n][2]); // return address
3476#else
b7918751 3477 emit_andimm(addr,0xfffffffc,temp2);
3478 emit_writeword(temp2,(int)&address);
3479
3480 save_regs(reglist);
97a238a6 3481#ifndef PCSX
b7918751 3482 ds=i_regs!=&regs[i];
3483 real_rs=get_reg(i_regmap,rs1[i]);
3484 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3485 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3486 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3487 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3488#endif
b7918751 3489 emit_shrimm(addr,16,1);
3490 int cc=get_reg(i_regmap,CCREG);
3491 if(cc<0) {
3492 emit_loadreg(CCREG,2);
3493 }
3494 emit_movimm((u_int)readmem,0);
3495 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3496#ifndef PCSX
3497 // pagefault address
3498 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3499#endif
b7918751 3500 emit_call((int)&indirect_jump_indexed);
3501 restore_regs(reglist);
3502
3503 emit_readword((int)&readmem_dword,temp2);
3504 int temp=addr; //hmh
3505 emit_shlimm(addr,3,temp);
3506 emit_andimm(temp,24,temp);
3507#ifdef BIG_ENDIAN_MIPS
3508 if (opcode[i]==0x2e) // SWR
3509#else
3510 if (opcode[i]==0x2a) // SWL
3511#endif
3512 emit_xorimm(temp,24,temp);
3513 emit_movimm(-1,HOST_TEMPREG);
55439448 3514 if (opcode[i]==0x2a) { // SWL
b7918751 3515 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3516 emit_orrshr(rt,temp,temp2);
3517 }else{
3518 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3519 emit_orrshl(rt,temp,temp2);
3520 }
3521 emit_readword((int)&address,addr);
3522 emit_writeword(temp2,(int)&word);
3523 //save_regs(reglist); // don't need to, no state changes
3524 emit_shrimm(addr,16,1);
3525 emit_movimm((u_int)writemem,0);
3526 //emit_call((int)&indirect_jump_indexed);
3527 emit_mov(15,14);
3528 emit_readword_dualindexedx4(0,1,15);
3529 emit_readword((int)&Count,HOST_TEMPREG);
3530 emit_readword((int)&next_interupt,2);
3531 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3532 emit_writeword(2,(int)&last_count);
3533 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3534 if(cc<0) {
3535 emit_storereg(CCREG,HOST_TEMPREG);
3536 }
3537 restore_regs(reglist);
57871462 3538 emit_jmp(stubs[n][2]); // return address
b96d3df7 3539#endif
57871462 3540}
3541
/*
 * Debug helper: print the eight values passed in, in the order
 * a b c d ebp esi edi, followed in parentheses by the int stored
 * immediately before the first parameter.  esp is accepted but not printed.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int *first_param=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,first_param[-1]);
}
3546
3547do_invstub(int n)
3548{
3549 literal_pool(20);
3550 u_int reglist=stubs[n][3];
3551 set_jump_target(stubs[n][1],(int)out);
3552 save_regs(reglist);
3553 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3554 emit_call((int)&invalidate_addr);
3555 restore_regs(reglist);
3556 emit_jmp(stubs[n][2]); // return address
3557}
3558
3559int do_dirty_stub(int i)
3560{
3561 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3562 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3563 #ifdef PCSX
3564 addr=(u_int)source;
3565 #endif
57871462 3566 // Careful about the code output here, verify_dirty needs to parse it.
3567 #ifdef ARMv5_ONLY
ac545b3a 3568 emit_loadlp(addr,1);
57871462 3569 emit_loadlp((int)copy,2);
3570 emit_loadlp(slen*4,3);
3571 #else
ac545b3a 3572 emit_movw(addr&0x0000FFFF,1);
57871462 3573 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3574 emit_movt(addr&0xFFFF0000,1);
57871462 3575 emit_movt(((u_int)copy)&0xFFFF0000,2);
3576 emit_movw(slen*4,3);
3577 #endif
3578 emit_movimm(start+i*4,0);
3579 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3580 int entry=(int)out;
3581 load_regs_entry(i);
3582 if(entry==(int)out) entry=instr_addr[i];
3583 emit_jmp(instr_addr[i]);
3584 return entry;
3585}
3586
3587void do_dirty_stub_ds()
3588{
3589 // Careful about the code output here, verify_dirty needs to parse it.
3590 #ifdef ARMv5_ONLY
3591 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3592 emit_loadlp((int)copy,2);
3593 emit_loadlp(slen*4,3);
3594 #else
3595 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3596 emit_movw(((u_int)copy)&0x0000FFFF,2);
3597 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3598 emit_movt(((u_int)copy)&0xFFFF0000,2);
3599 emit_movw(slen*4,3);
3600 #endif
3601 emit_movimm(start+1,0);
3602 emit_call((int)&verify_code_ds);
3603}
3604
3605do_cop1stub(int n)
3606{
3607 literal_pool(256);
3608 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3609 set_jump_target(stubs[n][1],(int)out);
3610 int i=stubs[n][3];
3d624f89 3611// int rs=stubs[n][4];
57871462 3612 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3613 int ds=stubs[n][6];
3614 if(!ds) {
3615 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3616 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3617 }
3618 //else {printf("fp exception in delay slot\n");}
3619 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3620 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3621 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3622 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3623 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3624}
3625
63cb0298 3626#ifndef DISABLE_TLB
3627
57871462 3628/* TLB */
3629
3630int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3631{
3632 if(c) {
3633 if((signed int)addr>=(signed int)0xC0000000) {
3634 // address_generation already loaded the const
3635 emit_readword_dualindexedx4(FP,map,map);
3636 }
3637 else
3638 return -1; // No mapping
3639 }
3640 else {
3641 assert(s!=map);
3642 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3643 emit_addsr12(map,s,map);
3644 // Schedule this while we wait on the load
3645 //if(x) emit_xorimm(s,x,ar);
3646 if(shift>=0) emit_shlimm(s,3,shift);
3647 if(~a) emit_andimm(s,a,ar);
3648 emit_readword_dualindexedx4(FP,map,map);
3649 }
3650 return map;
3651}
3652int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3653{
3654 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3655 emit_test(map,map);
3656 *jaddr=(int)out;
3657 emit_js(0);
3658 }
3659 return map;
3660}
3661
3662int gen_tlb_addr_r(int ar, int map) {
3663 if(map>=0) {
3664 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3665 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3666 }
3667}
3668
3669int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3670{
3671 if(c) {
3672 if(addr<0x80800000||addr>=0xC0000000) {
3673 // address_generation already loaded the const
3674 emit_readword_dualindexedx4(FP,map,map);
3675 }
3676 else
3677 return -1; // No mapping
3678 }
3679 else {
3680 assert(s!=map);
3681 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3682 emit_addsr12(map,s,map);
3683 // Schedule this while we wait on the load
3684 //if(x) emit_xorimm(s,x,ar);
3685 emit_readword_dualindexedx4(FP,map,map);
3686 }
3687 return map;
3688}
3689int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3690{
3691 if(!c||addr<0x80800000||addr>=0xC0000000) {
3692 emit_testimm(map,0x40000000);
3693 *jaddr=(int)out;
3694 emit_jne(0);
3695 }
3696}
3697
3698int gen_tlb_addr_w(int ar, int map) {
3699 if(map>=0) {
3700 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3701 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3702 }
3703}
3704
3705// Generate the address of the memory_map entry, relative to dynarec_local
3706generate_map_const(u_int addr,int reg) {
3707 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3708 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3709}
3710
63cb0298 3711#else
3712
/*
 * No-op replacements used when DISABLE_TLB is defined.  Each one returns 0
 * and emits nothing.  They carry the same parameter lists as the real
 * functions so that call sites passing arguments stay valid where an empty
 * "()" list means "(void)".
 */
static int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,unsigned int addr)
{ (void)s; (void)ar; (void)map; (void)x; (void)a; (void)shift; (void)c; (void)addr; return 0; }
static int do_tlb_r_branch(int map,int c,unsigned int addr,int *jaddr)
{ (void)map; (void)c; (void)addr; (void)jaddr; return 0; }
static int gen_tlb_addr_r(int ar,int map)
{ (void)ar; (void)map; return 0; }
static int do_tlb_w(int s,int ar,int map,int x,int c,unsigned int addr)
{ (void)s; (void)ar; (void)map; (void)x; (void)c; (void)addr; return 0; }
static int do_tlb_w_branch(int map,int c,unsigned int addr,int *jaddr)
{ (void)map; (void)c; (void)addr; (void)jaddr; return 0; }
static int gen_tlb_addr_w(int ar,int map)
{ (void)ar; (void)map; return 0; }
3719
3720#endif // DISABLE_TLB
3721
57871462 3722/* Special assem */
3723
3724void shift_assemble_arm(int i,struct regstat *i_regs)
3725{
3726 if(rt1[i]) {
3727 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3728 {
3729 signed char s,t,shift;
3730 t=get_reg(i_regs->regmap,rt1[i]);
3731 s=get_reg(i_regs->regmap,rs1[i]);
3732 shift=get_reg(i_regs->regmap,rs2[i]);
3733 if(t>=0){
3734 if(rs1[i]==0)
3735 {
3736 emit_zeroreg(t);
3737 }
3738 else if(rs2[i]==0)
3739 {
3740 assert(s>=0);
3741 if(s!=t) emit_mov(s,t);
3742 }
3743 else
3744 {
3745 emit_andimm(shift,31,HOST_TEMPREG);
3746 if(opcode2[i]==4) // SLLV
3747 {
3748 emit_shl(s,HOST_TEMPREG,t);
3749 }
3750 if(opcode2[i]==6) // SRLV
3751 {
3752 emit_shr(s,HOST_TEMPREG,t);
3753 }
3754 if(opcode2[i]==7) // SRAV
3755 {
3756 emit_sar(s,HOST_TEMPREG,t);
3757 }
3758 }
3759 }
3760 } else { // DSLLV/DSRLV/DSRAV
3761 signed char sh,sl,th,tl,shift;
3762 th=get_reg(i_regs->regmap,rt1[i]|64);
3763 tl=get_reg(i_regs->regmap,rt1[i]);
3764 sh=get_reg(i_regs->regmap,rs1[i]|64);
3765 sl=get_reg(i_regs->regmap,rs1[i]);
3766 shift=get_reg(i_regs->regmap,rs2[i]);
3767 if(tl>=0){
3768 if(rs1[i]==0)
3769 {
3770 emit_zeroreg(tl);
3771 if(th>=0) emit_zeroreg(th);
3772 }
3773 else if(rs2[i]==0)
3774 {
3775 assert(sl>=0);
3776 if(sl!=tl) emit_mov(sl,tl);
3777 if(th>=0&&sh!=th) emit_mov(sh,th);
3778 }
3779 else
3780 {
3781 // FIXME: What if shift==tl ?
3782 assert(shift!=tl);
3783 int temp=get_reg(i_regs->regmap,-1);
3784 int real_th=th;
3785 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3786 assert(sl>=0);
3787 assert(sh>=0);
3788 emit_andimm(shift,31,HOST_TEMPREG);
3789 if(opcode2[i]==0x14) // DSLLV
3790 {
3791 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3792 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3793 emit_orrshr(sl,HOST_TEMPREG,th);
3794 emit_andimm(shift,31,HOST_TEMPREG);
3795 emit_testimm(shift,32);
3796 emit_shl(sl,HOST_TEMPREG,tl);
3797 if(th>=0) emit_cmovne_reg(tl,th);
3798 emit_cmovne_imm(0,tl);
3799 }
3800 if(opcode2[i]==0x16) // DSRLV
3801 {
3802 assert(th>=0);
3803 emit_shr(sl,HOST_TEMPREG,tl);
3804 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3805 emit_orrshl(sh,HOST_TEMPREG,tl);
3806 emit_andimm(shift,31,HOST_TEMPREG);
3807 emit_testimm(shift,32);
3808 emit_shr(sh,HOST_TEMPREG,th);
3809 emit_cmovne_reg(th,tl);
3810 if(real_th>=0) emit_cmovne_imm(0,th);
3811 }
3812 if(opcode2[i]==0x17) // DSRAV
3813 {
3814 assert(th>=0);
3815 emit_shr(sl,HOST_TEMPREG,tl);
3816 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3817 if(real_th>=0) {
3818 assert(temp>=0);
3819 emit_sarimm(th,31,temp);
3820 }
3821 emit_orrshl(sh,HOST_TEMPREG,tl);
3822 emit_andimm(shift,31,HOST_TEMPREG);
3823 emit_testimm(shift,32);
3824 emit_sar(sh,HOST_TEMPREG,th);
3825 emit_cmovne_reg(th,tl);
3826 if(real_th>=0) emit_cmovne_reg(temp,th);
3827 }
3828 }
3829 }
3830 }
3831 }
3832}
ffb0b9e0 3833
3834#ifdef PCSX
3835static void speculate_mov(int rs,int rt)
3836{
3837 if(rt!=0) {
3838 smrv_strong_next|=1<<rt;
3839 smrv[rt]=smrv[rs];
3840 }
3841}
3842
3843static void speculate_mov_weak(int rs,int rt)
3844{
3845 if(rt!=0) {
3846 smrv_weak_next|=1<<rt;
3847 smrv[rt]=smrv[rs];
3848 }
3849}
3850
3851static void speculate_register_values(int i)
3852{
3853 if(i==0) {
3854 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3855 // gp,sp are likely to stay the same throughout the block
3856 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3857 smrv_weak_next=~smrv_strong_next;
3858 //printf(" llr %08x\n", smrv[4]);
3859 }
3860 smrv_strong=smrv_strong_next;
3861 smrv_weak=smrv_weak_next;
3862 switch(itype[i]) {
3863 case ALU:
3864 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3865 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3866 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3867 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3868 else {
3869 smrv_strong_next&=~(1<<rt1[i]);
3870 smrv_weak_next&=~(1<<rt1[i]);
3871 }
3872 break;
3873 case SHIFTIMM:
3874 smrv_strong_next&=~(1<<rt1[i]);
3875 smrv_weak_next&=~(1<<rt1[i]);
3876 // fallthrough
3877 case IMM16:
3878 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3879 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3880 if(hr>=0) {
3881 if(get_final_value(hr,i,&value))
3882 smrv[rt1[i]]=value;
3883 else smrv[rt1[i]]=constmap[i][hr];
3884 smrv_strong_next|=1<<rt1[i];
3885 }
3886 }
3887 else {
3888 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3889 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3890 }
3891 break;
3892 case LOAD:
3893 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3894 // special case for BIOS
3895 smrv[rt1[i]]=0xa0000000;
3896 smrv_strong_next|=1<<rt1[i];
3897 break;
3898 }
3899 // fallthrough
3900 case SHIFT:
3901 case LOADLR:
3902 case MOV:
3903 smrv_strong_next&=~(1<<rt1[i]);
3904 smrv_weak_next&=~(1<<rt1[i]);
3905 break;
3906 case COP0:
3907 case COP2:
3908 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3909 smrv_strong_next&=~(1<<rt1[i]);
3910 smrv_weak_next&=~(1<<rt1[i]);
3911 }
3912 break;
3913 case C2LS:
3914 if (opcode[i]==0x32) { // LWC2
3915 smrv_strong_next&=~(1<<rt1[i]);
3916 smrv_weak_next&=~(1<<rt1[i]);
3917 }
3918 break;
3919 }
3920#if 0
3921 int r=4;
3922 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3923 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3924#endif
3925}
3926
// Memory region classes returned by get_ptr_mem_type().
// MTYPE_8000 must stay 0: emit_fastpath_cmp_jump() treats 0 as the plain
// RAM fast path.
enum {
  MTYPE_8000 = 0,
  MTYPE_8020 = 1,
  MTYPE_0000 = 2,
  MTYPE_A000 = 3,
  MTYPE_1F80 = 4,
};
3934
3935static int get_ptr_mem_type(u_int a)
3936{
3937 if(a < 0x00200000) {
3938 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3939 // return wrong, must use memhandler for BIOS self-test to pass
3940 // 007 does similar stuff from a00 mirror, weird stuff
3941 return MTYPE_8000;
3942 return MTYPE_0000;
3943 }
3944 if(0x1f800000 <= a && a < 0x1f801000)
3945 return MTYPE_1F80;
3946 if(0x80200000 <= a && a < 0x80800000)
3947 return MTYPE_8020;
3948 if(0xa0000000 <= a && a < 0xa0200000)
3949 return MTYPE_A000;
3950 return MTYPE_8000;
3951}
3952#endif
3953
3954static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3955{
3956 int jaddr,type=0;
3957
3958#ifdef PCSX
3959 int mr=rs1[i];
3960 if(((smrv_strong|smrv_weak)>>mr)&1) {
3961 type=get_ptr_mem_type(smrv[mr]);
3962 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3963 }
3964 else {
3965 // use the mirror we are running on
3966 type=get_ptr_mem_type(start);
3967 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3968 }
3969
3970 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3971 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3972 addr=*addr_reg_override=HOST_TEMPREG;
3973 type=0;
3974 }
3975 else if(type==MTYPE_0000) { // RAM 0 mirror
3976 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3977 addr=*addr_reg_override=HOST_TEMPREG;
3978 type=0;
3979 }
3980 else if(type==MTYPE_A000) { // RAM A mirror
3981 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3982 addr=*addr_reg_override=HOST_TEMPREG;
3983 type=0;
3984 }
3985 else if(type==MTYPE_1F80) { // scratchpad
3986 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3987 emit_cmpimm(HOST_TEMPREG,0x1000);
3988 jaddr=(int)out;
3989 emit_jc(0);
3990 }
3991#endif
3992
3993 if(type==0)
3994 {
3995 emit_cmpimm(addr,RAM_SIZE);
3996 jaddr=(int)out;
3997 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3998 // Hint to branch predictor that the branch is unlikely to be taken
3999 if(rs1[i]>=28)
4000 emit_jno_unlikely(0);
4001 else
4002 #endif
4003 emit_jno(0);
4004 }
4005
4006 return jaddr;
4007}
4008
57871462 4009#define shift_assemble shift_assemble_arm
4010
4011void loadlr_assemble_arm(int i,struct regstat *i_regs)
4012{
4013 int s,th,tl,temp,temp2,addr,map=-1;
4014 int offset;
4015 int jaddr=0;
af4ee1fe 4016 int memtarget=0,c=0;
ffb0b9e0 4017 int fastload_reg_override=0;
57871462 4018 u_int hr,reglist=0;
4019 th=get_reg(i_regs->regmap,rt1[i]|64);
4020 tl=get_reg(i_regs->regmap,rt1[i]);
4021 s=get_reg(i_regs->regmap,rs1[i]);
4022 temp=get_reg(i_regs->regmap,-1);
4023 temp2=get_reg(i_regs->regmap,FTEMP);
4024 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
4025 assert(addr<0);
4026 offset=imm[i];
4027 for(hr=0;hr<HOST_REGS;hr++) {
4028 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4029 }
4030 reglist|=1<<temp;
4031 if(offset||s<0||c) addr=temp2;
4032 else addr=s;
4033 if(s>=0) {
4034 c=(i_regs->wasconst>>s)&1;
af4ee1fe 4035 if(c) {
4036 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
4037 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4038 }
57871462 4039 }
535d208a 4040 if(!using_tlb) {
4041 if(!c) {
4042 #ifdef RAM_OFFSET
4043 map=get_reg(i_regs->regmap,ROREG);
4044 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4045 #endif
4046 emit_shlimm(addr,3,temp);
4047 if (opcode[i]==0x22||opcode[i]==0x26) {
4048 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4049 }else{
535d208a 4050 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4051 }
ffb0b9e0 4052 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4053 }
4054 else {
4055 if (opcode[i]==0x22||opcode[i]==0x26) {
4056 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4057 }else{
4058 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4059 }
57871462 4060 }
535d208a 4061 }else{ // using tlb
4062 int a;
4063 if(c) {
4064 a=-1;
4065 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4066 a=0xFFFFFFFC; // LWL/LWR
4067 }else{
4068 a=0xFFFFFFF8; // LDL/LDR
4069 }
4070 map=get_reg(i_regs->regmap,TLREG);
4071 assert(map>=0);
ea3d2e6e 4072 reglist&=~(1<<map);
535d208a 4073 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4074 if(c) {
4075 if (opcode[i]==0x22||opcode[i]==0x26) {
4076 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4077 }else{
4078 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4079 }
535d208a 4080 }
4081 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4082 }
4083 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4084 if(!c||memtarget) {
ffb0b9e0 4085 int a=temp2;
4086 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4087 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4088 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4089 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4090 }
4091 else
4092 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4093 if(rt1[i]) {
4094 assert(tl>=0);
57871462 4095 emit_andimm(temp,24,temp);
2002a1db 4096#ifdef BIG_ENDIAN_MIPS
4097 if (opcode[i]==0x26) // LWR
4098#else
4099 if (opcode[i]==0x22) // LWL
4100#endif
4101 emit_xorimm(temp,24,temp);
57871462 4102 emit_movimm(-1,HOST_TEMPREG);
4103 if (opcode[i]==0x26) {
4104 emit_shr(temp2,temp,temp2);
4105 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4106 }else{
4107 emit_shl(temp2,temp,temp2);
4108 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4109 }
4110 emit_or(temp2,tl,tl);
57871462 4111 }
535d208a 4112 //emit_storereg(rt1[i],tl); // DEBUG
4113 }
4114 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4115 // FIXME: little endian, fastload_reg_override
535d208a 4116 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4117 if(!c||memtarget) {
4118 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4119 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4120 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4121 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4122 }
4123 else
4124 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4125 if(rt1[i]) {
4126 assert(th>=0);
4127 assert(tl>=0);
57871462 4128 emit_testimm(temp,32);
4129 emit_andimm(temp,24,temp);
4130 if (opcode[i]==0x1A) { // LDL
4131 emit_rsbimm(temp,32,HOST_TEMPREG);
4132 emit_shl(temp2h,temp,temp2h);
4133 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4134 emit_movimm(-1,HOST_TEMPREG);
4135 emit_shl(temp2,temp,temp2);
4136 emit_cmove_reg(temp2h,th);
4137 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4138 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4139 emit_orreq(temp2,tl,tl);
4140 emit_orrne(temp2,th,th);
4141 }
4142 if (opcode[i]==0x1B) { // LDR
4143 emit_xorimm(temp,24,temp);
4144 emit_rsbimm(temp,32,HOST_TEMPREG);
4145 emit_shr(temp2,temp,temp2);
4146 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4147 emit_movimm(-1,HOST_TEMPREG);
4148 emit_shr(temp2h,temp,temp2h);
4149 emit_cmovne_reg(temp2,tl);
4150 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4151 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4152 emit_orrne(temp2h,th,th);
4153 emit_orreq(temp2h,tl,tl);
4154 }
4155 }
4156 }
4157}
4158#define loadlr_assemble loadlr_assemble_arm
4159
4160void cop0_assemble(int i,struct regstat *i_regs)
4161{
4162 if(opcode2[i]==0) // MFC0
4163 {
4164 signed char t=get_reg(i_regs->regmap,rt1[i]);
4165 char copr=(source[i]>>11)&0x1f;
4166 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4167 if(t>=0&&rt1[i]!=0) {
7139f3c8 4168#ifdef MUPEN64
57871462 4169 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4170 emit_movimm((source[i]>>11)&0x1f,1);
4171 emit_writeword(0,(int)&PC);
4172 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4173 if(copr==9) {
4174 emit_readword((int)&last_count,ECX);
4175 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4176 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4177 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4178 emit_writeword(HOST_CCREG,(int)&Count);
4179 }
4180 emit_call((int)MFC0);
4181 emit_readword((int)&readmem_dword,t);
7139f3c8 4182#else
4183 emit_readword((int)&reg_cop0+copr*4,t);
4184#endif
57871462 4185 }
4186 }
4187 else if(opcode2[i]==4) // MTC0
4188 {
4189 signed char s=get_reg(i_regs->regmap,rs1[i]);
4190 char copr=(source[i]>>11)&0x1f;
4191 assert(s>=0);
63cb0298 4192#ifdef MUPEN64
57871462 4193 emit_writeword(s,(int)&readmem_dword);
4194 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4195 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4196 emit_movimm((source[i]>>11)&0x1f,1);
4197 emit_writeword(0,(int)&PC);
4198 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4199#else
4200 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4201#endif
4202 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4203 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4204 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4205 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4206 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4207 emit_writeword(HOST_CCREG,(int)&Count);
4208 }
4209 // What a mess. The status register (12) can enable interrupts,
4210 // so needs a special case to handle a pending interrupt.
4211 // The interrupt must be taken immediately, because a subsequent
4212 // instruction might disable interrupts again.
7139f3c8 4213 if(copr==12||copr==13) {
fca1aef2 4214#ifdef PCSX
4215 if (is_delayslot) {
4216 // burn cycles to cause cc_interrupt, which will
4217 // reschedule next_interupt. Relies on CCREG from above.
4218 assem_debug("MTC0 DS %d\n", copr);
4219 emit_writeword(HOST_CCREG,(int)&last_count);
4220 emit_movimm(0,HOST_CCREG);
4221 emit_storereg(CCREG,HOST_CCREG);
caeefe31 4222 emit_loadreg(rs1[i],1);
fca1aef2 4223 emit_movimm(copr,0);
4224 emit_call((int)pcsx_mtc0_ds);
042c7287 4225 emit_loadreg(rs1[i],s);
fca1aef2 4226 return;
4227 }
4228#endif
63cb0298 4229 emit_movimm(start+i*4+4,HOST_TEMPREG);
4230 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4231 emit_movimm(0,HOST_TEMPREG);
4232 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4233 }
4234 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4235 //else
fca1aef2 4236#ifdef PCSX
caeefe31 4237 if(s==HOST_CCREG)
4238 emit_loadreg(rs1[i],1);
4239 else if(s!=1)
63cb0298 4240 emit_mov(s,1);
fca1aef2 4241 emit_movimm(copr,0);
4242 emit_call((int)pcsx_mtc0);
4243#else
57871462 4244 emit_call((int)MTC0);
fca1aef2 4245#endif
7139f3c8 4246 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4247 emit_readword((int)&Count,HOST_CCREG);
042c7287 4248 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 4249 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 4250 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
4251 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 4252 emit_storereg(CCREG,HOST_CCREG);
4253 }
7139f3c8 4254 if(copr==12||copr==13) {
57871462 4255 assert(!is_delayslot);
4256 emit_readword((int)&pending_exception,14);
042c7287 4257 emit_test(14,14);
4258 emit_jne((int)&do_interrupt);
57871462 4259 }
4260 emit_loadreg(rs1[i],s);
4261 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4262 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 4263 cop1_usable=0;
4264 }
4265 else
4266 {
4267 assert(opcode2[i]==0x10);
3d624f89 4268#ifndef DISABLE_TLB
57871462 4269 if((source[i]&0x3f)==0x01) // TLBR
4270 emit_call((int)TLBR);
4271 if((source[i]&0x3f)==0x02) // TLBWI
4272 emit_call((int)TLBWI_new);
4273 if((source[i]&0x3f)==0x06) { // TLBWR
4274 // The TLB entry written by TLBWR is dependent on the count,
4275 // so update the cycle count
4276 emit_readword((int)&last_count,ECX);
4277 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4278 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4279 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4280 emit_writeword(HOST_CCREG,(int)&Count);
4281 emit_call((int)TLBWR_new);
4282 }
4283 if((source[i]&0x3f)==0x08) // TLBP
4284 emit_call((int)TLBP);
3d624f89 4285#endif
576bbd8f 4286#ifdef PCSX
4287 if((source[i]&0x3f)==0x10) // RFE
4288 {
4289 emit_readword((int)&Status,0);
4290 emit_andimm(0,0x3c,1);
4291 emit_andimm(0,~0xf,0);
4292 emit_orrshr_imm(1,2,0);
4293 emit_writeword(0,(int)&Status);
4294 }
4295#else
57871462 4296 if((source[i]&0x3f)==0x18) // ERET
4297 {
4298 int count=ccadj[i];
4299 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4300 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4301 emit_jmp((int)jump_eret);
4302 }
576bbd8f 4303#endif
57871462 4304 }
4305}
4306
b9b61529 4307static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4308{
4309 switch (copr) {
4310 case 1:
4311 case 3:
4312 case 5:
4313 case 8:
4314 case 9:
4315 case 10:
4316 case 11:
4317 emit_readword((int)&reg_cop2d[copr],tl);
4318 emit_signextend16(tl,tl);
4319 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4320 break;
4321 case 7:
4322 case 16:
4323 case 17:
4324 case 18:
4325 case 19:
4326 emit_readword((int)&reg_cop2d[copr],tl);
4327 emit_andimm(tl,0xffff,tl);
4328 emit_writeword(tl,(int)&reg_cop2d[copr]);
4329 break;
4330 case 15:
4331 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4332 emit_writeword(tl,(int)&reg_cop2d[copr]);
4333 break;
4334 case 28:
b9b61529 4335 case 29:
4336 emit_readword((int)&reg_cop2d[9],temp);
4337 emit_testimm(temp,0x8000); // do we need this?
4338 emit_andimm(temp,0xf80,temp);
4339 emit_andne_imm(temp,0,temp);
f70d384d 4340 emit_shrimm(temp,7,tl);
b9b61529 4341 emit_readword((int)&reg_cop2d[10],temp);
4342 emit_testimm(temp,0x8000);
4343 emit_andimm(temp,0xf80,temp);
4344 emit_andne_imm(temp,0,temp);
f70d384d 4345 emit_orrshr_imm(temp,2,tl);
b9b61529 4346 emit_readword((int)&reg_cop2d[11],temp);
4347 emit_testimm(temp,0x8000);
4348 emit_andimm(temp,0xf80,temp);
4349 emit_andne_imm(temp,0,temp);
f70d384d 4350 emit_orrshl_imm(temp,3,tl);
b9b61529 4351 emit_writeword(tl,(int)&reg_cop2d[copr]);
4352 break;
4353 default:
4354 emit_readword((int)&reg_cop2d[copr],tl);
4355 break;
4356 }
4357}
4358
4359static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4360{
4361 switch (copr) {
4362 case 15:
4363 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4364 emit_writeword(sl,(int)&reg_cop2d[copr]);
4365 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4366 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4367 emit_writeword(sl,(int)&reg_cop2d[14]);
4368 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4369 break;
4370 case 28:
4371 emit_andimm(sl,0x001f,temp);
f70d384d 4372 emit_shlimm(temp,7,temp);
b9b61529 4373 emit_writeword(temp,(int)&reg_cop2d[9]);
4374 emit_andimm(sl,0x03e0,temp);
f70d384d 4375 emit_shlimm(temp,2,temp);
b9b61529 4376 emit_writeword(temp,(int)&reg_cop2d[10]);
4377 emit_andimm(sl,0x7c00,temp);
f70d384d 4378 emit_shrimm(temp,3,temp);
b9b61529 4379 emit_writeword(temp,(int)&reg_cop2d[11]);
4380 emit_writeword(sl,(int)&reg_cop2d[28]);
4381 break;
4382 case 30:
4383 emit_movs(sl,temp);
4384 emit_mvnmi(temp,temp);
4385 emit_clz(temp,temp);
4386 emit_writeword(sl,(int)&reg_cop2d[30]);
4387 emit_writeword(temp,(int)&reg_cop2d[31]);
4388 break;
b9b61529 4389 case 31:
4390 break;
4391 default:
4392 emit_writeword(sl,(int)&reg_cop2d[copr]);
4393 break;
4394 }
4395}
4396
4397void cop2_assemble(int i,struct regstat *i_regs)
4398{
4399 u_int copr=(source[i]>>11)&0x1f;
4400 signed char temp=get_reg(i_regs->regmap,-1);
4401 if (opcode2[i]==0) { // MFC2
4402 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4403 if(tl>=0&&rt1[i]!=0)
b9b61529 4404 cop2_get_dreg(copr,tl,temp);
4405 }
4406 else if (opcode2[i]==4) { // MTC2
4407 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4408 cop2_put_dreg(copr,sl,temp);
4409 }
4410 else if (opcode2[i]==2) // CFC2
4411 {
4412 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4413 if(tl>=0&&rt1[i]!=0)
b9b61529 4414 emit_readword((int)&reg_cop2c[copr],tl);
4415 }
4416 else if (opcode2[i]==6) // CTC2
4417 {
4418 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4419 switch(copr) {
4420 case 4:
4421 case 12:
4422 case 20:
4423 case 26:
4424 case 27:
4425 case 29:
4426 case 30:
4427 emit_signextend16(sl,temp);
4428 break;
4429 case 31:
4430 //value = value & 0x7ffff000;
4431 //if (value & 0x7f87e000) value |= 0x80000000;
4432 emit_shrimm(sl,12,temp);
4433 emit_shlimm(temp,12,temp);
4434 emit_testimm(temp,0x7f000000);
4435 emit_testeqimm(temp,0x00870000);
4436 emit_testeqimm(temp,0x0000e000);
4437 emit_orrne_imm(temp,0x80000000,temp);
4438 break;
4439 default:
4440 temp=sl;
4441 break;
4442 }
4443 emit_writeword(temp,(int)&reg_cop2c[copr]);
4444 assert(sl>=0);
4445 }
4446}
4447
054175e9 4448static void c2op_prologue(u_int op,u_int reglist)
4449{
4450 save_regs_all(reglist);
82ed88eb 4451#ifdef PCNT
4452 emit_movimm(op,0);
4453 emit_call((int)pcnt_gte_start);
4454#endif
054175e9 4455 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4456}
4457
4458static void c2op_epilogue(u_int op,u_int reglist)
4459{
82ed88eb 4460#ifdef PCNT
4461 emit_movimm(op,0);
4462 emit_call((int)pcnt_gte_end);
4463#endif
054175e9 4464 restore_regs_all(reglist);
4465}
4466
6c0eefaf 4467static void c2op_call_MACtoIR(int lm,int need_flags)
4468{
4469 if(need_flags)
4470 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4471 else
4472 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
4473}
4474
4475static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
4476{
4477 emit_call((int)func);
4478 // func is C code and trashes r0
4479 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4480 if(need_flags||need_ir)
4481 c2op_call_MACtoIR(lm,need_flags);
4482 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
4483}
4484
054175e9 4485static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4486{
4487 signed char temp=get_reg(i_regs->regmap,-1);
4488 u_int c2op=source[i]&0x3f;
6c0eefaf 4489 u_int hr,reglist_full=0,reglist;
054175e9 4490 int need_flags,need_ir;
b9b61529 4491 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 4492 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 4493 }
6c0eefaf 4494 reglist=reglist_full&0x100f;
b9b61529 4495
4496 if (gte_handlers[c2op]!=NULL) {
bedfea38 4497 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4498 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 4499 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
4500 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 4501 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
4502 need_flags=0;
6c0eefaf 4503 int shift = (source[i] >> 19) & 1;
4504 int lm = (source[i] >> 10) & 1;
054175e9 4505 switch(c2op) {
19776aef 4506#ifndef DRC_DBG
054175e9 4507 case GTE_MVMVA: {
054175e9 4508 int v = (source[i] >> 15) & 3;
4509 int cv = (source[i] >> 13) & 3;
4510 int mx = (source[i] >> 17) & 3;
6c0eefaf 4511 reglist=reglist_full&0x10ff; // +{r4-r7}
054175e9 4512 c2op_prologue(c2op,reglist);
4513 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4514 if(v<3)
4515 emit_ldrd(v*8,0,4);
4516 else {
4517 emit_movzwl_indexed(9*4,0,4); // gteIR
4518 emit_movzwl_indexed(10*4,0,6);
4519 emit_movzwl_indexed(11*4,0,5);
4520 emit_orrshl_imm(6,16,4);
4521 }
4522 if(mx<3)
4523 emit_addimm(0,32*4+mx*8*4,6);
4524 else
4525 emit_readword((int)&zeromem_ptr,6);
4526 if(cv<3)
4527 emit_addimm(0,32*4+(cv*8+5)*4,7);
4528 else
4529 emit_readword((int)&zeromem_ptr,7);
4530#ifdef __ARM_NEON__
4531 emit_movimm(source[i],1); // opcode
4532 emit_call((int)gteMVMVA_part_neon);
4533 if(need_flags) {
4534 emit_movimm(lm,1);
4535 emit_call((int)gteMACtoIR_flags_neon);
4536 }
4537#else
4538 if(cv==3&&shift)
4539 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4540 else {
4541 emit_movimm(shift,1);
4542 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4543 }
6c0eefaf 4544 if(need_flags||need_ir)
4545 c2op_call_MACtoIR(lm,need_flags);
054175e9 4546#endif
4547 break;
4548 }
6c0eefaf 4549 case GTE_OP:
4550 c2op_prologue(c2op,reglist);
4551 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
4552 if(need_flags||need_ir) {
4553 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4554 c2op_call_MACtoIR(lm,need_flags);
4555 }
4556 break;
4557 case GTE_DPCS:
4558 c2op_prologue(c2op,reglist);
4559 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
4560 break;
4561 case GTE_INTPL:
4562 c2op_prologue(c2op,reglist);
4563 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
4564 break;
4565 case GTE_SQR:
4566 c2op_prologue(c2op,reglist);
4567 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
4568 if(need_flags||need_ir) {
4569 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4570 c2op_call_MACtoIR(lm,need_flags);
4571 }
4572 break;
4573 case GTE_DCPL:
4574 c2op_prologue(c2op,reglist);
4575 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
4576 break;
4577 case GTE_GPF:
4578 c2op_prologue(c2op,reglist);
4579 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
4580 break;
4581 case GTE_GPL:
4582 c2op_prologue(c2op,reglist);
4583 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
4584 break;
19776aef 4585#endif
054175e9 4586 default:
054175e9 4587 c2op_prologue(c2op,reglist);
19776aef 4588#ifdef DRC_DBG
4589 emit_movimm(source[i],1); // opcode
4590 emit_writeword(1,(int)&psxRegs.code);
4591#endif
054175e9 4592 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4593 break;
4594 }
4595 c2op_epilogue(c2op,reglist);
4596 }
b9b61529 4597}
4598
4599void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4600{
4601 // XXX: should just just do the exception instead
4602 if(!cop1_usable) {
4603 int jaddr=(int)out;
4604 emit_jmp(0);
4605 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4606 cop1_usable=1;
4607 }
4608}
4609
57871462 4610void cop1_assemble(int i,struct regstat *i_regs)
4611{
3d624f89 4612#ifndef DISABLE_COP1
57871462 4613 // Check cop1 unusable
4614 if(!cop1_usable) {
4615 signed char rs=get_reg(i_regs->regmap,CSREG);
4616 assert(rs>=0);
4617 emit_testimm(rs,0x20000000);
4618 int jaddr=(int)out;
4619 emit_jeq(0);
4620 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4621 cop1_usable=1;
4622 }
4623 if (opcode2[i]==0) { // MFC1
4624 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4625 if(tl>=0) {
4626 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4627 emit_readword_indexed(0,tl,tl);
4628 }
4629 }
4630 else if (opcode2[i]==1) { // DMFC1
4631 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4632 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4633 if(tl>=0) {
4634 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4635 if(th>=0) emit_readword_indexed(4,tl,th);
4636 emit_readword_indexed(0,tl,tl);
4637 }
4638 }
4639 else if (opcode2[i]==4) { // MTC1
4640 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4641 signed char temp=get_reg(i_regs->regmap,-1);
4642 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4643 emit_writeword_indexed(sl,0,temp);
4644 }
4645 else if (opcode2[i]==5) { // DMTC1
4646 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4647 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4648 signed char temp=get_reg(i_regs->regmap,-1);
4649 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4650 emit_writeword_indexed(sh,4,temp);
4651 emit_writeword_indexed(sl,0,temp);
4652 }
4653 else if (opcode2[i]==2) // CFC1
4654 {
4655 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4656 if(tl>=0) {
4657 u_int copr=(source[i]>>11)&0x1f;
4658 if(copr==0) emit_readword((int)&FCR0,tl);
4659 if(copr==31) emit_readword((int)&FCR31,tl);
4660 }
4661 }
4662 else if (opcode2[i]==6) // CTC1
4663 {
4664 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4665 u_int copr=(source[i]>>11)&0x1f;
4666 assert(sl>=0);
4667 if(copr==31)
4668 {
4669 emit_writeword(sl,(int)&FCR31);
4670 // Set the rounding mode
4671 //FIXME
4672 //char temp=get_reg(i_regs->regmap,-1);
4673 //emit_andimm(sl,3,temp);
4674 //emit_fldcw_indexed((int)&rounding_modes,temp);
4675 }
4676 }
3d624f89 4677#else
4678 cop1_unusable(i, i_regs);
4679#endif
57871462 4680}
4681
4682void fconv_assemble_arm(int i,struct regstat *i_regs)
4683{
3d624f89 4684#ifndef DISABLE_COP1
57871462 4685 signed char temp=get_reg(i_regs->regmap,-1);
4686 assert(temp>=0);
4687 // Check cop1 unusable
4688 if(!cop1_usable) {
4689 signed char rs=get_reg(i_regs->regmap,CSREG);
4690 assert(rs>=0);
4691 emit_testimm(rs,0x20000000);
4692 int jaddr=(int)out;
4693 emit_jeq(0);
4694 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4695 cop1_usable=1;
4696 }
4697
4698 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4699 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4700 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4701 emit_flds(temp,15);
4702 emit_ftosizs(15,15); // float->int, truncate
4703 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4704 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4705 emit_fsts(15,temp);
4706 return;
4707 }
4708 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4709 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4710 emit_vldr(temp,7);
4711 emit_ftosizd(7,13); // double->int, truncate
4712 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4713 emit_fsts(13,temp);
4714 return;
4715 }
4716
4717 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4718 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4719 emit_flds(temp,13);
4720 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4721 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4722 emit_fsitos(13,15);
4723 emit_fsts(15,temp);
4724 return;
4725 }
4726 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4727 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4728 emit_flds(temp,13);
4729 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4730 emit_fsitod(13,7);
4731 emit_vstr(7,temp);
4732 return;
4733 }
4734
4735 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4736 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4737 emit_flds(temp,13);
4738 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4739 emit_fcvtds(13,7);
4740 emit_vstr(7,temp);
4741 return;
4742 }
4743 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4744 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4745 emit_vldr(temp,7);
4746 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4747 emit_fcvtsd(7,13);
4748 emit_fsts(13,temp);
4749 return;
4750 }
4751 #endif
4752
4753 // C emulation code
4754
4755 u_int hr,reglist=0;
4756 for(hr=0;hr<HOST_REGS;hr++) {
4757 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4758 }
4759 save_regs(reglist);
4760
4761 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4762 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4763 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4764 emit_call((int)cvt_s_w);
4765 }
4766 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4767 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4768 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4769 emit_call((int)cvt_d_w);
4770 }
4771 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4772 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4773 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4774 emit_call((int)cvt_s_l);
4775 }
4776 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4777 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4778 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4779 emit_call((int)cvt_d_l);
4780 }
4781
4782 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4783 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4784 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4785 emit_call((int)cvt_d_s);
4786 }
4787 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4788 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4789 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4790 emit_call((int)cvt_w_s);
4791 }
4792 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4793 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4794 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4795 emit_call((int)cvt_l_s);
4796 }
4797
4798 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4799 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4800 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4801 emit_call((int)cvt_s_d);
4802 }
4803 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4804 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4805 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4806 emit_call((int)cvt_w_d);
4807 }
4808 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4809 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4810 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4811 emit_call((int)cvt_l_d);
4812 }
4813
4814 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4815 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4816 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4817 emit_call((int)round_l_s);
4818 }
4819 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4820 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4821 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4822 emit_call((int)trunc_l_s);
4823 }
4824 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4825 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4826 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4827 emit_call((int)ceil_l_s);
4828 }
4829 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4830 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4831 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4832 emit_call((int)floor_l_s);
4833 }
4834 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4835 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4836 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4837 emit_call((int)round_w_s);
4838 }
4839 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4840 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4841 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4842 emit_call((int)trunc_w_s);
4843 }
4844 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4845 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4846 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4847 emit_call((int)ceil_w_s);
4848 }
4849 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4850 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4851 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4852 emit_call((int)floor_w_s);
4853 }
4854
4855 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4856 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4857 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4858 emit_call((int)round_l_d);
4859 }
4860 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4861 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4862 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4863 emit_call((int)trunc_l_d);
4864 }
4865 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4866 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4867 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4868 emit_call((int)ceil_l_d);
4869 }
4870 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4871 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4872 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4873 emit_call((int)floor_l_d);
4874 }
4875 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4876 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4877 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4878 emit_call((int)round_w_d);
4879 }
4880 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4881 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4882 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4883 emit_call((int)trunc_w_d);
4884 }
4885 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4886 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4887 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4888 emit_call((int)ceil_w_d);
4889 }
4890 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4891 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4892 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4893 emit_call((int)floor_w_d);
4894 }
4895
4896 restore_regs(reglist);
3d624f89 4897#else
4898 cop1_unusable(i, i_regs);
4899#endif
57871462 4900}
4901#define fconv_assemble fconv_assemble_arm
4902
4903void fcomp_assemble(int i,struct regstat *i_regs)
4904{
3d624f89 4905#ifndef DISABLE_COP1
57871462 4906 signed char fs=get_reg(i_regs->regmap,FSREG);
4907 signed char temp=get_reg(i_regs->regmap,-1);
4908 assert(temp>=0);
4909 // Check cop1 unusable
4910 if(!cop1_usable) {
4911 signed char cs=get_reg(i_regs->regmap,CSREG);
4912 assert(cs>=0);
4913 emit_testimm(cs,0x20000000);
4914 int jaddr=(int)out;
4915 emit_jeq(0);
4916 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4917 cop1_usable=1;
4918 }
4919
4920 if((source[i]&0x3f)==0x30) {
4921 emit_andimm(fs,~0x800000,fs);
4922 return;
4923 }
4924
4925 if((source[i]&0x3e)==0x38) {
4926 // sf/ngle - these should throw exceptions for NaNs
4927 emit_andimm(fs,~0x800000,fs);
4928 return;
4929 }
4930
4931 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4932 if(opcode2[i]==0x10) {
4933 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4934 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4935 emit_orimm(fs,0x800000,fs);
4936 emit_flds(temp,14);
4937 emit_flds(HOST_TEMPREG,15);
4938 emit_fcmps(14,15);
4939 emit_fmstat();
4940 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4941 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4942 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4943 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4944 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4945 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4946 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4947 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4948 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4949 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4950 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4951 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4952 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4953 return;
4954 }
4955 if(opcode2[i]==0x11) {
4956 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4957 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4958 emit_orimm(fs,0x800000,fs);
4959 emit_vldr(temp,6);
4960 emit_vldr(HOST_TEMPREG,7);
4961 emit_fcmpd(6,7);
4962 emit_fmstat();
4963 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4964 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4965 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4966 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4967 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4968 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4969 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4970 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4971 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4972 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4973 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4974 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4975 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4976 return;
4977 }
4978 #endif
4979
4980 // C only
4981
4982 u_int hr,reglist=0;
4983 for(hr=0;hr<HOST_REGS;hr++) {
4984 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4985 }
4986 reglist&=~(1<<fs);
4987 save_regs(reglist);
4988 if(opcode2[i]==0x10) {
4989 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4990 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4991 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4992 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4993 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4994 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4995 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4996 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4997 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4998 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4999 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
5000 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
5001 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
5002 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
5003 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
5004 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
5005 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
5006 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
5007 }
5008 if(opcode2[i]==0x11) {
5009 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5010 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5011 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
5012 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
5013 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
5014 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
5015 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
5016 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
5017 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
5018 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
5019 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
5020 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
5021 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
5022 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
5023 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
5024 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
5025 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
5026 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
5027 }
5028 restore_regs(reglist);
5029 emit_loadreg(FSREG,fs);
3d624f89 5030#else
5031 cop1_unusable(i, i_regs);
5032#endif
57871462 5033}
5034
5035void float_assemble(int i,struct regstat *i_regs)
5036{
3d624f89 5037#ifndef DISABLE_COP1
57871462 5038 signed char temp=get_reg(i_regs->regmap,-1);
5039 assert(temp>=0);
5040 // Check cop1 unusable
5041 if(!cop1_usable) {
5042 signed char cs=get_reg(i_regs->regmap,CSREG);
5043 assert(cs>=0);
5044 emit_testimm(cs,0x20000000);
5045 int jaddr=(int)out;
5046 emit_jeq(0);
5047 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
5048 cop1_usable=1;
5049 }
5050
5051 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
5052 if((source[i]&0x3f)==6) // mov
5053 {
5054 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5055 if(opcode2[i]==0x10) {
5056 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5057 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
5058 emit_readword_indexed(0,temp,temp);
5059 emit_writeword_indexed(temp,0,HOST_TEMPREG);
5060 }
5061 if(opcode2[i]==0x11) {
5062 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5063 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
5064 emit_vldr(temp,7);
5065 emit_vstr(7,HOST_TEMPREG);
5066 }
5067 }
5068 return;
5069 }
5070
5071 if((source[i]&0x3f)>3)
5072 {
5073 if(opcode2[i]==0x10) {
5074 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5075 emit_flds(temp,15);
5076 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5077 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5078 }
5079 if((source[i]&0x3f)==4) // sqrt
5080 emit_fsqrts(15,15);
5081 if((source[i]&0x3f)==5) // abs
5082 emit_fabss(15,15);
5083 if((source[i]&0x3f)==7) // neg
5084 emit_fnegs(15,15);
5085 emit_fsts(15,temp);
5086 }
5087 if(opcode2[i]==0x11) {
5088 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5089 emit_vldr(temp,7);
5090 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5091 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5092 }
5093 if((source[i]&0x3f)==4) // sqrt
5094 emit_fsqrtd(7,7);
5095 if((source[i]&0x3f)==5) // abs
5096 emit_fabsd(7,7);
5097 if((source[i]&0x3f)==7) // neg
5098 emit_fnegd(7,7);
5099 emit_vstr(7,temp);
5100 }
5101 return;
5102 }
5103 if((source[i]&0x3f)<4)
5104 {
5105 if(opcode2[i]==0x10) {
5106 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5107 }
5108 if(opcode2[i]==0x11) {
5109 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5110 }
5111 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5112 if(opcode2[i]==0x10) {
5113 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5114 emit_flds(temp,15);
5115 emit_flds(HOST_TEMPREG,13);
5116 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5117 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5118 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5119 }
5120 }
5121 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5122 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5123 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5124 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5125 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5126 emit_fsts(15,HOST_TEMPREG);
5127 }else{
5128 emit_fsts(15,temp);
5129 }
5130 }
5131 else if(opcode2[i]==0x11) {
5132 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5133 emit_vldr(temp,7);
5134 emit_vldr(HOST_TEMPREG,6);
5135 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5136 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5137 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5138 }
5139 }
5140 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5141 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5142 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5143 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5144 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5145 emit_vstr(7,HOST_TEMPREG);
5146 }else{
5147 emit_vstr(7,temp);
5148 }
5149 }
5150 }
5151 else {
5152 if(opcode2[i]==0x10) {
5153 emit_flds(temp,15);
5154 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5155 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5156 }
5157 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5158 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5159 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5160 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5161 emit_fsts(15,temp);
5162 }
5163 else if(opcode2[i]==0x11) {
5164 emit_vldr(temp,7);
5165 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5166 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5167 }
5168 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5169 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5170 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5171 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5172 emit_vstr(7,temp);
5173 }
5174 }
5175 return;
5176 }
5177 #endif
5178
5179 u_int hr,reglist=0;
5180 for(hr=0;hr<HOST_REGS;hr++) {
5181 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5182 }
5183 if(opcode2[i]==0x10) { // Single precision
5184 save_regs(reglist);
5185 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5186 if((source[i]&0x3f)<4) {
5187 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5188 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5189 }else{
5190 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5191 }
5192 switch(source[i]&0x3f)
5193 {
5194 case 0x00: emit_call((int)add_s);break;
5195 case 0x01: emit_call((int)sub_s);break;
5196 case 0x02: emit_call((int)mul_s);break;
5197 case 0x03: emit_call((int)div_s);break;
5198 case 0x04: emit_call((int)sqrt_s);break;
5199 case 0x05: emit_call((int)abs_s);break;
5200 case 0x06: emit_call((int)mov_s);break;
5201 case 0x07: emit_call((int)neg_s);break;
5202 }
5203 restore_regs(reglist);
5204 }
5205 if(opcode2[i]==0x11) { // Double precision
5206 save_regs(reglist);
5207 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5208 if((source[i]&0x3f)<4) {
5209 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5210 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5211 }else{
5212 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5213 }
5214 switch(source[i]&0x3f)
5215 {
5216 case 0x00: emit_call((int)add_d);break;
5217 case 0x01: emit_call((int)sub_d);break;
5218 case 0x02: emit_call((int)mul_d);break;
5219 case 0x03: emit_call((int)div_d);break;
5220 case 0x04: emit_call((int)sqrt_d);break;
5221 case 0x05: emit_call((int)abs_d);break;
5222 case 0x06: emit_call((int)mov_d);break;
5223 case 0x07: emit_call((int)neg_d);break;
5224 }
5225 restore_regs(reglist);
5226 }
3d624f89 5227#else
5228 cop1_unusable(i, i_regs);
5229#endif
57871462 5230}
5231
5232void multdiv_assemble_arm(int i,struct regstat *i_regs)
5233{
5234 // case 0x18: MULT
5235 // case 0x19: MULTU
5236 // case 0x1A: DIV
5237 // case 0x1B: DIVU
5238 // case 0x1C: DMULT
5239 // case 0x1D: DMULTU
5240 // case 0x1E: DDIV
5241 // case 0x1F: DDIVU
5242 if(rs1[i]&&rs2[i])
5243 {
5244 if((opcode2[i]&4)==0) // 32-bit
5245 {
5246 if(opcode2[i]==0x18) // MULT
5247 {
5248 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5249 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5250 signed char hi=get_reg(i_regs->regmap,HIREG);
5251 signed char lo=get_reg(i_regs->regmap,LOREG);
5252 assert(m1>=0);
5253 assert(m2>=0);
5254 assert(hi>=0);
5255 assert(lo>=0);
5256 emit_smull(m1,m2,hi,lo);
5257 }
5258 if(opcode2[i]==0x19) // MULTU
5259 {
5260 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5261 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5262 signed char hi=get_reg(i_regs->regmap,HIREG);
5263 signed char lo=get_reg(i_regs->regmap,LOREG);
5264 assert(m1>=0);
5265 assert(m2>=0);
5266 assert(hi>=0);
5267 assert(lo>=0);
5268 emit_umull(m1,m2,hi,lo);
5269 }
5270 if(opcode2[i]==0x1A) // DIV
5271 {
5272 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5273 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5274 assert(d1>=0);
5275 assert(d2>=0);
5276 signed char quotient=get_reg(i_regs->regmap,LOREG);
5277 signed char remainder=get_reg(i_regs->regmap,HIREG);
5278 assert(quotient>=0);
5279 assert(remainder>=0);
5280 emit_movs(d1,remainder);
44a80f6a 5281 emit_movimm(0xffffffff,quotient);
5282 emit_negmi(quotient,quotient); // .. quotient and ..
5283 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5284 emit_movs(d2,HOST_TEMPREG);
5285 emit_jeq((int)out+52); // Division by zero
5286 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5287 emit_clz(HOST_TEMPREG,quotient);
5288 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5289 emit_orimm(quotient,1<<31,quotient);
5290 emit_shr(quotient,quotient,quotient);
5291 emit_cmp(remainder,HOST_TEMPREG);
5292 emit_subcs(remainder,HOST_TEMPREG,remainder);
5293 emit_adcs(quotient,quotient,quotient);
5294 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5295 emit_jcc((int)out-16); // -4
5296 emit_teq(d1,d2);
5297 emit_negmi(quotient,quotient);
5298 emit_test(d1,d1);
5299 emit_negmi(remainder,remainder);
5300 }
5301 if(opcode2[i]==0x1B) // DIVU
5302 {
5303 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5304 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5305 assert(d1>=0);
5306 assert(d2>=0);
5307 signed char quotient=get_reg(i_regs->regmap,LOREG);
5308 signed char remainder=get_reg(i_regs->regmap,HIREG);
5309 assert(quotient>=0);
5310 assert(remainder>=0);
44a80f6a 5311 emit_mov(d1,remainder);
5312 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5313 emit_test(d2,d2);
44a80f6a 5314 emit_jeq((int)out+40); // Division by zero
57871462 5315 emit_clz(d2,HOST_TEMPREG);
5316 emit_movimm(1<<31,quotient);
5317 emit_shl(d2,HOST_TEMPREG,d2);
57871462 5318 emit_shr(quotient,HOST_TEMPREG,quotient);
5319 emit_cmp(remainder,d2);
5320 emit_subcs(remainder,d2,remainder);
5321 emit_adcs(quotient,quotient,quotient);
5322 emit_shrcc_imm(d2,1,d2);
5323 emit_jcc((int)out-16); // -4
5324 }
5325 }
5326 else // 64-bit
4600ba03 5327#ifndef FORCE32
57871462 5328 {
5329 if(opcode2[i]==0x1C) // DMULT
5330 {
5331 assert(opcode2[i]!=0x1C);
5332 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5333 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5334 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5335 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5336 assert(m1h>=0);
5337 assert(m2h>=0);
5338 assert(m1l>=0);
5339 assert(m2l>=0);
5340 emit_pushreg(m2h);
5341 emit_pushreg(m2l);
5342 emit_pushreg(m1h);
5343 emit_pushreg(m1l);
5344 emit_call((int)&mult64);
5345 emit_popreg(m1l);
5346 emit_popreg(m1h);
5347 emit_popreg(m2l);
5348 emit_popreg(m2h);
5349 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5350 signed char hil=get_reg(i_regs->regmap,HIREG);
5351 if(hih>=0) emit_loadreg(HIREG|64,hih);
5352 if(hil>=0) emit_loadreg(HIREG,hil);
5353 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5354 signed char lol=get_reg(i_regs->regmap,LOREG);
5355 if(loh>=0) emit_loadreg(LOREG|64,loh);
5356 if(lol>=0) emit_loadreg(LOREG,lol);
5357 }
5358 if(opcode2[i]==0x1D) // DMULTU
5359 {
5360 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5361 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5362 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5363 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5364 assert(m1h>=0);
5365 assert(m2h>=0);
5366 assert(m1l>=0);
5367 assert(m2l>=0);
5368 save_regs(0x100f);
5369 if(m1l!=0) emit_mov(m1l,0);
5370 if(m1h==0) emit_readword((int)&dynarec_local,1);
5371 else if(m1h>1) emit_mov(m1h,1);
5372 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5373 else if(m2l>2) emit_mov(m2l,2);
5374 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5375 else if(m2h>3) emit_mov(m2h,3);
5376 emit_call((int)&multu64);
5377 restore_regs(0x100f);
5378 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5379 signed char hil=get_reg(i_regs->regmap,HIREG);
5380 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5381 signed char lol=get_reg(i_regs->regmap,LOREG);
5382 /*signed char temp=get_reg(i_regs->regmap,-1);
5383 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5384 signed char rl=get_reg(i_regs->regmap,HIREG);
5385 assert(m1h>=0);
5386 assert(m2h>=0);
5387 assert(m1l>=0);
5388 assert(m2l>=0);
5389 assert(temp>=0);
5390 //emit_mov(m1l,EAX);
5391 //emit_mul(m2l);
5392 emit_umull(rl,rh,m1l,m2l);
5393 emit_storereg(LOREG,rl);
5394 emit_mov(rh,temp);
5395 //emit_mov(m1h,EAX);
5396 //emit_mul(m2l);
5397 emit_umull(rl,rh,m1h,m2l);
5398 emit_adds(rl,temp,temp);
5399 emit_adcimm(rh,0,rh);
5400 emit_storereg(HIREG,rh);
5401 //emit_mov(m2h,EAX);
5402 //emit_mul(m1l);
5403 emit_umull(rl,rh,m1l,m2h);
5404 emit_adds(rl,temp,temp);
5405 emit_adcimm(rh,0,rh);
5406 emit_storereg(LOREG|64,temp);
5407 emit_mov(rh,temp);
5408 //emit_mov(m2h,EAX);
5409 //emit_mul(m1h);
5410 emit_umull(rl,rh,m1h,m2h);
5411 emit_adds(rl,temp,rl);
5412 emit_loadreg(HIREG,temp);
5413 emit_adcimm(rh,0,rh);
5414 emit_adds(rl,temp,rl);
5415 emit_adcimm(rh,0,rh);
5416 // DEBUG
5417 /*
5418 emit_pushreg(m2h);
5419 emit_pushreg(m2l);
5420 emit_pushreg(m1h);
5421 emit_pushreg(m1l);
5422 emit_call((int)&multu64);
5423 emit_popreg(m1l);
5424 emit_popreg(m1h);
5425 emit_popreg(m2l);
5426 emit_popreg(m2h);
5427 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5428 signed char hil=get_reg(i_regs->regmap,HIREG);
5429 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5430 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5431 */
5432 // Shouldn't be necessary
5433 //char loh=get_reg(i_regs->regmap,LOREG|64);
5434 //char lol=get_reg(i_regs->regmap,LOREG);
5435 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5436 //if(lol>=0) emit_loadreg(LOREG,lol);
5437 }
5438 if(opcode2[i]==0x1E) // DDIV
5439 {
5440 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5441 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5442 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5443 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5444 assert(d1h>=0);
5445 assert(d2h>=0);
5446 assert(d1l>=0);
5447 assert(d2l>=0);
5448 save_regs(0x100f);
5449 if(d1l!=0) emit_mov(d1l,0);
5450 if(d1h==0) emit_readword((int)&dynarec_local,1);
5451 else if(d1h>1) emit_mov(d1h,1);
5452 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5453 else if(d2l>2) emit_mov(d2l,2);
5454 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5455 else if(d2h>3) emit_mov(d2h,3);
5456 emit_call((int)&div64);
5457 restore_regs(0x100f);
5458 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5459 signed char hil=get_reg(i_regs->regmap,HIREG);
5460 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5461 signed char lol=get_reg(i_regs->regmap,LOREG);
5462 if(hih>=0) emit_loadreg(HIREG|64,hih);
5463 if(hil>=0) emit_loadreg(HIREG,hil);
5464 if(loh>=0) emit_loadreg(LOREG|64,loh);
5465 if(lol>=0) emit_loadreg(LOREG,lol);
5466 }
5467 if(opcode2[i]==0x1F) // DDIVU
5468 {
5469 //u_int hr,reglist=0;
5470 //for(hr=0;hr<HOST_REGS;hr++) {
5471 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5472 //}
5473 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5474 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5475 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5476 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5477 assert(d1h>=0);
5478 assert(d2h>=0);
5479 assert(d1l>=0);
5480 assert(d2l>=0);
5481 save_regs(0x100f);
5482 if(d1l!=0) emit_mov(d1l,0);
5483 if(d1h==0) emit_readword((int)&dynarec_local,1);
5484 else if(d1h>1) emit_mov(d1h,1);
5485 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5486 else if(d2l>2) emit_mov(d2l,2);
5487 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5488 else if(d2h>3) emit_mov(d2h,3);
5489 emit_call((int)&divu64);
5490 restore_regs(0x100f);
5491 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5492 signed char hil=get_reg(i_regs->regmap,HIREG);
5493 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5494 signed char lol=get_reg(i_regs->regmap,LOREG);
5495 if(hih>=0) emit_loadreg(HIREG|64,hih);
5496 if(hil>=0) emit_loadreg(HIREG,hil);
5497 if(loh>=0) emit_loadreg(LOREG|64,loh);
5498 if(lol>=0) emit_loadreg(LOREG,lol);
5499 }
5500 }
4600ba03 5501#else
5502 assert(0);
5503#endif
57871462 5504 }
5505 else
5506 {
5507 // Multiply by zero is zero.
5508 // MIPS does not have a divide by zero exception.
5509 // The result is undefined, we return zero.
5510 signed char hr=get_reg(i_regs->regmap,HIREG);
5511 signed char lr=get_reg(i_regs->regmap,LOREG);
5512 if(hr>=0) emit_zeroreg(hr);
5513 if(lr>=0) emit_zeroreg(lr);
5514 }
5515}
5516#define multdiv_assemble multdiv_assemble_arm
5517
// Intentionally emits nothing on ARM.  On x86 this step puts the value
// 0xf8 into the register; on ARM the hash can be done with a single
// instruction (see do_rhash), so no preload is needed.
//
// r - register that would receive the mask (unused here)
void do_preload_rhash(int r) {
  (void)r;
}
5522
5523void do_preload_rhtbl(int ht) {
5524 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5525}
5526
// Emit the hash computation: rh = rs & 0xf8.
//
// rs - host register holding the value to hash
// rh - host register that receives the masked value
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5530
5531void do_miniht_load(int ht,int rh) {
5532 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5533 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5534}
5535
5536void do_miniht_jump(int rs,int rh,int ht) {
5537 emit_cmp(rh,rs);
5538 emit_ldreq_indexed(ht,4,15);
5539 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5540 emit_mov(rs,7);
5541 emit_jmp(jump_vaddr_reg[7]);
5542 #else
5543 emit_jmp(jump_vaddr_reg[rs]);
5544 #endif
5545}
5546
5547void do_miniht_insert(u_int return_address,int rt,int temp) {
5548 #ifdef ARMv5_ONLY
5549 emit_movimm(return_address,rt); // PC into link register
5550 add_to_linker((int)out,return_address,1);
5551 emit_pcreladdr(temp);
5552 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5553 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5554 #else
5555 emit_movw(return_address&0x0000FFFF,rt);
5556 add_to_linker((int)out,return_address,1);
5557 emit_pcreladdr(temp);
5558 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5559 emit_movt(return_address&0xFFFF0000,rt);
5560 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5561 #endif
5562}
5563
5564// Sign-extend to 64 bits and write out upper half of a register
5565// This is useful where we have a 32-bit value in a register, and want to
5566// keep it in a 32-bit register, but can't guarantee that it won't be read
5567// as a 64-bit value later.
5568void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5569{
24385cae 5570#ifndef FORCE32
57871462 5571 if(is32_pre==is32) return;
5572 int hr,reg;
5573 for(hr=0;hr<HOST_REGS;hr++) {
5574 if(hr!=EXCLUDE_REG) {
5575 //if(pre[hr]==entry[hr]) {
5576 if((reg=pre[hr])>=0) {
5577 if((dirty>>hr)&1) {
5578 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5579 emit_sarimm(hr,31,HOST_TEMPREG);
5580 emit_storereg(reg|64,HOST_TEMPREG);
5581 }
5582 }
5583 }
5584 //}
5585 }
5586 }
24385cae 5587#endif
57871462 5588}
5589
5590void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5591{
5592 //if(dirty_pre==dirty) return;
5593 int hr,reg,new_hr;
5594 for(hr=0;hr<HOST_REGS;hr++) {
5595 if(hr!=EXCLUDE_REG) {
5596 reg=pre[hr];
5597 if(((~u)>>(reg&63))&1) {
f776eb14 5598 if(reg>0) {
57871462 5599 if(((dirty_pre&~dirty)>>hr)&1) {
5600 if(reg>0&&reg<34) {
5601 emit_storereg(reg,hr);
5602 if( ((is32_pre&~uu)>>reg)&1 ) {
5603 emit_sarimm(hr,31,HOST_TEMPREG);
5604 emit_storereg(reg|64,HOST_TEMPREG);
5605 }
5606 }
5607 else if(reg>=64) {
5608 emit_storereg(reg,hr);
5609 }
5610 }
5611 }
57871462 5612 }
5613 }
5614 }
5615}
5616
5617
5618/* using strd could possibly help but you'd have to allocate registers in pairs
5619void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5620{
5621 int hr;
5622 int wrote=-1;
5623 for(hr=HOST_REGS-1;hr>=0;hr--) {
5624 if(hr!=EXCLUDE_REG) {
5625 if(pre[hr]!=entry[hr]) {
5626 if(pre[hr]>=0) {
5627 if((dirty>>hr)&1) {
5628 if(get_reg(entry,pre[hr])<0) {
5629 if(pre[hr]<64) {
5630 if(!((u>>pre[hr])&1)) {
5631 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5632 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5633 emit_sarimm(hr,31,hr+1);
5634 emit_strdreg(pre[hr],hr);
5635 }
5636 else
5637 emit_storereg(pre[hr],hr);
5638 }else{
5639 emit_storereg(pre[hr],hr);
5640 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5641 emit_sarimm(hr,31,hr);
5642 emit_storereg(pre[hr]|64,hr);
5643 }
5644 }
5645 }
5646 }else{
5647 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5648 emit_storereg(pre[hr],hr);
5649 }
5650 }
5651 wrote=hr;
5652 }
5653 }
5654 }
5655 }
5656 }
5657 }
5658 for(hr=0;hr<HOST_REGS;hr++) {
5659 if(hr!=EXCLUDE_REG) {
5660 if(pre[hr]!=entry[hr]) {
5661 if(pre[hr]>=0) {
5662 int nr;
5663 if((nr=get_reg(entry,pre[hr]))>=0) {
5664 emit_mov(hr,nr);
5665 }
5666 }
5667 }
5668 }
5669 }
5670}
5671#define wb_invalidate wb_invalidate_arm
5672*/
5673
dd3a91a1 5674// Clearing the cache is rather slow on ARM Linux, so mark the areas
5675// that need to be cleared, and then only clear these areas once.
5676void do_clear_cache()
5677{
5678 int i,j;
5679 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5680 {
5681 u_int bitmap=needs_clear_cache[i];
5682 if(bitmap) {
5683 u_int start,end;
5684 for(j=0;j<32;j++)
5685 {
5686 if(bitmap&(1<<j)) {
bdeade46 5687 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 5688 end=start+4095;
5689 j++;
5690 while(j<32) {
5691 if(bitmap&(1<<j)) {
5692 end+=4096;
5693 j++;
5694 }else{
5695 __clear_cache((void *)start,(void *)end);
5696 break;
5697 }
5698 }
5699 }
5700 }
5701 needs_clear_cache[i]=0;
5702 }
5703 }
5704}
5705
57871462 5706// CPU-architecture-specific initialization
5707void arch_init() {
3d624f89 5708#ifndef DISABLE_COP1
57871462 5709 rounding_modes[0]=0x0<<22; // round
5710 rounding_modes[1]=0x3<<22; // trunc
5711 rounding_modes[2]=0x1<<22; // ceil
5712 rounding_modes[3]=0x2<<22; // floor
3d624f89 5713#endif
57871462 5714}
b9b61529 5715
5716// vim:shiftwidth=2:expandtab