support emulated RAM mapped at offset
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
6c0eefaf 23#include "../gte.h"
24#define FLAGLESS
25#include "../gte.h"
26#undef FLAGLESS
054175e9 27#include "../gte_arm.h"
28#include "../gte_neon.h"
29#include "pcnt.h"
30#endif
31
a327ad27 32#if !BASE_ADDR_FIXED
bdeade46 33char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
34#endif
35
57871462 36extern int cycle_count;
37extern int last_count;
38extern int pcaddr;
39extern int pending_exception;
40extern int branch_target;
41extern uint64_t readmem_dword;
3d624f89 42#ifdef MUPEN64
57871462 43extern precomp_instr fake_pc;
3d624f89 44#endif
57871462 45extern void *dynarec_local;
46extern u_int memory_map[1048576];
47extern u_int mini_ht[32][2];
48extern u_int rounding_modes[4];
49
50void indirect_jump_indexed();
51void indirect_jump();
52void do_interrupt();
53void jump_vaddr_r0();
54void jump_vaddr_r1();
55void jump_vaddr_r2();
56void jump_vaddr_r3();
57void jump_vaddr_r4();
58void jump_vaddr_r5();
59void jump_vaddr_r6();
60void jump_vaddr_r7();
61void jump_vaddr_r8();
62void jump_vaddr_r9();
63void jump_vaddr_r10();
64void jump_vaddr_r12();
65
66const u_int jump_vaddr_reg[16] = {
67 (int)jump_vaddr_r0,
68 (int)jump_vaddr_r1,
69 (int)jump_vaddr_r2,
70 (int)jump_vaddr_r3,
71 (int)jump_vaddr_r4,
72 (int)jump_vaddr_r5,
73 (int)jump_vaddr_r6,
74 (int)jump_vaddr_r7,
75 (int)jump_vaddr_r8,
76 (int)jump_vaddr_r9,
77 (int)jump_vaddr_r10,
78 0,
79 (int)jump_vaddr_r12,
80 0,
81 0,
82 0};
83
0bbd1454 84void invalidate_addr_r0();
85void invalidate_addr_r1();
86void invalidate_addr_r2();
87void invalidate_addr_r3();
88void invalidate_addr_r4();
89void invalidate_addr_r5();
90void invalidate_addr_r6();
91void invalidate_addr_r7();
92void invalidate_addr_r8();
93void invalidate_addr_r9();
94void invalidate_addr_r10();
95void invalidate_addr_r12();
96
97const u_int invalidate_addr_reg[16] = {
98 (int)invalidate_addr_r0,
99 (int)invalidate_addr_r1,
100 (int)invalidate_addr_r2,
101 (int)invalidate_addr_r3,
102 (int)invalidate_addr_r4,
103 (int)invalidate_addr_r5,
104 (int)invalidate_addr_r6,
105 (int)invalidate_addr_r7,
106 (int)invalidate_addr_r8,
107 (int)invalidate_addr_r9,
108 (int)invalidate_addr_r10,
109 0,
110 (int)invalidate_addr_r12,
111 0,
112 0,
113 0};
114
57871462 115#include "fpu.h"
116
dd3a91a1 117unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
118
57871462 119/* Linker */
120
121void set_jump_target(int addr,u_int target)
122{
123 u_char *ptr=(u_char *)addr;
124 u_int *ptr2=(u_int *)ptr;
125 if(ptr[3]==0xe2) {
126 assert((target-(u_int)ptr2-8)<1024);
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
131 }
132 else if(ptr[3]==0x72) {
133 // generated by emit_jno_unlikely
134 if((target-(u_int)ptr2-8)<1024) {
135 assert((addr&3)==0);
136 assert((target&3)==0);
137 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
138 }
139 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
140 assert((addr&3)==0);
141 assert((target&3)==0);
142 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
143 }
144 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146 else {
147 assert((ptr[3]&0x0e)==0xa);
148 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
149 }
150}
151
152// This optionally copies the instruction from the target of the branch into
153// the space before the branch. Works, but the difference in speed is
154// usually insignificant.
155void set_jump_target_fillslot(int addr,u_int target,int copy)
156{
157 u_char *ptr=(u_char *)addr;
158 u_int *ptr2=(u_int *)ptr;
159 assert(!copy||ptr2[-1]==0xe28dd000);
160 if(ptr[3]==0xe2) {
161 assert(!copy);
162 assert((target-(u_int)ptr2-8)<4096);
163 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
164 }
165 else {
166 assert((ptr[3]&0x0e)==0xa);
167 u_int target_insn=*(u_int *)target;
168 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
169 copy=0;
170 }
171 if((target_insn&0x0c100000)==0x04100000) { // Load
172 copy=0;
173 }
174 if(target_insn&0x08000000) {
175 copy=0;
176 }
177 if(copy) {
178 ptr2[-1]=target_insn;
179 target+=4;
180 }
181 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
182 }
183}
184
185/* Literal pool */
186add_literal(int addr,int val)
187{
15776b68 188 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 189 literals[literalcount][0]=addr;
190 literals[literalcount][1]=val;
191 literalcount++;
192}
193
f76eeef9 194void *kill_pointer(void *stub)
57871462 195{
196 int *ptr=(int *)(stub+4);
197 assert((*ptr&0x0ff00000)==0x05900000);
198 u_int offset=*ptr&0xfff;
199 int **l_ptr=(void *)ptr+offset+8;
200 int *i_ptr=*l_ptr;
201 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 202 return i_ptr;
57871462 203}
204
f968d35d 205// find where external branch is liked to using addr of it's stub:
206// get address that insn one after stub loads (dyna_linker arg1),
207// treat it as a pointer to branch insn,
208// return addr where that branch jumps to
57871462 209int get_pointer(void *stub)
210{
211 //printf("get_pointer(%x)\n",(int)stub);
212 int *ptr=(int *)(stub+4);
f968d35d 213 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 214 u_int offset=*ptr&0xfff;
215 int **l_ptr=(void *)ptr+offset+8;
216 int *i_ptr=*l_ptr;
217 assert((*i_ptr&0x0f000000)==0x0a000000);
218 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
219}
220
221// Find the "clean" entry point from a "dirty" entry point
222// by skipping past the call to verify_code
223u_int get_clean_addr(int addr)
224{
225 int *ptr=(int *)addr;
226 #ifdef ARMv5_ONLY
227 ptr+=4;
228 #else
229 ptr+=6;
230 #endif
231 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
232 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
233 ptr++;
234 if((*ptr&0xFF000000)==0xea000000) {
235 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
236 }
237 return (u_int)ptr;
238}
239
240int verify_dirty(int addr)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
15776b68 245 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 262#ifndef DISABLE_TLB
cfcba99a 263 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 264 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
265 unsigned int page=source>>12;
266 unsigned int map_value=memory_map[page];
267 if(map_value>=0x80000000) return 0;
268 while(page<((source+len-1)>>12)) {
269 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
270 }
271 source = source+(map_value<<2);
272 }
63cb0298 273#endif
57871462 274 //printf("verify_dirty: %x %x %x\n",source,copy,len);
275 return !memcmp((void *)source,(void *)copy,len);
276}
277
278// This doesn't necessarily find all clean entry points, just
279// guarantees that it's not dirty
280int isclean(int addr)
281{
282 #ifdef ARMv5_ONLY
283 int *ptr=((u_int *)addr)+4;
284 #else
285 int *ptr=((u_int *)addr)+6;
286 #endif
287 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
288 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
289 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
290 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
291 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
292 return 1;
293}
294
295void get_bounds(int addr,u_int *start,u_int *end)
296{
297 u_int *ptr=(u_int *)addr;
298 #ifdef ARMv5_ONLY
299 // get from literal pool
15776b68 300 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 301 u_int offset=*ptr&0xfff;
302 u_int *l_ptr=(void *)ptr+offset+8;
303 u_int source=l_ptr[0];
304 //u_int copy=l_ptr[1];
305 u_int len=l_ptr[2];
306 ptr+=4;
307 #else
308 // ARMv7 movw/movt
309 assert((*ptr&0xFFF00000)==0xe3000000);
310 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
311 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
312 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
313 ptr+=6;
314 #endif
315 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
316 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 317#ifndef DISABLE_TLB
cfcba99a 318 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 319 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
320 if(memory_map[source>>12]>=0x80000000) source = 0;
321 else source = source+(memory_map[source>>12]<<2);
322 }
63cb0298 323#endif
57871462 324 *start=source;
325 *end=source+len;
326}
327
328/* Register allocation */
329
330// Note: registers are allocated clean (unmodified state)
331// if you intend to modify the register, you must call dirty_reg().
332void alloc_reg(struct regstat *cur,int i,signed char reg)
333{
334 int r,hr;
335 int preferred_reg = (reg&7);
336 if(reg==CCREG) preferred_reg=HOST_CCREG;
337 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
338
339 // Don't allocate unused registers
340 if((cur->u>>reg)&1) return;
341
342 // see if it's already allocated
343 for(hr=0;hr<HOST_REGS;hr++)
344 {
345 if(cur->regmap[hr]==reg) return;
346 }
347
348 // Keep the same mapping if the register was already allocated in a loop
349 preferred_reg = loop_reg(i,reg,preferred_reg);
350
351 // Try to allocate the preferred register
352 if(cur->regmap[preferred_reg]==-1) {
353 cur->regmap[preferred_reg]=reg;
354 cur->dirty&=~(1<<preferred_reg);
355 cur->isconst&=~(1<<preferred_reg);
356 return;
357 }
358 r=cur->regmap[preferred_reg];
359 if(r<64&&((cur->u>>r)&1)) {
360 cur->regmap[preferred_reg]=reg;
361 cur->dirty&=~(1<<preferred_reg);
362 cur->isconst&=~(1<<preferred_reg);
363 return;
364 }
365 if(r>=64&&((cur->uu>>(r&63))&1)) {
366 cur->regmap[preferred_reg]=reg;
367 cur->dirty&=~(1<<preferred_reg);
368 cur->isconst&=~(1<<preferred_reg);
369 return;
370 }
371
372 // Clear any unneeded registers
373 // We try to keep the mapping consistent, if possible, because it
374 // makes branches easier (especially loops). So we try to allocate
375 // first (see above) before removing old mappings. If this is not
376 // possible then go ahead and clear out the registers that are no
377 // longer needed.
378 for(hr=0;hr<HOST_REGS;hr++)
379 {
380 r=cur->regmap[hr];
381 if(r>=0) {
382 if(r<64) {
383 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
384 }
385 else
386 {
387 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
388 }
389 }
390 }
391 // Try to allocate any available register, but prefer
392 // registers that have not been used recently.
393 if(i>0) {
394 for(hr=0;hr<HOST_REGS;hr++) {
395 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
396 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
397 cur->regmap[hr]=reg;
398 cur->dirty&=~(1<<hr);
399 cur->isconst&=~(1<<hr);
400 return;
401 }
402 }
403 }
404 }
405 // Try to allocate any available register
406 for(hr=0;hr<HOST_REGS;hr++) {
407 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
408 cur->regmap[hr]=reg;
409 cur->dirty&=~(1<<hr);
410 cur->isconst&=~(1<<hr);
411 return;
412 }
413 }
414
415 // Ok, now we have to evict someone
416 // Pick a register we hopefully won't need soon
417 u_char hsn[MAXREG+1];
418 memset(hsn,10,sizeof(hsn));
419 int j;
420 lsn(hsn,i,&preferred_reg);
421 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
422 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
423 if(i>0) {
424 // Don't evict the cycle count at entry points, otherwise the entry
425 // stub will have to write it.
426 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
427 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
428 for(j=10;j>=3;j--)
429 {
430 // Alloc preferred register if available
431 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
432 for(hr=0;hr<HOST_REGS;hr++) {
433 // Evict both parts of a 64-bit register
434 if((cur->regmap[hr]&63)==r) {
435 cur->regmap[hr]=-1;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 }
439 }
440 cur->regmap[preferred_reg]=reg;
441 return;
442 }
443 for(r=1;r<=MAXREG;r++)
444 {
445 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
446 for(hr=0;hr<HOST_REGS;hr++) {
447 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
448 if(cur->regmap[hr]==r+64) {
449 cur->regmap[hr]=reg;
450 cur->dirty&=~(1<<hr);
451 cur->isconst&=~(1<<hr);
452 return;
453 }
454 }
455 }
456 for(hr=0;hr<HOST_REGS;hr++) {
457 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
458 if(cur->regmap[hr]==r) {
459 cur->regmap[hr]=reg;
460 cur->dirty&=~(1<<hr);
461 cur->isconst&=~(1<<hr);
462 return;
463 }
464 }
465 }
466 }
467 }
468 }
469 }
470 for(j=10;j>=0;j--)
471 {
472 for(r=1;r<=MAXREG;r++)
473 {
474 if(hsn[r]==j) {
475 for(hr=0;hr<HOST_REGS;hr++) {
476 if(cur->regmap[hr]==r+64) {
477 cur->regmap[hr]=reg;
478 cur->dirty&=~(1<<hr);
479 cur->isconst&=~(1<<hr);
480 return;
481 }
482 }
483 for(hr=0;hr<HOST_REGS;hr++) {
484 if(cur->regmap[hr]==r) {
485 cur->regmap[hr]=reg;
486 cur->dirty&=~(1<<hr);
487 cur->isconst&=~(1<<hr);
488 return;
489 }
490 }
491 }
492 }
493 }
494 printf("This shouldn't happen (alloc_reg)");exit(1);
495}
496
497void alloc_reg64(struct regstat *cur,int i,signed char reg)
498{
499 int preferred_reg = 8+(reg&1);
500 int r,hr;
501
502 // allocate the lower 32 bits
503 alloc_reg(cur,i,reg);
504
505 // Don't allocate unused registers
506 if((cur->uu>>reg)&1) return;
507
508 // see if the upper half is already allocated
509 for(hr=0;hr<HOST_REGS;hr++)
510 {
511 if(cur->regmap[hr]==reg+64) return;
512 }
513
514 // Keep the same mapping if the register was already allocated in a loop
515 preferred_reg = loop_reg(i,reg,preferred_reg);
516
517 // Try to allocate the preferred register
518 if(cur->regmap[preferred_reg]==-1) {
519 cur->regmap[preferred_reg]=reg|64;
520 cur->dirty&=~(1<<preferred_reg);
521 cur->isconst&=~(1<<preferred_reg);
522 return;
523 }
524 r=cur->regmap[preferred_reg];
525 if(r<64&&((cur->u>>r)&1)) {
526 cur->regmap[preferred_reg]=reg|64;
527 cur->dirty&=~(1<<preferred_reg);
528 cur->isconst&=~(1<<preferred_reg);
529 return;
530 }
531 if(r>=64&&((cur->uu>>(r&63))&1)) {
532 cur->regmap[preferred_reg]=reg|64;
533 cur->dirty&=~(1<<preferred_reg);
534 cur->isconst&=~(1<<preferred_reg);
535 return;
536 }
537
538 // Clear any unneeded registers
539 // We try to keep the mapping consistent, if possible, because it
540 // makes branches easier (especially loops). So we try to allocate
541 // first (see above) before removing old mappings. If this is not
542 // possible then go ahead and clear out the registers that are no
543 // longer needed.
544 for(hr=HOST_REGS-1;hr>=0;hr--)
545 {
546 r=cur->regmap[hr];
547 if(r>=0) {
548 if(r<64) {
549 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
550 }
551 else
552 {
553 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
554 }
555 }
556 }
557 // Try to allocate any available register, but prefer
558 // registers that have not been used recently.
559 if(i>0) {
560 for(hr=0;hr<HOST_REGS;hr++) {
561 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
562 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
563 cur->regmap[hr]=reg|64;
564 cur->dirty&=~(1<<hr);
565 cur->isconst&=~(1<<hr);
566 return;
567 }
568 }
569 }
570 }
571 // Try to allocate any available register
572 for(hr=0;hr<HOST_REGS;hr++) {
573 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
574 cur->regmap[hr]=reg|64;
575 cur->dirty&=~(1<<hr);
576 cur->isconst&=~(1<<hr);
577 return;
578 }
579 }
580
581 // Ok, now we have to evict someone
582 // Pick a register we hopefully won't need soon
583 u_char hsn[MAXREG+1];
584 memset(hsn,10,sizeof(hsn));
585 int j;
586 lsn(hsn,i,&preferred_reg);
587 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
588 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
589 if(i>0) {
590 // Don't evict the cycle count at entry points, otherwise the entry
591 // stub will have to write it.
592 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
593 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
594 for(j=10;j>=3;j--)
595 {
596 // Alloc preferred register if available
597 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
598 for(hr=0;hr<HOST_REGS;hr++) {
599 // Evict both parts of a 64-bit register
600 if((cur->regmap[hr]&63)==r) {
601 cur->regmap[hr]=-1;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 }
605 }
606 cur->regmap[preferred_reg]=reg|64;
607 return;
608 }
609 for(r=1;r<=MAXREG;r++)
610 {
611 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
612 for(hr=0;hr<HOST_REGS;hr++) {
613 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
614 if(cur->regmap[hr]==r+64) {
615 cur->regmap[hr]=reg|64;
616 cur->dirty&=~(1<<hr);
617 cur->isconst&=~(1<<hr);
618 return;
619 }
620 }
621 }
622 for(hr=0;hr<HOST_REGS;hr++) {
623 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
624 if(cur->regmap[hr]==r) {
625 cur->regmap[hr]=reg|64;
626 cur->dirty&=~(1<<hr);
627 cur->isconst&=~(1<<hr);
628 return;
629 }
630 }
631 }
632 }
633 }
634 }
635 }
636 for(j=10;j>=0;j--)
637 {
638 for(r=1;r<=MAXREG;r++)
639 {
640 if(hsn[r]==j) {
641 for(hr=0;hr<HOST_REGS;hr++) {
642 if(cur->regmap[hr]==r+64) {
643 cur->regmap[hr]=reg|64;
644 cur->dirty&=~(1<<hr);
645 cur->isconst&=~(1<<hr);
646 return;
647 }
648 }
649 for(hr=0;hr<HOST_REGS;hr++) {
650 if(cur->regmap[hr]==r) {
651 cur->regmap[hr]=reg|64;
652 cur->dirty&=~(1<<hr);
653 cur->isconst&=~(1<<hr);
654 return;
655 }
656 }
657 }
658 }
659 }
660 printf("This shouldn't happen");exit(1);
661}
662
663// Allocate a temporary register. This is done without regard to
664// dirty status or whether the register we request is on the unneeded list
665// Note: This will only allocate one register, even if called multiple times
666void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
667{
668 int r,hr;
669 int preferred_reg = -1;
670
671 // see if it's already allocated
672 for(hr=0;hr<HOST_REGS;hr++)
673 {
674 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
675 }
676
677 // Try to allocate any available register
678 for(hr=HOST_REGS-1;hr>=0;hr--) {
679 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
680 cur->regmap[hr]=reg;
681 cur->dirty&=~(1<<hr);
682 cur->isconst&=~(1<<hr);
683 return;
684 }
685 }
686
687 // Find an unneeded register
688 for(hr=HOST_REGS-1;hr>=0;hr--)
689 {
690 r=cur->regmap[hr];
691 if(r>=0) {
692 if(r<64) {
693 if((cur->u>>r)&1) {
694 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
695 cur->regmap[hr]=reg;
696 cur->dirty&=~(1<<hr);
697 cur->isconst&=~(1<<hr);
698 return;
699 }
700 }
701 }
702 else
703 {
704 if((cur->uu>>(r&63))&1) {
705 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
706 cur->regmap[hr]=reg;
707 cur->dirty&=~(1<<hr);
708 cur->isconst&=~(1<<hr);
709 return;
710 }
711 }
712 }
713 }
714 }
715
716 // Ok, now we have to evict someone
717 // Pick a register we hopefully won't need soon
718 // TODO: we might want to follow unconditional jumps here
719 // TODO: get rid of dupe code and make this into a function
720 u_char hsn[MAXREG+1];
721 memset(hsn,10,sizeof(hsn));
722 int j;
723 lsn(hsn,i,&preferred_reg);
724 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
725 if(i>0) {
726 // Don't evict the cycle count at entry points, otherwise the entry
727 // stub will have to write it.
728 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
729 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
730 for(j=10;j>=3;j--)
731 {
732 for(r=1;r<=MAXREG;r++)
733 {
734 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
735 for(hr=0;hr<HOST_REGS;hr++) {
736 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
737 if(cur->regmap[hr]==r+64) {
738 cur->regmap[hr]=reg;
739 cur->dirty&=~(1<<hr);
740 cur->isconst&=~(1<<hr);
741 return;
742 }
743 }
744 }
745 for(hr=0;hr<HOST_REGS;hr++) {
746 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
747 if(cur->regmap[hr]==r) {
748 cur->regmap[hr]=reg;
749 cur->dirty&=~(1<<hr);
750 cur->isconst&=~(1<<hr);
751 return;
752 }
753 }
754 }
755 }
756 }
757 }
758 }
759 for(j=10;j>=0;j--)
760 {
761 for(r=1;r<=MAXREG;r++)
762 {
763 if(hsn[r]==j) {
764 for(hr=0;hr<HOST_REGS;hr++) {
765 if(cur->regmap[hr]==r+64) {
766 cur->regmap[hr]=reg;
767 cur->dirty&=~(1<<hr);
768 cur->isconst&=~(1<<hr);
769 return;
770 }
771 }
772 for(hr=0;hr<HOST_REGS;hr++) {
773 if(cur->regmap[hr]==r) {
774 cur->regmap[hr]=reg;
775 cur->dirty&=~(1<<hr);
776 cur->isconst&=~(1<<hr);
777 return;
778 }
779 }
780 }
781 }
782 }
783 printf("This shouldn't happen");exit(1);
784}
785// Allocate a specific ARM register.
786void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
787{
788 int n;
f776eb14 789 int dirty=0;
57871462 790
791 // see if it's already allocated (and dealloc it)
792 for(n=0;n<HOST_REGS;n++)
793 {
f776eb14 794 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
795 dirty=(cur->dirty>>n)&1;
796 cur->regmap[n]=-1;
797 }
57871462 798 }
799
800 cur->regmap[hr]=reg;
801 cur->dirty&=~(1<<hr);
f776eb14 802 cur->dirty|=dirty<<hr;
57871462 803 cur->isconst&=~(1<<hr);
804}
805
806// Alloc cycle count into dedicated register
807alloc_cc(struct regstat *cur,int i)
808{
809 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
810}
811
812/* Special alloc */
813
814
815/* Assembler */
816
// Printable names of the 16 ARM registers, indexed by register number
// (used by the assem_debug traces).
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"};
834
835void output_byte(u_char byte)
836{
837 *(out++)=byte;
838}
839void output_modrm(u_char mod,u_char rm,u_char ext)
840{
841 assert(mod<4);
842 assert(rm<8);
843 assert(ext<8);
844 u_char byte=(mod<<6)|(ext<<3)|rm;
845 *(out++)=byte;
846}
847void output_sib(u_char scale,u_char index,u_char base)
848{
849 assert(scale<4);
850 assert(index<8);
851 assert(base<8);
852 u_char byte=(scale<<6)|(index<<3)|base;
853 *(out++)=byte;
854}
855void output_w32(u_int word)
856{
857 *((u_int *)out)=word;
858 out+=4;
859}
// Build the register fields of an instruction word: rn in bits 16-19,
// rd in bits 12-15, rm in bits 0-3.  Each register number must be < 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build rn/rd fields plus an 8-bit immediate shifted left by `shift` bits.
// `shift` must be even; it is stored as ((32-shift)&30) in the rotate field
// starting at bit 7.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=(32-shift)&30;
  u_int fields=imm;
  fields|=rotate<<7;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Try to express imm as an 8-bit value plus an even rotation.
// On success stores the 12-bit rotate/immediate field in *encoded and
// returns 1; otherwise returns 0 with *encoded left at 0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 890void genimm_checked(u_int imm,u_int *encoded)
891{
892 u_int ret=genimm(imm,encoded);
893 assert(ret);
894}
57871462 895u_int genjmp(u_int addr)
896{
897 int offset=addr-(int)out-8;
e80343e2 898 if(offset<-33554432||offset>=33554432) {
899 if (addr>2) {
900 printf("genjmp: out of range: %08x\n", offset);
901 exit(1);
902 }
903 return 0;
904 }
57871462 905 return ((u_int)offset>>2)&0xffffff;
906}
907
908void emit_mov(int rs,int rt)
909{
910 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
911 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
912}
913
914void emit_movs(int rs,int rt)
915{
916 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
917 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
918}
919
920void emit_add(int rs1,int rs2,int rt)
921{
922 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
923 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
924}
925
926void emit_adds(int rs1,int rs2,int rt)
927{
928 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
929 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
930}
931
932void emit_adcs(int rs1,int rs2,int rt)
933{
934 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
935 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
936}
937
938void emit_sbc(int rs1,int rs2,int rt)
939{
940 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
941 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
942}
943
944void emit_sbcs(int rs1,int rs2,int rt)
945{
946 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
947 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
948}
949
950void emit_neg(int rs, int rt)
951{
952 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
953 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
954}
955
956void emit_negs(int rs, int rt)
957{
958 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
959 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
960}
961
962void emit_sub(int rs1,int rs2,int rt)
963{
964 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
965 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
966}
967
968void emit_subs(int rs1,int rs2,int rt)
969{
970 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
971 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
972}
973
974void emit_zeroreg(int rt)
975{
976 assem_debug("mov %s,#0\n",regname[rt]);
977 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
978}
979
790ee18e 980void emit_loadlp(u_int imm,u_int rt)
981{
982 add_literal((int)out,imm);
983 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
984 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
985}
986void emit_movw(u_int imm,u_int rt)
987{
988 assert(imm<65536);
989 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
990 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
991}
992void emit_movt(u_int imm,u_int rt)
993{
994 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
995 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
996}
997void emit_movimm(u_int imm,u_int rt)
998{
999 u_int armval;
1000 if(genimm(imm,&armval)) {
1001 assem_debug("mov %s,#%d\n",regname[rt],imm);
1002 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1003 }else if(genimm(~imm,&armval)) {
1004 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1005 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1006 }else if(imm<65536) {
1007 #ifdef ARMv5_ONLY
1008 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1009 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1010 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1011 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1012 #else
1013 emit_movw(imm,rt);
1014 #endif
1015 }else{
1016 #ifdef ARMv5_ONLY
1017 emit_loadlp(imm,rt);
1018 #else
1019 emit_movw(imm&0x0000FFFF,rt);
1020 emit_movt(imm&0xFFFF0000,rt);
1021 #endif
1022 }
1023}
1024void emit_pcreladdr(u_int rt)
1025{
1026 assem_debug("add %s,pc,#?\n",regname[rt]);
1027 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1028}
1029
57871462 1030void emit_loadreg(int r, int hr)
1031{
3d624f89 1032#ifdef FORCE32
1033 if(r&64) {
1034 printf("64bit load in 32bit mode!\n");
7f2607ea 1035 assert(0);
1036 return;
3d624f89 1037 }
1038#endif
57871462 1039 if((r&63)==0)
1040 emit_zeroreg(hr);
1041 else {
3d624f89 1042 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1043 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1044 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1045 if(r==CCREG) addr=(int)&cycle_count;
1046 if(r==CSREG) addr=(int)&Status;
1047 if(r==FSREG) addr=(int)&FCR31;
1048 if(r==INVCP) addr=(int)&invc_ptr;
1049 u_int offset = addr-(u_int)&dynarec_local;
1050 assert(offset<4096);
1051 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1052 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1053 }
1054}
1055void emit_storereg(int r, int hr)
1056{
3d624f89 1057#ifdef FORCE32
1058 if(r&64) {
1059 printf("64bit store in 32bit mode!\n");
7f2607ea 1060 assert(0);
1061 return;
3d624f89 1062 }
1063#endif
1064 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1065 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1066 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1067 if(r==CCREG) addr=(int)&cycle_count;
1068 if(r==FSREG) addr=(int)&FCR31;
1069 u_int offset = addr-(u_int)&dynarec_local;
1070 assert(offset<4096);
1071 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1072 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1073}
1074
1075void emit_test(int rs, int rt)
1076{
1077 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1078 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1079}
1080
1081void emit_testimm(int rs,int imm)
1082{
1083 u_int armval;
5a05d80c 1084 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1085 genimm_checked(imm,&armval);
57871462 1086 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1087}
1088
b9b61529 1089void emit_testeqimm(int rs,int imm)
1090{
1091 u_int armval;
1092 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1093 genimm_checked(imm,&armval);
b9b61529 1094 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1095}
1096
57871462 1097void emit_not(int rs,int rt)
1098{
1099 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1100 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1101}
1102
b9b61529 1103void emit_mvnmi(int rs,int rt)
1104{
1105 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1106 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1107}
1108
57871462 1109void emit_and(u_int rs1,u_int rs2,u_int rt)
1110{
1111 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1112 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1113}
1114
1115void emit_or(u_int rs1,u_int rs2,u_int rt)
1116{
1117 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1118 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1119}
1120void emit_or_and_set_flags(int rs1,int rs2,int rt)
1121{
1122 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1123 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1124}
1125
f70d384d 1126void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1127{
1128 assert(rs<16);
1129 assert(rt<16);
1130 assert(imm<32);
1131 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1132 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1133}
1134
576bbd8f 1135void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1136{
1137 assert(rs<16);
1138 assert(rt<16);
1139 assert(imm<32);
1140 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1141 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1142}
1143
57871462 1144void emit_xor(u_int rs1,u_int rs2,u_int rt)
1145{
1146 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1147 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1148}
1149
57871462 1150void emit_addimm(u_int rs,int imm,u_int rt)
1151{
1152 assert(rs<16);
1153 assert(rt<16);
1154 if(imm!=0) {
57871462 1155 u_int armval;
1156 if(genimm(imm,&armval)) {
1157 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1158 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1159 }else if(genimm(-imm,&armval)) {
8a0a8423 1160 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1161 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1162 }else if(imm<0) {
ffb0b9e0 1163 assert(imm>-65536);
57871462 1164 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1165 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1166 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1167 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1168 }else{
ffb0b9e0 1169 assert(imm<65536);
57871462 1170 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1171 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1172 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1173 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1174 }
1175 }
1176 else if(rs!=rt) emit_mov(rs,rt);
1177}
1178
1179void emit_addimm_and_set_flags(int imm,int rt)
1180{
1181 assert(imm>-65536&&imm<65536);
1182 u_int armval;
1183 if(genimm(imm,&armval)) {
1184 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1185 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1186 }else if(genimm(-imm,&armval)) {
1187 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1188 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1189 }else if(imm<0) {
1190 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1191 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1192 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1193 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1194 }else{
1195 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1196 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1197 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1198 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1199 }
1200}
1201void emit_addimm_no_flags(u_int imm,u_int rt)
1202{
1203 emit_addimm(rt,imm,rt);
1204}
1205
1206void emit_addnop(u_int r)
1207{
1208 assert(r<16);
1209 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1210 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1211}
1212
1213void emit_adcimm(u_int rs,int imm,u_int rt)
1214{
1215 u_int armval;
cfbd3c6e 1216 genimm_checked(imm,&armval);
57871462 1217 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1218 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1219}
1220/*void emit_sbcimm(int imm,u_int rt)
1221{
1222 u_int armval;
cfbd3c6e 1223 genimm_checked(imm,&armval);
57871462 1224 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1225 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1226}*/
1227void emit_sbbimm(int imm,u_int rt)
1228{
1229 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1230 assert(rt<8);
1231 if(imm<128&&imm>=-128) {
1232 output_byte(0x83);
1233 output_modrm(3,rt,3);
1234 output_byte(imm);
1235 }
1236 else
1237 {
1238 output_byte(0x81);
1239 output_modrm(3,rt,3);
1240 output_w32(imm);
1241 }
1242}
1243void emit_rscimm(int rs,int imm,u_int rt)
1244{
1245 assert(0);
1246 u_int armval;
cfbd3c6e 1247 genimm_checked(imm,&armval);
57871462 1248 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1249 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1250}
1251
1252void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1253{
1254 // TODO: if(genimm(imm,&armval)) ...
1255 // else
1256 emit_movimm(imm,HOST_TEMPREG);
1257 emit_adds(HOST_TEMPREG,rsl,rtl);
1258 emit_adcimm(rsh,0,rth);
1259}
1260
1261void emit_sbb(int rs1,int rs2)
1262{
1263 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1264 output_byte(0x19);
1265 output_modrm(3,rs1,rs2);
1266}
1267
1268void emit_andimm(int rs,int imm,int rt)
1269{
1270 u_int armval;
790ee18e 1271 if(imm==0) {
1272 emit_zeroreg(rt);
1273 }else if(genimm(imm,&armval)) {
57871462 1274 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1275 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1276 }else if(genimm(~imm,&armval)) {
1277 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1278 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1279 }else if(imm==65535) {
1280 #ifdef ARMv5_ONLY
1281 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1282 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1283 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1284 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1285 #else
1286 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1287 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1288 #endif
1289 }else{
1290 assert(imm>0&&imm<65535);
1291 #ifdef ARMv5_ONLY
1292 assem_debug("mov r14,#%d\n",imm&0xFF00);
1293 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1294 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1295 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1296 #else
1297 emit_movw(imm,HOST_TEMPREG);
1298 #endif
1299 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1300 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1301 }
1302}
1303
1304void emit_orimm(int rs,int imm,int rt)
1305{
1306 u_int armval;
790ee18e 1307 if(imm==0) {
1308 if(rs!=rt) emit_mov(rs,rt);
1309 }else if(genimm(imm,&armval)) {
57871462 1310 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1311 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1312 }else{
1313 assert(imm>0&&imm<65536);
1314 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1315 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1316 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1317 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1318 }
1319}
1320
1321void emit_xorimm(int rs,int imm,int rt)
1322{
57871462 1323 u_int armval;
790ee18e 1324 if(imm==0) {
1325 if(rs!=rt) emit_mov(rs,rt);
1326 }else if(genimm(imm,&armval)) {
57871462 1327 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1328 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1329 }else{
514ed0d9 1330 assert(imm>0&&imm<65536);
57871462 1331 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1332 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1333 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1334 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1335 }
1336}
1337
1338void emit_shlimm(int rs,u_int imm,int rt)
1339{
1340 assert(imm>0);
1341 assert(imm<32);
1342 //if(imm==1) ...
1343 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1344 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1345}
1346
c6c3b1b3 1347void emit_lsls_imm(int rs,int imm,int rt)
1348{
1349 assert(imm>0);
1350 assert(imm<32);
1351 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1352 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1353}
1354
57871462 1355void emit_shrimm(int rs,u_int imm,int rt)
1356{
1357 assert(imm>0);
1358 assert(imm<32);
1359 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1361}
1362
1363void emit_sarimm(int rs,u_int imm,int rt)
1364{
1365 assert(imm>0);
1366 assert(imm<32);
1367 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1369}
1370
1371void emit_rorimm(int rs,u_int imm,int rt)
1372{
1373 assert(imm>0);
1374 assert(imm<32);
1375 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1376 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1377}
1378
1379void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1380{
1381 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1382 assert(imm>0);
1383 assert(imm<32);
1384 //if(imm==1) ...
1385 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1386 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1387 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1388 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1389}
1390
1391void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1392{
1393 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1394 assert(imm>0);
1395 assert(imm<32);
1396 //if(imm==1) ...
1397 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1398 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1399 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1400 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1401}
1402
b9b61529 1403void emit_signextend16(int rs,int rt)
1404{
1405 #ifdef ARMv5_ONLY
1406 emit_shlimm(rs,16,rt);
1407 emit_sarimm(rt,16,rt);
1408 #else
1409 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1410 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1411 #endif
1412}
1413
c6c3b1b3 1414void emit_signextend8(int rs,int rt)
1415{
1416 #ifdef ARMv5_ONLY
1417 emit_shlimm(rs,24,rt);
1418 emit_sarimm(rt,24,rt);
1419 #else
1420 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1421 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1422 #endif
1423}
1424
57871462 1425void emit_shl(u_int rs,u_int shift,u_int rt)
1426{
1427 assert(rs<16);
1428 assert(rt<16);
1429 assert(shift<16);
1430 //if(imm==1) ...
1431 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1432 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1433}
1434void emit_shr(u_int rs,u_int shift,u_int rt)
1435{
1436 assert(rs<16);
1437 assert(rt<16);
1438 assert(shift<16);
1439 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1440 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1441}
1442void emit_sar(u_int rs,u_int shift,u_int rt)
1443{
1444 assert(rs<16);
1445 assert(rt<16);
1446 assert(shift<16);
1447 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1448 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1449}
1450void emit_shlcl(int r)
1451{
1452 assem_debug("shl %%%s,%%cl\n",regname[r]);
1453 assert(0);
1454}
1455void emit_shrcl(int r)
1456{
1457 assem_debug("shr %%%s,%%cl\n",regname[r]);
1458 assert(0);
1459}
1460void emit_sarcl(int r)
1461{
1462 assem_debug("sar %%%s,%%cl\n",regname[r]);
1463 assert(0);
1464}
1465
1466void emit_shldcl(int r1,int r2)
1467{
1468 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1469 assert(0);
1470}
1471void emit_shrdcl(int r1,int r2)
1472{
1473 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1474 assert(0);
1475}
1476void emit_orrshl(u_int rs,u_int shift,u_int rt)
1477{
1478 assert(rs<16);
1479 assert(rt<16);
1480 assert(shift<16);
1481 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1482 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1483}
1484void emit_orrshr(u_int rs,u_int shift,u_int rt)
1485{
1486 assert(rs<16);
1487 assert(rt<16);
1488 assert(shift<16);
1489 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1490 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1491}
1492
1493void emit_cmpimm(int rs,int imm)
1494{
1495 u_int armval;
1496 if(genimm(imm,&armval)) {
5a05d80c 1497 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1498 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1499 }else if(genimm(-imm,&armval)) {
5a05d80c 1500 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1501 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1502 }else if(imm>0) {
1503 assert(imm<65536);
1504 #ifdef ARMv5_ONLY
1505 emit_movimm(imm,HOST_TEMPREG);
1506 #else
1507 emit_movw(imm,HOST_TEMPREG);
1508 #endif
1509 assem_debug("cmp %s,r14\n",regname[rs]);
1510 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1511 }else{
1512 assert(imm>-65536);
1513 #ifdef ARMv5_ONLY
1514 emit_movimm(-imm,HOST_TEMPREG);
1515 #else
1516 emit_movw(-imm,HOST_TEMPREG);
1517 #endif
1518 assem_debug("cmn %s,r14\n",regname[rs]);
1519 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1520 }
1521}
1522
1523void emit_cmovne(u_int *addr,int rt)
1524{
1525 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1526 assert(0);
1527}
1528void emit_cmovl(u_int *addr,int rt)
1529{
1530 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1531 assert(0);
1532}
1533void emit_cmovs(u_int *addr,int rt)
1534{
1535 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1536 assert(0);
1537}
1538void emit_cmovne_imm(int imm,int rt)
1539{
1540 assem_debug("movne %s,#%d\n",regname[rt],imm);
1541 u_int armval;
cfbd3c6e 1542 genimm_checked(imm,&armval);
57871462 1543 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1544}
1545void emit_cmovl_imm(int imm,int rt)
1546{
1547 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1548 u_int armval;
cfbd3c6e 1549 genimm_checked(imm,&armval);
57871462 1550 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1551}
1552void emit_cmovb_imm(int imm,int rt)
1553{
1554 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1555 u_int armval;
cfbd3c6e 1556 genimm_checked(imm,&armval);
57871462 1557 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1558}
1559void emit_cmovs_imm(int imm,int rt)
1560{
1561 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1562 u_int armval;
cfbd3c6e 1563 genimm_checked(imm,&armval);
57871462 1564 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1565}
1566void emit_cmove_reg(int rs,int rt)
1567{
1568 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1569 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1570}
1571void emit_cmovne_reg(int rs,int rt)
1572{
1573 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1574 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1575}
1576void emit_cmovl_reg(int rs,int rt)
1577{
1578 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1579 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1580}
1581void emit_cmovs_reg(int rs,int rt)
1582{
1583 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1584 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1585}
1586
// rt = (rs < imm) using the signed "lt" condition.
// When rs and rt are the same register the zeroing of rt has to wait
// until after the compare has read rs.
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs < imm) using the "cc" (carry clear) condition.
// When rs and rt are the same register the zeroing of rt has to wait
// until after the compare has read rs.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Signed 64-bit "set if less than immediate" on a register pair
// (rsh:rsl). The low-word result from emit_slti32 is then corrected by
// inspecting the high word; rsh must not be the destination.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // High word is compared against -1.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  // imm>=0: high word is tested against zero.
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned 64-bit "set if less than immediate" on a register pair
// (rsh:rsl). The low-word result from emit_sltiu32 is then corrected by
// inspecting the high word; rsh must not be the destination.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // High word is compared against -1; any other value forces 1.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // imm>=0: a non-zero high word forces 0.
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1633
1634void emit_cmp(int rs,int rt)
1635{
1636 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1637 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1638}
// rt = (rs > 0): compare rs with 1, preset rt to 1, then clear it under
// the "lt" condition.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);     // flags from rs - 1
  emit_movimm(1,rt);     // assume greater than zero
  emit_cmovl_imm(0,rt);  // rs < 1 -> 0
}
// rt = (rs != 0). When the registers differ the value is copied with a
// flag-setting move; when they are the same a test sets the flags. A
// conditional "movne #1" then normalises a non-zero value to 1.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// 64-bit "greater than zero" on a register pair (rsh:rsl): start from the
// low-word answer, then let the high word override it (non-zero -> 1,
// negative -> 0).
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);   // low-word result
  emit_test(rsh,rsh);      // flags from the high word
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// 64-bit "not zero" on a register pair: OR the halves into rt with flags
// set, then normalise a non-zero result to 1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt); // rt = rsh | rsl, flags updated
  emit_cmovne_imm(1,rt);
}
// rt = (rs1 < rs2) using the signed "lt" condition. If rt aliases either
// source, rt is zeroed only after the compare has read its operands.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int dest_is_source=(rs1==rt||rs2==rt);
  if(!dest_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(dest_is_source) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs1 < rs2) using the "cc" condition. If rt aliases either source,
// rt is zeroed only after the compare has read its operands.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int dest_is_source=(rs1==rt||rs2==rt);
  if(!dest_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(dest_is_source) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1683void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1684{
1685 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1686 assert(u1!=rt);
1687 assert(u2!=rt);
1688 emit_cmp(l1,l2);
1689 emit_movimm(0,rt);
1690 emit_sbcs(u1,u2,HOST_TEMPREG);
1691 emit_cmovl_imm(1,rt);
1692}
1693void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1694{
1695 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1696 assert(u1!=rt);
1697 assert(u2!=rt);
1698 emit_cmp(l1,l2);
1699 emit_movimm(0,rt);
1700 emit_sbcs(u1,u2,HOST_TEMPREG);
1701 emit_cmovb_imm(1,rt);
1702}
1703
1704void emit_call(int a)
1705{
1706 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1707 u_int offset=genjmp(a);
1708 output_w32(0xeb000000|offset);
1709}
1710void emit_jmp(int a)
1711{
1712 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1713 u_int offset=genjmp(a);
1714 output_w32(0xea000000|offset);
1715}
1716void emit_jne(int a)
1717{
1718 assem_debug("bne %x\n",a);
1719 u_int offset=genjmp(a);
1720 output_w32(0x1a000000|offset);
1721}
1722void emit_jeq(int a)
1723{
1724 assem_debug("beq %x\n",a);
1725 u_int offset=genjmp(a);
1726 output_w32(0x0a000000|offset);
1727}
1728void emit_js(int a)
1729{
1730 assem_debug("bmi %x\n",a);
1731 u_int offset=genjmp(a);
1732 output_w32(0x4a000000|offset);
1733}
1734void emit_jns(int a)
1735{
1736 assem_debug("bpl %x\n",a);
1737 u_int offset=genjmp(a);
1738 output_w32(0x5a000000|offset);
1739}
1740void emit_jl(int a)
1741{
1742 assem_debug("blt %x\n",a);
1743 u_int offset=genjmp(a);
1744 output_w32(0xba000000|offset);
1745}
1746void emit_jge(int a)
1747{
1748 assem_debug("bge %x\n",a);
1749 u_int offset=genjmp(a);
1750 output_w32(0xaa000000|offset);
1751}
1752void emit_jno(int a)
1753{
1754 assem_debug("bvc %x\n",a);
1755 u_int offset=genjmp(a);
1756 output_w32(0x7a000000|offset);
1757}
1758void emit_jc(int a)
1759{
1760 assem_debug("bcs %x\n",a);
1761 u_int offset=genjmp(a);
1762 output_w32(0x2a000000|offset);
1763}
1764void emit_jcc(int a)
1765{
1766 assem_debug("bcc %x\n",a);
1767 u_int offset=genjmp(a);
1768 output_w32(0x3a000000|offset);
1769}
1770
// Not implemented in this backend: writes the trace line, then fails
// assert(0). Emits no code.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
// Not implemented in this backend: writes the trace line, then fails
// assert(0). Emits no code.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Not implemented in this backend: writes the trace line, then fails
// assert(0). Emits no code.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1786void emit_pushreg(u_int r)
1787{
1788 assem_debug("push %%%s\n",regname[r]);
1789 assert(0);
1790}
1791void emit_popreg(u_int r)
1792{
1793 assem_debug("pop %%%s\n",regname[r]);
1794 assert(0);
1795}
1796void emit_callreg(u_int r)
1797{
c6c3b1b3 1798 assert(r<15);
1799 assem_debug("blx %s\n",regname[r]);
1800 output_w32(0xe12fff30|r);
57871462 1801}
1802void emit_jmpreg(u_int r)
1803{
1804 assem_debug("mov pc,%s\n",regname[r]);
1805 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1806}
1807
1808void emit_readword_indexed(int offset, int rs, int rt)
1809{
1810 assert(offset>-4096&&offset<4096);
1811 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1812 if(offset>=0) {
1813 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1814 }else{
1815 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1816 }
1817}
1818void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1819{
1820 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1821 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1822}
c6c3b1b3 1823void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1824{
1825 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1826 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1827}
1828void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1829{
1830 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1831 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1832}
1833void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1834{
1835 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1836 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1837}
1838void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1839{
1840 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1841 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1842}
1843void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1844{
1845 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1846 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1847}
// Word load with an optional map register. map<0 means no map: use the
// plain base+offset form. Otherwise addr must be 0 and the load is
// indexed by map scaled by four.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Doubleword load into rh (optional, skipped when negative) and rl.
//  - map<0: two plain loads at addr and addr+4.
//  - otherwise: the first word is loaded through map, map is incremented
//    by one, and the second word is loaded through the updated map. rh
//    must not be the base register, since it is written before the
//    second load.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1868void emit_movsbl_indexed(int offset, int rs, int rt)
1869{
1870 assert(offset>-256&&offset<256);
1871 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1872 if(offset>=0) {
1873 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1874 }else{
1875 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1876 }
1877}
1878void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1879{
1880 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1881 else {
1882 if(addr==0) {
1883 emit_shlimm(map,2,map);
1884 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1885 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1886 }else{
1887 assert(addr>-256&&addr<256);
1888 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1889 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1890 emit_movsbl_indexed(addr, rt, rt);
1891 }
1892 }
1893}
1894void emit_movswl_indexed(int offset, int rs, int rt)
1895{
1896 assert(offset>-256&&offset<256);
1897 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1898 if(offset>=0) {
1899 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1900 }else{
1901 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1902 }
1903}
1904void emit_movzbl_indexed(int offset, int rs, int rt)
1905{
1906 assert(offset>-4096&&offset<4096);
1907 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1908 if(offset>=0) {
1909 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1910 }else{
1911 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1912 }
1913}
1914void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1915{
1916 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1917 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1918}
// Unsigned byte load with an optional map register.
//  - map<0: plain base+offset load.
//  - addr==0: load indexed by map scaled by four.
//  - otherwise: rt = rs + addr first, then the scaled-index load from rt.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr==0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs,addr,rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1931void emit_movzwl_indexed(int offset, int rs, int rt)
1932{
1933 assert(offset>-256&&offset<256);
1934 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1935 if(offset>=0) {
1936 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1937 }else{
1938 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1939 }
1940}
054175e9 1941static void emit_ldrd(int offset, int rs, int rt)
1942{
1943 assert(offset>-256&&offset<256);
1944 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1945 if(offset>=0) {
1946 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1947 }else{
1948 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1949 }
1950}
57871462 1951void emit_readword(int addr, int rt)
1952{
1953 u_int offset = addr-(u_int)&dynarec_local;
1954 assert(offset<4096);
1955 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1956 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1957}
1958void emit_movsbl(int addr, int rt)
1959{
1960 u_int offset = addr-(u_int)&dynarec_local;
1961 assert(offset<256);
1962 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1963 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1964}
1965void emit_movswl(int addr, int rt)
1966{
1967 u_int offset = addr-(u_int)&dynarec_local;
1968 assert(offset<256);
1969 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1970 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1971}
1972void emit_movzbl(int addr, int rt)
1973{
1974 u_int offset = addr-(u_int)&dynarec_local;
1975 assert(offset<4096);
1976 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1977 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1978}
1979void emit_movzwl(int addr, int rt)
1980{
1981 u_int offset = addr-(u_int)&dynarec_local;
1982 assert(offset<256);
1983 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1984 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1985}
1986void emit_movzwl_reg(int rs, int rt)
1987{
1988 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1989 assert(0);
1990}
1991
1992void emit_xchg(int rs, int rt)
1993{
1994 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1995 assert(0);
1996}
1997void emit_writeword_indexed(int rt, int offset, int rs)
1998{
1999 assert(offset>-4096&&offset<4096);
2000 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
2001 if(offset>=0) {
2002 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
2003 }else{
2004 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2005 }
2006}
2007void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2008{
2009 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2010 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2011}
// Word store with an optional map register. map<0 means no map: use the
// plain base+offset form. Otherwise addr must be 0 and the store is
// indexed by map scaled by four. temp is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh:rl.
//  - map<0: two plain stores at addr and addr+4 (the first is skipped
//    when rh is negative).
//  - otherwise rh is required. If a scratch register distinct from rs is
//    available, temp = map+1 indexes the second word; if temp is rs
//    itself, rs is advanced by 4 instead and map is reused.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_scratch=(temp!=rs);
  if(have_scratch) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_scratch) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2036void emit_writehword_indexed(int rt, int offset, int rs)
2037{
2038 assert(offset>-256&&offset<256);
2039 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2040 if(offset>=0) {
2041 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2042 }else{
2043 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2044 }
2045}
2046void emit_writebyte_indexed(int rt, int offset, int rs)
2047{
2048 assert(offset>-4096&&offset<4096);
2049 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2050 if(offset>=0) {
2051 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2052 }else{
2053 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2054 }
2055}
2056void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2057{
2058 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2059 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2060}
// Byte store with an optional map register.
//  - map<0: plain base+offset store.
//  - addr==0: store indexed by map scaled by four.
//  - otherwise: temp = rs + addr first, then the scaled-index store
//    from temp.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr==0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs,addr,temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
b96d3df7 2073void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2074{
2075 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2076 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2077}
2078void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2079{
2080 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2081 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2082}
2083void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2084{
2085 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2086 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2087}
57871462 2088void emit_writeword(int rt, int addr)
2089{
2090 u_int offset = addr-(u_int)&dynarec_local;
2091 assert(offset<4096);
2092 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2093 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2094}
2095void emit_writehword(int rt, int addr)
2096{
2097 u_int offset = addr-(u_int)&dynarec_local;
2098 assert(offset<256);
2099 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2100 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2101}
2102void emit_writebyte(int rt, int addr)
2103{
2104 u_int offset = addr-(u_int)&dynarec_local;
2105 assert(offset<4096);
74426039 2106 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2107 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2108}
// Not implemented in this backend: writes the trace line, then fails
// assert(0). Emits no code.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// Not implemented in this backend: writes the trace line, then fails
// assert(0). Emits no code.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2119
2120void emit_mul(int rs)
2121{
2122 assem_debug("mul %%%s\n",regname[rs]);
2123 assert(0);
2124}
2125void emit_imul(int rs)
2126{
2127 assem_debug("imul %%%s\n",regname[rs]);
2128 assert(0);
2129}
2130void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2131{
2132 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2133 assert(rs1<16);
2134 assert(rs2<16);
2135 assert(hi<16);
2136 assert(lo<16);
2137 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2138}
2139void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2140{
2141 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2142 assert(rs1<16);
2143 assert(rs2<16);
2144 assert(hi<16);
2145 assert(lo<16);
2146 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2147}
2148
2149void emit_div(int rs)
2150{
2151 assem_debug("div %%%s\n",regname[rs]);
2152 assert(0);
2153}
2154void emit_idiv(int rs)
2155{
2156 assem_debug("idiv %%%s\n",regname[rs]);
2157 assert(0);
2158}
// "cdq": not implemented in this backend.
// Logs the request and then fails an assertion unconditionally.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2164
2165void emit_clz(int rs,int rt)
2166{
2167 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2168 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2169}
2170
2171void emit_subcs(int rs1,int rs2,int rt)
2172{
2173 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2174 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2175}
2176
2177void emit_shrcc_imm(int rs,u_int imm,int rt)
2178{
2179 assert(imm>0);
2180 assert(imm<32);
2181 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2182 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2183}
2184
b1be1eee 2185void emit_shrne_imm(int rs,u_int imm,int rt)
2186{
2187 assert(imm>0);
2188 assert(imm<32);
2189 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2190 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2191}
2192
57871462 2193void emit_negmi(int rs, int rt)
2194{
2195 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2196 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2197}
2198
2199void emit_negsmi(int rs, int rt)
2200{
2201 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2202 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2203}
2204
2205void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2206{
2207 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2208 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2209}
2210
2211void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2212{
2213 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2214 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2215}
2216
2217void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2218{
2219 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2220 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2221}
2222
2223void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2224{
2225 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2226 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2227}
2228
2229void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2230{
2231 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2232 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2233}
2234
2235void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2236{
2237 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2238 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2239}
2240
2241void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2242{
2243 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2244 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2245}
2246
2247void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2248{
2249 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2250 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2251}
2252
2253void emit_teq(int rs, int rt)
2254{
2255 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2256 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2257}
2258
2259void emit_rsbimm(int rs, int imm, int rt)
2260{
2261 u_int armval;
cfbd3c6e 2262 genimm_checked(imm,&armval);
57871462 2263 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2264 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2265}
2266
2267// Load 2 immediates optimizing for small code size
2268void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2269{
2270 emit_movimm(imm1,rt1);
2271 u_int armval;
2272 if(genimm(imm2-imm1,&armval)) {
2273 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2274 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2275 }else if(genimm(imm1-imm2,&armval)) {
2276 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2277 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2278 }
2279 else emit_movimm(imm2,rt2);
2280}
2281
2282// Conditionally select one of two immediates, optimizing for small code size
2283// This will only be called if HAVE_CMOV_IMM is defined
2284void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2285{
2286 u_int armval;
2287 if(genimm(imm2-imm1,&armval)) {
2288 emit_movimm(imm1,rt);
2289 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2290 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2291 }else if(genimm(imm1-imm2,&armval)) {
2292 emit_movimm(imm1,rt);
2293 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2294 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2295 }
2296 else {
2297 #ifdef ARMv5_ONLY
2298 emit_movimm(imm1,rt);
2299 add_literal((int)out,imm2);
2300 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2301 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2302 #else
2303 emit_movw(imm1&0x0000FFFF,rt);
2304 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2305 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2306 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2307 }
2308 emit_movt(imm1&0xFFFF0000,rt);
2309 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2310 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2311 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2312 }
2313 #endif
2314 }
2315}
2316
// Special case for checking invalid_code (absolute-address variant).
// Not implemented in this backend: always fails an assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2322
2323// special case for checking invalid_code
2324void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2325{
2326 assert(imm<128&&imm>=0);
2327 assert(r>=0&&r<16);
2328 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2329 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2330 emit_cmpimm(HOST_TEMPREG,imm);
2331}
2332
2333// special case for tlb mapping
2334void emit_addsr12(int rs1,int rs2,int rt)
2335{
2336 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2337 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2338}
2339
0bbd1454 2340void emit_callne(int a)
2341{
2342 assem_debug("blne %x\n",a);
2343 u_int offset=genjmp(a);
2344 output_w32(0x1b000000|offset);
2345}
2346
// Used to preload hash table entries
//
// Prefetch from an absolute address is not implemented for ARM; use
// emit_prefetchreg() with the address in a register instead.
// The previous body wrote the x86 encoding (bytes 0F 18, a ModRM byte and a
// 32-bit displacement, 7 bytes in total) into the ARM instruction stream,
// which would corrupt and misalign the generated code. It now behaves like
// the other unimplemented emitters in this file: log, then assert.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  assert(0);
}
2356void emit_prefetchreg(int r)
2357{
2358 assem_debug("pld %s\n",regname[r]);
2359 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2360}
2361
2362// Special case for mini_ht
2363void emit_ldreq_indexed(int rs, u_int offset, int rt)
2364{
2365 assert(offset<4096);
2366 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2367 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2368}
2369
2370void emit_flds(int r,int sr)
2371{
2372 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2373 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2374}
2375
2376void emit_vldr(int r,int vr)
2377{
2378 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2379 output_w32(0xed900b00|(vr<<12)|(r<<16));
2380}
2381
2382void emit_fsts(int sr,int r)
2383{
2384 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2385 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2386}
2387
2388void emit_vstr(int vr,int r)
2389{
2390 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2391 output_w32(0xed800b00|(vr<<12)|(r<<16));
2392}
2393
2394void emit_ftosizs(int s,int d)
2395{
2396 assem_debug("ftosizs s%d,s%d\n",d,s);
2397 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2398}
2399
2400void emit_ftosizd(int s,int d)
2401{
2402 assem_debug("ftosizd s%d,d%d\n",d,s);
2403 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2404}
2405
2406void emit_fsitos(int s,int d)
2407{
2408 assem_debug("fsitos s%d,s%d\n",d,s);
2409 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2410}
2411
2412void emit_fsitod(int s,int d)
2413{
2414 assem_debug("fsitod d%d,s%d\n",d,s);
2415 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2416}
2417
2418void emit_fcvtds(int s,int d)
2419{
2420 assem_debug("fcvtds d%d,s%d\n",d,s);
2421 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2422}
2423
2424void emit_fcvtsd(int s,int d)
2425{
2426 assem_debug("fcvtsd s%d,d%d\n",d,s);
2427 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2428}
2429
// Emit "fsqrts s<d>,s<s>": single-precision square root of s into d.
// Both operands are encoded as single registers; the debug disassembly
// now prints them as such (it used to label the destination "d").
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2435
// Emit "fsqrtd d<d>,d<s>": double-precision square root of s into d
// (d0..d7 only). Both operands are encoded as double registers; the debug
// disassembly now prints them as such (it used to label the destination "s").
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2441
// Emit "fabss s<d>,s<s>": single-precision absolute value of s into d.
// Both operands are encoded as single registers; the debug disassembly
// now prints them as such (it used to label the destination "d").
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2447
// Emit "fabsd d<d>,d<s>": double-precision absolute value of s into d
// (d0..d7 only). Both operands are encoded as double registers; the debug
// disassembly now prints them as such (it used to label the destination "s").
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2453
// Emit "fnegs s<d>,s<s>": single-precision negation of s into d.
// Both operands are encoded as single registers; the debug disassembly
// now prints them as such (it used to label the destination "d").
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2459
// Emit "fnegd d<d>,d<s>": double-precision negation of s into d
// (d0..d7 only). Both operands are encoded as double registers; the debug
// disassembly now prints them as such (it used to label the destination "s").
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2465
2466void emit_fadds(int s1,int s2,int d)
2467{
2468 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2469 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2470}
2471
2472void emit_faddd(int s1,int s2,int d)
2473{
2474 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2475 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2476}
2477
2478void emit_fsubs(int s1,int s2,int d)
2479{
2480 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2481 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2482}
2483
2484void emit_fsubd(int s1,int s2,int d)
2485{
2486 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2487 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2488}
2489
2490void emit_fmuls(int s1,int s2,int d)
2491{
2492 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2493 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2494}
2495
2496void emit_fmuld(int s1,int s2,int d)
2497{
2498 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2499 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2500}
2501
2502void emit_fdivs(int s1,int s2,int d)
2503{
2504 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2505 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2506}
2507
2508void emit_fdivd(int s1,int s2,int d)
2509{
2510 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2511 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2512}
2513
2514void emit_fcmps(int x,int y)
2515{
2516 assem_debug("fcmps s14, s15\n");
2517 output_w32(0xeeb47a67);
2518}
2519
2520void emit_fcmpd(int x,int y)
2521{
2522 assem_debug("fcmpd d6, d7\n");
2523 output_w32(0xeeb46b47);
2524}
2525
2526void emit_fmstat()
2527{
2528 assem_debug("fmstat\n");
2529 output_w32(0xeef1fa10);
2530}
2531
2532void emit_bicne_imm(int rs,int imm,int rt)
2533{
2534 u_int armval;
cfbd3c6e 2535 genimm_checked(imm,&armval);
57871462 2536 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2537 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2538}
2539
2540void emit_biccs_imm(int rs,int imm,int rt)
2541{
2542 u_int armval;
cfbd3c6e 2543 genimm_checked(imm,&armval);
57871462 2544 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2545 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2546}
2547
2548void emit_bicvc_imm(int rs,int imm,int rt)
2549{
2550 u_int armval;
cfbd3c6e 2551 genimm_checked(imm,&armval);
57871462 2552 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2553 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2554}
2555
2556void emit_bichi_imm(int rs,int imm,int rt)
2557{
2558 u_int armval;
cfbd3c6e 2559 genimm_checked(imm,&armval);
57871462 2560 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2561 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2562}
2563
2564void emit_orrvs_imm(int rs,int imm,int rt)
2565{
2566 u_int armval;
cfbd3c6e 2567 genimm_checked(imm,&armval);
57871462 2568 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2569 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2570}
2571
b9b61529 2572void emit_orrne_imm(int rs,int imm,int rt)
2573{
2574 u_int armval;
cfbd3c6e 2575 genimm_checked(imm,&armval);
b9b61529 2576 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2577 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2578}
2579
2580void emit_andne_imm(int rs,int imm,int rt)
2581{
2582 u_int armval;
cfbd3c6e 2583 genimm_checked(imm,&armval);
b9b61529 2584 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2585 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2586}
2587
57871462 2588void emit_jno_unlikely(int a)
2589{
2590 //emit_jno(a);
2591 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2592 output_w32(0x72800000|rd_rn_rm(15,15,0));
2593}
2594
054175e9 2595static void save_regs_all(u_int reglist)
57871462 2596{
054175e9 2597 int i;
57871462 2598 if(!reglist) return;
2599 assem_debug("stmia fp,{");
054175e9 2600 for(i=0;i<16;i++)
2601 if(reglist&(1<<i))
2602 assem_debug("r%d,",i);
57871462 2603 assem_debug("}\n");
2604 output_w32(0xe88b0000|reglist);
2605}
054175e9 2606static void restore_regs_all(u_int reglist)
57871462 2607{
054175e9 2608 int i;
57871462 2609 if(!reglist) return;
2610 assem_debug("ldmia fp,{");
054175e9 2611 for(i=0;i<16;i++)
2612 if(reglist&(1<<i))
2613 assem_debug("r%d,",i);
57871462 2614 assem_debug("}\n");
2615 output_w32(0xe89b0000|reglist);
2616}
054175e9 2617// Save registers before function call
2618static void save_regs(u_int reglist)
2619{
2620 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2621 save_regs_all(reglist);
2622}
2623// Restore registers after function call
2624static void restore_regs(u_int reglist)
2625{
2626 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2627 restore_regs_all(reglist);
2628}
57871462 2629
2630// Write back consts using r14 so we don't disturb the other registers
2631void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2632{
2633 int hr;
2634 for(hr=0;hr<HOST_REGS;hr++) {
2635 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2636 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2637 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2638 int value=constmap[i][hr];
2639 if(value==0) {
2640 emit_zeroreg(HOST_TEMPREG);
2641 }
2642 else {
2643 emit_movimm(value,HOST_TEMPREG);
2644 }
2645 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2646#ifndef FORCE32
57871462 2647 if((i_is32>>i_regmap[hr])&1) {
2648 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2649 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2650 }
24385cae 2651#endif
57871462 2652 }
2653 }
2654 }
2655 }
2656}
2657
2658/* Stubs/epilogue */
2659
2660void literal_pool(int n)
2661{
2662 if(!literalcount) return;
2663 if(n) {
2664 if((int)out-literals[0][0]<4096-n) return;
2665 }
2666 u_int *ptr;
2667 int i;
2668 for(i=0;i<literalcount;i++)
2669 {
77750690 2670 u_int l_addr=(u_int)out;
2671 int j;
2672 for(j=0;j<i;j++) {
2673 if(literals[j][1]==literals[i][1]) {
2674 //printf("dup %08x\n",literals[i][1]);
2675 l_addr=literals[j][0];
2676 break;
2677 }
2678 }
57871462 2679 ptr=(u_int *)literals[i][0];
77750690 2680 u_int offset=l_addr-(u_int)ptr-8;
57871462 2681 assert(offset<4096);
2682 assert(!(offset&3));
2683 *ptr|=offset;
77750690 2684 if(l_addr==(u_int)out) {
2685 literals[i][0]=l_addr; // remember for dupes
2686 output_w32(literals[i][1]);
2687 }
57871462 2688 }
2689 literalcount=0;
2690}
2691
2692void literal_pool_jumpover(int n)
2693{
2694 if(!literalcount) return;
2695 if(n) {
2696 if((int)out-literals[0][0]<4096-n) return;
2697 }
2698 int jaddr=(int)out;
2699 emit_jmp(0);
2700 literal_pool(0);
2701 set_jump_target(jaddr,(int)out);
2702}
2703
2704emit_extjump2(int addr, int target, int linker)
2705{
2706 u_char *ptr=(u_char *)addr;
2707 assert((ptr[3]&0x0e)==0xa);
2708 emit_loadlp(target,0);
2709 emit_loadlp(addr,1);
24385cae 2710 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2711 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2712//DEBUG >
2713#ifdef DEBUG_CYCLE_COUNT
2714 emit_readword((int)&last_count,ECX);
2715 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2716 emit_readword((int)&next_interupt,ECX);
2717 emit_writeword(HOST_CCREG,(int)&Count);
2718 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2719 emit_writeword(ECX,(int)&last_count);
2720#endif
2721//DEBUG <
2722 emit_jmp(linker);
2723}
2724
2725emit_extjump(int addr, int target)
2726{
2727 emit_extjump2(addr, target, (int)dyna_linker);
2728}
2729emit_extjump_ds(int addr, int target)
2730{
2731 emit_extjump2(addr, target, (int)dyna_linker_ds);
2732}
2733
13e35c04 2734// put rt_val into rt, potentially making use of rs with value rs_val
2735static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2736{
8575a877 2737 u_int armval;
2738 int diff;
2739 if(genimm(rt_val,&armval)) {
2740 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2741 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2742 return;
2743 }
2744 if(genimm(~rt_val,&armval)) {
2745 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2746 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2747 return;
2748 }
2749 diff=rt_val-rs_val;
2750 if(genimm(diff,&armval)) {
2751 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2752 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2753 return;
2754 }else if(genimm(-diff,&armval)) {
2755 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2756 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2757 return;
2758 }
2759 emit_movimm(rt_val,rt);
2760}
2761
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. v1==v2 or
// the difference (in either direction), after stripping trailing pairs of
// zero bits, fits in 8 bits. Returns 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;
  if(v1==v2) return 1;
  delta=v2-v1;
  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;
  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return (bits<0x100)?1:0;
}
cbbab9cd 2777
// Move the values held in host registers a0 and a1 into r0 and r1
// respectively. A negative register number means "nothing to move".
// trashes r2 (used as scratch when a0/a1 are exactly r1/r0)
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap r0 and r1, going through r2
      emit_mov(1,2); emit_mov(0,1); emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // r0 holds the second argument: move it out before overwriting r0
      emit_mov(0,1);
      if (a0>=0) emit_mov(a0,0);
      return;
    }
  }
  // no conflict: move each argument that is not already in place
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2794
b1be1eee 2795static void mov_loadtype_adj(int type,int rs,int rt)
2796{
2797 switch(type) {
2798 case LOADB_STUB: emit_signextend8(rs,rt); break;
2799 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2800 case LOADH_STUB: emit_signextend16(rs,rt); break;
2801 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2802 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2803 default: assert(0);
2804 }
2805}
2806
2807#ifdef PCSX
2808#include "pcsxmem.h"
2809#include "pcsxmem_inline.c"
2810#endif
2811
57871462 2812do_readstub(int n)
2813{
2814 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2815 literal_pool(256);
2816 set_jump_target(stubs[n][1],(int)out);
2817 int type=stubs[n][0];
2818 int i=stubs[n][3];
2819 int rs=stubs[n][4];
2820 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2821 u_int reglist=stubs[n][7];
2822 signed char *i_regmap=i_regs->regmap;
2823 int addr=get_reg(i_regmap,AGEN1+(i&1));
2824 int rth,rt;
2825 int ds;
b9b61529 2826 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2827 rth=get_reg(i_regmap,FTEMP|64);
2828 rt=get_reg(i_regmap,FTEMP);
2829 }else{
2830 rth=get_reg(i_regmap,rt1[i]|64);
2831 rt=get_reg(i_regmap,rt1[i]);
2832 }
2833 assert(rs>=0);
c6c3b1b3 2834#ifdef PCSX
2835 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2836 reglist|=(1<<rs);
2837 for(r=0;r<=12;r++) {
2838 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2839 temp=r; break;
2840 }
2841 }
db829eeb 2842 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2843 reglist&=~(1<<rt);
2844 if(temp==-1) {
2845 save_regs(reglist);
2846 regs_saved=1;
2847 temp=(rs==0)?2:0;
2848 }
2849 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2850 temp2=1;
2851 emit_readword((int)&mem_rtab,temp);
2852 emit_shrimm(rs,12,temp2);
2853 emit_readword_dualindexedx4(temp,temp2,temp2);
2854 emit_lsls_imm(temp2,1,temp2);
2855 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2856 switch(type) {
2857 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2858 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2859 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2860 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2861 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2862 }
2863 }
2864 if(regs_saved) {
2865 restore_jump=(int)out;
2866 emit_jcc(0); // jump to reg restore
2867 }
2868 else
2869 emit_jcc(stubs[n][2]); // return address
2870
2871 if(!regs_saved)
2872 save_regs(reglist);
2873 int handler=0;
2874 if(type==LOADB_STUB||type==LOADBU_STUB)
2875 handler=(int)jump_handler_read8;
2876 if(type==LOADH_STUB||type==LOADHU_STUB)
2877 handler=(int)jump_handler_read16;
2878 if(type==LOADW_STUB)
2879 handler=(int)jump_handler_read32;
2880 assert(handler!=0);
b96d3df7 2881 pass_args(rs,temp2);
c6c3b1b3 2882 int cc=get_reg(i_regmap,CCREG);
2883 if(cc<0)
2884 emit_loadreg(CCREG,2);
2573466a 2885 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2886 emit_call(handler);
2887 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2888 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2889 }
2890 if(restore_jump)
2891 set_jump_target(restore_jump,(int)out);
2892 restore_regs(reglist);
2893 emit_jmp(stubs[n][2]); // return address
2894#else // !PCSX
57871462 2895 if(addr<0) addr=rt;
535d208a 2896 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2897 assert(addr>=0);
2898 int ftable=0;
2899 if(type==LOADB_STUB||type==LOADBU_STUB)
2900 ftable=(int)readmemb;
2901 if(type==LOADH_STUB||type==LOADHU_STUB)
2902 ftable=(int)readmemh;
2903 if(type==LOADW_STUB)
2904 ftable=(int)readmem;
24385cae 2905#ifndef FORCE32
57871462 2906 if(type==LOADD_STUB)
2907 ftable=(int)readmemd;
24385cae 2908#endif
2909 assert(ftable!=0);
57871462 2910 emit_writeword(rs,(int)&address);
2911 //emit_pusha();
2912 save_regs(reglist);
97a238a6 2913#ifndef PCSX
57871462 2914 ds=i_regs!=&regs[i];
2915 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2916 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2917 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2918 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2919 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2920#endif
57871462 2921 emit_shrimm(rs,16,1);
2922 int cc=get_reg(i_regmap,CCREG);
2923 if(cc<0) {
2924 emit_loadreg(CCREG,2);
2925 }
2926 emit_movimm(ftable,0);
2927 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2928#ifndef PCSX
57871462 2929 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2930#endif
57871462 2931 //emit_readword((int)&last_count,temp);
2932 //emit_add(cc,temp,cc);
2933 //emit_writeword(cc,(int)&Count);
2934 //emit_mov(15,14);
2935 emit_call((int)&indirect_jump_indexed);
2936 //emit_callreg(rs);
2937 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2938#ifndef PCSX
57871462 2939 // We really shouldn't need to update the count here,
2940 // but not doing so causes random crashes...
2941 emit_readword((int)&Count,HOST_TEMPREG);
2942 emit_readword((int)&next_interupt,2);
2943 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2944 emit_writeword(2,(int)&last_count);
2945 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2946 if(cc<0) {
2947 emit_storereg(CCREG,HOST_TEMPREG);
2948 }
f51dc36c 2949#endif
57871462 2950 //emit_popa();
2951 restore_regs(reglist);
2952 //if((cc=get_reg(regmap,CCREG))>=0) {
2953 // emit_loadreg(CCREG,cc);
2954 //}
f18c0f46 2955 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2956 assert(rt>=0);
2957 if(type==LOADB_STUB)
2958 emit_movsbl((int)&readmem_dword,rt);
2959 if(type==LOADBU_STUB)
2960 emit_movzbl((int)&readmem_dword,rt);
2961 if(type==LOADH_STUB)
2962 emit_movswl((int)&readmem_dword,rt);
2963 if(type==LOADHU_STUB)
2964 emit_movzwl((int)&readmem_dword,rt);
2965 if(type==LOADW_STUB)
2966 emit_readword((int)&readmem_dword,rt);
2967 if(type==LOADD_STUB) {
2968 emit_readword((int)&readmem_dword,rt);
2969 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2970 }
57871462 2971 }
2972 emit_jmp(stubs[n][2]); // return address
c6c3b1b3 2973#endif // !PCSX
57871462 2974}
2975
c6c3b1b3 2976#ifdef PCSX
2977// return memhandler, or get directly accessable address and return 0
2978u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2979{
2980 u_int l1,l2=0;
2981 l1=((u_int *)table)[addr>>12];
2982 if((l1&(1<<31))==0) {
2983 u_int v=l1<<1;
2984 *addr_host=v+addr;
2985 return 0;
2986 }
2987 else {
2988 l1<<=1;
2989 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2990 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2991 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2992 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2993 else
2994 l2=((u_int *)l1)[(addr&0xfff)/4];
2995 if((l2&(1<<31))==0) {
2996 u_int v=l2<<1;
2997 *addr_host=v+(addr&0xfff);
2998 return 0;
2999 }
3000 return l2<<1;
3001 }
3002}
3003#endif
3004
57871462 3005inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3006{
3007 int rs=get_reg(regmap,target);
3008 int rth=get_reg(regmap,target|64);
3009 int rt=get_reg(regmap,target);
535d208a 3010 if(rs<0) rs=get_reg(regmap,-1);
57871462 3011 assert(rs>=0);
c6c3b1b3 3012#ifdef PCSX
b1be1eee 3013 u_int handler,host_addr=0,is_dynamic,far_call=0;
3014 int cc=get_reg(regmap,CCREG);
3015 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
3016 return;
c6c3b1b3 3017 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
3018 if (handler==0) {
db829eeb 3019 if(rt<0||rt1[i]==0)
c6c3b1b3 3020 return;
13e35c04 3021 if(addr!=host_addr)
3022 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 3023 switch(type) {
3024 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
3025 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
3026 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
3027 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
3028 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
3029 default: assert(0);
3030 }
3031 return;
3032 }
b1be1eee 3033 is_dynamic=pcsxmem_is_handler_dynamic(addr);
3034 if(is_dynamic) {
3035 if(type==LOADB_STUB||type==LOADBU_STUB)
3036 handler=(int)jump_handler_read8;
3037 if(type==LOADH_STUB||type==LOADHU_STUB)
3038 handler=(int)jump_handler_read16;
3039 if(type==LOADW_STUB)
3040 handler=(int)jump_handler_read32;
3041 }
c6c3b1b3 3042
3043 // call a memhandler
db829eeb 3044 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 3045 reglist&=~(1<<rt);
3046 save_regs(reglist);
3047 if(target==0)
3048 emit_movimm(addr,0);
3049 else if(rs!=0)
3050 emit_mov(rs,0);
c6c3b1b3 3051 int offset=(int)handler-(int)out-8;
3052 if(offset<-33554432||offset>=33554432) {
3053 // unreachable memhandler, a plugin func perhaps
b1be1eee 3054 emit_movimm(handler,12);
3055 far_call=1;
3056 }
3057 if(cc<0)
3058 emit_loadreg(CCREG,2);
3059 if(is_dynamic) {
3060 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
3061 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 3062 }
b1be1eee 3063 else {
3064 emit_readword((int)&last_count,3);
3065 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3066 emit_add(2,3,2);
3067 emit_writeword(2,(int)&Count);
3068 }
3069
3070 if(far_call)
3071 emit_callreg(12);
c6c3b1b3 3072 else
3073 emit_call(handler);
b1be1eee 3074
db829eeb 3075 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 3076 switch(type) {
3077 case LOADB_STUB: emit_signextend8(0,rt); break;
3078 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
3079 case LOADH_STUB: emit_signextend16(0,rt); break;
3080 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
3081 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
3082 default: assert(0);
3083 }
3084 }
3085 restore_regs(reglist);
3086#else // if !PCSX
57871462 3087 int ftable=0;
3088 if(type==LOADB_STUB||type==LOADBU_STUB)
3089 ftable=(int)readmemb;
3090 if(type==LOADH_STUB||type==LOADHU_STUB)
3091 ftable=(int)readmemh;
3092 if(type==LOADW_STUB)
3093 ftable=(int)readmem;
24385cae 3094#ifndef FORCE32
57871462 3095 if(type==LOADD_STUB)
3096 ftable=(int)readmemd;
24385cae 3097#endif
3098 assert(ftable!=0);
fd99c415 3099 if(target==0)
3100 emit_movimm(addr,rs);
57871462 3101 emit_writeword(rs,(int)&address);
3102 //emit_pusha();
3103 save_regs(reglist);
0c1fe38b 3104#ifndef PCSX
3105 if((signed int)addr>=(signed int)0xC0000000) {
3106 // Theoretically we can have a pagefault here, if the TLB has never
3107 // been enabled and the address is outside the range 80000000..BFFFFFFF
3108 // Write out the registers so the pagefault can be handled. This is
3109 // a very rare case and likely represents a bug.
3110 int ds=regmap!=regs[i].regmap;
3111 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3112 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3113 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3114 }
3115#endif
57871462 3116 //emit_shrimm(rs,16,1);
3117 int cc=get_reg(regmap,CCREG);
3118 if(cc<0) {
3119 emit_loadreg(CCREG,2);
3120 }
3121 //emit_movimm(ftable,0);
3122 emit_movimm(((u_int *)ftable)[addr>>16],0);
3123 //emit_readword((int)&last_count,12);
2573466a 3124 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3125#ifndef PCSX
57871462 3126 if((signed int)addr>=(signed int)0xC0000000) {
3127 // Pagefault address
3128 int ds=regmap!=regs[i].regmap;
3129 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3130 }
f51dc36c 3131#endif
57871462 3132 //emit_add(12,2,2);
3133 //emit_writeword(2,(int)&Count);
3134 //emit_call(((u_int *)ftable)[addr>>16]);
3135 emit_call((int)&indirect_jump);
f51dc36c 3136#ifndef PCSX
57871462 3137 // We really shouldn't need to update the count here,
3138 // but not doing so causes random crashes...
3139 emit_readword((int)&Count,HOST_TEMPREG);
3140 emit_readword((int)&next_interupt,2);
2573466a 3141 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3142 emit_writeword(2,(int)&last_count);
3143 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3144 if(cc<0) {
3145 emit_storereg(CCREG,HOST_TEMPREG);
3146 }
f51dc36c 3147#endif
57871462 3148 //emit_popa();
3149 restore_regs(reglist);
fd99c415 3150 if(rt>=0) {
3151 if(type==LOADB_STUB)
3152 emit_movsbl((int)&readmem_dword,rt);
3153 if(type==LOADBU_STUB)
3154 emit_movzbl((int)&readmem_dword,rt);
3155 if(type==LOADH_STUB)
3156 emit_movswl((int)&readmem_dword,rt);
3157 if(type==LOADHU_STUB)
3158 emit_movzwl((int)&readmem_dword,rt);
3159 if(type==LOADW_STUB)
3160 emit_readword((int)&readmem_dword,rt);
3161 if(type==LOADD_STUB) {
3162 emit_readword((int)&readmem_dword,rt);
3163 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3164 }
57871462 3165 }
c6c3b1b3 3166#endif // !PCSX
57871462 3167}
3168
3169do_writestub(int n)
3170{
3171 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3172 literal_pool(256);
3173 set_jump_target(stubs[n][1],(int)out);
3174 int type=stubs[n][0];
3175 int i=stubs[n][3];
3176 int rs=stubs[n][4];
3177 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3178 u_int reglist=stubs[n][7];
3179 signed char *i_regmap=i_regs->regmap;
3180 int addr=get_reg(i_regmap,AGEN1+(i&1));
3181 int rth,rt,r;
3182 int ds;
b9b61529 3183 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3184 rth=get_reg(i_regmap,FTEMP|64);
3185 rt=get_reg(i_regmap,r=FTEMP);
3186 }else{
3187 rth=get_reg(i_regmap,rs2[i]|64);
3188 rt=get_reg(i_regmap,r=rs2[i]);
3189 }
3190 assert(rs>=0);
3191 assert(rt>=0);
b96d3df7 3192#ifdef PCSX
3193 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3194 int reglist2=reglist|(1<<rs)|(1<<rt);
3195 for(rtmp=0;rtmp<=12;rtmp++) {
3196 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3197 temp=rtmp; break;
3198 }
3199 }
3200 if(temp==-1) {
3201 save_regs(reglist);
3202 regs_saved=1;
3203 for(rtmp=0;rtmp<=3;rtmp++)
3204 if(rtmp!=rs&&rtmp!=rt)
3205 {temp=rtmp;break;}
3206 }
3207 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3208 temp2=3;
3209 emit_readword((int)&mem_wtab,temp);
3210 emit_shrimm(rs,12,temp2);
3211 emit_readword_dualindexedx4(temp,temp2,temp2);
3212 emit_lsls_imm(temp2,1,temp2);
3213 switch(type) {
3214 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3215 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3216 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3217 default: assert(0);
3218 }
3219 if(regs_saved) {
3220 restore_jump=(int)out;
3221 emit_jcc(0); // jump to reg restore
3222 }
3223 else
3224 emit_jcc(stubs[n][2]); // return address (invcode check)
3225
3226 if(!regs_saved)
3227 save_regs(reglist);
3228 int handler=0;
3229 switch(type) {
3230 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3231 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3232 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3233 }
3234 assert(handler!=0);
3235 pass_args(rs,rt);
3236 if(temp2!=3)
3237 emit_mov(temp2,3);
3238 int cc=get_reg(i_regmap,CCREG);
3239 if(cc<0)
3240 emit_loadreg(CCREG,2);
2573466a 3241 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3242 // returns new cycle_count
3243 emit_call(handler);
2573466a 3244 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3245 if(cc<0)
3246 emit_storereg(CCREG,2);
3247 if(restore_jump)
3248 set_jump_target(restore_jump,(int)out);
3249 restore_regs(reglist);
3250 ra=stubs[n][2];
b96d3df7 3251 emit_jmp(ra);
3252#else // if !PCSX
57871462 3253 if(addr<0) addr=get_reg(i_regmap,-1);
3254 assert(addr>=0);
3255 int ftable=0;
3256 if(type==STOREB_STUB)
3257 ftable=(int)writememb;
3258 if(type==STOREH_STUB)
3259 ftable=(int)writememh;
3260 if(type==STOREW_STUB)
3261 ftable=(int)writemem;
24385cae 3262#ifndef FORCE32
57871462 3263 if(type==STORED_STUB)
3264 ftable=(int)writememd;
24385cae 3265#endif
3266 assert(ftable!=0);
57871462 3267 emit_writeword(rs,(int)&address);
3268 //emit_shrimm(rs,16,rs);
3269 //emit_movmem_indexedx4(ftable,rs,rs);
3270 if(type==STOREB_STUB)
3271 emit_writebyte(rt,(int)&byte);
3272 if(type==STOREH_STUB)
3273 emit_writehword(rt,(int)&hword);
3274 if(type==STOREW_STUB)
3275 emit_writeword(rt,(int)&word);
3276 if(type==STORED_STUB) {
3d624f89 3277#ifndef FORCE32
57871462 3278 emit_writeword(rt,(int)&dword);
3279 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3280#else
3281 printf("STORED_STUB\n");
3282#endif
57871462 3283 }
3284 //emit_pusha();
3285 save_regs(reglist);
97a238a6 3286#ifndef PCSX
57871462 3287 ds=i_regs!=&regs[i];
3288 int real_rs=get_reg(i_regmap,rs1[i]);
3289 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3290 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3291 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3292 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3293#endif
57871462 3294 emit_shrimm(rs,16,1);
3295 int cc=get_reg(i_regmap,CCREG);
3296 if(cc<0) {
3297 emit_loadreg(CCREG,2);
3298 }
3299 emit_movimm(ftable,0);
3300 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3301#ifndef PCSX
57871462 3302 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3303#endif
57871462 3304 //emit_readword((int)&last_count,temp);
3305 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3306 //emit_add(cc,temp,cc);
3307 //emit_writeword(cc,(int)&Count);
3308 emit_call((int)&indirect_jump_indexed);
3309 //emit_callreg(rs);
3310 emit_readword((int)&Count,HOST_TEMPREG);
3311 emit_readword((int)&next_interupt,2);
3312 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3313 emit_writeword(2,(int)&last_count);
3314 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3315 if(cc<0) {
3316 emit_storereg(CCREG,HOST_TEMPREG);
3317 }
3318 //emit_popa();
3319 restore_regs(reglist);
3320 //if((cc=get_reg(regmap,CCREG))>=0) {
3321 // emit_loadreg(CCREG,cc);
3322 //}
3323 emit_jmp(stubs[n][2]); // return address
b96d3df7 3324#endif // !PCSX
57871462 3325}
3326
3327inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3328{
3329 int rs=get_reg(regmap,-1);
3330 int rth=get_reg(regmap,target|64);
3331 int rt=get_reg(regmap,target);
3332 assert(rs>=0);
3333 assert(rt>=0);
cbbab9cd 3334#ifdef PCSX
b96d3df7 3335 u_int handler,host_addr=0;
b96d3df7 3336 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3337 if (handler==0) {
13e35c04 3338 if(addr!=host_addr)
3339 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3340 switch(type) {
3341 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3342 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3343 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3344 default: assert(0);
3345 }
3346 return;
3347 }
3348
3349 // call a memhandler
3350 save_regs(reglist);
13e35c04 3351 pass_args(rs,rt);
b96d3df7 3352 int cc=get_reg(regmap,CCREG);
3353 if(cc<0)
3354 emit_loadreg(CCREG,2);
2573466a 3355 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3356 emit_movimm(handler,3);
3357 // returns new cycle_count
3358 emit_call((int)jump_handler_write_h);
2573466a 3359 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3360 if(cc<0)
3361 emit_storereg(CCREG,2);
3362 restore_regs(reglist);
3363#else // if !pcsx
57871462 3364 int ftable=0;
3365 if(type==STOREB_STUB)
3366 ftable=(int)writememb;
3367 if(type==STOREH_STUB)
3368 ftable=(int)writememh;
3369 if(type==STOREW_STUB)
3370 ftable=(int)writemem;
24385cae 3371#ifndef FORCE32
57871462 3372 if(type==STORED_STUB)
3373 ftable=(int)writememd;
24385cae 3374#endif
3375 assert(ftable!=0);
57871462 3376 emit_writeword(rs,(int)&address);
3377 //emit_shrimm(rs,16,rs);
3378 //emit_movmem_indexedx4(ftable,rs,rs);
3379 if(type==STOREB_STUB)
3380 emit_writebyte(rt,(int)&byte);
3381 if(type==STOREH_STUB)
3382 emit_writehword(rt,(int)&hword);
3383 if(type==STOREW_STUB)
3384 emit_writeword(rt,(int)&word);
3385 if(type==STORED_STUB) {
3d624f89 3386#ifndef FORCE32
57871462 3387 emit_writeword(rt,(int)&dword);
3388 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3389#else
3390 printf("STORED_STUB\n");
3391#endif
57871462 3392 }
3393 //emit_pusha();
3394 save_regs(reglist);
0c1fe38b 3395#ifndef PCSX
3396 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3397 if((signed int)addr>=(signed int)0xC0000000) {
3398 // Theoretically we can have a pagefault here, if the TLB has never
3399 // been enabled and the address is outside the range 80000000..BFFFFFFF
3400 // Write out the registers so the pagefault can be handled. This is
3401 // a very rare case and likely represents a bug.
3402 int ds=regmap!=regs[i].regmap;
3403 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3404 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3405 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3406 }
3407#endif
57871462 3408 //emit_shrimm(rs,16,1);
3409 int cc=get_reg(regmap,CCREG);
3410 if(cc<0) {
3411 emit_loadreg(CCREG,2);
3412 }
3413 //emit_movimm(ftable,0);
3414 emit_movimm(((u_int *)ftable)[addr>>16],0);
3415 //emit_readword((int)&last_count,12);
2573466a 3416 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3417#ifndef PCSX
57871462 3418 if((signed int)addr>=(signed int)0xC0000000) {
3419 // Pagefault address
3420 int ds=regmap!=regs[i].regmap;
3421 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3422 }
f51dc36c 3423#endif
57871462 3424 //emit_add(12,2,2);
3425 //emit_writeword(2,(int)&Count);
3426 //emit_call(((u_int *)ftable)[addr>>16]);
3427 emit_call((int)&indirect_jump);
3428 emit_readword((int)&Count,HOST_TEMPREG);
3429 emit_readword((int)&next_interupt,2);
2573466a 3430 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3431 emit_writeword(2,(int)&last_count);
3432 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3433 if(cc<0) {
3434 emit_storereg(CCREG,HOST_TEMPREG);
3435 }
3436 //emit_popa();
3437 restore_regs(reglist);
b96d3df7 3438#endif
57871462 3439}
3440
3441do_unalignedwritestub(int n)
3442{
b7918751 3443 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3444 literal_pool(256);
57871462 3445 set_jump_target(stubs[n][1],(int)out);
b7918751 3446
3447 int i=stubs[n][3];
3448 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3449 int addr=stubs[n][5];
3450 u_int reglist=stubs[n][7];
3451 signed char *i_regmap=i_regs->regmap;
3452 int temp2=get_reg(i_regmap,FTEMP);
3453 int rt;
3454 int ds, real_rs;
3455 rt=get_reg(i_regmap,rs2[i]);
3456 assert(rt>=0);
3457 assert(addr>=0);
3458 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3459 reglist|=(1<<addr);
3460 reglist&=~(1<<temp2);
3461
b96d3df7 3462#if 1
3463 // don't bother with it and call write handler
3464 save_regs(reglist);
3465 pass_args(addr,rt);
3466 int cc=get_reg(i_regmap,CCREG);
3467 if(cc<0)
3468 emit_loadreg(CCREG,2);
2573466a 3469 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3470 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3471 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3472 if(cc<0)
3473 emit_storereg(CCREG,2);
3474 restore_regs(reglist);
3475 emit_jmp(stubs[n][2]); // return address
3476#else
b7918751 3477 emit_andimm(addr,0xfffffffc,temp2);
3478 emit_writeword(temp2,(int)&address);
3479
3480 save_regs(reglist);
97a238a6 3481#ifndef PCSX
b7918751 3482 ds=i_regs!=&regs[i];
3483 real_rs=get_reg(i_regmap,rs1[i]);
3484 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3485 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3486 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3487 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3488#endif
b7918751 3489 emit_shrimm(addr,16,1);
3490 int cc=get_reg(i_regmap,CCREG);
3491 if(cc<0) {
3492 emit_loadreg(CCREG,2);
3493 }
3494 emit_movimm((u_int)readmem,0);
3495 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3496#ifndef PCSX
3497 // pagefault address
3498 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3499#endif
b7918751 3500 emit_call((int)&indirect_jump_indexed);
3501 restore_regs(reglist);
3502
3503 emit_readword((int)&readmem_dword,temp2);
3504 int temp=addr; //hmh
3505 emit_shlimm(addr,3,temp);
3506 emit_andimm(temp,24,temp);
3507#ifdef BIG_ENDIAN_MIPS
3508 if (opcode[i]==0x2e) // SWR
3509#else
3510 if (opcode[i]==0x2a) // SWL
3511#endif
3512 emit_xorimm(temp,24,temp);
3513 emit_movimm(-1,HOST_TEMPREG);
55439448 3514 if (opcode[i]==0x2a) { // SWL
b7918751 3515 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3516 emit_orrshr(rt,temp,temp2);
3517 }else{
3518 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3519 emit_orrshl(rt,temp,temp2);
3520 }
3521 emit_readword((int)&address,addr);
3522 emit_writeword(temp2,(int)&word);
3523 //save_regs(reglist); // don't need to, no state changes
3524 emit_shrimm(addr,16,1);
3525 emit_movimm((u_int)writemem,0);
3526 //emit_call((int)&indirect_jump_indexed);
3527 emit_mov(15,14);
3528 emit_readword_dualindexedx4(0,1,15);
3529 emit_readword((int)&Count,HOST_TEMPREG);
3530 emit_readword((int)&next_interupt,2);
3531 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3532 emit_writeword(2,(int)&last_count);
3533 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3534 if(cc<0) {
3535 emit_storereg(CCREG,HOST_TEMPREG);
3536 }
3537 restore_regs(reglist);
57871462 3538 emit_jmp(stubs[n][2]); // return address
b96d3df7 3539#endif
57871462 3540}
3541
/*
 * Debug helper: print the eight register values passed in, in the order
 * a, b, c, d, ebp, esi, edi, followed by the word located just below the
 * 'edi' parameter in memory.  'esp' is accepted but not printed.
 *
 * NOTE(review): the parameter names and the (&edi)[-1] read look like a
 * leftover from an x86 backend; the read is outside the 'edi' object, so the
 * last value printed is not well defined - confirm whether anything still
 * calls this.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,
         ebp,esi,edi,
         (&edi)[-1]);
}
3546
3547do_invstub(int n)
3548{
3549 literal_pool(20);
3550 u_int reglist=stubs[n][3];
3551 set_jump_target(stubs[n][1],(int)out);
3552 save_regs(reglist);
3553 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3554 emit_call((int)&invalidate_addr);
3555 restore_regs(reglist);
3556 emit_jmp(stubs[n][2]); // return address
3557}
3558
3559int do_dirty_stub(int i)
3560{
3561 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3562 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3563 #ifdef PCSX
3564 addr=(u_int)source;
3565 #endif
57871462 3566 // Careful about the code output here, verify_dirty needs to parse it.
3567 #ifdef ARMv5_ONLY
ac545b3a 3568 emit_loadlp(addr,1);
57871462 3569 emit_loadlp((int)copy,2);
3570 emit_loadlp(slen*4,3);
3571 #else
ac545b3a 3572 emit_movw(addr&0x0000FFFF,1);
57871462 3573 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3574 emit_movt(addr&0xFFFF0000,1);
57871462 3575 emit_movt(((u_int)copy)&0xFFFF0000,2);
3576 emit_movw(slen*4,3);
3577 #endif
3578 emit_movimm(start+i*4,0);
3579 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3580 int entry=(int)out;
3581 load_regs_entry(i);
3582 if(entry==(int)out) entry=instr_addr[i];
3583 emit_jmp(instr_addr[i]);
3584 return entry;
3585}
3586
3587void do_dirty_stub_ds()
3588{
3589 // Careful about the code output here, verify_dirty needs to parse it.
3590 #ifdef ARMv5_ONLY
3591 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3592 emit_loadlp((int)copy,2);
3593 emit_loadlp(slen*4,3);
3594 #else
3595 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3596 emit_movw(((u_int)copy)&0x0000FFFF,2);
3597 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3598 emit_movt(((u_int)copy)&0xFFFF0000,2);
3599 emit_movw(slen*4,3);
3600 #endif
3601 emit_movimm(start+1,0);
3602 emit_call((int)&verify_code_ds);
3603}
3604
3605do_cop1stub(int n)
3606{
3607 literal_pool(256);
3608 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3609 set_jump_target(stubs[n][1],(int)out);
3610 int i=stubs[n][3];
3d624f89 3611// int rs=stubs[n][4];
57871462 3612 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3613 int ds=stubs[n][6];
3614 if(!ds) {
3615 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3616 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3617 }
3618 //else {printf("fp exception in delay slot\n");}
3619 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3620 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3621 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3622 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3623 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3624}
3625
63cb0298 3626#ifndef DISABLE_TLB
3627
/* TLB */
3629
3630int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3631{
3632 if(c) {
3633 if((signed int)addr>=(signed int)0xC0000000) {
3634 // address_generation already loaded the const
3635 emit_readword_dualindexedx4(FP,map,map);
3636 }
3637 else
3638 return -1; // No mapping
3639 }
3640 else {
3641 assert(s!=map);
3642 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3643 emit_addsr12(map,s,map);
3644 // Schedule this while we wait on the load
3645 //if(x) emit_xorimm(s,x,ar);
3646 if(shift>=0) emit_shlimm(s,3,shift);
3647 if(~a) emit_andimm(s,a,ar);
3648 emit_readword_dualindexedx4(FP,map,map);
3649 }
3650 return map;
3651}
3652int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3653{
3654 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3655 emit_test(map,map);
3656 *jaddr=(int)out;
3657 emit_js(0);
3658 }
3659 return map;
3660}
3661
3662int gen_tlb_addr_r(int ar, int map) {
3663 if(map>=0) {
3664 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3665 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3666 }
3667}
3668
3669int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3670{
3671 if(c) {
3672 if(addr<0x80800000||addr>=0xC0000000) {
3673 // address_generation already loaded the const
3674 emit_readword_dualindexedx4(FP,map,map);
3675 }
3676 else
3677 return -1; // No mapping
3678 }
3679 else {
3680 assert(s!=map);
3681 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3682 emit_addsr12(map,s,map);
3683 // Schedule this while we wait on the load
3684 //if(x) emit_xorimm(s,x,ar);
3685 emit_readword_dualindexedx4(FP,map,map);
3686 }
3687 return map;
3688}
3689int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3690{
3691 if(!c||addr<0x80800000||addr>=0xC0000000) {
3692 emit_testimm(map,0x40000000);
3693 *jaddr=(int)out;
3694 emit_jne(0);
3695 }
3696}
3697
3698int gen_tlb_addr_w(int ar, int map) {
3699 if(map>=0) {
3700 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3701 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3702 }
3703}
3704
3705// Generate the address of the memory_map entry, relative to dynarec_local
3706generate_map_const(u_int addr,int reg) {
3707 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3708 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3709}
3710
63cb0298 3711#else
3712
/*
 * DISABLE_TLB build: no-op replacements for the TLB helpers.
 * The parameter lists are deliberately left unspecified so that existing
 * call sites, which pass the full argument lists, keep compiling.
 * Every stub returns 0.
 */
static int do_tlb_r()
{
  return 0;
}

static int do_tlb_r_branch()
{
  return 0;
}

static int gen_tlb_addr_r()
{
  return 0;
}

static int do_tlb_w()
{
  return 0;
}

static int do_tlb_w_branch()
{
  return 0;
}

static int gen_tlb_addr_w()
{
  return 0;
}
3719
3720#endif // DISABLE_TLB
3721
/* Special assem */
3723
3724void shift_assemble_arm(int i,struct regstat *i_regs)
3725{
3726 if(rt1[i]) {
3727 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3728 {
3729 signed char s,t,shift;
3730 t=get_reg(i_regs->regmap,rt1[i]);
3731 s=get_reg(i_regs->regmap,rs1[i]);
3732 shift=get_reg(i_regs->regmap,rs2[i]);
3733 if(t>=0){
3734 if(rs1[i]==0)
3735 {
3736 emit_zeroreg(t);
3737 }
3738 else if(rs2[i]==0)
3739 {
3740 assert(s>=0);
3741 if(s!=t) emit_mov(s,t);
3742 }
3743 else
3744 {
3745 emit_andimm(shift,31,HOST_TEMPREG);
3746 if(opcode2[i]==4) // SLLV
3747 {
3748 emit_shl(s,HOST_TEMPREG,t);
3749 }
3750 if(opcode2[i]==6) // SRLV
3751 {
3752 emit_shr(s,HOST_TEMPREG,t);
3753 }
3754 if(opcode2[i]==7) // SRAV
3755 {
3756 emit_sar(s,HOST_TEMPREG,t);
3757 }
3758 }
3759 }
3760 } else { // DSLLV/DSRLV/DSRAV
3761 signed char sh,sl,th,tl,shift;
3762 th=get_reg(i_regs->regmap,rt1[i]|64);
3763 tl=get_reg(i_regs->regmap,rt1[i]);
3764 sh=get_reg(i_regs->regmap,rs1[i]|64);
3765 sl=get_reg(i_regs->regmap,rs1[i]);
3766 shift=get_reg(i_regs->regmap,rs2[i]);
3767 if(tl>=0){
3768 if(rs1[i]==0)
3769 {
3770 emit_zeroreg(tl);
3771 if(th>=0) emit_zeroreg(th);
3772 }
3773 else if(rs2[i]==0)
3774 {
3775 assert(sl>=0);
3776 if(sl!=tl) emit_mov(sl,tl);
3777 if(th>=0&&sh!=th) emit_mov(sh,th);
3778 }
3779 else
3780 {
3781 // FIXME: What if shift==tl ?
3782 assert(shift!=tl);
3783 int temp=get_reg(i_regs->regmap,-1);
3784 int real_th=th;
3785 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3786 assert(sl>=0);
3787 assert(sh>=0);
3788 emit_andimm(shift,31,HOST_TEMPREG);
3789 if(opcode2[i]==0x14) // DSLLV
3790 {
3791 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3792 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3793 emit_orrshr(sl,HOST_TEMPREG,th);
3794 emit_andimm(shift,31,HOST_TEMPREG);
3795 emit_testimm(shift,32);
3796 emit_shl(sl,HOST_TEMPREG,tl);
3797 if(th>=0) emit_cmovne_reg(tl,th);
3798 emit_cmovne_imm(0,tl);
3799 }
3800 if(opcode2[i]==0x16) // DSRLV
3801 {
3802 assert(th>=0);
3803 emit_shr(sl,HOST_TEMPREG,tl);
3804 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3805 emit_orrshl(sh,HOST_TEMPREG,tl);
3806 emit_andimm(shift,31,HOST_TEMPREG);
3807 emit_testimm(shift,32);
3808 emit_shr(sh,HOST_TEMPREG,th);
3809 emit_cmovne_reg(th,tl);
3810 if(real_th>=0) emit_cmovne_imm(0,th);
3811 }
3812 if(opcode2[i]==0x17) // DSRAV
3813 {
3814 assert(th>=0);
3815 emit_shr(sl,HOST_TEMPREG,tl);
3816 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3817 if(real_th>=0) {
3818 assert(temp>=0);
3819 emit_sarimm(th,31,temp);
3820 }
3821 emit_orrshl(sh,HOST_TEMPREG,tl);
3822 emit_andimm(shift,31,HOST_TEMPREG);
3823 emit_testimm(shift,32);
3824 emit_sar(sh,HOST_TEMPREG,th);
3825 emit_cmovne_reg(th,tl);
3826 if(real_th>=0) emit_cmovne_reg(temp,th);
3827 }
3828 }
3829 }
3830 }
3831 }
3832}
ffb0b9e0 3833
3834#ifdef PCSX
3835static void speculate_mov(int rs,int rt)
3836{
3837 if(rt!=0) {
3838 smrv_strong_next|=1<<rt;
3839 smrv[rt]=smrv[rs];
3840 }
3841}
3842
3843static void speculate_mov_weak(int rs,int rt)
3844{
3845 if(rt!=0) {
3846 smrv_weak_next|=1<<rt;
3847 smrv[rt]=smrv[rs];
3848 }
3849}
3850
3851static void speculate_register_values(int i)
3852{
3853 if(i==0) {
3854 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3855 // gp,sp are likely to stay the same throughout the block
3856 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3857 smrv_weak_next=~smrv_strong_next;
3858 //printf(" llr %08x\n", smrv[4]);
3859 }
3860 smrv_strong=smrv_strong_next;
3861 smrv_weak=smrv_weak_next;
3862 switch(itype[i]) {
3863 case ALU:
3864 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3865 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3866 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3867 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3868 else {
3869 smrv_strong_next&=~(1<<rt1[i]);
3870 smrv_weak_next&=~(1<<rt1[i]);
3871 }
3872 break;
3873 case SHIFTIMM:
3874 smrv_strong_next&=~(1<<rt1[i]);
3875 smrv_weak_next&=~(1<<rt1[i]);
3876 // fallthrough
3877 case IMM16:
3878 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3879 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3880 if(hr>=0) {
3881 if(get_final_value(hr,i,&value))
3882 smrv[rt1[i]]=value;
3883 else smrv[rt1[i]]=constmap[i][hr];
3884 smrv_strong_next|=1<<rt1[i];
3885 }
3886 }
3887 else {
3888 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3889 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3890 }
3891 break;
3892 case LOAD:
3893 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3894 // special case for BIOS
3895 smrv[rt1[i]]=0xa0000000;
3896 smrv_strong_next|=1<<rt1[i];
3897 break;
3898 }
3899 // fallthrough
3900 case SHIFT:
3901 case LOADLR:
3902 case MOV:
3903 smrv_strong_next&=~(1<<rt1[i]);
3904 smrv_weak_next&=~(1<<rt1[i]);
3905 break;
3906 case COP0:
3907 case COP2:
3908 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3909 smrv_strong_next&=~(1<<rt1[i]);
3910 smrv_weak_next&=~(1<<rt1[i]);
3911 }
3912 break;
3913 case C2LS:
3914 if (opcode[i]==0x32) { // LWC2
3915 smrv_strong_next&=~(1<<rt1[i]);
3916 smrv_weak_next&=~(1<<rt1[i]);
3917 }
3918 break;
3919 }
3920#if 0
3921 int r=4;
3922 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3923 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3924#endif
3925}
3926
/* Memory region classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, /* default class; anything not matched below */
  MTYPE_8020 = 1, /* 0x80200000..0x807fffff */
  MTYPE_0000 = 2, /* below 0x00200000 */
  MTYPE_A000 = 3, /* 0xa0000000..0xa01fffff */
  MTYPE_1F80 = 4  /* 0x1f800000..0x1f800fff */
};
3934
3935static int get_ptr_mem_type(u_int a)
3936{
3937 if(a < 0x00200000) {
3938 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3939 // return wrong, must use memhandler for BIOS self-test to pass
3940 // 007 does similar stuff from a00 mirror, weird stuff
3941 return MTYPE_8000;
3942 return MTYPE_0000;
3943 }
3944 if(0x1f800000 <= a && a < 0x1f801000)
3945 return MTYPE_1F80;
3946 if(0x80200000 <= a && a < 0x80800000)
3947 return MTYPE_8020;
3948 if(0xa0000000 <= a && a < 0xa0200000)
3949 return MTYPE_A000;
3950 return MTYPE_8000;
3951}
3952#endif
3953
3954static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3955{
3956 int jaddr,type=0;
3957
3958#ifdef PCSX
3959 int mr=rs1[i];
3960 if(((smrv_strong|smrv_weak)>>mr)&1) {
3961 type=get_ptr_mem_type(smrv[mr]);
3962 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3963 }
3964 else {
3965 // use the mirror we are running on
3966 type=get_ptr_mem_type(start);
3967 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3968 }
3969
3970 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3971 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3972 addr=*addr_reg_override=HOST_TEMPREG;
3973 type=0;
3974 }
3975 else if(type==MTYPE_0000) { // RAM 0 mirror
3976 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3977 addr=*addr_reg_override=HOST_TEMPREG;
3978 type=0;
3979 }
3980 else if(type==MTYPE_A000) { // RAM A mirror
3981 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3982 addr=*addr_reg_override=HOST_TEMPREG;
3983 type=0;
3984 }
3985 else if(type==MTYPE_1F80) { // scratchpad
3986 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3987 emit_cmpimm(HOST_TEMPREG,0x1000);
3988 jaddr=(int)out;
3989 emit_jc(0);
3990 }
3991#endif
3992
3993 if(type==0)
3994 {
3995 emit_cmpimm(addr,RAM_SIZE);
3996 jaddr=(int)out;
3997 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3998 // Hint to branch predictor that the branch is unlikely to be taken
3999 if(rs1[i]>=28)
4000 emit_jno_unlikely(0);
4001 else
4002 #endif
4003 emit_jno(0);
a327ad27 4004 if(ram_offset!=0) {
4005 emit_addimm(addr,ram_offset,HOST_TEMPREG);
4006 addr=*addr_reg_override=HOST_TEMPREG;
4007 }
ffb0b9e0 4008 }
4009
4010 return jaddr;
4011}
4012
57871462 4013#define shift_assemble shift_assemble_arm
4014
4015void loadlr_assemble_arm(int i,struct regstat *i_regs)
4016{
4017 int s,th,tl,temp,temp2,addr,map=-1;
4018 int offset;
4019 int jaddr=0;
af4ee1fe 4020 int memtarget=0,c=0;
ffb0b9e0 4021 int fastload_reg_override=0;
57871462 4022 u_int hr,reglist=0;
4023 th=get_reg(i_regs->regmap,rt1[i]|64);
4024 tl=get_reg(i_regs->regmap,rt1[i]);
4025 s=get_reg(i_regs->regmap,rs1[i]);
4026 temp=get_reg(i_regs->regmap,-1);
4027 temp2=get_reg(i_regs->regmap,FTEMP);
4028 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
4029 assert(addr<0);
4030 offset=imm[i];
4031 for(hr=0;hr<HOST_REGS;hr++) {
4032 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4033 }
4034 reglist|=1<<temp;
4035 if(offset||s<0||c) addr=temp2;
4036 else addr=s;
4037 if(s>=0) {
4038 c=(i_regs->wasconst>>s)&1;
af4ee1fe 4039 if(c) {
4040 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
4041 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4042 }
57871462 4043 }
535d208a 4044 if(!using_tlb) {
4045 if(!c) {
4046 #ifdef RAM_OFFSET
4047 map=get_reg(i_regs->regmap,ROREG);
4048 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4049 #endif
4050 emit_shlimm(addr,3,temp);
4051 if (opcode[i]==0x22||opcode[i]==0x26) {
4052 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4053 }else{
535d208a 4054 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4055 }
ffb0b9e0 4056 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4057 }
4058 else {
a327ad27 4059 if(ram_offset&&memtarget) {
4060 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
4061 fastload_reg_override=HOST_TEMPREG;
4062 }
535d208a 4063 if (opcode[i]==0x22||opcode[i]==0x26) {
4064 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4065 }else{
4066 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4067 }
57871462 4068 }
535d208a 4069 }else{ // using tlb
4070 int a;
4071 if(c) {
4072 a=-1;
4073 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4074 a=0xFFFFFFFC; // LWL/LWR
4075 }else{
4076 a=0xFFFFFFF8; // LDL/LDR
4077 }
4078 map=get_reg(i_regs->regmap,TLREG);
4079 assert(map>=0);
ea3d2e6e 4080 reglist&=~(1<<map);
535d208a 4081 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4082 if(c) {
4083 if (opcode[i]==0x22||opcode[i]==0x26) {
4084 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4085 }else{
4086 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4087 }
535d208a 4088 }
4089 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4090 }
4091 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4092 if(!c||memtarget) {
ffb0b9e0 4093 int a=temp2;
4094 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4095 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4096 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4097 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4098 }
4099 else
4100 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4101 if(rt1[i]) {
4102 assert(tl>=0);
57871462 4103 emit_andimm(temp,24,temp);
2002a1db 4104#ifdef BIG_ENDIAN_MIPS
4105 if (opcode[i]==0x26) // LWR
4106#else
4107 if (opcode[i]==0x22) // LWL
4108#endif
4109 emit_xorimm(temp,24,temp);
57871462 4110 emit_movimm(-1,HOST_TEMPREG);
4111 if (opcode[i]==0x26) {
4112 emit_shr(temp2,temp,temp2);
4113 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4114 }else{
4115 emit_shl(temp2,temp,temp2);
4116 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4117 }
4118 emit_or(temp2,tl,tl);
57871462 4119 }
535d208a 4120 //emit_storereg(rt1[i],tl); // DEBUG
4121 }
4122 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4123 // FIXME: little endian, fastload_reg_override
535d208a 4124 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4125 if(!c||memtarget) {
4126 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4127 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4128 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4129 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4130 }
4131 else
4132 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4133 if(rt1[i]) {
4134 assert(th>=0);
4135 assert(tl>=0);
57871462 4136 emit_testimm(temp,32);
4137 emit_andimm(temp,24,temp);
4138 if (opcode[i]==0x1A) { // LDL
4139 emit_rsbimm(temp,32,HOST_TEMPREG);
4140 emit_shl(temp2h,temp,temp2h);
4141 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4142 emit_movimm(-1,HOST_TEMPREG);
4143 emit_shl(temp2,temp,temp2);
4144 emit_cmove_reg(temp2h,th);
4145 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4146 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4147 emit_orreq(temp2,tl,tl);
4148 emit_orrne(temp2,th,th);
4149 }
4150 if (opcode[i]==0x1B) { // LDR
4151 emit_xorimm(temp,24,temp);
4152 emit_rsbimm(temp,32,HOST_TEMPREG);
4153 emit_shr(temp2,temp,temp2);
4154 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4155 emit_movimm(-1,HOST_TEMPREG);
4156 emit_shr(temp2h,temp,temp2h);
4157 emit_cmovne_reg(temp2,tl);
4158 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4159 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4160 emit_orrne(temp2h,th,th);
4161 emit_orreq(temp2h,tl,tl);
4162 }
4163 }
4164 }
4165}
4166#define loadlr_assemble loadlr_assemble_arm
4167
4168void cop0_assemble(int i,struct regstat *i_regs)
4169{
4170 if(opcode2[i]==0) // MFC0
4171 {
4172 signed char t=get_reg(i_regs->regmap,rt1[i]);
4173 char copr=(source[i]>>11)&0x1f;
4174 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4175 if(t>=0&&rt1[i]!=0) {
7139f3c8 4176#ifdef MUPEN64
57871462 4177 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4178 emit_movimm((source[i]>>11)&0x1f,1);
4179 emit_writeword(0,(int)&PC);
4180 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4181 if(copr==9) {
4182 emit_readword((int)&last_count,ECX);
4183 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4184 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4185 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4186 emit_writeword(HOST_CCREG,(int)&Count);
4187 }
4188 emit_call((int)MFC0);
4189 emit_readword((int)&readmem_dword,t);
7139f3c8 4190#else
4191 emit_readword((int)&reg_cop0+copr*4,t);
4192#endif
57871462 4193 }
4194 }
4195 else if(opcode2[i]==4) // MTC0
4196 {
4197 signed char s=get_reg(i_regs->regmap,rs1[i]);
4198 char copr=(source[i]>>11)&0x1f;
4199 assert(s>=0);
63cb0298 4200#ifdef MUPEN64
57871462 4201 emit_writeword(s,(int)&readmem_dword);
4202 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4203 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4204 emit_movimm((source[i]>>11)&0x1f,1);
4205 emit_writeword(0,(int)&PC);
4206 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4207#else
4208 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4209#endif
4210 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4211 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4212 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4213 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4214 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4215 emit_writeword(HOST_CCREG,(int)&Count);
4216 }
4217 // What a mess. The status register (12) can enable interrupts,
4218 // so needs a special case to handle a pending interrupt.
4219 // The interrupt must be taken immediately, because a subsequent
4220 // instruction might disable interrupts again.
7139f3c8 4221 if(copr==12||copr==13) {
fca1aef2 4222#ifdef PCSX
4223 if (is_delayslot) {
4224 // burn cycles to cause cc_interrupt, which will
4225 // reschedule next_interupt. Relies on CCREG from above.
4226 assem_debug("MTC0 DS %d\n", copr);
4227 emit_writeword(HOST_CCREG,(int)&last_count);
4228 emit_movimm(0,HOST_CCREG);
4229 emit_storereg(CCREG,HOST_CCREG);
caeefe31 4230 emit_loadreg(rs1[i],1);
fca1aef2 4231 emit_movimm(copr,0);
4232 emit_call((int)pcsx_mtc0_ds);
042c7287 4233 emit_loadreg(rs1[i],s);
fca1aef2 4234 return;
4235 }
4236#endif
63cb0298 4237 emit_movimm(start+i*4+4,HOST_TEMPREG);
4238 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4239 emit_movimm(0,HOST_TEMPREG);
4240 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4241 }
4242 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4243 //else
fca1aef2 4244#ifdef PCSX
caeefe31 4245 if(s==HOST_CCREG)
4246 emit_loadreg(rs1[i],1);
4247 else if(s!=1)
63cb0298 4248 emit_mov(s,1);
fca1aef2 4249 emit_movimm(copr,0);
4250 emit_call((int)pcsx_mtc0);
4251#else
57871462 4252 emit_call((int)MTC0);
fca1aef2 4253#endif
7139f3c8 4254 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4255 emit_readword((int)&Count,HOST_CCREG);
042c7287 4256 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 4257 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 4258 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
4259 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 4260 emit_storereg(CCREG,HOST_CCREG);
4261 }
7139f3c8 4262 if(copr==12||copr==13) {
57871462 4263 assert(!is_delayslot);
4264 emit_readword((int)&pending_exception,14);
042c7287 4265 emit_test(14,14);
4266 emit_jne((int)&do_interrupt);
57871462 4267 }
4268 emit_loadreg(rs1[i],s);
4269 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4270 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 4271 cop1_usable=0;
4272 }
4273 else
4274 {
4275 assert(opcode2[i]==0x10);
3d624f89 4276#ifndef DISABLE_TLB
57871462 4277 if((source[i]&0x3f)==0x01) // TLBR
4278 emit_call((int)TLBR);
4279 if((source[i]&0x3f)==0x02) // TLBWI
4280 emit_call((int)TLBWI_new);
4281 if((source[i]&0x3f)==0x06) { // TLBWR
4282 // The TLB entry written by TLBWR is dependent on the count,
4283 // so update the cycle count
4284 emit_readword((int)&last_count,ECX);
4285 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4286 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4287 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4288 emit_writeword(HOST_CCREG,(int)&Count);
4289 emit_call((int)TLBWR_new);
4290 }
4291 if((source[i]&0x3f)==0x08) // TLBP
4292 emit_call((int)TLBP);
3d624f89 4293#endif
576bbd8f 4294#ifdef PCSX
4295 if((source[i]&0x3f)==0x10) // RFE
4296 {
4297 emit_readword((int)&Status,0);
4298 emit_andimm(0,0x3c,1);
4299 emit_andimm(0,~0xf,0);
4300 emit_orrshr_imm(1,2,0);
4301 emit_writeword(0,(int)&Status);
4302 }
4303#else
57871462 4304 if((source[i]&0x3f)==0x18) // ERET
4305 {
4306 int count=ccadj[i];
4307 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4308 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4309 emit_jmp((int)jump_eret);
4310 }
576bbd8f 4311#endif
57871462 4312 }
4313}
4314
b9b61529 4315static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4316{
4317 switch (copr) {
4318 case 1:
4319 case 3:
4320 case 5:
4321 case 8:
4322 case 9:
4323 case 10:
4324 case 11:
4325 emit_readword((int)&reg_cop2d[copr],tl);
4326 emit_signextend16(tl,tl);
4327 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4328 break;
4329 case 7:
4330 case 16:
4331 case 17:
4332 case 18:
4333 case 19:
4334 emit_readword((int)&reg_cop2d[copr],tl);
4335 emit_andimm(tl,0xffff,tl);
4336 emit_writeword(tl,(int)&reg_cop2d[copr]);
4337 break;
4338 case 15:
4339 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4340 emit_writeword(tl,(int)&reg_cop2d[copr]);
4341 break;
4342 case 28:
b9b61529 4343 case 29:
4344 emit_readword((int)&reg_cop2d[9],temp);
4345 emit_testimm(temp,0x8000); // do we need this?
4346 emit_andimm(temp,0xf80,temp);
4347 emit_andne_imm(temp,0,temp);
f70d384d 4348 emit_shrimm(temp,7,tl);
b9b61529 4349 emit_readword((int)&reg_cop2d[10],temp);
4350 emit_testimm(temp,0x8000);
4351 emit_andimm(temp,0xf80,temp);
4352 emit_andne_imm(temp,0,temp);
f70d384d 4353 emit_orrshr_imm(temp,2,tl);
b9b61529 4354 emit_readword((int)&reg_cop2d[11],temp);
4355 emit_testimm(temp,0x8000);
4356 emit_andimm(temp,0xf80,temp);
4357 emit_andne_imm(temp,0,temp);
f70d384d 4358 emit_orrshl_imm(temp,3,tl);
b9b61529 4359 emit_writeword(tl,(int)&reg_cop2d[copr]);
4360 break;
4361 default:
4362 emit_readword((int)&reg_cop2d[copr],tl);
4363 break;
4364 }
4365}
4366
4367static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4368{
4369 switch (copr) {
4370 case 15:
4371 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4372 emit_writeword(sl,(int)&reg_cop2d[copr]);
4373 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4374 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4375 emit_writeword(sl,(int)&reg_cop2d[14]);
4376 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4377 break;
4378 case 28:
4379 emit_andimm(sl,0x001f,temp);
f70d384d 4380 emit_shlimm(temp,7,temp);
b9b61529 4381 emit_writeword(temp,(int)&reg_cop2d[9]);
4382 emit_andimm(sl,0x03e0,temp);
f70d384d 4383 emit_shlimm(temp,2,temp);
b9b61529 4384 emit_writeword(temp,(int)&reg_cop2d[10]);
4385 emit_andimm(sl,0x7c00,temp);
f70d384d 4386 emit_shrimm(temp,3,temp);
b9b61529 4387 emit_writeword(temp,(int)&reg_cop2d[11]);
4388 emit_writeword(sl,(int)&reg_cop2d[28]);
4389 break;
4390 case 30:
4391 emit_movs(sl,temp);
4392 emit_mvnmi(temp,temp);
4393 emit_clz(temp,temp);
4394 emit_writeword(sl,(int)&reg_cop2d[30]);
4395 emit_writeword(temp,(int)&reg_cop2d[31]);
4396 break;
b9b61529 4397 case 31:
4398 break;
4399 default:
4400 emit_writeword(sl,(int)&reg_cop2d[copr]);
4401 break;
4402 }
4403}
4404
4405void cop2_assemble(int i,struct regstat *i_regs)
4406{
4407 u_int copr=(source[i]>>11)&0x1f;
4408 signed char temp=get_reg(i_regs->regmap,-1);
4409 if (opcode2[i]==0) { // MFC2
4410 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4411 if(tl>=0&&rt1[i]!=0)
b9b61529 4412 cop2_get_dreg(copr,tl,temp);
4413 }
4414 else if (opcode2[i]==4) { // MTC2
4415 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4416 cop2_put_dreg(copr,sl,temp);
4417 }
4418 else if (opcode2[i]==2) // CFC2
4419 {
4420 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4421 if(tl>=0&&rt1[i]!=0)
b9b61529 4422 emit_readword((int)&reg_cop2c[copr],tl);
4423 }
4424 else if (opcode2[i]==6) // CTC2
4425 {
4426 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4427 switch(copr) {
4428 case 4:
4429 case 12:
4430 case 20:
4431 case 26:
4432 case 27:
4433 case 29:
4434 case 30:
4435 emit_signextend16(sl,temp);
4436 break;
4437 case 31:
4438 //value = value & 0x7ffff000;
4439 //if (value & 0x7f87e000) value |= 0x80000000;
4440 emit_shrimm(sl,12,temp);
4441 emit_shlimm(temp,12,temp);
4442 emit_testimm(temp,0x7f000000);
4443 emit_testeqimm(temp,0x00870000);
4444 emit_testeqimm(temp,0x0000e000);
4445 emit_orrne_imm(temp,0x80000000,temp);
4446 break;
4447 default:
4448 temp=sl;
4449 break;
4450 }
4451 emit_writeword(temp,(int)&reg_cop2c[copr]);
4452 assert(sl>=0);
4453 }
4454}
4455
054175e9 4456static void c2op_prologue(u_int op,u_int reglist)
4457{
4458 save_regs_all(reglist);
82ed88eb 4459#ifdef PCNT
4460 emit_movimm(op,0);
4461 emit_call((int)pcnt_gte_start);
4462#endif
054175e9 4463 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4464}
4465
4466static void c2op_epilogue(u_int op,u_int reglist)
4467{
82ed88eb 4468#ifdef PCNT
4469 emit_movimm(op,0);
4470 emit_call((int)pcnt_gte_end);
4471#endif
054175e9 4472 restore_regs_all(reglist);
4473}
4474
6c0eefaf 4475static void c2op_call_MACtoIR(int lm,int need_flags)
4476{
4477 if(need_flags)
4478 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4479 else
4480 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
4481}
4482
4483static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
4484{
4485 emit_call((int)func);
4486 // func is C code and trashes r0
4487 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4488 if(need_flags||need_ir)
4489 c2op_call_MACtoIR(lm,need_flags);
4490 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
4491}
4492
054175e9 4493static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4494{
4495 signed char temp=get_reg(i_regs->regmap,-1);
4496 u_int c2op=source[i]&0x3f;
6c0eefaf 4497 u_int hr,reglist_full=0,reglist;
054175e9 4498 int need_flags,need_ir;
b9b61529 4499 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 4500 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 4501 }
6c0eefaf 4502 reglist=reglist_full&0x100f;
b9b61529 4503
4504 if (gte_handlers[c2op]!=NULL) {
bedfea38 4505 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4506 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 4507 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
4508 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 4509 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
4510 need_flags=0;
6c0eefaf 4511 int shift = (source[i] >> 19) & 1;
4512 int lm = (source[i] >> 10) & 1;
054175e9 4513 switch(c2op) {
19776aef 4514#ifndef DRC_DBG
054175e9 4515 case GTE_MVMVA: {
054175e9 4516 int v = (source[i] >> 15) & 3;
4517 int cv = (source[i] >> 13) & 3;
4518 int mx = (source[i] >> 17) & 3;
6c0eefaf 4519 reglist=reglist_full&0x10ff; // +{r4-r7}
054175e9 4520 c2op_prologue(c2op,reglist);
4521 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4522 if(v<3)
4523 emit_ldrd(v*8,0,4);
4524 else {
4525 emit_movzwl_indexed(9*4,0,4); // gteIR
4526 emit_movzwl_indexed(10*4,0,6);
4527 emit_movzwl_indexed(11*4,0,5);
4528 emit_orrshl_imm(6,16,4);
4529 }
4530 if(mx<3)
4531 emit_addimm(0,32*4+mx*8*4,6);
4532 else
4533 emit_readword((int)&zeromem_ptr,6);
4534 if(cv<3)
4535 emit_addimm(0,32*4+(cv*8+5)*4,7);
4536 else
4537 emit_readword((int)&zeromem_ptr,7);
4538#ifdef __ARM_NEON__
4539 emit_movimm(source[i],1); // opcode
4540 emit_call((int)gteMVMVA_part_neon);
4541 if(need_flags) {
4542 emit_movimm(lm,1);
4543 emit_call((int)gteMACtoIR_flags_neon);
4544 }
4545#else
4546 if(cv==3&&shift)
4547 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4548 else {
4549 emit_movimm(shift,1);
4550 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4551 }
6c0eefaf 4552 if(need_flags||need_ir)
4553 c2op_call_MACtoIR(lm,need_flags);
054175e9 4554#endif
4555 break;
4556 }
6c0eefaf 4557 case GTE_OP:
4558 c2op_prologue(c2op,reglist);
4559 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
4560 if(need_flags||need_ir) {
4561 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4562 c2op_call_MACtoIR(lm,need_flags);
4563 }
4564 break;
4565 case GTE_DPCS:
4566 c2op_prologue(c2op,reglist);
4567 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
4568 break;
4569 case GTE_INTPL:
4570 c2op_prologue(c2op,reglist);
4571 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
4572 break;
4573 case GTE_SQR:
4574 c2op_prologue(c2op,reglist);
4575 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
4576 if(need_flags||need_ir) {
4577 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4578 c2op_call_MACtoIR(lm,need_flags);
4579 }
4580 break;
4581 case GTE_DCPL:
4582 c2op_prologue(c2op,reglist);
4583 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
4584 break;
4585 case GTE_GPF:
4586 c2op_prologue(c2op,reglist);
4587 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
4588 break;
4589 case GTE_GPL:
4590 c2op_prologue(c2op,reglist);
4591 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
4592 break;
19776aef 4593#endif
054175e9 4594 default:
054175e9 4595 c2op_prologue(c2op,reglist);
19776aef 4596#ifdef DRC_DBG
4597 emit_movimm(source[i],1); // opcode
4598 emit_writeword(1,(int)&psxRegs.code);
4599#endif
054175e9 4600 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4601 break;
4602 }
4603 c2op_epilogue(c2op,reglist);
4604 }
b9b61529 4605}
4606
4607void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4608{
4609 // XXX: should just just do the exception instead
4610 if(!cop1_usable) {
4611 int jaddr=(int)out;
4612 emit_jmp(0);
4613 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4614 cop1_usable=1;
4615 }
4616}
4617
57871462 4618void cop1_assemble(int i,struct regstat *i_regs)
4619{
3d624f89 4620#ifndef DISABLE_COP1
57871462 4621 // Check cop1 unusable
4622 if(!cop1_usable) {
4623 signed char rs=get_reg(i_regs->regmap,CSREG);
4624 assert(rs>=0);
4625 emit_testimm(rs,0x20000000);
4626 int jaddr=(int)out;
4627 emit_jeq(0);
4628 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4629 cop1_usable=1;
4630 }
4631 if (opcode2[i]==0) { // MFC1
4632 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4633 if(tl>=0) {
4634 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4635 emit_readword_indexed(0,tl,tl);
4636 }
4637 }
4638 else if (opcode2[i]==1) { // DMFC1
4639 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4640 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4641 if(tl>=0) {
4642 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4643 if(th>=0) emit_readword_indexed(4,tl,th);
4644 emit_readword_indexed(0,tl,tl);
4645 }
4646 }
4647 else if (opcode2[i]==4) { // MTC1
4648 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4649 signed char temp=get_reg(i_regs->regmap,-1);
4650 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4651 emit_writeword_indexed(sl,0,temp);
4652 }
4653 else if (opcode2[i]==5) { // DMTC1
4654 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4655 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4656 signed char temp=get_reg(i_regs->regmap,-1);
4657 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4658 emit_writeword_indexed(sh,4,temp);
4659 emit_writeword_indexed(sl,0,temp);
4660 }
4661 else if (opcode2[i]==2) // CFC1
4662 {
4663 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4664 if(tl>=0) {
4665 u_int copr=(source[i]>>11)&0x1f;
4666 if(copr==0) emit_readword((int)&FCR0,tl);
4667 if(copr==31) emit_readword((int)&FCR31,tl);
4668 }
4669 }
4670 else if (opcode2[i]==6) // CTC1
4671 {
4672 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4673 u_int copr=(source[i]>>11)&0x1f;
4674 assert(sl>=0);
4675 if(copr==31)
4676 {
4677 emit_writeword(sl,(int)&FCR31);
4678 // Set the rounding mode
4679 //FIXME
4680 //char temp=get_reg(i_regs->regmap,-1);
4681 //emit_andimm(sl,3,temp);
4682 //emit_fldcw_indexed((int)&rounding_modes,temp);
4683 }
4684 }
3d624f89 4685#else
4686 cop1_unusable(i, i_regs);
4687#endif
57871462 4688}
4689
4690void fconv_assemble_arm(int i,struct regstat *i_regs)
4691{
3d624f89 4692#ifndef DISABLE_COP1
57871462 4693 signed char temp=get_reg(i_regs->regmap,-1);
4694 assert(temp>=0);
4695 // Check cop1 unusable
4696 if(!cop1_usable) {
4697 signed char rs=get_reg(i_regs->regmap,CSREG);
4698 assert(rs>=0);
4699 emit_testimm(rs,0x20000000);
4700 int jaddr=(int)out;
4701 emit_jeq(0);
4702 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4703 cop1_usable=1;
4704 }
4705
4706 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4707 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4708 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4709 emit_flds(temp,15);
4710 emit_ftosizs(15,15); // float->int, truncate
4711 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4712 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4713 emit_fsts(15,temp);
4714 return;
4715 }
4716 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4717 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4718 emit_vldr(temp,7);
4719 emit_ftosizd(7,13); // double->int, truncate
4720 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4721 emit_fsts(13,temp);
4722 return;
4723 }
4724
4725 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4726 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4727 emit_flds(temp,13);
4728 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4729 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4730 emit_fsitos(13,15);
4731 emit_fsts(15,temp);
4732 return;
4733 }
4734 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4735 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4736 emit_flds(temp,13);
4737 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4738 emit_fsitod(13,7);
4739 emit_vstr(7,temp);
4740 return;
4741 }
4742
4743 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4744 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4745 emit_flds(temp,13);
4746 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4747 emit_fcvtds(13,7);
4748 emit_vstr(7,temp);
4749 return;
4750 }
4751 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4752 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4753 emit_vldr(temp,7);
4754 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4755 emit_fcvtsd(7,13);
4756 emit_fsts(13,temp);
4757 return;
4758 }
4759 #endif
4760
4761 // C emulation code
4762
4763 u_int hr,reglist=0;
4764 for(hr=0;hr<HOST_REGS;hr++) {
4765 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4766 }
4767 save_regs(reglist);
4768
4769 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4770 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4771 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4772 emit_call((int)cvt_s_w);
4773 }
4774 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4775 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4776 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4777 emit_call((int)cvt_d_w);
4778 }
4779 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4780 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4781 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4782 emit_call((int)cvt_s_l);
4783 }
4784 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4785 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4786 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4787 emit_call((int)cvt_d_l);
4788 }
4789
4790 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4791 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4792 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4793 emit_call((int)cvt_d_s);
4794 }
4795 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4796 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4797 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4798 emit_call((int)cvt_w_s);
4799 }
4800 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4801 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4802 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4803 emit_call((int)cvt_l_s);
4804 }
4805
4806 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4807 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4808 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4809 emit_call((int)cvt_s_d);
4810 }
4811 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4812 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4813 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4814 emit_call((int)cvt_w_d);
4815 }
4816 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4817 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4818 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4819 emit_call((int)cvt_l_d);
4820 }
4821
4822 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4823 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4824 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4825 emit_call((int)round_l_s);
4826 }
4827 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4828 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4829 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4830 emit_call((int)trunc_l_s);
4831 }
4832 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4833 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4834 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4835 emit_call((int)ceil_l_s);
4836 }
4837 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4838 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4839 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4840 emit_call((int)floor_l_s);
4841 }
4842 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4843 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4844 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4845 emit_call((int)round_w_s);
4846 }
4847 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4848 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4849 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4850 emit_call((int)trunc_w_s);
4851 }
4852 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4853 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4854 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4855 emit_call((int)ceil_w_s);
4856 }
4857 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4858 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4859 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4860 emit_call((int)floor_w_s);
4861 }
4862
4863 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4864 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4865 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4866 emit_call((int)round_l_d);
4867 }
4868 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4869 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4870 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4871 emit_call((int)trunc_l_d);
4872 }
4873 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4874 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4875 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4876 emit_call((int)ceil_l_d);
4877 }
4878 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4879 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4880 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4881 emit_call((int)floor_l_d);
4882 }
4883 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4884 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4885 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4886 emit_call((int)round_w_d);
4887 }
4888 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4889 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4890 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4891 emit_call((int)trunc_w_d);
4892 }
4893 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4894 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4895 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4896 emit_call((int)ceil_w_d);
4897 }
4898 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4899 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4900 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4901 emit_call((int)floor_w_d);
4902 }
4903
4904 restore_regs(reglist);
3d624f89 4905#else
4906 cop1_unusable(i, i_regs);
4907#endif
57871462 4908}
4909#define fconv_assemble fconv_assemble_arm
4910
4911void fcomp_assemble(int i,struct regstat *i_regs)
4912{
3d624f89 4913#ifndef DISABLE_COP1
57871462 4914 signed char fs=get_reg(i_regs->regmap,FSREG);
4915 signed char temp=get_reg(i_regs->regmap,-1);
4916 assert(temp>=0);
4917 // Check cop1 unusable
4918 if(!cop1_usable) {
4919 signed char cs=get_reg(i_regs->regmap,CSREG);
4920 assert(cs>=0);
4921 emit_testimm(cs,0x20000000);
4922 int jaddr=(int)out;
4923 emit_jeq(0);
4924 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4925 cop1_usable=1;
4926 }
4927
4928 if((source[i]&0x3f)==0x30) {
4929 emit_andimm(fs,~0x800000,fs);
4930 return;
4931 }
4932
4933 if((source[i]&0x3e)==0x38) {
4934 // sf/ngle - these should throw exceptions for NaNs
4935 emit_andimm(fs,~0x800000,fs);
4936 return;
4937 }
4938
4939 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4940 if(opcode2[i]==0x10) {
4941 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4942 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4943 emit_orimm(fs,0x800000,fs);
4944 emit_flds(temp,14);
4945 emit_flds(HOST_TEMPREG,15);
4946 emit_fcmps(14,15);
4947 emit_fmstat();
4948 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4949 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4950 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4951 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4952 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4953 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4954 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4955 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4956 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4957 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4958 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4959 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4960 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4961 return;
4962 }
4963 if(opcode2[i]==0x11) {
4964 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4965 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4966 emit_orimm(fs,0x800000,fs);
4967 emit_vldr(temp,6);
4968 emit_vldr(HOST_TEMPREG,7);
4969 emit_fcmpd(6,7);
4970 emit_fmstat();
4971 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4972 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4973 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4974 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4975 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4976 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4977 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4978 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4979 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4980 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4981 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4982 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4983 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4984 return;
4985 }
4986 #endif
4987
4988 // C only
4989
4990 u_int hr,reglist=0;
4991 for(hr=0;hr<HOST_REGS;hr++) {
4992 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4993 }
4994 reglist&=~(1<<fs);
4995 save_regs(reglist);
4996 if(opcode2[i]==0x10) {
4997 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4998 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4999 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
5000 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
5001 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
5002 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
5003 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
5004 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
5005 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
5006 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
5007 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
5008 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
5009 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
5010 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
5011 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
5012 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
5013 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
5014 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
5015 }
5016 if(opcode2[i]==0x11) {
5017 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5018 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5019 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
5020 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
5021 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
5022 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
5023 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
5024 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
5025 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
5026 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
5027 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
5028 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
5029 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
5030 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
5031 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
5032 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
5033 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
5034 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
5035 }
5036 restore_regs(reglist);
5037 emit_loadreg(FSREG,fs);
3d624f89 5038#else
5039 cop1_unusable(i, i_regs);
5040#endif
57871462 5041}
5042
5043void float_assemble(int i,struct regstat *i_regs)
5044{
3d624f89 5045#ifndef DISABLE_COP1
57871462 5046 signed char temp=get_reg(i_regs->regmap,-1);
5047 assert(temp>=0);
5048 // Check cop1 unusable
5049 if(!cop1_usable) {
5050 signed char cs=get_reg(i_regs->regmap,CSREG);
5051 assert(cs>=0);
5052 emit_testimm(cs,0x20000000);
5053 int jaddr=(int)out;
5054 emit_jeq(0);
5055 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
5056 cop1_usable=1;
5057 }
5058
5059 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
5060 if((source[i]&0x3f)==6) // mov
5061 {
5062 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5063 if(opcode2[i]==0x10) {
5064 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5065 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
5066 emit_readword_indexed(0,temp,temp);
5067 emit_writeword_indexed(temp,0,HOST_TEMPREG);
5068 }
5069 if(opcode2[i]==0x11) {
5070 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5071 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
5072 emit_vldr(temp,7);
5073 emit_vstr(7,HOST_TEMPREG);
5074 }
5075 }
5076 return;
5077 }
5078
5079 if((source[i]&0x3f)>3)
5080 {
5081 if(opcode2[i]==0x10) {
5082 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5083 emit_flds(temp,15);
5084 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5085 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5086 }
5087 if((source[i]&0x3f)==4) // sqrt
5088 emit_fsqrts(15,15);
5089 if((source[i]&0x3f)==5) // abs
5090 emit_fabss(15,15);
5091 if((source[i]&0x3f)==7) // neg
5092 emit_fnegs(15,15);
5093 emit_fsts(15,temp);
5094 }
5095 if(opcode2[i]==0x11) {
5096 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5097 emit_vldr(temp,7);
5098 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5099 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5100 }
5101 if((source[i]&0x3f)==4) // sqrt
5102 emit_fsqrtd(7,7);
5103 if((source[i]&0x3f)==5) // abs
5104 emit_fabsd(7,7);
5105 if((source[i]&0x3f)==7) // neg
5106 emit_fnegd(7,7);
5107 emit_vstr(7,temp);
5108 }
5109 return;
5110 }
5111 if((source[i]&0x3f)<4)
5112 {
5113 if(opcode2[i]==0x10) {
5114 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5115 }
5116 if(opcode2[i]==0x11) {
5117 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5118 }
5119 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5120 if(opcode2[i]==0x10) {
5121 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5122 emit_flds(temp,15);
5123 emit_flds(HOST_TEMPREG,13);
5124 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5125 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5126 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5127 }
5128 }
5129 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5130 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5131 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5132 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5133 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5134 emit_fsts(15,HOST_TEMPREG);
5135 }else{
5136 emit_fsts(15,temp);
5137 }
5138 }
5139 else if(opcode2[i]==0x11) {
5140 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5141 emit_vldr(temp,7);
5142 emit_vldr(HOST_TEMPREG,6);
5143 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5144 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5145 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5146 }
5147 }
5148 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5149 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5150 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5151 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5152 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5153 emit_vstr(7,HOST_TEMPREG);
5154 }else{
5155 emit_vstr(7,temp);
5156 }
5157 }
5158 }
5159 else {
5160 if(opcode2[i]==0x10) {
5161 emit_flds(temp,15);
5162 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5163 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5164 }
5165 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5166 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5167 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5168 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5169 emit_fsts(15,temp);
5170 }
5171 else if(opcode2[i]==0x11) {
5172 emit_vldr(temp,7);
5173 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5174 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5175 }
5176 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5177 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5178 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5179 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5180 emit_vstr(7,temp);
5181 }
5182 }
5183 return;
5184 }
5185 #endif
5186
5187 u_int hr,reglist=0;
5188 for(hr=0;hr<HOST_REGS;hr++) {
5189 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5190 }
5191 if(opcode2[i]==0x10) { // Single precision
5192 save_regs(reglist);
5193 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5194 if((source[i]&0x3f)<4) {
5195 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5196 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5197 }else{
5198 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5199 }
5200 switch(source[i]&0x3f)
5201 {
5202 case 0x00: emit_call((int)add_s);break;
5203 case 0x01: emit_call((int)sub_s);break;
5204 case 0x02: emit_call((int)mul_s);break;
5205 case 0x03: emit_call((int)div_s);break;
5206 case 0x04: emit_call((int)sqrt_s);break;
5207 case 0x05: emit_call((int)abs_s);break;
5208 case 0x06: emit_call((int)mov_s);break;
5209 case 0x07: emit_call((int)neg_s);break;
5210 }
5211 restore_regs(reglist);
5212 }
5213 if(opcode2[i]==0x11) { // Double precision
5214 save_regs(reglist);
5215 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5216 if((source[i]&0x3f)<4) {
5217 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5218 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5219 }else{
5220 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5221 }
5222 switch(source[i]&0x3f)
5223 {
5224 case 0x00: emit_call((int)add_d);break;
5225 case 0x01: emit_call((int)sub_d);break;
5226 case 0x02: emit_call((int)mul_d);break;
5227 case 0x03: emit_call((int)div_d);break;
5228 case 0x04: emit_call((int)sqrt_d);break;
5229 case 0x05: emit_call((int)abs_d);break;
5230 case 0x06: emit_call((int)mov_d);break;
5231 case 0x07: emit_call((int)neg_d);break;
5232 }
5233 restore_regs(reglist);
5234 }
3d624f89 5235#else
5236 cop1_unusable(i, i_regs);
5237#endif
57871462 5238}
5239
5240void multdiv_assemble_arm(int i,struct regstat *i_regs)
5241{
5242 // case 0x18: MULT
5243 // case 0x19: MULTU
5244 // case 0x1A: DIV
5245 // case 0x1B: DIVU
5246 // case 0x1C: DMULT
5247 // case 0x1D: DMULTU
5248 // case 0x1E: DDIV
5249 // case 0x1F: DDIVU
5250 if(rs1[i]&&rs2[i])
5251 {
5252 if((opcode2[i]&4)==0) // 32-bit
5253 {
5254 if(opcode2[i]==0x18) // MULT
5255 {
5256 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5257 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5258 signed char hi=get_reg(i_regs->regmap,HIREG);
5259 signed char lo=get_reg(i_regs->regmap,LOREG);
5260 assert(m1>=0);
5261 assert(m2>=0);
5262 assert(hi>=0);
5263 assert(lo>=0);
5264 emit_smull(m1,m2,hi,lo);
5265 }
5266 if(opcode2[i]==0x19) // MULTU
5267 {
5268 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5269 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5270 signed char hi=get_reg(i_regs->regmap,HIREG);
5271 signed char lo=get_reg(i_regs->regmap,LOREG);
5272 assert(m1>=0);
5273 assert(m2>=0);
5274 assert(hi>=0);
5275 assert(lo>=0);
5276 emit_umull(m1,m2,hi,lo);
5277 }
5278 if(opcode2[i]==0x1A) // DIV
5279 {
5280 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5281 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5282 assert(d1>=0);
5283 assert(d2>=0);
5284 signed char quotient=get_reg(i_regs->regmap,LOREG);
5285 signed char remainder=get_reg(i_regs->regmap,HIREG);
5286 assert(quotient>=0);
5287 assert(remainder>=0);
5288 emit_movs(d1,remainder);
44a80f6a 5289 emit_movimm(0xffffffff,quotient);
5290 emit_negmi(quotient,quotient); // .. quotient and ..
5291 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5292 emit_movs(d2,HOST_TEMPREG);
5293 emit_jeq((int)out+52); // Division by zero
5294 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5295 emit_clz(HOST_TEMPREG,quotient);
5296 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5297 emit_orimm(quotient,1<<31,quotient);
5298 emit_shr(quotient,quotient,quotient);
5299 emit_cmp(remainder,HOST_TEMPREG);
5300 emit_subcs(remainder,HOST_TEMPREG,remainder);
5301 emit_adcs(quotient,quotient,quotient);
5302 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5303 emit_jcc((int)out-16); // -4
5304 emit_teq(d1,d2);
5305 emit_negmi(quotient,quotient);
5306 emit_test(d1,d1);
5307 emit_negmi(remainder,remainder);
5308 }
5309 if(opcode2[i]==0x1B) // DIVU
5310 {
5311 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5312 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5313 assert(d1>=0);
5314 assert(d2>=0);
5315 signed char quotient=get_reg(i_regs->regmap,LOREG);
5316 signed char remainder=get_reg(i_regs->regmap,HIREG);
5317 assert(quotient>=0);
5318 assert(remainder>=0);
44a80f6a 5319 emit_mov(d1,remainder);
5320 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5321 emit_test(d2,d2);
44a80f6a 5322 emit_jeq((int)out+40); // Division by zero
57871462 5323 emit_clz(d2,HOST_TEMPREG);
5324 emit_movimm(1<<31,quotient);
5325 emit_shl(d2,HOST_TEMPREG,d2);
57871462 5326 emit_shr(quotient,HOST_TEMPREG,quotient);
5327 emit_cmp(remainder,d2);
5328 emit_subcs(remainder,d2,remainder);
5329 emit_adcs(quotient,quotient,quotient);
5330 emit_shrcc_imm(d2,1,d2);
5331 emit_jcc((int)out-16); // -4
5332 }
5333 }
5334 else // 64-bit
4600ba03 5335#ifndef FORCE32
57871462 5336 {
5337 if(opcode2[i]==0x1C) // DMULT
5338 {
5339 assert(opcode2[i]!=0x1C);
5340 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5341 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5342 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5343 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5344 assert(m1h>=0);
5345 assert(m2h>=0);
5346 assert(m1l>=0);
5347 assert(m2l>=0);
5348 emit_pushreg(m2h);
5349 emit_pushreg(m2l);
5350 emit_pushreg(m1h);
5351 emit_pushreg(m1l);
5352 emit_call((int)&mult64);
5353 emit_popreg(m1l);
5354 emit_popreg(m1h);
5355 emit_popreg(m2l);
5356 emit_popreg(m2h);
5357 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5358 signed char hil=get_reg(i_regs->regmap,HIREG);
5359 if(hih>=0) emit_loadreg(HIREG|64,hih);
5360 if(hil>=0) emit_loadreg(HIREG,hil);
5361 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5362 signed char lol=get_reg(i_regs->regmap,LOREG);
5363 if(loh>=0) emit_loadreg(LOREG|64,loh);
5364 if(lol>=0) emit_loadreg(LOREG,lol);
5365 }
5366 if(opcode2[i]==0x1D) // DMULTU
5367 {
5368 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5369 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5370 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5371 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5372 assert(m1h>=0);
5373 assert(m2h>=0);
5374 assert(m1l>=0);
5375 assert(m2l>=0);
5376 save_regs(0x100f);
5377 if(m1l!=0) emit_mov(m1l,0);
5378 if(m1h==0) emit_readword((int)&dynarec_local,1);
5379 else if(m1h>1) emit_mov(m1h,1);
5380 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5381 else if(m2l>2) emit_mov(m2l,2);
5382 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5383 else if(m2h>3) emit_mov(m2h,3);
5384 emit_call((int)&multu64);
5385 restore_regs(0x100f);
5386 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5387 signed char hil=get_reg(i_regs->regmap,HIREG);
5388 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5389 signed char lol=get_reg(i_regs->regmap,LOREG);
5390 /*signed char temp=get_reg(i_regs->regmap,-1);
5391 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5392 signed char rl=get_reg(i_regs->regmap,HIREG);
5393 assert(m1h>=0);
5394 assert(m2h>=0);
5395 assert(m1l>=0);
5396 assert(m2l>=0);
5397 assert(temp>=0);
5398 //emit_mov(m1l,EAX);
5399 //emit_mul(m2l);
5400 emit_umull(rl,rh,m1l,m2l);
5401 emit_storereg(LOREG,rl);
5402 emit_mov(rh,temp);
5403 //emit_mov(m1h,EAX);
5404 //emit_mul(m2l);
5405 emit_umull(rl,rh,m1h,m2l);
5406 emit_adds(rl,temp,temp);
5407 emit_adcimm(rh,0,rh);
5408 emit_storereg(HIREG,rh);
5409 //emit_mov(m2h,EAX);
5410 //emit_mul(m1l);
5411 emit_umull(rl,rh,m1l,m2h);
5412 emit_adds(rl,temp,temp);
5413 emit_adcimm(rh,0,rh);
5414 emit_storereg(LOREG|64,temp);
5415 emit_mov(rh,temp);
5416 //emit_mov(m2h,EAX);
5417 //emit_mul(m1h);
5418 emit_umull(rl,rh,m1h,m2h);
5419 emit_adds(rl,temp,rl);
5420 emit_loadreg(HIREG,temp);
5421 emit_adcimm(rh,0,rh);
5422 emit_adds(rl,temp,rl);
5423 emit_adcimm(rh,0,rh);
5424 // DEBUG
5425 /*
5426 emit_pushreg(m2h);
5427 emit_pushreg(m2l);
5428 emit_pushreg(m1h);
5429 emit_pushreg(m1l);
5430 emit_call((int)&multu64);
5431 emit_popreg(m1l);
5432 emit_popreg(m1h);
5433 emit_popreg(m2l);
5434 emit_popreg(m2h);
5435 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5436 signed char hil=get_reg(i_regs->regmap,HIREG);
5437 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5438 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5439 */
5440 // Shouldn't be necessary
5441 //char loh=get_reg(i_regs->regmap,LOREG|64);
5442 //char lol=get_reg(i_regs->regmap,LOREG);
5443 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5444 //if(lol>=0) emit_loadreg(LOREG,lol);
5445 }
5446 if(opcode2[i]==0x1E) // DDIV
5447 {
5448 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5449 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5450 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5451 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5452 assert(d1h>=0);
5453 assert(d2h>=0);
5454 assert(d1l>=0);
5455 assert(d2l>=0);
5456 save_regs(0x100f);
5457 if(d1l!=0) emit_mov(d1l,0);
5458 if(d1h==0) emit_readword((int)&dynarec_local,1);
5459 else if(d1h>1) emit_mov(d1h,1);
5460 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5461 else if(d2l>2) emit_mov(d2l,2);
5462 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5463 else if(d2h>3) emit_mov(d2h,3);
5464 emit_call((int)&div64);
5465 restore_regs(0x100f);
5466 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5467 signed char hil=get_reg(i_regs->regmap,HIREG);
5468 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5469 signed char lol=get_reg(i_regs->regmap,LOREG);
5470 if(hih>=0) emit_loadreg(HIREG|64,hih);
5471 if(hil>=0) emit_loadreg(HIREG,hil);
5472 if(loh>=0) emit_loadreg(LOREG|64,loh);
5473 if(lol>=0) emit_loadreg(LOREG,lol);
5474 }
5475 if(opcode2[i]==0x1F) // DDIVU
5476 {
5477 //u_int hr,reglist=0;
5478 //for(hr=0;hr<HOST_REGS;hr++) {
5479 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5480 //}
5481 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5482 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5483 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5484 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5485 assert(d1h>=0);
5486 assert(d2h>=0);
5487 assert(d1l>=0);
5488 assert(d2l>=0);
5489 save_regs(0x100f);
5490 if(d1l!=0) emit_mov(d1l,0);
5491 if(d1h==0) emit_readword((int)&dynarec_local,1);
5492 else if(d1h>1) emit_mov(d1h,1);
5493 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5494 else if(d2l>2) emit_mov(d2l,2);
5495 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5496 else if(d2h>3) emit_mov(d2h,3);
5497 emit_call((int)&divu64);
5498 restore_regs(0x100f);
5499 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5500 signed char hil=get_reg(i_regs->regmap,HIREG);
5501 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5502 signed char lol=get_reg(i_regs->regmap,LOREG);
5503 if(hih>=0) emit_loadreg(HIREG|64,hih);
5504 if(hil>=0) emit_loadreg(HIREG,hil);
5505 if(loh>=0) emit_loadreg(LOREG|64,loh);
5506 if(lol>=0) emit_loadreg(LOREG,lol);
5507 }
5508 }
4600ba03 5509#else
5510 assert(0);
5511#endif
57871462 5512 }
5513 else
5514 {
5515 // Multiply by zero is zero.
5516 // MIPS does not have a divide by zero exception.
5517 // The result is undefined, we return zero.
5518 signed char hr=get_reg(i_regs->regmap,HIREG);
5519 signed char lr=get_reg(i_regs->regmap,LOREG);
5520 if(hr>=0) emit_zeroreg(hr);
5521 if(lr>=0) emit_zeroreg(lr);
5522 }
5523}
5524#define multdiv_assemble multdiv_assemble_arm
5525
// Intentionally emits nothing on ARM.  The x86 backend uses this hook to put
// the value 0xf8 into register r; on ARM the hash is computed with a single
// instruction in do_rhash(), so no preload is needed.
void do_preload_rhash(int r) {
  (void)r;
}
5530
5531void do_preload_rhtbl(int ht) {
5532 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5533}
5534
// Emit the mini hash table hash: rh = rs & 0xf8.
// The same address bits select the slot in do_miniht_insert()
// ((addr&0xFF)>>3), so the result is the byte offset of an 8-byte entry.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5538
5539void do_miniht_load(int ht,int rh) {
5540 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5541 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5542}
5543
5544void do_miniht_jump(int rs,int rh,int ht) {
5545 emit_cmp(rh,rs);
5546 emit_ldreq_indexed(ht,4,15);
5547 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5548 emit_mov(rs,7);
5549 emit_jmp(jump_vaddr_reg[7]);
5550 #else
5551 emit_jmp(jump_vaddr_reg[rs]);
5552 #endif
5553}
5554
5555void do_miniht_insert(u_int return_address,int rt,int temp) {
5556 #ifdef ARMv5_ONLY
5557 emit_movimm(return_address,rt); // PC into link register
5558 add_to_linker((int)out,return_address,1);
5559 emit_pcreladdr(temp);
5560 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5561 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5562 #else
5563 emit_movw(return_address&0x0000FFFF,rt);
5564 add_to_linker((int)out,return_address,1);
5565 emit_pcreladdr(temp);
5566 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5567 emit_movt(return_address&0xFFFF0000,rt);
5568 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5569 #endif
5570}
5571
5572// Sign-extend to 64 bits and write out upper half of a register
5573// This is useful where we have a 32-bit value in a register, and want to
5574// keep it in a 32-bit register, but can't guarantee that it won't be read
5575// as a 64-bit value later.
5576void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5577{
24385cae 5578#ifndef FORCE32
57871462 5579 if(is32_pre==is32) return;
5580 int hr,reg;
5581 for(hr=0;hr<HOST_REGS;hr++) {
5582 if(hr!=EXCLUDE_REG) {
5583 //if(pre[hr]==entry[hr]) {
5584 if((reg=pre[hr])>=0) {
5585 if((dirty>>hr)&1) {
5586 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5587 emit_sarimm(hr,31,HOST_TEMPREG);
5588 emit_storereg(reg|64,HOST_TEMPREG);
5589 }
5590 }
5591 }
5592 //}
5593 }
5594 }
24385cae 5595#endif
57871462 5596}
5597
5598void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5599{
5600 //if(dirty_pre==dirty) return;
5601 int hr,reg,new_hr;
5602 for(hr=0;hr<HOST_REGS;hr++) {
5603 if(hr!=EXCLUDE_REG) {
5604 reg=pre[hr];
5605 if(((~u)>>(reg&63))&1) {
f776eb14 5606 if(reg>0) {
57871462 5607 if(((dirty_pre&~dirty)>>hr)&1) {
5608 if(reg>0&&reg<34) {
5609 emit_storereg(reg,hr);
5610 if( ((is32_pre&~uu)>>reg)&1 ) {
5611 emit_sarimm(hr,31,HOST_TEMPREG);
5612 emit_storereg(reg|64,HOST_TEMPREG);
5613 }
5614 }
5615 else if(reg>=64) {
5616 emit_storereg(reg,hr);
5617 }
5618 }
5619 }
57871462 5620 }
5621 }
5622 }
5623}
5624
5625
5626/* using strd could possibly help but you'd have to allocate registers in pairs
5627void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5628{
5629 int hr;
5630 int wrote=-1;
5631 for(hr=HOST_REGS-1;hr>=0;hr--) {
5632 if(hr!=EXCLUDE_REG) {
5633 if(pre[hr]!=entry[hr]) {
5634 if(pre[hr]>=0) {
5635 if((dirty>>hr)&1) {
5636 if(get_reg(entry,pre[hr])<0) {
5637 if(pre[hr]<64) {
5638 if(!((u>>pre[hr])&1)) {
5639 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5640 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5641 emit_sarimm(hr,31,hr+1);
5642 emit_strdreg(pre[hr],hr);
5643 }
5644 else
5645 emit_storereg(pre[hr],hr);
5646 }else{
5647 emit_storereg(pre[hr],hr);
5648 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5649 emit_sarimm(hr,31,hr);
5650 emit_storereg(pre[hr]|64,hr);
5651 }
5652 }
5653 }
5654 }else{
5655 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5656 emit_storereg(pre[hr],hr);
5657 }
5658 }
5659 wrote=hr;
5660 }
5661 }
5662 }
5663 }
5664 }
5665 }
5666 for(hr=0;hr<HOST_REGS;hr++) {
5667 if(hr!=EXCLUDE_REG) {
5668 if(pre[hr]!=entry[hr]) {
5669 if(pre[hr]>=0) {
5670 int nr;
5671 if((nr=get_reg(entry,pre[hr]))>=0) {
5672 emit_mov(hr,nr);
5673 }
5674 }
5675 }
5676 }
5677 }
5678}
5679#define wb_invalidate wb_invalidate_arm
5680*/
5681
dd3a91a1 5682// Clearing the cache is rather slow on ARM Linux, so mark the areas
5683// that need to be cleared, and then only clear these areas once.
5684void do_clear_cache()
5685{
5686 int i,j;
5687 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5688 {
5689 u_int bitmap=needs_clear_cache[i];
5690 if(bitmap) {
5691 u_int start,end;
5692 for(j=0;j<32;j++)
5693 {
5694 if(bitmap&(1<<j)) {
bdeade46 5695 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 5696 end=start+4095;
5697 j++;
5698 while(j<32) {
5699 if(bitmap&(1<<j)) {
5700 end+=4096;
5701 j++;
5702 }else{
5703 __clear_cache((void *)start,(void *)end);
5704 break;
5705 }
5706 }
5707 }
5708 }
5709 needs_clear_cache[i]=0;
5710 }
5711 }
5712}
5713
57871462 5714// CPU-architecture-specific initialization
5715void arch_init() {
3d624f89 5716#ifndef DISABLE_COP1
57871462 5717 rounding_modes[0]=0x0<<22; // round
5718 rounding_modes[1]=0x3<<22; // trunc
5719 rounding_modes[2]=0x1<<22; // ceil
5720 rounding_modes[3]=0x2<<22; // floor
3d624f89 5721#endif
57871462 5722}
b9b61529 5723
5724// vim:shiftwidth=2:expandtab