gpu_unai: fix build on older toolchains
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 GraÅžvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
6c0eefaf 23#include "../gte.h"
24#define FLAGLESS
25#include "../gte.h"
26#undef FLAGLESS
054175e9 27#include "../gte_arm.h"
28#include "../gte_neon.h"
29#include "pcnt.h"
30#endif
31
57871462 32extern int cycle_count;
33extern int last_count;
34extern int pcaddr;
35extern int pending_exception;
36extern int branch_target;
37extern uint64_t readmem_dword;
3d624f89 38#ifdef MUPEN64
57871462 39extern precomp_instr fake_pc;
3d624f89 40#endif
57871462 41extern void *dynarec_local;
42extern u_int memory_map[1048576];
43extern u_int mini_ht[32][2];
44extern u_int rounding_modes[4];
45
46void indirect_jump_indexed();
47void indirect_jump();
48void do_interrupt();
49void jump_vaddr_r0();
50void jump_vaddr_r1();
51void jump_vaddr_r2();
52void jump_vaddr_r3();
53void jump_vaddr_r4();
54void jump_vaddr_r5();
55void jump_vaddr_r6();
56void jump_vaddr_r7();
57void jump_vaddr_r8();
58void jump_vaddr_r9();
59void jump_vaddr_r10();
60void jump_vaddr_r12();
61
62const u_int jump_vaddr_reg[16] = {
63 (int)jump_vaddr_r0,
64 (int)jump_vaddr_r1,
65 (int)jump_vaddr_r2,
66 (int)jump_vaddr_r3,
67 (int)jump_vaddr_r4,
68 (int)jump_vaddr_r5,
69 (int)jump_vaddr_r6,
70 (int)jump_vaddr_r7,
71 (int)jump_vaddr_r8,
72 (int)jump_vaddr_r9,
73 (int)jump_vaddr_r10,
74 0,
75 (int)jump_vaddr_r12,
76 0,
77 0,
78 0};
79
0bbd1454 80void invalidate_addr_r0();
81void invalidate_addr_r1();
82void invalidate_addr_r2();
83void invalidate_addr_r3();
84void invalidate_addr_r4();
85void invalidate_addr_r5();
86void invalidate_addr_r6();
87void invalidate_addr_r7();
88void invalidate_addr_r8();
89void invalidate_addr_r9();
90void invalidate_addr_r10();
91void invalidate_addr_r12();
92
93const u_int invalidate_addr_reg[16] = {
94 (int)invalidate_addr_r0,
95 (int)invalidate_addr_r1,
96 (int)invalidate_addr_r2,
97 (int)invalidate_addr_r3,
98 (int)invalidate_addr_r4,
99 (int)invalidate_addr_r5,
100 (int)invalidate_addr_r6,
101 (int)invalidate_addr_r7,
102 (int)invalidate_addr_r8,
103 (int)invalidate_addr_r9,
104 (int)invalidate_addr_r10,
105 0,
106 (int)invalidate_addr_r12,
107 0,
108 0,
109 0};
110
57871462 111#include "fpu.h"
112
dd3a91a1 113unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
114
57871462 115/* Linker */
116
117void set_jump_target(int addr,u_int target)
118{
119 u_char *ptr=(u_char *)addr;
120 u_int *ptr2=(u_int *)ptr;
121 if(ptr[3]==0xe2) {
122 assert((target-(u_int)ptr2-8)<1024);
123 assert((addr&3)==0);
124 assert((target&3)==0);
125 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
126 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
127 }
128 else if(ptr[3]==0x72) {
129 // generated by emit_jno_unlikely
130 if((target-(u_int)ptr2-8)<1024) {
131 assert((addr&3)==0);
132 assert((target&3)==0);
133 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
134 }
135 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
136 assert((addr&3)==0);
137 assert((target&3)==0);
138 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
139 }
140 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
141 }
142 else {
143 assert((ptr[3]&0x0e)==0xa);
144 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146}
147
148// This optionally copies the instruction from the target of the branch into
149// the space before the branch. Works, but the difference in speed is
150// usually insignificant.
151void set_jump_target_fillslot(int addr,u_int target,int copy)
152{
153 u_char *ptr=(u_char *)addr;
154 u_int *ptr2=(u_int *)ptr;
155 assert(!copy||ptr2[-1]==0xe28dd000);
156 if(ptr[3]==0xe2) {
157 assert(!copy);
158 assert((target-(u_int)ptr2-8)<4096);
159 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
160 }
161 else {
162 assert((ptr[3]&0x0e)==0xa);
163 u_int target_insn=*(u_int *)target;
164 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
165 copy=0;
166 }
167 if((target_insn&0x0c100000)==0x04100000) { // Load
168 copy=0;
169 }
170 if(target_insn&0x08000000) {
171 copy=0;
172 }
173 if(copy) {
174 ptr2[-1]=target_insn;
175 target+=4;
176 }
177 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
178 }
179}
180
181/* Literal pool */
182add_literal(int addr,int val)
183{
15776b68 184 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 185 literals[literalcount][0]=addr;
186 literals[literalcount][1]=val;
187 literalcount++;
188}
189
f76eeef9 190void *kill_pointer(void *stub)
57871462 191{
192 int *ptr=(int *)(stub+4);
193 assert((*ptr&0x0ff00000)==0x05900000);
194 u_int offset=*ptr&0xfff;
195 int **l_ptr=(void *)ptr+offset+8;
196 int *i_ptr=*l_ptr;
197 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 198 return i_ptr;
57871462 199}
200
f968d35d 201// find where external branch is liked to using addr of it's stub:
202// get address that insn one after stub loads (dyna_linker arg1),
203// treat it as a pointer to branch insn,
204// return addr where that branch jumps to
57871462 205int get_pointer(void *stub)
206{
207 //printf("get_pointer(%x)\n",(int)stub);
208 int *ptr=(int *)(stub+4);
f968d35d 209 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 210 u_int offset=*ptr&0xfff;
211 int **l_ptr=(void *)ptr+offset+8;
212 int *i_ptr=*l_ptr;
213 assert((*i_ptr&0x0f000000)==0x0a000000);
214 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
215}
216
217// Find the "clean" entry point from a "dirty" entry point
218// by skipping past the call to verify_code
219u_int get_clean_addr(int addr)
220{
221 int *ptr=(int *)addr;
222 #ifdef ARMv5_ONLY
223 ptr+=4;
224 #else
225 ptr+=6;
226 #endif
227 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
228 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
229 ptr++;
230 if((*ptr&0xFF000000)==0xea000000) {
231 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
232 }
233 return (u_int)ptr;
234}
235
236int verify_dirty(int addr)
237{
238 u_int *ptr=(u_int *)addr;
239 #ifdef ARMv5_ONLY
240 // get from literal pool
15776b68 241 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 242 u_int offset=*ptr&0xfff;
243 u_int *l_ptr=(void *)ptr+offset+8;
244 u_int source=l_ptr[0];
245 u_int copy=l_ptr[1];
246 u_int len=l_ptr[2];
247 ptr+=4;
248 #else
249 // ARMv7 movw/movt
250 assert((*ptr&0xFFF00000)==0xe3000000);
251 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
252 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
253 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
254 ptr+=6;
255 #endif
256 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
257 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 258#ifndef DISABLE_TLB
cfcba99a 259 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 260 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
261 unsigned int page=source>>12;
262 unsigned int map_value=memory_map[page];
263 if(map_value>=0x80000000) return 0;
264 while(page<((source+len-1)>>12)) {
265 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
266 }
267 source = source+(map_value<<2);
268 }
63cb0298 269#endif
57871462 270 //printf("verify_dirty: %x %x %x\n",source,copy,len);
271 return !memcmp((void *)source,(void *)copy,len);
272}
273
274// This doesn't necessarily find all clean entry points, just
275// guarantees that it's not dirty
276int isclean(int addr)
277{
278 #ifdef ARMv5_ONLY
279 int *ptr=((u_int *)addr)+4;
280 #else
281 int *ptr=((u_int *)addr)+6;
282 #endif
283 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
284 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
285 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
286 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
287 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
288 return 1;
289}
290
291void get_bounds(int addr,u_int *start,u_int *end)
292{
293 u_int *ptr=(u_int *)addr;
294 #ifdef ARMv5_ONLY
295 // get from literal pool
15776b68 296 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 297 u_int offset=*ptr&0xfff;
298 u_int *l_ptr=(void *)ptr+offset+8;
299 u_int source=l_ptr[0];
300 //u_int copy=l_ptr[1];
301 u_int len=l_ptr[2];
302 ptr+=4;
303 #else
304 // ARMv7 movw/movt
305 assert((*ptr&0xFFF00000)==0xe3000000);
306 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
307 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
308 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
309 ptr+=6;
310 #endif
311 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
312 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 313#ifndef DISABLE_TLB
cfcba99a 314 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 315 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
316 if(memory_map[source>>12]>=0x80000000) source = 0;
317 else source = source+(memory_map[source>>12]<<2);
318 }
63cb0298 319#endif
57871462 320 *start=source;
321 *end=source+len;
322}
323
324/* Register allocation */
325
326// Note: registers are allocated clean (unmodified state)
327// if you intend to modify the register, you must call dirty_reg().
328void alloc_reg(struct regstat *cur,int i,signed char reg)
329{
330 int r,hr;
331 int preferred_reg = (reg&7);
332 if(reg==CCREG) preferred_reg=HOST_CCREG;
333 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
334
335 // Don't allocate unused registers
336 if((cur->u>>reg)&1) return;
337
338 // see if it's already allocated
339 for(hr=0;hr<HOST_REGS;hr++)
340 {
341 if(cur->regmap[hr]==reg) return;
342 }
343
344 // Keep the same mapping if the register was already allocated in a loop
345 preferred_reg = loop_reg(i,reg,preferred_reg);
346
347 // Try to allocate the preferred register
348 if(cur->regmap[preferred_reg]==-1) {
349 cur->regmap[preferred_reg]=reg;
350 cur->dirty&=~(1<<preferred_reg);
351 cur->isconst&=~(1<<preferred_reg);
352 return;
353 }
354 r=cur->regmap[preferred_reg];
355 if(r<64&&((cur->u>>r)&1)) {
356 cur->regmap[preferred_reg]=reg;
357 cur->dirty&=~(1<<preferred_reg);
358 cur->isconst&=~(1<<preferred_reg);
359 return;
360 }
361 if(r>=64&&((cur->uu>>(r&63))&1)) {
362 cur->regmap[preferred_reg]=reg;
363 cur->dirty&=~(1<<preferred_reg);
364 cur->isconst&=~(1<<preferred_reg);
365 return;
366 }
367
368 // Clear any unneeded registers
369 // We try to keep the mapping consistent, if possible, because it
370 // makes branches easier (especially loops). So we try to allocate
371 // first (see above) before removing old mappings. If this is not
372 // possible then go ahead and clear out the registers that are no
373 // longer needed.
374 for(hr=0;hr<HOST_REGS;hr++)
375 {
376 r=cur->regmap[hr];
377 if(r>=0) {
378 if(r<64) {
379 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
380 }
381 else
382 {
383 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
384 }
385 }
386 }
387 // Try to allocate any available register, but prefer
388 // registers that have not been used recently.
389 if(i>0) {
390 for(hr=0;hr<HOST_REGS;hr++) {
391 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
392 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
393 cur->regmap[hr]=reg;
394 cur->dirty&=~(1<<hr);
395 cur->isconst&=~(1<<hr);
396 return;
397 }
398 }
399 }
400 }
401 // Try to allocate any available register
402 for(hr=0;hr<HOST_REGS;hr++) {
403 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
404 cur->regmap[hr]=reg;
405 cur->dirty&=~(1<<hr);
406 cur->isconst&=~(1<<hr);
407 return;
408 }
409 }
410
411 // Ok, now we have to evict someone
412 // Pick a register we hopefully won't need soon
413 u_char hsn[MAXREG+1];
414 memset(hsn,10,sizeof(hsn));
415 int j;
416 lsn(hsn,i,&preferred_reg);
417 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
418 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
419 if(i>0) {
420 // Don't evict the cycle count at entry points, otherwise the entry
421 // stub will have to write it.
422 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
423 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
424 for(j=10;j>=3;j--)
425 {
426 // Alloc preferred register if available
427 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
428 for(hr=0;hr<HOST_REGS;hr++) {
429 // Evict both parts of a 64-bit register
430 if((cur->regmap[hr]&63)==r) {
431 cur->regmap[hr]=-1;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 }
435 }
436 cur->regmap[preferred_reg]=reg;
437 return;
438 }
439 for(r=1;r<=MAXREG;r++)
440 {
441 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
442 for(hr=0;hr<HOST_REGS;hr++) {
443 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
444 if(cur->regmap[hr]==r+64) {
445 cur->regmap[hr]=reg;
446 cur->dirty&=~(1<<hr);
447 cur->isconst&=~(1<<hr);
448 return;
449 }
450 }
451 }
452 for(hr=0;hr<HOST_REGS;hr++) {
453 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
454 if(cur->regmap[hr]==r) {
455 cur->regmap[hr]=reg;
456 cur->dirty&=~(1<<hr);
457 cur->isconst&=~(1<<hr);
458 return;
459 }
460 }
461 }
462 }
463 }
464 }
465 }
466 for(j=10;j>=0;j--)
467 {
468 for(r=1;r<=MAXREG;r++)
469 {
470 if(hsn[r]==j) {
471 for(hr=0;hr<HOST_REGS;hr++) {
472 if(cur->regmap[hr]==r+64) {
473 cur->regmap[hr]=reg;
474 cur->dirty&=~(1<<hr);
475 cur->isconst&=~(1<<hr);
476 return;
477 }
478 }
479 for(hr=0;hr<HOST_REGS;hr++) {
480 if(cur->regmap[hr]==r) {
481 cur->regmap[hr]=reg;
482 cur->dirty&=~(1<<hr);
483 cur->isconst&=~(1<<hr);
484 return;
485 }
486 }
487 }
488 }
489 }
490 printf("This shouldn't happen (alloc_reg)");exit(1);
491}
492
493void alloc_reg64(struct regstat *cur,int i,signed char reg)
494{
495 int preferred_reg = 8+(reg&1);
496 int r,hr;
497
498 // allocate the lower 32 bits
499 alloc_reg(cur,i,reg);
500
501 // Don't allocate unused registers
502 if((cur->uu>>reg)&1) return;
503
504 // see if the upper half is already allocated
505 for(hr=0;hr<HOST_REGS;hr++)
506 {
507 if(cur->regmap[hr]==reg+64) return;
508 }
509
510 // Keep the same mapping if the register was already allocated in a loop
511 preferred_reg = loop_reg(i,reg,preferred_reg);
512
513 // Try to allocate the preferred register
514 if(cur->regmap[preferred_reg]==-1) {
515 cur->regmap[preferred_reg]=reg|64;
516 cur->dirty&=~(1<<preferred_reg);
517 cur->isconst&=~(1<<preferred_reg);
518 return;
519 }
520 r=cur->regmap[preferred_reg];
521 if(r<64&&((cur->u>>r)&1)) {
522 cur->regmap[preferred_reg]=reg|64;
523 cur->dirty&=~(1<<preferred_reg);
524 cur->isconst&=~(1<<preferred_reg);
525 return;
526 }
527 if(r>=64&&((cur->uu>>(r&63))&1)) {
528 cur->regmap[preferred_reg]=reg|64;
529 cur->dirty&=~(1<<preferred_reg);
530 cur->isconst&=~(1<<preferred_reg);
531 return;
532 }
533
534 // Clear any unneeded registers
535 // We try to keep the mapping consistent, if possible, because it
536 // makes branches easier (especially loops). So we try to allocate
537 // first (see above) before removing old mappings. If this is not
538 // possible then go ahead and clear out the registers that are no
539 // longer needed.
540 for(hr=HOST_REGS-1;hr>=0;hr--)
541 {
542 r=cur->regmap[hr];
543 if(r>=0) {
544 if(r<64) {
545 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
546 }
547 else
548 {
549 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
550 }
551 }
552 }
553 // Try to allocate any available register, but prefer
554 // registers that have not been used recently.
555 if(i>0) {
556 for(hr=0;hr<HOST_REGS;hr++) {
557 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
558 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
559 cur->regmap[hr]=reg|64;
560 cur->dirty&=~(1<<hr);
561 cur->isconst&=~(1<<hr);
562 return;
563 }
564 }
565 }
566 }
567 // Try to allocate any available register
568 for(hr=0;hr<HOST_REGS;hr++) {
569 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
570 cur->regmap[hr]=reg|64;
571 cur->dirty&=~(1<<hr);
572 cur->isconst&=~(1<<hr);
573 return;
574 }
575 }
576
577 // Ok, now we have to evict someone
578 // Pick a register we hopefully won't need soon
579 u_char hsn[MAXREG+1];
580 memset(hsn,10,sizeof(hsn));
581 int j;
582 lsn(hsn,i,&preferred_reg);
583 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
584 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
585 if(i>0) {
586 // Don't evict the cycle count at entry points, otherwise the entry
587 // stub will have to write it.
588 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
589 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
590 for(j=10;j>=3;j--)
591 {
592 // Alloc preferred register if available
593 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
594 for(hr=0;hr<HOST_REGS;hr++) {
595 // Evict both parts of a 64-bit register
596 if((cur->regmap[hr]&63)==r) {
597 cur->regmap[hr]=-1;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 }
601 }
602 cur->regmap[preferred_reg]=reg|64;
603 return;
604 }
605 for(r=1;r<=MAXREG;r++)
606 {
607 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
608 for(hr=0;hr<HOST_REGS;hr++) {
609 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
610 if(cur->regmap[hr]==r+64) {
611 cur->regmap[hr]=reg|64;
612 cur->dirty&=~(1<<hr);
613 cur->isconst&=~(1<<hr);
614 return;
615 }
616 }
617 }
618 for(hr=0;hr<HOST_REGS;hr++) {
619 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
620 if(cur->regmap[hr]==r) {
621 cur->regmap[hr]=reg|64;
622 cur->dirty&=~(1<<hr);
623 cur->isconst&=~(1<<hr);
624 return;
625 }
626 }
627 }
628 }
629 }
630 }
631 }
632 for(j=10;j>=0;j--)
633 {
634 for(r=1;r<=MAXREG;r++)
635 {
636 if(hsn[r]==j) {
637 for(hr=0;hr<HOST_REGS;hr++) {
638 if(cur->regmap[hr]==r+64) {
639 cur->regmap[hr]=reg|64;
640 cur->dirty&=~(1<<hr);
641 cur->isconst&=~(1<<hr);
642 return;
643 }
644 }
645 for(hr=0;hr<HOST_REGS;hr++) {
646 if(cur->regmap[hr]==r) {
647 cur->regmap[hr]=reg|64;
648 cur->dirty&=~(1<<hr);
649 cur->isconst&=~(1<<hr);
650 return;
651 }
652 }
653 }
654 }
655 }
656 printf("This shouldn't happen");exit(1);
657}
658
659// Allocate a temporary register. This is done without regard to
660// dirty status or whether the register we request is on the unneeded list
661// Note: This will only allocate one register, even if called multiple times
662void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
663{
664 int r,hr;
665 int preferred_reg = -1;
666
667 // see if it's already allocated
668 for(hr=0;hr<HOST_REGS;hr++)
669 {
670 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
671 }
672
673 // Try to allocate any available register
674 for(hr=HOST_REGS-1;hr>=0;hr--) {
675 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
676 cur->regmap[hr]=reg;
677 cur->dirty&=~(1<<hr);
678 cur->isconst&=~(1<<hr);
679 return;
680 }
681 }
682
683 // Find an unneeded register
684 for(hr=HOST_REGS-1;hr>=0;hr--)
685 {
686 r=cur->regmap[hr];
687 if(r>=0) {
688 if(r<64) {
689 if((cur->u>>r)&1) {
690 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 else
699 {
700 if((cur->uu>>(r&63))&1) {
701 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
702 cur->regmap[hr]=reg;
703 cur->dirty&=~(1<<hr);
704 cur->isconst&=~(1<<hr);
705 return;
706 }
707 }
708 }
709 }
710 }
711
712 // Ok, now we have to evict someone
713 // Pick a register we hopefully won't need soon
714 // TODO: we might want to follow unconditional jumps here
715 // TODO: get rid of dupe code and make this into a function
716 u_char hsn[MAXREG+1];
717 memset(hsn,10,sizeof(hsn));
718 int j;
719 lsn(hsn,i,&preferred_reg);
720 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
721 if(i>0) {
722 // Don't evict the cycle count at entry points, otherwise the entry
723 // stub will have to write it.
724 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
725 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
726 for(j=10;j>=3;j--)
727 {
728 for(r=1;r<=MAXREG;r++)
729 {
730 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
731 for(hr=0;hr<HOST_REGS;hr++) {
732 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
733 if(cur->regmap[hr]==r+64) {
734 cur->regmap[hr]=reg;
735 cur->dirty&=~(1<<hr);
736 cur->isconst&=~(1<<hr);
737 return;
738 }
739 }
740 }
741 for(hr=0;hr<HOST_REGS;hr++) {
742 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
743 if(cur->regmap[hr]==r) {
744 cur->regmap[hr]=reg;
745 cur->dirty&=~(1<<hr);
746 cur->isconst&=~(1<<hr);
747 return;
748 }
749 }
750 }
751 }
752 }
753 }
754 }
755 for(j=10;j>=0;j--)
756 {
757 for(r=1;r<=MAXREG;r++)
758 {
759 if(hsn[r]==j) {
760 for(hr=0;hr<HOST_REGS;hr++) {
761 if(cur->regmap[hr]==r+64) {
762 cur->regmap[hr]=reg;
763 cur->dirty&=~(1<<hr);
764 cur->isconst&=~(1<<hr);
765 return;
766 }
767 }
768 for(hr=0;hr<HOST_REGS;hr++) {
769 if(cur->regmap[hr]==r) {
770 cur->regmap[hr]=reg;
771 cur->dirty&=~(1<<hr);
772 cur->isconst&=~(1<<hr);
773 return;
774 }
775 }
776 }
777 }
778 }
779 printf("This shouldn't happen");exit(1);
780}
781// Allocate a specific ARM register.
782void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
783{
784 int n;
f776eb14 785 int dirty=0;
57871462 786
787 // see if it's already allocated (and dealloc it)
788 for(n=0;n<HOST_REGS;n++)
789 {
f776eb14 790 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
791 dirty=(cur->dirty>>n)&1;
792 cur->regmap[n]=-1;
793 }
57871462 794 }
795
796 cur->regmap[hr]=reg;
797 cur->dirty&=~(1<<hr);
f776eb14 798 cur->dirty|=dirty<<hr;
57871462 799 cur->isconst&=~(1<<hr);
800}
801
802// Alloc cycle count into dedicated register
803alloc_cc(struct regstat *cur,int i)
804{
805 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
806}
807
808/* Special alloc */
809
810
811/* Assembler */
812
// Printable names of the 16 ARM registers, indexed by register number;
// used by the assem_debug() disassembly output.
char regname[16][4] = {
  "r0",
  "r1",
  "r2",
  "r3",
  "r4",
  "r5",
  "r6",
  "r7",
  "r8",
  "r9",
  "r10",
  "fp",
  "r12",
  "sp",
  "lr",
  "pc"};
830
831void output_byte(u_char byte)
832{
833 *(out++)=byte;
834}
835void output_modrm(u_char mod,u_char rm,u_char ext)
836{
837 assert(mod<4);
838 assert(rm<8);
839 assert(ext<8);
840 u_char byte=(mod<<6)|(ext<<3)|rm;
841 *(out++)=byte;
842}
843void output_sib(u_char scale,u_char index,u_char base)
844{
845 assert(scale<4);
846 assert(index<8);
847 assert(base<8);
848 u_char byte=(scale<<6)|(index<<3)|base;
849 *(out++)=byte;
850}
851void output_w32(u_int word)
852{
853 *((u_int *)out)=word;
854 out+=4;
855}
// Build the register fields of an ARM instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16 (asserted).
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  return((rn<<16)|(rd<<12)|rm);
}
// Build the rn/rd fields plus a rotated 8-bit immediate operand whose value
// is imm shifted left by `shift` bits.  shift must be even and imm below
// 256 (asserted); the rotate field written to bits 8-11 is ((32-shift)&30)/2.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
}
// Try to express imm as an ARM rotated 8-bit immediate.
// On success stores the 12-bit operand (rotate field in bits 8-11, value in
// bits 0-7) in *encoded and returns 1.  Returns 0, with *encoded set to 0,
// when no even rotation of imm fits in 8 bits.
u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  int i=32;
  while(i>0)
  {
    if(imm<256) {
      *encoded=((i&30)<<7)|imm;
      return 1;
    }
    // Rotate right by two bits and try the next rotation amount
    imm=(imm>>2)|(imm<<30);i-=2;
  }
  return 0;
}
cfbd3c6e 886void genimm_checked(u_int imm,u_int *encoded)
887{
888 u_int ret=genimm(imm,encoded);
889 assert(ret);
890}
57871462 891u_int genjmp(u_int addr)
892{
893 int offset=addr-(int)out-8;
e80343e2 894 if(offset<-33554432||offset>=33554432) {
895 if (addr>2) {
896 printf("genjmp: out of range: %08x\n", offset);
897 exit(1);
898 }
899 return 0;
900 }
57871462 901 return ((u_int)offset>>2)&0xffffff;
902}
903
904void emit_mov(int rs,int rt)
905{
906 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
907 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
908}
909
910void emit_movs(int rs,int rt)
911{
912 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
913 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
914}
915
916void emit_add(int rs1,int rs2,int rt)
917{
918 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
919 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
920}
921
922void emit_adds(int rs1,int rs2,int rt)
923{
924 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
925 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
926}
927
928void emit_adcs(int rs1,int rs2,int rt)
929{
930 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
932}
933
934void emit_sbc(int rs1,int rs2,int rt)
935{
936 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
937 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
938}
939
940void emit_sbcs(int rs1,int rs2,int rt)
941{
942 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
943 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
944}
945
946void emit_neg(int rs, int rt)
947{
948 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
949 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
950}
951
952void emit_negs(int rs, int rt)
953{
954 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
955 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
956}
957
958void emit_sub(int rs1,int rs2,int rt)
959{
960 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
961 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
962}
963
964void emit_subs(int rs1,int rs2,int rt)
965{
966 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
967 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
968}
969
970void emit_zeroreg(int rt)
971{
972 assem_debug("mov %s,#0\n",regname[rt]);
973 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
974}
975
790ee18e 976void emit_loadlp(u_int imm,u_int rt)
977{
978 add_literal((int)out,imm);
979 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
980 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
981}
982void emit_movw(u_int imm,u_int rt)
983{
984 assert(imm<65536);
985 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
986 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
987}
988void emit_movt(u_int imm,u_int rt)
989{
990 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
991 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
992}
993void emit_movimm(u_int imm,u_int rt)
994{
995 u_int armval;
996 if(genimm(imm,&armval)) {
997 assem_debug("mov %s,#%d\n",regname[rt],imm);
998 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
999 }else if(genimm(~imm,&armval)) {
1000 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1001 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1002 }else if(imm<65536) {
1003 #ifdef ARMv5_ONLY
1004 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1005 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1006 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1007 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1008 #else
1009 emit_movw(imm,rt);
1010 #endif
1011 }else{
1012 #ifdef ARMv5_ONLY
1013 emit_loadlp(imm,rt);
1014 #else
1015 emit_movw(imm&0x0000FFFF,rt);
1016 emit_movt(imm&0xFFFF0000,rt);
1017 #endif
1018 }
1019}
1020void emit_pcreladdr(u_int rt)
1021{
1022 assem_debug("add %s,pc,#?\n",regname[rt]);
1023 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1024}
1025
57871462 1026void emit_loadreg(int r, int hr)
1027{
3d624f89 1028#ifdef FORCE32
1029 if(r&64) {
1030 printf("64bit load in 32bit mode!\n");
7f2607ea 1031 assert(0);
1032 return;
3d624f89 1033 }
1034#endif
57871462 1035 if((r&63)==0)
1036 emit_zeroreg(hr);
1037 else {
3d624f89 1038 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1039 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1040 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1041 if(r==CCREG) addr=(int)&cycle_count;
1042 if(r==CSREG) addr=(int)&Status;
1043 if(r==FSREG) addr=(int)&FCR31;
1044 if(r==INVCP) addr=(int)&invc_ptr;
1045 u_int offset = addr-(u_int)&dynarec_local;
1046 assert(offset<4096);
1047 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1048 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1049 }
1050}
1051void emit_storereg(int r, int hr)
1052{
3d624f89 1053#ifdef FORCE32
1054 if(r&64) {
1055 printf("64bit store in 32bit mode!\n");
7f2607ea 1056 assert(0);
1057 return;
3d624f89 1058 }
1059#endif
1060 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1061 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1062 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1063 if(r==CCREG) addr=(int)&cycle_count;
1064 if(r==FSREG) addr=(int)&FCR31;
1065 u_int offset = addr-(u_int)&dynarec_local;
1066 assert(offset<4096);
1067 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1068 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1069}
1070
1071void emit_test(int rs, int rt)
1072{
1073 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1074 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1075}
1076
1077void emit_testimm(int rs,int imm)
1078{
1079 u_int armval;
5a05d80c 1080 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1081 genimm_checked(imm,&armval);
57871462 1082 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1083}
1084
b9b61529 1085void emit_testeqimm(int rs,int imm)
1086{
1087 u_int armval;
1088 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1089 genimm_checked(imm,&armval);
b9b61529 1090 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1091}
1092
57871462 1093void emit_not(int rs,int rt)
1094{
1095 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1096 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1097}
1098
b9b61529 1099void emit_mvnmi(int rs,int rt)
1100{
1101 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1102 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1103}
1104
57871462 1105void emit_and(u_int rs1,u_int rs2,u_int rt)
1106{
1107 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1108 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1109}
1110
1111void emit_or(u_int rs1,u_int rs2,u_int rt)
1112{
1113 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1114 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1115}
1116void emit_or_and_set_flags(int rs1,int rs2,int rt)
1117{
1118 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1119 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1120}
1121
f70d384d 1122void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1123{
1124 assert(rs<16);
1125 assert(rt<16);
1126 assert(imm<32);
1127 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1128 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1129}
1130
576bbd8f 1131void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1132{
1133 assert(rs<16);
1134 assert(rt<16);
1135 assert(imm<32);
1136 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1137 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1138}
1139
57871462 1140void emit_xor(u_int rs1,u_int rs2,u_int rt)
1141{
1142 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1143 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1144}
1145
57871462 1146void emit_addimm(u_int rs,int imm,u_int rt)
1147{
1148 assert(rs<16);
1149 assert(rt<16);
1150 if(imm!=0) {
57871462 1151 u_int armval;
1152 if(genimm(imm,&armval)) {
1153 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1154 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1155 }else if(genimm(-imm,&armval)) {
8a0a8423 1156 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1157 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1158 }else if(imm<0) {
ffb0b9e0 1159 assert(imm>-65536);
57871462 1160 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1161 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1162 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1163 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1164 }else{
ffb0b9e0 1165 assert(imm<65536);
57871462 1166 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1167 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1168 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1169 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1170 }
1171 }
1172 else if(rs!=rt) emit_mov(rs,rt);
1173}
1174
1175void emit_addimm_and_set_flags(int imm,int rt)
1176{
1177 assert(imm>-65536&&imm<65536);
1178 u_int armval;
1179 if(genimm(imm,&armval)) {
1180 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1181 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1182 }else if(genimm(-imm,&armval)) {
1183 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1184 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1185 }else if(imm<0) {
1186 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1187 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1188 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1189 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1190 }else{
1191 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1192 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1193 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1194 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1195 }
1196}
1197void emit_addimm_no_flags(u_int imm,u_int rt)
1198{
1199 emit_addimm(rt,imm,rt);
1200}
1201
1202void emit_addnop(u_int r)
1203{
1204 assert(r<16);
1205 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1206 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1207}
1208
1209void emit_adcimm(u_int rs,int imm,u_int rt)
1210{
1211 u_int armval;
cfbd3c6e 1212 genimm_checked(imm,&armval);
57871462 1213 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1214 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1215}
1216/*void emit_sbcimm(int imm,u_int rt)
1217{
1218 u_int armval;
cfbd3c6e 1219 genimm_checked(imm,&armval);
57871462 1220 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1221 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1222}*/
1223void emit_sbbimm(int imm,u_int rt)
1224{
1225 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1226 assert(rt<8);
1227 if(imm<128&&imm>=-128) {
1228 output_byte(0x83);
1229 output_modrm(3,rt,3);
1230 output_byte(imm);
1231 }
1232 else
1233 {
1234 output_byte(0x81);
1235 output_modrm(3,rt,3);
1236 output_w32(imm);
1237 }
1238}
1239void emit_rscimm(int rs,int imm,u_int rt)
1240{
1241 assert(0);
1242 u_int armval;
cfbd3c6e 1243 genimm_checked(imm,&armval);
57871462 1244 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1246}
1247
1248void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1249{
1250 // TODO: if(genimm(imm,&armval)) ...
1251 // else
1252 emit_movimm(imm,HOST_TEMPREG);
1253 emit_adds(HOST_TEMPREG,rsl,rtl);
1254 emit_adcimm(rsh,0,rth);
1255}
1256
1257void emit_sbb(int rs1,int rs2)
1258{
1259 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1260 output_byte(0x19);
1261 output_modrm(3,rs1,rs2);
1262}
1263
1264void emit_andimm(int rs,int imm,int rt)
1265{
1266 u_int armval;
790ee18e 1267 if(imm==0) {
1268 emit_zeroreg(rt);
1269 }else if(genimm(imm,&armval)) {
57871462 1270 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1271 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1272 }else if(genimm(~imm,&armval)) {
1273 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1274 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1275 }else if(imm==65535) {
1276 #ifdef ARMv5_ONLY
1277 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1278 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1279 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1280 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1281 #else
1282 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1283 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1284 #endif
1285 }else{
1286 assert(imm>0&&imm<65535);
1287 #ifdef ARMv5_ONLY
1288 assem_debug("mov r14,#%d\n",imm&0xFF00);
1289 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1290 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1291 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1292 #else
1293 emit_movw(imm,HOST_TEMPREG);
1294 #endif
1295 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1296 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1297 }
1298}
1299
1300void emit_orimm(int rs,int imm,int rt)
1301{
1302 u_int armval;
790ee18e 1303 if(imm==0) {
1304 if(rs!=rt) emit_mov(rs,rt);
1305 }else if(genimm(imm,&armval)) {
57871462 1306 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1307 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1308 }else{
1309 assert(imm>0&&imm<65536);
1310 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1311 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1312 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1313 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1314 }
1315}
1316
1317void emit_xorimm(int rs,int imm,int rt)
1318{
57871462 1319 u_int armval;
790ee18e 1320 if(imm==0) {
1321 if(rs!=rt) emit_mov(rs,rt);
1322 }else if(genimm(imm,&armval)) {
57871462 1323 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1324 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1325 }else{
514ed0d9 1326 assert(imm>0&&imm<65536);
57871462 1327 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1328 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1329 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1330 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1331 }
1332}
1333
1334void emit_shlimm(int rs,u_int imm,int rt)
1335{
1336 assert(imm>0);
1337 assert(imm<32);
1338 //if(imm==1) ...
1339 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1340 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1341}
1342
c6c3b1b3 1343void emit_lsls_imm(int rs,int imm,int rt)
1344{
1345 assert(imm>0);
1346 assert(imm<32);
1347 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1349}
1350
57871462 1351void emit_shrimm(int rs,u_int imm,int rt)
1352{
1353 assert(imm>0);
1354 assert(imm<32);
1355 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1356 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1357}
1358
1359void emit_sarimm(int rs,u_int imm,int rt)
1360{
1361 assert(imm>0);
1362 assert(imm<32);
1363 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1364 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1365}
1366
1367void emit_rorimm(int rs,u_int imm,int rt)
1368{
1369 assert(imm>0);
1370 assert(imm<32);
1371 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1372 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1373}
1374
1375void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1376{
1377 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1378 assert(imm>0);
1379 assert(imm<32);
1380 //if(imm==1) ...
1381 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1382 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1383 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1384 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1385}
1386
1387void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1388{
1389 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1390 assert(imm>0);
1391 assert(imm<32);
1392 //if(imm==1) ...
1393 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1394 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1395 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1396 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1397}
1398
b9b61529 1399void emit_signextend16(int rs,int rt)
1400{
1401 #ifdef ARMv5_ONLY
1402 emit_shlimm(rs,16,rt);
1403 emit_sarimm(rt,16,rt);
1404 #else
1405 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1406 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1407 #endif
1408}
1409
c6c3b1b3 1410void emit_signextend8(int rs,int rt)
1411{
1412 #ifdef ARMv5_ONLY
1413 emit_shlimm(rs,24,rt);
1414 emit_sarimm(rt,24,rt);
1415 #else
1416 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1417 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1418 #endif
1419}
1420
57871462 1421void emit_shl(u_int rs,u_int shift,u_int rt)
1422{
1423 assert(rs<16);
1424 assert(rt<16);
1425 assert(shift<16);
1426 //if(imm==1) ...
1427 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1428 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1429}
1430void emit_shr(u_int rs,u_int shift,u_int rt)
1431{
1432 assert(rs<16);
1433 assert(rt<16);
1434 assert(shift<16);
1435 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1436 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1437}
1438void emit_sar(u_int rs,u_int shift,u_int rt)
1439{
1440 assert(rs<16);
1441 assert(rt<16);
1442 assert(shift<16);
1443 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1444 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1445}
1446void emit_shlcl(int r)
1447{
1448 assem_debug("shl %%%s,%%cl\n",regname[r]);
1449 assert(0);
1450}
1451void emit_shrcl(int r)
1452{
1453 assem_debug("shr %%%s,%%cl\n",regname[r]);
1454 assert(0);
1455}
1456void emit_sarcl(int r)
1457{
1458 assem_debug("sar %%%s,%%cl\n",regname[r]);
1459 assert(0);
1460}
1461
1462void emit_shldcl(int r1,int r2)
1463{
1464 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1465 assert(0);
1466}
1467void emit_shrdcl(int r1,int r2)
1468{
1469 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1470 assert(0);
1471}
1472void emit_orrshl(u_int rs,u_int shift,u_int rt)
1473{
1474 assert(rs<16);
1475 assert(rt<16);
1476 assert(shift<16);
1477 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1478 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1479}
1480void emit_orrshr(u_int rs,u_int shift,u_int rt)
1481{
1482 assert(rs<16);
1483 assert(rt<16);
1484 assert(shift<16);
1485 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1486 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1487}
1488
1489void emit_cmpimm(int rs,int imm)
1490{
1491 u_int armval;
1492 if(genimm(imm,&armval)) {
5a05d80c 1493 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1494 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1495 }else if(genimm(-imm,&armval)) {
5a05d80c 1496 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1497 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1498 }else if(imm>0) {
1499 assert(imm<65536);
1500 #ifdef ARMv5_ONLY
1501 emit_movimm(imm,HOST_TEMPREG);
1502 #else
1503 emit_movw(imm,HOST_TEMPREG);
1504 #endif
1505 assem_debug("cmp %s,r14\n",regname[rs]);
1506 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1507 }else{
1508 assert(imm>-65536);
1509 #ifdef ARMv5_ONLY
1510 emit_movimm(-imm,HOST_TEMPREG);
1511 #else
1512 emit_movw(-imm,HOST_TEMPREG);
1513 #endif
1514 assem_debug("cmn %s,r14\n",regname[rs]);
1515 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1516 }
1517}
1518
1519void emit_cmovne(u_int *addr,int rt)
1520{
1521 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1522 assert(0);
1523}
1524void emit_cmovl(u_int *addr,int rt)
1525{
1526 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1527 assert(0);
1528}
1529void emit_cmovs(u_int *addr,int rt)
1530{
1531 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1532 assert(0);
1533}
1534void emit_cmovne_imm(int imm,int rt)
1535{
1536 assem_debug("movne %s,#%d\n",regname[rt],imm);
1537 u_int armval;
cfbd3c6e 1538 genimm_checked(imm,&armval);
57871462 1539 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1540}
1541void emit_cmovl_imm(int imm,int rt)
1542{
1543 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1544 u_int armval;
cfbd3c6e 1545 genimm_checked(imm,&armval);
57871462 1546 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1547}
1548void emit_cmovb_imm(int imm,int rt)
1549{
1550 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1551 u_int armval;
cfbd3c6e 1552 genimm_checked(imm,&armval);
57871462 1553 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1554}
1555void emit_cmovs_imm(int imm,int rt)
1556{
1557 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1558 u_int armval;
cfbd3c6e 1559 genimm_checked(imm,&armval);
57871462 1560 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1561}
1562void emit_cmove_reg(int rs,int rt)
1563{
1564 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1565 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1566}
1567void emit_cmovne_reg(int rs,int rt)
1568{
1569 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1570 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1571}
1572void emit_cmovl_reg(int rs,int rt)
1573{
1574 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1575 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1576}
1577void emit_cmovs_reg(int rs,int rt)
1578{
1579 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1580 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1581}
1582
/* Set rt to 1 if rs < imm (conditional "movlt"), else 0.
 * When rt is a different register it is zeroed before the compare; when
 * rt aliases rs it is cleared with emit_movimm(0,rt) after the compare so
 * the compared value is still intact. */
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/* Set rt to 1 if the compare of rs against imm leaves carry clear
 * (conditional "movcc"), else 0.
 * When rt is a different register it is zeroed before the compare; when
 * rt aliases rs it is cleared with emit_movimm(0,rt) after the compare. */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
/* 64-bit "set if less than immediate" for the register pair rsh:rsl,
 * result in rt.  The low word is handled by emit_slti32(); the result is
 * then corrected from the high word: it is tested against zero when
 * imm >= 0, or compared with -1 when imm < 0.  Asserts rsh != rt. */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);

  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
    emit_cmovs_imm(1, rt);
  }
}
/* 64-bit "set if below immediate" for the register pair rsh:rsl, result
 * in rt.  The low word is handled by emit_sltiu32(); then, for imm >= 0,
 * rt is forced to 0 when the high word is non-zero, and for imm < 0 rt is
 * forced to 1 when the high word differs from -1.  Asserts rsh != rt. */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);

  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
  }
}
1629
1630void emit_cmp(int rs,int rt)
1631{
1632 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1633 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1634}
/* Set rt to 1 if rs > 0 (signed), else 0: compare rs with 1, load 1 into
 * rt, then conditionally ("movlt") replace it with 0. */
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
/* Set rt to 1 if rs is non-zero.  When rs and rt differ, rs is copied to
 * rt with emit_movs(); when they are the same register it is only tested.
 * A conditional "movne rt,#1" follows in both cases. */
void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
/* 64-bit "greater than zero" for the pair rsh:rsl, result in rt.
 * Starts from emit_set_gz32() on the low word, then tests the high word:
 * "movne" forces 1 when it is non-zero and "movmi" forces 0 when it is
 * negative. */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
/* 64-bit "non-zero" for the pair rsh:rsl: rt = rsh | rsl with flags set
 * ("orrs"), then a conditional "movne rt,#1". */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
/* Set rt to 1 if rs1 < rs2 (conditional "movlt"), else 0.
 * If rt is neither source it is zeroed before the compare; otherwise it
 * is cleared with emit_movimm(0,rt) only after the compare. */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_source = (rs1 == rt) || (rs2 == rt);

  if (!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_source) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/* Set rt to 1 if "cmp rs1,rs2" leaves carry clear (conditional "movcc"),
 * else 0.  If rt is neither source it is zeroed before the compare;
 * otherwise it is cleared with emit_movimm(0,rt) only after the compare. */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_source = (rs1 == rt) || (rs2 == rt);

  if (!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_source) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1679void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1680{
1681 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1682 assert(u1!=rt);
1683 assert(u2!=rt);
1684 emit_cmp(l1,l2);
1685 emit_movimm(0,rt);
1686 emit_sbcs(u1,u2,HOST_TEMPREG);
1687 emit_cmovl_imm(1,rt);
1688}
1689void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1690{
1691 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1692 assert(u1!=rt);
1693 assert(u2!=rt);
1694 emit_cmp(l1,l2);
1695 emit_movimm(0,rt);
1696 emit_sbcs(u1,u2,HOST_TEMPREG);
1697 emit_cmovb_imm(1,rt);
1698}
1699
1700void emit_call(int a)
1701{
1702 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1703 u_int offset=genjmp(a);
1704 output_w32(0xeb000000|offset);
1705}
1706void emit_jmp(int a)
1707{
1708 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1709 u_int offset=genjmp(a);
1710 output_w32(0xea000000|offset);
1711}
/* Emit "bne a" (opcode base 0x1a000000); offset field from genjmp(a). */
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000 | genjmp(a));
}
/* Emit "beq a" (opcode base 0x0a000000); offset field from genjmp(a). */
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000 | genjmp(a));
}
/* Emit "bmi a" (opcode base 0x4a000000); offset field from genjmp(a). */
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000 | genjmp(a));
}
/* Emit "bpl a" (opcode base 0x5a000000); offset field from genjmp(a). */
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000 | genjmp(a));
}
/* Emit "blt a" (opcode base 0xba000000); offset field from genjmp(a). */
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000 | genjmp(a));
}
/* Emit "bge a" (opcode base 0xaa000000); offset field from genjmp(a). */
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000 | genjmp(a));
}
/* Emit "bvc a" (opcode base 0x7a000000); offset field from genjmp(a). */
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000 | genjmp(a));
}
/* Emit "bcs a" (opcode base 0x2a000000); offset field from genjmp(a). */
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000 | genjmp(a));
}
/* Emit "bcc a" (opcode base 0x3a000000); offset field from genjmp(a). */
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000 | genjmp(a));
}
1766
/* Unimplemented here: traces an x86-style "push $imm" and then fails
 * assert(0). */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
/* Unimplemented here: traces "pusha" and then fails assert(0). */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/* Unimplemented here: traces "popa" and then fails assert(0). */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1782void emit_pushreg(u_int r)
1783{
1784 assem_debug("push %%%s\n",regname[r]);
1785 assert(0);
1786}
1787void emit_popreg(u_int r)
1788{
1789 assem_debug("pop %%%s\n",regname[r]);
1790 assert(0);
1791}
1792void emit_callreg(u_int r)
1793{
c6c3b1b3 1794 assert(r<15);
1795 assem_debug("blx %s\n",regname[r]);
1796 output_w32(0xe12fff30|r);
57871462 1797}
1798void emit_jmpreg(u_int r)
1799{
1800 assem_debug("mov pc,%s\n",regname[r]);
1801 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1802}
1803
1804void emit_readword_indexed(int offset, int rs, int rt)
1805{
1806 assert(offset>-4096&&offset<4096);
1807 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1808 if(offset>=0) {
1809 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1810 }else{
1811 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1812 }
1813}
1814void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1815{
1816 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1817 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1818}
c6c3b1b3 1819void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1820{
1821 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1822 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1823}
1824void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1825{
1826 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1827 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1828}
1829void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1830{
1831 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1832 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1833}
1834void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1835{
1836 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1837 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1838}
1839void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1840{
1841 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1842 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1843}
/* Word load with an optional map register.
 * map < 0: plain "ldr rt,[rs,#addr]".  Otherwise addr must be 0
 * (asserted) and the load is "ldr rt,[rs,map,lsl #2]". */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/* Doubleword load into rh (first word, skipped when rh < 0) and rl
 * (second word).
 * map < 0: two offset loads at addr and addr+4.
 * Otherwise: asserts rh != rs, loads through the map register, increments
 * map by 1 (the emitted code modifies map) and loads the second word
 * through it. */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);

  if (map < 0) {
    if (want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }

  assert(rh != rs);
  if (want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1864void emit_movsbl_indexed(int offset, int rs, int rt)
1865{
1866 assert(offset>-256&&offset<256);
1867 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1868 if(offset>=0) {
1869 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1870 }else{
1871 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1872 }
1873}
1874void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1875{
1876 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1877 else {
1878 if(addr==0) {
1879 emit_shlimm(map,2,map);
1880 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1881 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1882 }else{
1883 assert(addr>-256&&addr<256);
1884 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1885 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1886 emit_movsbl_indexed(addr, rt, rt);
1887 }
1888 }
1889}
1890void emit_movswl_indexed(int offset, int rs, int rt)
1891{
1892 assert(offset>-256&&offset<256);
1893 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1894 if(offset>=0) {
1895 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1896 }else{
1897 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1898 }
1899}
1900void emit_movzbl_indexed(int offset, int rs, int rt)
1901{
1902 assert(offset>-4096&&offset<4096);
1903 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1904 if(offset>=0) {
1905 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1906 }else{
1907 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1908 }
1909}
1910void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1911{
1912 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1913 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1914}
/* Zero-extending byte load with an optional map register.
 *  - map < 0: plain offset load.
 *  - addr == 0: "ldrb rt,[rs,map,lsl #2]".
 *  - otherwise: rt = rs + addr first, then the same load through rt. */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }

  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
  } else {
    emit_movzbl_dualindexedx4(rs, map, rt);
  }
}
1927void emit_movzwl_indexed(int offset, int rs, int rt)
1928{
1929 assert(offset>-256&&offset<256);
1930 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1931 if(offset>=0) {
1932 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1933 }else{
1934 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1935 }
1936}
054175e9 1937static void emit_ldrd(int offset, int rs, int rt)
1938{
1939 assert(offset>-256&&offset<256);
1940 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1941 if(offset>=0) {
1942 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1943 }else{
1944 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1945 }
1946}
57871462 1947void emit_readword(int addr, int rt)
1948{
1949 u_int offset = addr-(u_int)&dynarec_local;
1950 assert(offset<4096);
1951 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1952 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1953}
1954void emit_movsbl(int addr, int rt)
1955{
1956 u_int offset = addr-(u_int)&dynarec_local;
1957 assert(offset<256);
1958 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1959 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1960}
1961void emit_movswl(int addr, int rt)
1962{
1963 u_int offset = addr-(u_int)&dynarec_local;
1964 assert(offset<256);
1965 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1966 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1967}
1968void emit_movzbl(int addr, int rt)
1969{
1970 u_int offset = addr-(u_int)&dynarec_local;
1971 assert(offset<4096);
1972 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1973 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1974}
1975void emit_movzwl(int addr, int rt)
1976{
1977 u_int offset = addr-(u_int)&dynarec_local;
1978 assert(offset<256);
1979 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1980 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1981}
1982void emit_movzwl_reg(int rs, int rt)
1983{
1984 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1985 assert(0);
1986}
1987
1988void emit_xchg(int rs, int rt)
1989{
1990 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1991 assert(0);
1992}
1993void emit_writeword_indexed(int rt, int offset, int rs)
1994{
1995 assert(offset>-4096&&offset<4096);
1996 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1997 if(offset>=0) {
1998 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1999 }else{
2000 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2001 }
2002}
2003void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2004{
2005 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2006 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2007}
/* Word store with an optional map register.
 * map < 0: plain "str rt,[rs,#addr]".  Otherwise addr must be 0
 * (asserted) and the store is "str rt,[rs,map,lsl #2]".
 * The temp parameter is not used by this function. */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/* Doubleword store of rh (first word) and rl (second word).
 * map < 0: two offset stores at addr and addr+4 (rh skipped when < 0).
 * Otherwise rh must be >= 0 (asserted).  When temp differs from rs,
 * temp = map + 1 is computed up front and used as the map for the second
 * word; when temp == rs, rs itself is advanced by 4 after the first store
 * and the original map is reused. */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }

  assert(rh >= 0);
  const int have_temp = (temp != rs);

  if (have_temp) emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if (have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2032void emit_writehword_indexed(int rt, int offset, int rs)
2033{
2034 assert(offset>-256&&offset<256);
2035 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2036 if(offset>=0) {
2037 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2038 }else{
2039 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2040 }
2041}
2042void emit_writebyte_indexed(int rt, int offset, int rs)
2043{
2044 assert(offset>-4096&&offset<4096);
2045 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2046 if(offset>=0) {
2047 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2048 }else{
2049 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2050 }
2051}
2052void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2053{
2054 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2055 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2056}
/* Byte store with an optional map register.
 *  - map < 0: plain offset store.
 *  - addr == 0: "strb rt,[rs,map,lsl #2]".
 *  - otherwise: temp = rs + addr first, then the same store through temp. */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }

  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
  } else {
    emit_writebyte_dualindexedx4(rt, rs, map);
  }
}
b96d3df7 2069void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2070{
2071 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2072 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2073}
2074void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2075{
2076 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2077 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2078}
2079void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2080{
2081 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2082 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2083}
57871462 2084void emit_writeword(int rt, int addr)
2085{
2086 u_int offset = addr-(u_int)&dynarec_local;
2087 assert(offset<4096);
2088 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2089 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2090}
2091void emit_writehword(int rt, int addr)
2092{
2093 u_int offset = addr-(u_int)&dynarec_local;
2094 assert(offset<256);
2095 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2096 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2097}
2098void emit_writebyte(int rt, int addr)
2099{
2100 u_int offset = addr-(u_int)&dynarec_local;
2101 assert(offset<4096);
74426039 2102 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2103 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2104}
/* Unimplemented here: traces an x86-style "movl $imm,addr" and then fails
 * assert(0). */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
/* Unimplemented here: traces an x86-style "movb $imm,addr" and then fails
 * assert(0). */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2115
2116void emit_mul(int rs)
2117{
2118 assem_debug("mul %%%s\n",regname[rs]);
2119 assert(0);
2120}
2121void emit_imul(int rs)
2122{
2123 assem_debug("imul %%%s\n",regname[rs]);
2124 assert(0);
2125}
2126void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2127{
2128 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2129 assert(rs1<16);
2130 assert(rs2<16);
2131 assert(hi<16);
2132 assert(lo<16);
2133 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2134}
2135void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2136{
2137 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2138 assert(rs1<16);
2139 assert(rs2<16);
2140 assert(hi<16);
2141 assert(lo<16);
2142 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2143}
2144
2145void emit_div(int rs)
2146{
2147 assem_debug("div %%%s\n",regname[rs]);
2148 assert(0);
2149}
2150void emit_idiv(int rs)
2151{
2152 assem_debug("idiv %%%s\n",regname[rs]);
2153 assert(0);
2154}
// Not implemented in this backend: logs, then always fails assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2160
2161void emit_clz(int rs,int rt)
2162{
2163 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2164 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2165}
2166
2167void emit_subcs(int rs1,int rs2,int rt)
2168{
2169 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2170 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2171}
2172
2173void emit_shrcc_imm(int rs,u_int imm,int rt)
2174{
2175 assert(imm>0);
2176 assert(imm<32);
2177 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2178 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2179}
2180
b1be1eee 2181void emit_shrne_imm(int rs,u_int imm,int rt)
2182{
2183 assert(imm>0);
2184 assert(imm<32);
2185 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2186 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2187}
2188
57871462 2189void emit_negmi(int rs, int rt)
2190{
2191 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2192 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2193}
2194
2195void emit_negsmi(int rs, int rt)
2196{
2197 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2198 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2199}
2200
2201void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2202{
2203 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2204 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2205}
2206
2207void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2208{
2209 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2210 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2211}
2212
2213void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2214{
2215 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2216 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2217}
2218
2219void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2220{
2221 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2222 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2223}
2224
2225void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2226{
2227 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2228 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2229}
2230
2231void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2232{
2233 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2234 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2235}
2236
2237void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2238{
2239 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2240 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2241}
2242
2243void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2244{
2245 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2246 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2247}
2248
2249void emit_teq(int rs, int rt)
2250{
2251 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2252 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2253}
2254
2255void emit_rsbimm(int rs, int imm, int rt)
2256{
2257 u_int armval;
cfbd3c6e 2258 genimm_checked(imm,&armval);
57871462 2259 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2260 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2261}
2262
2263// Load 2 immediates optimizing for small code size
2264void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2265{
2266 emit_movimm(imm1,rt1);
2267 u_int armval;
2268 if(genimm(imm2-imm1,&armval)) {
2269 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2270 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2271 }else if(genimm(imm1-imm2,&armval)) {
2272 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2273 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2274 }
2275 else emit_movimm(imm2,rt2);
2276}
2277
2278// Conditionally select one of two immediates, optimizing for small code size
2279// This will only be called if HAVE_CMOV_IMM is defined
2280void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2281{
2282 u_int armval;
2283 if(genimm(imm2-imm1,&armval)) {
2284 emit_movimm(imm1,rt);
2285 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2286 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2287 }else if(genimm(imm1-imm2,&armval)) {
2288 emit_movimm(imm1,rt);
2289 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2290 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2291 }
2292 else {
2293 #ifdef ARMv5_ONLY
2294 emit_movimm(imm1,rt);
2295 add_literal((int)out,imm2);
2296 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2297 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2298 #else
2299 emit_movw(imm1&0x0000FFFF,rt);
2300 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2301 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2302 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2303 }
2304 emit_movt(imm1&0xFFFF0000,rt);
2305 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2306 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2307 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2308 }
2309 #endif
2310 }
2311}
2312
// special case for checking invalid_code
// The immediate-base variant is not implemented here: always fails
// assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2318
2319// special case for checking invalid_code
2320void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2321{
2322 assert(imm<128&&imm>=0);
2323 assert(r>=0&&r<16);
2324 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2325 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2326 emit_cmpimm(HOST_TEMPREG,imm);
2327}
2328
2329// special case for tlb mapping
2330void emit_addsr12(int rs1,int rs2,int rt)
2331{
2332 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2333 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2334}
2335
0bbd1454 2336void emit_callne(int a)
2337{
2338 assem_debug("blne %x\n",a);
2339 u_int offset=genjmp(a);
2340 output_w32(0x1b000000|offset);
2341}
2342
// Used to preload hash table entries
// NOTE(review): the byte sequence emitted here (0x0F 0x18, a ModRM byte,
// then the 32-bit address) looks like an x86 prefetch encoding rather
// than an ARM instruction — confirm whether this function is ever
// called in the ARM backend.
void emit_prefetch(void *addr)
{
  const int target = (int)addr;
  assem_debug("prefetch %x\n", target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0, 5, 1);
  output_w32(target);
}
2352void emit_prefetchreg(int r)
2353{
2354 assem_debug("pld %s\n",regname[r]);
2355 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2356}
2357
2358// Special case for mini_ht
2359void emit_ldreq_indexed(int rs, u_int offset, int rt)
2360{
2361 assert(offset<4096);
2362 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2363 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2364}
2365
2366void emit_flds(int r,int sr)
2367{
2368 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2369 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2370}
2371
2372void emit_vldr(int r,int vr)
2373{
2374 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2375 output_w32(0xed900b00|(vr<<12)|(r<<16));
2376}
2377
2378void emit_fsts(int sr,int r)
2379{
2380 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2381 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2382}
2383
2384void emit_vstr(int vr,int r)
2385{
2386 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2387 output_w32(0xed800b00|(vr<<12)|(r<<16));
2388}
2389
2390void emit_ftosizs(int s,int d)
2391{
2392 assem_debug("ftosizs s%d,s%d\n",d,s);
2393 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2394}
2395
2396void emit_ftosizd(int s,int d)
2397{
2398 assem_debug("ftosizd s%d,d%d\n",d,s);
2399 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2400}
2401
2402void emit_fsitos(int s,int d)
2403{
2404 assem_debug("fsitos s%d,s%d\n",d,s);
2405 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2406}
2407
2408void emit_fsitod(int s,int d)
2409{
2410 assem_debug("fsitod d%d,s%d\n",d,s);
2411 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2412}
2413
2414void emit_fcvtds(int s,int d)
2415{
2416 assem_debug("fcvtds d%d,s%d\n",d,s);
2417 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2418}
2419
2420void emit_fcvtsd(int s,int d)
2421{
2422 assem_debug("fcvtsd s%d,d%d\n",d,s);
2423 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2424}
2425
// Emit "fsqrts s<d>,s<s>" (single-precision square root).
// Both operands are single-precision registers; the debug trace now
// names the destination as s<d> to match the encoding.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2431
// Emit "fsqrtd d<d>,d<s>" (double-precision square root).
// Both operands are double-precision registers; the debug trace now
// names the destination as d<d> to match the encoding.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2437
// Emit "fabss s<d>,s<s>" (single-precision absolute value).
// Both operands are single-precision registers; the debug trace now
// names the destination as s<d> to match the encoding.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2443
// Emit "fabsd d<d>,d<s>" (double-precision absolute value).
// Both operands are double-precision registers; the debug trace now
// names the destination as d<d> to match the encoding.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2449
// Emit "fnegs s<d>,s<s>" (single-precision negate).
// Both operands are single-precision registers; the debug trace now
// names the destination as s<d> to match the encoding.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2455
// Emit "fnegd d<d>,d<s>" (double-precision negate).
// Both operands are double-precision registers; the debug trace now
// names the destination as d<d> to match the encoding.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2461
2462void emit_fadds(int s1,int s2,int d)
2463{
2464 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2465 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2466}
2467
2468void emit_faddd(int s1,int s2,int d)
2469{
2470 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2471 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2472}
2473
2474void emit_fsubs(int s1,int s2,int d)
2475{
2476 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2477 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2478}
2479
2480void emit_fsubd(int s1,int s2,int d)
2481{
2482 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2483 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2484}
2485
2486void emit_fmuls(int s1,int s2,int d)
2487{
2488 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2489 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2490}
2491
2492void emit_fmuld(int s1,int s2,int d)
2493{
2494 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2495 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2496}
2497
2498void emit_fdivs(int s1,int s2,int d)
2499{
2500 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2501 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2502}
2503
2504void emit_fdivd(int s1,int s2,int d)
2505{
2506 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2507 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2508}
2509
2510void emit_fcmps(int x,int y)
2511{
2512 assem_debug("fcmps s14, s15\n");
2513 output_w32(0xeeb47a67);
2514}
2515
2516void emit_fcmpd(int x,int y)
2517{
2518 assem_debug("fcmpd d6, d7\n");
2519 output_w32(0xeeb46b47);
2520}
2521
2522void emit_fmstat()
2523{
2524 assem_debug("fmstat\n");
2525 output_w32(0xeef1fa10);
2526}
2527
2528void emit_bicne_imm(int rs,int imm,int rt)
2529{
2530 u_int armval;
cfbd3c6e 2531 genimm_checked(imm,&armval);
57871462 2532 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2533 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2534}
2535
2536void emit_biccs_imm(int rs,int imm,int rt)
2537{
2538 u_int armval;
cfbd3c6e 2539 genimm_checked(imm,&armval);
57871462 2540 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2541 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2542}
2543
2544void emit_bicvc_imm(int rs,int imm,int rt)
2545{
2546 u_int armval;
cfbd3c6e 2547 genimm_checked(imm,&armval);
57871462 2548 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2549 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2550}
2551
2552void emit_bichi_imm(int rs,int imm,int rt)
2553{
2554 u_int armval;
cfbd3c6e 2555 genimm_checked(imm,&armval);
57871462 2556 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2557 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2558}
2559
2560void emit_orrvs_imm(int rs,int imm,int rt)
2561{
2562 u_int armval;
cfbd3c6e 2563 genimm_checked(imm,&armval);
57871462 2564 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2565 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2566}
2567
b9b61529 2568void emit_orrne_imm(int rs,int imm,int rt)
2569{
2570 u_int armval;
cfbd3c6e 2571 genimm_checked(imm,&armval);
b9b61529 2572 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2573 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2574}
2575
2576void emit_andne_imm(int rs,int imm,int rt)
2577{
2578 u_int armval;
cfbd3c6e 2579 genimm_checked(imm,&armval);
b9b61529 2580 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2581 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2582}
2583
57871462 2584void emit_jno_unlikely(int a)
2585{
2586 //emit_jno(a);
2587 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2588 output_w32(0x72800000|rd_rn_rm(15,15,0));
2589}
2590
054175e9 2591static void save_regs_all(u_int reglist)
57871462 2592{
054175e9 2593 int i;
57871462 2594 if(!reglist) return;
2595 assem_debug("stmia fp,{");
054175e9 2596 for(i=0;i<16;i++)
2597 if(reglist&(1<<i))
2598 assem_debug("r%d,",i);
57871462 2599 assem_debug("}\n");
2600 output_w32(0xe88b0000|reglist);
2601}
054175e9 2602static void restore_regs_all(u_int reglist)
57871462 2603{
054175e9 2604 int i;
57871462 2605 if(!reglist) return;
2606 assem_debug("ldmia fp,{");
054175e9 2607 for(i=0;i<16;i++)
2608 if(reglist&(1<<i))
2609 assem_debug("r%d,",i);
57871462 2610 assem_debug("}\n");
2611 output_w32(0xe89b0000|reglist);
2612}
054175e9 2613// Save registers before function call
2614static void save_regs(u_int reglist)
2615{
2616 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2617 save_regs_all(reglist);
2618}
2619// Restore registers after function call
2620static void restore_regs(u_int reglist)
2621{
2622 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2623 restore_regs_all(reglist);
2624}
57871462 2625
2626// Write back consts using r14 so we don't disturb the other registers
2627void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2628{
2629 int hr;
2630 for(hr=0;hr<HOST_REGS;hr++) {
2631 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2632 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2633 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2634 int value=constmap[i][hr];
2635 if(value==0) {
2636 emit_zeroreg(HOST_TEMPREG);
2637 }
2638 else {
2639 emit_movimm(value,HOST_TEMPREG);
2640 }
2641 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2642#ifndef FORCE32
57871462 2643 if((i_is32>>i_regmap[hr])&1) {
2644 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2645 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2646 }
24385cae 2647#endif
57871462 2648 }
2649 }
2650 }
2651 }
2652}
2653
2654/* Stubs/epilogue */
2655
2656void literal_pool(int n)
2657{
2658 if(!literalcount) return;
2659 if(n) {
2660 if((int)out-literals[0][0]<4096-n) return;
2661 }
2662 u_int *ptr;
2663 int i;
2664 for(i=0;i<literalcount;i++)
2665 {
77750690 2666 u_int l_addr=(u_int)out;
2667 int j;
2668 for(j=0;j<i;j++) {
2669 if(literals[j][1]==literals[i][1]) {
2670 //printf("dup %08x\n",literals[i][1]);
2671 l_addr=literals[j][0];
2672 break;
2673 }
2674 }
57871462 2675 ptr=(u_int *)literals[i][0];
77750690 2676 u_int offset=l_addr-(u_int)ptr-8;
57871462 2677 assert(offset<4096);
2678 assert(!(offset&3));
2679 *ptr|=offset;
77750690 2680 if(l_addr==(u_int)out) {
2681 literals[i][0]=l_addr; // remember for dupes
2682 output_w32(literals[i][1]);
2683 }
57871462 2684 }
2685 literalcount=0;
2686}
2687
2688void literal_pool_jumpover(int n)
2689{
2690 if(!literalcount) return;
2691 if(n) {
2692 if((int)out-literals[0][0]<4096-n) return;
2693 }
2694 int jaddr=(int)out;
2695 emit_jmp(0);
2696 literal_pool(0);
2697 set_jump_target(jaddr,(int)out);
2698}
2699
2700emit_extjump2(int addr, int target, int linker)
2701{
2702 u_char *ptr=(u_char *)addr;
2703 assert((ptr[3]&0x0e)==0xa);
2704 emit_loadlp(target,0);
2705 emit_loadlp(addr,1);
24385cae 2706 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2707 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2708//DEBUG >
2709#ifdef DEBUG_CYCLE_COUNT
2710 emit_readword((int)&last_count,ECX);
2711 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2712 emit_readword((int)&next_interupt,ECX);
2713 emit_writeword(HOST_CCREG,(int)&Count);
2714 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2715 emit_writeword(ECX,(int)&last_count);
2716#endif
2717//DEBUG <
2718 emit_jmp(linker);
2719}
2720
2721emit_extjump(int addr, int target)
2722{
2723 emit_extjump2(addr, target, (int)dyna_linker);
2724}
2725emit_extjump_ds(int addr, int target)
2726{
2727 emit_extjump2(addr, target, (int)dyna_linker_ds);
2728}
2729
13e35c04 2730// put rt_val into rt, potentially making use of rs with value rs_val
2731static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2732{
8575a877 2733 u_int armval;
2734 int diff;
2735 if(genimm(rt_val,&armval)) {
2736 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2737 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2738 return;
2739 }
2740 if(genimm(~rt_val,&armval)) {
2741 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2742 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2743 return;
2744 }
2745 diff=rt_val-rs_val;
2746 if(genimm(diff,&armval)) {
2747 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2748 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2749 return;
2750 }else if(genimm(-diff,&armval)) {
2751 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2752 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2753 return;
2754 }
2755 emit_movimm(rt_val,rt);
2756}
2757
// return 1 if above function can do its job cheaply
// "Cheap" means v1==v2, or that (v2-v1) or its negation shrinks below
// 0x100 after its trailing zero bit-pairs are shifted out.
static int is_similar_value(u_int v1,u_int v2)
{
  int delta;
  u_int bits;
  if(v1==v2) return 1;
  delta=v2-v1;
  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;
  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return (bits<0x100)?1:0;
}
cbbab9cd 2773
// trashes r2
// Moves host registers a0/a1 into r0/r1. A negative register number
// means "no move" for that argument. When a1 currently lives in r0 it
// is moved out first so it is not overwritten.
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      emit_mov(a1,1);
      if(a0>=0) emit_mov(a0,0);
      return;
    }
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2790
b1be1eee 2791static void mov_loadtype_adj(int type,int rs,int rt)
2792{
2793 switch(type) {
2794 case LOADB_STUB: emit_signextend8(rs,rt); break;
2795 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2796 case LOADH_STUB: emit_signextend16(rs,rt); break;
2797 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2798 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2799 default: assert(0);
2800 }
2801}
2802
2803#ifdef PCSX
2804#include "pcsxmem.h"
2805#include "pcsxmem_inline.c"
2806#endif
2807
// Emit the out-of-line slow path ("stub") for memory read number n.
// Fields read from stubs[n]:
//   [0] stub type (LOADB_STUB, LOADW_STUB, ...)
//   [1] address of the branch that jumps into this stub (retargeted here)
//   [2] return address in the main code
//   [3] instruction index i
//   [4] host register holding the guest address (rs)
//   [5] pointer to the struct regstat in effect
//   [6] cycle-count adjustment
//   [7] list of live host registers
// The function is declared with an implicit int return type and returns
// no value.
do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt;
  int ds;
  // Coprocessor and unaligned loads target FTEMP; everything else
  // targets the instruction's rt1 register.
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rth=get_reg(i_regmap,rt1[i]|64);
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
#ifdef PCSX
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
  reglist|=(1<<rs);
  // Look for a free scratch register among r0-r9 and r12 (mask 0x13ff).
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // The destination register is about to be overwritten, so it does not
  // need to be preserved across the call.
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // No free register: save the live ones now and pick r0 or r2.
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // Prefer r1 as the second scratch register when it is free.
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // Look up the mem_rtab entry for the page (address >> 12). The
  // flag-setting shift left by one precedes the "cc" conditional loads.
  // NOTE(review): presumably the entry's top bit (the handler flag
  // tested in get_direct_memhandler) ends up in the carry flag so the
  // loads run only for directly mapped pages — confirm.
  emit_readword((int)&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    switch(type) {
      case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
    }
  }
  if(regs_saved) {
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address

  // Handler path: call the generic read handler for the access width.
  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=(int)jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=(int)jump_handler_read16;
  if(type==LOADW_STUB)
    handler=(int)jump_handler_read32;
  assert(handler!=0);
  pass_args(rs,temp2);
  // r2 receives the cycle count plus the adjustment for this access.
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  emit_call(handler);
  // The handler's result is taken from r0 and extended per load type.
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    mov_loadtype_adj(type,0,rt);
  }
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
#else // !PCSX
  if(addr<0) addr=rt;
  if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Select the read function table matching the access width.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  // ds is nonzero when i_regs is not the instruction's own regs[i] entry.
  ds=i_regs!=&regs[i];
  int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
#endif
  // r1 = address >> 16, r0 = function table, r2 = adjusted cycle count.
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
#ifndef PCSX
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
#endif
  //emit_readword((int)&last_count,temp);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  //emit_mov(15,14);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  // The result is fetched from readmem_dword with the width and
  // extension that match the stub type.
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    assert(rt>=0);
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
  emit_jmp(stubs[n][2]); // return address
#endif // !PCSX
}
2971
c6c3b1b3 2972#ifdef PCSX
2973// return memhandler, or get directly accessable address and return 0
2974u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2975{
2976 u_int l1,l2=0;
2977 l1=((u_int *)table)[addr>>12];
2978 if((l1&(1<<31))==0) {
2979 u_int v=l1<<1;
2980 *addr_host=v+addr;
2981 return 0;
2982 }
2983 else {
2984 l1<<=1;
2985 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2986 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2987 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2988 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2989 else
2990 l2=((u_int *)l1)[(addr&0xfff)/4];
2991 if((l2&(1<<31))==0) {
2992 u_int v=l2<<1;
2993 *addr_host=v+(addr&0xfff);
2994 return 0;
2995 }
2996 return l2<<1;
2997 }
2998}
2999#endif
3000
// Emit an inline memory read for a load whose guest address `addr` is
// known at translation time.
//   type    - stub type (LOADB_STUB, LOADW_STUB, ...)
//   i       - instruction index
//   regmap  - host register map in effect
//   target  - guest register that receives the value (0 = discarded)
//   adj     - cycle-count adjustment for this instruction
//   reglist - live host registers that must survive a handler call
// The function is declared with an implicit int return type and returns
// no value.
inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  // Fall back to whatever get_reg() returns for guest register -1.
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);
#ifdef PCSX
  u_int handler,host_addr=0,is_dynamic,far_call=0;
  int cc=get_reg(regmap,CCREG);
  // Nothing more to emit when pcsx_direct_read() reports success.
  if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
    return;
  handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
  if (handler==0) {
    // Directly mapped memory: load straight from the host address.
    if(rt<0||rt1[i]==0)
      return;
    if(addr!=host_addr)
      emit_movimm_from(addr,rs,host_addr,rs);
    switch(type) {
      case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
      default: assert(0);
    }
    return;
  }
  // A dynamic handler is replaced by the generic read handler for the
  // access width.
  is_dynamic=pcsxmem_is_handler_dynamic(addr);
  if(is_dynamic) {
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=(int)jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=(int)jump_handler_read16;
    if(type==LOADW_STUB)
      handler=(int)jump_handler_read32;
  }

  // call a memhandler
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // r0 = guest address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  // Handlers more than 32MB away from the output pointer are called
  // through r12 instead of a direct call.
  int offset=(int)handler-(int)out-8;
  if(offset<-33554432||offset>=33554432) {
    // unreachable memhandler, a plugin func perhaps
    emit_movimm(handler,12);
    far_call=1;
  }
  if(cc<0)
    emit_loadreg(CCREG,2);
  if(is_dynamic) {
    // r1 = mem_rtab entry for this page shifted left by one,
    // r2 = adjusted cycle count
    emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
  }
  else {
    // Static handler: store last_count plus the adjusted cycle count
    // to Count before the call.
    emit_readword((int)&last_count,3);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
    emit_add(2,3,2);
    emit_writeword(2,(int)&Count);
  }

  if(far_call)
    emit_callreg(12);
  else
    emit_call(handler);

  // The handler's result is taken from r0 and extended per load type.
  if(rt>=0&&rt1[i]!=0) {
    switch(type) {
      case LOADB_STUB: emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB: emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
      default: assert(0);
    }
  }
  restore_regs(reglist);
#else // if !PCSX
  // Select the read function table matching the access width.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  if(target==0)
    emit_movimm(addr,rs);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Theoretically we can have a pagefault here, if the TLB has never
    // been enabled and the address is outside the range 80000000..BFFFFFFF
    // Write out the registers so the pagefault can be handled. This is
    // a very rare case and likely represents a bug.
    int ds=regmap!=regs[i].regmap;
    if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
    if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
    else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
  }
#endif
  //emit_shrimm(rs,16,1);
  int cc=get_reg(regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  //emit_movimm(ftable,0);
  // The table entry is resolved at translation time: r0 receives the
  // entry for (addr >> 16).
  emit_movimm(((u_int *)ftable)[addr>>16],0);
  //emit_readword((int)&last_count,12);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Pagefault address
    int ds=regmap!=regs[i].regmap;
    emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  }
#endif
  //emit_add(12,2,2);
  //emit_writeword(2,(int)&Count);
  //emit_call(((u_int *)ftable)[addr>>16]);
  emit_call((int)&indirect_jump);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  // The result is fetched from readmem_dword with the width and
  // extension that match the stub type.
  if(rt>=0) {
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
#endif // !PCSX
}
3164
// Emit the out-of-line handler for store stub n.
// Stub fields read here:
//   stubs[n][0] stub type (STOREB/STOREH/STOREW, STORED only in the non-PCSX path)
//   stubs[n][1] location of the branch that is retargeted to this code
//   stubs[n][2] return address
//   stubs[n][3] instruction index
//   stubs[n][4] host register holding the store address
//   stubs[n][5] struct regstat pointer for the instruction
//   stubs[n][6] cycle adjustment
//   stubs[n][7] mask of host registers to save/restore around calls
3165do_writestub(int n)
3166{
3167 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3168 literal_pool(256);
3169 set_jump_target(stubs[n][1],(int)out);
3170 int type=stubs[n][0];
3171 int i=stubs[n][3];
3172 int rs=stubs[n][4];
3173 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3174 u_int reglist=stubs[n][7];
3175 signed char *i_regmap=i_regs->regmap;
3176 int addr=get_reg(i_regmap,AGEN1+(i&1));
3177 int rth,rt,r;
3178 int ds;
// Coprocessor stores take their data from FTEMP, ordinary stores from rs2[i].
b9b61529 3179 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3180 rth=get_reg(i_regmap,FTEMP|64);
3181 rt=get_reg(i_regmap,r=FTEMP);
3182 }else{
3183 rth=get_reg(i_regmap,rs2[i]|64);
3184 rt=get_reg(i_regmap,r=rs2[i]);
3185 }
3186 assert(rs>=0);
3187 assert(rt>=0);
b96d3df7 3188#ifdef PCSX
3189 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3190 int reglist2=reglist|(1<<rs)|(1<<rt);
// Look for a scratch host register among r0-r9/r12 (mask 0x13ff) that is
// not in reglist2.
3191 for(rtmp=0;rtmp<=12;rtmp++) {
3192 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3193 temp=rtmp; break;
3194 }
3195 }
// None free: save the registers now and take one of r0-r3 that is neither
// rs nor rt.
3196 if(temp==-1) {
3197 save_regs(reglist);
3198 regs_saved=1;
3199 for(rtmp=0;rtmp<=3;rtmp++)
3200 if(rtmp!=rs&&rtmp!=rt)
3201 {temp=rtmp;break;}
3202 }
// Use r3 as the second scratch register when it is available, so the
// emit_mov(temp2,3) before the handler call below can be skipped.
3203 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3204 temp2=3;
// Load the mem_wtab entry for the page (rs>>12).
// NOTE(review): the lsls #1 presumably moves the entry's top bit into the
// carry flag so that the "cc" store and jump below only take effect for
// directly mapped pages -- confirm against the emit_* helpers.
3205 emit_readword((int)&mem_wtab,temp);
3206 emit_shrimm(rs,12,temp2);
3207 emit_readword_dualindexedx4(temp,temp2,temp2);
3208 emit_lsls_imm(temp2,1,temp2);
3209 switch(type) {
3210 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3211 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3212 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3213 default: assert(0);
3214 }
// If registers were already saved, the conditional jump must go to the
// restore code emitted at the end; its target is patched in later.
3215 if(regs_saved) {
3216 restore_jump=(int)out;
3217 emit_jcc(0); // jump to reg restore
3218 }
3219 else
3220 emit_jcc(stubs[n][2]); // return address (invcode check)
3221
// Handler call: pick the write handler matching the store width.
3222 if(!regs_saved)
3223 save_regs(reglist);
3224 int handler=0;
3225 switch(type) {
3226 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3227 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3228 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3229 }
3230 assert(handler!=0);
3231 pass_args(rs,rt);
3232 if(temp2!=3)
3233 emit_mov(temp2,3);
// r2 receives the cycle count plus the adjustment for this instruction.
3234 int cc=get_reg(i_regmap,CCREG);
3235 if(cc<0)
3236 emit_loadreg(CCREG,2);
2573466a 3237 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3238 // returns new cycle_count
3239 emit_call(handler);
2573466a 3240 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3241 if(cc<0)
3242 emit_storereg(CCREG,2);
3243 if(restore_jump)
3244 set_jump_target(restore_jump,(int)out);
3245 restore_regs(reglist);
3246 ra=stubs[n][2];
b96d3df7 3247 emit_jmp(ra);
3248#else // if !PCSX
// Non-PCSX path: store the address and data into globals and dispatch
// through the write function table via indirect_jump_indexed.
57871462 3249 if(addr<0) addr=get_reg(i_regmap,-1);
3250 assert(addr>=0);
3251 int ftable=0;
3252 if(type==STOREB_STUB)
3253 ftable=(int)writememb;
3254 if(type==STOREH_STUB)
3255 ftable=(int)writememh;
3256 if(type==STOREW_STUB)
3257 ftable=(int)writemem;
24385cae 3258#ifndef FORCE32
57871462 3259 if(type==STORED_STUB)
3260 ftable=(int)writememd;
24385cae 3261#endif
3262 assert(ftable!=0);
57871462 3263 emit_writeword(rs,(int)&address);
3264 //emit_shrimm(rs,16,rs);
3265 //emit_movmem_indexedx4(ftable,rs,rs);
3266 if(type==STOREB_STUB)
3267 emit_writebyte(rt,(int)&byte);
3268 if(type==STOREH_STUB)
3269 emit_writehword(rt,(int)&hword);
3270 if(type==STOREW_STUB)
3271 emit_writeword(rt,(int)&word);
3272 if(type==STORED_STUB) {
3d624f89 3273#ifndef FORCE32
57871462 3274 emit_writeword(rt,(int)&dword);
3275 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3276#else
3277 printf("STORED_STUB\n");
3278#endif
57871462 3279 }
3280 //emit_pusha();
3281 save_regs(reglist);
97a238a6 3282#ifndef PCSX
57871462 3283 ds=i_regs!=&regs[i];
3284 int real_rs=get_reg(i_regmap,rs1[i]);
3285 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3286 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3287 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3288 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3289#endif
57871462 3290 emit_shrimm(rs,16,1);
3291 int cc=get_reg(i_regmap,CCREG);
3292 if(cc<0) {
3293 emit_loadreg(CCREG,2);
3294 }
3295 emit_movimm(ftable,0);
3296 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3297#ifndef PCSX
57871462 3298 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3299#endif
57871462 3300 //emit_readword((int)&last_count,temp);
3301 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3302 //emit_add(cc,temp,cc);
3303 //emit_writeword(cc,(int)&Count);
3304 emit_call((int)&indirect_jump_indexed);
3305 //emit_callreg(rs);
// Recompute the cycle counter from Count and next_interupt after the call.
3306 emit_readword((int)&Count,HOST_TEMPREG);
3307 emit_readword((int)&next_interupt,2);
3308 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3309 emit_writeword(2,(int)&last_count);
3310 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3311 if(cc<0) {
3312 emit_storereg(CCREG,HOST_TEMPREG);
3313 }
3314 //emit_popa();
3315 restore_regs(reglist);
3316 //if((cc=get_reg(regmap,CCREG))>=0) {
3317 // emit_loadreg(CCREG,cc);
3318 //}
3319 emit_jmp(stubs[n][2]); // return address
b96d3df7 3320#endif // !PCSX
57871462 3321}
3322
// Emit an inline store of guest register `target` to the constant address
// `addr` (no out-of-line stub is registered; code is emitted in place).
//   type    - STOREB_STUB/STOREH_STUB/STOREW_STUB (STORED_STUB only in the non-PCSX path)
//   i       - instruction index (used only by the non-PCSX pagefault code)
//   regmap  - host register mapping used to find the address/data registers
//   adj     - cycle adjustment passed through CLOCK_ADJUST
//   reglist - mask of host registers to save/restore around the call
3323inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3324{
3325 int rs=get_reg(regmap,-1);
3326 int rth=get_reg(regmap,target|64);
3327 int rt=get_reg(regmap,target);
3328 assert(rs>=0);
3329 assert(rt>=0);
cbbab9cd 3330#ifdef PCSX
b96d3df7 3331 u_int handler,host_addr=0;
b96d3df7 3332 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
// A zero handler means the store is emitted directly: rs is first rewritten
// from addr to host_addr when the two differ.
3333 if (handler==0) {
13e35c04 3334 if(addr!=host_addr)
3335 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3336 switch(type) {
3337 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3338 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3339 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3340 default: assert(0);
3341 }
3342 return;
3343 }
3344
3345 // call a memhandler
3346 save_regs(reglist);
13e35c04 3347 pass_args(rs,rt);
b96d3df7 3348 int cc=get_reg(regmap,CCREG);
3349 if(cc<0)
3350 emit_loadreg(CCREG,2);
2573466a 3351 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
// The handler address is passed in r3 to jump_handler_write_h.
b96d3df7 3352 emit_movimm(handler,3);
3353 // returns new cycle_count
3354 emit_call((int)jump_handler_write_h);
2573466a 3355 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3356 if(cc<0)
3357 emit_storereg(CCREG,2);
3358 restore_regs(reglist);
3359#else // if !pcsx
// Non-PCSX path: store address and data into globals, then call the
// table entry for addr>>16 through indirect_jump.
57871462 3360 int ftable=0;
3361 if(type==STOREB_STUB)
3362 ftable=(int)writememb;
3363 if(type==STOREH_STUB)
3364 ftable=(int)writememh;
3365 if(type==STOREW_STUB)
3366 ftable=(int)writemem;
24385cae 3367#ifndef FORCE32
57871462 3368 if(type==STORED_STUB)
3369 ftable=(int)writememd;
24385cae 3370#endif
3371 assert(ftable!=0);
57871462 3372 emit_writeword(rs,(int)&address);
3373 //emit_shrimm(rs,16,rs);
3374 //emit_movmem_indexedx4(ftable,rs,rs);
3375 if(type==STOREB_STUB)
3376 emit_writebyte(rt,(int)&byte);
3377 if(type==STOREH_STUB)
3378 emit_writehword(rt,(int)&hword);
3379 if(type==STOREW_STUB)
3380 emit_writeword(rt,(int)&word);
3381 if(type==STORED_STUB) {
3d624f89 3382#ifndef FORCE32
57871462 3383 emit_writeword(rt,(int)&dword);
3384 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3385#else
3386 printf("STORED_STUB\n");
3387#endif
57871462 3388 }
3389 //emit_pusha();
3390 save_regs(reglist);
0c1fe38b 3391#ifndef PCSX
3392 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3393 if((signed int)addr>=(signed int)0xC0000000) {
3394 // Theoretically we can have a pagefault here, if the TLB has never
3395 // been enabled and the address is outside the range 80000000..BFFFFFFF
3396 // Write out the registers so the pagefault can be handled. This is
3397 // a very rare case and likely represents a bug.
3398 int ds=regmap!=regs[i].regmap;
3399 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3400 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3401 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3402 }
3403#endif
57871462 3404 //emit_shrimm(rs,16,1);
3405 int cc=get_reg(regmap,CCREG);
3406 if(cc<0) {
3407 emit_loadreg(CCREG,2);
3408 }
3409 //emit_movimm(ftable,0);
3410 emit_movimm(((u_int *)ftable)[addr>>16],0);
3411 //emit_readword((int)&last_count,12);
2573466a 3412 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3413#ifndef PCSX
57871462 3414 if((signed int)addr>=(signed int)0xC0000000) {
3415 // Pagefault address
3416 int ds=regmap!=regs[i].regmap;
3417 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3418 }
f51dc36c 3419#endif
57871462 3420 //emit_add(12,2,2);
3421 //emit_writeword(2,(int)&Count);
3422 //emit_call(((u_int *)ftable)[addr>>16]);
3423 emit_call((int)&indirect_jump);
// Recompute the cycle counter from Count and next_interupt after the call.
3424 emit_readword((int)&Count,HOST_TEMPREG);
3425 emit_readword((int)&next_interupt,2);
2573466a 3426 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3427 emit_writeword(2,(int)&last_count);
3428 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3429 if(cc<0) {
3430 emit_storereg(CCREG,HOST_TEMPREG);
3431 }
3432 //emit_popa();
3433 restore_regs(reglist);
b96d3df7 3434#endif
57871462 3435}
3436
// Emit the out-of-line handler for unaligned-store stub n (SWL/SWR only,
// as asserted below).
// Stub fields read here: [1] branch location retargeted to this code,
// [2] return address, [3] instruction index, [4] struct regstat pointer,
// [5] host register holding the address, [6] cycle adjustment,
// [7] mask of host registers to save/restore.
// The active "#if 1" variant simply calls jump_handle_swl/jump_handle_swr;
// the "#else" read-modify-write variant is compiled out.
3437do_unalignedwritestub(int n)
3438{
b7918751 3439 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3440 literal_pool(256);
57871462 3441 set_jump_target(stubs[n][1],(int)out);
b7918751 3442
3443 int i=stubs[n][3];
3444 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3445 int addr=stubs[n][5];
3446 u_int reglist=stubs[n][7];
3447 signed char *i_regmap=i_regs->regmap;
3448 int temp2=get_reg(i_regmap,FTEMP);
3449 int rt;
3450 int ds, real_rs;
3451 rt=get_reg(i_regmap,rs2[i]);
3452 assert(rt>=0);
3453 assert(addr>=0);
3454 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// The address register is added to the save list and the FTEMP register
// removed from it.
// NOTE(review): temp2 is not checked for a negative value before being
// used as a shift count here -- confirm FTEMP is always allocated.
3455 reglist|=(1<<addr);
3456 reglist&=~(1<<temp2);
3457
b96d3df7 3458#if 1
3459 // don't bother with it and call write handler
3460 save_regs(reglist);
3461 pass_args(addr,rt);
3462 int cc=get_reg(i_regmap,CCREG);
3463 if(cc<0)
3464 emit_loadreg(CCREG,2);
2573466a 3465 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3466 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3467 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3468 if(cc<0)
3469 emit_storereg(CCREG,2);
3470 restore_regs(reglist);
3471 emit_jmp(stubs[n][2]); // return address
3472#else
// Compiled-out variant: read the aligned word through the read table,
// merge in the shifted register value, then write it back through the
// write table.
b7918751 3473 emit_andimm(addr,0xfffffffc,temp2);
3474 emit_writeword(temp2,(int)&address);
3475
3476 save_regs(reglist);
97a238a6 3477#ifndef PCSX
b7918751 3478 ds=i_regs!=&regs[i];
3479 real_rs=get_reg(i_regmap,rs1[i]);
3480 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3481 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3482 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3483 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3484#endif
b7918751 3485 emit_shrimm(addr,16,1);
3486 int cc=get_reg(i_regmap,CCREG);
3487 if(cc<0) {
3488 emit_loadreg(CCREG,2);
3489 }
3490 emit_movimm((u_int)readmem,0);
3491 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3492#ifndef PCSX
3493 // pagefault address
3494 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3495#endif
b7918751 3496 emit_call((int)&indirect_jump_indexed);
3497 restore_regs(reglist);
3498
3499 emit_readword((int)&readmem_dword,temp2);
3500 int temp=addr; //hmh
3501 emit_shlimm(addr,3,temp);
3502 emit_andimm(temp,24,temp);
3503#ifdef BIG_ENDIAN_MIPS
3504 if (opcode[i]==0x2e) // SWR
3505#else
3506 if (opcode[i]==0x2a) // SWL
3507#endif
3508 emit_xorimm(temp,24,temp);
3509 emit_movimm(-1,HOST_TEMPREG);
55439448 3510 if (opcode[i]==0x2a) { // SWL
b7918751 3511 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3512 emit_orrshr(rt,temp,temp2);
3513 }else{
3514 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3515 emit_orrshl(rt,temp,temp2);
3516 }
3517 emit_readword((int)&address,addr);
3518 emit_writeword(temp2,(int)&word);
3519 //save_regs(reglist); // don't need to, no state changes
3520 emit_shrimm(addr,16,1);
3521 emit_movimm((u_int)writemem,0);
3522 //emit_call((int)&indirect_jump_indexed);
3523 emit_mov(15,14);
3524 emit_readword_dualindexedx4(0,1,15);
3525 emit_readword((int)&Count,HOST_TEMPREG);
3526 emit_readword((int)&next_interupt,2);
3527 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3528 emit_writeword(2,(int)&last_count);
3529 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3530 if(cc<0) {
3531 emit_storereg(CCREG,HOST_TEMPREG);
3532 }
3533 restore_regs(reglist);
57871462 3534 emit_jmp(stubs[n][2]); // return address
b96d3df7 3535#endif
57871462 3536}
3537
// Debug helper: prints seven of its eight arguments in hex, in the order
// a,b,c,d,ebp,esi,edi (esp is accepted but not printed), followed in
// parentheses by the int located just before `edi` in memory.
// NOTE(review): (&edi)[-1] indexes outside the parameter object, which the
// C standard leaves undefined; the parameter names suggest this was written
// for an x86 stack layout -- confirm before relying on the last value.
3538void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
3539{
3540 printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
3541}
3542
3543do_invstub(int n)
3544{
3545 literal_pool(20);
3546 u_int reglist=stubs[n][3];
3547 set_jump_target(stubs[n][1],(int)out);
3548 save_regs(reglist);
3549 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3550 emit_call((int)&invalidate_addr);
3551 restore_regs(reglist);
3552 emit_jmp(stubs[n][2]); // return address
3553}
3554
3555int do_dirty_stub(int i)
3556{
3557 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3558 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3559 #ifdef PCSX
3560 addr=(u_int)source;
3561 #endif
57871462 3562 // Careful about the code output here, verify_dirty needs to parse it.
3563 #ifdef ARMv5_ONLY
ac545b3a 3564 emit_loadlp(addr,1);
57871462 3565 emit_loadlp((int)copy,2);
3566 emit_loadlp(slen*4,3);
3567 #else
ac545b3a 3568 emit_movw(addr&0x0000FFFF,1);
57871462 3569 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3570 emit_movt(addr&0xFFFF0000,1);
57871462 3571 emit_movt(((u_int)copy)&0xFFFF0000,2);
3572 emit_movw(slen*4,3);
3573 #endif
3574 emit_movimm(start+i*4,0);
3575 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3576 int entry=(int)out;
3577 load_regs_entry(i);
3578 if(entry==(int)out) entry=instr_addr[i];
3579 emit_jmp(instr_addr[i]);
3580 return entry;
3581}
3582
3583void do_dirty_stub_ds()
3584{
3585 // Careful about the code output here, verify_dirty needs to parse it.
3586 #ifdef ARMv5_ONLY
3587 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3588 emit_loadlp((int)copy,2);
3589 emit_loadlp(slen*4,3);
3590 #else
3591 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3592 emit_movw(((u_int)copy)&0x0000FFFF,2);
3593 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3594 emit_movt(((u_int)copy)&0xFFFF0000,2);
3595 emit_movw(slen*4,3);
3596 #endif
3597 emit_movimm(start+1,0);
3598 emit_call((int)&verify_code_ds);
3599}
3600
3601do_cop1stub(int n)
3602{
3603 literal_pool(256);
3604 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3605 set_jump_target(stubs[n][1],(int)out);
3606 int i=stubs[n][3];
3d624f89 3607// int rs=stubs[n][4];
57871462 3608 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3609 int ds=stubs[n][6];
3610 if(!ds) {
3611 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3612 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3613 }
3614 //else {printf("fp exception in delay slot\n");}
3615 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3616 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3617 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3618 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3619 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3620}
3621
63cb0298 3622#ifndef DISABLE_TLB
3623
57871462 3624/* TLB */
3625
3626int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3627{
3628 if(c) {
3629 if((signed int)addr>=(signed int)0xC0000000) {
3630 // address_generation already loaded the const
3631 emit_readword_dualindexedx4(FP,map,map);
3632 }
3633 else
3634 return -1; // No mapping
3635 }
3636 else {
3637 assert(s!=map);
3638 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3639 emit_addsr12(map,s,map);
3640 // Schedule this while we wait on the load
3641 //if(x) emit_xorimm(s,x,ar);
3642 if(shift>=0) emit_shlimm(s,3,shift);
3643 if(~a) emit_andimm(s,a,ar);
3644 emit_readword_dualindexedx4(FP,map,map);
3645 }
3646 return map;
3647}
3648int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3649{
3650 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3651 emit_test(map,map);
3652 *jaddr=(int)out;
3653 emit_js(0);
3654 }
3655 return map;
3656}
3657
3658int gen_tlb_addr_r(int ar, int map) {
3659 if(map>=0) {
3660 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3661 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3662 }
3663}
3664
3665int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3666{
3667 if(c) {
3668 if(addr<0x80800000||addr>=0xC0000000) {
3669 // address_generation already loaded the const
3670 emit_readword_dualindexedx4(FP,map,map);
3671 }
3672 else
3673 return -1; // No mapping
3674 }
3675 else {
3676 assert(s!=map);
3677 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3678 emit_addsr12(map,s,map);
3679 // Schedule this while we wait on the load
3680 //if(x) emit_xorimm(s,x,ar);
3681 emit_readword_dualindexedx4(FP,map,map);
3682 }
3683 return map;
3684}
3685int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3686{
3687 if(!c||addr<0x80800000||addr>=0xC0000000) {
3688 emit_testimm(map,0x40000000);
3689 *jaddr=(int)out;
3690 emit_jne(0);
3691 }
3692}
3693
3694int gen_tlb_addr_w(int ar, int map) {
3695 if(map>=0) {
3696 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3697 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3698 }
3699}
3700
3701// Generate the address of the memory_map entry, relative to dynarec_local
3702generate_map_const(u_int addr,int reg) {
3703 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3704 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3705}
3706
63cb0298 3707#else
3708
// Stand-ins used when DISABLE_TLB is defined: each one emits nothing and
// returns 0. The parameter lists are deliberately left empty so the
// existing call sites, which pass arguments, still compile.
static int do_tlb_r()
{
  return 0;
}

static int do_tlb_r_branch()
{
  return 0;
}

static int gen_tlb_addr_r()
{
  return 0;
}

static int do_tlb_w()
{
  return 0;
}

static int do_tlb_w_branch()
{
  return 0;
}

static int gen_tlb_addr_w()
{
  return 0;
}
3715
3716#endif // DISABLE_TLB
3717
57871462 3718/* Special assem */
3719
// Assemble a variable shift for instruction i.
// Nothing is emitted when rt1[i] is 0 or the target has no host register.
// opcode2[i]<=0x07 selects the 32-bit forms (SLLV/SRLV/SRAV); anything
// else is handled as the 64-bit forms (DSLLV/DSRLV/DSRAV), which work on
// a low/high pair of host registers.
3720void shift_assemble_arm(int i,struct regstat *i_regs)
3721{
3722 if(rt1[i]) {
3723 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3724 {
3725 signed char s,t,shift;
3726 t=get_reg(i_regs->regmap,rt1[i]);
3727 s=get_reg(i_regs->regmap,rs1[i]);
3728 shift=get_reg(i_regs->regmap,rs2[i]);
3729 if(t>=0){
// rs1 is register 0: the result is zero.
3730 if(rs1[i]==0)
3731 {
3732 emit_zeroreg(t);
3733 }
// rs2 is register 0: the result is a plain copy of the source.
3734 else if(rs2[i]==0)
3735 {
3736 assert(s>=0);
3737 if(s!=t) emit_mov(s,t);
3738 }
3739 else
3740 {
// Only the low five bits of the shift amount are used.
3741 emit_andimm(shift,31,HOST_TEMPREG);
3742 if(opcode2[i]==4) // SLLV
3743 {
3744 emit_shl(s,HOST_TEMPREG,t);
3745 }
3746 if(opcode2[i]==6) // SRLV
3747 {
3748 emit_shr(s,HOST_TEMPREG,t);
3749 }
3750 if(opcode2[i]==7) // SRAV
3751 {
3752 emit_sar(s,HOST_TEMPREG,t);
3753 }
3754 }
3755 }
3756 } else { // DSLLV/DSRLV/DSRAV
3757 signed char sh,sl,th,tl,shift;
3758 th=get_reg(i_regs->regmap,rt1[i]|64);
3759 tl=get_reg(i_regs->regmap,rt1[i]);
3760 sh=get_reg(i_regs->regmap,rs1[i]|64);
3761 sl=get_reg(i_regs->regmap,rs1[i]);
3762 shift=get_reg(i_regs->regmap,rs2[i]);
3763 if(tl>=0){
3764 if(rs1[i]==0)
3765 {
3766 emit_zeroreg(tl);
3767 if(th>=0) emit_zeroreg(th);
3768 }
3769 else if(rs2[i]==0)
3770 {
3771 assert(sl>=0);
3772 if(sl!=tl) emit_mov(sl,tl);
3773 if(th>=0&&sh!=th) emit_mov(sh,th);
3774 }
3775 else
3776 {
3777 // FIXME: What if shift==tl ?
3778 assert(shift!=tl);
3779 int temp=get_reg(i_regs->regmap,-1);
// real_th remembers whether the target really has an upper-half register;
// th itself may be replaced by the temporary below.
3780 int real_th=th;
3781 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3782 assert(sl>=0);
3783 assert(sh>=0);
3784 emit_andimm(shift,31,HOST_TEMPREG);
3785 if(opcode2[i]==0x14) // DSLLV
3786 {
3787 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3788 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
// NOTE(review): unlike the neighbouring lines this use of th is not
// guarded by th>=0, and th can still be negative for DSLLV -- confirm.
3789 emit_orrshr(sl,HOST_TEMPREG,th);
3790 emit_andimm(shift,31,HOST_TEMPREG);
// Bit 5 of the shift amount (shift >= 32) selects the conditional moves
// that follow.
3791 emit_testimm(shift,32);
3792 emit_shl(sl,HOST_TEMPREG,tl);
3793 if(th>=0) emit_cmovne_reg(tl,th);
3794 emit_cmovne_imm(0,tl);
3795 }
3796 if(opcode2[i]==0x16) // DSRLV
3797 {
3798 assert(th>=0);
3799 emit_shr(sl,HOST_TEMPREG,tl);
3800 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3801 emit_orrshl(sh,HOST_TEMPREG,tl);
3802 emit_andimm(shift,31,HOST_TEMPREG);
3803 emit_testimm(shift,32);
3804 emit_shr(sh,HOST_TEMPREG,th);
3805 emit_cmovne_reg(th,tl);
3806 if(real_th>=0) emit_cmovne_imm(0,th);
3807 }
3808 if(opcode2[i]==0x17) // DSRAV
3809 {
3810 assert(th>=0);
3811 emit_shr(sl,HOST_TEMPREG,tl);
3812 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3813 if(real_th>=0) {
3814 assert(temp>=0);
3815 emit_sarimm(th,31,temp);
3816 }
3817 emit_orrshl(sh,HOST_TEMPREG,tl);
3818 emit_andimm(shift,31,HOST_TEMPREG);
3819 emit_testimm(shift,32);
3820 emit_sar(sh,HOST_TEMPREG,th);
3821 emit_cmovne_reg(th,tl);
3822 if(real_th>=0) emit_cmovne_reg(temp,th);
3823 }
3824 }
3825 }
3826 }
3827 }
3828}
ffb0b9e0 3829
3830#ifdef PCSX
3831static void speculate_mov(int rs,int rt)
3832{
3833 if(rt!=0) {
3834 smrv_strong_next|=1<<rt;
3835 smrv[rt]=smrv[rs];
3836 }
3837}
3838
3839static void speculate_mov_weak(int rs,int rt)
3840{
3841 if(rt!=0) {
3842 smrv_weak_next|=1<<rt;
3843 smrv[rt]=smrv[rs];
3844 }
3845}
3846
// Update the speculated guest register values (smrv) and the strong/weak
// confidence masks for instruction i of the block being compiled.
// At i==0 the table is seeded from psxRegs.GPR.r. For every instruction
// the "next" masks computed earlier become the current masks, and the
// switch below then updates the "next" masks (and smrv) according to the
// instruction type.
3847static void speculate_register_values(int i)
3848{
3849 if(i==0) {
3850 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3851 // gp,sp are likely to stay the same throughout the block
3852 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3853 smrv_weak_next=~smrv_strong_next;
3854 //printf(" llr %08x\n", smrv[4]);
3855 }
3856 smrv_strong=smrv_strong_next;
3857 smrv_weak=smrv_weak_next;
3858 switch(itype[i]) {
3859 case ALU:
// The result inherits the speculation of the first source found in the
// order strong rs1, strong rs2, weak rs1, weak rs2; otherwise it is
// dropped from both masks.
3860 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3861 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3862 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3863 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3864 else {
3865 smrv_strong_next&=~(1<<rt1[i]);
3866 smrv_weak_next&=~(1<<rt1[i]);
3867 }
3868 break;
3869 case SHIFTIMM:
3870 smrv_strong_next&=~(1<<rt1[i]);
3871 smrv_weak_next&=~(1<<rt1[i]);
3872 // fallthrough
3873 case IMM16:
// A target that is a known constant gets that constant as its strong
// value; otherwise it inherits from rs1 when rs1 is strong or weak.
3874 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3875 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3876 if(hr>=0) {
3877 if(get_final_value(hr,i,&value))
3878 smrv[rt1[i]]=value;
3879 else smrv[rt1[i]]=constmap[i][hr];
3880 smrv_strong_next|=1<<rt1[i];
3881 }
3882 }
3883 else {
3884 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3885 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3886 }
3887 break;
3888 case LOAD:
3889 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3890 // special case for BIOS
3891 smrv[rt1[i]]=0xa0000000;
3892 smrv_strong_next|=1<<rt1[i];
3893 break;
3894 }
3895 // fallthrough
3896 case SHIFT:
3897 case LOADLR:
3898 case MOV:
// The target's value is no longer predictable: drop it from both masks.
3899 smrv_strong_next&=~(1<<rt1[i]);
3900 smrv_weak_next&=~(1<<rt1[i]);
3901 break;
3902 case COP0:
3903 case COP2:
3904 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3905 smrv_strong_next&=~(1<<rt1[i]);
3906 smrv_weak_next&=~(1<<rt1[i]);
3907 }
3908 break;
3909 case C2LS:
3910 if (opcode[i]==0x32) { // LWC2
3911 smrv_strong_next&=~(1<<rt1[i]);
3912 smrv_weak_next&=~(1<<rt1[i]);
3913 }
3914 break;
3915 }
3916#if 0
3917 int r=4;
3918 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3919 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3920#endif
3921}
3922
// Memory-region classes returned by get_ptr_mem_type().
// MTYPE_8000 must stay 0: callers treat 0 as the default fast-path type.
enum {
  MTYPE_8000 = 0,
  MTYPE_8020 = 1,
  MTYPE_0000 = 2,
  MTYPE_A000 = 3,
  MTYPE_1F80 = 4,
};
3930
3931static int get_ptr_mem_type(u_int a)
3932{
3933 if(a < 0x00200000) {
3934 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3935 // return wrong, must use memhandler for BIOS self-test to pass
3936 // 007 does similar stuff from a00 mirror, weird stuff
3937 return MTYPE_8000;
3938 return MTYPE_0000;
3939 }
3940 if(0x1f800000 <= a && a < 0x1f801000)
3941 return MTYPE_1F80;
3942 if(0x80200000 <= a && a < 0x80800000)
3943 return MTYPE_8020;
3944 if(0xa0000000 <= a && a < 0xa0200000)
3945 return MTYPE_A000;
3946 return MTYPE_8000;
3947}
3948#endif
3949
3950static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3951{
3952 int jaddr,type=0;
3953
3954#ifdef PCSX
3955 int mr=rs1[i];
3956 if(((smrv_strong|smrv_weak)>>mr)&1) {
3957 type=get_ptr_mem_type(smrv[mr]);
3958 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3959 }
3960 else {
3961 // use the mirror we are running on
3962 type=get_ptr_mem_type(start);
3963 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3964 }
3965
3966 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3967 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3968 addr=*addr_reg_override=HOST_TEMPREG;
3969 type=0;
3970 }
3971 else if(type==MTYPE_0000) { // RAM 0 mirror
3972 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3973 addr=*addr_reg_override=HOST_TEMPREG;
3974 type=0;
3975 }
3976 else if(type==MTYPE_A000) { // RAM A mirror
3977 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3978 addr=*addr_reg_override=HOST_TEMPREG;
3979 type=0;
3980 }
3981 else if(type==MTYPE_1F80) { // scratchpad
3982 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3983 emit_cmpimm(HOST_TEMPREG,0x1000);
3984 jaddr=(int)out;
3985 emit_jc(0);
3986 }
3987#endif
3988
3989 if(type==0)
3990 {
3991 emit_cmpimm(addr,RAM_SIZE);
3992 jaddr=(int)out;
3993 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3994 // Hint to branch predictor that the branch is unlikely to be taken
3995 if(rs1[i]>=28)
3996 emit_jno_unlikely(0);
3997 else
3998 #endif
3999 emit_jno(0);
4000 }
4001
4002 return jaddr;
4003}
4004
57871462 4005#define shift_assemble shift_assemble_arm
4006
// Assemble an unaligned load for instruction i: LWL/LWR (opcodes
// 0x22/0x26) or LDL/LDR (opcodes 0x1A/0x1B).
// The aligned word (or doubleword) containing the address is read into
// the FTEMP register(s), then shifted and merged into the target register
// according to the low address bits.
4007void loadlr_assemble_arm(int i,struct regstat *i_regs)
4008{
4009 int s,th,tl,temp,temp2,addr,map=-1;
4010 int offset;
4011 int jaddr=0;
af4ee1fe 4012 int memtarget=0,c=0;
ffb0b9e0 4013 int fastload_reg_override=0;
57871462 4014 u_int hr,reglist=0;
4015 th=get_reg(i_regs->regmap,rt1[i]|64);
4016 tl=get_reg(i_regs->regmap,rt1[i]);
4017 s=get_reg(i_regs->regmap,rs1[i]);
4018 temp=get_reg(i_regs->regmap,-1);
4019 temp2=get_reg(i_regs->regmap,FTEMP);
4020 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
4021 assert(addr<0);
4022 offset=imm[i];
// Build the mask of host registers that currently hold a mapping.
4023 for(hr=0;hr<HOST_REGS;hr++) {
4024 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4025 }
// NOTE(review): temp is not checked for a negative value before being
// used as a shift count -- confirm a temporary is always allocated here.
4026 reglist|=1<<temp;
// NOTE(review): c is still 0 at this point (it is only assigned below),
// so the "||c" term has no effect here.
4027 if(offset||s<0||c) addr=temp2;
4028 else addr=s;
4029 if(s>=0) {
4030 c=(i_regs->wasconst>>s)&1;
af4ee1fe 4031 if(c) {
4032 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
4033 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4034 }
57871462 4035 }
535d208a 4036 if(!using_tlb) {
4037 if(!c) {
4038 #ifdef RAM_OFFSET
4039 map=get_reg(i_regs->regmap,ROREG);
4040 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4041 #endif
// temp receives addr*8 (masked later to form the shift amount); temp2
// receives the address aligned down to 4 or 8 bytes.
4042 emit_shlimm(addr,3,temp);
4043 if (opcode[i]==0x22||opcode[i]==0x26) {
4044 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4045 }else{
535d208a 4046 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4047 }
ffb0b9e0 4048 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4049 }
4050 else {
// Constant address: the shift amount is computed at compile time.
4051 if (opcode[i]==0x22||opcode[i]==0x26) {
4052 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4053 }else{
4054 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4055 }
57871462 4056 }
535d208a 4057 }else{ // using tlb
4058 int a;
4059 if(c) {
4060 a=-1;
4061 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4062 a=0xFFFFFFFC; // LWL/LWR
4063 }else{
4064 a=0xFFFFFFF8; // LDL/LDR
4065 }
4066 map=get_reg(i_regs->regmap,TLREG);
4067 assert(map>=0);
ea3d2e6e 4068 reglist&=~(1<<map);
535d208a 4069 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4070 if(c) {
4071 if (opcode[i]==0x22||opcode[i]==0x26) {
4072 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4073 }else{
4074 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4075 }
535d208a 4076 }
4077 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4078 }
4079 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
// Read the aligned word directly when the address is not constant or is
// a known memory target; otherwise go through inline_readstub.
4080 if(!c||memtarget) {
ffb0b9e0 4081 int a=temp2;
4082 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4083 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4084 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4085 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4086 }
4087 else
4088 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4089 if(rt1[i]) {
4090 assert(tl>=0);
57871462 4091 emit_andimm(temp,24,temp);
2002a1db 4092#ifdef BIG_ENDIAN_MIPS
4093 if (opcode[i]==0x26) // LWR
4094#else
4095 if (opcode[i]==0x22) // LWL
4096#endif
4097 emit_xorimm(temp,24,temp);
// HOST_TEMPREG holds all ones; shifted by temp it forms the mask of
// target bits that are cleared before the loaded bits are OR-ed in.
57871462 4098 emit_movimm(-1,HOST_TEMPREG);
4099 if (opcode[i]==0x26) {
4100 emit_shr(temp2,temp,temp2);
4101 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4102 }else{
4103 emit_shl(temp2,temp,temp2);
4104 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4105 }
4106 emit_or(temp2,tl,tl);
57871462 4107 }
535d208a 4108 //emit_storereg(rt1[i],tl); // DEBUG
4109 }
4110 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4111 // FIXME: little endian, fastload_reg_override
535d208a 4112 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4113 if(!c||memtarget) {
4114 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4115 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4116 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4117 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4118 }
4119 else
4120 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4121 if(rt1[i]) {
4122 assert(th>=0);
4123 assert(tl>=0);
// Bit 5 of temp (byte offset >= 4) drives the eq/ne-conditional
// instructions in the LDL/LDR sequences below.
57871462 4124 emit_testimm(temp,32);
4125 emit_andimm(temp,24,temp);
4126 if (opcode[i]==0x1A) { // LDL
4127 emit_rsbimm(temp,32,HOST_TEMPREG);
4128 emit_shl(temp2h,temp,temp2h);
4129 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4130 emit_movimm(-1,HOST_TEMPREG);
4131 emit_shl(temp2,temp,temp2);
4132 emit_cmove_reg(temp2h,th);
4133 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4134 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4135 emit_orreq(temp2,tl,tl);
4136 emit_orrne(temp2,th,th);
4137 }
4138 if (opcode[i]==0x1B) { // LDR
4139 emit_xorimm(temp,24,temp);
4140 emit_rsbimm(temp,32,HOST_TEMPREG);
4141 emit_shr(temp2,temp,temp2);
4142 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4143 emit_movimm(-1,HOST_TEMPREG);
4144 emit_shr(temp2h,temp,temp2h);
4145 emit_cmovne_reg(temp2,tl);
4146 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4147 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4148 emit_orrne(temp2h,th,th);
4149 emit_orreq(temp2h,tl,tl);
4150 }
4151 }
4152 }
4153}
4154#define loadlr_assemble loadlr_assemble_arm
4155
4156void cop0_assemble(int i,struct regstat *i_regs)
4157{
4158 if(opcode2[i]==0) // MFC0
4159 {
4160 signed char t=get_reg(i_regs->regmap,rt1[i]);
4161 char copr=(source[i]>>11)&0x1f;
4162 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4163 if(t>=0&&rt1[i]!=0) {
7139f3c8 4164#ifdef MUPEN64
57871462 4165 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4166 emit_movimm((source[i]>>11)&0x1f,1);
4167 emit_writeword(0,(int)&PC);
4168 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4169 if(copr==9) {
4170 emit_readword((int)&last_count,ECX);
4171 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4172 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4173 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4174 emit_writeword(HOST_CCREG,(int)&Count);
4175 }
4176 emit_call((int)MFC0);
4177 emit_readword((int)&readmem_dword,t);
7139f3c8 4178#else
4179 emit_readword((int)&reg_cop0+copr*4,t);
4180#endif
57871462 4181 }
4182 }
4183 else if(opcode2[i]==4) // MTC0
4184 {
4185 signed char s=get_reg(i_regs->regmap,rs1[i]);
4186 char copr=(source[i]>>11)&0x1f;
4187 assert(s>=0);
63cb0298 4188#ifdef MUPEN64
57871462 4189 emit_writeword(s,(int)&readmem_dword);
4190 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4191 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4192 emit_movimm((source[i]>>11)&0x1f,1);
4193 emit_writeword(0,(int)&PC);
4194 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4195#else
4196 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4197#endif
4198 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4199 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4200 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4201 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4202 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4203 emit_writeword(HOST_CCREG,(int)&Count);
4204 }
4205 // What a mess. The status register (12) can enable interrupts,
4206 // so needs a special case to handle a pending interrupt.
4207 // The interrupt must be taken immediately, because a subsequent
4208 // instruction might disable interrupts again.
7139f3c8 4209 if(copr==12||copr==13) {
fca1aef2 4210#ifdef PCSX
4211 if (is_delayslot) {
4212 // burn cycles to cause cc_interrupt, which will
4213 // reschedule next_interupt. Relies on CCREG from above.
4214 assem_debug("MTC0 DS %d\n", copr);
4215 emit_writeword(HOST_CCREG,(int)&last_count);
4216 emit_movimm(0,HOST_CCREG);
4217 emit_storereg(CCREG,HOST_CCREG);
caeefe31 4218 emit_loadreg(rs1[i],1);
fca1aef2 4219 emit_movimm(copr,0);
4220 emit_call((int)pcsx_mtc0_ds);
042c7287 4221 emit_loadreg(rs1[i],s);
fca1aef2 4222 return;
4223 }
4224#endif
63cb0298 4225 emit_movimm(start+i*4+4,HOST_TEMPREG);
4226 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4227 emit_movimm(0,HOST_TEMPREG);
4228 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4229 }
4230 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4231 //else
fca1aef2 4232#ifdef PCSX
caeefe31 4233 if(s==HOST_CCREG)
4234 emit_loadreg(rs1[i],1);
4235 else if(s!=1)
63cb0298 4236 emit_mov(s,1);
fca1aef2 4237 emit_movimm(copr,0);
4238 emit_call((int)pcsx_mtc0);
4239#else
57871462 4240 emit_call((int)MTC0);
fca1aef2 4241#endif
7139f3c8 4242 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4243 emit_readword((int)&Count,HOST_CCREG);
042c7287 4244 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 4245 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 4246 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
4247 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 4248 emit_storereg(CCREG,HOST_CCREG);
4249 }
7139f3c8 4250 if(copr==12||copr==13) {
57871462 4251 assert(!is_delayslot);
4252 emit_readword((int)&pending_exception,14);
042c7287 4253 emit_test(14,14);
4254 emit_jne((int)&do_interrupt);
57871462 4255 }
4256 emit_loadreg(rs1[i],s);
4257 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4258 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 4259 cop1_usable=0;
4260 }
4261 else
4262 {
4263 assert(opcode2[i]==0x10);
3d624f89 4264#ifndef DISABLE_TLB
57871462 4265 if((source[i]&0x3f)==0x01) // TLBR
4266 emit_call((int)TLBR);
4267 if((source[i]&0x3f)==0x02) // TLBWI
4268 emit_call((int)TLBWI_new);
4269 if((source[i]&0x3f)==0x06) { // TLBWR
4270 // The TLB entry written by TLBWR is dependent on the count,
4271 // so update the cycle count
4272 emit_readword((int)&last_count,ECX);
4273 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4274 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4275 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4276 emit_writeword(HOST_CCREG,(int)&Count);
4277 emit_call((int)TLBWR_new);
4278 }
4279 if((source[i]&0x3f)==0x08) // TLBP
4280 emit_call((int)TLBP);
3d624f89 4281#endif
576bbd8f 4282#ifdef PCSX
4283 if((source[i]&0x3f)==0x10) // RFE
4284 {
4285 emit_readword((int)&Status,0);
4286 emit_andimm(0,0x3c,1);
4287 emit_andimm(0,~0xf,0);
4288 emit_orrshr_imm(1,2,0);
4289 emit_writeword(0,(int)&Status);
4290 }
4291#else
57871462 4292 if((source[i]&0x3f)==0x18) // ERET
4293 {
4294 int count=ccadj[i];
4295 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4296 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4297 emit_jmp((int)jump_eret);
4298 }
576bbd8f 4299#endif
57871462 4300 }
4301}
4302
b9b61529 4303static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4304{
4305 switch (copr) {
4306 case 1:
4307 case 3:
4308 case 5:
4309 case 8:
4310 case 9:
4311 case 10:
4312 case 11:
4313 emit_readword((int)&reg_cop2d[copr],tl);
4314 emit_signextend16(tl,tl);
4315 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4316 break;
4317 case 7:
4318 case 16:
4319 case 17:
4320 case 18:
4321 case 19:
4322 emit_readword((int)&reg_cop2d[copr],tl);
4323 emit_andimm(tl,0xffff,tl);
4324 emit_writeword(tl,(int)&reg_cop2d[copr]);
4325 break;
4326 case 15:
4327 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4328 emit_writeword(tl,(int)&reg_cop2d[copr]);
4329 break;
4330 case 28:
b9b61529 4331 case 29:
4332 emit_readword((int)&reg_cop2d[9],temp);
4333 emit_testimm(temp,0x8000); // do we need this?
4334 emit_andimm(temp,0xf80,temp);
4335 emit_andne_imm(temp,0,temp);
f70d384d 4336 emit_shrimm(temp,7,tl);
b9b61529 4337 emit_readword((int)&reg_cop2d[10],temp);
4338 emit_testimm(temp,0x8000);
4339 emit_andimm(temp,0xf80,temp);
4340 emit_andne_imm(temp,0,temp);
f70d384d 4341 emit_orrshr_imm(temp,2,tl);
b9b61529 4342 emit_readword((int)&reg_cop2d[11],temp);
4343 emit_testimm(temp,0x8000);
4344 emit_andimm(temp,0xf80,temp);
4345 emit_andne_imm(temp,0,temp);
f70d384d 4346 emit_orrshl_imm(temp,3,tl);
b9b61529 4347 emit_writeword(tl,(int)&reg_cop2d[copr]);
4348 break;
4349 default:
4350 emit_readword((int)&reg_cop2d[copr],tl);
4351 break;
4352 }
4353}
4354
4355static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4356{
4357 switch (copr) {
4358 case 15:
4359 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4360 emit_writeword(sl,(int)&reg_cop2d[copr]);
4361 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4362 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4363 emit_writeword(sl,(int)&reg_cop2d[14]);
4364 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4365 break;
4366 case 28:
4367 emit_andimm(sl,0x001f,temp);
f70d384d 4368 emit_shlimm(temp,7,temp);
b9b61529 4369 emit_writeword(temp,(int)&reg_cop2d[9]);
4370 emit_andimm(sl,0x03e0,temp);
f70d384d 4371 emit_shlimm(temp,2,temp);
b9b61529 4372 emit_writeword(temp,(int)&reg_cop2d[10]);
4373 emit_andimm(sl,0x7c00,temp);
f70d384d 4374 emit_shrimm(temp,3,temp);
b9b61529 4375 emit_writeword(temp,(int)&reg_cop2d[11]);
4376 emit_writeword(sl,(int)&reg_cop2d[28]);
4377 break;
4378 case 30:
4379 emit_movs(sl,temp);
4380 emit_mvnmi(temp,temp);
4381 emit_clz(temp,temp);
4382 emit_writeword(sl,(int)&reg_cop2d[30]);
4383 emit_writeword(temp,(int)&reg_cop2d[31]);
4384 break;
b9b61529 4385 case 31:
4386 break;
4387 default:
4388 emit_writeword(sl,(int)&reg_cop2d[copr]);
4389 break;
4390 }
4391}
4392
4393void cop2_assemble(int i,struct regstat *i_regs)
4394{
4395 u_int copr=(source[i]>>11)&0x1f;
4396 signed char temp=get_reg(i_regs->regmap,-1);
4397 if (opcode2[i]==0) { // MFC2
4398 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4399 if(tl>=0&&rt1[i]!=0)
b9b61529 4400 cop2_get_dreg(copr,tl,temp);
4401 }
4402 else if (opcode2[i]==4) { // MTC2
4403 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4404 cop2_put_dreg(copr,sl,temp);
4405 }
4406 else if (opcode2[i]==2) // CFC2
4407 {
4408 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4409 if(tl>=0&&rt1[i]!=0)
b9b61529 4410 emit_readword((int)&reg_cop2c[copr],tl);
4411 }
4412 else if (opcode2[i]==6) // CTC2
4413 {
4414 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4415 switch(copr) {
4416 case 4:
4417 case 12:
4418 case 20:
4419 case 26:
4420 case 27:
4421 case 29:
4422 case 30:
4423 emit_signextend16(sl,temp);
4424 break;
4425 case 31:
4426 //value = value & 0x7ffff000;
4427 //if (value & 0x7f87e000) value |= 0x80000000;
4428 emit_shrimm(sl,12,temp);
4429 emit_shlimm(temp,12,temp);
4430 emit_testimm(temp,0x7f000000);
4431 emit_testeqimm(temp,0x00870000);
4432 emit_testeqimm(temp,0x0000e000);
4433 emit_orrne_imm(temp,0x80000000,temp);
4434 break;
4435 default:
4436 temp=sl;
4437 break;
4438 }
4439 emit_writeword(temp,(int)&reg_cop2c[copr]);
4440 assert(sl>=0);
4441 }
4442}
4443
054175e9 4444static void c2op_prologue(u_int op,u_int reglist)
4445{
4446 save_regs_all(reglist);
82ed88eb 4447#ifdef PCNT
4448 emit_movimm(op,0);
4449 emit_call((int)pcnt_gte_start);
4450#endif
054175e9 4451 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4452}
4453
4454static void c2op_epilogue(u_int op,u_int reglist)
4455{
82ed88eb 4456#ifdef PCNT
4457 emit_movimm(op,0);
4458 emit_call((int)pcnt_gte_end);
4459#endif
054175e9 4460 restore_regs_all(reglist);
4461}
4462
6c0eefaf 4463static void c2op_call_MACtoIR(int lm,int need_flags)
4464{
4465 if(need_flags)
4466 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4467 else
4468 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
4469}
4470
4471static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
4472{
4473 emit_call((int)func);
4474 // func is C code and trashes r0
4475 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4476 if(need_flags||need_ir)
4477 c2op_call_MACtoIR(lm,need_flags);
4478 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
4479}
4480
054175e9 4481static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4482{
4483 signed char temp=get_reg(i_regs->regmap,-1);
4484 u_int c2op=source[i]&0x3f;
6c0eefaf 4485 u_int hr,reglist_full=0,reglist;
054175e9 4486 int need_flags,need_ir;
b9b61529 4487 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 4488 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 4489 }
6c0eefaf 4490 reglist=reglist_full&0x100f;
b9b61529 4491
4492 if (gte_handlers[c2op]!=NULL) {
bedfea38 4493 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4494 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 4495 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
4496 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 4497 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
4498 need_flags=0;
6c0eefaf 4499 int shift = (source[i] >> 19) & 1;
4500 int lm = (source[i] >> 10) & 1;
054175e9 4501 switch(c2op) {
19776aef 4502#ifndef DRC_DBG
054175e9 4503 case GTE_MVMVA: {
054175e9 4504 int v = (source[i] >> 15) & 3;
4505 int cv = (source[i] >> 13) & 3;
4506 int mx = (source[i] >> 17) & 3;
6c0eefaf 4507 reglist=reglist_full&0x10ff; // +{r4-r7}
054175e9 4508 c2op_prologue(c2op,reglist);
4509 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4510 if(v<3)
4511 emit_ldrd(v*8,0,4);
4512 else {
4513 emit_movzwl_indexed(9*4,0,4); // gteIR
4514 emit_movzwl_indexed(10*4,0,6);
4515 emit_movzwl_indexed(11*4,0,5);
4516 emit_orrshl_imm(6,16,4);
4517 }
4518 if(mx<3)
4519 emit_addimm(0,32*4+mx*8*4,6);
4520 else
4521 emit_readword((int)&zeromem_ptr,6);
4522 if(cv<3)
4523 emit_addimm(0,32*4+(cv*8+5)*4,7);
4524 else
4525 emit_readword((int)&zeromem_ptr,7);
4526#ifdef __ARM_NEON__
4527 emit_movimm(source[i],1); // opcode
4528 emit_call((int)gteMVMVA_part_neon);
4529 if(need_flags) {
4530 emit_movimm(lm,1);
4531 emit_call((int)gteMACtoIR_flags_neon);
4532 }
4533#else
4534 if(cv==3&&shift)
4535 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4536 else {
4537 emit_movimm(shift,1);
4538 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4539 }
6c0eefaf 4540 if(need_flags||need_ir)
4541 c2op_call_MACtoIR(lm,need_flags);
054175e9 4542#endif
4543 break;
4544 }
6c0eefaf 4545 case GTE_OP:
4546 c2op_prologue(c2op,reglist);
4547 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
4548 if(need_flags||need_ir) {
4549 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4550 c2op_call_MACtoIR(lm,need_flags);
4551 }
4552 break;
4553 case GTE_DPCS:
4554 c2op_prologue(c2op,reglist);
4555 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
4556 break;
4557 case GTE_INTPL:
4558 c2op_prologue(c2op,reglist);
4559 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
4560 break;
4561 case GTE_SQR:
4562 c2op_prologue(c2op,reglist);
4563 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
4564 if(need_flags||need_ir) {
4565 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4566 c2op_call_MACtoIR(lm,need_flags);
4567 }
4568 break;
4569 case GTE_DCPL:
4570 c2op_prologue(c2op,reglist);
4571 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
4572 break;
4573 case GTE_GPF:
4574 c2op_prologue(c2op,reglist);
4575 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
4576 break;
4577 case GTE_GPL:
4578 c2op_prologue(c2op,reglist);
4579 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
4580 break;
19776aef 4581#endif
054175e9 4582 default:
054175e9 4583 c2op_prologue(c2op,reglist);
19776aef 4584#ifdef DRC_DBG
4585 emit_movimm(source[i],1); // opcode
4586 emit_writeword(1,(int)&psxRegs.code);
4587#endif
054175e9 4588 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4589 break;
4590 }
4591 c2op_epilogue(c2op,reglist);
4592 }
b9b61529 4593}
4594
4595void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4596{
4597 // XXX: should just just do the exception instead
4598 if(!cop1_usable) {
4599 int jaddr=(int)out;
4600 emit_jmp(0);
4601 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4602 cop1_usable=1;
4603 }
4604}
4605
57871462 4606void cop1_assemble(int i,struct regstat *i_regs)
4607{
3d624f89 4608#ifndef DISABLE_COP1
57871462 4609 // Check cop1 unusable
4610 if(!cop1_usable) {
4611 signed char rs=get_reg(i_regs->regmap,CSREG);
4612 assert(rs>=0);
4613 emit_testimm(rs,0x20000000);
4614 int jaddr=(int)out;
4615 emit_jeq(0);
4616 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4617 cop1_usable=1;
4618 }
4619 if (opcode2[i]==0) { // MFC1
4620 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4621 if(tl>=0) {
4622 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4623 emit_readword_indexed(0,tl,tl);
4624 }
4625 }
4626 else if (opcode2[i]==1) { // DMFC1
4627 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4628 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4629 if(tl>=0) {
4630 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4631 if(th>=0) emit_readword_indexed(4,tl,th);
4632 emit_readword_indexed(0,tl,tl);
4633 }
4634 }
4635 else if (opcode2[i]==4) { // MTC1
4636 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4637 signed char temp=get_reg(i_regs->regmap,-1);
4638 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4639 emit_writeword_indexed(sl,0,temp);
4640 }
4641 else if (opcode2[i]==5) { // DMTC1
4642 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4643 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4644 signed char temp=get_reg(i_regs->regmap,-1);
4645 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4646 emit_writeword_indexed(sh,4,temp);
4647 emit_writeword_indexed(sl,0,temp);
4648 }
4649 else if (opcode2[i]==2) // CFC1
4650 {
4651 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4652 if(tl>=0) {
4653 u_int copr=(source[i]>>11)&0x1f;
4654 if(copr==0) emit_readword((int)&FCR0,tl);
4655 if(copr==31) emit_readword((int)&FCR31,tl);
4656 }
4657 }
4658 else if (opcode2[i]==6) // CTC1
4659 {
4660 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4661 u_int copr=(source[i]>>11)&0x1f;
4662 assert(sl>=0);
4663 if(copr==31)
4664 {
4665 emit_writeword(sl,(int)&FCR31);
4666 // Set the rounding mode
4667 //FIXME
4668 //char temp=get_reg(i_regs->regmap,-1);
4669 //emit_andimm(sl,3,temp);
4670 //emit_fldcw_indexed((int)&rounding_modes,temp);
4671 }
4672 }
3d624f89 4673#else
4674 cop1_unusable(i, i_regs);
4675#endif
57871462 4676}
4677
4678void fconv_assemble_arm(int i,struct regstat *i_regs)
4679{
3d624f89 4680#ifndef DISABLE_COP1
57871462 4681 signed char temp=get_reg(i_regs->regmap,-1);
4682 assert(temp>=0);
4683 // Check cop1 unusable
4684 if(!cop1_usable) {
4685 signed char rs=get_reg(i_regs->regmap,CSREG);
4686 assert(rs>=0);
4687 emit_testimm(rs,0x20000000);
4688 int jaddr=(int)out;
4689 emit_jeq(0);
4690 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4691 cop1_usable=1;
4692 }
4693
4694 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4695 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4696 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4697 emit_flds(temp,15);
4698 emit_ftosizs(15,15); // float->int, truncate
4699 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4700 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4701 emit_fsts(15,temp);
4702 return;
4703 }
4704 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4705 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4706 emit_vldr(temp,7);
4707 emit_ftosizd(7,13); // double->int, truncate
4708 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4709 emit_fsts(13,temp);
4710 return;
4711 }
4712
4713 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4714 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4715 emit_flds(temp,13);
4716 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4717 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4718 emit_fsitos(13,15);
4719 emit_fsts(15,temp);
4720 return;
4721 }
4722 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4723 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4724 emit_flds(temp,13);
4725 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4726 emit_fsitod(13,7);
4727 emit_vstr(7,temp);
4728 return;
4729 }
4730
4731 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4732 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4733 emit_flds(temp,13);
4734 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4735 emit_fcvtds(13,7);
4736 emit_vstr(7,temp);
4737 return;
4738 }
4739 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4740 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4741 emit_vldr(temp,7);
4742 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4743 emit_fcvtsd(7,13);
4744 emit_fsts(13,temp);
4745 return;
4746 }
4747 #endif
4748
4749 // C emulation code
4750
4751 u_int hr,reglist=0;
4752 for(hr=0;hr<HOST_REGS;hr++) {
4753 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4754 }
4755 save_regs(reglist);
4756
4757 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4758 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4759 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4760 emit_call((int)cvt_s_w);
4761 }
4762 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4763 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4764 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4765 emit_call((int)cvt_d_w);
4766 }
4767 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4768 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4769 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4770 emit_call((int)cvt_s_l);
4771 }
4772 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4773 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4774 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4775 emit_call((int)cvt_d_l);
4776 }
4777
4778 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4779 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4780 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4781 emit_call((int)cvt_d_s);
4782 }
4783 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4784 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4785 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4786 emit_call((int)cvt_w_s);
4787 }
4788 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4789 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4790 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4791 emit_call((int)cvt_l_s);
4792 }
4793
4794 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4795 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4796 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4797 emit_call((int)cvt_s_d);
4798 }
4799 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4800 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4801 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4802 emit_call((int)cvt_w_d);
4803 }
4804 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4805 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4806 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4807 emit_call((int)cvt_l_d);
4808 }
4809
4810 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4811 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4812 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4813 emit_call((int)round_l_s);
4814 }
4815 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4816 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4817 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4818 emit_call((int)trunc_l_s);
4819 }
4820 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4821 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4822 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4823 emit_call((int)ceil_l_s);
4824 }
4825 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4826 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4827 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4828 emit_call((int)floor_l_s);
4829 }
4830 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4831 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4832 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4833 emit_call((int)round_w_s);
4834 }
4835 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4836 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4837 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4838 emit_call((int)trunc_w_s);
4839 }
4840 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4841 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4842 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4843 emit_call((int)ceil_w_s);
4844 }
4845 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4846 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4847 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4848 emit_call((int)floor_w_s);
4849 }
4850
4851 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4852 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4853 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4854 emit_call((int)round_l_d);
4855 }
4856 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4857 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4858 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4859 emit_call((int)trunc_l_d);
4860 }
4861 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4862 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4863 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4864 emit_call((int)ceil_l_d);
4865 }
4866 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4867 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4868 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4869 emit_call((int)floor_l_d);
4870 }
4871 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4872 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4873 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4874 emit_call((int)round_w_d);
4875 }
4876 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4877 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4878 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4879 emit_call((int)trunc_w_d);
4880 }
4881 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4882 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4883 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4884 emit_call((int)ceil_w_d);
4885 }
4886 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4887 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4888 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4889 emit_call((int)floor_w_d);
4890 }
4891
4892 restore_regs(reglist);
3d624f89 4893#else
4894 cop1_unusable(i, i_regs);
4895#endif
57871462 4896}
/* Alias the generic name fconv_assemble to this file's ARM-specific
 * implementation, fconv_assemble_arm. */
#define fconv_assemble fconv_assemble_arm
4898
4899void fcomp_assemble(int i,struct regstat *i_regs)
4900{
3d624f89 4901#ifndef DISABLE_COP1
57871462 4902 signed char fs=get_reg(i_regs->regmap,FSREG);
4903 signed char temp=get_reg(i_regs->regmap,-1);
4904 assert(temp>=0);
4905 // Check cop1 unusable
4906 if(!cop1_usable) {
4907 signed char cs=get_reg(i_regs->regmap,CSREG);
4908 assert(cs>=0);
4909 emit_testimm(cs,0x20000000);
4910 int jaddr=(int)out;
4911 emit_jeq(0);
4912 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4913 cop1_usable=1;
4914 }
4915
4916 if((source[i]&0x3f)==0x30) {
4917 emit_andimm(fs,~0x800000,fs);
4918 return;
4919 }
4920
4921 if((source[i]&0x3e)==0x38) {
4922 // sf/ngle - these should throw exceptions for NaNs
4923 emit_andimm(fs,~0x800000,fs);
4924 return;
4925 }
4926
4927 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4928 if(opcode2[i]==0x10) {
4929 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4930 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4931 emit_orimm(fs,0x800000,fs);
4932 emit_flds(temp,14);
4933 emit_flds(HOST_TEMPREG,15);
4934 emit_fcmps(14,15);
4935 emit_fmstat();
4936 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4937 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4938 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4939 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4940 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4941 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4942 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4943 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4944 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4945 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4946 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4947 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4948 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4949 return;
4950 }
4951 if(opcode2[i]==0x11) {
4952 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4953 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4954 emit_orimm(fs,0x800000,fs);
4955 emit_vldr(temp,6);
4956 emit_vldr(HOST_TEMPREG,7);
4957 emit_fcmpd(6,7);
4958 emit_fmstat();
4959 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4960 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4961 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4962 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4963 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4964 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4965 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4966 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4967 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4968 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4969 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4970 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4971 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4972 return;
4973 }
4974 #endif
4975
4976 // C only
4977
4978 u_int hr,reglist=0;
4979 for(hr=0;hr<HOST_REGS;hr++) {
4980 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4981 }
4982 reglist&=~(1<<fs);
4983 save_regs(reglist);
4984 if(opcode2[i]==0x10) {
4985 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4986 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4987 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4988 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4989 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4990 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4991 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4992 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4993 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4994 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4995 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4996 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4997 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4998 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4999 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
5000 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
5001 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
5002 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
5003 }
5004 if(opcode2[i]==0x11) {
5005 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5006 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5007 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
5008 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
5009 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
5010 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
5011 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
5012 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
5013 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
5014 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
5015 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
5016 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
5017 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
5018 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
5019 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
5020 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
5021 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
5022 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
5023 }
5024 restore_regs(reglist);
5025 emit_loadreg(FSREG,fs);
3d624f89 5026#else
5027 cop1_unusable(i, i_regs);
5028#endif
57871462 5029}
5030
5031void float_assemble(int i,struct regstat *i_regs)
5032{
3d624f89 5033#ifndef DISABLE_COP1
57871462 5034 signed char temp=get_reg(i_regs->regmap,-1);
5035 assert(temp>=0);
5036 // Check cop1 unusable
5037 if(!cop1_usable) {
5038 signed char cs=get_reg(i_regs->regmap,CSREG);
5039 assert(cs>=0);
5040 emit_testimm(cs,0x20000000);
5041 int jaddr=(int)out;
5042 emit_jeq(0);
5043 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
5044 cop1_usable=1;
5045 }
5046
5047 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
5048 if((source[i]&0x3f)==6) // mov
5049 {
5050 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5051 if(opcode2[i]==0x10) {
5052 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5053 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
5054 emit_readword_indexed(0,temp,temp);
5055 emit_writeword_indexed(temp,0,HOST_TEMPREG);
5056 }
5057 if(opcode2[i]==0x11) {
5058 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5059 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
5060 emit_vldr(temp,7);
5061 emit_vstr(7,HOST_TEMPREG);
5062 }
5063 }
5064 return;
5065 }
5066
5067 if((source[i]&0x3f)>3)
5068 {
5069 if(opcode2[i]==0x10) {
5070 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5071 emit_flds(temp,15);
5072 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5073 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5074 }
5075 if((source[i]&0x3f)==4) // sqrt
5076 emit_fsqrts(15,15);
5077 if((source[i]&0x3f)==5) // abs
5078 emit_fabss(15,15);
5079 if((source[i]&0x3f)==7) // neg
5080 emit_fnegs(15,15);
5081 emit_fsts(15,temp);
5082 }
5083 if(opcode2[i]==0x11) {
5084 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5085 emit_vldr(temp,7);
5086 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5087 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5088 }
5089 if((source[i]&0x3f)==4) // sqrt
5090 emit_fsqrtd(7,7);
5091 if((source[i]&0x3f)==5) // abs
5092 emit_fabsd(7,7);
5093 if((source[i]&0x3f)==7) // neg
5094 emit_fnegd(7,7);
5095 emit_vstr(7,temp);
5096 }
5097 return;
5098 }
5099 if((source[i]&0x3f)<4)
5100 {
5101 if(opcode2[i]==0x10) {
5102 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5103 }
5104 if(opcode2[i]==0x11) {
5105 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5106 }
5107 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5108 if(opcode2[i]==0x10) {
5109 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5110 emit_flds(temp,15);
5111 emit_flds(HOST_TEMPREG,13);
5112 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5113 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5114 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5115 }
5116 }
5117 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5118 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5119 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5120 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5121 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5122 emit_fsts(15,HOST_TEMPREG);
5123 }else{
5124 emit_fsts(15,temp);
5125 }
5126 }
5127 else if(opcode2[i]==0x11) {
5128 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5129 emit_vldr(temp,7);
5130 emit_vldr(HOST_TEMPREG,6);
5131 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5132 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5133 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5134 }
5135 }
5136 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5137 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5138 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5139 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5140 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5141 emit_vstr(7,HOST_TEMPREG);
5142 }else{
5143 emit_vstr(7,temp);
5144 }
5145 }
5146 }
5147 else {
5148 if(opcode2[i]==0x10) {
5149 emit_flds(temp,15);
5150 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5151 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5152 }
5153 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5154 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5155 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5156 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5157 emit_fsts(15,temp);
5158 }
5159 else if(opcode2[i]==0x11) {
5160 emit_vldr(temp,7);
5161 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5162 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5163 }
5164 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5165 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5166 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5167 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5168 emit_vstr(7,temp);
5169 }
5170 }
5171 return;
5172 }
5173 #endif
5174
5175 u_int hr,reglist=0;
5176 for(hr=0;hr<HOST_REGS;hr++) {
5177 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5178 }
5179 if(opcode2[i]==0x10) { // Single precision
5180 save_regs(reglist);
5181 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5182 if((source[i]&0x3f)<4) {
5183 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5184 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5185 }else{
5186 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5187 }
5188 switch(source[i]&0x3f)
5189 {
5190 case 0x00: emit_call((int)add_s);break;
5191 case 0x01: emit_call((int)sub_s);break;
5192 case 0x02: emit_call((int)mul_s);break;
5193 case 0x03: emit_call((int)div_s);break;
5194 case 0x04: emit_call((int)sqrt_s);break;
5195 case 0x05: emit_call((int)abs_s);break;
5196 case 0x06: emit_call((int)mov_s);break;
5197 case 0x07: emit_call((int)neg_s);break;
5198 }
5199 restore_regs(reglist);
5200 }
5201 if(opcode2[i]==0x11) { // Double precision
5202 save_regs(reglist);
5203 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5204 if((source[i]&0x3f)<4) {
5205 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5206 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5207 }else{
5208 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5209 }
5210 switch(source[i]&0x3f)
5211 {
5212 case 0x00: emit_call((int)add_d);break;
5213 case 0x01: emit_call((int)sub_d);break;
5214 case 0x02: emit_call((int)mul_d);break;
5215 case 0x03: emit_call((int)div_d);break;
5216 case 0x04: emit_call((int)sqrt_d);break;
5217 case 0x05: emit_call((int)abs_d);break;
5218 case 0x06: emit_call((int)mov_d);break;
5219 case 0x07: emit_call((int)neg_d);break;
5220 }
5221 restore_regs(reglist);
5222 }
3d624f89 5223#else
5224 cop1_unusable(i, i_regs);
5225#endif
57871462 5226}
5227
5228void multdiv_assemble_arm(int i,struct regstat *i_regs)
5229{
5230 // case 0x18: MULT
5231 // case 0x19: MULTU
5232 // case 0x1A: DIV
5233 // case 0x1B: DIVU
5234 // case 0x1C: DMULT
5235 // case 0x1D: DMULTU
5236 // case 0x1E: DDIV
5237 // case 0x1F: DDIVU
5238 if(rs1[i]&&rs2[i])
5239 {
5240 if((opcode2[i]&4)==0) // 32-bit
5241 {
5242 if(opcode2[i]==0x18) // MULT
5243 {
5244 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5245 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5246 signed char hi=get_reg(i_regs->regmap,HIREG);
5247 signed char lo=get_reg(i_regs->regmap,LOREG);
5248 assert(m1>=0);
5249 assert(m2>=0);
5250 assert(hi>=0);
5251 assert(lo>=0);
5252 emit_smull(m1,m2,hi,lo);
5253 }
5254 if(opcode2[i]==0x19) // MULTU
5255 {
5256 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5257 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5258 signed char hi=get_reg(i_regs->regmap,HIREG);
5259 signed char lo=get_reg(i_regs->regmap,LOREG);
5260 assert(m1>=0);
5261 assert(m2>=0);
5262 assert(hi>=0);
5263 assert(lo>=0);
5264 emit_umull(m1,m2,hi,lo);
5265 }
5266 if(opcode2[i]==0x1A) // DIV
5267 {
5268 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5269 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5270 assert(d1>=0);
5271 assert(d2>=0);
5272 signed char quotient=get_reg(i_regs->regmap,LOREG);
5273 signed char remainder=get_reg(i_regs->regmap,HIREG);
5274 assert(quotient>=0);
5275 assert(remainder>=0);
5276 emit_movs(d1,remainder);
44a80f6a 5277 emit_movimm(0xffffffff,quotient);
5278 emit_negmi(quotient,quotient); // .. quotient and ..
5279 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5280 emit_movs(d2,HOST_TEMPREG);
5281 emit_jeq((int)out+52); // Division by zero
5282 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5283 emit_clz(HOST_TEMPREG,quotient);
5284 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5285 emit_orimm(quotient,1<<31,quotient);
5286 emit_shr(quotient,quotient,quotient);
5287 emit_cmp(remainder,HOST_TEMPREG);
5288 emit_subcs(remainder,HOST_TEMPREG,remainder);
5289 emit_adcs(quotient,quotient,quotient);
5290 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5291 emit_jcc((int)out-16); // -4
5292 emit_teq(d1,d2);
5293 emit_negmi(quotient,quotient);
5294 emit_test(d1,d1);
5295 emit_negmi(remainder,remainder);
5296 }
5297 if(opcode2[i]==0x1B) // DIVU
5298 {
5299 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5300 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5301 assert(d1>=0);
5302 assert(d2>=0);
5303 signed char quotient=get_reg(i_regs->regmap,LOREG);
5304 signed char remainder=get_reg(i_regs->regmap,HIREG);
5305 assert(quotient>=0);
5306 assert(remainder>=0);
44a80f6a 5307 emit_mov(d1,remainder);
5308 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5309 emit_test(d2,d2);
44a80f6a 5310 emit_jeq((int)out+40); // Division by zero
57871462 5311 emit_clz(d2,HOST_TEMPREG);
5312 emit_movimm(1<<31,quotient);
5313 emit_shl(d2,HOST_TEMPREG,d2);
57871462 5314 emit_shr(quotient,HOST_TEMPREG,quotient);
5315 emit_cmp(remainder,d2);
5316 emit_subcs(remainder,d2,remainder);
5317 emit_adcs(quotient,quotient,quotient);
5318 emit_shrcc_imm(d2,1,d2);
5319 emit_jcc((int)out-16); // -4
5320 }
5321 }
5322 else // 64-bit
4600ba03 5323#ifndef FORCE32
57871462 5324 {
5325 if(opcode2[i]==0x1C) // DMULT
5326 {
5327 assert(opcode2[i]!=0x1C);
5328 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5329 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5330 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5331 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5332 assert(m1h>=0);
5333 assert(m2h>=0);
5334 assert(m1l>=0);
5335 assert(m2l>=0);
5336 emit_pushreg(m2h);
5337 emit_pushreg(m2l);
5338 emit_pushreg(m1h);
5339 emit_pushreg(m1l);
5340 emit_call((int)&mult64);
5341 emit_popreg(m1l);
5342 emit_popreg(m1h);
5343 emit_popreg(m2l);
5344 emit_popreg(m2h);
5345 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5346 signed char hil=get_reg(i_regs->regmap,HIREG);
5347 if(hih>=0) emit_loadreg(HIREG|64,hih);
5348 if(hil>=0) emit_loadreg(HIREG,hil);
5349 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5350 signed char lol=get_reg(i_regs->regmap,LOREG);
5351 if(loh>=0) emit_loadreg(LOREG|64,loh);
5352 if(lol>=0) emit_loadreg(LOREG,lol);
5353 }
5354 if(opcode2[i]==0x1D) // DMULTU
5355 {
5356 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5357 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5358 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5359 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5360 assert(m1h>=0);
5361 assert(m2h>=0);
5362 assert(m1l>=0);
5363 assert(m2l>=0);
5364 save_regs(0x100f);
5365 if(m1l!=0) emit_mov(m1l,0);
5366 if(m1h==0) emit_readword((int)&dynarec_local,1);
5367 else if(m1h>1) emit_mov(m1h,1);
5368 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5369 else if(m2l>2) emit_mov(m2l,2);
5370 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5371 else if(m2h>3) emit_mov(m2h,3);
5372 emit_call((int)&multu64);
5373 restore_regs(0x100f);
5374 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5375 signed char hil=get_reg(i_regs->regmap,HIREG);
5376 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5377 signed char lol=get_reg(i_regs->regmap,LOREG);
5378 /*signed char temp=get_reg(i_regs->regmap,-1);
5379 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5380 signed char rl=get_reg(i_regs->regmap,HIREG);
5381 assert(m1h>=0);
5382 assert(m2h>=0);
5383 assert(m1l>=0);
5384 assert(m2l>=0);
5385 assert(temp>=0);
5386 //emit_mov(m1l,EAX);
5387 //emit_mul(m2l);
5388 emit_umull(rl,rh,m1l,m2l);
5389 emit_storereg(LOREG,rl);
5390 emit_mov(rh,temp);
5391 //emit_mov(m1h,EAX);
5392 //emit_mul(m2l);
5393 emit_umull(rl,rh,m1h,m2l);
5394 emit_adds(rl,temp,temp);
5395 emit_adcimm(rh,0,rh);
5396 emit_storereg(HIREG,rh);
5397 //emit_mov(m2h,EAX);
5398 //emit_mul(m1l);
5399 emit_umull(rl,rh,m1l,m2h);
5400 emit_adds(rl,temp,temp);
5401 emit_adcimm(rh,0,rh);
5402 emit_storereg(LOREG|64,temp);
5403 emit_mov(rh,temp);
5404 //emit_mov(m2h,EAX);
5405 //emit_mul(m1h);
5406 emit_umull(rl,rh,m1h,m2h);
5407 emit_adds(rl,temp,rl);
5408 emit_loadreg(HIREG,temp);
5409 emit_adcimm(rh,0,rh);
5410 emit_adds(rl,temp,rl);
5411 emit_adcimm(rh,0,rh);
5412 // DEBUG
5413 /*
5414 emit_pushreg(m2h);
5415 emit_pushreg(m2l);
5416 emit_pushreg(m1h);
5417 emit_pushreg(m1l);
5418 emit_call((int)&multu64);
5419 emit_popreg(m1l);
5420 emit_popreg(m1h);
5421 emit_popreg(m2l);
5422 emit_popreg(m2h);
5423 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5424 signed char hil=get_reg(i_regs->regmap,HIREG);
5425 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5426 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5427 */
5428 // Shouldn't be necessary
5429 //char loh=get_reg(i_regs->regmap,LOREG|64);
5430 //char lol=get_reg(i_regs->regmap,LOREG);
5431 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5432 //if(lol>=0) emit_loadreg(LOREG,lol);
5433 }
5434 if(opcode2[i]==0x1E) // DDIV
5435 {
5436 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5437 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5438 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5439 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5440 assert(d1h>=0);
5441 assert(d2h>=0);
5442 assert(d1l>=0);
5443 assert(d2l>=0);
5444 save_regs(0x100f);
5445 if(d1l!=0) emit_mov(d1l,0);
5446 if(d1h==0) emit_readword((int)&dynarec_local,1);
5447 else if(d1h>1) emit_mov(d1h,1);
5448 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5449 else if(d2l>2) emit_mov(d2l,2);
5450 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5451 else if(d2h>3) emit_mov(d2h,3);
5452 emit_call((int)&div64);
5453 restore_regs(0x100f);
5454 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5455 signed char hil=get_reg(i_regs->regmap,HIREG);
5456 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5457 signed char lol=get_reg(i_regs->regmap,LOREG);
5458 if(hih>=0) emit_loadreg(HIREG|64,hih);
5459 if(hil>=0) emit_loadreg(HIREG,hil);
5460 if(loh>=0) emit_loadreg(LOREG|64,loh);
5461 if(lol>=0) emit_loadreg(LOREG,lol);
5462 }
5463 if(opcode2[i]==0x1F) // DDIVU
5464 {
5465 //u_int hr,reglist=0;
5466 //for(hr=0;hr<HOST_REGS;hr++) {
5467 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5468 //}
5469 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5470 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5471 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5472 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5473 assert(d1h>=0);
5474 assert(d2h>=0);
5475 assert(d1l>=0);
5476 assert(d2l>=0);
5477 save_regs(0x100f);
5478 if(d1l!=0) emit_mov(d1l,0);
5479 if(d1h==0) emit_readword((int)&dynarec_local,1);
5480 else if(d1h>1) emit_mov(d1h,1);
5481 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5482 else if(d2l>2) emit_mov(d2l,2);
5483 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5484 else if(d2h>3) emit_mov(d2h,3);
5485 emit_call((int)&divu64);
5486 restore_regs(0x100f);
5487 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5488 signed char hil=get_reg(i_regs->regmap,HIREG);
5489 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5490 signed char lol=get_reg(i_regs->regmap,LOREG);
5491 if(hih>=0) emit_loadreg(HIREG|64,hih);
5492 if(hil>=0) emit_loadreg(HIREG,hil);
5493 if(loh>=0) emit_loadreg(LOREG|64,loh);
5494 if(lol>=0) emit_loadreg(LOREG,lol);
5495 }
5496 }
4600ba03 5497#else
5498 assert(0);
5499#endif
57871462 5500 }
5501 else
5502 {
5503 // Multiply by zero is zero.
5504 // MIPS does not have a divide by zero exception.
5505 // The result is undefined, we return zero.
5506 signed char hr=get_reg(i_regs->regmap,HIREG);
5507 signed char lr=get_reg(i_regs->regmap,LOREG);
5508 if(hr>=0) emit_zeroreg(hr);
5509 if(lr>=0) emit_zeroreg(lr);
5510 }
5511}
5512#define multdiv_assemble multdiv_assemble_arm
5513
// Intentionally emits nothing on ARM.  On x86 this step puts the value 0xf8
// into the register; on ARM the hash can be done with a single instruction
// (see do_rhash), so there is nothing to preload.
void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
5518
5519void do_preload_rhtbl(int ht) {
5520 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5521}
5522
// Hash the address in `rs` into `rh`: rh = rs & 0xf8.  The result is
// directly the byte offset of an 8-byte mini_ht entry (same slot as
// (addr&0xFF)>>3 used by do_miniht_insert).
void do_rhash(int rs,int rh) {
  const int slot_offset_mask=0xf8;
  emit_andimm(rs,slot_offset_mask,rh);
}
5526
5527void do_miniht_load(int ht,int rh) {
5528 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5529 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5530}
5531
5532void do_miniht_jump(int rs,int rh,int ht) {
5533 emit_cmp(rh,rs);
5534 emit_ldreq_indexed(ht,4,15);
5535 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5536 emit_mov(rs,7);
5537 emit_jmp(jump_vaddr_reg[7]);
5538 #else
5539 emit_jmp(jump_vaddr_reg[rs]);
5540 #endif
5541}
5542
5543void do_miniht_insert(u_int return_address,int rt,int temp) {
5544 #ifdef ARMv5_ONLY
5545 emit_movimm(return_address,rt); // PC into link register
5546 add_to_linker((int)out,return_address,1);
5547 emit_pcreladdr(temp);
5548 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5549 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5550 #else
5551 emit_movw(return_address&0x0000FFFF,rt);
5552 add_to_linker((int)out,return_address,1);
5553 emit_pcreladdr(temp);
5554 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5555 emit_movt(return_address&0xFFFF0000,rt);
5556 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5557 #endif
5558}
5559
5560// Sign-extend to 64 bits and write out upper half of a register
5561// This is useful where we have a 32-bit value in a register, and want to
5562// keep it in a 32-bit register, but can't guarantee that it won't be read
5563// as a 64-bit value later.
5564void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5565{
24385cae 5566#ifndef FORCE32
57871462 5567 if(is32_pre==is32) return;
5568 int hr,reg;
5569 for(hr=0;hr<HOST_REGS;hr++) {
5570 if(hr!=EXCLUDE_REG) {
5571 //if(pre[hr]==entry[hr]) {
5572 if((reg=pre[hr])>=0) {
5573 if((dirty>>hr)&1) {
5574 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5575 emit_sarimm(hr,31,HOST_TEMPREG);
5576 emit_storereg(reg|64,HOST_TEMPREG);
5577 }
5578 }
5579 }
5580 //}
5581 }
5582 }
24385cae 5583#endif
57871462 5584}
5585
5586void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5587{
5588 //if(dirty_pre==dirty) return;
5589 int hr,reg,new_hr;
5590 for(hr=0;hr<HOST_REGS;hr++) {
5591 if(hr!=EXCLUDE_REG) {
5592 reg=pre[hr];
5593 if(((~u)>>(reg&63))&1) {
f776eb14 5594 if(reg>0) {
57871462 5595 if(((dirty_pre&~dirty)>>hr)&1) {
5596 if(reg>0&&reg<34) {
5597 emit_storereg(reg,hr);
5598 if( ((is32_pre&~uu)>>reg)&1 ) {
5599 emit_sarimm(hr,31,HOST_TEMPREG);
5600 emit_storereg(reg|64,HOST_TEMPREG);
5601 }
5602 }
5603 else if(reg>=64) {
5604 emit_storereg(reg,hr);
5605 }
5606 }
5607 }
57871462 5608 }
5609 }
5610 }
5611}
5612
5613
5614/* using strd could possibly help but you'd have to allocate registers in pairs
5615void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5616{
5617 int hr;
5618 int wrote=-1;
5619 for(hr=HOST_REGS-1;hr>=0;hr--) {
5620 if(hr!=EXCLUDE_REG) {
5621 if(pre[hr]!=entry[hr]) {
5622 if(pre[hr]>=0) {
5623 if((dirty>>hr)&1) {
5624 if(get_reg(entry,pre[hr])<0) {
5625 if(pre[hr]<64) {
5626 if(!((u>>pre[hr])&1)) {
5627 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5628 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5629 emit_sarimm(hr,31,hr+1);
5630 emit_strdreg(pre[hr],hr);
5631 }
5632 else
5633 emit_storereg(pre[hr],hr);
5634 }else{
5635 emit_storereg(pre[hr],hr);
5636 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5637 emit_sarimm(hr,31,hr);
5638 emit_storereg(pre[hr]|64,hr);
5639 }
5640 }
5641 }
5642 }else{
5643 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5644 emit_storereg(pre[hr],hr);
5645 }
5646 }
5647 wrote=hr;
5648 }
5649 }
5650 }
5651 }
5652 }
5653 }
5654 for(hr=0;hr<HOST_REGS;hr++) {
5655 if(hr!=EXCLUDE_REG) {
5656 if(pre[hr]!=entry[hr]) {
5657 if(pre[hr]>=0) {
5658 int nr;
5659 if((nr=get_reg(entry,pre[hr]))>=0) {
5660 emit_mov(hr,nr);
5661 }
5662 }
5663 }
5664 }
5665 }
5666}
5667#define wb_invalidate wb_invalidate_arm
5668*/
5669
dd3a91a1 5670// Clearing the cache is rather slow on ARM Linux, so mark the areas
5671// that need to be cleared, and then only clear these areas once.
5672void do_clear_cache()
5673{
5674 int i,j;
5675 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5676 {
5677 u_int bitmap=needs_clear_cache[i];
5678 if(bitmap) {
5679 u_int start,end;
5680 for(j=0;j<32;j++)
5681 {
5682 if(bitmap&(1<<j)) {
5683 start=BASE_ADDR+i*131072+j*4096;
5684 end=start+4095;
5685 j++;
5686 while(j<32) {
5687 if(bitmap&(1<<j)) {
5688 end+=4096;
5689 j++;
5690 }else{
5691 __clear_cache((void *)start,(void *)end);
5692 break;
5693 }
5694 }
5695 }
5696 }
5697 needs_clear_cache[i]=0;
5698 }
5699 }
5700}
5701
57871462 5702// CPU-architecture-specific initialization
5703void arch_init() {
3d624f89 5704#ifndef DISABLE_COP1
57871462 5705 rounding_modes[0]=0x0<<22; // round
5706 rounding_modes[1]=0x3<<22; // trunc
5707 rounding_modes[2]=0x1<<22; // ceil
5708 rounding_modes[3]=0x2<<22; // floor
3d624f89 5709#endif
57871462 5710}
b9b61529 5711
5712// vim:shiftwidth=2:expandtab