drc: avoid invcode checks on close addresses
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
23#include "../gte_arm.h"
24#include "../gte_neon.h"
25#include "pcnt.h"
26#endif
27
// State owned by other translation units and referenced by this backend.
57871462 28extern int cycle_count;
29extern int last_count;
30extern int pcaddr;
31extern int pending_exception;
32extern int branch_target;
33extern uint64_t readmem_dword;
3d624f89 34#ifdef MUPEN64
57871462 35extern precomp_instr fake_pc;
3d624f89 36#endif
// Base of the block that emit_loadreg/emit_storereg address as fp+offset.
57871462 37extern void *dynarec_local;
// 2^20 entries; indexed by (address>>12) in verify_dirty/get_bounds.
38extern u_int memory_map[1048576];
39extern u_int mini_ht[32][2];
40extern u_int rounding_modes[4];
41
// Entry points defined outside this file (no prototypes: declared with
// empty parameter lists).
42void indirect_jump_indexed();
43void indirect_jump();
44void do_interrupt();
45void jump_vaddr_r0();
46void jump_vaddr_r1();
47void jump_vaddr_r2();
48void jump_vaddr_r3();
49void jump_vaddr_r4();
50void jump_vaddr_r5();
51void jump_vaddr_r6();
52void jump_vaddr_r7();
53void jump_vaddr_r8();
54void jump_vaddr_r9();
55void jump_vaddr_r10();
56void jump_vaddr_r12();
57
// Address of the jump_vaddr_rN routine for each host register number N.
// Slots 11, 13, 14 and 15 (fp, sp, lr, pc in regname[]) have no routine
// and hold 0.
58const u_int jump_vaddr_reg[16] = {
59 (int)jump_vaddr_r0,
60 (int)jump_vaddr_r1,
61 (int)jump_vaddr_r2,
62 (int)jump_vaddr_r3,
63 (int)jump_vaddr_r4,
64 (int)jump_vaddr_r5,
65 (int)jump_vaddr_r6,
66 (int)jump_vaddr_r7,
67 (int)jump_vaddr_r8,
68 (int)jump_vaddr_r9,
69 (int)jump_vaddr_r10,
70 0,
71 (int)jump_vaddr_r12,
72 0,
73 0,
74 0};
75
// Per-host-register invalidation entry points, defined outside this file.
0bbd1454 76void invalidate_addr_r0();
77void invalidate_addr_r1();
78void invalidate_addr_r2();
79void invalidate_addr_r3();
80void invalidate_addr_r4();
81void invalidate_addr_r5();
82void invalidate_addr_r6();
83void invalidate_addr_r7();
84void invalidate_addr_r8();
85void invalidate_addr_r9();
86void invalidate_addr_r10();
87void invalidate_addr_r12();
88
// Address of the invalidate_addr_rN routine for each host register number N.
// Same layout as jump_vaddr_reg: slots 11, 13, 14 and 15 hold 0.
89const u_int invalidate_addr_reg[16] = {
90 (int)invalidate_addr_r0,
91 (int)invalidate_addr_r1,
92 (int)invalidate_addr_r2,
93 (int)invalidate_addr_r3,
94 (int)invalidate_addr_r4,
95 (int)invalidate_addr_r5,
96 (int)invalidate_addr_r6,
97 (int)invalidate_addr_r7,
98 (int)invalidate_addr_r8,
99 (int)invalidate_addr_r9,
100 (int)invalidate_addr_r10,
101 0,
102 (int)invalidate_addr_r12,
103 0,
104 0,
105 0};
106
57871462 107#include "fpu.h"
108
// Array of 2^(TARGET_SIZE_2-17) words.
// NOTE(review): presumably a bitmap over the translation cache (one bit per
// 4 KiB if TARGET_SIZE_2 is log2 of the cache size) - confirm against users.
dd3a91a1 109unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
110
57871462 111/* Linker */
112
113void set_jump_target(int addr,u_int target)
114{
115 u_char *ptr=(u_char *)addr;
116 u_int *ptr2=(u_int *)ptr;
117 if(ptr[3]==0xe2) {
118 assert((target-(u_int)ptr2-8)<1024);
119 assert((addr&3)==0);
120 assert((target&3)==0);
121 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
122 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
123 }
124 else if(ptr[3]==0x72) {
125 // generated by emit_jno_unlikely
126 if((target-(u_int)ptr2-8)<1024) {
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 }
131 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
132 assert((addr&3)==0);
133 assert((target&3)==0);
134 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
135 }
136 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
137 }
138 else {
139 assert((ptr[3]&0x0e)==0xa);
140 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
141 }
142}
143
144// This optionally copies the instruction from the target of the branch into
145// the space before the branch. Works, but the difference in speed is
146// usually insignificant.
147void set_jump_target_fillslot(int addr,u_int target,int copy)
148{
149 u_char *ptr=(u_char *)addr;
150 u_int *ptr2=(u_int *)ptr;
151 assert(!copy||ptr2[-1]==0xe28dd000);
152 if(ptr[3]==0xe2) {
153 assert(!copy);
154 assert((target-(u_int)ptr2-8)<4096);
155 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
156 }
157 else {
158 assert((ptr[3]&0x0e)==0xa);
159 u_int target_insn=*(u_int *)target;
160 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
161 copy=0;
162 }
163 if((target_insn&0x0c100000)==0x04100000) { // Load
164 copy=0;
165 }
166 if(target_insn&0x08000000) {
167 copy=0;
168 }
169 if(copy) {
170 ptr2[-1]=target_insn;
171 target+=4;
172 }
173 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
174 }
175}
176
177/* Literal pool */
178add_literal(int addr,int val)
179{
15776b68 180 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 181 literals[literalcount][0]=addr;
182 literals[literalcount][1]=val;
183 literalcount++;
184}
185
f76eeef9 186void *kill_pointer(void *stub)
57871462 187{
188 int *ptr=(int *)(stub+4);
189 assert((*ptr&0x0ff00000)==0x05900000);
190 u_int offset=*ptr&0xfff;
191 int **l_ptr=(void *)ptr+offset+8;
192 int *i_ptr=*l_ptr;
193 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 194 return i_ptr;
57871462 195}
196
f968d35d 197// find where external branch is linked to using addr of its stub:
198// get address that insn one after stub loads (dyna_linker arg1),
199// treat it as a pointer to branch insn,
200// return addr where that branch jumps to
57871462 201int get_pointer(void *stub)
202{
203 //printf("get_pointer(%x)\n",(int)stub);
204 int *ptr=(int *)(stub+4);
f968d35d 205 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 206 u_int offset=*ptr&0xfff;
207 int **l_ptr=(void *)ptr+offset+8;
208 int *i_ptr=*l_ptr;
209 assert((*i_ptr&0x0f000000)==0x0a000000);
210 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
211}
212
213// Find the "clean" entry point from a "dirty" entry point
214// by skipping past the call to verify_code
215u_int get_clean_addr(int addr)
216{
217 int *ptr=(int *)addr;
218 #ifdef ARMv5_ONLY
219 ptr+=4;
220 #else
221 ptr+=6;
222 #endif
223 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
224 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
225 ptr++;
226 if((*ptr&0xFF000000)==0xea000000) {
227 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
228 }
229 return (u_int)ptr;
230}
231
232int verify_dirty(int addr)
233{
234 u_int *ptr=(u_int *)addr;
235 #ifdef ARMv5_ONLY
236 // get from literal pool
15776b68 237 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 238 u_int offset=*ptr&0xfff;
239 u_int *l_ptr=(void *)ptr+offset+8;
240 u_int source=l_ptr[0];
241 u_int copy=l_ptr[1];
242 u_int len=l_ptr[2];
243 ptr+=4;
244 #else
245 // ARMv7 movw/movt
246 assert((*ptr&0xFFF00000)==0xe3000000);
247 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
248 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
249 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
250 ptr+=6;
251 #endif
252 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
253 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 254#ifndef DISABLE_TLB
cfcba99a 255 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 256 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
257 unsigned int page=source>>12;
258 unsigned int map_value=memory_map[page];
259 if(map_value>=0x80000000) return 0;
260 while(page<((source+len-1)>>12)) {
261 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
262 }
263 source = source+(map_value<<2);
264 }
63cb0298 265#endif
57871462 266 //printf("verify_dirty: %x %x %x\n",source,copy,len);
267 return !memcmp((void *)source,(void *)copy,len);
268}
269
270// This doesn't necessarily find all clean entry points, just
271// guarantees that it's not dirty
272int isclean(int addr)
273{
274 #ifdef ARMv5_ONLY
275 int *ptr=((u_int *)addr)+4;
276 #else
277 int *ptr=((u_int *)addr)+6;
278 #endif
279 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
280 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
281 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
282 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
283 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
284 return 1;
285}
286
287void get_bounds(int addr,u_int *start,u_int *end)
288{
289 u_int *ptr=(u_int *)addr;
290 #ifdef ARMv5_ONLY
291 // get from literal pool
15776b68 292 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 293 u_int offset=*ptr&0xfff;
294 u_int *l_ptr=(void *)ptr+offset+8;
295 u_int source=l_ptr[0];
296 //u_int copy=l_ptr[1];
297 u_int len=l_ptr[2];
298 ptr+=4;
299 #else
300 // ARMv7 movw/movt
301 assert((*ptr&0xFFF00000)==0xe3000000);
302 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
303 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
304 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
305 ptr+=6;
306 #endif
307 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
308 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 309#ifndef DISABLE_TLB
cfcba99a 310 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 311 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
312 if(memory_map[source>>12]>=0x80000000) source = 0;
313 else source = source+(memory_map[source>>12]<<2);
314 }
63cb0298 315#endif
57871462 316 *start=source;
317 *end=source+len;
318}
319
320/* Register allocation */
321
322// Note: registers are allocated clean (unmodified state)
323// if you intend to modify the register, you must call dirty_reg().
// Map guest register 'reg' to a host register in the register state 'cur'
// for instruction index 'i'.  A new mapping is entered with its dirty and
// isconst bits cleared.  Order of attempts:
//   1. return at once if 'reg' is flagged in cur->u or is already mapped;
//   2. take the preferred host register (reg&7, HOST_CCREG for CCREG, 12 for
//      PTEMP/FTEMP, then whatever loop_reg() returns) if it is free or holds
//      a register flagged in cur->u / cur->uu;
//   3. free one mapping that is flagged unneeded, then take any free host
//      register other than EXCLUDE_REG, preferring one that did not hold an
//      operand of instruction i-1;
//   4. evict: walk the hsn[] table filled in by lsn() from 10 downwards.
// Calls exit(1) if every attempt fails.
324void alloc_reg(struct regstat *cur,int i,signed char reg)
325{
326 int r,hr;
327 int preferred_reg = (reg&7);
328 if(reg==CCREG) preferred_reg=HOST_CCREG;
329 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
330
331 // Don't allocate unused registers
332 if((cur->u>>reg)&1) return;
333
334 // see if it's already allocated
335 for(hr=0;hr<HOST_REGS;hr++)
336 {
337 if(cur->regmap[hr]==reg) return;
338 }
339
340 // Keep the same mapping if the register was already allocated in a loop
341 preferred_reg = loop_reg(i,reg,preferred_reg);
342
343 // Try to allocate the preferred register
344 if(cur->regmap[preferred_reg]==-1) {
345 cur->regmap[preferred_reg]=reg;
346 cur->dirty&=~(1<<preferred_reg);
347 cur->isconst&=~(1<<preferred_reg);
348 return;
349 }
// Preferred register is occupied: reuse it if its current content is
// flagged unneeded (values >= 64 are checked against cur->uu instead).
350 r=cur->regmap[preferred_reg];
351 if(r<64&&((cur->u>>r)&1)) {
352 cur->regmap[preferred_reg]=reg;
353 cur->dirty&=~(1<<preferred_reg);
354 cur->isconst&=~(1<<preferred_reg);
355 return;
356 }
357 if(r>=64&&((cur->uu>>(r&63))&1)) {
358 cur->regmap[preferred_reg]=reg;
359 cur->dirty&=~(1<<preferred_reg);
360 cur->isconst&=~(1<<preferred_reg);
361 return;
362 }
363
364 // Clear any unneeded registers
365 // We try to keep the mapping consistent, if possible, because it
366 // makes branches easier (especially loops). So we try to allocate
367 // first (see above) before removing old mappings. If this is not
368 // possible then go ahead and clear out the registers that are no
369 // longer needed.
// Note: the loop stops after unmapping the first match (break).
370 for(hr=0;hr<HOST_REGS;hr++)
371 {
372 r=cur->regmap[hr];
373 if(r>=0) {
374 if(r<64) {
375 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
376 }
377 else
378 {
379 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
380 }
381 }
382 }
383 // Try to allocate any available register, but prefer
384 // registers that have not been used recently.
385 if(i>0) {
386 for(hr=0;hr<HOST_REGS;hr++) {
387 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
388 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
389 cur->regmap[hr]=reg;
390 cur->dirty&=~(1<<hr);
391 cur->isconst&=~(1<<hr);
392 return;
393 }
394 }
395 }
396 }
397 // Try to allocate any available register
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
400 cur->regmap[hr]=reg;
401 cur->dirty&=~(1<<hr);
402 cur->isconst&=~(1<<hr);
403 return;
404 }
405 }
406
407 // Ok, now we have to evict someone
408 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every register and is then updated by lsn();
// NOTE(review): presumably a lower value means "needed sooner" - confirm
// against lsn().
409 u_char hsn[MAXREG+1];
410 memset(hsn,10,sizeof(hsn));
411 int j;
412 lsn(hsn,i,&preferred_reg);
413 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
414 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
415 if(i>0) {
416 // Don't evict the cycle count at entry points, otherwise the entry
417 // stub will have to write it.
418 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
419 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass (values 10..3): skips operands of instruction i-1 and only
// touches HOST_CCREG while j is below hsn[CCREG].
420 for(j=10;j>=3;j--)
421 {
422 // Alloc preferred register if available
423 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
424 for(hr=0;hr<HOST_REGS;hr++) {
425 // Evict both parts of a 64-bit register
426 if((cur->regmap[hr]&63)==r) {
427 cur->regmap[hr]=-1;
428 cur->dirty&=~(1<<hr);
429 cur->isconst&=~(1<<hr);
430 }
431 }
432 cur->regmap[preferred_reg]=reg;
433 return;
434 }
435 for(r=1;r<=MAXREG;r++)
436 {
437 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Host registers mapped as r+64 are taken before those mapped as r.
438 for(hr=0;hr<HOST_REGS;hr++) {
439 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
440 if(cur->regmap[hr]==r+64) {
441 cur->regmap[hr]=reg;
442 cur->dirty&=~(1<<hr);
443 cur->isconst&=~(1<<hr);
444 return;
445 }
446 }
447 }
448 for(hr=0;hr<HOST_REGS;hr++) {
449 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
450 if(cur->regmap[hr]==r) {
451 cur->regmap[hr]=reg;
452 cur->dirty&=~(1<<hr);
453 cur->isconst&=~(1<<hr);
454 return;
455 }
456 }
457 }
458 }
459 }
460 }
461 }
// Last resort (values 10..0): no operand or HOST_CCREG restrictions.
462 for(j=10;j>=0;j--)
463 {
464 for(r=1;r<=MAXREG;r++)
465 {
466 if(hsn[r]==j) {
467 for(hr=0;hr<HOST_REGS;hr++) {
468 if(cur->regmap[hr]==r+64) {
469 cur->regmap[hr]=reg;
470 cur->dirty&=~(1<<hr);
471 cur->isconst&=~(1<<hr);
472 return;
473 }
474 }
475 for(hr=0;hr<HOST_REGS;hr++) {
476 if(cur->regmap[hr]==r) {
477 cur->regmap[hr]=reg;
478 cur->dirty&=~(1<<hr);
479 cur->isconst&=~(1<<hr);
480 return;
481 }
482 }
483 }
484 }
485 }
486 printf("This shouldn't happen (alloc_reg)");exit(1);
487}
488
// Allocate both halves of guest register 'reg' in the register state 'cur'
// for instruction index 'i'.  The low half is allocated through alloc_reg();
// the upper half is then mapped as reg|64 (skipped when 'reg' is flagged in
// cur->uu or the upper half is already mapped).  The search for the upper
// half follows the same sequence as alloc_reg, starting from preferred host
// register 8+(reg&1), except that the "clear one unneeded mapping" scan runs
// from the highest host register downwards.
// Calls exit(1) if every attempt fails.
489void alloc_reg64(struct regstat *cur,int i,signed char reg)
490{
491 int preferred_reg = 8+(reg&1);
492 int r,hr;
493
494 // allocate the lower 32 bits
495 alloc_reg(cur,i,reg);
496
497 // Don't allocate unused registers
498 if((cur->uu>>reg)&1) return;
499
500 // see if the upper half is already allocated
501 for(hr=0;hr<HOST_REGS;hr++)
502 {
503 if(cur->regmap[hr]==reg+64) return;
504 }
505
506 // Keep the same mapping if the register was already allocated in a loop
507 preferred_reg = loop_reg(i,reg,preferred_reg);
508
509 // Try to allocate the preferred register
510 if(cur->regmap[preferred_reg]==-1) {
511 cur->regmap[preferred_reg]=reg|64;
512 cur->dirty&=~(1<<preferred_reg);
513 cur->isconst&=~(1<<preferred_reg);
514 return;
515 }
// Preferred register is occupied: reuse it if its content is flagged
// unneeded (cur->u for values < 64, cur->uu otherwise).
516 r=cur->regmap[preferred_reg];
517 if(r<64&&((cur->u>>r)&1)) {
518 cur->regmap[preferred_reg]=reg|64;
519 cur->dirty&=~(1<<preferred_reg);
520 cur->isconst&=~(1<<preferred_reg);
521 return;
522 }
523 if(r>=64&&((cur->uu>>(r&63))&1)) {
524 cur->regmap[preferred_reg]=reg|64;
525 cur->dirty&=~(1<<preferred_reg);
526 cur->isconst&=~(1<<preferred_reg);
527 return;
528 }
529
530 // Clear any unneeded registers
531 // We try to keep the mapping consistent, if possible, because it
532 // makes branches easier (especially loops). So we try to allocate
533 // first (see above) before removing old mappings. If this is not
534 // possible then go ahead and clear out the registers that are no
535 // longer needed.
// Scans from the top host register down and stops at the first match.
536 for(hr=HOST_REGS-1;hr>=0;hr--)
537 {
538 r=cur->regmap[hr];
539 if(r>=0) {
540 if(r<64) {
541 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
542 }
543 else
544 {
545 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
546 }
547 }
548 }
549 // Try to allocate any available register, but prefer
550 // registers that have not been used recently.
551 if(i>0) {
552 for(hr=0;hr<HOST_REGS;hr++) {
553 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
554 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
555 cur->regmap[hr]=reg|64;
556 cur->dirty&=~(1<<hr);
557 cur->isconst&=~(1<<hr);
558 return;
559 }
560 }
561 }
562 }
563 // Try to allocate any available register
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
566 cur->regmap[hr]=reg|64;
567 cur->dirty&=~(1<<hr);
568 cur->isconst&=~(1<<hr);
569 return;
570 }
571 }
572
573 // Ok, now we have to evict someone
574 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every register and is then updated by lsn().
575 u_char hsn[MAXREG+1];
576 memset(hsn,10,sizeof(hsn));
577 int j;
578 lsn(hsn,i,&preferred_reg);
579 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
580 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
581 if(i>0) {
582 // Don't evict the cycle count at entry points, otherwise the entry
583 // stub will have to write it.
584 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
585 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass (values 10..3): skips operands of instruction i-1 and only
// touches HOST_CCREG while j is below hsn[CCREG].
586 for(j=10;j>=3;j--)
587 {
588 // Alloc preferred register if available
589 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
590 for(hr=0;hr<HOST_REGS;hr++) {
591 // Evict both parts of a 64-bit register
592 if((cur->regmap[hr]&63)==r) {
593 cur->regmap[hr]=-1;
594 cur->dirty&=~(1<<hr);
595 cur->isconst&=~(1<<hr);
596 }
597 }
598 cur->regmap[preferred_reg]=reg|64;
599 return;
600 }
601 for(r=1;r<=MAXREG;r++)
602 {
603 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
604 for(hr=0;hr<HOST_REGS;hr++) {
605 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
606 if(cur->regmap[hr]==r+64) {
607 cur->regmap[hr]=reg|64;
608 cur->dirty&=~(1<<hr);
609 cur->isconst&=~(1<<hr);
610 return;
611 }
612 }
613 }
614 for(hr=0;hr<HOST_REGS;hr++) {
615 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
616 if(cur->regmap[hr]==r) {
617 cur->regmap[hr]=reg|64;
618 cur->dirty&=~(1<<hr);
619 cur->isconst&=~(1<<hr);
620 return;
621 }
622 }
623 }
624 }
625 }
626 }
627 }
// Last resort (values 10..0): no operand or HOST_CCREG restrictions.
628 for(j=10;j>=0;j--)
629 {
630 for(r=1;r<=MAXREG;r++)
631 {
632 if(hsn[r]==j) {
633 for(hr=0;hr<HOST_REGS;hr++) {
634 if(cur->regmap[hr]==r+64) {
635 cur->regmap[hr]=reg|64;
636 cur->dirty&=~(1<<hr);
637 cur->isconst&=~(1<<hr);
638 return;
639 }
640 }
641 for(hr=0;hr<HOST_REGS;hr++) {
642 if(cur->regmap[hr]==r) {
643 cur->regmap[hr]=reg|64;
644 cur->dirty&=~(1<<hr);
645 cur->isconst&=~(1<<hr);
646 return;
647 }
648 }
649 }
650 }
651 }
652 printf("This shouldn't happen");exit(1);
653}
654
655// Allocate a temporary register. This is done without regard to
656// dirty status or whether the register we request is on the unneeded list
657// Note: This will only allocate one register, even if called multiple times
// Parameters: 'cur' is the register state to update, 'i' the instruction
// index, 'reg' the temporary's register number.  The chosen host register
// is entered with its dirty and isconst bits cleared.  Free host registers
// and unneeded mappings are searched from the highest host register down.
// Calls exit(1) if no host register can be found.
658void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
659{
660 int r,hr;
661 int preferred_reg = -1;
662
663 // see if it's already allocated
664 for(hr=0;hr<HOST_REGS;hr++)
665 {
666 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
667 }
668
669 // Try to allocate any available register
670 for(hr=HOST_REGS-1;hr>=0;hr--) {
671 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
672 cur->regmap[hr]=reg;
673 cur->dirty&=~(1<<hr);
674 cur->isconst&=~(1<<hr);
675 return;
676 }
677 }
678
679 // Find an unneeded register
// A mapping qualifies only if it is flagged unneeded both in 'cur' and
// (when i>0) in unneeded_reg[i-1] / unneeded_reg_upper[i-1].
680 for(hr=HOST_REGS-1;hr>=0;hr--)
681 {
682 r=cur->regmap[hr];
683 if(r>=0) {
684 if(r<64) {
685 if((cur->u>>r)&1) {
686 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
687 cur->regmap[hr]=reg;
688 cur->dirty&=~(1<<hr);
689 cur->isconst&=~(1<<hr);
690 return;
691 }
692 }
693 }
694 else
695 {
696 if((cur->uu>>(r&63))&1) {
697 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
698 cur->regmap[hr]=reg;
699 cur->dirty&=~(1<<hr);
700 cur->isconst&=~(1<<hr);
701 return;
702 }
703 }
704 }
705 }
706 }
707
708 // Ok, now we have to evict someone
709 // Pick a register we hopefully won't need soon
710 // TODO: we might want to follow unconditional jumps here
711 // TODO: get rid of dupe code and make this into a function
// hsn[] starts at 10 for every register and is then updated by lsn().
712 u_char hsn[MAXREG+1];
713 memset(hsn,10,sizeof(hsn));
714 int j;
715 lsn(hsn,i,&preferred_reg);
716 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
717 if(i>0) {
718 // Don't evict the cycle count at entry points, otherwise the entry
719 // stub will have to write it.
720 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
721 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass (values 10..3): skips operands of instruction i-1; unlike
// alloc_reg, HOST_CCREG is eligible whenever hsn[CCREG] is above 2.
722 for(j=10;j>=3;j--)
723 {
724 for(r=1;r<=MAXREG;r++)
725 {
726 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
727 for(hr=0;hr<HOST_REGS;hr++) {
728 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
729 if(cur->regmap[hr]==r+64) {
730 cur->regmap[hr]=reg;
731 cur->dirty&=~(1<<hr);
732 cur->isconst&=~(1<<hr);
733 return;
734 }
735 }
736 }
737 for(hr=0;hr<HOST_REGS;hr++) {
738 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
739 if(cur->regmap[hr]==r) {
740 cur->regmap[hr]=reg;
741 cur->dirty&=~(1<<hr);
742 cur->isconst&=~(1<<hr);
743 return;
744 }
745 }
746 }
747 }
748 }
749 }
750 }
// Last resort (values 10..0): no operand or HOST_CCREG restrictions.
751 for(j=10;j>=0;j--)
752 {
753 for(r=1;r<=MAXREG;r++)
754 {
755 if(hsn[r]==j) {
756 for(hr=0;hr<HOST_REGS;hr++) {
757 if(cur->regmap[hr]==r+64) {
758 cur->regmap[hr]=reg;
759 cur->dirty&=~(1<<hr);
760 cur->isconst&=~(1<<hr);
761 return;
762 }
763 }
764 for(hr=0;hr<HOST_REGS;hr++) {
765 if(cur->regmap[hr]==r) {
766 cur->regmap[hr]=reg;
767 cur->dirty&=~(1<<hr);
768 cur->isconst&=~(1<<hr);
769 return;
770 }
771 }
772 }
773 }
774 }
775 printf("This shouldn't happen");exit(1);
776}
777// Allocate a specific ARM register.
778void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
779{
780 int n;
f776eb14 781 int dirty=0;
57871462 782
783 // see if it's already allocated (and dealloc it)
784 for(n=0;n<HOST_REGS;n++)
785 {
f776eb14 786 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
787 dirty=(cur->dirty>>n)&1;
788 cur->regmap[n]=-1;
789 }
57871462 790 }
791
792 cur->regmap[hr]=reg;
793 cur->dirty&=~(1<<hr);
f776eb14 794 cur->dirty|=dirty<<hr;
57871462 795 cur->isconst&=~(1<<hr);
796}
797
798// Alloc cycle count into dedicated register
799alloc_cc(struct regstat *cur,int i)
800{
801 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
802}
803
804/* Special alloc */
805
806
807/* Assembler */
808
// Printable name of each host register number 0..15, used as the %s
// arguments of the assem_debug() calls below.  Registers 11, 13, 14 and 15
// are printed as fp, sp, lr and pc.
809char regname[16][4] = {
810 "r0",
811 "r1",
812 "r2",
813 "r3",
814 "r4",
815 "r5",
816 "r6",
817 "r7",
818 "r8",
819 "r9",
820 "r10",
821 "fp",
822 "r12",
823 "sp",
824 "lr",
825 "pc"};
826
827void output_byte(u_char byte)
828{
829 *(out++)=byte;
830}
831void output_modrm(u_char mod,u_char rm,u_char ext)
832{
833 assert(mod<4);
834 assert(rm<8);
835 assert(ext<8);
836 u_char byte=(mod<<6)|(ext<<3)|rm;
837 *(out++)=byte;
838}
839void output_sib(u_char scale,u_char index,u_char base)
840{
841 assert(scale<4);
842 assert(index<8);
843 assert(base<8);
844 u_char byte=(scale<<6)|(index<<3)|base;
845 *(out++)=byte;
846}
847void output_w32(u_int word)
848{
849 *((u_int *)out)=word;
850 out+=4;
851}
// Build the register fields of an instruction word: rn in bits 19..16,
// rd in bits 15..12 and rm in bits 3..0.  Each register must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the operand fields of an immediate-form instruction word:
// rn in bits 19..16, rd in bits 15..12, imm in bits 7..0, and a rotate
// field derived from 'shift' as ((32-shift)&30)<<7, so that the encoded
// value is imm shifted left by 'shift'.  'shift' must be even, imm < 256.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=(32-shift)&30;
  return (rn<<16)|(rd<<12)|(rotate<<7)|imm;
}
// Try to express 'imm' as an 8-bit value with an even rotation.
// On success stores the 12-bit operand field (rotate<<7 | imm8) in *encoded
// and returns 1; zero is always encodable as 0.  Returns 0, with *encoded
// left at 0, when no rotation brings the value below 256.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 882void genimm_checked(u_int imm,u_int *encoded)
883{
884 u_int ret=genimm(imm,encoded);
885 assert(ret);
886}
57871462 887u_int genjmp(u_int addr)
888{
889 int offset=addr-(int)out-8;
e80343e2 890 if(offset<-33554432||offset>=33554432) {
891 if (addr>2) {
892 printf("genjmp: out of range: %08x\n", offset);
893 exit(1);
894 }
895 return 0;
896 }
57871462 897 return ((u_int)offset>>2)&0xffffff;
898}
899
900void emit_mov(int rs,int rt)
901{
902 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
903 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
904}
905
906void emit_movs(int rs,int rt)
907{
908 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
909 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
910}
911
912void emit_add(int rs1,int rs2,int rt)
913{
914 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
915 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
916}
917
918void emit_adds(int rs1,int rs2,int rt)
919{
920 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
921 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
922}
923
924void emit_adcs(int rs1,int rs2,int rt)
925{
926 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
927 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
928}
929
930void emit_sbc(int rs1,int rs2,int rt)
931{
932 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
933 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
934}
935
936void emit_sbcs(int rs1,int rs2,int rt)
937{
938 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
939 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
940}
941
942void emit_neg(int rs, int rt)
943{
944 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
945 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
946}
947
948void emit_negs(int rs, int rt)
949{
950 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
951 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
952}
953
954void emit_sub(int rs1,int rs2,int rt)
955{
956 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
957 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
958}
959
960void emit_subs(int rs1,int rs2,int rt)
961{
962 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
963 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
964}
965
966void emit_zeroreg(int rt)
967{
968 assem_debug("mov %s,#0\n",regname[rt]);
969 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
970}
971
790ee18e 972void emit_loadlp(u_int imm,u_int rt)
973{
974 add_literal((int)out,imm);
975 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
976 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
977}
978void emit_movw(u_int imm,u_int rt)
979{
980 assert(imm<65536);
981 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
982 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
983}
984void emit_movt(u_int imm,u_int rt)
985{
986 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
987 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
988}
989void emit_movimm(u_int imm,u_int rt)
990{
991 u_int armval;
992 if(genimm(imm,&armval)) {
993 assem_debug("mov %s,#%d\n",regname[rt],imm);
994 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
995 }else if(genimm(~imm,&armval)) {
996 assem_debug("mvn %s,#%d\n",regname[rt],imm);
997 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
998 }else if(imm<65536) {
999 #ifdef ARMv5_ONLY
1000 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1001 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1002 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1003 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1004 #else
1005 emit_movw(imm,rt);
1006 #endif
1007 }else{
1008 #ifdef ARMv5_ONLY
1009 emit_loadlp(imm,rt);
1010 #else
1011 emit_movw(imm&0x0000FFFF,rt);
1012 emit_movt(imm&0xFFFF0000,rt);
1013 #endif
1014 }
1015}
1016void emit_pcreladdr(u_int rt)
1017{
1018 assem_debug("add %s,pc,#?\n",regname[rt]);
1019 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1020}
1021
57871462 1022void emit_loadreg(int r, int hr)
1023{
3d624f89 1024#ifdef FORCE32
1025 if(r&64) {
1026 printf("64bit load in 32bit mode!\n");
7f2607ea 1027 assert(0);
1028 return;
3d624f89 1029 }
1030#endif
57871462 1031 if((r&63)==0)
1032 emit_zeroreg(hr);
1033 else {
3d624f89 1034 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1035 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1036 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1037 if(r==CCREG) addr=(int)&cycle_count;
1038 if(r==CSREG) addr=(int)&Status;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 if(r==INVCP) addr=(int)&invc_ptr;
1041 u_int offset = addr-(u_int)&dynarec_local;
1042 assert(offset<4096);
1043 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1044 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1045 }
1046}
1047void emit_storereg(int r, int hr)
1048{
3d624f89 1049#ifdef FORCE32
1050 if(r&64) {
1051 printf("64bit store in 32bit mode!\n");
7f2607ea 1052 assert(0);
1053 return;
3d624f89 1054 }
1055#endif
1056 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1057 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1058 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1059 if(r==CCREG) addr=(int)&cycle_count;
1060 if(r==FSREG) addr=(int)&FCR31;
1061 u_int offset = addr-(u_int)&dynarec_local;
1062 assert(offset<4096);
1063 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1064 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1065}
1066
1067void emit_test(int rs, int rt)
1068{
1069 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1070 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1071}
1072
1073void emit_testimm(int rs,int imm)
1074{
1075 u_int armval;
5a05d80c 1076 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1077 genimm_checked(imm,&armval);
57871462 1078 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1079}
1080
b9b61529 1081void emit_testeqimm(int rs,int imm)
1082{
1083 u_int armval;
1084 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1085 genimm_checked(imm,&armval);
b9b61529 1086 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1087}
1088
57871462 1089void emit_not(int rs,int rt)
1090{
1091 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1092 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1093}
1094
b9b61529 1095void emit_mvnmi(int rs,int rt)
1096{
1097 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1098 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1099}
1100
57871462 1101void emit_and(u_int rs1,u_int rs2,u_int rt)
1102{
1103 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1105}
1106
1107void emit_or(u_int rs1,u_int rs2,u_int rt)
1108{
1109 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1110 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1111}
1112void emit_or_and_set_flags(int rs1,int rs2,int rt)
1113{
1114 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1115 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1116}
1117
f70d384d 1118void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1119{
1120 assert(rs<16);
1121 assert(rt<16);
1122 assert(imm<32);
1123 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1124 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1125}
1126
576bbd8f 1127void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1128{
1129 assert(rs<16);
1130 assert(rt<16);
1131 assert(imm<32);
1132 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1133 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1134}
1135
57871462 1136void emit_xor(u_int rs1,u_int rs2,u_int rt)
1137{
1138 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1139 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1140}
1141
57871462 1142void emit_addimm(u_int rs,int imm,u_int rt)
1143{
1144 assert(rs<16);
1145 assert(rt<16);
1146 if(imm!=0) {
57871462 1147 u_int armval;
1148 if(genimm(imm,&armval)) {
1149 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1150 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1151 }else if(genimm(-imm,&armval)) {
1152 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1153 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1154 }else if(imm<0) {
ffb0b9e0 1155 assert(imm>-65536);
57871462 1156 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1157 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1158 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1159 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1160 }else{
ffb0b9e0 1161 assert(imm<65536);
57871462 1162 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1163 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1164 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1165 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1166 }
1167 }
1168 else if(rs!=rt) emit_mov(rs,rt);
1169}
1170
1171void emit_addimm_and_set_flags(int imm,int rt)
1172{
1173 assert(imm>-65536&&imm<65536);
1174 u_int armval;
1175 if(genimm(imm,&armval)) {
1176 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1177 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1178 }else if(genimm(-imm,&armval)) {
1179 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1180 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1181 }else if(imm<0) {
1182 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1183 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1184 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1185 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1186 }else{
1187 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1188 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1189 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1190 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1191 }
1192}
1193void emit_addimm_no_flags(u_int imm,u_int rt)
1194{
1195 emit_addimm(rt,imm,rt);
1196}
1197
1198void emit_addnop(u_int r)
1199{
1200 assert(r<16);
1201 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1202 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1203}
1204
1205void emit_adcimm(u_int rs,int imm,u_int rt)
1206{
1207 u_int armval;
cfbd3c6e 1208 genimm_checked(imm,&armval);
57871462 1209 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1210 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1211}
1212/*void emit_sbcimm(int imm,u_int rt)
1213{
1214 u_int armval;
cfbd3c6e 1215 genimm_checked(imm,&armval);
57871462 1216 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1217 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1218}*/
1219void emit_sbbimm(int imm,u_int rt)
1220{
1221 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1222 assert(rt<8);
1223 if(imm<128&&imm>=-128) {
1224 output_byte(0x83);
1225 output_modrm(3,rt,3);
1226 output_byte(imm);
1227 }
1228 else
1229 {
1230 output_byte(0x81);
1231 output_modrm(3,rt,3);
1232 output_w32(imm);
1233 }
1234}
1235void emit_rscimm(int rs,int imm,u_int rt)
1236{
1237 assert(0);
1238 u_int armval;
cfbd3c6e 1239 genimm_checked(imm,&armval);
57871462 1240 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1241 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1242}
1243
1244void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1245{
1246 // TODO: if(genimm(imm,&armval)) ...
1247 // else
1248 emit_movimm(imm,HOST_TEMPREG);
1249 emit_adds(HOST_TEMPREG,rsl,rtl);
1250 emit_adcimm(rsh,0,rth);
1251}
1252
1253void emit_sbb(int rs1,int rs2)
1254{
1255 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1256 output_byte(0x19);
1257 output_modrm(3,rs1,rs2);
1258}
1259
1260void emit_andimm(int rs,int imm,int rt)
1261{
1262 u_int armval;
790ee18e 1263 if(imm==0) {
1264 emit_zeroreg(rt);
1265 }else if(genimm(imm,&armval)) {
57871462 1266 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1267 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1268 }else if(genimm(~imm,&armval)) {
1269 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1270 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1271 }else if(imm==65535) {
1272 #ifdef ARMv5_ONLY
1273 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1274 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1275 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1276 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1277 #else
1278 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1279 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1280 #endif
1281 }else{
1282 assert(imm>0&&imm<65535);
1283 #ifdef ARMv5_ONLY
1284 assem_debug("mov r14,#%d\n",imm&0xFF00);
1285 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1286 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1287 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1288 #else
1289 emit_movw(imm,HOST_TEMPREG);
1290 #endif
1291 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1292 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1293 }
1294}
1295
1296void emit_orimm(int rs,int imm,int rt)
1297{
1298 u_int armval;
790ee18e 1299 if(imm==0) {
1300 if(rs!=rt) emit_mov(rs,rt);
1301 }else if(genimm(imm,&armval)) {
57871462 1302 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1303 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1304 }else{
1305 assert(imm>0&&imm<65536);
1306 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1307 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1308 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1309 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1310 }
1311}
1312
1313void emit_xorimm(int rs,int imm,int rt)
1314{
57871462 1315 u_int armval;
790ee18e 1316 if(imm==0) {
1317 if(rs!=rt) emit_mov(rs,rt);
1318 }else if(genimm(imm,&armval)) {
57871462 1319 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1320 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1321 }else{
514ed0d9 1322 assert(imm>0&&imm<65536);
57871462 1323 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1324 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1325 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1326 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1327 }
1328}
1329
1330void emit_shlimm(int rs,u_int imm,int rt)
1331{
1332 assert(imm>0);
1333 assert(imm<32);
1334 //if(imm==1) ...
1335 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1336 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1337}
1338
c6c3b1b3 1339void emit_lsls_imm(int rs,int imm,int rt)
1340{
1341 assert(imm>0);
1342 assert(imm<32);
1343 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1344 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1345}
1346
57871462 1347void emit_shrimm(int rs,u_int imm,int rt)
1348{
1349 assert(imm>0);
1350 assert(imm<32);
1351 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1352 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1353}
1354
1355void emit_sarimm(int rs,u_int imm,int rt)
1356{
1357 assert(imm>0);
1358 assert(imm<32);
1359 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1361}
1362
1363void emit_rorimm(int rs,u_int imm,int rt)
1364{
1365 assert(imm>0);
1366 assert(imm<32);
1367 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1369}
1370
1371void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1372{
1373 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1374 assert(imm>0);
1375 assert(imm<32);
1376 //if(imm==1) ...
1377 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1378 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1379 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1380 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1381}
1382
1383void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1384{
1385 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1386 assert(imm>0);
1387 assert(imm<32);
1388 //if(imm==1) ...
1389 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1390 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1391 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1392 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1393}
1394
b9b61529 1395void emit_signextend16(int rs,int rt)
1396{
1397 #ifdef ARMv5_ONLY
1398 emit_shlimm(rs,16,rt);
1399 emit_sarimm(rt,16,rt);
1400 #else
1401 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1402 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1403 #endif
1404}
1405
c6c3b1b3 1406void emit_signextend8(int rs,int rt)
1407{
1408 #ifdef ARMv5_ONLY
1409 emit_shlimm(rs,24,rt);
1410 emit_sarimm(rt,24,rt);
1411 #else
1412 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1413 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1414 #endif
1415}
1416
57871462 1417void emit_shl(u_int rs,u_int shift,u_int rt)
1418{
1419 assert(rs<16);
1420 assert(rt<16);
1421 assert(shift<16);
1422 //if(imm==1) ...
1423 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1424 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1425}
1426void emit_shr(u_int rs,u_int shift,u_int rt)
1427{
1428 assert(rs<16);
1429 assert(rt<16);
1430 assert(shift<16);
1431 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1432 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1433}
1434void emit_sar(u_int rs,u_int shift,u_int rt)
1435{
1436 assert(rs<16);
1437 assert(rt<16);
1438 assert(shift<16);
1439 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1440 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1441}
1442void emit_shlcl(int r)
1443{
1444 assem_debug("shl %%%s,%%cl\n",regname[r]);
1445 assert(0);
1446}
1447void emit_shrcl(int r)
1448{
1449 assem_debug("shr %%%s,%%cl\n",regname[r]);
1450 assert(0);
1451}
1452void emit_sarcl(int r)
1453{
1454 assem_debug("sar %%%s,%%cl\n",regname[r]);
1455 assert(0);
1456}
1457
1458void emit_shldcl(int r1,int r2)
1459{
1460 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1461 assert(0);
1462}
1463void emit_shrdcl(int r1,int r2)
1464{
1465 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1466 assert(0);
1467}
1468void emit_orrshl(u_int rs,u_int shift,u_int rt)
1469{
1470 assert(rs<16);
1471 assert(rt<16);
1472 assert(shift<16);
1473 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1474 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1475}
1476void emit_orrshr(u_int rs,u_int shift,u_int rt)
1477{
1478 assert(rs<16);
1479 assert(rt<16);
1480 assert(shift<16);
1481 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1482 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1483}
1484
1485void emit_cmpimm(int rs,int imm)
1486{
1487 u_int armval;
1488 if(genimm(imm,&armval)) {
5a05d80c 1489 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1490 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1491 }else if(genimm(-imm,&armval)) {
5a05d80c 1492 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1493 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1494 }else if(imm>0) {
1495 assert(imm<65536);
1496 #ifdef ARMv5_ONLY
1497 emit_movimm(imm,HOST_TEMPREG);
1498 #else
1499 emit_movw(imm,HOST_TEMPREG);
1500 #endif
1501 assem_debug("cmp %s,r14\n",regname[rs]);
1502 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1503 }else{
1504 assert(imm>-65536);
1505 #ifdef ARMv5_ONLY
1506 emit_movimm(-imm,HOST_TEMPREG);
1507 #else
1508 emit_movw(-imm,HOST_TEMPREG);
1509 #endif
1510 assem_debug("cmn %s,r14\n",regname[rs]);
1511 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1512 }
1513}
1514
1515void emit_cmovne(u_int *addr,int rt)
1516{
1517 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1518 assert(0);
1519}
1520void emit_cmovl(u_int *addr,int rt)
1521{
1522 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1523 assert(0);
1524}
1525void emit_cmovs(u_int *addr,int rt)
1526{
1527 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1528 assert(0);
1529}
1530void emit_cmovne_imm(int imm,int rt)
1531{
1532 assem_debug("movne %s,#%d\n",regname[rt],imm);
1533 u_int armval;
cfbd3c6e 1534 genimm_checked(imm,&armval);
57871462 1535 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1536}
1537void emit_cmovl_imm(int imm,int rt)
1538{
1539 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1540 u_int armval;
cfbd3c6e 1541 genimm_checked(imm,&armval);
57871462 1542 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1543}
1544void emit_cmovb_imm(int imm,int rt)
1545{
1546 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1547 u_int armval;
cfbd3c6e 1548 genimm_checked(imm,&armval);
57871462 1549 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1550}
1551void emit_cmovs_imm(int imm,int rt)
1552{
1553 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1554 u_int armval;
cfbd3c6e 1555 genimm_checked(imm,&armval);
57871462 1556 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1557}
1558void emit_cmove_reg(int rs,int rt)
1559{
1560 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1561 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1562}
1563void emit_cmovne_reg(int rs,int rt)
1564{
1565 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1566 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1567}
1568void emit_cmovl_reg(int rs,int rt)
1569{
1570 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1571 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1572}
1573void emit_cmovs_reg(int rs,int rt)
1574{
1575 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1576 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1577}
1578
/*
 * Emit code for rt = (rs < imm) using the LT conditional move.
 * When rs and rt are distinct, rt is zeroed before the compare; when they
 * are the same register the compare must read rs first, so rt is cleared
 * afterwards with emit_movimm(0).
 */
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs==rt);

  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/*
 * Emit code for rt = (rs < imm) using the CC conditional move (the
 * carry-clear condition after the compare).
 * When rs and rt are distinct, rt is zeroed before the compare; when they
 * are the same register the compare must read rs first, so rt is cleared
 * afterwards with emit_movimm(0).
 */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs==rt);

  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
/*
 * Emit code for rt = ((rsh:rsl) < imm), with a 64-bit register pair and a
 * 32-bit immediate.  The low-word result from emit_slti32 is computed
 * first and then overridden based on the high word:
 *   imm >= 0: high word non-zero -> 0, high word negative -> 1
 *   imm <  0: high word != -1    -> 0, high word <  -1    -> 1
 * rsh must not alias rt (asserted).
 */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
/*
 * Emit code for the unsigned variant of the 64-bit "set if less than
 * immediate".  The low-word result from emit_sltiu32 is computed first and
 * then overridden based on the high word:
 *   imm >= 0: high word non-zero -> 0
 *   imm <  0: high word != -1    -> 1
 * rsh must not alias rt (asserted).
 */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1625
1626void emit_cmp(int rs,int rt)
1627{
1628 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1629 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1630}
/*
 * Emit code for rt = (rs > 0): compare rs with 1, preload rt with 1, then
 * replace it with 0 under the LT condition.
 */
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);       // flags from rs - 1
  emit_movimm(1,rt);       // assume "greater than zero"
  emit_cmovl_imm(0,rt);    // ...unless rs < 1
}
/*
 * Emit code for rt = (rs != 0).  When rs and rt differ, a flag-setting
 * move copies rs into rt; when they are the same register a "tst rs,rs"
 * is used instead.  rt is then set to 1 under the NE condition (a zero
 * input leaves rt holding 0).
 */
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) {
    emit_test(rs,rs);
  } else {
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
/*
 * Emit code for rt = ((rsh:rsl) > 0), treating the register pair as a
 * signed 64-bit value:
 *   high word negative        -> 0
 *   high word non-zero (>0)   -> 1
 *   high word zero            -> 1 iff the low word is non-zero
 *
 * The low word is only a magnitude, so it is tested for "non-zero" with
 * emit_set_nz32.  The previous signed "> 0" test on the low word returned
 * 0 for values such as 0x00000000_80000000, which are positive.
 *
 * NOTE(review): rt is written before rsh is tested, so rt must not alias
 * rsh - confirm callers guarantee this.
 */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_nz32(rsl,rt);     // rt = (low != 0)
  emit_test(rsh,rsh);        // flags from the high word
  emit_cmovne_imm(1,rt);     // high != 0 -> provisionally 1
  emit_cmovs_imm(0,rt);      // high negative -> 0
}
/*
 * Emit code for rt = ((rsh:rsl) != 0): a flag-setting OR of both halves
 * into rt, followed by "movne rt,#1".
 */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);   // rt = rsh|rsl, sets flags
  emit_cmovne_imm(1,rt);               // collapse any non-zero to 1
}
/*
 * Emit code for rt = (rs1 < rs2) using the LT conditional move.
 * If rt aliases either source it cannot be cleared before the compare, so
 * it is cleared afterwards with emit_movimm(0).
 */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1==rt||rs2==rt);

  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/*
 * Emit code for rt = (rs1 < rs2) using the CC conditional move (the
 * carry-clear condition after the compare).
 * If rt aliases either source it cannot be cleared before the compare, so
 * it is cleared afterwards with emit_movimm(0).
 */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1==rt||rs2==rt);

  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1675void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1676{
1677 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1678 assert(u1!=rt);
1679 assert(u2!=rt);
1680 emit_cmp(l1,l2);
1681 emit_movimm(0,rt);
1682 emit_sbcs(u1,u2,HOST_TEMPREG);
1683 emit_cmovl_imm(1,rt);
1684}
1685void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1686{
1687 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1688 assert(u1!=rt);
1689 assert(u2!=rt);
1690 emit_cmp(l1,l2);
1691 emit_movimm(0,rt);
1692 emit_sbcs(u1,u2,HOST_TEMPREG);
1693 emit_cmovb_imm(1,rt);
1694}
1695
1696void emit_call(int a)
1697{
1698 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1699 u_int offset=genjmp(a);
1700 output_w32(0xeb000000|offset);
1701}
1702void emit_jmp(int a)
1703{
1704 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1705 u_int offset=genjmp(a);
1706 output_w32(0xea000000|offset);
1707}
1708void emit_jne(int a)
1709{
1710 assem_debug("bne %x\n",a);
1711 u_int offset=genjmp(a);
1712 output_w32(0x1a000000|offset);
1713}
1714void emit_jeq(int a)
1715{
1716 assem_debug("beq %x\n",a);
1717 u_int offset=genjmp(a);
1718 output_w32(0x0a000000|offset);
1719}
1720void emit_js(int a)
1721{
1722 assem_debug("bmi %x\n",a);
1723 u_int offset=genjmp(a);
1724 output_w32(0x4a000000|offset);
1725}
1726void emit_jns(int a)
1727{
1728 assem_debug("bpl %x\n",a);
1729 u_int offset=genjmp(a);
1730 output_w32(0x5a000000|offset);
1731}
1732void emit_jl(int a)
1733{
1734 assem_debug("blt %x\n",a);
1735 u_int offset=genjmp(a);
1736 output_w32(0xba000000|offset);
1737}
1738void emit_jge(int a)
1739{
1740 assem_debug("bge %x\n",a);
1741 u_int offset=genjmp(a);
1742 output_w32(0xaa000000|offset);
1743}
1744void emit_jno(int a)
1745{
1746 assem_debug("bvc %x\n",a);
1747 u_int offset=genjmp(a);
1748 output_w32(0x7a000000|offset);
1749}
1750void emit_jc(int a)
1751{
1752 assem_debug("bcs %x\n",a);
1753 u_int offset=genjmp(a);
1754 output_w32(0x2a000000|offset);
1755}
1756void emit_jcc(int a)
1757{
1758 assem_debug("bcc %x\n",a);
1759 u_int offset=genjmp(a);
1760 output_w32(0x3a000000|offset);
1761}
1762
/* Unimplemented stub: logs the request, then asserts unconditionally. */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);   // not supported by this backend
}
/* Unimplemented stub: logs the request, then asserts unconditionally. */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);   // not supported by this backend
}
/* Unimplemented stub: logs the request, then asserts unconditionally. */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);   // not supported by this backend
}
1778void emit_pushreg(u_int r)
1779{
1780 assem_debug("push %%%s\n",regname[r]);
1781 assert(0);
1782}
1783void emit_popreg(u_int r)
1784{
1785 assem_debug("pop %%%s\n",regname[r]);
1786 assert(0);
1787}
1788void emit_callreg(u_int r)
1789{
c6c3b1b3 1790 assert(r<15);
1791 assem_debug("blx %s\n",regname[r]);
1792 output_w32(0xe12fff30|r);
57871462 1793}
1794void emit_jmpreg(u_int r)
1795{
1796 assem_debug("mov pc,%s\n",regname[r]);
1797 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1798}
1799
1800void emit_readword_indexed(int offset, int rs, int rt)
1801{
1802 assert(offset>-4096&&offset<4096);
1803 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1804 if(offset>=0) {
1805 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1806 }else{
1807 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1808 }
1809}
1810void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1811{
1812 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1813 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1814}
c6c3b1b3 1815void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1816{
1817 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1818 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1819}
1820void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1821{
1822 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1823 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1824}
1825void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1826{
1827 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1828 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1829}
1830void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1831{
1832 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1833 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1834}
1835void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1836{
1837 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1838 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1839}
/*
 * Word load with an optional mapping register.
 * map < 0: plain "ldr rt,[rs,#addr]".
 * map >= 0: "ldr rt,[rs,map,lsl #2]"; addr must be 0 in this case.
 */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/*
 * 64-bit load into (rh:rl) with an optional mapping register.
 * The word at addr goes to rh (skipped when rh < 0) and the following
 * word to rl.
 * map < 0: two offset loads (addr and addr+4).
 * map >= 0: rh must not alias rs; the map register is incremented by 1
 *           between the loads (it is scaled by 4 in the addressing mode),
 *           so map is modified by the emitted code.
 */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map<0) {
    if(rh>=0) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }

  assert(rh!=rs);
  if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1860void emit_movsbl_indexed(int offset, int rs, int rt)
1861{
1862 assert(offset>-256&&offset<256);
1863 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1864 if(offset>=0) {
1865 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1866 }else{
1867 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1868 }
1869}
1870void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1871{
1872 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1873 else {
1874 if(addr==0) {
1875 emit_shlimm(map,2,map);
1876 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1877 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1878 }else{
1879 assert(addr>-256&&addr<256);
1880 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1881 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1882 emit_movsbl_indexed(addr, rt, rt);
1883 }
1884 }
1885}
1886void emit_movswl_indexed(int offset, int rs, int rt)
1887{
1888 assert(offset>-256&&offset<256);
1889 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1890 if(offset>=0) {
1891 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1892 }else{
1893 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1894 }
1895}
1896void emit_movzbl_indexed(int offset, int rs, int rt)
1897{
1898 assert(offset>-4096&&offset<4096);
1899 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1900 if(offset>=0) {
1901 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1902 }else{
1903 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1904 }
1905}
1906void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1907{
1908 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1909 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1910}
/*
 * Zero-extending byte load with an optional mapping register.
 * map < 0:   plain offset load.
 * addr == 0: "ldrb rt,[rs,map,lsl #2]".
 * otherwise: rt = rs + addr first, then the scaled-index load from rt.
 */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr==0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs,addr,rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1923void emit_movzwl_indexed(int offset, int rs, int rt)
1924{
1925 assert(offset>-256&&offset<256);
1926 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1927 if(offset>=0) {
1928 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1929 }else{
1930 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1931 }
1932}
054175e9 1933static void emit_ldrd(int offset, int rs, int rt)
1934{
1935 assert(offset>-256&&offset<256);
1936 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1937 if(offset>=0) {
1938 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1939 }else{
1940 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1941 }
1942}
57871462 1943void emit_readword(int addr, int rt)
1944{
1945 u_int offset = addr-(u_int)&dynarec_local;
1946 assert(offset<4096);
1947 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1948 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1949}
1950void emit_movsbl(int addr, int rt)
1951{
1952 u_int offset = addr-(u_int)&dynarec_local;
1953 assert(offset<256);
1954 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1955 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1956}
1957void emit_movswl(int addr, int rt)
1958{
1959 u_int offset = addr-(u_int)&dynarec_local;
1960 assert(offset<256);
1961 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1962 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1963}
1964void emit_movzbl(int addr, int rt)
1965{
1966 u_int offset = addr-(u_int)&dynarec_local;
1967 assert(offset<4096);
1968 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1969 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1970}
1971void emit_movzwl(int addr, int rt)
1972{
1973 u_int offset = addr-(u_int)&dynarec_local;
1974 assert(offset<256);
1975 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1976 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1977}
1978void emit_movzwl_reg(int rs, int rt)
1979{
1980 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1981 assert(0);
1982}
1983
1984void emit_xchg(int rs, int rt)
1985{
1986 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1987 assert(0);
1988}
1989void emit_writeword_indexed(int rt, int offset, int rs)
1990{
1991 assert(offset>-4096&&offset<4096);
1992 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1993 if(offset>=0) {
1994 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1995 }else{
1996 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1997 }
1998}
1999void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2000{
2001 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2002 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2003}
/*
 * Word store with an optional mapping register.
 * map < 0:  plain "str rt,[rs,#addr]".
 * map >= 0: "str rt,[rs,map,lsl #2]"; addr must be 0 in this case.
 * The temp parameter is not used by this function.
 */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/*
 * 64-bit store of (rh:rl) with an optional mapping register.
 * map < 0:  rh goes to addr (skipped when rh < 0) and rl to addr+4.
 * map >= 0: rh is required.  When temp differs from rs, temp = map+1 is
 *           used as the index for the second word; otherwise rs itself is
 *           advanced by 4 between the two stores.
 */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }

  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2028void emit_writehword_indexed(int rt, int offset, int rs)
2029{
2030 assert(offset>-256&&offset<256);
2031 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2032 if(offset>=0) {
2033 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2034 }else{
2035 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2036 }
2037}
2038void emit_writebyte_indexed(int rt, int offset, int rs)
2039{
2040 assert(offset>-4096&&offset<4096);
2041 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2042 if(offset>=0) {
2043 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2044 }else{
2045 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2046 }
2047}
2048void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2049{
2050 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2051 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2052}
/*
 * Byte store with an optional mapping register.
 * map < 0:   plain offset store.
 * addr == 0: "strb rt,[rs,map,lsl #2]".
 * otherwise: temp = rs + addr first, then the scaled-index store via temp.
 */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr==0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs,addr,temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
b96d3df7 2065void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2066{
2067 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2068 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2069}
2070void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2071{
2072 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2073 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2074}
2075void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2076{
2077 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2078 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2079}
57871462 2080void emit_writeword(int rt, int addr)
2081{
2082 u_int offset = addr-(u_int)&dynarec_local;
2083 assert(offset<4096);
2084 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2085 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2086}
2087void emit_writehword(int rt, int addr)
2088{
2089 u_int offset = addr-(u_int)&dynarec_local;
2090 assert(offset<256);
2091 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2092 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2093}
2094void emit_writebyte(int rt, int addr)
2095{
2096 u_int offset = addr-(u_int)&dynarec_local;
2097 assert(offset<4096);
74426039 2098 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2099 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2100}
/* Unimplemented stub: logs the request, then asserts unconditionally. */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);   // not supported by this backend
}
/* Unimplemented stub: logs the request, then asserts unconditionally. */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);   // not supported by this backend
}
2111
2112void emit_mul(int rs)
2113{
2114 assem_debug("mul %%%s\n",regname[rs]);
2115 assert(0);
2116}
2117void emit_imul(int rs)
2118{
2119 assem_debug("imul %%%s\n",regname[rs]);
2120 assert(0);
2121}
2122void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2123{
2124 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2125 assert(rs1<16);
2126 assert(rs2<16);
2127 assert(hi<16);
2128 assert(lo<16);
2129 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2130}
2131void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2132{
2133 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2134 assert(rs1<16);
2135 assert(rs2<16);
2136 assert(hi<16);
2137 assert(lo<16);
2138 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2139}
2140
2141void emit_div(int rs)
2142{
2143 assem_debug("div %%%s\n",regname[rs]);
2144 assert(0);
2145}
2146void emit_idiv(int rs)
2147{
2148 assem_debug("idiv %%%s\n",regname[rs]);
2149 assert(0);
2150}
/* Unimplemented stub: logs the request, then asserts unconditionally. */
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);   // not supported by this backend
}
2156
2157void emit_clz(int rs,int rt)
2158{
2159 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2160 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2161}
2162
2163void emit_subcs(int rs1,int rs2,int rt)
2164{
2165 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2166 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2167}
2168
2169void emit_shrcc_imm(int rs,u_int imm,int rt)
2170{
2171 assert(imm>0);
2172 assert(imm<32);
2173 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2174 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2175}
2176
b1be1eee 2177void emit_shrne_imm(int rs,u_int imm,int rt)
2178{
2179 assert(imm>0);
2180 assert(imm<32);
2181 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2182 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2183}
2184
57871462 2185void emit_negmi(int rs, int rt)
2186{
2187 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2188 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2189}
2190
2191void emit_negsmi(int rs, int rt)
2192{
2193 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2194 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2195}
2196
2197void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2198{
2199 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2200 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2201}
2202
2203void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2204{
2205 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2206 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2207}
2208
2209void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2210{
2211 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2212 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2213}
2214
2215void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2216{
2217 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2218 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2219}
2220
2221void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2222{
2223 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2224 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2225}
2226
2227void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2228{
2229 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2230 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2231}
2232
2233void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2234{
2235 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2236 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2237}
2238
2239void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2240{
2241 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2242 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2243}
2244
2245void emit_teq(int rs, int rt)
2246{
2247 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2248 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2249}
2250
2251void emit_rsbimm(int rs, int imm, int rt)
2252{
2253 u_int armval;
cfbd3c6e 2254 genimm_checked(imm,&armval);
57871462 2255 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2256 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2257}
2258
2259// Load 2 immediates optimizing for small code size
2260void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2261{
2262 emit_movimm(imm1,rt1);
2263 u_int armval;
2264 if(genimm(imm2-imm1,&armval)) {
2265 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2266 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2267 }else if(genimm(imm1-imm2,&armval)) {
2268 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2269 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2270 }
2271 else emit_movimm(imm2,rt2);
2272}
2273
2274// Conditionally select one of two immediates, optimizing for small code size
2275// This will only be called if HAVE_CMOV_IMM is defined
2276void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2277{
2278 u_int armval;
2279 if(genimm(imm2-imm1,&armval)) {
2280 emit_movimm(imm1,rt);
2281 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2282 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2283 }else if(genimm(imm1-imm2,&armval)) {
2284 emit_movimm(imm1,rt);
2285 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2286 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2287 }
2288 else {
2289 #ifdef ARMv5_ONLY
2290 emit_movimm(imm1,rt);
2291 add_literal((int)out,imm2);
2292 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2293 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2294 #else
2295 emit_movw(imm1&0x0000FFFF,rt);
2296 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2297 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2298 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2299 }
2300 emit_movt(imm1&0xFFFF0000,rt);
2301 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2302 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2303 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2304 }
2305 #endif
2306 }
2307}
2308
// Special case for checking invalid_code (absolute-address form).
// Not implemented in this ARM backend: asserts unconditionally.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2314
2315// special case for checking invalid_code
2316void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2317{
2318 assert(imm<128&&imm>=0);
2319 assert(r>=0&&r<16);
2320 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2321 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2322 emit_cmpimm(HOST_TEMPREG,imm);
2323}
2324
2325// special case for tlb mapping
2326void emit_addsr12(int rs1,int rs2,int rt)
2327{
2328 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2329 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2330}
2331
0bbd1454 2332void emit_callne(int a)
2333{
2334 assem_debug("blne %x\n",a);
2335 u_int offset=genjmp(a);
2336 output_w32(0x1b000000|offset);
2337}
2338
// Used to preload hash table entries
//
// Absolute-address prefetch is an x86 construct.  The previous body wrote the
// x86 opcode bytes (0F 18 /1 + disp32, 7 bytes) into the ARM instruction
// stream, which both corrupts the generated code and leaves `out` misaligned
// for every following 32-bit ARM instruction.  Like the other x86-only
// emitters in this file it now just logs the request and asserts; ARM callers
// should use emit_prefetchreg() with the address in a register.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  assert(0);
}
2348void emit_prefetchreg(int r)
2349{
2350 assem_debug("pld %s\n",regname[r]);
2351 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2352}
2353
2354// Special case for mini_ht
2355void emit_ldreq_indexed(int rs, u_int offset, int rt)
2356{
2357 assert(offset<4096);
2358 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2359 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2360}
2361
2362void emit_flds(int r,int sr)
2363{
2364 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2365 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2366}
2367
2368void emit_vldr(int r,int vr)
2369{
2370 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2371 output_w32(0xed900b00|(vr<<12)|(r<<16));
2372}
2373
2374void emit_fsts(int sr,int r)
2375{
2376 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2377 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2378}
2379
2380void emit_vstr(int vr,int r)
2381{
2382 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2383 output_w32(0xed800b00|(vr<<12)|(r<<16));
2384}
2385
// Emit "ftosizs s<d>,s<s>": convert single-precision s to a signed integer in
// single register d.
void emit_ftosizs(int s,int d)
{
  assem_debug("ftosizs s%d,s%d\n",d,s);
  int dst = ((d&14)<<11)|((d&1)<<22);
  int src = ((s&14)>>1)|((s&1)<<5);
  output_w32(0xeebd0ac0|dst|src);
}
2391
// Emit "ftosizd s<d>,d<s>": convert double-precision register s to a signed
// integer in single register d.
void emit_ftosizd(int s,int d)
{
  assem_debug("ftosizd s%d,d%d\n",d,s);
  int dst = ((d&14)<<11)|((d&1)<<22);
  int src = s&7;
  output_w32(0xeebd0bc0|dst|src);
}
2397
// Emit "fsitos s<d>,s<s>": convert the signed integer in single register s to
// single precision in d.
void emit_fsitos(int s,int d)
{
  assem_debug("fsitos s%d,s%d\n",d,s);
  int dst = ((d&14)<<11)|((d&1)<<22);
  int src = ((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80ac0|dst|src);
}
2403
// Emit "fsitod d<d>,s<s>": convert the signed integer in single register s to
// double precision in d.
void emit_fsitod(int s,int d)
{
  assem_debug("fsitod d%d,s%d\n",d,s);
  int dst = (d&7)<<12;
  int src = ((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80bc0|dst|src);
}
2409
// Emit "fcvtds d<d>,s<s>": widen single-precision s to double-precision d.
void emit_fcvtds(int s,int d)
{
  assem_debug("fcvtds d%d,s%d\n",d,s);
  int dst = (d&7)<<12;
  int src = ((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb70ac0|dst|src);
}
2415
// Emit "fcvtsd s<d>,d<s>": narrow double-precision s to single-precision d.
void emit_fcvtsd(int s,int d)
{
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  int dst = ((d&14)<<11)|((d&1)<<22);
  int src = s&7;
  output_w32(0xeeb70bc0|dst|src);
}
2421
// Emit "fsqrts s<d>,s<s>": single-precision square root of s into d.
// Both operands use the single-precision register encoding, so the debug
// trace prints both as s-registers (it previously printed the destination
// with a "d" prefix).
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2427
// Emit "fsqrtd d<d>,d<s>": double-precision square root of s into d.
// Both operands use the double-precision register encoding, so the debug
// trace prints both as d-registers (it previously printed the destination
// with an "s" prefix).
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2433
// Emit "fabss s<d>,s<s>": single-precision absolute value of s into d.
// Both operands use the single-precision register encoding, so the debug
// trace prints both as s-registers (it previously printed the destination
// with a "d" prefix).
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2439
// Emit "fabsd d<d>,d<s>": double-precision absolute value of s into d.
// Both operands use the double-precision register encoding, so the debug
// trace prints both as d-registers (it previously printed the destination
// with an "s" prefix).
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2445
// Emit "fnegs s<d>,s<s>": single-precision negation of s into d.
// Both operands use the single-precision register encoding, so the debug
// trace prints both as s-registers (it previously printed the destination
// with a "d" prefix).
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2451
// Emit "fnegd d<d>,d<s>": double-precision negation of s into d.
// Both operands use the double-precision register encoding, so the debug
// trace prints both as d-registers (it previously printed the destination
// with an "s" prefix).
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2457
// Emit "fadds s<d>,s<s1>,s<s2>": single-precision d = s1 + s2.
void emit_fadds(int s1,int s2,int d)
{
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  int dst = ((d&14)<<11)|((d&1)<<22);
  int op1 = ((s1&14)<<15)|((s1&1)<<7);
  int op2 = ((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a00|dst|op1|op2);
}
2463
// Emit "faddd d<d>,d<s1>,d<s2>": double-precision d = s1 + s2.
void emit_faddd(int s1,int s2,int d)
{
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  int dst = (d&7)<<12;
  int op1 = (s1&7)<<16;
  int op2 = s2&7;
  output_w32(0xee300b00|dst|op1|op2);
}
2469
// Emit "fsubs s<d>,s<s1>,s<s2>": single-precision d = s1 - s2.
void emit_fsubs(int s1,int s2,int d)
{
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  int dst = ((d&14)<<11)|((d&1)<<22);
  int op1 = ((s1&14)<<15)|((s1&1)<<7);
  int op2 = ((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a40|dst|op1|op2);
}
2475
// Emit "fsubd d<d>,d<s1>,d<s2>": double-precision d = s1 - s2.
void emit_fsubd(int s1,int s2,int d)
{
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  int dst = (d&7)<<12;
  int op1 = (s1&7)<<16;
  int op2 = s2&7;
  output_w32(0xee300b40|dst|op1|op2);
}
2481
// Emit "fmuls s<d>,s<s1>,s<s2>": single-precision d = s1 * s2.
void emit_fmuls(int s1,int s2,int d)
{
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  int dst = ((d&14)<<11)|((d&1)<<22);
  int op1 = ((s1&14)<<15)|((s1&1)<<7);
  int op2 = ((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee200a00|dst|op1|op2);
}
2487
// Emit "fmuld d<d>,d<s1>,d<s2>": double-precision d = s1 * s2.
void emit_fmuld(int s1,int s2,int d)
{
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  int dst = (d&7)<<12;
  int op1 = (s1&7)<<16;
  int op2 = s2&7;
  output_w32(0xee200b00|dst|op1|op2);
}
2493
// Emit "fdivs s<d>,s<s1>,s<s2>": single-precision d = s1 / s2.
void emit_fdivs(int s1,int s2,int d)
{
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  int dst = ((d&14)<<11)|((d&1)<<22);
  int op1 = ((s1&14)<<15)|((s1&1)<<7);
  int op2 = ((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee800a00|dst|op1|op2);
}
2499
// Emit "fdivd d<d>,d<s1>,d<s2>": double-precision d = s1 / s2.
void emit_fdivd(int s1,int s2,int d)
{
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  int dst = (d&7)<<12;
  int op1 = (s1&7)<<16;
  int op2 = s2&7;
  output_w32(0xee800b00|dst|op1|op2);
}
2505
2506void emit_fcmps(int x,int y)
2507{
2508 assem_debug("fcmps s14, s15\n");
2509 output_w32(0xeeb47a67);
2510}
2511
2512void emit_fcmpd(int x,int y)
2513{
2514 assem_debug("fcmpd d6, d7\n");
2515 output_w32(0xeeb46b47);
2516}
2517
2518void emit_fmstat()
2519{
2520 assem_debug("fmstat\n");
2521 output_w32(0xeef1fa10);
2522}
2523
2524void emit_bicne_imm(int rs,int imm,int rt)
2525{
2526 u_int armval;
cfbd3c6e 2527 genimm_checked(imm,&armval);
57871462 2528 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2529 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2530}
2531
2532void emit_biccs_imm(int rs,int imm,int rt)
2533{
2534 u_int armval;
cfbd3c6e 2535 genimm_checked(imm,&armval);
57871462 2536 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2537 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2538}
2539
2540void emit_bicvc_imm(int rs,int imm,int rt)
2541{
2542 u_int armval;
cfbd3c6e 2543 genimm_checked(imm,&armval);
57871462 2544 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2545 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2546}
2547
2548void emit_bichi_imm(int rs,int imm,int rt)
2549{
2550 u_int armval;
cfbd3c6e 2551 genimm_checked(imm,&armval);
57871462 2552 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2553 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2554}
2555
2556void emit_orrvs_imm(int rs,int imm,int rt)
2557{
2558 u_int armval;
cfbd3c6e 2559 genimm_checked(imm,&armval);
57871462 2560 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2561 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2562}
2563
b9b61529 2564void emit_orrne_imm(int rs,int imm,int rt)
2565{
2566 u_int armval;
cfbd3c6e 2567 genimm_checked(imm,&armval);
b9b61529 2568 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2569 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2570}
2571
2572void emit_andne_imm(int rs,int imm,int rt)
2573{
2574 u_int armval;
cfbd3c6e 2575 genimm_checked(imm,&armval);
b9b61529 2576 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2577 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2578}
2579
57871462 2580void emit_jno_unlikely(int a)
2581{
2582 //emit_jno(a);
2583 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2584 output_w32(0x72800000|rd_rn_rm(15,15,0));
2585}
2586
054175e9 2587static void save_regs_all(u_int reglist)
57871462 2588{
054175e9 2589 int i;
57871462 2590 if(!reglist) return;
2591 assem_debug("stmia fp,{");
054175e9 2592 for(i=0;i<16;i++)
2593 if(reglist&(1<<i))
2594 assem_debug("r%d,",i);
57871462 2595 assem_debug("}\n");
2596 output_w32(0xe88b0000|reglist);
2597}
054175e9 2598static void restore_regs_all(u_int reglist)
57871462 2599{
054175e9 2600 int i;
57871462 2601 if(!reglist) return;
2602 assem_debug("ldmia fp,{");
054175e9 2603 for(i=0;i<16;i++)
2604 if(reglist&(1<<i))
2605 assem_debug("r%d,",i);
57871462 2606 assem_debug("}\n");
2607 output_w32(0xe89b0000|reglist);
2608}
054175e9 2609// Save registers before function call
2610static void save_regs(u_int reglist)
2611{
2612 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2613 save_regs_all(reglist);
2614}
2615// Restore registers after function call
2616static void restore_regs(u_int reglist)
2617{
2618 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2619 restore_regs_all(reglist);
2620}
57871462 2621
2622// Write back consts using r14 so we don't disturb the other registers
2623void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2624{
2625 int hr;
2626 for(hr=0;hr<HOST_REGS;hr++) {
2627 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2628 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2629 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2630 int value=constmap[i][hr];
2631 if(value==0) {
2632 emit_zeroreg(HOST_TEMPREG);
2633 }
2634 else {
2635 emit_movimm(value,HOST_TEMPREG);
2636 }
2637 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2638#ifndef FORCE32
57871462 2639 if((i_is32>>i_regmap[hr])&1) {
2640 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2641 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2642 }
24385cae 2643#endif
57871462 2644 }
2645 }
2646 }
2647 }
2648}
2649
2650/* Stubs/epilogue */
2651
2652void literal_pool(int n)
2653{
2654 if(!literalcount) return;
2655 if(n) {
2656 if((int)out-literals[0][0]<4096-n) return;
2657 }
2658 u_int *ptr;
2659 int i;
2660 for(i=0;i<literalcount;i++)
2661 {
77750690 2662 u_int l_addr=(u_int)out;
2663 int j;
2664 for(j=0;j<i;j++) {
2665 if(literals[j][1]==literals[i][1]) {
2666 //printf("dup %08x\n",literals[i][1]);
2667 l_addr=literals[j][0];
2668 break;
2669 }
2670 }
57871462 2671 ptr=(u_int *)literals[i][0];
77750690 2672 u_int offset=l_addr-(u_int)ptr-8;
57871462 2673 assert(offset<4096);
2674 assert(!(offset&3));
2675 *ptr|=offset;
77750690 2676 if(l_addr==(u_int)out) {
2677 literals[i][0]=l_addr; // remember for dupes
2678 output_w32(literals[i][1]);
2679 }
57871462 2680 }
2681 literalcount=0;
2682}
2683
2684void literal_pool_jumpover(int n)
2685{
2686 if(!literalcount) return;
2687 if(n) {
2688 if((int)out-literals[0][0]<4096-n) return;
2689 }
2690 int jaddr=(int)out;
2691 emit_jmp(0);
2692 literal_pool(0);
2693 set_jump_target(jaddr,(int)out);
2694}
2695
2696emit_extjump2(int addr, int target, int linker)
2697{
2698 u_char *ptr=(u_char *)addr;
2699 assert((ptr[3]&0x0e)==0xa);
2700 emit_loadlp(target,0);
2701 emit_loadlp(addr,1);
24385cae 2702 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2703 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2704//DEBUG >
2705#ifdef DEBUG_CYCLE_COUNT
2706 emit_readword((int)&last_count,ECX);
2707 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2708 emit_readword((int)&next_interupt,ECX);
2709 emit_writeword(HOST_CCREG,(int)&Count);
2710 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2711 emit_writeword(ECX,(int)&last_count);
2712#endif
2713//DEBUG <
2714 emit_jmp(linker);
2715}
2716
2717emit_extjump(int addr, int target)
2718{
2719 emit_extjump2(addr, target, (int)dyna_linker);
2720}
2721emit_extjump_ds(int addr, int target)
2722{
2723 emit_extjump2(addr, target, (int)dyna_linker_ds);
2724}
2725
13e35c04 2726// put rt_val into rt, potentially making use of rs with value rs_val
2727static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2728{
2729 u_int xor=rs_val^rt_val;
2730 u_int xs;
2731 for(xs=xor;xs!=0&&(xs&3)==0;xs>>=2)
2732 ;
2733 if(xs<0x100)
2734 emit_xorimm(rs,xor,rt);
2735 else
2736 emit_movimm(rt_val,rt);
2737}
cbbab9cd 2738
// Move host registers a0 and a1 into the argument registers r0 and r1.
// A negative register number means "no argument, leave as is".
// trashes r2
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap: the arguments sit in each other's destination
      emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // r0 currently holds the second argument: move it out before r0 is
      // overwritten
      emit_mov(a1,1);
      if (a0>=0) emit_mov(a0,0);
      return;
    }
  }
  // No conflict: first argument, then second.
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2755
b1be1eee 2756static void mov_loadtype_adj(int type,int rs,int rt)
2757{
2758 switch(type) {
2759 case LOADB_STUB: emit_signextend8(rs,rt); break;
2760 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2761 case LOADH_STUB: emit_signextend16(rs,rt); break;
2762 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2763 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2764 default: assert(0);
2765 }
2766}
2767
2768#ifdef PCSX
2769#include "pcsxmem.h"
2770#include "pcsxmem_inline.c"
2771#endif
2772
// Emit the out-of-line slow path for the load described by stubs[n].
// Fields of stubs[n] as read here:
//   [0] stub type (LOADB_STUB ... LOADW_STUB, LOADD_STUB)
//   [1] location handed to set_jump_target() so that it branches to this stub
//   [2] return address in the main code
//   [3] index i of the instruction within the block
//   [4] host register holding the guest address (rs)
//   [5] struct regstat * describing the register state
//   [6] value used for the cycle count adjustment
//   [7] bitmask of host registers to preserve (reglist)
// The PCSX build and the non-PCSX build generate entirely different code.
57871462 2773do_readstub(int n)
2774{
2775 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2776 literal_pool(256);
2777 set_jump_target(stubs[n][1],(int)out);
2778 int type=stubs[n][0];
2779 int i=stubs[n][3];
2780 int rs=stubs[n][4];
2781 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2782 u_int reglist=stubs[n][7];
2783 signed char *i_regmap=i_regs->regmap;
2784 int addr=get_reg(i_regmap,AGEN1+(i&1));
2785 int rth,rt;
2786 int ds;
  // Destination host register(s): FTEMP for coprocessor loads and LOADLR,
  // otherwise the instruction's target register rt1[i].
b9b61529 2787 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2788 rth=get_reg(i_regmap,FTEMP|64);
2789 rt=get_reg(i_regmap,FTEMP);
2790 }else{
2791 rth=get_reg(i_regmap,rt1[i]|64);
2792 rt=get_reg(i_regmap,rt1[i]);
2793 }
2794 assert(rs>=0);
c6c3b1b3 2795#ifdef PCSX
2796 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2797 reglist|=(1<<rs);
  // Look for a scratch register among r0-r9 and r12 (mask 0x13ff) that is
  // not in reglist.
2798 for(r=0;r<=12;r++) {
2799 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2800 temp=r; break;
2801 }
2802 }
  // The destination register is about to be overwritten, so it does not
  // need to be preserved.
2803 if(rt>=0)
2804 reglist&=~(1<<rt);
  // No free register: save the caller-save registers now and use r0 as
  // scratch (r2 if the address is in r0).
2805 if(temp==-1) {
2806 save_regs(reglist);
2807 regs_saved=1;
2808 temp=(rs==0)?2:0;
2809 }
  // Prefer r1 as the second scratch register when it is available.
2810 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2811 temp2=1;
  // temp2 = mem_rtab entry for page rs>>12, then shifted left by one with
  // flags set.  The "cc" loads below therefore execute only when the bit
  // shifted out was clear.
2812 emit_readword((int)&mem_rtab,temp);
2813 emit_shrimm(rs,12,temp2);
2814 emit_readword_dualindexedx4(temp,temp2,temp2);
2815 emit_lsls_imm(temp2,1,temp2);
2816 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2817 switch(type) {
2818 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2819 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2820 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2821 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2822 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2823 }
2824 }
  // Conditional exit: straight back to the main code, or to the register
  // restore below if registers were already saved.
2825 if(regs_saved) {
2826 restore_jump=(int)out;
2827 emit_jcc(0); // jump to reg restore
2828 }
2829 else
2830 emit_jcc(stubs[n][2]); // return address
2831
  // Otherwise call the read handler for this access width.
2832 if(!regs_saved)
2833 save_regs(reglist);
2834 int handler=0;
2835 if(type==LOADB_STUB||type==LOADBU_STUB)
2836 handler=(int)jump_handler_read8;
2837 if(type==LOADH_STUB||type==LOADHU_STUB)
2838 handler=(int)jump_handler_read16;
2839 if(type==LOADW_STUB)
2840 handler=(int)jump_handler_read32;
2841 assert(handler!=0);
  // Arguments: r0 = address, r1 = shifted table entry, r2 = adjusted cycle
  // count.
b96d3df7 2842 pass_args(rs,temp2);
c6c3b1b3 2843 int cc=get_reg(i_regmap,CCREG);
2844 if(cc<0)
2845 emit_loadreg(CCREG,2);
2573466a 2846 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2847 emit_call(handler);
  // The handler's result is taken from r0 and extended according to type.
2848 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2849 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2850 }
2851 if(restore_jump)
2852 set_jump_target(restore_jump,(int)out);
2853 restore_regs(reglist);
2854 emit_jmp(stubs[n][2]); // return address
2855#else // !PCSX
57871462 2856 if(addr<0) addr=rt;
535d208a 2857 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2858 assert(addr>=0);
  // Pick the handler table for the access width.
2859 int ftable=0;
2860 if(type==LOADB_STUB||type==LOADBU_STUB)
2861 ftable=(int)readmemb;
2862 if(type==LOADH_STUB||type==LOADHU_STUB)
2863 ftable=(int)readmemh;
2864 if(type==LOADW_STUB)
2865 ftable=(int)readmem;
24385cae 2866#ifndef FORCE32
57871462 2867 if(type==LOADD_STUB)
2868 ftable=(int)readmemd;
24385cae 2869#endif
2870 assert(ftable!=0);
57871462 2871 emit_writeword(rs,(int)&address);
2872 //emit_pusha();
2873 save_regs(reglist);
  // NOTE(review): this branch is already the !PCSX side of the #ifdef above,
  // so the nested "#ifndef PCSX" guards below are always true here.
97a238a6 2874#ifndef PCSX
57871462 2875 ds=i_regs!=&regs[i];
2876 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2877 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2878 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2879 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2880 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2881#endif
  // r1 = address>>16, r0 = handler table, r2 = adjusted cycle count, then
  // call through indirect_jump_indexed.
57871462 2882 emit_shrimm(rs,16,1);
2883 int cc=get_reg(i_regmap,CCREG);
2884 if(cc<0) {
2885 emit_loadreg(CCREG,2);
2886 }
2887 emit_movimm(ftable,0);
2888 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2889#ifndef PCSX
57871462 2890 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2891#endif
57871462 2892 //emit_readword((int)&last_count,temp);
2893 //emit_add(cc,temp,cc);
2894 //emit_writeword(cc,(int)&Count);
2895 //emit_mov(15,14);
2896 emit_call((int)&indirect_jump_indexed);
2897 //emit_callreg(rs);
2898 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2899#ifndef PCSX
57871462 2900 // We really shouldn't need to update the count here,
2901 // but not doing so causes random crashes...
2902 emit_readword((int)&Count,HOST_TEMPREG);
2903 emit_readword((int)&next_interupt,2);
2904 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2905 emit_writeword(2,(int)&last_count);
2906 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2907 if(cc<0) {
2908 emit_storereg(CCREG,HOST_TEMPREG);
2909 }
f51dc36c 2910#endif
57871462 2911 //emit_popa();
2912 restore_regs(reglist);
2913 //if((cc=get_reg(regmap,CCREG))>=0) {
2914 // emit_loadreg(CCREG,cc);
2915 //}
  // Fetch the result from readmem_dword with the extension matching type.
f18c0f46 2916 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2917 assert(rt>=0);
2918 if(type==LOADB_STUB)
2919 emit_movsbl((int)&readmem_dword,rt);
2920 if(type==LOADBU_STUB)
2921 emit_movzbl((int)&readmem_dword,rt);
2922 if(type==LOADH_STUB)
2923 emit_movswl((int)&readmem_dword,rt);
2924 if(type==LOADHU_STUB)
2925 emit_movzwl((int)&readmem_dword,rt);
2926 if(type==LOADW_STUB)
2927 emit_readword((int)&readmem_dword,rt);
2928 if(type==LOADD_STUB) {
2929 emit_readword((int)&readmem_dword,rt);
2930 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2931 }
57871462 2932 }
2933 emit_jmp(stubs[n][2]); // return address
c6c3b1b3 2934#endif // !PCSX
57871462 2935}
2936
#ifdef PCSX
// return memhandler, or get directly accessable address and return 0
//
// `table` is a two-level lookup indexed by addr>>12.  In both levels an
// entry with bit 31 clear is a base (stored shifted right by one) to add the
// address to; an entry with bit 31 set refers to the next level / a handler
// (also stored shifted right by one).  The second level holds the word-sized
// entries first, then the halfword entries at 0x1000/4, then the byte entries
// at 0x1000/4 + 0x1000/2.
u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
{
  u_int entry=((u_int *)table)[addr>>12];
  if((entry&(1<<31))==0) {
    // whole page is directly mapped
    *addr_host=(entry<<1)+addr;
    return 0;
  }

  u_int *page=(u_int *)(entry<<1);
  u_int in_page=addr&0xfff;
  u_int sub;
  if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
    sub=page[0x1000/4 + 0x1000/2 + in_page];
  else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
    sub=page[0x1000/4 + in_page/2];
  else
    sub=page[in_page/4];

  if((sub&(1<<31))==0) {
    // this particular location is directly accessible
    *addr_host=(sub<<1)+in_page;
    return 0;
  }
  return sub<<1;
}
#endif
2965
// Emit, inline in the main code stream, a load of the given stub type from
// the guest address `addr`, which is known while compiling.
//   type    - LOADB_STUB ... LOADW_STUB (LOADD_STUB in the non-PCSX build)
//   i       - index of the instruction within the block
//   regmap  - host register mapping in effect
//   target  - guest register receiving the value; its host register is also
//             used to hold the address (a temporary is used if unmapped)
//   adj     - value used for the cycle count adjustment
//   reglist - bitmask of host registers to preserve around a handler call
57871462 2966inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2967{
2968 int rs=get_reg(regmap,target);
2969 int rth=get_reg(regmap,target|64);
2970 int rt=get_reg(regmap,target);
535d208a 2971 if(rs<0) rs=get_reg(regmap,-1);
57871462 2972 assert(rs>=0);
c6c3b1b3 2973#ifdef PCSX
b1be1eee 2974 u_int handler,host_addr=0,is_dynamic,far_call=0;
2975 int cc=get_reg(regmap,CCREG);
  // Let pcsx_direct_read() handle the access if it can.
2976 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2977 return;
c6c3b1b3 2978 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
  // handler==0: the location is directly accessible at host_addr, so emit a
  // plain load (nothing at all if the result has no host register).
2979 if (handler==0) {
2980 if(rt<0)
2981 return;
13e35c04 2982 if(addr!=host_addr)
2983 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2984 switch(type) {
2985 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2986 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2987 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2988 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2989 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2990 default: assert(0);
2991 }
2992 return;
2993 }
  // For addresses whose handler is reported as dynamic, call the generic
  // jump_handler_read* routine instead of the handler found above.
b1be1eee 2994 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2995 if(is_dynamic) {
2996 if(type==LOADB_STUB||type==LOADBU_STUB)
2997 handler=(int)jump_handler_read8;
2998 if(type==LOADH_STUB||type==LOADHU_STUB)
2999 handler=(int)jump_handler_read16;
3000 if(type==LOADW_STUB)
3001 handler=(int)jump_handler_read32;
3002 }
c6c3b1b3 3003
3004 // call a memhandler
3005 if(rt>=0)
3006 reglist&=~(1<<rt);
3007 save_regs(reglist);
  // r0 = guest address
3008 if(target==0)
3009 emit_movimm(addr,0);
3010 else if(rs!=0)
3011 emit_mov(rs,0);
  // A direct call is only emitted when the handler is within +/-32MB of the
  // call site; otherwise its address is loaded into r12 and called through
  // the register.
c6c3b1b3 3012 int offset=(int)handler-(int)out-8;
3013 if(offset<-33554432||offset>=33554432) {
3014 // unreachable memhandler, a plugin func perhaps
b1be1eee 3015 emit_movimm(handler,12);
3016 far_call=1;
3017 }
3018 if(cc<0)
3019 emit_loadreg(CCREG,2);
  // Dynamic handlers get r1 = mem_rtab entry (shifted left by one) and
  // r2 = adjusted cycle count; for the others the adjusted count plus
  // last_count is written to Count before the call.
3020 if(is_dynamic) {
3021 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
3022 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 3023 }
b1be1eee 3024 else {
3025 emit_readword((int)&last_count,3);
3026 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3027 emit_add(2,3,2);
3028 emit_writeword(2,(int)&Count);
3029 }
3030
3031 if(far_call)
3032 emit_callreg(12);
c6c3b1b3 3033 else
3034 emit_call(handler);
b1be1eee 3035
  // The result is taken from r0 and extended according to type.
c6c3b1b3 3036 if(rt>=0) {
3037 switch(type) {
3038 case LOADB_STUB: emit_signextend8(0,rt); break;
3039 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
3040 case LOADH_STUB: emit_signextend16(0,rt); break;
3041 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
3042 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
3043 default: assert(0);
3044 }
3045 }
3046 restore_regs(reglist);
3047#else // if !PCSX
  // Pick the handler table for the access width.
57871462 3048 int ftable=0;
3049 if(type==LOADB_STUB||type==LOADBU_STUB)
3050 ftable=(int)readmemb;
3051 if(type==LOADH_STUB||type==LOADHU_STUB)
3052 ftable=(int)readmemh;
3053 if(type==LOADW_STUB)
3054 ftable=(int)readmem;
24385cae 3055#ifndef FORCE32
57871462 3056 if(type==LOADD_STUB)
3057 ftable=(int)readmemd;
24385cae 3058#endif
3059 assert(ftable!=0);
fd99c415 3060 if(target==0)
3061 emit_movimm(addr,rs);
57871462 3062 emit_writeword(rs,(int)&address);
3063 //emit_pusha();
3064 save_regs(reglist);
  // NOTE(review): this branch is already the !PCSX side of the #ifdef above,
  // so the nested "#ifndef PCSX" guards below are always true here.
0c1fe38b 3065#ifndef PCSX
3066 if((signed int)addr>=(signed int)0xC0000000) {
3067 // Theoretically we can have a pagefault here, if the TLB has never
3068 // been enabled and the address is outside the range 80000000..BFFFFFFF
3069 // Write out the registers so the pagefault can be handled. This is
3070 // a very rare case and likely represents a bug.
3071 int ds=regmap!=regs[i].regmap;
3072 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3073 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3074 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3075 }
3076#endif
57871462 3077 //emit_shrimm(rs,16,1);
3078 int cc=get_reg(regmap,CCREG);
3079 if(cc<0) {
3080 emit_loadreg(CCREG,2);
3081 }
3082 //emit_movimm(ftable,0);
  // The table entry for addr>>16 is read while compiling and loaded into r0
  // as a constant.
3083 emit_movimm(((u_int *)ftable)[addr>>16],0);
3084 //emit_readword((int)&last_count,12);
2573466a 3085 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3086#ifndef PCSX
57871462 3087 if((signed int)addr>=(signed int)0xC0000000) {
3088 // Pagefault address
3089 int ds=regmap!=regs[i].regmap;
3090 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3091 }
f51dc36c 3092#endif
57871462 3093 //emit_add(12,2,2);
3094 //emit_writeword(2,(int)&Count);
3095 //emit_call(((u_int *)ftable)[addr>>16]);
3096 emit_call((int)&indirect_jump);
f51dc36c 3097#ifndef PCSX
57871462 3098 // We really shouldn't need to update the count here,
3099 // but not doing so causes random crashes...
3100 emit_readword((int)&Count,HOST_TEMPREG);
3101 emit_readword((int)&next_interupt,2);
2573466a 3102 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3103 emit_writeword(2,(int)&last_count);
3104 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3105 if(cc<0) {
3106 emit_storereg(CCREG,HOST_TEMPREG);
3107 }
f51dc36c 3108#endif
57871462 3109 //emit_popa();
3110 restore_regs(reglist);
  // Fetch the result from readmem_dword with the extension matching type.
fd99c415 3111 if(rt>=0) {
3112 if(type==LOADB_STUB)
3113 emit_movsbl((int)&readmem_dword,rt);
3114 if(type==LOADBU_STUB)
3115 emit_movzbl((int)&readmem_dword,rt);
3116 if(type==LOADH_STUB)
3117 emit_movswl((int)&readmem_dword,rt);
3118 if(type==LOADHU_STUB)
3119 emit_movzwl((int)&readmem_dword,rt);
3120 if(type==LOADW_STUB)
3121 emit_readword((int)&readmem_dword,rt);
3122 if(type==LOADD_STUB) {
3123 emit_readword((int)&readmem_dword,rt);
3124 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3125 }
57871462 3126 }
c6c3b1b3 3127#endif // !PCSX
57871462 3128}
3129
3130do_writestub(int n)
3131{
3132 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3133 literal_pool(256);
3134 set_jump_target(stubs[n][1],(int)out);
3135 int type=stubs[n][0];
3136 int i=stubs[n][3];
3137 int rs=stubs[n][4];
3138 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3139 u_int reglist=stubs[n][7];
3140 signed char *i_regmap=i_regs->regmap;
3141 int addr=get_reg(i_regmap,AGEN1+(i&1));
3142 int rth,rt,r;
3143 int ds;
b9b61529 3144 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3145 rth=get_reg(i_regmap,FTEMP|64);
3146 rt=get_reg(i_regmap,r=FTEMP);
3147 }else{
3148 rth=get_reg(i_regmap,rs2[i]|64);
3149 rt=get_reg(i_regmap,r=rs2[i]);
3150 }
3151 assert(rs>=0);
3152 assert(rt>=0);
b96d3df7 3153#ifdef PCSX
3154 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3155 int reglist2=reglist|(1<<rs)|(1<<rt);
3156 for(rtmp=0;rtmp<=12;rtmp++) {
3157 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3158 temp=rtmp; break;
3159 }
3160 }
3161 if(temp==-1) {
3162 save_regs(reglist);
3163 regs_saved=1;
3164 for(rtmp=0;rtmp<=3;rtmp++)
3165 if(rtmp!=rs&&rtmp!=rt)
3166 {temp=rtmp;break;}
3167 }
3168 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3169 temp2=3;
3170 emit_readword((int)&mem_wtab,temp);
3171 emit_shrimm(rs,12,temp2);
3172 emit_readword_dualindexedx4(temp,temp2,temp2);
3173 emit_lsls_imm(temp2,1,temp2);
3174 switch(type) {
3175 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3176 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3177 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3178 default: assert(0);
3179 }
3180 if(regs_saved) {
3181 restore_jump=(int)out;
3182 emit_jcc(0); // jump to reg restore
3183 }
3184 else
3185 emit_jcc(stubs[n][2]); // return address (invcode check)
3186
3187 if(!regs_saved)
3188 save_regs(reglist);
3189 int handler=0;
3190 switch(type) {
3191 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3192 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3193 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3194 }
3195 assert(handler!=0);
3196 pass_args(rs,rt);
3197 if(temp2!=3)
3198 emit_mov(temp2,3);
3199 int cc=get_reg(i_regmap,CCREG);
3200 if(cc<0)
3201 emit_loadreg(CCREG,2);
2573466a 3202 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3203 // returns new cycle_count
3204 emit_call(handler);
2573466a 3205 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3206 if(cc<0)
3207 emit_storereg(CCREG,2);
3208 if(restore_jump)
3209 set_jump_target(restore_jump,(int)out);
3210 restore_regs(reglist);
3211 ra=stubs[n][2];
b96d3df7 3212 emit_jmp(ra);
3213#else // if !PCSX
57871462 3214 if(addr<0) addr=get_reg(i_regmap,-1);
3215 assert(addr>=0);
3216 int ftable=0;
3217 if(type==STOREB_STUB)
3218 ftable=(int)writememb;
3219 if(type==STOREH_STUB)
3220 ftable=(int)writememh;
3221 if(type==STOREW_STUB)
3222 ftable=(int)writemem;
24385cae 3223#ifndef FORCE32
57871462 3224 if(type==STORED_STUB)
3225 ftable=(int)writememd;
24385cae 3226#endif
3227 assert(ftable!=0);
57871462 3228 emit_writeword(rs,(int)&address);
3229 //emit_shrimm(rs,16,rs);
3230 //emit_movmem_indexedx4(ftable,rs,rs);
3231 if(type==STOREB_STUB)
3232 emit_writebyte(rt,(int)&byte);
3233 if(type==STOREH_STUB)
3234 emit_writehword(rt,(int)&hword);
3235 if(type==STOREW_STUB)
3236 emit_writeword(rt,(int)&word);
3237 if(type==STORED_STUB) {
3d624f89 3238#ifndef FORCE32
57871462 3239 emit_writeword(rt,(int)&dword);
3240 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3241#else
3242 printf("STORED_STUB\n");
3243#endif
57871462 3244 }
3245 //emit_pusha();
3246 save_regs(reglist);
97a238a6 3247#ifndef PCSX
57871462 3248 ds=i_regs!=&regs[i];
3249 int real_rs=get_reg(i_regmap,rs1[i]);
3250 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3251 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3252 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3253 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3254#endif
57871462 3255 emit_shrimm(rs,16,1);
3256 int cc=get_reg(i_regmap,CCREG);
3257 if(cc<0) {
3258 emit_loadreg(CCREG,2);
3259 }
3260 emit_movimm(ftable,0);
3261 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3262#ifndef PCSX
57871462 3263 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3264#endif
57871462 3265 //emit_readword((int)&last_count,temp);
3266 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3267 //emit_add(cc,temp,cc);
3268 //emit_writeword(cc,(int)&Count);
3269 emit_call((int)&indirect_jump_indexed);
3270 //emit_callreg(rs);
3271 emit_readword((int)&Count,HOST_TEMPREG);
3272 emit_readword((int)&next_interupt,2);
3273 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3274 emit_writeword(2,(int)&last_count);
3275 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3276 if(cc<0) {
3277 emit_storereg(CCREG,HOST_TEMPREG);
3278 }
3279 //emit_popa();
3280 restore_regs(reglist);
3281 //if((cc=get_reg(regmap,CCREG))>=0) {
3282 // emit_loadreg(CCREG,cc);
3283 //}
3284 emit_jmp(stubs[n][2]); // return address
b96d3df7 3285#endif // !PCSX
57871462 3286}
3287
3288inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3289{
3290 int rs=get_reg(regmap,-1);
3291 int rth=get_reg(regmap,target|64);
3292 int rt=get_reg(regmap,target);
3293 assert(rs>=0);
3294 assert(rt>=0);
cbbab9cd 3295#ifdef PCSX
b96d3df7 3296 u_int handler,host_addr=0;
b96d3df7 3297 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3298 if (handler==0) {
13e35c04 3299 if(addr!=host_addr)
3300 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3301 switch(type) {
3302 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3303 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3304 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3305 default: assert(0);
3306 }
3307 return;
3308 }
3309
3310 // call a memhandler
3311 save_regs(reglist);
13e35c04 3312 pass_args(rs,rt);
b96d3df7 3313 int cc=get_reg(regmap,CCREG);
3314 if(cc<0)
3315 emit_loadreg(CCREG,2);
2573466a 3316 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3317 emit_movimm(handler,3);
3318 // returns new cycle_count
3319 emit_call((int)jump_handler_write_h);
2573466a 3320 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3321 if(cc<0)
3322 emit_storereg(CCREG,2);
3323 restore_regs(reglist);
3324#else // if !pcsx
57871462 3325 int ftable=0;
3326 if(type==STOREB_STUB)
3327 ftable=(int)writememb;
3328 if(type==STOREH_STUB)
3329 ftable=(int)writememh;
3330 if(type==STOREW_STUB)
3331 ftable=(int)writemem;
24385cae 3332#ifndef FORCE32
57871462 3333 if(type==STORED_STUB)
3334 ftable=(int)writememd;
24385cae 3335#endif
3336 assert(ftable!=0);
57871462 3337 emit_writeword(rs,(int)&address);
3338 //emit_shrimm(rs,16,rs);
3339 //emit_movmem_indexedx4(ftable,rs,rs);
3340 if(type==STOREB_STUB)
3341 emit_writebyte(rt,(int)&byte);
3342 if(type==STOREH_STUB)
3343 emit_writehword(rt,(int)&hword);
3344 if(type==STOREW_STUB)
3345 emit_writeword(rt,(int)&word);
3346 if(type==STORED_STUB) {
3d624f89 3347#ifndef FORCE32
57871462 3348 emit_writeword(rt,(int)&dword);
3349 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3350#else
3351 printf("STORED_STUB\n");
3352#endif
57871462 3353 }
3354 //emit_pusha();
3355 save_regs(reglist);
0c1fe38b 3356#ifndef PCSX
3357 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3358 if((signed int)addr>=(signed int)0xC0000000) {
3359 // Theoretically we can have a pagefault here, if the TLB has never
3360 // been enabled and the address is outside the range 80000000..BFFFFFFF
3361 // Write out the registers so the pagefault can be handled. This is
3362 // a very rare case and likely represents a bug.
3363 int ds=regmap!=regs[i].regmap;
3364 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3365 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3366 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3367 }
3368#endif
57871462 3369 //emit_shrimm(rs,16,1);
3370 int cc=get_reg(regmap,CCREG);
3371 if(cc<0) {
3372 emit_loadreg(CCREG,2);
3373 }
3374 //emit_movimm(ftable,0);
3375 emit_movimm(((u_int *)ftable)[addr>>16],0);
3376 //emit_readword((int)&last_count,12);
2573466a 3377 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3378#ifndef PCSX
57871462 3379 if((signed int)addr>=(signed int)0xC0000000) {
3380 // Pagefault address
3381 int ds=regmap!=regs[i].regmap;
3382 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3383 }
f51dc36c 3384#endif
57871462 3385 //emit_add(12,2,2);
3386 //emit_writeword(2,(int)&Count);
3387 //emit_call(((u_int *)ftable)[addr>>16]);
3388 emit_call((int)&indirect_jump);
3389 emit_readword((int)&Count,HOST_TEMPREG);
3390 emit_readword((int)&next_interupt,2);
2573466a 3391 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3392 emit_writeword(2,(int)&last_count);
3393 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3394 if(cc<0) {
3395 emit_storereg(CCREG,HOST_TEMPREG);
3396 }
3397 //emit_popa();
3398 restore_regs(reglist);
b96d3df7 3399#endif
57871462 3400}
3401
3402do_unalignedwritestub(int n)
3403{
b7918751 3404 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3405 literal_pool(256);
57871462 3406 set_jump_target(stubs[n][1],(int)out);
b7918751 3407
3408 int i=stubs[n][3];
3409 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3410 int addr=stubs[n][5];
3411 u_int reglist=stubs[n][7];
3412 signed char *i_regmap=i_regs->regmap;
3413 int temp2=get_reg(i_regmap,FTEMP);
3414 int rt;
3415 int ds, real_rs;
3416 rt=get_reg(i_regmap,rs2[i]);
3417 assert(rt>=0);
3418 assert(addr>=0);
3419 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3420 reglist|=(1<<addr);
3421 reglist&=~(1<<temp2);
3422
b96d3df7 3423#if 1
3424 // don't bother with it and call write handler
3425 save_regs(reglist);
3426 pass_args(addr,rt);
3427 int cc=get_reg(i_regmap,CCREG);
3428 if(cc<0)
3429 emit_loadreg(CCREG,2);
2573466a 3430 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3431 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3432 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3433 if(cc<0)
3434 emit_storereg(CCREG,2);
3435 restore_regs(reglist);
3436 emit_jmp(stubs[n][2]); // return address
3437#else
b7918751 3438 emit_andimm(addr,0xfffffffc,temp2);
3439 emit_writeword(temp2,(int)&address);
3440
3441 save_regs(reglist);
97a238a6 3442#ifndef PCSX
b7918751 3443 ds=i_regs!=&regs[i];
3444 real_rs=get_reg(i_regmap,rs1[i]);
3445 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3446 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3447 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3448 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3449#endif
b7918751 3450 emit_shrimm(addr,16,1);
3451 int cc=get_reg(i_regmap,CCREG);
3452 if(cc<0) {
3453 emit_loadreg(CCREG,2);
3454 }
3455 emit_movimm((u_int)readmem,0);
3456 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3457#ifndef PCSX
3458 // pagefault address
3459 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3460#endif
b7918751 3461 emit_call((int)&indirect_jump_indexed);
3462 restore_regs(reglist);
3463
3464 emit_readword((int)&readmem_dword,temp2);
3465 int temp=addr; //hmh
3466 emit_shlimm(addr,3,temp);
3467 emit_andimm(temp,24,temp);
3468#ifdef BIG_ENDIAN_MIPS
3469 if (opcode[i]==0x2e) // SWR
3470#else
3471 if (opcode[i]==0x2a) // SWL
3472#endif
3473 emit_xorimm(temp,24,temp);
3474 emit_movimm(-1,HOST_TEMPREG);
55439448 3475 if (opcode[i]==0x2a) { // SWL
b7918751 3476 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3477 emit_orrshr(rt,temp,temp2);
3478 }else{
3479 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3480 emit_orrshl(rt,temp,temp2);
3481 }
3482 emit_readword((int)&address,addr);
3483 emit_writeword(temp2,(int)&word);
3484 //save_regs(reglist); // don't need to, no state changes
3485 emit_shrimm(addr,16,1);
3486 emit_movimm((u_int)writemem,0);
3487 //emit_call((int)&indirect_jump_indexed);
3488 emit_mov(15,14);
3489 emit_readword_dualindexedx4(0,1,15);
3490 emit_readword((int)&Count,HOST_TEMPREG);
3491 emit_readword((int)&next_interupt,2);
3492 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3493 emit_writeword(2,(int)&last_count);
3494 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3495 if(cc<0) {
3496 emit_storereg(CCREG,HOST_TEMPREG);
3497 }
3498 restore_regs(reglist);
57871462 3499 emit_jmp(stubs[n][2]); // return address
b96d3df7 3500#endif
57871462 3501}
3502
/*
 * Debug helper: print seven of the eight argument values in hex (esp is
 * accepted but not printed), followed in parentheses by the word located
 * immediately below the storage of the first argument.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int word_below_args=(&edi)[-1];

  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,word_below_args);
}
3507
3508do_invstub(int n)
3509{
3510 literal_pool(20);
3511 u_int reglist=stubs[n][3];
3512 set_jump_target(stubs[n][1],(int)out);
3513 save_regs(reglist);
3514 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3515 emit_call((int)&invalidate_addr);
3516 restore_regs(reglist);
3517 emit_jmp(stubs[n][2]); // return address
3518}
3519
3520int do_dirty_stub(int i)
3521{
3522 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3523 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3524 #ifdef PCSX
3525 addr=(u_int)source;
3526 #endif
57871462 3527 // Careful about the code output here, verify_dirty needs to parse it.
3528 #ifdef ARMv5_ONLY
ac545b3a 3529 emit_loadlp(addr,1);
57871462 3530 emit_loadlp((int)copy,2);
3531 emit_loadlp(slen*4,3);
3532 #else
ac545b3a 3533 emit_movw(addr&0x0000FFFF,1);
57871462 3534 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3535 emit_movt(addr&0xFFFF0000,1);
57871462 3536 emit_movt(((u_int)copy)&0xFFFF0000,2);
3537 emit_movw(slen*4,3);
3538 #endif
3539 emit_movimm(start+i*4,0);
3540 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3541 int entry=(int)out;
3542 load_regs_entry(i);
3543 if(entry==(int)out) entry=instr_addr[i];
3544 emit_jmp(instr_addr[i]);
3545 return entry;
3546}
3547
3548void do_dirty_stub_ds()
3549{
3550 // Careful about the code output here, verify_dirty needs to parse it.
3551 #ifdef ARMv5_ONLY
3552 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3553 emit_loadlp((int)copy,2);
3554 emit_loadlp(slen*4,3);
3555 #else
3556 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3557 emit_movw(((u_int)copy)&0x0000FFFF,2);
3558 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3559 emit_movt(((u_int)copy)&0xFFFF0000,2);
3560 emit_movw(slen*4,3);
3561 #endif
3562 emit_movimm(start+1,0);
3563 emit_call((int)&verify_code_ds);
3564}
3565
3566do_cop1stub(int n)
3567{
3568 literal_pool(256);
3569 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3570 set_jump_target(stubs[n][1],(int)out);
3571 int i=stubs[n][3];
3d624f89 3572// int rs=stubs[n][4];
57871462 3573 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3574 int ds=stubs[n][6];
3575 if(!ds) {
3576 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3577 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3578 }
3579 //else {printf("fp exception in delay slot\n");}
3580 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3581 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3582 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3583 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3584 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3585}
3586
63cb0298 3587#ifndef DISABLE_TLB
3588
57871462 3589/* TLB */
3590
3591int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3592{
3593 if(c) {
3594 if((signed int)addr>=(signed int)0xC0000000) {
3595 // address_generation already loaded the const
3596 emit_readword_dualindexedx4(FP,map,map);
3597 }
3598 else
3599 return -1; // No mapping
3600 }
3601 else {
3602 assert(s!=map);
3603 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3604 emit_addsr12(map,s,map);
3605 // Schedule this while we wait on the load
3606 //if(x) emit_xorimm(s,x,ar);
3607 if(shift>=0) emit_shlimm(s,3,shift);
3608 if(~a) emit_andimm(s,a,ar);
3609 emit_readword_dualindexedx4(FP,map,map);
3610 }
3611 return map;
3612}
3613int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3614{
3615 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3616 emit_test(map,map);
3617 *jaddr=(int)out;
3618 emit_js(0);
3619 }
3620 return map;
3621}
3622
3623int gen_tlb_addr_r(int ar, int map) {
3624 if(map>=0) {
3625 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3626 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3627 }
3628}
3629
3630int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3631{
3632 if(c) {
3633 if(addr<0x80800000||addr>=0xC0000000) {
3634 // address_generation already loaded the const
3635 emit_readword_dualindexedx4(FP,map,map);
3636 }
3637 else
3638 return -1; // No mapping
3639 }
3640 else {
3641 assert(s!=map);
3642 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3643 emit_addsr12(map,s,map);
3644 // Schedule this while we wait on the load
3645 //if(x) emit_xorimm(s,x,ar);
3646 emit_readword_dualindexedx4(FP,map,map);
3647 }
3648 return map;
3649}
3650int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3651{
3652 if(!c||addr<0x80800000||addr>=0xC0000000) {
3653 emit_testimm(map,0x40000000);
3654 *jaddr=(int)out;
3655 emit_jne(0);
3656 }
3657}
3658
3659int gen_tlb_addr_w(int ar, int map) {
3660 if(map>=0) {
3661 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3662 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3663 }
3664}
3665
3666// Generate the address of the memory_map entry, relative to dynarec_local
3667generate_map_const(u_int addr,int reg) {
3668 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3669 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3670}
3671
63cb0298 3672#else
3673
// No-op replacements used when DISABLE_TLB is defined.  They are declared
// with empty (unprototyped) parameter lists so that call sites written for
// the real TLB helpers still compile; each one emits nothing and returns 0.
static int do_tlb_r() { return 0; }
static int do_tlb_r_branch() { return 0; }
static int gen_tlb_addr_r() { return 0; }
static int do_tlb_w() { return 0; }
static int do_tlb_w_branch() { return 0; }
static int gen_tlb_addr_w() { return 0; }
3680
3681#endif // DISABLE_TLB
3682
57871462 3683/* Special assem */
3684
3685void shift_assemble_arm(int i,struct regstat *i_regs)
3686{
3687 if(rt1[i]) {
3688 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3689 {
3690 signed char s,t,shift;
3691 t=get_reg(i_regs->regmap,rt1[i]);
3692 s=get_reg(i_regs->regmap,rs1[i]);
3693 shift=get_reg(i_regs->regmap,rs2[i]);
3694 if(t>=0){
3695 if(rs1[i]==0)
3696 {
3697 emit_zeroreg(t);
3698 }
3699 else if(rs2[i]==0)
3700 {
3701 assert(s>=0);
3702 if(s!=t) emit_mov(s,t);
3703 }
3704 else
3705 {
3706 emit_andimm(shift,31,HOST_TEMPREG);
3707 if(opcode2[i]==4) // SLLV
3708 {
3709 emit_shl(s,HOST_TEMPREG,t);
3710 }
3711 if(opcode2[i]==6) // SRLV
3712 {
3713 emit_shr(s,HOST_TEMPREG,t);
3714 }
3715 if(opcode2[i]==7) // SRAV
3716 {
3717 emit_sar(s,HOST_TEMPREG,t);
3718 }
3719 }
3720 }
3721 } else { // DSLLV/DSRLV/DSRAV
3722 signed char sh,sl,th,tl,shift;
3723 th=get_reg(i_regs->regmap,rt1[i]|64);
3724 tl=get_reg(i_regs->regmap,rt1[i]);
3725 sh=get_reg(i_regs->regmap,rs1[i]|64);
3726 sl=get_reg(i_regs->regmap,rs1[i]);
3727 shift=get_reg(i_regs->regmap,rs2[i]);
3728 if(tl>=0){
3729 if(rs1[i]==0)
3730 {
3731 emit_zeroreg(tl);
3732 if(th>=0) emit_zeroreg(th);
3733 }
3734 else if(rs2[i]==0)
3735 {
3736 assert(sl>=0);
3737 if(sl!=tl) emit_mov(sl,tl);
3738 if(th>=0&&sh!=th) emit_mov(sh,th);
3739 }
3740 else
3741 {
3742 // FIXME: What if shift==tl ?
3743 assert(shift!=tl);
3744 int temp=get_reg(i_regs->regmap,-1);
3745 int real_th=th;
3746 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3747 assert(sl>=0);
3748 assert(sh>=0);
3749 emit_andimm(shift,31,HOST_TEMPREG);
3750 if(opcode2[i]==0x14) // DSLLV
3751 {
3752 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3753 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3754 emit_orrshr(sl,HOST_TEMPREG,th);
3755 emit_andimm(shift,31,HOST_TEMPREG);
3756 emit_testimm(shift,32);
3757 emit_shl(sl,HOST_TEMPREG,tl);
3758 if(th>=0) emit_cmovne_reg(tl,th);
3759 emit_cmovne_imm(0,tl);
3760 }
3761 if(opcode2[i]==0x16) // DSRLV
3762 {
3763 assert(th>=0);
3764 emit_shr(sl,HOST_TEMPREG,tl);
3765 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3766 emit_orrshl(sh,HOST_TEMPREG,tl);
3767 emit_andimm(shift,31,HOST_TEMPREG);
3768 emit_testimm(shift,32);
3769 emit_shr(sh,HOST_TEMPREG,th);
3770 emit_cmovne_reg(th,tl);
3771 if(real_th>=0) emit_cmovne_imm(0,th);
3772 }
3773 if(opcode2[i]==0x17) // DSRAV
3774 {
3775 assert(th>=0);
3776 emit_shr(sl,HOST_TEMPREG,tl);
3777 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3778 if(real_th>=0) {
3779 assert(temp>=0);
3780 emit_sarimm(th,31,temp);
3781 }
3782 emit_orrshl(sh,HOST_TEMPREG,tl);
3783 emit_andimm(shift,31,HOST_TEMPREG);
3784 emit_testimm(shift,32);
3785 emit_sar(sh,HOST_TEMPREG,th);
3786 emit_cmovne_reg(th,tl);
3787 if(real_th>=0) emit_cmovne_reg(temp,th);
3788 }
3789 }
3790 }
3791 }
3792 }
3793}
ffb0b9e0 3794
3795#ifdef PCSX
3796static void speculate_mov(int rs,int rt)
3797{
3798 if(rt!=0) {
3799 smrv_strong_next|=1<<rt;
3800 smrv[rt]=smrv[rs];
3801 }
3802}
3803
3804static void speculate_mov_weak(int rs,int rt)
3805{
3806 if(rt!=0) {
3807 smrv_weak_next|=1<<rt;
3808 smrv[rt]=smrv[rs];
3809 }
3810}
3811
3812static void speculate_register_values(int i)
3813{
3814 if(i==0) {
3815 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3816 // gp,sp are likely to stay the same throughout the block
3817 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3818 smrv_weak_next=~smrv_strong_next;
3819 //printf(" llr %08x\n", smrv[4]);
3820 }
3821 smrv_strong=smrv_strong_next;
3822 smrv_weak=smrv_weak_next;
3823 switch(itype[i]) {
3824 case ALU:
3825 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3826 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3827 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3828 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3829 else {
3830 smrv_strong_next&=~(1<<rt1[i]);
3831 smrv_weak_next&=~(1<<rt1[i]);
3832 }
3833 break;
3834 case SHIFTIMM:
3835 smrv_strong_next&=~(1<<rt1[i]);
3836 smrv_weak_next&=~(1<<rt1[i]);
3837 // fallthrough
3838 case IMM16:
3839 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3840 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3841 if(hr>=0) {
3842 if(get_final_value(hr,i,&value))
3843 smrv[rt1[i]]=value;
3844 else smrv[rt1[i]]=constmap[i][hr];
3845 smrv_strong_next|=1<<rt1[i];
3846 }
3847 }
3848 else {
3849 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3850 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3851 }
3852 break;
3853 case LOAD:
3854 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3855 // special case for BIOS
3856 smrv[rt1[i]]=0xa0000000;
3857 smrv_strong_next|=1<<rt1[i];
3858 break;
3859 }
3860 // fallthrough
3861 case SHIFT:
3862 case LOADLR:
3863 case MOV:
3864 smrv_strong_next&=~(1<<rt1[i]);
3865 smrv_weak_next&=~(1<<rt1[i]);
3866 break;
3867 case COP0:
3868 case COP2:
3869 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3870 smrv_strong_next&=~(1<<rt1[i]);
3871 smrv_weak_next&=~(1<<rt1[i]);
3872 }
3873 break;
3874 case C2LS:
3875 if (opcode[i]==0x32) { // LWC2
3876 smrv_strong_next&=~(1<<rt1[i]);
3877 smrv_weak_next&=~(1<<rt1[i]);
3878 }
3879 break;
3880 }
3881#if 0
3882 int r=4;
3883 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3884 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3885#endif
3886}
3887
// Memory region classes returned by get_ptr_mem_type().
// MTYPE_8000 must stay 0: emit_fastpath_cmp_jump treats type 0 as the
// plain RAM-size compare path.
enum {
  MTYPE_8000 = 0, // default; also returned for anything not matched below
  MTYPE_8020,     // 0x80200000..0x807fffff (RAM mirror)
  MTYPE_0000,     // below 0x00200000 (RAM mirror at address 0)
  MTYPE_A000,     // 0xa0000000..0xa01fffff (RAM mirror)
  MTYPE_1F80,     // 0x1f800000..0x1f800fff (scratchpad)
};
3895
3896static int get_ptr_mem_type(u_int a)
3897{
3898 if(a < 0x00200000) {
3899 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3900 // return wrong, must use memhandler for BIOS self-test to pass
3901 // 007 does similar stuff from a00 mirror, weird stuff
3902 return MTYPE_8000;
3903 return MTYPE_0000;
3904 }
3905 if(0x1f800000 <= a && a < 0x1f801000)
3906 return MTYPE_1F80;
3907 if(0x80200000 <= a && a < 0x80800000)
3908 return MTYPE_8020;
3909 if(0xa0000000 <= a && a < 0xa0200000)
3910 return MTYPE_A000;
3911 return MTYPE_8000;
3912}
3913#endif
3914
3915static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3916{
3917 int jaddr,type=0;
3918
3919#ifdef PCSX
3920 int mr=rs1[i];
3921 if(((smrv_strong|smrv_weak)>>mr)&1) {
3922 type=get_ptr_mem_type(smrv[mr]);
3923 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3924 }
3925 else {
3926 // use the mirror we are running on
3927 type=get_ptr_mem_type(start);
3928 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3929 }
3930
3931 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3932 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3933 addr=*addr_reg_override=HOST_TEMPREG;
3934 type=0;
3935 }
3936 else if(type==MTYPE_0000) { // RAM 0 mirror
3937 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3938 addr=*addr_reg_override=HOST_TEMPREG;
3939 type=0;
3940 }
3941 else if(type==MTYPE_A000) { // RAM A mirror
3942 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3943 addr=*addr_reg_override=HOST_TEMPREG;
3944 type=0;
3945 }
3946 else if(type==MTYPE_1F80) { // scratchpad
3947 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3948 emit_cmpimm(HOST_TEMPREG,0x1000);
3949 jaddr=(int)out;
3950 emit_jc(0);
3951 }
3952#endif
3953
3954 if(type==0)
3955 {
3956 emit_cmpimm(addr,RAM_SIZE);
3957 jaddr=(int)out;
3958 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3959 // Hint to branch predictor that the branch is unlikely to be taken
3960 if(rs1[i]>=28)
3961 emit_jno_unlikely(0);
3962 else
3963 #endif
3964 emit_jno(0);
3965 }
3966
3967 return jaddr;
3968}
3969
57871462 3970#define shift_assemble shift_assemble_arm
3971
/*
 * Assemble an unaligned load for guest instruction i:
 * LWL/LWR (opcodes 0x22/0x26) or LDL/LDR (opcodes 0x1A/0x1B).
 *
 * The aligned word (or doubleword) containing the address is loaded into
 * the FTEMP register(s), then shifted and merged into the destination
 * according to the low address bits.  `temp` holds address*8, which is
 * masked down to the bit shift.
 */
void loadlr_assemble_arm(int i,struct regstat *i_regs)
{
  int s,th,tl,temp,temp2,addr,map=-1;
  int offset;
  int jaddr=0;
  int memtarget=0,c=0;
  int fastload_reg_override=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64);
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,-1);
  temp2=get_reg(i_regs->regmap,FTEMP);
  addr=get_reg(i_regs->regmap,AGEN1+(i&1));
  assert(addr<0);
  offset=imm[i];
  // Collect every host register that currently holds a guest value
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  reglist|=1<<temp;
  // NOTE(review): c is still 0 at this point (it is computed just below),
  // so the "||c" term has no effect here - confirm whether that is intended.
  if(offset||s<0||c) addr=temp2;
  else addr=s;
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1;
    if(c) {
      // Constant address: decide at compile time whether it targets RAM
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
      if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
    }
  }
  if(!using_tlb) {
    if(!c) {
      #ifdef RAM_OFFSET
      map=get_reg(i_regs->regmap,ROREG);
      if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
      #endif
      // temp = address*8, temp2 = aligned address
      emit_shlimm(addr,3,temp);
      if (opcode[i]==0x22||opcode[i]==0x26) {
        emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
      }else{
        emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
      }
      jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
    }
    else {
      // Constant address: the bit shift is known at compile time
      if (opcode[i]==0x22||opcode[i]==0x26) {
        emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
      }else{
        emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
      }
    }
  }else{ // using tlb
    int a;
    if(c) {
      a=-1;
    }else if (opcode[i]==0x22||opcode[i]==0x26) {
      a=0xFFFFFFFC; // LWL/LWR
    }else{
      a=0xFFFFFFF8; // LDL/LDR
    }
    map=get_reg(i_regs->regmap,TLREG);
    assert(map>=0);
    reglist&=~(1<<map);
    map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
    if(c) {
      if (opcode[i]==0x22||opcode[i]==0x26) {
        emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
      }else{
        emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
      }
    }
    do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
  }
  if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
    if(!c||memtarget) {
      // Fast path: read the aligned word directly, with a stub as fallback
      int a=temp2;
      if(fastload_reg_override) a=fastload_reg_override;
      //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
      emit_readword_indexed_tlb(0,a,map,temp2);
      if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(tl>=0);
      emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
      if (opcode[i]==0x26) // LWR
#else
      if (opcode[i]==0x22) // LWL
#endif
        emit_xorimm(temp,24,temp);
      // Shift the loaded word into place, clear the bits it replaces in the
      // destination (mask of all ones shifted the same way), then merge
      emit_movimm(-1,HOST_TEMPREG);
      if (opcode[i]==0x26) {
        emit_shr(temp2,temp,temp2);
        emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
      }else{
        emit_shl(temp2,temp,temp2);
        emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
      }
      emit_or(temp2,tl,tl);
    }
    //emit_storereg(rt1[i],tl); // DEBUG
  }
  if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
    // FIXME: little endian, fastload_reg_override
    int temp2h=get_reg(i_regs->regmap,FTEMP|64);
    if(!c||memtarget) {
      //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
      //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
      emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
      if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(th>=0);
      assert(tl>=0);
      // Bit 5 of the shift selects which half is affected (tested here,
      // consumed by the conditional instructions below)
      emit_testimm(temp,32);
      emit_andimm(temp,24,temp);
      if (opcode[i]==0x1A) { // LDL
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shl(temp2h,temp,temp2h);
        emit_orrshr(temp2,HOST_TEMPREG,temp2h);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shl(temp2,temp,temp2);
        emit_cmove_reg(temp2h,th);
        emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
        emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
        emit_orreq(temp2,tl,tl);
        emit_orrne(temp2,th,th);
      }
      if (opcode[i]==0x1B) { // LDR
        emit_xorimm(temp,24,temp);
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shr(temp2,temp,temp2);
        emit_orrshl(temp2h,HOST_TEMPREG,temp2);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shr(temp2h,temp,temp2h);
        emit_cmovne_reg(temp2,tl);
        emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
        emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
        emit_orrne(temp2h,th,th);
        emit_orreq(temp2h,tl,tl);
      }
    }
  }
}
4119#define loadlr_assemble loadlr_assemble_arm
4120
4121void cop0_assemble(int i,struct regstat *i_regs)
4122{
4123 if(opcode2[i]==0) // MFC0
4124 {
4125 signed char t=get_reg(i_regs->regmap,rt1[i]);
4126 char copr=(source[i]>>11)&0x1f;
4127 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4128 if(t>=0&&rt1[i]!=0) {
7139f3c8 4129#ifdef MUPEN64
57871462 4130 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4131 emit_movimm((source[i]>>11)&0x1f,1);
4132 emit_writeword(0,(int)&PC);
4133 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4134 if(copr==9) {
4135 emit_readword((int)&last_count,ECX);
4136 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4137 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4138 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4139 emit_writeword(HOST_CCREG,(int)&Count);
4140 }
4141 emit_call((int)MFC0);
4142 emit_readword((int)&readmem_dword,t);
7139f3c8 4143#else
4144 emit_readword((int)&reg_cop0+copr*4,t);
4145#endif
57871462 4146 }
4147 }
4148 else if(opcode2[i]==4) // MTC0
4149 {
4150 signed char s=get_reg(i_regs->regmap,rs1[i]);
4151 char copr=(source[i]>>11)&0x1f;
4152 assert(s>=0);
63cb0298 4153#ifdef MUPEN64
57871462 4154 emit_writeword(s,(int)&readmem_dword);
4155 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4156 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4157 emit_movimm((source[i]>>11)&0x1f,1);
4158 emit_writeword(0,(int)&PC);
4159 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4160#else
4161 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4162#endif
4163 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4164 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4165 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4166 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4167 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4168 emit_writeword(HOST_CCREG,(int)&Count);
4169 }
4170 // What a mess. The status register (12) can enable interrupts,
4171 // so needs a special case to handle a pending interrupt.
4172 // The interrupt must be taken immediately, because a subsequent
4173 // instruction might disable interrupts again.
7139f3c8 4174 if(copr==12||copr==13) {
fca1aef2 4175#ifdef PCSX
4176 if (is_delayslot) {
4177 // burn cycles to cause cc_interrupt, which will
4178 // reschedule next_interupt. Relies on CCREG from above.
4179 assem_debug("MTC0 DS %d\n", copr);
4180 emit_writeword(HOST_CCREG,(int)&last_count);
4181 emit_movimm(0,HOST_CCREG);
4182 emit_storereg(CCREG,HOST_CCREG);
63cb0298 4183 if(s!=1)
4184 emit_mov(s,1);
fca1aef2 4185 emit_movimm(copr,0);
4186 emit_call((int)pcsx_mtc0_ds);
4187 return;
4188 }
4189#endif
63cb0298 4190 emit_movimm(start+i*4+4,HOST_TEMPREG);
4191 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4192 emit_movimm(0,HOST_TEMPREG);
4193 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4194 }
4195 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4196 //else
fca1aef2 4197#ifdef PCSX
63cb0298 4198 if(s!=1)
4199 emit_mov(s,1);
fca1aef2 4200 emit_movimm(copr,0);
4201 emit_call((int)pcsx_mtc0);
4202#else
57871462 4203 emit_call((int)MTC0);
fca1aef2 4204#endif
7139f3c8 4205 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4206 emit_readword((int)&Count,HOST_CCREG);
4207 emit_readword((int)&next_interupt,ECX);
2573466a 4208 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4209 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
4210 emit_writeword(ECX,(int)&last_count);
4211 emit_storereg(CCREG,HOST_CCREG);
4212 }
7139f3c8 4213 if(copr==12||copr==13) {
57871462 4214 assert(!is_delayslot);
4215 emit_readword((int)&pending_exception,14);
4216 }
4217 emit_loadreg(rs1[i],s);
4218 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4219 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 4220 if(copr==12||copr==13) {
57871462 4221 emit_test(14,14);
4222 emit_jne((int)&do_interrupt);
4223 }
4224 cop1_usable=0;
4225 }
4226 else
4227 {
4228 assert(opcode2[i]==0x10);
3d624f89 4229#ifndef DISABLE_TLB
57871462 4230 if((source[i]&0x3f)==0x01) // TLBR
4231 emit_call((int)TLBR);
4232 if((source[i]&0x3f)==0x02) // TLBWI
4233 emit_call((int)TLBWI_new);
4234 if((source[i]&0x3f)==0x06) { // TLBWR
4235 // The TLB entry written by TLBWR is dependent on the count,
4236 // so update the cycle count
4237 emit_readword((int)&last_count,ECX);
4238 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4239 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4240 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4241 emit_writeword(HOST_CCREG,(int)&Count);
4242 emit_call((int)TLBWR_new);
4243 }
4244 if((source[i]&0x3f)==0x08) // TLBP
4245 emit_call((int)TLBP);
3d624f89 4246#endif
576bbd8f 4247#ifdef PCSX
4248 if((source[i]&0x3f)==0x10) // RFE
4249 {
4250 emit_readword((int)&Status,0);
4251 emit_andimm(0,0x3c,1);
4252 emit_andimm(0,~0xf,0);
4253 emit_orrshr_imm(1,2,0);
4254 emit_writeword(0,(int)&Status);
4255 }
4256#else
57871462 4257 if((source[i]&0x3f)==0x18) // ERET
4258 {
4259 int count=ccadj[i];
4260 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4261 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4262 emit_jmp((int)jump_eret);
4263 }
576bbd8f 4264#endif
57871462 4265 }
4266}
4267
b9b61529 4268static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4269{
4270 switch (copr) {
4271 case 1:
4272 case 3:
4273 case 5:
4274 case 8:
4275 case 9:
4276 case 10:
4277 case 11:
4278 emit_readword((int)&reg_cop2d[copr],tl);
4279 emit_signextend16(tl,tl);
4280 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4281 break;
4282 case 7:
4283 case 16:
4284 case 17:
4285 case 18:
4286 case 19:
4287 emit_readword((int)&reg_cop2d[copr],tl);
4288 emit_andimm(tl,0xffff,tl);
4289 emit_writeword(tl,(int)&reg_cop2d[copr]);
4290 break;
4291 case 15:
4292 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4293 emit_writeword(tl,(int)&reg_cop2d[copr]);
4294 break;
4295 case 28:
b9b61529 4296 case 29:
4297 emit_readword((int)&reg_cop2d[9],temp);
4298 emit_testimm(temp,0x8000); // do we need this?
4299 emit_andimm(temp,0xf80,temp);
4300 emit_andne_imm(temp,0,temp);
f70d384d 4301 emit_shrimm(temp,7,tl);
b9b61529 4302 emit_readword((int)&reg_cop2d[10],temp);
4303 emit_testimm(temp,0x8000);
4304 emit_andimm(temp,0xf80,temp);
4305 emit_andne_imm(temp,0,temp);
f70d384d 4306 emit_orrshr_imm(temp,2,tl);
b9b61529 4307 emit_readword((int)&reg_cop2d[11],temp);
4308 emit_testimm(temp,0x8000);
4309 emit_andimm(temp,0xf80,temp);
4310 emit_andne_imm(temp,0,temp);
f70d384d 4311 emit_orrshl_imm(temp,3,tl);
b9b61529 4312 emit_writeword(tl,(int)&reg_cop2d[copr]);
4313 break;
4314 default:
4315 emit_readword((int)&reg_cop2d[copr],tl);
4316 break;
4317 }
4318}
4319
4320static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4321{
4322 switch (copr) {
4323 case 15:
4324 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4325 emit_writeword(sl,(int)&reg_cop2d[copr]);
4326 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4327 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4328 emit_writeword(sl,(int)&reg_cop2d[14]);
4329 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4330 break;
4331 case 28:
4332 emit_andimm(sl,0x001f,temp);
f70d384d 4333 emit_shlimm(temp,7,temp);
b9b61529 4334 emit_writeword(temp,(int)&reg_cop2d[9]);
4335 emit_andimm(sl,0x03e0,temp);
f70d384d 4336 emit_shlimm(temp,2,temp);
b9b61529 4337 emit_writeword(temp,(int)&reg_cop2d[10]);
4338 emit_andimm(sl,0x7c00,temp);
f70d384d 4339 emit_shrimm(temp,3,temp);
b9b61529 4340 emit_writeword(temp,(int)&reg_cop2d[11]);
4341 emit_writeword(sl,(int)&reg_cop2d[28]);
4342 break;
4343 case 30:
4344 emit_movs(sl,temp);
4345 emit_mvnmi(temp,temp);
4346 emit_clz(temp,temp);
4347 emit_writeword(sl,(int)&reg_cop2d[30]);
4348 emit_writeword(temp,(int)&reg_cop2d[31]);
4349 break;
b9b61529 4350 case 31:
4351 break;
4352 default:
4353 emit_writeword(sl,(int)&reg_cop2d[copr]);
4354 break;
4355 }
4356}
4357
4358void cop2_assemble(int i,struct regstat *i_regs)
4359{
4360 u_int copr=(source[i]>>11)&0x1f;
4361 signed char temp=get_reg(i_regs->regmap,-1);
4362 if (opcode2[i]==0) { // MFC2
4363 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4364 if(tl>=0&&rt1[i]!=0)
b9b61529 4365 cop2_get_dreg(copr,tl,temp);
4366 }
4367 else if (opcode2[i]==4) { // MTC2
4368 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4369 cop2_put_dreg(copr,sl,temp);
4370 }
4371 else if (opcode2[i]==2) // CFC2
4372 {
4373 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4374 if(tl>=0&&rt1[i]!=0)
b9b61529 4375 emit_readword((int)&reg_cop2c[copr],tl);
4376 }
4377 else if (opcode2[i]==6) // CTC2
4378 {
4379 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4380 switch(copr) {
4381 case 4:
4382 case 12:
4383 case 20:
4384 case 26:
4385 case 27:
4386 case 29:
4387 case 30:
4388 emit_signextend16(sl,temp);
4389 break;
4390 case 31:
4391 //value = value & 0x7ffff000;
4392 //if (value & 0x7f87e000) value |= 0x80000000;
4393 emit_shrimm(sl,12,temp);
4394 emit_shlimm(temp,12,temp);
4395 emit_testimm(temp,0x7f000000);
4396 emit_testeqimm(temp,0x00870000);
4397 emit_testeqimm(temp,0x0000e000);
4398 emit_orrne_imm(temp,0x80000000,temp);
4399 break;
4400 default:
4401 temp=sl;
4402 break;
4403 }
4404 emit_writeword(temp,(int)&reg_cop2c[copr]);
4405 assert(sl>=0);
4406 }
4407}
4408
054175e9 4409static void c2op_prologue(u_int op,u_int reglist)
4410{
4411 save_regs_all(reglist);
82ed88eb 4412#ifdef PCNT
4413 emit_movimm(op,0);
4414 emit_call((int)pcnt_gte_start);
4415#endif
054175e9 4416 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4417}
4418
4419static void c2op_epilogue(u_int op,u_int reglist)
4420{
82ed88eb 4421#ifdef PCNT
4422 emit_movimm(op,0);
4423 emit_call((int)pcnt_gte_end);
4424#endif
054175e9 4425 restore_regs_all(reglist);
4426}
4427
4428static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4429{
4430 signed char temp=get_reg(i_regs->regmap,-1);
4431 u_int c2op=source[i]&0x3f;
4432 u_int hr,reglist=0;
054175e9 4433 int need_flags,need_ir;
b9b61529 4434 for(hr=0;hr<HOST_REGS;hr++) {
4435 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4436 }
b9b61529 4437
4438 if (gte_handlers[c2op]!=NULL) {
bedfea38 4439 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4440 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
4441 assem_debug("gte unneeded %016llx, need_flags %d, need_ir %d\n",
4442 gte_unneeded[i+1],need_flags,need_ir);
bedfea38 4443#ifdef ARMv5_ONLY
4444 // let's take more risk here
4445 need_flags=need_flags&&gte_reads_flags;
4446#endif
054175e9 4447 switch(c2op) {
4448 case GTE_MVMVA: {
4449 int shift = (source[i] >> 19) & 1;
4450 int v = (source[i] >> 15) & 3;
4451 int cv = (source[i] >> 13) & 3;
4452 int mx = (source[i] >> 17) & 3;
4453 int lm = (source[i] >> 10) & 1;
4454 reglist&=0x10ff; // +{r4-r7}
4455 c2op_prologue(c2op,reglist);
4456 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4457 if(v<3)
4458 emit_ldrd(v*8,0,4);
4459 else {
4460 emit_movzwl_indexed(9*4,0,4); // gteIR
4461 emit_movzwl_indexed(10*4,0,6);
4462 emit_movzwl_indexed(11*4,0,5);
4463 emit_orrshl_imm(6,16,4);
4464 }
4465 if(mx<3)
4466 emit_addimm(0,32*4+mx*8*4,6);
4467 else
4468 emit_readword((int)&zeromem_ptr,6);
4469 if(cv<3)
4470 emit_addimm(0,32*4+(cv*8+5)*4,7);
4471 else
4472 emit_readword((int)&zeromem_ptr,7);
4473#ifdef __ARM_NEON__
4474 emit_movimm(source[i],1); // opcode
4475 emit_call((int)gteMVMVA_part_neon);
4476 if(need_flags) {
4477 emit_movimm(lm,1);
4478 emit_call((int)gteMACtoIR_flags_neon);
4479 }
4480#else
4481 if(cv==3&&shift)
4482 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4483 else {
4484 emit_movimm(shift,1);
4485 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4486 }
4487 if(need_flags||need_ir) {
4488 if(need_flags)
4489 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4490 else
4491 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); // lm0 borked
4492 }
4493#endif
4494 break;
4495 }
b9b61529 4496
054175e9 4497 default:
4498 reglist&=0x100f;
4499 c2op_prologue(c2op,reglist);
4500 emit_movimm(source[i],1); // opcode
4501 emit_writeword(1,(int)&psxRegs.code);
4502 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4503 break;
4504 }
4505 c2op_epilogue(c2op,reglist);
4506 }
b9b61529 4507}
4508
4509void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4510{
4511 // XXX: should just just do the exception instead
4512 if(!cop1_usable) {
4513 int jaddr=(int)out;
4514 emit_jmp(0);
4515 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4516 cop1_usable=1;
4517 }
4518}
4519
57871462 4520void cop1_assemble(int i,struct regstat *i_regs)
4521{
3d624f89 4522#ifndef DISABLE_COP1
57871462 4523 // Check cop1 unusable
4524 if(!cop1_usable) {
4525 signed char rs=get_reg(i_regs->regmap,CSREG);
4526 assert(rs>=0);
4527 emit_testimm(rs,0x20000000);
4528 int jaddr=(int)out;
4529 emit_jeq(0);
4530 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4531 cop1_usable=1;
4532 }
4533 if (opcode2[i]==0) { // MFC1
4534 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4535 if(tl>=0) {
4536 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4537 emit_readword_indexed(0,tl,tl);
4538 }
4539 }
4540 else if (opcode2[i]==1) { // DMFC1
4541 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4542 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4543 if(tl>=0) {
4544 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4545 if(th>=0) emit_readword_indexed(4,tl,th);
4546 emit_readword_indexed(0,tl,tl);
4547 }
4548 }
4549 else if (opcode2[i]==4) { // MTC1
4550 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4551 signed char temp=get_reg(i_regs->regmap,-1);
4552 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4553 emit_writeword_indexed(sl,0,temp);
4554 }
4555 else if (opcode2[i]==5) { // DMTC1
4556 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4557 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4558 signed char temp=get_reg(i_regs->regmap,-1);
4559 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4560 emit_writeword_indexed(sh,4,temp);
4561 emit_writeword_indexed(sl,0,temp);
4562 }
4563 else if (opcode2[i]==2) // CFC1
4564 {
4565 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4566 if(tl>=0) {
4567 u_int copr=(source[i]>>11)&0x1f;
4568 if(copr==0) emit_readword((int)&FCR0,tl);
4569 if(copr==31) emit_readword((int)&FCR31,tl);
4570 }
4571 }
4572 else if (opcode2[i]==6) // CTC1
4573 {
4574 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4575 u_int copr=(source[i]>>11)&0x1f;
4576 assert(sl>=0);
4577 if(copr==31)
4578 {
4579 emit_writeword(sl,(int)&FCR31);
4580 // Set the rounding mode
4581 //FIXME
4582 //char temp=get_reg(i_regs->regmap,-1);
4583 //emit_andimm(sl,3,temp);
4584 //emit_fldcw_indexed((int)&rounding_modes,temp);
4585 }
4586 }
3d624f89 4587#else
4588 cop1_unusable(i, i_regs);
4589#endif
57871462 4590}
4591
4592void fconv_assemble_arm(int i,struct regstat *i_regs)
4593{
3d624f89 4594#ifndef DISABLE_COP1
57871462 4595 signed char temp=get_reg(i_regs->regmap,-1);
4596 assert(temp>=0);
4597 // Check cop1 unusable
4598 if(!cop1_usable) {
4599 signed char rs=get_reg(i_regs->regmap,CSREG);
4600 assert(rs>=0);
4601 emit_testimm(rs,0x20000000);
4602 int jaddr=(int)out;
4603 emit_jeq(0);
4604 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4605 cop1_usable=1;
4606 }
4607
4608 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4609 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4610 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4611 emit_flds(temp,15);
4612 emit_ftosizs(15,15); // float->int, truncate
4613 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4614 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4615 emit_fsts(15,temp);
4616 return;
4617 }
4618 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4619 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4620 emit_vldr(temp,7);
4621 emit_ftosizd(7,13); // double->int, truncate
4622 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4623 emit_fsts(13,temp);
4624 return;
4625 }
4626
4627 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4628 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4629 emit_flds(temp,13);
4630 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4631 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4632 emit_fsitos(13,15);
4633 emit_fsts(15,temp);
4634 return;
4635 }
4636 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4637 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4638 emit_flds(temp,13);
4639 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4640 emit_fsitod(13,7);
4641 emit_vstr(7,temp);
4642 return;
4643 }
4644
4645 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4646 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4647 emit_flds(temp,13);
4648 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4649 emit_fcvtds(13,7);
4650 emit_vstr(7,temp);
4651 return;
4652 }
4653 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4654 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4655 emit_vldr(temp,7);
4656 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4657 emit_fcvtsd(7,13);
4658 emit_fsts(13,temp);
4659 return;
4660 }
4661 #endif
4662
4663 // C emulation code
4664
4665 u_int hr,reglist=0;
4666 for(hr=0;hr<HOST_REGS;hr++) {
4667 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4668 }
4669 save_regs(reglist);
4670
4671 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4672 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4673 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4674 emit_call((int)cvt_s_w);
4675 }
4676 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4677 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4678 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4679 emit_call((int)cvt_d_w);
4680 }
4681 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4682 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4683 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4684 emit_call((int)cvt_s_l);
4685 }
4686 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4687 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4688 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4689 emit_call((int)cvt_d_l);
4690 }
4691
4692 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4693 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4694 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4695 emit_call((int)cvt_d_s);
4696 }
4697 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4698 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4699 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4700 emit_call((int)cvt_w_s);
4701 }
4702 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4703 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4704 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4705 emit_call((int)cvt_l_s);
4706 }
4707
4708 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4709 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4710 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4711 emit_call((int)cvt_s_d);
4712 }
4713 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4714 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4715 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4716 emit_call((int)cvt_w_d);
4717 }
4718 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4719 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4720 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4721 emit_call((int)cvt_l_d);
4722 }
4723
4724 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4725 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4726 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4727 emit_call((int)round_l_s);
4728 }
4729 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4730 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4731 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4732 emit_call((int)trunc_l_s);
4733 }
4734 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4735 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4736 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4737 emit_call((int)ceil_l_s);
4738 }
4739 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4740 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4741 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4742 emit_call((int)floor_l_s);
4743 }
4744 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4745 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4746 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4747 emit_call((int)round_w_s);
4748 }
4749 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4750 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4751 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4752 emit_call((int)trunc_w_s);
4753 }
4754 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4755 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4756 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4757 emit_call((int)ceil_w_s);
4758 }
4759 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4760 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4761 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4762 emit_call((int)floor_w_s);
4763 }
4764
4765 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4766 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4767 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4768 emit_call((int)round_l_d);
4769 }
4770 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4771 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4772 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4773 emit_call((int)trunc_l_d);
4774 }
4775 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4776 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4777 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4778 emit_call((int)ceil_l_d);
4779 }
4780 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4781 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4782 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4783 emit_call((int)floor_l_d);
4784 }
4785 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4786 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4787 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4788 emit_call((int)round_w_d);
4789 }
4790 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4791 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4792 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4793 emit_call((int)trunc_w_d);
4794 }
4795 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4796 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4797 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4798 emit_call((int)ceil_w_d);
4799 }
4800 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4801 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4802 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4803 emit_call((int)floor_w_d);
4804 }
4805
4806 restore_regs(reglist);
3d624f89 4807#else
4808 cop1_unusable(i, i_regs);
4809#endif
57871462 4810}
4811#define fconv_assemble fconv_assemble_arm
4812
4813void fcomp_assemble(int i,struct regstat *i_regs)
4814{
3d624f89 4815#ifndef DISABLE_COP1
57871462 4816 signed char fs=get_reg(i_regs->regmap,FSREG);
4817 signed char temp=get_reg(i_regs->regmap,-1);
4818 assert(temp>=0);
4819 // Check cop1 unusable
4820 if(!cop1_usable) {
4821 signed char cs=get_reg(i_regs->regmap,CSREG);
4822 assert(cs>=0);
4823 emit_testimm(cs,0x20000000);
4824 int jaddr=(int)out;
4825 emit_jeq(0);
4826 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4827 cop1_usable=1;
4828 }
4829
4830 if((source[i]&0x3f)==0x30) {
4831 emit_andimm(fs,~0x800000,fs);
4832 return;
4833 }
4834
4835 if((source[i]&0x3e)==0x38) {
4836 // sf/ngle - these should throw exceptions for NaNs
4837 emit_andimm(fs,~0x800000,fs);
4838 return;
4839 }
4840
4841 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4842 if(opcode2[i]==0x10) {
4843 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4844 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4845 emit_orimm(fs,0x800000,fs);
4846 emit_flds(temp,14);
4847 emit_flds(HOST_TEMPREG,15);
4848 emit_fcmps(14,15);
4849 emit_fmstat();
4850 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4851 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4852 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4853 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4854 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4855 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4856 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4857 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4858 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4859 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4860 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4861 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4862 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4863 return;
4864 }
4865 if(opcode2[i]==0x11) {
4866 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4867 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4868 emit_orimm(fs,0x800000,fs);
4869 emit_vldr(temp,6);
4870 emit_vldr(HOST_TEMPREG,7);
4871 emit_fcmpd(6,7);
4872 emit_fmstat();
4873 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4874 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4875 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4876 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4877 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4878 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4879 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4880 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4881 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4882 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4883 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4884 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4885 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4886 return;
4887 }
4888 #endif
4889
4890 // C only
4891
4892 u_int hr,reglist=0;
4893 for(hr=0;hr<HOST_REGS;hr++) {
4894 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4895 }
4896 reglist&=~(1<<fs);
4897 save_regs(reglist);
4898 if(opcode2[i]==0x10) {
4899 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4900 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4901 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4902 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4903 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4904 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4905 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4906 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4907 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4908 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4909 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4910 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4911 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4912 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4913 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4914 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4915 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4916 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4917 }
4918 if(opcode2[i]==0x11) {
4919 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4920 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4921 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4922 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4923 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4924 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4925 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4926 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4927 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4928 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4929 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4930 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4931 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4932 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4933 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4934 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4935 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4936 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4937 }
4938 restore_regs(reglist);
4939 emit_loadreg(FSREG,fs);
3d624f89 4940#else
4941 cop1_unusable(i, i_regs);
4942#endif
57871462 4943}
4944
4945void float_assemble(int i,struct regstat *i_regs)
4946{
3d624f89 4947#ifndef DISABLE_COP1
57871462 4948 signed char temp=get_reg(i_regs->regmap,-1);
4949 assert(temp>=0);
4950 // Check cop1 unusable
4951 if(!cop1_usable) {
4952 signed char cs=get_reg(i_regs->regmap,CSREG);
4953 assert(cs>=0);
4954 emit_testimm(cs,0x20000000);
4955 int jaddr=(int)out;
4956 emit_jeq(0);
4957 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4958 cop1_usable=1;
4959 }
4960
4961 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4962 if((source[i]&0x3f)==6) // mov
4963 {
4964 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4965 if(opcode2[i]==0x10) {
4966 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4967 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4968 emit_readword_indexed(0,temp,temp);
4969 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4970 }
4971 if(opcode2[i]==0x11) {
4972 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4973 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4974 emit_vldr(temp,7);
4975 emit_vstr(7,HOST_TEMPREG);
4976 }
4977 }
4978 return;
4979 }
4980
4981 if((source[i]&0x3f)>3)
4982 {
4983 if(opcode2[i]==0x10) {
4984 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4985 emit_flds(temp,15);
4986 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4987 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4988 }
4989 if((source[i]&0x3f)==4) // sqrt
4990 emit_fsqrts(15,15);
4991 if((source[i]&0x3f)==5) // abs
4992 emit_fabss(15,15);
4993 if((source[i]&0x3f)==7) // neg
4994 emit_fnegs(15,15);
4995 emit_fsts(15,temp);
4996 }
4997 if(opcode2[i]==0x11) {
4998 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4999 emit_vldr(temp,7);
5000 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5001 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5002 }
5003 if((source[i]&0x3f)==4) // sqrt
5004 emit_fsqrtd(7,7);
5005 if((source[i]&0x3f)==5) // abs
5006 emit_fabsd(7,7);
5007 if((source[i]&0x3f)==7) // neg
5008 emit_fnegd(7,7);
5009 emit_vstr(7,temp);
5010 }
5011 return;
5012 }
5013 if((source[i]&0x3f)<4)
5014 {
5015 if(opcode2[i]==0x10) {
5016 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5017 }
5018 if(opcode2[i]==0x11) {
5019 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5020 }
5021 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5022 if(opcode2[i]==0x10) {
5023 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5024 emit_flds(temp,15);
5025 emit_flds(HOST_TEMPREG,13);
5026 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5027 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5028 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5029 }
5030 }
5031 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5032 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5033 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5034 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5035 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5036 emit_fsts(15,HOST_TEMPREG);
5037 }else{
5038 emit_fsts(15,temp);
5039 }
5040 }
5041 else if(opcode2[i]==0x11) {
5042 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5043 emit_vldr(temp,7);
5044 emit_vldr(HOST_TEMPREG,6);
5045 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5046 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5047 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5048 }
5049 }
5050 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5051 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5052 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5053 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5054 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5055 emit_vstr(7,HOST_TEMPREG);
5056 }else{
5057 emit_vstr(7,temp);
5058 }
5059 }
5060 }
5061 else {
5062 if(opcode2[i]==0x10) {
5063 emit_flds(temp,15);
5064 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5065 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5066 }
5067 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5068 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5069 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5070 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5071 emit_fsts(15,temp);
5072 }
5073 else if(opcode2[i]==0x11) {
5074 emit_vldr(temp,7);
5075 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5076 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5077 }
5078 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5079 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5080 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5081 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5082 emit_vstr(7,temp);
5083 }
5084 }
5085 return;
5086 }
5087 #endif
5088
5089 u_int hr,reglist=0;
5090 for(hr=0;hr<HOST_REGS;hr++) {
5091 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5092 }
5093 if(opcode2[i]==0x10) { // Single precision
5094 save_regs(reglist);
5095 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5096 if((source[i]&0x3f)<4) {
5097 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5098 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5099 }else{
5100 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5101 }
5102 switch(source[i]&0x3f)
5103 {
5104 case 0x00: emit_call((int)add_s);break;
5105 case 0x01: emit_call((int)sub_s);break;
5106 case 0x02: emit_call((int)mul_s);break;
5107 case 0x03: emit_call((int)div_s);break;
5108 case 0x04: emit_call((int)sqrt_s);break;
5109 case 0x05: emit_call((int)abs_s);break;
5110 case 0x06: emit_call((int)mov_s);break;
5111 case 0x07: emit_call((int)neg_s);break;
5112 }
5113 restore_regs(reglist);
5114 }
5115 if(opcode2[i]==0x11) { // Double precision
5116 save_regs(reglist);
5117 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5118 if((source[i]&0x3f)<4) {
5119 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5120 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5121 }else{
5122 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5123 }
5124 switch(source[i]&0x3f)
5125 {
5126 case 0x00: emit_call((int)add_d);break;
5127 case 0x01: emit_call((int)sub_d);break;
5128 case 0x02: emit_call((int)mul_d);break;
5129 case 0x03: emit_call((int)div_d);break;
5130 case 0x04: emit_call((int)sqrt_d);break;
5131 case 0x05: emit_call((int)abs_d);break;
5132 case 0x06: emit_call((int)mov_d);break;
5133 case 0x07: emit_call((int)neg_d);break;
5134 }
5135 restore_regs(reglist);
5136 }
3d624f89 5137#else
5138 cop1_unusable(i, i_regs);
5139#endif
57871462 5140}
5141
// Emit host code for the MIPS multiply/divide instruction at index i.
// opcode2[i] selects the operation: MULT/MULTU/DIV/DIVU (32-bit) and, unless
// FORCE32 is defined, DMULT/DMULTU/DDIV/DDIVU (64-bit).
//   i      - index of the instruction being assembled
//   i_regs - register state; i_regs->regmap is searched with get_reg() to find
//            the host register holding each guest register (HIREG/LOREG and
//            the two source registers rs1[i]/rs2[i])
// When rs1[i] or rs2[i] is 0, no arithmetic is emitted; HI and LO are simply
// zeroed if they are mapped to host registers (see the final else branch).
// NOTE(review): the DIV and DIVU sequences branch to hard-coded byte offsets
// from `out`, so the number and order of emit_* calls there must not change.
5142void multdiv_assemble_arm(int i,struct regstat *i_regs)
5143{
5144 // case 0x18: MULT
5145 // case 0x19: MULTU
5146 // case 0x1A: DIV
5147 // case 0x1B: DIVU
5148 // case 0x1C: DMULT
5149 // case 0x1D: DMULTU
5150 // case 0x1E: DDIV
5151 // case 0x1F: DDIVU
5152 if(rs1[i]&&rs2[i])
5153 {
5154 if((opcode2[i]&4)==0) // 32-bit
5155 {
5156 if(opcode2[i]==0x18) // MULT
5157 {
5158 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5159 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5160 signed char hi=get_reg(i_regs->regmap,HIREG);
5161 signed char lo=get_reg(i_regs->regmap,LOREG);
5162 assert(m1>=0);
5163 assert(m2>=0);
5164 assert(hi>=0);
5165 assert(lo>=0);
5166 emit_smull(m1,m2,hi,lo);
5167 }
5168 if(opcode2[i]==0x19) // MULTU
5169 {
5170 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5171 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5172 signed char hi=get_reg(i_regs->regmap,HIREG);
5173 signed char lo=get_reg(i_regs->regmap,LOREG);
5174 assert(m1>=0);
5175 assert(m2>=0);
5176 assert(hi>=0);
5177 assert(lo>=0);
5178 emit_umull(m1,m2,hi,lo);
5179 }
5180 if(opcode2[i]==0x1A) // DIV
5181 {
// Signed divide, emitted inline as a shift-and-subtract loop; the quotient
// goes to the LO mapping and the remainder to the HI mapping.
// NOTE(review): the branch offsets below assume every emit_* call in this
// sequence emits exactly one 4-byte instruction - confirm before editing.
5182 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5183 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5184 assert(d1>=0);
5185 assert(d2>=0);
5186 signed char quotient=get_reg(i_regs->regmap,LOREG);
5187 signed char remainder=get_reg(i_regs->regmap,HIREG);
5188 assert(quotient>=0);
5189 assert(remainder>=0);
5190 emit_movs(d1,remainder);
44a80f6a 5191 emit_movimm(0xffffffff,quotient);
5192 emit_negmi(quotient,quotient); // .. quotient and ..
5193 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5194 emit_movs(d2,HOST_TEMPREG);
// out+52 is 13 words past this branch, i.e. the final emit_test/emit_negmi
// pair, so a zero divisor skips the whole loop and the quotient sign fix-up.
5195 emit_jeq((int)out+52); // Division by zero
5196 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5197 emit_clz(HOST_TEMPREG,quotient);
5198 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5199 emit_orimm(quotient,1<<31,quotient);
5200 emit_shr(quotient,quotient,quotient);
5201 emit_cmp(remainder,HOST_TEMPREG);
5202 emit_subcs(remainder,HOST_TEMPREG,remainder);
5203 emit_adcs(quotient,quotient,quotient);
5204 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
// out-16 is 4 words back from this branch: the emit_cmp above (loop head).
5205 emit_jcc((int)out-16); // -4
5206 emit_teq(d1,d2);
5207 emit_negmi(quotient,quotient);
5208 emit_test(d1,d1);
5209 emit_negmi(remainder,remainder);
5210 }
5211 if(opcode2[i]==0x1B) // DIVU
5212 {
// Unsigned divide, same inline loop shape as DIV but without sign handling.
// NOTE(review): emit_shl(d2,HOST_TEMPREG,d2) writes to the host register
// holding the divisor (rs2) - confirm it ends up restored after the loop.
5213 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5214 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5215 assert(d1>=0);
5216 assert(d2>=0);
5217 signed char quotient=get_reg(i_regs->regmap,LOREG);
5218 signed char remainder=get_reg(i_regs->regmap,HIREG);
5219 assert(quotient>=0);
5220 assert(remainder>=0);
44a80f6a 5221 emit_mov(d1,remainder);
5222 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5223 emit_test(d2,d2);
// out+40 is 10 words past this branch, i.e. just after the emit_jcc that ends
// the sequence (assuming one 4-byte instruction per emit_* call), so a zero
// divisor keeps the two values set up above.
44a80f6a 5224 emit_jeq((int)out+40); // Division by zero
57871462 5225 emit_clz(d2,HOST_TEMPREG);
5226 emit_movimm(1<<31,quotient);
5227 emit_shl(d2,HOST_TEMPREG,d2);
57871462 5228 emit_shr(quotient,HOST_TEMPREG,quotient);
5229 emit_cmp(remainder,d2);
5230 emit_subcs(remainder,d2,remainder);
5231 emit_adcs(quotient,quotient,quotient);
5232 emit_shrcc_imm(d2,1,d2);
// out-16 is 4 words back from this branch: the emit_cmp above (loop head).
5233 emit_jcc((int)out-16); // -4
5234 }
5235 }
5236 else // 64-bit
4600ba03 5237#ifndef FORCE32
57871462 5238 {
5239 if(opcode2[i]==0x1C) // DMULT
5240 {
// This assert can never hold inside this branch, so with assertions enabled
// DMULT always trips it; the code after it only runs when asserts are
// compiled out.
5241 assert(opcode2[i]!=0x1C);
5242 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5243 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5244 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5245 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5246 assert(m1h>=0);
5247 assert(m2h>=0);
5248 assert(m1l>=0);
5249 assert(m2l>=0);
5250 emit_pushreg(m2h);
5251 emit_pushreg(m2l);
5252 emit_pushreg(m1h);
5253 emit_pushreg(m1l);
5254 emit_call((int)&mult64);
5255 emit_popreg(m1l);
5256 emit_popreg(m1h);
5257 emit_popreg(m2l);
5258 emit_popreg(m2h);
5259 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5260 signed char hil=get_reg(i_regs->regmap,HIREG);
5261 if(hih>=0) emit_loadreg(HIREG|64,hih);
5262 if(hil>=0) emit_loadreg(HIREG,hil);
5263 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5264 signed char lol=get_reg(i_regs->regmap,LOREG);
5265 if(loh>=0) emit_loadreg(LOREG|64,loh);
5266 if(lol>=0) emit_loadreg(LOREG,lol);
5267 }
5268 if(opcode2[i]==0x1D) // DMULTU
5269 {
5270 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5271 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5272 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5273 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5274 assert(m1h>=0);
5275 assert(m2h>=0);
5276 assert(m1l>=0);
5277 assert(m2l>=0);
// Move the four operand halves into host registers 0-3 for the call.
// 0x100f has bits 0-3 and 12 set - presumably a host-register mask; confirm
// against save_regs. A source half that sits in an already overwritten
// low register is read back from dynarec_local+n*4 instead - presumably the
// slot save_regs stored host register n in (TODO confirm).
5278 save_regs(0x100f);
5279 if(m1l!=0) emit_mov(m1l,0);
5280 if(m1h==0) emit_readword((int)&dynarec_local,1);
5281 else if(m1h>1) emit_mov(m1h,1);
5282 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5283 else if(m2l>2) emit_mov(m2l,2);
5284 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5285 else if(m2h>3) emit_mov(m2h,3);
5286 emit_call((int)&multu64);
5287 restore_regs(0x100f);
// NOTE(review): the four lookups below are never used - unlike DDIV/DDIVU, no
// emit_loadreg follows the call here. Everything from the next "/*" down to
// the "*/" after the DEBUG section is commented out.
5288 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5289 signed char hil=get_reg(i_regs->regmap,HIREG);
5290 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5291 signed char lol=get_reg(i_regs->regmap,LOREG);
5292 /*signed char temp=get_reg(i_regs->regmap,-1);
5293 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5294 signed char rl=get_reg(i_regs->regmap,HIREG);
5295 assert(m1h>=0);
5296 assert(m2h>=0);
5297 assert(m1l>=0);
5298 assert(m2l>=0);
5299 assert(temp>=0);
5300 //emit_mov(m1l,EAX);
5301 //emit_mul(m2l);
5302 emit_umull(rl,rh,m1l,m2l);
5303 emit_storereg(LOREG,rl);
5304 emit_mov(rh,temp);
5305 //emit_mov(m1h,EAX);
5306 //emit_mul(m2l);
5307 emit_umull(rl,rh,m1h,m2l);
5308 emit_adds(rl,temp,temp);
5309 emit_adcimm(rh,0,rh);
5310 emit_storereg(HIREG,rh);
5311 //emit_mov(m2h,EAX);
5312 //emit_mul(m1l);
5313 emit_umull(rl,rh,m1l,m2h);
5314 emit_adds(rl,temp,temp);
5315 emit_adcimm(rh,0,rh);
5316 emit_storereg(LOREG|64,temp);
5317 emit_mov(rh,temp);
5318 //emit_mov(m2h,EAX);
5319 //emit_mul(m1h);
5320 emit_umull(rl,rh,m1h,m2h);
5321 emit_adds(rl,temp,rl);
5322 emit_loadreg(HIREG,temp);
5323 emit_adcimm(rh,0,rh);
5324 emit_adds(rl,temp,rl);
5325 emit_adcimm(rh,0,rh);
5326 // DEBUG
5327 /*
5328 emit_pushreg(m2h);
5329 emit_pushreg(m2l);
5330 emit_pushreg(m1h);
5331 emit_pushreg(m1l);
5332 emit_call((int)&multu64);
5333 emit_popreg(m1l);
5334 emit_popreg(m1h);
5335 emit_popreg(m2l);
5336 emit_popreg(m2h);
5337 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5338 signed char hil=get_reg(i_regs->regmap,HIREG);
5339 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5340 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5341 */
5342 // Shouldn't be necessary
5343 //char loh=get_reg(i_regs->regmap,LOREG|64);
5344 //char lol=get_reg(i_regs->regmap,LOREG);
5345 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5346 //if(lol>=0) emit_loadreg(LOREG,lol);
5347 }
5348 if(opcode2[i]==0x1E) // DDIV
5349 {
5350 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5351 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5352 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5353 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5354 assert(d1h>=0);
5355 assert(d2h>=0);
5356 assert(d1l>=0);
5357 assert(d2l>=0);
// Same argument marshalling into host registers 0-3 as DMULTU, then call
// div64 and reload whichever HI/LO halves are mapped to host registers.
5358 save_regs(0x100f);
5359 if(d1l!=0) emit_mov(d1l,0);
5360 if(d1h==0) emit_readword((int)&dynarec_local,1);
5361 else if(d1h>1) emit_mov(d1h,1);
5362 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5363 else if(d2l>2) emit_mov(d2l,2);
5364 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5365 else if(d2h>3) emit_mov(d2h,3);
5366 emit_call((int)&div64);
5367 restore_regs(0x100f);
5368 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5369 signed char hil=get_reg(i_regs->regmap,HIREG);
5370 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5371 signed char lol=get_reg(i_regs->regmap,LOREG);
5372 if(hih>=0) emit_loadreg(HIREG|64,hih);
5373 if(hil>=0) emit_loadreg(HIREG,hil);
5374 if(loh>=0) emit_loadreg(LOREG|64,loh);
5375 if(lol>=0) emit_loadreg(LOREG,lol);
5376 }
5377 if(opcode2[i]==0x1F) // DDIVU
5378 {
5379 //u_int hr,reglist=0;
5380 //for(hr=0;hr<HOST_REGS;hr++) {
5381 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5382 //}
5383 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5384 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5385 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5386 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5387 assert(d1h>=0);
5388 assert(d2h>=0);
5389 assert(d1l>=0);
5390 assert(d2l>=0);
// Identical to DDIV except that divu64 is called.
5391 save_regs(0x100f);
5392 if(d1l!=0) emit_mov(d1l,0);
5393 if(d1h==0) emit_readword((int)&dynarec_local,1);
5394 else if(d1h>1) emit_mov(d1h,1);
5395 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5396 else if(d2l>2) emit_mov(d2l,2);
5397 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5398 else if(d2h>3) emit_mov(d2h,3);
5399 emit_call((int)&divu64);
5400 restore_regs(0x100f);
5401 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5402 signed char hil=get_reg(i_regs->regmap,HIREG);
5403 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5404 signed char lol=get_reg(i_regs->regmap,LOREG);
5405 if(hih>=0) emit_loadreg(HIREG|64,hih);
5406 if(hil>=0) emit_loadreg(HIREG,hil);
5407 if(loh>=0) emit_loadreg(LOREG|64,loh);
5408 if(lol>=0) emit_loadreg(LOREG,lol);
5409 }
5410 }
4600ba03 5411#else
5412 assert(0);
5413#endif
57871462 5414 }
5415 else
5416 {
5417 // Multiply by zero is zero.
5418 // MIPS does not have a divide by zero exception.
5419 // The result is undefined, we return zero.
5420 signed char hr=get_reg(i_regs->regmap,HIREG);
5421 signed char lr=get_reg(i_regs->regmap,LOREG);
5422 if(hr>=0) emit_zeroreg(hr);
5423 if(lr>=0) emit_zeroreg(lr);
5424 }
5425}
// Generic name under which this backend's implementation is referenced.
5426#define multdiv_assemble multdiv_assemble_arm
5427
// Rehash preload hook: deliberately emits nothing on ARM.
// The x86 backend uses this step to put the value 0xf8 into register r;
// on ARM the hash is computed by a single instruction (see do_rhash).
void do_preload_rhash(int r) {
  (void)r; // parameter is intentionally unused on this architecture
}
5432
5433void do_preload_rhtbl(int ht) {
5434 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5435}
5436
// Hash the address in host register rs into host register rh.
// Masking with 0xf8 keeps bits 3-7, which equals ((x&0xFF)>>3)*8: the byte
// offset of an 8-byte row in a 32-row table such as mini_ht[32][2]
// (assuming 4-byte u_int).
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5440
5441void do_miniht_load(int ht,int rh) {
5442 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5443 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5444}
5445
5446void do_miniht_jump(int rs,int rh,int ht) {
5447 emit_cmp(rh,rs);
5448 emit_ldreq_indexed(ht,4,15);
5449 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5450 emit_mov(rs,7);
5451 emit_jmp(jump_vaddr_reg[7]);
5452 #else
5453 emit_jmp(jump_vaddr_reg[rs]);
5454 #endif
5455}
5456
5457void do_miniht_insert(u_int return_address,int rt,int temp) {
5458 #ifdef ARMv5_ONLY
5459 emit_movimm(return_address,rt); // PC into link register
5460 add_to_linker((int)out,return_address,1);
5461 emit_pcreladdr(temp);
5462 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5463 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5464 #else
5465 emit_movw(return_address&0x0000FFFF,rt);
5466 add_to_linker((int)out,return_address,1);
5467 emit_pcreladdr(temp);
5468 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5469 emit_movt(return_address&0xFFFF0000,rt);
5470 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5471 #endif
5472}
5473
5474// Sign-extend to 64 bits and write out upper half of a register
5475// This is useful where we have a 32-bit value in a register, and want to
5476// keep it in a 32-bit register, but can't guarantee that it won't be read
5477// as a 64-bit value later.
5478void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5479{
24385cae 5480#ifndef FORCE32
57871462 5481 if(is32_pre==is32) return;
5482 int hr,reg;
5483 for(hr=0;hr<HOST_REGS;hr++) {
5484 if(hr!=EXCLUDE_REG) {
5485 //if(pre[hr]==entry[hr]) {
5486 if((reg=pre[hr])>=0) {
5487 if((dirty>>hr)&1) {
5488 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5489 emit_sarimm(hr,31,HOST_TEMPREG);
5490 emit_storereg(reg|64,HOST_TEMPREG);
5491 }
5492 }
5493 }
5494 //}
5495 }
5496 }
24385cae 5497#endif
57871462 5498}
5499
5500void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5501{
5502 //if(dirty_pre==dirty) return;
5503 int hr,reg,new_hr;
5504 for(hr=0;hr<HOST_REGS;hr++) {
5505 if(hr!=EXCLUDE_REG) {
5506 reg=pre[hr];
5507 if(((~u)>>(reg&63))&1) {
f776eb14 5508 if(reg>0) {
57871462 5509 if(((dirty_pre&~dirty)>>hr)&1) {
5510 if(reg>0&&reg<34) {
5511 emit_storereg(reg,hr);
5512 if( ((is32_pre&~uu)>>reg)&1 ) {
5513 emit_sarimm(hr,31,HOST_TEMPREG);
5514 emit_storereg(reg|64,HOST_TEMPREG);
5515 }
5516 }
5517 else if(reg>=64) {
5518 emit_storereg(reg,hr);
5519 }
5520 }
5521 }
57871462 5522 }
5523 }
5524 }
5525}
5526
5527
5528/* using strd could possibly help but you'd have to allocate registers in pairs
5529void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5530{
5531 int hr;
5532 int wrote=-1;
5533 for(hr=HOST_REGS-1;hr>=0;hr--) {
5534 if(hr!=EXCLUDE_REG) {
5535 if(pre[hr]!=entry[hr]) {
5536 if(pre[hr]>=0) {
5537 if((dirty>>hr)&1) {
5538 if(get_reg(entry,pre[hr])<0) {
5539 if(pre[hr]<64) {
5540 if(!((u>>pre[hr])&1)) {
5541 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5542 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5543 emit_sarimm(hr,31,hr+1);
5544 emit_strdreg(pre[hr],hr);
5545 }
5546 else
5547 emit_storereg(pre[hr],hr);
5548 }else{
5549 emit_storereg(pre[hr],hr);
5550 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5551 emit_sarimm(hr,31,hr);
5552 emit_storereg(pre[hr]|64,hr);
5553 }
5554 }
5555 }
5556 }else{
5557 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5558 emit_storereg(pre[hr],hr);
5559 }
5560 }
5561 wrote=hr;
5562 }
5563 }
5564 }
5565 }
5566 }
5567 }
5568 for(hr=0;hr<HOST_REGS;hr++) {
5569 if(hr!=EXCLUDE_REG) {
5570 if(pre[hr]!=entry[hr]) {
5571 if(pre[hr]>=0) {
5572 int nr;
5573 if((nr=get_reg(entry,pre[hr]))>=0) {
5574 emit_mov(hr,nr);
5575 }
5576 }
5577 }
5578 }
5579 }
5580}
5581#define wb_invalidate wb_invalidate_arm
5582*/
5583
dd3a91a1 5584// Clearing the cache is rather slow on ARM Linux, so mark the areas
5585// that need to be cleared, and then only clear these areas once.
5586void do_clear_cache()
5587{
5588 int i,j;
5589 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5590 {
5591 u_int bitmap=needs_clear_cache[i];
5592 if(bitmap) {
5593 u_int start,end;
5594 for(j=0;j<32;j++)
5595 {
5596 if(bitmap&(1<<j)) {
5597 start=BASE_ADDR+i*131072+j*4096;
5598 end=start+4095;
5599 j++;
5600 while(j<32) {
5601 if(bitmap&(1<<j)) {
5602 end+=4096;
5603 j++;
5604 }else{
5605 __clear_cache((void *)start,(void *)end);
5606 break;
5607 }
5608 }
5609 }
5610 }
5611 needs_clear_cache[i]=0;
5612 }
5613 }
5614}
5615
57871462 5616// CPU-architecture-specific initialization
5617void arch_init() {
3d624f89 5618#ifndef DISABLE_COP1
57871462 5619 rounding_modes[0]=0x0<<22; // round
5620 rounding_modes[1]=0x3<<22; // trunc
5621 rounding_modes[2]=0x1<<22; // ceil
5622 rounding_modes[3]=0x2<<22; // floor
3d624f89 5623#endif
57871462 5624}
b9b61529 5625
5626// vim:shiftwidth=2:expandtab