drc: implement literal dupe merging
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22extern int cycle_count;
23extern int last_count;
24extern int pcaddr;
25extern int pending_exception;
26extern int branch_target;
27extern uint64_t readmem_dword;
3d624f89 28#ifdef MUPEN64
57871462 29extern precomp_instr fake_pc;
3d624f89 30#endif
57871462 31extern void *dynarec_local;
32extern u_int memory_map[1048576];
33extern u_int mini_ht[32][2];
34extern u_int rounding_modes[4];
35
36void indirect_jump_indexed();
37void indirect_jump();
38void do_interrupt();
39void jump_vaddr_r0();
40void jump_vaddr_r1();
41void jump_vaddr_r2();
42void jump_vaddr_r3();
43void jump_vaddr_r4();
44void jump_vaddr_r5();
45void jump_vaddr_r6();
46void jump_vaddr_r7();
47void jump_vaddr_r8();
48void jump_vaddr_r9();
49void jump_vaddr_r10();
50void jump_vaddr_r12();
51
52const u_int jump_vaddr_reg[16] = {
53 (int)jump_vaddr_r0,
54 (int)jump_vaddr_r1,
55 (int)jump_vaddr_r2,
56 (int)jump_vaddr_r3,
57 (int)jump_vaddr_r4,
58 (int)jump_vaddr_r5,
59 (int)jump_vaddr_r6,
60 (int)jump_vaddr_r7,
61 (int)jump_vaddr_r8,
62 (int)jump_vaddr_r9,
63 (int)jump_vaddr_r10,
64 0,
65 (int)jump_vaddr_r12,
66 0,
67 0,
68 0};
69
0bbd1454 70void invalidate_addr_r0();
71void invalidate_addr_r1();
72void invalidate_addr_r2();
73void invalidate_addr_r3();
74void invalidate_addr_r4();
75void invalidate_addr_r5();
76void invalidate_addr_r6();
77void invalidate_addr_r7();
78void invalidate_addr_r8();
79void invalidate_addr_r9();
80void invalidate_addr_r10();
81void invalidate_addr_r12();
82
83const u_int invalidate_addr_reg[16] = {
84 (int)invalidate_addr_r0,
85 (int)invalidate_addr_r1,
86 (int)invalidate_addr_r2,
87 (int)invalidate_addr_r3,
88 (int)invalidate_addr_r4,
89 (int)invalidate_addr_r5,
90 (int)invalidate_addr_r6,
91 (int)invalidate_addr_r7,
92 (int)invalidate_addr_r8,
93 (int)invalidate_addr_r9,
94 (int)invalidate_addr_r10,
95 0,
96 (int)invalidate_addr_r12,
97 0,
98 0,
99 0};
100
57871462 101#include "fpu.h"
102
// Array of 1<<(TARGET_SIZE_2-17) unsigned int words.
// NOTE(review): presumably a bitmap over the translation cache marking
// regions whose instruction cache still has to be flushed; the bit
// granularity is not visible here -- confirm against the code that uses it.
dd3a91a1 103unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
104
57871462 105/* Linker */
106
107void set_jump_target(int addr,u_int target)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
113 assert((addr&3)==0);
114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
116 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
121 assert((addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
126 assert((addr&3)==0);
127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
138// This optionally copies the instruction from the target of the branch into
139// the space before the branch. Works, but the difference in speed is
140// usually insignificant.
141void set_jump_target_fillslot(int addr,u_int target,int copy)
142{
143 u_char *ptr=(u_char *)addr;
144 u_int *ptr2=(u_int *)ptr;
145 assert(!copy||ptr2[-1]==0xe28dd000);
146 if(ptr[3]==0xe2) {
147 assert(!copy);
148 assert((target-(u_int)ptr2-8)<4096);
149 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
150 }
151 else {
152 assert((ptr[3]&0x0e)==0xa);
153 u_int target_insn=*(u_int *)target;
154 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
155 copy=0;
156 }
157 if((target_insn&0x0c100000)==0x04100000) { // Load
158 copy=0;
159 }
160 if(target_insn&0x08000000) {
161 copy=0;
162 }
163 if(copy) {
164 ptr2[-1]=target_insn;
165 target+=4;
166 }
167 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
168 }
169}
170
171/* Literal pool */
172add_literal(int addr,int val)
173{
15776b68 174 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 175 literals[literalcount][0]=addr;
176 literals[literalcount][1]=val;
177 literalcount++;
178}
179
f76eeef9 180void *kill_pointer(void *stub)
57871462 181{
182 int *ptr=(int *)(stub+4);
183 assert((*ptr&0x0ff00000)==0x05900000);
184 u_int offset=*ptr&0xfff;
185 int **l_ptr=(void *)ptr+offset+8;
186 int *i_ptr=*l_ptr;
187 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 188 return i_ptr;
57871462 189}
190
f968d35d 191// find where external branch is liked to using addr of it's stub:
192// get address that insn one after stub loads (dyna_linker arg1),
193// treat it as a pointer to branch insn,
194// return addr where that branch jumps to
57871462 195int get_pointer(void *stub)
196{
197 //printf("get_pointer(%x)\n",(int)stub);
198 int *ptr=(int *)(stub+4);
f968d35d 199 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 200 u_int offset=*ptr&0xfff;
201 int **l_ptr=(void *)ptr+offset+8;
202 int *i_ptr=*l_ptr;
203 assert((*i_ptr&0x0f000000)==0x0a000000);
204 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
205}
206
207// Find the "clean" entry point from a "dirty" entry point
208// by skipping past the call to verify_code
209u_int get_clean_addr(int addr)
210{
211 int *ptr=(int *)addr;
212 #ifdef ARMv5_ONLY
213 ptr+=4;
214 #else
215 ptr+=6;
216 #endif
217 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
218 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
219 ptr++;
220 if((*ptr&0xFF000000)==0xea000000) {
221 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
222 }
223 return (u_int)ptr;
224}
225
226int verify_dirty(int addr)
227{
228 u_int *ptr=(u_int *)addr;
229 #ifdef ARMv5_ONLY
230 // get from literal pool
15776b68 231 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 232 u_int offset=*ptr&0xfff;
233 u_int *l_ptr=(void *)ptr+offset+8;
234 u_int source=l_ptr[0];
235 u_int copy=l_ptr[1];
236 u_int len=l_ptr[2];
237 ptr+=4;
238 #else
239 // ARMv7 movw/movt
240 assert((*ptr&0xFFF00000)==0xe3000000);
241 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
242 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
243 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
244 ptr+=6;
245 #endif
246 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
247 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 248#ifndef DISABLE_TLB
cfcba99a 249 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 250 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
251 unsigned int page=source>>12;
252 unsigned int map_value=memory_map[page];
253 if(map_value>=0x80000000) return 0;
254 while(page<((source+len-1)>>12)) {
255 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
256 }
257 source = source+(map_value<<2);
258 }
63cb0298 259#endif
57871462 260 //printf("verify_dirty: %x %x %x\n",source,copy,len);
261 return !memcmp((void *)source,(void *)copy,len);
262}
263
264// This doesn't necessarily find all clean entry points, just
265// guarantees that it's not dirty
266int isclean(int addr)
267{
268 #ifdef ARMv5_ONLY
269 int *ptr=((u_int *)addr)+4;
270 #else
271 int *ptr=((u_int *)addr)+6;
272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
277 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
278 return 1;
279}
280
281void get_bounds(int addr,u_int *start,u_int *end)
282{
283 u_int *ptr=(u_int *)addr;
284 #ifdef ARMv5_ONLY
285 // get from literal pool
15776b68 286 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 287 u_int offset=*ptr&0xfff;
288 u_int *l_ptr=(void *)ptr+offset+8;
289 u_int source=l_ptr[0];
290 //u_int copy=l_ptr[1];
291 u_int len=l_ptr[2];
292 ptr+=4;
293 #else
294 // ARMv7 movw/movt
295 assert((*ptr&0xFFF00000)==0xe3000000);
296 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
297 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
298 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
299 ptr+=6;
300 #endif
301 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
302 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 303#ifndef DISABLE_TLB
cfcba99a 304 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 305 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
306 if(memory_map[source>>12]>=0x80000000) source = 0;
307 else source = source+(memory_map[source>>12]<<2);
308 }
63cb0298 309#endif
57871462 310 *start=source;
311 *end=source+len;
312}
313
314/* Register allocation */
315
316// Note: registers are allocated clean (unmodified state)
317// if you intend to modify the register, you must call dirty_reg().
/*
 * Make sure guest register `reg` has a host register in cur->regmap[] for
 * instruction index `i`.
 *
 * Nothing is done when `reg` is flagged in cur->u or is already mapped.
 * Otherwise candidates are tried in this order:
 *   1. the preferred host register (reg&7, HOST_CCREG for CCREG, 12 for
 *      PTEMP/FTEMP, then adjusted by loop_reg()) if it is free or holds a
 *      mapping flagged in cur->u / cur->uu;
 *   2. any free host register other than EXCLUDE_REG, after first dropping
 *      at most one mapping flagged as unneeded;
 *   3. eviction of an existing mapping, ordered by the hsn[] values that
 *      lsn() computes (highest value first).
 * Whenever a host register is handed out its dirty and isconst bits are
 * cleared.  If every step fails, a message is printed and the process exits.
 */
318void alloc_reg(struct regstat *cur,int i,signed char reg)
319{
320 int r,hr;
321 int preferred_reg = (reg&7);
322 if(reg==CCREG) preferred_reg=HOST_CCREG;
323 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
324
325 // Don't allocate unused registers
326 if((cur->u>>reg)&1) return;
327
328 // see if it's already allocated
329 for(hr=0;hr<HOST_REGS;hr++)
330 {
331 if(cur->regmap[hr]==reg) return;
332 }
333
334 // Keep the same mapping if the register was already allocated in a loop
335 preferred_reg = loop_reg(i,reg,preferred_reg);
336
337 // Try to allocate the preferred register
338 if(cur->regmap[preferred_reg]==-1) {
339 cur->regmap[preferred_reg]=reg;
340 cur->dirty&=~(1<<preferred_reg);
341 cur->isconst&=~(1<<preferred_reg);
342 return;
343 }
// Preferred register is occupied: take it over if its current mapping is
// flagged unneeded (values below 64 are checked in u, the rest in uu)
344 r=cur->regmap[preferred_reg];
345 if(r<64&&((cur->u>>r)&1)) {
346 cur->regmap[preferred_reg]=reg;
347 cur->dirty&=~(1<<preferred_reg);
348 cur->isconst&=~(1<<preferred_reg);
349 return;
350 }
351 if(r>=64&&((cur->uu>>(r&63))&1)) {
352 cur->regmap[preferred_reg]=reg;
353 cur->dirty&=~(1<<preferred_reg);
354 cur->isconst&=~(1<<preferred_reg);
355 return;
356 }
357
358 // Clear any unneeded registers
359 // We try to keep the mapping consistent, if possible, because it
360 // makes branches easier (especially loops). So we try to allocate
361 // first (see above) before removing old mappings. If this is not
362 // possible then go ahead and clear out the registers that are no
363 // longer needed.
// Note: the loop stops after freeing the first such mapping (break)
364 for(hr=0;hr<HOST_REGS;hr++)
365 {
366 r=cur->regmap[hr];
367 if(r>=0) {
368 if(r<64) {
369 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
370 }
371 else
372 {
373 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
374 }
375 }
376 }
377 // Try to allocate any available register, but prefer
378 // registers that have not been used recently.
// "Recently" = host register held rs1/rs2/rt1/rt2 of instruction i-1
379 if(i>0) {
380 for(hr=0;hr<HOST_REGS;hr++) {
381 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
382 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
383 cur->regmap[hr]=reg;
384 cur->dirty&=~(1<<hr);
385 cur->isconst&=~(1<<hr);
386 return;
387 }
388 }
389 }
390 }
391 // Try to allocate any available register
392 for(hr=0;hr<HOST_REGS;hr++) {
393 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
394 cur->regmap[hr]=reg;
395 cur->dirty&=~(1<<hr);
396 cur->isconst&=~(1<<hr);
397 return;
398 }
399 }
400
401 // Ok, now we have to evict someone
402 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register; lsn() (defined elsewhere)
// adjusts the entries for instruction i and may update preferred_reg
403 u_char hsn[MAXREG+1];
404 memset(hsn,10,sizeof(hsn));
405 int j;
406 lsn(hsn,i,&preferred_reg);
407 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
408 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
409 if(i>0) {
410 // Don't evict the cycle count at entry points, otherwise the entry
411 // stub will have to write it.
412 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
413 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: hsn values 10 down to 3 only, never evicting a register
// that instruction i-1 reads or writes
414 for(j=10;j>=3;j--)
415 {
416 // Alloc preferred register if available
417 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 // Evict both parts of a 64-bit register
420 if((cur->regmap[hr]&63)==r) {
421 cur->regmap[hr]=-1;
422 cur->dirty&=~(1<<hr);
423 cur->isconst&=~(1<<hr);
424 }
425 }
426 cur->regmap[preferred_reg]=reg;
427 return;
428 }
429 for(r=1;r<=MAXREG;r++)
430 {
431 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Upper halves (r+64) are taken before lower halves; HOST_CCREG is
// only eligible while j is below hsn[CCREG]
432 for(hr=0;hr<HOST_REGS;hr++) {
433 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
434 if(cur->regmap[hr]==r+64) {
435 cur->regmap[hr]=reg;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 return;
439 }
440 }
441 }
442 for(hr=0;hr<HOST_REGS;hr++) {
443 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
444 if(cur->regmap[hr]==r) {
445 cur->regmap[hr]=reg;
446 cur->dirty&=~(1<<hr);
447 cur->isconst&=~(1<<hr);
448 return;
449 }
450 }
451 }
452 }
453 }
454 }
455 }
// Last resort: same scan over all hsn values without the restrictions on
// instruction i-1 and on HOST_CCREG
456 for(j=10;j>=0;j--)
457 {
458 for(r=1;r<=MAXREG;r++)
459 {
460 if(hsn[r]==j) {
461 for(hr=0;hr<HOST_REGS;hr++) {
462 if(cur->regmap[hr]==r+64) {
463 cur->regmap[hr]=reg;
464 cur->dirty&=~(1<<hr);
465 cur->isconst&=~(1<<hr);
466 return;
467 }
468 }
469 for(hr=0;hr<HOST_REGS;hr++) {
470 if(cur->regmap[hr]==r) {
471 cur->regmap[hr]=reg;
472 cur->dirty&=~(1<<hr);
473 cur->isconst&=~(1<<hr);
474 return;
475 }
476 }
477 }
478 }
479 }
480 printf("This shouldn't happen (alloc_reg)");exit(1);
481}
482
/*
 * Allocate host registers for both halves of 64-bit guest register `reg`.
 *
 * The lower half is allocated through alloc_reg().  The upper half is
 * tracked in cur->regmap[] as reg|64 and is skipped when `reg` is flagged
 * in cur->uu or the upper half is already mapped.  The search for the upper
 * half mirrors alloc_reg(): preferred host register (8+(reg&1), adjusted by
 * loop_reg()), then any free register, then eviction ordered by hsn[].
 * Whenever a host register is handed out its dirty and isconst bits are
 * cleared.  If every step fails, a message is printed and the process exits.
 */
483void alloc_reg64(struct regstat *cur,int i,signed char reg)
484{
485 int preferred_reg = 8+(reg&1);
486 int r,hr;
487
488 // allocate the lower 32 bits
489 alloc_reg(cur,i,reg);
490
491 // Don't allocate unused registers
492 if((cur->uu>>reg)&1) return;
493
494 // see if the upper half is already allocated
495 for(hr=0;hr<HOST_REGS;hr++)
496 {
497 if(cur->regmap[hr]==reg+64) return;
498 }
499
500 // Keep the same mapping if the register was already allocated in a loop
501 preferred_reg = loop_reg(i,reg,preferred_reg);
502
503 // Try to allocate the preferred register
504 if(cur->regmap[preferred_reg]==-1) {
505 cur->regmap[preferred_reg]=reg|64;
506 cur->dirty&=~(1<<preferred_reg);
507 cur->isconst&=~(1<<preferred_reg);
508 return;
509 }
// Preferred register is occupied: take it over if its current mapping is
// flagged unneeded (values below 64 are checked in u, the rest in uu)
510 r=cur->regmap[preferred_reg];
511 if(r<64&&((cur->u>>r)&1)) {
512 cur->regmap[preferred_reg]=reg|64;
513 cur->dirty&=~(1<<preferred_reg);
514 cur->isconst&=~(1<<preferred_reg);
515 return;
516 }
517 if(r>=64&&((cur->uu>>(r&63))&1)) {
518 cur->regmap[preferred_reg]=reg|64;
519 cur->dirty&=~(1<<preferred_reg);
520 cur->isconst&=~(1<<preferred_reg);
521 return;
522 }
523
524 // Clear any unneeded registers
525 // We try to keep the mapping consistent, if possible, because it
526 // makes branches easier (especially loops). So we try to allocate
527 // first (see above) before removing old mappings. If this is not
528 // possible then go ahead and clear out the registers that are no
529 // longer needed.
// Note: scans from the highest host register downwards and stops after
// freeing the first such mapping (break)
530 for(hr=HOST_REGS-1;hr>=0;hr--)
531 {
532 r=cur->regmap[hr];
533 if(r>=0) {
534 if(r<64) {
535 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
536 }
537 else
538 {
539 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
540 }
541 }
542 }
543 // Try to allocate any available register, but prefer
544 // registers that have not been used recently.
// "Recently" = host register held rs1/rs2/rt1/rt2 of instruction i-1
545 if(i>0) {
546 for(hr=0;hr<HOST_REGS;hr++) {
547 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
548 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
549 cur->regmap[hr]=reg|64;
550 cur->dirty&=~(1<<hr);
551 cur->isconst&=~(1<<hr);
552 return;
553 }
554 }
555 }
556 }
557 // Try to allocate any available register
558 for(hr=0;hr<HOST_REGS;hr++) {
559 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
560 cur->regmap[hr]=reg|64;
561 cur->dirty&=~(1<<hr);
562 cur->isconst&=~(1<<hr);
563 return;
564 }
565 }
566
567 // Ok, now we have to evict someone
568 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register; lsn() (defined elsewhere)
// adjusts the entries for instruction i and may update preferred_reg
569 u_char hsn[MAXREG+1];
570 memset(hsn,10,sizeof(hsn));
571 int j;
572 lsn(hsn,i,&preferred_reg);
573 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
574 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
575 if(i>0) {
576 // Don't evict the cycle count at entry points, otherwise the entry
577 // stub will have to write it.
578 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
579 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: hsn values 10 down to 3 only, never evicting a register
// that instruction i-1 reads or writes
580 for(j=10;j>=3;j--)
581 {
582 // Alloc preferred register if available
583 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 // Evict both parts of a 64-bit register
586 if((cur->regmap[hr]&63)==r) {
587 cur->regmap[hr]=-1;
588 cur->dirty&=~(1<<hr);
589 cur->isconst&=~(1<<hr);
590 }
591 }
592 cur->regmap[preferred_reg]=reg|64;
593 return;
594 }
595 for(r=1;r<=MAXREG;r++)
596 {
597 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Upper halves (r+64) are taken before lower halves; HOST_CCREG is
// only eligible while j is below hsn[CCREG]
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
600 if(cur->regmap[hr]==r+64) {
601 cur->regmap[hr]=reg|64;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 for(hr=0;hr<HOST_REGS;hr++) {
609 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
610 if(cur->regmap[hr]==r) {
611 cur->regmap[hr]=reg|64;
612 cur->dirty&=~(1<<hr);
613 cur->isconst&=~(1<<hr);
614 return;
615 }
616 }
617 }
618 }
619 }
620 }
621 }
// Last resort: same scan over all hsn values without the restrictions on
// instruction i-1 and on HOST_CCREG
622 for(j=10;j>=0;j--)
623 {
624 for(r=1;r<=MAXREG;r++)
625 {
626 if(hsn[r]==j) {
627 for(hr=0;hr<HOST_REGS;hr++) {
628 if(cur->regmap[hr]==r+64) {
629 cur->regmap[hr]=reg|64;
630 cur->dirty&=~(1<<hr);
631 cur->isconst&=~(1<<hr);
632 return;
633 }
634 }
635 for(hr=0;hr<HOST_REGS;hr++) {
636 if(cur->regmap[hr]==r) {
637 cur->regmap[hr]=reg|64;
638 cur->dirty&=~(1<<hr);
639 cur->isconst&=~(1<<hr);
640 return;
641 }
642 }
643 }
644 }
645 }
646 printf("This shouldn't happen");exit(1);
647}
648
649// Allocate a temporary register. This is done without regard to
650// dirty status or whether the register we request is on the unneeded list
651// Note: This will only allocate one register, even if called multiple times
/*
 * Map temporary guest register `reg` to a host register for instruction `i`.
 *
 * Search order: an existing mapping of `reg` (nothing to do); a free host
 * register, scanning from the highest index down; a host register whose
 * mapping is flagged unneeded both in `cur` and for instruction i-1; and
 * finally eviction ordered by the hsn[] values from lsn().  EXCLUDE_REG is
 * never used for free-register allocation.  The chosen host register has
 * its dirty and isconst bits cleared.  If every step fails, a message is
 * printed and the process exits.
 */
652void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
653{
654 int r,hr;
655 int preferred_reg = -1;
656
657 // see if it's already allocated
658 for(hr=0;hr<HOST_REGS;hr++)
659 {
660 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
661 }
662
663 // Try to allocate any available register
664 for(hr=HOST_REGS-1;hr>=0;hr--) {
665 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
666 cur->regmap[hr]=reg;
667 cur->dirty&=~(1<<hr);
668 cur->isconst&=~(1<<hr);
669 return;
670 }
671 }
672
673 // Find an unneeded register
// For i>0 the mapping must also be flagged in unneeded_reg[i-1]
// (or unneeded_reg_upper[i-1] for upper halves)
674 for(hr=HOST_REGS-1;hr>=0;hr--)
675 {
676 r=cur->regmap[hr];
677 if(r>=0) {
678 if(r<64) {
679 if((cur->u>>r)&1) {
680 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 else
689 {
690 if((cur->uu>>(r&63))&1) {
691 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
692 cur->regmap[hr]=reg;
693 cur->dirty&=~(1<<hr);
694 cur->isconst&=~(1<<hr);
695 return;
696 }
697 }
698 }
699 }
700 }
701
702 // Ok, now we have to evict someone
703 // Pick a register we hopefully won't need soon
704 // TODO: we might want to follow unconditional jumps here
705 // TODO: get rid of dupe code and make this into a function
// hsn[] starts at 10 for every guest register; lsn() (defined elsewhere)
// adjusts the entries for instruction i.  preferred_reg is -1 here and is
// not used for selection in this function.
706 u_char hsn[MAXREG+1];
707 memset(hsn,10,sizeof(hsn));
708 int j;
709 lsn(hsn,i,&preferred_reg);
710 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
711 if(i>0) {
712 // Don't evict the cycle count at entry points, otherwise the entry
713 // stub will have to write it.
714 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
715 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: hsn values 10 down to 3 only, never evicting a register
// that instruction i-1 reads or writes
716 for(j=10;j>=3;j--)
717 {
718 for(r=1;r<=MAXREG;r++)
719 {
720 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Upper halves (r+64) are taken before lower halves; HOST_CCREG is
// only eligible when hsn[CCREG] is above 2
721 for(hr=0;hr<HOST_REGS;hr++) {
722 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
723 if(cur->regmap[hr]==r+64) {
724 cur->regmap[hr]=reg;
725 cur->dirty&=~(1<<hr);
726 cur->isconst&=~(1<<hr);
727 return;
728 }
729 }
730 }
731 for(hr=0;hr<HOST_REGS;hr++) {
732 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
733 if(cur->regmap[hr]==r) {
734 cur->regmap[hr]=reg;
735 cur->dirty&=~(1<<hr);
736 cur->isconst&=~(1<<hr);
737 return;
738 }
739 }
740 }
741 }
742 }
743 }
744 }
// Last resort: same scan over all hsn values without the restrictions on
// instruction i-1 and on HOST_CCREG
745 for(j=10;j>=0;j--)
746 {
747 for(r=1;r<=MAXREG;r++)
748 {
749 if(hsn[r]==j) {
750 for(hr=0;hr<HOST_REGS;hr++) {
751 if(cur->regmap[hr]==r+64) {
752 cur->regmap[hr]=reg;
753 cur->dirty&=~(1<<hr);
754 cur->isconst&=~(1<<hr);
755 return;
756 }
757 }
758 for(hr=0;hr<HOST_REGS;hr++) {
759 if(cur->regmap[hr]==r) {
760 cur->regmap[hr]=reg;
761 cur->dirty&=~(1<<hr);
762 cur->isconst&=~(1<<hr);
763 return;
764 }
765 }
766 }
767 }
768 }
769 printf("This shouldn't happen");exit(1);
770}
771// Allocate a specific ARM register.
772void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
773{
774 int n;
f776eb14 775 int dirty=0;
57871462 776
777 // see if it's already allocated (and dealloc it)
778 for(n=0;n<HOST_REGS;n++)
779 {
f776eb14 780 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
781 dirty=(cur->dirty>>n)&1;
782 cur->regmap[n]=-1;
783 }
57871462 784 }
785
786 cur->regmap[hr]=reg;
787 cur->dirty&=~(1<<hr);
f776eb14 788 cur->dirty|=dirty<<hr;
57871462 789 cur->isconst&=~(1<<hr);
790}
791
792// Alloc cycle count into dedicated register
793alloc_cc(struct regstat *cur,int i)
794{
795 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
796}
797
798/* Special alloc */
799
800
801/* Assembler */
802
// Printable names of the 16 ARM core registers, indexed by register number.
// Used by the assem_debug() disassembly output.
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"
};
820
821void output_byte(u_char byte)
822{
823 *(out++)=byte;
824}
825void output_modrm(u_char mod,u_char rm,u_char ext)
826{
827 assert(mod<4);
828 assert(rm<8);
829 assert(ext<8);
830 u_char byte=(mod<<6)|(ext<<3)|rm;
831 *(out++)=byte;
832}
833void output_sib(u_char scale,u_char index,u_char base)
834{
835 assert(scale<4);
836 assert(index<8);
837 assert(base<8);
838 u_char byte=(scale<<6)|(index<<3)|base;
839 *(out++)=byte;
840}
841void output_w32(u_int word)
842{
843 *((u_int *)out)=word;
844 out+=4;
845}
846u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
847{
848 assert(rd<16);
849 assert(rn<16);
850 assert(rm<16);
851 return((rn<<16)|(rd<<12)|rm);
852}
853u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
854{
855 assert(rd<16);
856 assert(rn<16);
857 assert(imm<256);
858 assert((shift&1)==0);
859 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
860}
861u_int genimm(u_int imm,u_int *encoded)
862{
c2e3bd42 863 *encoded=0;
864 if(imm==0) return 1;
57871462 865 int i=32;
866 while(i>0)
867 {
868 if(imm<256) {
869 *encoded=((i&30)<<7)|imm;
870 return 1;
871 }
872 imm=(imm>>2)|(imm<<30);i-=2;
873 }
874 return 0;
875}
cfbd3c6e 876void genimm_checked(u_int imm,u_int *encoded)
877{
878 u_int ret=genimm(imm,encoded);
879 assert(ret);
880}
57871462 881u_int genjmp(u_int addr)
882{
883 int offset=addr-(int)out-8;
e80343e2 884 if(offset<-33554432||offset>=33554432) {
885 if (addr>2) {
886 printf("genjmp: out of range: %08x\n", offset);
887 exit(1);
888 }
889 return 0;
890 }
57871462 891 return ((u_int)offset>>2)&0xffffff;
892}
893
894void emit_mov(int rs,int rt)
895{
896 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
897 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
898}
899
900void emit_movs(int rs,int rt)
901{
902 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
903 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
904}
905
906void emit_add(int rs1,int rs2,int rt)
907{
908 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
909 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
910}
911
912void emit_adds(int rs1,int rs2,int rt)
913{
914 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
915 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
916}
917
918void emit_adcs(int rs1,int rs2,int rt)
919{
920 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
921 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
922}
923
924void emit_sbc(int rs1,int rs2,int rt)
925{
926 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
927 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
928}
929
930void emit_sbcs(int rs1,int rs2,int rt)
931{
932 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
933 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
934}
935
936void emit_neg(int rs, int rt)
937{
938 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
939 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
940}
941
942void emit_negs(int rs, int rt)
943{
944 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
945 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
946}
947
948void emit_sub(int rs1,int rs2,int rt)
949{
950 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
951 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
952}
953
954void emit_subs(int rs1,int rs2,int rt)
955{
956 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
957 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
958}
959
960void emit_zeroreg(int rt)
961{
962 assem_debug("mov %s,#0\n",regname[rt]);
963 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
964}
965
790ee18e 966void emit_loadlp(u_int imm,u_int rt)
967{
968 add_literal((int)out,imm);
969 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
970 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
971}
972void emit_movw(u_int imm,u_int rt)
973{
974 assert(imm<65536);
975 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
976 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
977}
978void emit_movt(u_int imm,u_int rt)
979{
980 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
981 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
982}
983void emit_movimm(u_int imm,u_int rt)
984{
985 u_int armval;
986 if(genimm(imm,&armval)) {
987 assem_debug("mov %s,#%d\n",regname[rt],imm);
988 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
989 }else if(genimm(~imm,&armval)) {
990 assem_debug("mvn %s,#%d\n",regname[rt],imm);
991 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
992 }else if(imm<65536) {
993 #ifdef ARMv5_ONLY
994 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
995 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
996 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
997 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
998 #else
999 emit_movw(imm,rt);
1000 #endif
1001 }else{
1002 #ifdef ARMv5_ONLY
1003 emit_loadlp(imm,rt);
1004 #else
1005 emit_movw(imm&0x0000FFFF,rt);
1006 emit_movt(imm&0xFFFF0000,rt);
1007 #endif
1008 }
1009}
1010void emit_pcreladdr(u_int rt)
1011{
1012 assem_debug("add %s,pc,#?\n",regname[rt]);
1013 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1014}
1015
57871462 1016void emit_loadreg(int r, int hr)
1017{
3d624f89 1018#ifdef FORCE32
1019 if(r&64) {
1020 printf("64bit load in 32bit mode!\n");
7f2607ea 1021 assert(0);
1022 return;
3d624f89 1023 }
1024#endif
57871462 1025 if((r&63)==0)
1026 emit_zeroreg(hr);
1027 else {
3d624f89 1028 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1029 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1030 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1031 if(r==CCREG) addr=(int)&cycle_count;
1032 if(r==CSREG) addr=(int)&Status;
1033 if(r==FSREG) addr=(int)&FCR31;
1034 if(r==INVCP) addr=(int)&invc_ptr;
1035 u_int offset = addr-(u_int)&dynarec_local;
1036 assert(offset<4096);
1037 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1038 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1039 }
1040}
1041void emit_storereg(int r, int hr)
1042{
3d624f89 1043#ifdef FORCE32
1044 if(r&64) {
1045 printf("64bit store in 32bit mode!\n");
7f2607ea 1046 assert(0);
1047 return;
3d624f89 1048 }
1049#endif
1050 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1051 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1052 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1053 if(r==CCREG) addr=(int)&cycle_count;
1054 if(r==FSREG) addr=(int)&FCR31;
1055 u_int offset = addr-(u_int)&dynarec_local;
1056 assert(offset<4096);
1057 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1058 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1059}
1060
1061void emit_test(int rs, int rt)
1062{
1063 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1064 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1065}
1066
1067void emit_testimm(int rs,int imm)
1068{
1069 u_int armval;
5a05d80c 1070 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1071 genimm_checked(imm,&armval);
57871462 1072 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1073}
1074
b9b61529 1075void emit_testeqimm(int rs,int imm)
1076{
1077 u_int armval;
1078 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1079 genimm_checked(imm,&armval);
b9b61529 1080 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1081}
1082
57871462 1083void emit_not(int rs,int rt)
1084{
1085 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1086 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1087}
1088
b9b61529 1089void emit_mvnmi(int rs,int rt)
1090{
1091 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1092 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1093}
1094
57871462 1095void emit_and(u_int rs1,u_int rs2,u_int rt)
1096{
1097 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1098 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1099}
1100
1101void emit_or(u_int rs1,u_int rs2,u_int rt)
1102{
1103 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1105}
1106void emit_or_and_set_flags(int rs1,int rs2,int rt)
1107{
1108 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1109 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1110}
1111
f70d384d 1112void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1113{
1114 assert(rs<16);
1115 assert(rt<16);
1116 assert(imm<32);
1117 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1118 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1119}
1120
576bbd8f 1121void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1122{
1123 assert(rs<16);
1124 assert(rt<16);
1125 assert(imm<32);
1126 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1127 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1128}
1129
57871462 1130void emit_xor(u_int rs1,u_int rs2,u_int rt)
1131{
1132 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1133 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1134}
1135
57871462 1136void emit_addimm(u_int rs,int imm,u_int rt)
1137{
1138 assert(rs<16);
1139 assert(rt<16);
1140 if(imm!=0) {
57871462 1141 u_int armval;
1142 if(genimm(imm,&armval)) {
1143 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1144 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1145 }else if(genimm(-imm,&armval)) {
1146 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1147 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1148 }else if(imm<0) {
ffb0b9e0 1149 assert(imm>-65536);
57871462 1150 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1151 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1152 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1153 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1154 }else{
ffb0b9e0 1155 assert(imm<65536);
57871462 1156 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1157 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1158 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1159 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1160 }
1161 }
1162 else if(rs!=rt) emit_mov(rs,rt);
1163}
1164
1165void emit_addimm_and_set_flags(int imm,int rt)
1166{
1167 assert(imm>-65536&&imm<65536);
1168 u_int armval;
1169 if(genimm(imm,&armval)) {
1170 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1171 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1172 }else if(genimm(-imm,&armval)) {
1173 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1174 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1175 }else if(imm<0) {
1176 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1177 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1178 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1179 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1180 }else{
1181 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1182 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1183 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1184 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1185 }
1186}
1187void emit_addimm_no_flags(u_int imm,u_int rt)
1188{
1189 emit_addimm(rt,imm,rt);
1190}
1191
1192void emit_addnop(u_int r)
1193{
1194 assert(r<16);
1195 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1196 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1197}
1198
1199void emit_adcimm(u_int rs,int imm,u_int rt)
1200{
1201 u_int armval;
cfbd3c6e 1202 genimm_checked(imm,&armval);
57871462 1203 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1204 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1205}
1206/*void emit_sbcimm(int imm,u_int rt)
1207{
1208 u_int armval;
cfbd3c6e 1209 genimm_checked(imm,&armval);
57871462 1210 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1211 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1212}*/
1213void emit_sbbimm(int imm,u_int rt)
1214{
1215 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1216 assert(rt<8);
1217 if(imm<128&&imm>=-128) {
1218 output_byte(0x83);
1219 output_modrm(3,rt,3);
1220 output_byte(imm);
1221 }
1222 else
1223 {
1224 output_byte(0x81);
1225 output_modrm(3,rt,3);
1226 output_w32(imm);
1227 }
1228}
1229void emit_rscimm(int rs,int imm,u_int rt)
1230{
1231 assert(0);
1232 u_int armval;
cfbd3c6e 1233 genimm_checked(imm,&armval);
57871462 1234 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1235 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1236}
1237
1238void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1239{
1240 // TODO: if(genimm(imm,&armval)) ...
1241 // else
1242 emit_movimm(imm,HOST_TEMPREG);
1243 emit_adds(HOST_TEMPREG,rsl,rtl);
1244 emit_adcimm(rsh,0,rth);
1245}
1246
1247void emit_sbb(int rs1,int rs2)
1248{
1249 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1250 output_byte(0x19);
1251 output_modrm(3,rs1,rs2);
1252}
1253
1254void emit_andimm(int rs,int imm,int rt)
1255{
1256 u_int armval;
790ee18e 1257 if(imm==0) {
1258 emit_zeroreg(rt);
1259 }else if(genimm(imm,&armval)) {
57871462 1260 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1261 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1262 }else if(genimm(~imm,&armval)) {
1263 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1264 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1265 }else if(imm==65535) {
1266 #ifdef ARMv5_ONLY
1267 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1268 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1269 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1270 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1271 #else
1272 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1273 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1274 #endif
1275 }else{
1276 assert(imm>0&&imm<65535);
1277 #ifdef ARMv5_ONLY
1278 assem_debug("mov r14,#%d\n",imm&0xFF00);
1279 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1280 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1281 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1282 #else
1283 emit_movw(imm,HOST_TEMPREG);
1284 #endif
1285 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1286 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1287 }
1288}
1289
1290void emit_orimm(int rs,int imm,int rt)
1291{
1292 u_int armval;
790ee18e 1293 if(imm==0) {
1294 if(rs!=rt) emit_mov(rs,rt);
1295 }else if(genimm(imm,&armval)) {
57871462 1296 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1297 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1298 }else{
1299 assert(imm>0&&imm<65536);
1300 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1301 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1302 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1303 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1304 }
1305}
1306
1307void emit_xorimm(int rs,int imm,int rt)
1308{
57871462 1309 u_int armval;
790ee18e 1310 if(imm==0) {
1311 if(rs!=rt) emit_mov(rs,rt);
1312 }else if(genimm(imm,&armval)) {
57871462 1313 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1314 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1315 }else{
514ed0d9 1316 assert(imm>0&&imm<65536);
57871462 1317 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1318 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1319 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1320 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1321 }
1322}
1323
1324void emit_shlimm(int rs,u_int imm,int rt)
1325{
1326 assert(imm>0);
1327 assert(imm<32);
1328 //if(imm==1) ...
1329 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1330 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1331}
1332
c6c3b1b3 1333void emit_lsls_imm(int rs,int imm,int rt)
1334{
1335 assert(imm>0);
1336 assert(imm<32);
1337 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1338 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1339}
1340
57871462 1341void emit_shrimm(int rs,u_int imm,int rt)
1342{
1343 assert(imm>0);
1344 assert(imm<32);
1345 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1346 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1347}
1348
1349void emit_sarimm(int rs,u_int imm,int rt)
1350{
1351 assert(imm>0);
1352 assert(imm<32);
1353 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1354 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1355}
1356
1357void emit_rorimm(int rs,u_int imm,int rt)
1358{
1359 assert(imm>0);
1360 assert(imm<32);
1361 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1362 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1363}
1364
1365void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1366{
1367 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1368 assert(imm>0);
1369 assert(imm<32);
1370 //if(imm==1) ...
1371 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1372 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1373 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1374 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1375}
1376
1377void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1378{
1379 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1380 assert(imm>0);
1381 assert(imm<32);
1382 //if(imm==1) ...
1383 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1384 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1385 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1386 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1387}
1388
b9b61529 1389void emit_signextend16(int rs,int rt)
1390{
1391 #ifdef ARMv5_ONLY
1392 emit_shlimm(rs,16,rt);
1393 emit_sarimm(rt,16,rt);
1394 #else
1395 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1396 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1397 #endif
1398}
1399
c6c3b1b3 1400void emit_signextend8(int rs,int rt)
1401{
1402 #ifdef ARMv5_ONLY
1403 emit_shlimm(rs,24,rt);
1404 emit_sarimm(rt,24,rt);
1405 #else
1406 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1407 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1408 #endif
1409}
1410
57871462 1411void emit_shl(u_int rs,u_int shift,u_int rt)
1412{
1413 assert(rs<16);
1414 assert(rt<16);
1415 assert(shift<16);
1416 //if(imm==1) ...
1417 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1418 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1419}
1420void emit_shr(u_int rs,u_int shift,u_int rt)
1421{
1422 assert(rs<16);
1423 assert(rt<16);
1424 assert(shift<16);
1425 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1426 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1427}
1428void emit_sar(u_int rs,u_int shift,u_int rt)
1429{
1430 assert(rs<16);
1431 assert(rt<16);
1432 assert(shift<16);
1433 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1434 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1435}
1436void emit_shlcl(int r)
1437{
1438 assem_debug("shl %%%s,%%cl\n",regname[r]);
1439 assert(0);
1440}
1441void emit_shrcl(int r)
1442{
1443 assem_debug("shr %%%s,%%cl\n",regname[r]);
1444 assert(0);
1445}
1446void emit_sarcl(int r)
1447{
1448 assem_debug("sar %%%s,%%cl\n",regname[r]);
1449 assert(0);
1450}
1451
1452void emit_shldcl(int r1,int r2)
1453{
1454 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1455 assert(0);
1456}
1457void emit_shrdcl(int r1,int r2)
1458{
1459 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1460 assert(0);
1461}
1462void emit_orrshl(u_int rs,u_int shift,u_int rt)
1463{
1464 assert(rs<16);
1465 assert(rt<16);
1466 assert(shift<16);
1467 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1468 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1469}
1470void emit_orrshr(u_int rs,u_int shift,u_int rt)
1471{
1472 assert(rs<16);
1473 assert(rt<16);
1474 assert(shift<16);
1475 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1476 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1477}
1478
1479void emit_cmpimm(int rs,int imm)
1480{
1481 u_int armval;
1482 if(genimm(imm,&armval)) {
5a05d80c 1483 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1484 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1485 }else if(genimm(-imm,&armval)) {
5a05d80c 1486 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1487 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1488 }else if(imm>0) {
1489 assert(imm<65536);
1490 #ifdef ARMv5_ONLY
1491 emit_movimm(imm,HOST_TEMPREG);
1492 #else
1493 emit_movw(imm,HOST_TEMPREG);
1494 #endif
1495 assem_debug("cmp %s,r14\n",regname[rs]);
1496 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1497 }else{
1498 assert(imm>-65536);
1499 #ifdef ARMv5_ONLY
1500 emit_movimm(-imm,HOST_TEMPREG);
1501 #else
1502 emit_movw(-imm,HOST_TEMPREG);
1503 #endif
1504 assem_debug("cmn %s,r14\n",regname[rs]);
1505 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1506 }
1507}
1508
1509void emit_cmovne(u_int *addr,int rt)
1510{
1511 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1512 assert(0);
1513}
1514void emit_cmovl(u_int *addr,int rt)
1515{
1516 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1517 assert(0);
1518}
1519void emit_cmovs(u_int *addr,int rt)
1520{
1521 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1522 assert(0);
1523}
1524void emit_cmovne_imm(int imm,int rt)
1525{
1526 assem_debug("movne %s,#%d\n",regname[rt],imm);
1527 u_int armval;
cfbd3c6e 1528 genimm_checked(imm,&armval);
57871462 1529 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1530}
1531void emit_cmovl_imm(int imm,int rt)
1532{
1533 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1534 u_int armval;
cfbd3c6e 1535 genimm_checked(imm,&armval);
57871462 1536 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1537}
1538void emit_cmovb_imm(int imm,int rt)
1539{
1540 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1541 u_int armval;
cfbd3c6e 1542 genimm_checked(imm,&armval);
57871462 1543 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1544}
1545void emit_cmovs_imm(int imm,int rt)
1546{
1547 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1548 u_int armval;
cfbd3c6e 1549 genimm_checked(imm,&armval);
57871462 1550 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1551}
1552void emit_cmove_reg(int rs,int rt)
1553{
1554 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1555 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1556}
1557void emit_cmovne_reg(int rs,int rt)
1558{
1559 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1560 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1561}
1562void emit_cmovl_reg(int rs,int rt)
1563{
1564 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1565 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1566}
1567void emit_cmovs_reg(int rs,int rt)
1568{
1569 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1570 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1571}
1572
// Set rt to 0, compare rs with imm, then conditionally (movlt) set rt to 1.
// When rs and rt are the same register the zeroing is done after the compare.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 0, compare rs with imm, then conditionally (movcc) set rt to 1.
// When rs and rt are the same register the zeroing is done after the compare.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
// Compare the register pair rsh:rsl against imm and leave 0/1 in rt.
// The low word is handled by emit_slti32(); the high word then overrides
// the result. rsh must not be the same register as rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    // negative immediate: high word is compared against -1
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  // non-negative immediate: high word is tested against itself
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
// Compare the register pair rsh:rsl against imm and leave 0/1 in rt.
// The low word is handled by emit_sltiu32(); the high word then overrides
// the result. rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    // negative immediate: high word is compared against -1
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  // non-negative immediate: high word is tested against itself
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1619
1620void emit_cmp(int rs,int rt)
1621{
1622 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1623 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1624}
// Emitted sequence: compare rs with 1, load 1 into rt, then movlt rt,#0.
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Emitted sequence: copy rs to rt via emit_movs() (or test rs against itself
// when they are the same register), then movne rt,#1.
void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
// 64-bit variant of emit_set_gz32(): the low word is evaluated first, then
// the high word is tested and overrides rt (movne #1, movmi #0).
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// Emitted sequence: orrs rt,rsh,rsl followed by movne rt,#1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// Set rt to 0, compare rs1 with rs2, then conditionally (movlt) set rt to 1.
// When rt is also one of the operands the zeroing is done after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 0, compare rs1 with rs2, then conditionally (movcc) set rt to 1.
// When rt is also one of the operands the zeroing is done after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1669void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1670{
1671 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1672 assert(u1!=rt);
1673 assert(u2!=rt);
1674 emit_cmp(l1,l2);
1675 emit_movimm(0,rt);
1676 emit_sbcs(u1,u2,HOST_TEMPREG);
1677 emit_cmovl_imm(1,rt);
1678}
1679void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1680{
1681 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1682 assert(u1!=rt);
1683 assert(u2!=rt);
1684 emit_cmp(l1,l2);
1685 emit_movimm(0,rt);
1686 emit_sbcs(u1,u2,HOST_TEMPREG);
1687 emit_cmovb_imm(1,rt);
1688}
1689
1690void emit_call(int a)
1691{
1692 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1693 u_int offset=genjmp(a);
1694 output_w32(0xeb000000|offset);
1695}
1696void emit_jmp(int a)
1697{
1698 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1699 u_int offset=genjmp(a);
1700 output_w32(0xea000000|offset);
1701}
// Emit "bne a"; the branch offset field is produced by genjmp().
void emit_jne(int a)
{
  assem_debug("bne %x\n", a);
  output_w32(0x1a000000 | genjmp(a));
}
// Emit "beq a"; the branch offset field is produced by genjmp().
void emit_jeq(int a)
{
  assem_debug("beq %x\n", a);
  output_w32(0x0a000000 | genjmp(a));
}
// Emit "bmi a"; the branch offset field is produced by genjmp().
void emit_js(int a)
{
  assem_debug("bmi %x\n", a);
  output_w32(0x4a000000 | genjmp(a));
}
// Emit "bpl a"; the branch offset field is produced by genjmp().
void emit_jns(int a)
{
  assem_debug("bpl %x\n", a);
  output_w32(0x5a000000 | genjmp(a));
}
// Emit "blt a"; the branch offset field is produced by genjmp().
void emit_jl(int a)
{
  assem_debug("blt %x\n", a);
  output_w32(0xba000000 | genjmp(a));
}
// Emit "bge a"; the branch offset field is produced by genjmp().
void emit_jge(int a)
{
  assem_debug("bge %x\n", a);
  output_w32(0xaa000000 | genjmp(a));
}
// Emit "bvc a"; the branch offset field is produced by genjmp().
void emit_jno(int a)
{
  assem_debug("bvc %x\n", a);
  output_w32(0x7a000000 | genjmp(a));
}
// Emit "bcs a"; the branch offset field is produced by genjmp().
void emit_jc(int a)
{
  assem_debug("bcs %x\n", a);
  output_w32(0x2a000000 | genjmp(a));
}
// Emit "bcc a"; the branch offset field is produced by genjmp().
void emit_jcc(int a)
{
  assem_debug("bcc %x\n", a);
  output_w32(0x3a000000 | genjmp(a));
}
1756
// Stub: logs an x86-style "push $imm" and then fails assert(0); emits nothing.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Stub: logs "pusha" and then fails assert(0); emits nothing.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Stub: logs "popa" and then fails assert(0); emits nothing.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1772void emit_pushreg(u_int r)
1773{
1774 assem_debug("push %%%s\n",regname[r]);
1775 assert(0);
1776}
1777void emit_popreg(u_int r)
1778{
1779 assem_debug("pop %%%s\n",regname[r]);
1780 assert(0);
1781}
1782void emit_callreg(u_int r)
1783{
c6c3b1b3 1784 assert(r<15);
1785 assem_debug("blx %s\n",regname[r]);
1786 output_w32(0xe12fff30|r);
57871462 1787}
1788void emit_jmpreg(u_int r)
1789{
1790 assem_debug("mov pc,%s\n",regname[r]);
1791 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1792}
1793
1794void emit_readword_indexed(int offset, int rs, int rt)
1795{
1796 assert(offset>-4096&&offset<4096);
1797 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1798 if(offset>=0) {
1799 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1800 }else{
1801 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1802 }
1803}
1804void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1805{
1806 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1807 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1808}
c6c3b1b3 1809void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1810{
1811 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1812 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1813}
1814void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1815{
1816 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1817 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1818}
1819void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1820{
1821 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1822 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1823}
1824void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1825{
1826 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1827 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1828}
1829void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1830{
1831 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1832 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1833}
// Word load: without a map register (map<0) this is a plain offset load;
// with one, addr must be 0 and the load is indexed by map scaled by 4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Doubleword load into rh (word at addr, skipped when rh<0) and rl (next word).
// With a map register the emitted code increments map by 1 between the two
// loads; rh must not be rs in that case.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if (map < 0) {
    if (rh >= 0)
      emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }
  assert(rh != rs);
  if (rh >= 0)
    emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1854void emit_movsbl_indexed(int offset, int rs, int rt)
1855{
1856 assert(offset>-256&&offset<256);
1857 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1858 if(offset>=0) {
1859 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1860 }else{
1861 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1862 }
1863}
1864void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1865{
1866 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1867 else {
1868 if(addr==0) {
1869 emit_shlimm(map,2,map);
1870 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1871 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1872 }else{
1873 assert(addr>-256&&addr<256);
1874 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1875 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1876 emit_movsbl_indexed(addr, rt, rt);
1877 }
1878 }
1879}
1880void emit_movswl_indexed(int offset, int rs, int rt)
1881{
1882 assert(offset>-256&&offset<256);
1883 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1884 if(offset>=0) {
1885 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1886 }else{
1887 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1888 }
1889}
1890void emit_movzbl_indexed(int offset, int rs, int rt)
1891{
1892 assert(offset>-4096&&offset<4096);
1893 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1894 if(offset>=0) {
1895 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1896 }else{
1897 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1898 }
1899}
1900void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1901{
1902 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1903 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1904}
// Unsigned byte load. Without a map register (map<0) this is a plain offset
// load. With one, a non-zero addr is first added to rs (result in rt) and
// the load is then indexed by map scaled by 4.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1917void emit_movzwl_indexed(int offset, int rs, int rt)
1918{
1919 assert(offset>-256&&offset<256);
1920 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1921 if(offset>=0) {
1922 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1923 }else{
1924 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1925 }
1926}
1927void emit_readword(int addr, int rt)
1928{
1929 u_int offset = addr-(u_int)&dynarec_local;
1930 assert(offset<4096);
1931 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1932 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1933}
1934void emit_movsbl(int addr, int rt)
1935{
1936 u_int offset = addr-(u_int)&dynarec_local;
1937 assert(offset<256);
1938 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1939 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1940}
1941void emit_movswl(int addr, int rt)
1942{
1943 u_int offset = addr-(u_int)&dynarec_local;
1944 assert(offset<256);
1945 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1946 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1947}
1948void emit_movzbl(int addr, int rt)
1949{
1950 u_int offset = addr-(u_int)&dynarec_local;
1951 assert(offset<4096);
1952 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1953 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1954}
1955void emit_movzwl(int addr, int rt)
1956{
1957 u_int offset = addr-(u_int)&dynarec_local;
1958 assert(offset<256);
1959 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1960 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1961}
1962void emit_movzwl_reg(int rs, int rt)
1963{
1964 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1965 assert(0);
1966}
1967
1968void emit_xchg(int rs, int rt)
1969{
1970 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1971 assert(0);
1972}
1973void emit_writeword_indexed(int rt, int offset, int rs)
1974{
1975 assert(offset>-4096&&offset<4096);
1976 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1977 if(offset>=0) {
1978 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1979 }else{
1980 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1981 }
1982}
1983void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1984{
1985 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1986 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1987}
// Word store: without a map register (map<0) this is a plain offset store;
// with one, addr must be 0 and the store is indexed by map scaled by 4.
// The temp parameter is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh (first word) and rl (second word).
// Without a map register (map<0) two plain offset stores are used, the first
// being skipped when rh<0. With a map register rh must be valid; the second
// word is addressed either through temp = map+1, or - when temp is the same
// register as rs - by advancing rs by 4 in the emitted code.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  const int temp_is_base = (temp == rs);

  if (map < 0) {
    if (rh >= 0)
      emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }
  assert(rh >= 0);
  if (!temp_is_base)
    emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if (!temp_is_base) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
    return;
  }
  emit_addimm(rs, 4, rs);
  emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
}
2012void emit_writehword_indexed(int rt, int offset, int rs)
2013{
2014 assert(offset>-256&&offset<256);
2015 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2016 if(offset>=0) {
2017 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2018 }else{
2019 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2020 }
2021}
2022void emit_writebyte_indexed(int rt, int offset, int rs)
2023{
2024 assert(offset>-4096&&offset<4096);
2025 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2026 if(offset>=0) {
2027 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2028 }else{
2029 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2030 }
2031}
2032void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2033{
2034 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2035 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2036}
// Byte store. Without a map register (map<0) this is a plain offset store.
// With one, a non-zero addr is first added to rs (result in temp) and the
// store is then indexed by map scaled by 4.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
b96d3df7 2049void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2050{
2051 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2052 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2053}
2054void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2055{
2056 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2057 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2058}
2059void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2060{
2061 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2062 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2063}
57871462 2064void emit_writeword(int rt, int addr)
2065{
2066 u_int offset = addr-(u_int)&dynarec_local;
2067 assert(offset<4096);
2068 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2069 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2070}
2071void emit_writehword(int rt, int addr)
2072{
2073 u_int offset = addr-(u_int)&dynarec_local;
2074 assert(offset<256);
2075 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2076 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2077}
2078void emit_writebyte(int rt, int addr)
2079{
2080 u_int offset = addr-(u_int)&dynarec_local;
2081 assert(offset<4096);
74426039 2082 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2083 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2084}
// Stub: logs an x86-style "movl $imm,addr" and then fails assert(0); emits nothing.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Stub: logs an x86-style "movb $imm,addr" and then fails assert(0); emits nothing.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
2095
2096void emit_mul(int rs)
2097{
2098 assem_debug("mul %%%s\n",regname[rs]);
2099 assert(0);
2100}
2101void emit_imul(int rs)
2102{
2103 assem_debug("imul %%%s\n",regname[rs]);
2104 assert(0);
2105}
2106void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2107{
2108 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2109 assert(rs1<16);
2110 assert(rs2<16);
2111 assert(hi<16);
2112 assert(lo<16);
2113 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2114}
2115void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2116{
2117 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2118 assert(rs1<16);
2119 assert(rs2<16);
2120 assert(hi<16);
2121 assert(lo<16);
2122 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2123}
2124
2125void emit_div(int rs)
2126{
2127 assem_debug("div %%%s\n",regname[rs]);
2128 assert(0);
2129}
2130void emit_idiv(int rs)
2131{
2132 assem_debug("idiv %%%s\n",regname[rs]);
2133 assert(0);
2134}
// Stub: logs "cdq" and then fails assert(0); emits nothing.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2140
2141void emit_clz(int rs,int rt)
2142{
2143 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2144 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2145}
2146
2147void emit_subcs(int rs1,int rs2,int rt)
2148{
2149 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2150 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2151}
2152
2153void emit_shrcc_imm(int rs,u_int imm,int rt)
2154{
2155 assert(imm>0);
2156 assert(imm<32);
2157 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2158 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2159}
2160
b1be1eee 2161void emit_shrne_imm(int rs,u_int imm,int rt)
2162{
2163 assert(imm>0);
2164 assert(imm<32);
2165 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2166 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2167}
2168
57871462 2169void emit_negmi(int rs, int rt)
2170{
2171 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2172 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2173}
2174
2175void emit_negsmi(int rs, int rt)
2176{
2177 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2178 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2179}
2180
2181void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2182{
2183 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2184 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2185}
2186
2187void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2188{
2189 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2190 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2191}
2192
2193void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2194{
2195 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2196 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2197}
2198
2199void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2200{
2201 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2202 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2203}
2204
2205void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2206{
2207 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2208 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2209}
2210
2211void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2212{
2213 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2214 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2215}
2216
2217void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2218{
2219 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2220 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2221}
2222
2223void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2224{
2225 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2226 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2227}
2228
2229void emit_teq(int rs, int rt)
2230{
2231 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2232 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2233}
2234
2235void emit_rsbimm(int rs, int imm, int rt)
2236{
2237 u_int armval;
cfbd3c6e 2238 genimm_checked(imm,&armval);
57871462 2239 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2240 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2241}
2242
2243// Load 2 immediates optimizing for small code size
2244void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2245{
2246 emit_movimm(imm1,rt1);
2247 u_int armval;
2248 if(genimm(imm2-imm1,&armval)) {
2249 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2250 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2251 }else if(genimm(imm1-imm2,&armval)) {
2252 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2253 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2254 }
2255 else emit_movimm(imm2,rt2);
2256}
2257
2258// Conditionally select one of two immediates, optimizing for small code size
2259// This will only be called if HAVE_CMOV_IMM is defined
2260void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2261{
2262 u_int armval;
2263 if(genimm(imm2-imm1,&armval)) {
2264 emit_movimm(imm1,rt);
2265 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2266 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2267 }else if(genimm(imm1-imm2,&armval)) {
2268 emit_movimm(imm1,rt);
2269 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2270 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2271 }
2272 else {
2273 #ifdef ARMv5_ONLY
2274 emit_movimm(imm1,rt);
2275 add_literal((int)out,imm2);
2276 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2277 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2278 #else
2279 emit_movw(imm1&0x0000FFFF,rt);
2280 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2281 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2282 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2283 }
2284 emit_movt(imm1&0xFFFF0000,rt);
2285 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2286 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2287 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2288 }
2289 #endif
2290 }
2291}
2292
// Special case for checking invalid_code (absolute-address form).
// Not implemented in this backend: always trips assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2298
2299// special case for checking invalid_code
2300void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2301{
2302 assert(imm<128&&imm>=0);
2303 assert(r>=0&&r<16);
2304 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2305 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2306 emit_cmpimm(HOST_TEMPREG,imm);
2307}
2308
2309// special case for tlb mapping
2310void emit_addsr12(int rs1,int rs2,int rt)
2311{
2312 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2313 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2314}
2315
0bbd1454 2316void emit_callne(int a)
2317{
2318 assem_debug("blne %x\n",a);
2319 u_int offset=genjmp(a);
2320 output_w32(0x1b000000|offset);
2321}
2322
// Used to preload hash table entries (absolute-address form).
// Not implemented for ARM. The previous body wrote an x86 opcode sequence
// (0F 18 /1 + 32-bit address, 7 bytes) into the instruction stream, which is
// not valid ARM code and leaves the output pointer misaligned for the 32-bit
// instruction words that follow. Trap instead, like the other unimplemented
// emitters in this file; emit_prefetchreg() is the register form that does
// emit an ARM instruction.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  assert(0);
}
2332void emit_prefetchreg(int r)
2333{
2334 assem_debug("pld %s\n",regname[r]);
2335 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2336}
2337
2338// Special case for mini_ht
2339void emit_ldreq_indexed(int rs, u_int offset, int rt)
2340{
2341 assert(offset<4096);
2342 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2343 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2344}
2345
2346void emit_flds(int r,int sr)
2347{
2348 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2349 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2350}
2351
2352void emit_vldr(int r,int vr)
2353{
2354 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2355 output_w32(0xed900b00|(vr<<12)|(r<<16));
2356}
2357
2358void emit_fsts(int sr,int r)
2359{
2360 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2361 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2362}
2363
2364void emit_vstr(int vr,int r)
2365{
2366 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2367 output_w32(0xed800b00|(vr<<12)|(r<<16));
2368}
2369
// Emit "ftosizs s<d>, s<s>" (mnemonic as logged): s is the source
// register number, d the destination.
void emit_ftosizs(int s,int d)
{
  assem_debug("ftosizs s%d,s%d\n", d, s);
  const int dst = ((d&14)<<11) | ((d&1)<<22);
  const int src = ((s&14)>>1) | ((s&1)<<5);
  output_w32(0xeebd0ac0 | dst | src);
}
2375
// Emit "ftosizd s<d>, d<s>" (mnemonic as logged): double source s,
// single destination d.
void emit_ftosizd(int s,int d)
{
  assem_debug("ftosizd s%d,d%d\n", d, s);
  const int dst = ((d&14)<<11) | ((d&1)<<22);
  const int src = s&7;
  output_w32(0xeebd0bc0 | dst | src);
}
2381
// Emit "fsitos s<d>, s<s>" (mnemonic as logged).
void emit_fsitos(int s,int d)
{
  assem_debug("fsitos s%d,s%d\n", d, s);
  const int dst = ((d&14)<<11) | ((d&1)<<22);
  const int src = ((s&14)>>1) | ((s&1)<<5);
  output_w32(0xeeb80ac0 | dst | src);
}
2387
// Emit "fsitod d<d>, s<s>" (mnemonic as logged): single source s, double
// destination d.
void emit_fsitod(int s,int d)
{
  assem_debug("fsitod d%d,s%d\n", d, s);
  const int dst = (d&7)<<12;
  const int src = ((s&14)>>1) | ((s&1)<<5);
  output_w32(0xeeb80bc0 | dst | src);
}
2393
// Emit "fcvtds d<d>, s<s>" (mnemonic as logged): single source s, double
// destination d.
void emit_fcvtds(int s,int d)
{
  assem_debug("fcvtds d%d,s%d\n", d, s);
  const int dst = (d&7)<<12;
  const int src = ((s&14)>>1) | ((s&1)<<5);
  output_w32(0xeeb70ac0 | dst | src);
}
2399
// Emit "fcvtsd s<d>, d<s>" (mnemonic as logged): double source s, single
// destination d.
void emit_fcvtsd(int s,int d)
{
  assem_debug("fcvtsd s%d,d%d\n", d, s);
  const int dst = ((d&14)<<11) | ((d&1)<<22);
  const int src = s&7;
  output_w32(0xeeb70bc0 | dst | src);
}
2405
// Emit "fsqrts s<d>, s<s>": single-precision square root of s into d.
// Both operands use the single-register field layout, so the log line names
// both as s-registers (it used to print the destination as "d%d").
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2411
// Emit "fsqrtd d<d>, d<s>": double-precision square root of s into d.
// Both operands use the double-register field layout, so the log line names
// both as d-registers (it used to print the destination as "s%d").
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2417
// Emit "fabss s<d>, s<s>": single-precision absolute value of s into d.
// Both operands use the single-register field layout, so the log line names
// both as s-registers (it used to print the destination as "d%d").
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2423
// Emit "fabsd d<d>, d<s>": double-precision absolute value of s into d.
// Both operands use the double-register field layout, so the log line names
// both as d-registers (it used to print the destination as "s%d").
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2429
// Emit "fnegs s<d>, s<s>": single-precision negation of s into d.
// Both operands use the single-register field layout, so the log line names
// both as s-registers (it used to print the destination as "d%d").
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2435
// Emit "fnegd d<d>, d<s>": double-precision negation of s into d.
// Both operands use the double-register field layout, so the log line names
// both as d-registers (it used to print the destination as "s%d").
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2441
// Emit "fadds s<d>, s<s1>, s<s2>" (mnemonic as logged).
void emit_fadds(int s1,int s2,int d)
{
  assem_debug("fadds s%d,s%d,s%d\n", d, s1, s2);
  const int dst = ((d&14)<<11) | ((d&1)<<22);
  const int lhs = ((s1&14)<<15) | ((s1&1)<<7);
  const int rhs = ((s2&14)>>1) | ((s2&1)<<5);
  output_w32(0xee300a00 | dst | lhs | rhs);
}
2447
// Emit "faddd d<d>, d<s1>, d<s2>" (mnemonic as logged).
void emit_faddd(int s1,int s2,int d)
{
  assem_debug("faddd d%d,d%d,d%d\n", d, s1, s2);
  const int operands = ((d&7)<<12) | ((s1&7)<<16) | (s2&7);
  output_w32(0xee300b00 | operands);
}
2453
// Emit "fsubs s<d>, s<s1>, s<s2>" (mnemonic as logged).
void emit_fsubs(int s1,int s2,int d)
{
  assem_debug("fsubs s%d,s%d,s%d\n", d, s1, s2);
  const int dst = ((d&14)<<11) | ((d&1)<<22);
  const int lhs = ((s1&14)<<15) | ((s1&1)<<7);
  const int rhs = ((s2&14)>>1) | ((s2&1)<<5);
  output_w32(0xee300a40 | dst | lhs | rhs);
}
2459
// Emit "fsubd d<d>, d<s1>, d<s2>" (mnemonic as logged).
void emit_fsubd(int s1,int s2,int d)
{
  assem_debug("fsubd d%d,d%d,d%d\n", d, s1, s2);
  const int operands = ((d&7)<<12) | ((s1&7)<<16) | (s2&7);
  output_w32(0xee300b40 | operands);
}
2465
// Emit "fmuls s<d>, s<s1>, s<s2>" (mnemonic as logged).
void emit_fmuls(int s1,int s2,int d)
{
  assem_debug("fmuls s%d,s%d,s%d\n", d, s1, s2);
  const int dst = ((d&14)<<11) | ((d&1)<<22);
  const int lhs = ((s1&14)<<15) | ((s1&1)<<7);
  const int rhs = ((s2&14)>>1) | ((s2&1)<<5);
  output_w32(0xee200a00 | dst | lhs | rhs);
}
2471
// Emit "fmuld d<d>, d<s1>, d<s2>" (mnemonic as logged).
void emit_fmuld(int s1,int s2,int d)
{
  assem_debug("fmuld d%d,d%d,d%d\n", d, s1, s2);
  const int operands = ((d&7)<<12) | ((s1&7)<<16) | (s2&7);
  output_w32(0xee200b00 | operands);
}
2477
// Emit "fdivs s<d>, s<s1>, s<s2>" (mnemonic as logged).
void emit_fdivs(int s1,int s2,int d)
{
  assem_debug("fdivs s%d,s%d,s%d\n", d, s1, s2);
  const int dst = ((d&14)<<11) | ((d&1)<<22);
  const int lhs = ((s1&14)<<15) | ((s1&1)<<7);
  const int rhs = ((s2&14)>>1) | ((s2&1)<<5);
  output_w32(0xee800a00 | dst | lhs | rhs);
}
2483
// Emit "fdivd d<d>, d<s1>, d<s2>" (mnemonic as logged).
void emit_fdivd(int s1,int s2,int d)
{
  assem_debug("fdivd d%d,d%d,d%d\n", d, s1, s2);
  const int operands = ((d&7)<<12) | ((s1&7)<<16) | (s2&7);
  output_w32(0xee800b00 | operands);
}
2489
// Emit a fixed "fcmps s14, s15" (mnemonic as logged). The x and y
// arguments are ignored; the instruction word is a constant.
void emit_fcmps(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  output_w32(0xeeb47a67);
}
2495
// Emit a fixed "fcmpd d6, d7" (mnemonic as logged). The x and y arguments
// are ignored; the instruction word is a constant.
void emit_fcmpd(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  output_w32(0xeeb46b47);
}
2501
// Emit "fmstat" (mnemonic as logged); the instruction word is a constant.
void emit_fmstat()
{
  assem_debug("fmstat\n");
  output_w32(0xeef1fa10);
}
2507
2508void emit_bicne_imm(int rs,int imm,int rt)
2509{
2510 u_int armval;
cfbd3c6e 2511 genimm_checked(imm,&armval);
57871462 2512 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2513 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2514}
2515
2516void emit_biccs_imm(int rs,int imm,int rt)
2517{
2518 u_int armval;
cfbd3c6e 2519 genimm_checked(imm,&armval);
57871462 2520 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2521 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2522}
2523
2524void emit_bicvc_imm(int rs,int imm,int rt)
2525{
2526 u_int armval;
cfbd3c6e 2527 genimm_checked(imm,&armval);
57871462 2528 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2529 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2530}
2531
2532void emit_bichi_imm(int rs,int imm,int rt)
2533{
2534 u_int armval;
cfbd3c6e 2535 genimm_checked(imm,&armval);
57871462 2536 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2537 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2538}
2539
2540void emit_orrvs_imm(int rs,int imm,int rt)
2541{
2542 u_int armval;
cfbd3c6e 2543 genimm_checked(imm,&armval);
57871462 2544 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2545 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2546}
2547
b9b61529 2548void emit_orrne_imm(int rs,int imm,int rt)
2549{
2550 u_int armval;
cfbd3c6e 2551 genimm_checked(imm,&armval);
b9b61529 2552 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2553 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2554}
2555
2556void emit_andne_imm(int rs,int imm,int rt)
2557{
2558 u_int armval;
cfbd3c6e 2559 genimm_checked(imm,&armval);
b9b61529 2560 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2561 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2562}
2563
57871462 2564void emit_jno_unlikely(int a)
2565{
2566 //emit_jno(a);
2567 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2568 output_w32(0x72800000|rd_rn_rm(15,15,0));
2569}
2570
2571// Save registers before function call
2572void save_regs(u_int reglist)
2573{
2574 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2575 if(!reglist) return;
2576 assem_debug("stmia fp,{");
2577 if(reglist&1) assem_debug("r0, ");
2578 if(reglist&2) assem_debug("r1, ");
2579 if(reglist&4) assem_debug("r2, ");
2580 if(reglist&8) assem_debug("r3, ");
2581 if(reglist&0x1000) assem_debug("r12");
2582 assem_debug("}\n");
2583 output_w32(0xe88b0000|reglist);
2584}
2585// Restore registers after function call
2586void restore_regs(u_int reglist)
2587{
2588 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2589 if(!reglist) return;
2590 assem_debug("ldmia fp,{");
2591 if(reglist&1) assem_debug("r0, ");
2592 if(reglist&2) assem_debug("r1, ");
2593 if(reglist&4) assem_debug("r2, ");
2594 if(reglist&8) assem_debug("r3, ");
2595 if(reglist&0x1000) assem_debug("r12");
2596 assem_debug("}\n");
2597 output_w32(0xe89b0000|reglist);
2598}
2599
2600// Write back consts using r14 so we don't disturb the other registers
2601void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2602{
2603 int hr;
2604 for(hr=0;hr<HOST_REGS;hr++) {
2605 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2606 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2607 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2608 int value=constmap[i][hr];
2609 if(value==0) {
2610 emit_zeroreg(HOST_TEMPREG);
2611 }
2612 else {
2613 emit_movimm(value,HOST_TEMPREG);
2614 }
2615 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2616#ifndef FORCE32
57871462 2617 if((i_is32>>i_regmap[hr])&1) {
2618 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2619 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2620 }
24385cae 2621#endif
57871462 2622 }
2623 }
2624 }
2625 }
2626}
2627
2628/* Stubs/epilogue */
2629
2630void literal_pool(int n)
2631{
2632 if(!literalcount) return;
2633 if(n) {
2634 if((int)out-literals[0][0]<4096-n) return;
2635 }
2636 u_int *ptr;
2637 int i;
2638 for(i=0;i<literalcount;i++)
2639 {
77750690 2640 u_int l_addr=(u_int)out;
2641 int j;
2642 for(j=0;j<i;j++) {
2643 if(literals[j][1]==literals[i][1]) {
2644 //printf("dup %08x\n",literals[i][1]);
2645 l_addr=literals[j][0];
2646 break;
2647 }
2648 }
57871462 2649 ptr=(u_int *)literals[i][0];
77750690 2650 u_int offset=l_addr-(u_int)ptr-8;
57871462 2651 assert(offset<4096);
2652 assert(!(offset&3));
2653 *ptr|=offset;
77750690 2654 if(l_addr==(u_int)out) {
2655 literals[i][0]=l_addr; // remember for dupes
2656 output_w32(literals[i][1]);
2657 }
57871462 2658 }
2659 literalcount=0;
2660}
2661
2662void literal_pool_jumpover(int n)
2663{
2664 if(!literalcount) return;
2665 if(n) {
2666 if((int)out-literals[0][0]<4096-n) return;
2667 }
2668 int jaddr=(int)out;
2669 emit_jmp(0);
2670 literal_pool(0);
2671 set_jump_target(jaddr,(int)out);
2672}
2673
2674emit_extjump2(int addr, int target, int linker)
2675{
2676 u_char *ptr=(u_char *)addr;
2677 assert((ptr[3]&0x0e)==0xa);
2678 emit_loadlp(target,0);
2679 emit_loadlp(addr,1);
24385cae 2680 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2681 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2682//DEBUG >
2683#ifdef DEBUG_CYCLE_COUNT
2684 emit_readword((int)&last_count,ECX);
2685 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2686 emit_readword((int)&next_interupt,ECX);
2687 emit_writeword(HOST_CCREG,(int)&Count);
2688 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2689 emit_writeword(ECX,(int)&last_count);
2690#endif
2691//DEBUG <
2692 emit_jmp(linker);
2693}
2694
2695emit_extjump(int addr, int target)
2696{
2697 emit_extjump2(addr, target, (int)dyna_linker);
2698}
2699emit_extjump_ds(int addr, int target)
2700{
2701 emit_extjump2(addr, target, (int)dyna_linker_ds);
2702}
2703
13e35c04 2704// put rt_val into rt, potentially making use of rs with value rs_val
2705static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2706{
2707 u_int xor=rs_val^rt_val;
2708 u_int xs;
2709 for(xs=xor;xs!=0&&(xs&3)==0;xs>>=2)
2710 ;
2711 if(xs<0x100)
2712 emit_xorimm(rs,xor,rt);
2713 else
2714 emit_movimm(rt_val,rt);
2715}
cbbab9cd 2716
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "no value" and is skipped where the
// code checks for it. May trash r2 (used as scratch for the swap case).
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // The arguments sit in each other's target register: swap via r2.
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // a1 occupies r0: move it out of the way before r0 is overwritten.
      emit_mov(a1,1);
      if(a0>=0)
        emit_mov(a0,0);
      return;
    }
  }
  // No conflict: r0 first, then r1.
  if(a0>0)
    emit_mov(a0,0);
  if(a1>=0&&a1!=1)
    emit_mov(a1,1);
}
2733
b1be1eee 2734static void mov_loadtype_adj(int type,int rs,int rt)
2735{
2736 switch(type) {
2737 case LOADB_STUB: emit_signextend8(rs,rt); break;
2738 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2739 case LOADH_STUB: emit_signextend16(rs,rt); break;
2740 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2741 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2742 default: assert(0);
2743 }
2744}
2745
2746#ifdef PCSX
2747#include "pcsxmem.h"
2748#include "pcsxmem_inline.c"
2749#endif
2750
// Emit the out-of-line slow path ("stub") number n for a memory load.
// Fields read from stubs[n]:
//   [0] stub type (LOADB_STUB, LOADBU_STUB, LOADH_STUB, LOADHU_STUB,
//       LOADW_STUB, and LOADD_STUB in the !PCSX build)
//   [1] address of the jump that gets retargeted to this stub
//   [2] return address the stub jumps back to
//   [3] index i used for itype[]/rt1[]/rs1[] lookups
//   [4] host register holding the address (rs)
//   [5] pointer to the struct regstat in effect
//   [6] cycle count adjustment
//   [7] register list handed to save_regs()/restore_regs()
// NOTE(review): the definition has no return type (implicit int) and never
// returns a value.
57871462 2751do_readstub(int n)
2752{
2753 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
// Flush pending literals first if they are about to go out of range.
2754 literal_pool(256);
// The stub starts here: retarget the recorded jump to the current position.
2755 set_jump_target(stubs[n][1],(int)out);
2756 int type=stubs[n][0];
2757 int i=stubs[n][3];
2758 int rs=stubs[n][4];
2759 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2760 u_int reglist=stubs[n][7];
2761 signed char *i_regmap=i_regs->regmap;
2762 int addr=get_reg(i_regmap,AGEN1+(i&1));
2763 int rth,rt;
2764 int ds;
// Destination host registers: FTEMP for C1LS/C2LS/LOADLR, otherwise the
// register mapped to rt1[i]. rth is the "|64" counterpart of rt.
b9b61529 2765 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2766 rth=get_reg(i_regmap,FTEMP|64);
2767 rt=get_reg(i_regmap,FTEMP);
2768 }else{
2769 rth=get_reg(i_regmap,rt1[i]|64);
2770 rt=get_reg(i_regmap,rt1[i]);
2771 }
2772 assert(rs>=0);
c6c3b1b3 2773#ifdef PCSX
2774 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
// The address register must not be chosen as scratch.
2775 reglist|=(1<<rs);
// Pick the lowest register allowed by mask 0x13ff (r0-r9, r12) that is not
// in reglist as scratch register `temp`.
2776 for(r=0;r<=12;r++) {
2777 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2778 temp=r; break;
2779 }
2780 }
// NOTE(review): presumably rt is dropped from reglist so restore_regs()
// does not overwrite the loaded value - confirm.
2781 if(rt>=0)
2782 reglist&=~(1<<rt);
// No free register: save the registers now and take r0 (or r2 if rs is r0).
2783 if(temp==-1) {
2784 save_regs(reglist);
2785 regs_saved=1;
2786 temp=(rs==0)?2:0;
2787 }
// Use r1 as second scratch when it is free to clobber and not otherwise in
// use; it stays HOST_TEMPREG in all other cases.
2788 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2789 temp2=1;
// Look up the mem_rtab entry indexed by rs>>12, then shift it left by one
// with a flag-setting shift.
// NOTE(review): going by the helper names, the "cc" loads and emit_jcc
// below execute on carry clear, i.e. when the entry's top bit was 0 and the
// shifted entry can be used as a base for a direct access - confirm.
2790 emit_readword((int)&mem_rtab,temp);
2791 emit_shrimm(rs,12,temp2);
2792 emit_readword_dualindexedx4(temp,temp2,temp2);
2793 emit_lsls_imm(temp2,1,temp2);
// Conditional direct load; skipped when the result is not needed.
2794 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2795 switch(type) {
2796 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2797 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2798 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2799 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2800 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2801 }
2802 }
// Conditional exit: straight back to the return address, or, if registers
// were already saved above, to the restore_regs() code at the end (that
// jump is patched once its target is known).
2803 if(regs_saved) {
2804 restore_jump=(int)out;
2805 emit_jcc(0); // jump to reg restore
2806 }
2807 else
2808 emit_jcc(stubs[n][2]); // return address
2809
// Handler path from here on.
2810 if(!regs_saved)
2811 save_regs(reglist);
2812 int handler=0;
2813 if(type==LOADB_STUB||type==LOADBU_STUB)
2814 handler=(int)jump_handler_read8;
2815 if(type==LOADH_STUB||type==LOADHU_STUB)
2816 handler=(int)jump_handler_read16;
2817 if(type==LOADW_STUB)
2818 handler=(int)jump_handler_read32;
2819 assert(handler!=0);
// r0 = rs (the address), r1 = temp2 (the shifted table entry).
b96d3df7 2820 pass_args(rs,temp2);
// r2 = cycle count register (loaded from CCREG if unmapped) plus the
// adjusted cycle count for this stub.
c6c3b1b3 2821 int cc=get_reg(i_regmap,CCREG);
2822 if(cc<0)
2823 emit_loadreg(CCREG,2);
2573466a 2824 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2825 emit_call(handler);
// The handler's result is taken from r0 and adjusted for the load type.
2826 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2827 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2828 }
2829 if(restore_jump)
2830 set_jump_target(restore_jump,(int)out);
2831 restore_regs(reglist);
2832 emit_jmp(stubs[n][2]); // return address
2833#else // !PCSX
// Non-PCSX path: the access goes through the readmem* function tables and
// the result is read back from readmem_dword.
57871462 2834 if(addr<0) addr=rt;
535d208a 2835 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2836 assert(addr>=0);
2837 int ftable=0;
2838 if(type==LOADB_STUB||type==LOADBU_STUB)
2839 ftable=(int)readmemb;
2840 if(type==LOADH_STUB||type==LOADHU_STUB)
2841 ftable=(int)readmemh;
2842 if(type==LOADW_STUB)
2843 ftable=(int)readmem;
24385cae 2844#ifndef FORCE32
57871462 2845 if(type==LOADD_STUB)
2846 ftable=(int)readmemd;
24385cae 2847#endif
2848 assert(ftable!=0);
57871462 2849 emit_writeword(rs,(int)&address);
2850 //emit_pusha();
2851 save_regs(reglist);
// This #ifndef is always taken here: we are already inside the !PCSX branch.
97a238a6 2852#ifndef PCSX
// ds is non-zero when the register state passed in is not regs[i].
57871462 2853 ds=i_regs!=&regs[i];
2854 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2855 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2856 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2857 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2858 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2859#endif
// Arguments for indirect_jump_indexed: r0 = table, r1 = rs>>16,
// r2 = cycle count plus adjustment, r3 = instruction address with flag bits
// in the two low bits.
57871462 2860 emit_shrimm(rs,16,1);
2861 int cc=get_reg(i_regmap,CCREG);
2862 if(cc<0) {
2863 emit_loadreg(CCREG,2);
2864 }
2865 emit_movimm(ftable,0);
2866 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2867#ifndef PCSX
57871462 2868 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2869#endif
57871462 2870 //emit_readword((int)&last_count,temp);
2871 //emit_add(cc,temp,cc);
2872 //emit_writeword(cc,(int)&Count);
2873 //emit_mov(15,14);
2874 emit_call((int)&indirect_jump_indexed);
2875 //emit_callreg(rs);
2876 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2877#ifndef PCSX
57871462 2878 // We really shouldn't need to update the count here,
2879 // but not doing so causes random crashes...
// Recompute the cycle counter from Count and next_interupt, and store
// next_interupt as the new last_count.
2880 emit_readword((int)&Count,HOST_TEMPREG);
2881 emit_readword((int)&next_interupt,2);
2882 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2883 emit_writeword(2,(int)&last_count);
2884 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2885 if(cc<0) {
2886 emit_storereg(CCREG,HOST_TEMPREG);
2887 }
f51dc36c 2888#endif
57871462 2889 //emit_popa();
2890 restore_regs(reglist);
2891 //if((cc=get_reg(regmap,CCREG))>=0) {
2892 // emit_loadreg(CCREG,cc);
2893 //}
// Fetch the result from readmem_dword with the width/sign matching the
// stub type; LOADD also fills rth from the following word when mapped.
f18c0f46 2894 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2895 assert(rt>=0);
2896 if(type==LOADB_STUB)
2897 emit_movsbl((int)&readmem_dword,rt);
2898 if(type==LOADBU_STUB)
2899 emit_movzbl((int)&readmem_dword,rt);
2900 if(type==LOADH_STUB)
2901 emit_movswl((int)&readmem_dword,rt);
2902 if(type==LOADHU_STUB)
2903 emit_movzwl((int)&readmem_dword,rt);
2904 if(type==LOADW_STUB)
2905 emit_readword((int)&readmem_dword,rt);
2906 if(type==LOADD_STUB) {
2907 emit_readword((int)&readmem_dword,rt);
2908 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2909 }
57871462 2910 }
2911 emit_jmp(stubs[n][2]); // return address
c6c3b1b3 2912#endif // !PCSX
57871462 2913}
2914
#ifdef PCSX
// Return the memhandler for addr, or get the directly accessible address
// and return 0.
//
// table is indexed by addr>>12. In every entry the top bit is a flag and
// the remaining bits, shifted left by one, are the payload:
//   - first-level flag clear: payload + addr is stored in *addr_host and
//     0 is returned;
//   - first-level flag set: the payload is the address of a second-level
//     table, indexed according to the access size of `type` (byte, halfword
//     or word) within the 4K page. If that entry's flag is clear, payload +
//     (addr&0xfff) is stored in *addr_host and 0 is returned; otherwise the
//     payload is returned as the handler and *addr_host is left untouched.
// The second-level table address is formed from a u_int, so this relies on
// pointers fitting in 32 bits.
u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
{
  // 1u, not 1: shifting a signed 1 into the sign bit is undefined behaviour.
  const u_int handler_flag=1u<<31;
  u_int l1,l2=0;
  l1=((u_int *)table)[addr>>12];
  if((l1&handler_flag)==0) {
    u_int v=l1<<1;
    *addr_host=v+addr;
    return 0;
  }
  else {
    l1<<=1;
    if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
      l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
    else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
      l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
    else
      l2=((u_int *)l1)[(addr&0xfff)/4];
    if((l2&handler_flag)==0) {
      u_int v=l2<<1;
      *addr_host=v+(addr&0xfff);
      return 0;
    }
    return l2<<1;
  }
}
#endif
2943
// Emit inline code for a load from the constant address addr.
//   type    - stub type (LOADB_STUB ... LOADW_STUB, LOADD_STUB if !PCSX)
//   i       - index used for regs[]/rs1[] lookups (only in the !PCSX path)
//   addr    - the address being read
//   regmap  - host register mapping used for get_reg() lookups
//   target  - register whose host mapping receives the result
//   adj     - cycle count adjustment
//   reglist - register list handed to save_regs()/restore_regs()
// NOTE(review): the definition has no return type (implicit int); the PCSX
// path uses bare "return;" statements.
57871462 2944inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2945{
// rs and rt are looked up with the same key, so they start out equal; rs
// falls back to the register mapped to -1 when target has no host register.
2946 int rs=get_reg(regmap,target);
2947 int rth=get_reg(regmap,target|64);
2948 int rt=get_reg(regmap,target);
535d208a 2949 if(rs<0) rs=get_reg(regmap,-1);
57871462 2950 assert(rs>=0);
c6c3b1b3 2951#ifdef PCSX
b1be1eee 2952 u_int handler,host_addr=0,is_dynamic,far_call=0;
2953 int cc=get_reg(regmap,CCREG);
// Give pcsx_direct_read() the first chance; a non-zero result means there
// is nothing left to do here.
2954 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2955 return;
c6c3b1b3 2956 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
// handler==0: the address is directly accessible at host_addr.
2957 if (handler==0) {
2958 if(rt<0)
2959 return;
// Turn the address held in rs into the host address, in place.
13e35c04 2960 if(addr!=host_addr)
2961 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2962 switch(type) {
2963 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2964 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2965 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2966 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2967 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2968 default: assert(0);
2969 }
2970 return;
2971 }
// When pcsxmem_is_handler_dynamic() reports the handler as dynamic, call
// the generic jump_handler_read* routine instead of the looked-up handler.
b1be1eee 2972 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2973 if(is_dynamic) {
2974 if(type==LOADB_STUB||type==LOADBU_STUB)
2975 handler=(int)jump_handler_read8;
2976 if(type==LOADH_STUB||type==LOADHU_STUB)
2977 handler=(int)jump_handler_read16;
2978 if(type==LOADW_STUB)
2979 handler=(int)jump_handler_read32;
2980 }
c6c3b1b3 2981
2982 // call a memhandler
2983 if(rt>=0)
2984 reglist&=~(1<<rt);
2985 save_regs(reglist);
// r0 = address: materialised as a constant when target is 0, otherwise
// copied from rs.
2986 if(target==0)
2987 emit_movimm(addr,0);
2988 else if(rs!=0)
2989 emit_mov(rs,0);
// A handler outside the +/-32MB range checked below is called through r12.
c6c3b1b3 2990 int offset=(int)handler-(int)out-8;
2991 if(offset<-33554432||offset>=33554432) {
2992 // unreachable memhandler, a plugin func perhaps
b1be1eee 2993 emit_movimm(handler,12);
2994 far_call=1;
2995 }
2996 if(cc<0)
2997 emit_loadreg(CCREG,2);
// Dynamic handler: r1 = shifted mem_rtab entry, r2 = adjusted cycle count.
// Static handler: the adjusted cycle count plus last_count is written to
// Count before the call.
2998 if(is_dynamic) {
2999 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
3000 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 3001 }
b1be1eee 3002 else {
3003 emit_readword((int)&last_count,3);
3004 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3005 emit_add(2,3,2);
3006 emit_writeword(2,(int)&Count);
3007 }
3008
3009 if(far_call)
3010 emit_callreg(12);
c6c3b1b3 3011 else
3012 emit_call(handler);
b1be1eee 3013
// The handler's result is taken from r0 and adjusted for the load type.
c6c3b1b3 3014 if(rt>=0) {
3015 switch(type) {
3016 case LOADB_STUB: emit_signextend8(0,rt); break;
3017 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
3018 case LOADH_STUB: emit_signextend16(0,rt); break;
3019 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
3020 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
3021 default: assert(0);
3022 }
3023 }
3024 restore_regs(reglist);
3025#else // if !PCSX
// Non-PCSX path: the access goes through the readmem* function tables and
// the result is read back from readmem_dword.
57871462 3026 int ftable=0;
3027 if(type==LOADB_STUB||type==LOADBU_STUB)
3028 ftable=(int)readmemb;
3029 if(type==LOADH_STUB||type==LOADHU_STUB)
3030 ftable=(int)readmemh;
3031 if(type==LOADW_STUB)
3032 ftable=(int)readmem;
24385cae 3033#ifndef FORCE32
57871462 3034 if(type==LOADD_STUB)
3035 ftable=(int)readmemd;
24385cae 3036#endif
3037 assert(ftable!=0);
fd99c415 3038 if(target==0)
3039 emit_movimm(addr,rs);
57871462 3040 emit_writeword(rs,(int)&address);
3041 //emit_pusha();
3042 save_regs(reglist);
// This #ifndef is always taken here: we are already inside the !PCSX branch.
0c1fe38b 3043#ifndef PCSX
3044 if((signed int)addr>=(signed int)0xC0000000) {
3045 // Theoretically we can have a pagefault here, if the TLB has never
3046 // been enabled and the address is outside the range 80000000..BFFFFFFF
3047 // Write out the registers so the pagefault can be handled. This is
3048 // a very rare case and likely represents a bug.
3049 int ds=regmap!=regs[i].regmap;
3050 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3051 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3052 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3053 }
3054#endif
57871462 3055 //emit_shrimm(rs,16,1);
3056 int cc=get_reg(regmap,CCREG);
3057 if(cc<0) {
3058 emit_loadreg(CCREG,2);
3059 }
3060 //emit_movimm(ftable,0);
// The table entry for addr>>16 is resolved now, at code generation time,
// and loaded into r0 as a constant.
3061 emit_movimm(((u_int *)ftable)[addr>>16],0);
3062 //emit_readword((int)&last_count,12);
2573466a 3063 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3064#ifndef PCSX
57871462 3065 if((signed int)addr>=(signed int)0xC0000000) {
3066 // Pagefault address
3067 int ds=regmap!=regs[i].regmap;
3068 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3069 }
f51dc36c 3070#endif
57871462 3071 //emit_add(12,2,2);
3072 //emit_writeword(2,(int)&Count);
3073 //emit_call(((u_int *)ftable)[addr>>16]);
3074 emit_call((int)&indirect_jump);
f51dc36c 3075#ifndef PCSX
57871462 3076 // We really shouldn't need to update the count here,
3077 // but not doing so causes random crashes...
// Recompute the cycle counter from Count and next_interupt, and store
// next_interupt as the new last_count.
3078 emit_readword((int)&Count,HOST_TEMPREG);
3079 emit_readword((int)&next_interupt,2);
2573466a 3080 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3081 emit_writeword(2,(int)&last_count);
3082 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3083 if(cc<0) {
3084 emit_storereg(CCREG,HOST_TEMPREG);
3085 }
f51dc36c 3086#endif
57871462 3087 //emit_popa();
3088 restore_regs(reglist);
// Fetch the result from readmem_dword with the width/sign matching the
// stub type; LOADD also fills rth from the following word when mapped.
fd99c415 3089 if(rt>=0) {
3090 if(type==LOADB_STUB)
3091 emit_movsbl((int)&readmem_dword,rt);
3092 if(type==LOADBU_STUB)
3093 emit_movzbl((int)&readmem_dword,rt);
3094 if(type==LOADH_STUB)
3095 emit_movswl((int)&readmem_dword,rt);
3096 if(type==LOADHU_STUB)
3097 emit_movzwl((int)&readmem_dword,rt);
3098 if(type==LOADW_STUB)
3099 emit_readword((int)&readmem_dword,rt);
3100 if(type==LOADD_STUB) {
3101 emit_readword((int)&readmem_dword,rt);
3102 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3103 }
57871462 3104 }
c6c3b1b3 3105#endif // !PCSX
57871462 3106}
3107
3108do_writestub(int n)
3109{
3110 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3111 literal_pool(256);
3112 set_jump_target(stubs[n][1],(int)out);
3113 int type=stubs[n][0];
3114 int i=stubs[n][3];
3115 int rs=stubs[n][4];
3116 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3117 u_int reglist=stubs[n][7];
3118 signed char *i_regmap=i_regs->regmap;
3119 int addr=get_reg(i_regmap,AGEN1+(i&1));
3120 int rth,rt,r;
3121 int ds;
b9b61529 3122 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3123 rth=get_reg(i_regmap,FTEMP|64);
3124 rt=get_reg(i_regmap,r=FTEMP);
3125 }else{
3126 rth=get_reg(i_regmap,rs2[i]|64);
3127 rt=get_reg(i_regmap,r=rs2[i]);
3128 }
3129 assert(rs>=0);
3130 assert(rt>=0);
b96d3df7 3131#ifdef PCSX
3132 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3133 int reglist2=reglist|(1<<rs)|(1<<rt);
3134 for(rtmp=0;rtmp<=12;rtmp++) {
3135 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3136 temp=rtmp; break;
3137 }
3138 }
3139 if(temp==-1) {
3140 save_regs(reglist);
3141 regs_saved=1;
3142 for(rtmp=0;rtmp<=3;rtmp++)
3143 if(rtmp!=rs&&rtmp!=rt)
3144 {temp=rtmp;break;}
3145 }
3146 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3147 temp2=3;
3148 emit_readword((int)&mem_wtab,temp);
3149 emit_shrimm(rs,12,temp2);
3150 emit_readword_dualindexedx4(temp,temp2,temp2);
3151 emit_lsls_imm(temp2,1,temp2);
3152 switch(type) {
3153 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3154 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3155 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3156 default: assert(0);
3157 }
3158 if(regs_saved) {
3159 restore_jump=(int)out;
3160 emit_jcc(0); // jump to reg restore
3161 }
3162 else
3163 emit_jcc(stubs[n][2]); // return address (invcode check)
3164
3165 if(!regs_saved)
3166 save_regs(reglist);
3167 int handler=0;
3168 switch(type) {
3169 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3170 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3171 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3172 }
3173 assert(handler!=0);
3174 pass_args(rs,rt);
3175 if(temp2!=3)
3176 emit_mov(temp2,3);
3177 int cc=get_reg(i_regmap,CCREG);
3178 if(cc<0)
3179 emit_loadreg(CCREG,2);
2573466a 3180 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3181 // returns new cycle_count
3182 emit_call(handler);
2573466a 3183 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3184 if(cc<0)
3185 emit_storereg(CCREG,2);
3186 if(restore_jump)
3187 set_jump_target(restore_jump,(int)out);
3188 restore_regs(reglist);
3189 ra=stubs[n][2];
3190 if(!restore_jump) ra+=4*3; // skip invcode check
3191 emit_jmp(ra);
3192#else // if !PCSX
57871462 3193 if(addr<0) addr=get_reg(i_regmap,-1);
3194 assert(addr>=0);
3195 int ftable=0;
3196 if(type==STOREB_STUB)
3197 ftable=(int)writememb;
3198 if(type==STOREH_STUB)
3199 ftable=(int)writememh;
3200 if(type==STOREW_STUB)
3201 ftable=(int)writemem;
24385cae 3202#ifndef FORCE32
57871462 3203 if(type==STORED_STUB)
3204 ftable=(int)writememd;
24385cae 3205#endif
3206 assert(ftable!=0);
57871462 3207 emit_writeword(rs,(int)&address);
3208 //emit_shrimm(rs,16,rs);
3209 //emit_movmem_indexedx4(ftable,rs,rs);
3210 if(type==STOREB_STUB)
3211 emit_writebyte(rt,(int)&byte);
3212 if(type==STOREH_STUB)
3213 emit_writehword(rt,(int)&hword);
3214 if(type==STOREW_STUB)
3215 emit_writeword(rt,(int)&word);
3216 if(type==STORED_STUB) {
3d624f89 3217#ifndef FORCE32
57871462 3218 emit_writeword(rt,(int)&dword);
3219 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3220#else
3221 printf("STORED_STUB\n");
3222#endif
57871462 3223 }
3224 //emit_pusha();
3225 save_regs(reglist);
97a238a6 3226#ifndef PCSX
57871462 3227 ds=i_regs!=&regs[i];
3228 int real_rs=get_reg(i_regmap,rs1[i]);
3229 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3230 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3231 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3232 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3233#endif
57871462 3234 emit_shrimm(rs,16,1);
3235 int cc=get_reg(i_regmap,CCREG);
3236 if(cc<0) {
3237 emit_loadreg(CCREG,2);
3238 }
3239 emit_movimm(ftable,0);
3240 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3241#ifndef PCSX
57871462 3242 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3243#endif
57871462 3244 //emit_readword((int)&last_count,temp);
3245 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3246 //emit_add(cc,temp,cc);
3247 //emit_writeword(cc,(int)&Count);
3248 emit_call((int)&indirect_jump_indexed);
3249 //emit_callreg(rs);
3250 emit_readword((int)&Count,HOST_TEMPREG);
3251 emit_readword((int)&next_interupt,2);
3252 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3253 emit_writeword(2,(int)&last_count);
3254 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3255 if(cc<0) {
3256 emit_storereg(CCREG,HOST_TEMPREG);
3257 }
3258 //emit_popa();
3259 restore_regs(reglist);
3260 //if((cc=get_reg(regmap,CCREG))>=0) {
3261 // emit_loadreg(CCREG,cc);
3262 //}
3263 emit_jmp(stubs[n][2]); // return address
b96d3df7 3264#endif // !PCSX
57871462 3265}
3266
3267inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3268{
3269 int rs=get_reg(regmap,-1);
3270 int rth=get_reg(regmap,target|64);
3271 int rt=get_reg(regmap,target);
3272 assert(rs>=0);
3273 assert(rt>=0);
cbbab9cd 3274#ifdef PCSX
b96d3df7 3275 u_int handler,host_addr=0;
b96d3df7 3276 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3277 if (handler==0) {
13e35c04 3278 if(addr!=host_addr)
3279 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3280 switch(type) {
3281 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3282 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3283 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3284 default: assert(0);
3285 }
3286 return;
3287 }
3288
3289 // call a memhandler
3290 save_regs(reglist);
13e35c04 3291 pass_args(rs,rt);
b96d3df7 3292 int cc=get_reg(regmap,CCREG);
3293 if(cc<0)
3294 emit_loadreg(CCREG,2);
2573466a 3295 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3296 emit_movimm(handler,3);
3297 // returns new cycle_count
3298 emit_call((int)jump_handler_write_h);
2573466a 3299 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3300 if(cc<0)
3301 emit_storereg(CCREG,2);
3302 restore_regs(reglist);
3303#else // if !pcsx
57871462 3304 int ftable=0;
3305 if(type==STOREB_STUB)
3306 ftable=(int)writememb;
3307 if(type==STOREH_STUB)
3308 ftable=(int)writememh;
3309 if(type==STOREW_STUB)
3310 ftable=(int)writemem;
24385cae 3311#ifndef FORCE32
57871462 3312 if(type==STORED_STUB)
3313 ftable=(int)writememd;
24385cae 3314#endif
3315 assert(ftable!=0);
57871462 3316 emit_writeword(rs,(int)&address);
3317 //emit_shrimm(rs,16,rs);
3318 //emit_movmem_indexedx4(ftable,rs,rs);
3319 if(type==STOREB_STUB)
3320 emit_writebyte(rt,(int)&byte);
3321 if(type==STOREH_STUB)
3322 emit_writehword(rt,(int)&hword);
3323 if(type==STOREW_STUB)
3324 emit_writeword(rt,(int)&word);
3325 if(type==STORED_STUB) {
3d624f89 3326#ifndef FORCE32
57871462 3327 emit_writeword(rt,(int)&dword);
3328 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3329#else
3330 printf("STORED_STUB\n");
3331#endif
57871462 3332 }
3333 //emit_pusha();
3334 save_regs(reglist);
0c1fe38b 3335#ifndef PCSX
3336 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3337 if((signed int)addr>=(signed int)0xC0000000) {
3338 // Theoretically we can have a pagefault here, if the TLB has never
3339 // been enabled and the address is outside the range 80000000..BFFFFFFF
3340 // Write out the registers so the pagefault can be handled. This is
3341 // a very rare case and likely represents a bug.
3342 int ds=regmap!=regs[i].regmap;
3343 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3344 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3345 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3346 }
3347#endif
57871462 3348 //emit_shrimm(rs,16,1);
3349 int cc=get_reg(regmap,CCREG);
3350 if(cc<0) {
3351 emit_loadreg(CCREG,2);
3352 }
3353 //emit_movimm(ftable,0);
3354 emit_movimm(((u_int *)ftable)[addr>>16],0);
3355 //emit_readword((int)&last_count,12);
2573466a 3356 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3357#ifndef PCSX
57871462 3358 if((signed int)addr>=(signed int)0xC0000000) {
3359 // Pagefault address
3360 int ds=regmap!=regs[i].regmap;
3361 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3362 }
f51dc36c 3363#endif
57871462 3364 //emit_add(12,2,2);
3365 //emit_writeword(2,(int)&Count);
3366 //emit_call(((u_int *)ftable)[addr>>16]);
3367 emit_call((int)&indirect_jump);
3368 emit_readword((int)&Count,HOST_TEMPREG);
3369 emit_readword((int)&next_interupt,2);
2573466a 3370 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3371 emit_writeword(2,(int)&last_count);
3372 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3373 if(cc<0) {
3374 emit_storereg(CCREG,HOST_TEMPREG);
3375 }
3376 //emit_popa();
3377 restore_regs(reglist);
b96d3df7 3378#endif
57871462 3379}
3380
3381do_unalignedwritestub(int n)
3382{
b7918751 3383 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3384 literal_pool(256);
57871462 3385 set_jump_target(stubs[n][1],(int)out);
b7918751 3386
3387 int i=stubs[n][3];
3388 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3389 int addr=stubs[n][5];
3390 u_int reglist=stubs[n][7];
3391 signed char *i_regmap=i_regs->regmap;
3392 int temp2=get_reg(i_regmap,FTEMP);
3393 int rt;
3394 int ds, real_rs;
3395 rt=get_reg(i_regmap,rs2[i]);
3396 assert(rt>=0);
3397 assert(addr>=0);
3398 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3399 reglist|=(1<<addr);
3400 reglist&=~(1<<temp2);
3401
b96d3df7 3402#if 1
3403 // don't bother with it and call write handler
3404 save_regs(reglist);
3405 pass_args(addr,rt);
3406 int cc=get_reg(i_regmap,CCREG);
3407 if(cc<0)
3408 emit_loadreg(CCREG,2);
2573466a 3409 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3410 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3411 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3412 if(cc<0)
3413 emit_storereg(CCREG,2);
3414 restore_regs(reglist);
3415 emit_jmp(stubs[n][2]); // return address
3416#else
b7918751 3417 emit_andimm(addr,0xfffffffc,temp2);
3418 emit_writeword(temp2,(int)&address);
3419
3420 save_regs(reglist);
97a238a6 3421#ifndef PCSX
b7918751 3422 ds=i_regs!=&regs[i];
3423 real_rs=get_reg(i_regmap,rs1[i]);
3424 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3425 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3426 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3427 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3428#endif
b7918751 3429 emit_shrimm(addr,16,1);
3430 int cc=get_reg(i_regmap,CCREG);
3431 if(cc<0) {
3432 emit_loadreg(CCREG,2);
3433 }
3434 emit_movimm((u_int)readmem,0);
3435 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3436#ifndef PCSX
3437 // pagefault address
3438 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3439#endif
b7918751 3440 emit_call((int)&indirect_jump_indexed);
3441 restore_regs(reglist);
3442
3443 emit_readword((int)&readmem_dword,temp2);
3444 int temp=addr; //hmh
3445 emit_shlimm(addr,3,temp);
3446 emit_andimm(temp,24,temp);
3447#ifdef BIG_ENDIAN_MIPS
3448 if (opcode[i]==0x2e) // SWR
3449#else
3450 if (opcode[i]==0x2a) // SWL
3451#endif
3452 emit_xorimm(temp,24,temp);
3453 emit_movimm(-1,HOST_TEMPREG);
55439448 3454 if (opcode[i]==0x2a) { // SWL
b7918751 3455 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3456 emit_orrshr(rt,temp,temp2);
3457 }else{
3458 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3459 emit_orrshl(rt,temp,temp2);
3460 }
3461 emit_readword((int)&address,addr);
3462 emit_writeword(temp2,(int)&word);
3463 //save_regs(reglist); // don't need to, no state changes
3464 emit_shrimm(addr,16,1);
3465 emit_movimm((u_int)writemem,0);
3466 //emit_call((int)&indirect_jump_indexed);
3467 emit_mov(15,14);
3468 emit_readword_dualindexedx4(0,1,15);
3469 emit_readword((int)&Count,HOST_TEMPREG);
3470 emit_readword((int)&next_interupt,2);
3471 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3472 emit_writeword(2,(int)&last_count);
3473 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3474 if(cc<0) {
3475 emit_storereg(CCREG,HOST_TEMPREG);
3476 }
3477 restore_regs(reglist);
57871462 3478 emit_jmp(stubs[n][2]); // return address
b96d3df7 3479#endif
57871462 3480}
3481
/*
 * Debug helper: print the eight register values passed in, followed (in
 * parentheses) by the word located just below the first argument.
 * NOTE(review): the (&edi)[-1] read depends on the argument layout of the
 * calling convention - presumably meant to show a return address; confirm.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,
         ebp,esi,edi,
         (&edi)[-1]);
}
3486
3487do_invstub(int n)
3488{
3489 literal_pool(20);
3490 u_int reglist=stubs[n][3];
3491 set_jump_target(stubs[n][1],(int)out);
3492 save_regs(reglist);
3493 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3494 emit_call((int)&invalidate_addr);
3495 restore_regs(reglist);
3496 emit_jmp(stubs[n][2]); // return address
3497}
3498
3499int do_dirty_stub(int i)
3500{
3501 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3502 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3503 #ifdef PCSX
3504 addr=(u_int)source;
3505 #endif
57871462 3506 // Careful about the code output here, verify_dirty needs to parse it.
3507 #ifdef ARMv5_ONLY
ac545b3a 3508 emit_loadlp(addr,1);
57871462 3509 emit_loadlp((int)copy,2);
3510 emit_loadlp(slen*4,3);
3511 #else
ac545b3a 3512 emit_movw(addr&0x0000FFFF,1);
57871462 3513 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3514 emit_movt(addr&0xFFFF0000,1);
57871462 3515 emit_movt(((u_int)copy)&0xFFFF0000,2);
3516 emit_movw(slen*4,3);
3517 #endif
3518 emit_movimm(start+i*4,0);
3519 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3520 int entry=(int)out;
3521 load_regs_entry(i);
3522 if(entry==(int)out) entry=instr_addr[i];
3523 emit_jmp(instr_addr[i]);
3524 return entry;
3525}
3526
3527void do_dirty_stub_ds()
3528{
3529 // Careful about the code output here, verify_dirty needs to parse it.
3530 #ifdef ARMv5_ONLY
3531 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3532 emit_loadlp((int)copy,2);
3533 emit_loadlp(slen*4,3);
3534 #else
3535 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3536 emit_movw(((u_int)copy)&0x0000FFFF,2);
3537 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3538 emit_movt(((u_int)copy)&0xFFFF0000,2);
3539 emit_movw(slen*4,3);
3540 #endif
3541 emit_movimm(start+1,0);
3542 emit_call((int)&verify_code_ds);
3543}
3544
3545do_cop1stub(int n)
3546{
3547 literal_pool(256);
3548 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3549 set_jump_target(stubs[n][1],(int)out);
3550 int i=stubs[n][3];
3d624f89 3551// int rs=stubs[n][4];
57871462 3552 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3553 int ds=stubs[n][6];
3554 if(!ds) {
3555 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3556 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3557 }
3558 //else {printf("fp exception in delay slot\n");}
3559 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3560 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3561 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3562 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3563 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3564}
3565
63cb0298 3566#ifndef DISABLE_TLB
3567
57871462 3568/* TLB */
3569
3570int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3571{
3572 if(c) {
3573 if((signed int)addr>=(signed int)0xC0000000) {
3574 // address_generation already loaded the const
3575 emit_readword_dualindexedx4(FP,map,map);
3576 }
3577 else
3578 return -1; // No mapping
3579 }
3580 else {
3581 assert(s!=map);
3582 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3583 emit_addsr12(map,s,map);
3584 // Schedule this while we wait on the load
3585 //if(x) emit_xorimm(s,x,ar);
3586 if(shift>=0) emit_shlimm(s,3,shift);
3587 if(~a) emit_andimm(s,a,ar);
3588 emit_readword_dualindexedx4(FP,map,map);
3589 }
3590 return map;
3591}
3592int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3593{
3594 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3595 emit_test(map,map);
3596 *jaddr=(int)out;
3597 emit_js(0);
3598 }
3599 return map;
3600}
3601
3602int gen_tlb_addr_r(int ar, int map) {
3603 if(map>=0) {
3604 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3605 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3606 }
3607}
3608
3609int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3610{
3611 if(c) {
3612 if(addr<0x80800000||addr>=0xC0000000) {
3613 // address_generation already loaded the const
3614 emit_readword_dualindexedx4(FP,map,map);
3615 }
3616 else
3617 return -1; // No mapping
3618 }
3619 else {
3620 assert(s!=map);
3621 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3622 emit_addsr12(map,s,map);
3623 // Schedule this while we wait on the load
3624 //if(x) emit_xorimm(s,x,ar);
3625 emit_readword_dualindexedx4(FP,map,map);
3626 }
3627 return map;
3628}
3629int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3630{
3631 if(!c||addr<0x80800000||addr>=0xC0000000) {
3632 emit_testimm(map,0x40000000);
3633 *jaddr=(int)out;
3634 emit_jne(0);
3635 }
3636}
3637
3638int gen_tlb_addr_w(int ar, int map) {
3639 if(map>=0) {
3640 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3641 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3642 }
3643}
3644
3645// Generate the address of the memory_map entry, relative to dynarec_local
3646generate_map_const(u_int addr,int reg) {
3647 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3648 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3649}
3650
63cb0298 3651#else
3652
/*
 * TLB support compiled out: placeholder versions of the TLB helpers that
 * emit nothing and return 0.  They are declared without a prototype so
 * existing call sites can keep passing their usual arguments.
 */
static int do_tlb_r()
{
  return 0;
}

static int do_tlb_r_branch()
{
  return 0;
}

static int gen_tlb_addr_r()
{
  return 0;
}

static int do_tlb_w()
{
  return 0;
}

static int do_tlb_w_branch()
{
  return 0;
}

static int gen_tlb_addr_w()
{
  return 0;
}
3659
3660#endif // DISABLE_TLB
3661
57871462 3662/* Special assem */
3663
3664void shift_assemble_arm(int i,struct regstat *i_regs)
3665{
3666 if(rt1[i]) {
3667 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3668 {
3669 signed char s,t,shift;
3670 t=get_reg(i_regs->regmap,rt1[i]);
3671 s=get_reg(i_regs->regmap,rs1[i]);
3672 shift=get_reg(i_regs->regmap,rs2[i]);
3673 if(t>=0){
3674 if(rs1[i]==0)
3675 {
3676 emit_zeroreg(t);
3677 }
3678 else if(rs2[i]==0)
3679 {
3680 assert(s>=0);
3681 if(s!=t) emit_mov(s,t);
3682 }
3683 else
3684 {
3685 emit_andimm(shift,31,HOST_TEMPREG);
3686 if(opcode2[i]==4) // SLLV
3687 {
3688 emit_shl(s,HOST_TEMPREG,t);
3689 }
3690 if(opcode2[i]==6) // SRLV
3691 {
3692 emit_shr(s,HOST_TEMPREG,t);
3693 }
3694 if(opcode2[i]==7) // SRAV
3695 {
3696 emit_sar(s,HOST_TEMPREG,t);
3697 }
3698 }
3699 }
3700 } else { // DSLLV/DSRLV/DSRAV
3701 signed char sh,sl,th,tl,shift;
3702 th=get_reg(i_regs->regmap,rt1[i]|64);
3703 tl=get_reg(i_regs->regmap,rt1[i]);
3704 sh=get_reg(i_regs->regmap,rs1[i]|64);
3705 sl=get_reg(i_regs->regmap,rs1[i]);
3706 shift=get_reg(i_regs->regmap,rs2[i]);
3707 if(tl>=0){
3708 if(rs1[i]==0)
3709 {
3710 emit_zeroreg(tl);
3711 if(th>=0) emit_zeroreg(th);
3712 }
3713 else if(rs2[i]==0)
3714 {
3715 assert(sl>=0);
3716 if(sl!=tl) emit_mov(sl,tl);
3717 if(th>=0&&sh!=th) emit_mov(sh,th);
3718 }
3719 else
3720 {
3721 // FIXME: What if shift==tl ?
3722 assert(shift!=tl);
3723 int temp=get_reg(i_regs->regmap,-1);
3724 int real_th=th;
3725 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3726 assert(sl>=0);
3727 assert(sh>=0);
3728 emit_andimm(shift,31,HOST_TEMPREG);
3729 if(opcode2[i]==0x14) // DSLLV
3730 {
3731 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3732 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3733 emit_orrshr(sl,HOST_TEMPREG,th);
3734 emit_andimm(shift,31,HOST_TEMPREG);
3735 emit_testimm(shift,32);
3736 emit_shl(sl,HOST_TEMPREG,tl);
3737 if(th>=0) emit_cmovne_reg(tl,th);
3738 emit_cmovne_imm(0,tl);
3739 }
3740 if(opcode2[i]==0x16) // DSRLV
3741 {
3742 assert(th>=0);
3743 emit_shr(sl,HOST_TEMPREG,tl);
3744 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3745 emit_orrshl(sh,HOST_TEMPREG,tl);
3746 emit_andimm(shift,31,HOST_TEMPREG);
3747 emit_testimm(shift,32);
3748 emit_shr(sh,HOST_TEMPREG,th);
3749 emit_cmovne_reg(th,tl);
3750 if(real_th>=0) emit_cmovne_imm(0,th);
3751 }
3752 if(opcode2[i]==0x17) // DSRAV
3753 {
3754 assert(th>=0);
3755 emit_shr(sl,HOST_TEMPREG,tl);
3756 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3757 if(real_th>=0) {
3758 assert(temp>=0);
3759 emit_sarimm(th,31,temp);
3760 }
3761 emit_orrshl(sh,HOST_TEMPREG,tl);
3762 emit_andimm(shift,31,HOST_TEMPREG);
3763 emit_testimm(shift,32);
3764 emit_sar(sh,HOST_TEMPREG,th);
3765 emit_cmovne_reg(th,tl);
3766 if(real_th>=0) emit_cmovne_reg(temp,th);
3767 }
3768 }
3769 }
3770 }
3771 }
3772}
ffb0b9e0 3773
3774#ifdef PCSX
3775static void speculate_mov(int rs,int rt)
3776{
3777 if(rt!=0) {
3778 smrv_strong_next|=1<<rt;
3779 smrv[rt]=smrv[rs];
3780 }
3781}
3782
3783static void speculate_mov_weak(int rs,int rt)
3784{
3785 if(rt!=0) {
3786 smrv_weak_next|=1<<rt;
3787 smrv[rt]=smrv[rs];
3788 }
3789}
3790
3791static void speculate_register_values(int i)
3792{
3793 if(i==0) {
3794 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3795 // gp,sp are likely to stay the same throughout the block
3796 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3797 smrv_weak_next=~smrv_strong_next;
3798 //printf(" llr %08x\n", smrv[4]);
3799 }
3800 smrv_strong=smrv_strong_next;
3801 smrv_weak=smrv_weak_next;
3802 switch(itype[i]) {
3803 case ALU:
3804 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3805 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3806 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3807 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3808 else {
3809 smrv_strong_next&=~(1<<rt1[i]);
3810 smrv_weak_next&=~(1<<rt1[i]);
3811 }
3812 break;
3813 case SHIFTIMM:
3814 smrv_strong_next&=~(1<<rt1[i]);
3815 smrv_weak_next&=~(1<<rt1[i]);
3816 // fallthrough
3817 case IMM16:
3818 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3819 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3820 if(hr>=0) {
3821 if(get_final_value(hr,i,&value))
3822 smrv[rt1[i]]=value;
3823 else smrv[rt1[i]]=constmap[i][hr];
3824 smrv_strong_next|=1<<rt1[i];
3825 }
3826 }
3827 else {
3828 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3829 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3830 }
3831 break;
3832 case LOAD:
3833 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3834 // special case for BIOS
3835 smrv[rt1[i]]=0xa0000000;
3836 smrv_strong_next|=1<<rt1[i];
3837 break;
3838 }
3839 // fallthrough
3840 case SHIFT:
3841 case LOADLR:
3842 case MOV:
3843 smrv_strong_next&=~(1<<rt1[i]);
3844 smrv_weak_next&=~(1<<rt1[i]);
3845 break;
3846 case COP0:
3847 case COP2:
3848 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3849 smrv_strong_next&=~(1<<rt1[i]);
3850 smrv_weak_next&=~(1<<rt1[i]);
3851 }
3852 break;
3853 case C2LS:
3854 if (opcode[i]==0x32) { // LWC2
3855 smrv_strong_next&=~(1<<rt1[i]);
3856 smrv_weak_next&=~(1<<rt1[i]);
3857 }
3858 break;
3859 }
3860#if 0
3861 int r=4;
3862 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3863 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3864#endif
3865}
3866
/*
 * Memory region classes returned by get_ptr_mem_type().  MTYPE_8000 must
 * stay 0: emit_fastpath_cmp_jump() treats type 0 as the plain RAM case.
 */
enum {
  MTYPE_8000 = 0, // default: no address adjustment
  MTYPE_8020 = 1, // RAM mirror at 0x80200000+
  MTYPE_0000 = 2, // RAM mirror at 0
  MTYPE_A000 = 3, // RAM mirror at 0xa0000000
  MTYPE_1F80 = 4, // scratchpad
};
3874
3875static int get_ptr_mem_type(u_int a)
3876{
3877 if(a < 0x00200000) {
3878 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3879 // return wrong, must use memhandler for BIOS self-test to pass
3880 // 007 does similar stuff from a00 mirror, weird stuff
3881 return MTYPE_8000;
3882 return MTYPE_0000;
3883 }
3884 if(0x1f800000 <= a && a < 0x1f801000)
3885 return MTYPE_1F80;
3886 if(0x80200000 <= a && a < 0x80800000)
3887 return MTYPE_8020;
3888 if(0xa0000000 <= a && a < 0xa0200000)
3889 return MTYPE_A000;
3890 return MTYPE_8000;
3891}
3892#endif
3893
3894static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3895{
3896 int jaddr,type=0;
3897
3898#ifdef PCSX
3899 int mr=rs1[i];
3900 if(((smrv_strong|smrv_weak)>>mr)&1) {
3901 type=get_ptr_mem_type(smrv[mr]);
3902 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3903 }
3904 else {
3905 // use the mirror we are running on
3906 type=get_ptr_mem_type(start);
3907 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3908 }
3909
3910 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3911 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3912 addr=*addr_reg_override=HOST_TEMPREG;
3913 type=0;
3914 }
3915 else if(type==MTYPE_0000) { // RAM 0 mirror
3916 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3917 addr=*addr_reg_override=HOST_TEMPREG;
3918 type=0;
3919 }
3920 else if(type==MTYPE_A000) { // RAM A mirror
3921 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3922 addr=*addr_reg_override=HOST_TEMPREG;
3923 type=0;
3924 }
3925 else if(type==MTYPE_1F80) { // scratchpad
3926 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3927 emit_cmpimm(HOST_TEMPREG,0x1000);
3928 jaddr=(int)out;
3929 emit_jc(0);
3930 }
3931#endif
3932
3933 if(type==0)
3934 {
3935 emit_cmpimm(addr,RAM_SIZE);
3936 jaddr=(int)out;
3937 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3938 // Hint to branch predictor that the branch is unlikely to be taken
3939 if(rs1[i]>=28)
3940 emit_jno_unlikely(0);
3941 else
3942 #endif
3943 emit_jno(0);
3944 }
3945
3946 return jaddr;
3947}
3948
57871462 3949#define shift_assemble shift_assemble_arm
3950
3951void loadlr_assemble_arm(int i,struct regstat *i_regs)
3952{
3953 int s,th,tl,temp,temp2,addr,map=-1;
3954 int offset;
3955 int jaddr=0;
af4ee1fe 3956 int memtarget=0,c=0;
ffb0b9e0 3957 int fastload_reg_override=0;
57871462 3958 u_int hr,reglist=0;
3959 th=get_reg(i_regs->regmap,rt1[i]|64);
3960 tl=get_reg(i_regs->regmap,rt1[i]);
3961 s=get_reg(i_regs->regmap,rs1[i]);
3962 temp=get_reg(i_regs->regmap,-1);
3963 temp2=get_reg(i_regs->regmap,FTEMP);
3964 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3965 assert(addr<0);
3966 offset=imm[i];
3967 for(hr=0;hr<HOST_REGS;hr++) {
3968 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3969 }
3970 reglist|=1<<temp;
3971 if(offset||s<0||c) addr=temp2;
3972 else addr=s;
3973 if(s>=0) {
3974 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3975 if(c) {
3976 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3977 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3978 }
57871462 3979 }
535d208a 3980 if(!using_tlb) {
3981 if(!c) {
3982 #ifdef RAM_OFFSET
3983 map=get_reg(i_regs->regmap,ROREG);
3984 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3985 #endif
3986 emit_shlimm(addr,3,temp);
3987 if (opcode[i]==0x22||opcode[i]==0x26) {
3988 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3989 }else{
535d208a 3990 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3991 }
ffb0b9e0 3992 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 3993 }
3994 else {
3995 if (opcode[i]==0x22||opcode[i]==0x26) {
3996 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3997 }else{
3998 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3999 }
57871462 4000 }
535d208a 4001 }else{ // using tlb
4002 int a;
4003 if(c) {
4004 a=-1;
4005 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4006 a=0xFFFFFFFC; // LWL/LWR
4007 }else{
4008 a=0xFFFFFFF8; // LDL/LDR
4009 }
4010 map=get_reg(i_regs->regmap,TLREG);
4011 assert(map>=0);
ea3d2e6e 4012 reglist&=~(1<<map);
535d208a 4013 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4014 if(c) {
4015 if (opcode[i]==0x22||opcode[i]==0x26) {
4016 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4017 }else{
4018 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4019 }
535d208a 4020 }
4021 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4022 }
4023 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4024 if(!c||memtarget) {
ffb0b9e0 4025 int a=temp2;
4026 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4027 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4028 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4029 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4030 }
4031 else
4032 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4033 if(rt1[i]) {
4034 assert(tl>=0);
57871462 4035 emit_andimm(temp,24,temp);
2002a1db 4036#ifdef BIG_ENDIAN_MIPS
4037 if (opcode[i]==0x26) // LWR
4038#else
4039 if (opcode[i]==0x22) // LWL
4040#endif
4041 emit_xorimm(temp,24,temp);
57871462 4042 emit_movimm(-1,HOST_TEMPREG);
4043 if (opcode[i]==0x26) {
4044 emit_shr(temp2,temp,temp2);
4045 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4046 }else{
4047 emit_shl(temp2,temp,temp2);
4048 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4049 }
4050 emit_or(temp2,tl,tl);
57871462 4051 }
535d208a 4052 //emit_storereg(rt1[i],tl); // DEBUG
4053 }
4054 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4055 // FIXME: little endian, fastload_reg_override
535d208a 4056 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4057 if(!c||memtarget) {
4058 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4059 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4060 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4061 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4062 }
4063 else
4064 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4065 if(rt1[i]) {
4066 assert(th>=0);
4067 assert(tl>=0);
57871462 4068 emit_testimm(temp,32);
4069 emit_andimm(temp,24,temp);
4070 if (opcode[i]==0x1A) { // LDL
4071 emit_rsbimm(temp,32,HOST_TEMPREG);
4072 emit_shl(temp2h,temp,temp2h);
4073 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4074 emit_movimm(-1,HOST_TEMPREG);
4075 emit_shl(temp2,temp,temp2);
4076 emit_cmove_reg(temp2h,th);
4077 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4078 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4079 emit_orreq(temp2,tl,tl);
4080 emit_orrne(temp2,th,th);
4081 }
4082 if (opcode[i]==0x1B) { // LDR
4083 emit_xorimm(temp,24,temp);
4084 emit_rsbimm(temp,32,HOST_TEMPREG);
4085 emit_shr(temp2,temp,temp2);
4086 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4087 emit_movimm(-1,HOST_TEMPREG);
4088 emit_shr(temp2h,temp,temp2h);
4089 emit_cmovne_reg(temp2,tl);
4090 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4091 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4092 emit_orrne(temp2h,th,th);
4093 emit_orreq(temp2h,tl,tl);
4094 }
4095 }
4096 }
4097}
4098#define loadlr_assemble loadlr_assemble_arm
4099
4100void cop0_assemble(int i,struct regstat *i_regs)
4101{
4102 if(opcode2[i]==0) // MFC0
4103 {
4104 signed char t=get_reg(i_regs->regmap,rt1[i]);
4105 char copr=(source[i]>>11)&0x1f;
4106 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4107 if(t>=0&&rt1[i]!=0) {
7139f3c8 4108#ifdef MUPEN64
57871462 4109 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4110 emit_movimm((source[i]>>11)&0x1f,1);
4111 emit_writeword(0,(int)&PC);
4112 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4113 if(copr==9) {
4114 emit_readword((int)&last_count,ECX);
4115 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4116 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4117 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4118 emit_writeword(HOST_CCREG,(int)&Count);
4119 }
4120 emit_call((int)MFC0);
4121 emit_readword((int)&readmem_dword,t);
7139f3c8 4122#else
4123 emit_readword((int)&reg_cop0+copr*4,t);
4124#endif
57871462 4125 }
4126 }
4127 else if(opcode2[i]==4) // MTC0
4128 {
4129 signed char s=get_reg(i_regs->regmap,rs1[i]);
4130 char copr=(source[i]>>11)&0x1f;
4131 assert(s>=0);
63cb0298 4132#ifdef MUPEN64
57871462 4133 emit_writeword(s,(int)&readmem_dword);
4134 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4135 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4136 emit_movimm((source[i]>>11)&0x1f,1);
4137 emit_writeword(0,(int)&PC);
4138 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4139#else
4140 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4141#endif
4142 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4143 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4144 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4145 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4146 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4147 emit_writeword(HOST_CCREG,(int)&Count);
4148 }
4149 // What a mess. The status register (12) can enable interrupts,
4150 // so needs a special case to handle a pending interrupt.
4151 // The interrupt must be taken immediately, because a subsequent
4152 // instruction might disable interrupts again.
7139f3c8 4153 if(copr==12||copr==13) {
fca1aef2 4154#ifdef PCSX
4155 if (is_delayslot) {
4156 // burn cycles to cause cc_interrupt, which will
4157 // reschedule next_interupt. Relies on CCREG from above.
4158 assem_debug("MTC0 DS %d\n", copr);
4159 emit_writeword(HOST_CCREG,(int)&last_count);
4160 emit_movimm(0,HOST_CCREG);
4161 emit_storereg(CCREG,HOST_CCREG);
63cb0298 4162 if(s!=1)
4163 emit_mov(s,1);
fca1aef2 4164 emit_movimm(copr,0);
4165 emit_call((int)pcsx_mtc0_ds);
4166 return;
4167 }
4168#endif
63cb0298 4169 emit_movimm(start+i*4+4,HOST_TEMPREG);
4170 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4171 emit_movimm(0,HOST_TEMPREG);
4172 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4173 }
4174 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4175 //else
fca1aef2 4176#ifdef PCSX
63cb0298 4177 if(s!=1)
4178 emit_mov(s,1);
fca1aef2 4179 emit_movimm(copr,0);
4180 emit_call((int)pcsx_mtc0);
4181#else
57871462 4182 emit_call((int)MTC0);
fca1aef2 4183#endif
7139f3c8 4184 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4185 emit_readword((int)&Count,HOST_CCREG);
4186 emit_readword((int)&next_interupt,ECX);
2573466a 4187 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4188 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
4189 emit_writeword(ECX,(int)&last_count);
4190 emit_storereg(CCREG,HOST_CCREG);
4191 }
7139f3c8 4192 if(copr==12||copr==13) {
57871462 4193 assert(!is_delayslot);
4194 emit_readword((int)&pending_exception,14);
4195 }
4196 emit_loadreg(rs1[i],s);
4197 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4198 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 4199 if(copr==12||copr==13) {
57871462 4200 emit_test(14,14);
4201 emit_jne((int)&do_interrupt);
4202 }
4203 cop1_usable=0;
4204 }
4205 else
4206 {
4207 assert(opcode2[i]==0x10);
3d624f89 4208#ifndef DISABLE_TLB
57871462 4209 if((source[i]&0x3f)==0x01) // TLBR
4210 emit_call((int)TLBR);
4211 if((source[i]&0x3f)==0x02) // TLBWI
4212 emit_call((int)TLBWI_new);
4213 if((source[i]&0x3f)==0x06) { // TLBWR
4214 // The TLB entry written by TLBWR is dependent on the count,
4215 // so update the cycle count
4216 emit_readword((int)&last_count,ECX);
4217 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4218 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4219 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4220 emit_writeword(HOST_CCREG,(int)&Count);
4221 emit_call((int)TLBWR_new);
4222 }
4223 if((source[i]&0x3f)==0x08) // TLBP
4224 emit_call((int)TLBP);
3d624f89 4225#endif
576bbd8f 4226#ifdef PCSX
4227 if((source[i]&0x3f)==0x10) // RFE
4228 {
4229 emit_readword((int)&Status,0);
4230 emit_andimm(0,0x3c,1);
4231 emit_andimm(0,~0xf,0);
4232 emit_orrshr_imm(1,2,0);
4233 emit_writeword(0,(int)&Status);
4234 }
4235#else
57871462 4236 if((source[i]&0x3f)==0x18) // ERET
4237 {
4238 int count=ccadj[i];
4239 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4240 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4241 emit_jmp((int)jump_eret);
4242 }
576bbd8f 4243#endif
57871462 4244 }
4245}
4246
b9b61529 4247static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4248{
4249 switch (copr) {
4250 case 1:
4251 case 3:
4252 case 5:
4253 case 8:
4254 case 9:
4255 case 10:
4256 case 11:
4257 emit_readword((int)&reg_cop2d[copr],tl);
4258 emit_signextend16(tl,tl);
4259 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4260 break;
4261 case 7:
4262 case 16:
4263 case 17:
4264 case 18:
4265 case 19:
4266 emit_readword((int)&reg_cop2d[copr],tl);
4267 emit_andimm(tl,0xffff,tl);
4268 emit_writeword(tl,(int)&reg_cop2d[copr]);
4269 break;
4270 case 15:
4271 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4272 emit_writeword(tl,(int)&reg_cop2d[copr]);
4273 break;
4274 case 28:
b9b61529 4275 case 29:
4276 emit_readword((int)&reg_cop2d[9],temp);
4277 emit_testimm(temp,0x8000); // do we need this?
4278 emit_andimm(temp,0xf80,temp);
4279 emit_andne_imm(temp,0,temp);
f70d384d 4280 emit_shrimm(temp,7,tl);
b9b61529 4281 emit_readword((int)&reg_cop2d[10],temp);
4282 emit_testimm(temp,0x8000);
4283 emit_andimm(temp,0xf80,temp);
4284 emit_andne_imm(temp,0,temp);
f70d384d 4285 emit_orrshr_imm(temp,2,tl);
b9b61529 4286 emit_readword((int)&reg_cop2d[11],temp);
4287 emit_testimm(temp,0x8000);
4288 emit_andimm(temp,0xf80,temp);
4289 emit_andne_imm(temp,0,temp);
f70d384d 4290 emit_orrshl_imm(temp,3,tl);
b9b61529 4291 emit_writeword(tl,(int)&reg_cop2d[copr]);
4292 break;
4293 default:
4294 emit_readword((int)&reg_cop2d[copr],tl);
4295 break;
4296 }
4297}
4298
4299static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4300{
4301 switch (copr) {
4302 case 15:
4303 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4304 emit_writeword(sl,(int)&reg_cop2d[copr]);
4305 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4306 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4307 emit_writeword(sl,(int)&reg_cop2d[14]);
4308 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4309 break;
4310 case 28:
4311 emit_andimm(sl,0x001f,temp);
f70d384d 4312 emit_shlimm(temp,7,temp);
b9b61529 4313 emit_writeword(temp,(int)&reg_cop2d[9]);
4314 emit_andimm(sl,0x03e0,temp);
f70d384d 4315 emit_shlimm(temp,2,temp);
b9b61529 4316 emit_writeword(temp,(int)&reg_cop2d[10]);
4317 emit_andimm(sl,0x7c00,temp);
f70d384d 4318 emit_shrimm(temp,3,temp);
b9b61529 4319 emit_writeword(temp,(int)&reg_cop2d[11]);
4320 emit_writeword(sl,(int)&reg_cop2d[28]);
4321 break;
4322 case 30:
4323 emit_movs(sl,temp);
4324 emit_mvnmi(temp,temp);
4325 emit_clz(temp,temp);
4326 emit_writeword(sl,(int)&reg_cop2d[30]);
4327 emit_writeword(temp,(int)&reg_cop2d[31]);
4328 break;
b9b61529 4329 case 31:
4330 break;
4331 default:
4332 emit_writeword(sl,(int)&reg_cop2d[copr]);
4333 break;
4334 }
4335}
4336
4337void cop2_assemble(int i,struct regstat *i_regs)
4338{
4339 u_int copr=(source[i]>>11)&0x1f;
4340 signed char temp=get_reg(i_regs->regmap,-1);
4341 if (opcode2[i]==0) { // MFC2
4342 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4343 if(tl>=0&&rt1[i]!=0)
b9b61529 4344 cop2_get_dreg(copr,tl,temp);
4345 }
4346 else if (opcode2[i]==4) { // MTC2
4347 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4348 cop2_put_dreg(copr,sl,temp);
4349 }
4350 else if (opcode2[i]==2) // CFC2
4351 {
4352 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4353 if(tl>=0&&rt1[i]!=0)
b9b61529 4354 emit_readword((int)&reg_cop2c[copr],tl);
4355 }
4356 else if (opcode2[i]==6) // CTC2
4357 {
4358 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4359 switch(copr) {
4360 case 4:
4361 case 12:
4362 case 20:
4363 case 26:
4364 case 27:
4365 case 29:
4366 case 30:
4367 emit_signextend16(sl,temp);
4368 break;
4369 case 31:
4370 //value = value & 0x7ffff000;
4371 //if (value & 0x7f87e000) value |= 0x80000000;
4372 emit_shrimm(sl,12,temp);
4373 emit_shlimm(temp,12,temp);
4374 emit_testimm(temp,0x7f000000);
4375 emit_testeqimm(temp,0x00870000);
4376 emit_testeqimm(temp,0x0000e000);
4377 emit_orrne_imm(temp,0x80000000,temp);
4378 break;
4379 default:
4380 temp=sl;
4381 break;
4382 }
4383 emit_writeword(temp,(int)&reg_cop2c[copr]);
4384 assert(sl>=0);
4385 }
4386}
4387
4388void c2op_assemble(int i,struct regstat *i_regs)
4389{
4390 signed char temp=get_reg(i_regs->regmap,-1);
4391 u_int c2op=source[i]&0x3f;
4392 u_int hr,reglist=0;
bedfea38 4393 int need_flags;
b9b61529 4394 for(hr=0;hr<HOST_REGS;hr++) {
4395 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4396 }
4397 if(i==0||itype[i-1]!=C2OP)
4398 save_regs(reglist);
4399
4400 if (gte_handlers[c2op]!=NULL) {
4401 int cc=get_reg(i_regs->regmap,CCREG);
009faf24 4402 emit_movimm(source[i],1); // opcode
b9b61529 4403 if (cc>=0&&gte_cycletab[c2op])
009faf24 4404 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
4405 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4406 emit_writeword(1,(int)&psxRegs.code);
bedfea38 4407 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
4408 assem_debug("gte unneeded %016llx, need_flags %d\n",gte_unneeded[i+1],need_flags);
4409#ifdef ARMv5_ONLY
4410 // let's take more risk here
4411 need_flags=need_flags&&gte_reads_flags;
4412#endif
4413 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
b9b61529 4414 }
4415
4416 if(i>=slen-1||itype[i+1]!=C2OP)
4417 restore_regs(reglist);
4418}
4419
4420void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4421{
4422 // XXX: should just just do the exception instead
4423 if(!cop1_usable) {
4424 int jaddr=(int)out;
4425 emit_jmp(0);
4426 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4427 cop1_usable=1;
4428 }
4429}
4430
57871462 4431void cop1_assemble(int i,struct regstat *i_regs)
4432{
3d624f89 4433#ifndef DISABLE_COP1
57871462 4434 // Check cop1 unusable
4435 if(!cop1_usable) {
4436 signed char rs=get_reg(i_regs->regmap,CSREG);
4437 assert(rs>=0);
4438 emit_testimm(rs,0x20000000);
4439 int jaddr=(int)out;
4440 emit_jeq(0);
4441 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4442 cop1_usable=1;
4443 }
4444 if (opcode2[i]==0) { // MFC1
4445 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4446 if(tl>=0) {
4447 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4448 emit_readword_indexed(0,tl,tl);
4449 }
4450 }
4451 else if (opcode2[i]==1) { // DMFC1
4452 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4453 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4454 if(tl>=0) {
4455 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4456 if(th>=0) emit_readword_indexed(4,tl,th);
4457 emit_readword_indexed(0,tl,tl);
4458 }
4459 }
4460 else if (opcode2[i]==4) { // MTC1
4461 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4462 signed char temp=get_reg(i_regs->regmap,-1);
4463 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4464 emit_writeword_indexed(sl,0,temp);
4465 }
4466 else if (opcode2[i]==5) { // DMTC1
4467 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4468 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4469 signed char temp=get_reg(i_regs->regmap,-1);
4470 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4471 emit_writeword_indexed(sh,4,temp);
4472 emit_writeword_indexed(sl,0,temp);
4473 }
4474 else if (opcode2[i]==2) // CFC1
4475 {
4476 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4477 if(tl>=0) {
4478 u_int copr=(source[i]>>11)&0x1f;
4479 if(copr==0) emit_readword((int)&FCR0,tl);
4480 if(copr==31) emit_readword((int)&FCR31,tl);
4481 }
4482 }
4483 else if (opcode2[i]==6) // CTC1
4484 {
4485 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4486 u_int copr=(source[i]>>11)&0x1f;
4487 assert(sl>=0);
4488 if(copr==31)
4489 {
4490 emit_writeword(sl,(int)&FCR31);
4491 // Set the rounding mode
4492 //FIXME
4493 //char temp=get_reg(i_regs->regmap,-1);
4494 //emit_andimm(sl,3,temp);
4495 //emit_fldcw_indexed((int)&rounding_modes,temp);
4496 }
4497 }
3d624f89 4498#else
4499 cop1_unusable(i, i_regs);
4500#endif
57871462 4501}
4502
4503void fconv_assemble_arm(int i,struct regstat *i_regs)
4504{
3d624f89 4505#ifndef DISABLE_COP1
57871462 4506 signed char temp=get_reg(i_regs->regmap,-1);
4507 assert(temp>=0);
4508 // Check cop1 unusable
4509 if(!cop1_usable) {
4510 signed char rs=get_reg(i_regs->regmap,CSREG);
4511 assert(rs>=0);
4512 emit_testimm(rs,0x20000000);
4513 int jaddr=(int)out;
4514 emit_jeq(0);
4515 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4516 cop1_usable=1;
4517 }
4518
4519 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4520 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4521 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4522 emit_flds(temp,15);
4523 emit_ftosizs(15,15); // float->int, truncate
4524 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4525 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4526 emit_fsts(15,temp);
4527 return;
4528 }
4529 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4530 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4531 emit_vldr(temp,7);
4532 emit_ftosizd(7,13); // double->int, truncate
4533 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4534 emit_fsts(13,temp);
4535 return;
4536 }
4537
4538 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4539 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4540 emit_flds(temp,13);
4541 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4542 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4543 emit_fsitos(13,15);
4544 emit_fsts(15,temp);
4545 return;
4546 }
4547 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4548 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4549 emit_flds(temp,13);
4550 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4551 emit_fsitod(13,7);
4552 emit_vstr(7,temp);
4553 return;
4554 }
4555
4556 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4557 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4558 emit_flds(temp,13);
4559 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4560 emit_fcvtds(13,7);
4561 emit_vstr(7,temp);
4562 return;
4563 }
4564 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4565 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4566 emit_vldr(temp,7);
4567 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4568 emit_fcvtsd(7,13);
4569 emit_fsts(13,temp);
4570 return;
4571 }
4572 #endif
4573
4574 // C emulation code
4575
4576 u_int hr,reglist=0;
4577 for(hr=0;hr<HOST_REGS;hr++) {
4578 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4579 }
4580 save_regs(reglist);
4581
4582 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4583 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4584 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4585 emit_call((int)cvt_s_w);
4586 }
4587 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4588 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4589 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4590 emit_call((int)cvt_d_w);
4591 }
4592 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4593 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4594 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4595 emit_call((int)cvt_s_l);
4596 }
4597 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4598 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4599 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4600 emit_call((int)cvt_d_l);
4601 }
4602
4603 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4604 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4605 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4606 emit_call((int)cvt_d_s);
4607 }
4608 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4609 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4610 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4611 emit_call((int)cvt_w_s);
4612 }
4613 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4614 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4615 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4616 emit_call((int)cvt_l_s);
4617 }
4618
4619 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4620 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4621 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4622 emit_call((int)cvt_s_d);
4623 }
4624 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4625 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4626 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4627 emit_call((int)cvt_w_d);
4628 }
4629 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4630 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4631 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4632 emit_call((int)cvt_l_d);
4633 }
4634
4635 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4636 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4637 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4638 emit_call((int)round_l_s);
4639 }
4640 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4641 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4642 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4643 emit_call((int)trunc_l_s);
4644 }
4645 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4646 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4647 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4648 emit_call((int)ceil_l_s);
4649 }
4650 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4651 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4652 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4653 emit_call((int)floor_l_s);
4654 }
4655 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4656 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4657 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4658 emit_call((int)round_w_s);
4659 }
4660 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4661 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4662 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4663 emit_call((int)trunc_w_s);
4664 }
4665 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4666 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4667 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4668 emit_call((int)ceil_w_s);
4669 }
4670 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4671 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4672 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4673 emit_call((int)floor_w_s);
4674 }
4675
4676 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4677 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4678 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4679 emit_call((int)round_l_d);
4680 }
4681 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4682 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4683 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4684 emit_call((int)trunc_l_d);
4685 }
4686 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4687 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4688 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4689 emit_call((int)ceil_l_d);
4690 }
4691 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4692 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4693 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4694 emit_call((int)floor_l_d);
4695 }
4696 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4697 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4698 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4699 emit_call((int)round_w_d);
4700 }
4701 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4702 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4703 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4704 emit_call((int)trunc_w_d);
4705 }
4706 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4707 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4708 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4709 emit_call((int)ceil_w_d);
4710 }
4711 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4712 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4713 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4714 emit_call((int)floor_w_d);
4715 }
4716
4717 restore_regs(reglist);
3d624f89 4718#else
4719 cop1_unusable(i, i_regs);
4720#endif
57871462 4721}
4722#define fconv_assemble fconv_assemble_arm
4723
4724void fcomp_assemble(int i,struct regstat *i_regs)
4725{
3d624f89 4726#ifndef DISABLE_COP1
57871462 4727 signed char fs=get_reg(i_regs->regmap,FSREG);
4728 signed char temp=get_reg(i_regs->regmap,-1);
4729 assert(temp>=0);
4730 // Check cop1 unusable
4731 if(!cop1_usable) {
4732 signed char cs=get_reg(i_regs->regmap,CSREG);
4733 assert(cs>=0);
4734 emit_testimm(cs,0x20000000);
4735 int jaddr=(int)out;
4736 emit_jeq(0);
4737 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4738 cop1_usable=1;
4739 }
4740
4741 if((source[i]&0x3f)==0x30) {
4742 emit_andimm(fs,~0x800000,fs);
4743 return;
4744 }
4745
4746 if((source[i]&0x3e)==0x38) {
4747 // sf/ngle - these should throw exceptions for NaNs
4748 emit_andimm(fs,~0x800000,fs);
4749 return;
4750 }
4751
4752 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4753 if(opcode2[i]==0x10) {
4754 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4755 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4756 emit_orimm(fs,0x800000,fs);
4757 emit_flds(temp,14);
4758 emit_flds(HOST_TEMPREG,15);
4759 emit_fcmps(14,15);
4760 emit_fmstat();
4761 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4762 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4763 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4764 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4765 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4766 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4767 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4768 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4769 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4770 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4771 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4772 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4773 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4774 return;
4775 }
4776 if(opcode2[i]==0x11) {
4777 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4778 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4779 emit_orimm(fs,0x800000,fs);
4780 emit_vldr(temp,6);
4781 emit_vldr(HOST_TEMPREG,7);
4782 emit_fcmpd(6,7);
4783 emit_fmstat();
4784 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4785 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4786 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4787 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4788 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4789 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4790 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4791 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4792 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4793 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4794 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4795 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4796 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4797 return;
4798 }
4799 #endif
4800
4801 // C only
4802
4803 u_int hr,reglist=0;
4804 for(hr=0;hr<HOST_REGS;hr++) {
4805 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4806 }
4807 reglist&=~(1<<fs);
4808 save_regs(reglist);
4809 if(opcode2[i]==0x10) {
4810 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4811 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4812 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4813 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4814 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4815 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4816 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4817 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4818 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4819 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4820 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4821 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4822 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4823 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4824 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4825 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4826 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4827 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4828 }
4829 if(opcode2[i]==0x11) {
4830 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4831 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4832 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4833 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4834 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4835 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4836 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4837 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4838 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4839 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4840 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4841 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4842 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4843 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4844 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4845 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4846 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4847 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4848 }
4849 restore_regs(reglist);
4850 emit_loadreg(FSREG,fs);
3d624f89 4851#else
4852 cop1_unusable(i, i_regs);
4853#endif
57871462 4854}
4855
4856void float_assemble(int i,struct regstat *i_regs)
4857{
3d624f89 4858#ifndef DISABLE_COP1
57871462 4859 signed char temp=get_reg(i_regs->regmap,-1);
4860 assert(temp>=0);
4861 // Check cop1 unusable
4862 if(!cop1_usable) {
4863 signed char cs=get_reg(i_regs->regmap,CSREG);
4864 assert(cs>=0);
4865 emit_testimm(cs,0x20000000);
4866 int jaddr=(int)out;
4867 emit_jeq(0);
4868 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4869 cop1_usable=1;
4870 }
4871
4872 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4873 if((source[i]&0x3f)==6) // mov
4874 {
4875 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4876 if(opcode2[i]==0x10) {
4877 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4878 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4879 emit_readword_indexed(0,temp,temp);
4880 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4881 }
4882 if(opcode2[i]==0x11) {
4883 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4884 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4885 emit_vldr(temp,7);
4886 emit_vstr(7,HOST_TEMPREG);
4887 }
4888 }
4889 return;
4890 }
4891
4892 if((source[i]&0x3f)>3)
4893 {
4894 if(opcode2[i]==0x10) {
4895 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4896 emit_flds(temp,15);
4897 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4898 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4899 }
4900 if((source[i]&0x3f)==4) // sqrt
4901 emit_fsqrts(15,15);
4902 if((source[i]&0x3f)==5) // abs
4903 emit_fabss(15,15);
4904 if((source[i]&0x3f)==7) // neg
4905 emit_fnegs(15,15);
4906 emit_fsts(15,temp);
4907 }
4908 if(opcode2[i]==0x11) {
4909 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4910 emit_vldr(temp,7);
4911 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4912 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4913 }
4914 if((source[i]&0x3f)==4) // sqrt
4915 emit_fsqrtd(7,7);
4916 if((source[i]&0x3f)==5) // abs
4917 emit_fabsd(7,7);
4918 if((source[i]&0x3f)==7) // neg
4919 emit_fnegd(7,7);
4920 emit_vstr(7,temp);
4921 }
4922 return;
4923 }
4924 if((source[i]&0x3f)<4)
4925 {
4926 if(opcode2[i]==0x10) {
4927 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4928 }
4929 if(opcode2[i]==0x11) {
4930 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4931 }
4932 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4933 if(opcode2[i]==0x10) {
4934 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4935 emit_flds(temp,15);
4936 emit_flds(HOST_TEMPREG,13);
4937 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4938 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4939 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4940 }
4941 }
4942 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4943 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4944 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4945 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4946 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4947 emit_fsts(15,HOST_TEMPREG);
4948 }else{
4949 emit_fsts(15,temp);
4950 }
4951 }
4952 else if(opcode2[i]==0x11) {
4953 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4954 emit_vldr(temp,7);
4955 emit_vldr(HOST_TEMPREG,6);
4956 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4957 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4958 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4959 }
4960 }
4961 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4962 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4963 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4964 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4965 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4966 emit_vstr(7,HOST_TEMPREG);
4967 }else{
4968 emit_vstr(7,temp);
4969 }
4970 }
4971 }
4972 else {
4973 if(opcode2[i]==0x10) {
4974 emit_flds(temp,15);
4975 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4976 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4977 }
4978 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4979 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4980 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4981 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4982 emit_fsts(15,temp);
4983 }
4984 else if(opcode2[i]==0x11) {
4985 emit_vldr(temp,7);
4986 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4987 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4988 }
4989 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4990 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4991 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4992 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4993 emit_vstr(7,temp);
4994 }
4995 }
4996 return;
4997 }
4998 #endif
4999
5000 u_int hr,reglist=0;
5001 for(hr=0;hr<HOST_REGS;hr++) {
5002 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5003 }
5004 if(opcode2[i]==0x10) { // Single precision
5005 save_regs(reglist);
5006 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5007 if((source[i]&0x3f)<4) {
5008 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5009 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5010 }else{
5011 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5012 }
5013 switch(source[i]&0x3f)
5014 {
5015 case 0x00: emit_call((int)add_s);break;
5016 case 0x01: emit_call((int)sub_s);break;
5017 case 0x02: emit_call((int)mul_s);break;
5018 case 0x03: emit_call((int)div_s);break;
5019 case 0x04: emit_call((int)sqrt_s);break;
5020 case 0x05: emit_call((int)abs_s);break;
5021 case 0x06: emit_call((int)mov_s);break;
5022 case 0x07: emit_call((int)neg_s);break;
5023 }
5024 restore_regs(reglist);
5025 }
5026 if(opcode2[i]==0x11) { // Double precision
5027 save_regs(reglist);
5028 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5029 if((source[i]&0x3f)<4) {
5030 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5031 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5032 }else{
5033 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5034 }
5035 switch(source[i]&0x3f)
5036 {
5037 case 0x00: emit_call((int)add_d);break;
5038 case 0x01: emit_call((int)sub_d);break;
5039 case 0x02: emit_call((int)mul_d);break;
5040 case 0x03: emit_call((int)div_d);break;
5041 case 0x04: emit_call((int)sqrt_d);break;
5042 case 0x05: emit_call((int)abs_d);break;
5043 case 0x06: emit_call((int)mov_d);break;
5044 case 0x07: emit_call((int)neg_d);break;
5045 }
5046 restore_regs(reglist);
5047 }
3d624f89 5048#else
5049 cop1_unusable(i, i_regs);
5050#endif
57871462 5051}
5052
5053void multdiv_assemble_arm(int i,struct regstat *i_regs)
5054{
5055 // case 0x18: MULT
5056 // case 0x19: MULTU
5057 // case 0x1A: DIV
5058 // case 0x1B: DIVU
5059 // case 0x1C: DMULT
5060 // case 0x1D: DMULTU
5061 // case 0x1E: DDIV
5062 // case 0x1F: DDIVU
5063 if(rs1[i]&&rs2[i])
5064 {
5065 if((opcode2[i]&4)==0) // 32-bit
5066 {
5067 if(opcode2[i]==0x18) // MULT
5068 {
5069 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5070 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5071 signed char hi=get_reg(i_regs->regmap,HIREG);
5072 signed char lo=get_reg(i_regs->regmap,LOREG);
5073 assert(m1>=0);
5074 assert(m2>=0);
5075 assert(hi>=0);
5076 assert(lo>=0);
5077 emit_smull(m1,m2,hi,lo);
5078 }
5079 if(opcode2[i]==0x19) // MULTU
5080 {
5081 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5082 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5083 signed char hi=get_reg(i_regs->regmap,HIREG);
5084 signed char lo=get_reg(i_regs->regmap,LOREG);
5085 assert(m1>=0);
5086 assert(m2>=0);
5087 assert(hi>=0);
5088 assert(lo>=0);
5089 emit_umull(m1,m2,hi,lo);
5090 }
5091 if(opcode2[i]==0x1A) // DIV
5092 {
5093 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5094 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5095 assert(d1>=0);
5096 assert(d2>=0);
5097 signed char quotient=get_reg(i_regs->regmap,LOREG);
5098 signed char remainder=get_reg(i_regs->regmap,HIREG);
5099 assert(quotient>=0);
5100 assert(remainder>=0);
5101 emit_movs(d1,remainder);
44a80f6a 5102 emit_movimm(0xffffffff,quotient);
5103 emit_negmi(quotient,quotient); // .. quotient and ..
5104 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5105 emit_movs(d2,HOST_TEMPREG);
5106 emit_jeq((int)out+52); // Division by zero
5107 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5108 emit_clz(HOST_TEMPREG,quotient);
5109 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5110 emit_orimm(quotient,1<<31,quotient);
5111 emit_shr(quotient,quotient,quotient);
5112 emit_cmp(remainder,HOST_TEMPREG);
5113 emit_subcs(remainder,HOST_TEMPREG,remainder);
5114 emit_adcs(quotient,quotient,quotient);
5115 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5116 emit_jcc((int)out-16); // -4
5117 emit_teq(d1,d2);
5118 emit_negmi(quotient,quotient);
5119 emit_test(d1,d1);
5120 emit_negmi(remainder,remainder);
5121 }
5122 if(opcode2[i]==0x1B) // DIVU
5123 {
5124 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5125 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5126 assert(d1>=0);
5127 assert(d2>=0);
5128 signed char quotient=get_reg(i_regs->regmap,LOREG);
5129 signed char remainder=get_reg(i_regs->regmap,HIREG);
5130 assert(quotient>=0);
5131 assert(remainder>=0);
44a80f6a 5132 emit_mov(d1,remainder);
5133 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5134 emit_test(d2,d2);
44a80f6a 5135 emit_jeq((int)out+40); // Division by zero
57871462 5136 emit_clz(d2,HOST_TEMPREG);
5137 emit_movimm(1<<31,quotient);
5138 emit_shl(d2,HOST_TEMPREG,d2);
57871462 5139 emit_shr(quotient,HOST_TEMPREG,quotient);
5140 emit_cmp(remainder,d2);
5141 emit_subcs(remainder,d2,remainder);
5142 emit_adcs(quotient,quotient,quotient);
5143 emit_shrcc_imm(d2,1,d2);
5144 emit_jcc((int)out-16); // -4
5145 }
5146 }
5147 else // 64-bit
4600ba03 5148#ifndef FORCE32
57871462 5149 {
5150 if(opcode2[i]==0x1C) // DMULT
5151 {
5152 assert(opcode2[i]!=0x1C);
5153 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5154 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5155 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5156 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5157 assert(m1h>=0);
5158 assert(m2h>=0);
5159 assert(m1l>=0);
5160 assert(m2l>=0);
5161 emit_pushreg(m2h);
5162 emit_pushreg(m2l);
5163 emit_pushreg(m1h);
5164 emit_pushreg(m1l);
5165 emit_call((int)&mult64);
5166 emit_popreg(m1l);
5167 emit_popreg(m1h);
5168 emit_popreg(m2l);
5169 emit_popreg(m2h);
5170 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5171 signed char hil=get_reg(i_regs->regmap,HIREG);
5172 if(hih>=0) emit_loadreg(HIREG|64,hih);
5173 if(hil>=0) emit_loadreg(HIREG,hil);
5174 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5175 signed char lol=get_reg(i_regs->regmap,LOREG);
5176 if(loh>=0) emit_loadreg(LOREG|64,loh);
5177 if(lol>=0) emit_loadreg(LOREG,lol);
5178 }
5179 if(opcode2[i]==0x1D) // DMULTU
5180 {
5181 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5182 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5183 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5184 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5185 assert(m1h>=0);
5186 assert(m2h>=0);
5187 assert(m1l>=0);
5188 assert(m2l>=0);
5189 save_regs(0x100f);
5190 if(m1l!=0) emit_mov(m1l,0);
5191 if(m1h==0) emit_readword((int)&dynarec_local,1);
5192 else if(m1h>1) emit_mov(m1h,1);
5193 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5194 else if(m2l>2) emit_mov(m2l,2);
5195 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5196 else if(m2h>3) emit_mov(m2h,3);
5197 emit_call((int)&multu64);
5198 restore_regs(0x100f);
5199 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5200 signed char hil=get_reg(i_regs->regmap,HIREG);
5201 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5202 signed char lol=get_reg(i_regs->regmap,LOREG);
5203 /*signed char temp=get_reg(i_regs->regmap,-1);
5204 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5205 signed char rl=get_reg(i_regs->regmap,HIREG);
5206 assert(m1h>=0);
5207 assert(m2h>=0);
5208 assert(m1l>=0);
5209 assert(m2l>=0);
5210 assert(temp>=0);
5211 //emit_mov(m1l,EAX);
5212 //emit_mul(m2l);
5213 emit_umull(rl,rh,m1l,m2l);
5214 emit_storereg(LOREG,rl);
5215 emit_mov(rh,temp);
5216 //emit_mov(m1h,EAX);
5217 //emit_mul(m2l);
5218 emit_umull(rl,rh,m1h,m2l);
5219 emit_adds(rl,temp,temp);
5220 emit_adcimm(rh,0,rh);
5221 emit_storereg(HIREG,rh);
5222 //emit_mov(m2h,EAX);
5223 //emit_mul(m1l);
5224 emit_umull(rl,rh,m1l,m2h);
5225 emit_adds(rl,temp,temp);
5226 emit_adcimm(rh,0,rh);
5227 emit_storereg(LOREG|64,temp);
5228 emit_mov(rh,temp);
5229 //emit_mov(m2h,EAX);
5230 //emit_mul(m1h);
5231 emit_umull(rl,rh,m1h,m2h);
5232 emit_adds(rl,temp,rl);
5233 emit_loadreg(HIREG,temp);
5234 emit_adcimm(rh,0,rh);
5235 emit_adds(rl,temp,rl);
5236 emit_adcimm(rh,0,rh);
5237 // DEBUG
5238 /*
5239 emit_pushreg(m2h);
5240 emit_pushreg(m2l);
5241 emit_pushreg(m1h);
5242 emit_pushreg(m1l);
5243 emit_call((int)&multu64);
5244 emit_popreg(m1l);
5245 emit_popreg(m1h);
5246 emit_popreg(m2l);
5247 emit_popreg(m2h);
5248 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5249 signed char hil=get_reg(i_regs->regmap,HIREG);
5250 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5251 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5252 */
5253 // Shouldn't be necessary
5254 //char loh=get_reg(i_regs->regmap,LOREG|64);
5255 //char lol=get_reg(i_regs->regmap,LOREG);
5256 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5257 //if(lol>=0) emit_loadreg(LOREG,lol);
5258 }
5259 if(opcode2[i]==0x1E) // DDIV
5260 {
5261 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5262 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5263 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5264 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5265 assert(d1h>=0);
5266 assert(d2h>=0);
5267 assert(d1l>=0);
5268 assert(d2l>=0);
5269 save_regs(0x100f);
5270 if(d1l!=0) emit_mov(d1l,0);
5271 if(d1h==0) emit_readword((int)&dynarec_local,1);
5272 else if(d1h>1) emit_mov(d1h,1);
5273 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5274 else if(d2l>2) emit_mov(d2l,2);
5275 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5276 else if(d2h>3) emit_mov(d2h,3);
5277 emit_call((int)&div64);
5278 restore_regs(0x100f);
5279 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5280 signed char hil=get_reg(i_regs->regmap,HIREG);
5281 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5282 signed char lol=get_reg(i_regs->regmap,LOREG);
5283 if(hih>=0) emit_loadreg(HIREG|64,hih);
5284 if(hil>=0) emit_loadreg(HIREG,hil);
5285 if(loh>=0) emit_loadreg(LOREG|64,loh);
5286 if(lol>=0) emit_loadreg(LOREG,lol);
5287 }
5288 if(opcode2[i]==0x1F) // DDIVU
5289 {
5290 //u_int hr,reglist=0;
5291 //for(hr=0;hr<HOST_REGS;hr++) {
5292 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5293 //}
5294 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5295 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5296 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5297 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5298 assert(d1h>=0);
5299 assert(d2h>=0);
5300 assert(d1l>=0);
5301 assert(d2l>=0);
5302 save_regs(0x100f);
5303 if(d1l!=0) emit_mov(d1l,0);
5304 if(d1h==0) emit_readword((int)&dynarec_local,1);
5305 else if(d1h>1) emit_mov(d1h,1);
5306 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5307 else if(d2l>2) emit_mov(d2l,2);
5308 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5309 else if(d2h>3) emit_mov(d2h,3);
5310 emit_call((int)&divu64);
5311 restore_regs(0x100f);
5312 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5313 signed char hil=get_reg(i_regs->regmap,HIREG);
5314 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5315 signed char lol=get_reg(i_regs->regmap,LOREG);
5316 if(hih>=0) emit_loadreg(HIREG|64,hih);
5317 if(hil>=0) emit_loadreg(HIREG,hil);
5318 if(loh>=0) emit_loadreg(LOREG|64,loh);
5319 if(lol>=0) emit_loadreg(LOREG,lol);
5320 }
5321 }
4600ba03 5322#else
5323 assert(0);
5324#endif
57871462 5325 }
5326 else
5327 {
5328 // Multiply by zero is zero.
5329 // MIPS does not have a divide by zero exception.
5330 // The result is undefined, we return zero.
5331 signed char hr=get_reg(i_regs->regmap,HIREG);
5332 signed char lr=get_reg(i_regs->regmap,LOREG);
5333 if(hr>=0) emit_zeroreg(hr);
5334 if(lr>=0) emit_zeroreg(lr);
5335 }
5336}
5337#define multdiv_assemble multdiv_assemble_arm
5338
// Intentionally empty on ARM: the x86 backend uses this hook to load the
// hash mask (0xf8) into a register, whereas here do_rhash() applies the
// mask with a single instruction and needs no preloaded value.
void do_preload_rhash(int r) {
  (void)r; // no register needs preparing
}
5343
5344void do_preload_rhtbl(int ht) {
5345 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5346}
5347
// Emit the hash of a return address: rs masked with 0xf8 into rh.  Bits 3-7
// select one of the 32 mini_ht entries and, since each entry is two words
// (8 bytes), the masked value is already a byte offset into the table.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5351
5352void do_miniht_load(int ht,int rh) {
5353 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5354 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5355}
5356
5357void do_miniht_jump(int rs,int rh,int ht) {
5358 emit_cmp(rh,rs);
5359 emit_ldreq_indexed(ht,4,15);
5360 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5361 emit_mov(rs,7);
5362 emit_jmp(jump_vaddr_reg[7]);
5363 #else
5364 emit_jmp(jump_vaddr_reg[rs]);
5365 #endif
5366}
5367
5368void do_miniht_insert(u_int return_address,int rt,int temp) {
5369 #ifdef ARMv5_ONLY
5370 emit_movimm(return_address,rt); // PC into link register
5371 add_to_linker((int)out,return_address,1);
5372 emit_pcreladdr(temp);
5373 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5374 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5375 #else
5376 emit_movw(return_address&0x0000FFFF,rt);
5377 add_to_linker((int)out,return_address,1);
5378 emit_pcreladdr(temp);
5379 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5380 emit_movt(return_address&0xFFFF0000,rt);
5381 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5382 #endif
5383}
5384
5385// Sign-extend to 64 bits and write out upper half of a register
5386// This is useful where we have a 32-bit value in a register, and want to
5387// keep it in a 32-bit register, but can't guarantee that it won't be read
5388// as a 64-bit value later.
5389void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5390{
24385cae 5391#ifndef FORCE32
57871462 5392 if(is32_pre==is32) return;
5393 int hr,reg;
5394 for(hr=0;hr<HOST_REGS;hr++) {
5395 if(hr!=EXCLUDE_REG) {
5396 //if(pre[hr]==entry[hr]) {
5397 if((reg=pre[hr])>=0) {
5398 if((dirty>>hr)&1) {
5399 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5400 emit_sarimm(hr,31,HOST_TEMPREG);
5401 emit_storereg(reg|64,HOST_TEMPREG);
5402 }
5403 }
5404 }
5405 //}
5406 }
5407 }
24385cae 5408#endif
57871462 5409}
5410
5411void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5412{
5413 //if(dirty_pre==dirty) return;
5414 int hr,reg,new_hr;
5415 for(hr=0;hr<HOST_REGS;hr++) {
5416 if(hr!=EXCLUDE_REG) {
5417 reg=pre[hr];
5418 if(((~u)>>(reg&63))&1) {
f776eb14 5419 if(reg>0) {
57871462 5420 if(((dirty_pre&~dirty)>>hr)&1) {
5421 if(reg>0&&reg<34) {
5422 emit_storereg(reg,hr);
5423 if( ((is32_pre&~uu)>>reg)&1 ) {
5424 emit_sarimm(hr,31,HOST_TEMPREG);
5425 emit_storereg(reg|64,HOST_TEMPREG);
5426 }
5427 }
5428 else if(reg>=64) {
5429 emit_storereg(reg,hr);
5430 }
5431 }
5432 }
57871462 5433 }
5434 }
5435 }
5436}
5437
5438
5439/* using strd could possibly help but you'd have to allocate registers in pairs
5440void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5441{
5442 int hr;
5443 int wrote=-1;
5444 for(hr=HOST_REGS-1;hr>=0;hr--) {
5445 if(hr!=EXCLUDE_REG) {
5446 if(pre[hr]!=entry[hr]) {
5447 if(pre[hr]>=0) {
5448 if((dirty>>hr)&1) {
5449 if(get_reg(entry,pre[hr])<0) {
5450 if(pre[hr]<64) {
5451 if(!((u>>pre[hr])&1)) {
5452 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5453 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5454 emit_sarimm(hr,31,hr+1);
5455 emit_strdreg(pre[hr],hr);
5456 }
5457 else
5458 emit_storereg(pre[hr],hr);
5459 }else{
5460 emit_storereg(pre[hr],hr);
5461 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5462 emit_sarimm(hr,31,hr);
5463 emit_storereg(pre[hr]|64,hr);
5464 }
5465 }
5466 }
5467 }else{
5468 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5469 emit_storereg(pre[hr],hr);
5470 }
5471 }
5472 wrote=hr;
5473 }
5474 }
5475 }
5476 }
5477 }
5478 }
5479 for(hr=0;hr<HOST_REGS;hr++) {
5480 if(hr!=EXCLUDE_REG) {
5481 if(pre[hr]!=entry[hr]) {
5482 if(pre[hr]>=0) {
5483 int nr;
5484 if((nr=get_reg(entry,pre[hr]))>=0) {
5485 emit_mov(hr,nr);
5486 }
5487 }
5488 }
5489 }
5490 }
5491}
5492#define wb_invalidate wb_invalidate_arm
5493*/
5494
dd3a91a1 5495// Clearing the cache is rather slow on ARM Linux, so mark the areas
5496// that need to be cleared, and then only clear these areas once.
5497void do_clear_cache()
5498{
5499 int i,j;
5500 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5501 {
5502 u_int bitmap=needs_clear_cache[i];
5503 if(bitmap) {
5504 u_int start,end;
5505 for(j=0;j<32;j++)
5506 {
5507 if(bitmap&(1<<j)) {
5508 start=BASE_ADDR+i*131072+j*4096;
5509 end=start+4095;
5510 j++;
5511 while(j<32) {
5512 if(bitmap&(1<<j)) {
5513 end+=4096;
5514 j++;
5515 }else{
5516 __clear_cache((void *)start,(void *)end);
5517 break;
5518 }
5519 }
5520 }
5521 }
5522 needs_clear_cache[i]=0;
5523 }
5524 }
5525}
5526
57871462 5527// CPU-architecture-specific initialization
5528void arch_init() {
3d624f89 5529#ifndef DISABLE_COP1
57871462 5530 rounding_modes[0]=0x0<<22; // round
5531 rounding_modes[1]=0x3<<22; // trunc
5532 rounding_modes[2]=0x1<<22; // ceil
5533 rounding_modes[3]=0x2<<22; // floor
3d624f89 5534#endif
57871462 5535}
b9b61529 5536
5537// vim:shiftwidth=2:expandtab