drc: remove some old inline memhandlers
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22extern int cycle_count;
23extern int last_count;
24extern int pcaddr;
25extern int pending_exception;
26extern int branch_target;
27extern uint64_t readmem_dword;
28#ifdef MUPEN64
29extern precomp_instr fake_pc;
30#endif
31extern void *dynarec_local;
32extern u_int memory_map[1048576];
33extern u_int mini_ht[32][2];
34extern u_int rounding_modes[4];
35
36void indirect_jump_indexed();
37void indirect_jump();
38void do_interrupt();
39void jump_vaddr_r0();
40void jump_vaddr_r1();
41void jump_vaddr_r2();
42void jump_vaddr_r3();
43void jump_vaddr_r4();
44void jump_vaddr_r5();
45void jump_vaddr_r6();
46void jump_vaddr_r7();
47void jump_vaddr_r8();
48void jump_vaddr_r9();
49void jump_vaddr_r10();
50void jump_vaddr_r12();
51
52const u_int jump_vaddr_reg[16] = {
53 (int)jump_vaddr_r0,
54 (int)jump_vaddr_r1,
55 (int)jump_vaddr_r2,
56 (int)jump_vaddr_r3,
57 (int)jump_vaddr_r4,
58 (int)jump_vaddr_r5,
59 (int)jump_vaddr_r6,
60 (int)jump_vaddr_r7,
61 (int)jump_vaddr_r8,
62 (int)jump_vaddr_r9,
63 (int)jump_vaddr_r10,
64 0,
65 (int)jump_vaddr_r12,
66 0,
67 0,
68 0};
69
70void invalidate_addr_r0();
71void invalidate_addr_r1();
72void invalidate_addr_r2();
73void invalidate_addr_r3();
74void invalidate_addr_r4();
75void invalidate_addr_r5();
76void invalidate_addr_r6();
77void invalidate_addr_r7();
78void invalidate_addr_r8();
79void invalidate_addr_r9();
80void invalidate_addr_r10();
81void invalidate_addr_r12();
82
83const u_int invalidate_addr_reg[16] = {
84 (int)invalidate_addr_r0,
85 (int)invalidate_addr_r1,
86 (int)invalidate_addr_r2,
87 (int)invalidate_addr_r3,
88 (int)invalidate_addr_r4,
89 (int)invalidate_addr_r5,
90 (int)invalidate_addr_r6,
91 (int)invalidate_addr_r7,
92 (int)invalidate_addr_r8,
93 (int)invalidate_addr_r9,
94 (int)invalidate_addr_r10,
95 0,
96 (int)invalidate_addr_r12,
97 0,
98 0,
99 0};
100
101#include "fpu.h"
102
103unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
104
105/* Linker */
106
107void set_jump_target(int addr,u_int target)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
113 assert((addr&3)==0);
114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
116 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
121 assert((addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
126 assert((addr&3)==0);
127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
138// This optionally copies the instruction from the target of the branch into
139// the space before the branch. Works, but the difference in speed is
140// usually insignificant.
141void set_jump_target_fillslot(int addr,u_int target,int copy)
142{
143 u_char *ptr=(u_char *)addr;
144 u_int *ptr2=(u_int *)ptr;
145 assert(!copy||ptr2[-1]==0xe28dd000);
146 if(ptr[3]==0xe2) {
147 assert(!copy);
148 assert((target-(u_int)ptr2-8)<4096);
149 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
150 }
151 else {
152 assert((ptr[3]&0x0e)==0xa);
153 u_int target_insn=*(u_int *)target;
154 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
155 copy=0;
156 }
157 if((target_insn&0x0c100000)==0x04100000) { // Load
158 copy=0;
159 }
160 if(target_insn&0x08000000) {
161 copy=0;
162 }
163 if(copy) {
164 ptr2[-1]=target_insn;
165 target+=4;
166 }
167 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
168 }
169}
170
171/* Literal pool */
172add_literal(int addr,int val)
173{
174 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
175 literals[literalcount][0]=addr;
176 literals[literalcount][1]=val;
177 literalcount++;
178}
179
180void *kill_pointer(void *stub)
181{
182 int *ptr=(int *)(stub+4);
183 assert((*ptr&0x0ff00000)==0x05900000);
184 u_int offset=*ptr&0xfff;
185 int **l_ptr=(void *)ptr+offset+8;
186 int *i_ptr=*l_ptr;
187 set_jump_target((int)i_ptr,(int)stub);
188 return i_ptr;
189}
190
191// find where external branch is liked to using addr of it's stub:
192// get address that insn one after stub loads (dyna_linker arg1),
193// treat it as a pointer to branch insn,
194// return addr where that branch jumps to
195int get_pointer(void *stub)
196{
197 //printf("get_pointer(%x)\n",(int)stub);
198 int *ptr=(int *)(stub+4);
199 assert((*ptr&0x0fff0000)==0x059f0000);
200 u_int offset=*ptr&0xfff;
201 int **l_ptr=(void *)ptr+offset+8;
202 int *i_ptr=*l_ptr;
203 assert((*i_ptr&0x0f000000)==0x0a000000);
204 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
205}
206
207// Find the "clean" entry point from a "dirty" entry point
208// by skipping past the call to verify_code
209u_int get_clean_addr(int addr)
210{
211 int *ptr=(int *)addr;
212 #ifdef ARMv5_ONLY
213 ptr+=4;
214 #else
215 ptr+=6;
216 #endif
217 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
218 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
219 ptr++;
220 if((*ptr&0xFF000000)==0xea000000) {
221 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
222 }
223 return (u_int)ptr;
224}
225
226int verify_dirty(int addr)
227{
228 u_int *ptr=(u_int *)addr;
229 #ifdef ARMv5_ONLY
230 // get from literal pool
231 assert((*ptr&0xFFFF0000)==0xe59f0000);
232 u_int offset=*ptr&0xfff;
233 u_int *l_ptr=(void *)ptr+offset+8;
234 u_int source=l_ptr[0];
235 u_int copy=l_ptr[1];
236 u_int len=l_ptr[2];
237 ptr+=4;
238 #else
239 // ARMv7 movw/movt
240 assert((*ptr&0xFFF00000)==0xe3000000);
241 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
242 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
243 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
244 ptr+=6;
245 #endif
246 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
247 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
248 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
249 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
250 unsigned int page=source>>12;
251 unsigned int map_value=memory_map[page];
252 if(map_value>=0x80000000) return 0;
253 while(page<((source+len-1)>>12)) {
254 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
255 }
256 source = source+(map_value<<2);
257 }
258 //printf("verify_dirty: %x %x %x\n",source,copy,len);
259 return !memcmp((void *)source,(void *)copy,len);
260}
261
262// This doesn't necessarily find all clean entry points, just
263// guarantees that it's not dirty
264int isclean(int addr)
265{
266 #ifdef ARMv5_ONLY
267 int *ptr=((u_int *)addr)+4;
268 #else
269 int *ptr=((u_int *)addr)+6;
270 #endif
271 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
272 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
273 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
274 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
276 return 1;
277}
278
279void get_bounds(int addr,u_int *start,u_int *end)
280{
281 u_int *ptr=(u_int *)addr;
282 #ifdef ARMv5_ONLY
283 // get from literal pool
284 assert((*ptr&0xFFFF0000)==0xe59f0000);
285 u_int offset=*ptr&0xfff;
286 u_int *l_ptr=(void *)ptr+offset+8;
287 u_int source=l_ptr[0];
288 //u_int copy=l_ptr[1];
289 u_int len=l_ptr[2];
290 ptr+=4;
291 #else
292 // ARMv7 movw/movt
293 assert((*ptr&0xFFF00000)==0xe3000000);
294 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
295 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
296 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
297 ptr+=6;
298 #endif
299 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
300 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
301 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
302 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
303 if(memory_map[source>>12]>=0x80000000) source = 0;
304 else source = source+(memory_map[source>>12]<<2);
305 }
306 *start=source;
307 *end=source+len;
308}
309
310/* Register allocation */
311
312// Note: registers are allocated clean (unmodified state)
313// if you intend to modify the register, you must call dirty_reg().
314void alloc_reg(struct regstat *cur,int i,signed char reg)
315{
316 int r,hr;
317 int preferred_reg = (reg&7);
318 if(reg==CCREG) preferred_reg=HOST_CCREG;
319 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
320
321 // Don't allocate unused registers
322 if((cur->u>>reg)&1) return;
323
324 // see if it's already allocated
325 for(hr=0;hr<HOST_REGS;hr++)
326 {
327 if(cur->regmap[hr]==reg) return;
328 }
329
330 // Keep the same mapping if the register was already allocated in a loop
331 preferred_reg = loop_reg(i,reg,preferred_reg);
332
333 // Try to allocate the preferred register
334 if(cur->regmap[preferred_reg]==-1) {
335 cur->regmap[preferred_reg]=reg;
336 cur->dirty&=~(1<<preferred_reg);
337 cur->isconst&=~(1<<preferred_reg);
338 return;
339 }
340 r=cur->regmap[preferred_reg];
341 if(r<64&&((cur->u>>r)&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347 if(r>=64&&((cur->uu>>(r&63))&1)) {
348 cur->regmap[preferred_reg]=reg;
349 cur->dirty&=~(1<<preferred_reg);
350 cur->isconst&=~(1<<preferred_reg);
351 return;
352 }
353
354 // Clear any unneeded registers
355 // We try to keep the mapping consistent, if possible, because it
356 // makes branches easier (especially loops). So we try to allocate
357 // first (see above) before removing old mappings. If this is not
358 // possible then go ahead and clear out the registers that are no
359 // longer needed.
360 for(hr=0;hr<HOST_REGS;hr++)
361 {
362 r=cur->regmap[hr];
363 if(r>=0) {
364 if(r<64) {
365 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
366 }
367 else
368 {
369 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
370 }
371 }
372 }
373 // Try to allocate any available register, but prefer
374 // registers that have not been used recently.
375 if(i>0) {
376 for(hr=0;hr<HOST_REGS;hr++) {
377 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
378 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
379 cur->regmap[hr]=reg;
380 cur->dirty&=~(1<<hr);
381 cur->isconst&=~(1<<hr);
382 return;
383 }
384 }
385 }
386 }
387 // Try to allocate any available register
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
390 cur->regmap[hr]=reg;
391 cur->dirty&=~(1<<hr);
392 cur->isconst&=~(1<<hr);
393 return;
394 }
395 }
396
397 // Ok, now we have to evict someone
398 // Pick a register we hopefully won't need soon
399 u_char hsn[MAXREG+1];
400 memset(hsn,10,sizeof(hsn));
401 int j;
402 lsn(hsn,i,&preferred_reg);
403 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
404 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
405 if(i>0) {
406 // Don't evict the cycle count at entry points, otherwise the entry
407 // stub will have to write it.
408 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
409 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
410 for(j=10;j>=3;j--)
411 {
412 // Alloc preferred register if available
413 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
414 for(hr=0;hr<HOST_REGS;hr++) {
415 // Evict both parts of a 64-bit register
416 if((cur->regmap[hr]&63)==r) {
417 cur->regmap[hr]=-1;
418 cur->dirty&=~(1<<hr);
419 cur->isconst&=~(1<<hr);
420 }
421 }
422 cur->regmap[preferred_reg]=reg;
423 return;
424 }
425 for(r=1;r<=MAXREG;r++)
426 {
427 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
428 for(hr=0;hr<HOST_REGS;hr++) {
429 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
430 if(cur->regmap[hr]==r+64) {
431 cur->regmap[hr]=reg;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 return;
435 }
436 }
437 }
438 for(hr=0;hr<HOST_REGS;hr++) {
439 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
440 if(cur->regmap[hr]==r) {
441 cur->regmap[hr]=reg;
442 cur->dirty&=~(1<<hr);
443 cur->isconst&=~(1<<hr);
444 return;
445 }
446 }
447 }
448 }
449 }
450 }
451 }
452 for(j=10;j>=0;j--)
453 {
454 for(r=1;r<=MAXREG;r++)
455 {
456 if(hsn[r]==j) {
457 for(hr=0;hr<HOST_REGS;hr++) {
458 if(cur->regmap[hr]==r+64) {
459 cur->regmap[hr]=reg;
460 cur->dirty&=~(1<<hr);
461 cur->isconst&=~(1<<hr);
462 return;
463 }
464 }
465 for(hr=0;hr<HOST_REGS;hr++) {
466 if(cur->regmap[hr]==r) {
467 cur->regmap[hr]=reg;
468 cur->dirty&=~(1<<hr);
469 cur->isconst&=~(1<<hr);
470 return;
471 }
472 }
473 }
474 }
475 }
476 printf("This shouldn't happen (alloc_reg)");exit(1);
477}
478
479void alloc_reg64(struct regstat *cur,int i,signed char reg)
480{
481 int preferred_reg = 8+(reg&1);
482 int r,hr;
483
484 // allocate the lower 32 bits
485 alloc_reg(cur,i,reg);
486
487 // Don't allocate unused registers
488 if((cur->uu>>reg)&1) return;
489
490 // see if the upper half is already allocated
491 for(hr=0;hr<HOST_REGS;hr++)
492 {
493 if(cur->regmap[hr]==reg+64) return;
494 }
495
496 // Keep the same mapping if the register was already allocated in a loop
497 preferred_reg = loop_reg(i,reg,preferred_reg);
498
499 // Try to allocate the preferred register
500 if(cur->regmap[preferred_reg]==-1) {
501 cur->regmap[preferred_reg]=reg|64;
502 cur->dirty&=~(1<<preferred_reg);
503 cur->isconst&=~(1<<preferred_reg);
504 return;
505 }
506 r=cur->regmap[preferred_reg];
507 if(r<64&&((cur->u>>r)&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513 if(r>=64&&((cur->uu>>(r&63))&1)) {
514 cur->regmap[preferred_reg]=reg|64;
515 cur->dirty&=~(1<<preferred_reg);
516 cur->isconst&=~(1<<preferred_reg);
517 return;
518 }
519
520 // Clear any unneeded registers
521 // We try to keep the mapping consistent, if possible, because it
522 // makes branches easier (especially loops). So we try to allocate
523 // first (see above) before removing old mappings. If this is not
524 // possible then go ahead and clear out the registers that are no
525 // longer needed.
526 for(hr=HOST_REGS-1;hr>=0;hr--)
527 {
528 r=cur->regmap[hr];
529 if(r>=0) {
530 if(r<64) {
531 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
532 }
533 else
534 {
535 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
536 }
537 }
538 }
539 // Try to allocate any available register, but prefer
540 // registers that have not been used recently.
541 if(i>0) {
542 for(hr=0;hr<HOST_REGS;hr++) {
543 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
544 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
545 cur->regmap[hr]=reg|64;
546 cur->dirty&=~(1<<hr);
547 cur->isconst&=~(1<<hr);
548 return;
549 }
550 }
551 }
552 }
553 // Try to allocate any available register
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
556 cur->regmap[hr]=reg|64;
557 cur->dirty&=~(1<<hr);
558 cur->isconst&=~(1<<hr);
559 return;
560 }
561 }
562
563 // Ok, now we have to evict someone
564 // Pick a register we hopefully won't need soon
565 u_char hsn[MAXREG+1];
566 memset(hsn,10,sizeof(hsn));
567 int j;
568 lsn(hsn,i,&preferred_reg);
569 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
570 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
571 if(i>0) {
572 // Don't evict the cycle count at entry points, otherwise the entry
573 // stub will have to write it.
574 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
575 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
576 for(j=10;j>=3;j--)
577 {
578 // Alloc preferred register if available
579 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
580 for(hr=0;hr<HOST_REGS;hr++) {
581 // Evict both parts of a 64-bit register
582 if((cur->regmap[hr]&63)==r) {
583 cur->regmap[hr]=-1;
584 cur->dirty&=~(1<<hr);
585 cur->isconst&=~(1<<hr);
586 }
587 }
588 cur->regmap[preferred_reg]=reg|64;
589 return;
590 }
591 for(r=1;r<=MAXREG;r++)
592 {
593 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
594 for(hr=0;hr<HOST_REGS;hr++) {
595 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
596 if(cur->regmap[hr]==r+64) {
597 cur->regmap[hr]=reg|64;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 return;
601 }
602 }
603 }
604 for(hr=0;hr<HOST_REGS;hr++) {
605 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
606 if(cur->regmap[hr]==r) {
607 cur->regmap[hr]=reg|64;
608 cur->dirty&=~(1<<hr);
609 cur->isconst&=~(1<<hr);
610 return;
611 }
612 }
613 }
614 }
615 }
616 }
617 }
618 for(j=10;j>=0;j--)
619 {
620 for(r=1;r<=MAXREG;r++)
621 {
622 if(hsn[r]==j) {
623 for(hr=0;hr<HOST_REGS;hr++) {
624 if(cur->regmap[hr]==r+64) {
625 cur->regmap[hr]=reg|64;
626 cur->dirty&=~(1<<hr);
627 cur->isconst&=~(1<<hr);
628 return;
629 }
630 }
631 for(hr=0;hr<HOST_REGS;hr++) {
632 if(cur->regmap[hr]==r) {
633 cur->regmap[hr]=reg|64;
634 cur->dirty&=~(1<<hr);
635 cur->isconst&=~(1<<hr);
636 return;
637 }
638 }
639 }
640 }
641 }
642 printf("This shouldn't happen");exit(1);
643}
644
645// Allocate a temporary register. This is done without regard to
646// dirty status or whether the register we request is on the unneeded list
647// Note: This will only allocate one register, even if called multiple times
648void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
649{
650 int r,hr;
651 int preferred_reg = -1;
652
653 // see if it's already allocated
654 for(hr=0;hr<HOST_REGS;hr++)
655 {
656 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
657 }
658
659 // Try to allocate any available register
660 for(hr=HOST_REGS-1;hr>=0;hr--) {
661 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
662 cur->regmap[hr]=reg;
663 cur->dirty&=~(1<<hr);
664 cur->isconst&=~(1<<hr);
665 return;
666 }
667 }
668
669 // Find an unneeded register
670 for(hr=HOST_REGS-1;hr>=0;hr--)
671 {
672 r=cur->regmap[hr];
673 if(r>=0) {
674 if(r<64) {
675 if((cur->u>>r)&1) {
676 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
677 cur->regmap[hr]=reg;
678 cur->dirty&=~(1<<hr);
679 cur->isconst&=~(1<<hr);
680 return;
681 }
682 }
683 }
684 else
685 {
686 if((cur->uu>>(r&63))&1) {
687 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
688 cur->regmap[hr]=reg;
689 cur->dirty&=~(1<<hr);
690 cur->isconst&=~(1<<hr);
691 return;
692 }
693 }
694 }
695 }
696 }
697
698 // Ok, now we have to evict someone
699 // Pick a register we hopefully won't need soon
700 // TODO: we might want to follow unconditional jumps here
701 // TODO: get rid of dupe code and make this into a function
702 u_char hsn[MAXREG+1];
703 memset(hsn,10,sizeof(hsn));
704 int j;
705 lsn(hsn,i,&preferred_reg);
706 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
707 if(i>0) {
708 // Don't evict the cycle count at entry points, otherwise the entry
709 // stub will have to write it.
710 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
711 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
712 for(j=10;j>=3;j--)
713 {
714 for(r=1;r<=MAXREG;r++)
715 {
716 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
717 for(hr=0;hr<HOST_REGS;hr++) {
718 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
719 if(cur->regmap[hr]==r+64) {
720 cur->regmap[hr]=reg;
721 cur->dirty&=~(1<<hr);
722 cur->isconst&=~(1<<hr);
723 return;
724 }
725 }
726 }
727 for(hr=0;hr<HOST_REGS;hr++) {
728 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
729 if(cur->regmap[hr]==r) {
730 cur->regmap[hr]=reg;
731 cur->dirty&=~(1<<hr);
732 cur->isconst&=~(1<<hr);
733 return;
734 }
735 }
736 }
737 }
738 }
739 }
740 }
741 for(j=10;j>=0;j--)
742 {
743 for(r=1;r<=MAXREG;r++)
744 {
745 if(hsn[r]==j) {
746 for(hr=0;hr<HOST_REGS;hr++) {
747 if(cur->regmap[hr]==r+64) {
748 cur->regmap[hr]=reg;
749 cur->dirty&=~(1<<hr);
750 cur->isconst&=~(1<<hr);
751 return;
752 }
753 }
754 for(hr=0;hr<HOST_REGS;hr++) {
755 if(cur->regmap[hr]==r) {
756 cur->regmap[hr]=reg;
757 cur->dirty&=~(1<<hr);
758 cur->isconst&=~(1<<hr);
759 return;
760 }
761 }
762 }
763 }
764 }
765 printf("This shouldn't happen");exit(1);
766}
767// Allocate a specific ARM register.
768void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
769{
770 int n;
771 int dirty=0;
772
773 // see if it's already allocated (and dealloc it)
774 for(n=0;n<HOST_REGS;n++)
775 {
776 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
777 dirty=(cur->dirty>>n)&1;
778 cur->regmap[n]=-1;
779 }
780 }
781
782 cur->regmap[hr]=reg;
783 cur->dirty&=~(1<<hr);
784 cur->dirty|=dirty<<hr;
785 cur->isconst&=~(1<<hr);
786}
787
788// Alloc cycle count into dedicated register
789alloc_cc(struct regstat *cur,int i)
790{
791 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
792}
793
794/* Special alloc */
795
796
797/* Assembler */
798
// Printable names of the 16 ARM registers, indexed by register number;
// used by the assem_debug() disassembly lines.
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"
};
816
817void output_byte(u_char byte)
818{
819 *(out++)=byte;
820}
821void output_modrm(u_char mod,u_char rm,u_char ext)
822{
823 assert(mod<4);
824 assert(rm<8);
825 assert(ext<8);
826 u_char byte=(mod<<6)|(ext<<3)|rm;
827 *(out++)=byte;
828}
829void output_sib(u_char scale,u_char index,u_char base)
830{
831 assert(scale<4);
832 assert(index<8);
833 assert(base<8);
834 u_char byte=(scale<<6)|(index<<3)|base;
835 *(out++)=byte;
836}
837void output_w32(u_int word)
838{
839 *((u_int *)out)=word;
840 out+=4;
841}
// Build the register fields of an instruction word: rn in bits 16-19,
// rd in bits 12-15 and rm in bits 0-3.  Each must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build register fields plus an 8-bit immediate with a rotate field:
// rn in bits 16-19, rd in bits 12-15, ((32-shift)&30) placed at bit 7
// upward, and imm in bits 0-7.  'shift' must be even and imm below 256.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express 'imm' as an 8-bit value with an even rotation.
// On success stores the 12-bit field (rotate<<7 | value) in *encoded and
// returns 1; zero encodes as 0.  Returns 0 (with *encoded set to 0) when
// no even rotation brings the value below 256.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30); // rotate right by two bits
  }
  return 0;
}
872void genimm_checked(u_int imm,u_int *encoded)
873{
874 u_int ret=genimm(imm,encoded);
875 assert(ret);
876}
877u_int genjmp(u_int addr)
878{
879 int offset=addr-(int)out-8;
880 if(offset<-33554432||offset>=33554432) {
881 if (addr>2) {
882 printf("genjmp: out of range: %08x\n", offset);
883 exit(1);
884 }
885 return 0;
886 }
887 return ((u_int)offset>>2)&0xffffff;
888}
889
890void emit_mov(int rs,int rt)
891{
892 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
893 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
894}
895
896void emit_movs(int rs,int rt)
897{
898 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
899 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
900}
901
902void emit_add(int rs1,int rs2,int rt)
903{
904 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
905 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
906}
907
908void emit_adds(int rs1,int rs2,int rt)
909{
910 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
911 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
912}
913
914void emit_adcs(int rs1,int rs2,int rt)
915{
916 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
917 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
918}
919
920void emit_sbc(int rs1,int rs2,int rt)
921{
922 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
923 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
924}
925
926void emit_sbcs(int rs1,int rs2,int rt)
927{
928 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
929 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
930}
931
932void emit_neg(int rs, int rt)
933{
934 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
935 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
936}
937
938void emit_negs(int rs, int rt)
939{
940 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
941 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
942}
943
944void emit_sub(int rs1,int rs2,int rt)
945{
946 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
947 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
948}
949
950void emit_subs(int rs1,int rs2,int rt)
951{
952 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
953 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
954}
955
956void emit_zeroreg(int rt)
957{
958 assem_debug("mov %s,#0\n",regname[rt]);
959 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
960}
961
962void emit_loadlp(u_int imm,u_int rt)
963{
964 add_literal((int)out,imm);
965 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
966 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
967}
968void emit_movw(u_int imm,u_int rt)
969{
970 assert(imm<65536);
971 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
972 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
973}
974void emit_movt(u_int imm,u_int rt)
975{
976 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
977 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
978}
979void emit_movimm(u_int imm,u_int rt)
980{
981 u_int armval;
982 if(genimm(imm,&armval)) {
983 assem_debug("mov %s,#%d\n",regname[rt],imm);
984 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
985 }else if(genimm(~imm,&armval)) {
986 assem_debug("mvn %s,#%d\n",regname[rt],imm);
987 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
988 }else if(imm<65536) {
989 #ifdef ARMv5_ONLY
990 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
991 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
992 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
993 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
994 #else
995 emit_movw(imm,rt);
996 #endif
997 }else{
998 #ifdef ARMv5_ONLY
999 emit_loadlp(imm,rt);
1000 #else
1001 emit_movw(imm&0x0000FFFF,rt);
1002 emit_movt(imm&0xFFFF0000,rt);
1003 #endif
1004 }
1005}
1006void emit_pcreladdr(u_int rt)
1007{
1008 assem_debug("add %s,pc,#?\n",regname[rt]);
1009 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1010}
1011
1012void emit_loadreg(int r, int hr)
1013{
1014#ifdef FORCE32
1015 if(r&64) {
1016 printf("64bit load in 32bit mode!\n");
1017 assert(0);
1018 return;
1019 }
1020#endif
1021 if((r&63)==0)
1022 emit_zeroreg(hr);
1023 else {
1024 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1025 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1026 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1027 if(r==CCREG) addr=(int)&cycle_count;
1028 if(r==CSREG) addr=(int)&Status;
1029 if(r==FSREG) addr=(int)&FCR31;
1030 if(r==INVCP) addr=(int)&invc_ptr;
1031 u_int offset = addr-(u_int)&dynarec_local;
1032 assert(offset<4096);
1033 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1034 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1035 }
1036}
1037void emit_storereg(int r, int hr)
1038{
1039#ifdef FORCE32
1040 if(r&64) {
1041 printf("64bit store in 32bit mode!\n");
1042 assert(0);
1043 return;
1044 }
1045#endif
1046 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1047 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1048 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1049 if(r==CCREG) addr=(int)&cycle_count;
1050 if(r==FSREG) addr=(int)&FCR31;
1051 u_int offset = addr-(u_int)&dynarec_local;
1052 assert(offset<4096);
1053 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1054 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1055}
1056
1057void emit_test(int rs, int rt)
1058{
1059 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1060 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1061}
1062
1063void emit_testimm(int rs,int imm)
1064{
1065 u_int armval;
1066 assem_debug("tst %s,#%d\n",regname[rs],imm);
1067 genimm_checked(imm,&armval);
1068 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1069}
1070
1071void emit_testeqimm(int rs,int imm)
1072{
1073 u_int armval;
1074 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1075 genimm_checked(imm,&armval);
1076 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1077}
1078
1079void emit_not(int rs,int rt)
1080{
1081 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1082 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1083}
1084
1085void emit_mvnmi(int rs,int rt)
1086{
1087 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1088 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1089}
1090
1091void emit_and(u_int rs1,u_int rs2,u_int rt)
1092{
1093 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1095}
1096
1097void emit_or(u_int rs1,u_int rs2,u_int rt)
1098{
1099 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1100 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1101}
1102void emit_or_and_set_flags(int rs1,int rs2,int rt)
1103{
1104 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1105 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1106}
1107
1108void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1109{
1110 assert(rs<16);
1111 assert(rt<16);
1112 assert(imm<32);
1113 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1114 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1115}
1116
1117void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1118{
1119 assert(rs<16);
1120 assert(rt<16);
1121 assert(imm<32);
1122 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1123 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1124}
1125
1126void emit_xor(u_int rs1,u_int rs2,u_int rt)
1127{
1128 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1129 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1130}
1131
1132void emit_addimm(u_int rs,int imm,u_int rt)
1133{
1134 assert(rs<16);
1135 assert(rt<16);
1136 if(imm!=0) {
1137 u_int armval;
1138 if(genimm(imm,&armval)) {
1139 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1140 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1141 }else if(genimm(-imm,&armval)) {
1142 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1143 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1144 }else if(imm<0) {
1145 assert(imm>-65536);
1146 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1147 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1148 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1149 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1150 }else{
1151 assert(imm<65536);
1152 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1153 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1154 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1155 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1156 }
1157 }
1158 else if(rs!=rt) emit_mov(rs,rt);
1159}
1160
1161void emit_addimm_and_set_flags(int imm,int rt)
1162{
1163 assert(imm>-65536&&imm<65536);
1164 u_int armval;
1165 if(genimm(imm,&armval)) {
1166 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1167 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1168 }else if(genimm(-imm,&armval)) {
1169 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1170 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1171 }else if(imm<0) {
1172 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1173 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1174 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1175 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1176 }else{
1177 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1178 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1179 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1180 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1181 }
1182}
1183void emit_addimm_no_flags(u_int imm,u_int rt)
1184{
1185 emit_addimm(rt,imm,rt);
1186}
1187
1188void emit_addnop(u_int r)
1189{
1190 assert(r<16);
1191 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1192 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1193}
1194
1195void emit_adcimm(u_int rs,int imm,u_int rt)
1196{
1197 u_int armval;
1198 genimm_checked(imm,&armval);
1199 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1200 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1201}
1202/*void emit_sbcimm(int imm,u_int rt)
1203{
1204 u_int armval;
1205 genimm_checked(imm,&armval);
1206 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1207 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1208}*/
1209void emit_sbbimm(int imm,u_int rt)
1210{
1211 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1212 assert(rt<8);
1213 if(imm<128&&imm>=-128) {
1214 output_byte(0x83);
1215 output_modrm(3,rt,3);
1216 output_byte(imm);
1217 }
1218 else
1219 {
1220 output_byte(0x81);
1221 output_modrm(3,rt,3);
1222 output_w32(imm);
1223 }
1224}
1225void emit_rscimm(int rs,int imm,u_int rt)
1226{
1227 assert(0);
1228 u_int armval;
1229 genimm_checked(imm,&armval);
1230 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1231 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1232}
1233
// Add a 32-bit constant to the 64-bit value held in rsh:rsl, writing the
// result to rth:rtl. The constant is first loaded into HOST_TEMPREG, which
// is therefore clobbered; the high word only receives the carry (adc #0).
void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
{
  // TODO: if(genimm(imm,&armval)) ...
  // else
  emit_movimm(imm,HOST_TEMPREG);
  emit_adds(HOST_TEMPREG,rsl,rtl);
  emit_adcimm(rsh,0,rth);
}
1242
1243void emit_sbb(int rs1,int rs2)
1244{
1245 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1246 output_byte(0x19);
1247 output_modrm(3,rs1,rs2);
1248}
1249
1250void emit_andimm(int rs,int imm,int rt)
1251{
1252 u_int armval;
1253 if(imm==0) {
1254 emit_zeroreg(rt);
1255 }else if(genimm(imm,&armval)) {
1256 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1257 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1258 }else if(genimm(~imm,&armval)) {
1259 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1260 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1261 }else if(imm==65535) {
1262 #ifdef ARMv5_ONLY
1263 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1264 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1265 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1266 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1267 #else
1268 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1269 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1270 #endif
1271 }else{
1272 assert(imm>0&&imm<65535);
1273 #ifdef ARMv5_ONLY
1274 assem_debug("mov r14,#%d\n",imm&0xFF00);
1275 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1276 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1277 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1278 #else
1279 emit_movw(imm,HOST_TEMPREG);
1280 #endif
1281 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1282 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1283 }
1284}
1285
1286void emit_orimm(int rs,int imm,int rt)
1287{
1288 u_int armval;
1289 if(imm==0) {
1290 if(rs!=rt) emit_mov(rs,rt);
1291 }else if(genimm(imm,&armval)) {
1292 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1293 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1294 }else{
1295 assert(imm>0&&imm<65536);
1296 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1297 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1298 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1299 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1300 }
1301}
1302
1303void emit_xorimm(int rs,int imm,int rt)
1304{
1305 u_int armval;
1306 if(imm==0) {
1307 if(rs!=rt) emit_mov(rs,rt);
1308 }else if(genimm(imm,&armval)) {
1309 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1310 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1311 }else{
1312 assert(imm>0&&imm<65536);
1313 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1314 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1315 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1316 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1317 }
1318}
1319
1320void emit_shlimm(int rs,u_int imm,int rt)
1321{
1322 assert(imm>0);
1323 assert(imm<32);
1324 //if(imm==1) ...
1325 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1326 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1327}
1328
1329void emit_lsls_imm(int rs,int imm,int rt)
1330{
1331 assert(imm>0);
1332 assert(imm<32);
1333 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1334 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1335}
1336
1337void emit_shrimm(int rs,u_int imm,int rt)
1338{
1339 assert(imm>0);
1340 assert(imm<32);
1341 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1342 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1343}
1344
1345void emit_sarimm(int rs,u_int imm,int rt)
1346{
1347 assert(imm>0);
1348 assert(imm<32);
1349 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1350 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1351}
1352
1353void emit_rorimm(int rs,u_int imm,int rt)
1354{
1355 assert(imm>0);
1356 assert(imm<32);
1357 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1358 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1359}
1360
1361void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1362{
1363 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1364 assert(imm>0);
1365 assert(imm<32);
1366 //if(imm==1) ...
1367 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1369 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1370 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1371}
1372
1373void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1374{
1375 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1376 assert(imm>0);
1377 assert(imm<32);
1378 //if(imm==1) ...
1379 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1380 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1381 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1382 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1383}
1384
1385void emit_signextend16(int rs,int rt)
1386{
1387 #ifdef ARMv5_ONLY
1388 emit_shlimm(rs,16,rt);
1389 emit_sarimm(rt,16,rt);
1390 #else
1391 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1392 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1393 #endif
1394}
1395
1396void emit_signextend8(int rs,int rt)
1397{
1398 #ifdef ARMv5_ONLY
1399 emit_shlimm(rs,24,rt);
1400 emit_sarimm(rt,24,rt);
1401 #else
1402 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1403 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1404 #endif
1405}
1406
1407void emit_shl(u_int rs,u_int shift,u_int rt)
1408{
1409 assert(rs<16);
1410 assert(rt<16);
1411 assert(shift<16);
1412 //if(imm==1) ...
1413 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1414 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1415}
1416void emit_shr(u_int rs,u_int shift,u_int rt)
1417{
1418 assert(rs<16);
1419 assert(rt<16);
1420 assert(shift<16);
1421 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1422 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1423}
1424void emit_sar(u_int rs,u_int shift,u_int rt)
1425{
1426 assert(rs<16);
1427 assert(rt<16);
1428 assert(shift<16);
1429 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1430 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1431}
// Unimplemented on ARM: traces an x86-style "shl reg,cl" and then asserts.
void emit_shlcl(int r)
{
  assem_debug("shl %%%s,%%cl\n",regname[r]);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "shr reg,cl" and then asserts.
void emit_shrcl(int r)
{
  assem_debug("shr %%%s,%%cl\n",regname[r]);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "sar reg,cl" and then asserts.
void emit_sarcl(int r)
{
  assem_debug("sar %%%s,%%cl\n",regname[r]);
  assert(0);
}
1447
// Unimplemented on ARM: traces an x86-style "shld r1,r2,cl" and then asserts.
void emit_shldcl(int r1,int r2)
{
  assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "shrd r1,r2,cl" and then asserts.
void emit_shrdcl(int r1,int r2)
{
  assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
  assert(0);
}
1458void emit_orrshl(u_int rs,u_int shift,u_int rt)
1459{
1460 assert(rs<16);
1461 assert(rt<16);
1462 assert(shift<16);
1463 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1464 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1465}
1466void emit_orrshr(u_int rs,u_int shift,u_int rt)
1467{
1468 assert(rs<16);
1469 assert(rt<16);
1470 assert(shift<16);
1471 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1472 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1473}
1474
1475void emit_cmpimm(int rs,int imm)
1476{
1477 u_int armval;
1478 if(genimm(imm,&armval)) {
1479 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1480 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1481 }else if(genimm(-imm,&armval)) {
1482 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1483 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1484 }else if(imm>0) {
1485 assert(imm<65536);
1486 #ifdef ARMv5_ONLY
1487 emit_movimm(imm,HOST_TEMPREG);
1488 #else
1489 emit_movw(imm,HOST_TEMPREG);
1490 #endif
1491 assem_debug("cmp %s,r14\n",regname[rs]);
1492 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1493 }else{
1494 assert(imm>-65536);
1495 #ifdef ARMv5_ONLY
1496 emit_movimm(-imm,HOST_TEMPREG);
1497 #else
1498 emit_movw(-imm,HOST_TEMPREG);
1499 #endif
1500 assem_debug("cmn %s,r14\n",regname[rs]);
1501 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1502 }
1503}
1504
// Unimplemented on ARM: traces an x86-style "cmovne" from memory and asserts.
void emit_cmovne(u_int *addr,int rt)
{
  assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "cmovl" from memory and asserts.
void emit_cmovl(u_int *addr,int rt)
{
  assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "cmovs" from memory and asserts.
void emit_cmovs(u_int *addr,int rt)
{
  assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
  assert(0);
}
1520void emit_cmovne_imm(int imm,int rt)
1521{
1522 assem_debug("movne %s,#%d\n",regname[rt],imm);
1523 u_int armval;
1524 genimm_checked(imm,&armval);
1525 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1526}
1527void emit_cmovl_imm(int imm,int rt)
1528{
1529 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1530 u_int armval;
1531 genimm_checked(imm,&armval);
1532 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1533}
1534void emit_cmovb_imm(int imm,int rt)
1535{
1536 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1537 u_int armval;
1538 genimm_checked(imm,&armval);
1539 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1540}
1541void emit_cmovs_imm(int imm,int rt)
1542{
1543 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1544 u_int armval;
1545 genimm_checked(imm,&armval);
1546 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1547}
1548void emit_cmove_reg(int rs,int rt)
1549{
1550 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1551 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1552}
1553void emit_cmovne_reg(int rs,int rt)
1554{
1555 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1556 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1557}
1558void emit_cmovl_reg(int rs,int rt)
1559{
1560 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1561 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1562}
1563void emit_cmovs_reg(int rs,int rt)
1564{
1565 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1566 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1567}
1568
// Set rt to 1 if rs < imm (LT condition after the compare), else 0.
// When rt aliases rs the zeroing has to wait until after the compare.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if the compare of rs against imm leaves the CC condition
// true, else 0. When rt aliases rs the zeroing has to wait until after the
// compare.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// 64-bit "set if less than immediate" with a 32-bit result in rt.
// The low word is compared first via emit_slti32(); the high word (rsh)
// then overrides that result. rsh must not be rt (asserted).
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // Negative immediate: compare the high word against -1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
  else
  {
    // Non-negative immediate: test the high word against zero
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
// 64-bit variant of emit_sltiu32() with a 32-bit result in rt.
// The low word is compared first via emit_sltiu32(); the high word (rsh)
// then overrides that result. rsh must not be rt (asserted).
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // Negative immediate: any high word other than -1 forces the result to 1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    // Non-negative immediate: any non-zero high word forces the result to 0
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1615
1616void emit_cmp(int rs,int rt)
1617{
1618 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1619 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1620}
// Set rt to 1 when rs is greater than zero, else 0: compare rs with 1,
// load 1, then overwrite with 0 under the LT condition.
// NOTE(review): relies on emit_movimm() (defined elsewhere) leaving the
// flags from the compare intact - confirm.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Leave rt non-zero exactly when rs is non-zero: the flags are set from rs
// (via a flag-setting move into rt, or a test when rt already is rs) and
// rt is overwritten with 1 under the NE condition.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// 64-bit "greater than zero" test of rsh:rsl with a 32-bit result in rt.
// Starts from the low-word result of emit_set_gz32(), then lets the high
// word override it: non-zero high word -> 1, negative high word -> 0.
// NOTE(review): the low-word step uses a signed comparison (movlt); confirm
// the intended result when rsh==0 and bit 31 of rsl is set.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// 64-bit "non-zero" test: OR the two halves into rt with the flags set,
// then overwrite rt with 1 under the NE condition.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (LT condition after the compare), else 0.
// If rt is one of the operands it can only be zeroed after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand=(rs1==rt||rs2==rt);
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if comparing rs1 with rs2 leaves the CC condition true,
// else 0. If rt is one of the operands it can only be zeroed after the
// compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand=(rs1==rt||rs2==rt);
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// 64-bit "set if less" of u1:l1 against u2:l2 with a 32-bit result in rt.
// The low words are compared, then the high words are subtracted with
// borrow into HOST_TEMPREG (clobbered) and rt becomes 1 under LT.
// Neither high-word register may be rt (asserted).
void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
{
  //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
  assert(u1!=rt);
  assert(u2!=rt);
  emit_cmp(l1,l2);
  emit_movimm(0,rt);
  emit_sbcs(u1,u2,HOST_TEMPREG);
  emit_cmovl_imm(1,rt);
}
// 64-bit variant of emit_set_if_carry32() for u1:l1 against u2:l2.
// The low words are compared, then the high words are subtracted with
// borrow into HOST_TEMPREG (clobbered) and rt becomes 1 under CC.
// Neither high-word register may be rt (asserted).
void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
{
  //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
  assert(u1!=rt);
  assert(u2!=rt);
  emit_cmp(l1,l2);
  emit_movimm(0,rt);
  emit_sbcs(u1,u2,HOST_TEMPREG);
  emit_cmovb_imm(1,rt);
}
1685
1686void emit_call(int a)
1687{
1688 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1689 u_int offset=genjmp(a);
1690 output_w32(0xeb000000|offset);
1691}
1692void emit_jmp(int a)
1693{
1694 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1695 u_int offset=genjmp(a);
1696 output_w32(0xea000000|offset);
1697}
// Emit "bne a" (branch if NE); offset field comes from genjmp().
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq a" (branch if EQ); offset field comes from genjmp().
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi a" (branch if MI); offset field comes from genjmp().
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl a" (branch if PL); offset field comes from genjmp().
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt a" (branch if LT); offset field comes from genjmp().
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge a" (branch if GE); offset field comes from genjmp().
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc a" (branch if VC); offset field comes from genjmp().
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs a" (branch if CS); offset field comes from genjmp().
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc a" (branch if CC); offset field comes from genjmp().
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1752
// Unimplemented on ARM: traces an x86-style "push $imm" and then asserts.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "pusha" and then asserts.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "popa" and then asserts.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "push reg" and then asserts.
void emit_pushreg(u_int r)
{
  assem_debug("push %%%s\n",regname[r]);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "pop reg" and then asserts.
void emit_popreg(u_int r)
{
  assem_debug("pop %%%s\n",regname[r]);
  assert(0);
}
1778void emit_callreg(u_int r)
1779{
1780 assert(r<15);
1781 assem_debug("blx %s\n",regname[r]);
1782 output_w32(0xe12fff30|r);
1783}
1784void emit_jmpreg(u_int r)
1785{
1786 assem_debug("mov pc,%s\n",regname[r]);
1787 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1788}
1789
1790void emit_readword_indexed(int offset, int rs, int rt)
1791{
1792 assert(offset>-4096&&offset<4096);
1793 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1794 if(offset>=0) {
1795 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1796 }else{
1797 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1798 }
1799}
1800void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1801{
1802 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1803 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1804}
1805void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1806{
1807 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1808 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1809}
1810void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1811{
1812 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1813 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1814}
1815void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1816{
1817 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1818 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1819}
1820void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1821{
1822 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1823 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1824}
1825void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1826{
1827 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1828 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1829}
// Word load that optionally goes through a map register.
//   map<0  -> plain "ldr rt,[rs,#addr]"
//   map>=0 -> "ldr rt,[rs,map,lsl #2]"; addr must then be 0 (asserted)
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// 64-bit load into rh:rl, optionally through a map register.
// The high word comes from addr and the low word from addr+4; the high
// word load is skipped when rh is negative.
// In the mapped case the map register is incremented by one between the
// two loads (so it is modified), and rh must not be rs (asserted).
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map<0) {
    if(rh>=0) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map); // map is scaled by 4 in the load, so +1 means +4 bytes
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1850void emit_movsbl_indexed(int offset, int rs, int rt)
1851{
1852 assert(offset>-256&&offset<256);
1853 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1854 if(offset>=0) {
1855 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1856 }else{
1857 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1858 }
1859}
1860void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1861{
1862 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1863 else {
1864 if(addr==0) {
1865 emit_shlimm(map,2,map);
1866 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1867 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1868 }else{
1869 assert(addr>-256&&addr<256);
1870 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1871 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1872 emit_movsbl_indexed(addr, rt, rt);
1873 }
1874 }
1875}
1876void emit_movswl_indexed(int offset, int rs, int rt)
1877{
1878 assert(offset>-256&&offset<256);
1879 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1880 if(offset>=0) {
1881 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1882 }else{
1883 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1884 }
1885}
1886void emit_movzbl_indexed(int offset, int rs, int rt)
1887{
1888 assert(offset>-4096&&offset<4096);
1889 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1890 if(offset>=0) {
1891 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1892 }else{
1893 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1894 }
1895}
1896void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1897{
1898 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1899 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1900}
// Unsigned byte load that optionally goes through a map register.
//   map<0  -> plain emit_movzbl_indexed()
//   map>=0 -> "ldrb rt,[base,map,lsl #2]" where base is rs, or rt after
//             rt = rs + addr when addr is non-zero
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1913void emit_movzwl_indexed(int offset, int rs, int rt)
1914{
1915 assert(offset>-256&&offset<256);
1916 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1917 if(offset>=0) {
1918 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1919 }else{
1920 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1921 }
1922}
1923void emit_readword(int addr, int rt)
1924{
1925 u_int offset = addr-(u_int)&dynarec_local;
1926 assert(offset<4096);
1927 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1928 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1929}
1930void emit_movsbl(int addr, int rt)
1931{
1932 u_int offset = addr-(u_int)&dynarec_local;
1933 assert(offset<256);
1934 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1935 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1936}
1937void emit_movswl(int addr, int rt)
1938{
1939 u_int offset = addr-(u_int)&dynarec_local;
1940 assert(offset<256);
1941 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1942 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1943}
1944void emit_movzbl(int addr, int rt)
1945{
1946 u_int offset = addr-(u_int)&dynarec_local;
1947 assert(offset<4096);
1948 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1949 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1950}
1951void emit_movzwl(int addr, int rt)
1952{
1953 u_int offset = addr-(u_int)&dynarec_local;
1954 assert(offset<256);
1955 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1956 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1957}
// Unimplemented on ARM: traces an x86-style "movzwl" and then asserts.
// The source register name is printed from its second character onward.
void emit_movzwl_reg(int rs, int rt)
{
  assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
  assert(0);
}
1963
// Unimplemented on ARM: traces an x86-style "xchg" and then asserts.
void emit_xchg(int rs, int rt)
{
  assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
  assert(0);
}
1969void emit_writeword_indexed(int rt, int offset, int rs)
1970{
1971 assert(offset>-4096&&offset<4096);
1972 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1973 if(offset>=0) {
1974 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1975 }else{
1976 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1977 }
1978}
1979void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1980{
1981 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1982 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1983}
// Word store that optionally goes through a map register.
//   map<0  -> plain "str rt,[rs,#addr]"
//   map>=0 -> "str rt,[rs,map,lsl #2]"; addr must then be 0 (asserted)
// temp is accepted for signature symmetry but not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// 64-bit store of rh:rl, optionally through a map register.
// Unmapped: the high word goes to addr (skipped when rh is negative) and
// the low word to addr+4.
// Mapped: rh must be valid (asserted). If temp is a register distinct from
// rs it receives map+1 and indexes the low-word store; otherwise rs itself
// is advanced by 4 (and so is modified) before the low-word store.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2008void emit_writehword_indexed(int rt, int offset, int rs)
2009{
2010 assert(offset>-256&&offset<256);
2011 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2012 if(offset>=0) {
2013 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2014 }else{
2015 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2016 }
2017}
2018void emit_writebyte_indexed(int rt, int offset, int rs)
2019{
2020 assert(offset>-4096&&offset<4096);
2021 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2022 if(offset>=0) {
2023 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2024 }else{
2025 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2026 }
2027}
2028void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2029{
2030 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2031 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2032}
// Byte store that optionally goes through a map register.
//   map<0  -> plain emit_writebyte_indexed()
//   map>=0 -> "strb rt,[base,map,lsl #2]" where base is rs, or temp after
//             temp = rs + addr when addr is non-zero
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
2045void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2046{
2047 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2048 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2049}
2050void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2051{
2052 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2053 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2054}
2055void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2056{
2057 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2058 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2059}
2060void emit_writeword(int rt, int addr)
2061{
2062 u_int offset = addr-(u_int)&dynarec_local;
2063 assert(offset<4096);
2064 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2065 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2066}
2067void emit_writehword(int rt, int addr)
2068{
2069 u_int offset = addr-(u_int)&dynarec_local;
2070 assert(offset<256);
2071 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2072 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2073}
2074void emit_writebyte(int rt, int addr)
2075{
2076 u_int offset = addr-(u_int)&dynarec_local;
2077 assert(offset<4096);
2078 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2079 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2080}
// Unimplemented on ARM: traces an x86-style immediate word store and asserts.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style immediate byte store and asserts.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2091
// Unimplemented on ARM: traces an x86-style "mul" and then asserts.
void emit_mul(int rs)
{
  assem_debug("mul %%%s\n",regname[rs]);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "imul" and then asserts.
void emit_imul(int rs)
{
  assem_debug("imul %%%s\n",regname[rs]);
  assert(0);
}
2102void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2103{
2104 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2105 assert(rs1<16);
2106 assert(rs2<16);
2107 assert(hi<16);
2108 assert(lo<16);
2109 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2110}
2111void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2112{
2113 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2114 assert(rs1<16);
2115 assert(rs2<16);
2116 assert(hi<16);
2117 assert(lo<16);
2118 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2119}
2120
// Unimplemented on ARM: traces an x86-style "div" and then asserts.
void emit_div(int rs)
{
  assem_debug("div %%%s\n",regname[rs]);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "idiv" and then asserts.
void emit_idiv(int rs)
{
  assem_debug("idiv %%%s\n",regname[rs]);
  assert(0);
}
// Unimplemented on ARM: traces an x86-style "cdq" and then asserts.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2136
2137void emit_clz(int rs,int rt)
2138{
2139 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2140 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2141}
2142
2143void emit_subcs(int rs1,int rs2,int rt)
2144{
2145 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2146 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2147}
2148
2149void emit_shrcc_imm(int rs,u_int imm,int rt)
2150{
2151 assert(imm>0);
2152 assert(imm<32);
2153 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2154 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2155}
2156
2157void emit_negmi(int rs, int rt)
2158{
2159 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2160 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2161}
2162
2163void emit_negsmi(int rs, int rt)
2164{
2165 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2166 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2167}
2168
2169void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2170{
2171 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2172 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2173}
2174
2175void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2176{
2177 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2178 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2179}
2180
2181void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2182{
2183 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2184 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2185}
2186
2187void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2188{
2189 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2190 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2191}
2192
2193void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2194{
2195 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2196 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2197}
2198
2199void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2200{
2201 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2202 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2203}
2204
2205void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2206{
2207 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2208 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2209}
2210
2211void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2212{
2213 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2214 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2215}
2216
2217void emit_teq(int rs, int rt)
2218{
2219 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2220 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2221}
2222
2223void emit_rsbimm(int rs, int imm, int rt)
2224{
2225 u_int armval;
2226 genimm_checked(imm,&armval);
2227 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2228 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2229}
2230
2231// Load 2 immediates optimizing for small code size
2232void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2233{
2234 emit_movimm(imm1,rt1);
2235 u_int armval;
2236 if(genimm(imm2-imm1,&armval)) {
2237 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2238 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2239 }else if(genimm(imm1-imm2,&armval)) {
2240 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2241 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2242 }
2243 else emit_movimm(imm2,rt2);
2244}
2245
2246// Conditionally select one of two immediates, optimizing for small code size
2247// This will only be called if HAVE_CMOV_IMM is defined
2248void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2249{
2250 u_int armval;
2251 if(genimm(imm2-imm1,&armval)) {
2252 emit_movimm(imm1,rt);
2253 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2254 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2255 }else if(genimm(imm1-imm2,&armval)) {
2256 emit_movimm(imm1,rt);
2257 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2258 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2259 }
2260 else {
2261 #ifdef ARMv5_ONLY
2262 emit_movimm(imm1,rt);
2263 add_literal((int)out,imm2);
2264 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2265 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2266 #else
2267 emit_movw(imm1&0x0000FFFF,rt);
2268 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2269 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2270 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2271 }
2272 emit_movt(imm1&0xFFFF0000,rt);
2273 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2274 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2275 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2276 }
2277 #endif
2278 }
2279}
2280
// Special case for checking invalid_code (immediate-address form).
// Not implemented here: it asserts unconditionally.
void emit_cmpmem_indexedsr12_imm(int addr, int r, int imm)
{
  assert(0);
}
2286
2287// special case for checking invalid_code
2288void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2289{
2290 assert(imm<128&&imm>=0);
2291 assert(r>=0&&r<16);
2292 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2293 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2294 emit_cmpimm(HOST_TEMPREG,imm);
2295}
2296
2297// special case for tlb mapping
2298void emit_addsr12(int rs1,int rs2,int rt)
2299{
2300 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2301 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2302}
2303
2304void emit_callne(int a)
2305{
2306 assem_debug("blne %x\n",a);
2307 u_int offset=genjmp(a);
2308 output_w32(0x1b000000|offset);
2309}
2310
// Used to preload hash table entries.
// Writes two opcode bytes (0x0F, 0x18), a modrm byte and the 32-bit
// address.
// NOTE(review): this byte sequence looks like an x86 prefetch encoding
// rather than an ARM instruction - confirm whether this function is ever
// reached in the ARM build.
void emit_prefetch(void *addr)
{
  const int target = (int)addr;

  assem_debug("prefetch %x\n", target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0, 5, 1);
  output_w32(target);
}
2320void emit_prefetchreg(int r)
2321{
2322 assem_debug("pld %s\n",regname[r]);
2323 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2324}
2325
2326// Special case for mini_ht
2327void emit_ldreq_indexed(int rs, u_int offset, int rt)
2328{
2329 assert(offset<4096);
2330 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2331 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2332}
2333
2334void emit_flds(int r,int sr)
2335{
2336 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2337 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2338}
2339
2340void emit_vldr(int r,int vr)
2341{
2342 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2343 output_w32(0xed900b00|(vr<<12)|(r<<16));
2344}
2345
2346void emit_fsts(int sr,int r)
2347{
2348 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2349 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2350}
2351
2352void emit_vstr(int vr,int r)
2353{
2354 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2355 output_w32(0xed800b00|(vr<<12)|(r<<16));
2356}
2357
/* Emit "ftosizs s<d>, s<s>": single-precision source s, single-precision
 * register destination d. */
void emit_ftosizs(int s, int d)
{
  unsigned int insn = 0xeebd0ac0;

  assem_debug("ftosizs s%d,s%d\n", d, s);
  insn |= ((d & 14) << 11) | ((d & 1) << 22);   /* destination s-register */
  insn |= ((s & 14) >> 1) | ((s & 1) << 5);     /* source s-register */
  output_w32(insn);
}
2363
/* Emit "ftosizd s<d>, d<s>": double-precision source s, single-precision
 * register destination d. */
void emit_ftosizd(int s, int d)
{
  unsigned int insn = 0xeebd0bc0;

  assem_debug("ftosizd s%d,d%d\n", d, s);
  insn |= ((d & 14) << 11) | ((d & 1) << 22);   /* destination s-register */
  insn |= s & 7;                                /* source d-register */
  output_w32(insn);
}
2369
/* Emit "fsitos s<d>, s<s>": source register s, single-precision
 * destination d. */
void emit_fsitos(int s, int d)
{
  unsigned int insn = 0xeeb80ac0;

  assem_debug("fsitos s%d,s%d\n", d, s);
  insn |= ((d & 14) << 11) | ((d & 1) << 22);   /* destination s-register */
  insn |= ((s & 14) >> 1) | ((s & 1) << 5);     /* source s-register */
  output_w32(insn);
}
2375
/* Emit "fsitod d<d>, s<s>": single-precision-register source s,
 * double-precision destination d. */
void emit_fsitod(int s, int d)
{
  unsigned int insn = 0xeeb80bc0;

  assem_debug("fsitod d%d,s%d\n", d, s);
  insn |= (d & 7) << 12;                      /* destination d-register */
  insn |= ((s & 14) >> 1) | ((s & 1) << 5);   /* source s-register */
  output_w32(insn);
}
2381
/* Emit "fcvtds d<d>, s<s>": single-precision source s, double-precision
 * destination d. */
void emit_fcvtds(int s, int d)
{
  unsigned int insn = 0xeeb70ac0;

  assem_debug("fcvtds d%d,s%d\n", d, s);
  insn |= (d & 7) << 12;                      /* destination d-register */
  insn |= ((s & 14) >> 1) | ((s & 1) << 5);   /* source s-register */
  output_w32(insn);
}
2387
/* Emit "fcvtsd s<d>, d<s>": double-precision source s, single-precision
 * destination d. */
void emit_fcvtsd(int s, int d)
{
  unsigned int insn = 0xeeb70bc0;

  assem_debug("fcvtsd s%d,d%d\n", d, s);
  insn |= ((d & 14) << 11) | ((d & 1) << 22);   /* destination s-register */
  insn |= s & 7;                                /* source d-register */
  output_w32(insn);
}
2393
/* Emit "fsqrts s<d>, s<s>": single-precision square root of s into d.
 * Both operands are encoded as single-precision registers, so the debug
 * text names both as s-registers (it previously printed the destination
 * as a d-register). */
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2399
/* Emit "fsqrtd d<d>, d<s>": double-precision square root of s into d.
 * Both operands are encoded as double-precision registers, so the debug
 * text names both as d-registers (it previously printed the destination
 * as an s-register). */
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2405
/* Emit "fabss s<d>, s<s>": single-precision absolute value of s into d.
 * Both operands are encoded as single-precision registers, so the debug
 * text names both as s-registers (it previously printed the destination
 * as a d-register). */
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2411
/* Emit "fabsd d<d>, d<s>": double-precision absolute value of s into d.
 * Both operands are encoded as double-precision registers, so the debug
 * text names both as d-registers (it previously printed the destination
 * as an s-register). */
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2417
/* Emit "fnegs s<d>, s<s>": single-precision negation of s into d.
 * Both operands are encoded as single-precision registers, so the debug
 * text names both as s-registers (it previously printed the destination
 * as a d-register). */
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2423
/* Emit "fnegd d<d>, d<s>": double-precision negation of s into d.
 * Both operands are encoded as double-precision registers, so the debug
 * text names both as d-registers (it previously printed the destination
 * as an s-register). */
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2429
/* Emit "fadds s<d>, s<s1>, s<s2>" (single-precision add). */
void emit_fadds(int s1, int s2, int d)
{
  unsigned int insn = 0xee300a00;

  assem_debug("fadds s%d,s%d,s%d\n", d, s1, s2);
  insn |= ((d & 14) << 11) | ((d & 1) << 22);     /* destination */
  insn |= ((s1 & 14) << 15) | ((s1 & 1) << 7);    /* first operand */
  insn |= ((s2 & 14) >> 1) | ((s2 & 1) << 5);     /* second operand */
  output_w32(insn);
}
2435
/* Emit "faddd d<d>, d<s1>, d<s2>" (double-precision add). */
void emit_faddd(int s1, int s2, int d)
{
  unsigned int insn = 0xee300b00;

  assem_debug("faddd d%d,d%d,d%d\n", d, s1, s2);
  insn |= (d & 7) << 12;    /* destination */
  insn |= (s1 & 7) << 16;   /* first operand */
  insn |= s2 & 7;           /* second operand */
  output_w32(insn);
}
2441
/* Emit "fsubs s<d>, s<s1>, s<s2>" (single-precision subtract). */
void emit_fsubs(int s1, int s2, int d)
{
  unsigned int insn = 0xee300a40;

  assem_debug("fsubs s%d,s%d,s%d\n", d, s1, s2);
  insn |= ((d & 14) << 11) | ((d & 1) << 22);     /* destination */
  insn |= ((s1 & 14) << 15) | ((s1 & 1) << 7);    /* first operand */
  insn |= ((s2 & 14) >> 1) | ((s2 & 1) << 5);     /* second operand */
  output_w32(insn);
}
2447
/* Emit "fsubd d<d>, d<s1>, d<s2>" (double-precision subtract). */
void emit_fsubd(int s1, int s2, int d)
{
  unsigned int insn = 0xee300b40;

  assem_debug("fsubd d%d,d%d,d%d\n", d, s1, s2);
  insn |= (d & 7) << 12;    /* destination */
  insn |= (s1 & 7) << 16;   /* first operand */
  insn |= s2 & 7;           /* second operand */
  output_w32(insn);
}
2453
/* Emit "fmuls s<d>, s<s1>, s<s2>" (single-precision multiply). */
void emit_fmuls(int s1, int s2, int d)
{
  unsigned int insn = 0xee200a00;

  assem_debug("fmuls s%d,s%d,s%d\n", d, s1, s2);
  insn |= ((d & 14) << 11) | ((d & 1) << 22);     /* destination */
  insn |= ((s1 & 14) << 15) | ((s1 & 1) << 7);    /* first operand */
  insn |= ((s2 & 14) >> 1) | ((s2 & 1) << 5);     /* second operand */
  output_w32(insn);
}
2459
/* Emit "fmuld d<d>, d<s1>, d<s2>" (double-precision multiply). */
void emit_fmuld(int s1, int s2, int d)
{
  unsigned int insn = 0xee200b00;

  assem_debug("fmuld d%d,d%d,d%d\n", d, s1, s2);
  insn |= (d & 7) << 12;    /* destination */
  insn |= (s1 & 7) << 16;   /* first operand */
  insn |= s2 & 7;           /* second operand */
  output_w32(insn);
}
2465
/* Emit "fdivs s<d>, s<s1>, s<s2>" (single-precision divide). */
void emit_fdivs(int s1, int s2, int d)
{
  unsigned int insn = 0xee800a00;

  assem_debug("fdivs s%d,s%d,s%d\n", d, s1, s2);
  insn |= ((d & 14) << 11) | ((d & 1) << 22);     /* destination */
  insn |= ((s1 & 14) << 15) | ((s1 & 1) << 7);    /* first operand */
  insn |= ((s2 & 14) >> 1) | ((s2 & 1) << 5);     /* second operand */
  output_w32(insn);
}
2471
/* Emit "fdivd d<d>, d<s1>, d<s2>" (double-precision divide). */
void emit_fdivd(int s1, int s2, int d)
{
  unsigned int insn = 0xee800b00;

  assem_debug("fdivd d%d,d%d,d%d\n", d, s1, s2);
  insn |= (d & 7) << 12;    /* destination */
  insn |= (s1 & 7) << 16;   /* first operand */
  insn |= s2 & 7;           /* second operand */
  output_w32(insn);
}
2477
/* Emit a fixed "fcmps s14, s15" instruction word.
 * NOTE(review): x and y are not used; the operands are hard-coded -
 * confirm callers always compare s14 with s15. */
void emit_fcmps(int x, int y)
{
  const unsigned int fcmps_s14_s15 = 0xeeb47a67;

  assem_debug("fcmps s14, s15\n");
  output_w32(fcmps_s14_s15);
}
2483
/* Emit a fixed "fcmpd d6, d7" instruction word.
 * NOTE(review): x and y are not used; the operands are hard-coded -
 * confirm callers always compare d6 with d7. */
void emit_fcmpd(int x, int y)
{
  const unsigned int fcmpd_d6_d7 = 0xeeb46b47;

  assem_debug("fcmpd d6, d7\n");
  output_w32(fcmpd_d6_d7);
}
2489
/* Emit the fixed "fmstat" instruction word. */
void emit_fmstat()
{
  const unsigned int fmstat_word = 0xeef1fa10;

  assem_debug("fmstat\n");
  output_w32(fmstat_word);
}
2495
2496void emit_bicne_imm(int rs,int imm,int rt)
2497{
2498 u_int armval;
2499 genimm_checked(imm,&armval);
2500 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2501 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2502}
2503
2504void emit_biccs_imm(int rs,int imm,int rt)
2505{
2506 u_int armval;
2507 genimm_checked(imm,&armval);
2508 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2509 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2510}
2511
2512void emit_bicvc_imm(int rs,int imm,int rt)
2513{
2514 u_int armval;
2515 genimm_checked(imm,&armval);
2516 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2517 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2518}
2519
2520void emit_bichi_imm(int rs,int imm,int rt)
2521{
2522 u_int armval;
2523 genimm_checked(imm,&armval);
2524 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2525 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2526}
2527
2528void emit_orrvs_imm(int rs,int imm,int rt)
2529{
2530 u_int armval;
2531 genimm_checked(imm,&armval);
2532 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2533 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2534}
2535
2536void emit_orrne_imm(int rs,int imm,int rt)
2537{
2538 u_int armval;
2539 genimm_checked(imm,&armval);
2540 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2541 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2542}
2543
2544void emit_andne_imm(int rs,int imm,int rt)
2545{
2546 u_int armval;
2547 genimm_checked(imm,&armval);
2548 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2549 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2550}
2551
/* Emit an "addvc pc,pc,#?" instruction word with a zero immediate field.
 * The target `a` is only logged here.
 * NOTE(review): presumably the immediate is patched in later - confirm
 * against the code that resolves this jump. */
void emit_jno_unlikely(int a)
{
  unsigned int insn;

  //emit_jno(a);
  assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
  insn = 0x72800000 | rd_rn_rm(15, 15, 0);
  output_w32(insn);
}
2558
2559// Save registers before function call
2560void save_regs(u_int reglist)
2561{
2562 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2563 if(!reglist) return;
2564 assem_debug("stmia fp,{");
2565 if(reglist&1) assem_debug("r0, ");
2566 if(reglist&2) assem_debug("r1, ");
2567 if(reglist&4) assem_debug("r2, ");
2568 if(reglist&8) assem_debug("r3, ");
2569 if(reglist&0x1000) assem_debug("r12");
2570 assem_debug("}\n");
2571 output_w32(0xe88b0000|reglist);
2572}
2573// Restore registers after function call
2574void restore_regs(u_int reglist)
2575{
2576 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2577 if(!reglist) return;
2578 assem_debug("ldmia fp,{");
2579 if(reglist&1) assem_debug("r0, ");
2580 if(reglist&2) assem_debug("r1, ");
2581 if(reglist&4) assem_debug("r2, ");
2582 if(reglist&8) assem_debug("r3, ");
2583 if(reglist&0x1000) assem_debug("r12");
2584 assem_debug("}\n");
2585 output_w32(0xe89b0000|reglist);
2586}
2587
2588// Write back consts using r14 so we don't disturb the other registers
2589void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2590{
2591 int hr;
2592 for(hr=0;hr<HOST_REGS;hr++) {
2593 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2594 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2595 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2596 int value=constmap[i][hr];
2597 if(value==0) {
2598 emit_zeroreg(HOST_TEMPREG);
2599 }
2600 else {
2601 emit_movimm(value,HOST_TEMPREG);
2602 }
2603 emit_storereg(i_regmap[hr],HOST_TEMPREG);
2604#ifndef FORCE32
2605 if((i_is32>>i_regmap[hr])&1) {
2606 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2607 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2608 }
2609#endif
2610 }
2611 }
2612 }
2613 }
2614}
2615
2616/* Stubs/epilogue */
2617
2618void literal_pool(int n)
2619{
2620 if(!literalcount) return;
2621 if(n) {
2622 if((int)out-literals[0][0]<4096-n) return;
2623 }
2624 u_int *ptr;
2625 int i;
2626 for(i=0;i<literalcount;i++)
2627 {
2628 ptr=(u_int *)literals[i][0];
2629 u_int offset=(u_int)out-(u_int)ptr-8;
2630 assert(offset<4096);
2631 assert(!(offset&3));
2632 *ptr|=offset;
2633 output_w32(literals[i][1]);
2634 }
2635 literalcount=0;
2636}
2637
2638void literal_pool_jumpover(int n)
2639{
2640 if(!literalcount) return;
2641 if(n) {
2642 if((int)out-literals[0][0]<4096-n) return;
2643 }
2644 int jaddr=(int)out;
2645 emit_jmp(0);
2646 literal_pool(0);
2647 set_jump_target(jaddr,(int)out);
2648}
2649
2650emit_extjump2(int addr, int target, int linker)
2651{
2652 u_char *ptr=(u_char *)addr;
2653 assert((ptr[3]&0x0e)==0xa);
2654 emit_loadlp(target,0);
2655 emit_loadlp(addr,1);
2656 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2657 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2658//DEBUG >
2659#ifdef DEBUG_CYCLE_COUNT
2660 emit_readword((int)&last_count,ECX);
2661 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2662 emit_readword((int)&next_interupt,ECX);
2663 emit_writeword(HOST_CCREG,(int)&Count);
2664 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2665 emit_writeword(ECX,(int)&last_count);
2666#endif
2667//DEBUG <
2668 emit_jmp(linker);
2669}
2670
2671emit_extjump(int addr, int target)
2672{
2673 emit_extjump2(addr, target, (int)dyna_linker);
2674}
2675emit_extjump_ds(int addr, int target)
2676{
2677 emit_extjump2(addr, target, (int)dyna_linker_ds);
2678}
2679
2680// put rt_val into rt, potentially making use of rs with value rs_val
2681static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2682{
2683 u_int xor=rs_val^rt_val;
2684 u_int xs;
2685 for(xs=xor;xs!=0&&(xs&3)==0;xs>>=2)
2686 ;
2687 if(xs<0x100)
2688 emit_xorimm(rs,xor,rt);
2689 else
2690 emit_movimm(rt_val,rt);
2691}
2692
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "nothing to move" for that argument.
// trashes r2
static void pass_args(int a0, int a1)
{
  if (a1 == 0) {
    if (a0 == 1) {
      // must swap
      emit_mov(a0, 2);
      emit_mov(a1, 1);
      emit_mov(2, 0);
      return;
    }
    if (a0 != 0) {
      // Second argument currently lives in r0: move it out of the way
      // before r0 is overwritten.
      emit_mov(a1, 1);
      if (a0 >= 0)
        emit_mov(a0, 0);
      return;
    }
  }

  // No conflict: r0 first, then r1.
  if (a0 > 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
2709
2710do_readstub(int n)
2711{
2712 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2713 literal_pool(256);
2714 set_jump_target(stubs[n][1],(int)out);
2715 int type=stubs[n][0];
2716 int i=stubs[n][3];
2717 int rs=stubs[n][4];
2718 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2719 u_int reglist=stubs[n][7];
2720 signed char *i_regmap=i_regs->regmap;
2721 int addr=get_reg(i_regmap,AGEN1+(i&1));
2722 int rth,rt;
2723 int ds;
2724 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2725 rth=get_reg(i_regmap,FTEMP|64);
2726 rt=get_reg(i_regmap,FTEMP);
2727 }else{
2728 rth=get_reg(i_regmap,rt1[i]|64);
2729 rt=get_reg(i_regmap,rt1[i]);
2730 }
2731 assert(rs>=0);
2732#ifdef PCSX
2733 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2734 reglist|=(1<<rs);
2735 for(r=0;r<=12;r++) {
2736 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2737 temp=r; break;
2738 }
2739 }
2740 if(rt>=0)
2741 reglist&=~(1<<rt);
2742 if(temp==-1) {
2743 save_regs(reglist);
2744 regs_saved=1;
2745 temp=(rs==0)?2:0;
2746 }
2747 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2748 temp2=1;
2749 emit_readword((int)&mem_rtab,temp);
2750 emit_shrimm(rs,12,temp2);
2751 emit_readword_dualindexedx4(temp,temp2,temp2);
2752 emit_lsls_imm(temp2,1,temp2);
2753 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2754 switch(type) {
2755 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2756 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2757 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2758 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2759 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2760 }
2761 }
2762 if(regs_saved) {
2763 restore_jump=(int)out;
2764 emit_jcc(0); // jump to reg restore
2765 }
2766 else
2767 emit_jcc(stubs[n][2]); // return address
2768
2769 if(!regs_saved)
2770 save_regs(reglist);
2771 int handler=0;
2772 if(type==LOADB_STUB||type==LOADBU_STUB)
2773 handler=(int)jump_handler_read8;
2774 if(type==LOADH_STUB||type==LOADHU_STUB)
2775 handler=(int)jump_handler_read16;
2776 if(type==LOADW_STUB)
2777 handler=(int)jump_handler_read32;
2778 assert(handler!=0);
2779 pass_args(rs,temp2);
2780 int cc=get_reg(i_regmap,CCREG);
2781 if(cc<0)
2782 emit_loadreg(CCREG,2);
2783 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*stubs[n][6]+2,2);
2784 emit_call(handler);
2785 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2786 switch(type) {
2787 case LOADB_STUB: emit_signextend8(0,rt); break;
2788 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2789 case LOADH_STUB: emit_signextend16(0,rt); break;
2790 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2791 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2792 }
2793 }
2794 if(restore_jump)
2795 set_jump_target(restore_jump,(int)out);
2796 restore_regs(reglist);
2797 emit_jmp(stubs[n][2]); // return address
2798#else // !PCSX
2799 if(addr<0) addr=rt;
2800 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
2801 assert(addr>=0);
2802 int ftable=0;
2803 if(type==LOADB_STUB||type==LOADBU_STUB)
2804 ftable=(int)readmemb;
2805 if(type==LOADH_STUB||type==LOADHU_STUB)
2806 ftable=(int)readmemh;
2807 if(type==LOADW_STUB)
2808 ftable=(int)readmem;
2809#ifndef FORCE32
2810 if(type==LOADD_STUB)
2811 ftable=(int)readmemd;
2812#endif
2813 assert(ftable!=0);
2814 emit_writeword(rs,(int)&address);
2815 //emit_pusha();
2816 save_regs(reglist);
2817#ifndef PCSX
2818 ds=i_regs!=&regs[i];
2819 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2820 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2821 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2822 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2823 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2824#endif
2825 emit_shrimm(rs,16,1);
2826 int cc=get_reg(i_regmap,CCREG);
2827 if(cc<0) {
2828 emit_loadreg(CCREG,2);
2829 }
2830 emit_movimm(ftable,0);
2831 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2832#ifndef PCSX
2833 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2834#endif
2835 //emit_readword((int)&last_count,temp);
2836 //emit_add(cc,temp,cc);
2837 //emit_writeword(cc,(int)&Count);
2838 //emit_mov(15,14);
2839 emit_call((int)&indirect_jump_indexed);
2840 //emit_callreg(rs);
2841 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2842#ifndef PCSX
2843 // We really shouldn't need to update the count here,
2844 // but not doing so causes random crashes...
2845 emit_readword((int)&Count,HOST_TEMPREG);
2846 emit_readword((int)&next_interupt,2);
2847 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2848 emit_writeword(2,(int)&last_count);
2849 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2850 if(cc<0) {
2851 emit_storereg(CCREG,HOST_TEMPREG);
2852 }
2853#endif
2854 //emit_popa();
2855 restore_regs(reglist);
2856 //if((cc=get_reg(regmap,CCREG))>=0) {
2857 // emit_loadreg(CCREG,cc);
2858 //}
2859 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2860 assert(rt>=0);
2861 if(type==LOADB_STUB)
2862 emit_movsbl((int)&readmem_dword,rt);
2863 if(type==LOADBU_STUB)
2864 emit_movzbl((int)&readmem_dword,rt);
2865 if(type==LOADH_STUB)
2866 emit_movswl((int)&readmem_dword,rt);
2867 if(type==LOADHU_STUB)
2868 emit_movzwl((int)&readmem_dword,rt);
2869 if(type==LOADW_STUB)
2870 emit_readword((int)&readmem_dword,rt);
2871 if(type==LOADD_STUB) {
2872 emit_readword((int)&readmem_dword,rt);
2873 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2874 }
2875 }
2876 emit_jmp(stubs[n][2]); // return address
2877#endif // !PCSX
2878}
2879
#ifdef PCSX
// Resolve a guest address at translation time.
//
// table     - first-level lookup table, indexed by addr>>12
// addr      - guest address
// type      - stub type; selects the byte, halfword or word part of a
//             second-level table
// addr_host - out: directly accessible host address, written only when
//             the function returns 0
//
// Returns the memhandler address, or stores a directly accessible address
// in *addr_host and returns 0.
//
// Table entries keep their payload shifted right by one; bit 31 set marks
// "not direct" (a second-level table at level one, a handler at level two).
u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
{
  // 1u rather than 1: shifting a signed 1 into the sign bit is undefined.
  const u_int indirect_flag=1u<<31;
  u_int l1,l2=0;
  l1=((u_int *)table)[addr>>12];
  if((l1&indirect_flag)==0) {
    // Whole page is directly mapped.
    u_int v=l1<<1;
    *addr_host=v+addr;
    return 0;
  }
  else {
    // l1 (shifted) points at a second-level table for this page:
    // words first, then halfwords at 0x1000/4, then bytes after those.
    l1<<=1;
    if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
      l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
    else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
      l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
    else
      l2=((u_int *)l1)[(addr&0xfff)/4];
    if((l2&indirect_flag)==0) {
      u_int v=l2<<1;
      *addr_host=v+(addr&0xfff);
      return 0;
    }
    return l2<<1;
  }
}
#endif
2908
2909inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2910{
2911 int rs=get_reg(regmap,target);
2912 int rth=get_reg(regmap,target|64);
2913 int rt=get_reg(regmap,target);
2914 if(rs<0) rs=get_reg(regmap,-1);
2915 assert(rs>=0);
2916#ifdef PCSX
2917 u_int handler,host_addr=0;
2918 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2919 if (handler==0) {
2920 if(rt<0)
2921 return;
2922 if(addr!=host_addr)
2923 emit_movimm_from(addr,rs,host_addr,rs);
2924 switch(type) {
2925 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2926 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2927 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2928 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2929 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2930 default: assert(0);
2931 }
2932 return;
2933 }
2934
2935 // call a memhandler
2936 if(rt>=0)
2937 reglist&=~(1<<rt);
2938 save_regs(reglist);
2939 if(target==0)
2940 emit_movimm(addr,0);
2941 else if(rs!=0)
2942 emit_mov(rs,0);
2943 int cc=get_reg(regmap,CCREG);
2944 if(cc<0)
2945 emit_loadreg(CCREG,2);
2946 emit_readword((int)&last_count,3);
2947 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2948 emit_add(2,3,3);
2949 emit_writeword(3,(int)&Count);
2950
2951 int offset=(int)handler-(int)out-8;
2952 if(offset<-33554432||offset>=33554432) {
2953 // unreachable memhandler, a plugin func perhaps
2954 emit_movimm(handler,1);
2955 emit_callreg(1);
2956 }
2957 else
2958 emit_call(handler);
2959 if(rt>=0) {
2960 switch(type) {
2961 case LOADB_STUB: emit_signextend8(0,rt); break;
2962 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2963 case LOADH_STUB: emit_signextend16(0,rt); break;
2964 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2965 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2966 default: assert(0);
2967 }
2968 }
2969 restore_regs(reglist);
2970#else // if !PCSX
2971 int ftable=0;
2972 if(type==LOADB_STUB||type==LOADBU_STUB)
2973 ftable=(int)readmemb;
2974 if(type==LOADH_STUB||type==LOADHU_STUB)
2975 ftable=(int)readmemh;
2976 if(type==LOADW_STUB)
2977 ftable=(int)readmem;
2978#ifndef FORCE32
2979 if(type==LOADD_STUB)
2980 ftable=(int)readmemd;
2981#endif
2982 assert(ftable!=0);
2983 if(target==0)
2984 emit_movimm(addr,rs);
2985 emit_writeword(rs,(int)&address);
2986 //emit_pusha();
2987 save_regs(reglist);
2988#ifndef PCSX
2989 if((signed int)addr>=(signed int)0xC0000000) {
2990 // Theoretically we can have a pagefault here, if the TLB has never
2991 // been enabled and the address is outside the range 80000000..BFFFFFFF
2992 // Write out the registers so the pagefault can be handled. This is
2993 // a very rare case and likely represents a bug.
2994 int ds=regmap!=regs[i].regmap;
2995 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2996 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2997 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2998 }
2999#endif
3000 //emit_shrimm(rs,16,1);
3001 int cc=get_reg(regmap,CCREG);
3002 if(cc<0) {
3003 emit_loadreg(CCREG,2);
3004 }
3005 //emit_movimm(ftable,0);
3006 emit_movimm(((u_int *)ftable)[addr>>16],0);
3007 //emit_readword((int)&last_count,12);
3008 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
3009#ifndef PCSX
3010 if((signed int)addr>=(signed int)0xC0000000) {
3011 // Pagefault address
3012 int ds=regmap!=regs[i].regmap;
3013 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3014 }
3015#endif
3016 //emit_add(12,2,2);
3017 //emit_writeword(2,(int)&Count);
3018 //emit_call(((u_int *)ftable)[addr>>16]);
3019 emit_call((int)&indirect_jump);
3020#ifndef PCSX
3021 // We really shouldn't need to update the count here,
3022 // but not doing so causes random crashes...
3023 emit_readword((int)&Count,HOST_TEMPREG);
3024 emit_readword((int)&next_interupt,2);
3025 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
3026 emit_writeword(2,(int)&last_count);
3027 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3028 if(cc<0) {
3029 emit_storereg(CCREG,HOST_TEMPREG);
3030 }
3031#endif
3032 //emit_popa();
3033 restore_regs(reglist);
3034 if(rt>=0) {
3035 if(type==LOADB_STUB)
3036 emit_movsbl((int)&readmem_dword,rt);
3037 if(type==LOADBU_STUB)
3038 emit_movzbl((int)&readmem_dword,rt);
3039 if(type==LOADH_STUB)
3040 emit_movswl((int)&readmem_dword,rt);
3041 if(type==LOADHU_STUB)
3042 emit_movzwl((int)&readmem_dword,rt);
3043 if(type==LOADW_STUB)
3044 emit_readword((int)&readmem_dword,rt);
3045 if(type==LOADD_STUB) {
3046 emit_readword((int)&readmem_dword,rt);
3047 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3048 }
3049 }
3050#endif // !PCSX
3051}
3052
3053do_writestub(int n)
3054{
3055 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3056 literal_pool(256);
3057 set_jump_target(stubs[n][1],(int)out);
3058 int type=stubs[n][0];
3059 int i=stubs[n][3];
3060 int rs=stubs[n][4];
3061 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3062 u_int reglist=stubs[n][7];
3063 signed char *i_regmap=i_regs->regmap;
3064 int addr=get_reg(i_regmap,AGEN1+(i&1));
3065 int rth,rt,r;
3066 int ds;
3067 if(itype[i]==C1LS||itype[i]==C2LS) {
3068 rth=get_reg(i_regmap,FTEMP|64);
3069 rt=get_reg(i_regmap,r=FTEMP);
3070 }else{
3071 rth=get_reg(i_regmap,rs2[i]|64);
3072 rt=get_reg(i_regmap,r=rs2[i]);
3073 }
3074 assert(rs>=0);
3075 assert(rt>=0);
3076#ifdef PCSX
3077 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3078 int reglist2=reglist|(1<<rs)|(1<<rt);
3079 for(rtmp=0;rtmp<=12;rtmp++) {
3080 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3081 temp=rtmp; break;
3082 }
3083 }
3084 if(temp==-1) {
3085 save_regs(reglist);
3086 regs_saved=1;
3087 for(rtmp=0;rtmp<=3;rtmp++)
3088 if(rtmp!=rs&&rtmp!=rt)
3089 {temp=rtmp;break;}
3090 }
3091 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3092 temp2=3;
3093 emit_readword((int)&mem_wtab,temp);
3094 emit_shrimm(rs,12,temp2);
3095 emit_readword_dualindexedx4(temp,temp2,temp2);
3096 emit_lsls_imm(temp2,1,temp2);
3097 switch(type) {
3098 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3099 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3100 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3101 default: assert(0);
3102 }
3103 if(regs_saved) {
3104 restore_jump=(int)out;
3105 emit_jcc(0); // jump to reg restore
3106 }
3107 else
3108 emit_jcc(stubs[n][2]); // return address (invcode check)
3109
3110 if(!regs_saved)
3111 save_regs(reglist);
3112 int handler=0;
3113 switch(type) {
3114 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3115 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3116 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3117 }
3118 assert(handler!=0);
3119 pass_args(rs,rt);
3120 if(temp2!=3)
3121 emit_mov(temp2,3);
3122 int cc=get_reg(i_regmap,CCREG);
3123 if(cc<0)
3124 emit_loadreg(CCREG,2);
3125 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*stubs[n][6]+2,2);
3126 // returns new cycle_count
3127 emit_call(handler);
3128 emit_addimm(0,-CLOCK_DIVIDER*stubs[n][6]-2,cc<0?2:cc);
3129 if(cc<0)
3130 emit_storereg(CCREG,2);
3131 if(restore_jump)
3132 set_jump_target(restore_jump,(int)out);
3133 restore_regs(reglist);
3134 ra=stubs[n][2];
3135 if(!restore_jump) ra+=4*3; // skip invcode check
3136 emit_jmp(ra);
3137#else // if !PCSX
3138 if(addr<0) addr=get_reg(i_regmap,-1);
3139 assert(addr>=0);
3140 int ftable=0;
3141 if(type==STOREB_STUB)
3142 ftable=(int)writememb;
3143 if(type==STOREH_STUB)
3144 ftable=(int)writememh;
3145 if(type==STOREW_STUB)
3146 ftable=(int)writemem;
3147#ifndef FORCE32
3148 if(type==STORED_STUB)
3149 ftable=(int)writememd;
3150#endif
3151 assert(ftable!=0);
3152 emit_writeword(rs,(int)&address);
3153 //emit_shrimm(rs,16,rs);
3154 //emit_movmem_indexedx4(ftable,rs,rs);
3155 if(type==STOREB_STUB)
3156 emit_writebyte(rt,(int)&byte);
3157 if(type==STOREH_STUB)
3158 emit_writehword(rt,(int)&hword);
3159 if(type==STOREW_STUB)
3160 emit_writeword(rt,(int)&word);
3161 if(type==STORED_STUB) {
3162#ifndef FORCE32
3163 emit_writeword(rt,(int)&dword);
3164 emit_writeword(r?rth:rt,(int)&dword+4);
3165#else
3166 printf("STORED_STUB\n");
3167#endif
3168 }
3169 //emit_pusha();
3170 save_regs(reglist);
3171#ifndef PCSX
3172 ds=i_regs!=&regs[i];
3173 int real_rs=get_reg(i_regmap,rs1[i]);
3174 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3175 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3176 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3177 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
3178#endif
3179 emit_shrimm(rs,16,1);
3180 int cc=get_reg(i_regmap,CCREG);
3181 if(cc<0) {
3182 emit_loadreg(CCREG,2);
3183 }
3184 emit_movimm(ftable,0);
3185 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
3186#ifndef PCSX
3187 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3188#endif
3189 //emit_readword((int)&last_count,temp);
3190 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3191 //emit_add(cc,temp,cc);
3192 //emit_writeword(cc,(int)&Count);
3193 emit_call((int)&indirect_jump_indexed);
3194 //emit_callreg(rs);
3195 emit_readword((int)&Count,HOST_TEMPREG);
3196 emit_readword((int)&next_interupt,2);
3197 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3198 emit_writeword(2,(int)&last_count);
3199 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3200 if(cc<0) {
3201 emit_storereg(CCREG,HOST_TEMPREG);
3202 }
3203 //emit_popa();
3204 restore_regs(reglist);
3205 //if((cc=get_reg(regmap,CCREG))>=0) {
3206 // emit_loadreg(CCREG,cc);
3207 //}
3208 emit_jmp(stubs[n][2]); // return address
3209#endif // !PCSX
3210}
3211
// Emit code for a store of guest register `target` to `addr`, an address
// that is already known while the block is being compiled.
//   type    - STOREB_STUB / STOREH_STUB / STOREW_STUB; STORED_STUB is only
//             handled by the non-PCSX path (and only without FORCE32)
//   i       - instruction index (only used by the non-PCSX pagefault code)
//   regmap  - host register map, queried for -1, target, target|64 and CCREG
//   adj     - cycle adjustment: CLOCK_DIVIDER*(adj+1) is added to the cycle
//             count handed to the handler and taken off again afterwards
//   reglist - register list given to save_regs()/restore_regs()
// The function has an implicit int return type but never returns a value.
3212inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3213{
3214 int rs=get_reg(regmap,-1);
3215 int rth=get_reg(regmap,target|64);
3216 int rt=get_reg(regmap,target);
3217 assert(rs>=0);
3218 assert(rt>=0);
3219#ifdef PCSX
3220 u_int handler,host_addr=0;
3221 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
// handler==0: no handler is called; the store is emitted directly through
// rs, after rewriting rs from addr to host_addr when the two differ.
// (Presumably this means the address is backed by host memory - confirm
// against get_direct_memhandler.)
3222 if (handler==0) {
3223 if(addr!=host_addr)
3224 emit_movimm_from(addr,rs,host_addr,rs);
3225 switch(type) {
3226 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3227 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3228 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3229 default: assert(0);
3230 }
3231 return;
3232 }
3233
3234 // call a memhandler
3235 save_regs(reglist);
3236 pass_args(rs,rt);
3237 int cc=get_reg(regmap,CCREG);
// If the cycle count is not held in a host register, load it into r2;
// either way r2 receives the adjusted count and r3 the handler address.
3238 if(cc<0)
3239 emit_loadreg(CCREG,2);
3240 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
3241 emit_movimm(handler,3);
3242 // returns new cycle_count
3243 emit_call((int)jump_handler_write_h);
// Undo the adjustment on the value left in r0 and put it back where the
// cycle count lives (host register cc, or CCREG via r2).
3244 emit_addimm(0,-CLOCK_DIVIDER*(adj+1),cc<0?2:cc);
3245 if(cc<0)
3246 emit_storereg(CCREG,2);
3247 restore_regs(reglist);
3248#else // if !pcsx
// Pick the handler table matching the store width.
3249 int ftable=0;
3250 if(type==STOREB_STUB)
3251 ftable=(int)writememb;
3252 if(type==STOREH_STUB)
3253 ftable=(int)writememh;
3254 if(type==STOREW_STUB)
3255 ftable=(int)writemem;
3256#ifndef FORCE32
3257 if(type==STORED_STUB)
3258 ftable=(int)writememd;
3259#endif
3260 assert(ftable!=0);
// Hand the address and the value to the handler through the globals
// address / byte / hword / word / dword.
3261 emit_writeword(rs,(int)&address);
3262 //emit_shrimm(rs,16,rs);
3263 //emit_movmem_indexedx4(ftable,rs,rs);
3264 if(type==STOREB_STUB)
3265 emit_writebyte(rt,(int)&byte);
3266 if(type==STOREH_STUB)
3267 emit_writehword(rt,(int)&hword);
3268 if(type==STOREW_STUB)
3269 emit_writeword(rt,(int)&word);
3270 if(type==STORED_STUB) {
3271#ifndef FORCE32
3272 emit_writeword(rt,(int)&dword);
3273 emit_writeword(target?rth:rt,(int)&dword+4);
3274#else
3275 printf("STORED_STUB\n");
3276#endif
3277 }
3278 //emit_pusha();
3279 save_regs(reglist);
// NOTE: this #ifndef PCSX is nested inside the #else branch of the
// "#ifdef PCSX" above, so it is always taken when this code is compiled.
3280#ifndef PCSX
3281 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3282 if((signed int)addr>=(signed int)0xC0000000) {
3283 // Theoretically we can have a pagefault here, if the TLB has never
3284 // been enabled and the address is outside the range 80000000..BFFFFFFF
3285 // Write out the registers so the pagefault can be handled. This is
3286 // a very rare case and likely represents a bug.
3287 int ds=regmap!=regs[i].regmap;
3288 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3289 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3290 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3291 }
3292#endif
3293 //emit_shrimm(rs,16,1);
3294 int cc=get_reg(regmap,CCREG);
3295 if(cc<0) {
3296 emit_loadreg(CCREG,2);
3297 }
3298 //emit_movimm(ftable,0);
// The handler is looked up at compile time: entry addr>>16 of the table
// is loaded into r0 as an immediate.
3299 emit_movimm(((u_int *)ftable)[addr>>16],0);
3300 //emit_readword((int)&last_count,12);
3301 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
3302#ifndef PCSX
3303 if((signed int)addr>=(signed int)0xC0000000) {
3304 // Pagefault address
3305 int ds=regmap!=regs[i].regmap;
3306 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3307 }
3308#endif
3309 //emit_add(12,2,2);
3310 //emit_writeword(2,(int)&Count);
3311 //emit_call(((u_int *)ftable)[addr>>16]);
3312 emit_call((int)&indirect_jump);
// After the call: recompute the cycle count as
// Count - CLOCK_DIVIDER*(adj+1) - next_interupt, and store next_interupt
// into last_count.
3313 emit_readword((int)&Count,HOST_TEMPREG);
3314 emit_readword((int)&next_interupt,2);
3315 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
3316 emit_writeword(2,(int)&last_count);
3317 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3318 if(cc<0) {
3319 emit_storereg(CCREG,HOST_TEMPREG);
3320 }
3321 //emit_popa();
3322 restore_regs(reglist);
3323#endif
3324}
3325
// Emit the out-of-line stub for an unaligned store; only SWL (0x2a) and
// SWR (0x2e) are accepted, as enforced by the assert below.
// Fields of stubs[n] read here:
//   [1] location patched to jump to this stub   [2] return address
//   [3] instruction index                       [4] struct regstat pointer
//   [5] host register holding the address       [6] cycle adjustment
//   [7] register list for save_regs/restore_regs
// Because of "#if 1" only the first path is compiled: it passes address and
// value to jump_handle_swl / jump_handle_swr. The "#else" path is dead code.
// The function has an implicit int return type but never returns a value.
3326do_unalignedwritestub(int n)
3327{
3328 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3329 literal_pool(256);
3330 set_jump_target(stubs[n][1],(int)out);
3331
3332 int i=stubs[n][3];
3333 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3334 int addr=stubs[n][5];
3335 u_int reglist=stubs[n][7];
3336 signed char *i_regmap=i_regs->regmap;
3337 int temp2=get_reg(i_regmap,FTEMP);
3338 int rt;
// ds and real_rs are only used by the disabled "#else" path below.
3339 int ds, real_rs;
3340 rt=get_reg(i_regmap,rs2[i]);
3341 assert(rt>=0);
3342 assert(addr>=0);
3343 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// The address register is added to the save list, the FTEMP register is
// removed from it.
// NOTE(review): 1<<temp2 assumes temp2>=0 - TODO confirm FTEMP is always
// allocated here.
3344 reglist|=(1<<addr);
3345 reglist&=~(1<<temp2);
3346
3347#if 1
3348 // don't bother with it and call write handler
3349 save_regs(reglist);
3350 pass_args(addr,rt);
3351 int cc=get_reg(i_regmap,CCREG);
3352 if(cc<0)
3353 emit_loadreg(CCREG,2);
// r2 = cycle count + CLOCK_DIVIDER*stubs[n][6] + 2 going into the handler;
// the same amount is subtracted from r0 after the call.
3354 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*stubs[n][6]+2,2);
3355 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
3356 emit_addimm(0,-CLOCK_DIVIDER*stubs[n][6]-2,cc<0?2:cc);
3357 if(cc<0)
3358 emit_storereg(CCREG,2);
3359 restore_regs(reglist);
3360 emit_jmp(stubs[n][2]); // return address
3361#else
// Disabled alternative: read the aligned word through the readmem table,
// merge the register value into it, and write it back through writemem.
3362 emit_andimm(addr,0xfffffffc,temp2);
3363 emit_writeword(temp2,(int)&address);
3364
3365 save_regs(reglist);
3366#ifndef PCSX
3367 ds=i_regs!=&regs[i];
3368 real_rs=get_reg(i_regmap,rs1[i]);
3369 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3370 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3371 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3372 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
3373#endif
3374 emit_shrimm(addr,16,1);
3375 int cc=get_reg(i_regmap,CCREG);
3376 if(cc<0) {
3377 emit_loadreg(CCREG,2);
3378 }
3379 emit_movimm((u_int)readmem,0);
3380 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
3381#ifndef PCSX
3382 // pagefault address
3383 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3384#endif
3385 emit_call((int)&indirect_jump_indexed);
3386 restore_regs(reglist);
3387
3388 emit_readword((int)&readmem_dword,temp2);
3389 int temp=addr; //hmh
// temp aliases addr: the address register is turned into a bit shift
// ((addr<<3)&24) and reloaded from `address` further down.
3390 emit_shlimm(addr,3,temp);
3391 emit_andimm(temp,24,temp);
3392#ifdef BIG_ENDIAN_MIPS
3393 if (opcode[i]==0x2e) // SWR
3394#else
3395 if (opcode[i]==0x2a) // SWL
3396#endif
3397 emit_xorimm(temp,24,temp);
3398 emit_movimm(-1,HOST_TEMPREG);
3399 if (opcode[i]==0x2a) { // SWL
3400 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3401 emit_orrshr(rt,temp,temp2);
3402 }else{
3403 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3404 emit_orrshl(rt,temp,temp2);
3405 }
3406 emit_readword((int)&address,addr);
3407 emit_writeword(temp2,(int)&word);
3408 //save_regs(reglist); // don't need to, no state changes
3409 emit_shrimm(addr,16,1);
3410 emit_movimm((u_int)writemem,0);
3411 //emit_call((int)&indirect_jump_indexed);
3412 emit_mov(15,14);
3413 emit_readword_dualindexedx4(0,1,15);
3414 emit_readword((int)&Count,HOST_TEMPREG);
3415 emit_readword((int)&next_interupt,2);
3416 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3417 emit_writeword(2,(int)&last_count);
3418 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3419 if(cc<0) {
3420 emit_storereg(CCREG,HOST_TEMPREG);
3421 }
3422 restore_regs(reglist);
3423 emit_jmp(stubs[n][2]); // return address
3424#endif
3425}
3426
/* Debug helper: print seven of the arguments in the order a, b, c, d, ebp,
 * esi, edi, followed in parentheses by the int located just below `edi`
 * in memory. `esp` is accepted but not printed. */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int *edi_slot=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,edi_slot[-1]);
}
3431
3432do_invstub(int n)
3433{
3434 literal_pool(20);
3435 u_int reglist=stubs[n][3];
3436 set_jump_target(stubs[n][1],(int)out);
3437 save_regs(reglist);
3438 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3439 emit_call((int)&invalidate_addr);
3440 restore_regs(reglist);
3441 emit_jmp(stubs[n][2]); // return address
3442}
3443
3444int do_dirty_stub(int i)
3445{
3446 assem_debug("do_dirty_stub %x\n",start+i*4);
3447 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3448 #ifdef PCSX
3449 addr=(u_int)source;
3450 #endif
3451 // Careful about the code output here, verify_dirty needs to parse it.
3452 #ifdef ARMv5_ONLY
3453 emit_loadlp(addr,1);
3454 emit_loadlp((int)copy,2);
3455 emit_loadlp(slen*4,3);
3456 #else
3457 emit_movw(addr&0x0000FFFF,1);
3458 emit_movw(((u_int)copy)&0x0000FFFF,2);
3459 emit_movt(addr&0xFFFF0000,1);
3460 emit_movt(((u_int)copy)&0xFFFF0000,2);
3461 emit_movw(slen*4,3);
3462 #endif
3463 emit_movimm(start+i*4,0);
3464 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3465 int entry=(int)out;
3466 load_regs_entry(i);
3467 if(entry==(int)out) entry=instr_addr[i];
3468 emit_jmp(instr_addr[i]);
3469 return entry;
3470}
3471
3472void do_dirty_stub_ds()
3473{
3474 // Careful about the code output here, verify_dirty needs to parse it.
3475 #ifdef ARMv5_ONLY
3476 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3477 emit_loadlp((int)copy,2);
3478 emit_loadlp(slen*4,3);
3479 #else
3480 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3481 emit_movw(((u_int)copy)&0x0000FFFF,2);
3482 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3483 emit_movt(((u_int)copy)&0xFFFF0000,2);
3484 emit_movw(slen*4,3);
3485 #endif
3486 emit_movimm(start+1,0);
3487 emit_call((int)&verify_code_ds);
3488}
3489
3490do_cop1stub(int n)
3491{
3492 literal_pool(256);
3493 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3494 set_jump_target(stubs[n][1],(int)out);
3495 int i=stubs[n][3];
3496// int rs=stubs[n][4];
3497 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3498 int ds=stubs[n][6];
3499 if(!ds) {
3500 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3501 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3502 }
3503 //else {printf("fp exception in delay slot\n");}
3504 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3505 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3506 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3507 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3508 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3509}
3510
3511/* TLB */
3512
3513int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3514{
3515 if(c) {
3516 if((signed int)addr>=(signed int)0xC0000000) {
3517 // address_generation already loaded the const
3518 emit_readword_dualindexedx4(FP,map,map);
3519 }
3520 else
3521 return -1; // No mapping
3522 }
3523 else {
3524 assert(s!=map);
3525 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3526 emit_addsr12(map,s,map);
3527 // Schedule this while we wait on the load
3528 //if(x) emit_xorimm(s,x,ar);
3529 if(shift>=0) emit_shlimm(s,3,shift);
3530 if(~a) emit_andimm(s,a,ar);
3531 emit_readword_dualindexedx4(FP,map,map);
3532 }
3533 return map;
3534}
3535int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3536{
3537 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3538 emit_test(map,map);
3539 *jaddr=(int)out;
3540 emit_js(0);
3541 }
3542 return map;
3543}
3544
3545int gen_tlb_addr_r(int ar, int map) {
3546 if(map>=0) {
3547 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3548 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3549 }
3550}
3551
3552int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3553{
3554 if(c) {
3555 if(addr<0x80800000||addr>=0xC0000000) {
3556 // address_generation already loaded the const
3557 emit_readword_dualindexedx4(FP,map,map);
3558 }
3559 else
3560 return -1; // No mapping
3561 }
3562 else {
3563 assert(s!=map);
3564 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3565 emit_addsr12(map,s,map);
3566 // Schedule this while we wait on the load
3567 //if(x) emit_xorimm(s,x,ar);
3568 emit_readword_dualindexedx4(FP,map,map);
3569 }
3570 return map;
3571}
3572int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3573{
3574 if(!c||addr<0x80800000||addr>=0xC0000000) {
3575 emit_testimm(map,0x40000000);
3576 *jaddr=(int)out;
3577 emit_jne(0);
3578 }
3579}
3580
3581int gen_tlb_addr_w(int ar, int map) {
3582 if(map>=0) {
3583 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3584 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3585 }
3586}
3587
3588// Generate the address of the memory_map entry, relative to dynarec_local
3589generate_map_const(u_int addr,int reg) {
3590 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3591 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3592}
3593
3594/* Special assem */
3595
3596void shift_assemble_arm(int i,struct regstat *i_regs)
3597{
3598 if(rt1[i]) {
3599 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3600 {
3601 signed char s,t,shift;
3602 t=get_reg(i_regs->regmap,rt1[i]);
3603 s=get_reg(i_regs->regmap,rs1[i]);
3604 shift=get_reg(i_regs->regmap,rs2[i]);
3605 if(t>=0){
3606 if(rs1[i]==0)
3607 {
3608 emit_zeroreg(t);
3609 }
3610 else if(rs2[i]==0)
3611 {
3612 assert(s>=0);
3613 if(s!=t) emit_mov(s,t);
3614 }
3615 else
3616 {
3617 emit_andimm(shift,31,HOST_TEMPREG);
3618 if(opcode2[i]==4) // SLLV
3619 {
3620 emit_shl(s,HOST_TEMPREG,t);
3621 }
3622 if(opcode2[i]==6) // SRLV
3623 {
3624 emit_shr(s,HOST_TEMPREG,t);
3625 }
3626 if(opcode2[i]==7) // SRAV
3627 {
3628 emit_sar(s,HOST_TEMPREG,t);
3629 }
3630 }
3631 }
3632 } else { // DSLLV/DSRLV/DSRAV
3633 signed char sh,sl,th,tl,shift;
3634 th=get_reg(i_regs->regmap,rt1[i]|64);
3635 tl=get_reg(i_regs->regmap,rt1[i]);
3636 sh=get_reg(i_regs->regmap,rs1[i]|64);
3637 sl=get_reg(i_regs->regmap,rs1[i]);
3638 shift=get_reg(i_regs->regmap,rs2[i]);
3639 if(tl>=0){
3640 if(rs1[i]==0)
3641 {
3642 emit_zeroreg(tl);
3643 if(th>=0) emit_zeroreg(th);
3644 }
3645 else if(rs2[i]==0)
3646 {
3647 assert(sl>=0);
3648 if(sl!=tl) emit_mov(sl,tl);
3649 if(th>=0&&sh!=th) emit_mov(sh,th);
3650 }
3651 else
3652 {
3653 // FIXME: What if shift==tl ?
3654 assert(shift!=tl);
3655 int temp=get_reg(i_regs->regmap,-1);
3656 int real_th=th;
3657 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3658 assert(sl>=0);
3659 assert(sh>=0);
3660 emit_andimm(shift,31,HOST_TEMPREG);
3661 if(opcode2[i]==0x14) // DSLLV
3662 {
3663 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3664 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3665 emit_orrshr(sl,HOST_TEMPREG,th);
3666 emit_andimm(shift,31,HOST_TEMPREG);
3667 emit_testimm(shift,32);
3668 emit_shl(sl,HOST_TEMPREG,tl);
3669 if(th>=0) emit_cmovne_reg(tl,th);
3670 emit_cmovne_imm(0,tl);
3671 }
3672 if(opcode2[i]==0x16) // DSRLV
3673 {
3674 assert(th>=0);
3675 emit_shr(sl,HOST_TEMPREG,tl);
3676 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3677 emit_orrshl(sh,HOST_TEMPREG,tl);
3678 emit_andimm(shift,31,HOST_TEMPREG);
3679 emit_testimm(shift,32);
3680 emit_shr(sh,HOST_TEMPREG,th);
3681 emit_cmovne_reg(th,tl);
3682 if(real_th>=0) emit_cmovne_imm(0,th);
3683 }
3684 if(opcode2[i]==0x17) // DSRAV
3685 {
3686 assert(th>=0);
3687 emit_shr(sl,HOST_TEMPREG,tl);
3688 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3689 if(real_th>=0) {
3690 assert(temp>=0);
3691 emit_sarimm(th,31,temp);
3692 }
3693 emit_orrshl(sh,HOST_TEMPREG,tl);
3694 emit_andimm(shift,31,HOST_TEMPREG);
3695 emit_testimm(shift,32);
3696 emit_sar(sh,HOST_TEMPREG,th);
3697 emit_cmovne_reg(th,tl);
3698 if(real_th>=0) emit_cmovne_reg(temp,th);
3699 }
3700 }
3701 }
3702 }
3703 }
3704}
3705
3706#ifdef PCSX
3707static void speculate_mov(int rs,int rt)
3708{
3709 if(rt!=0) {
3710 smrv_strong_next|=1<<rt;
3711 smrv[rt]=smrv[rs];
3712 }
3713}
3714
3715static void speculate_mov_weak(int rs,int rt)
3716{
3717 if(rt!=0) {
3718 smrv_weak_next|=1<<rt;
3719 smrv[rt]=smrv[rs];
3720 }
3721}
3722
3723static void speculate_register_values(int i)
3724{
3725 if(i==0) {
3726 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3727 // gp,sp are likely to stay the same throughout the block
3728 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3729 smrv_weak_next=~smrv_strong_next;
3730 //printf(" llr %08x\n", smrv[4]);
3731 }
3732 smrv_strong=smrv_strong_next;
3733 smrv_weak=smrv_weak_next;
3734 switch(itype[i]) {
3735 case ALU:
3736 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3737 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3738 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3739 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3740 else {
3741 smrv_strong_next&=~(1<<rt1[i]);
3742 smrv_weak_next&=~(1<<rt1[i]);
3743 }
3744 break;
3745 case SHIFTIMM:
3746 smrv_strong_next&=~(1<<rt1[i]);
3747 smrv_weak_next&=~(1<<rt1[i]);
3748 // fallthrough
3749 case IMM16:
3750 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3751 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3752 if(hr>=0) {
3753 if(get_final_value(hr,i,&value))
3754 smrv[rt1[i]]=value;
3755 else smrv[rt1[i]]=constmap[i][hr];
3756 smrv_strong_next|=1<<rt1[i];
3757 }
3758 }
3759 else {
3760 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3761 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3762 }
3763 break;
3764 case LOAD:
3765 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3766 // special case for BIOS
3767 smrv[rt1[i]]=0xa0000000;
3768 smrv_strong_next|=1<<rt1[i];
3769 break;
3770 }
3771 // fallthrough
3772 case SHIFT:
3773 case LOADLR:
3774 case MOV:
3775 smrv_strong_next&=~(1<<rt1[i]);
3776 smrv_weak_next&=~(1<<rt1[i]);
3777 break;
3778 case COP0:
3779 case COP2:
3780 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3781 smrv_strong_next&=~(1<<rt1[i]);
3782 smrv_weak_next&=~(1<<rt1[i]);
3783 }
3784 break;
3785 case C2LS:
3786 if (opcode[i]==0x32) { // LWC2
3787 smrv_strong_next&=~(1<<rt1[i]);
3788 smrv_weak_next&=~(1<<rt1[i]);
3789 }
3790 break;
3791 }
3792#if 0
3793 int r=4;
3794 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3795 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3796#endif
3797}
3798
// Memory region classes produced by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default class; 0 selects the plain RAM_SIZE compare
  MTYPE_8020 = 1, // 0x80200000 .. 0x807fffff
  MTYPE_0000 = 2, // below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000 .. 0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000 .. 0x1f800fff
};
3806
3807static int get_ptr_mem_type(u_int a)
3808{
3809 if(a < 0x00200000) {
3810 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3811 // return wrong, must use memhandler for BIOS self-test to pass
3812 // 007 does similar stuff from a00 mirror, weird stuff
3813 return MTYPE_8000;
3814 return MTYPE_0000;
3815 }
3816 if(0x1f800000 <= a && a < 0x1f801000)
3817 return MTYPE_1F80;
3818 if(0x80200000 <= a && a < 0x80800000)
3819 return MTYPE_8020;
3820 if(0xa0000000 <= a && a < 0xa0200000)
3821 return MTYPE_A000;
3822 return MTYPE_8000;
3823}
3824#endif
3825
3826static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3827{
3828 int jaddr,type=0;
3829
3830#ifdef PCSX
3831 int mr=rs1[i];
3832 if(((smrv_strong|smrv_weak)>>mr)&1) {
3833 type=get_ptr_mem_type(smrv[mr]);
3834 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3835 }
3836 else {
3837 // use the mirror we are running on
3838 type=get_ptr_mem_type(start);
3839 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3840 }
3841
3842 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3843 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3844 addr=*addr_reg_override=HOST_TEMPREG;
3845 type=0;
3846 }
3847 else if(type==MTYPE_0000) { // RAM 0 mirror
3848 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3849 addr=*addr_reg_override=HOST_TEMPREG;
3850 type=0;
3851 }
3852 else if(type==MTYPE_A000) { // RAM A mirror
3853 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3854 addr=*addr_reg_override=HOST_TEMPREG;
3855 type=0;
3856 }
3857 else if(type==MTYPE_1F80) { // scratchpad
3858 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3859 emit_cmpimm(HOST_TEMPREG,0x1000);
3860 jaddr=(int)out;
3861 emit_jc(0);
3862 }
3863#endif
3864
3865 if(type==0)
3866 {
3867 emit_cmpimm(addr,RAM_SIZE);
3868 jaddr=(int)out;
3869 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3870 // Hint to branch predictor that the branch is unlikely to be taken
3871 if(rs1[i]>=28)
3872 emit_jno_unlikely(0);
3873 else
3874 #endif
3875 emit_jno(0);
3876 }
3877
3878 return jaddr;
3879}
3880
3881#define shift_assemble shift_assemble_arm
3882
// Assemble an unaligned load: LWL (0x22) / LWR (0x26) and the 64-bit
// LDL (0x1A) / LDR (0x1B).
// The aligned word (or doubleword) containing the address is loaded into the
// FTEMP register(s), shifted according to the low address bits, and merged
// into the destination register, whose untouched bytes are kept.
//   i      - instruction index
//   i_regs - register state for this instruction
3883void loadlr_assemble_arm(int i,struct regstat *i_regs)
3884{
3885 int s,th,tl,temp,temp2,addr,map=-1;
3886 int offset;
3887 int jaddr=0;
3888 int memtarget=0,c=0;
3889 int fastload_reg_override=0;
3890 u_int hr,reglist=0;
3891 th=get_reg(i_regs->regmap,rt1[i]|64);
3892 tl=get_reg(i_regs->regmap,rt1[i]);
3893 s=get_reg(i_regs->regmap,rs1[i]);
3894 temp=get_reg(i_regs->regmap,-1);
3895 temp2=get_reg(i_regs->regmap,FTEMP);
3896 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3897 assert(addr<0);
3898 offset=imm[i];
// reglist: every host register that currently has a mapping, plus temp.
3899 for(hr=0;hr<HOST_REGS;hr++) {
3900 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3901 }
// NOTE(review): 1<<temp assumes temp>=0 - TODO confirm a temporary is
// always allocated for this instruction type.
3902 reglist|=1<<temp;
// NOTE(review): c is still 0 at this point (it is only assigned just
// below), so the "||c" term cannot take effect here.
3903 if(offset||s<0||c) addr=temp2;
3904 else addr=s;
3905 if(s>=0) {
3906 c=(i_regs->wasconst>>s)&1;
3907 if(c) {
// Constant address: memtarget is set when the address, compared as signed,
// lies below 0x80000000+RAM_SIZE (or at/above 0xC0000000 when using the TLB).
3908 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3909 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3910 }
3911 }
3912 if(!using_tlb) {
3913 if(!c) {
3914 #ifdef RAM_OFFSET
3915 map=get_reg(i_regs->regmap,ROREG);
3916 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3917 #endif
// temp = address<<3 (later masked into a bit shift); temp2 = address
// rounded down to the containing word / doubleword.
3918 emit_shlimm(addr,3,temp);
3919 if (opcode[i]==0x22||opcode[i]==0x26) {
3920 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3921 }else{
3922 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3923 }
3924 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3925 }
3926 else {
// Constant address: the bit shift is computed at compile time.
3927 if (opcode[i]==0x22||opcode[i]==0x26) {
3928 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3929 }else{
3930 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3931 }
3932 }
3933 }else{ // using tlb
3934 int a;
3935 if(c) {
3936 a=-1;
3937 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3938 a=0xFFFFFFFC; // LWL/LWR
3939 }else{
3940 a=0xFFFFFFF8; // LDL/LDR
3941 }
3942 map=get_reg(i_regs->regmap,TLREG);
3943 assert(map>=0);
3944 reglist&=~(1<<map);
3945 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3946 if(c) {
3947 if (opcode[i]==0x22||opcode[i]==0x26) {
3948 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3949 }else{
3950 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3951 }
3952 }
3953 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3954 }
3955 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
// Load the aligned word: directly (with a stub for the slow path when a
// range-check branch was emitted), or through inline_readstub for a
// constant address that is not a memory target.
3956 if(!c||memtarget) {
3957 int a=temp2;
3958 if(fastload_reg_override) a=fastload_reg_override;
3959 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3960 emit_readword_indexed_tlb(0,a,map,temp2);
3961 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3962 }
3963 else
3964 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3965 if(rt1[i]) {
3966 assert(tl>=0);
// Merge: shift the loaded word by temp bits, clear the corresponding bits
// of tl using an all-ones mask shifted the same way, then OR the two.
3967 emit_andimm(temp,24,temp);
3968#ifdef BIG_ENDIAN_MIPS
3969 if (opcode[i]==0x26) // LWR
3970#else
3971 if (opcode[i]==0x22) // LWL
3972#endif
3973 emit_xorimm(temp,24,temp);
3974 emit_movimm(-1,HOST_TEMPREG);
3975 if (opcode[i]==0x26) {
3976 emit_shr(temp2,temp,temp2);
3977 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3978 }else{
3979 emit_shl(temp2,temp,temp2);
3980 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3981 }
3982 emit_or(temp2,tl,tl);
3983 }
3984 //emit_storereg(rt1[i],tl); // DEBUG
3985 }
3986 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3987 // FIXME: little endian, fastload_reg_override
3988 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3989 if(!c||memtarget) {
3990 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3991 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3992 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3993 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3994 }
3995 else
3996 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3997 if(rt1[i]) {
3998 assert(th>=0);
3999 assert(tl>=0);
// Bit 5 of the shift (tested here) drives the conditional eq/ne emits
// below; the remaining bits are reduced to a 0..24 shift.
4000 emit_testimm(temp,32);
4001 emit_andimm(temp,24,temp);
4002 if (opcode[i]==0x1A) { // LDL
4003 emit_rsbimm(temp,32,HOST_TEMPREG);
4004 emit_shl(temp2h,temp,temp2h);
4005 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4006 emit_movimm(-1,HOST_TEMPREG);
4007 emit_shl(temp2,temp,temp2);
4008 emit_cmove_reg(temp2h,th);
4009 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4010 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4011 emit_orreq(temp2,tl,tl);
4012 emit_orrne(temp2,th,th);
4013 }
4014 if (opcode[i]==0x1B) { // LDR
4015 emit_xorimm(temp,24,temp);
4016 emit_rsbimm(temp,32,HOST_TEMPREG);
4017 emit_shr(temp2,temp,temp2);
4018 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4019 emit_movimm(-1,HOST_TEMPREG);
4020 emit_shr(temp2h,temp,temp2h);
4021 emit_cmovne_reg(temp2,tl);
4022 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4023 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4024 emit_orrne(temp2h,th,th);
4025 emit_orreq(temp2h,tl,tl);
4026 }
4027 }
4028 }
4029}
4030#define loadlr_assemble loadlr_assemble_arm
4031
// Assemble a coprocessor-0 instruction.
//   opcode2 == 0    : MFC0 - read a COP0 register into the destination
//   opcode2 == 4    : MTC0 - write a COP0 register through MTC0/pcsx_mtc0,
//                     with cycle-count synchronisation for registers
//                     9/11/12/13 and a pending-interrupt check for 12/13
//   opcode2 == 0x10 : TLB operations (unless DISABLE_TLB), RFE (PCSX) or
//                     ERET (non-PCSX), selected by the low 6 opcode bits
//   i      - instruction index
//   i_regs - register state for this instruction
4032void cop0_assemble(int i,struct regstat *i_regs)
4033{
4034 if(opcode2[i]==0) // MFC0
4035 {
4036 signed char t=get_reg(i_regs->regmap,rt1[i]);
4037 char copr=(source[i]>>11)&0x1f;
4038 //assert(t>=0); // Why does this happen? OOT is weird
4039 if(t>=0&&rt1[i]!=0) {
4040#ifdef MUPEN64
4041 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4042 emit_movimm((source[i]>>11)&0x1f,1);
4043 emit_writeword(0,(int)&PC);
4044 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
// Register 9: Count is brought up to date (last_count + cycle register +
// pending cycles) before MFC0 is called.
4045 if(copr==9) {
4046 emit_readword((int)&last_count,ECX);
4047 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4048 emit_add(HOST_CCREG,ECX,HOST_CCREG);
4049 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
4050 emit_writeword(HOST_CCREG,(int)&Count);
4051 }
4052 emit_call((int)MFC0);
4053 emit_readword((int)&readmem_dword,t);
4054#else
// Non-MUPEN64: the register is read straight from the reg_cop0 array.
4055 emit_readword((int)&reg_cop0+copr*4,t);
4056#endif
4057 }
4058 }
4059 else if(opcode2[i]==4) // MTC0
4060 {
4061 signed char s=get_reg(i_regs->regmap,rs1[i]);
4062 char copr=(source[i]>>11)&0x1f;
4063 assert(s>=0);
// The value to write is handed over through readmem_dword.
4064 emit_writeword(s,(int)&readmem_dword);
4065 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4066#ifdef MUPEN64
4067 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4068 emit_movimm((source[i]>>11)&0x1f,1);
4069 emit_writeword(0,(int)&PC);
4070 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4071#endif
// Registers 9, 11, 12 and 13: store an up-to-date Count before the call.
4072 if(copr==9||copr==11||copr==12||copr==13) {
4073 emit_readword((int)&last_count,ECX);
4074 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4075 emit_add(HOST_CCREG,ECX,HOST_CCREG);
4076 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
4077 emit_writeword(HOST_CCREG,(int)&Count);
4078 }
4079 // What a mess. The status register (12) can enable interrupts,
4080 // so needs a special case to handle a pending interrupt.
4081 // The interrupt must be taken immediately, because a subsequent
4082 // instruction might disable interrupts again.
4083 if(copr==12||copr==13) {
4084#ifdef PCSX
4085 if (is_delayslot) {
4086 // burn cycles to cause cc_interrupt, which will
4087 // reschedule next_interupt. Relies on CCREG from above.
4088 assem_debug("MTC0 DS %d\n", copr);
4089 emit_writeword(HOST_CCREG,(int)&last_count);
4090 emit_movimm(0,HOST_CCREG);
4091 emit_storereg(CCREG,HOST_CCREG);
4092 emit_movimm(copr,0);
4093 emit_call((int)pcsx_mtc0_ds);
4094 return;
4095 }
4096#endif
// pcaddr = address of the next instruction, pending_exception = 0.
4097 emit_movimm(start+i*4+4,0);
4098 emit_movimm(0,1);
4099 emit_writeword(0,(int)&pcaddr);
4100 emit_writeword(1,(int)&pending_exception);
4101 }
4102 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4103 //else
4104#ifdef PCSX
4105 emit_movimm(copr,0);
4106 emit_call((int)pcsx_mtc0);
4107#else
4108 emit_call((int)MTC0);
4109#endif
// After the call: rebuild the cycle register from Count and next_interupt,
// and store next_interupt into last_count.
4110 if(copr==9||copr==11||copr==12||copr==13) {
4111 emit_readword((int)&Count,HOST_CCREG);
4112 emit_readword((int)&next_interupt,ECX);
4113 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
4114 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
4115 emit_writeword(ECX,(int)&last_count);
4116 emit_storereg(CCREG,HOST_CCREG);
4117 }
4118 if(copr==12||copr==13) {
4119 assert(!is_delayslot);
// pending_exception is read into r14 here and tested after the source
// register(s) have been reloaded below.
4120 emit_readword((int)&pending_exception,14);
4121 }
4122 emit_loadreg(rs1[i],s);
4123 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4124 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
4125 if(copr==12||copr==13) {
4126 emit_test(14,14);
4127 emit_jne((int)&do_interrupt);
4128 }
4129 cop1_usable=0;
4130 }
4131 else
4132 {
4133 assert(opcode2[i]==0x10);
4134#ifndef DISABLE_TLB
4135 if((source[i]&0x3f)==0x01) // TLBR
4136 emit_call((int)TLBR);
4137 if((source[i]&0x3f)==0x02) // TLBWI
4138 emit_call((int)TLBWI_new);
4139 if((source[i]&0x3f)==0x06) { // TLBWR
4140 // The TLB entry written by TLBWR is dependent on the count,
4141 // so update the cycle count
4142 emit_readword((int)&last_count,ECX);
4143 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4144 emit_add(HOST_CCREG,ECX,HOST_CCREG);
4145 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
4146 emit_writeword(HOST_CCREG,(int)&Count);
4147 emit_call((int)TLBWR_new);
4148 }
4149 if((source[i]&0x3f)==0x08) // TLBP
4150 emit_call((int)TLBP);
4151#endif
4152#ifdef PCSX
4153 if((source[i]&0x3f)==0x10) // RFE
4154 {
// Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
4155 emit_readword((int)&Status,0);
4156 emit_andimm(0,0x3c,1);
4157 emit_andimm(0,~0xf,0);
4158 emit_orrshr_imm(1,2,0);
4159 emit_writeword(0,(int)&Status);
4160 }
4161#else
4162 if((source[i]&0x3f)==0x18) // ERET
4163 {
4164 int count=ccadj[i];
4165 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4166 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
4167 emit_jmp((int)jump_eret);
4168 }
4169#endif
4170 }
4171}
4172
4173static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4174{
4175 switch (copr) {
4176 case 1:
4177 case 3:
4178 case 5:
4179 case 8:
4180 case 9:
4181 case 10:
4182 case 11:
4183 emit_readword((int)&reg_cop2d[copr],tl);
4184 emit_signextend16(tl,tl);
4185 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4186 break;
4187 case 7:
4188 case 16:
4189 case 17:
4190 case 18:
4191 case 19:
4192 emit_readword((int)&reg_cop2d[copr],tl);
4193 emit_andimm(tl,0xffff,tl);
4194 emit_writeword(tl,(int)&reg_cop2d[copr]);
4195 break;
4196 case 15:
4197 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4198 emit_writeword(tl,(int)&reg_cop2d[copr]);
4199 break;
4200 case 28:
4201 case 29:
4202 emit_readword((int)&reg_cop2d[9],temp);
4203 emit_testimm(temp,0x8000); // do we need this?
4204 emit_andimm(temp,0xf80,temp);
4205 emit_andne_imm(temp,0,temp);
4206 emit_shrimm(temp,7,tl);
4207 emit_readword((int)&reg_cop2d[10],temp);
4208 emit_testimm(temp,0x8000);
4209 emit_andimm(temp,0xf80,temp);
4210 emit_andne_imm(temp,0,temp);
4211 emit_orrshr_imm(temp,2,tl);
4212 emit_readword((int)&reg_cop2d[11],temp);
4213 emit_testimm(temp,0x8000);
4214 emit_andimm(temp,0xf80,temp);
4215 emit_andne_imm(temp,0,temp);
4216 emit_orrshl_imm(temp,3,tl);
4217 emit_writeword(tl,(int)&reg_cop2d[copr]);
4218 break;
4219 default:
4220 emit_readword((int)&reg_cop2d[copr],tl);
4221 break;
4222 }
4223}
4224
4225static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4226{
4227 switch (copr) {
4228 case 15:
4229 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4230 emit_writeword(sl,(int)&reg_cop2d[copr]);
4231 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4232 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4233 emit_writeword(sl,(int)&reg_cop2d[14]);
4234 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4235 break;
4236 case 28:
4237 emit_andimm(sl,0x001f,temp);
4238 emit_shlimm(temp,7,temp);
4239 emit_writeword(temp,(int)&reg_cop2d[9]);
4240 emit_andimm(sl,0x03e0,temp);
4241 emit_shlimm(temp,2,temp);
4242 emit_writeword(temp,(int)&reg_cop2d[10]);
4243 emit_andimm(sl,0x7c00,temp);
4244 emit_shrimm(temp,3,temp);
4245 emit_writeword(temp,(int)&reg_cop2d[11]);
4246 emit_writeword(sl,(int)&reg_cop2d[28]);
4247 break;
4248 case 30:
4249 emit_movs(sl,temp);
4250 emit_mvnmi(temp,temp);
4251 emit_clz(temp,temp);
4252 emit_writeword(sl,(int)&reg_cop2d[30]);
4253 emit_writeword(temp,(int)&reg_cop2d[31]);
4254 break;
4255 case 31:
4256 break;
4257 default:
4258 emit_writeword(sl,(int)&reg_cop2d[copr]);
4259 break;
4260 }
4261}
4262
4263void cop2_assemble(int i,struct regstat *i_regs)
4264{
4265 u_int copr=(source[i]>>11)&0x1f;
4266 signed char temp=get_reg(i_regs->regmap,-1);
4267 if (opcode2[i]==0) { // MFC2
4268 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4269 if(tl>=0&&rt1[i]!=0)
4270 cop2_get_dreg(copr,tl,temp);
4271 }
4272 else if (opcode2[i]==4) { // MTC2
4273 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4274 cop2_put_dreg(copr,sl,temp);
4275 }
4276 else if (opcode2[i]==2) // CFC2
4277 {
4278 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4279 if(tl>=0&&rt1[i]!=0)
4280 emit_readword((int)&reg_cop2c[copr],tl);
4281 }
4282 else if (opcode2[i]==6) // CTC2
4283 {
4284 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4285 switch(copr) {
4286 case 4:
4287 case 12:
4288 case 20:
4289 case 26:
4290 case 27:
4291 case 29:
4292 case 30:
4293 emit_signextend16(sl,temp);
4294 break;
4295 case 31:
4296 //value = value & 0x7ffff000;
4297 //if (value & 0x7f87e000) value |= 0x80000000;
4298 emit_shrimm(sl,12,temp);
4299 emit_shlimm(temp,12,temp);
4300 emit_testimm(temp,0x7f000000);
4301 emit_testeqimm(temp,0x00870000);
4302 emit_testeqimm(temp,0x0000e000);
4303 emit_orrne_imm(temp,0x80000000,temp);
4304 break;
4305 default:
4306 temp=sl;
4307 break;
4308 }
4309 emit_writeword(temp,(int)&reg_cop2c[copr]);
4310 assert(sl>=0);
4311 }
4312}
4313
4314void c2op_assemble(int i,struct regstat *i_regs)
4315{
4316 signed char temp=get_reg(i_regs->regmap,-1);
4317 u_int c2op=source[i]&0x3f;
4318 u_int hr,reglist=0;
4319 int need_flags;
4320 for(hr=0;hr<HOST_REGS;hr++) {
4321 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4322 }
4323 if(i==0||itype[i-1]!=C2OP)
4324 save_regs(reglist);
4325
4326 if (gte_handlers[c2op]!=NULL) {
4327 int cc=get_reg(i_regs->regmap,CCREG);
4328 emit_movimm(source[i],1); // opcode
4329 if (cc>=0&&gte_cycletab[c2op])
4330 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
4331 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4332 emit_writeword(1,(int)&psxRegs.code);
4333 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
4334 assem_debug("gte unneeded %016llx, need_flags %d\n",gte_unneeded[i+1],need_flags);
4335#ifdef ARMv5_ONLY
4336 // let's take more risk here
4337 need_flags=need_flags&&gte_reads_flags;
4338#endif
4339 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4340 }
4341
4342 if(i>=slen-1||itype[i+1]!=C2OP)
4343 restore_regs(reglist);
4344}
4345
4346void cop1_unusable(int i,struct regstat *i_regs)
4347{
4348 // XXX: should just just do the exception instead
4349 if(!cop1_usable) {
4350 int jaddr=(int)out;
4351 emit_jmp(0);
4352 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4353 cop1_usable=1;
4354 }
4355}
4356
4357void cop1_assemble(int i,struct regstat *i_regs)
4358{
4359#ifndef DISABLE_COP1
4360 // Check cop1 unusable
4361 if(!cop1_usable) {
4362 signed char rs=get_reg(i_regs->regmap,CSREG);
4363 assert(rs>=0);
4364 emit_testimm(rs,0x20000000);
4365 int jaddr=(int)out;
4366 emit_jeq(0);
4367 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4368 cop1_usable=1;
4369 }
4370 if (opcode2[i]==0) { // MFC1
4371 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4372 if(tl>=0) {
4373 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4374 emit_readword_indexed(0,tl,tl);
4375 }
4376 }
4377 else if (opcode2[i]==1) { // DMFC1
4378 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4379 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4380 if(tl>=0) {
4381 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4382 if(th>=0) emit_readword_indexed(4,tl,th);
4383 emit_readword_indexed(0,tl,tl);
4384 }
4385 }
4386 else if (opcode2[i]==4) { // MTC1
4387 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4388 signed char temp=get_reg(i_regs->regmap,-1);
4389 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4390 emit_writeword_indexed(sl,0,temp);
4391 }
4392 else if (opcode2[i]==5) { // DMTC1
4393 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4394 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4395 signed char temp=get_reg(i_regs->regmap,-1);
4396 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4397 emit_writeword_indexed(sh,4,temp);
4398 emit_writeword_indexed(sl,0,temp);
4399 }
4400 else if (opcode2[i]==2) // CFC1
4401 {
4402 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4403 if(tl>=0) {
4404 u_int copr=(source[i]>>11)&0x1f;
4405 if(copr==0) emit_readword((int)&FCR0,tl);
4406 if(copr==31) emit_readword((int)&FCR31,tl);
4407 }
4408 }
4409 else if (opcode2[i]==6) // CTC1
4410 {
4411 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4412 u_int copr=(source[i]>>11)&0x1f;
4413 assert(sl>=0);
4414 if(copr==31)
4415 {
4416 emit_writeword(sl,(int)&FCR31);
4417 // Set the rounding mode
4418 //FIXME
4419 //char temp=get_reg(i_regs->regmap,-1);
4420 //emit_andimm(sl,3,temp);
4421 //emit_fldcw_indexed((int)&rounding_modes,temp);
4422 }
4423 }
4424#else
4425 cop1_unusable(i, i_regs);
4426#endif
4427}
4428
4429void fconv_assemble_arm(int i,struct regstat *i_regs)
4430{
4431#ifndef DISABLE_COP1
4432 signed char temp=get_reg(i_regs->regmap,-1);
4433 assert(temp>=0);
4434 // Check cop1 unusable
4435 if(!cop1_usable) {
4436 signed char rs=get_reg(i_regs->regmap,CSREG);
4437 assert(rs>=0);
4438 emit_testimm(rs,0x20000000);
4439 int jaddr=(int)out;
4440 emit_jeq(0);
4441 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4442 cop1_usable=1;
4443 }
4444
4445 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4446 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4447 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4448 emit_flds(temp,15);
4449 emit_ftosizs(15,15); // float->int, truncate
4450 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4451 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4452 emit_fsts(15,temp);
4453 return;
4454 }
4455 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4456 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4457 emit_vldr(temp,7);
4458 emit_ftosizd(7,13); // double->int, truncate
4459 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4460 emit_fsts(13,temp);
4461 return;
4462 }
4463
4464 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4465 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4466 emit_flds(temp,13);
4467 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4468 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4469 emit_fsitos(13,15);
4470 emit_fsts(15,temp);
4471 return;
4472 }
4473 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4474 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4475 emit_flds(temp,13);
4476 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4477 emit_fsitod(13,7);
4478 emit_vstr(7,temp);
4479 return;
4480 }
4481
4482 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4483 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4484 emit_flds(temp,13);
4485 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4486 emit_fcvtds(13,7);
4487 emit_vstr(7,temp);
4488 return;
4489 }
4490 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4491 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4492 emit_vldr(temp,7);
4493 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4494 emit_fcvtsd(7,13);
4495 emit_fsts(13,temp);
4496 return;
4497 }
4498 #endif
4499
4500 // C emulation code
4501
4502 u_int hr,reglist=0;
4503 for(hr=0;hr<HOST_REGS;hr++) {
4504 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4505 }
4506 save_regs(reglist);
4507
4508 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4509 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4510 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4511 emit_call((int)cvt_s_w);
4512 }
4513 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4514 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4515 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4516 emit_call((int)cvt_d_w);
4517 }
4518 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4519 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4520 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4521 emit_call((int)cvt_s_l);
4522 }
4523 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4524 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4525 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4526 emit_call((int)cvt_d_l);
4527 }
4528
4529 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4530 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4531 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4532 emit_call((int)cvt_d_s);
4533 }
4534 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4535 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4536 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4537 emit_call((int)cvt_w_s);
4538 }
4539 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4540 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4541 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4542 emit_call((int)cvt_l_s);
4543 }
4544
4545 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4546 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4547 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4548 emit_call((int)cvt_s_d);
4549 }
4550 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4551 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4552 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4553 emit_call((int)cvt_w_d);
4554 }
4555 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4556 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4557 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4558 emit_call((int)cvt_l_d);
4559 }
4560
4561 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4562 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4563 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4564 emit_call((int)round_l_s);
4565 }
4566 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4567 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4568 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4569 emit_call((int)trunc_l_s);
4570 }
4571 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4572 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4573 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4574 emit_call((int)ceil_l_s);
4575 }
4576 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4577 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4578 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4579 emit_call((int)floor_l_s);
4580 }
4581 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4582 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4583 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4584 emit_call((int)round_w_s);
4585 }
4586 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4587 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4588 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4589 emit_call((int)trunc_w_s);
4590 }
4591 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4592 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4593 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4594 emit_call((int)ceil_w_s);
4595 }
4596 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4597 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4598 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4599 emit_call((int)floor_w_s);
4600 }
4601
4602 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4603 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4604 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4605 emit_call((int)round_l_d);
4606 }
4607 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4608 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4609 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4610 emit_call((int)trunc_l_d);
4611 }
4612 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4613 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4614 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4615 emit_call((int)ceil_l_d);
4616 }
4617 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4618 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4619 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4620 emit_call((int)floor_l_d);
4621 }
4622 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4623 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4624 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4625 emit_call((int)round_w_d);
4626 }
4627 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4628 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4629 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4630 emit_call((int)trunc_w_d);
4631 }
4632 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4633 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4634 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4635 emit_call((int)ceil_w_d);
4636 }
4637 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4638 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4639 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4640 emit_call((int)floor_w_d);
4641 }
4642
4643 restore_regs(reglist);
4644#else
4645 cop1_unusable(i, i_regs);
4646#endif
4647}
4648#define fconv_assemble fconv_assemble_arm
4649
4650void fcomp_assemble(int i,struct regstat *i_regs)
4651{
4652#ifndef DISABLE_COP1
4653 signed char fs=get_reg(i_regs->regmap,FSREG);
4654 signed char temp=get_reg(i_regs->regmap,-1);
4655 assert(temp>=0);
4656 // Check cop1 unusable
4657 if(!cop1_usable) {
4658 signed char cs=get_reg(i_regs->regmap,CSREG);
4659 assert(cs>=0);
4660 emit_testimm(cs,0x20000000);
4661 int jaddr=(int)out;
4662 emit_jeq(0);
4663 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4664 cop1_usable=1;
4665 }
4666
4667 if((source[i]&0x3f)==0x30) {
4668 emit_andimm(fs,~0x800000,fs);
4669 return;
4670 }
4671
4672 if((source[i]&0x3e)==0x38) {
4673 // sf/ngle - these should throw exceptions for NaNs
4674 emit_andimm(fs,~0x800000,fs);
4675 return;
4676 }
4677
4678 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4679 if(opcode2[i]==0x10) {
4680 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4681 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4682 emit_orimm(fs,0x800000,fs);
4683 emit_flds(temp,14);
4684 emit_flds(HOST_TEMPREG,15);
4685 emit_fcmps(14,15);
4686 emit_fmstat();
4687 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4688 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4689 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4690 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4691 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4692 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4693 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4694 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4695 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4696 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4697 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4698 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4699 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4700 return;
4701 }
4702 if(opcode2[i]==0x11) {
4703 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4704 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4705 emit_orimm(fs,0x800000,fs);
4706 emit_vldr(temp,6);
4707 emit_vldr(HOST_TEMPREG,7);
4708 emit_fcmpd(6,7);
4709 emit_fmstat();
4710 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4711 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4712 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4713 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4714 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4715 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4716 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4717 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4718 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4719 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4720 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4721 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4722 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4723 return;
4724 }
4725 #endif
4726
4727 // C only
4728
4729 u_int hr,reglist=0;
4730 for(hr=0;hr<HOST_REGS;hr++) {
4731 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4732 }
4733 reglist&=~(1<<fs);
4734 save_regs(reglist);
4735 if(opcode2[i]==0x10) {
4736 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4737 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4738 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4739 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4740 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4741 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4742 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4743 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4744 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4745 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4746 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4747 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4748 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4749 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4750 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4751 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4752 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4753 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4754 }
4755 if(opcode2[i]==0x11) {
4756 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4757 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4758 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4759 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4760 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4761 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4762 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4763 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4764 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4765 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4766 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4767 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4768 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4769 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4770 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4771 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4772 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4773 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4774 }
4775 restore_regs(reglist);
4776 emit_loadreg(FSREG,fs);
4777#else
4778 cop1_unusable(i, i_regs);
4779#endif
4780}
4781
4782void float_assemble(int i,struct regstat *i_regs)
4783{
4784#ifndef DISABLE_COP1
4785 signed char temp=get_reg(i_regs->regmap,-1);
4786 assert(temp>=0);
4787 // Check cop1 unusable
4788 if(!cop1_usable) {
4789 signed char cs=get_reg(i_regs->regmap,CSREG);
4790 assert(cs>=0);
4791 emit_testimm(cs,0x20000000);
4792 int jaddr=(int)out;
4793 emit_jeq(0);
4794 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4795 cop1_usable=1;
4796 }
4797
4798 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4799 if((source[i]&0x3f)==6) // mov
4800 {
4801 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4802 if(opcode2[i]==0x10) {
4803 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4804 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4805 emit_readword_indexed(0,temp,temp);
4806 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4807 }
4808 if(opcode2[i]==0x11) {
4809 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4810 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4811 emit_vldr(temp,7);
4812 emit_vstr(7,HOST_TEMPREG);
4813 }
4814 }
4815 return;
4816 }
4817
4818 if((source[i]&0x3f)>3)
4819 {
4820 if(opcode2[i]==0x10) {
4821 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4822 emit_flds(temp,15);
4823 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4824 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4825 }
4826 if((source[i]&0x3f)==4) // sqrt
4827 emit_fsqrts(15,15);
4828 if((source[i]&0x3f)==5) // abs
4829 emit_fabss(15,15);
4830 if((source[i]&0x3f)==7) // neg
4831 emit_fnegs(15,15);
4832 emit_fsts(15,temp);
4833 }
4834 if(opcode2[i]==0x11) {
4835 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4836 emit_vldr(temp,7);
4837 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4838 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4839 }
4840 if((source[i]&0x3f)==4) // sqrt
4841 emit_fsqrtd(7,7);
4842 if((source[i]&0x3f)==5) // abs
4843 emit_fabsd(7,7);
4844 if((source[i]&0x3f)==7) // neg
4845 emit_fnegd(7,7);
4846 emit_vstr(7,temp);
4847 }
4848 return;
4849 }
4850 if((source[i]&0x3f)<4)
4851 {
4852 if(opcode2[i]==0x10) {
4853 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4854 }
4855 if(opcode2[i]==0x11) {
4856 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4857 }
4858 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4859 if(opcode2[i]==0x10) {
4860 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4861 emit_flds(temp,15);
4862 emit_flds(HOST_TEMPREG,13);
4863 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4864 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4865 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4866 }
4867 }
4868 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4869 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4870 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4871 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4872 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4873 emit_fsts(15,HOST_TEMPREG);
4874 }else{
4875 emit_fsts(15,temp);
4876 }
4877 }
4878 else if(opcode2[i]==0x11) {
4879 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4880 emit_vldr(temp,7);
4881 emit_vldr(HOST_TEMPREG,6);
4882 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4883 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4884 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4885 }
4886 }
4887 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4888 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4889 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4890 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4891 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4892 emit_vstr(7,HOST_TEMPREG);
4893 }else{
4894 emit_vstr(7,temp);
4895 }
4896 }
4897 }
4898 else {
4899 if(opcode2[i]==0x10) {
4900 emit_flds(temp,15);
4901 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4902 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4903 }
4904 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4905 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4906 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4907 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4908 emit_fsts(15,temp);
4909 }
4910 else if(opcode2[i]==0x11) {
4911 emit_vldr(temp,7);
4912 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4913 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4914 }
4915 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4916 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4917 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4918 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4919 emit_vstr(7,temp);
4920 }
4921 }
4922 return;
4923 }
4924 #endif
4925
4926 u_int hr,reglist=0;
4927 for(hr=0;hr<HOST_REGS;hr++) {
4928 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4929 }
4930 if(opcode2[i]==0x10) { // Single precision
4931 save_regs(reglist);
4932 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4933 if((source[i]&0x3f)<4) {
4934 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4935 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4936 }else{
4937 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4938 }
4939 switch(source[i]&0x3f)
4940 {
4941 case 0x00: emit_call((int)add_s);break;
4942 case 0x01: emit_call((int)sub_s);break;
4943 case 0x02: emit_call((int)mul_s);break;
4944 case 0x03: emit_call((int)div_s);break;
4945 case 0x04: emit_call((int)sqrt_s);break;
4946 case 0x05: emit_call((int)abs_s);break;
4947 case 0x06: emit_call((int)mov_s);break;
4948 case 0x07: emit_call((int)neg_s);break;
4949 }
4950 restore_regs(reglist);
4951 }
4952 if(opcode2[i]==0x11) { // Double precision
4953 save_regs(reglist);
4954 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4955 if((source[i]&0x3f)<4) {
4956 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4957 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4958 }else{
4959 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4960 }
4961 switch(source[i]&0x3f)
4962 {
4963 case 0x00: emit_call((int)add_d);break;
4964 case 0x01: emit_call((int)sub_d);break;
4965 case 0x02: emit_call((int)mul_d);break;
4966 case 0x03: emit_call((int)div_d);break;
4967 case 0x04: emit_call((int)sqrt_d);break;
4968 case 0x05: emit_call((int)abs_d);break;
4969 case 0x06: emit_call((int)mov_d);break;
4970 case 0x07: emit_call((int)neg_d);break;
4971 }
4972 restore_regs(reglist);
4973 }
4974#else
4975 cop1_unusable(i, i_regs);
4976#endif
4977}
4978
4979void multdiv_assemble_arm(int i,struct regstat *i_regs)
4980{
4981 // case 0x18: MULT
4982 // case 0x19: MULTU
4983 // case 0x1A: DIV
4984 // case 0x1B: DIVU
4985 // case 0x1C: DMULT
4986 // case 0x1D: DMULTU
4987 // case 0x1E: DDIV
4988 // case 0x1F: DDIVU
4989 if(rs1[i]&&rs2[i])
4990 {
4991 if((opcode2[i]&4)==0) // 32-bit
4992 {
4993 if(opcode2[i]==0x18) // MULT
4994 {
4995 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4996 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4997 signed char hi=get_reg(i_regs->regmap,HIREG);
4998 signed char lo=get_reg(i_regs->regmap,LOREG);
4999 assert(m1>=0);
5000 assert(m2>=0);
5001 assert(hi>=0);
5002 assert(lo>=0);
5003 emit_smull(m1,m2,hi,lo);
5004 }
5005 if(opcode2[i]==0x19) // MULTU
5006 {
5007 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5008 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5009 signed char hi=get_reg(i_regs->regmap,HIREG);
5010 signed char lo=get_reg(i_regs->regmap,LOREG);
5011 assert(m1>=0);
5012 assert(m2>=0);
5013 assert(hi>=0);
5014 assert(lo>=0);
5015 emit_umull(m1,m2,hi,lo);
5016 }
5017 if(opcode2[i]==0x1A) // DIV
5018 {
5019 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5020 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5021 assert(d1>=0);
5022 assert(d2>=0);
5023 signed char quotient=get_reg(i_regs->regmap,LOREG);
5024 signed char remainder=get_reg(i_regs->regmap,HIREG);
5025 assert(quotient>=0);
5026 assert(remainder>=0);
5027 emit_movs(d1,remainder);
5028 emit_movimm(0xffffffff,quotient);
5029 emit_negmi(quotient,quotient); // .. quotient and ..
5030 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
5031 emit_movs(d2,HOST_TEMPREG);
5032 emit_jeq((int)out+52); // Division by zero
5033 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5034 emit_clz(HOST_TEMPREG,quotient);
5035 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5036 emit_orimm(quotient,1<<31,quotient);
5037 emit_shr(quotient,quotient,quotient);
5038 emit_cmp(remainder,HOST_TEMPREG);
5039 emit_subcs(remainder,HOST_TEMPREG,remainder);
5040 emit_adcs(quotient,quotient,quotient);
5041 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5042 emit_jcc((int)out-16); // -4
5043 emit_teq(d1,d2);
5044 emit_negmi(quotient,quotient);
5045 emit_test(d1,d1);
5046 emit_negmi(remainder,remainder);
5047 }
5048 if(opcode2[i]==0x1B) // DIVU
5049 {
5050 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5051 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5052 assert(d1>=0);
5053 assert(d2>=0);
5054 signed char quotient=get_reg(i_regs->regmap,LOREG);
5055 signed char remainder=get_reg(i_regs->regmap,HIREG);
5056 assert(quotient>=0);
5057 assert(remainder>=0);
5058 emit_mov(d1,remainder);
5059 emit_movimm(0xffffffff,quotient); // div0 case
5060 emit_test(d2,d2);
5061 emit_jeq((int)out+40); // Division by zero
5062 emit_clz(d2,HOST_TEMPREG);
5063 emit_movimm(1<<31,quotient);
5064 emit_shl(d2,HOST_TEMPREG,d2);
5065 emit_shr(quotient,HOST_TEMPREG,quotient);
5066 emit_cmp(remainder,d2);
5067 emit_subcs(remainder,d2,remainder);
5068 emit_adcs(quotient,quotient,quotient);
5069 emit_shrcc_imm(d2,1,d2);
5070 emit_jcc((int)out-16); // -4
5071 }
5072 }
5073 else // 64-bit
5074#ifndef FORCE32
5075 {
5076 if(opcode2[i]==0x1C) // DMULT
5077 {
5078 assert(opcode2[i]!=0x1C);
5079 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5080 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5081 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5082 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5083 assert(m1h>=0);
5084 assert(m2h>=0);
5085 assert(m1l>=0);
5086 assert(m2l>=0);
5087 emit_pushreg(m2h);
5088 emit_pushreg(m2l);
5089 emit_pushreg(m1h);
5090 emit_pushreg(m1l);
5091 emit_call((int)&mult64);
5092 emit_popreg(m1l);
5093 emit_popreg(m1h);
5094 emit_popreg(m2l);
5095 emit_popreg(m2h);
5096 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5097 signed char hil=get_reg(i_regs->regmap,HIREG);
5098 if(hih>=0) emit_loadreg(HIREG|64,hih);
5099 if(hil>=0) emit_loadreg(HIREG,hil);
5100 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5101 signed char lol=get_reg(i_regs->regmap,LOREG);
5102 if(loh>=0) emit_loadreg(LOREG|64,loh);
5103 if(lol>=0) emit_loadreg(LOREG,lol);
5104 }
5105 if(opcode2[i]==0x1D) // DMULTU
5106 {
5107 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5108 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5109 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5110 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5111 assert(m1h>=0);
5112 assert(m2h>=0);
5113 assert(m1l>=0);
5114 assert(m2l>=0);
5115 save_regs(0x100f);
5116 if(m1l!=0) emit_mov(m1l,0);
5117 if(m1h==0) emit_readword((int)&dynarec_local,1);
5118 else if(m1h>1) emit_mov(m1h,1);
5119 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5120 else if(m2l>2) emit_mov(m2l,2);
5121 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5122 else if(m2h>3) emit_mov(m2h,3);
5123 emit_call((int)&multu64);
5124 restore_regs(0x100f);
5125 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5126 signed char hil=get_reg(i_regs->regmap,HIREG);
5127 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5128 signed char lol=get_reg(i_regs->regmap,LOREG);
5129 /*signed char temp=get_reg(i_regs->regmap,-1);
5130 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5131 signed char rl=get_reg(i_regs->regmap,HIREG);
5132 assert(m1h>=0);
5133 assert(m2h>=0);
5134 assert(m1l>=0);
5135 assert(m2l>=0);
5136 assert(temp>=0);
5137 //emit_mov(m1l,EAX);
5138 //emit_mul(m2l);
5139 emit_umull(rl,rh,m1l,m2l);
5140 emit_storereg(LOREG,rl);
5141 emit_mov(rh,temp);
5142 //emit_mov(m1h,EAX);
5143 //emit_mul(m2l);
5144 emit_umull(rl,rh,m1h,m2l);
5145 emit_adds(rl,temp,temp);
5146 emit_adcimm(rh,0,rh);
5147 emit_storereg(HIREG,rh);
5148 //emit_mov(m2h,EAX);
5149 //emit_mul(m1l);
5150 emit_umull(rl,rh,m1l,m2h);
5151 emit_adds(rl,temp,temp);
5152 emit_adcimm(rh,0,rh);
5153 emit_storereg(LOREG|64,temp);
5154 emit_mov(rh,temp);
5155 //emit_mov(m2h,EAX);
5156 //emit_mul(m1h);
5157 emit_umull(rl,rh,m1h,m2h);
5158 emit_adds(rl,temp,rl);
5159 emit_loadreg(HIREG,temp);
5160 emit_adcimm(rh,0,rh);
5161 emit_adds(rl,temp,rl);
5162 emit_adcimm(rh,0,rh);
5163 // DEBUG
5164 /*
5165 emit_pushreg(m2h);
5166 emit_pushreg(m2l);
5167 emit_pushreg(m1h);
5168 emit_pushreg(m1l);
5169 emit_call((int)&multu64);
5170 emit_popreg(m1l);
5171 emit_popreg(m1h);
5172 emit_popreg(m2l);
5173 emit_popreg(m2h);
5174 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5175 signed char hil=get_reg(i_regs->regmap,HIREG);
5176 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5177 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5178 */
5179 // Shouldn't be necessary
5180 //char loh=get_reg(i_regs->regmap,LOREG|64);
5181 //char lol=get_reg(i_regs->regmap,LOREG);
5182 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5183 //if(lol>=0) emit_loadreg(LOREG,lol);
5184 }
5185 if(opcode2[i]==0x1E) // DDIV
5186 {
5187 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5188 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5189 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5190 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5191 assert(d1h>=0);
5192 assert(d2h>=0);
5193 assert(d1l>=0);
5194 assert(d2l>=0);
5195 save_regs(0x100f);
5196 if(d1l!=0) emit_mov(d1l,0);
5197 if(d1h==0) emit_readword((int)&dynarec_local,1);
5198 else if(d1h>1) emit_mov(d1h,1);
5199 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5200 else if(d2l>2) emit_mov(d2l,2);
5201 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5202 else if(d2h>3) emit_mov(d2h,3);
5203 emit_call((int)&div64);
5204 restore_regs(0x100f);
5205 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5206 signed char hil=get_reg(i_regs->regmap,HIREG);
5207 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5208 signed char lol=get_reg(i_regs->regmap,LOREG);
5209 if(hih>=0) emit_loadreg(HIREG|64,hih);
5210 if(hil>=0) emit_loadreg(HIREG,hil);
5211 if(loh>=0) emit_loadreg(LOREG|64,loh);
5212 if(lol>=0) emit_loadreg(LOREG,lol);
5213 }
5214 if(opcode2[i]==0x1F) // DDIVU
5215 {
5216 //u_int hr,reglist=0;
5217 //for(hr=0;hr<HOST_REGS;hr++) {
5218 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5219 //}
5220 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5221 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5222 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5223 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5224 assert(d1h>=0);
5225 assert(d2h>=0);
5226 assert(d1l>=0);
5227 assert(d2l>=0);
5228 save_regs(0x100f);
5229 if(d1l!=0) emit_mov(d1l,0);
5230 if(d1h==0) emit_readword((int)&dynarec_local,1);
5231 else if(d1h>1) emit_mov(d1h,1);
5232 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5233 else if(d2l>2) emit_mov(d2l,2);
5234 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5235 else if(d2h>3) emit_mov(d2h,3);
5236 emit_call((int)&divu64);
5237 restore_regs(0x100f);
5238 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5239 signed char hil=get_reg(i_regs->regmap,HIREG);
5240 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5241 signed char lol=get_reg(i_regs->regmap,LOREG);
5242 if(hih>=0) emit_loadreg(HIREG|64,hih);
5243 if(hil>=0) emit_loadreg(HIREG,hil);
5244 if(loh>=0) emit_loadreg(LOREG|64,loh);
5245 if(lol>=0) emit_loadreg(LOREG,lol);
5246 }
5247 }
5248#else
5249 assert(0);
5250#endif
5251 }
5252 else
5253 {
5254 // Multiply by zero is zero.
5255 // MIPS does not have a divide by zero exception.
5256 // The result is undefined, we return zero.
5257 signed char hr=get_reg(i_regs->regmap,HIREG);
5258 signed char lr=get_reg(i_regs->regmap,LOREG);
5259 if(hr>=0) emit_zeroreg(hr);
5260 if(lr>=0) emit_zeroreg(lr);
5261 }
5262}
// Alias the generic name multdiv_assemble to the ARM implementation
// (multdiv_assemble_arm) defined directly above.
#define multdiv_assemble multdiv_assemble_arm
5264
// Hash-register preload hook for the return-address mini hash table.
// Deliberately emits nothing on ARM: the x86 backend uses this hook to
// place the constant 0xf8 in a register, whereas on ARM the hash is
// computed by a single instruction with an immediate (see do_rhash).
//   r - host register that would receive the constant (ignored here)
void do_preload_rhash(int r) {
  (void)r; // nothing to preload on this architecture
}
5269
5270void do_preload_rhtbl(int ht) {
5271 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5272}
5273
// Emit the mini hash table hash: an AND-immediate of 'rs' with 0xf8,
// with 'rh' as the other register operand.
// NOTE(review): 0xf8 matches the slot index (addr&0xFF)>>3 used by
// do_miniht_insert, scaled by 8 - presumably the byte size of one
// two-word mini_ht entry; confirm sizeof(u_int)==4.
//   rs - host register holding the address to hash
//   rh - host register for the hash (presumably the destination)
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5277
5278void do_miniht_load(int ht,int rh) {
5279 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5280 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5281}
5282
5283void do_miniht_jump(int rs,int rh,int ht) {
5284 emit_cmp(rh,rs);
5285 emit_ldreq_indexed(ht,4,15);
5286 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5287 emit_mov(rs,7);
5288 emit_jmp(jump_vaddr_reg[7]);
5289 #else
5290 emit_jmp(jump_vaddr_reg[rs]);
5291 #endif
5292}
5293
5294void do_miniht_insert(u_int return_address,int rt,int temp) {
5295 #ifdef ARMv5_ONLY
5296 emit_movimm(return_address,rt); // PC into link register
5297 add_to_linker((int)out,return_address,1);
5298 emit_pcreladdr(temp);
5299 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5300 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5301 #else
5302 emit_movw(return_address&0x0000FFFF,rt);
5303 add_to_linker((int)out,return_address,1);
5304 emit_pcreladdr(temp);
5305 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5306 emit_movt(return_address&0xFFFF0000,rt);
5307 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5308 #endif
5309}
5310
5311// Sign-extend to 64 bits and write out upper half of a register
5312// This is useful where we have a 32-bit value in a register, and want to
5313// keep it in a 32-bit register, but can't guarantee that it won't be read
5314// as a 64-bit value later.
5315void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5316{
5317#ifndef FORCE32
5318 if(is32_pre==is32) return;
5319 int hr,reg;
5320 for(hr=0;hr<HOST_REGS;hr++) {
5321 if(hr!=EXCLUDE_REG) {
5322 //if(pre[hr]==entry[hr]) {
5323 if((reg=pre[hr])>=0) {
5324 if((dirty>>hr)&1) {
5325 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5326 emit_sarimm(hr,31,HOST_TEMPREG);
5327 emit_storereg(reg|64,HOST_TEMPREG);
5328 }
5329 }
5330 }
5331 //}
5332 }
5333 }
5334#endif
5335}
5336
5337void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5338{
5339 //if(dirty_pre==dirty) return;
5340 int hr,reg,new_hr;
5341 for(hr=0;hr<HOST_REGS;hr++) {
5342 if(hr!=EXCLUDE_REG) {
5343 reg=pre[hr];
5344 if(((~u)>>(reg&63))&1) {
5345 if(reg>0) {
5346 if(((dirty_pre&~dirty)>>hr)&1) {
5347 if(reg>0&&reg<34) {
5348 emit_storereg(reg,hr);
5349 if( ((is32_pre&~uu)>>reg)&1 ) {
5350 emit_sarimm(hr,31,HOST_TEMPREG);
5351 emit_storereg(reg|64,HOST_TEMPREG);
5352 }
5353 }
5354 else if(reg>=64) {
5355 emit_storereg(reg,hr);
5356 }
5357 }
5358 }
5359 }
5360 }
5361 }
5362}
5363
5364
5365/* using strd could possibly help but you'd have to allocate registers in pairs
5366void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5367{
5368 int hr;
5369 int wrote=-1;
5370 for(hr=HOST_REGS-1;hr>=0;hr--) {
5371 if(hr!=EXCLUDE_REG) {
5372 if(pre[hr]!=entry[hr]) {
5373 if(pre[hr]>=0) {
5374 if((dirty>>hr)&1) {
5375 if(get_reg(entry,pre[hr])<0) {
5376 if(pre[hr]<64) {
5377 if(!((u>>pre[hr])&1)) {
5378 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5379 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5380 emit_sarimm(hr,31,hr+1);
5381 emit_strdreg(pre[hr],hr);
5382 }
5383 else
5384 emit_storereg(pre[hr],hr);
5385 }else{
5386 emit_storereg(pre[hr],hr);
5387 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5388 emit_sarimm(hr,31,hr);
5389 emit_storereg(pre[hr]|64,hr);
5390 }
5391 }
5392 }
5393 }else{
5394 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5395 emit_storereg(pre[hr],hr);
5396 }
5397 }
5398 wrote=hr;
5399 }
5400 }
5401 }
5402 }
5403 }
5404 }
5405 for(hr=0;hr<HOST_REGS;hr++) {
5406 if(hr!=EXCLUDE_REG) {
5407 if(pre[hr]!=entry[hr]) {
5408 if(pre[hr]>=0) {
5409 int nr;
5410 if((nr=get_reg(entry,pre[hr]))>=0) {
5411 emit_mov(hr,nr);
5412 }
5413 }
5414 }
5415 }
5416 }
5417}
5418#define wb_invalidate wb_invalidate_arm
5419*/
5420
5421// Clearing the cache is rather slow on ARM Linux, so mark the areas
5422// that need to be cleared, and then only clear these areas once.
5423void do_clear_cache()
5424{
5425 int i,j;
5426 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5427 {
5428 u_int bitmap=needs_clear_cache[i];
5429 if(bitmap) {
5430 u_int start,end;
5431 for(j=0;j<32;j++)
5432 {
5433 if(bitmap&(1<<j)) {
5434 start=BASE_ADDR+i*131072+j*4096;
5435 end=start+4095;
5436 j++;
5437 while(j<32) {
5438 if(bitmap&(1<<j)) {
5439 end+=4096;
5440 j++;
5441 }else{
5442 __clear_cache((void *)start,(void *)end);
5443 break;
5444 }
5445 }
5446 }
5447 }
5448 needs_clear_cache[i]=0;
5449 }
5450 }
5451}
5452
5453// CPU-architecture-specific initialization
5454void arch_init() {
5455#ifndef DISABLE_COP1
5456 rounding_modes[0]=0x0<<22; // round
5457 rounding_modes[1]=0x3<<22; // trunc
5458 rounding_modes[2]=0x1<<22; // ceil
5459 rounding_modes[3]=0x2<<22; // floor
5460#endif
5461}
5462
5463// vim:shiftwidth=2:expandtab