drc: try even more to not compile code as 64bit
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
0bbd1454 69void invalidate_addr_r0();
70void invalidate_addr_r1();
71void invalidate_addr_r2();
72void invalidate_addr_r3();
73void invalidate_addr_r4();
74void invalidate_addr_r5();
75void invalidate_addr_r6();
76void invalidate_addr_r7();
77void invalidate_addr_r8();
78void invalidate_addr_r9();
79void invalidate_addr_r10();
80void invalidate_addr_r12();
81
82const u_int invalidate_addr_reg[16] = {
83 (int)invalidate_addr_r0,
84 (int)invalidate_addr_r1,
85 (int)invalidate_addr_r2,
86 (int)invalidate_addr_r3,
87 (int)invalidate_addr_r4,
88 (int)invalidate_addr_r5,
89 (int)invalidate_addr_r6,
90 (int)invalidate_addr_r7,
91 (int)invalidate_addr_r8,
92 (int)invalidate_addr_r9,
93 (int)invalidate_addr_r10,
94 0,
95 (int)invalidate_addr_r12,
96 0,
97 0,
98 0};
99
57871462 100#include "fpu.h"
101
// Array of 2^(TARGET_SIZE_2-17) unsigned words.
// NOTE(review): presumably a bitmap marking regions of the translation cache
// whose instruction cache still has to be flushed - confirm against the users
// of this array.
dd3a91a1 102unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
57871462 104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
173 literals[literalcount][0]=addr;
174 literals[literalcount][1]=val;
175 literalcount++;
176}
177
f76eeef9 178void *kill_pointer(void *stub)
57871462 179{
180 int *ptr=(int *)(stub+4);
181 assert((*ptr&0x0ff00000)==0x05900000);
182 u_int offset=*ptr&0xfff;
183 int **l_ptr=(void *)ptr+offset+8;
184 int *i_ptr=*l_ptr;
185 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 186 return i_ptr;
57871462 187}
188
189int get_pointer(void *stub)
190{
191 //printf("get_pointer(%x)\n",(int)stub);
192 int *ptr=(int *)(stub+4);
193 assert((*ptr&0x0ff00000)==0x05900000);
194 u_int offset=*ptr&0xfff;
195 int **l_ptr=(void *)ptr+offset+8;
196 int *i_ptr=*l_ptr;
197 assert((*i_ptr&0x0f000000)==0x0a000000);
198 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
199}
200
201// Find the "clean" entry point from a "dirty" entry point
202// by skipping past the call to verify_code
203u_int get_clean_addr(int addr)
204{
205 int *ptr=(int *)addr;
206 #ifdef ARMv5_ONLY
207 ptr+=4;
208 #else
209 ptr+=6;
210 #endif
211 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
212 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
213 ptr++;
214 if((*ptr&0xFF000000)==0xea000000) {
215 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
216 }
217 return (u_int)ptr;
218}
219
220int verify_dirty(int addr)
221{
222 u_int *ptr=(u_int *)addr;
223 #ifdef ARMv5_ONLY
224 // get from literal pool
225 assert((*ptr&0xFFF00000)==0xe5900000);
226 u_int offset=*ptr&0xfff;
227 u_int *l_ptr=(void *)ptr+offset+8;
228 u_int source=l_ptr[0];
229 u_int copy=l_ptr[1];
230 u_int len=l_ptr[2];
231 ptr+=4;
232 #else
233 // ARMv7 movw/movt
234 assert((*ptr&0xFFF00000)==0xe3000000);
235 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
236 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
237 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
238 ptr+=6;
239 #endif
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 242 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 243 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
244 unsigned int page=source>>12;
245 unsigned int map_value=memory_map[page];
246 if(map_value>=0x80000000) return 0;
247 while(page<((source+len-1)>>12)) {
248 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
249 }
250 source = source+(map_value<<2);
251 }
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258int isclean(int addr)
259{
260 #ifdef ARMv5_ONLY
261 int *ptr=((u_int *)addr)+4;
262 #else
263 int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
270 return 1;
271}
272
273void get_bounds(int addr,u_int *start,u_int *end)
274{
275 u_int *ptr=(u_int *)addr;
276 #ifdef ARMv5_ONLY
277 // get from literal pool
278 assert((*ptr&0xFFF00000)==0xe5900000);
279 u_int offset=*ptr&0xfff;
280 u_int *l_ptr=(void *)ptr+offset+8;
281 u_int source=l_ptr[0];
282 //u_int copy=l_ptr[1];
283 u_int len=l_ptr[2];
284 ptr+=4;
285 #else
286 // ARMv7 movw/movt
287 assert((*ptr&0xFFF00000)==0xe3000000);
288 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
289 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
290 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
291 ptr+=6;
292 #endif
293 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
294 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 295 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 296 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
297 if(memory_map[source>>12]>=0x80000000) source = 0;
298 else source = source+(memory_map[source>>12]<<2);
299 }
300 *start=source;
301 *end=source+len;
302}
303
304/* Register allocation */
305
306// Note: registers are allocated clean (unmodified state)
307// if you intend to modify the register, you must call dirty_reg().
// Map guest register `reg` to a host register in cur->regmap for the
// instruction at index i.
//   cur - register state being built (regmap, dirty, isconst, u, uu)
//   i   - instruction index; used for loop_reg(), lsn() and to look at the
//         registers used by instruction i-1
//   reg - guest register number
// Returns immediately when reg is flagged in cur->u or is already mapped.
// Every new mapping clears the host register's dirty and isconst bits.
// The search proceeds in this order: preferred host register, a host
// register holding an unneeded value, any free host register, and finally
// eviction guided by the hsn[] table filled in by lsn().
// Terminates the process with exit(1) if no host register can be found.
308void alloc_reg(struct regstat *cur,int i,signed char reg)
309{
310 int r,hr;
// Default preference: low three bits of the guest register number, with
// fixed choices for the cycle counter and the PTEMP/FTEMP temporaries.
311 int preferred_reg = (reg&7);
312 if(reg==CCREG) preferred_reg=HOST_CCREG;
313 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
314
315 // Don't allocate unused registers
316 if((cur->u>>reg)&1) return;
317
318 // see if it's already allocated
319 for(hr=0;hr<HOST_REGS;hr++)
320 {
321 if(cur->regmap[hr]==reg) return;
322 }
323
324 // Keep the same mapping if the register was already allocated in a loop
325 preferred_reg = loop_reg(i,reg,preferred_reg);
326
327 // Try to allocate the preferred register
328 if(cur->regmap[preferred_reg]==-1) {
329 cur->regmap[preferred_reg]=reg;
330 cur->dirty&=~(1<<preferred_reg);
331 cur->isconst&=~(1<<preferred_reg);
332 return;
333 }
// The preferred register is occupied: take it over if its current content
// is flagged unneeded (cur->u for values below 64, cur->uu for values with
// bit 6 set, i.e. the upper halves).
334 r=cur->regmap[preferred_reg];
335 if(r<64&&((cur->u>>r)&1)) {
336 cur->regmap[preferred_reg]=reg;
337 cur->dirty&=~(1<<preferred_reg);
338 cur->isconst&=~(1<<preferred_reg);
339 return;
340 }
341 if(r>=64&&((cur->uu>>(r&63))&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347
348 // Clear any unneeded registers
349 // We try to keep the mapping consistent, if possible, because it
350 // makes branches easier (especially loops). So we try to allocate
351 // first (see above) before removing old mappings. If this is not
352 // possible then go ahead and clear out the registers that are no
353 // longer needed.
// Note: only the first unneeded mapping found is cleared (break).
354 for(hr=0;hr<HOST_REGS;hr++)
355 {
356 r=cur->regmap[hr];
357 if(r>=0) {
358 if(r<64) {
359 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
360 }
361 else
362 {
363 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
364 }
365 }
366 }
367 // Try to allocate any available register, but prefer
368 // registers that have not been used recently.
369 if(i>0) {
370 for(hr=0;hr<HOST_REGS;hr++) {
371 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
372 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
373 cur->regmap[hr]=reg;
374 cur->dirty&=~(1<<hr);
375 cur->isconst&=~(1<<hr);
376 return;
377 }
378 }
379 }
380 }
381 // Try to allocate any available register
382 for(hr=0;hr<HOST_REGS;hr++) {
383 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
384 cur->regmap[hr]=reg;
385 cur->dirty&=~(1<<hr);
386 cur->isconst&=~(1<<hr);
387 return;
388 }
389 }
390
391 // Ok, now we have to evict someone
392 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register and is then updated by lsn().
// NOTE(review): presumably a lower value means "needed sooner" - confirm
// against lsn().
393 u_char hsn[MAXREG+1];
394 memset(hsn,10,sizeof(hsn));
395 int j;
396 lsn(hsn,i,&preferred_reg);
397 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
398 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
399 if(i>0) {
400 // Don't evict the cycle count at entry points, otherwise the entry
401 // stub will have to write it.
402 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
403 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: scores 10 down to 3 only, skipping the registers used by
// instruction i-1.
404 for(j=10;j>=3;j--)
405 {
406 // Alloc preferred register if available
407 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
408 for(hr=0;hr<HOST_REGS;hr++) {
409 // Evict both parts of a 64-bit register
410 if((cur->regmap[hr]&63)==r) {
411 cur->regmap[hr]=-1;
412 cur->dirty&=~(1<<hr);
413 cur->isconst&=~(1<<hr);
414 }
415 }
416 cur->regmap[preferred_reg]=reg;
417 return;
418 }
419 for(r=1;r<=MAXREG;r++)
420 {
421 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Upper halves (r+64) are evicted before lower halves.
422 for(hr=0;hr<HOST_REGS;hr++) {
423 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
424 if(cur->regmap[hr]==r+64) {
425 cur->regmap[hr]=reg;
426 cur->dirty&=~(1<<hr);
427 cur->isconst&=~(1<<hr);
428 return;
429 }
430 }
431 }
432 for(hr=0;hr<HOST_REGS;hr++) {
433 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
434 if(cur->regmap[hr]==r) {
435 cur->regmap[hr]=reg;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 return;
439 }
440 }
441 }
442 }
443 }
444 }
445 }
// Last resort: scan every score from 10 down to 0 with no exclusions.
446 for(j=10;j>=0;j--)
447 {
448 for(r=1;r<=MAXREG;r++)
449 {
450 if(hsn[r]==j) {
451 for(hr=0;hr<HOST_REGS;hr++) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 for(hr=0;hr<HOST_REGS;hr++) {
460 if(cur->regmap[hr]==r) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 }
468 }
469 }
470 printf("This shouldn't happen (alloc_reg)");exit(1);
471}
472
// Allocate host registers for both halves of 64-bit guest register `reg`
// at instruction index i.
// The lower half is allocated through alloc_reg(); the upper half is stored
// in cur->regmap as reg|64 and is searched for with the same strategy as
// alloc_reg(): preferred register, unneeded mapping, free register, then
// eviction guided by the hsn[] table filled in by lsn().
// The upper half is skipped when reg is flagged in cur->uu or is already
// mapped. Every new mapping clears the host register's dirty and isconst
// bits. Terminates the process with exit(1) if no register can be found.
473void alloc_reg64(struct regstat *cur,int i,signed char reg)
474{
// Default preference for the upper half: host register 8 or 9, chosen by
// the low bit of the guest register number.
475 int preferred_reg = 8+(reg&1);
476 int r,hr;
477
478 // allocate the lower 32 bits
479 alloc_reg(cur,i,reg);
480
481 // Don't allocate unused registers
482 if((cur->uu>>reg)&1) return;
483
484 // see if the upper half is already allocated
485 for(hr=0;hr<HOST_REGS;hr++)
486 {
487 if(cur->regmap[hr]==reg+64) return;
488 }
489
490 // Keep the same mapping if the register was already allocated in a loop
491 preferred_reg = loop_reg(i,reg,preferred_reg);
492
493 // Try to allocate the preferred register
494 if(cur->regmap[preferred_reg]==-1) {
495 cur->regmap[preferred_reg]=reg|64;
496 cur->dirty&=~(1<<preferred_reg);
497 cur->isconst&=~(1<<preferred_reg);
498 return;
499 }
// The preferred register is occupied: take it over if its current content
// is flagged unneeded (cur->u for lower halves, cur->uu for upper halves).
500 r=cur->regmap[preferred_reg];
501 if(r<64&&((cur->u>>r)&1)) {
502 cur->regmap[preferred_reg]=reg|64;
503 cur->dirty&=~(1<<preferred_reg);
504 cur->isconst&=~(1<<preferred_reg);
505 return;
506 }
507 if(r>=64&&((cur->uu>>(r&63))&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513
514 // Clear any unneeded registers
515 // We try to keep the mapping consistent, if possible, because it
516 // makes branches easier (especially loops). So we try to allocate
517 // first (see above) before removing old mappings. If this is not
518 // possible then go ahead and clear out the registers that are no
519 // longer needed.
// Note: unlike alloc_reg(), this scan runs from the highest host register
// downwards; only the first unneeded mapping found is cleared.
520 for(hr=HOST_REGS-1;hr>=0;hr--)
521 {
522 r=cur->regmap[hr];
523 if(r>=0) {
524 if(r<64) {
525 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
526 }
527 else
528 {
529 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
530 }
531 }
532 }
533 // Try to allocate any available register, but prefer
534 // registers that have not been used recently.
535 if(i>0) {
536 for(hr=0;hr<HOST_REGS;hr++) {
537 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
538 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
539 cur->regmap[hr]=reg|64;
540 cur->dirty&=~(1<<hr);
541 cur->isconst&=~(1<<hr);
542 return;
543 }
544 }
545 }
546 }
547 // Try to allocate any available register
548 for(hr=0;hr<HOST_REGS;hr++) {
549 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
550 cur->regmap[hr]=reg|64;
551 cur->dirty&=~(1<<hr);
552 cur->isconst&=~(1<<hr);
553 return;
554 }
555 }
556
557 // Ok, now we have to evict someone
558 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register and is then updated by lsn().
// NOTE(review): presumably a lower value means "needed sooner" - confirm
// against lsn().
559 u_char hsn[MAXREG+1];
560 memset(hsn,10,sizeof(hsn));
561 int j;
562 lsn(hsn,i,&preferred_reg);
563 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
564 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
565 if(i>0) {
566 // Don't evict the cycle count at entry points, otherwise the entry
567 // stub will have to write it.
568 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
569 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: scores 10 down to 3 only, skipping the registers used by
// instruction i-1.
570 for(j=10;j>=3;j--)
571 {
572 // Alloc preferred register if available
573 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
574 for(hr=0;hr<HOST_REGS;hr++) {
575 // Evict both parts of a 64-bit register
576 if((cur->regmap[hr]&63)==r) {
577 cur->regmap[hr]=-1;
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
580 }
581 }
582 cur->regmap[preferred_reg]=reg|64;
583 return;
584 }
585 for(r=1;r<=MAXREG;r++)
586 {
587 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Upper halves (r+64) are evicted before lower halves.
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
590 if(cur->regmap[hr]==r+64) {
591 cur->regmap[hr]=reg|64;
592 cur->dirty&=~(1<<hr);
593 cur->isconst&=~(1<<hr);
594 return;
595 }
596 }
597 }
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
600 if(cur->regmap[hr]==r) {
601 cur->regmap[hr]=reg|64;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 }
609 }
610 }
611 }
// Last resort: scan every score from 10 down to 0 with no exclusions.
612 for(j=10;j>=0;j--)
613 {
614 for(r=1;r<=MAXREG;r++)
615 {
616 if(hsn[r]==j) {
617 for(hr=0;hr<HOST_REGS;hr++) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 printf("This shouldn't happen");exit(1);
637}
638
639// Allocate a temporary register. This is done without regard to
640// dirty status or whether the register we request is on the unneeded list
641// Note: This will only allocate one register, even if called multiple times
// Map temporary `reg` to a host register in cur->regmap for the instruction
// at index i.
// Search order: an existing mapping of reg (nothing to do), a free host
// register (scanned from the highest number down), a host register whose
// content is unneeded both in cur and at instruction i-1, and finally
// eviction guided by the hsn[] table filled in by lsn().
// Every new mapping clears the host register's dirty and isconst bits.
// Terminates the process with exit(1) if no register can be found.
642void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
643{
644 int r,hr;
// No preferred register for temporaries; the variable is only passed to lsn().
645 int preferred_reg = -1;
646
647 // see if it's already allocated
648 for(hr=0;hr<HOST_REGS;hr++)
649 {
650 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
651 }
652
653 // Try to allocate any available register
654 for(hr=HOST_REGS-1;hr>=0;hr--) {
655 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
656 cur->regmap[hr]=reg;
657 cur->dirty&=~(1<<hr);
658 cur->isconst&=~(1<<hr);
659 return;
660 }
661 }
662
663 // Find an unneeded register
// A mapping qualifies only if it is flagged unneeded in cur and, when i>0,
// also in unneeded_reg[i-1] / unneeded_reg_upper[i-1].
664 for(hr=HOST_REGS-1;hr>=0;hr--)
665 {
666 r=cur->regmap[hr];
667 if(r>=0) {
668 if(r<64) {
669 if((cur->u>>r)&1) {
670 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
671 cur->regmap[hr]=reg;
672 cur->dirty&=~(1<<hr);
673 cur->isconst&=~(1<<hr);
674 return;
675 }
676 }
677 }
678 else
679 {
680 if((cur->uu>>(r&63))&1) {
681 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
682 cur->regmap[hr]=reg;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
688 }
689 }
690 }
691
692 // Ok, now we have to evict someone
693 // Pick a register we hopefully won't need soon
694 // TODO: we might want to follow unconditional jumps here
695 // TODO: get rid of dupe code and make this into a function
// hsn[] starts at 10 for every guest register and is then updated by lsn().
// NOTE(review): presumably a lower value means "needed sooner" - confirm
// against lsn().
696 u_char hsn[MAXREG+1];
697 memset(hsn,10,sizeof(hsn));
698 int j;
699 lsn(hsn,i,&preferred_reg);
700 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
701 if(i>0) {
702 // Don't evict the cycle count at entry points, otherwise the entry
703 // stub will have to write it.
704 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
705 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: scores 10 down to 3 only, skipping the registers used by
// instruction i-1. HOST_CCREG is only considered while hsn[CCREG]>2.
706 for(j=10;j>=3;j--)
707 {
708 for(r=1;r<=MAXREG;r++)
709 {
710 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
711 for(hr=0;hr<HOST_REGS;hr++) {
712 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
713 if(cur->regmap[hr]==r+64) {
714 cur->regmap[hr]=reg;
715 cur->dirty&=~(1<<hr);
716 cur->isconst&=~(1<<hr);
717 return;
718 }
719 }
720 }
721 for(hr=0;hr<HOST_REGS;hr++) {
722 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
723 if(cur->regmap[hr]==r) {
724 cur->regmap[hr]=reg;
725 cur->dirty&=~(1<<hr);
726 cur->isconst&=~(1<<hr);
727 return;
728 }
729 }
730 }
731 }
732 }
733 }
734 }
// Last resort: scan every score from 10 down to 0 with no exclusions.
735 for(j=10;j>=0;j--)
736 {
737 for(r=1;r<=MAXREG;r++)
738 {
739 if(hsn[r]==j) {
740 for(hr=0;hr<HOST_REGS;hr++) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 for(hr=0;hr<HOST_REGS;hr++) {
749 if(cur->regmap[hr]==r) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 }
757 }
758 }
759 printf("This shouldn't happen");exit(1);
760}
761// Allocate a specific ARM register.
762void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
763{
764 int n;
765
766 // see if it's already allocated (and dealloc it)
767 for(n=0;n<HOST_REGS;n++)
768 {
769 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
770 }
771
772 cur->regmap[hr]=reg;
773 cur->dirty&=~(1<<hr);
774 cur->isconst&=~(1<<hr);
775}
776
777// Alloc cycle count into dedicated register
778alloc_cc(struct regstat *cur,int i)
779{
780 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
781}
782
783/* Special alloc */
784
785
786/* Assembler */
787
// Printable names of the 16 ARM registers, indexed by register number.
// Used by the assem_debug() disassembly output.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
805
806void output_byte(u_char byte)
807{
808 *(out++)=byte;
809}
810void output_modrm(u_char mod,u_char rm,u_char ext)
811{
812 assert(mod<4);
813 assert(rm<8);
814 assert(ext<8);
815 u_char byte=(mod<<6)|(ext<<3)|rm;
816 *(out++)=byte;
817}
818void output_sib(u_char scale,u_char index,u_char base)
819{
820 assert(scale<4);
821 assert(index<8);
822 assert(base<8);
823 u_char byte=(scale<<6)|(index<<3)|base;
824 *(out++)=byte;
825}
826void output_w32(u_int word)
827{
828 *((u_int *)out)=word;
829 out+=4;
830}
// Build the register fields of an ARM instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  u_int fields;
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  fields=rn<<16;
  fields|=rd<<12;
  fields|=rm;
  return fields;
}
// Build the rn/rd fields plus an 8-bit immediate that is to be shifted left
// by `shift` bits (shift must be even). The shift is stored in the rotate
// field (bits 8-11) as a right rotation of 32-shift.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  u_int rotate_bits;
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  rotate_bits=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_bits|imm;
}
// Try to express imm as an ARM rotated immediate (an 8-bit value rotated
// right by an even amount).
// On success stores the 12-bit rotate+imm8 field in *encoded and returns 1;
// returns 0 (leaving *encoded untouched) when imm cannot be encoded.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  // Rotate the value right two bits at a time until it fits in 8 bits;
  // rot tracks the rotation that undoes what has been applied so far.
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 860void genimm_checked(u_int imm,u_int *encoded)
861{
862 u_int ret=genimm(imm,encoded);
863 assert(ret);
864}
57871462 865u_int genjmp(u_int addr)
866{
867 int offset=addr-(int)out-8;
e80343e2 868 if(offset<-33554432||offset>=33554432) {
869 if (addr>2) {
870 printf("genjmp: out of range: %08x\n", offset);
871 exit(1);
872 }
873 return 0;
874 }
57871462 875 return ((u_int)offset>>2)&0xffffff;
876}
877
878void emit_mov(int rs,int rt)
879{
880 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
881 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
882}
883
884void emit_movs(int rs,int rt)
885{
886 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
887 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
888}
889
890void emit_add(int rs1,int rs2,int rt)
891{
892 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
893 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
894}
895
896void emit_adds(int rs1,int rs2,int rt)
897{
898 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
899 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
900}
901
902void emit_adcs(int rs1,int rs2,int rt)
903{
904 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
905 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
906}
907
908void emit_sbc(int rs1,int rs2,int rt)
909{
910 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
911 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
912}
913
914void emit_sbcs(int rs1,int rs2,int rt)
915{
916 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
917 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
918}
919
920void emit_neg(int rs, int rt)
921{
922 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
923 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
924}
925
926void emit_negs(int rs, int rt)
927{
928 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
929 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
930}
931
932void emit_sub(int rs1,int rs2,int rt)
933{
934 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
935 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
936}
937
938void emit_subs(int rs1,int rs2,int rt)
939{
940 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
941 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
942}
943
944void emit_zeroreg(int rt)
945{
946 assem_debug("mov %s,#0\n",regname[rt]);
947 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
948}
949
790ee18e 950void emit_loadlp(u_int imm,u_int rt)
951{
952 add_literal((int)out,imm);
953 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
954 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
955}
956void emit_movw(u_int imm,u_int rt)
957{
958 assert(imm<65536);
959 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
960 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
961}
962void emit_movt(u_int imm,u_int rt)
963{
964 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
965 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
966}
967void emit_movimm(u_int imm,u_int rt)
968{
969 u_int armval;
970 if(genimm(imm,&armval)) {
971 assem_debug("mov %s,#%d\n",regname[rt],imm);
972 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
973 }else if(genimm(~imm,&armval)) {
974 assem_debug("mvn %s,#%d\n",regname[rt],imm);
975 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
976 }else if(imm<65536) {
977 #ifdef ARMv5_ONLY
978 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
979 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
980 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
981 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
982 #else
983 emit_movw(imm,rt);
984 #endif
985 }else{
986 #ifdef ARMv5_ONLY
987 emit_loadlp(imm,rt);
988 #else
989 emit_movw(imm&0x0000FFFF,rt);
990 emit_movt(imm&0xFFFF0000,rt);
991 #endif
992 }
993}
994void emit_pcreladdr(u_int rt)
995{
996 assem_debug("add %s,pc,#?\n",regname[rt]);
997 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
998}
999
57871462 1000void emit_loadreg(int r, int hr)
1001{
3d624f89 1002#ifdef FORCE32
1003 if(r&64) {
1004 printf("64bit load in 32bit mode!\n");
7f2607ea 1005 assert(0);
1006 return;
3d624f89 1007 }
1008#endif
57871462 1009 if((r&63)==0)
1010 emit_zeroreg(hr);
1011 else {
3d624f89 1012 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1013 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1014 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1015 if(r==CCREG) addr=(int)&cycle_count;
1016 if(r==CSREG) addr=(int)&Status;
1017 if(r==FSREG) addr=(int)&FCR31;
1018 if(r==INVCP) addr=(int)&invc_ptr;
1019 u_int offset = addr-(u_int)&dynarec_local;
1020 assert(offset<4096);
1021 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1022 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1023 }
1024}
1025void emit_storereg(int r, int hr)
1026{
3d624f89 1027#ifdef FORCE32
1028 if(r&64) {
1029 printf("64bit store in 32bit mode!\n");
7f2607ea 1030 assert(0);
1031 return;
3d624f89 1032 }
1033#endif
1034 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1035 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1036 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1037 if(r==CCREG) addr=(int)&cycle_count;
1038 if(r==FSREG) addr=(int)&FCR31;
1039 u_int offset = addr-(u_int)&dynarec_local;
1040 assert(offset<4096);
1041 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1042 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1043}
1044
1045void emit_test(int rs, int rt)
1046{
1047 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1048 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1049}
1050
1051void emit_testimm(int rs,int imm)
1052{
1053 u_int armval;
1054 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 1055 genimm_checked(imm,&armval);
57871462 1056 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1057}
1058
b9b61529 1059void emit_testeqimm(int rs,int imm)
1060{
1061 u_int armval;
1062 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1063 genimm_checked(imm,&armval);
b9b61529 1064 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1065}
1066
57871462 1067void emit_not(int rs,int rt)
1068{
1069 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1070 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1071}
1072
b9b61529 1073void emit_mvnmi(int rs,int rt)
1074{
1075 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1076 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1077}
1078
57871462 1079void emit_and(u_int rs1,u_int rs2,u_int rt)
1080{
1081 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1082 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1083}
1084
1085void emit_or(u_int rs1,u_int rs2,u_int rt)
1086{
1087 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1088 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1089}
1090void emit_or_and_set_flags(int rs1,int rs2,int rt)
1091{
1092 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1093 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1094}
1095
f70d384d 1096void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1097{
1098 assert(rs<16);
1099 assert(rt<16);
1100 assert(imm<32);
1101 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1102 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1103}
1104
576bbd8f 1105void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1106{
1107 assert(rs<16);
1108 assert(rt<16);
1109 assert(imm<32);
1110 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1111 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1112}
1113
57871462 1114void emit_xor(u_int rs1,u_int rs2,u_int rt)
1115{
1116 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1117 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1118}
1119
57871462 1120void emit_addimm(u_int rs,int imm,u_int rt)
1121{
1122 assert(rs<16);
1123 assert(rt<16);
1124 if(imm!=0) {
1125 assert(imm>-65536&&imm<65536);
1126 u_int armval;
1127 if(genimm(imm,&armval)) {
1128 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1129 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1130 }else if(genimm(-imm,&armval)) {
1131 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1132 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1133 }else if(imm<0) {
1134 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1135 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1136 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1137 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1138 }else{
1139 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1140 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1141 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1142 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1143 }
1144 }
1145 else if(rs!=rt) emit_mov(rs,rt);
1146}
1147
1148void emit_addimm_and_set_flags(int imm,int rt)
1149{
1150 assert(imm>-65536&&imm<65536);
1151 u_int armval;
1152 if(genimm(imm,&armval)) {
1153 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1154 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1155 }else if(genimm(-imm,&armval)) {
1156 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1157 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1158 }else if(imm<0) {
1159 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1160 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1161 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1162 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1163 }else{
1164 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1165 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1166 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1167 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1168 }
1169}
1170void emit_addimm_no_flags(u_int imm,u_int rt)
1171{
1172 emit_addimm(rt,imm,rt);
1173}
1174
1175void emit_addnop(u_int r)
1176{
1177 assert(r<16);
1178 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1179 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1180}
1181
1182void emit_adcimm(u_int rs,int imm,u_int rt)
1183{
1184 u_int armval;
cfbd3c6e 1185 genimm_checked(imm,&armval);
57871462 1186 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1187 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1188}
1189/*void emit_sbcimm(int imm,u_int rt)
1190{
1191 u_int armval;
cfbd3c6e 1192 genimm_checked(imm,&armval);
57871462 1193 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1194 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1195}*/
1196void emit_sbbimm(int imm,u_int rt)
1197{
1198 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1199 assert(rt<8);
1200 if(imm<128&&imm>=-128) {
1201 output_byte(0x83);
1202 output_modrm(3,rt,3);
1203 output_byte(imm);
1204 }
1205 else
1206 {
1207 output_byte(0x81);
1208 output_modrm(3,rt,3);
1209 output_w32(imm);
1210 }
1211}
1212void emit_rscimm(int rs,int imm,u_int rt)
1213{
1214 assert(0);
1215 u_int armval;
cfbd3c6e 1216 genimm_checked(imm,&armval);
57871462 1217 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1218 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1219}
1220
1221void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1222{
1223 // TODO: if(genimm(imm,&armval)) ...
1224 // else
1225 emit_movimm(imm,HOST_TEMPREG);
1226 emit_adds(HOST_TEMPREG,rsl,rtl);
1227 emit_adcimm(rsh,0,rth);
1228}
1229
1230void emit_sbb(int rs1,int rs2)
1231{
1232 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1233 output_byte(0x19);
1234 output_modrm(3,rs1,rs2);
1235}
1236
1237void emit_andimm(int rs,int imm,int rt)
1238{
1239 u_int armval;
790ee18e 1240 if(imm==0) {
1241 emit_zeroreg(rt);
1242 }else if(genimm(imm,&armval)) {
57871462 1243 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1244 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1245 }else if(genimm(~imm,&armval)) {
1246 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1247 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1248 }else if(imm==65535) {
1249 #ifdef ARMv5_ONLY
1250 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1251 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1252 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1253 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1254 #else
1255 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1256 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1257 #endif
1258 }else{
1259 assert(imm>0&&imm<65535);
1260 #ifdef ARMv5_ONLY
1261 assem_debug("mov r14,#%d\n",imm&0xFF00);
1262 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1263 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1264 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1265 #else
1266 emit_movw(imm,HOST_TEMPREG);
1267 #endif
1268 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1269 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1270 }
1271}
1272
1273void emit_orimm(int rs,int imm,int rt)
1274{
1275 u_int armval;
790ee18e 1276 if(imm==0) {
1277 if(rs!=rt) emit_mov(rs,rt);
1278 }else if(genimm(imm,&armval)) {
57871462 1279 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1280 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1281 }else{
1282 assert(imm>0&&imm<65536);
1283 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1284 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1285 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1286 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1287 }
1288}
1289
1290void emit_xorimm(int rs,int imm,int rt)
1291{
57871462 1292 u_int armval;
790ee18e 1293 if(imm==0) {
1294 if(rs!=rt) emit_mov(rs,rt);
1295 }else if(genimm(imm,&armval)) {
57871462 1296 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1297 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1298 }else{
514ed0d9 1299 assert(imm>0&&imm<65536);
57871462 1300 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1301 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1302 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1303 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1304 }
1305}
1306
1307void emit_shlimm(int rs,u_int imm,int rt)
1308{
1309 assert(imm>0);
1310 assert(imm<32);
1311 //if(imm==1) ...
1312 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1313 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1314}
1315
1316void emit_shrimm(int rs,u_int imm,int rt)
1317{
1318 assert(imm>0);
1319 assert(imm<32);
1320 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1321 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1322}
1323
1324void emit_sarimm(int rs,u_int imm,int rt)
1325{
1326 assert(imm>0);
1327 assert(imm<32);
1328 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1329 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1330}
1331
1332void emit_rorimm(int rs,u_int imm,int rt)
1333{
1334 assert(imm>0);
1335 assert(imm<32);
1336 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1337 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1338}
1339
1340void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1341{
1342 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1343 assert(imm>0);
1344 assert(imm<32);
1345 //if(imm==1) ...
1346 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1347 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1348 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1349 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1350}
1351
1352void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1353{
1354 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1355 assert(imm>0);
1356 assert(imm<32);
1357 //if(imm==1) ...
1358 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1359 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1360 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1361 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1362}
1363
b9b61529 1364void emit_signextend16(int rs,int rt)
1365{
1366 #ifdef ARMv5_ONLY
1367 emit_shlimm(rs,16,rt);
1368 emit_sarimm(rt,16,rt);
1369 #else
1370 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1371 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1372 #endif
1373}
1374
57871462 1375void emit_shl(u_int rs,u_int shift,u_int rt)
1376{
1377 assert(rs<16);
1378 assert(rt<16);
1379 assert(shift<16);
1380 //if(imm==1) ...
1381 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1382 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1383}
1384void emit_shr(u_int rs,u_int shift,u_int rt)
1385{
1386 assert(rs<16);
1387 assert(rt<16);
1388 assert(shift<16);
1389 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1390 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1391}
1392void emit_sar(u_int rs,u_int shift,u_int rt)
1393{
1394 assert(rs<16);
1395 assert(rt<16);
1396 assert(shift<16);
1397 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1398 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1399}
1400void emit_shlcl(int r)
1401{
1402 assem_debug("shl %%%s,%%cl\n",regname[r]);
1403 assert(0);
1404}
1405void emit_shrcl(int r)
1406{
1407 assem_debug("shr %%%s,%%cl\n",regname[r]);
1408 assert(0);
1409}
1410void emit_sarcl(int r)
1411{
1412 assem_debug("sar %%%s,%%cl\n",regname[r]);
1413 assert(0);
1414}
1415
1416void emit_shldcl(int r1,int r2)
1417{
1418 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1419 assert(0);
1420}
1421void emit_shrdcl(int r1,int r2)
1422{
1423 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1424 assert(0);
1425}
1426void emit_orrshl(u_int rs,u_int shift,u_int rt)
1427{
1428 assert(rs<16);
1429 assert(rt<16);
1430 assert(shift<16);
1431 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1432 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1433}
1434void emit_orrshr(u_int rs,u_int shift,u_int rt)
1435{
1436 assert(rs<16);
1437 assert(rt<16);
1438 assert(shift<16);
1439 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1440 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1441}
1442
1443void emit_cmpimm(int rs,int imm)
1444{
1445 u_int armval;
1446 if(genimm(imm,&armval)) {
1447 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1448 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1449 }else if(genimm(-imm,&armval)) {
1450 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1451 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1452 }else if(imm>0) {
1453 assert(imm<65536);
1454 #ifdef ARMv5_ONLY
1455 emit_movimm(imm,HOST_TEMPREG);
1456 #else
1457 emit_movw(imm,HOST_TEMPREG);
1458 #endif
1459 assem_debug("cmp %s,r14\n",regname[rs]);
1460 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1461 }else{
1462 assert(imm>-65536);
1463 #ifdef ARMv5_ONLY
1464 emit_movimm(-imm,HOST_TEMPREG);
1465 #else
1466 emit_movw(-imm,HOST_TEMPREG);
1467 #endif
1468 assem_debug("cmn %s,r14\n",regname[rs]);
1469 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1470 }
1471}
1472
1473void emit_cmovne(u_int *addr,int rt)
1474{
1475 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1476 assert(0);
1477}
1478void emit_cmovl(u_int *addr,int rt)
1479{
1480 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1481 assert(0);
1482}
1483void emit_cmovs(u_int *addr,int rt)
1484{
1485 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1486 assert(0);
1487}
1488void emit_cmovne_imm(int imm,int rt)
1489{
1490 assem_debug("movne %s,#%d\n",regname[rt],imm);
1491 u_int armval;
cfbd3c6e 1492 genimm_checked(imm,&armval);
57871462 1493 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1494}
1495void emit_cmovl_imm(int imm,int rt)
1496{
1497 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1498 u_int armval;
cfbd3c6e 1499 genimm_checked(imm,&armval);
57871462 1500 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1501}
1502void emit_cmovb_imm(int imm,int rt)
1503{
1504 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1505 u_int armval;
cfbd3c6e 1506 genimm_checked(imm,&armval);
57871462 1507 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1508}
1509void emit_cmovs_imm(int imm,int rt)
1510{
1511 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1512 u_int armval;
cfbd3c6e 1513 genimm_checked(imm,&armval);
57871462 1514 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1515}
1516void emit_cmove_reg(int rs,int rt)
1517{
1518 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1519 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1520}
1521void emit_cmovne_reg(int rs,int rt)
1522{
1523 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1524 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1525}
1526void emit_cmovl_reg(int rs,int rt)
1527{
1528 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1529 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1530}
1531void emit_cmovs_reg(int rs,int rt)
1532{
1533 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1534 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1535}
1536
// rt = (rs < imm) using a signed 32-bit compare.
// rt is zeroed before the compare when it is a distinct register, and
// after the compare (via emit_movimm) when it is the same register as rs.
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs < imm) using an unsigned 32-bit compare (movcc after cmp).
// rt is zeroed before the compare when it is a distinct register, and
// after the compare (via emit_movimm) when it is the same register as rs.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Signed "set if less than immediate" on the 64-bit pair rsh:rsl, 32-bit
// result in rt.  The low-word result from emit_slti32 is overridden when
// the high word differs from the immediate's sign extension (0 for
// imm >= 0, -1 for imm < 0).  rsh must not be rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned "set if less than immediate" on the 64-bit pair rsh:rsl, 32-bit
// result in rt.  The low-word result from emit_sltiu32 is overridden when
// the high word differs from the immediate's sign extension: forced to 0
// for imm >= 0, forced to 1 for imm < 0.  rsh must not be rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1583
1584void emit_cmp(int rs,int rt)
1585{
1586 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1587 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1588}
// rt = (rs > 0), signed: compare rs with 1, load 1, then replace it with 0
// on "less than".
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);      // default result
  emit_cmovl_imm(0,rt);   // rs < 1
}
// rt = (rs != 0).  Flags come from emit_test when rs and rt are the same
// register, otherwise from a flag-setting move of rs into rt; rt is then
// set to 1 on "not equal".
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// rt = (rsh:rsl > 0) for a signed 64-bit pair: start from the low-word
// result, then let the high word override it (1 when non-zero, 0 when
// negative).
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);   // high word non-zero
  emit_cmovs_imm(0,rt);    // high word negative
}
// rt = (rsh:rsl != 0): OR both halves into rt with flags, then set rt to 1
// on "not equal".
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);   // rt = rsh | rsl, flags updated
  emit_cmovne_imm(1,rt);
}
// rt = (rs1 < rs2), signed.  rt is zeroed before the compare when it is
// neither source register, otherwise after it (via emit_movimm).
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs1 < rs2), unsigned (movcc after cmp).  rt is zeroed before the
// compare when it is neither source register, otherwise after it (via
// emit_movimm).
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1633void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1634{
1635 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1636 assert(u1!=rt);
1637 assert(u2!=rt);
1638 emit_cmp(l1,l2);
1639 emit_movimm(0,rt);
1640 emit_sbcs(u1,u2,HOST_TEMPREG);
1641 emit_cmovl_imm(1,rt);
1642}
1643void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1644{
1645 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1646 assert(u1!=rt);
1647 assert(u2!=rt);
1648 emit_cmp(l1,l2);
1649 emit_movimm(0,rt);
1650 emit_sbcs(u1,u2,HOST_TEMPREG);
1651 emit_cmovb_imm(1,rt);
1652}
1653
1654void emit_call(int a)
1655{
1656 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1657 u_int offset=genjmp(a);
1658 output_w32(0xeb000000|offset);
1659}
1660void emit_jmp(int a)
1661{
1662 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1663 u_int offset=genjmp(a);
1664 output_w32(0xea000000|offset);
1665}
// Emit "bne" to address a; offset field from genjmp().
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq" to address a; offset field from genjmp().
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi" to address a; offset field from genjmp().
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl" to address a; offset field from genjmp().
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt" to address a; offset field from genjmp().
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge" to address a; offset field from genjmp().
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc" to address a; offset field from genjmp().
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs" to address a; offset field from genjmp().
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc" to address a; offset field from genjmp().
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1720
// Unimplemented placeholder: traces "push $imm" and then always trips
// assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
// Unimplemented placeholder: traces "pusha" and then always trips
// assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented placeholder: traces "popa" and then always trips
// assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1736void emit_pushreg(u_int r)
1737{
1738 assem_debug("push %%%s\n",regname[r]);
1739 assert(0);
1740}
1741void emit_popreg(u_int r)
1742{
1743 assem_debug("pop %%%s\n",regname[r]);
1744 assert(0);
1745}
1746void emit_callreg(u_int r)
1747{
1748 assem_debug("call *%%%s\n",regname[r]);
1749 assert(0);
1750}
1751void emit_jmpreg(u_int r)
1752{
1753 assem_debug("mov pc,%s\n",regname[r]);
1754 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1755}
1756
1757void emit_readword_indexed(int offset, int rs, int rt)
1758{
1759 assert(offset>-4096&&offset<4096);
1760 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1761 if(offset>=0) {
1762 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1763 }else{
1764 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1765 }
1766}
1767void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1768{
1769 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1770 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1771}
// Word load: offset addressing when map < 0, otherwise register-indexed by
// map (scaled by 4), in which case addr must be 0.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// 64-bit load into rh (word at addr, skipped when rh < 0) and rl (next
// word).  Without a map register the words are read at addr and addr+4;
// with one, map is incremented by 1 between the two map-indexed loads.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1792void emit_movsbl_indexed(int offset, int rs, int rt)
1793{
1794 assert(offset>-256&&offset<256);
1795 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1796 if(offset>=0) {
1797 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1798 }else{
1799 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1800 }
1801}
1802void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1803{
1804 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1805 else {
1806 if(addr==0) {
1807 emit_shlimm(map,2,map);
1808 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1809 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1810 }else{
1811 assert(addr>-256&&addr<256);
1812 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1813 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1814 emit_movsbl_indexed(addr, rt, rt);
1815 }
1816 }
1817}
1818void emit_movswl_indexed(int offset, int rs, int rt)
1819{
1820 assert(offset>-256&&offset<256);
1821 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1822 if(offset>=0) {
1823 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1824 }else{
1825 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1826 }
1827}
1828void emit_movzbl_indexed(int offset, int rs, int rt)
1829{
1830 assert(offset>-4096&&offset<4096);
1831 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1832 if(offset>=0) {
1833 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1834 }else{
1835 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1836 }
1837}
1838void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1839{
1840 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1841 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1842}
// Zero-extending byte load, optionally through a map register.  With a map
// register and a non-zero addr, rs+addr is first formed in rt and used as
// the base of the map-indexed load.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
  }else{
    emit_movzbl_dualindexedx4(rs, map, rt);
  }
}
1855void emit_movzwl_indexed(int offset, int rs, int rt)
1856{
1857 assert(offset>-256&&offset<256);
1858 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1859 if(offset>=0) {
1860 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1861 }else{
1862 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1863 }
1864}
1865void emit_readword(int addr, int rt)
1866{
1867 u_int offset = addr-(u_int)&dynarec_local;
1868 assert(offset<4096);
1869 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1870 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1871}
1872void emit_movsbl(int addr, int rt)
1873{
1874 u_int offset = addr-(u_int)&dynarec_local;
1875 assert(offset<256);
1876 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1877 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1878}
1879void emit_movswl(int addr, int rt)
1880{
1881 u_int offset = addr-(u_int)&dynarec_local;
1882 assert(offset<256);
1883 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1884 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1885}
1886void emit_movzbl(int addr, int rt)
1887{
1888 u_int offset = addr-(u_int)&dynarec_local;
1889 assert(offset<4096);
1890 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1891 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1892}
1893void emit_movzwl(int addr, int rt)
1894{
1895 u_int offset = addr-(u_int)&dynarec_local;
1896 assert(offset<256);
1897 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1898 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1899}
1900void emit_movzwl_reg(int rs, int rt)
1901{
1902 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1903 assert(0);
1904}
1905
1906void emit_xchg(int rs, int rt)
1907{
1908 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1909 assert(0);
1910}
1911void emit_writeword_indexed(int rt, int offset, int rs)
1912{
1913 assert(offset>-4096&&offset<4096);
1914 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1915 if(offset>=0) {
1916 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1917 }else{
1918 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1919 }
1920}
1921void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1922{
1923 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1924 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1925}
// Word store: offset addressing when map < 0, otherwise register-indexed
// by map (scaled by 4), in which case addr must be 0.  temp is not used.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// 64-bit store of rh (word at addr) and rl (next word).
//  - map < 0: two offset stores at addr and addr+4 (rh skipped if < 0).
//  - map >= 0 (rh must be valid): the second word is addressed either via
//    temp = map + 1 when temp is a separate register from rs, or by adding
//    4 to rs in place and reusing map.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1950void emit_writehword_indexed(int rt, int offset, int rs)
1951{
1952 assert(offset>-256&&offset<256);
1953 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1954 if(offset>=0) {
1955 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1956 }else{
1957 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1958 }
1959}
1960void emit_writebyte_indexed(int rt, int offset, int rs)
1961{
1962 assert(offset>-4096&&offset<4096);
1963 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1964 if(offset>=0) {
1965 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1966 }else{
1967 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1968 }
1969}
1970void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1971{
1972 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1973 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1974}
// Byte store, optionally through a map register.  With a map register and
// a non-zero addr, rs+addr is first formed in temp and used as the base of
// the map-indexed store.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
  }else{
    emit_writebyte_dualindexedx4(rt, rs, map);
  }
}
1987void emit_writeword(int rt, int addr)
1988{
1989 u_int offset = addr-(u_int)&dynarec_local;
1990 assert(offset<4096);
1991 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1992 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1993}
1994void emit_writehword(int rt, int addr)
1995{
1996 u_int offset = addr-(u_int)&dynarec_local;
1997 assert(offset<256);
1998 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1999 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2000}
2001void emit_writebyte(int rt, int addr)
2002{
2003 u_int offset = addr-(u_int)&dynarec_local;
2004 assert(offset<4096);
74426039 2005 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2006 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2007}
// Unimplemented placeholder: traces "movl $imm,addr" and then always
// trips assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// Unimplemented placeholder: traces "movb $imm,addr" and then always
// trips assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2018
2019void emit_mul(int rs)
2020{
2021 assem_debug("mul %%%s\n",regname[rs]);
2022 assert(0);
2023}
2024void emit_imul(int rs)
2025{
2026 assem_debug("imul %%%s\n",regname[rs]);
2027 assert(0);
2028}
2029void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2030{
2031 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2032 assert(rs1<16);
2033 assert(rs2<16);
2034 assert(hi<16);
2035 assert(lo<16);
2036 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2037}
2038void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2039{
2040 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2041 assert(rs1<16);
2042 assert(rs2<16);
2043 assert(hi<16);
2044 assert(lo<16);
2045 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2046}
2047
2048void emit_div(int rs)
2049{
2050 assem_debug("div %%%s\n",regname[rs]);
2051 assert(0);
2052}
2053void emit_idiv(int rs)
2054{
2055 assem_debug("idiv %%%s\n",regname[rs]);
2056 assert(0);
2057}
// Unimplemented placeholder: traces "cdq" and then always trips
// assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2063
2064void emit_clz(int rs,int rt)
2065{
2066 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2067 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2068}
2069
2070void emit_subcs(int rs1,int rs2,int rt)
2071{
2072 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2073 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2074}
2075
2076void emit_shrcc_imm(int rs,u_int imm,int rt)
2077{
2078 assert(imm>0);
2079 assert(imm<32);
2080 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2081 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2082}
2083
2084void emit_negmi(int rs, int rt)
2085{
2086 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2087 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2088}
2089
2090void emit_negsmi(int rs, int rt)
2091{
2092 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2093 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2094}
2095
2096void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2097{
2098 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2099 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2100}
2101
2102void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2103{
2104 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2105 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2106}
2107
2108void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2109{
2110 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2111 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2112}
2113
2114void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2115{
2116 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2117 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2118}
2119
2120void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2121{
2122 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2123 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2124}
2125
2126void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2127{
2128 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2129 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2130}
2131
2132void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2133{
2134 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2135 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2136}
2137
2138void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2139{
2140 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2141 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2142}
2143
2144void emit_teq(int rs, int rt)
2145{
2146 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2147 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2148}
2149
2150void emit_rsbimm(int rs, int imm, int rt)
2151{
2152 u_int armval;
cfbd3c6e 2153 genimm_checked(imm,&armval);
57871462 2154 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2155 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2156}
2157
2158// Load 2 immediates optimizing for small code size
2159void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2160{
2161 emit_movimm(imm1,rt1);
2162 u_int armval;
2163 if(genimm(imm2-imm1,&armval)) {
2164 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2165 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2166 }else if(genimm(imm1-imm2,&armval)) {
2167 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2168 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2169 }
2170 else emit_movimm(imm2,rt2);
2171}
2172
2173// Conditionally select one of two immediates, optimizing for small code size
2174// This will only be called if HAVE_CMOV_IMM is defined
2175void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2176{
2177 u_int armval;
2178 if(genimm(imm2-imm1,&armval)) {
2179 emit_movimm(imm1,rt);
2180 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2181 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2182 }else if(genimm(imm1-imm2,&armval)) {
2183 emit_movimm(imm1,rt);
2184 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2185 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2186 }
2187 else {
2188 #ifdef ARMv5_ONLY
2189 emit_movimm(imm1,rt);
2190 add_literal((int)out,imm2);
2191 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2192 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2193 #else
2194 emit_movw(imm1&0x0000FFFF,rt);
2195 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2196 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2197 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2198 }
2199 emit_movt(imm1&0xFFFF0000,rt);
2200 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2201 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2202 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2203 }
2204 #endif
2205 }
2206}
2207
// Special case for checking invalid_code.
// Not implemented in this backend: reaching it trips assert(0), and none of
// the parameters are used.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2213
2214// special case for checking invalid_code
2215void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2216{
2217 assert(imm<128&&imm>=0);
2218 assert(r>=0&&r<16);
2219 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2220 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2221 emit_cmpimm(HOST_TEMPREG,imm);
2222}
2223
2224// special case for tlb mapping
2225void emit_addsr12(int rs1,int rs2,int rt)
2226{
2227 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2228 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2229}
2230
0bbd1454 2231void emit_callne(int a)
2232{
2233 assem_debug("blne %x\n",a);
2234 u_int offset=genjmp(a);
2235 output_w32(0x1b000000|offset);
2236}
2237
// Used to preload hash table entries.
// Writes the bytes 0x0F, 0x18, a modrm byte built by output_modrm(0,5,1) and
// then the address as a 32-bit word.
// NOTE(review): this byte sequence looks like an x86 encoding rather than an
// ARM instruction - confirm whether this function is ever called by the ARM
// backend (emit_prefetchreg below emits an ARM "pld").
void emit_prefetch(void *addr)
{
  int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2247void emit_prefetchreg(int r)
2248{
2249 assem_debug("pld %s\n",regname[r]);
2250 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2251}
2252
2253// Special case for mini_ht
2254void emit_ldreq_indexed(int rs, u_int offset, int rt)
2255{
2256 assert(offset<4096);
2257 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2258 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2259}
2260
2261void emit_flds(int r,int sr)
2262{
2263 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2264 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2265}
2266
2267void emit_vldr(int r,int vr)
2268{
2269 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2270 output_w32(0xed900b00|(vr<<12)|(r<<16));
2271}
2272
2273void emit_fsts(int sr,int r)
2274{
2275 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2276 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2277}
2278
2279void emit_vstr(int vr,int r)
2280{
2281 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2282 output_w32(0xed800b00|(vr<<12)|(r<<16));
2283}
2284
2285void emit_ftosizs(int s,int d)
2286{
2287 assem_debug("ftosizs s%d,s%d\n",d,s);
2288 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2289}
2290
2291void emit_ftosizd(int s,int d)
2292{
2293 assem_debug("ftosizd s%d,d%d\n",d,s);
2294 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2295}
2296
2297void emit_fsitos(int s,int d)
2298{
2299 assem_debug("fsitos s%d,s%d\n",d,s);
2300 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2301}
2302
2303void emit_fsitod(int s,int d)
2304{
2305 assem_debug("fsitod d%d,s%d\n",d,s);
2306 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2307}
2308
2309void emit_fcvtds(int s,int d)
2310{
2311 assem_debug("fcvtds d%d,s%d\n",d,s);
2312 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2313}
2314
2315void emit_fcvtsd(int s,int d)
2316{
2317 assem_debug("fcvtsd s%d,d%d\n",d,s);
2318 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2319}
2320
/* Emit "fsqrts s<d>,s<s>".  Both operands use the single-precision register
 * layout (number split into bits 1-3 and bit 0), so the log line names both
 * as s-registers. */
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2326
/* Emit "fsqrtd d<d>,d<s>".  Both operands use the double-precision register
 * layout (low three bits of the number), so the log line names both as
 * d-registers. */
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2332
/* Emit "fabss s<d>,s<s>".  Both operands use the single-precision register
 * layout (number split into bits 1-3 and bit 0), so the log line names both
 * as s-registers. */
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2338
/* Emit "fabsd d<d>,d<s>".  Both operands use the double-precision register
 * layout (low three bits of the number), so the log line names both as
 * d-registers. */
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2344
/* Emit "fnegs s<d>,s<s>".  Both operands use the single-precision register
 * layout (number split into bits 1-3 and bit 0), so the log line names both
 * as s-registers. */
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2350
/* Emit "fnegd d<d>,d<s>".  Both operands use the double-precision register
 * layout (low three bits of the number), so the log line names both as
 * d-registers. */
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2356
2357void emit_fadds(int s1,int s2,int d)
2358{
2359 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2360 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2361}
2362
2363void emit_faddd(int s1,int s2,int d)
2364{
2365 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2366 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2367}
2368
2369void emit_fsubs(int s1,int s2,int d)
2370{
2371 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2372 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2373}
2374
2375void emit_fsubd(int s1,int s2,int d)
2376{
2377 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2378 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2379}
2380
2381void emit_fmuls(int s1,int s2,int d)
2382{
2383 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2384 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2385}
2386
2387void emit_fmuld(int s1,int s2,int d)
2388{
2389 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2390 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2391}
2392
2393void emit_fdivs(int s1,int s2,int d)
2394{
2395 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2396 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2397}
2398
2399void emit_fdivd(int s1,int s2,int d)
2400{
2401 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2402 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2403}
2404
2405void emit_fcmps(int x,int y)
2406{
2407 assem_debug("fcmps s14, s15\n");
2408 output_w32(0xeeb47a67);
2409}
2410
2411void emit_fcmpd(int x,int y)
2412{
2413 assem_debug("fcmpd d6, d7\n");
2414 output_w32(0xeeb46b47);
2415}
2416
2417void emit_fmstat()
2418{
2419 assem_debug("fmstat\n");
2420 output_w32(0xeef1fa10);
2421}
2422
2423void emit_bicne_imm(int rs,int imm,int rt)
2424{
2425 u_int armval;
cfbd3c6e 2426 genimm_checked(imm,&armval);
57871462 2427 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2428 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2429}
2430
2431void emit_biccs_imm(int rs,int imm,int rt)
2432{
2433 u_int armval;
cfbd3c6e 2434 genimm_checked(imm,&armval);
57871462 2435 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2436 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2437}
2438
2439void emit_bicvc_imm(int rs,int imm,int rt)
2440{
2441 u_int armval;
cfbd3c6e 2442 genimm_checked(imm,&armval);
57871462 2443 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2444 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2445}
2446
2447void emit_bichi_imm(int rs,int imm,int rt)
2448{
2449 u_int armval;
cfbd3c6e 2450 genimm_checked(imm,&armval);
57871462 2451 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2452 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2453}
2454
2455void emit_orrvs_imm(int rs,int imm,int rt)
2456{
2457 u_int armval;
cfbd3c6e 2458 genimm_checked(imm,&armval);
57871462 2459 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2460 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2461}
2462
b9b61529 2463void emit_orrne_imm(int rs,int imm,int rt)
2464{
2465 u_int armval;
cfbd3c6e 2466 genimm_checked(imm,&armval);
b9b61529 2467 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2468 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2469}
2470
2471void emit_andne_imm(int rs,int imm,int rt)
2472{
2473 u_int armval;
cfbd3c6e 2474 genimm_checked(imm,&armval);
b9b61529 2475 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2476 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2477}
2478
57871462 2479void emit_jno_unlikely(int a)
2480{
2481 //emit_jno(a);
2482 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2483 output_w32(0x72800000|rd_rn_rm(15,15,0));
2484}
2485
2486// Save registers before function call
2487void save_regs(u_int reglist)
2488{
2489 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2490 if(!reglist) return;
2491 assem_debug("stmia fp,{");
2492 if(reglist&1) assem_debug("r0, ");
2493 if(reglist&2) assem_debug("r1, ");
2494 if(reglist&4) assem_debug("r2, ");
2495 if(reglist&8) assem_debug("r3, ");
2496 if(reglist&0x1000) assem_debug("r12");
2497 assem_debug("}\n");
2498 output_w32(0xe88b0000|reglist);
2499}
2500// Restore registers after function call
2501void restore_regs(u_int reglist)
2502{
2503 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2504 if(!reglist) return;
2505 assem_debug("ldmia fp,{");
2506 if(reglist&1) assem_debug("r0, ");
2507 if(reglist&2) assem_debug("r1, ");
2508 if(reglist&4) assem_debug("r2, ");
2509 if(reglist&8) assem_debug("r3, ");
2510 if(reglist&0x1000) assem_debug("r12");
2511 assem_debug("}\n");
2512 output_w32(0xe89b0000|reglist);
2513}
2514
2515// Write back consts using r14 so we don't disturb the other registers
2516void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2517{
2518 int hr;
2519 for(hr=0;hr<HOST_REGS;hr++) {
2520 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2521 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2522 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2523 int value=constmap[i][hr];
2524 if(value==0) {
2525 emit_zeroreg(HOST_TEMPREG);
2526 }
2527 else {
2528 emit_movimm(value,HOST_TEMPREG);
2529 }
2530 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2531#ifndef FORCE32
57871462 2532 if((i_is32>>i_regmap[hr])&1) {
2533 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2534 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2535 }
24385cae 2536#endif
57871462 2537 }
2538 }
2539 }
2540 }
2541}
2542
2543/* Stubs/epilogue */
2544
2545void literal_pool(int n)
2546{
2547 if(!literalcount) return;
2548 if(n) {
2549 if((int)out-literals[0][0]<4096-n) return;
2550 }
2551 u_int *ptr;
2552 int i;
2553 for(i=0;i<literalcount;i++)
2554 {
2555 ptr=(u_int *)literals[i][0];
2556 u_int offset=(u_int)out-(u_int)ptr-8;
2557 assert(offset<4096);
2558 assert(!(offset&3));
2559 *ptr|=offset;
2560 output_w32(literals[i][1]);
2561 }
2562 literalcount=0;
2563}
2564
2565void literal_pool_jumpover(int n)
2566{
2567 if(!literalcount) return;
2568 if(n) {
2569 if((int)out-literals[0][0]<4096-n) return;
2570 }
2571 int jaddr=(int)out;
2572 emit_jmp(0);
2573 literal_pool(0);
2574 set_jump_target(jaddr,(int)out);
2575}
2576
2577emit_extjump2(int addr, int target, int linker)
2578{
2579 u_char *ptr=(u_char *)addr;
2580 assert((ptr[3]&0x0e)==0xa);
2581 emit_loadlp(target,0);
2582 emit_loadlp(addr,1);
24385cae 2583 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2584 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2585//DEBUG >
2586#ifdef DEBUG_CYCLE_COUNT
2587 emit_readword((int)&last_count,ECX);
2588 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2589 emit_readword((int)&next_interupt,ECX);
2590 emit_writeword(HOST_CCREG,(int)&Count);
2591 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2592 emit_writeword(ECX,(int)&last_count);
2593#endif
2594//DEBUG <
2595 emit_jmp(linker);
2596}
2597
2598emit_extjump(int addr, int target)
2599{
2600 emit_extjump2(addr, target, (int)dyna_linker);
2601}
2602emit_extjump_ds(int addr, int target)
2603{
2604 emit_extjump2(addr, target, (int)dyna_linker_ds);
2605}
2606
cbbab9cd 2607#ifdef PCSX
2608#include "pcsxmem_inline.c"
2609#endif
2610
57871462 2611do_readstub(int n)
2612{
2613 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2614 literal_pool(256);
2615 set_jump_target(stubs[n][1],(int)out);
2616 int type=stubs[n][0];
2617 int i=stubs[n][3];
2618 int rs=stubs[n][4];
2619 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2620 u_int reglist=stubs[n][7];
2621 signed char *i_regmap=i_regs->regmap;
2622 int addr=get_reg(i_regmap,AGEN1+(i&1));
2623 int rth,rt;
2624 int ds;
b9b61529 2625 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2626 rth=get_reg(i_regmap,FTEMP|64);
2627 rt=get_reg(i_regmap,FTEMP);
2628 }else{
2629 rth=get_reg(i_regmap,rt1[i]|64);
2630 rt=get_reg(i_regmap,rt1[i]);
2631 }
2632 assert(rs>=0);
57871462 2633 if(addr<0) addr=rt;
535d208a 2634 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2635 assert(addr>=0);
2636 int ftable=0;
2637 if(type==LOADB_STUB||type==LOADBU_STUB)
2638 ftable=(int)readmemb;
2639 if(type==LOADH_STUB||type==LOADHU_STUB)
2640 ftable=(int)readmemh;
2641 if(type==LOADW_STUB)
2642 ftable=(int)readmem;
24385cae 2643#ifndef FORCE32
57871462 2644 if(type==LOADD_STUB)
2645 ftable=(int)readmemd;
24385cae 2646#endif
2647 assert(ftable!=0);
57871462 2648 emit_writeword(rs,(int)&address);
2649 //emit_pusha();
2650 save_regs(reglist);
97a238a6 2651#ifndef PCSX
57871462 2652 ds=i_regs!=&regs[i];
2653 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2654 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2655 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2656 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2657 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2658#endif
57871462 2659 emit_shrimm(rs,16,1);
2660 int cc=get_reg(i_regmap,CCREG);
2661 if(cc<0) {
2662 emit_loadreg(CCREG,2);
2663 }
2664 emit_movimm(ftable,0);
2665 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2666#ifndef PCSX
57871462 2667 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2668#endif
57871462 2669 //emit_readword((int)&last_count,temp);
2670 //emit_add(cc,temp,cc);
2671 //emit_writeword(cc,(int)&Count);
2672 //emit_mov(15,14);
2673 emit_call((int)&indirect_jump_indexed);
2674 //emit_callreg(rs);
2675 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2676#ifndef PCSX
57871462 2677 // We really shouldn't need to update the count here,
2678 // but not doing so causes random crashes...
2679 emit_readword((int)&Count,HOST_TEMPREG);
2680 emit_readword((int)&next_interupt,2);
2681 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2682 emit_writeword(2,(int)&last_count);
2683 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2684 if(cc<0) {
2685 emit_storereg(CCREG,HOST_TEMPREG);
2686 }
f51dc36c 2687#endif
57871462 2688 //emit_popa();
2689 restore_regs(reglist);
2690 //if((cc=get_reg(regmap,CCREG))>=0) {
2691 // emit_loadreg(CCREG,cc);
2692 //}
f18c0f46 2693 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2694 assert(rt>=0);
2695 if(type==LOADB_STUB)
2696 emit_movsbl((int)&readmem_dword,rt);
2697 if(type==LOADBU_STUB)
2698 emit_movzbl((int)&readmem_dword,rt);
2699 if(type==LOADH_STUB)
2700 emit_movswl((int)&readmem_dword,rt);
2701 if(type==LOADHU_STUB)
2702 emit_movzwl((int)&readmem_dword,rt);
2703 if(type==LOADW_STUB)
2704 emit_readword((int)&readmem_dword,rt);
2705 if(type==LOADD_STUB) {
2706 emit_readword((int)&readmem_dword,rt);
2707 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2708 }
57871462 2709 }
2710 emit_jmp(stubs[n][2]); // return address
2711}
2712
2713inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2714{
2715 int rs=get_reg(regmap,target);
2716 int rth=get_reg(regmap,target|64);
2717 int rt=get_reg(regmap,target);
535d208a 2718 if(rs<0) rs=get_reg(regmap,-1);
57871462 2719 assert(rs>=0);
57871462 2720 int ftable=0;
2721 if(type==LOADB_STUB||type==LOADBU_STUB)
2722 ftable=(int)readmemb;
2723 if(type==LOADH_STUB||type==LOADHU_STUB)
2724 ftable=(int)readmemh;
2725 if(type==LOADW_STUB)
2726 ftable=(int)readmem;
24385cae 2727#ifndef FORCE32
57871462 2728 if(type==LOADD_STUB)
2729 ftable=(int)readmemd;
24385cae 2730#endif
2731 assert(ftable!=0);
cbbab9cd 2732#ifdef PCSX
2733 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2734 return;
2735#endif
fd99c415 2736 if(target==0)
2737 emit_movimm(addr,rs);
57871462 2738 emit_writeword(rs,(int)&address);
2739 //emit_pusha();
2740 save_regs(reglist);
2741 //emit_shrimm(rs,16,1);
2742 int cc=get_reg(regmap,CCREG);
2743 if(cc<0) {
2744 emit_loadreg(CCREG,2);
2745 }
2746 //emit_movimm(ftable,0);
2747 emit_movimm(((u_int *)ftable)[addr>>16],0);
2748 //emit_readword((int)&last_count,12);
2749 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2750#ifndef PCSX
57871462 2751 if((signed int)addr>=(signed int)0xC0000000) {
2752 // Pagefault address
2753 int ds=regmap!=regs[i].regmap;
2754 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2755 }
f51dc36c 2756#endif
57871462 2757 //emit_add(12,2,2);
2758 //emit_writeword(2,(int)&Count);
2759 //emit_call(((u_int *)ftable)[addr>>16]);
2760 emit_call((int)&indirect_jump);
f51dc36c 2761#ifndef PCSX
57871462 2762 // We really shouldn't need to update the count here,
2763 // but not doing so causes random crashes...
2764 emit_readword((int)&Count,HOST_TEMPREG);
2765 emit_readword((int)&next_interupt,2);
2766 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2767 emit_writeword(2,(int)&last_count);
2768 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2769 if(cc<0) {
2770 emit_storereg(CCREG,HOST_TEMPREG);
2771 }
f51dc36c 2772#endif
57871462 2773 //emit_popa();
2774 restore_regs(reglist);
fd99c415 2775 if(rt>=0) {
2776 if(type==LOADB_STUB)
2777 emit_movsbl((int)&readmem_dword,rt);
2778 if(type==LOADBU_STUB)
2779 emit_movzbl((int)&readmem_dword,rt);
2780 if(type==LOADH_STUB)
2781 emit_movswl((int)&readmem_dword,rt);
2782 if(type==LOADHU_STUB)
2783 emit_movzwl((int)&readmem_dword,rt);
2784 if(type==LOADW_STUB)
2785 emit_readword((int)&readmem_dword,rt);
2786 if(type==LOADD_STUB) {
2787 emit_readword((int)&readmem_dword,rt);
2788 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2789 }
57871462 2790 }
2791}
2792
2793do_writestub(int n)
2794{
2795 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2796 literal_pool(256);
2797 set_jump_target(stubs[n][1],(int)out);
2798 int type=stubs[n][0];
2799 int i=stubs[n][3];
2800 int rs=stubs[n][4];
2801 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2802 u_int reglist=stubs[n][7];
2803 signed char *i_regmap=i_regs->regmap;
2804 int addr=get_reg(i_regmap,AGEN1+(i&1));
2805 int rth,rt,r;
2806 int ds;
b9b61529 2807 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2808 rth=get_reg(i_regmap,FTEMP|64);
2809 rt=get_reg(i_regmap,r=FTEMP);
2810 }else{
2811 rth=get_reg(i_regmap,rs2[i]|64);
2812 rt=get_reg(i_regmap,r=rs2[i]);
2813 }
2814 assert(rs>=0);
2815 assert(rt>=0);
2816 if(addr<0) addr=get_reg(i_regmap,-1);
2817 assert(addr>=0);
2818 int ftable=0;
2819 if(type==STOREB_STUB)
2820 ftable=(int)writememb;
2821 if(type==STOREH_STUB)
2822 ftable=(int)writememh;
2823 if(type==STOREW_STUB)
2824 ftable=(int)writemem;
24385cae 2825#ifndef FORCE32
57871462 2826 if(type==STORED_STUB)
2827 ftable=(int)writememd;
24385cae 2828#endif
2829 assert(ftable!=0);
57871462 2830 emit_writeword(rs,(int)&address);
2831 //emit_shrimm(rs,16,rs);
2832 //emit_movmem_indexedx4(ftable,rs,rs);
2833 if(type==STOREB_STUB)
2834 emit_writebyte(rt,(int)&byte);
2835 if(type==STOREH_STUB)
2836 emit_writehword(rt,(int)&hword);
2837 if(type==STOREW_STUB)
2838 emit_writeword(rt,(int)&word);
2839 if(type==STORED_STUB) {
3d624f89 2840#ifndef FORCE32
57871462 2841 emit_writeword(rt,(int)&dword);
2842 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2843#else
2844 printf("STORED_STUB\n");
2845#endif
57871462 2846 }
2847 //emit_pusha();
2848 save_regs(reglist);
97a238a6 2849#ifndef PCSX
57871462 2850 ds=i_regs!=&regs[i];
2851 int real_rs=get_reg(i_regmap,rs1[i]);
2852 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2853 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2854 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2855 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2856#endif
57871462 2857 emit_shrimm(rs,16,1);
2858 int cc=get_reg(i_regmap,CCREG);
2859 if(cc<0) {
2860 emit_loadreg(CCREG,2);
2861 }
2862 emit_movimm(ftable,0);
2863 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2864#ifndef PCSX
57871462 2865 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2866#endif
57871462 2867 //emit_readword((int)&last_count,temp);
2868 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2869 //emit_add(cc,temp,cc);
2870 //emit_writeword(cc,(int)&Count);
2871 emit_call((int)&indirect_jump_indexed);
2872 //emit_callreg(rs);
2873 emit_readword((int)&Count,HOST_TEMPREG);
2874 emit_readword((int)&next_interupt,2);
2875 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2876 emit_writeword(2,(int)&last_count);
2877 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2878 if(cc<0) {
2879 emit_storereg(CCREG,HOST_TEMPREG);
2880 }
2881 //emit_popa();
2882 restore_regs(reglist);
2883 //if((cc=get_reg(regmap,CCREG))>=0) {
2884 // emit_loadreg(CCREG,cc);
2885 //}
2886 emit_jmp(stubs[n][2]); // return address
2887}
2888
2889inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2890{
2891 int rs=get_reg(regmap,-1);
2892 int rth=get_reg(regmap,target|64);
2893 int rt=get_reg(regmap,target);
2894 assert(rs>=0);
2895 assert(rt>=0);
cbbab9cd 2896#ifdef PCSX
2897 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2898 return;
2899#endif
57871462 2900 int ftable=0;
2901 if(type==STOREB_STUB)
2902 ftable=(int)writememb;
2903 if(type==STOREH_STUB)
2904 ftable=(int)writememh;
2905 if(type==STOREW_STUB)
2906 ftable=(int)writemem;
24385cae 2907#ifndef FORCE32
57871462 2908 if(type==STORED_STUB)
2909 ftable=(int)writememd;
24385cae 2910#endif
2911 assert(ftable!=0);
57871462 2912 emit_writeword(rs,(int)&address);
2913 //emit_shrimm(rs,16,rs);
2914 //emit_movmem_indexedx4(ftable,rs,rs);
2915 if(type==STOREB_STUB)
2916 emit_writebyte(rt,(int)&byte);
2917 if(type==STOREH_STUB)
2918 emit_writehword(rt,(int)&hword);
2919 if(type==STOREW_STUB)
2920 emit_writeword(rt,(int)&word);
2921 if(type==STORED_STUB) {
3d624f89 2922#ifndef FORCE32
57871462 2923 emit_writeword(rt,(int)&dword);
2924 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2925#else
2926 printf("STORED_STUB\n");
2927#endif
57871462 2928 }
2929 //emit_pusha();
2930 save_regs(reglist);
2931 //emit_shrimm(rs,16,1);
2932 int cc=get_reg(regmap,CCREG);
2933 if(cc<0) {
2934 emit_loadreg(CCREG,2);
2935 }
2936 //emit_movimm(ftable,0);
2937 emit_movimm(((u_int *)ftable)[addr>>16],0);
2938 //emit_readword((int)&last_count,12);
2939 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2940#ifndef PCSX
57871462 2941 if((signed int)addr>=(signed int)0xC0000000) {
2942 // Pagefault address
2943 int ds=regmap!=regs[i].regmap;
2944 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2945 }
f51dc36c 2946#endif
57871462 2947 //emit_add(12,2,2);
2948 //emit_writeword(2,(int)&Count);
2949 //emit_call(((u_int *)ftable)[addr>>16]);
2950 emit_call((int)&indirect_jump);
2951 emit_readword((int)&Count,HOST_TEMPREG);
2952 emit_readword((int)&next_interupt,2);
2953 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2954 emit_writeword(2,(int)&last_count);
2955 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2956 if(cc<0) {
2957 emit_storereg(CCREG,HOST_TEMPREG);
2958 }
2959 //emit_popa();
2960 restore_regs(reglist);
2961}
2962
2963do_unalignedwritestub(int n)
2964{
b7918751 2965 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2966 literal_pool(256);
57871462 2967 set_jump_target(stubs[n][1],(int)out);
b7918751 2968
2969 int i=stubs[n][3];
2970 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2971 int addr=stubs[n][5];
2972 u_int reglist=stubs[n][7];
2973 signed char *i_regmap=i_regs->regmap;
2974 int temp2=get_reg(i_regmap,FTEMP);
2975 int rt;
2976 int ds, real_rs;
2977 rt=get_reg(i_regmap,rs2[i]);
2978 assert(rt>=0);
2979 assert(addr>=0);
2980 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2981 reglist|=(1<<addr);
2982 reglist&=~(1<<temp2);
2983
2984 emit_andimm(addr,0xfffffffc,temp2);
2985 emit_writeword(temp2,(int)&address);
2986
2987 save_regs(reglist);
97a238a6 2988#ifndef PCSX
b7918751 2989 ds=i_regs!=&regs[i];
2990 real_rs=get_reg(i_regmap,rs1[i]);
2991 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2992 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2993 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2994 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2995#endif
b7918751 2996 emit_shrimm(addr,16,1);
2997 int cc=get_reg(i_regmap,CCREG);
2998 if(cc<0) {
2999 emit_loadreg(CCREG,2);
3000 }
3001 emit_movimm((u_int)readmem,0);
3002 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3003#ifndef PCSX
3004 // pagefault address
3005 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3006#endif
b7918751 3007 emit_call((int)&indirect_jump_indexed);
3008 restore_regs(reglist);
3009
3010 emit_readword((int)&readmem_dword,temp2);
3011 int temp=addr; //hmh
3012 emit_shlimm(addr,3,temp);
3013 emit_andimm(temp,24,temp);
3014#ifdef BIG_ENDIAN_MIPS
3015 if (opcode[i]==0x2e) // SWR
3016#else
3017 if (opcode[i]==0x2a) // SWL
3018#endif
3019 emit_xorimm(temp,24,temp);
3020 emit_movimm(-1,HOST_TEMPREG);
55439448 3021 if (opcode[i]==0x2a) { // SWL
b7918751 3022 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3023 emit_orrshr(rt,temp,temp2);
3024 }else{
3025 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3026 emit_orrshl(rt,temp,temp2);
3027 }
3028 emit_readword((int)&address,addr);
3029 emit_writeword(temp2,(int)&word);
3030 //save_regs(reglist); // don't need to, no state changes
3031 emit_shrimm(addr,16,1);
3032 emit_movimm((u_int)writemem,0);
3033 //emit_call((int)&indirect_jump_indexed);
3034 emit_mov(15,14);
3035 emit_readword_dualindexedx4(0,1,15);
3036 emit_readword((int)&Count,HOST_TEMPREG);
3037 emit_readword((int)&next_interupt,2);
3038 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3039 emit_writeword(2,(int)&last_count);
3040 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3041 if(cc<0) {
3042 emit_storereg(CCREG,HOST_TEMPREG);
3043 }
3044 restore_regs(reglist);
57871462 3045 emit_jmp(stubs[n][2]); // return address
3046}
3047
/* Debug helper: prints the eight arguments in the order a,b,c,d,ebp,esi,edi
 * followed, in parentheses, by the int located just below `edi` in memory.
 * esp is accepted but not printed.
 * NOTE(review): reading (&edi)[-1] accesses memory outside the parameter
 * object; what it yields depends on the calling convention. */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *first_arg=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,first_arg[-1]);
}
3052
3053do_invstub(int n)
3054{
3055 literal_pool(20);
3056 u_int reglist=stubs[n][3];
3057 set_jump_target(stubs[n][1],(int)out);
3058 save_regs(reglist);
3059 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3060 emit_call((int)&invalidate_addr);
3061 restore_regs(reglist);
3062 emit_jmp(stubs[n][2]); // return address
3063}
3064
3065int do_dirty_stub(int i)
3066{
3067 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3068 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3069 #ifdef PCSX
3070 addr=(u_int)source;
3071 #endif
57871462 3072 // Careful about the code output here, verify_dirty needs to parse it.
3073 #ifdef ARMv5_ONLY
ac545b3a 3074 emit_loadlp(addr,1);
57871462 3075 emit_loadlp((int)copy,2);
3076 emit_loadlp(slen*4,3);
3077 #else
ac545b3a 3078 emit_movw(addr&0x0000FFFF,1);
57871462 3079 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3080 emit_movt(addr&0xFFFF0000,1);
57871462 3081 emit_movt(((u_int)copy)&0xFFFF0000,2);
3082 emit_movw(slen*4,3);
3083 #endif
3084 emit_movimm(start+i*4,0);
3085 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3086 int entry=(int)out;
3087 load_regs_entry(i);
3088 if(entry==(int)out) entry=instr_addr[i];
3089 emit_jmp(instr_addr[i]);
3090 return entry;
3091}
3092
3093void do_dirty_stub_ds()
3094{
3095 // Careful about the code output here, verify_dirty needs to parse it.
3096 #ifdef ARMv5_ONLY
3097 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3098 emit_loadlp((int)copy,2);
3099 emit_loadlp(slen*4,3);
3100 #else
3101 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3102 emit_movw(((u_int)copy)&0x0000FFFF,2);
3103 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3104 emit_movt(((u_int)copy)&0xFFFF0000,2);
3105 emit_movw(slen*4,3);
3106 #endif
3107 emit_movimm(start+1,0);
3108 emit_call((int)&verify_code_ds);
3109}
3110
3111do_cop1stub(int n)
3112{
3113 literal_pool(256);
3114 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3115 set_jump_target(stubs[n][1],(int)out);
3116 int i=stubs[n][3];
3d624f89 3117// int rs=stubs[n][4];
57871462 3118 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3119 int ds=stubs[n][6];
3120 if(!ds) {
3121 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3122 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3123 }
3124 //else {printf("fp exception in delay slot\n");}
3125 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3126 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3127 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3128 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3129 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3130}
3131
3132/* TLB */
3133
3134int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3135{
3136 if(c) {
3137 if((signed int)addr>=(signed int)0xC0000000) {
3138 // address_generation already loaded the const
3139 emit_readword_dualindexedx4(FP,map,map);
3140 }
3141 else
3142 return -1; // No mapping
3143 }
3144 else {
3145 assert(s!=map);
3146 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3147 emit_addsr12(map,s,map);
3148 // Schedule this while we wait on the load
3149 //if(x) emit_xorimm(s,x,ar);
3150 if(shift>=0) emit_shlimm(s,3,shift);
3151 if(~a) emit_andimm(s,a,ar);
3152 emit_readword_dualindexedx4(FP,map,map);
3153 }
3154 return map;
3155}
3156int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3157{
3158 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3159 emit_test(map,map);
3160 *jaddr=(int)out;
3161 emit_js(0);
3162 }
3163 return map;
3164}
3165
3166int gen_tlb_addr_r(int ar, int map) {
3167 if(map>=0) {
3168 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3169 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3170 }
3171}
3172
3173int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3174{
3175 if(c) {
3176 if(addr<0x80800000||addr>=0xC0000000) {
3177 // address_generation already loaded the const
3178 emit_readword_dualindexedx4(FP,map,map);
3179 }
3180 else
3181 return -1; // No mapping
3182 }
3183 else {
3184 assert(s!=map);
3185 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3186 emit_addsr12(map,s,map);
3187 // Schedule this while we wait on the load
3188 //if(x) emit_xorimm(s,x,ar);
3189 emit_readword_dualindexedx4(FP,map,map);
3190 }
3191 return map;
3192}
3193int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3194{
3195 if(!c||addr<0x80800000||addr>=0xC0000000) {
3196 emit_testimm(map,0x40000000);
3197 *jaddr=(int)out;
3198 emit_jne(0);
3199 }
3200}
3201
3202int gen_tlb_addr_w(int ar, int map) {
3203 if(map>=0) {
3204 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3205 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3206 }
3207}
3208
3209// Generate the address of the memory_map entry, relative to dynarec_local
3210generate_map_const(u_int addr,int reg) {
3211 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3212 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3213}
3214
3215/* Special assem */
3216
3217void shift_assemble_arm(int i,struct regstat *i_regs)
3218{
3219 if(rt1[i]) {
3220 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3221 {
3222 signed char s,t,shift;
3223 t=get_reg(i_regs->regmap,rt1[i]);
3224 s=get_reg(i_regs->regmap,rs1[i]);
3225 shift=get_reg(i_regs->regmap,rs2[i]);
3226 if(t>=0){
3227 if(rs1[i]==0)
3228 {
3229 emit_zeroreg(t);
3230 }
3231 else if(rs2[i]==0)
3232 {
3233 assert(s>=0);
3234 if(s!=t) emit_mov(s,t);
3235 }
3236 else
3237 {
3238 emit_andimm(shift,31,HOST_TEMPREG);
3239 if(opcode2[i]==4) // SLLV
3240 {
3241 emit_shl(s,HOST_TEMPREG,t);
3242 }
3243 if(opcode2[i]==6) // SRLV
3244 {
3245 emit_shr(s,HOST_TEMPREG,t);
3246 }
3247 if(opcode2[i]==7) // SRAV
3248 {
3249 emit_sar(s,HOST_TEMPREG,t);
3250 }
3251 }
3252 }
3253 } else { // DSLLV/DSRLV/DSRAV
3254 signed char sh,sl,th,tl,shift;
3255 th=get_reg(i_regs->regmap,rt1[i]|64);
3256 tl=get_reg(i_regs->regmap,rt1[i]);
3257 sh=get_reg(i_regs->regmap,rs1[i]|64);
3258 sl=get_reg(i_regs->regmap,rs1[i]);
3259 shift=get_reg(i_regs->regmap,rs2[i]);
3260 if(tl>=0){
3261 if(rs1[i]==0)
3262 {
3263 emit_zeroreg(tl);
3264 if(th>=0) emit_zeroreg(th);
3265 }
3266 else if(rs2[i]==0)
3267 {
3268 assert(sl>=0);
3269 if(sl!=tl) emit_mov(sl,tl);
3270 if(th>=0&&sh!=th) emit_mov(sh,th);
3271 }
3272 else
3273 {
3274 // FIXME: What if shift==tl ?
3275 assert(shift!=tl);
3276 int temp=get_reg(i_regs->regmap,-1);
3277 int real_th=th;
3278 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3279 assert(sl>=0);
3280 assert(sh>=0);
3281 emit_andimm(shift,31,HOST_TEMPREG);
3282 if(opcode2[i]==0x14) // DSLLV
3283 {
3284 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3285 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3286 emit_orrshr(sl,HOST_TEMPREG,th);
3287 emit_andimm(shift,31,HOST_TEMPREG);
3288 emit_testimm(shift,32);
3289 emit_shl(sl,HOST_TEMPREG,tl);
3290 if(th>=0) emit_cmovne_reg(tl,th);
3291 emit_cmovne_imm(0,tl);
3292 }
3293 if(opcode2[i]==0x16) // DSRLV
3294 {
3295 assert(th>=0);
3296 emit_shr(sl,HOST_TEMPREG,tl);
3297 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3298 emit_orrshl(sh,HOST_TEMPREG,tl);
3299 emit_andimm(shift,31,HOST_TEMPREG);
3300 emit_testimm(shift,32);
3301 emit_shr(sh,HOST_TEMPREG,th);
3302 emit_cmovne_reg(th,tl);
3303 if(real_th>=0) emit_cmovne_imm(0,th);
3304 }
3305 if(opcode2[i]==0x17) // DSRAV
3306 {
3307 assert(th>=0);
3308 emit_shr(sl,HOST_TEMPREG,tl);
3309 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3310 if(real_th>=0) {
3311 assert(temp>=0);
3312 emit_sarimm(th,31,temp);
3313 }
3314 emit_orrshl(sh,HOST_TEMPREG,tl);
3315 emit_andimm(shift,31,HOST_TEMPREG);
3316 emit_testimm(shift,32);
3317 emit_sar(sh,HOST_TEMPREG,th);
3318 emit_cmovne_reg(th,tl);
3319 if(real_th>=0) emit_cmovne_reg(temp,th);
3320 }
3321 }
3322 }
3323 }
3324 }
3325}
3326#define shift_assemble shift_assemble_arm
3327
3328void loadlr_assemble_arm(int i,struct regstat *i_regs)
3329{
3330 int s,th,tl,temp,temp2,addr,map=-1;
3331 int offset;
3332 int jaddr=0;
af4ee1fe 3333 int memtarget=0,c=0;
57871462 3334 u_int hr,reglist=0;
3335 th=get_reg(i_regs->regmap,rt1[i]|64);
3336 tl=get_reg(i_regs->regmap,rt1[i]);
3337 s=get_reg(i_regs->regmap,rs1[i]);
3338 temp=get_reg(i_regs->regmap,-1);
3339 temp2=get_reg(i_regs->regmap,FTEMP);
3340 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3341 assert(addr<0);
3342 offset=imm[i];
3343 for(hr=0;hr<HOST_REGS;hr++) {
3344 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3345 }
3346 reglist|=1<<temp;
3347 if(offset||s<0||c) addr=temp2;
3348 else addr=s;
3349 if(s>=0) {
3350 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3351 if(c) {
3352 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3353 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3354 }
57871462 3355 }
535d208a 3356 if(!using_tlb) {
3357 if(!c) {
3358 #ifdef RAM_OFFSET
3359 map=get_reg(i_regs->regmap,ROREG);
3360 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3361 #endif
3362 emit_shlimm(addr,3,temp);
3363 if (opcode[i]==0x22||opcode[i]==0x26) {
3364 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3365 }else{
535d208a 3366 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3367 }
535d208a 3368 emit_cmpimm(addr,RAM_SIZE);
3369 jaddr=(int)out;
3370 emit_jno(0);
3371 }
3372 else {
3373 if (opcode[i]==0x22||opcode[i]==0x26) {
3374 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3375 }else{
3376 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3377 }
57871462 3378 }
535d208a 3379 }else{ // using tlb
3380 int a;
3381 if(c) {
3382 a=-1;
3383 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3384 a=0xFFFFFFFC; // LWL/LWR
3385 }else{
3386 a=0xFFFFFFF8; // LDL/LDR
3387 }
3388 map=get_reg(i_regs->regmap,TLREG);
3389 assert(map>=0);
3390 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3391 if(c) {
3392 if (opcode[i]==0x22||opcode[i]==0x26) {
3393 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3394 }else{
3395 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3396 }
535d208a 3397 }
3398 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3399 }
3400 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3401 if(!c||memtarget) {
3402 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3403 emit_readword_indexed_tlb(0,temp2,map,temp2);
3404 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3405 }
3406 else
3407 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3408 if(rt1[i]) {
3409 assert(tl>=0);
57871462 3410 emit_andimm(temp,24,temp);
2002a1db 3411#ifdef BIG_ENDIAN_MIPS
3412 if (opcode[i]==0x26) // LWR
3413#else
3414 if (opcode[i]==0x22) // LWL
3415#endif
3416 emit_xorimm(temp,24,temp);
57871462 3417 emit_movimm(-1,HOST_TEMPREG);
3418 if (opcode[i]==0x26) {
3419 emit_shr(temp2,temp,temp2);
3420 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3421 }else{
3422 emit_shl(temp2,temp,temp2);
3423 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3424 }
3425 emit_or(temp2,tl,tl);
57871462 3426 }
535d208a 3427 //emit_storereg(rt1[i],tl); // DEBUG
3428 }
3429 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3430 // FIXME: little endian
3431 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3432 if(!c||memtarget) {
3433 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3434 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3435 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3436 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3437 }
3438 else
3439 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3440 if(rt1[i]) {
3441 assert(th>=0);
3442 assert(tl>=0);
57871462 3443 emit_testimm(temp,32);
3444 emit_andimm(temp,24,temp);
3445 if (opcode[i]==0x1A) { // LDL
3446 emit_rsbimm(temp,32,HOST_TEMPREG);
3447 emit_shl(temp2h,temp,temp2h);
3448 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3449 emit_movimm(-1,HOST_TEMPREG);
3450 emit_shl(temp2,temp,temp2);
3451 emit_cmove_reg(temp2h,th);
3452 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3453 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3454 emit_orreq(temp2,tl,tl);
3455 emit_orrne(temp2,th,th);
3456 }
3457 if (opcode[i]==0x1B) { // LDR
3458 emit_xorimm(temp,24,temp);
3459 emit_rsbimm(temp,32,HOST_TEMPREG);
3460 emit_shr(temp2,temp,temp2);
3461 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3462 emit_movimm(-1,HOST_TEMPREG);
3463 emit_shr(temp2h,temp,temp2h);
3464 emit_cmovne_reg(temp2,tl);
3465 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3466 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3467 emit_orrne(temp2h,th,th);
3468 emit_orreq(temp2h,tl,tl);
3469 }
3470 }
3471 }
3472}
3473#define loadlr_assemble loadlr_assemble_arm
3474
3475void cop0_assemble(int i,struct regstat *i_regs)
3476{
3477 if(opcode2[i]==0) // MFC0
3478 {
3479 signed char t=get_reg(i_regs->regmap,rt1[i]);
3480 char copr=(source[i]>>11)&0x1f;
3481 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3482 if(t>=0&&rt1[i]!=0) {
7139f3c8 3483#ifdef MUPEN64
57871462 3484 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3485 emit_movimm((source[i]>>11)&0x1f,1);
3486 emit_writeword(0,(int)&PC);
3487 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3488 if(copr==9) {
3489 emit_readword((int)&last_count,ECX);
3490 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3491 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3492 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3493 emit_writeword(HOST_CCREG,(int)&Count);
3494 }
3495 emit_call((int)MFC0);
3496 emit_readword((int)&readmem_dword,t);
7139f3c8 3497#else
3498 emit_readword((int)&reg_cop0+copr*4,t);
3499#endif
57871462 3500 }
3501 }
3502 else if(opcode2[i]==4) // MTC0
3503 {
3504 signed char s=get_reg(i_regs->regmap,rs1[i]);
3505 char copr=(source[i]>>11)&0x1f;
3506 assert(s>=0);
3507 emit_writeword(s,(int)&readmem_dword);
3508 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3509#ifdef MUPEN64
57871462 3510 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3511 emit_movimm((source[i]>>11)&0x1f,1);
3512 emit_writeword(0,(int)&PC);
3513 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3514#endif
3515 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3516 emit_readword((int)&last_count,ECX);
3517 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3518 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3519 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3520 emit_writeword(HOST_CCREG,(int)&Count);
3521 }
3522 // What a mess. The status register (12) can enable interrupts,
3523 // so needs a special case to handle a pending interrupt.
3524 // The interrupt must be taken immediately, because a subsequent
3525 // instruction might disable interrupts again.
7139f3c8 3526 if(copr==12||copr==13) {
fca1aef2 3527#ifdef PCSX
3528 if (is_delayslot) {
3529 // burn cycles to cause cc_interrupt, which will
3530 // reschedule next_interupt. Relies on CCREG from above.
3531 assem_debug("MTC0 DS %d\n", copr);
3532 emit_writeword(HOST_CCREG,(int)&last_count);
3533 emit_movimm(0,HOST_CCREG);
3534 emit_storereg(CCREG,HOST_CCREG);
3535 emit_movimm(copr,0);
3536 emit_call((int)pcsx_mtc0_ds);
3537 return;
3538 }
3539#endif
57871462 3540 emit_movimm(start+i*4+4,0);
3541 emit_movimm(0,1);
3542 emit_writeword(0,(int)&pcaddr);
3543 emit_writeword(1,(int)&pending_exception);
3544 }
3545 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3546 //else
fca1aef2 3547#ifdef PCSX
3548 emit_movimm(copr,0);
3549 emit_call((int)pcsx_mtc0);
3550#else
57871462 3551 emit_call((int)MTC0);
fca1aef2 3552#endif
7139f3c8 3553 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3554 emit_readword((int)&Count,HOST_CCREG);
3555 emit_readword((int)&next_interupt,ECX);
3556 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3557 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3558 emit_writeword(ECX,(int)&last_count);
3559 emit_storereg(CCREG,HOST_CCREG);
3560 }
7139f3c8 3561 if(copr==12||copr==13) {
57871462 3562 assert(!is_delayslot);
3563 emit_readword((int)&pending_exception,14);
3564 }
3565 emit_loadreg(rs1[i],s);
3566 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3567 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3568 if(copr==12||copr==13) {
57871462 3569 emit_test(14,14);
3570 emit_jne((int)&do_interrupt);
3571 }
3572 cop1_usable=0;
3573 }
3574 else
3575 {
3576 assert(opcode2[i]==0x10);
3d624f89 3577#ifndef DISABLE_TLB
57871462 3578 if((source[i]&0x3f)==0x01) // TLBR
3579 emit_call((int)TLBR);
3580 if((source[i]&0x3f)==0x02) // TLBWI
3581 emit_call((int)TLBWI_new);
3582 if((source[i]&0x3f)==0x06) { // TLBWR
3583 // The TLB entry written by TLBWR is dependent on the count,
3584 // so update the cycle count
3585 emit_readword((int)&last_count,ECX);
3586 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3587 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3588 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3589 emit_writeword(HOST_CCREG,(int)&Count);
3590 emit_call((int)TLBWR_new);
3591 }
3592 if((source[i]&0x3f)==0x08) // TLBP
3593 emit_call((int)TLBP);
3d624f89 3594#endif
576bbd8f 3595#ifdef PCSX
3596 if((source[i]&0x3f)==0x10) // RFE
3597 {
3598 emit_readword((int)&Status,0);
3599 emit_andimm(0,0x3c,1);
3600 emit_andimm(0,~0xf,0);
3601 emit_orrshr_imm(1,2,0);
3602 emit_writeword(0,(int)&Status);
3603 }
3604#else
57871462 3605 if((source[i]&0x3f)==0x18) // ERET
3606 {
3607 int count=ccadj[i];
3608 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3609 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3610 emit_jmp((int)jump_eret);
3611 }
576bbd8f 3612#endif
57871462 3613 }
3614}
3615
b9b61529 3616static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3617{
3618 switch (copr) {
3619 case 1:
3620 case 3:
3621 case 5:
3622 case 8:
3623 case 9:
3624 case 10:
3625 case 11:
3626 emit_readword((int)&reg_cop2d[copr],tl);
3627 emit_signextend16(tl,tl);
3628 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3629 break;
3630 case 7:
3631 case 16:
3632 case 17:
3633 case 18:
3634 case 19:
3635 emit_readword((int)&reg_cop2d[copr],tl);
3636 emit_andimm(tl,0xffff,tl);
3637 emit_writeword(tl,(int)&reg_cop2d[copr]);
3638 break;
3639 case 15:
3640 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3641 emit_writeword(tl,(int)&reg_cop2d[copr]);
3642 break;
3643 case 28:
b9b61529 3644 case 29:
3645 emit_readword((int)&reg_cop2d[9],temp);
3646 emit_testimm(temp,0x8000); // do we need this?
3647 emit_andimm(temp,0xf80,temp);
3648 emit_andne_imm(temp,0,temp);
f70d384d 3649 emit_shrimm(temp,7,tl);
b9b61529 3650 emit_readword((int)&reg_cop2d[10],temp);
3651 emit_testimm(temp,0x8000);
3652 emit_andimm(temp,0xf80,temp);
3653 emit_andne_imm(temp,0,temp);
f70d384d 3654 emit_orrshr_imm(temp,2,tl);
b9b61529 3655 emit_readword((int)&reg_cop2d[11],temp);
3656 emit_testimm(temp,0x8000);
3657 emit_andimm(temp,0xf80,temp);
3658 emit_andne_imm(temp,0,temp);
f70d384d 3659 emit_orrshl_imm(temp,3,tl);
b9b61529 3660 emit_writeword(tl,(int)&reg_cop2d[copr]);
3661 break;
3662 default:
3663 emit_readword((int)&reg_cop2d[copr],tl);
3664 break;
3665 }
3666}
3667
3668static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3669{
3670 switch (copr) {
3671 case 15:
3672 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3673 emit_writeword(sl,(int)&reg_cop2d[copr]);
3674 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3675 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3676 emit_writeword(sl,(int)&reg_cop2d[14]);
3677 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3678 break;
3679 case 28:
3680 emit_andimm(sl,0x001f,temp);
f70d384d 3681 emit_shlimm(temp,7,temp);
b9b61529 3682 emit_writeword(temp,(int)&reg_cop2d[9]);
3683 emit_andimm(sl,0x03e0,temp);
f70d384d 3684 emit_shlimm(temp,2,temp);
b9b61529 3685 emit_writeword(temp,(int)&reg_cop2d[10]);
3686 emit_andimm(sl,0x7c00,temp);
f70d384d 3687 emit_shrimm(temp,3,temp);
b9b61529 3688 emit_writeword(temp,(int)&reg_cop2d[11]);
3689 emit_writeword(sl,(int)&reg_cop2d[28]);
3690 break;
3691 case 30:
3692 emit_movs(sl,temp);
3693 emit_mvnmi(temp,temp);
3694 emit_clz(temp,temp);
3695 emit_writeword(sl,(int)&reg_cop2d[30]);
3696 emit_writeword(temp,(int)&reg_cop2d[31]);
3697 break;
b9b61529 3698 case 31:
3699 break;
3700 default:
3701 emit_writeword(sl,(int)&reg_cop2d[copr]);
3702 break;
3703 }
3704}
3705
3706void cop2_assemble(int i,struct regstat *i_regs)
3707{
3708 u_int copr=(source[i]>>11)&0x1f;
3709 signed char temp=get_reg(i_regs->regmap,-1);
3710 if (opcode2[i]==0) { // MFC2
3711 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3712 if(tl>=0&&rt1[i]!=0)
b9b61529 3713 cop2_get_dreg(copr,tl,temp);
3714 }
3715 else if (opcode2[i]==4) { // MTC2
3716 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3717 cop2_put_dreg(copr,sl,temp);
3718 }
3719 else if (opcode2[i]==2) // CFC2
3720 {
3721 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3722 if(tl>=0&&rt1[i]!=0)
b9b61529 3723 emit_readword((int)&reg_cop2c[copr],tl);
3724 }
3725 else if (opcode2[i]==6) // CTC2
3726 {
3727 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3728 switch(copr) {
3729 case 4:
3730 case 12:
3731 case 20:
3732 case 26:
3733 case 27:
3734 case 29:
3735 case 30:
3736 emit_signextend16(sl,temp);
3737 break;
3738 case 31:
3739 //value = value & 0x7ffff000;
3740 //if (value & 0x7f87e000) value |= 0x80000000;
3741 emit_shrimm(sl,12,temp);
3742 emit_shlimm(temp,12,temp);
3743 emit_testimm(temp,0x7f000000);
3744 emit_testeqimm(temp,0x00870000);
3745 emit_testeqimm(temp,0x0000e000);
3746 emit_orrne_imm(temp,0x80000000,temp);
3747 break;
3748 default:
3749 temp=sl;
3750 break;
3751 }
3752 emit_writeword(temp,(int)&reg_cop2c[copr]);
3753 assert(sl>=0);
3754 }
3755}
3756
3757void c2op_assemble(int i,struct regstat *i_regs)
3758{
3759 signed char temp=get_reg(i_regs->regmap,-1);
3760 u_int c2op=source[i]&0x3f;
3761 u_int hr,reglist=0;
3762 for(hr=0;hr<HOST_REGS;hr++) {
3763 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3764 }
3765 if(i==0||itype[i-1]!=C2OP)
3766 save_regs(reglist);
3767
3768 if (gte_handlers[c2op]!=NULL) {
3769 int cc=get_reg(i_regs->regmap,CCREG);
009faf24 3770 emit_movimm(source[i],1); // opcode
b9b61529 3771 if (cc>=0&&gte_cycletab[c2op])
009faf24 3772 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
3773 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3774 emit_writeword(1,(int)&psxRegs.code);
b9b61529 3775 emit_call((int)gte_handlers[c2op]);
3776 }
3777
3778 if(i>=slen-1||itype[i+1]!=C2OP)
3779 restore_regs(reglist);
3780}
3781
3782void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3783{
3784 // XXX: should just just do the exception instead
3785 if(!cop1_usable) {
3786 int jaddr=(int)out;
3787 emit_jmp(0);
3788 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3789 cop1_usable=1;
3790 }
3791}
3792
57871462 3793void cop1_assemble(int i,struct regstat *i_regs)
3794{
3d624f89 3795#ifndef DISABLE_COP1
57871462 3796 // Check cop1 unusable
3797 if(!cop1_usable) {
3798 signed char rs=get_reg(i_regs->regmap,CSREG);
3799 assert(rs>=0);
3800 emit_testimm(rs,0x20000000);
3801 int jaddr=(int)out;
3802 emit_jeq(0);
3803 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3804 cop1_usable=1;
3805 }
3806 if (opcode2[i]==0) { // MFC1
3807 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3808 if(tl>=0) {
3809 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3810 emit_readword_indexed(0,tl,tl);
3811 }
3812 }
3813 else if (opcode2[i]==1) { // DMFC1
3814 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3815 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3816 if(tl>=0) {
3817 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3818 if(th>=0) emit_readword_indexed(4,tl,th);
3819 emit_readword_indexed(0,tl,tl);
3820 }
3821 }
3822 else if (opcode2[i]==4) { // MTC1
3823 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3824 signed char temp=get_reg(i_regs->regmap,-1);
3825 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3826 emit_writeword_indexed(sl,0,temp);
3827 }
3828 else if (opcode2[i]==5) { // DMTC1
3829 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3830 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3831 signed char temp=get_reg(i_regs->regmap,-1);
3832 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3833 emit_writeword_indexed(sh,4,temp);
3834 emit_writeword_indexed(sl,0,temp);
3835 }
3836 else if (opcode2[i]==2) // CFC1
3837 {
3838 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3839 if(tl>=0) {
3840 u_int copr=(source[i]>>11)&0x1f;
3841 if(copr==0) emit_readword((int)&FCR0,tl);
3842 if(copr==31) emit_readword((int)&FCR31,tl);
3843 }
3844 }
3845 else if (opcode2[i]==6) // CTC1
3846 {
3847 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3848 u_int copr=(source[i]>>11)&0x1f;
3849 assert(sl>=0);
3850 if(copr==31)
3851 {
3852 emit_writeword(sl,(int)&FCR31);
3853 // Set the rounding mode
3854 //FIXME
3855 //char temp=get_reg(i_regs->regmap,-1);
3856 //emit_andimm(sl,3,temp);
3857 //emit_fldcw_indexed((int)&rounding_modes,temp);
3858 }
3859 }
3d624f89 3860#else
3861 cop1_unusable(i, i_regs);
3862#endif
57871462 3863}
3864
3865void fconv_assemble_arm(int i,struct regstat *i_regs)
3866{
3d624f89 3867#ifndef DISABLE_COP1
57871462 3868 signed char temp=get_reg(i_regs->regmap,-1);
3869 assert(temp>=0);
3870 // Check cop1 unusable
3871 if(!cop1_usable) {
3872 signed char rs=get_reg(i_regs->regmap,CSREG);
3873 assert(rs>=0);
3874 emit_testimm(rs,0x20000000);
3875 int jaddr=(int)out;
3876 emit_jeq(0);
3877 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3878 cop1_usable=1;
3879 }
3880
3881 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3882 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3883 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3884 emit_flds(temp,15);
3885 emit_ftosizs(15,15); // float->int, truncate
3886 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3887 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3888 emit_fsts(15,temp);
3889 return;
3890 }
3891 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3892 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3893 emit_vldr(temp,7);
3894 emit_ftosizd(7,13); // double->int, truncate
3895 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3896 emit_fsts(13,temp);
3897 return;
3898 }
3899
3900 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3901 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3902 emit_flds(temp,13);
3903 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3904 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3905 emit_fsitos(13,15);
3906 emit_fsts(15,temp);
3907 return;
3908 }
3909 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3910 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3911 emit_flds(temp,13);
3912 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3913 emit_fsitod(13,7);
3914 emit_vstr(7,temp);
3915 return;
3916 }
3917
3918 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3919 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3920 emit_flds(temp,13);
3921 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3922 emit_fcvtds(13,7);
3923 emit_vstr(7,temp);
3924 return;
3925 }
3926 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3927 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3928 emit_vldr(temp,7);
3929 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3930 emit_fcvtsd(7,13);
3931 emit_fsts(13,temp);
3932 return;
3933 }
3934 #endif
3935
3936 // C emulation code
3937
3938 u_int hr,reglist=0;
3939 for(hr=0;hr<HOST_REGS;hr++) {
3940 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3941 }
3942 save_regs(reglist);
3943
3944 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3945 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3946 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3947 emit_call((int)cvt_s_w);
3948 }
3949 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3950 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3951 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3952 emit_call((int)cvt_d_w);
3953 }
3954 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3955 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3956 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3957 emit_call((int)cvt_s_l);
3958 }
3959 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3960 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3961 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3962 emit_call((int)cvt_d_l);
3963 }
3964
3965 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3966 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3967 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3968 emit_call((int)cvt_d_s);
3969 }
3970 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3971 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3972 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3973 emit_call((int)cvt_w_s);
3974 }
3975 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3976 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3977 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3978 emit_call((int)cvt_l_s);
3979 }
3980
3981 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3982 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3983 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3984 emit_call((int)cvt_s_d);
3985 }
3986 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3987 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3988 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3989 emit_call((int)cvt_w_d);
3990 }
3991 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3992 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3993 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3994 emit_call((int)cvt_l_d);
3995 }
3996
3997 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3998 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3999 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4000 emit_call((int)round_l_s);
4001 }
4002 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4003 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4004 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4005 emit_call((int)trunc_l_s);
4006 }
4007 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4008 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4009 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4010 emit_call((int)ceil_l_s);
4011 }
4012 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4013 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4014 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4015 emit_call((int)floor_l_s);
4016 }
4017 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4018 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4019 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4020 emit_call((int)round_w_s);
4021 }
4022 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4023 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4024 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4025 emit_call((int)trunc_w_s);
4026 }
4027 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4028 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4029 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4030 emit_call((int)ceil_w_s);
4031 }
4032 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4033 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4034 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4035 emit_call((int)floor_w_s);
4036 }
4037
4038 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4039 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4040 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4041 emit_call((int)round_l_d);
4042 }
4043 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4044 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4045 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4046 emit_call((int)trunc_l_d);
4047 }
4048 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4049 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4050 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4051 emit_call((int)ceil_l_d);
4052 }
4053 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4054 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4055 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4056 emit_call((int)floor_l_d);
4057 }
4058 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4059 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4060 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4061 emit_call((int)round_w_d);
4062 }
4063 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4064 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4065 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4066 emit_call((int)trunc_w_d);
4067 }
4068 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4069 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4070 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4071 emit_call((int)ceil_w_d);
4072 }
4073 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4074 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4075 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4076 emit_call((int)floor_w_d);
4077 }
4078
4079 restore_regs(reglist);
3d624f89 4080#else
4081 cop1_unusable(i, i_regs);
4082#endif
57871462 4083}
4084#define fconv_assemble fconv_assemble_arm
4085
4086void fcomp_assemble(int i,struct regstat *i_regs)
4087{
3d624f89 4088#ifndef DISABLE_COP1
57871462 4089 signed char fs=get_reg(i_regs->regmap,FSREG);
4090 signed char temp=get_reg(i_regs->regmap,-1);
4091 assert(temp>=0);
4092 // Check cop1 unusable
4093 if(!cop1_usable) {
4094 signed char cs=get_reg(i_regs->regmap,CSREG);
4095 assert(cs>=0);
4096 emit_testimm(cs,0x20000000);
4097 int jaddr=(int)out;
4098 emit_jeq(0);
4099 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4100 cop1_usable=1;
4101 }
4102
4103 if((source[i]&0x3f)==0x30) {
4104 emit_andimm(fs,~0x800000,fs);
4105 return;
4106 }
4107
4108 if((source[i]&0x3e)==0x38) {
4109 // sf/ngle - these should throw exceptions for NaNs
4110 emit_andimm(fs,~0x800000,fs);
4111 return;
4112 }
4113
4114 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4115 if(opcode2[i]==0x10) {
4116 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4117 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4118 emit_orimm(fs,0x800000,fs);
4119 emit_flds(temp,14);
4120 emit_flds(HOST_TEMPREG,15);
4121 emit_fcmps(14,15);
4122 emit_fmstat();
4123 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4124 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4125 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4126 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4127 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4128 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4129 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4130 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4131 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4132 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4133 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4134 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4135 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4136 return;
4137 }
4138 if(opcode2[i]==0x11) {
4139 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4140 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4141 emit_orimm(fs,0x800000,fs);
4142 emit_vldr(temp,6);
4143 emit_vldr(HOST_TEMPREG,7);
4144 emit_fcmpd(6,7);
4145 emit_fmstat();
4146 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4147 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4148 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4149 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4150 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4151 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4152 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4153 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4154 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4155 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4156 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4157 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4158 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4159 return;
4160 }
4161 #endif
4162
4163 // C only
4164
4165 u_int hr,reglist=0;
4166 for(hr=0;hr<HOST_REGS;hr++) {
4167 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4168 }
4169 reglist&=~(1<<fs);
4170 save_regs(reglist);
4171 if(opcode2[i]==0x10) {
4172 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4173 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4174 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4175 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4176 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4177 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4178 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4179 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4180 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4181 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4182 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4183 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4184 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4185 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4186 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4187 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4188 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4189 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4190 }
4191 if(opcode2[i]==0x11) {
4192 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4193 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4194 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4195 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4196 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4197 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4198 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4199 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4200 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4201 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4202 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4203 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4204 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4205 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4206 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4207 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4208 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4209 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4210 }
4211 restore_regs(reglist);
4212 emit_loadreg(FSREG,fs);
3d624f89 4213#else
4214 cop1_unusable(i, i_regs);
4215#endif
57871462 4216}
4217
4218void float_assemble(int i,struct regstat *i_regs)
4219{
3d624f89 4220#ifndef DISABLE_COP1
57871462 4221 signed char temp=get_reg(i_regs->regmap,-1);
4222 assert(temp>=0);
4223 // Check cop1 unusable
4224 if(!cop1_usable) {
4225 signed char cs=get_reg(i_regs->regmap,CSREG);
4226 assert(cs>=0);
4227 emit_testimm(cs,0x20000000);
4228 int jaddr=(int)out;
4229 emit_jeq(0);
4230 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4231 cop1_usable=1;
4232 }
4233
4234 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4235 if((source[i]&0x3f)==6) // mov
4236 {
4237 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4238 if(opcode2[i]==0x10) {
4239 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4240 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4241 emit_readword_indexed(0,temp,temp);
4242 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4243 }
4244 if(opcode2[i]==0x11) {
4245 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4246 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4247 emit_vldr(temp,7);
4248 emit_vstr(7,HOST_TEMPREG);
4249 }
4250 }
4251 return;
4252 }
4253
4254 if((source[i]&0x3f)>3)
4255 {
4256 if(opcode2[i]==0x10) {
4257 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4258 emit_flds(temp,15);
4259 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4260 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4261 }
4262 if((source[i]&0x3f)==4) // sqrt
4263 emit_fsqrts(15,15);
4264 if((source[i]&0x3f)==5) // abs
4265 emit_fabss(15,15);
4266 if((source[i]&0x3f)==7) // neg
4267 emit_fnegs(15,15);
4268 emit_fsts(15,temp);
4269 }
4270 if(opcode2[i]==0x11) {
4271 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4272 emit_vldr(temp,7);
4273 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4274 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4275 }
4276 if((source[i]&0x3f)==4) // sqrt
4277 emit_fsqrtd(7,7);
4278 if((source[i]&0x3f)==5) // abs
4279 emit_fabsd(7,7);
4280 if((source[i]&0x3f)==7) // neg
4281 emit_fnegd(7,7);
4282 emit_vstr(7,temp);
4283 }
4284 return;
4285 }
4286 if((source[i]&0x3f)<4)
4287 {
4288 if(opcode2[i]==0x10) {
4289 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4290 }
4291 if(opcode2[i]==0x11) {
4292 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4293 }
4294 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4295 if(opcode2[i]==0x10) {
4296 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4297 emit_flds(temp,15);
4298 emit_flds(HOST_TEMPREG,13);
4299 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4300 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4301 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4302 }
4303 }
4304 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4305 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4306 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4307 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4308 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4309 emit_fsts(15,HOST_TEMPREG);
4310 }else{
4311 emit_fsts(15,temp);
4312 }
4313 }
4314 else if(opcode2[i]==0x11) {
4315 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4316 emit_vldr(temp,7);
4317 emit_vldr(HOST_TEMPREG,6);
4318 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4319 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4320 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4321 }
4322 }
4323 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4324 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4325 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4326 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4327 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4328 emit_vstr(7,HOST_TEMPREG);
4329 }else{
4330 emit_vstr(7,temp);
4331 }
4332 }
4333 }
4334 else {
4335 if(opcode2[i]==0x10) {
4336 emit_flds(temp,15);
4337 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4338 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4339 }
4340 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4341 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4342 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4343 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4344 emit_fsts(15,temp);
4345 }
4346 else if(opcode2[i]==0x11) {
4347 emit_vldr(temp,7);
4348 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4349 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4350 }
4351 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4352 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4353 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4354 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4355 emit_vstr(7,temp);
4356 }
4357 }
4358 return;
4359 }
4360 #endif
4361
4362 u_int hr,reglist=0;
4363 for(hr=0;hr<HOST_REGS;hr++) {
4364 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4365 }
4366 if(opcode2[i]==0x10) { // Single precision
4367 save_regs(reglist);
4368 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4369 if((source[i]&0x3f)<4) {
4370 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4371 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4372 }else{
4373 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4374 }
4375 switch(source[i]&0x3f)
4376 {
4377 case 0x00: emit_call((int)add_s);break;
4378 case 0x01: emit_call((int)sub_s);break;
4379 case 0x02: emit_call((int)mul_s);break;
4380 case 0x03: emit_call((int)div_s);break;
4381 case 0x04: emit_call((int)sqrt_s);break;
4382 case 0x05: emit_call((int)abs_s);break;
4383 case 0x06: emit_call((int)mov_s);break;
4384 case 0x07: emit_call((int)neg_s);break;
4385 }
4386 restore_regs(reglist);
4387 }
4388 if(opcode2[i]==0x11) { // Double precision
4389 save_regs(reglist);
4390 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4391 if((source[i]&0x3f)<4) {
4392 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4393 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4394 }else{
4395 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4396 }
4397 switch(source[i]&0x3f)
4398 {
4399 case 0x00: emit_call((int)add_d);break;
4400 case 0x01: emit_call((int)sub_d);break;
4401 case 0x02: emit_call((int)mul_d);break;
4402 case 0x03: emit_call((int)div_d);break;
4403 case 0x04: emit_call((int)sqrt_d);break;
4404 case 0x05: emit_call((int)abs_d);break;
4405 case 0x06: emit_call((int)mov_d);break;
4406 case 0x07: emit_call((int)neg_d);break;
4407 }
4408 restore_regs(reglist);
4409 }
3d624f89 4410#else
4411 cop1_unusable(i, i_regs);
4412#endif
57871462 4413}
4414
4415void multdiv_assemble_arm(int i,struct regstat *i_regs)
4416{
4417 // case 0x18: MULT
4418 // case 0x19: MULTU
4419 // case 0x1A: DIV
4420 // case 0x1B: DIVU
4421 // case 0x1C: DMULT
4422 // case 0x1D: DMULTU
4423 // case 0x1E: DDIV
4424 // case 0x1F: DDIVU
4425 if(rs1[i]&&rs2[i])
4426 {
4427 if((opcode2[i]&4)==0) // 32-bit
4428 {
4429 if(opcode2[i]==0x18) // MULT
4430 {
4431 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4432 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4433 signed char hi=get_reg(i_regs->regmap,HIREG);
4434 signed char lo=get_reg(i_regs->regmap,LOREG);
4435 assert(m1>=0);
4436 assert(m2>=0);
4437 assert(hi>=0);
4438 assert(lo>=0);
4439 emit_smull(m1,m2,hi,lo);
4440 }
4441 if(opcode2[i]==0x19) // MULTU
4442 {
4443 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4444 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4445 signed char hi=get_reg(i_regs->regmap,HIREG);
4446 signed char lo=get_reg(i_regs->regmap,LOREG);
4447 assert(m1>=0);
4448 assert(m2>=0);
4449 assert(hi>=0);
4450 assert(lo>=0);
4451 emit_umull(m1,m2,hi,lo);
4452 }
4453 if(opcode2[i]==0x1A) // DIV
4454 {
4455 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4456 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4457 assert(d1>=0);
4458 assert(d2>=0);
4459 signed char quotient=get_reg(i_regs->regmap,LOREG);
4460 signed char remainder=get_reg(i_regs->regmap,HIREG);
4461 assert(quotient>=0);
4462 assert(remainder>=0);
4463 emit_movs(d1,remainder);
4464 emit_negmi(remainder,remainder);
4465 emit_movs(d2,HOST_TEMPREG);
4466 emit_jeq((int)out+52); // Division by zero
4467 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4468 emit_clz(HOST_TEMPREG,quotient);
4469 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4470 emit_orimm(quotient,1<<31,quotient);
4471 emit_shr(quotient,quotient,quotient);
4472 emit_cmp(remainder,HOST_TEMPREG);
4473 emit_subcs(remainder,HOST_TEMPREG,remainder);
4474 emit_adcs(quotient,quotient,quotient);
4475 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4476 emit_jcc((int)out-16); // -4
4477 emit_teq(d1,d2);
4478 emit_negmi(quotient,quotient);
4479 emit_test(d1,d1);
4480 emit_negmi(remainder,remainder);
4481 }
4482 if(opcode2[i]==0x1B) // DIVU
4483 {
4484 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4485 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4486 assert(d1>=0);
4487 assert(d2>=0);
4488 signed char quotient=get_reg(i_regs->regmap,LOREG);
4489 signed char remainder=get_reg(i_regs->regmap,HIREG);
4490 assert(quotient>=0);
4491 assert(remainder>=0);
4492 emit_test(d2,d2);
4493 emit_jeq((int)out+44); // Division by zero
4494 emit_clz(d2,HOST_TEMPREG);
4495 emit_movimm(1<<31,quotient);
4496 emit_shl(d2,HOST_TEMPREG,d2);
4497 emit_mov(d1,remainder);
4498 emit_shr(quotient,HOST_TEMPREG,quotient);
4499 emit_cmp(remainder,d2);
4500 emit_subcs(remainder,d2,remainder);
4501 emit_adcs(quotient,quotient,quotient);
4502 emit_shrcc_imm(d2,1,d2);
4503 emit_jcc((int)out-16); // -4
4504 }
4505 }
4506 else // 64-bit
4507 {
4508 if(opcode2[i]==0x1C) // DMULT
4509 {
4510 assert(opcode2[i]!=0x1C);
4511 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4512 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4513 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4514 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4515 assert(m1h>=0);
4516 assert(m2h>=0);
4517 assert(m1l>=0);
4518 assert(m2l>=0);
4519 emit_pushreg(m2h);
4520 emit_pushreg(m2l);
4521 emit_pushreg(m1h);
4522 emit_pushreg(m1l);
4523 emit_call((int)&mult64);
4524 emit_popreg(m1l);
4525 emit_popreg(m1h);
4526 emit_popreg(m2l);
4527 emit_popreg(m2h);
4528 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4529 signed char hil=get_reg(i_regs->regmap,HIREG);
4530 if(hih>=0) emit_loadreg(HIREG|64,hih);
4531 if(hil>=0) emit_loadreg(HIREG,hil);
4532 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4533 signed char lol=get_reg(i_regs->regmap,LOREG);
4534 if(loh>=0) emit_loadreg(LOREG|64,loh);
4535 if(lol>=0) emit_loadreg(LOREG,lol);
4536 }
4537 if(opcode2[i]==0x1D) // DMULTU
4538 {
4539 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4540 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4541 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4542 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4543 assert(m1h>=0);
4544 assert(m2h>=0);
4545 assert(m1l>=0);
4546 assert(m2l>=0);
4547 save_regs(0x100f);
4548 if(m1l!=0) emit_mov(m1l,0);
4549 if(m1h==0) emit_readword((int)&dynarec_local,1);
4550 else if(m1h>1) emit_mov(m1h,1);
4551 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4552 else if(m2l>2) emit_mov(m2l,2);
4553 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4554 else if(m2h>3) emit_mov(m2h,3);
4555 emit_call((int)&multu64);
4556 restore_regs(0x100f);
4557 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4558 signed char hil=get_reg(i_regs->regmap,HIREG);
4559 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4560 signed char lol=get_reg(i_regs->regmap,LOREG);
4561 /*signed char temp=get_reg(i_regs->regmap,-1);
4562 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4563 signed char rl=get_reg(i_regs->regmap,HIREG);
4564 assert(m1h>=0);
4565 assert(m2h>=0);
4566 assert(m1l>=0);
4567 assert(m2l>=0);
4568 assert(temp>=0);
4569 //emit_mov(m1l,EAX);
4570 //emit_mul(m2l);
4571 emit_umull(rl,rh,m1l,m2l);
4572 emit_storereg(LOREG,rl);
4573 emit_mov(rh,temp);
4574 //emit_mov(m1h,EAX);
4575 //emit_mul(m2l);
4576 emit_umull(rl,rh,m1h,m2l);
4577 emit_adds(rl,temp,temp);
4578 emit_adcimm(rh,0,rh);
4579 emit_storereg(HIREG,rh);
4580 //emit_mov(m2h,EAX);
4581 //emit_mul(m1l);
4582 emit_umull(rl,rh,m1l,m2h);
4583 emit_adds(rl,temp,temp);
4584 emit_adcimm(rh,0,rh);
4585 emit_storereg(LOREG|64,temp);
4586 emit_mov(rh,temp);
4587 //emit_mov(m2h,EAX);
4588 //emit_mul(m1h);
4589 emit_umull(rl,rh,m1h,m2h);
4590 emit_adds(rl,temp,rl);
4591 emit_loadreg(HIREG,temp);
4592 emit_adcimm(rh,0,rh);
4593 emit_adds(rl,temp,rl);
4594 emit_adcimm(rh,0,rh);
4595 // DEBUG
4596 /*
4597 emit_pushreg(m2h);
4598 emit_pushreg(m2l);
4599 emit_pushreg(m1h);
4600 emit_pushreg(m1l);
4601 emit_call((int)&multu64);
4602 emit_popreg(m1l);
4603 emit_popreg(m1h);
4604 emit_popreg(m2l);
4605 emit_popreg(m2h);
4606 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4607 signed char hil=get_reg(i_regs->regmap,HIREG);
4608 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4609 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4610 */
4611 // Shouldn't be necessary
4612 //char loh=get_reg(i_regs->regmap,LOREG|64);
4613 //char lol=get_reg(i_regs->regmap,LOREG);
4614 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4615 //if(lol>=0) emit_loadreg(LOREG,lol);
4616 }
4617 if(opcode2[i]==0x1E) // DDIV
4618 {
4619 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4620 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4621 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4622 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4623 assert(d1h>=0);
4624 assert(d2h>=0);
4625 assert(d1l>=0);
4626 assert(d2l>=0);
4627 save_regs(0x100f);
4628 if(d1l!=0) emit_mov(d1l,0);
4629 if(d1h==0) emit_readword((int)&dynarec_local,1);
4630 else if(d1h>1) emit_mov(d1h,1);
4631 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4632 else if(d2l>2) emit_mov(d2l,2);
4633 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4634 else if(d2h>3) emit_mov(d2h,3);
4635 emit_call((int)&div64);
4636 restore_regs(0x100f);
4637 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4638 signed char hil=get_reg(i_regs->regmap,HIREG);
4639 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4640 signed char lol=get_reg(i_regs->regmap,LOREG);
4641 if(hih>=0) emit_loadreg(HIREG|64,hih);
4642 if(hil>=0) emit_loadreg(HIREG,hil);
4643 if(loh>=0) emit_loadreg(LOREG|64,loh);
4644 if(lol>=0) emit_loadreg(LOREG,lol);
4645 }
4646 if(opcode2[i]==0x1F) // DDIVU
4647 {
4648 //u_int hr,reglist=0;
4649 //for(hr=0;hr<HOST_REGS;hr++) {
4650 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4651 //}
4652 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4653 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4654 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4655 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4656 assert(d1h>=0);
4657 assert(d2h>=0);
4658 assert(d1l>=0);
4659 assert(d2l>=0);
4660 save_regs(0x100f);
4661 if(d1l!=0) emit_mov(d1l,0);
4662 if(d1h==0) emit_readword((int)&dynarec_local,1);
4663 else if(d1h>1) emit_mov(d1h,1);
4664 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4665 else if(d2l>2) emit_mov(d2l,2);
4666 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4667 else if(d2h>3) emit_mov(d2h,3);
4668 emit_call((int)&divu64);
4669 restore_regs(0x100f);
4670 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4671 signed char hil=get_reg(i_regs->regmap,HIREG);
4672 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4673 signed char lol=get_reg(i_regs->regmap,LOREG);
4674 if(hih>=0) emit_loadreg(HIREG|64,hih);
4675 if(hil>=0) emit_loadreg(HIREG,hil);
4676 if(loh>=0) emit_loadreg(LOREG|64,loh);
4677 if(lol>=0) emit_loadreg(LOREG,lol);
4678 }
4679 }
4680 }
4681 else
4682 {
4683 // Multiply by zero is zero.
4684 // MIPS does not have a divide by zero exception.
4685 // The result is undefined, we return zero.
4686 signed char hr=get_reg(i_regs->regmap,HIREG);
4687 signed char lr=get_reg(i_regs->regmap,LOREG);
4688 if(hr>=0) emit_zeroreg(hr);
4689 if(lr>=0) emit_zeroreg(lr);
4690 }
4691}
4692#define multdiv_assemble multdiv_assemble_arm
4693
// Return-address hash preload hook: deliberately emits nothing on ARM.
// On x86 this step puts the value 0xf8 into the register; on ARM the hash
// is computed with a single instruction in do_rhash(), so no preload is
// needed.
void do_preload_rhash(int r) {
  (void)r;
}
4698
4699void do_preload_rhtbl(int ht) {
4700 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4701}
4702
// Compute the mini hash table offset for the address in `rs` into `rh`:
// masking with 0xf8 keeps bits 3..7, i.e. a multiple of 8 in 0..248.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4706
4707void do_miniht_load(int ht,int rh) {
4708 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4709 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4710}
4711
4712void do_miniht_jump(int rs,int rh,int ht) {
4713 emit_cmp(rh,rs);
4714 emit_ldreq_indexed(ht,4,15);
4715 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4716 emit_mov(rs,7);
4717 emit_jmp(jump_vaddr_reg[7]);
4718 #else
4719 emit_jmp(jump_vaddr_reg[rs]);
4720 #endif
4721}
4722
4723void do_miniht_insert(u_int return_address,int rt,int temp) {
4724 #ifdef ARMv5_ONLY
4725 emit_movimm(return_address,rt); // PC into link register
4726 add_to_linker((int)out,return_address,1);
4727 emit_pcreladdr(temp);
4728 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4729 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4730 #else
4731 emit_movw(return_address&0x0000FFFF,rt);
4732 add_to_linker((int)out,return_address,1);
4733 emit_pcreladdr(temp);
4734 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4735 emit_movt(return_address&0xFFFF0000,rt);
4736 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4737 #endif
4738}
4739
4740// Sign-extend to 64 bits and write out upper half of a register
4741// This is useful where we have a 32-bit value in a register, and want to
4742// keep it in a 32-bit register, but can't guarantee that it won't be read
4743// as a 64-bit value later.
4744void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4745{
24385cae 4746#ifndef FORCE32
57871462 4747 if(is32_pre==is32) return;
4748 int hr,reg;
4749 for(hr=0;hr<HOST_REGS;hr++) {
4750 if(hr!=EXCLUDE_REG) {
4751 //if(pre[hr]==entry[hr]) {
4752 if((reg=pre[hr])>=0) {
4753 if((dirty>>hr)&1) {
4754 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4755 emit_sarimm(hr,31,HOST_TEMPREG);
4756 emit_storereg(reg|64,HOST_TEMPREG);
4757 }
4758 }
4759 }
4760 //}
4761 }
4762 }
24385cae 4763#endif
57871462 4764}
4765
4766void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4767{
4768 //if(dirty_pre==dirty) return;
4769 int hr,reg,new_hr;
4770 for(hr=0;hr<HOST_REGS;hr++) {
4771 if(hr!=EXCLUDE_REG) {
4772 reg=pre[hr];
4773 if(((~u)>>(reg&63))&1) {
4774 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4775 if(((dirty_pre&~dirty)>>hr)&1) {
4776 if(reg>0&&reg<34) {
4777 emit_storereg(reg,hr);
4778 if( ((is32_pre&~uu)>>reg)&1 ) {
4779 emit_sarimm(hr,31,HOST_TEMPREG);
4780 emit_storereg(reg|64,HOST_TEMPREG);
4781 }
4782 }
4783 else if(reg>=64) {
4784 emit_storereg(reg,hr);
4785 }
4786 }
4787 }
4788 else // Check if register moved to a different register
4789 if((new_hr=get_reg(entry,reg))>=0) {
4790 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4791 if(reg>0&&reg<34) {
4792 emit_storereg(reg,hr);
4793 if( ((is32_pre&~uu)>>reg)&1 ) {
4794 emit_sarimm(hr,31,HOST_TEMPREG);
4795 emit_storereg(reg|64,HOST_TEMPREG);
4796 }
4797 }
4798 else if(reg>=64) {
4799 emit_storereg(reg,hr);
4800 }
4801 }
4802 }
4803 }
4804 }
4805 }
4806}
4807
4808
4809/* using strd could possibly help but you'd have to allocate registers in pairs
4810void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4811{
4812 int hr;
4813 int wrote=-1;
4814 for(hr=HOST_REGS-1;hr>=0;hr--) {
4815 if(hr!=EXCLUDE_REG) {
4816 if(pre[hr]!=entry[hr]) {
4817 if(pre[hr]>=0) {
4818 if((dirty>>hr)&1) {
4819 if(get_reg(entry,pre[hr])<0) {
4820 if(pre[hr]<64) {
4821 if(!((u>>pre[hr])&1)) {
4822 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4823 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4824 emit_sarimm(hr,31,hr+1);
4825 emit_strdreg(pre[hr],hr);
4826 }
4827 else
4828 emit_storereg(pre[hr],hr);
4829 }else{
4830 emit_storereg(pre[hr],hr);
4831 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4832 emit_sarimm(hr,31,hr);
4833 emit_storereg(pre[hr]|64,hr);
4834 }
4835 }
4836 }
4837 }else{
4838 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4839 emit_storereg(pre[hr],hr);
4840 }
4841 }
4842 wrote=hr;
4843 }
4844 }
4845 }
4846 }
4847 }
4848 }
4849 for(hr=0;hr<HOST_REGS;hr++) {
4850 if(hr!=EXCLUDE_REG) {
4851 if(pre[hr]!=entry[hr]) {
4852 if(pre[hr]>=0) {
4853 int nr;
4854 if((nr=get_reg(entry,pre[hr]))>=0) {
4855 emit_mov(hr,nr);
4856 }
4857 }
4858 }
4859 }
4860 }
4861}
4862#define wb_invalidate wb_invalidate_arm
4863*/
4864
dd3a91a1 4865// Clearing the cache is rather slow on ARM Linux, so mark the areas
4866// that need to be cleared, and then only clear these areas once.
4867void do_clear_cache()
4868{
4869 int i,j;
4870 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4871 {
4872 u_int bitmap=needs_clear_cache[i];
4873 if(bitmap) {
4874 u_int start,end;
4875 for(j=0;j<32;j++)
4876 {
4877 if(bitmap&(1<<j)) {
4878 start=BASE_ADDR+i*131072+j*4096;
4879 end=start+4095;
4880 j++;
4881 while(j<32) {
4882 if(bitmap&(1<<j)) {
4883 end+=4096;
4884 j++;
4885 }else{
4886 __clear_cache((void *)start,(void *)end);
4887 break;
4888 }
4889 }
4890 }
4891 }
4892 needs_clear_cache[i]=0;
4893 }
4894 }
4895}
4896
57871462 4897// CPU-architecture-specific initialization
4898void arch_init() {
3d624f89 4899#ifndef DISABLE_COP1
57871462 4900 rounding_modes[0]=0x0<<22; // round
4901 rounding_modes[1]=0x3<<22; // trunc
4902 rounding_modes[2]=0x1<<22; // ceil
4903 rounding_modes[3]=0x2<<22; // floor
3d624f89 4904#endif
57871462 4905}
b9b61529 4906
4907// vim:shiftwidth=2:expandtab