drc: handle division by 0 correctly
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
0bbd1454 69void invalidate_addr_r0();
70void invalidate_addr_r1();
71void invalidate_addr_r2();
72void invalidate_addr_r3();
73void invalidate_addr_r4();
74void invalidate_addr_r5();
75void invalidate_addr_r6();
76void invalidate_addr_r7();
77void invalidate_addr_r8();
78void invalidate_addr_r9();
79void invalidate_addr_r10();
80void invalidate_addr_r12();
81
82const u_int invalidate_addr_reg[16] = {
83 (int)invalidate_addr_r0,
84 (int)invalidate_addr_r1,
85 (int)invalidate_addr_r2,
86 (int)invalidate_addr_r3,
87 (int)invalidate_addr_r4,
88 (int)invalidate_addr_r5,
89 (int)invalidate_addr_r6,
90 (int)invalidate_addr_r7,
91 (int)invalidate_addr_r8,
92 (int)invalidate_addr_r9,
93 (int)invalidate_addr_r10,
94 0,
95 (int)invalidate_addr_r12,
96 0,
97 0,
98 0};
99
57871462 100#include "fpu.h"
101
dd3a91a1 102unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
57871462 104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
173 literals[literalcount][0]=addr;
174 literals[literalcount][1]=val;
175 literalcount++;
176}
177
f76eeef9 178void *kill_pointer(void *stub)
57871462 179{
180 int *ptr=(int *)(stub+4);
181 assert((*ptr&0x0ff00000)==0x05900000);
182 u_int offset=*ptr&0xfff;
183 int **l_ptr=(void *)ptr+offset+8;
184 int *i_ptr=*l_ptr;
185 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 186 return i_ptr;
57871462 187}
188
189int get_pointer(void *stub)
190{
191 //printf("get_pointer(%x)\n",(int)stub);
192 int *ptr=(int *)(stub+4);
193 assert((*ptr&0x0ff00000)==0x05900000);
194 u_int offset=*ptr&0xfff;
195 int **l_ptr=(void *)ptr+offset+8;
196 int *i_ptr=*l_ptr;
197 assert((*i_ptr&0x0f000000)==0x0a000000);
198 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
199}
200
201// Find the "clean" entry point from a "dirty" entry point
202// by skipping past the call to verify_code
203u_int get_clean_addr(int addr)
204{
205 int *ptr=(int *)addr;
206 #ifdef ARMv5_ONLY
207 ptr+=4;
208 #else
209 ptr+=6;
210 #endif
211 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
212 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
213 ptr++;
214 if((*ptr&0xFF000000)==0xea000000) {
215 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
216 }
217 return (u_int)ptr;
218}
219
220int verify_dirty(int addr)
221{
222 u_int *ptr=(u_int *)addr;
223 #ifdef ARMv5_ONLY
224 // get from literal pool
225 assert((*ptr&0xFFF00000)==0xe5900000);
226 u_int offset=*ptr&0xfff;
227 u_int *l_ptr=(void *)ptr+offset+8;
228 u_int source=l_ptr[0];
229 u_int copy=l_ptr[1];
230 u_int len=l_ptr[2];
231 ptr+=4;
232 #else
233 // ARMv7 movw/movt
234 assert((*ptr&0xFFF00000)==0xe3000000);
235 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
236 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
237 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
238 ptr+=6;
239 #endif
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 242 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 243 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
244 unsigned int page=source>>12;
245 unsigned int map_value=memory_map[page];
246 if(map_value>=0x80000000) return 0;
247 while(page<((source+len-1)>>12)) {
248 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
249 }
250 source = source+(map_value<<2);
251 }
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258int isclean(int addr)
259{
260 #ifdef ARMv5_ONLY
261 int *ptr=((u_int *)addr)+4;
262 #else
263 int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
270 return 1;
271}
272
273void get_bounds(int addr,u_int *start,u_int *end)
274{
275 u_int *ptr=(u_int *)addr;
276 #ifdef ARMv5_ONLY
277 // get from literal pool
278 assert((*ptr&0xFFF00000)==0xe5900000);
279 u_int offset=*ptr&0xfff;
280 u_int *l_ptr=(void *)ptr+offset+8;
281 u_int source=l_ptr[0];
282 //u_int copy=l_ptr[1];
283 u_int len=l_ptr[2];
284 ptr+=4;
285 #else
286 // ARMv7 movw/movt
287 assert((*ptr&0xFFF00000)==0xe3000000);
288 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
289 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
290 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
291 ptr+=6;
292 #endif
293 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
294 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 295 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 296 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
297 if(memory_map[source>>12]>=0x80000000) source = 0;
298 else source = source+(memory_map[source>>12]<<2);
299 }
300 *start=source;
301 *end=source+len;
302}
303
304/* Register allocation */
305
306// Note: registers are allocated clean (unmodified state)
307// if you intend to modify the register, you must call dirty_reg().
308void alloc_reg(struct regstat *cur,int i,signed char reg)
309{
310 int r,hr;
311 int preferred_reg = (reg&7);
312 if(reg==CCREG) preferred_reg=HOST_CCREG;
313 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
314
315 // Don't allocate unused registers
316 if((cur->u>>reg)&1) return;
317
318 // see if it's already allocated
319 for(hr=0;hr<HOST_REGS;hr++)
320 {
321 if(cur->regmap[hr]==reg) return;
322 }
323
324 // Keep the same mapping if the register was already allocated in a loop
325 preferred_reg = loop_reg(i,reg,preferred_reg);
326
327 // Try to allocate the preferred register
328 if(cur->regmap[preferred_reg]==-1) {
329 cur->regmap[preferred_reg]=reg;
330 cur->dirty&=~(1<<preferred_reg);
331 cur->isconst&=~(1<<preferred_reg);
332 return;
333 }
334 r=cur->regmap[preferred_reg];
335 if(r<64&&((cur->u>>r)&1)) {
336 cur->regmap[preferred_reg]=reg;
337 cur->dirty&=~(1<<preferred_reg);
338 cur->isconst&=~(1<<preferred_reg);
339 return;
340 }
341 if(r>=64&&((cur->uu>>(r&63))&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347
348 // Clear any unneeded registers
349 // We try to keep the mapping consistent, if possible, because it
350 // makes branches easier (especially loops). So we try to allocate
351 // first (see above) before removing old mappings. If this is not
352 // possible then go ahead and clear out the registers that are no
353 // longer needed.
354 for(hr=0;hr<HOST_REGS;hr++)
355 {
356 r=cur->regmap[hr];
357 if(r>=0) {
358 if(r<64) {
359 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
360 }
361 else
362 {
363 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
364 }
365 }
366 }
367 // Try to allocate any available register, but prefer
368 // registers that have not been used recently.
369 if(i>0) {
370 for(hr=0;hr<HOST_REGS;hr++) {
371 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
372 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
373 cur->regmap[hr]=reg;
374 cur->dirty&=~(1<<hr);
375 cur->isconst&=~(1<<hr);
376 return;
377 }
378 }
379 }
380 }
381 // Try to allocate any available register
382 for(hr=0;hr<HOST_REGS;hr++) {
383 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
384 cur->regmap[hr]=reg;
385 cur->dirty&=~(1<<hr);
386 cur->isconst&=~(1<<hr);
387 return;
388 }
389 }
390
391 // Ok, now we have to evict someone
392 // Pick a register we hopefully won't need soon
393 u_char hsn[MAXREG+1];
394 memset(hsn,10,sizeof(hsn));
395 int j;
396 lsn(hsn,i,&preferred_reg);
397 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
398 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
399 if(i>0) {
400 // Don't evict the cycle count at entry points, otherwise the entry
401 // stub will have to write it.
402 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
403 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
404 for(j=10;j>=3;j--)
405 {
406 // Alloc preferred register if available
407 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
408 for(hr=0;hr<HOST_REGS;hr++) {
409 // Evict both parts of a 64-bit register
410 if((cur->regmap[hr]&63)==r) {
411 cur->regmap[hr]=-1;
412 cur->dirty&=~(1<<hr);
413 cur->isconst&=~(1<<hr);
414 }
415 }
416 cur->regmap[preferred_reg]=reg;
417 return;
418 }
419 for(r=1;r<=MAXREG;r++)
420 {
421 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
422 for(hr=0;hr<HOST_REGS;hr++) {
423 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
424 if(cur->regmap[hr]==r+64) {
425 cur->regmap[hr]=reg;
426 cur->dirty&=~(1<<hr);
427 cur->isconst&=~(1<<hr);
428 return;
429 }
430 }
431 }
432 for(hr=0;hr<HOST_REGS;hr++) {
433 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
434 if(cur->regmap[hr]==r) {
435 cur->regmap[hr]=reg;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 return;
439 }
440 }
441 }
442 }
443 }
444 }
445 }
446 for(j=10;j>=0;j--)
447 {
448 for(r=1;r<=MAXREG;r++)
449 {
450 if(hsn[r]==j) {
451 for(hr=0;hr<HOST_REGS;hr++) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 for(hr=0;hr<HOST_REGS;hr++) {
460 if(cur->regmap[hr]==r) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 }
468 }
469 }
470 printf("This shouldn't happen (alloc_reg)");exit(1);
471}
472
473void alloc_reg64(struct regstat *cur,int i,signed char reg)
474{
475 int preferred_reg = 8+(reg&1);
476 int r,hr;
477
478 // allocate the lower 32 bits
479 alloc_reg(cur,i,reg);
480
481 // Don't allocate unused registers
482 if((cur->uu>>reg)&1) return;
483
484 // see if the upper half is already allocated
485 for(hr=0;hr<HOST_REGS;hr++)
486 {
487 if(cur->regmap[hr]==reg+64) return;
488 }
489
490 // Keep the same mapping if the register was already allocated in a loop
491 preferred_reg = loop_reg(i,reg,preferred_reg);
492
493 // Try to allocate the preferred register
494 if(cur->regmap[preferred_reg]==-1) {
495 cur->regmap[preferred_reg]=reg|64;
496 cur->dirty&=~(1<<preferred_reg);
497 cur->isconst&=~(1<<preferred_reg);
498 return;
499 }
500 r=cur->regmap[preferred_reg];
501 if(r<64&&((cur->u>>r)&1)) {
502 cur->regmap[preferred_reg]=reg|64;
503 cur->dirty&=~(1<<preferred_reg);
504 cur->isconst&=~(1<<preferred_reg);
505 return;
506 }
507 if(r>=64&&((cur->uu>>(r&63))&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513
514 // Clear any unneeded registers
515 // We try to keep the mapping consistent, if possible, because it
516 // makes branches easier (especially loops). So we try to allocate
517 // first (see above) before removing old mappings. If this is not
518 // possible then go ahead and clear out the registers that are no
519 // longer needed.
520 for(hr=HOST_REGS-1;hr>=0;hr--)
521 {
522 r=cur->regmap[hr];
523 if(r>=0) {
524 if(r<64) {
525 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
526 }
527 else
528 {
529 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
530 }
531 }
532 }
533 // Try to allocate any available register, but prefer
534 // registers that have not been used recently.
535 if(i>0) {
536 for(hr=0;hr<HOST_REGS;hr++) {
537 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
538 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
539 cur->regmap[hr]=reg|64;
540 cur->dirty&=~(1<<hr);
541 cur->isconst&=~(1<<hr);
542 return;
543 }
544 }
545 }
546 }
547 // Try to allocate any available register
548 for(hr=0;hr<HOST_REGS;hr++) {
549 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
550 cur->regmap[hr]=reg|64;
551 cur->dirty&=~(1<<hr);
552 cur->isconst&=~(1<<hr);
553 return;
554 }
555 }
556
557 // Ok, now we have to evict someone
558 // Pick a register we hopefully won't need soon
559 u_char hsn[MAXREG+1];
560 memset(hsn,10,sizeof(hsn));
561 int j;
562 lsn(hsn,i,&preferred_reg);
563 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
564 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
565 if(i>0) {
566 // Don't evict the cycle count at entry points, otherwise the entry
567 // stub will have to write it.
568 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
569 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
570 for(j=10;j>=3;j--)
571 {
572 // Alloc preferred register if available
573 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
574 for(hr=0;hr<HOST_REGS;hr++) {
575 // Evict both parts of a 64-bit register
576 if((cur->regmap[hr]&63)==r) {
577 cur->regmap[hr]=-1;
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
580 }
581 }
582 cur->regmap[preferred_reg]=reg|64;
583 return;
584 }
585 for(r=1;r<=MAXREG;r++)
586 {
587 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
590 if(cur->regmap[hr]==r+64) {
591 cur->regmap[hr]=reg|64;
592 cur->dirty&=~(1<<hr);
593 cur->isconst&=~(1<<hr);
594 return;
595 }
596 }
597 }
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
600 if(cur->regmap[hr]==r) {
601 cur->regmap[hr]=reg|64;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 }
609 }
610 }
611 }
612 for(j=10;j>=0;j--)
613 {
614 for(r=1;r<=MAXREG;r++)
615 {
616 if(hsn[r]==j) {
617 for(hr=0;hr<HOST_REGS;hr++) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 printf("This shouldn't happen");exit(1);
637}
638
639// Allocate a temporary register. This is done without regard to
640// dirty status or whether the register we request is on the unneeded list
641// Note: This will only allocate one register, even if called multiple times
642void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
643{
644 int r,hr;
645 int preferred_reg = -1;
646
647 // see if it's already allocated
648 for(hr=0;hr<HOST_REGS;hr++)
649 {
650 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
651 }
652
653 // Try to allocate any available register
654 for(hr=HOST_REGS-1;hr>=0;hr--) {
655 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
656 cur->regmap[hr]=reg;
657 cur->dirty&=~(1<<hr);
658 cur->isconst&=~(1<<hr);
659 return;
660 }
661 }
662
663 // Find an unneeded register
664 for(hr=HOST_REGS-1;hr>=0;hr--)
665 {
666 r=cur->regmap[hr];
667 if(r>=0) {
668 if(r<64) {
669 if((cur->u>>r)&1) {
670 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
671 cur->regmap[hr]=reg;
672 cur->dirty&=~(1<<hr);
673 cur->isconst&=~(1<<hr);
674 return;
675 }
676 }
677 }
678 else
679 {
680 if((cur->uu>>(r&63))&1) {
681 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
682 cur->regmap[hr]=reg;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
688 }
689 }
690 }
691
692 // Ok, now we have to evict someone
693 // Pick a register we hopefully won't need soon
694 // TODO: we might want to follow unconditional jumps here
695 // TODO: get rid of dupe code and make this into a function
696 u_char hsn[MAXREG+1];
697 memset(hsn,10,sizeof(hsn));
698 int j;
699 lsn(hsn,i,&preferred_reg);
700 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
701 if(i>0) {
702 // Don't evict the cycle count at entry points, otherwise the entry
703 // stub will have to write it.
704 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
705 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
706 for(j=10;j>=3;j--)
707 {
708 for(r=1;r<=MAXREG;r++)
709 {
710 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
711 for(hr=0;hr<HOST_REGS;hr++) {
712 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
713 if(cur->regmap[hr]==r+64) {
714 cur->regmap[hr]=reg;
715 cur->dirty&=~(1<<hr);
716 cur->isconst&=~(1<<hr);
717 return;
718 }
719 }
720 }
721 for(hr=0;hr<HOST_REGS;hr++) {
722 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
723 if(cur->regmap[hr]==r) {
724 cur->regmap[hr]=reg;
725 cur->dirty&=~(1<<hr);
726 cur->isconst&=~(1<<hr);
727 return;
728 }
729 }
730 }
731 }
732 }
733 }
734 }
735 for(j=10;j>=0;j--)
736 {
737 for(r=1;r<=MAXREG;r++)
738 {
739 if(hsn[r]==j) {
740 for(hr=0;hr<HOST_REGS;hr++) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 for(hr=0;hr<HOST_REGS;hr++) {
749 if(cur->regmap[hr]==r) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 }
757 }
758 }
759 printf("This shouldn't happen");exit(1);
760}
761// Allocate a specific ARM register.
762void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
763{
764 int n;
f776eb14 765 int dirty=0;
57871462 766
767 // see if it's already allocated (and dealloc it)
768 for(n=0;n<HOST_REGS;n++)
769 {
f776eb14 770 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
771 dirty=(cur->dirty>>n)&1;
772 cur->regmap[n]=-1;
773 }
57871462 774 }
775
776 cur->regmap[hr]=reg;
777 cur->dirty&=~(1<<hr);
f776eb14 778 cur->dirty|=dirty<<hr;
57871462 779 cur->isconst&=~(1<<hr);
780}
781
782// Alloc cycle count into dedicated register
783alloc_cc(struct regstat *cur,int i)
784{
785 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
786}
787
788/* Special alloc */
789
790
791/* Assembler */
792
// Printable names of the 16 ARM registers, indexed by register number.
// Used by the assem_debug() traces below.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
810
811void output_byte(u_char byte)
812{
813 *(out++)=byte;
814}
815void output_modrm(u_char mod,u_char rm,u_char ext)
816{
817 assert(mod<4);
818 assert(rm<8);
819 assert(ext<8);
820 u_char byte=(mod<<6)|(ext<<3)|rm;
821 *(out++)=byte;
822}
823void output_sib(u_char scale,u_char index,u_char base)
824{
825 assert(scale<4);
826 assert(index<8);
827 assert(base<8);
828 u_char byte=(scale<<6)|(index<<3)|base;
829 *(out++)=byte;
830}
831void output_w32(u_int word)
832{
833 *((u_int *)out)=word;
834 out+=4;
835}
// Build the register fields of an instruction word: rn in bits 16-19,
// rd in bits 12-15 and rm in bits 0-3. Each argument must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the fields of an immediate-operand instruction: rn in bits 16-19,
// rd in bits 12-15, an 8-bit immediate in bits 0-7, and a rotate field in
// bits 8-11 derived from `shift` as ((32-shift)&30)/2, so that the operand
// value is imm << shift. `shift` must be even and `imm` below 256.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate_bits=((32-shift)&30)<<7;
  u_int fields=imm;
  fields|=rotate_bits;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Try to express `imm` as an 8-bit value rotated right by an even amount,
// i.e. the 12-bit immediate operand form (rotate field in bits 8-11, value in
// bits 0-7). On success the 12-bit field is stored in *encoded and 1 is
// returned. On failure *encoded is left at 0 and 0 is returned.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  // Rotate the candidate right two bits at a time until it fits in 8 bits;
  // `rot` tracks the rotation needed to get the original value back.
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 866void genimm_checked(u_int imm,u_int *encoded)
867{
868 u_int ret=genimm(imm,encoded);
869 assert(ret);
870}
57871462 871u_int genjmp(u_int addr)
872{
873 int offset=addr-(int)out-8;
e80343e2 874 if(offset<-33554432||offset>=33554432) {
875 if (addr>2) {
876 printf("genjmp: out of range: %08x\n", offset);
877 exit(1);
878 }
879 return 0;
880 }
57871462 881 return ((u_int)offset>>2)&0xffffff;
882}
883
884void emit_mov(int rs,int rt)
885{
886 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
887 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
888}
889
890void emit_movs(int rs,int rt)
891{
892 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
893 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
894}
895
896void emit_add(int rs1,int rs2,int rt)
897{
898 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
899 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
900}
901
902void emit_adds(int rs1,int rs2,int rt)
903{
904 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
905 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
906}
907
908void emit_adcs(int rs1,int rs2,int rt)
909{
910 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
911 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
912}
913
914void emit_sbc(int rs1,int rs2,int rt)
915{
916 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
917 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
918}
919
920void emit_sbcs(int rs1,int rs2,int rt)
921{
922 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
923 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
924}
925
926void emit_neg(int rs, int rt)
927{
928 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
929 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
930}
931
932void emit_negs(int rs, int rt)
933{
934 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
935 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
936}
937
938void emit_sub(int rs1,int rs2,int rt)
939{
940 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
941 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
942}
943
944void emit_subs(int rs1,int rs2,int rt)
945{
946 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
947 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
948}
949
950void emit_zeroreg(int rt)
951{
952 assem_debug("mov %s,#0\n",regname[rt]);
953 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
954}
955
790ee18e 956void emit_loadlp(u_int imm,u_int rt)
957{
958 add_literal((int)out,imm);
959 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
960 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
961}
962void emit_movw(u_int imm,u_int rt)
963{
964 assert(imm<65536);
965 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
966 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
967}
968void emit_movt(u_int imm,u_int rt)
969{
970 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
971 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
972}
973void emit_movimm(u_int imm,u_int rt)
974{
975 u_int armval;
976 if(genimm(imm,&armval)) {
977 assem_debug("mov %s,#%d\n",regname[rt],imm);
978 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
979 }else if(genimm(~imm,&armval)) {
980 assem_debug("mvn %s,#%d\n",regname[rt],imm);
981 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
982 }else if(imm<65536) {
983 #ifdef ARMv5_ONLY
984 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
985 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
986 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
987 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
988 #else
989 emit_movw(imm,rt);
990 #endif
991 }else{
992 #ifdef ARMv5_ONLY
993 emit_loadlp(imm,rt);
994 #else
995 emit_movw(imm&0x0000FFFF,rt);
996 emit_movt(imm&0xFFFF0000,rt);
997 #endif
998 }
999}
1000void emit_pcreladdr(u_int rt)
1001{
1002 assem_debug("add %s,pc,#?\n",regname[rt]);
1003 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1004}
1005
57871462 1006void emit_loadreg(int r, int hr)
1007{
3d624f89 1008#ifdef FORCE32
1009 if(r&64) {
1010 printf("64bit load in 32bit mode!\n");
7f2607ea 1011 assert(0);
1012 return;
3d624f89 1013 }
1014#endif
57871462 1015 if((r&63)==0)
1016 emit_zeroreg(hr);
1017 else {
3d624f89 1018 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1019 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1020 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1021 if(r==CCREG) addr=(int)&cycle_count;
1022 if(r==CSREG) addr=(int)&Status;
1023 if(r==FSREG) addr=(int)&FCR31;
1024 if(r==INVCP) addr=(int)&invc_ptr;
1025 u_int offset = addr-(u_int)&dynarec_local;
1026 assert(offset<4096);
1027 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1028 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1029 }
1030}
1031void emit_storereg(int r, int hr)
1032{
3d624f89 1033#ifdef FORCE32
1034 if(r&64) {
1035 printf("64bit store in 32bit mode!\n");
7f2607ea 1036 assert(0);
1037 return;
3d624f89 1038 }
1039#endif
1040 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1041 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1042 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1043 if(r==CCREG) addr=(int)&cycle_count;
1044 if(r==FSREG) addr=(int)&FCR31;
1045 u_int offset = addr-(u_int)&dynarec_local;
1046 assert(offset<4096);
1047 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1048 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1049}
1050
1051void emit_test(int rs, int rt)
1052{
1053 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1054 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1055}
1056
1057void emit_testimm(int rs,int imm)
1058{
1059 u_int armval;
5a05d80c 1060 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1061 genimm_checked(imm,&armval);
57871462 1062 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1063}
1064
b9b61529 1065void emit_testeqimm(int rs,int imm)
1066{
1067 u_int armval;
1068 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1069 genimm_checked(imm,&armval);
b9b61529 1070 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1071}
1072
57871462 1073void emit_not(int rs,int rt)
1074{
1075 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1076 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1077}
1078
b9b61529 1079void emit_mvnmi(int rs,int rt)
1080{
1081 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1082 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1083}
1084
57871462 1085void emit_and(u_int rs1,u_int rs2,u_int rt)
1086{
1087 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1088 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1089}
1090
1091void emit_or(u_int rs1,u_int rs2,u_int rt)
1092{
1093 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1095}
1096void emit_or_and_set_flags(int rs1,int rs2,int rt)
1097{
1098 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1099 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1100}
1101
f70d384d 1102void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1103{
1104 assert(rs<16);
1105 assert(rt<16);
1106 assert(imm<32);
1107 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1108 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1109}
1110
576bbd8f 1111void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1112{
1113 assert(rs<16);
1114 assert(rt<16);
1115 assert(imm<32);
1116 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1117 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1118}
1119
57871462 1120void emit_xor(u_int rs1,u_int rs2,u_int rt)
1121{
1122 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1123 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1124}
1125
57871462 1126void emit_addimm(u_int rs,int imm,u_int rt)
1127{
1128 assert(rs<16);
1129 assert(rt<16);
1130 if(imm!=0) {
1131 assert(imm>-65536&&imm<65536);
1132 u_int armval;
1133 if(genimm(imm,&armval)) {
1134 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1135 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1136 }else if(genimm(-imm,&armval)) {
1137 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1138 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1139 }else if(imm<0) {
1140 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1141 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1142 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1143 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1144 }else{
1145 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1146 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1147 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1148 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1149 }
1150 }
1151 else if(rs!=rt) emit_mov(rs,rt);
1152}
1153
1154void emit_addimm_and_set_flags(int imm,int rt)
1155{
1156 assert(imm>-65536&&imm<65536);
1157 u_int armval;
1158 if(genimm(imm,&armval)) {
1159 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1160 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1161 }else if(genimm(-imm,&armval)) {
1162 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1163 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1164 }else if(imm<0) {
1165 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1166 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1167 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1168 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1169 }else{
1170 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1171 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1172 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1173 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1174 }
1175}
1176void emit_addimm_no_flags(u_int imm,u_int rt)
1177{
1178 emit_addimm(rt,imm,rt);
1179}
1180
1181void emit_addnop(u_int r)
1182{
1183 assert(r<16);
1184 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1185 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1186}
1187
1188void emit_adcimm(u_int rs,int imm,u_int rt)
1189{
1190 u_int armval;
cfbd3c6e 1191 genimm_checked(imm,&armval);
57871462 1192 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1193 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1194}
1195/*void emit_sbcimm(int imm,u_int rt)
1196{
1197 u_int armval;
cfbd3c6e 1198 genimm_checked(imm,&armval);
57871462 1199 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1200 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1201}*/
1202void emit_sbbimm(int imm,u_int rt)
1203{
1204 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1205 assert(rt<8);
1206 if(imm<128&&imm>=-128) {
1207 output_byte(0x83);
1208 output_modrm(3,rt,3);
1209 output_byte(imm);
1210 }
1211 else
1212 {
1213 output_byte(0x81);
1214 output_modrm(3,rt,3);
1215 output_w32(imm);
1216 }
1217}
1218void emit_rscimm(int rs,int imm,u_int rt)
1219{
1220 assert(0);
1221 u_int armval;
cfbd3c6e 1222 genimm_checked(imm,&armval);
57871462 1223 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1224 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1225}
1226
1227void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1228{
1229 // TODO: if(genimm(imm,&armval)) ...
1230 // else
1231 emit_movimm(imm,HOST_TEMPREG);
1232 emit_adds(HOST_TEMPREG,rsl,rtl);
1233 emit_adcimm(rsh,0,rth);
1234}
1235
1236void emit_sbb(int rs1,int rs2)
1237{
1238 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1239 output_byte(0x19);
1240 output_modrm(3,rs1,rs2);
1241}
1242
1243void emit_andimm(int rs,int imm,int rt)
1244{
1245 u_int armval;
790ee18e 1246 if(imm==0) {
1247 emit_zeroreg(rt);
1248 }else if(genimm(imm,&armval)) {
57871462 1249 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1250 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1251 }else if(genimm(~imm,&armval)) {
1252 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1253 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1254 }else if(imm==65535) {
1255 #ifdef ARMv5_ONLY
1256 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1257 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1258 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1259 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1260 #else
1261 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1262 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1263 #endif
1264 }else{
1265 assert(imm>0&&imm<65535);
1266 #ifdef ARMv5_ONLY
1267 assem_debug("mov r14,#%d\n",imm&0xFF00);
1268 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1269 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1270 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1271 #else
1272 emit_movw(imm,HOST_TEMPREG);
1273 #endif
1274 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1275 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1276 }
1277}
1278
1279void emit_orimm(int rs,int imm,int rt)
1280{
1281 u_int armval;
790ee18e 1282 if(imm==0) {
1283 if(rs!=rt) emit_mov(rs,rt);
1284 }else if(genimm(imm,&armval)) {
57871462 1285 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1286 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1287 }else{
1288 assert(imm>0&&imm<65536);
1289 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1290 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1291 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1292 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1293 }
1294}
1295
1296void emit_xorimm(int rs,int imm,int rt)
1297{
57871462 1298 u_int armval;
790ee18e 1299 if(imm==0) {
1300 if(rs!=rt) emit_mov(rs,rt);
1301 }else if(genimm(imm,&armval)) {
57871462 1302 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1303 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1304 }else{
514ed0d9 1305 assert(imm>0&&imm<65536);
57871462 1306 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1307 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1308 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1309 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1310 }
1311}
1312
1313void emit_shlimm(int rs,u_int imm,int rt)
1314{
1315 assert(imm>0);
1316 assert(imm<32);
1317 //if(imm==1) ...
1318 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1319 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1320}
1321
1322void emit_shrimm(int rs,u_int imm,int rt)
1323{
1324 assert(imm>0);
1325 assert(imm<32);
1326 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1327 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1328}
1329
1330void emit_sarimm(int rs,u_int imm,int rt)
1331{
1332 assert(imm>0);
1333 assert(imm<32);
1334 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1335 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1336}
1337
1338void emit_rorimm(int rs,u_int imm,int rt)
1339{
1340 assert(imm>0);
1341 assert(imm<32);
1342 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1343 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1344}
1345
1346void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1347{
1348 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1349 assert(imm>0);
1350 assert(imm<32);
1351 //if(imm==1) ...
1352 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1354 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1355 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1356}
1357
1358void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1359{
1360 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1361 assert(imm>0);
1362 assert(imm<32);
1363 //if(imm==1) ...
1364 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1365 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1366 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1367 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1368}
1369
b9b61529 1370void emit_signextend16(int rs,int rt)
1371{
1372 #ifdef ARMv5_ONLY
1373 emit_shlimm(rs,16,rt);
1374 emit_sarimm(rt,16,rt);
1375 #else
1376 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1377 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1378 #endif
1379}
1380
57871462 1381void emit_shl(u_int rs,u_int shift,u_int rt)
1382{
1383 assert(rs<16);
1384 assert(rt<16);
1385 assert(shift<16);
1386 //if(imm==1) ...
1387 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1388 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1389}
1390void emit_shr(u_int rs,u_int shift,u_int rt)
1391{
1392 assert(rs<16);
1393 assert(rt<16);
1394 assert(shift<16);
1395 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1396 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1397}
1398void emit_sar(u_int rs,u_int shift,u_int rt)
1399{
1400 assert(rs<16);
1401 assert(rt<16);
1402 assert(shift<16);
1403 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1404 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1405}
1406void emit_shlcl(int r)
1407{
1408 assem_debug("shl %%%s,%%cl\n",regname[r]);
1409 assert(0);
1410}
1411void emit_shrcl(int r)
1412{
1413 assem_debug("shr %%%s,%%cl\n",regname[r]);
1414 assert(0);
1415}
1416void emit_sarcl(int r)
1417{
1418 assem_debug("sar %%%s,%%cl\n",regname[r]);
1419 assert(0);
1420}
1421
1422void emit_shldcl(int r1,int r2)
1423{
1424 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1425 assert(0);
1426}
1427void emit_shrdcl(int r1,int r2)
1428{
1429 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1430 assert(0);
1431}
1432void emit_orrshl(u_int rs,u_int shift,u_int rt)
1433{
1434 assert(rs<16);
1435 assert(rt<16);
1436 assert(shift<16);
1437 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1438 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1439}
1440void emit_orrshr(u_int rs,u_int shift,u_int rt)
1441{
1442 assert(rs<16);
1443 assert(rt<16);
1444 assert(shift<16);
1445 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1446 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1447}
1448
1449void emit_cmpimm(int rs,int imm)
1450{
1451 u_int armval;
1452 if(genimm(imm,&armval)) {
5a05d80c 1453 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1454 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1455 }else if(genimm(-imm,&armval)) {
5a05d80c 1456 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1457 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1458 }else if(imm>0) {
1459 assert(imm<65536);
1460 #ifdef ARMv5_ONLY
1461 emit_movimm(imm,HOST_TEMPREG);
1462 #else
1463 emit_movw(imm,HOST_TEMPREG);
1464 #endif
1465 assem_debug("cmp %s,r14\n",regname[rs]);
1466 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1467 }else{
1468 assert(imm>-65536);
1469 #ifdef ARMv5_ONLY
1470 emit_movimm(-imm,HOST_TEMPREG);
1471 #else
1472 emit_movw(-imm,HOST_TEMPREG);
1473 #endif
1474 assem_debug("cmn %s,r14\n",regname[rs]);
1475 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1476 }
1477}
1478
1479void emit_cmovne(u_int *addr,int rt)
1480{
1481 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1482 assert(0);
1483}
1484void emit_cmovl(u_int *addr,int rt)
1485{
1486 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1487 assert(0);
1488}
1489void emit_cmovs(u_int *addr,int rt)
1490{
1491 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1492 assert(0);
1493}
1494void emit_cmovne_imm(int imm,int rt)
1495{
1496 assem_debug("movne %s,#%d\n",regname[rt],imm);
1497 u_int armval;
cfbd3c6e 1498 genimm_checked(imm,&armval);
57871462 1499 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1500}
1501void emit_cmovl_imm(int imm,int rt)
1502{
1503 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1504 u_int armval;
cfbd3c6e 1505 genimm_checked(imm,&armval);
57871462 1506 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1507}
1508void emit_cmovb_imm(int imm,int rt)
1509{
1510 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1511 u_int armval;
cfbd3c6e 1512 genimm_checked(imm,&armval);
57871462 1513 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1514}
1515void emit_cmovs_imm(int imm,int rt)
1516{
1517 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1518 u_int armval;
cfbd3c6e 1519 genimm_checked(imm,&armval);
57871462 1520 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1521}
1522void emit_cmove_reg(int rs,int rt)
1523{
1524 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1525 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1526}
1527void emit_cmovne_reg(int rs,int rt)
1528{
1529 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1530 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1531}
1532void emit_cmovl_reg(int rs,int rt)
1533{
1534 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1535 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1536}
1537void emit_cmovs_reg(int rs,int rt)
1538{
1539 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1540 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1541}
1542
// Emits rt = (rs < imm) as a signed 32-bit comparison (result 0 or 1).
// When rt aliases rs the zeroing has to happen after the compare, and is
// done with emit_movimm instead of emit_zeroreg.
void emit_slti32(int rs, int imm, int rt)
{
  const int in_place = (rs == rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if(in_place) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Emits rt = (rs < imm) as an unsigned 32-bit comparison (result 0 or 1),
// using the carry-clear condition after the compare.
// When rt aliases rs the zeroing has to happen after the compare, and is
// done with emit_movimm instead of emit_zeroreg.
void emit_sltiu32(int rs, int imm, int rt)
{
  const int in_place = (rs == rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if(in_place) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
// Emits rt = ((rsh:rsl) < sign_extend64(imm)) as a signed 64-bit comparison.
//
// The high word of the sign-extended immediate is 0 (imm >= 0) or -1
// (imm < 0). The result is therefore:
//   rsh == that high word -> low words compared as UNSIGNED quantities
//   rsh below it (signed) -> 1
//   rsh above it (signed) -> 0
// The low-word compare must be unsigned: a signed compare gives the wrong
// answer whenever the high words match and rsl has bit 31 set (for example
// 0x00000000_80000000 < 5 would come out true).
// rsh must not alias rt because rt is written before rsh is examined.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm>=0)
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);  // high word != 0: not equal to the immediate's
    emit_cmovs_imm(1,rt);   // high word negative: definitely smaller
  }
  else
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);  // high word != -1
    emit_cmovl_imm(1,rt);   // high word < -1: definitely smaller
  }
}
// Emits rt = ((rsh:rsl) < sign_extend64(imm)) as an unsigned 64-bit
// comparison. The low words are compared unsigned first; the high word then
// overrides the result:
//   imm >= 0: any non-zero high word forces 0
//   imm <  0: any high word other than -1 forces 1
// rsh must not alias rt because rt is written before rsh is examined.
void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl, imm, rt);
  if(imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1589
1590void emit_cmp(int rs,int rt)
1591{
1592 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1593 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1594}
// Emits rt = (rs > 0) as a signed 32-bit test: rs is compared with 1, rt is
// preset to 1 and cleared again when rs < 1.
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Emits rt = (rs != 0). The value is copied (or just tested, when rs and rt
// are the same register) with flags set, then replaced by 1 if non-zero.
void emit_set_nz32(int rs, int rt)
{
  if(rs == rt) emit_test(rs, rs);
  else emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
// Emits rt = ((rsh:rsl) > 0) for a signed 64-bit value held in two
// registers: start from the low-word result, then let the high word override
// it (non-zero -> 1, negative -> 0).
// NOTE(review): the low word is tested as a signed value; with rsh == 0 and
// bit 31 of rsl set this looks like it yields 0 — confirm against callers.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// Emits rt = ((rsh:rsl) != 0): ORs both halves into rt with flags set, then
// replaces a non-zero result with 1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// Emits rt = (rs1 < rs2) as a signed 32-bit comparison (result 0 or 1).
// If rt aliases either source, it is cleared only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_source = (rs1 == rt || rs2 == rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if(rt_is_source) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Emits rt = (rs1 < rs2) as an unsigned 32-bit comparison (result 0 or 1),
// using the carry-clear condition after the compare.
// If rt aliases either source, it is cleared only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_source = (rs1 == rt || rs2 == rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if(rt_is_source) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1639void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1640{
1641 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1642 assert(u1!=rt);
1643 assert(u2!=rt);
1644 emit_cmp(l1,l2);
1645 emit_movimm(0,rt);
1646 emit_sbcs(u1,u2,HOST_TEMPREG);
1647 emit_cmovl_imm(1,rt);
1648}
1649void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1650{
1651 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1652 assert(u1!=rt);
1653 assert(u2!=rt);
1654 emit_cmp(l1,l2);
1655 emit_movimm(0,rt);
1656 emit_sbcs(u1,u2,HOST_TEMPREG);
1657 emit_cmovb_imm(1,rt);
1658}
1659
1660void emit_call(int a)
1661{
1662 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1663 u_int offset=genjmp(a);
1664 output_w32(0xeb000000|offset);
1665}
1666void emit_jmp(int a)
1667{
1668 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1669 u_int offset=genjmp(a);
1670 output_w32(0xea000000|offset);
1671}
1672void emit_jne(int a)
1673{
1674 assem_debug("bne %x\n",a);
1675 u_int offset=genjmp(a);
1676 output_w32(0x1a000000|offset);
1677}
1678void emit_jeq(int a)
1679{
1680 assem_debug("beq %x\n",a);
1681 u_int offset=genjmp(a);
1682 output_w32(0x0a000000|offset);
1683}
1684void emit_js(int a)
1685{
1686 assem_debug("bmi %x\n",a);
1687 u_int offset=genjmp(a);
1688 output_w32(0x4a000000|offset);
1689}
1690void emit_jns(int a)
1691{
1692 assem_debug("bpl %x\n",a);
1693 u_int offset=genjmp(a);
1694 output_w32(0x5a000000|offset);
1695}
1696void emit_jl(int a)
1697{
1698 assem_debug("blt %x\n",a);
1699 u_int offset=genjmp(a);
1700 output_w32(0xba000000|offset);
1701}
1702void emit_jge(int a)
1703{
1704 assem_debug("bge %x\n",a);
1705 u_int offset=genjmp(a);
1706 output_w32(0xaa000000|offset);
1707}
1708void emit_jno(int a)
1709{
1710 assem_debug("bvc %x\n",a);
1711 u_int offset=genjmp(a);
1712 output_w32(0x7a000000|offset);
1713}
1714void emit_jc(int a)
1715{
1716 assem_debug("bcs %x\n",a);
1717 u_int offset=genjmp(a);
1718 output_w32(0x2a000000|offset);
1719}
1720void emit_jcc(int a)
1721{
1722 assem_debug("bcc %x\n",a);
1723 u_int offset=genjmp(a);
1724 output_w32(0x3a000000|offset);
1725}
1726
// Unimplemented stub (x86-style "push imm"): logs, then asserts.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Unimplemented stub (x86-style "pusha"): logs, then asserts.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented stub (x86-style "popa"): logs, then asserts.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1742void emit_pushreg(u_int r)
1743{
1744 assem_debug("push %%%s\n",regname[r]);
1745 assert(0);
1746}
1747void emit_popreg(u_int r)
1748{
1749 assem_debug("pop %%%s\n",regname[r]);
1750 assert(0);
1751}
1752void emit_callreg(u_int r)
1753{
1754 assem_debug("call *%%%s\n",regname[r]);
1755 assert(0);
1756}
1757void emit_jmpreg(u_int r)
1758{
1759 assem_debug("mov pc,%s\n",regname[r]);
1760 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1761}
1762
1763void emit_readword_indexed(int offset, int rs, int rt)
1764{
1765 assert(offset>-4096&&offset<4096);
1766 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1767 if(offset>=0) {
1768 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1769 }else{
1770 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1771 }
1772}
1773void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1774{
1775 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1776 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1777}
// Word load with optional map register: with no map (map < 0) this is a
// plain offset load; with a map register the address is rs + map*4 and the
// constant offset must be zero.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// 64-bit load into the pair rh:rl (rh < 0 means the high word is skipped).
// Without a map register, two offset loads at addr and addr+4 are emitted.
// With a map register, map is incremented by 1 between the two loads so the
// scaled (x4) index advances by one word; map is modified as a side effect,
// and rh must not alias rs because rs is still needed for the second load.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);
  if(map < 0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1798void emit_movsbl_indexed(int offset, int rs, int rt)
1799{
1800 assert(offset>-256&&offset<256);
1801 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1802 if(offset>=0) {
1803 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1804 }else{
1805 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1806 }
1807}
1808void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1809{
1810 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1811 else {
1812 if(addr==0) {
1813 emit_shlimm(map,2,map);
1814 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1815 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1816 }else{
1817 assert(addr>-256&&addr<256);
1818 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1819 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1820 emit_movsbl_indexed(addr, rt, rt);
1821 }
1822 }
1823}
1824void emit_movswl_indexed(int offset, int rs, int rt)
1825{
1826 assert(offset>-256&&offset<256);
1827 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1828 if(offset>=0) {
1829 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1830 }else{
1831 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1832 }
1833}
1834void emit_movzbl_indexed(int offset, int rs, int rt)
1835{
1836 assert(offset>-4096&&offset<4096);
1837 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1838 if(offset>=0) {
1839 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1840 }else{
1841 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1842 }
1843}
1844void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1845{
1846 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1847 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1848}
// Zero-extending byte load with optional map register.
// With a map register and a non-zero constant offset, rs + addr is first
// placed in rt and the scaled-index load is then done relative to rt.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr == 0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs, addr, rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1861void emit_movzwl_indexed(int offset, int rs, int rt)
1862{
1863 assert(offset>-256&&offset<256);
1864 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1865 if(offset>=0) {
1866 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1867 }else{
1868 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1869 }
1870}
1871void emit_readword(int addr, int rt)
1872{
1873 u_int offset = addr-(u_int)&dynarec_local;
1874 assert(offset<4096);
1875 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1876 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1877}
1878void emit_movsbl(int addr, int rt)
1879{
1880 u_int offset = addr-(u_int)&dynarec_local;
1881 assert(offset<256);
1882 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1883 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1884}
1885void emit_movswl(int addr, int rt)
1886{
1887 u_int offset = addr-(u_int)&dynarec_local;
1888 assert(offset<256);
1889 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1890 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1891}
1892void emit_movzbl(int addr, int rt)
1893{
1894 u_int offset = addr-(u_int)&dynarec_local;
1895 assert(offset<4096);
1896 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1897 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1898}
1899void emit_movzwl(int addr, int rt)
1900{
1901 u_int offset = addr-(u_int)&dynarec_local;
1902 assert(offset<256);
1903 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1904 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1905}
1906void emit_movzwl_reg(int rs, int rt)
1907{
1908 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1909 assert(0);
1910}
1911
1912void emit_xchg(int rs, int rt)
1913{
1914 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1915 assert(0);
1916}
1917void emit_writeword_indexed(int rt, int offset, int rs)
1918{
1919 assert(offset>-4096&&offset<4096);
1920 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1921 if(offset>=0) {
1922 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1923 }else{
1924 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1925 }
1926}
1927void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1928{
1929 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1930 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1931}
// Word store with optional map register: with no map (map < 0) this is a
// plain offset store; with a map register the address is rs + map*4 and the
// constant offset must be zero. The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// 64-bit store of the pair rh:rl.
// Without a map register: two offset stores at addr and addr+4 (the high
// word is skipped when rh < 0).
// With a map register (rh is required): the high word goes to rs + map*4.
// For the low word, either temp = map+1 is used as the index (when temp is a
// distinct register from rs), or rs itself is advanced by 4 and map reused.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map < 0) {
    if(rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }

  assert(rh>=0);
  const int temp_usable = (temp != rs);
  if(temp_usable) emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(temp_usable) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1956void emit_writehword_indexed(int rt, int offset, int rs)
1957{
1958 assert(offset>-256&&offset<256);
1959 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1960 if(offset>=0) {
1961 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1962 }else{
1963 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1964 }
1965}
1966void emit_writebyte_indexed(int rt, int offset, int rs)
1967{
1968 assert(offset>-4096&&offset<4096);
1969 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1970 if(offset>=0) {
1971 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1972 }else{
1973 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1974 }
1975}
1976void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1977{
1978 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1979 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1980}
// Byte store with optional map register.
// With a map register and a non-zero constant offset, rs + addr is first
// placed in temp and the scaled-index store is done relative to temp.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr == 0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs, addr, temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
1993void emit_writeword(int rt, int addr)
1994{
1995 u_int offset = addr-(u_int)&dynarec_local;
1996 assert(offset<4096);
1997 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1998 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1999}
2000void emit_writehword(int rt, int addr)
2001{
2002 u_int offset = addr-(u_int)&dynarec_local;
2003 assert(offset<256);
2004 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2005 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2006}
2007void emit_writebyte(int rt, int addr)
2008{
2009 u_int offset = addr-(u_int)&dynarec_local;
2010 assert(offset<4096);
74426039 2011 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2012 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2013}
// Unimplemented stub (x86-style store of an immediate word): logs, then asserts.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Unimplemented stub (x86-style store of an immediate byte): logs, then asserts.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
2024
2025void emit_mul(int rs)
2026{
2027 assem_debug("mul %%%s\n",regname[rs]);
2028 assert(0);
2029}
2030void emit_imul(int rs)
2031{
2032 assem_debug("imul %%%s\n",regname[rs]);
2033 assert(0);
2034}
2035void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2036{
2037 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2038 assert(rs1<16);
2039 assert(rs2<16);
2040 assert(hi<16);
2041 assert(lo<16);
2042 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2043}
2044void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2045{
2046 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2047 assert(rs1<16);
2048 assert(rs2<16);
2049 assert(hi<16);
2050 assert(lo<16);
2051 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2052}
2053
2054void emit_div(int rs)
2055{
2056 assem_debug("div %%%s\n",regname[rs]);
2057 assert(0);
2058}
2059void emit_idiv(int rs)
2060{
2061 assem_debug("idiv %%%s\n",regname[rs]);
2062 assert(0);
2063}
// Unimplemented stub (x86-style "cdq"): logs, then asserts.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2069
2070void emit_clz(int rs,int rt)
2071{
2072 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2073 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2074}
2075
2076void emit_subcs(int rs1,int rs2,int rt)
2077{
2078 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2079 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2080}
2081
2082void emit_shrcc_imm(int rs,u_int imm,int rt)
2083{
2084 assert(imm>0);
2085 assert(imm<32);
2086 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2087 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2088}
2089
2090void emit_negmi(int rs, int rt)
2091{
2092 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2093 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2094}
2095
2096void emit_negsmi(int rs, int rt)
2097{
2098 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2099 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2100}
2101
2102void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2103{
2104 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2105 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2106}
2107
2108void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2109{
2110 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2111 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2112}
2113
2114void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2115{
2116 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2117 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2118}
2119
2120void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2121{
2122 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2123 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2124}
2125
2126void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2127{
2128 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2129 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2130}
2131
2132void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2133{
2134 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2135 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2136}
2137
2138void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2139{
2140 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2141 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2142}
2143
2144void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2145{
2146 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2147 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2148}
2149
2150void emit_teq(int rs, int rt)
2151{
2152 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2153 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2154}
2155
2156void emit_rsbimm(int rs, int imm, int rt)
2157{
2158 u_int armval;
cfbd3c6e 2159 genimm_checked(imm,&armval);
57871462 2160 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2161 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2162}
2163
2164// Load 2 immediates optimizing for small code size
2165void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2166{
2167 emit_movimm(imm1,rt1);
2168 u_int armval;
2169 if(genimm(imm2-imm1,&armval)) {
2170 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2171 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2172 }else if(genimm(imm1-imm2,&armval)) {
2173 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2174 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2175 }
2176 else emit_movimm(imm2,rt2);
2177}
2178
2179// Conditionally select one of two immediates, optimizing for small code size
2180// This will only be called if HAVE_CMOV_IMM is defined
2181void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2182{
2183 u_int armval;
2184 if(genimm(imm2-imm1,&armval)) {
2185 emit_movimm(imm1,rt);
2186 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2187 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2188 }else if(genimm(imm1-imm2,&armval)) {
2189 emit_movimm(imm1,rt);
2190 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2191 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2192 }
2193 else {
2194 #ifdef ARMv5_ONLY
2195 emit_movimm(imm1,rt);
2196 add_literal((int)out,imm2);
2197 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2198 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2199 #else
2200 emit_movw(imm1&0x0000FFFF,rt);
2201 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2202 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2203 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2204 }
2205 emit_movt(imm1&0xFFFF0000,rt);
2206 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2207 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2208 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2209 }
2210 #endif
2211 }
2212}
2213
// special case for checking invalid_code
// This variant has no implementation in this backend: calling it always
// trips the assertion below.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2219
2220// special case for checking invalid_code
2221void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2222{
2223 assert(imm<128&&imm>=0);
2224 assert(r>=0&&r<16);
2225 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2226 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2227 emit_cmpimm(HOST_TEMPREG,imm);
2228}
2229
2230// special case for tlb mapping
2231void emit_addsr12(int rs1,int rs2,int rt)
2232{
2233 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2234 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2235}
2236
0bbd1454 2237void emit_callne(int a)
2238{
2239 assem_debug("blne %x\n",a);
2240 u_int offset=genjmp(a);
2241 output_w32(0x1b000000|offset);
2242}
2243
// Used to preload hash table entries
// NOTE(review): unlike the neighbouring emitters this writes two single
// bytes and a modrm byte before the address word; it looks like a leftover
// from an x86 backend rather than an ARM encoding -- confirm before use.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2253void emit_prefetchreg(int r)
2254{
2255 assem_debug("pld %s\n",regname[r]);
2256 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2257}
2258
2259// Special case for mini_ht
2260void emit_ldreq_indexed(int rs, u_int offset, int rt)
2261{
2262 assert(offset<4096);
2263 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2264 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2265}
2266
2267void emit_flds(int r,int sr)
2268{
2269 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2270 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2271}
2272
2273void emit_vldr(int r,int vr)
2274{
2275 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2276 output_w32(0xed900b00|(vr<<12)|(r<<16));
2277}
2278
2279void emit_fsts(int sr,int r)
2280{
2281 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2282 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2283}
2284
2285void emit_vstr(int vr,int r)
2286{
2287 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2288 output_w32(0xed800b00|(vr<<12)|(r<<16));
2289}
2290
// Emit "ftosizs s<d>,s<s>": convert the single-precision value in s<s>
// to an integer in s<d>.
void emit_ftosizs(int s,int d)
{
  assem_debug("ftosizs s%d,s%d\n",d,s);
  const unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);
  const unsigned int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeebd0ac0|dst_bits|src_bits);
}
2296
// Emit "ftosizd s<d>,d<s>": convert the double-precision value in d<s>
// to an integer in s<d>. Only source registers d0-d7 are encodable here.
void emit_ftosizd(int s,int d)
{
  assem_debug("ftosizd s%d,d%d\n",d,s);
  const unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);
  const unsigned int src_bits=s&7;
  output_w32(0xeebd0bc0|dst_bits|src_bits);
}
2302
// Emit "fsitos s<d>,s<s>": convert the integer in s<s> to a
// single-precision value in s<d>.
void emit_fsitos(int s,int d)
{
  assem_debug("fsitos s%d,s%d\n",d,s);
  const unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);
  const unsigned int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80ac0|dst_bits|src_bits);
}
2308
// Emit "fsitod d<d>,s<s>": convert the integer in s<s> to a
// double-precision value in d<d>. Only destinations d0-d7 are encodable here.
void emit_fsitod(int s,int d)
{
  assem_debug("fsitod d%d,s%d\n",d,s);
  const unsigned int dst_bits=(d&7)<<12;
  const unsigned int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80bc0|dst_bits|src_bits);
}
2314
// Emit "fcvtds d<d>,s<s>": widen the single-precision value in s<s> to
// double precision in d<d>. Only destinations d0-d7 are encodable here.
void emit_fcvtds(int s,int d)
{
  assem_debug("fcvtds d%d,s%d\n",d,s);
  const unsigned int dst_bits=(d&7)<<12;
  const unsigned int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb70ac0|dst_bits|src_bits);
}
2320
// Emit "fcvtsd s<d>,d<s>": narrow the double-precision value in d<s> to
// single precision in s<d>. Only sources d0-d7 are encodable here.
void emit_fcvtsd(int s,int d)
{
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  const unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);
  const unsigned int src_bits=s&7;
  output_w32(0xeeb70bc0|dst_bits|src_bits);
}
2326
// Emit "fsqrts s<d>,s<s>": single-precision square root of s<s> into s<d>.
// Both operands are encoded with the single-precision register fields, so
// the debug trace names both as s-registers.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2332
// Emit "fsqrtd d<d>,d<s>": double-precision square root of d<s> into d<d>.
// Both operands are encoded with the double-precision register fields
// (d0-d7 only), so the debug trace names both as d-registers.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2338
// Emit "fabss s<d>,s<s>": single-precision absolute value of s<s> into s<d>.
// Both operands are encoded with the single-precision register fields, so
// the debug trace names both as s-registers.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2344
// Emit "fabsd d<d>,d<s>": double-precision absolute value of d<s> into d<d>.
// Both operands are encoded with the double-precision register fields
// (d0-d7 only), so the debug trace names both as d-registers.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2350
// Emit "fnegs s<d>,s<s>": single-precision negation of s<s> into s<d>.
// Both operands are encoded with the single-precision register fields, so
// the debug trace names both as s-registers.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2356
// Emit "fnegd d<d>,d<s>": double-precision negation of d<s> into d<d>.
// Both operands are encoded with the double-precision register fields
// (d0-d7 only), so the debug trace names both as d-registers.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2362
// Emit "fadds s<d>,s<s1>,s<s2>": single-precision add.
void emit_fadds(int s1,int s2,int d)
{
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  const unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);
  const unsigned int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  const unsigned int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a00|dst_bits|lhs_bits|rhs_bits);
}
2368
// Emit "faddd d<d>,d<s1>,d<s2>": double-precision add (d0-d7 only).
void emit_faddd(int s1,int s2,int d)
{
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  const unsigned int dst_bits=(d&7)<<12;
  const unsigned int lhs_bits=(s1&7)<<16;
  const unsigned int rhs_bits=s2&7;
  output_w32(0xee300b00|dst_bits|lhs_bits|rhs_bits);
}
2374
// Emit "fsubs s<d>,s<s1>,s<s2>": single-precision subtract.
void emit_fsubs(int s1,int s2,int d)
{
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  const unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);
  const unsigned int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  const unsigned int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a40|dst_bits|lhs_bits|rhs_bits);
}
2380
// Emit "fsubd d<d>,d<s1>,d<s2>": double-precision subtract (d0-d7 only).
void emit_fsubd(int s1,int s2,int d)
{
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  const unsigned int dst_bits=(d&7)<<12;
  const unsigned int lhs_bits=(s1&7)<<16;
  const unsigned int rhs_bits=s2&7;
  output_w32(0xee300b40|dst_bits|lhs_bits|rhs_bits);
}
2386
// Emit "fmuls s<d>,s<s1>,s<s2>": single-precision multiply.
void emit_fmuls(int s1,int s2,int d)
{
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  const unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);
  const unsigned int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  const unsigned int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee200a00|dst_bits|lhs_bits|rhs_bits);
}
2392
// Emit "fmuld d<d>,d<s1>,d<s2>": double-precision multiply (d0-d7 only).
void emit_fmuld(int s1,int s2,int d)
{
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  const unsigned int dst_bits=(d&7)<<12;
  const unsigned int lhs_bits=(s1&7)<<16;
  const unsigned int rhs_bits=s2&7;
  output_w32(0xee200b00|dst_bits|lhs_bits|rhs_bits);
}
2398
// Emit "fdivs s<d>,s<s1>,s<s2>": single-precision divide.
void emit_fdivs(int s1,int s2,int d)
{
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  const unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);
  const unsigned int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  const unsigned int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee800a00|dst_bits|lhs_bits|rhs_bits);
}
2404
// Emit "fdivd d<d>,d<s1>,d<s2>": double-precision divide (d0-d7 only).
void emit_fdivd(int s1,int s2,int d)
{
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  const unsigned int dst_bits=(d&7)<<12;
  const unsigned int lhs_bits=(s1&7)<<16;
  const unsigned int rhs_bits=s2&7;
  output_w32(0xee800b00|dst_bits|lhs_bits|rhs_bits);
}
2410
// Emit a fixed "fcmps s14, s15". The x and y arguments are accepted but
// not used: the operand registers are hard-coded in the instruction word.
void emit_fcmps(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  const unsigned int insn=0xeeb47a67;
  output_w32(insn);
}
2416
// Emit a fixed "fcmpd d6, d7". The x and y arguments are accepted but
// not used: the operand registers are hard-coded in the instruction word.
void emit_fcmpd(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  const unsigned int insn=0xeeb46b47;
  output_w32(insn);
}
2422
// Emit "fmstat" (a single fixed instruction word, no operands).
void emit_fmstat()
{
  assem_debug("fmstat\n");
  const unsigned int insn=0xeef1fa10;
  output_w32(insn);
}
2428
2429void emit_bicne_imm(int rs,int imm,int rt)
2430{
2431 u_int armval;
cfbd3c6e 2432 genimm_checked(imm,&armval);
57871462 2433 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2434 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2435}
2436
2437void emit_biccs_imm(int rs,int imm,int rt)
2438{
2439 u_int armval;
cfbd3c6e 2440 genimm_checked(imm,&armval);
57871462 2441 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2442 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2443}
2444
2445void emit_bicvc_imm(int rs,int imm,int rt)
2446{
2447 u_int armval;
cfbd3c6e 2448 genimm_checked(imm,&armval);
57871462 2449 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2450 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2451}
2452
2453void emit_bichi_imm(int rs,int imm,int rt)
2454{
2455 u_int armval;
cfbd3c6e 2456 genimm_checked(imm,&armval);
57871462 2457 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2458 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2459}
2460
2461void emit_orrvs_imm(int rs,int imm,int rt)
2462{
2463 u_int armval;
cfbd3c6e 2464 genimm_checked(imm,&armval);
57871462 2465 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2466 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2467}
2468
b9b61529 2469void emit_orrne_imm(int rs,int imm,int rt)
2470{
2471 u_int armval;
cfbd3c6e 2472 genimm_checked(imm,&armval);
b9b61529 2473 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2474 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2475}
2476
2477void emit_andne_imm(int rs,int imm,int rt)
2478{
2479 u_int armval;
cfbd3c6e 2480 genimm_checked(imm,&armval);
b9b61529 2481 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2482 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2483}
2484
// Emit an "addvc pc,pc,#..." word with the immediate field left at zero.
// The target address a is only reported in the debug trace; it is not
// encoded into the instruction here.
void emit_jno_unlikely(int a)
{
  assem_debug("addvc pc,pc,#? (%x)\n",a);
  const unsigned int insn=0x72800000|rd_rn_rm(15,15,0);
  output_w32(insn);
}
2491
2492// Save registers before function call
2493void save_regs(u_int reglist)
2494{
2495 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2496 if(!reglist) return;
2497 assem_debug("stmia fp,{");
2498 if(reglist&1) assem_debug("r0, ");
2499 if(reglist&2) assem_debug("r1, ");
2500 if(reglist&4) assem_debug("r2, ");
2501 if(reglist&8) assem_debug("r3, ");
2502 if(reglist&0x1000) assem_debug("r12");
2503 assem_debug("}\n");
2504 output_w32(0xe88b0000|reglist);
2505}
2506// Restore registers after function call
2507void restore_regs(u_int reglist)
2508{
2509 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2510 if(!reglist) return;
2511 assem_debug("ldmia fp,{");
2512 if(reglist&1) assem_debug("r0, ");
2513 if(reglist&2) assem_debug("r1, ");
2514 if(reglist&4) assem_debug("r2, ");
2515 if(reglist&8) assem_debug("r3, ");
2516 if(reglist&0x1000) assem_debug("r12");
2517 assem_debug("}\n");
2518 output_w32(0xe89b0000|reglist);
2519}
2520
2521// Write back consts using r14 so we don't disturb the other registers
2522void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2523{
2524 int hr;
2525 for(hr=0;hr<HOST_REGS;hr++) {
2526 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2527 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2528 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2529 int value=constmap[i][hr];
2530 if(value==0) {
2531 emit_zeroreg(HOST_TEMPREG);
2532 }
2533 else {
2534 emit_movimm(value,HOST_TEMPREG);
2535 }
2536 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2537#ifndef FORCE32
57871462 2538 if((i_is32>>i_regmap[hr])&1) {
2539 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2540 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2541 }
24385cae 2542#endif
57871462 2543 }
2544 }
2545 }
2546 }
2547}
2548
2549/* Stubs/epilogue */
2550
2551void literal_pool(int n)
2552{
2553 if(!literalcount) return;
2554 if(n) {
2555 if((int)out-literals[0][0]<4096-n) return;
2556 }
2557 u_int *ptr;
2558 int i;
2559 for(i=0;i<literalcount;i++)
2560 {
2561 ptr=(u_int *)literals[i][0];
2562 u_int offset=(u_int)out-(u_int)ptr-8;
2563 assert(offset<4096);
2564 assert(!(offset&3));
2565 *ptr|=offset;
2566 output_w32(literals[i][1]);
2567 }
2568 literalcount=0;
2569}
2570
2571void literal_pool_jumpover(int n)
2572{
2573 if(!literalcount) return;
2574 if(n) {
2575 if((int)out-literals[0][0]<4096-n) return;
2576 }
2577 int jaddr=(int)out;
2578 emit_jmp(0);
2579 literal_pool(0);
2580 set_jump_target(jaddr,(int)out);
2581}
2582
2583emit_extjump2(int addr, int target, int linker)
2584{
2585 u_char *ptr=(u_char *)addr;
2586 assert((ptr[3]&0x0e)==0xa);
2587 emit_loadlp(target,0);
2588 emit_loadlp(addr,1);
24385cae 2589 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2590 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2591//DEBUG >
2592#ifdef DEBUG_CYCLE_COUNT
2593 emit_readword((int)&last_count,ECX);
2594 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2595 emit_readword((int)&next_interupt,ECX);
2596 emit_writeword(HOST_CCREG,(int)&Count);
2597 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2598 emit_writeword(ECX,(int)&last_count);
2599#endif
2600//DEBUG <
2601 emit_jmp(linker);
2602}
2603
2604emit_extjump(int addr, int target)
2605{
2606 emit_extjump2(addr, target, (int)dyna_linker);
2607}
2608emit_extjump_ds(int addr, int target)
2609{
2610 emit_extjump2(addr, target, (int)dyna_linker_ds);
2611}
2612
cbbab9cd 2613#ifdef PCSX
2614#include "pcsxmem_inline.c"
2615#endif
2616
57871462 2617do_readstub(int n)
2618{
2619 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2620 literal_pool(256);
2621 set_jump_target(stubs[n][1],(int)out);
2622 int type=stubs[n][0];
2623 int i=stubs[n][3];
2624 int rs=stubs[n][4];
2625 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2626 u_int reglist=stubs[n][7];
2627 signed char *i_regmap=i_regs->regmap;
2628 int addr=get_reg(i_regmap,AGEN1+(i&1));
2629 int rth,rt;
2630 int ds;
b9b61529 2631 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2632 rth=get_reg(i_regmap,FTEMP|64);
2633 rt=get_reg(i_regmap,FTEMP);
2634 }else{
2635 rth=get_reg(i_regmap,rt1[i]|64);
2636 rt=get_reg(i_regmap,rt1[i]);
2637 }
2638 assert(rs>=0);
57871462 2639 if(addr<0) addr=rt;
535d208a 2640 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2641 assert(addr>=0);
2642 int ftable=0;
2643 if(type==LOADB_STUB||type==LOADBU_STUB)
2644 ftable=(int)readmemb;
2645 if(type==LOADH_STUB||type==LOADHU_STUB)
2646 ftable=(int)readmemh;
2647 if(type==LOADW_STUB)
2648 ftable=(int)readmem;
24385cae 2649#ifndef FORCE32
57871462 2650 if(type==LOADD_STUB)
2651 ftable=(int)readmemd;
24385cae 2652#endif
2653 assert(ftable!=0);
57871462 2654 emit_writeword(rs,(int)&address);
2655 //emit_pusha();
2656 save_regs(reglist);
97a238a6 2657#ifndef PCSX
57871462 2658 ds=i_regs!=&regs[i];
2659 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2660 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2661 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2662 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2663 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2664#endif
57871462 2665 emit_shrimm(rs,16,1);
2666 int cc=get_reg(i_regmap,CCREG);
2667 if(cc<0) {
2668 emit_loadreg(CCREG,2);
2669 }
2670 emit_movimm(ftable,0);
2671 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2672#ifndef PCSX
57871462 2673 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2674#endif
57871462 2675 //emit_readword((int)&last_count,temp);
2676 //emit_add(cc,temp,cc);
2677 //emit_writeword(cc,(int)&Count);
2678 //emit_mov(15,14);
2679 emit_call((int)&indirect_jump_indexed);
2680 //emit_callreg(rs);
2681 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2682#ifndef PCSX
57871462 2683 // We really shouldn't need to update the count here,
2684 // but not doing so causes random crashes...
2685 emit_readword((int)&Count,HOST_TEMPREG);
2686 emit_readword((int)&next_interupt,2);
2687 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2688 emit_writeword(2,(int)&last_count);
2689 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2690 if(cc<0) {
2691 emit_storereg(CCREG,HOST_TEMPREG);
2692 }
f51dc36c 2693#endif
57871462 2694 //emit_popa();
2695 restore_regs(reglist);
2696 //if((cc=get_reg(regmap,CCREG))>=0) {
2697 // emit_loadreg(CCREG,cc);
2698 //}
f18c0f46 2699 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2700 assert(rt>=0);
2701 if(type==LOADB_STUB)
2702 emit_movsbl((int)&readmem_dword,rt);
2703 if(type==LOADBU_STUB)
2704 emit_movzbl((int)&readmem_dword,rt);
2705 if(type==LOADH_STUB)
2706 emit_movswl((int)&readmem_dword,rt);
2707 if(type==LOADHU_STUB)
2708 emit_movzwl((int)&readmem_dword,rt);
2709 if(type==LOADW_STUB)
2710 emit_readword((int)&readmem_dword,rt);
2711 if(type==LOADD_STUB) {
2712 emit_readword((int)&readmem_dword,rt);
2713 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2714 }
57871462 2715 }
2716 emit_jmp(stubs[n][2]); // return address
2717}
2718
2719inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2720{
2721 int rs=get_reg(regmap,target);
2722 int rth=get_reg(regmap,target|64);
2723 int rt=get_reg(regmap,target);
535d208a 2724 if(rs<0) rs=get_reg(regmap,-1);
57871462 2725 assert(rs>=0);
57871462 2726 int ftable=0;
2727 if(type==LOADB_STUB||type==LOADBU_STUB)
2728 ftable=(int)readmemb;
2729 if(type==LOADH_STUB||type==LOADHU_STUB)
2730 ftable=(int)readmemh;
2731 if(type==LOADW_STUB)
2732 ftable=(int)readmem;
24385cae 2733#ifndef FORCE32
57871462 2734 if(type==LOADD_STUB)
2735 ftable=(int)readmemd;
24385cae 2736#endif
2737 assert(ftable!=0);
cbbab9cd 2738#ifdef PCSX
2739 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2740 return;
2741#endif
fd99c415 2742 if(target==0)
2743 emit_movimm(addr,rs);
57871462 2744 emit_writeword(rs,(int)&address);
2745 //emit_pusha();
2746 save_regs(reglist);
0c1fe38b 2747#ifndef PCSX
2748 if((signed int)addr>=(signed int)0xC0000000) {
2749 // Theoretically we can have a pagefault here, if the TLB has never
2750 // been enabled and the address is outside the range 80000000..BFFFFFFF
2751 // Write out the registers so the pagefault can be handled. This is
2752 // a very rare case and likely represents a bug.
2753 int ds=regmap!=regs[i].regmap;
2754 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2755 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2756 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2757 }
2758#endif
57871462 2759 //emit_shrimm(rs,16,1);
2760 int cc=get_reg(regmap,CCREG);
2761 if(cc<0) {
2762 emit_loadreg(CCREG,2);
2763 }
2764 //emit_movimm(ftable,0);
2765 emit_movimm(((u_int *)ftable)[addr>>16],0);
2766 //emit_readword((int)&last_count,12);
2767 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2768#ifndef PCSX
57871462 2769 if((signed int)addr>=(signed int)0xC0000000) {
2770 // Pagefault address
2771 int ds=regmap!=regs[i].regmap;
2772 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2773 }
f51dc36c 2774#endif
57871462 2775 //emit_add(12,2,2);
2776 //emit_writeword(2,(int)&Count);
2777 //emit_call(((u_int *)ftable)[addr>>16]);
2778 emit_call((int)&indirect_jump);
f51dc36c 2779#ifndef PCSX
57871462 2780 // We really shouldn't need to update the count here,
2781 // but not doing so causes random crashes...
2782 emit_readword((int)&Count,HOST_TEMPREG);
2783 emit_readword((int)&next_interupt,2);
2784 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2785 emit_writeword(2,(int)&last_count);
2786 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2787 if(cc<0) {
2788 emit_storereg(CCREG,HOST_TEMPREG);
2789 }
f51dc36c 2790#endif
57871462 2791 //emit_popa();
2792 restore_regs(reglist);
fd99c415 2793 if(rt>=0) {
2794 if(type==LOADB_STUB)
2795 emit_movsbl((int)&readmem_dword,rt);
2796 if(type==LOADBU_STUB)
2797 emit_movzbl((int)&readmem_dword,rt);
2798 if(type==LOADH_STUB)
2799 emit_movswl((int)&readmem_dword,rt);
2800 if(type==LOADHU_STUB)
2801 emit_movzwl((int)&readmem_dword,rt);
2802 if(type==LOADW_STUB)
2803 emit_readword((int)&readmem_dword,rt);
2804 if(type==LOADD_STUB) {
2805 emit_readword((int)&readmem_dword,rt);
2806 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2807 }
57871462 2808 }
2809}
2810
2811do_writestub(int n)
2812{
2813 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2814 literal_pool(256);
2815 set_jump_target(stubs[n][1],(int)out);
2816 int type=stubs[n][0];
2817 int i=stubs[n][3];
2818 int rs=stubs[n][4];
2819 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2820 u_int reglist=stubs[n][7];
2821 signed char *i_regmap=i_regs->regmap;
2822 int addr=get_reg(i_regmap,AGEN1+(i&1));
2823 int rth,rt,r;
2824 int ds;
b9b61529 2825 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2826 rth=get_reg(i_regmap,FTEMP|64);
2827 rt=get_reg(i_regmap,r=FTEMP);
2828 }else{
2829 rth=get_reg(i_regmap,rs2[i]|64);
2830 rt=get_reg(i_regmap,r=rs2[i]);
2831 }
2832 assert(rs>=0);
2833 assert(rt>=0);
2834 if(addr<0) addr=get_reg(i_regmap,-1);
2835 assert(addr>=0);
2836 int ftable=0;
2837 if(type==STOREB_STUB)
2838 ftable=(int)writememb;
2839 if(type==STOREH_STUB)
2840 ftable=(int)writememh;
2841 if(type==STOREW_STUB)
2842 ftable=(int)writemem;
24385cae 2843#ifndef FORCE32
57871462 2844 if(type==STORED_STUB)
2845 ftable=(int)writememd;
24385cae 2846#endif
2847 assert(ftable!=0);
57871462 2848 emit_writeword(rs,(int)&address);
2849 //emit_shrimm(rs,16,rs);
2850 //emit_movmem_indexedx4(ftable,rs,rs);
2851 if(type==STOREB_STUB)
2852 emit_writebyte(rt,(int)&byte);
2853 if(type==STOREH_STUB)
2854 emit_writehword(rt,(int)&hword);
2855 if(type==STOREW_STUB)
2856 emit_writeword(rt,(int)&word);
2857 if(type==STORED_STUB) {
3d624f89 2858#ifndef FORCE32
57871462 2859 emit_writeword(rt,(int)&dword);
2860 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2861#else
2862 printf("STORED_STUB\n");
2863#endif
57871462 2864 }
2865 //emit_pusha();
2866 save_regs(reglist);
97a238a6 2867#ifndef PCSX
57871462 2868 ds=i_regs!=&regs[i];
2869 int real_rs=get_reg(i_regmap,rs1[i]);
2870 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2871 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2872 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2873 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2874#endif
57871462 2875 emit_shrimm(rs,16,1);
2876 int cc=get_reg(i_regmap,CCREG);
2877 if(cc<0) {
2878 emit_loadreg(CCREG,2);
2879 }
2880 emit_movimm(ftable,0);
2881 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2882#ifndef PCSX
57871462 2883 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2884#endif
57871462 2885 //emit_readword((int)&last_count,temp);
2886 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2887 //emit_add(cc,temp,cc);
2888 //emit_writeword(cc,(int)&Count);
2889 emit_call((int)&indirect_jump_indexed);
2890 //emit_callreg(rs);
2891 emit_readword((int)&Count,HOST_TEMPREG);
2892 emit_readword((int)&next_interupt,2);
2893 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2894 emit_writeword(2,(int)&last_count);
2895 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2896 if(cc<0) {
2897 emit_storereg(CCREG,HOST_TEMPREG);
2898 }
2899 //emit_popa();
2900 restore_regs(reglist);
2901 //if((cc=get_reg(regmap,CCREG))>=0) {
2902 // emit_loadreg(CCREG,cc);
2903 //}
2904 emit_jmp(stubs[n][2]); // return address
2905}
2906
2907inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2908{
2909 int rs=get_reg(regmap,-1);
2910 int rth=get_reg(regmap,target|64);
2911 int rt=get_reg(regmap,target);
2912 assert(rs>=0);
2913 assert(rt>=0);
cbbab9cd 2914#ifdef PCSX
2915 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2916 return;
2917#endif
57871462 2918 int ftable=0;
2919 if(type==STOREB_STUB)
2920 ftable=(int)writememb;
2921 if(type==STOREH_STUB)
2922 ftable=(int)writememh;
2923 if(type==STOREW_STUB)
2924 ftable=(int)writemem;
24385cae 2925#ifndef FORCE32
57871462 2926 if(type==STORED_STUB)
2927 ftable=(int)writememd;
24385cae 2928#endif
2929 assert(ftable!=0);
57871462 2930 emit_writeword(rs,(int)&address);
2931 //emit_shrimm(rs,16,rs);
2932 //emit_movmem_indexedx4(ftable,rs,rs);
2933 if(type==STOREB_STUB)
2934 emit_writebyte(rt,(int)&byte);
2935 if(type==STOREH_STUB)
2936 emit_writehword(rt,(int)&hword);
2937 if(type==STOREW_STUB)
2938 emit_writeword(rt,(int)&word);
2939 if(type==STORED_STUB) {
3d624f89 2940#ifndef FORCE32
57871462 2941 emit_writeword(rt,(int)&dword);
2942 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2943#else
2944 printf("STORED_STUB\n");
2945#endif
57871462 2946 }
2947 //emit_pusha();
2948 save_regs(reglist);
0c1fe38b 2949#ifndef PCSX
2950 // rearmed note: load_all_consts prevents BIOS boot, some bug?
2951 if((signed int)addr>=(signed int)0xC0000000) {
2952 // Theoretically we can have a pagefault here, if the TLB has never
2953 // been enabled and the address is outside the range 80000000..BFFFFFFF
2954 // Write out the registers so the pagefault can be handled. This is
2955 // a very rare case and likely represents a bug.
2956 int ds=regmap!=regs[i].regmap;
2957 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2958 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2959 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2960 }
2961#endif
57871462 2962 //emit_shrimm(rs,16,1);
2963 int cc=get_reg(regmap,CCREG);
2964 if(cc<0) {
2965 emit_loadreg(CCREG,2);
2966 }
2967 //emit_movimm(ftable,0);
2968 emit_movimm(((u_int *)ftable)[addr>>16],0);
2969 //emit_readword((int)&last_count,12);
2970 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2971#ifndef PCSX
57871462 2972 if((signed int)addr>=(signed int)0xC0000000) {
2973 // Pagefault address
2974 int ds=regmap!=regs[i].regmap;
2975 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2976 }
f51dc36c 2977#endif
57871462 2978 //emit_add(12,2,2);
2979 //emit_writeword(2,(int)&Count);
2980 //emit_call(((u_int *)ftable)[addr>>16]);
2981 emit_call((int)&indirect_jump);
2982 emit_readword((int)&Count,HOST_TEMPREG);
2983 emit_readword((int)&next_interupt,2);
2984 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2985 emit_writeword(2,(int)&last_count);
2986 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2987 if(cc<0) {
2988 emit_storereg(CCREG,HOST_TEMPREG);
2989 }
2990 //emit_popa();
2991 restore_regs(reglist);
2992}
2993
2994do_unalignedwritestub(int n)
2995{
b7918751 2996 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2997 literal_pool(256);
57871462 2998 set_jump_target(stubs[n][1],(int)out);
b7918751 2999
3000 int i=stubs[n][3];
3001 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3002 int addr=stubs[n][5];
3003 u_int reglist=stubs[n][7];
3004 signed char *i_regmap=i_regs->regmap;
3005 int temp2=get_reg(i_regmap,FTEMP);
3006 int rt;
3007 int ds, real_rs;
3008 rt=get_reg(i_regmap,rs2[i]);
3009 assert(rt>=0);
3010 assert(addr>=0);
3011 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3012 reglist|=(1<<addr);
3013 reglist&=~(1<<temp2);
3014
3015 emit_andimm(addr,0xfffffffc,temp2);
3016 emit_writeword(temp2,(int)&address);
3017
3018 save_regs(reglist);
97a238a6 3019#ifndef PCSX
b7918751 3020 ds=i_regs!=&regs[i];
3021 real_rs=get_reg(i_regmap,rs1[i]);
3022 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3023 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3024 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3025 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3026#endif
b7918751 3027 emit_shrimm(addr,16,1);
3028 int cc=get_reg(i_regmap,CCREG);
3029 if(cc<0) {
3030 emit_loadreg(CCREG,2);
3031 }
3032 emit_movimm((u_int)readmem,0);
3033 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3034#ifndef PCSX
3035 // pagefault address
3036 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3037#endif
b7918751 3038 emit_call((int)&indirect_jump_indexed);
3039 restore_regs(reglist);
3040
3041 emit_readword((int)&readmem_dword,temp2);
3042 int temp=addr; //hmh
3043 emit_shlimm(addr,3,temp);
3044 emit_andimm(temp,24,temp);
3045#ifdef BIG_ENDIAN_MIPS
3046 if (opcode[i]==0x2e) // SWR
3047#else
3048 if (opcode[i]==0x2a) // SWL
3049#endif
3050 emit_xorimm(temp,24,temp);
3051 emit_movimm(-1,HOST_TEMPREG);
55439448 3052 if (opcode[i]==0x2a) { // SWL
b7918751 3053 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3054 emit_orrshr(rt,temp,temp2);
3055 }else{
3056 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3057 emit_orrshl(rt,temp,temp2);
3058 }
3059 emit_readword((int)&address,addr);
3060 emit_writeword(temp2,(int)&word);
3061 //save_regs(reglist); // don't need to, no state changes
3062 emit_shrimm(addr,16,1);
3063 emit_movimm((u_int)writemem,0);
3064 //emit_call((int)&indirect_jump_indexed);
3065 emit_mov(15,14);
3066 emit_readword_dualindexedx4(0,1,15);
3067 emit_readword((int)&Count,HOST_TEMPREG);
3068 emit_readword((int)&next_interupt,2);
3069 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3070 emit_writeword(2,(int)&last_count);
3071 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3072 if(cc<0) {
3073 emit_storereg(CCREG,HOST_TEMPREG);
3074 }
3075 restore_regs(reglist);
57871462 3076 emit_jmp(stubs[n][2]); // return address
3077}
3078
// Debug helper: print the eight register values passed in, followed by the
// int located immediately before the first parameter in memory.
// NOTE(review): (&edi)[-1] reads outside the parameter object; it looks
// like an x86 stack-peek leftover -- confirm it is still wanted.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int word_before_args=(&edi)[-1];
  (void)esp; // accepted but not printed
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,word_before_args);
}
3083
3084do_invstub(int n)
3085{
3086 literal_pool(20);
3087 u_int reglist=stubs[n][3];
3088 set_jump_target(stubs[n][1],(int)out);
3089 save_regs(reglist);
3090 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3091 emit_call((int)&invalidate_addr);
3092 restore_regs(reglist);
3093 emit_jmp(stubs[n][2]); // return address
3094}
3095
3096int do_dirty_stub(int i)
3097{
3098 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3099 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3100 #ifdef PCSX
3101 addr=(u_int)source;
3102 #endif
57871462 3103 // Careful about the code output here, verify_dirty needs to parse it.
3104 #ifdef ARMv5_ONLY
ac545b3a 3105 emit_loadlp(addr,1);
57871462 3106 emit_loadlp((int)copy,2);
3107 emit_loadlp(slen*4,3);
3108 #else
ac545b3a 3109 emit_movw(addr&0x0000FFFF,1);
57871462 3110 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3111 emit_movt(addr&0xFFFF0000,1);
57871462 3112 emit_movt(((u_int)copy)&0xFFFF0000,2);
3113 emit_movw(slen*4,3);
3114 #endif
3115 emit_movimm(start+i*4,0);
3116 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3117 int entry=(int)out;
3118 load_regs_entry(i);
3119 if(entry==(int)out) entry=instr_addr[i];
3120 emit_jmp(instr_addr[i]);
3121 return entry;
3122}
3123
3124void do_dirty_stub_ds()
3125{
3126 // Careful about the code output here, verify_dirty needs to parse it.
3127 #ifdef ARMv5_ONLY
3128 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3129 emit_loadlp((int)copy,2);
3130 emit_loadlp(slen*4,3);
3131 #else
3132 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3133 emit_movw(((u_int)copy)&0x0000FFFF,2);
3134 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3135 emit_movt(((u_int)copy)&0xFFFF0000,2);
3136 emit_movw(slen*4,3);
3137 #endif
3138 emit_movimm(start+1,0);
3139 emit_call((int)&verify_code_ds);
3140}
3141
3142do_cop1stub(int n)
3143{
3144 literal_pool(256);
3145 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3146 set_jump_target(stubs[n][1],(int)out);
3147 int i=stubs[n][3];
3d624f89 3148// int rs=stubs[n][4];
57871462 3149 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3150 int ds=stubs[n][6];
3151 if(!ds) {
3152 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3153 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3154 }
3155 //else {printf("fp exception in delay slot\n");}
3156 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3157 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3158 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3159 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3160 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3161}
3162
3163/* TLB */
3164
3165int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3166{
3167 if(c) {
3168 if((signed int)addr>=(signed int)0xC0000000) {
3169 // address_generation already loaded the const
3170 emit_readword_dualindexedx4(FP,map,map);
3171 }
3172 else
3173 return -1; // No mapping
3174 }
3175 else {
3176 assert(s!=map);
3177 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3178 emit_addsr12(map,s,map);
3179 // Schedule this while we wait on the load
3180 //if(x) emit_xorimm(s,x,ar);
3181 if(shift>=0) emit_shlimm(s,3,shift);
3182 if(~a) emit_andimm(s,a,ar);
3183 emit_readword_dualindexedx4(FP,map,map);
3184 }
3185 return map;
3186}
3187int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3188{
3189 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3190 emit_test(map,map);
3191 *jaddr=(int)out;
3192 emit_js(0);
3193 }
3194 return map;
3195}
3196
3197int gen_tlb_addr_r(int ar, int map) {
3198 if(map>=0) {
3199 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3200 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3201 }
3202}
3203
3204int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3205{
3206 if(c) {
3207 if(addr<0x80800000||addr>=0xC0000000) {
3208 // address_generation already loaded the const
3209 emit_readword_dualindexedx4(FP,map,map);
3210 }
3211 else
3212 return -1; // No mapping
3213 }
3214 else {
3215 assert(s!=map);
3216 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3217 emit_addsr12(map,s,map);
3218 // Schedule this while we wait on the load
3219 //if(x) emit_xorimm(s,x,ar);
3220 emit_readword_dualindexedx4(FP,map,map);
3221 }
3222 return map;
3223}
3224int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3225{
3226 if(!c||addr<0x80800000||addr>=0xC0000000) {
3227 emit_testimm(map,0x40000000);
3228 *jaddr=(int)out;
3229 emit_jne(0);
3230 }
3231}
3232
3233int gen_tlb_addr_w(int ar, int map) {
3234 if(map>=0) {
3235 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3236 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3237 }
3238}
3239
3240// Generate the address of the memory_map entry, relative to dynarec_local
3241generate_map_const(u_int addr,int reg) {
3242 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3243 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3244}
3245
3246/* Special assem */
3247
3248void shift_assemble_arm(int i,struct regstat *i_regs)
3249{
3250 if(rt1[i]) {
3251 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3252 {
3253 signed char s,t,shift;
3254 t=get_reg(i_regs->regmap,rt1[i]);
3255 s=get_reg(i_regs->regmap,rs1[i]);
3256 shift=get_reg(i_regs->regmap,rs2[i]);
3257 if(t>=0){
3258 if(rs1[i]==0)
3259 {
3260 emit_zeroreg(t);
3261 }
3262 else if(rs2[i]==0)
3263 {
3264 assert(s>=0);
3265 if(s!=t) emit_mov(s,t);
3266 }
3267 else
3268 {
3269 emit_andimm(shift,31,HOST_TEMPREG);
3270 if(opcode2[i]==4) // SLLV
3271 {
3272 emit_shl(s,HOST_TEMPREG,t);
3273 }
3274 if(opcode2[i]==6) // SRLV
3275 {
3276 emit_shr(s,HOST_TEMPREG,t);
3277 }
3278 if(opcode2[i]==7) // SRAV
3279 {
3280 emit_sar(s,HOST_TEMPREG,t);
3281 }
3282 }
3283 }
3284 } else { // DSLLV/DSRLV/DSRAV
3285 signed char sh,sl,th,tl,shift;
3286 th=get_reg(i_regs->regmap,rt1[i]|64);
3287 tl=get_reg(i_regs->regmap,rt1[i]);
3288 sh=get_reg(i_regs->regmap,rs1[i]|64);
3289 sl=get_reg(i_regs->regmap,rs1[i]);
3290 shift=get_reg(i_regs->regmap,rs2[i]);
3291 if(tl>=0){
3292 if(rs1[i]==0)
3293 {
3294 emit_zeroreg(tl);
3295 if(th>=0) emit_zeroreg(th);
3296 }
3297 else if(rs2[i]==0)
3298 {
3299 assert(sl>=0);
3300 if(sl!=tl) emit_mov(sl,tl);
3301 if(th>=0&&sh!=th) emit_mov(sh,th);
3302 }
3303 else
3304 {
3305 // FIXME: What if shift==tl ?
3306 assert(shift!=tl);
3307 int temp=get_reg(i_regs->regmap,-1);
3308 int real_th=th;
3309 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3310 assert(sl>=0);
3311 assert(sh>=0);
3312 emit_andimm(shift,31,HOST_TEMPREG);
3313 if(opcode2[i]==0x14) // DSLLV
3314 {
3315 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3316 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3317 emit_orrshr(sl,HOST_TEMPREG,th);
3318 emit_andimm(shift,31,HOST_TEMPREG);
3319 emit_testimm(shift,32);
3320 emit_shl(sl,HOST_TEMPREG,tl);
3321 if(th>=0) emit_cmovne_reg(tl,th);
3322 emit_cmovne_imm(0,tl);
3323 }
3324 if(opcode2[i]==0x16) // DSRLV
3325 {
3326 assert(th>=0);
3327 emit_shr(sl,HOST_TEMPREG,tl);
3328 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3329 emit_orrshl(sh,HOST_TEMPREG,tl);
3330 emit_andimm(shift,31,HOST_TEMPREG);
3331 emit_testimm(shift,32);
3332 emit_shr(sh,HOST_TEMPREG,th);
3333 emit_cmovne_reg(th,tl);
3334 if(real_th>=0) emit_cmovne_imm(0,th);
3335 }
3336 if(opcode2[i]==0x17) // DSRAV
3337 {
3338 assert(th>=0);
3339 emit_shr(sl,HOST_TEMPREG,tl);
3340 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3341 if(real_th>=0) {
3342 assert(temp>=0);
3343 emit_sarimm(th,31,temp);
3344 }
3345 emit_orrshl(sh,HOST_TEMPREG,tl);
3346 emit_andimm(shift,31,HOST_TEMPREG);
3347 emit_testimm(shift,32);
3348 emit_sar(sh,HOST_TEMPREG,th);
3349 emit_cmovne_reg(th,tl);
3350 if(real_th>=0) emit_cmovne_reg(temp,th);
3351 }
3352 }
3353 }
3354 }
3355 }
3356}
3357#define shift_assemble shift_assemble_arm
3358
3359void loadlr_assemble_arm(int i,struct regstat *i_regs)
3360{
3361 int s,th,tl,temp,temp2,addr,map=-1;
3362 int offset;
3363 int jaddr=0;
af4ee1fe 3364 int memtarget=0,c=0;
57871462 3365 u_int hr,reglist=0;
3366 th=get_reg(i_regs->regmap,rt1[i]|64);
3367 tl=get_reg(i_regs->regmap,rt1[i]);
3368 s=get_reg(i_regs->regmap,rs1[i]);
3369 temp=get_reg(i_regs->regmap,-1);
3370 temp2=get_reg(i_regs->regmap,FTEMP);
3371 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3372 assert(addr<0);
3373 offset=imm[i];
3374 for(hr=0;hr<HOST_REGS;hr++) {
3375 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3376 }
3377 reglist|=1<<temp;
3378 if(offset||s<0||c) addr=temp2;
3379 else addr=s;
3380 if(s>=0) {
3381 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3382 if(c) {
3383 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3384 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3385 }
57871462 3386 }
535d208a 3387 if(!using_tlb) {
3388 if(!c) {
3389 #ifdef RAM_OFFSET
3390 map=get_reg(i_regs->regmap,ROREG);
3391 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3392 #endif
3393 emit_shlimm(addr,3,temp);
3394 if (opcode[i]==0x22||opcode[i]==0x26) {
3395 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3396 }else{
535d208a 3397 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3398 }
535d208a 3399 emit_cmpimm(addr,RAM_SIZE);
3400 jaddr=(int)out;
3401 emit_jno(0);
3402 }
3403 else {
3404 if (opcode[i]==0x22||opcode[i]==0x26) {
3405 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3406 }else{
3407 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3408 }
57871462 3409 }
535d208a 3410 }else{ // using tlb
3411 int a;
3412 if(c) {
3413 a=-1;
3414 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3415 a=0xFFFFFFFC; // LWL/LWR
3416 }else{
3417 a=0xFFFFFFF8; // LDL/LDR
3418 }
3419 map=get_reg(i_regs->regmap,TLREG);
3420 assert(map>=0);
ea3d2e6e 3421 reglist&=~(1<<map);
535d208a 3422 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3423 if(c) {
3424 if (opcode[i]==0x22||opcode[i]==0x26) {
3425 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3426 }else{
3427 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3428 }
535d208a 3429 }
3430 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3431 }
3432 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3433 if(!c||memtarget) {
3434 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3435 emit_readword_indexed_tlb(0,temp2,map,temp2);
3436 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3437 }
3438 else
3439 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3440 if(rt1[i]) {
3441 assert(tl>=0);
57871462 3442 emit_andimm(temp,24,temp);
2002a1db 3443#ifdef BIG_ENDIAN_MIPS
3444 if (opcode[i]==0x26) // LWR
3445#else
3446 if (opcode[i]==0x22) // LWL
3447#endif
3448 emit_xorimm(temp,24,temp);
57871462 3449 emit_movimm(-1,HOST_TEMPREG);
3450 if (opcode[i]==0x26) {
3451 emit_shr(temp2,temp,temp2);
3452 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3453 }else{
3454 emit_shl(temp2,temp,temp2);
3455 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3456 }
3457 emit_or(temp2,tl,tl);
57871462 3458 }
535d208a 3459 //emit_storereg(rt1[i],tl); // DEBUG
3460 }
3461 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3462 // FIXME: little endian
3463 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3464 if(!c||memtarget) {
3465 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3466 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3467 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3468 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3469 }
3470 else
3471 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3472 if(rt1[i]) {
3473 assert(th>=0);
3474 assert(tl>=0);
57871462 3475 emit_testimm(temp,32);
3476 emit_andimm(temp,24,temp);
3477 if (opcode[i]==0x1A) { // LDL
3478 emit_rsbimm(temp,32,HOST_TEMPREG);
3479 emit_shl(temp2h,temp,temp2h);
3480 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3481 emit_movimm(-1,HOST_TEMPREG);
3482 emit_shl(temp2,temp,temp2);
3483 emit_cmove_reg(temp2h,th);
3484 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3485 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3486 emit_orreq(temp2,tl,tl);
3487 emit_orrne(temp2,th,th);
3488 }
3489 if (opcode[i]==0x1B) { // LDR
3490 emit_xorimm(temp,24,temp);
3491 emit_rsbimm(temp,32,HOST_TEMPREG);
3492 emit_shr(temp2,temp,temp2);
3493 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3494 emit_movimm(-1,HOST_TEMPREG);
3495 emit_shr(temp2h,temp,temp2h);
3496 emit_cmovne_reg(temp2,tl);
3497 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3498 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3499 emit_orrne(temp2h,th,th);
3500 emit_orreq(temp2h,tl,tl);
3501 }
3502 }
3503 }
3504}
3505#define loadlr_assemble loadlr_assemble_arm
3506
3507void cop0_assemble(int i,struct regstat *i_regs)
3508{
3509 if(opcode2[i]==0) // MFC0
3510 {
3511 signed char t=get_reg(i_regs->regmap,rt1[i]);
3512 char copr=(source[i]>>11)&0x1f;
3513 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3514 if(t>=0&&rt1[i]!=0) {
7139f3c8 3515#ifdef MUPEN64
57871462 3516 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3517 emit_movimm((source[i]>>11)&0x1f,1);
3518 emit_writeword(0,(int)&PC);
3519 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3520 if(copr==9) {
3521 emit_readword((int)&last_count,ECX);
3522 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3523 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3524 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3525 emit_writeword(HOST_CCREG,(int)&Count);
3526 }
3527 emit_call((int)MFC0);
3528 emit_readword((int)&readmem_dword,t);
7139f3c8 3529#else
3530 emit_readword((int)&reg_cop0+copr*4,t);
3531#endif
57871462 3532 }
3533 }
3534 else if(opcode2[i]==4) // MTC0
3535 {
3536 signed char s=get_reg(i_regs->regmap,rs1[i]);
3537 char copr=(source[i]>>11)&0x1f;
3538 assert(s>=0);
3539 emit_writeword(s,(int)&readmem_dword);
3540 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3541#ifdef MUPEN64
57871462 3542 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3543 emit_movimm((source[i]>>11)&0x1f,1);
3544 emit_writeword(0,(int)&PC);
3545 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3546#endif
3547 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3548 emit_readword((int)&last_count,ECX);
3549 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3550 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3551 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3552 emit_writeword(HOST_CCREG,(int)&Count);
3553 }
3554 // What a mess. The status register (12) can enable interrupts,
3555 // so needs a special case to handle a pending interrupt.
3556 // The interrupt must be taken immediately, because a subsequent
3557 // instruction might disable interrupts again.
7139f3c8 3558 if(copr==12||copr==13) {
fca1aef2 3559#ifdef PCSX
3560 if (is_delayslot) {
3561 // burn cycles to cause cc_interrupt, which will
3562 // reschedule next_interupt. Relies on CCREG from above.
3563 assem_debug("MTC0 DS %d\n", copr);
3564 emit_writeword(HOST_CCREG,(int)&last_count);
3565 emit_movimm(0,HOST_CCREG);
3566 emit_storereg(CCREG,HOST_CCREG);
3567 emit_movimm(copr,0);
3568 emit_call((int)pcsx_mtc0_ds);
3569 return;
3570 }
3571#endif
57871462 3572 emit_movimm(start+i*4+4,0);
3573 emit_movimm(0,1);
3574 emit_writeword(0,(int)&pcaddr);
3575 emit_writeword(1,(int)&pending_exception);
3576 }
3577 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3578 //else
fca1aef2 3579#ifdef PCSX
3580 emit_movimm(copr,0);
3581 emit_call((int)pcsx_mtc0);
3582#else
57871462 3583 emit_call((int)MTC0);
fca1aef2 3584#endif
7139f3c8 3585 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3586 emit_readword((int)&Count,HOST_CCREG);
3587 emit_readword((int)&next_interupt,ECX);
3588 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3589 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3590 emit_writeword(ECX,(int)&last_count);
3591 emit_storereg(CCREG,HOST_CCREG);
3592 }
7139f3c8 3593 if(copr==12||copr==13) {
57871462 3594 assert(!is_delayslot);
3595 emit_readword((int)&pending_exception,14);
3596 }
3597 emit_loadreg(rs1[i],s);
3598 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3599 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3600 if(copr==12||copr==13) {
57871462 3601 emit_test(14,14);
3602 emit_jne((int)&do_interrupt);
3603 }
3604 cop1_usable=0;
3605 }
3606 else
3607 {
3608 assert(opcode2[i]==0x10);
3d624f89 3609#ifndef DISABLE_TLB
57871462 3610 if((source[i]&0x3f)==0x01) // TLBR
3611 emit_call((int)TLBR);
3612 if((source[i]&0x3f)==0x02) // TLBWI
3613 emit_call((int)TLBWI_new);
3614 if((source[i]&0x3f)==0x06) { // TLBWR
3615 // The TLB entry written by TLBWR is dependent on the count,
3616 // so update the cycle count
3617 emit_readword((int)&last_count,ECX);
3618 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3619 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3620 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3621 emit_writeword(HOST_CCREG,(int)&Count);
3622 emit_call((int)TLBWR_new);
3623 }
3624 if((source[i]&0x3f)==0x08) // TLBP
3625 emit_call((int)TLBP);
3d624f89 3626#endif
576bbd8f 3627#ifdef PCSX
3628 if((source[i]&0x3f)==0x10) // RFE
3629 {
3630 emit_readword((int)&Status,0);
3631 emit_andimm(0,0x3c,1);
3632 emit_andimm(0,~0xf,0);
3633 emit_orrshr_imm(1,2,0);
3634 emit_writeword(0,(int)&Status);
3635 }
3636#else
57871462 3637 if((source[i]&0x3f)==0x18) // ERET
3638 {
3639 int count=ccadj[i];
3640 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3641 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3642 emit_jmp((int)jump_eret);
3643 }
576bbd8f 3644#endif
57871462 3645 }
3646}
3647
b9b61529 3648static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3649{
3650 switch (copr) {
3651 case 1:
3652 case 3:
3653 case 5:
3654 case 8:
3655 case 9:
3656 case 10:
3657 case 11:
3658 emit_readword((int)&reg_cop2d[copr],tl);
3659 emit_signextend16(tl,tl);
3660 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3661 break;
3662 case 7:
3663 case 16:
3664 case 17:
3665 case 18:
3666 case 19:
3667 emit_readword((int)&reg_cop2d[copr],tl);
3668 emit_andimm(tl,0xffff,tl);
3669 emit_writeword(tl,(int)&reg_cop2d[copr]);
3670 break;
3671 case 15:
3672 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3673 emit_writeword(tl,(int)&reg_cop2d[copr]);
3674 break;
3675 case 28:
b9b61529 3676 case 29:
3677 emit_readword((int)&reg_cop2d[9],temp);
3678 emit_testimm(temp,0x8000); // do we need this?
3679 emit_andimm(temp,0xf80,temp);
3680 emit_andne_imm(temp,0,temp);
f70d384d 3681 emit_shrimm(temp,7,tl);
b9b61529 3682 emit_readword((int)&reg_cop2d[10],temp);
3683 emit_testimm(temp,0x8000);
3684 emit_andimm(temp,0xf80,temp);
3685 emit_andne_imm(temp,0,temp);
f70d384d 3686 emit_orrshr_imm(temp,2,tl);
b9b61529 3687 emit_readword((int)&reg_cop2d[11],temp);
3688 emit_testimm(temp,0x8000);
3689 emit_andimm(temp,0xf80,temp);
3690 emit_andne_imm(temp,0,temp);
f70d384d 3691 emit_orrshl_imm(temp,3,tl);
b9b61529 3692 emit_writeword(tl,(int)&reg_cop2d[copr]);
3693 break;
3694 default:
3695 emit_readword((int)&reg_cop2d[copr],tl);
3696 break;
3697 }
3698}
3699
3700static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3701{
3702 switch (copr) {
3703 case 15:
3704 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3705 emit_writeword(sl,(int)&reg_cop2d[copr]);
3706 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3707 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3708 emit_writeword(sl,(int)&reg_cop2d[14]);
3709 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3710 break;
3711 case 28:
3712 emit_andimm(sl,0x001f,temp);
f70d384d 3713 emit_shlimm(temp,7,temp);
b9b61529 3714 emit_writeword(temp,(int)&reg_cop2d[9]);
3715 emit_andimm(sl,0x03e0,temp);
f70d384d 3716 emit_shlimm(temp,2,temp);
b9b61529 3717 emit_writeword(temp,(int)&reg_cop2d[10]);
3718 emit_andimm(sl,0x7c00,temp);
f70d384d 3719 emit_shrimm(temp,3,temp);
b9b61529 3720 emit_writeword(temp,(int)&reg_cop2d[11]);
3721 emit_writeword(sl,(int)&reg_cop2d[28]);
3722 break;
3723 case 30:
3724 emit_movs(sl,temp);
3725 emit_mvnmi(temp,temp);
3726 emit_clz(temp,temp);
3727 emit_writeword(sl,(int)&reg_cop2d[30]);
3728 emit_writeword(temp,(int)&reg_cop2d[31]);
3729 break;
b9b61529 3730 case 31:
3731 break;
3732 default:
3733 emit_writeword(sl,(int)&reg_cop2d[copr]);
3734 break;
3735 }
3736}
3737
3738void cop2_assemble(int i,struct regstat *i_regs)
3739{
3740 u_int copr=(source[i]>>11)&0x1f;
3741 signed char temp=get_reg(i_regs->regmap,-1);
3742 if (opcode2[i]==0) { // MFC2
3743 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3744 if(tl>=0&&rt1[i]!=0)
b9b61529 3745 cop2_get_dreg(copr,tl,temp);
3746 }
3747 else if (opcode2[i]==4) { // MTC2
3748 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3749 cop2_put_dreg(copr,sl,temp);
3750 }
3751 else if (opcode2[i]==2) // CFC2
3752 {
3753 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3754 if(tl>=0&&rt1[i]!=0)
b9b61529 3755 emit_readword((int)&reg_cop2c[copr],tl);
3756 }
3757 else if (opcode2[i]==6) // CTC2
3758 {
3759 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3760 switch(copr) {
3761 case 4:
3762 case 12:
3763 case 20:
3764 case 26:
3765 case 27:
3766 case 29:
3767 case 30:
3768 emit_signextend16(sl,temp);
3769 break;
3770 case 31:
3771 //value = value & 0x7ffff000;
3772 //if (value & 0x7f87e000) value |= 0x80000000;
3773 emit_shrimm(sl,12,temp);
3774 emit_shlimm(temp,12,temp);
3775 emit_testimm(temp,0x7f000000);
3776 emit_testeqimm(temp,0x00870000);
3777 emit_testeqimm(temp,0x0000e000);
3778 emit_orrne_imm(temp,0x80000000,temp);
3779 break;
3780 default:
3781 temp=sl;
3782 break;
3783 }
3784 emit_writeword(temp,(int)&reg_cop2c[copr]);
3785 assert(sl>=0);
3786 }
3787}
3788
3789void c2op_assemble(int i,struct regstat *i_regs)
3790{
3791 signed char temp=get_reg(i_regs->regmap,-1);
3792 u_int c2op=source[i]&0x3f;
3793 u_int hr,reglist=0;
3794 for(hr=0;hr<HOST_REGS;hr++) {
3795 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3796 }
3797 if(i==0||itype[i-1]!=C2OP)
3798 save_regs(reglist);
3799
3800 if (gte_handlers[c2op]!=NULL) {
3801 int cc=get_reg(i_regs->regmap,CCREG);
009faf24 3802 emit_movimm(source[i],1); // opcode
b9b61529 3803 if (cc>=0&&gte_cycletab[c2op])
009faf24 3804 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
3805 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3806 emit_writeword(1,(int)&psxRegs.code);
b9b61529 3807 emit_call((int)gte_handlers[c2op]);
3808 }
3809
3810 if(i>=slen-1||itype[i+1]!=C2OP)
3811 restore_regs(reglist);
3812}
3813
3814void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3815{
3816 // XXX: should just just do the exception instead
3817 if(!cop1_usable) {
3818 int jaddr=(int)out;
3819 emit_jmp(0);
3820 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3821 cop1_usable=1;
3822 }
3823}
3824
57871462 3825void cop1_assemble(int i,struct regstat *i_regs)
3826{
3d624f89 3827#ifndef DISABLE_COP1
57871462 3828 // Check cop1 unusable
3829 if(!cop1_usable) {
3830 signed char rs=get_reg(i_regs->regmap,CSREG);
3831 assert(rs>=0);
3832 emit_testimm(rs,0x20000000);
3833 int jaddr=(int)out;
3834 emit_jeq(0);
3835 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3836 cop1_usable=1;
3837 }
3838 if (opcode2[i]==0) { // MFC1
3839 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3840 if(tl>=0) {
3841 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3842 emit_readword_indexed(0,tl,tl);
3843 }
3844 }
3845 else if (opcode2[i]==1) { // DMFC1
3846 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3847 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3848 if(tl>=0) {
3849 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3850 if(th>=0) emit_readword_indexed(4,tl,th);
3851 emit_readword_indexed(0,tl,tl);
3852 }
3853 }
3854 else if (opcode2[i]==4) { // MTC1
3855 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3856 signed char temp=get_reg(i_regs->regmap,-1);
3857 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3858 emit_writeword_indexed(sl,0,temp);
3859 }
3860 else if (opcode2[i]==5) { // DMTC1
3861 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3862 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3863 signed char temp=get_reg(i_regs->regmap,-1);
3864 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3865 emit_writeword_indexed(sh,4,temp);
3866 emit_writeword_indexed(sl,0,temp);
3867 }
3868 else if (opcode2[i]==2) // CFC1
3869 {
3870 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3871 if(tl>=0) {
3872 u_int copr=(source[i]>>11)&0x1f;
3873 if(copr==0) emit_readword((int)&FCR0,tl);
3874 if(copr==31) emit_readword((int)&FCR31,tl);
3875 }
3876 }
3877 else if (opcode2[i]==6) // CTC1
3878 {
3879 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3880 u_int copr=(source[i]>>11)&0x1f;
3881 assert(sl>=0);
3882 if(copr==31)
3883 {
3884 emit_writeword(sl,(int)&FCR31);
3885 // Set the rounding mode
3886 //FIXME
3887 //char temp=get_reg(i_regs->regmap,-1);
3888 //emit_andimm(sl,3,temp);
3889 //emit_fldcw_indexed((int)&rounding_modes,temp);
3890 }
3891 }
3d624f89 3892#else
3893 cop1_unusable(i, i_regs);
3894#endif
57871462 3895}
3896
3897void fconv_assemble_arm(int i,struct regstat *i_regs)
3898{
3d624f89 3899#ifndef DISABLE_COP1
57871462 3900 signed char temp=get_reg(i_regs->regmap,-1);
3901 assert(temp>=0);
3902 // Check cop1 unusable
3903 if(!cop1_usable) {
3904 signed char rs=get_reg(i_regs->regmap,CSREG);
3905 assert(rs>=0);
3906 emit_testimm(rs,0x20000000);
3907 int jaddr=(int)out;
3908 emit_jeq(0);
3909 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3910 cop1_usable=1;
3911 }
3912
3913 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3914 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3915 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3916 emit_flds(temp,15);
3917 emit_ftosizs(15,15); // float->int, truncate
3918 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3919 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3920 emit_fsts(15,temp);
3921 return;
3922 }
3923 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3924 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3925 emit_vldr(temp,7);
3926 emit_ftosizd(7,13); // double->int, truncate
3927 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3928 emit_fsts(13,temp);
3929 return;
3930 }
3931
3932 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3933 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3934 emit_flds(temp,13);
3935 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3936 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3937 emit_fsitos(13,15);
3938 emit_fsts(15,temp);
3939 return;
3940 }
3941 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3942 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3943 emit_flds(temp,13);
3944 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3945 emit_fsitod(13,7);
3946 emit_vstr(7,temp);
3947 return;
3948 }
3949
3950 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3951 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3952 emit_flds(temp,13);
3953 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3954 emit_fcvtds(13,7);
3955 emit_vstr(7,temp);
3956 return;
3957 }
3958 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3959 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3960 emit_vldr(temp,7);
3961 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3962 emit_fcvtsd(7,13);
3963 emit_fsts(13,temp);
3964 return;
3965 }
3966 #endif
3967
3968 // C emulation code
3969
3970 u_int hr,reglist=0;
3971 for(hr=0;hr<HOST_REGS;hr++) {
3972 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3973 }
3974 save_regs(reglist);
3975
3976 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3977 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3978 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3979 emit_call((int)cvt_s_w);
3980 }
3981 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3982 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3983 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3984 emit_call((int)cvt_d_w);
3985 }
3986 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3987 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3988 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3989 emit_call((int)cvt_s_l);
3990 }
3991 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3992 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3993 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3994 emit_call((int)cvt_d_l);
3995 }
3996
3997 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3998 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3999 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4000 emit_call((int)cvt_d_s);
4001 }
4002 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4003 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4004 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4005 emit_call((int)cvt_w_s);
4006 }
4007 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4008 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4009 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4010 emit_call((int)cvt_l_s);
4011 }
4012
4013 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4014 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4015 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4016 emit_call((int)cvt_s_d);
4017 }
4018 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4019 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4020 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4021 emit_call((int)cvt_w_d);
4022 }
4023 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4024 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4025 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4026 emit_call((int)cvt_l_d);
4027 }
4028
4029 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4030 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4031 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4032 emit_call((int)round_l_s);
4033 }
4034 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4035 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4036 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4037 emit_call((int)trunc_l_s);
4038 }
4039 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4040 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4041 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4042 emit_call((int)ceil_l_s);
4043 }
4044 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4045 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4046 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4047 emit_call((int)floor_l_s);
4048 }
4049 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4050 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4051 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4052 emit_call((int)round_w_s);
4053 }
4054 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4055 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4056 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4057 emit_call((int)trunc_w_s);
4058 }
4059 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4060 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4061 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4062 emit_call((int)ceil_w_s);
4063 }
4064 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4065 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4066 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4067 emit_call((int)floor_w_s);
4068 }
4069
4070 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4071 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4072 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4073 emit_call((int)round_l_d);
4074 }
4075 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4076 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4077 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4078 emit_call((int)trunc_l_d);
4079 }
4080 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4081 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4082 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4083 emit_call((int)ceil_l_d);
4084 }
4085 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4086 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4087 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4088 emit_call((int)floor_l_d);
4089 }
4090 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4091 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4092 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4093 emit_call((int)round_w_d);
4094 }
4095 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4096 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4097 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4098 emit_call((int)trunc_w_d);
4099 }
4100 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4101 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4102 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4103 emit_call((int)ceil_w_d);
4104 }
4105 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4106 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4107 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4108 emit_call((int)floor_w_d);
4109 }
4110
4111 restore_regs(reglist);
3d624f89 4112#else
4113 cop1_unusable(i, i_regs);
4114#endif
57871462 4115}
4116#define fconv_assemble fconv_assemble_arm
4117
4118void fcomp_assemble(int i,struct regstat *i_regs)
4119{
3d624f89 4120#ifndef DISABLE_COP1
57871462 4121 signed char fs=get_reg(i_regs->regmap,FSREG);
4122 signed char temp=get_reg(i_regs->regmap,-1);
4123 assert(temp>=0);
4124 // Check cop1 unusable
4125 if(!cop1_usable) {
4126 signed char cs=get_reg(i_regs->regmap,CSREG);
4127 assert(cs>=0);
4128 emit_testimm(cs,0x20000000);
4129 int jaddr=(int)out;
4130 emit_jeq(0);
4131 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4132 cop1_usable=1;
4133 }
4134
4135 if((source[i]&0x3f)==0x30) {
4136 emit_andimm(fs,~0x800000,fs);
4137 return;
4138 }
4139
4140 if((source[i]&0x3e)==0x38) {
4141 // sf/ngle - these should throw exceptions for NaNs
4142 emit_andimm(fs,~0x800000,fs);
4143 return;
4144 }
4145
4146 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4147 if(opcode2[i]==0x10) {
4148 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4149 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4150 emit_orimm(fs,0x800000,fs);
4151 emit_flds(temp,14);
4152 emit_flds(HOST_TEMPREG,15);
4153 emit_fcmps(14,15);
4154 emit_fmstat();
4155 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4156 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4157 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4158 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4159 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4160 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4161 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4162 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4163 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4164 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4165 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4166 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4167 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4168 return;
4169 }
4170 if(opcode2[i]==0x11) {
4171 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4172 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4173 emit_orimm(fs,0x800000,fs);
4174 emit_vldr(temp,6);
4175 emit_vldr(HOST_TEMPREG,7);
4176 emit_fcmpd(6,7);
4177 emit_fmstat();
4178 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4179 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4180 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4181 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4182 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4183 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4184 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4185 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4186 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4187 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4188 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4189 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4190 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4191 return;
4192 }
4193 #endif
4194
4195 // C only
4196
4197 u_int hr,reglist=0;
4198 for(hr=0;hr<HOST_REGS;hr++) {
4199 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4200 }
4201 reglist&=~(1<<fs);
4202 save_regs(reglist);
4203 if(opcode2[i]==0x10) {
4204 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4205 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4206 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4207 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4208 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4209 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4210 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4211 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4212 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4213 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4214 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4215 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4216 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4217 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4218 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4219 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4220 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4221 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4222 }
4223 if(opcode2[i]==0x11) {
4224 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4225 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4226 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4227 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4228 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4229 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4230 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4231 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4232 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4233 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4234 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4235 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4236 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4237 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4238 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4239 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4240 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4241 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4242 }
4243 restore_regs(reglist);
4244 emit_loadreg(FSREG,fs);
3d624f89 4245#else
4246 cop1_unusable(i, i_regs);
4247#endif
57871462 4248}
4249
// Emit host code for a COP1 arithmetic instruction at guest index i.
// The operation is the low 6 bits of source[i]:
//   0 add, 1 sub, 2 mul, 3 div, 4 sqrt, 5 abs, 6 mov, 7 neg
// opcode2[i]==0x10 selects single precision, 0x11 double precision.
// As used below, source[i] bits 11-15 pick the first operand, bits 16-20
// the second operand (binary ops only) and bits 6-10 the destination.
// reg_cop1_simple[]/reg_cop1_double[] entries are loaded and then used as
// base addresses, i.e. they hold pointers to the actual FP values.
// With hard-float VFP the operation is emitted inline; otherwise the C
// helpers (add_s, sqrt_d, ...) are called with those pointers as arguments.
4250void float_assemble(int i,struct regstat *i_regs)
4251{
3d624f89 4252#ifndef DISABLE_COP1
57871462 4253 signed char temp=get_reg(i_regs->regmap,-1);
4254 assert(temp>=0);
4255 // Check cop1 unusable
  // Done once per block: test bit 0x20000000 of the register mapped to CSREG
  // and branch to an FP_STUB (registered via add_stub) when it is clear.
4256 if(!cop1_usable) {
4257 signed char cs=get_reg(i_regs->regmap,CSREG);
4258 assert(cs>=0);
4259 emit_testimm(cs,0x20000000);
4260 int jaddr=(int)out;
4261 emit_jeq(0);
4262 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4263 cop1_usable=1;
4264 }
4265
4266 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
  // mov: copy source to destination; nothing is emitted when both fields
  // name the same register.
4267 if((source[i]&0x3f)==6) // mov
4268 {
4269 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4270 if(opcode2[i]==0x10) {
4271 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4272 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4273 emit_readword_indexed(0,temp,temp);
4274 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4275 }
4276 if(opcode2[i]==0x11) {
4277 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4278 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4279 emit_vldr(temp,7);
4280 emit_vstr(7,HOST_TEMPREG);
4281 }
4282 }
4283 return;
4284 }
4285
  // Unary ops (sqrt/abs/neg): load the operand through `temp`, then reuse
  // `temp` for the destination pointer when it differs from the source.
4286 if((source[i]&0x3f)>3)
4287 {
4288 if(opcode2[i]==0x10) {
4289 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4290 emit_flds(temp,15);
4291 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4292 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4293 }
4294 if((source[i]&0x3f)==4) // sqrt
4295 emit_fsqrts(15,15);
4296 if((source[i]&0x3f)==5) // abs
4297 emit_fabss(15,15);
4298 if((source[i]&0x3f)==7) // neg
4299 emit_fnegs(15,15);
4300 emit_fsts(15,temp);
4301 }
4302 if(opcode2[i]==0x11) {
4303 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4304 emit_vldr(temp,7);
4305 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4306 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4307 }
4308 if((source[i]&0x3f)==4) // sqrt
4309 emit_fsqrtd(7,7);
4310 if((source[i]&0x3f)==5) // abs
4311 emit_fabsd(7,7);
4312 if((source[i]&0x3f)==7) // neg
4313 emit_fnegd(7,7);
4314 emit_vstr(7,temp);
4315 }
4316 return;
4317 }
  // Binary ops (add/sub/mul/div).  `temp` first holds the pointer to the
  // first operand.
4318 if((source[i]&0x3f)<4)
4319 {
4320 if(opcode2[i]==0x10) {
4321 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4322 }
4323 if(opcode2[i]==0x11) {
4324 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4325 }
    // Distinct operands: the second operand pointer goes in HOST_TEMPREG.
    // After both values are loaded, `temp` is re-pointed at the destination
    // only if the destination differs from both operands; if it equals the
    // second operand the result is stored through HOST_TEMPREG, otherwise
    // through `temp`.
4326 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4327 if(opcode2[i]==0x10) {
4328 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4329 emit_flds(temp,15);
4330 emit_flds(HOST_TEMPREG,13);
4331 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4332 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4333 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4334 }
4335 }
4336 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4337 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4338 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4339 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4340 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4341 emit_fsts(15,HOST_TEMPREG);
4342 }else{
4343 emit_fsts(15,temp);
4344 }
4345 }
4346 else if(opcode2[i]==0x11) {
4347 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4348 emit_vldr(temp,7);
4349 emit_vldr(HOST_TEMPREG,6);
4350 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4351 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4352 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4353 }
4354 }
4355 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4356 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4357 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4358 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4359 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4360 emit_vstr(7,HOST_TEMPREG);
4361 }else{
4362 emit_vstr(7,temp);
4363 }
4364 }
4365 }
    // Both operand fields name the same register: load it once and use the
    // same VFP register for both inputs.
4366 else {
4367 if(opcode2[i]==0x10) {
4368 emit_flds(temp,15);
4369 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4370 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4371 }
4372 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4373 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4374 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4375 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4376 emit_fsts(15,temp);
4377 }
4378 else if(opcode2[i]==0x11) {
4379 emit_vldr(temp,7);
4380 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4381 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4382 }
4383 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4384 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4385 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4386 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4387 emit_vstr(7,temp);
4388 }
4389 }
4390 return;
4391 }
4392 #endif
4393
  // Fallback without inline VFP: save every mapped host register, pass the
  // operand pointers in ARG1/ARG2 (and the destination in ARG3 for binary
  // ops, ARG2 for unary ops), call the C helper and restore the registers.
4394 u_int hr,reglist=0;
4395 for(hr=0;hr<HOST_REGS;hr++) {
4396 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4397 }
4398 if(opcode2[i]==0x10) { // Single precision
4399 save_regs(reglist);
4400 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4401 if((source[i]&0x3f)<4) {
4402 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4403 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4404 }else{
4405 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4406 }
4407 switch(source[i]&0x3f)
4408 {
4409 case 0x00: emit_call((int)add_s);break;
4410 case 0x01: emit_call((int)sub_s);break;
4411 case 0x02: emit_call((int)mul_s);break;
4412 case 0x03: emit_call((int)div_s);break;
4413 case 0x04: emit_call((int)sqrt_s);break;
4414 case 0x05: emit_call((int)abs_s);break;
4415 case 0x06: emit_call((int)mov_s);break;
4416 case 0x07: emit_call((int)neg_s);break;
4417 }
4418 restore_regs(reglist);
4419 }
4420 if(opcode2[i]==0x11) { // Double precision
4421 save_regs(reglist);
4422 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4423 if((source[i]&0x3f)<4) {
4424 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4425 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4426 }else{
4427 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4428 }
4429 switch(source[i]&0x3f)
4430 {
4431 case 0x00: emit_call((int)add_d);break;
4432 case 0x01: emit_call((int)sub_d);break;
4433 case 0x02: emit_call((int)mul_d);break;
4434 case 0x03: emit_call((int)div_d);break;
4435 case 0x04: emit_call((int)sqrt_d);break;
4436 case 0x05: emit_call((int)abs_d);break;
4437 case 0x06: emit_call((int)mov_d);break;
4438 case 0x07: emit_call((int)neg_d);break;
4439 }
4440 restore_regs(reglist);
4441 }
3d624f89 4442#else
4443 cop1_unusable(i, i_regs);
4444#endif
57871462 4445}
4446
// Emit host code for the multiply/divide instruction at guest index i
// (opcode2[i] in 0x18..0x1F, see the list below).  Results go to the host
// registers mapped to HIREG/LOREG.  32-bit MULT/MULTU use smull/umull,
// 32-bit DIV/DIVU use an inline shift-and-subtract loop, and the 64-bit
// variants call the C helpers mult64/multu64/div64/divu64.
// When either source is guest register 0 the result registers are zeroed.
//
// NOTE: the DIV/DIVU sequences branch to hard-coded byte offsets relative
// to `out` (+52, +40, -16).  These assume every emit_* call in between
// produces exactly one 4-byte instruction; do not add, remove or reorder
// emitted instructions without updating the offsets.
4447void multdiv_assemble_arm(int i,struct regstat *i_regs)
4448{
4449 // case 0x18: MULT
4450 // case 0x19: MULTU
4451 // case 0x1A: DIV
4452 // case 0x1B: DIVU
4453 // case 0x1C: DMULT
4454 // case 0x1D: DMULTU
4455 // case 0x1E: DDIV
4456 // case 0x1F: DDIVU
4457 if(rs1[i]&&rs2[i])
4458 {
4459 if((opcode2[i]&4)==0) // 32-bit
4460 {
4461 if(opcode2[i]==0x18) // MULT
4462 {
4463 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4464 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4465 signed char hi=get_reg(i_regs->regmap,HIREG);
4466 signed char lo=get_reg(i_regs->regmap,LOREG);
4467 assert(m1>=0);
4468 assert(m2>=0);
4469 assert(hi>=0);
4470 assert(lo>=0);
4471 emit_smull(m1,m2,hi,lo);
4472 }
4473 if(opcode2[i]==0x19) // MULTU
4474 {
4475 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4476 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4477 signed char hi=get_reg(i_regs->regmap,HIREG);
4478 signed char lo=get_reg(i_regs->regmap,LOREG);
4479 assert(m1>=0);
4480 assert(m2>=0);
4481 assert(hi>=0);
4482 assert(lo>=0);
4483 emit_umull(m1,m2,hi,lo);
4484 }
4485 if(opcode2[i]==0x1A) // DIV
4486 {
4487 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4488 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4489 assert(d1>=0);
4490 assert(d2>=0);
4491 signed char quotient=get_reg(i_regs->regmap,LOREG);
4492 signed char remainder=get_reg(i_regs->regmap,HIREG);
4493 assert(quotient>=0);
4494 assert(remainder>=0);
        // Signed divide done on magnitudes.  remainder starts as the dividend
        // and quotient as 0xffffffff; both are negated when the dividend is
        // negative so that the div0 exit below (which lands on the final
        // test/negmi pair) leaves remainder==dividend and quotient==-1 or +1.
4495 emit_movs(d1,remainder);
44a80f6a 4496 emit_movimm(0xffffffff,quotient);
4497 emit_negmi(quotient,quotient); // .. quotient and ..
4498 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 4499 emit_movs(d2,HOST_TEMPREG);
        // +52 bytes = 13 instructions ahead, i.e. the emit_test(d1,d1) below.
4500 emit_jeq((int)out+52); // Division by zero
4501 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
        // Align the divisor magnitude to the top bit and build a one-bit
        // marker in quotient; the loop ends when that marker is shifted out
        // into the carry flag by emit_adcs.
4502 emit_clz(HOST_TEMPREG,quotient);
4503 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4504 emit_orimm(quotient,1<<31,quotient);
4505 emit_shr(quotient,quotient,quotient);
4506 emit_cmp(remainder,HOST_TEMPREG);
4507 emit_subcs(remainder,HOST_TEMPREG,remainder);
4508 emit_adcs(quotient,quotient,quotient);
4509 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
        // -16 bytes = back to the emit_cmp above (loop head).
4510 emit_jcc((int)out-16); // -4
        // Fix up signs: negate the quotient when the operand signs differ,
        // negate the remainder when the dividend is negative.
4511 emit_teq(d1,d2);
4512 emit_negmi(quotient,quotient);
4513 emit_test(d1,d1);
4514 emit_negmi(remainder,remainder);
4515 }
4516 if(opcode2[i]==0x1B) // DIVU
4517 {
4518 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4519 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4520 assert(d1>=0);
4521 assert(d2>=0);
4522 signed char quotient=get_reg(i_regs->regmap,LOREG);
4523 signed char remainder=get_reg(i_regs->regmap,HIREG);
4524 assert(quotient>=0);
4525 assert(remainder>=0);
        // Unsigned divide.  On a zero divisor the branch skips the whole
        // sequence (+40 bytes = 10 instructions), leaving
        // remainder==dividend and quotient==0xffffffff.
44a80f6a 4526 emit_mov(d1,remainder);
4527 emit_movimm(0xffffffff,quotient); // div0 case
57871462 4528 emit_test(d2,d2);
44a80f6a 4529 emit_jeq((int)out+40); // Division by zero
        // The divisor's own host register d2 is shifted left here and shifted
        // back right inside the loop (emit_shrcc_imm, only while carry is
        // clear).  NOTE(review): this appears to restore d2 to its original
        // value by loop exit -- confirm before touching the loop.
57871462 4530 emit_clz(d2,HOST_TEMPREG);
4531 emit_movimm(1<<31,quotient);
4532 emit_shl(d2,HOST_TEMPREG,d2);
57871462 4533 emit_shr(quotient,HOST_TEMPREG,quotient);
4534 emit_cmp(remainder,d2);
4535 emit_subcs(remainder,d2,remainder);
4536 emit_adcs(quotient,quotient,quotient);
4537 emit_shrcc_imm(d2,1,d2);
        // -16 bytes = back to the emit_cmp above (loop head).
4538 emit_jcc((int)out-16); // -4
4539 }
4540 }
4541 else // 64-bit
4542 {
4543 if(opcode2[i]==0x1C) // DMULT
4544 {
        // The assert below always fails when this branch is taken, so DMULT
        // is effectively unsupported in builds with assertions enabled.
4545 assert(opcode2[i]!=0x1C);
4546 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4547 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4548 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4549 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4550 assert(m1h>=0);
4551 assert(m2h>=0);
4552 assert(m1l>=0);
4553 assert(m2l>=0);
4554 emit_pushreg(m2h);
4555 emit_pushreg(m2l);
4556 emit_pushreg(m1h);
4557 emit_pushreg(m1l);
4558 emit_call((int)&mult64);
4559 emit_popreg(m1l);
4560 emit_popreg(m1h);
4561 emit_popreg(m2l);
4562 emit_popreg(m2h);
4563 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4564 signed char hil=get_reg(i_regs->regmap,HIREG);
4565 if(hih>=0) emit_loadreg(HIREG|64,hih);
4566 if(hil>=0) emit_loadreg(HIREG,hil);
4567 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4568 signed char lol=get_reg(i_regs->regmap,LOREG);
4569 if(loh>=0) emit_loadreg(LOREG|64,loh);
4570 if(lol>=0) emit_loadreg(LOREG,lol);
4571 }
4572 if(opcode2[i]==0x1D) // DMULTU
4573 {
4574 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4575 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4576 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4577 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4578 assert(m1h>=0);
4579 assert(m2h>=0);
4580 assert(m1l>=0);
4581 assert(m2l>=0);
        // Marshal the operands into host registers 0-3 for the helper call.
        // After save_regs(0x100f), a value whose host register may already
        // have been overwritten is re-read from dynarec_local+reg*4.
        // NOTE(review): presumably that is where save_regs stored it --
        // verify against save_regs.
4582 save_regs(0x100f);
4583 if(m1l!=0) emit_mov(m1l,0);
4584 if(m1h==0) emit_readword((int)&dynarec_local,1);
4585 else if(m1h>1) emit_mov(m1h,1);
4586 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4587 else if(m2l>2) emit_mov(m2l,2);
4588 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4589 else if(m2h>3) emit_mov(m2h,3);
4590 emit_call((int)&multu64);
4591 restore_regs(0x100f);
        // NOTE(review): the four registers looked up below are never used;
        // unlike DDIV/DDIVU, no emit_loadreg of HI/LO follows the helper call
        // (everything up to the closing comment marker is commented out).
        // Confirm whether the reload is missing here.
4592 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4593 signed char hil=get_reg(i_regs->regmap,HIREG);
4594 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4595 signed char lol=get_reg(i_regs->regmap,LOREG);
4596 /*signed char temp=get_reg(i_regs->regmap,-1);
4597 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4598 signed char rl=get_reg(i_regs->regmap,HIREG);
4599 assert(m1h>=0);
4600 assert(m2h>=0);
4601 assert(m1l>=0);
4602 assert(m2l>=0);
4603 assert(temp>=0);
4604 //emit_mov(m1l,EAX);
4605 //emit_mul(m2l);
4606 emit_umull(rl,rh,m1l,m2l);
4607 emit_storereg(LOREG,rl);
4608 emit_mov(rh,temp);
4609 //emit_mov(m1h,EAX);
4610 //emit_mul(m2l);
4611 emit_umull(rl,rh,m1h,m2l);
4612 emit_adds(rl,temp,temp);
4613 emit_adcimm(rh,0,rh);
4614 emit_storereg(HIREG,rh);
4615 //emit_mov(m2h,EAX);
4616 //emit_mul(m1l);
4617 emit_umull(rl,rh,m1l,m2h);
4618 emit_adds(rl,temp,temp);
4619 emit_adcimm(rh,0,rh);
4620 emit_storereg(LOREG|64,temp);
4621 emit_mov(rh,temp);
4622 //emit_mov(m2h,EAX);
4623 //emit_mul(m1h);
4624 emit_umull(rl,rh,m1h,m2h);
4625 emit_adds(rl,temp,rl);
4626 emit_loadreg(HIREG,temp);
4627 emit_adcimm(rh,0,rh);
4628 emit_adds(rl,temp,rl);
4629 emit_adcimm(rh,0,rh);
4630 // DEBUG
4631 /*
4632 emit_pushreg(m2h);
4633 emit_pushreg(m2l);
4634 emit_pushreg(m1h);
4635 emit_pushreg(m1l);
4636 emit_call((int)&multu64);
4637 emit_popreg(m1l);
4638 emit_popreg(m1h);
4639 emit_popreg(m2l);
4640 emit_popreg(m2h);
4641 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4642 signed char hil=get_reg(i_regs->regmap,HIREG);
4643 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4644 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4645 */
4646 // Shouldn't be necessary
4647 //char loh=get_reg(i_regs->regmap,LOREG|64);
4648 //char lol=get_reg(i_regs->regmap,LOREG);
4649 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4650 //if(lol>=0) emit_loadreg(LOREG,lol);
4651 }
4652 if(opcode2[i]==0x1E) // DDIV
4653 {
4654 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4655 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4656 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4657 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4658 assert(d1h>=0);
4659 assert(d2h>=0);
4660 assert(d1l>=0);
4661 assert(d2l>=0);
        // Same argument marshalling as DMULTU; afterwards the HI/LO host
        // registers are reloaded with emit_loadreg.
4662 save_regs(0x100f);
4663 if(d1l!=0) emit_mov(d1l,0);
4664 if(d1h==0) emit_readword((int)&dynarec_local,1);
4665 else if(d1h>1) emit_mov(d1h,1);
4666 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4667 else if(d2l>2) emit_mov(d2l,2);
4668 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4669 else if(d2h>3) emit_mov(d2h,3);
4670 emit_call((int)&div64);
4671 restore_regs(0x100f);
4672 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4673 signed char hil=get_reg(i_regs->regmap,HIREG);
4674 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4675 signed char lol=get_reg(i_regs->regmap,LOREG);
4676 if(hih>=0) emit_loadreg(HIREG|64,hih);
4677 if(hil>=0) emit_loadreg(HIREG,hil);
4678 if(loh>=0) emit_loadreg(LOREG|64,loh);
4679 if(lol>=0) emit_loadreg(LOREG,lol);
4680 }
4681 if(opcode2[i]==0x1F) // DDIVU
4682 {
4683 //u_int hr,reglist=0;
4684 //for(hr=0;hr<HOST_REGS;hr++) {
4685 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4686 //}
4687 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4688 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4689 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4690 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4691 assert(d1h>=0);
4692 assert(d2h>=0);
4693 assert(d1l>=0);
4694 assert(d2l>=0);
4695 save_regs(0x100f);
4696 if(d1l!=0) emit_mov(d1l,0);
4697 if(d1h==0) emit_readword((int)&dynarec_local,1);
4698 else if(d1h>1) emit_mov(d1h,1);
4699 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4700 else if(d2l>2) emit_mov(d2l,2);
4701 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4702 else if(d2h>3) emit_mov(d2h,3);
4703 emit_call((int)&divu64);
4704 restore_regs(0x100f);
4705 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4706 signed char hil=get_reg(i_regs->regmap,HIREG);
4707 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4708 signed char lol=get_reg(i_regs->regmap,LOREG);
4709 if(hih>=0) emit_loadreg(HIREG|64,hih);
4710 if(hil>=0) emit_loadreg(HIREG,hil);
4711 if(loh>=0) emit_loadreg(LOREG|64,loh);
4712 if(lol>=0) emit_loadreg(LOREG,lol);
4713 }
4714 }
4715 }
4716 else
4717 {
4718 // Multiply by zero is zero.
4719 // MIPS does not have a divide by zero exception.
4720 // The result is undefined, we return zero.
    // NOTE(review): this path is taken whenever rs1 or rs2 is guest register
    // 0, for divides as well as multiplies.  It yields HI=LO=0, whereas the
    // runtime div-by-zero paths of DIV/DIVU above leave HI=dividend and
    // LO=0xffffffff (or 1 for DIV with a negative dividend).  The two look
    // inconsistent for a divide whose divisor is register 0; fixing it needs
    // the dividend's host register, which may not be allocated on this path.
4721 signed char hr=get_reg(i_regs->regmap,HIREG);
4722 signed char lr=get_reg(i_regs->regmap,LOREG);
4723 if(hr>=0) emit_zeroreg(hr);
4724 if(lr>=0) emit_zeroreg(lr);
4725 }
4726}
4727#define multdiv_assemble multdiv_assemble_arm
4728
// Intentionally emits nothing on ARM.  On x86 this step puts the value 0xf8
// into the register; on ARM the hash is computed by a single instruction in
// do_rhash() instead, so no preloaded mask is needed.
void do_preload_rhash(int r) {
  (void)r; // no mask register to set up on this backend
}
4733
4734void do_preload_rhtbl(int ht) {
4735 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4736}
4737
// Emit the mini hash table hash: rh = rs & 0xf8.
void do_rhash(int rs,int rh) {
  enum { RHASH_MASK=0xf8 };
  emit_andimm(rs,RHASH_MASK,rh);
}
4741
4742void do_miniht_load(int ht,int rh) {
4743 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4744 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4745}
4746
4747void do_miniht_jump(int rs,int rh,int ht) {
4748 emit_cmp(rh,rs);
4749 emit_ldreq_indexed(ht,4,15);
4750 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4751 emit_mov(rs,7);
4752 emit_jmp(jump_vaddr_reg[7]);
4753 #else
4754 emit_jmp(jump_vaddr_reg[rs]);
4755 #endif
4756}
4757
4758void do_miniht_insert(u_int return_address,int rt,int temp) {
4759 #ifdef ARMv5_ONLY
4760 emit_movimm(return_address,rt); // PC into link register
4761 add_to_linker((int)out,return_address,1);
4762 emit_pcreladdr(temp);
4763 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4764 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4765 #else
4766 emit_movw(return_address&0x0000FFFF,rt);
4767 add_to_linker((int)out,return_address,1);
4768 emit_pcreladdr(temp);
4769 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4770 emit_movt(return_address&0xFFFF0000,rt);
4771 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4772 #endif
4773}
4774
4775// Sign-extend to 64 bits and write out upper half of a register
4776// This is useful where we have a 32-bit value in a register, and want to
4777// keep it in a 32-bit register, but can't guarantee that it won't be read
4778// as a 64-bit value later.
4779void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4780{
24385cae 4781#ifndef FORCE32
57871462 4782 if(is32_pre==is32) return;
4783 int hr,reg;
4784 for(hr=0;hr<HOST_REGS;hr++) {
4785 if(hr!=EXCLUDE_REG) {
4786 //if(pre[hr]==entry[hr]) {
4787 if((reg=pre[hr])>=0) {
4788 if((dirty>>hr)&1) {
4789 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4790 emit_sarimm(hr,31,HOST_TEMPREG);
4791 emit_storereg(reg|64,HOST_TEMPREG);
4792 }
4793 }
4794 }
4795 //}
4796 }
4797 }
24385cae 4798#endif
57871462 4799}
4800
4801void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4802{
4803 //if(dirty_pre==dirty) return;
4804 int hr,reg,new_hr;
4805 for(hr=0;hr<HOST_REGS;hr++) {
4806 if(hr!=EXCLUDE_REG) {
4807 reg=pre[hr];
4808 if(((~u)>>(reg&63))&1) {
f776eb14 4809 if(reg>0) {
57871462 4810 if(((dirty_pre&~dirty)>>hr)&1) {
4811 if(reg>0&&reg<34) {
4812 emit_storereg(reg,hr);
4813 if( ((is32_pre&~uu)>>reg)&1 ) {
4814 emit_sarimm(hr,31,HOST_TEMPREG);
4815 emit_storereg(reg|64,HOST_TEMPREG);
4816 }
4817 }
4818 else if(reg>=64) {
4819 emit_storereg(reg,hr);
4820 }
4821 }
4822 }
57871462 4823 }
4824 }
4825 }
4826}
4827
4828
4829/* using strd could possibly help but you'd have to allocate registers in pairs
4830void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4831{
4832 int hr;
4833 int wrote=-1;
4834 for(hr=HOST_REGS-1;hr>=0;hr--) {
4835 if(hr!=EXCLUDE_REG) {
4836 if(pre[hr]!=entry[hr]) {
4837 if(pre[hr]>=0) {
4838 if((dirty>>hr)&1) {
4839 if(get_reg(entry,pre[hr])<0) {
4840 if(pre[hr]<64) {
4841 if(!((u>>pre[hr])&1)) {
4842 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4843 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4844 emit_sarimm(hr,31,hr+1);
4845 emit_strdreg(pre[hr],hr);
4846 }
4847 else
4848 emit_storereg(pre[hr],hr);
4849 }else{
4850 emit_storereg(pre[hr],hr);
4851 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4852 emit_sarimm(hr,31,hr);
4853 emit_storereg(pre[hr]|64,hr);
4854 }
4855 }
4856 }
4857 }else{
4858 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4859 emit_storereg(pre[hr],hr);
4860 }
4861 }
4862 wrote=hr;
4863 }
4864 }
4865 }
4866 }
4867 }
4868 }
4869 for(hr=0;hr<HOST_REGS;hr++) {
4870 if(hr!=EXCLUDE_REG) {
4871 if(pre[hr]!=entry[hr]) {
4872 if(pre[hr]>=0) {
4873 int nr;
4874 if((nr=get_reg(entry,pre[hr]))>=0) {
4875 emit_mov(hr,nr);
4876 }
4877 }
4878 }
4879 }
4880 }
4881}
4882#define wb_invalidate wb_invalidate_arm
4883*/
4884
dd3a91a1 4885// Clearing the cache is rather slow on ARM Linux, so mark the areas
4886// that need to be cleared, and then only clear these areas once.
4887void do_clear_cache()
4888{
4889 int i,j;
4890 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4891 {
4892 u_int bitmap=needs_clear_cache[i];
4893 if(bitmap) {
4894 u_int start,end;
4895 for(j=0;j<32;j++)
4896 {
4897 if(bitmap&(1<<j)) {
4898 start=BASE_ADDR+i*131072+j*4096;
4899 end=start+4095;
4900 j++;
4901 while(j<32) {
4902 if(bitmap&(1<<j)) {
4903 end+=4096;
4904 j++;
4905 }else{
4906 __clear_cache((void *)start,(void *)end);
4907 break;
4908 }
4909 }
4910 }
4911 }
4912 needs_clear_cache[i]=0;
4913 }
4914 }
4915}
4916
57871462 4917// CPU-architecture-specific initialization
4918void arch_init() {
3d624f89 4919#ifndef DISABLE_COP1
57871462 4920 rounding_modes[0]=0x0<<22; // round
4921 rounding_modes[1]=0x3<<22; // trunc
4922 rounding_modes[2]=0x1<<22; // ceil
4923 rounding_modes[3]=0x2<<22; // floor
3d624f89 4924#endif
57871462 4925}
b9b61529 4926
4927// vim:shiftwidth=2:expandtab