spu: disable SPUIRQWait, it only seems to cause problems
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
// State defined outside this file and referenced by the code below.
21extern int cycle_count; // backing storage used for CCREG by emit_loadreg/emit_storereg
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
// emit_loadreg/emit_storereg express addresses as offsets from &dynarec_local
// and assert that the offset is below 4096.
57871462 30extern void *dynarec_local;
// Indexed by (address>>12) in verify_dirty/get_bounds; entries >=0x80000000
// make those functions give up, otherwise (entry<<2) is added to the address.
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
// Routines whose bodies are not in this part of the file; only their
// addresses/names are needed here. They are declared with empty (unspecified)
// parameter lists.
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
// One jump_vaddr stub per register number; their addresses populate the
// jump_vaddr_reg[] table below. Note there is no jump_vaddr_r11.
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
145void kill_pointer(void *stub)
146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
153}
154
// Return the destination of the branch instruction associated with a stub.
// The second word of the stub must be a load with a 12-bit offset; the word
// it addresses (load address + offset + 8) holds the address of a branch
// instruction, whose target (branch address + signed offset*4 + 8) is
// returned.
int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *ptr=(int *)((char *)stub+4);
  assert((*ptr&0x0ff00000)==0x05900000);
  u_int offset=*ptr&0xfff;
  int **l_ptr=(int **)((char *)ptr+offset+8);
  int *i_ptr=*l_ptr;
  assert((*i_ptr&0x0f000000)==0x0a000000);
  // Shift as unsigned (left-shifting a negative int is undefined), then
  // reinterpret as signed so the right shift sign-extends the 24-bit field.
  int disp=(int)((u_int)*i_ptr<<8)>>6;
  return (int)i_ptr+disp+8;
}
166
167// Find the "clean" entry point from a "dirty" entry point
168// by skipping past the call to verify_code
// Given a "dirty" entry point, return the matching "clean" entry point.
// Skips the fixed-size preamble (4 words with ARMv5_ONLY, otherwise 6), one
// optional extra word, and the bl that must follow. If the next word is an
// unconditional branch its target is returned, otherwise the address of
// that word.
u_int get_clean_addr(int addr)
{
  int *ptr=(int *)addr;
  #ifdef ARMv5_ONLY
  ptr+=4;
  #else
  ptr+=6;
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  ptr++;
  if((*ptr&0xFF000000)==0xea000000) {
    // Shift as unsigned (left-shifting a negative int is undefined), then
    // reinterpret as signed so the right shift sign-extends the offset.
    int disp=(int)((u_int)*ptr<<8)>>6;
    return (int)ptr+disp+8; // follow jump
  }
  return (u_int)ptr;
}
185
186int verify_dirty(int addr)
187{
188 u_int *ptr=(u_int *)addr;
189 #ifdef ARMv5_ONLY
190 // get from literal pool
191 assert((*ptr&0xFFF00000)==0xe5900000);
192 u_int offset=*ptr&0xfff;
193 u_int *l_ptr=(void *)ptr+offset+8;
194 u_int source=l_ptr[0];
195 u_int copy=l_ptr[1];
196 u_int len=l_ptr[2];
197 ptr+=4;
198 #else
199 // ARMv7 movw/movt
200 assert((*ptr&0xFFF00000)==0xe3000000);
201 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
202 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
203 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
204 ptr+=6;
205 #endif
206 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
207 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 208 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 209 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
210 unsigned int page=source>>12;
211 unsigned int map_value=memory_map[page];
212 if(map_value>=0x80000000) return 0;
213 while(page<((source+len-1)>>12)) {
214 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
215 }
216 source = source+(map_value<<2);
217 }
218 //printf("verify_dirty: %x %x %x\n",source,copy,len);
219 return !memcmp((void *)source,(void *)copy,len);
220}
221
222// This doesn't necessarily find all clean entry points, just
223// guarantees that it's not dirty
224int isclean(int addr)
225{
226 #ifdef ARMv5_ONLY
227 int *ptr=((u_int *)addr)+4;
228 #else
229 int *ptr=((u_int *)addr)+6;
230 #endif
231 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
232 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
233 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
236 return 1;
237}
238
239void get_bounds(int addr,u_int *start,u_int *end)
240{
241 u_int *ptr=(u_int *)addr;
242 #ifdef ARMv5_ONLY
243 // get from literal pool
244 assert((*ptr&0xFFF00000)==0xe5900000);
245 u_int offset=*ptr&0xfff;
246 u_int *l_ptr=(void *)ptr+offset+8;
247 u_int source=l_ptr[0];
248 //u_int copy=l_ptr[1];
249 u_int len=l_ptr[2];
250 ptr+=4;
251 #else
252 // ARMv7 movw/movt
253 assert((*ptr&0xFFF00000)==0xe3000000);
254 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
255 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
256 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
257 ptr+=6;
258 #endif
259 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
260 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 261 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 262 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
263 if(memory_map[source>>12]>=0x80000000) source = 0;
264 else source = source+(memory_map[source>>12]<<2);
265 }
266 *start=source;
267 *end=source+len;
268}
269
270/* Register allocation */
271
272// Note: registers are allocated clean (unmodified state)
273// if you intend to modify the register, you must call dirty_reg().
// Map guest register `reg` to a host register in cur->regmap for the
// instruction at index i.
// Returns without doing anything if reg is flagged in cur->u (unneeded) or
// is already present in cur->regmap. Otherwise the following are tried in
// order: the preferred host register (when free or holding an unneeded
// guest register), any free host register other than EXCLUDE_REG, and
// finally eviction guided by the hsn[] ranking filled in by lsn().
// Whenever a host register is (re)assigned its dirty and isconst bits are
// cleared. If every strategy fails a message is printed and exit(1) called.
274void alloc_reg(struct regstat *cur,int i,signed char reg)
275{
276 int r,hr;
277 int preferred_reg = (reg&7);
278 if(reg==CCREG) preferred_reg=HOST_CCREG;
279 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
280
281 // Don't allocate unused registers
282 if((cur->u>>reg)&1) return;
283
284 // see if it's already allocated
285 for(hr=0;hr<HOST_REGS;hr++)
286 {
287 if(cur->regmap[hr]==reg) return;
288 }
289
290 // Keep the same mapping if the register was already allocated in a loop
291 preferred_reg = loop_reg(i,reg,preferred_reg);
292
293 // Try to allocate the preferred register
294 if(cur->regmap[preferred_reg]==-1) {
295 cur->regmap[preferred_reg]=reg;
296 cur->dirty&=~(1<<preferred_reg);
297 cur->isconst&=~(1<<preferred_reg);
298 return;
299 }
 // The preferred register is occupied: take it over if its current
 // occupant is flagged unneeded (values >=64 are checked against cur->uu)
300 r=cur->regmap[preferred_reg];
301 if(r<64&&((cur->u>>r)&1)) {
302 cur->regmap[preferred_reg]=reg;
303 cur->dirty&=~(1<<preferred_reg);
304 cur->isconst&=~(1<<preferred_reg);
305 return;
306 }
307 if(r>=64&&((cur->uu>>(r&63))&1)) {
308 cur->regmap[preferred_reg]=reg;
309 cur->dirty&=~(1<<preferred_reg);
310 cur->isconst&=~(1<<preferred_reg);
311 return;
312 }
313
314 // Clear any unneeded registers
315 // We try to keep the mapping consistent, if possible, because it
316 // makes branches easier (especially loops). So we try to allocate
317 // first (see above) before removing old mappings. If this is not
318 // possible then go ahead and clear out the registers that are no
319 // longer needed.
 // Note: only the first unneeded mapping found is cleared (break).
320 for(hr=0;hr<HOST_REGS;hr++)
321 {
322 r=cur->regmap[hr];
323 if(r>=0) {
324 if(r<64) {
325 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
326 }
327 else
328 {
329 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
330 }
331 }
332 }
333 // Try to allocate any available register, but prefer
334 // registers that have not been used recently.
 // "Recently" here means: held a source or target register of the
 // previous instruction (rs1/rs2/rt1/rt2 of i-1).
335 if(i>0) {
336 for(hr=0;hr<HOST_REGS;hr++) {
337 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
338 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
339 cur->regmap[hr]=reg;
340 cur->dirty&=~(1<<hr);
341 cur->isconst&=~(1<<hr);
342 return;
343 }
344 }
345 }
346 }
347 // Try to allocate any available register
348 for(hr=0;hr<HOST_REGS;hr++) {
349 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
350 cur->regmap[hr]=reg;
351 cur->dirty&=~(1<<hr);
352 cur->isconst&=~(1<<hr);
353 return;
354 }
355 }
356
357 // Ok, now we have to evict someone
358 // Pick a register we hopefully won't need soon
 // hsn[] starts at 10 for every guest register and is then updated by
 // lsn(); candidates are tried from the highest value downwards.
359 u_char hsn[MAXREG+1];
360 memset(hsn,10,sizeof(hsn));
361 int j;
362 lsn(hsn,i,&preferred_reg);
363 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
364 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
365 if(i>0) {
366 // Don't evict the cycle count at entry points, otherwise the entry
367 // stub will have to write it.
368 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
369 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
 // First pass: ranks 10..3 only, skipping registers used by the
 // previous instruction.
370 for(j=10;j>=3;j--)
371 {
372 // Alloc preferred register if available
373 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
374 for(hr=0;hr<HOST_REGS;hr++) {
375 // Evict both parts of a 64-bit register
376 if((cur->regmap[hr]&63)==r) {
377 cur->regmap[hr]=-1;
378 cur->dirty&=~(1<<hr);
379 cur->isconst&=~(1<<hr);
380 }
381 }
382 cur->regmap[preferred_reg]=reg;
383 return;
384 }
385 for(r=1;r<=MAXREG;r++)
386 {
387 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
 // Upper halves (r+64) are considered before lower halves;
 // HOST_CCREG is only eligible while j<hsn[CCREG].
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
390 if(cur->regmap[hr]==r+64) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
397 }
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
400 if(cur->regmap[hr]==r) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 }
410 }
411 }
 // Last resort: all ranks 10..0 with none of the restrictions above.
412 for(j=10;j>=0;j--)
413 {
414 for(r=1;r<=MAXREG;r++)
415 {
416 if(hsn[r]==j) {
417 for(hr=0;hr<HOST_REGS;hr++) {
418 if(cur->regmap[hr]==r+64) {
419 cur->regmap[hr]=reg;
420 cur->dirty&=~(1<<hr);
421 cur->isconst&=~(1<<hr);
422 return;
423 }
424 }
425 for(hr=0;hr<HOST_REGS;hr++) {
426 if(cur->regmap[hr]==r) {
427 cur->regmap[hr]=reg;
428 cur->dirty&=~(1<<hr);
429 cur->isconst&=~(1<<hr);
430 return;
431 }
432 }
433 }
434 }
435 }
436 printf("This shouldn't happen (alloc_reg)");exit(1);
437}
438
// Allocate host registers for both halves of guest register `reg`.
// The lower half is handled by alloc_reg(); the upper half is recorded in
// cur->regmap as reg|64 (or matched as reg+64). Nothing more is done for the
// upper half if reg is flagged in cur->uu or the upper half is already
// mapped. The search order mirrors alloc_reg: preferred host register, any
// free register other than EXCLUDE_REG, then eviction guided by hsn[]/lsn().
// If every strategy fails a message is printed and exit(1) called.
439void alloc_reg64(struct regstat *cur,int i,signed char reg)
440{
441 int preferred_reg = 8+(reg&1);
442 int r,hr;
443
444 // allocate the lower 32 bits
445 alloc_reg(cur,i,reg);
446
447 // Don't allocate unused registers
448 if((cur->uu>>reg)&1) return;
449
450 // see if the upper half is already allocated
451 for(hr=0;hr<HOST_REGS;hr++)
452 {
453 if(cur->regmap[hr]==reg+64) return;
454 }
455
456 // Keep the same mapping if the register was already allocated in a loop
457 preferred_reg = loop_reg(i,reg,preferred_reg);
458
459 // Try to allocate the preferred register
460 if(cur->regmap[preferred_reg]==-1) {
461 cur->regmap[preferred_reg]=reg|64;
462 cur->dirty&=~(1<<preferred_reg);
463 cur->isconst&=~(1<<preferred_reg);
464 return;
465 }
 // The preferred register is occupied: take it over if its current
 // occupant is flagged unneeded (values >=64 are checked against cur->uu)
466 r=cur->regmap[preferred_reg];
467 if(r<64&&((cur->u>>r)&1)) {
468 cur->regmap[preferred_reg]=reg|64;
469 cur->dirty&=~(1<<preferred_reg);
470 cur->isconst&=~(1<<preferred_reg);
471 return;
472 }
473 if(r>=64&&((cur->uu>>(r&63))&1)) {
474 cur->regmap[preferred_reg]=reg|64;
475 cur->dirty&=~(1<<preferred_reg);
476 cur->isconst&=~(1<<preferred_reg);
477 return;
478 }
479
480 // Clear any unneeded registers
481 // We try to keep the mapping consistent, if possible, because it
482 // makes branches easier (especially loops). So we try to allocate
483 // first (see above) before removing old mappings. If this is not
484 // possible then go ahead and clear out the registers that are no
485 // longer needed.
 // Note: unlike alloc_reg this scan runs from the highest host register
 // downwards; only the first unneeded mapping found is cleared.
486 for(hr=HOST_REGS-1;hr>=0;hr--)
487 {
488 r=cur->regmap[hr];
489 if(r>=0) {
490 if(r<64) {
491 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
492 }
493 else
494 {
495 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
496 }
497 }
498 }
499 // Try to allocate any available register, but prefer
500 // registers that have not been used recently.
501 if(i>0) {
502 for(hr=0;hr<HOST_REGS;hr++) {
503 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
504 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
505 cur->regmap[hr]=reg|64;
506 cur->dirty&=~(1<<hr);
507 cur->isconst&=~(1<<hr);
508 return;
509 }
510 }
511 }
512 }
513 // Try to allocate any available register
514 for(hr=0;hr<HOST_REGS;hr++) {
515 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
516 cur->regmap[hr]=reg|64;
517 cur->dirty&=~(1<<hr);
518 cur->isconst&=~(1<<hr);
519 return;
520 }
521 }
522
523 // Ok, now we have to evict someone
524 // Pick a register we hopefully won't need soon
 // hsn[] starts at 10 for every guest register and is then updated by
 // lsn(); candidates are tried from the highest value downwards.
525 u_char hsn[MAXREG+1];
526 memset(hsn,10,sizeof(hsn));
527 int j;
528 lsn(hsn,i,&preferred_reg);
529 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
530 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
531 if(i>0) {
532 // Don't evict the cycle count at entry points, otherwise the entry
533 // stub will have to write it.
534 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
535 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
 // First pass: ranks 10..3 only, skipping registers used by the
 // previous instruction.
536 for(j=10;j>=3;j--)
537 {
538 // Alloc preferred register if available
539 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
540 for(hr=0;hr<HOST_REGS;hr++) {
541 // Evict both parts of a 64-bit register
542 if((cur->regmap[hr]&63)==r) {
543 cur->regmap[hr]=-1;
544 cur->dirty&=~(1<<hr);
545 cur->isconst&=~(1<<hr);
546 }
547 }
548 cur->regmap[preferred_reg]=reg|64;
549 return;
550 }
551 for(r=1;r<=MAXREG;r++)
552 {
553 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
 // Upper halves (r+64) are considered before lower halves;
 // HOST_CCREG is only eligible while j<hsn[CCREG].
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
556 if(cur->regmap[hr]==r+64) {
557 cur->regmap[hr]=reg|64;
558 cur->dirty&=~(1<<hr);
559 cur->isconst&=~(1<<hr);
560 return;
561 }
562 }
563 }
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
566 if(cur->regmap[hr]==r) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 }
576 }
577 }
 // Last resort: all ranks 10..0 with none of the restrictions above.
578 for(j=10;j>=0;j--)
579 {
580 for(r=1;r<=MAXREG;r++)
581 {
582 if(hsn[r]==j) {
583 for(hr=0;hr<HOST_REGS;hr++) {
584 if(cur->regmap[hr]==r+64) {
585 cur->regmap[hr]=reg|64;
586 cur->dirty&=~(1<<hr);
587 cur->isconst&=~(1<<hr);
588 return;
589 }
590 }
591 for(hr=0;hr<HOST_REGS;hr++) {
592 if(cur->regmap[hr]==r) {
593 cur->regmap[hr]=reg|64;
594 cur->dirty&=~(1<<hr);
595 cur->isconst&=~(1<<hr);
596 return;
597 }
598 }
599 }
600 }
601 }
602 printf("This shouldn't happen");exit(1);
603}
604
605// Allocate a temporary register. This is done without regard to
606// dirty status or whether the register we request is on the unneeded list
607// Note: This will only allocate one register, even if called multiple times
// Map temporary `reg` to some host register in cur->regmap for the
// instruction at index i. Returns immediately if reg is already mapped
// (outside EXCLUDE_REG). Search order: a free host register (scanning from
// the highest number down), a host register whose occupant is unneeded both
// now and for the previous instruction, then eviction guided by
// hsn[]/lsn(). The chosen register's dirty and isconst bits are cleared.
// If every strategy fails a message is printed and exit(1) called.
608void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
609{
610 int r,hr;
 // No preference: -1 is what gets handed to lsn() below
611 int preferred_reg = -1;
612
613 // see if it's already allocated
614 for(hr=0;hr<HOST_REGS;hr++)
615 {
616 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
617 }
618
619 // Try to allocate any available register
620 for(hr=HOST_REGS-1;hr>=0;hr--) {
621 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
622 cur->regmap[hr]=reg;
623 cur->dirty&=~(1<<hr);
624 cur->isconst&=~(1<<hr);
625 return;
626 }
627 }
628
629 // Find an unneeded register
 // The occupant must be flagged in cur->u / cur->uu and, unless i==0,
 // also in unneeded_reg[i-1] / unneeded_reg_upper[i-1].
630 for(hr=HOST_REGS-1;hr>=0;hr--)
631 {
632 r=cur->regmap[hr];
633 if(r>=0) {
634 if(r<64) {
635 if((cur->u>>r)&1) {
636 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
637 cur->regmap[hr]=reg;
638 cur->dirty&=~(1<<hr);
639 cur->isconst&=~(1<<hr);
640 return;
641 }
642 }
643 }
644 else
645 {
646 if((cur->uu>>(r&63))&1) {
647 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
648 cur->regmap[hr]=reg;
649 cur->dirty&=~(1<<hr);
650 cur->isconst&=~(1<<hr);
651 return;
652 }
653 }
654 }
655 }
656 }
657
658 // Ok, now we have to evict someone
659 // Pick a register we hopefully won't need soon
660 // TODO: we might want to follow unconditional jumps here
661 // TODO: get rid of dupe code and make this into a function
662 u_char hsn[MAXREG+1];
663 memset(hsn,10,sizeof(hsn));
664 int j;
665 lsn(hsn,i,&preferred_reg);
666 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
667 if(i>0) {
668 // Don't evict the cycle count at entry points, otherwise the entry
669 // stub will have to write it.
670 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
671 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
 // First pass: ranks 10..3 only, skipping registers used by the
 // previous instruction.
672 for(j=10;j>=3;j--)
673 {
674 for(r=1;r<=MAXREG;r++)
675 {
676 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
 // Upper halves (r+64) are considered before lower halves.
 // Here HOST_CCREG is eligible only while hsn[CCREG]>2 (the
 // other allocators test j<hsn[CCREG] instead).
677 for(hr=0;hr<HOST_REGS;hr++) {
678 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
679 if(cur->regmap[hr]==r+64) {
680 cur->regmap[hr]=reg;
681 cur->dirty&=~(1<<hr);
682 cur->isconst&=~(1<<hr);
683 return;
684 }
685 }
686 }
687 for(hr=0;hr<HOST_REGS;hr++) {
688 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
689 if(cur->regmap[hr]==r) {
690 cur->regmap[hr]=reg;
691 cur->dirty&=~(1<<hr);
692 cur->isconst&=~(1<<hr);
693 return;
694 }
695 }
696 }
697 }
698 }
699 }
700 }
 // Last resort: all ranks 10..0 with none of the restrictions above.
701 for(j=10;j>=0;j--)
702 {
703 for(r=1;r<=MAXREG;r++)
704 {
705 if(hsn[r]==j) {
706 for(hr=0;hr<HOST_REGS;hr++) {
707 if(cur->regmap[hr]==r+64) {
708 cur->regmap[hr]=reg;
709 cur->dirty&=~(1<<hr);
710 cur->isconst&=~(1<<hr);
711 return;
712 }
713 }
714 for(hr=0;hr<HOST_REGS;hr++) {
715 if(cur->regmap[hr]==r) {
716 cur->regmap[hr]=reg;
717 cur->dirty&=~(1<<hr);
718 cur->isconst&=~(1<<hr);
719 return;
720 }
721 }
722 }
723 }
724 }
725 printf("This shouldn't happen");exit(1);
726}
727// Allocate a specific ARM register.
728void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
729{
730 int n;
731
732 // see if it's already allocated (and dealloc it)
733 for(n=0;n<HOST_REGS;n++)
734 {
735 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
736 }
737
738 cur->regmap[hr]=reg;
739 cur->dirty&=~(1<<hr);
740 cur->isconst&=~(1<<hr);
741}
742
743// Alloc cycle count into dedicated register
744alloc_cc(struct regstat *cur,int i)
745{
746 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
747}
748
749/* Special alloc */
750
751
752/* Assembler */
753
// Printable names of the 16 host registers, indexed by register number.
// Used by the assem_debug() traces in the emit_* functions.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
771
772void output_byte(u_char byte)
773{
774 *(out++)=byte;
775}
776void output_modrm(u_char mod,u_char rm,u_char ext)
777{
778 assert(mod<4);
779 assert(rm<8);
780 assert(ext<8);
781 u_char byte=(mod<<6)|(ext<<3)|rm;
782 *(out++)=byte;
783}
784void output_sib(u_char scale,u_char index,u_char base)
785{
786 assert(scale<4);
787 assert(index<8);
788 assert(base<8);
789 u_char byte=(scale<<6)|(index<<3)|base;
790 *(out++)=byte;
791}
792void output_w32(u_int word)
793{
794 *((u_int *)out)=word;
795 out+=4;
796}
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each argument must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the rn/rd fields plus an 8-bit immediate that is to be shifted left
// by `shift` bits. shift must be even; it is stored as ((32-shift)&30)<<7.
// rd and rn must be below 16 and imm below 256.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate_field=((32-shift)&30)<<7;
  u_int fields=imm;
  fields|=rotate_field;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Try to express imm as an 8-bit value with an even rotation.
// On success stores the 12-bit rotate/imm8 field in *encoded and returns 1;
// returns 0 (leaving *encoded untouched) when no such encoding exists.
u_int genimm(u_int imm,u_int *encoded)
{
  // rot counts down 32,30,...,2 while imm is rotated right two bits at a
  // time; imm==0 is caught on the first pass and encodes as 0.
  for(int rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
826u_int genjmp(u_int addr)
827{
828 int offset=addr-(int)out-8;
e80343e2 829 if(offset<-33554432||offset>=33554432) {
830 if (addr>2) {
831 printf("genjmp: out of range: %08x\n", offset);
832 exit(1);
833 }
834 return 0;
835 }
57871462 836 return ((u_int)offset>>2)&0xffffff;
837}
838
839void emit_mov(int rs,int rt)
840{
841 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
842 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
843}
844
845void emit_movs(int rs,int rt)
846{
847 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_add(int rs1,int rs2,int rt)
852{
853 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
854 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
855}
856
857void emit_adds(int rs1,int rs2,int rt)
858{
859 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adcs(int rs1,int rs2,int rt)
864{
865 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_sbc(int rs1,int rs2,int rt)
870{
871 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbcs(int rs1,int rs2,int rt)
876{
877 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_neg(int rs, int rt)
882{
883 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
884 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
885}
886
887void emit_negs(int rs, int rt)
888{
889 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_sub(int rs1,int rs2,int rt)
894{
895 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
896 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
897}
898
899void emit_subs(int rs1,int rs2,int rt)
900{
901 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_zeroreg(int rt)
906{
907 assem_debug("mov %s,#0\n",regname[rt]);
908 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
909}
910
911void emit_loadreg(int r, int hr)
912{
3d624f89 913#ifdef FORCE32
914 if(r&64) {
915 printf("64bit load in 32bit mode!\n");
916 exit(1);
917 }
918#endif
57871462 919 if((r&63)==0)
920 emit_zeroreg(hr);
921 else {
3d624f89 922 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 923 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
924 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
925 if(r==CCREG) addr=(int)&cycle_count;
926 if(r==CSREG) addr=(int)&Status;
927 if(r==FSREG) addr=(int)&FCR31;
928 if(r==INVCP) addr=(int)&invc_ptr;
929 u_int offset = addr-(u_int)&dynarec_local;
930 assert(offset<4096);
931 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
932 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
933 }
934}
935void emit_storereg(int r, int hr)
936{
3d624f89 937#ifdef FORCE32
938 if(r&64) {
939 printf("64bit store in 32bit mode!\n");
940 exit(1);
941 }
942#endif
943 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 944 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
945 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
946 if(r==CCREG) addr=(int)&cycle_count;
947 if(r==FSREG) addr=(int)&FCR31;
948 u_int offset = addr-(u_int)&dynarec_local;
949 assert(offset<4096);
950 assem_debug("str %s,fp+%d\n",regname[hr],offset);
951 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
952}
953
954void emit_test(int rs, int rt)
955{
956 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
957 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
958}
959
960void emit_testimm(int rs,int imm)
961{
962 u_int armval;
963 assem_debug("tst %s,$%d\n",regname[rs],imm);
964 assert(genimm(imm,&armval));
965 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
966}
967
b9b61529 968void emit_testeqimm(int rs,int imm)
969{
970 u_int armval;
971 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
972 assert(genimm(imm,&armval));
973 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
974}
975
57871462 976void emit_not(int rs,int rt)
977{
978 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
979 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
980}
981
b9b61529 982void emit_mvnmi(int rs,int rt)
983{
984 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
985 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
986}
987
57871462 988void emit_and(u_int rs1,u_int rs2,u_int rt)
989{
990 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
991 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
992}
993
994void emit_or(u_int rs1,u_int rs2,u_int rt)
995{
996 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
997 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
998}
999void emit_or_and_set_flags(int rs1,int rs2,int rt)
1000{
1001 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1002 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1003}
1004
1005void emit_xor(u_int rs1,u_int rs2,u_int rt)
1006{
1007 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1008 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1009}
1010
1011void emit_loadlp(u_int imm,u_int rt)
1012{
1013 add_literal((int)out,imm);
1014 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1015 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1016}
1017void emit_movw(u_int imm,u_int rt)
1018{
1019 assert(imm<65536);
1020 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1021 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1022}
1023void emit_movt(u_int imm,u_int rt)
1024{
1025 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1026 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1027}
1028void emit_movimm(u_int imm,u_int rt)
1029{
1030 u_int armval;
1031 if(genimm(imm,&armval)) {
1032 assem_debug("mov %s,#%d\n",regname[rt],imm);
1033 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1034 }else if(genimm(~imm,&armval)) {
1035 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1036 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1037 }else if(imm<65536) {
1038 #ifdef ARMv5_ONLY
1039 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1040 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1041 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1042 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1043 #else
1044 emit_movw(imm,rt);
1045 #endif
1046 }else{
1047 #ifdef ARMv5_ONLY
1048 emit_loadlp(imm,rt);
1049 #else
1050 emit_movw(imm&0x0000FFFF,rt);
1051 emit_movt(imm&0xFFFF0000,rt);
1052 #endif
1053 }
1054}
1055void emit_pcreladdr(u_int rt)
1056{
1057 assem_debug("add %s,pc,#?\n",regname[rt]);
1058 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1059}
1060
1061void emit_addimm(u_int rs,int imm,u_int rt)
1062{
1063 assert(rs<16);
1064 assert(rt<16);
1065 if(imm!=0) {
1066 assert(imm>-65536&&imm<65536);
1067 u_int armval;
1068 if(genimm(imm,&armval)) {
1069 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1070 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1071 }else if(genimm(-imm,&armval)) {
1072 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1073 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1074 }else if(imm<0) {
1075 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1076 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1077 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1078 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1079 }else{
1080 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1081 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1082 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1083 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1084 }
1085 }
1086 else if(rs!=rt) emit_mov(rs,rt);
1087}
1088
1089void emit_addimm_and_set_flags(int imm,int rt)
1090{
1091 assert(imm>-65536&&imm<65536);
1092 u_int armval;
1093 if(genimm(imm,&armval)) {
1094 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1095 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1096 }else if(genimm(-imm,&armval)) {
1097 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1098 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1099 }else if(imm<0) {
1100 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1101 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1102 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1103 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1104 }else{
1105 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1106 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1107 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1108 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1109 }
1110}
1111void emit_addimm_no_flags(u_int imm,u_int rt)
1112{
1113 emit_addimm(rt,imm,rt);
1114}
1115
1116void emit_addnop(u_int r)
1117{
1118 assert(r<16);
1119 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1120 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1121}
1122
1123void emit_adcimm(u_int rs,int imm,u_int rt)
1124{
1125 u_int armval;
1126 assert(genimm(imm,&armval));
1127 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1128 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1129}
1130/*void emit_sbcimm(int imm,u_int rt)
1131{
1132 u_int armval;
1133 assert(genimm(imm,&armval));
1134 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1135 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1136}*/
1137void emit_sbbimm(int imm,u_int rt)
1138{
1139 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1140 assert(rt<8);
1141 if(imm<128&&imm>=-128) {
1142 output_byte(0x83);
1143 output_modrm(3,rt,3);
1144 output_byte(imm);
1145 }
1146 else
1147 {
1148 output_byte(0x81);
1149 output_modrm(3,rt,3);
1150 output_w32(imm);
1151 }
1152}
1153void emit_rscimm(int rs,int imm,u_int rt)
1154{
1155 assert(0);
1156 u_int armval;
1157 assert(genimm(imm,&armval));
1158 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1159 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1160}
1161
1162void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1163{
1164 // TODO: if(genimm(imm,&armval)) ...
1165 // else
1166 emit_movimm(imm,HOST_TEMPREG);
1167 emit_adds(HOST_TEMPREG,rsl,rtl);
1168 emit_adcimm(rsh,0,rth);
1169}
1170
1171void emit_sbb(int rs1,int rs2)
1172{
1173 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1174 output_byte(0x19);
1175 output_modrm(3,rs1,rs2);
1176}
1177
1178void emit_andimm(int rs,int imm,int rt)
1179{
1180 u_int armval;
1181 if(genimm(imm,&armval)) {
1182 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1183 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1184 }else if(genimm(~imm,&armval)) {
1185 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1186 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1187 }else if(imm==65535) {
1188 #ifdef ARMv5_ONLY
1189 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1190 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1191 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1192 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1193 #else
1194 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1195 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1196 #endif
1197 }else{
1198 assert(imm>0&&imm<65535);
1199 #ifdef ARMv5_ONLY
1200 assem_debug("mov r14,#%d\n",imm&0xFF00);
1201 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1202 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1203 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1204 #else
1205 emit_movw(imm,HOST_TEMPREG);
1206 #endif
1207 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1208 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1209 }
1210}
1211
1212void emit_orimm(int rs,int imm,int rt)
1213{
1214 u_int armval;
1215 if(genimm(imm,&armval)) {
1216 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1217 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1218 }else{
1219 assert(imm>0&&imm<65536);
1220 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1221 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1222 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1223 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1224 }
1225}
1226
1227void emit_xorimm(int rs,int imm,int rt)
1228{
57871462 1229 u_int armval;
1230 if(genimm(imm,&armval)) {
1231 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1232 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1233 }else{
514ed0d9 1234 assert(imm>0&&imm<65536);
57871462 1235 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1236 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1237 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1238 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1239 }
1240}
1241
1242void emit_shlimm(int rs,u_int imm,int rt)
1243{
1244 assert(imm>0);
1245 assert(imm<32);
1246 //if(imm==1) ...
1247 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1248 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1249}
1250
1251void emit_shrimm(int rs,u_int imm,int rt)
1252{
1253 assert(imm>0);
1254 assert(imm<32);
1255 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1256 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1257}
1258
1259void emit_sarimm(int rs,u_int imm,int rt)
1260{
1261 assert(imm>0);
1262 assert(imm<32);
1263 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1264 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1265}
1266
1267void emit_rorimm(int rs,u_int imm,int rt)
1268{
1269 assert(imm>0);
1270 assert(imm<32);
1271 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1272 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1273}
1274
1275void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1276{
1277 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1278 assert(imm>0);
1279 assert(imm<32);
1280 //if(imm==1) ...
1281 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1282 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1283 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1284 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1285}
1286
1287void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1288{
1289 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1290 assert(imm>0);
1291 assert(imm<32);
1292 //if(imm==1) ...
1293 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1294 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1295 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1296 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1297}
1298
b9b61529 1299void emit_signextend16(int rs,int rt)
1300{
1301 #ifdef ARMv5_ONLY
1302 emit_shlimm(rs,16,rt);
1303 emit_sarimm(rt,16,rt);
1304 #else
1305 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1306 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1307 #endif
1308}
1309
57871462 1310void emit_shl(u_int rs,u_int shift,u_int rt)
1311{
1312 assert(rs<16);
1313 assert(rt<16);
1314 assert(shift<16);
1315 //if(imm==1) ...
1316 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1317 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1318}
1319void emit_shr(u_int rs,u_int shift,u_int rt)
1320{
1321 assert(rs<16);
1322 assert(rt<16);
1323 assert(shift<16);
1324 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1325 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1326}
1327void emit_sar(u_int rs,u_int shift,u_int rt)
1328{
1329 assert(rs<16);
1330 assert(rt<16);
1331 assert(shift<16);
1332 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1333 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1334}
1335void emit_shlcl(int r)
1336{
1337 assem_debug("shl %%%s,%%cl\n",regname[r]);
1338 assert(0);
1339}
1340void emit_shrcl(int r)
1341{
1342 assem_debug("shr %%%s,%%cl\n",regname[r]);
1343 assert(0);
1344}
1345void emit_sarcl(int r)
1346{
1347 assem_debug("sar %%%s,%%cl\n",regname[r]);
1348 assert(0);
1349}
1350
1351void emit_shldcl(int r1,int r2)
1352{
1353 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1354 assert(0);
1355}
1356void emit_shrdcl(int r1,int r2)
1357{
1358 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1359 assert(0);
1360}
1361void emit_orrshl(u_int rs,u_int shift,u_int rt)
1362{
1363 assert(rs<16);
1364 assert(rt<16);
1365 assert(shift<16);
1366 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1367 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1368}
1369void emit_orrshr(u_int rs,u_int shift,u_int rt)
1370{
1371 assert(rs<16);
1372 assert(rt<16);
1373 assert(shift<16);
1374 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1375 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1376}
1377
1378void emit_cmpimm(int rs,int imm)
1379{
1380 u_int armval;
1381 if(genimm(imm,&armval)) {
1382 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1383 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1384 }else if(genimm(-imm,&armval)) {
1385 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1386 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1387 }else if(imm>0) {
1388 assert(imm<65536);
1389 #ifdef ARMv5_ONLY
1390 emit_movimm(imm,HOST_TEMPREG);
1391 #else
1392 emit_movw(imm,HOST_TEMPREG);
1393 #endif
1394 assem_debug("cmp %s,r14\n",regname[rs]);
1395 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1396 }else{
1397 assert(imm>-65536);
1398 #ifdef ARMv5_ONLY
1399 emit_movimm(-imm,HOST_TEMPREG);
1400 #else
1401 emit_movw(-imm,HOST_TEMPREG);
1402 #endif
1403 assem_debug("cmn %s,r14\n",regname[rs]);
1404 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1405 }
1406}
1407
1408void emit_cmovne(u_int *addr,int rt)
1409{
1410 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1411 assert(0);
1412}
1413void emit_cmovl(u_int *addr,int rt)
1414{
1415 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1416 assert(0);
1417}
1418void emit_cmovs(u_int *addr,int rt)
1419{
1420 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1421 assert(0);
1422}
1423void emit_cmovne_imm(int imm,int rt)
1424{
1425 assem_debug("movne %s,#%d\n",regname[rt],imm);
1426 u_int armval;
1427 assert(genimm(imm,&armval));
1428 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1429}
1430void emit_cmovl_imm(int imm,int rt)
1431{
1432 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1433 u_int armval;
1434 assert(genimm(imm,&armval));
1435 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1436}
1437void emit_cmovb_imm(int imm,int rt)
1438{
1439 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1440 u_int armval;
1441 assert(genimm(imm,&armval));
1442 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1443}
1444void emit_cmovs_imm(int imm,int rt)
1445{
1446 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1447 u_int armval;
1448 assert(genimm(imm,&armval));
1449 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1450}
1451void emit_cmove_reg(int rs,int rt)
1452{
1453 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1454 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1455}
1456void emit_cmovne_reg(int rs,int rt)
1457{
1458 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1459 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1460}
1461void emit_cmovl_reg(int rs,int rt)
1462{
1463 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1464 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1465}
1466void emit_cmovs_reg(int rs,int rt)
1467{
1468 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1469 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1470}
1471
// Set rt to 1 if rs is less than imm, else 0, using the "lt" conditional
// move.  When rt is also the source, rt is cleared after the compare
// instead of before it so the operand is still intact when compared.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Same sequence as emit_slti32 but finishing with emit_cmovb_imm (the "cc"
// conditional move) instead of the "lt" one.  When rt is also the source,
// rt is cleared after the compare instead of before it.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// 64-bit "set if less than immediate" on the pair rsh:rsl, result in rt.
// The low word is compared first via emit_slti32; the result is then
// corrected from the high word, which is tested against 0 when imm>=0 and
// compared against -1 when imm<0.  rt must not be the same register as rsh.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
  else
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
// 64-bit variant of emit_sltiu32 on the pair rsh:rsl, result in rt.
// After the low-word step, the high word overrides the result when it is
// not the expected extension of imm: rt is forced to 0 if imm>=0 and
// rsh!=0, or forced to 1 if imm<0 and rsh!=-1.
// rt must not be the same register as rsh.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1518
1519void emit_cmp(int rs,int rt)
1520{
1521 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1522 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1523}
// rt = (rs > 0) ? 1 : 0, built as: compare rs with 1, load 1 into rt,
// then conditionally ("lt") replace it with 0.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Normalise rs to a boolean in rt: flags are set from rs (emit_test when
// rt is the same register, otherwise a flag-setting move into rt), then rt
// is conditionally ("ne") set to 1.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// 64-bit "greater than zero" on the pair rsh:rsl, result in rt.
// Starts from the 32-bit result for the low word, then tests the high
// word: "ne" forces 1 and "mi" afterwards forces 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// 64-bit "not zero" on the pair rsh:rsl: ORs both halves into rt with
// flags set, then conditionally ("ne") sets rt to 1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// rt = 1 if the compare of rs1 with rs2 satisfies "lt", else 0.
// If rt is one of the sources it is cleared only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = 1 if the compare of rs1 with rs2 satisfies "cc", else 0.
// If rt is one of the sources it is cleared only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1568void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1569{
1570 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1571 assert(u1!=rt);
1572 assert(u2!=rt);
1573 emit_cmp(l1,l2);
1574 emit_movimm(0,rt);
1575 emit_sbcs(u1,u2,HOST_TEMPREG);
1576 emit_cmovl_imm(1,rt);
1577}
1578void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1579{
1580 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1581 assert(u1!=rt);
1582 assert(u2!=rt);
1583 emit_cmp(l1,l2);
1584 emit_movimm(0,rt);
1585 emit_sbcs(u1,u2,HOST_TEMPREG);
1586 emit_cmovb_imm(1,rt);
1587}
1588
1589void emit_call(int a)
1590{
1591 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1592 u_int offset=genjmp(a);
1593 output_w32(0xeb000000|offset);
1594}
1595void emit_jmp(int a)
1596{
1597 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1598 u_int offset=genjmp(a);
1599 output_w32(0xea000000|offset);
1600}
1601void emit_jne(int a)
1602{
1603 assem_debug("bne %x\n",a);
1604 u_int offset=genjmp(a);
1605 output_w32(0x1a000000|offset);
1606}
1607void emit_jeq(int a)
1608{
1609 assem_debug("beq %x\n",a);
1610 u_int offset=genjmp(a);
1611 output_w32(0x0a000000|offset);
1612}
1613void emit_js(int a)
1614{
1615 assem_debug("bmi %x\n",a);
1616 u_int offset=genjmp(a);
1617 output_w32(0x4a000000|offset);
1618}
1619void emit_jns(int a)
1620{
1621 assem_debug("bpl %x\n",a);
1622 u_int offset=genjmp(a);
1623 output_w32(0x5a000000|offset);
1624}
1625void emit_jl(int a)
1626{
1627 assem_debug("blt %x\n",a);
1628 u_int offset=genjmp(a);
1629 output_w32(0xba000000|offset);
1630}
1631void emit_jge(int a)
1632{
1633 assem_debug("bge %x\n",a);
1634 u_int offset=genjmp(a);
1635 output_w32(0xaa000000|offset);
1636}
1637void emit_jno(int a)
1638{
1639 assem_debug("bvc %x\n",a);
1640 u_int offset=genjmp(a);
1641 output_w32(0x7a000000|offset);
1642}
1643void emit_jc(int a)
1644{
1645 assem_debug("bcs %x\n",a);
1646 u_int offset=genjmp(a);
1647 output_w32(0x2a000000|offset);
1648}
1649void emit_jcc(int a)
1650{
1651 assem_debug("bcc %x\n",a);
1652 u_int offset=genjmp(a);
1653 output_w32(0x3a000000|offset);
1654}
1655
// Unimplemented stub: traces "push $imm" and then fails assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Unimplemented stub: traces "pusha" and then fails assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented stub: traces "popa" and then fails assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1671void emit_pushreg(u_int r)
1672{
1673 assem_debug("push %%%s\n",regname[r]);
1674 assert(0);
1675}
1676void emit_popreg(u_int r)
1677{
1678 assem_debug("pop %%%s\n",regname[r]);
1679 assert(0);
1680}
1681void emit_callreg(u_int r)
1682{
1683 assem_debug("call *%%%s\n",regname[r]);
1684 assert(0);
1685}
1686void emit_jmpreg(u_int r)
1687{
1688 assem_debug("mov pc,%s\n",regname[r]);
1689 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1690}
1691
1692void emit_readword_indexed(int offset, int rs, int rt)
1693{
1694 assert(offset>-4096&&offset<4096);
1695 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1696 if(offset>=0) {
1697 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1698 }else{
1699 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1700 }
1701}
1702void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1703{
1704 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1705 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1706}
// Word load with optional map register.  map<0 means no map register: a
// plain offset load is emitted.  Otherwise addr must be 0 and the load is
// indexed by map scaled by 4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Doubleword load into rh (optional, skipped when rh<0) and rl.
// Without a map register (map<0) the two words are read at addr and
// addr+4.  With a map register, rh is read first, map is incremented by 1
// in place, and rl is read through the updated map; rh must differ from rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1727void emit_movsbl_indexed(int offset, int rs, int rt)
1728{
1729 assert(offset>-256&&offset<256);
1730 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1731 if(offset>=0) {
1732 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1733 }else{
1734 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1735 }
1736}
1737void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1738{
1739 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1740 else {
1741 if(addr==0) {
1742 emit_shlimm(map,2,map);
1743 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1744 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1745 }else{
1746 assert(addr>-256&&addr<256);
1747 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1748 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1749 emit_movsbl_indexed(addr, rt, rt);
1750 }
1751 }
1752}
1753void emit_movswl_indexed(int offset, int rs, int rt)
1754{
1755 assert(offset>-256&&offset<256);
1756 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1757 if(offset>=0) {
1758 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1759 }else{
1760 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1761 }
1762}
1763void emit_movzbl_indexed(int offset, int rs, int rt)
1764{
1765 assert(offset>-4096&&offset<4096);
1766 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1767 if(offset>=0) {
1768 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1769 }else{
1770 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1771 }
1772}
1773void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1774{
1775 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1776 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1777}
// Zero-extending byte load with optional map register.  map<0 gives a
// plain offset load.  Otherwise the load is indexed by map scaled by 4; a
// non-zero addr is first added to rs, with the sum placed in rt and used
// as the base.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1790void emit_movzwl_indexed(int offset, int rs, int rt)
1791{
1792 assert(offset>-256&&offset<256);
1793 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1794 if(offset>=0) {
1795 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1796 }else{
1797 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1798 }
1799}
1800void emit_readword(int addr, int rt)
1801{
1802 u_int offset = addr-(u_int)&dynarec_local;
1803 assert(offset<4096);
1804 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1805 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1806}
1807void emit_movsbl(int addr, int rt)
1808{
1809 u_int offset = addr-(u_int)&dynarec_local;
1810 assert(offset<256);
1811 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1812 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1813}
1814void emit_movswl(int addr, int rt)
1815{
1816 u_int offset = addr-(u_int)&dynarec_local;
1817 assert(offset<256);
1818 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1819 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1820}
1821void emit_movzbl(int addr, int rt)
1822{
1823 u_int offset = addr-(u_int)&dynarec_local;
1824 assert(offset<4096);
1825 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1826 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1827}
1828void emit_movzwl(int addr, int rt)
1829{
1830 u_int offset = addr-(u_int)&dynarec_local;
1831 assert(offset<256);
1832 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1833 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1834}
1835void emit_movzwl_reg(int rs, int rt)
1836{
1837 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1838 assert(0);
1839}
1840
1841void emit_xchg(int rs, int rt)
1842{
1843 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1844 assert(0);
1845}
1846void emit_writeword_indexed(int rt, int offset, int rs)
1847{
1848 assert(offset>-4096&&offset<4096);
1849 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1850 if(offset>=0) {
1851 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1852 }else{
1853 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1854 }
1855}
1856void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1857{
1858 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1859 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1860}
// Word store with optional map register.  map<0 gives a plain offset
// store; otherwise addr must be 0 and the store is indexed by map scaled
// by 4.  The temp parameter is not used by this function.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh (high) and rl (low).
// Without a map register (map<0): rh (if rh>=0) goes to addr and rl to
// addr+4.  With a map register rh must be valid; the second word is
// reached either through temp = map+1 (when temp is a register distinct
// from rs) or by advancing rs itself by 4 after the first store.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1885void emit_writehword_indexed(int rt, int offset, int rs)
1886{
1887 assert(offset>-256&&offset<256);
1888 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1889 if(offset>=0) {
1890 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1891 }else{
1892 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1893 }
1894}
1895void emit_writebyte_indexed(int rt, int offset, int rs)
1896{
1897 assert(offset>-4096&&offset<4096);
1898 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1899 if(offset>=0) {
1900 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1901 }else{
1902 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1903 }
1904}
1905void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1906{
1907 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1908 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1909}
// Byte store with optional map register.  map<0 gives a plain offset
// store.  Otherwise the store is indexed by map scaled by 4; a non-zero
// addr is first added to rs, with the sum placed in temp and used as base.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1922void emit_writeword(int rt, int addr)
1923{
1924 u_int offset = addr-(u_int)&dynarec_local;
1925 assert(offset<4096);
1926 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1927 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1928}
1929void emit_writehword(int rt, int addr)
1930{
1931 u_int offset = addr-(u_int)&dynarec_local;
1932 assert(offset<256);
1933 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1934 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1935}
1936void emit_writebyte(int rt, int addr)
1937{
1938 u_int offset = addr-(u_int)&dynarec_local;
1939 assert(offset<4096);
1940 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1941 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1942}
// Unimplemented stub: traces "movl $imm,addr" and then fails assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Unimplemented stub: traces "movb $imm,addr" and then fails assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1953
1954void emit_mul(int rs)
1955{
1956 assem_debug("mul %%%s\n",regname[rs]);
1957 assert(0);
1958}
1959void emit_imul(int rs)
1960{
1961 assem_debug("imul %%%s\n",regname[rs]);
1962 assert(0);
1963}
1964void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1965{
1966 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1967 assert(rs1<16);
1968 assert(rs2<16);
1969 assert(hi<16);
1970 assert(lo<16);
1971 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1972}
1973void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1974{
1975 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1976 assert(rs1<16);
1977 assert(rs2<16);
1978 assert(hi<16);
1979 assert(lo<16);
1980 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1981}
1982
1983void emit_div(int rs)
1984{
1985 assem_debug("div %%%s\n",regname[rs]);
1986 assert(0);
1987}
1988void emit_idiv(int rs)
1989{
1990 assem_debug("idiv %%%s\n",regname[rs]);
1991 assert(0);
1992}
// Unimplemented stub: traces "cdq" and then fails assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
1998
1999void emit_clz(int rs,int rt)
2000{
2001 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2002 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2003}
2004
2005void emit_subcs(int rs1,int rs2,int rt)
2006{
2007 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2008 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2009}
2010
2011void emit_shrcc_imm(int rs,u_int imm,int rt)
2012{
2013 assert(imm>0);
2014 assert(imm<32);
2015 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2016 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2017}
2018
2019void emit_negmi(int rs, int rt)
2020{
2021 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2022 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2023}
2024
2025void emit_negsmi(int rs, int rt)
2026{
2027 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2028 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2029}
2030
2031void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2032{
2033 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2034 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2035}
2036
2037void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2038{
2039 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2040 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2041}
2042
2043void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2044{
2045 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2046 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2047}
2048
2049void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2050{
2051 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2052 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2053}
2054
2055void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2056{
2057 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2058 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2059}
2060
2061void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2062{
2063 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2064 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2065}
2066
2067void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2068{
2069 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2070 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2071}
2072
2073void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2074{
2075 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2076 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2077}
2078
2079void emit_teq(int rs, int rt)
2080{
2081 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2082 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2083}
2084
2085void emit_rsbimm(int rs, int imm, int rt)
2086{
2087 u_int armval;
2088 assert(genimm(imm,&armval));
2089 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2090 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2091}
2092
2093// Load 2 immediates optimizing for small code size
2094void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2095{
2096 emit_movimm(imm1,rt1);
2097 u_int armval;
2098 if(genimm(imm2-imm1,&armval)) {
2099 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2100 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2101 }else if(genimm(imm1-imm2,&armval)) {
2102 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2103 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2104 }
2105 else emit_movimm(imm2,rt2);
2106}
2107
2108// Conditionally select one of two immediates, optimizing for small code size
2109// This will only be called if HAVE_CMOV_IMM is defined
2110void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2111{
2112 u_int armval;
2113 if(genimm(imm2-imm1,&armval)) {
2114 emit_movimm(imm1,rt);
2115 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2116 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2117 }else if(genimm(imm1-imm2,&armval)) {
2118 emit_movimm(imm1,rt);
2119 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2120 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2121 }
2122 else {
2123 #ifdef ARMv5_ONLY
2124 emit_movimm(imm1,rt);
2125 add_literal((int)out,imm2);
2126 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2127 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2128 #else
2129 emit_movw(imm1&0x0000FFFF,rt);
2130 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2131 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2132 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2133 }
2134 emit_movt(imm1&0xFFFF0000,rt);
2135 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2136 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2137 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2138 }
2139 #endif
2140 }
2141}
2142
// Special case for checking invalid_code (immediate-address form).
// Not implemented in this file: the body is only assert(0).
void emit_cmpmem_indexedsr12_imm(int addr, int r, int imm)
{
  assert(0);
}
2148
2149// special case for checking invalid_code
2150void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2151{
2152 assert(imm<128&&imm>=0);
2153 assert(r>=0&&r<16);
2154 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2155 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2156 emit_cmpimm(HOST_TEMPREG,imm);
2157}
2158
2159// special case for tlb mapping
2160void emit_addsr12(int rs1,int rs2,int rt)
2161{
2162 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2163 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2164}
2165
2166// Used to preload hash table entries
2167void emit_prefetch(void *addr)
2168{
2169 assem_debug("prefetch %x\n",(int)addr);
2170 output_byte(0x0F);
2171 output_byte(0x18);
2172 output_modrm(0,5,1);
2173 output_w32((int)addr);
2174}
2175void emit_prefetchreg(int r)
2176{
2177 assem_debug("pld %s\n",regname[r]);
2178 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2179}
2180
2181// Special case for mini_ht
2182void emit_ldreq_indexed(int rs, u_int offset, int rt)
2183{
2184 assert(offset<4096);
2185 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2186 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2187}
2188
2189void emit_flds(int r,int sr)
2190{
2191 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2192 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2193}
2194
2195void emit_vldr(int r,int vr)
2196{
2197 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2198 output_w32(0xed900b00|(vr<<12)|(r<<16));
2199}
2200
2201void emit_fsts(int sr,int r)
2202{
2203 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2204 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2205}
2206
2207void emit_vstr(int vr,int r)
2208{
2209 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2210 output_w32(0xed800b00|(vr<<12)|(r<<16));
2211}
2212
// Emits "ftosizs s<d>,s<s>".
void emit_ftosizs(int s,int d)
{
  const int dst=((d&14)<<11)|((d&1)<<22);  // destination single register
  const int src=((s&14)>>1)|((s&1)<<5);    // source single register
  assem_debug("ftosizs s%d,s%d\n",d,s);
  output_w32(0xeebd0ac0|dst|src);
}
2218
// Emits "ftosizd s<d>,d<s>".
void emit_ftosizd(int s,int d)
{
  const int dst=((d&14)<<11)|((d&1)<<22);  // destination single register
  const int src=s&7;                       // source double register
  assem_debug("ftosizd s%d,d%d\n",d,s);
  output_w32(0xeebd0bc0|dst|src);
}
2224
// Emits "fsitos s<d>,s<s>".
void emit_fsitos(int s,int d)
{
  const int dst=((d&14)<<11)|((d&1)<<22);  // destination single register
  const int src=((s&14)>>1)|((s&1)<<5);    // source single register
  assem_debug("fsitos s%d,s%d\n",d,s);
  output_w32(0xeeb80ac0|dst|src);
}
2230
// Emits "fsitod d<d>,s<s>".
void emit_fsitod(int s,int d)
{
  const int dst=(d&7)<<12;               // destination double register
  const int src=((s&14)>>1)|((s&1)<<5);  // source single register
  assem_debug("fsitod d%d,s%d\n",d,s);
  output_w32(0xeeb80bc0|dst|src);
}
2236
// Emits "fcvtds d<d>,s<s>".
void emit_fcvtds(int s,int d)
{
  const int dst=(d&7)<<12;               // destination double register
  const int src=((s&14)>>1)|((s&1)<<5);  // source single register
  assem_debug("fcvtds d%d,s%d\n",d,s);
  output_w32(0xeeb70ac0|dst|src);
}
2242
// Emits "fcvtsd s<d>,d<s>".
void emit_fcvtsd(int s,int d)
{
  const int dst=((d&14)<<11)|((d&1)<<22);  // destination single register
  const int src=s&7;                       // source double register
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  output_w32(0xeeb70bc0|dst|src);
}
2248
// Emits "fsqrts s<d>,s<s>".
// Both operands are encoded with the single-register layout, so the debug
// text names both as s registers (it used to print the destination as "d").
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2254
// Emits "fsqrtd d<d>,d<s>".
// Both operands are encoded with the double-register layout, so the debug
// text names both as d registers (it used to print the destination as "s").
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2260
// Emits "fabss s<d>,s<s>".
// Both operands are encoded with the single-register layout, so the debug
// text names both as s registers (it used to print the destination as "d").
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2266
// Emits "fabsd d<d>,d<s>".
// Both operands are encoded with the double-register layout, so the debug
// text names both as d registers (it used to print the destination as "s").
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2272
// Emits "fnegs s<d>,s<s>".
// Both operands are encoded with the single-register layout, so the debug
// text names both as s registers (it used to print the destination as "d").
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2278
// Emits "fnegd d<d>,d<s>".
// Both operands are encoded with the double-register layout, so the debug
// text names both as d registers (it used to print the destination as "s").
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2284
// Emits "fadds s<d>,s<s1>,s<s2>".
void emit_fadds(int s1,int s2,int d)
{
  const int dst=((d&14)<<11)|((d&1)<<22);    // destination single register
  const int lhs=((s1&14)<<15)|((s1&1)<<7);   // first source single register
  const int rhs=((s2&14)>>1)|((s2&1)<<5);    // second source single register
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a00|dst|lhs|rhs);
}
2290
// Emits "faddd d<d>,d<s1>,d<s2>".
void emit_faddd(int s1,int s2,int d)
{
  const int dst=(d&7)<<12;   // destination double register
  const int lhs=(s1&7)<<16;  // first source double register
  const int rhs=s2&7;        // second source double register
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b00|dst|lhs|rhs);
}
2296
// Emits "fsubs s<d>,s<s1>,s<s2>".
void emit_fsubs(int s1,int s2,int d)
{
  const int dst=((d&14)<<11)|((d&1)<<22);    // destination single register
  const int lhs=((s1&14)<<15)|((s1&1)<<7);   // first source single register
  const int rhs=((s2&14)>>1)|((s2&1)<<5);    // second source single register
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a40|dst|lhs|rhs);
}
2302
// Emits "fsubd d<d>,d<s1>,d<s2>".
void emit_fsubd(int s1,int s2,int d)
{
  const int dst=(d&7)<<12;   // destination double register
  const int lhs=(s1&7)<<16;  // first source double register
  const int rhs=s2&7;        // second source double register
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b40|dst|lhs|rhs);
}
2308
// Emits "fmuls s<d>,s<s1>,s<s2>".
void emit_fmuls(int s1,int s2,int d)
{
  const int dst=((d&14)<<11)|((d&1)<<22);    // destination single register
  const int lhs=((s1&14)<<15)|((s1&1)<<7);   // first source single register
  const int rhs=((s2&14)>>1)|((s2&1)<<5);    // second source single register
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee200a00|dst|lhs|rhs);
}
2314
// Emits "fmuld d<d>,d<s1>,d<s2>".
void emit_fmuld(int s1,int s2,int d)
{
  const int dst=(d&7)<<12;   // destination double register
  const int lhs=(s1&7)<<16;  // first source double register
  const int rhs=s2&7;        // second source double register
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee200b00|dst|lhs|rhs);
}
2320
// Emits "fdivs s<d>,s<s1>,s<s2>".
void emit_fdivs(int s1,int s2,int d)
{
  const int dst=((d&14)<<11)|((d&1)<<22);    // destination single register
  const int lhs=((s1&14)<<15)|((s1&1)<<7);   // first source single register
  const int rhs=((s2&14)>>1)|((s2&1)<<5);    // second source single register
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee800a00|dst|lhs|rhs);
}
2326
// Emits "fdivd d<d>,d<s1>,d<s2>".
void emit_fdivd(int s1,int s2,int d)
{
  const int dst=(d&7)<<12;   // destination double register
  const int lhs=(s1&7)<<16;  // first source double register
  const int rhs=s2&7;        // second source double register
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee800b00|dst|lhs|rhs);
}
2332
// Emits a fixed "fcmps s14, s15".
// The x and y arguments are not consulted; the operand registers are
// baked into the opcode word.
void emit_fcmps(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  output_w32(0xeeb47a67);
}
2338
// Emits a fixed "fcmpd d6, d7".
// The x and y arguments are not consulted; the operand registers are
// baked into the opcode word.
void emit_fcmpd(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  output_w32(0xeeb46b47);
}
2344
2345void emit_fmstat()
2346{
2347 assem_debug("fmstat\n");
2348 output_w32(0xeef1fa10);
2349}
2350
2351void emit_bicne_imm(int rs,int imm,int rt)
2352{
2353 u_int armval;
2354 assert(genimm(imm,&armval));
2355 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2356 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2357}
2358
2359void emit_biccs_imm(int rs,int imm,int rt)
2360{
2361 u_int armval;
2362 assert(genimm(imm,&armval));
2363 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2364 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2365}
2366
2367void emit_bicvc_imm(int rs,int imm,int rt)
2368{
2369 u_int armval;
2370 assert(genimm(imm,&armval));
2371 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2372 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2373}
2374
2375void emit_bichi_imm(int rs,int imm,int rt)
2376{
2377 u_int armval;
2378 assert(genimm(imm,&armval));
2379 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2380 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2381}
2382
2383void emit_orrvs_imm(int rs,int imm,int rt)
2384{
2385 u_int armval;
2386 assert(genimm(imm,&armval));
2387 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2388 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2389}
2390
b9b61529 2391void emit_orrne_imm(int rs,int imm,int rt)
2392{
2393 u_int armval;
2394 assert(genimm(imm,&armval));
2395 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2396 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2397}
2398
2399void emit_andne_imm(int rs,int imm,int rt)
2400{
2401 u_int armval;
2402 assert(genimm(imm,&armval));
2403 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2404 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2405}
2406
57871462 2407void emit_jno_unlikely(int a)
2408{
2409 //emit_jno(a);
2410 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2411 output_w32(0x72800000|rd_rn_rm(15,15,0));
2412}
2413
2414// Save registers before function call
2415void save_regs(u_int reglist)
2416{
2417 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2418 if(!reglist) return;
2419 assem_debug("stmia fp,{");
2420 if(reglist&1) assem_debug("r0, ");
2421 if(reglist&2) assem_debug("r1, ");
2422 if(reglist&4) assem_debug("r2, ");
2423 if(reglist&8) assem_debug("r3, ");
2424 if(reglist&0x1000) assem_debug("r12");
2425 assem_debug("}\n");
2426 output_w32(0xe88b0000|reglist);
2427}
2428// Restore registers after function call
2429void restore_regs(u_int reglist)
2430{
2431 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2432 if(!reglist) return;
2433 assem_debug("ldmia fp,{");
2434 if(reglist&1) assem_debug("r0, ");
2435 if(reglist&2) assem_debug("r1, ");
2436 if(reglist&4) assem_debug("r2, ");
2437 if(reglist&8) assem_debug("r3, ");
2438 if(reglist&0x1000) assem_debug("r12");
2439 assem_debug("}\n");
2440 output_w32(0xe89b0000|reglist);
2441}
2442
2443// Write back consts using r14 so we don't disturb the other registers
2444void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2445{
2446 int hr;
2447 for(hr=0;hr<HOST_REGS;hr++) {
2448 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2449 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2450 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2451 int value=constmap[i][hr];
2452 if(value==0) {
2453 emit_zeroreg(HOST_TEMPREG);
2454 }
2455 else {
2456 emit_movimm(value,HOST_TEMPREG);
2457 }
2458 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2459#ifndef FORCE32
57871462 2460 if((i_is32>>i_regmap[hr])&1) {
2461 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2462 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2463 }
24385cae 2464#endif
57871462 2465 }
2466 }
2467 }
2468 }
2469}
2470
2471/* Stubs/epilogue */
2472
2473void literal_pool(int n)
2474{
2475 if(!literalcount) return;
2476 if(n) {
2477 if((int)out-literals[0][0]<4096-n) return;
2478 }
2479 u_int *ptr;
2480 int i;
2481 for(i=0;i<literalcount;i++)
2482 {
2483 ptr=(u_int *)literals[i][0];
2484 u_int offset=(u_int)out-(u_int)ptr-8;
2485 assert(offset<4096);
2486 assert(!(offset&3));
2487 *ptr|=offset;
2488 output_w32(literals[i][1]);
2489 }
2490 literalcount=0;
2491}
2492
2493void literal_pool_jumpover(int n)
2494{
2495 if(!literalcount) return;
2496 if(n) {
2497 if((int)out-literals[0][0]<4096-n) return;
2498 }
2499 int jaddr=(int)out;
2500 emit_jmp(0);
2501 literal_pool(0);
2502 set_jump_target(jaddr,(int)out);
2503}
2504
2505emit_extjump2(int addr, int target, int linker)
2506{
2507 u_char *ptr=(u_char *)addr;
2508 assert((ptr[3]&0x0e)==0xa);
2509 emit_loadlp(target,0);
2510 emit_loadlp(addr,1);
24385cae 2511 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2512 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2513//DEBUG >
2514#ifdef DEBUG_CYCLE_COUNT
2515 emit_readword((int)&last_count,ECX);
2516 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2517 emit_readword((int)&next_interupt,ECX);
2518 emit_writeword(HOST_CCREG,(int)&Count);
2519 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2520 emit_writeword(ECX,(int)&last_count);
2521#endif
2522//DEBUG <
2523 emit_jmp(linker);
2524}
2525
2526emit_extjump(int addr, int target)
2527{
2528 emit_extjump2(addr, target, (int)dyna_linker);
2529}
2530emit_extjump_ds(int addr, int target)
2531{
2532 emit_extjump2(addr, target, (int)dyna_linker_ds);
2533}
2534
2535do_readstub(int n)
2536{
2537 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2538 literal_pool(256);
2539 set_jump_target(stubs[n][1],(int)out);
2540 int type=stubs[n][0];
2541 int i=stubs[n][3];
2542 int rs=stubs[n][4];
2543 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2544 u_int reglist=stubs[n][7];
2545 signed char *i_regmap=i_regs->regmap;
2546 int addr=get_reg(i_regmap,AGEN1+(i&1));
2547 int rth,rt;
2548 int ds;
b9b61529 2549 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2550 rth=get_reg(i_regmap,FTEMP|64);
2551 rt=get_reg(i_regmap,FTEMP);
2552 }else{
2553 rth=get_reg(i_regmap,rt1[i]|64);
2554 rt=get_reg(i_regmap,rt1[i]);
2555 }
5bf843dc 2556#ifdef PCSX
2557 if(rt<0)
2558 // assume forced dummy read
2559 rt=get_reg(i_regmap,-1);
2560#endif
57871462 2561 assert(rs>=0);
2562 assert(rt>=0);
2563 if(addr<0) addr=rt;
2564 assert(addr>=0);
2565 int ftable=0;
2566 if(type==LOADB_STUB||type==LOADBU_STUB)
2567 ftable=(int)readmemb;
2568 if(type==LOADH_STUB||type==LOADHU_STUB)
2569 ftable=(int)readmemh;
2570 if(type==LOADW_STUB)
2571 ftable=(int)readmem;
24385cae 2572#ifndef FORCE32
57871462 2573 if(type==LOADD_STUB)
2574 ftable=(int)readmemd;
24385cae 2575#endif
2576 assert(ftable!=0);
57871462 2577 emit_writeword(rs,(int)&address);
2578 //emit_pusha();
2579 save_regs(reglist);
2580 ds=i_regs!=&regs[i];
2581 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2582 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2583 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2584 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2585 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2586 emit_shrimm(rs,16,1);
2587 int cc=get_reg(i_regmap,CCREG);
2588 if(cc<0) {
2589 emit_loadreg(CCREG,2);
2590 }
2591 emit_movimm(ftable,0);
2592 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2593 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2594 //emit_readword((int)&last_count,temp);
2595 //emit_add(cc,temp,cc);
2596 //emit_writeword(cc,(int)&Count);
2597 //emit_mov(15,14);
2598 emit_call((int)&indirect_jump_indexed);
2599 //emit_callreg(rs);
2600 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2601 // We really shouldn't need to update the count here,
2602 // but not doing so causes random crashes...
2603 emit_readword((int)&Count,HOST_TEMPREG);
2604 emit_readword((int)&next_interupt,2);
2605 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2606 emit_writeword(2,(int)&last_count);
2607 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2608 if(cc<0) {
2609 emit_storereg(CCREG,HOST_TEMPREG);
2610 }
2611 //emit_popa();
2612 restore_regs(reglist);
2613 //if((cc=get_reg(regmap,CCREG))>=0) {
2614 // emit_loadreg(CCREG,cc);
2615 //}
2616 if(type==LOADB_STUB)
2617 emit_movsbl((int)&readmem_dword,rt);
2618 if(type==LOADBU_STUB)
2619 emit_movzbl((int)&readmem_dword,rt);
2620 if(type==LOADH_STUB)
2621 emit_movswl((int)&readmem_dword,rt);
2622 if(type==LOADHU_STUB)
2623 emit_movzwl((int)&readmem_dword,rt);
2624 if(type==LOADW_STUB)
2625 emit_readword((int)&readmem_dword,rt);
2626 if(type==LOADD_STUB) {
2627 emit_readword((int)&readmem_dword,rt);
2628 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2629 }
2630 emit_jmp(stubs[n][2]); // return address
2631}
2632
2633inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2634{
2635 int rs=get_reg(regmap,target);
2636 int rth=get_reg(regmap,target|64);
2637 int rt=get_reg(regmap,target);
2638 assert(rs>=0);
2639 assert(rt>=0);
2640 int ftable=0;
2641 if(type==LOADB_STUB||type==LOADBU_STUB)
2642 ftable=(int)readmemb;
2643 if(type==LOADH_STUB||type==LOADHU_STUB)
2644 ftable=(int)readmemh;
2645 if(type==LOADW_STUB)
2646 ftable=(int)readmem;
24385cae 2647#ifndef FORCE32
57871462 2648 if(type==LOADD_STUB)
2649 ftable=(int)readmemd;
24385cae 2650#endif
2651 assert(ftable!=0);
57871462 2652 emit_writeword(rs,(int)&address);
2653 //emit_pusha();
2654 save_regs(reglist);
2655 //emit_shrimm(rs,16,1);
2656 int cc=get_reg(regmap,CCREG);
2657 if(cc<0) {
2658 emit_loadreg(CCREG,2);
2659 }
2660 //emit_movimm(ftable,0);
2661 emit_movimm(((u_int *)ftable)[addr>>16],0);
2662 //emit_readword((int)&last_count,12);
2663 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2664 if((signed int)addr>=(signed int)0xC0000000) {
2665 // Pagefault address
2666 int ds=regmap!=regs[i].regmap;
2667 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2668 }
2669 //emit_add(12,2,2);
2670 //emit_writeword(2,(int)&Count);
2671 //emit_call(((u_int *)ftable)[addr>>16]);
2672 emit_call((int)&indirect_jump);
2673 // We really shouldn't need to update the count here,
2674 // but not doing so causes random crashes...
2675 emit_readword((int)&Count,HOST_TEMPREG);
2676 emit_readword((int)&next_interupt,2);
2677 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2678 emit_writeword(2,(int)&last_count);
2679 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2680 if(cc<0) {
2681 emit_storereg(CCREG,HOST_TEMPREG);
2682 }
2683 //emit_popa();
2684 restore_regs(reglist);
2685 if(type==LOADB_STUB)
2686 emit_movsbl((int)&readmem_dword,rt);
2687 if(type==LOADBU_STUB)
2688 emit_movzbl((int)&readmem_dword,rt);
2689 if(type==LOADH_STUB)
2690 emit_movswl((int)&readmem_dword,rt);
2691 if(type==LOADHU_STUB)
2692 emit_movzwl((int)&readmem_dword,rt);
2693 if(type==LOADW_STUB)
2694 emit_readword((int)&readmem_dword,rt);
2695 if(type==LOADD_STUB) {
2696 emit_readword((int)&readmem_dword,rt);
2697 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2698 }
2699}
2700
2701do_writestub(int n)
2702{
2703 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2704 literal_pool(256);
2705 set_jump_target(stubs[n][1],(int)out);
2706 int type=stubs[n][0];
2707 int i=stubs[n][3];
2708 int rs=stubs[n][4];
2709 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2710 u_int reglist=stubs[n][7];
2711 signed char *i_regmap=i_regs->regmap;
2712 int addr=get_reg(i_regmap,AGEN1+(i&1));
2713 int rth,rt,r;
2714 int ds;
b9b61529 2715 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2716 rth=get_reg(i_regmap,FTEMP|64);
2717 rt=get_reg(i_regmap,r=FTEMP);
2718 }else{
2719 rth=get_reg(i_regmap,rs2[i]|64);
2720 rt=get_reg(i_regmap,r=rs2[i]);
2721 }
2722 assert(rs>=0);
2723 assert(rt>=0);
2724 if(addr<0) addr=get_reg(i_regmap,-1);
2725 assert(addr>=0);
2726 int ftable=0;
2727 if(type==STOREB_STUB)
2728 ftable=(int)writememb;
2729 if(type==STOREH_STUB)
2730 ftable=(int)writememh;
2731 if(type==STOREW_STUB)
2732 ftable=(int)writemem;
24385cae 2733#ifndef FORCE32
57871462 2734 if(type==STORED_STUB)
2735 ftable=(int)writememd;
24385cae 2736#endif
2737 assert(ftable!=0);
57871462 2738 emit_writeword(rs,(int)&address);
2739 //emit_shrimm(rs,16,rs);
2740 //emit_movmem_indexedx4(ftable,rs,rs);
2741 if(type==STOREB_STUB)
2742 emit_writebyte(rt,(int)&byte);
2743 if(type==STOREH_STUB)
2744 emit_writehword(rt,(int)&hword);
2745 if(type==STOREW_STUB)
2746 emit_writeword(rt,(int)&word);
2747 if(type==STORED_STUB) {
3d624f89 2748#ifndef FORCE32
57871462 2749 emit_writeword(rt,(int)&dword);
2750 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2751#else
2752 printf("STORED_STUB\n");
2753#endif
57871462 2754 }
2755 //emit_pusha();
2756 save_regs(reglist);
2757 ds=i_regs!=&regs[i];
2758 int real_rs=get_reg(i_regmap,rs1[i]);
2759 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2760 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2761 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2762 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2763 emit_shrimm(rs,16,1);
2764 int cc=get_reg(i_regmap,CCREG);
2765 if(cc<0) {
2766 emit_loadreg(CCREG,2);
2767 }
2768 emit_movimm(ftable,0);
2769 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2770 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2771 //emit_readword((int)&last_count,temp);
2772 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2773 //emit_add(cc,temp,cc);
2774 //emit_writeword(cc,(int)&Count);
2775 emit_call((int)&indirect_jump_indexed);
2776 //emit_callreg(rs);
2777 emit_readword((int)&Count,HOST_TEMPREG);
2778 emit_readword((int)&next_interupt,2);
2779 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2780 emit_writeword(2,(int)&last_count);
2781 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2782 if(cc<0) {
2783 emit_storereg(CCREG,HOST_TEMPREG);
2784 }
2785 //emit_popa();
2786 restore_regs(reglist);
2787 //if((cc=get_reg(regmap,CCREG))>=0) {
2788 // emit_loadreg(CCREG,cc);
2789 //}
2790 emit_jmp(stubs[n][2]); // return address
2791}
2792
2793inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2794{
2795 int rs=get_reg(regmap,-1);
2796 int rth=get_reg(regmap,target|64);
2797 int rt=get_reg(regmap,target);
2798 assert(rs>=0);
2799 assert(rt>=0);
2800 int ftable=0;
2801 if(type==STOREB_STUB)
2802 ftable=(int)writememb;
2803 if(type==STOREH_STUB)
2804 ftable=(int)writememh;
2805 if(type==STOREW_STUB)
2806 ftable=(int)writemem;
24385cae 2807#ifndef FORCE32
57871462 2808 if(type==STORED_STUB)
2809 ftable=(int)writememd;
24385cae 2810#endif
2811 assert(ftable!=0);
57871462 2812 emit_writeword(rs,(int)&address);
2813 //emit_shrimm(rs,16,rs);
2814 //emit_movmem_indexedx4(ftable,rs,rs);
2815 if(type==STOREB_STUB)
2816 emit_writebyte(rt,(int)&byte);
2817 if(type==STOREH_STUB)
2818 emit_writehword(rt,(int)&hword);
2819 if(type==STOREW_STUB)
2820 emit_writeword(rt,(int)&word);
2821 if(type==STORED_STUB) {
3d624f89 2822#ifndef FORCE32
57871462 2823 emit_writeword(rt,(int)&dword);
2824 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2825#else
2826 printf("STORED_STUB\n");
2827#endif
57871462 2828 }
2829 //emit_pusha();
2830 save_regs(reglist);
2831 //emit_shrimm(rs,16,1);
2832 int cc=get_reg(regmap,CCREG);
2833 if(cc<0) {
2834 emit_loadreg(CCREG,2);
2835 }
2836 //emit_movimm(ftable,0);
2837 emit_movimm(((u_int *)ftable)[addr>>16],0);
2838 //emit_readword((int)&last_count,12);
2839 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2840 if((signed int)addr>=(signed int)0xC0000000) {
2841 // Pagefault address
2842 int ds=regmap!=regs[i].regmap;
2843 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2844 }
2845 //emit_add(12,2,2);
2846 //emit_writeword(2,(int)&Count);
2847 //emit_call(((u_int *)ftable)[addr>>16]);
2848 emit_call((int)&indirect_jump);
2849 emit_readword((int)&Count,HOST_TEMPREG);
2850 emit_readword((int)&next_interupt,2);
2851 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2852 emit_writeword(2,(int)&last_count);
2853 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2854 if(cc<0) {
2855 emit_storereg(CCREG,HOST_TEMPREG);
2856 }
2857 //emit_popa();
2858 restore_regs(reglist);
2859}
2860
2861do_unalignedwritestub(int n)
2862{
2863 set_jump_target(stubs[n][1],(int)out);
2864 output_w32(0xef000000);
2865 emit_jmp(stubs[n][2]); // return address
2866}
2867
// Debug helper: prints seven of its arguments in the order a,b,c,d,ebp,esi,edi
// followed, in parentheses, by the int located just below edi in memory.
// esp is accepted but not printed.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  (void)esp;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,*(&edi-1));
}
2872
2873do_invstub(int n)
2874{
2875 literal_pool(20);
2876 u_int reglist=stubs[n][3];
2877 set_jump_target(stubs[n][1],(int)out);
2878 save_regs(reglist);
2879 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2880 emit_call((int)&invalidate_addr);
2881 restore_regs(reglist);
2882 emit_jmp(stubs[n][2]); // return address
2883}
2884
2885int do_dirty_stub(int i)
2886{
2887 assem_debug("do_dirty_stub %x\n",start+i*4);
2888 // Careful about the code output here, verify_dirty needs to parse it.
2889 #ifdef ARMv5_ONLY
2890 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2891 emit_loadlp((int)copy,2);
2892 emit_loadlp(slen*4,3);
2893 #else
2894 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2895 emit_movw(((u_int)copy)&0x0000FFFF,2);
2896 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2897 emit_movt(((u_int)copy)&0xFFFF0000,2);
2898 emit_movw(slen*4,3);
2899 #endif
2900 emit_movimm(start+i*4,0);
2901 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2902 int entry=(int)out;
2903 load_regs_entry(i);
2904 if(entry==(int)out) entry=instr_addr[i];
2905 emit_jmp(instr_addr[i]);
2906 return entry;
2907}
2908
2909void do_dirty_stub_ds()
2910{
2911 // Careful about the code output here, verify_dirty needs to parse it.
2912 #ifdef ARMv5_ONLY
2913 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2914 emit_loadlp((int)copy,2);
2915 emit_loadlp(slen*4,3);
2916 #else
2917 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2918 emit_movw(((u_int)copy)&0x0000FFFF,2);
2919 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2920 emit_movt(((u_int)copy)&0xFFFF0000,2);
2921 emit_movw(slen*4,3);
2922 #endif
2923 emit_movimm(start+1,0);
2924 emit_call((int)&verify_code_ds);
2925}
2926
2927do_cop1stub(int n)
2928{
2929 literal_pool(256);
2930 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2931 set_jump_target(stubs[n][1],(int)out);
2932 int i=stubs[n][3];
3d624f89 2933// int rs=stubs[n][4];
57871462 2934 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2935 int ds=stubs[n][6];
2936 if(!ds) {
2937 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2938 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2939 }
2940 //else {printf("fp exception in delay slot\n");}
2941 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2942 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2943 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2944 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2945 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2946}
2947
2948/* TLB */
2949
2950int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
2951{
2952 if(c) {
2953 if((signed int)addr>=(signed int)0xC0000000) {
2954 // address_generation already loaded the const
2955 emit_readword_dualindexedx4(FP,map,map);
2956 }
2957 else
2958 return -1; // No mapping
2959 }
2960 else {
2961 assert(s!=map);
2962 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2963 emit_addsr12(map,s,map);
2964 // Schedule this while we wait on the load
2965 //if(x) emit_xorimm(s,x,ar);
2966 if(shift>=0) emit_shlimm(s,3,shift);
2967 if(~a) emit_andimm(s,a,ar);
2968 emit_readword_dualindexedx4(FP,map,map);
2969 }
2970 return map;
2971}
2972int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
2973{
2974 if(!c||(signed int)addr>=(signed int)0xC0000000) {
2975 emit_test(map,map);
2976 *jaddr=(int)out;
2977 emit_js(0);
2978 }
2979 return map;
2980}
2981
2982int gen_tlb_addr_r(int ar, int map) {
2983 if(map>=0) {
2984 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2985 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2986 }
2987}
2988
2989int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
2990{
2991 if(c) {
2992 if(addr<0x80800000||addr>=0xC0000000) {
2993 // address_generation already loaded the const
2994 emit_readword_dualindexedx4(FP,map,map);
2995 }
2996 else
2997 return -1; // No mapping
2998 }
2999 else {
3000 assert(s!=map);
3001 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3002 emit_addsr12(map,s,map);
3003 // Schedule this while we wait on the load
3004 //if(x) emit_xorimm(s,x,ar);
3005 emit_readword_dualindexedx4(FP,map,map);
3006 }
3007 return map;
3008}
3009int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3010{
3011 if(!c||addr<0x80800000||addr>=0xC0000000) {
3012 emit_testimm(map,0x40000000);
3013 *jaddr=(int)out;
3014 emit_jne(0);
3015 }
3016}
3017
3018int gen_tlb_addr_w(int ar, int map) {
3019 if(map>=0) {
3020 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3021 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3022 }
3023}
3024
3025// Generate the address of the memory_map entry, relative to dynarec_local
3026generate_map_const(u_int addr,int reg) {
3027 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3028 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3029}
3030
3031/* Special assem */
3032
3033void shift_assemble_arm(int i,struct regstat *i_regs)
3034{
3035 if(rt1[i]) {
3036 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3037 {
3038 signed char s,t,shift;
3039 t=get_reg(i_regs->regmap,rt1[i]);
3040 s=get_reg(i_regs->regmap,rs1[i]);
3041 shift=get_reg(i_regs->regmap,rs2[i]);
3042 if(t>=0){
3043 if(rs1[i]==0)
3044 {
3045 emit_zeroreg(t);
3046 }
3047 else if(rs2[i]==0)
3048 {
3049 assert(s>=0);
3050 if(s!=t) emit_mov(s,t);
3051 }
3052 else
3053 {
3054 emit_andimm(shift,31,HOST_TEMPREG);
3055 if(opcode2[i]==4) // SLLV
3056 {
3057 emit_shl(s,HOST_TEMPREG,t);
3058 }
3059 if(opcode2[i]==6) // SRLV
3060 {
3061 emit_shr(s,HOST_TEMPREG,t);
3062 }
3063 if(opcode2[i]==7) // SRAV
3064 {
3065 emit_sar(s,HOST_TEMPREG,t);
3066 }
3067 }
3068 }
3069 } else { // DSLLV/DSRLV/DSRAV
3070 signed char sh,sl,th,tl,shift;
3071 th=get_reg(i_regs->regmap,rt1[i]|64);
3072 tl=get_reg(i_regs->regmap,rt1[i]);
3073 sh=get_reg(i_regs->regmap,rs1[i]|64);
3074 sl=get_reg(i_regs->regmap,rs1[i]);
3075 shift=get_reg(i_regs->regmap,rs2[i]);
3076 if(tl>=0){
3077 if(rs1[i]==0)
3078 {
3079 emit_zeroreg(tl);
3080 if(th>=0) emit_zeroreg(th);
3081 }
3082 else if(rs2[i]==0)
3083 {
3084 assert(sl>=0);
3085 if(sl!=tl) emit_mov(sl,tl);
3086 if(th>=0&&sh!=th) emit_mov(sh,th);
3087 }
3088 else
3089 {
3090 // FIXME: What if shift==tl ?
3091 assert(shift!=tl);
3092 int temp=get_reg(i_regs->regmap,-1);
3093 int real_th=th;
3094 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3095 assert(sl>=0);
3096 assert(sh>=0);
3097 emit_andimm(shift,31,HOST_TEMPREG);
3098 if(opcode2[i]==0x14) // DSLLV
3099 {
3100 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3101 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3102 emit_orrshr(sl,HOST_TEMPREG,th);
3103 emit_andimm(shift,31,HOST_TEMPREG);
3104 emit_testimm(shift,32);
3105 emit_shl(sl,HOST_TEMPREG,tl);
3106 if(th>=0) emit_cmovne_reg(tl,th);
3107 emit_cmovne_imm(0,tl);
3108 }
3109 if(opcode2[i]==0x16) // DSRLV
3110 {
3111 assert(th>=0);
3112 emit_shr(sl,HOST_TEMPREG,tl);
3113 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3114 emit_orrshl(sh,HOST_TEMPREG,tl);
3115 emit_andimm(shift,31,HOST_TEMPREG);
3116 emit_testimm(shift,32);
3117 emit_shr(sh,HOST_TEMPREG,th);
3118 emit_cmovne_reg(th,tl);
3119 if(real_th>=0) emit_cmovne_imm(0,th);
3120 }
3121 if(opcode2[i]==0x17) // DSRAV
3122 {
3123 assert(th>=0);
3124 emit_shr(sl,HOST_TEMPREG,tl);
3125 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3126 if(real_th>=0) {
3127 assert(temp>=0);
3128 emit_sarimm(th,31,temp);
3129 }
3130 emit_orrshl(sh,HOST_TEMPREG,tl);
3131 emit_andimm(shift,31,HOST_TEMPREG);
3132 emit_testimm(shift,32);
3133 emit_sar(sh,HOST_TEMPREG,th);
3134 emit_cmovne_reg(th,tl);
3135 if(real_th>=0) emit_cmovne_reg(temp,th);
3136 }
3137 }
3138 }
3139 }
3140 }
3141}
3142#define shift_assemble shift_assemble_arm
3143
3144void loadlr_assemble_arm(int i,struct regstat *i_regs)
3145{
3146 int s,th,tl,temp,temp2,addr,map=-1;
3147 int offset;
3148 int jaddr=0;
3149 int memtarget,c=0;
3150 u_int hr,reglist=0;
3151 th=get_reg(i_regs->regmap,rt1[i]|64);
3152 tl=get_reg(i_regs->regmap,rt1[i]);
3153 s=get_reg(i_regs->regmap,rs1[i]);
3154 temp=get_reg(i_regs->regmap,-1);
3155 temp2=get_reg(i_regs->regmap,FTEMP);
3156 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3157 assert(addr<0);
3158 offset=imm[i];
3159 for(hr=0;hr<HOST_REGS;hr++) {
3160 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3161 }
3162 reglist|=1<<temp;
3163 if(offset||s<0||c) addr=temp2;
3164 else addr=s;
3165 if(s>=0) {
3166 c=(i_regs->wasconst>>s)&1;
3167 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3168 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3169 }
3170 if(tl>=0) {
3171 //assert(tl>=0);
3172 //assert(rt1[i]);
3173 if(!using_tlb) {
3174 if(!c) {
3175 emit_shlimm(addr,3,temp);
3176 if (opcode[i]==0x22||opcode[i]==0x26) {
3177 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3178 }else{
3179 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3180 }
3181 emit_cmpimm(addr,0x800000);
3182 jaddr=(int)out;
3183 emit_jno(0);
3184 }
3185 else {
3186 if (opcode[i]==0x22||opcode[i]==0x26) {
3187 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3188 }else{
3189 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3190 }
3191 }
3192 }else{ // using tlb
3193 int a;
3194 if(c) {
3195 a=-1;
3196 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3197 a=0xFFFFFFFC; // LWL/LWR
3198 }else{
3199 a=0xFFFFFFF8; // LDL/LDR
3200 }
3201 map=get_reg(i_regs->regmap,TLREG);
3202 assert(map>=0);
3203 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3204 if(c) {
3205 if (opcode[i]==0x22||opcode[i]==0x26) {
3206 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3207 }else{
3208 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3209 }
3210 }
3211 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3212 }
3213 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3214 if(!c||memtarget) {
3215 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3216 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3217 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3218 }
3219 else
3220 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3221 emit_andimm(temp,24,temp);
2002a1db 3222#ifdef BIG_ENDIAN_MIPS
3223 if (opcode[i]==0x26) // LWR
3224#else
3225 if (opcode[i]==0x22) // LWL
3226#endif
3227 emit_xorimm(temp,24,temp);
57871462 3228 emit_movimm(-1,HOST_TEMPREG);
3229 if (opcode[i]==0x26) {
3230 emit_shr(temp2,temp,temp2);
3231 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3232 }else{
3233 emit_shl(temp2,temp,temp2);
3234 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3235 }
3236 emit_or(temp2,tl,tl);
3237 //emit_storereg(rt1[i],tl); // DEBUG
3238 }
3239 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3240 // FIXME: little endian
57871462 3241 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3242 if(!c||memtarget) {
3243 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3244 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3245 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3246 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3247 }
3248 else
3249 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3250 emit_testimm(temp,32);
3251 emit_andimm(temp,24,temp);
3252 if (opcode[i]==0x1A) { // LDL
3253 emit_rsbimm(temp,32,HOST_TEMPREG);
3254 emit_shl(temp2h,temp,temp2h);
3255 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3256 emit_movimm(-1,HOST_TEMPREG);
3257 emit_shl(temp2,temp,temp2);
3258 emit_cmove_reg(temp2h,th);
3259 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3260 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3261 emit_orreq(temp2,tl,tl);
3262 emit_orrne(temp2,th,th);
3263 }
3264 if (opcode[i]==0x1B) { // LDR
3265 emit_xorimm(temp,24,temp);
3266 emit_rsbimm(temp,32,HOST_TEMPREG);
3267 emit_shr(temp2,temp,temp2);
3268 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3269 emit_movimm(-1,HOST_TEMPREG);
3270 emit_shr(temp2h,temp,temp2h);
3271 emit_cmovne_reg(temp2,tl);
3272 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3273 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3274 emit_orrne(temp2h,th,th);
3275 emit_orreq(temp2h,tl,tl);
3276 }
3277 }
3278 }
3279}
3280#define loadlr_assemble loadlr_assemble_arm
3281
3282void cop0_assemble(int i,struct regstat *i_regs)
3283{
3284 if(opcode2[i]==0) // MFC0
3285 {
3286 signed char t=get_reg(i_regs->regmap,rt1[i]);
3287 char copr=(source[i]>>11)&0x1f;
3288 //assert(t>=0); // Why does this happen? OOT is weird
3289 if(t>=0) {
7139f3c8 3290#ifdef MUPEN64
57871462 3291 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3292 emit_movimm((source[i]>>11)&0x1f,1);
3293 emit_writeword(0,(int)&PC);
3294 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3295 if(copr==9) {
3296 emit_readword((int)&last_count,ECX);
3297 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3298 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3299 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3300 emit_writeword(HOST_CCREG,(int)&Count);
3301 }
3302 emit_call((int)MFC0);
3303 emit_readword((int)&readmem_dword,t);
7139f3c8 3304#else
3305 emit_readword((int)&reg_cop0+copr*4,t);
3306#endif
57871462 3307 }
3308 }
3309 else if(opcode2[i]==4) // MTC0
3310 {
3311 signed char s=get_reg(i_regs->regmap,rs1[i]);
3312 char copr=(source[i]>>11)&0x1f;
3313 assert(s>=0);
3314 emit_writeword(s,(int)&readmem_dword);
3315 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3d624f89 3316#ifdef MUPEN64 /// FIXME
57871462 3317 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3318 emit_movimm((source[i]>>11)&0x1f,1);
3319 emit_writeword(0,(int)&PC);
3320 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3d624f89 3321#endif
7139f3c8 3322#ifdef PCSX
3323 emit_movimm(source[i],0);
3324 emit_writeword(0,(int)&psxRegs.code);
3325#endif
3326 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3327 emit_readword((int)&last_count,ECX);
3328 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3329 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3330 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3331 emit_writeword(HOST_CCREG,(int)&Count);
3332 }
3333 // What a mess. The status register (12) can enable interrupts,
3334 // so needs a special case to handle a pending interrupt.
3335 // The interrupt must be taken immediately, because a subsequent
3336 // instruction might disable interrupts again.
7139f3c8 3337 if(copr==12||copr==13) {
57871462 3338 emit_movimm(start+i*4+4,0);
3339 emit_movimm(0,1);
3340 emit_writeword(0,(int)&pcaddr);
3341 emit_writeword(1,(int)&pending_exception);
3342 }
3343 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3344 //else
3345 emit_call((int)MTC0);
7139f3c8 3346 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3347 emit_readword((int)&Count,HOST_CCREG);
3348 emit_readword((int)&next_interupt,ECX);
3349 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3350 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3351 emit_writeword(ECX,(int)&last_count);
3352 emit_storereg(CCREG,HOST_CCREG);
3353 }
7139f3c8 3354 if(copr==12||copr==13) {
57871462 3355 assert(!is_delayslot);
3356 emit_readword((int)&pending_exception,14);
3357 }
3358 emit_loadreg(rs1[i],s);
3359 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3360 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3361 if(copr==12||copr==13) {
57871462 3362 emit_test(14,14);
3363 emit_jne((int)&do_interrupt);
3364 }
3365 cop1_usable=0;
3366 }
3367 else
3368 {
3369 assert(opcode2[i]==0x10);
3d624f89 3370#ifndef DISABLE_TLB
57871462 3371 if((source[i]&0x3f)==0x01) // TLBR
3372 emit_call((int)TLBR);
3373 if((source[i]&0x3f)==0x02) // TLBWI
3374 emit_call((int)TLBWI_new);
3375 if((source[i]&0x3f)==0x06) { // TLBWR
3376 // The TLB entry written by TLBWR is dependent on the count,
3377 // so update the cycle count
3378 emit_readword((int)&last_count,ECX);
3379 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3380 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3381 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3382 emit_writeword(HOST_CCREG,(int)&Count);
3383 emit_call((int)TLBWR_new);
3384 }
3385 if((source[i]&0x3f)==0x08) // TLBP
3386 emit_call((int)TLBP);
3d624f89 3387#endif
57871462 3388 if((source[i]&0x3f)==0x18) // ERET
3389 {
3390 int count=ccadj[i];
3391 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3392 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3393 emit_jmp((int)jump_eret);
3394 }
3395 }
3396}
3397
b9b61529 3398static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3399{
3400 switch (copr) {
3401 case 1:
3402 case 3:
3403 case 5:
3404 case 8:
3405 case 9:
3406 case 10:
3407 case 11:
3408 emit_readword((int)&reg_cop2d[copr],tl);
3409 emit_signextend16(tl,tl);
3410 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3411 break;
3412 case 7:
3413 case 16:
3414 case 17:
3415 case 18:
3416 case 19:
3417 emit_readword((int)&reg_cop2d[copr],tl);
3418 emit_andimm(tl,0xffff,tl);
3419 emit_writeword(tl,(int)&reg_cop2d[copr]);
3420 break;
3421 case 15:
3422 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3423 emit_writeword(tl,(int)&reg_cop2d[copr]);
3424 break;
3425 case 28:
3426 case 30:
3427 emit_movimm(0,tl);
3428 break;
3429 case 29:
3430 emit_readword((int)&reg_cop2d[9],temp);
3431 emit_testimm(temp,0x8000); // do we need this?
3432 emit_andimm(temp,0xf80,temp);
3433 emit_andne_imm(temp,0,temp);
3434 emit_shr(temp,7,tl);
3435 emit_readword((int)&reg_cop2d[10],temp);
3436 emit_testimm(temp,0x8000);
3437 emit_andimm(temp,0xf80,temp);
3438 emit_andne_imm(temp,0,temp);
3439 emit_orrshr(temp,2,tl);
3440 emit_readword((int)&reg_cop2d[11],temp);
3441 emit_testimm(temp,0x8000);
3442 emit_andimm(temp,0xf80,temp);
3443 emit_andne_imm(temp,0,temp);
3444 emit_orrshl(temp,3,tl);
3445 emit_writeword(tl,(int)&reg_cop2d[copr]);
3446 break;
3447 default:
3448 emit_readword((int)&reg_cop2d[copr],tl);
3449 break;
3450 }
3451}
3452
3453static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3454{
3455 switch (copr) {
3456 case 15:
3457 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3458 emit_writeword(sl,(int)&reg_cop2d[copr]);
3459 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3460 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3461 emit_writeword(sl,(int)&reg_cop2d[14]);
3462 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3463 break;
3464 case 28:
3465 emit_andimm(sl,0x001f,temp);
3466 emit_shl(temp,7,temp);
3467 emit_writeword(temp,(int)&reg_cop2d[9]);
3468 emit_andimm(sl,0x03e0,temp);
3469 emit_shl(temp,2,temp);
3470 emit_writeword(temp,(int)&reg_cop2d[10]);
3471 emit_andimm(sl,0x7c00,temp);
3472 emit_shr(temp,3,temp);
3473 emit_writeword(temp,(int)&reg_cop2d[11]);
3474 emit_writeword(sl,(int)&reg_cop2d[28]);
3475 break;
3476 case 30:
3477 emit_movs(sl,temp);
3478 emit_mvnmi(temp,temp);
3479 emit_clz(temp,temp);
3480 emit_writeword(sl,(int)&reg_cop2d[30]);
3481 emit_writeword(temp,(int)&reg_cop2d[31]);
3482 break;
3483 case 7:
3484 case 29:
3485 case 31:
3486 break;
3487 default:
3488 emit_writeword(sl,(int)&reg_cop2d[copr]);
3489 break;
3490 }
3491}
3492
3493void cop2_assemble(int i,struct regstat *i_regs)
3494{
3495 u_int copr=(source[i]>>11)&0x1f;
3496 signed char temp=get_reg(i_regs->regmap,-1);
3497 if (opcode2[i]==0) { // MFC2
3498 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3499 if(tl>=0)
3500 cop2_get_dreg(copr,tl,temp);
3501 }
3502 else if (opcode2[i]==4) { // MTC2
3503 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3504 cop2_put_dreg(copr,sl,temp);
3505 }
3506 else if (opcode2[i]==2) // CFC2
3507 {
3508 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3509 if(tl>=0)
3510 emit_readword((int)&reg_cop2c[copr],tl);
3511 }
3512 else if (opcode2[i]==6) // CTC2
3513 {
3514 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3515 switch(copr) {
3516 case 4:
3517 case 12:
3518 case 20:
3519 case 26:
3520 case 27:
3521 case 29:
3522 case 30:
3523 emit_signextend16(sl,temp);
3524 break;
3525 case 31:
3526 //value = value & 0x7ffff000;
3527 //if (value & 0x7f87e000) value |= 0x80000000;
3528 emit_shrimm(sl,12,temp);
3529 emit_shlimm(temp,12,temp);
3530 emit_testimm(temp,0x7f000000);
3531 emit_testeqimm(temp,0x00870000);
3532 emit_testeqimm(temp,0x0000e000);
3533 emit_orrne_imm(temp,0x80000000,temp);
3534 break;
3535 default:
3536 temp=sl;
3537 break;
3538 }
3539 emit_writeword(temp,(int)&reg_cop2c[copr]);
3540 assert(sl>=0);
3541 }
3542}
3543
3544void c2op_assemble(int i,struct regstat *i_regs)
3545{
3546 signed char temp=get_reg(i_regs->regmap,-1);
3547 u_int c2op=source[i]&0x3f;
3548 u_int hr,reglist=0;
3549 for(hr=0;hr<HOST_REGS;hr++) {
3550 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3551 }
3552 if(i==0||itype[i-1]!=C2OP)
3553 save_regs(reglist);
3554
3555 if (gte_handlers[c2op]!=NULL) {
3556 int cc=get_reg(i_regs->regmap,CCREG);
3557 emit_movimm(source[i],temp); // opcode
3558 if (cc>=0&&gte_cycletab[c2op])
3559 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3560 emit_writeword(temp,(int)&psxRegs.code);
3561 emit_call((int)gte_handlers[c2op]);
3562 }
3563
3564 if(i>=slen-1||itype[i+1]!=C2OP)
3565 restore_regs(reglist);
3566}
3567
3568void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3569{
3570 // XXX: should just just do the exception instead
3571 if(!cop1_usable) {
3572 int jaddr=(int)out;
3573 emit_jmp(0);
3574 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3575 cop1_usable=1;
3576 }
3577}
3578
57871462 3579void cop1_assemble(int i,struct regstat *i_regs)
3580{
3d624f89 3581#ifndef DISABLE_COP1
57871462 3582 // Check cop1 unusable
3583 if(!cop1_usable) {
3584 signed char rs=get_reg(i_regs->regmap,CSREG);
3585 assert(rs>=0);
3586 emit_testimm(rs,0x20000000);
3587 int jaddr=(int)out;
3588 emit_jeq(0);
3589 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3590 cop1_usable=1;
3591 }
3592 if (opcode2[i]==0) { // MFC1
3593 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3594 if(tl>=0) {
3595 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3596 emit_readword_indexed(0,tl,tl);
3597 }
3598 }
3599 else if (opcode2[i]==1) { // DMFC1
3600 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3601 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3602 if(tl>=0) {
3603 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3604 if(th>=0) emit_readword_indexed(4,tl,th);
3605 emit_readword_indexed(0,tl,tl);
3606 }
3607 }
3608 else if (opcode2[i]==4) { // MTC1
3609 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3610 signed char temp=get_reg(i_regs->regmap,-1);
3611 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3612 emit_writeword_indexed(sl,0,temp);
3613 }
3614 else if (opcode2[i]==5) { // DMTC1
3615 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3616 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3617 signed char temp=get_reg(i_regs->regmap,-1);
3618 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3619 emit_writeword_indexed(sh,4,temp);
3620 emit_writeword_indexed(sl,0,temp);
3621 }
3622 else if (opcode2[i]==2) // CFC1
3623 {
3624 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3625 if(tl>=0) {
3626 u_int copr=(source[i]>>11)&0x1f;
3627 if(copr==0) emit_readword((int)&FCR0,tl);
3628 if(copr==31) emit_readword((int)&FCR31,tl);
3629 }
3630 }
3631 else if (opcode2[i]==6) // CTC1
3632 {
3633 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3634 u_int copr=(source[i]>>11)&0x1f;
3635 assert(sl>=0);
3636 if(copr==31)
3637 {
3638 emit_writeword(sl,(int)&FCR31);
3639 // Set the rounding mode
3640 //FIXME
3641 //char temp=get_reg(i_regs->regmap,-1);
3642 //emit_andimm(sl,3,temp);
3643 //emit_fldcw_indexed((int)&rounding_modes,temp);
3644 }
3645 }
3d624f89 3646#else
3647 cop1_unusable(i, i_regs);
3648#endif
57871462 3649}
3650
3651void fconv_assemble_arm(int i,struct regstat *i_regs)
3652{
3d624f89 3653#ifndef DISABLE_COP1
57871462 3654 signed char temp=get_reg(i_regs->regmap,-1);
3655 assert(temp>=0);
3656 // Check cop1 unusable
3657 if(!cop1_usable) {
3658 signed char rs=get_reg(i_regs->regmap,CSREG);
3659 assert(rs>=0);
3660 emit_testimm(rs,0x20000000);
3661 int jaddr=(int)out;
3662 emit_jeq(0);
3663 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3664 cop1_usable=1;
3665 }
3666
3667 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3668 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3669 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3670 emit_flds(temp,15);
3671 emit_ftosizs(15,15); // float->int, truncate
3672 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3673 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3674 emit_fsts(15,temp);
3675 return;
3676 }
3677 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3678 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3679 emit_vldr(temp,7);
3680 emit_ftosizd(7,13); // double->int, truncate
3681 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3682 emit_fsts(13,temp);
3683 return;
3684 }
3685
3686 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3687 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3688 emit_flds(temp,13);
3689 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3690 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3691 emit_fsitos(13,15);
3692 emit_fsts(15,temp);
3693 return;
3694 }
3695 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3696 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3697 emit_flds(temp,13);
3698 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3699 emit_fsitod(13,7);
3700 emit_vstr(7,temp);
3701 return;
3702 }
3703
3704 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3705 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3706 emit_flds(temp,13);
3707 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3708 emit_fcvtds(13,7);
3709 emit_vstr(7,temp);
3710 return;
3711 }
3712 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3713 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3714 emit_vldr(temp,7);
3715 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3716 emit_fcvtsd(7,13);
3717 emit_fsts(13,temp);
3718 return;
3719 }
3720 #endif
3721
3722 // C emulation code
3723
3724 u_int hr,reglist=0;
3725 for(hr=0;hr<HOST_REGS;hr++) {
3726 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3727 }
3728 save_regs(reglist);
3729
3730 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3731 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3732 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3733 emit_call((int)cvt_s_w);
3734 }
3735 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3736 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3737 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3738 emit_call((int)cvt_d_w);
3739 }
3740 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3741 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3742 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3743 emit_call((int)cvt_s_l);
3744 }
3745 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3746 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3747 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3748 emit_call((int)cvt_d_l);
3749 }
3750
3751 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3752 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3753 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3754 emit_call((int)cvt_d_s);
3755 }
3756 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3757 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3758 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3759 emit_call((int)cvt_w_s);
3760 }
3761 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3762 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3763 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3764 emit_call((int)cvt_l_s);
3765 }
3766
3767 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3768 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3769 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3770 emit_call((int)cvt_s_d);
3771 }
3772 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3773 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3774 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3775 emit_call((int)cvt_w_d);
3776 }
3777 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3778 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3779 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3780 emit_call((int)cvt_l_d);
3781 }
3782
3783 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3784 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3785 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3786 emit_call((int)round_l_s);
3787 }
3788 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3789 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3790 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3791 emit_call((int)trunc_l_s);
3792 }
3793 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3794 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3795 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3796 emit_call((int)ceil_l_s);
3797 }
3798 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3799 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3800 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3801 emit_call((int)floor_l_s);
3802 }
3803 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3804 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3805 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3806 emit_call((int)round_w_s);
3807 }
3808 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3809 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3810 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3811 emit_call((int)trunc_w_s);
3812 }
3813 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3814 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3815 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3816 emit_call((int)ceil_w_s);
3817 }
3818 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3819 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3820 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3821 emit_call((int)floor_w_s);
3822 }
3823
3824 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3825 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3826 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3827 emit_call((int)round_l_d);
3828 }
3829 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3830 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3831 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3832 emit_call((int)trunc_l_d);
3833 }
3834 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3835 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3836 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3837 emit_call((int)ceil_l_d);
3838 }
3839 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3840 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3841 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3842 emit_call((int)floor_l_d);
3843 }
3844 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3845 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3846 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3847 emit_call((int)round_w_d);
3848 }
3849 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3850 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3851 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3852 emit_call((int)trunc_w_d);
3853 }
3854 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3855 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3856 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3857 emit_call((int)ceil_w_d);
3858 }
3859 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3860 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3861 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3862 emit_call((int)floor_w_d);
3863 }
3864
3865 restore_regs(reglist);
3d624f89 3866#else
3867 cop1_unusable(i, i_regs);
3868#endif
57871462 3869}
3870#define fconv_assemble fconv_assemble_arm
3871
3872void fcomp_assemble(int i,struct regstat *i_regs)
3873{
3d624f89 3874#ifndef DISABLE_COP1
57871462 3875 signed char fs=get_reg(i_regs->regmap,FSREG);
3876 signed char temp=get_reg(i_regs->regmap,-1);
3877 assert(temp>=0);
3878 // Check cop1 unusable
3879 if(!cop1_usable) {
3880 signed char cs=get_reg(i_regs->regmap,CSREG);
3881 assert(cs>=0);
3882 emit_testimm(cs,0x20000000);
3883 int jaddr=(int)out;
3884 emit_jeq(0);
3885 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3886 cop1_usable=1;
3887 }
3888
3889 if((source[i]&0x3f)==0x30) {
3890 emit_andimm(fs,~0x800000,fs);
3891 return;
3892 }
3893
3894 if((source[i]&0x3e)==0x38) {
3895 // sf/ngle - these should throw exceptions for NaNs
3896 emit_andimm(fs,~0x800000,fs);
3897 return;
3898 }
3899
3900 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3901 if(opcode2[i]==0x10) {
3902 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3903 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3904 emit_orimm(fs,0x800000,fs);
3905 emit_flds(temp,14);
3906 emit_flds(HOST_TEMPREG,15);
3907 emit_fcmps(14,15);
3908 emit_fmstat();
3909 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
3910 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
3911 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
3912 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
3913 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
3914 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
3915 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
3916 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
3917 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
3918 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
3919 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
3920 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
3921 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
3922 return;
3923 }
3924 if(opcode2[i]==0x11) {
3925 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3926 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
3927 emit_orimm(fs,0x800000,fs);
3928 emit_vldr(temp,6);
3929 emit_vldr(HOST_TEMPREG,7);
3930 emit_fcmpd(6,7);
3931 emit_fmstat();
3932 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
3933 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
3934 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
3935 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
3936 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
3937 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
3938 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
3939 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
3940 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
3941 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
3942 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
3943 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
3944 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
3945 return;
3946 }
3947 #endif
3948
3949 // C only
3950
3951 u_int hr,reglist=0;
3952 for(hr=0;hr<HOST_REGS;hr++) {
3953 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3954 }
3955 reglist&=~(1<<fs);
3956 save_regs(reglist);
3957 if(opcode2[i]==0x10) {
3958 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3959 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
3960 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3961 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3962 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3963 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3964 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3965 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3966 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3967 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3968 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3969 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3970 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3971 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3972 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3973 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3974 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3975 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3976 }
3977 if(opcode2[i]==0x11) {
3978 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3979 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
3980 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3981 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3982 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3983 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3984 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3985 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3986 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3987 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3988 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3989 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3990 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3991 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3992 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3993 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3994 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3995 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
3996 }
3997 restore_regs(reglist);
3998 emit_loadreg(FSREG,fs);
3d624f89 3999#else
4000 cop1_unusable(i, i_regs);
4001#endif
57871462 4002}
4003
4004void float_assemble(int i,struct regstat *i_regs)
4005{
3d624f89 4006#ifndef DISABLE_COP1
57871462 4007 signed char temp=get_reg(i_regs->regmap,-1);
4008 assert(temp>=0);
4009 // Check cop1 unusable
4010 if(!cop1_usable) {
4011 signed char cs=get_reg(i_regs->regmap,CSREG);
4012 assert(cs>=0);
4013 emit_testimm(cs,0x20000000);
4014 int jaddr=(int)out;
4015 emit_jeq(0);
4016 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4017 cop1_usable=1;
4018 }
4019
4020 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4021 if((source[i]&0x3f)==6) // mov
4022 {
4023 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4024 if(opcode2[i]==0x10) {
4025 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4026 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4027 emit_readword_indexed(0,temp,temp);
4028 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4029 }
4030 if(opcode2[i]==0x11) {
4031 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4032 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4033 emit_vldr(temp,7);
4034 emit_vstr(7,HOST_TEMPREG);
4035 }
4036 }
4037 return;
4038 }
4039
4040 if((source[i]&0x3f)>3)
4041 {
4042 if(opcode2[i]==0x10) {
4043 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4044 emit_flds(temp,15);
4045 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4046 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4047 }
4048 if((source[i]&0x3f)==4) // sqrt
4049 emit_fsqrts(15,15);
4050 if((source[i]&0x3f)==5) // abs
4051 emit_fabss(15,15);
4052 if((source[i]&0x3f)==7) // neg
4053 emit_fnegs(15,15);
4054 emit_fsts(15,temp);
4055 }
4056 if(opcode2[i]==0x11) {
4057 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4058 emit_vldr(temp,7);
4059 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4060 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4061 }
4062 if((source[i]&0x3f)==4) // sqrt
4063 emit_fsqrtd(7,7);
4064 if((source[i]&0x3f)==5) // abs
4065 emit_fabsd(7,7);
4066 if((source[i]&0x3f)==7) // neg
4067 emit_fnegd(7,7);
4068 emit_vstr(7,temp);
4069 }
4070 return;
4071 }
4072 if((source[i]&0x3f)<4)
4073 {
4074 if(opcode2[i]==0x10) {
4075 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4076 }
4077 if(opcode2[i]==0x11) {
4078 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4079 }
4080 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4081 if(opcode2[i]==0x10) {
4082 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4083 emit_flds(temp,15);
4084 emit_flds(HOST_TEMPREG,13);
4085 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4086 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4087 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4088 }
4089 }
4090 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4091 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4092 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4093 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4094 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4095 emit_fsts(15,HOST_TEMPREG);
4096 }else{
4097 emit_fsts(15,temp);
4098 }
4099 }
4100 else if(opcode2[i]==0x11) {
4101 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4102 emit_vldr(temp,7);
4103 emit_vldr(HOST_TEMPREG,6);
4104 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4105 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4106 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4107 }
4108 }
4109 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4110 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4111 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4112 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4113 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4114 emit_vstr(7,HOST_TEMPREG);
4115 }else{
4116 emit_vstr(7,temp);
4117 }
4118 }
4119 }
4120 else {
4121 if(opcode2[i]==0x10) {
4122 emit_flds(temp,15);
4123 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4124 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4125 }
4126 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4127 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4128 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4129 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4130 emit_fsts(15,temp);
4131 }
4132 else if(opcode2[i]==0x11) {
4133 emit_vldr(temp,7);
4134 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4135 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4136 }
4137 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4138 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4139 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4140 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4141 emit_vstr(7,temp);
4142 }
4143 }
4144 return;
4145 }
4146 #endif
4147
4148 u_int hr,reglist=0;
4149 for(hr=0;hr<HOST_REGS;hr++) {
4150 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4151 }
4152 if(opcode2[i]==0x10) { // Single precision
4153 save_regs(reglist);
4154 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4155 if((source[i]&0x3f)<4) {
4156 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4157 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4158 }else{
4159 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4160 }
4161 switch(source[i]&0x3f)
4162 {
4163 case 0x00: emit_call((int)add_s);break;
4164 case 0x01: emit_call((int)sub_s);break;
4165 case 0x02: emit_call((int)mul_s);break;
4166 case 0x03: emit_call((int)div_s);break;
4167 case 0x04: emit_call((int)sqrt_s);break;
4168 case 0x05: emit_call((int)abs_s);break;
4169 case 0x06: emit_call((int)mov_s);break;
4170 case 0x07: emit_call((int)neg_s);break;
4171 }
4172 restore_regs(reglist);
4173 }
4174 if(opcode2[i]==0x11) { // Double precision
4175 save_regs(reglist);
4176 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4177 if((source[i]&0x3f)<4) {
4178 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4179 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4180 }else{
4181 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4182 }
4183 switch(source[i]&0x3f)
4184 {
4185 case 0x00: emit_call((int)add_d);break;
4186 case 0x01: emit_call((int)sub_d);break;
4187 case 0x02: emit_call((int)mul_d);break;
4188 case 0x03: emit_call((int)div_d);break;
4189 case 0x04: emit_call((int)sqrt_d);break;
4190 case 0x05: emit_call((int)abs_d);break;
4191 case 0x06: emit_call((int)mov_d);break;
4192 case 0x07: emit_call((int)neg_d);break;
4193 }
4194 restore_regs(reglist);
4195 }
3d624f89 4196#else
4197 cop1_unusable(i, i_regs);
4198#endif
57871462 4199}
4200
4201void multdiv_assemble_arm(int i,struct regstat *i_regs)
4202{
4203 // case 0x18: MULT
4204 // case 0x19: MULTU
4205 // case 0x1A: DIV
4206 // case 0x1B: DIVU
4207 // case 0x1C: DMULT
4208 // case 0x1D: DMULTU
4209 // case 0x1E: DDIV
4210 // case 0x1F: DDIVU
4211 if(rs1[i]&&rs2[i])
4212 {
4213 if((opcode2[i]&4)==0) // 32-bit
4214 {
4215 if(opcode2[i]==0x18) // MULT
4216 {
4217 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4218 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4219 signed char hi=get_reg(i_regs->regmap,HIREG);
4220 signed char lo=get_reg(i_regs->regmap,LOREG);
4221 assert(m1>=0);
4222 assert(m2>=0);
4223 assert(hi>=0);
4224 assert(lo>=0);
4225 emit_smull(m1,m2,hi,lo);
4226 }
4227 if(opcode2[i]==0x19) // MULTU
4228 {
4229 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4230 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4231 signed char hi=get_reg(i_regs->regmap,HIREG);
4232 signed char lo=get_reg(i_regs->regmap,LOREG);
4233 assert(m1>=0);
4234 assert(m2>=0);
4235 assert(hi>=0);
4236 assert(lo>=0);
4237 emit_umull(m1,m2,hi,lo);
4238 }
4239 if(opcode2[i]==0x1A) // DIV
4240 {
4241 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4242 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4243 assert(d1>=0);
4244 assert(d2>=0);
4245 signed char quotient=get_reg(i_regs->regmap,LOREG);
4246 signed char remainder=get_reg(i_regs->regmap,HIREG);
4247 assert(quotient>=0);
4248 assert(remainder>=0);
4249 emit_movs(d1,remainder);
4250 emit_negmi(remainder,remainder);
4251 emit_movs(d2,HOST_TEMPREG);
4252 emit_jeq((int)out+52); // Division by zero
4253 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4254 emit_clz(HOST_TEMPREG,quotient);
4255 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4256 emit_orimm(quotient,1<<31,quotient);
4257 emit_shr(quotient,quotient,quotient);
4258 emit_cmp(remainder,HOST_TEMPREG);
4259 emit_subcs(remainder,HOST_TEMPREG,remainder);
4260 emit_adcs(quotient,quotient,quotient);
4261 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4262 emit_jcc((int)out-16); // -4
4263 emit_teq(d1,d2);
4264 emit_negmi(quotient,quotient);
4265 emit_test(d1,d1);
4266 emit_negmi(remainder,remainder);
4267 }
4268 if(opcode2[i]==0x1B) // DIVU
4269 {
4270 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4271 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4272 assert(d1>=0);
4273 assert(d2>=0);
4274 signed char quotient=get_reg(i_regs->regmap,LOREG);
4275 signed char remainder=get_reg(i_regs->regmap,HIREG);
4276 assert(quotient>=0);
4277 assert(remainder>=0);
4278 emit_test(d2,d2);
4279 emit_jeq((int)out+44); // Division by zero
4280 emit_clz(d2,HOST_TEMPREG);
4281 emit_movimm(1<<31,quotient);
4282 emit_shl(d2,HOST_TEMPREG,d2);
4283 emit_mov(d1,remainder);
4284 emit_shr(quotient,HOST_TEMPREG,quotient);
4285 emit_cmp(remainder,d2);
4286 emit_subcs(remainder,d2,remainder);
4287 emit_adcs(quotient,quotient,quotient);
4288 emit_shrcc_imm(d2,1,d2);
4289 emit_jcc((int)out-16); // -4
4290 }
4291 }
4292 else // 64-bit
4293 {
4294 if(opcode2[i]==0x1C) // DMULT
4295 {
4296 assert(opcode2[i]!=0x1C);
4297 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4298 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4299 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4300 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4301 assert(m1h>=0);
4302 assert(m2h>=0);
4303 assert(m1l>=0);
4304 assert(m2l>=0);
4305 emit_pushreg(m2h);
4306 emit_pushreg(m2l);
4307 emit_pushreg(m1h);
4308 emit_pushreg(m1l);
4309 emit_call((int)&mult64);
4310 emit_popreg(m1l);
4311 emit_popreg(m1h);
4312 emit_popreg(m2l);
4313 emit_popreg(m2h);
4314 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4315 signed char hil=get_reg(i_regs->regmap,HIREG);
4316 if(hih>=0) emit_loadreg(HIREG|64,hih);
4317 if(hil>=0) emit_loadreg(HIREG,hil);
4318 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4319 signed char lol=get_reg(i_regs->regmap,LOREG);
4320 if(loh>=0) emit_loadreg(LOREG|64,loh);
4321 if(lol>=0) emit_loadreg(LOREG,lol);
4322 }
4323 if(opcode2[i]==0x1D) // DMULTU
4324 {
4325 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4326 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4327 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4328 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4329 assert(m1h>=0);
4330 assert(m2h>=0);
4331 assert(m1l>=0);
4332 assert(m2l>=0);
4333 save_regs(0x100f);
4334 if(m1l!=0) emit_mov(m1l,0);
4335 if(m1h==0) emit_readword((int)&dynarec_local,1);
4336 else if(m1h>1) emit_mov(m1h,1);
4337 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4338 else if(m2l>2) emit_mov(m2l,2);
4339 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4340 else if(m2h>3) emit_mov(m2h,3);
4341 emit_call((int)&multu64);
4342 restore_regs(0x100f);
4343 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4344 signed char hil=get_reg(i_regs->regmap,HIREG);
4345 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4346 signed char lol=get_reg(i_regs->regmap,LOREG);
4347 /*signed char temp=get_reg(i_regs->regmap,-1);
4348 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4349 signed char rl=get_reg(i_regs->regmap,HIREG);
4350 assert(m1h>=0);
4351 assert(m2h>=0);
4352 assert(m1l>=0);
4353 assert(m2l>=0);
4354 assert(temp>=0);
4355 //emit_mov(m1l,EAX);
4356 //emit_mul(m2l);
4357 emit_umull(rl,rh,m1l,m2l);
4358 emit_storereg(LOREG,rl);
4359 emit_mov(rh,temp);
4360 //emit_mov(m1h,EAX);
4361 //emit_mul(m2l);
4362 emit_umull(rl,rh,m1h,m2l);
4363 emit_adds(rl,temp,temp);
4364 emit_adcimm(rh,0,rh);
4365 emit_storereg(HIREG,rh);
4366 //emit_mov(m2h,EAX);
4367 //emit_mul(m1l);
4368 emit_umull(rl,rh,m1l,m2h);
4369 emit_adds(rl,temp,temp);
4370 emit_adcimm(rh,0,rh);
4371 emit_storereg(LOREG|64,temp);
4372 emit_mov(rh,temp);
4373 //emit_mov(m2h,EAX);
4374 //emit_mul(m1h);
4375 emit_umull(rl,rh,m1h,m2h);
4376 emit_adds(rl,temp,rl);
4377 emit_loadreg(HIREG,temp);
4378 emit_adcimm(rh,0,rh);
4379 emit_adds(rl,temp,rl);
4380 emit_adcimm(rh,0,rh);
4381 // DEBUG
4382 /*
4383 emit_pushreg(m2h);
4384 emit_pushreg(m2l);
4385 emit_pushreg(m1h);
4386 emit_pushreg(m1l);
4387 emit_call((int)&multu64);
4388 emit_popreg(m1l);
4389 emit_popreg(m1h);
4390 emit_popreg(m2l);
4391 emit_popreg(m2h);
4392 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4393 signed char hil=get_reg(i_regs->regmap,HIREG);
4394 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4395 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4396 */
4397 // Shouldn't be necessary
4398 //char loh=get_reg(i_regs->regmap,LOREG|64);
4399 //char lol=get_reg(i_regs->regmap,LOREG);
4400 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4401 //if(lol>=0) emit_loadreg(LOREG,lol);
4402 }
4403 if(opcode2[i]==0x1E) // DDIV
4404 {
4405 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4406 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4407 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4408 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4409 assert(d1h>=0);
4410 assert(d2h>=0);
4411 assert(d1l>=0);
4412 assert(d2l>=0);
4413 save_regs(0x100f);
4414 if(d1l!=0) emit_mov(d1l,0);
4415 if(d1h==0) emit_readword((int)&dynarec_local,1);
4416 else if(d1h>1) emit_mov(d1h,1);
4417 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4418 else if(d2l>2) emit_mov(d2l,2);
4419 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4420 else if(d2h>3) emit_mov(d2h,3);
4421 emit_call((int)&div64);
4422 restore_regs(0x100f);
4423 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4424 signed char hil=get_reg(i_regs->regmap,HIREG);
4425 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4426 signed char lol=get_reg(i_regs->regmap,LOREG);
4427 if(hih>=0) emit_loadreg(HIREG|64,hih);
4428 if(hil>=0) emit_loadreg(HIREG,hil);
4429 if(loh>=0) emit_loadreg(LOREG|64,loh);
4430 if(lol>=0) emit_loadreg(LOREG,lol);
4431 }
4432 if(opcode2[i]==0x1F) // DDIVU
4433 {
4434 //u_int hr,reglist=0;
4435 //for(hr=0;hr<HOST_REGS;hr++) {
4436 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4437 //}
4438 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4439 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4440 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4441 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4442 assert(d1h>=0);
4443 assert(d2h>=0);
4444 assert(d1l>=0);
4445 assert(d2l>=0);
4446 save_regs(0x100f);
4447 if(d1l!=0) emit_mov(d1l,0);
4448 if(d1h==0) emit_readword((int)&dynarec_local,1);
4449 else if(d1h>1) emit_mov(d1h,1);
4450 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4451 else if(d2l>2) emit_mov(d2l,2);
4452 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4453 else if(d2h>3) emit_mov(d2h,3);
4454 emit_call((int)&divu64);
4455 restore_regs(0x100f);
4456 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4457 signed char hil=get_reg(i_regs->regmap,HIREG);
4458 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4459 signed char lol=get_reg(i_regs->regmap,LOREG);
4460 if(hih>=0) emit_loadreg(HIREG|64,hih);
4461 if(hil>=0) emit_loadreg(HIREG,hil);
4462 if(loh>=0) emit_loadreg(LOREG|64,loh);
4463 if(lol>=0) emit_loadreg(LOREG,lol);
4464 }
4465 }
4466 }
4467 else
4468 {
4469 // Multiply by zero is zero.
4470 // MIPS does not have a divide by zero exception.
4471 // The result is undefined, we return zero.
4472 signed char hr=get_reg(i_regs->regmap,HIREG);
4473 signed char lr=get_reg(i_regs->regmap,LOREG);
4474 if(hr>=0) emit_zeroreg(hr);
4475 if(lr>=0) emit_zeroreg(lr);
4476 }
4477}
4478#define multdiv_assemble multdiv_assemble_arm
4479
// Intentionally a no-op on ARM.  The x86 backend uses this hook to put the
// mask value 0xf8 into register r; here the hash is produced by the single
// instruction emitted in do_rhash(), so nothing has to be preloaded.
void do_preload_rhash(int r)
{
  (void)r; // unused on this architecture
}
4484
4485void do_preload_rhtbl(int ht) {
4486 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4487}
4488
// Emit the mini hash: rh = rs & 0xf8.  Keeping address bits 3..7 gives the
// byte offset of row ((addr&0xFF)>>3) in mini_ht, whose 32 rows hold two
// u_int each.
void do_rhash(int rs,int rh)
{
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4492
4493void do_miniht_load(int ht,int rh) {
4494 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4495 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4496}
4497
4498void do_miniht_jump(int rs,int rh,int ht) {
4499 emit_cmp(rh,rs);
4500 emit_ldreq_indexed(ht,4,15);
4501 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4502 emit_mov(rs,7);
4503 emit_jmp(jump_vaddr_reg[7]);
4504 #else
4505 emit_jmp(jump_vaddr_reg[rs]);
4506 #endif
4507}
4508
4509void do_miniht_insert(u_int return_address,int rt,int temp) {
4510 #ifdef ARMv5_ONLY
4511 emit_movimm(return_address,rt); // PC into link register
4512 add_to_linker((int)out,return_address,1);
4513 emit_pcreladdr(temp);
4514 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4515 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4516 #else
4517 emit_movw(return_address&0x0000FFFF,rt);
4518 add_to_linker((int)out,return_address,1);
4519 emit_pcreladdr(temp);
4520 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4521 emit_movt(return_address&0xFFFF0000,rt);
4522 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4523 #endif
4524}
4525
4526// Sign-extend to 64 bits and write out upper half of a register
4527// This is useful where we have a 32-bit value in a register, and want to
4528// keep it in a 32-bit register, but can't guarantee that it won't be read
4529// as a 64-bit value later.
4530void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4531{
24385cae 4532#ifndef FORCE32
57871462 4533 if(is32_pre==is32) return;
4534 int hr,reg;
4535 for(hr=0;hr<HOST_REGS;hr++) {
4536 if(hr!=EXCLUDE_REG) {
4537 //if(pre[hr]==entry[hr]) {
4538 if((reg=pre[hr])>=0) {
4539 if((dirty>>hr)&1) {
4540 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4541 emit_sarimm(hr,31,HOST_TEMPREG);
4542 emit_storereg(reg|64,HOST_TEMPREG);
4543 }
4544 }
4545 }
4546 //}
4547 }
4548 }
24385cae 4549#endif
57871462 4550}
4551
4552void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4553{
4554 //if(dirty_pre==dirty) return;
4555 int hr,reg,new_hr;
4556 for(hr=0;hr<HOST_REGS;hr++) {
4557 if(hr!=EXCLUDE_REG) {
4558 reg=pre[hr];
4559 if(((~u)>>(reg&63))&1) {
4560 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4561 if(((dirty_pre&~dirty)>>hr)&1) {
4562 if(reg>0&&reg<34) {
4563 emit_storereg(reg,hr);
4564 if( ((is32_pre&~uu)>>reg)&1 ) {
4565 emit_sarimm(hr,31,HOST_TEMPREG);
4566 emit_storereg(reg|64,HOST_TEMPREG);
4567 }
4568 }
4569 else if(reg>=64) {
4570 emit_storereg(reg,hr);
4571 }
4572 }
4573 }
4574 else // Check if register moved to a different register
4575 if((new_hr=get_reg(entry,reg))>=0) {
4576 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4577 if(reg>0&&reg<34) {
4578 emit_storereg(reg,hr);
4579 if( ((is32_pre&~uu)>>reg)&1 ) {
4580 emit_sarimm(hr,31,HOST_TEMPREG);
4581 emit_storereg(reg|64,HOST_TEMPREG);
4582 }
4583 }
4584 else if(reg>=64) {
4585 emit_storereg(reg,hr);
4586 }
4587 }
4588 }
4589 }
4590 }
4591 }
4592}
4593
4594
4595/* using strd could possibly help but you'd have to allocate registers in pairs
4596void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4597{
4598 int hr;
4599 int wrote=-1;
4600 for(hr=HOST_REGS-1;hr>=0;hr--) {
4601 if(hr!=EXCLUDE_REG) {
4602 if(pre[hr]!=entry[hr]) {
4603 if(pre[hr]>=0) {
4604 if((dirty>>hr)&1) {
4605 if(get_reg(entry,pre[hr])<0) {
4606 if(pre[hr]<64) {
4607 if(!((u>>pre[hr])&1)) {
4608 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4609 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4610 emit_sarimm(hr,31,hr+1);
4611 emit_strdreg(pre[hr],hr);
4612 }
4613 else
4614 emit_storereg(pre[hr],hr);
4615 }else{
4616 emit_storereg(pre[hr],hr);
4617 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4618 emit_sarimm(hr,31,hr);
4619 emit_storereg(pre[hr]|64,hr);
4620 }
4621 }
4622 }
4623 }else{
4624 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4625 emit_storereg(pre[hr],hr);
4626 }
4627 }
4628 wrote=hr;
4629 }
4630 }
4631 }
4632 }
4633 }
4634 }
4635 for(hr=0;hr<HOST_REGS;hr++) {
4636 if(hr!=EXCLUDE_REG) {
4637 if(pre[hr]!=entry[hr]) {
4638 if(pre[hr]>=0) {
4639 int nr;
4640 if((nr=get_reg(entry,pre[hr]))>=0) {
4641 emit_mov(hr,nr);
4642 }
4643 }
4644 }
4645 }
4646 }
4647}
4648#define wb_invalidate wb_invalidate_arm
4649*/
4650
4651// CPU-architecture-specific initialization
4652void arch_init() {
3d624f89 4653#ifndef DISABLE_COP1
57871462 4654 rounding_modes[0]=0x0<<22; // round
4655 rounding_modes[1]=0x3<<22; // trunc
4656 rounding_modes[2]=0x1<<22; // ceil
4657 rounding_modes[3]=0x2<<22; // floor
3d624f89 4658#endif
57871462 4659}
b9b61529 4660
4661// vim:shiftwidth=2:expandtab