try to make drc more configurable
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
145void kill_pointer(void *stub)
146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
153}
154
155int get_pointer(void *stub)
156{
157 //printf("get_pointer(%x)\n",(int)stub);
158 int *ptr=(int *)(stub+4);
159 assert((*ptr&0x0ff00000)==0x05900000);
160 u_int offset=*ptr&0xfff;
161 int **l_ptr=(void *)ptr+offset+8;
162 int *i_ptr=*l_ptr;
163 assert((*i_ptr&0x0f000000)==0x0a000000);
164 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
165}
166
167// Find the "clean" entry point from a "dirty" entry point
168// by skipping past the call to verify_code
169u_int get_clean_addr(int addr)
170{
171 int *ptr=(int *)addr;
172 #ifdef ARMv5_ONLY
173 ptr+=4;
174 #else
175 ptr+=6;
176 #endif
177 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
178 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
179 ptr++;
180 if((*ptr&0xFF000000)==0xea000000) {
181 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
182 }
183 return (u_int)ptr;
184}
185
186int verify_dirty(int addr)
187{
188 u_int *ptr=(u_int *)addr;
189 #ifdef ARMv5_ONLY
190 // get from literal pool
191 assert((*ptr&0xFFF00000)==0xe5900000);
192 u_int offset=*ptr&0xfff;
193 u_int *l_ptr=(void *)ptr+offset+8;
194 u_int source=l_ptr[0];
195 u_int copy=l_ptr[1];
196 u_int len=l_ptr[2];
197 ptr+=4;
198 #else
199 // ARMv7 movw/movt
200 assert((*ptr&0xFFF00000)==0xe3000000);
201 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
202 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
203 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
204 ptr+=6;
205 #endif
206 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
207 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
208 u_int verifier=(int)ptr+((*ptr<<8)>>6)+8; // get target of bl
209 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
210 unsigned int page=source>>12;
211 unsigned int map_value=memory_map[page];
212 if(map_value>=0x80000000) return 0;
213 while(page<((source+len-1)>>12)) {
214 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
215 }
216 source = source+(map_value<<2);
217 }
218 //printf("verify_dirty: %x %x %x\n",source,copy,len);
219 return !memcmp((void *)source,(void *)copy,len);
220}
221
222// This doesn't necessarily find all clean entry points, just
223// guarantees that it's not dirty
224int isclean(int addr)
225{
226 #ifdef ARMv5_ONLY
227 int *ptr=((u_int *)addr)+4;
228 #else
229 int *ptr=((u_int *)addr)+6;
230 #endif
231 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
232 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
233 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
236 return 1;
237}
238
239void get_bounds(int addr,u_int *start,u_int *end)
240{
241 u_int *ptr=(u_int *)addr;
242 #ifdef ARMv5_ONLY
243 // get from literal pool
244 assert((*ptr&0xFFF00000)==0xe5900000);
245 u_int offset=*ptr&0xfff;
246 u_int *l_ptr=(void *)ptr+offset+8;
247 u_int source=l_ptr[0];
248 //u_int copy=l_ptr[1];
249 u_int len=l_ptr[2];
250 ptr+=4;
251 #else
252 // ARMv7 movw/movt
253 assert((*ptr&0xFFF00000)==0xe3000000);
254 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
255 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
256 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
257 ptr+=6;
258 #endif
259 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
260 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
261 u_int verifier=(int)ptr+((*ptr<<8)>>6)+8; // get target of bl
262 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
263 if(memory_map[source>>12]>=0x80000000) source = 0;
264 else source = source+(memory_map[source>>12]<<2);
265 }
266 *start=source;
267 *end=source+len;
268}
269
270/* Register allocation */
271
272// Note: registers are allocated clean (unmodified state)
273// if you intend to modify the register, you must call dirty_reg().
274void alloc_reg(struct regstat *cur,int i,signed char reg)
275{
276 int r,hr;
277 int preferred_reg = (reg&7);
278 if(reg==CCREG) preferred_reg=HOST_CCREG;
279 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
280
281 // Don't allocate unused registers
282 if((cur->u>>reg)&1) return;
283
284 // see if it's already allocated
285 for(hr=0;hr<HOST_REGS;hr++)
286 {
287 if(cur->regmap[hr]==reg) return;
288 }
289
290 // Keep the same mapping if the register was already allocated in a loop
291 preferred_reg = loop_reg(i,reg,preferred_reg);
292
293 // Try to allocate the preferred register
294 if(cur->regmap[preferred_reg]==-1) {
295 cur->regmap[preferred_reg]=reg;
296 cur->dirty&=~(1<<preferred_reg);
297 cur->isconst&=~(1<<preferred_reg);
298 return;
299 }
300 r=cur->regmap[preferred_reg];
301 if(r<64&&((cur->u>>r)&1)) {
302 cur->regmap[preferred_reg]=reg;
303 cur->dirty&=~(1<<preferred_reg);
304 cur->isconst&=~(1<<preferred_reg);
305 return;
306 }
307 if(r>=64&&((cur->uu>>(r&63))&1)) {
308 cur->regmap[preferred_reg]=reg;
309 cur->dirty&=~(1<<preferred_reg);
310 cur->isconst&=~(1<<preferred_reg);
311 return;
312 }
313
314 // Clear any unneeded registers
315 // We try to keep the mapping consistent, if possible, because it
316 // makes branches easier (especially loops). So we try to allocate
317 // first (see above) before removing old mappings. If this is not
318 // possible then go ahead and clear out the registers that are no
319 // longer needed.
320 for(hr=0;hr<HOST_REGS;hr++)
321 {
322 r=cur->regmap[hr];
323 if(r>=0) {
324 if(r<64) {
325 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
326 }
327 else
328 {
329 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
330 }
331 }
332 }
333 // Try to allocate any available register, but prefer
334 // registers that have not been used recently.
335 if(i>0) {
336 for(hr=0;hr<HOST_REGS;hr++) {
337 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
338 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
339 cur->regmap[hr]=reg;
340 cur->dirty&=~(1<<hr);
341 cur->isconst&=~(1<<hr);
342 return;
343 }
344 }
345 }
346 }
347 // Try to allocate any available register
348 for(hr=0;hr<HOST_REGS;hr++) {
349 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
350 cur->regmap[hr]=reg;
351 cur->dirty&=~(1<<hr);
352 cur->isconst&=~(1<<hr);
353 return;
354 }
355 }
356
357 // Ok, now we have to evict someone
358 // Pick a register we hopefully won't need soon
359 u_char hsn[MAXREG+1];
360 memset(hsn,10,sizeof(hsn));
361 int j;
362 lsn(hsn,i,&preferred_reg);
363 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
364 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
365 if(i>0) {
366 // Don't evict the cycle count at entry points, otherwise the entry
367 // stub will have to write it.
368 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
369 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
370 for(j=10;j>=3;j--)
371 {
372 // Alloc preferred register if available
373 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
374 for(hr=0;hr<HOST_REGS;hr++) {
375 // Evict both parts of a 64-bit register
376 if((cur->regmap[hr]&63)==r) {
377 cur->regmap[hr]=-1;
378 cur->dirty&=~(1<<hr);
379 cur->isconst&=~(1<<hr);
380 }
381 }
382 cur->regmap[preferred_reg]=reg;
383 return;
384 }
385 for(r=1;r<=MAXREG;r++)
386 {
387 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
390 if(cur->regmap[hr]==r+64) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
397 }
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
400 if(cur->regmap[hr]==r) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 }
410 }
411 }
412 for(j=10;j>=0;j--)
413 {
414 for(r=1;r<=MAXREG;r++)
415 {
416 if(hsn[r]==j) {
417 for(hr=0;hr<HOST_REGS;hr++) {
418 if(cur->regmap[hr]==r+64) {
419 cur->regmap[hr]=reg;
420 cur->dirty&=~(1<<hr);
421 cur->isconst&=~(1<<hr);
422 return;
423 }
424 }
425 for(hr=0;hr<HOST_REGS;hr++) {
426 if(cur->regmap[hr]==r) {
427 cur->regmap[hr]=reg;
428 cur->dirty&=~(1<<hr);
429 cur->isconst&=~(1<<hr);
430 return;
431 }
432 }
433 }
434 }
435 }
436 printf("This shouldn't happen (alloc_reg)");exit(1);
437}
438
439void alloc_reg64(struct regstat *cur,int i,signed char reg)
440{
441 int preferred_reg = 8+(reg&1);
442 int r,hr;
443
444 // allocate the lower 32 bits
445 alloc_reg(cur,i,reg);
446
447 // Don't allocate unused registers
448 if((cur->uu>>reg)&1) return;
449
450 // see if the upper half is already allocated
451 for(hr=0;hr<HOST_REGS;hr++)
452 {
453 if(cur->regmap[hr]==reg+64) return;
454 }
455
456 // Keep the same mapping if the register was already allocated in a loop
457 preferred_reg = loop_reg(i,reg,preferred_reg);
458
459 // Try to allocate the preferred register
460 if(cur->regmap[preferred_reg]==-1) {
461 cur->regmap[preferred_reg]=reg|64;
462 cur->dirty&=~(1<<preferred_reg);
463 cur->isconst&=~(1<<preferred_reg);
464 return;
465 }
466 r=cur->regmap[preferred_reg];
467 if(r<64&&((cur->u>>r)&1)) {
468 cur->regmap[preferred_reg]=reg|64;
469 cur->dirty&=~(1<<preferred_reg);
470 cur->isconst&=~(1<<preferred_reg);
471 return;
472 }
473 if(r>=64&&((cur->uu>>(r&63))&1)) {
474 cur->regmap[preferred_reg]=reg|64;
475 cur->dirty&=~(1<<preferred_reg);
476 cur->isconst&=~(1<<preferred_reg);
477 return;
478 }
479
480 // Clear any unneeded registers
481 // We try to keep the mapping consistent, if possible, because it
482 // makes branches easier (especially loops). So we try to allocate
483 // first (see above) before removing old mappings. If this is not
484 // possible then go ahead and clear out the registers that are no
485 // longer needed.
486 for(hr=HOST_REGS-1;hr>=0;hr--)
487 {
488 r=cur->regmap[hr];
489 if(r>=0) {
490 if(r<64) {
491 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
492 }
493 else
494 {
495 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
496 }
497 }
498 }
499 // Try to allocate any available register, but prefer
500 // registers that have not been used recently.
501 if(i>0) {
502 for(hr=0;hr<HOST_REGS;hr++) {
503 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
504 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
505 cur->regmap[hr]=reg|64;
506 cur->dirty&=~(1<<hr);
507 cur->isconst&=~(1<<hr);
508 return;
509 }
510 }
511 }
512 }
513 // Try to allocate any available register
514 for(hr=0;hr<HOST_REGS;hr++) {
515 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
516 cur->regmap[hr]=reg|64;
517 cur->dirty&=~(1<<hr);
518 cur->isconst&=~(1<<hr);
519 return;
520 }
521 }
522
523 // Ok, now we have to evict someone
524 // Pick a register we hopefully won't need soon
525 u_char hsn[MAXREG+1];
526 memset(hsn,10,sizeof(hsn));
527 int j;
528 lsn(hsn,i,&preferred_reg);
529 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
530 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
531 if(i>0) {
532 // Don't evict the cycle count at entry points, otherwise the entry
533 // stub will have to write it.
534 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
535 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
536 for(j=10;j>=3;j--)
537 {
538 // Alloc preferred register if available
539 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
540 for(hr=0;hr<HOST_REGS;hr++) {
541 // Evict both parts of a 64-bit register
542 if((cur->regmap[hr]&63)==r) {
543 cur->regmap[hr]=-1;
544 cur->dirty&=~(1<<hr);
545 cur->isconst&=~(1<<hr);
546 }
547 }
548 cur->regmap[preferred_reg]=reg|64;
549 return;
550 }
551 for(r=1;r<=MAXREG;r++)
552 {
553 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
556 if(cur->regmap[hr]==r+64) {
557 cur->regmap[hr]=reg|64;
558 cur->dirty&=~(1<<hr);
559 cur->isconst&=~(1<<hr);
560 return;
561 }
562 }
563 }
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
566 if(cur->regmap[hr]==r) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 }
576 }
577 }
578 for(j=10;j>=0;j--)
579 {
580 for(r=1;r<=MAXREG;r++)
581 {
582 if(hsn[r]==j) {
583 for(hr=0;hr<HOST_REGS;hr++) {
584 if(cur->regmap[hr]==r+64) {
585 cur->regmap[hr]=reg|64;
586 cur->dirty&=~(1<<hr);
587 cur->isconst&=~(1<<hr);
588 return;
589 }
590 }
591 for(hr=0;hr<HOST_REGS;hr++) {
592 if(cur->regmap[hr]==r) {
593 cur->regmap[hr]=reg|64;
594 cur->dirty&=~(1<<hr);
595 cur->isconst&=~(1<<hr);
596 return;
597 }
598 }
599 }
600 }
601 }
602 printf("This shouldn't happen");exit(1);
603}
604
605// Allocate a temporary register. This is done without regard to
606// dirty status or whether the register we request is on the unneeded list
607// Note: This will only allocate one register, even if called multiple times
608void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
609{
610 int r,hr;
611 int preferred_reg = -1;
612
613 // see if it's already allocated
614 for(hr=0;hr<HOST_REGS;hr++)
615 {
616 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
617 }
618
619 // Try to allocate any available register
620 for(hr=HOST_REGS-1;hr>=0;hr--) {
621 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
622 cur->regmap[hr]=reg;
623 cur->dirty&=~(1<<hr);
624 cur->isconst&=~(1<<hr);
625 return;
626 }
627 }
628
629 // Find an unneeded register
630 for(hr=HOST_REGS-1;hr>=0;hr--)
631 {
632 r=cur->regmap[hr];
633 if(r>=0) {
634 if(r<64) {
635 if((cur->u>>r)&1) {
636 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
637 cur->regmap[hr]=reg;
638 cur->dirty&=~(1<<hr);
639 cur->isconst&=~(1<<hr);
640 return;
641 }
642 }
643 }
644 else
645 {
646 if((cur->uu>>(r&63))&1) {
647 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
648 cur->regmap[hr]=reg;
649 cur->dirty&=~(1<<hr);
650 cur->isconst&=~(1<<hr);
651 return;
652 }
653 }
654 }
655 }
656 }
657
658 // Ok, now we have to evict someone
659 // Pick a register we hopefully won't need soon
660 // TODO: we might want to follow unconditional jumps here
661 // TODO: get rid of dupe code and make this into a function
662 u_char hsn[MAXREG+1];
663 memset(hsn,10,sizeof(hsn));
664 int j;
665 lsn(hsn,i,&preferred_reg);
666 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
667 if(i>0) {
668 // Don't evict the cycle count at entry points, otherwise the entry
669 // stub will have to write it.
670 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
671 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
672 for(j=10;j>=3;j--)
673 {
674 for(r=1;r<=MAXREG;r++)
675 {
676 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
677 for(hr=0;hr<HOST_REGS;hr++) {
678 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
679 if(cur->regmap[hr]==r+64) {
680 cur->regmap[hr]=reg;
681 cur->dirty&=~(1<<hr);
682 cur->isconst&=~(1<<hr);
683 return;
684 }
685 }
686 }
687 for(hr=0;hr<HOST_REGS;hr++) {
688 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
689 if(cur->regmap[hr]==r) {
690 cur->regmap[hr]=reg;
691 cur->dirty&=~(1<<hr);
692 cur->isconst&=~(1<<hr);
693 return;
694 }
695 }
696 }
697 }
698 }
699 }
700 }
701 for(j=10;j>=0;j--)
702 {
703 for(r=1;r<=MAXREG;r++)
704 {
705 if(hsn[r]==j) {
706 for(hr=0;hr<HOST_REGS;hr++) {
707 if(cur->regmap[hr]==r+64) {
708 cur->regmap[hr]=reg;
709 cur->dirty&=~(1<<hr);
710 cur->isconst&=~(1<<hr);
711 return;
712 }
713 }
714 for(hr=0;hr<HOST_REGS;hr++) {
715 if(cur->regmap[hr]==r) {
716 cur->regmap[hr]=reg;
717 cur->dirty&=~(1<<hr);
718 cur->isconst&=~(1<<hr);
719 return;
720 }
721 }
722 }
723 }
724 }
725 printf("This shouldn't happen");exit(1);
726}
727// Allocate a specific ARM register.
728void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
729{
730 int n;
731
732 // see if it's already allocated (and dealloc it)
733 for(n=0;n<HOST_REGS;n++)
734 {
735 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
736 }
737
738 cur->regmap[hr]=reg;
739 cur->dirty&=~(1<<hr);
740 cur->isconst&=~(1<<hr);
741}
742
743// Alloc cycle count into dedicated register
744alloc_cc(struct regstat *cur,int i)
745{
746 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
747}
748
749/* Special alloc */
750
751
752/* Assembler */
753
// Printable name of each ARM register, indexed by register number;
// used by the assem_debug() traces.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
771
772void output_byte(u_char byte)
773{
774 *(out++)=byte;
775}
776void output_modrm(u_char mod,u_char rm,u_char ext)
777{
778 assert(mod<4);
779 assert(rm<8);
780 assert(ext<8);
781 u_char byte=(mod<<6)|(ext<<3)|rm;
782 *(out++)=byte;
783}
784void output_sib(u_char scale,u_char index,u_char base)
785{
786 assert(scale<4);
787 assert(index<8);
788 assert(base<8);
789 u_char byte=(scale<<6)|(index<<3)|base;
790 *(out++)=byte;
791}
792void output_w32(u_int word)
793{
794 *((u_int *)out)=word;
795 out+=4;
796}
// Pack three register numbers into instruction operand fields:
// rn at bits 16-19, rd at bits 12-15 and rm at bits 0-3.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  const u_int rn_field=rn<<16;
  const u_int rd_field=rd<<12;
  return rn_field|rd_field|rm;
}
// Pack rd, rn and an 8-bit immediate that is to be shifted left by an even
// amount. The shift is stored as ((32-shift)&30)<<7, i.e. in the field
// at bits 8-11; imm occupies bits 0-7.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_field|imm;
}
// Try to express imm as an 8-bit value plus an even rotation.
// On success the 12-bit operand field is written to *encoded and 1 is
// returned; if no rotation brings imm below 256, 0 is returned and
// *encoded is left untouched.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
826u_int genjmp(u_int addr)
827{
828 int offset=addr-(int)out-8;
829 if(offset<-33554432||offset>=33554432) return 0;
830 return ((u_int)offset>>2)&0xffffff;
831}
832
833void emit_mov(int rs,int rt)
834{
835 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
836 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
837}
838
839void emit_movs(int rs,int rt)
840{
841 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
842 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
843}
844
845void emit_add(int rs1,int rs2,int rt)
846{
847 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
848 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
849}
850
851void emit_adds(int rs1,int rs2,int rt)
852{
853 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
854 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
855}
856
857void emit_adcs(int rs1,int rs2,int rt)
858{
859 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_sbc(int rs1,int rs2,int rt)
864{
865 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_sbcs(int rs1,int rs2,int rt)
870{
871 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_neg(int rs, int rt)
876{
877 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
878 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
879}
880
881void emit_negs(int rs, int rt)
882{
883 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
884 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
885}
886
887void emit_sub(int rs1,int rs2,int rt)
888{
889 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
890 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
891}
892
893void emit_subs(int rs1,int rs2,int rt)
894{
895 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
896 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
897}
898
899void emit_zeroreg(int rt)
900{
901 assem_debug("mov %s,#0\n",regname[rt]);
902 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
903}
904
905void emit_loadreg(int r, int hr)
906{
3d624f89 907#ifdef FORCE32
908 if(r&64) {
909 printf("64bit load in 32bit mode!\n");
910 exit(1);
911 }
912#endif
57871462 913 if((r&63)==0)
914 emit_zeroreg(hr);
915 else {
3d624f89 916 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 917 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
918 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
919 if(r==CCREG) addr=(int)&cycle_count;
920 if(r==CSREG) addr=(int)&Status;
921 if(r==FSREG) addr=(int)&FCR31;
922 if(r==INVCP) addr=(int)&invc_ptr;
923 u_int offset = addr-(u_int)&dynarec_local;
924 assert(offset<4096);
925 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
926 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
927 }
928}
929void emit_storereg(int r, int hr)
930{
3d624f89 931#ifdef FORCE32
932 if(r&64) {
933 printf("64bit store in 32bit mode!\n");
934 exit(1);
935 }
936#endif
937 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 938 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
939 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
940 if(r==CCREG) addr=(int)&cycle_count;
941 if(r==FSREG) addr=(int)&FCR31;
942 u_int offset = addr-(u_int)&dynarec_local;
943 assert(offset<4096);
944 assem_debug("str %s,fp+%d\n",regname[hr],offset);
945 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
946}
947
948void emit_test(int rs, int rt)
949{
950 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
951 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
952}
953
954void emit_testimm(int rs,int imm)
955{
956 u_int armval;
957 assem_debug("tst %s,$%d\n",regname[rs],imm);
958 assert(genimm(imm,&armval));
959 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
960}
961
962void emit_not(int rs,int rt)
963{
964 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
965 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
966}
967
968void emit_and(u_int rs1,u_int rs2,u_int rt)
969{
970 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
971 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
972}
973
974void emit_or(u_int rs1,u_int rs2,u_int rt)
975{
976 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
977 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
978}
979void emit_or_and_set_flags(int rs1,int rs2,int rt)
980{
981 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
982 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
983}
984
985void emit_xor(u_int rs1,u_int rs2,u_int rt)
986{
987 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
988 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
989}
990
991void emit_loadlp(u_int imm,u_int rt)
992{
993 add_literal((int)out,imm);
994 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
995 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
996}
997void emit_movw(u_int imm,u_int rt)
998{
999 assert(imm<65536);
1000 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1001 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1002}
1003void emit_movt(u_int imm,u_int rt)
1004{
1005 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1006 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1007}
1008void emit_movimm(u_int imm,u_int rt)
1009{
1010 u_int armval;
1011 if(genimm(imm,&armval)) {
1012 assem_debug("mov %s,#%d\n",regname[rt],imm);
1013 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1014 }else if(genimm(~imm,&armval)) {
1015 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1016 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1017 }else if(imm<65536) {
1018 #ifdef ARMv5_ONLY
1019 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1020 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1021 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1022 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1023 #else
1024 emit_movw(imm,rt);
1025 #endif
1026 }else{
1027 #ifdef ARMv5_ONLY
1028 emit_loadlp(imm,rt);
1029 #else
1030 emit_movw(imm&0x0000FFFF,rt);
1031 emit_movt(imm&0xFFFF0000,rt);
1032 #endif
1033 }
1034}
1035void emit_pcreladdr(u_int rt)
1036{
1037 assem_debug("add %s,pc,#?\n",regname[rt]);
1038 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1039}
1040
1041void emit_addimm(u_int rs,int imm,u_int rt)
1042{
1043 assert(rs<16);
1044 assert(rt<16);
1045 if(imm!=0) {
1046 assert(imm>-65536&&imm<65536);
1047 u_int armval;
1048 if(genimm(imm,&armval)) {
1049 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1050 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1051 }else if(genimm(-imm,&armval)) {
1052 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1053 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1054 }else if(imm<0) {
1055 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1056 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1057 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1058 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1059 }else{
1060 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1061 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1062 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1063 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1064 }
1065 }
1066 else if(rs!=rt) emit_mov(rs,rt);
1067}
1068
1069void emit_addimm_and_set_flags(int imm,int rt)
1070{
1071 assert(imm>-65536&&imm<65536);
1072 u_int armval;
1073 if(genimm(imm,&armval)) {
1074 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1075 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1076 }else if(genimm(-imm,&armval)) {
1077 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1078 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1079 }else if(imm<0) {
1080 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1081 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1082 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1083 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1084 }else{
1085 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1086 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1087 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1088 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1089 }
1090}
1091void emit_addimm_no_flags(u_int imm,u_int rt)
1092{
1093 emit_addimm(rt,imm,rt);
1094}
1095
1096void emit_addnop(u_int r)
1097{
1098 assert(r<16);
1099 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1100 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1101}
1102
1103void emit_adcimm(u_int rs,int imm,u_int rt)
1104{
1105 u_int armval;
1106 assert(genimm(imm,&armval));
1107 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1108 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1109}
1110/*void emit_sbcimm(int imm,u_int rt)
1111{
1112 u_int armval;
1113 assert(genimm(imm,&armval));
1114 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1115 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1116}*/
1117void emit_sbbimm(int imm,u_int rt)
1118{
1119 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1120 assert(rt<8);
1121 if(imm<128&&imm>=-128) {
1122 output_byte(0x83);
1123 output_modrm(3,rt,3);
1124 output_byte(imm);
1125 }
1126 else
1127 {
1128 output_byte(0x81);
1129 output_modrm(3,rt,3);
1130 output_w32(imm);
1131 }
1132}
1133void emit_rscimm(int rs,int imm,u_int rt)
1134{
1135 assert(0);
1136 u_int armval;
1137 assert(genimm(imm,&armval));
1138 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1139 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1140}
1141
1142void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1143{
1144 // TODO: if(genimm(imm,&armval)) ...
1145 // else
1146 emit_movimm(imm,HOST_TEMPREG);
1147 emit_adds(HOST_TEMPREG,rsl,rtl);
1148 emit_adcimm(rsh,0,rth);
1149}
1150
1151void emit_sbb(int rs1,int rs2)
1152{
1153 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1154 output_byte(0x19);
1155 output_modrm(3,rs1,rs2);
1156}
1157
1158void emit_andimm(int rs,int imm,int rt)
1159{
1160 u_int armval;
1161 if(genimm(imm,&armval)) {
1162 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1163 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1164 }else if(genimm(~imm,&armval)) {
1165 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1166 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1167 }else if(imm==65535) {
1168 #ifdef ARMv5_ONLY
1169 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1170 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1171 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1172 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1173 #else
1174 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1175 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1176 #endif
1177 }else{
1178 assert(imm>0&&imm<65535);
1179 #ifdef ARMv5_ONLY
1180 assem_debug("mov r14,#%d\n",imm&0xFF00);
1181 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1182 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1183 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1184 #else
1185 emit_movw(imm,HOST_TEMPREG);
1186 #endif
1187 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1188 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1189 }
1190}
1191
1192void emit_orimm(int rs,int imm,int rt)
1193{
1194 u_int armval;
1195 if(genimm(imm,&armval)) {
1196 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1197 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1198 }else{
1199 assert(imm>0&&imm<65536);
1200 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1201 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1202 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1203 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1204 }
1205}
1206
1207void emit_xorimm(int rs,int imm,int rt)
1208{
1209 assert(imm>0&&imm<65536);
1210 u_int armval;
1211 if(genimm(imm,&armval)) {
1212 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1213 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1214 }else{
1215 assert(imm>0);
1216 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1217 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1218 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1219 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1220 }
1221}
1222
1223void emit_shlimm(int rs,u_int imm,int rt)
1224{
1225 assert(imm>0);
1226 assert(imm<32);
1227 //if(imm==1) ...
1228 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1229 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1230}
1231
1232void emit_shrimm(int rs,u_int imm,int rt)
1233{
1234 assert(imm>0);
1235 assert(imm<32);
1236 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1237 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1238}
1239
1240void emit_sarimm(int rs,u_int imm,int rt)
1241{
1242 assert(imm>0);
1243 assert(imm<32);
1244 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1246}
1247
1248void emit_rorimm(int rs,u_int imm,int rt)
1249{
1250 assert(imm>0);
1251 assert(imm<32);
1252 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1253 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1254}
1255
1256void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1257{
1258 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1259 assert(imm>0);
1260 assert(imm<32);
1261 //if(imm==1) ...
1262 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1263 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1264 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1265 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1266}
1267
1268void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1269{
1270 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1271 assert(imm>0);
1272 assert(imm<32);
1273 //if(imm==1) ...
1274 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1275 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1276 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1277 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1278}
1279
1280void emit_shl(u_int rs,u_int shift,u_int rt)
1281{
1282 assert(rs<16);
1283 assert(rt<16);
1284 assert(shift<16);
1285 //if(imm==1) ...
1286 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1287 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1288}
1289void emit_shr(u_int rs,u_int shift,u_int rt)
1290{
1291 assert(rs<16);
1292 assert(rt<16);
1293 assert(shift<16);
1294 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1295 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1296}
1297void emit_sar(u_int rs,u_int shift,u_int rt)
1298{
1299 assert(rs<16);
1300 assert(rt<16);
1301 assert(shift<16);
1302 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1303 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1304}
1305void emit_shlcl(int r)
1306{
1307 assem_debug("shl %%%s,%%cl\n",regname[r]);
1308 assert(0);
1309}
1310void emit_shrcl(int r)
1311{
1312 assem_debug("shr %%%s,%%cl\n",regname[r]);
1313 assert(0);
1314}
1315void emit_sarcl(int r)
1316{
1317 assem_debug("sar %%%s,%%cl\n",regname[r]);
1318 assert(0);
1319}
1320
1321void emit_shldcl(int r1,int r2)
1322{
1323 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1324 assert(0);
1325}
1326void emit_shrdcl(int r1,int r2)
1327{
1328 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1329 assert(0);
1330}
1331void emit_orrshl(u_int rs,u_int shift,u_int rt)
1332{
1333 assert(rs<16);
1334 assert(rt<16);
1335 assert(shift<16);
1336 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1337 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1338}
1339void emit_orrshr(u_int rs,u_int shift,u_int rt)
1340{
1341 assert(rs<16);
1342 assert(rt<16);
1343 assert(shift<16);
1344 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1345 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1346}
1347
1348void emit_cmpimm(int rs,int imm)
1349{
1350 u_int armval;
1351 if(genimm(imm,&armval)) {
1352 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1353 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1354 }else if(genimm(-imm,&armval)) {
1355 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1356 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1357 }else if(imm>0) {
1358 assert(imm<65536);
1359 #ifdef ARMv5_ONLY
1360 emit_movimm(imm,HOST_TEMPREG);
1361 #else
1362 emit_movw(imm,HOST_TEMPREG);
1363 #endif
1364 assem_debug("cmp %s,r14\n",regname[rs]);
1365 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1366 }else{
1367 assert(imm>-65536);
1368 #ifdef ARMv5_ONLY
1369 emit_movimm(-imm,HOST_TEMPREG);
1370 #else
1371 emit_movw(-imm,HOST_TEMPREG);
1372 #endif
1373 assem_debug("cmn %s,r14\n",regname[rs]);
1374 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1375 }
1376}
1377
1378void emit_cmovne(u_int *addr,int rt)
1379{
1380 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1381 assert(0);
1382}
1383void emit_cmovl(u_int *addr,int rt)
1384{
1385 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1386 assert(0);
1387}
1388void emit_cmovs(u_int *addr,int rt)
1389{
1390 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1391 assert(0);
1392}
1393void emit_cmovne_imm(int imm,int rt)
1394{
1395 assem_debug("movne %s,#%d\n",regname[rt],imm);
1396 u_int armval;
1397 assert(genimm(imm,&armval));
1398 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1399}
1400void emit_cmovl_imm(int imm,int rt)
1401{
1402 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1403 u_int armval;
1404 assert(genimm(imm,&armval));
1405 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1406}
1407void emit_cmovb_imm(int imm,int rt)
1408{
1409 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1410 u_int armval;
1411 assert(genimm(imm,&armval));
1412 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1413}
1414void emit_cmovs_imm(int imm,int rt)
1415{
1416 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1417 u_int armval;
1418 assert(genimm(imm,&armval));
1419 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1420}
1421void emit_cmove_reg(int rs,int rt)
1422{
1423 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1424 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1425}
1426void emit_cmovne_reg(int rs,int rt)
1427{
1428 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1429 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1430}
1431void emit_cmovl_reg(int rs,int rt)
1432{
1433 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1434 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1435}
1436void emit_cmovs_reg(int rs,int rt)
1437{
1438 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1439 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1440}
1441
// rt = (rs < imm) ? 1 : 0, signed. When rs and rt are the same register the
// zeroing is done after the compare so the operand is not destroyed.
void emit_slti32(int rs,int imm,int rt)
{
  if(rs!=rt) {
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }else{
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }
  emit_cmovl_imm(1,rt);
}
// rt = (rs < imm) ? 1 : 0, unsigned. When rs and rt are the same register
// the zeroing is done after the compare so the operand is not destroyed.
void emit_sltiu32(int rs,int imm,int rt)
{
  if(rs!=rt) {
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }else{
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }
  emit_cmovb_imm(1,rt);
}
// Signed "set if less than immediate" for a 64-bit value held in rsh:rsl.
// The low-word result is computed first, then corrected from the high word.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned "set if less than immediate" for a 64-bit value held in rsh:rsl.
// The low-word result is computed first, then corrected from the high word.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1488
1489void emit_cmp(int rs,int rt)
1490{
1491 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1492 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1493}
// rt = (rs > 0) ? 1 : 0, signed: compare with 1, preset 1, clear if less.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// rt = (rs != 0) ? 1 : 0. Flags come from a flag-setting move into rt, or
// from a test when source and destination are the same register.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) {
    emit_test(rs,rs);
  }else{
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
// 64-bit "greater than zero" for rsh:rsl: start from the low-word result,
// then override it from the high word (nonzero -> 1, negative -> 0).
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// 64-bit "not zero" for rsh:rsl: OR both halves into rt with flags, then
// force rt to 1 when the result is nonzero.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// rt = (rs1 < rs2) ? 1 : 0, signed. If rt aliases an operand it is cleared
// only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand=(rs1==rt||rs2==rt);
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs1 < rs2) ? 1 : 0, unsigned. If rt aliases an operand it is cleared
// only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand=(rs1==rt||rs2==rt);
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1538void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1539{
1540 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1541 assert(u1!=rt);
1542 assert(u2!=rt);
1543 emit_cmp(l1,l2);
1544 emit_movimm(0,rt);
1545 emit_sbcs(u1,u2,HOST_TEMPREG);
1546 emit_cmovl_imm(1,rt);
1547}
1548void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1549{
1550 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1551 assert(u1!=rt);
1552 assert(u2!=rt);
1553 emit_cmp(l1,l2);
1554 emit_movimm(0,rt);
1555 emit_sbcs(u1,u2,HOST_TEMPREG);
1556 emit_cmovb_imm(1,rt);
1557}
1558
1559void emit_call(int a)
1560{
1561 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1562 u_int offset=genjmp(a);
1563 output_w32(0xeb000000|offset);
1564}
1565void emit_jmp(int a)
1566{
1567 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1568 u_int offset=genjmp(a);
1569 output_w32(0xea000000|offset);
1570}
// Emit "bne" to address a.
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq" to address a.
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi" to address a.
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl" to address a.
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt" to address a.
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge" to address a.
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc" to address a.
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs" to address a.
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc" to address a.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1625
// Not implemented on this backend: logs the mnemonic and asserts.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Not implemented on this backend: logs the mnemonic and asserts.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Not implemented on this backend: logs the mnemonic and asserts.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1641void emit_pushreg(u_int r)
1642{
1643 assem_debug("push %%%s\n",regname[r]);
1644 assert(0);
1645}
1646void emit_popreg(u_int r)
1647{
1648 assem_debug("pop %%%s\n",regname[r]);
1649 assert(0);
1650}
1651void emit_callreg(u_int r)
1652{
1653 assem_debug("call *%%%s\n",regname[r]);
1654 assert(0);
1655}
1656void emit_jmpreg(u_int r)
1657{
1658 assem_debug("mov pc,%s\n",regname[r]);
1659 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1660}
1661
1662void emit_readword_indexed(int offset, int rs, int rt)
1663{
1664 assert(offset>-4096&&offset<4096);
1665 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1666 if(offset>=0) {
1667 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1668 }else{
1669 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1670 }
1671}
1672void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1673{
1674 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1675 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1676}
// Word load that goes through a map register when map>=0 (addr must then be
// zero), or a plain base+offset load when map<0.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load a 64-bit value as two words into rh (skipped when rh<0) and rl.
// With a map register, map is incremented by 1 between the two loads.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map>=0) {
    assert(rh!=rs);
    if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map,1,map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if(rh>=0) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr+4, rs, rl);
}
1697void emit_movsbl_indexed(int offset, int rs, int rt)
1698{
1699 assert(offset>-256&&offset<256);
1700 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1701 if(offset>=0) {
1702 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1703 }else{
1704 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1705 }
1706}
1707void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1708{
1709 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1710 else {
1711 if(addr==0) {
1712 emit_shlimm(map,2,map);
1713 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1714 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1715 }else{
1716 assert(addr>-256&&addr<256);
1717 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1718 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1719 emit_movsbl_indexed(addr, rt, rt);
1720 }
1721 }
1722}
1723void emit_movswl_indexed(int offset, int rs, int rt)
1724{
1725 assert(offset>-256&&offset<256);
1726 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1727 if(offset>=0) {
1728 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1729 }else{
1730 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1731 }
1732}
1733void emit_movzbl_indexed(int offset, int rs, int rt)
1734{
1735 assert(offset>-4096&&offset<4096);
1736 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1737 if(offset>=0) {
1738 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1739 }else{
1740 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1741 }
1742}
1743void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1744{
1745 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1746 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1747}
// Zero-extending byte load, optionally through a map register. A nonzero
// addr is first added to rs into rt, which then serves as the base.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
  }else{
    emit_movzbl_dualindexedx4(rs, map, rt);
  }
}
1760void emit_movzwl_indexed(int offset, int rs, int rt)
1761{
1762 assert(offset>-256&&offset<256);
1763 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1764 if(offset>=0) {
1765 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1766 }else{
1767 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1768 }
1769}
1770void emit_readword(int addr, int rt)
1771{
1772 u_int offset = addr-(u_int)&dynarec_local;
1773 assert(offset<4096);
1774 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1775 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1776}
1777void emit_movsbl(int addr, int rt)
1778{
1779 u_int offset = addr-(u_int)&dynarec_local;
1780 assert(offset<256);
1781 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1782 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1783}
1784void emit_movswl(int addr, int rt)
1785{
1786 u_int offset = addr-(u_int)&dynarec_local;
1787 assert(offset<256);
1788 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1789 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1790}
1791void emit_movzbl(int addr, int rt)
1792{
1793 u_int offset = addr-(u_int)&dynarec_local;
1794 assert(offset<4096);
1795 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1796 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1797}
1798void emit_movzwl(int addr, int rt)
1799{
1800 u_int offset = addr-(u_int)&dynarec_local;
1801 assert(offset<256);
1802 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1803 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1804}
1805void emit_movzwl_reg(int rs, int rt)
1806{
1807 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1808 assert(0);
1809}
1810
1811void emit_xchg(int rs, int rt)
1812{
1813 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1814 assert(0);
1815}
1816void emit_writeword_indexed(int rt, int offset, int rs)
1817{
1818 assert(offset>-4096&&offset<4096);
1819 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1820 if(offset>=0) {
1821 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1822 }else{
1823 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1824 }
1825}
1826void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1827{
1828 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1829 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1830}
// Word store that goes through a map register when map>=0 (addr must then
// be zero), or a plain base+offset store when map<0. temp is unused here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store a 64-bit value from rh:rl as two word stores.
// Without a map register the words go to addr and addr+4 (rh skipped when
// negative). With a map register the second store uses map+1 held in temp,
// or, when temp aliases rs, rs is advanced by 4 instead.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1855void emit_writehword_indexed(int rt, int offset, int rs)
1856{
1857 assert(offset>-256&&offset<256);
1858 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1859 if(offset>=0) {
1860 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1861 }else{
1862 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1863 }
1864}
1865void emit_writebyte_indexed(int rt, int offset, int rs)
1866{
1867 assert(offset>-4096&&offset<4096);
1868 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1869 if(offset>=0) {
1870 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1871 }else{
1872 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1873 }
1874}
1875void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1876{
1877 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1878 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1879}
// Byte store, optionally through a map register. A nonzero addr is first
// added to rs into temp, which then serves as the base.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
  }else{
    emit_writebyte_dualindexedx4(rt, rs, map);
  }
}
1892void emit_writeword(int rt, int addr)
1893{
1894 u_int offset = addr-(u_int)&dynarec_local;
1895 assert(offset<4096);
1896 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1897 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1898}
1899void emit_writehword(int rt, int addr)
1900{
1901 u_int offset = addr-(u_int)&dynarec_local;
1902 assert(offset<256);
1903 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1904 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1905}
1906void emit_writebyte(int rt, int addr)
1907{
1908 u_int offset = addr-(u_int)&dynarec_local;
1909 assert(offset<4096);
1910 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1911 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1912}
// Not implemented on this backend: logs the mnemonic and asserts.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Not implemented on this backend: logs the mnemonic and asserts.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1923
1924void emit_mul(int rs)
1925{
1926 assem_debug("mul %%%s\n",regname[rs]);
1927 assert(0);
1928}
1929void emit_imul(int rs)
1930{
1931 assem_debug("imul %%%s\n",regname[rs]);
1932 assert(0);
1933}
1934void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1935{
1936 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1937 assert(rs1<16);
1938 assert(rs2<16);
1939 assert(hi<16);
1940 assert(lo<16);
1941 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1942}
1943void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1944{
1945 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1946 assert(rs1<16);
1947 assert(rs2<16);
1948 assert(hi<16);
1949 assert(lo<16);
1950 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1951}
1952
1953void emit_div(int rs)
1954{
1955 assem_debug("div %%%s\n",regname[rs]);
1956 assert(0);
1957}
1958void emit_idiv(int rs)
1959{
1960 assem_debug("idiv %%%s\n",regname[rs]);
1961 assert(0);
1962}
// Not implemented on this backend: logs the mnemonic and asserts.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
1968
1969void emit_clz(int rs,int rt)
1970{
1971 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1972 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1973}
1974
1975void emit_subcs(int rs1,int rs2,int rt)
1976{
1977 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1978 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1979}
1980
1981void emit_shrcc_imm(int rs,u_int imm,int rt)
1982{
1983 assert(imm>0);
1984 assert(imm<32);
1985 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1986 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1987}
1988
1989void emit_negmi(int rs, int rt)
1990{
1991 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1992 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1993}
1994
1995void emit_negsmi(int rs, int rt)
1996{
1997 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1998 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1999}
2000
2001void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2002{
2003 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2004 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2005}
2006
2007void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2008{
2009 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2010 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2011}
2012
2013void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2014{
2015 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2016 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2017}
2018
2019void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2020{
2021 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2022 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2023}
2024
2025void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2026{
2027 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2028 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2029}
2030
2031void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2032{
2033 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2034 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2035}
2036
2037void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2038{
2039 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2040 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2041}
2042
2043void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2044{
2045 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2046 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2047}
2048
2049void emit_teq(int rs, int rt)
2050{
2051 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2052 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2053}
2054
2055void emit_rsbimm(int rs, int imm, int rt)
2056{
2057 u_int armval;
2058 assert(genimm(imm,&armval));
2059 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2060 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2061}
2062
2063// Load 2 immediates optimizing for small code size
2064void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2065{
2066 emit_movimm(imm1,rt1);
2067 u_int armval;
2068 if(genimm(imm2-imm1,&armval)) {
2069 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2070 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2071 }else if(genimm(imm1-imm2,&armval)) {
2072 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2073 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2074 }
2075 else emit_movimm(imm2,rt2);
2076}
2077
2078// Conditionally select one of two immediates, optimizing for small code size
2079// This will only be called if HAVE_CMOV_IMM is defined
2080void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2081{
2082 u_int armval;
2083 if(genimm(imm2-imm1,&armval)) {
2084 emit_movimm(imm1,rt);
2085 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2086 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2087 }else if(genimm(imm1-imm2,&armval)) {
2088 emit_movimm(imm1,rt);
2089 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2090 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2091 }
2092 else {
2093 #ifdef ARMv5_ONLY
2094 emit_movimm(imm1,rt);
2095 add_literal((int)out,imm2);
2096 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2097 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2098 #else
2099 emit_movw(imm1&0x0000FFFF,rt);
2100 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2101 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2102 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2103 }
2104 emit_movt(imm1&0xFFFF0000,rt);
2105 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2106 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2107 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2108 }
2109 #endif
2110 }
2111}
2112
// special case for checking invalid_code
// Not implemented on this backend: always asserts.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2118
2119// special case for checking invalid_code
2120void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2121{
2122 assert(imm<128&&imm>=0);
2123 assert(r>=0&&r<16);
2124 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2125 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2126 emit_cmpimm(HOST_TEMPREG,imm);
2127}
2128
2129// special case for tlb mapping
2130void emit_addsr12(int rs1,int rs2,int rt)
2131{
2132 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2133 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2134}
2135
// Used to preload hash table entries
// Unsupported on this backend. The previous body wrote x86 opcode bytes
// (0x0F 0x18 + modrm + address) into the ARM instruction stream; it now
// traps like the other x86-named emitters in this file.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  assert(0);
}
2145void emit_prefetchreg(int r)
2146{
2147 assem_debug("pld %s\n",regname[r]);
2148 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2149}
2150
2151// Special case for mini_ht
2152void emit_ldreq_indexed(int rs, u_int offset, int rt)
2153{
2154 assert(offset<4096);
2155 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2156 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2157}
2158
2159void emit_flds(int r,int sr)
2160{
2161 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2162 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2163}
2164
2165void emit_vldr(int r,int vr)
2166{
2167 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2168 output_w32(0xed900b00|(vr<<12)|(r<<16));
2169}
2170
2171void emit_fsts(int sr,int r)
2172{
2173 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2174 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2175}
2176
2177void emit_vstr(int vr,int r)
2178{
2179 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2180 output_w32(0xed800b00|(vr<<12)|(r<<16));
2181}
2182
// Emits "ftosizs s<d>,s<s>" (single-precision source s, destination d).
// Both register numbers are split into a 3-bit upper part and a low bit.
void emit_ftosizs(int s,int d)
{
  unsigned int dst=((d&14)<<11)|((d&1)<<22);
  unsigned int src=((s&14)>>1)|((s&1)<<5);
  assem_debug("ftosizs s%d,s%d\n",d,s);
  output_w32(0xeebd0ac0|dst|src);
}
2188
// Emits "ftosizd s<d>,d<s>" (double-precision source s, single-precision
// destination d). Only d0-d7 can be named as the source (s&7).
void emit_ftosizd(int s,int d)
{
  unsigned int dst=((d&14)<<11)|((d&1)<<22);
  unsigned int src=s&7;
  assem_debug("ftosizd s%d,d%d\n",d,s);
  output_w32(0xeebd0bc0|dst|src);
}
2194
// Emits "fsitos s<d>,s<s>" (source register s, destination register d,
// both single-precision register numbers).
void emit_fsitos(int s,int d)
{
  unsigned int dst=((d&14)<<11)|((d&1)<<22);
  unsigned int src=((s&14)>>1)|((s&1)<<5);
  assem_debug("fsitos s%d,s%d\n",d,s);
  output_w32(0xeeb80ac0|dst|src);
}
2200
// Emits "fsitod d<d>,s<s>" (single-precision register s as source,
// double-precision register d as destination; only d0-d7 via d&7).
void emit_fsitod(int s,int d)
{
  unsigned int dst=(d&7)<<12;
  unsigned int src=((s&14)>>1)|((s&1)<<5);
  assem_debug("fsitod d%d,s%d\n",d,s);
  output_w32(0xeeb80bc0|dst|src);
}
2206
// Emits "fcvtds d<d>,s<s>" (single-precision source s, double-precision
// destination d; only d0-d7 via d&7).
void emit_fcvtds(int s,int d)
{
  unsigned int dst=(d&7)<<12;
  unsigned int src=((s&14)>>1)|((s&1)<<5);
  assem_debug("fcvtds d%d,s%d\n",d,s);
  output_w32(0xeeb70ac0|dst|src);
}
2212
// Emits "fcvtsd s<d>,d<s>" (double-precision source s, single-precision
// destination d; only d0-d7 via s&7).
void emit_fcvtsd(int s,int d)
{
  unsigned int dst=((d&14)<<11)|((d&1)<<22);
  unsigned int src=s&7;
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  output_w32(0xeeb70bc0|dst|src);
}
2218
// Emits "fsqrts s<d>,s<s>": single-precision square root of register s into
// register d. Both operands use the split single-precision register encoding.
// (The debug text previously printed the destination as a d-register.)
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2224
// Emits "fsqrtd d<d>,d<s>": double-precision square root of register s into
// register d. Only d0-d7 are encodable (d&7, s&7).
// (The debug text previously printed the destination as an s-register.)
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2230
// Emits "fabss s<d>,s<s>": single-precision absolute value of register s into
// register d. Both operands use the split single-precision register encoding.
// (The debug text previously printed the destination as a d-register.)
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2236
// Emits "fabsd d<d>,d<s>": double-precision absolute value of register s into
// register d. Only d0-d7 are encodable (d&7, s&7).
// (The debug text previously printed the destination as an s-register.)
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2242
// Emits "fnegs s<d>,s<s>": single-precision negate of register s into
// register d. Both operands use the split single-precision register encoding.
// (The debug text previously printed the destination as a d-register.)
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2248
// Emits "fnegd d<d>,d<s>": double-precision negate of register s into
// register d. Only d0-d7 are encodable (d&7, s&7).
// (The debug text previously printed the destination as an s-register.)
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2254
// Emits "fadds s<d>,s<s1>,s<s2>" (single precision).
// Each register number is split into a 3-bit upper part and a low bit that
// land in different places of the instruction word.
void emit_fadds(int s1,int s2,int d)
{
  unsigned int dst=((d&14)<<11)|((d&1)<<22);
  unsigned int lhs=((s1&14)<<15)|((s1&1)<<7);
  unsigned int rhs=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a00|dst|lhs|rhs);
}
2260
// Emits "faddd d<d>,d<s1>,d<s2>" (double precision, d0-d7 only).
void emit_faddd(int s1,int s2,int d)
{
  unsigned int dst=(d&7)<<12;
  unsigned int lhs=(s1&7)<<16;
  unsigned int rhs=s2&7;
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b00|dst|lhs|rhs);
}
2266
// Emits "fsubs s<d>,s<s1>,s<s2>" (single precision).
// Register numbers use the same split encoding as the other single-precision
// three-operand emitters.
void emit_fsubs(int s1,int s2,int d)
{
  unsigned int dst=((d&14)<<11)|((d&1)<<22);
  unsigned int lhs=((s1&14)<<15)|((s1&1)<<7);
  unsigned int rhs=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a40|dst|lhs|rhs);
}
2272
// Emits "fsubd d<d>,d<s1>,d<s2>" (double precision, d0-d7 only).
void emit_fsubd(int s1,int s2,int d)
{
  unsigned int dst=(d&7)<<12;
  unsigned int lhs=(s1&7)<<16;
  unsigned int rhs=s2&7;
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b40|dst|lhs|rhs);
}
2278
// Emits "fmuls s<d>,s<s1>,s<s2>" (single precision).
void emit_fmuls(int s1,int s2,int d)
{
  unsigned int dst=((d&14)<<11)|((d&1)<<22);
  unsigned int lhs=((s1&14)<<15)|((s1&1)<<7);
  unsigned int rhs=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee200a00|dst|lhs|rhs);
}
2284
// Emits "fmuld d<d>,d<s1>,d<s2>" (double precision, d0-d7 only).
void emit_fmuld(int s1,int s2,int d)
{
  unsigned int dst=(d&7)<<12;
  unsigned int lhs=(s1&7)<<16;
  unsigned int rhs=s2&7;
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee200b00|dst|lhs|rhs);
}
2290
// Emits "fdivs s<d>,s<s1>,s<s2>" (single precision).
void emit_fdivs(int s1,int s2,int d)
{
  unsigned int dst=((d&14)<<11)|((d&1)<<22);
  unsigned int lhs=((s1&14)<<15)|((s1&1)<<7);
  unsigned int rhs=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee800a00|dst|lhs|rhs);
}
2296
// Emits "fdivd d<d>,d<s1>,d<s2>" (double precision, d0-d7 only).
void emit_fdivd(int s1,int s2,int d)
{
  unsigned int dst=(d&7)<<12;
  unsigned int lhs=(s1&7)<<16;
  unsigned int rhs=s2&7;
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee800b00|dst|lhs|rhs);
}
2302
// Emits a fixed "fcmps s14, s15".
// The x and y arguments are not used: the operands are baked into the
// instruction word.
void emit_fcmps(int x,int y)
{
  const unsigned int fcmps_s14_s15=0xeeb47a67;
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  output_w32(fcmps_s14_s15);
}
2308
// Emits a fixed "fcmpd d6, d7".
// The x and y arguments are not used: the operands are baked into the
// instruction word.
void emit_fcmpd(int x,int y)
{
  const unsigned int fcmpd_d6_d7=0xeeb46b47;
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  output_w32(fcmpd_d6_d7);
}
2314
// Emits "fmstat" (fixed instruction word, no operands).
void emit_fmstat()
{
  const unsigned int fmstat_insn=0xeef1fa10;
  assem_debug("fmstat\n");
  output_w32(fmstat_insn);
}
2320
2321void emit_bicne_imm(int rs,int imm,int rt)
2322{
2323 u_int armval;
2324 assert(genimm(imm,&armval));
2325 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2326 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2327}
2328
2329void emit_biccs_imm(int rs,int imm,int rt)
2330{
2331 u_int armval;
2332 assert(genimm(imm,&armval));
2333 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2334 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2335}
2336
2337void emit_bicvc_imm(int rs,int imm,int rt)
2338{
2339 u_int armval;
2340 assert(genimm(imm,&armval));
2341 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2342 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2343}
2344
2345void emit_bichi_imm(int rs,int imm,int rt)
2346{
2347 u_int armval;
2348 assert(genimm(imm,&armval));
2349 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2350 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2351}
2352
2353void emit_orrvs_imm(int rs,int imm,int rt)
2354{
2355 u_int armval;
2356 assert(genimm(imm,&armval));
2357 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2358 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2359}
2360
// Emits "addvc pc,pc,#..." with the immediate field left at zero.
// The argument a is only printed in the debug line; it does not influence the
// emitted word. NOTE(review): presumably the immediate is filled in later by
// whoever records this location -- confirm against the callers.
void emit_jno_unlikely(int a)
{
  unsigned int insn;
  assem_debug("addvc pc,pc,#? (%x)\n",a);
  insn=0x72800000|rd_rn_rm(15,15,0);
  output_w32(insn);
}
2367
2368// Save registers before function call
2369void save_regs(u_int reglist)
2370{
2371 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2372 if(!reglist) return;
2373 assem_debug("stmia fp,{");
2374 if(reglist&1) assem_debug("r0, ");
2375 if(reglist&2) assem_debug("r1, ");
2376 if(reglist&4) assem_debug("r2, ");
2377 if(reglist&8) assem_debug("r3, ");
2378 if(reglist&0x1000) assem_debug("r12");
2379 assem_debug("}\n");
2380 output_w32(0xe88b0000|reglist);
2381}
2382// Restore registers after function call
2383void restore_regs(u_int reglist)
2384{
2385 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2386 if(!reglist) return;
2387 assem_debug("ldmia fp,{");
2388 if(reglist&1) assem_debug("r0, ");
2389 if(reglist&2) assem_debug("r1, ");
2390 if(reglist&4) assem_debug("r2, ");
2391 if(reglist&8) assem_debug("r3, ");
2392 if(reglist&0x1000) assem_debug("r12");
2393 assem_debug("}\n");
2394 output_w32(0xe89b0000|reglist);
2395}
2396
2397// Write back consts using r14 so we don't disturb the other registers
2398void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2399{
2400 int hr;
2401 for(hr=0;hr<HOST_REGS;hr++) {
2402 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2403 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2404 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2405 int value=constmap[i][hr];
2406 if(value==0) {
2407 emit_zeroreg(HOST_TEMPREG);
2408 }
2409 else {
2410 emit_movimm(value,HOST_TEMPREG);
2411 }
2412 emit_storereg(i_regmap[hr],HOST_TEMPREG);
2413 if((i_is32>>i_regmap[hr])&1) {
2414 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2415 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2416 }
2417 }
2418 }
2419 }
2420 }
2421}
2422
2423/* Stubs/epilogue */
2424
2425void literal_pool(int n)
2426{
2427 if(!literalcount) return;
2428 if(n) {
2429 if((int)out-literals[0][0]<4096-n) return;
2430 }
2431 u_int *ptr;
2432 int i;
2433 for(i=0;i<literalcount;i++)
2434 {
2435 ptr=(u_int *)literals[i][0];
2436 u_int offset=(u_int)out-(u_int)ptr-8;
2437 assert(offset<4096);
2438 assert(!(offset&3));
2439 *ptr|=offset;
2440 output_w32(literals[i][1]);
2441 }
2442 literalcount=0;
2443}
2444
2445void literal_pool_jumpover(int n)
2446{
2447 if(!literalcount) return;
2448 if(n) {
2449 if((int)out-literals[0][0]<4096-n) return;
2450 }
2451 int jaddr=(int)out;
2452 emit_jmp(0);
2453 literal_pool(0);
2454 set_jump_target(jaddr,(int)out);
2455}
2456
2457emit_extjump2(int addr, int target, int linker)
2458{
2459 u_char *ptr=(u_char *)addr;
2460 assert((ptr[3]&0x0e)==0xa);
2461 emit_loadlp(target,0);
2462 emit_loadlp(addr,1);
2463 assert(addr>=0x7000000&&addr<0x7FFFFFF);
2464 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2465//DEBUG >
2466#ifdef DEBUG_CYCLE_COUNT
2467 emit_readword((int)&last_count,ECX);
2468 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2469 emit_readword((int)&next_interupt,ECX);
2470 emit_writeword(HOST_CCREG,(int)&Count);
2471 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2472 emit_writeword(ECX,(int)&last_count);
2473#endif
2474//DEBUG <
2475 emit_jmp(linker);
2476}
2477
2478emit_extjump(int addr, int target)
2479{
2480 emit_extjump2(addr, target, (int)dyna_linker);
2481}
2482emit_extjump_ds(int addr, int target)
2483{
2484 emit_extjump2(addr, target, (int)dyna_linker_ds);
2485}
2486
2487do_readstub(int n)
2488{
2489 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2490 literal_pool(256);
2491 set_jump_target(stubs[n][1],(int)out);
2492 int type=stubs[n][0];
2493 int i=stubs[n][3];
2494 int rs=stubs[n][4];
2495 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2496 u_int reglist=stubs[n][7];
2497 signed char *i_regmap=i_regs->regmap;
2498 int addr=get_reg(i_regmap,AGEN1+(i&1));
2499 int rth,rt;
2500 int ds;
2501 if(itype[i]==C1LS||itype[i]==LOADLR) {
2502 rth=get_reg(i_regmap,FTEMP|64);
2503 rt=get_reg(i_regmap,FTEMP);
2504 }else{
2505 rth=get_reg(i_regmap,rt1[i]|64);
2506 rt=get_reg(i_regmap,rt1[i]);
2507 }
2508 assert(rs>=0);
2509 assert(rt>=0);
2510 if(addr<0) addr=rt;
2511 assert(addr>=0);
2512 int ftable=0;
2513 if(type==LOADB_STUB||type==LOADBU_STUB)
2514 ftable=(int)readmemb;
2515 if(type==LOADH_STUB||type==LOADHU_STUB)
2516 ftable=(int)readmemh;
2517 if(type==LOADW_STUB)
2518 ftable=(int)readmem;
2519 if(type==LOADD_STUB)
2520 ftable=(int)readmemd;
2521 emit_writeword(rs,(int)&address);
2522 //emit_pusha();
2523 save_regs(reglist);
2524 ds=i_regs!=&regs[i];
2525 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2526 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2527 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2528 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2529 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2530 emit_shrimm(rs,16,1);
2531 int cc=get_reg(i_regmap,CCREG);
2532 if(cc<0) {
2533 emit_loadreg(CCREG,2);
2534 }
2535 emit_movimm(ftable,0);
2536 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2537 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2538 //emit_readword((int)&last_count,temp);
2539 //emit_add(cc,temp,cc);
2540 //emit_writeword(cc,(int)&Count);
2541 //emit_mov(15,14);
2542 emit_call((int)&indirect_jump_indexed);
2543 //emit_callreg(rs);
2544 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2545 // We really shouldn't need to update the count here,
2546 // but not doing so causes random crashes...
2547 emit_readword((int)&Count,HOST_TEMPREG);
2548 emit_readword((int)&next_interupt,2);
2549 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2550 emit_writeword(2,(int)&last_count);
2551 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2552 if(cc<0) {
2553 emit_storereg(CCREG,HOST_TEMPREG);
2554 }
2555 //emit_popa();
2556 restore_regs(reglist);
2557 //if((cc=get_reg(regmap,CCREG))>=0) {
2558 // emit_loadreg(CCREG,cc);
2559 //}
2560 if(type==LOADB_STUB)
2561 emit_movsbl((int)&readmem_dword,rt);
2562 if(type==LOADBU_STUB)
2563 emit_movzbl((int)&readmem_dword,rt);
2564 if(type==LOADH_STUB)
2565 emit_movswl((int)&readmem_dword,rt);
2566 if(type==LOADHU_STUB)
2567 emit_movzwl((int)&readmem_dword,rt);
2568 if(type==LOADW_STUB)
2569 emit_readword((int)&readmem_dword,rt);
2570 if(type==LOADD_STUB) {
2571 emit_readword((int)&readmem_dword,rt);
2572 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2573 }
2574 emit_jmp(stubs[n][2]); // return address
2575}
2576
2577inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2578{
2579 int rs=get_reg(regmap,target);
2580 int rth=get_reg(regmap,target|64);
2581 int rt=get_reg(regmap,target);
2582 assert(rs>=0);
2583 assert(rt>=0);
2584 int ftable=0;
2585 if(type==LOADB_STUB||type==LOADBU_STUB)
2586 ftable=(int)readmemb;
2587 if(type==LOADH_STUB||type==LOADHU_STUB)
2588 ftable=(int)readmemh;
2589 if(type==LOADW_STUB)
2590 ftable=(int)readmem;
2591 if(type==LOADD_STUB)
2592 ftable=(int)readmemd;
2593 emit_writeword(rs,(int)&address);
2594 //emit_pusha();
2595 save_regs(reglist);
2596 //emit_shrimm(rs,16,1);
2597 int cc=get_reg(regmap,CCREG);
2598 if(cc<0) {
2599 emit_loadreg(CCREG,2);
2600 }
2601 //emit_movimm(ftable,0);
2602 emit_movimm(((u_int *)ftable)[addr>>16],0);
2603 //emit_readword((int)&last_count,12);
2604 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2605 if((signed int)addr>=(signed int)0xC0000000) {
2606 // Pagefault address
2607 int ds=regmap!=regs[i].regmap;
2608 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2609 }
2610 //emit_add(12,2,2);
2611 //emit_writeword(2,(int)&Count);
2612 //emit_call(((u_int *)ftable)[addr>>16]);
2613 emit_call((int)&indirect_jump);
2614 // We really shouldn't need to update the count here,
2615 // but not doing so causes random crashes...
2616 emit_readword((int)&Count,HOST_TEMPREG);
2617 emit_readword((int)&next_interupt,2);
2618 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2619 emit_writeword(2,(int)&last_count);
2620 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2621 if(cc<0) {
2622 emit_storereg(CCREG,HOST_TEMPREG);
2623 }
2624 //emit_popa();
2625 restore_regs(reglist);
2626 if(type==LOADB_STUB)
2627 emit_movsbl((int)&readmem_dword,rt);
2628 if(type==LOADBU_STUB)
2629 emit_movzbl((int)&readmem_dword,rt);
2630 if(type==LOADH_STUB)
2631 emit_movswl((int)&readmem_dword,rt);
2632 if(type==LOADHU_STUB)
2633 emit_movzwl((int)&readmem_dword,rt);
2634 if(type==LOADW_STUB)
2635 emit_readword((int)&readmem_dword,rt);
2636 if(type==LOADD_STUB) {
2637 emit_readword((int)&readmem_dword,rt);
2638 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2639 }
2640}
2641
2642do_writestub(int n)
2643{
2644 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2645 literal_pool(256);
2646 set_jump_target(stubs[n][1],(int)out);
2647 int type=stubs[n][0];
2648 int i=stubs[n][3];
2649 int rs=stubs[n][4];
2650 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2651 u_int reglist=stubs[n][7];
2652 signed char *i_regmap=i_regs->regmap;
2653 int addr=get_reg(i_regmap,AGEN1+(i&1));
2654 int rth,rt,r;
2655 int ds;
2656 if(itype[i]==C1LS) {
2657 rth=get_reg(i_regmap,FTEMP|64);
2658 rt=get_reg(i_regmap,r=FTEMP);
2659 }else{
2660 rth=get_reg(i_regmap,rs2[i]|64);
2661 rt=get_reg(i_regmap,r=rs2[i]);
2662 }
2663 assert(rs>=0);
2664 assert(rt>=0);
2665 if(addr<0) addr=get_reg(i_regmap,-1);
2666 assert(addr>=0);
2667 int ftable=0;
2668 if(type==STOREB_STUB)
2669 ftable=(int)writememb;
2670 if(type==STOREH_STUB)
2671 ftable=(int)writememh;
2672 if(type==STOREW_STUB)
2673 ftable=(int)writemem;
2674 if(type==STORED_STUB)
2675 ftable=(int)writememd;
2676 emit_writeword(rs,(int)&address);
2677 //emit_shrimm(rs,16,rs);
2678 //emit_movmem_indexedx4(ftable,rs,rs);
2679 if(type==STOREB_STUB)
2680 emit_writebyte(rt,(int)&byte);
2681 if(type==STOREH_STUB)
2682 emit_writehword(rt,(int)&hword);
2683 if(type==STOREW_STUB)
2684 emit_writeword(rt,(int)&word);
2685 if(type==STORED_STUB) {
3d624f89 2686#ifndef FORCE32
57871462 2687 emit_writeword(rt,(int)&dword);
2688 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2689#else
2690 printf("STORED_STUB\n");
2691#endif
57871462 2692 }
2693 //emit_pusha();
2694 save_regs(reglist);
2695 ds=i_regs!=&regs[i];
2696 int real_rs=get_reg(i_regmap,rs1[i]);
2697 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2698 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2699 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2700 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2701 emit_shrimm(rs,16,1);
2702 int cc=get_reg(i_regmap,CCREG);
2703 if(cc<0) {
2704 emit_loadreg(CCREG,2);
2705 }
2706 emit_movimm(ftable,0);
2707 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2708 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2709 //emit_readword((int)&last_count,temp);
2710 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2711 //emit_add(cc,temp,cc);
2712 //emit_writeword(cc,(int)&Count);
2713 emit_call((int)&indirect_jump_indexed);
2714 //emit_callreg(rs);
2715 emit_readword((int)&Count,HOST_TEMPREG);
2716 emit_readword((int)&next_interupt,2);
2717 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2718 emit_writeword(2,(int)&last_count);
2719 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2720 if(cc<0) {
2721 emit_storereg(CCREG,HOST_TEMPREG);
2722 }
2723 //emit_popa();
2724 restore_regs(reglist);
2725 //if((cc=get_reg(regmap,CCREG))>=0) {
2726 // emit_loadreg(CCREG,cc);
2727 //}
2728 emit_jmp(stubs[n][2]); // return address
2729}
2730
2731inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2732{
2733 int rs=get_reg(regmap,-1);
2734 int rth=get_reg(regmap,target|64);
2735 int rt=get_reg(regmap,target);
2736 assert(rs>=0);
2737 assert(rt>=0);
2738 int ftable=0;
2739 if(type==STOREB_STUB)
2740 ftable=(int)writememb;
2741 if(type==STOREH_STUB)
2742 ftable=(int)writememh;
2743 if(type==STOREW_STUB)
2744 ftable=(int)writemem;
2745 if(type==STORED_STUB)
2746 ftable=(int)writememd;
2747 emit_writeword(rs,(int)&address);
2748 //emit_shrimm(rs,16,rs);
2749 //emit_movmem_indexedx4(ftable,rs,rs);
2750 if(type==STOREB_STUB)
2751 emit_writebyte(rt,(int)&byte);
2752 if(type==STOREH_STUB)
2753 emit_writehword(rt,(int)&hword);
2754 if(type==STOREW_STUB)
2755 emit_writeword(rt,(int)&word);
2756 if(type==STORED_STUB) {
3d624f89 2757#ifndef FORCE32
57871462 2758 emit_writeword(rt,(int)&dword);
2759 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2760#else
2761 printf("STORED_STUB\n");
2762#endif
57871462 2763 }
2764 //emit_pusha();
2765 save_regs(reglist);
2766 //emit_shrimm(rs,16,1);
2767 int cc=get_reg(regmap,CCREG);
2768 if(cc<0) {
2769 emit_loadreg(CCREG,2);
2770 }
2771 //emit_movimm(ftable,0);
2772 emit_movimm(((u_int *)ftable)[addr>>16],0);
2773 //emit_readword((int)&last_count,12);
2774 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2775 if((signed int)addr>=(signed int)0xC0000000) {
2776 // Pagefault address
2777 int ds=regmap!=regs[i].regmap;
2778 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2779 }
2780 //emit_add(12,2,2);
2781 //emit_writeword(2,(int)&Count);
2782 //emit_call(((u_int *)ftable)[addr>>16]);
2783 emit_call((int)&indirect_jump);
2784 emit_readword((int)&Count,HOST_TEMPREG);
2785 emit_readword((int)&next_interupt,2);
2786 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2787 emit_writeword(2,(int)&last_count);
2788 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2789 if(cc<0) {
2790 emit_storereg(CCREG,HOST_TEMPREG);
2791 }
2792 //emit_popa();
2793 restore_regs(reglist);
2794}
2795
2796do_unalignedwritestub(int n)
2797{
2798 set_jump_target(stubs[n][1],(int)out);
2799 output_w32(0xef000000);
2800 emit_jmp(stubs[n][2]); // return address
2801}
2802
// Debug helper: prints seven of its arguments in hex (esp is not printed),
// followed in parentheses by the int located just below `edi` in memory.
// NOTE(review): that last read is outside the parameter object, so what it
// shows depends entirely on the compiler's frame layout.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int *below_edi=&edi-1;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,*below_edi);
}
2807
2808do_invstub(int n)
2809{
2810 literal_pool(20);
2811 u_int reglist=stubs[n][3];
2812 set_jump_target(stubs[n][1],(int)out);
2813 save_regs(reglist);
2814 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2815 emit_call((int)&invalidate_addr);
2816 restore_regs(reglist);
2817 emit_jmp(stubs[n][2]); // return address
2818}
2819
2820int do_dirty_stub(int i)
2821{
2822 assem_debug("do_dirty_stub %x\n",start+i*4);
2823 // Careful about the code output here, verify_dirty needs to parse it.
2824 #ifdef ARMv5_ONLY
2825 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2826 emit_loadlp((int)copy,2);
2827 emit_loadlp(slen*4,3);
2828 #else
2829 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2830 emit_movw(((u_int)copy)&0x0000FFFF,2);
2831 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2832 emit_movt(((u_int)copy)&0xFFFF0000,2);
2833 emit_movw(slen*4,3);
2834 #endif
2835 emit_movimm(start+i*4,0);
2836 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2837 int entry=(int)out;
2838 load_regs_entry(i);
2839 if(entry==(int)out) entry=instr_addr[i];
2840 emit_jmp(instr_addr[i]);
2841 return entry;
2842}
2843
2844void do_dirty_stub_ds()
2845{
2846 // Careful about the code output here, verify_dirty needs to parse it.
2847 #ifdef ARMv5_ONLY
2848 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2849 emit_loadlp((int)copy,2);
2850 emit_loadlp(slen*4,3);
2851 #else
2852 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2853 emit_movw(((u_int)copy)&0x0000FFFF,2);
2854 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2855 emit_movt(((u_int)copy)&0xFFFF0000,2);
2856 emit_movw(slen*4,3);
2857 #endif
2858 emit_movimm(start+1,0);
2859 emit_call((int)&verify_code_ds);
2860}
2861
2862do_cop1stub(int n)
2863{
2864 literal_pool(256);
2865 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2866 set_jump_target(stubs[n][1],(int)out);
2867 int i=stubs[n][3];
3d624f89 2868// int rs=stubs[n][4];
57871462 2869 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2870 int ds=stubs[n][6];
2871 if(!ds) {
2872 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2873 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2874 }
2875 //else {printf("fp exception in delay slot\n");}
2876 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2877 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2878 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2879 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2880 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2881}
2882
2883/* TLB */
2884
2885int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
2886{
2887 if(c) {
2888 if((signed int)addr>=(signed int)0xC0000000) {
2889 // address_generation already loaded the const
2890 emit_readword_dualindexedx4(FP,map,map);
2891 }
2892 else
2893 return -1; // No mapping
2894 }
2895 else {
2896 assert(s!=map);
2897 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2898 emit_addsr12(map,s,map);
2899 // Schedule this while we wait on the load
2900 //if(x) emit_xorimm(s,x,ar);
2901 if(shift>=0) emit_shlimm(s,3,shift);
2902 if(~a) emit_andimm(s,a,ar);
2903 emit_readword_dualindexedx4(FP,map,map);
2904 }
2905 return map;
2906}
2907int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
2908{
2909 if(!c||(signed int)addr>=(signed int)0xC0000000) {
2910 emit_test(map,map);
2911 *jaddr=(int)out;
2912 emit_js(0);
2913 }
2914 return map;
2915}
2916
2917int gen_tlb_addr_r(int ar, int map) {
2918 if(map>=0) {
2919 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2920 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2921 }
2922}
2923
2924int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
2925{
2926 if(c) {
2927 if(addr<0x80800000||addr>=0xC0000000) {
2928 // address_generation already loaded the const
2929 emit_readword_dualindexedx4(FP,map,map);
2930 }
2931 else
2932 return -1; // No mapping
2933 }
2934 else {
2935 assert(s!=map);
2936 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2937 emit_addsr12(map,s,map);
2938 // Schedule this while we wait on the load
2939 //if(x) emit_xorimm(s,x,ar);
2940 emit_readword_dualindexedx4(FP,map,map);
2941 }
2942 return map;
2943}
2944int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
2945{
2946 if(!c||addr<0x80800000||addr>=0xC0000000) {
2947 emit_testimm(map,0x40000000);
2948 *jaddr=(int)out;
2949 emit_jne(0);
2950 }
2951}
2952
2953int gen_tlb_addr_w(int ar, int map) {
2954 if(map>=0) {
2955 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2956 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2957 }
2958}
2959
2960// Generate the address of the memory_map entry, relative to dynarec_local
2961generate_map_const(u_int addr,int reg) {
2962 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
2963 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
2964}
2965
2966/* Special assem */
2967
2968void shift_assemble_arm(int i,struct regstat *i_regs)
2969{
2970 if(rt1[i]) {
2971 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2972 {
2973 signed char s,t,shift;
2974 t=get_reg(i_regs->regmap,rt1[i]);
2975 s=get_reg(i_regs->regmap,rs1[i]);
2976 shift=get_reg(i_regs->regmap,rs2[i]);
2977 if(t>=0){
2978 if(rs1[i]==0)
2979 {
2980 emit_zeroreg(t);
2981 }
2982 else if(rs2[i]==0)
2983 {
2984 assert(s>=0);
2985 if(s!=t) emit_mov(s,t);
2986 }
2987 else
2988 {
2989 emit_andimm(shift,31,HOST_TEMPREG);
2990 if(opcode2[i]==4) // SLLV
2991 {
2992 emit_shl(s,HOST_TEMPREG,t);
2993 }
2994 if(opcode2[i]==6) // SRLV
2995 {
2996 emit_shr(s,HOST_TEMPREG,t);
2997 }
2998 if(opcode2[i]==7) // SRAV
2999 {
3000 emit_sar(s,HOST_TEMPREG,t);
3001 }
3002 }
3003 }
3004 } else { // DSLLV/DSRLV/DSRAV
3005 signed char sh,sl,th,tl,shift;
3006 th=get_reg(i_regs->regmap,rt1[i]|64);
3007 tl=get_reg(i_regs->regmap,rt1[i]);
3008 sh=get_reg(i_regs->regmap,rs1[i]|64);
3009 sl=get_reg(i_regs->regmap,rs1[i]);
3010 shift=get_reg(i_regs->regmap,rs2[i]);
3011 if(tl>=0){
3012 if(rs1[i]==0)
3013 {
3014 emit_zeroreg(tl);
3015 if(th>=0) emit_zeroreg(th);
3016 }
3017 else if(rs2[i]==0)
3018 {
3019 assert(sl>=0);
3020 if(sl!=tl) emit_mov(sl,tl);
3021 if(th>=0&&sh!=th) emit_mov(sh,th);
3022 }
3023 else
3024 {
3025 // FIXME: What if shift==tl ?
3026 assert(shift!=tl);
3027 int temp=get_reg(i_regs->regmap,-1);
3028 int real_th=th;
3029 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3030 assert(sl>=0);
3031 assert(sh>=0);
3032 emit_andimm(shift,31,HOST_TEMPREG);
3033 if(opcode2[i]==0x14) // DSLLV
3034 {
3035 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3036 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3037 emit_orrshr(sl,HOST_TEMPREG,th);
3038 emit_andimm(shift,31,HOST_TEMPREG);
3039 emit_testimm(shift,32);
3040 emit_shl(sl,HOST_TEMPREG,tl);
3041 if(th>=0) emit_cmovne_reg(tl,th);
3042 emit_cmovne_imm(0,tl);
3043 }
3044 if(opcode2[i]==0x16) // DSRLV
3045 {
3046 assert(th>=0);
3047 emit_shr(sl,HOST_TEMPREG,tl);
3048 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3049 emit_orrshl(sh,HOST_TEMPREG,tl);
3050 emit_andimm(shift,31,HOST_TEMPREG);
3051 emit_testimm(shift,32);
3052 emit_shr(sh,HOST_TEMPREG,th);
3053 emit_cmovne_reg(th,tl);
3054 if(real_th>=0) emit_cmovne_imm(0,th);
3055 }
3056 if(opcode2[i]==0x17) // DSRAV
3057 {
3058 assert(th>=0);
3059 emit_shr(sl,HOST_TEMPREG,tl);
3060 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3061 if(real_th>=0) {
3062 assert(temp>=0);
3063 emit_sarimm(th,31,temp);
3064 }
3065 emit_orrshl(sh,HOST_TEMPREG,tl);
3066 emit_andimm(shift,31,HOST_TEMPREG);
3067 emit_testimm(shift,32);
3068 emit_sar(sh,HOST_TEMPREG,th);
3069 emit_cmovne_reg(th,tl);
3070 if(real_th>=0) emit_cmovne_reg(temp,th);
3071 }
3072 }
3073 }
3074 }
3075 }
3076}
3077#define shift_assemble shift_assemble_arm
3078
3079void loadlr_assemble_arm(int i,struct regstat *i_regs)
3080{
3081 int s,th,tl,temp,temp2,addr,map=-1;
3082 int offset;
3083 int jaddr=0;
3084 int memtarget,c=0;
3085 u_int hr,reglist=0;
3086 th=get_reg(i_regs->regmap,rt1[i]|64);
3087 tl=get_reg(i_regs->regmap,rt1[i]);
3088 s=get_reg(i_regs->regmap,rs1[i]);
3089 temp=get_reg(i_regs->regmap,-1);
3090 temp2=get_reg(i_regs->regmap,FTEMP);
3091 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3092 assert(addr<0);
3093 offset=imm[i];
3094 for(hr=0;hr<HOST_REGS;hr++) {
3095 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3096 }
3097 reglist|=1<<temp;
3098 if(offset||s<0||c) addr=temp2;
3099 else addr=s;
3100 if(s>=0) {
3101 c=(i_regs->wasconst>>s)&1;
3102 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3103 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3104 }
3105 if(tl>=0) {
3106 //assert(tl>=0);
3107 //assert(rt1[i]);
3108 if(!using_tlb) {
3109 if(!c) {
3110 emit_shlimm(addr,3,temp);
3111 if (opcode[i]==0x22||opcode[i]==0x26) {
3112 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3113 }else{
3114 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3115 }
3116 emit_cmpimm(addr,0x800000);
3117 jaddr=(int)out;
3118 emit_jno(0);
3119 }
3120 else {
3121 if (opcode[i]==0x22||opcode[i]==0x26) {
3122 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3123 }else{
3124 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3125 }
3126 }
3127 }else{ // using tlb
3128 int a;
3129 if(c) {
3130 a=-1;
3131 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3132 a=0xFFFFFFFC; // LWL/LWR
3133 }else{
3134 a=0xFFFFFFF8; // LDL/LDR
3135 }
3136 map=get_reg(i_regs->regmap,TLREG);
3137 assert(map>=0);
3138 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3139 if(c) {
3140 if (opcode[i]==0x22||opcode[i]==0x26) {
3141 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3142 }else{
3143 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3144 }
3145 }
3146 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3147 }
3148 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3149 if(!c||memtarget) {
3150 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3151 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3152 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3153 }
3154 else
3155 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3156 emit_andimm(temp,24,temp);
3157 if (opcode[i]==0x26) emit_xorimm(temp,24,temp); // LWR
3158 emit_movimm(-1,HOST_TEMPREG);
3159 if (opcode[i]==0x26) {
3160 emit_shr(temp2,temp,temp2);
3161 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3162 }else{
3163 emit_shl(temp2,temp,temp2);
3164 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3165 }
3166 emit_or(temp2,tl,tl);
3167 //emit_storereg(rt1[i],tl); // DEBUG
3168 }
3169 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3170 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3171 if(!c||memtarget) {
3172 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3173 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3174 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3175 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3176 }
3177 else
3178 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3179 emit_testimm(temp,32);
3180 emit_andimm(temp,24,temp);
3181 if (opcode[i]==0x1A) { // LDL
3182 emit_rsbimm(temp,32,HOST_TEMPREG);
3183 emit_shl(temp2h,temp,temp2h);
3184 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3185 emit_movimm(-1,HOST_TEMPREG);
3186 emit_shl(temp2,temp,temp2);
3187 emit_cmove_reg(temp2h,th);
3188 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3189 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3190 emit_orreq(temp2,tl,tl);
3191 emit_orrne(temp2,th,th);
3192 }
3193 if (opcode[i]==0x1B) { // LDR
3194 emit_xorimm(temp,24,temp);
3195 emit_rsbimm(temp,32,HOST_TEMPREG);
3196 emit_shr(temp2,temp,temp2);
3197 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3198 emit_movimm(-1,HOST_TEMPREG);
3199 emit_shr(temp2h,temp,temp2h);
3200 emit_cmovne_reg(temp2,tl);
3201 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3202 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3203 emit_orrne(temp2h,th,th);
3204 emit_orreq(temp2h,tl,tl);
3205 }
3206 }
3207 }
3208}
3209#define loadlr_assemble loadlr_assemble_arm
3210
3211void cop0_assemble(int i,struct regstat *i_regs)
3212{
3213 if(opcode2[i]==0) // MFC0
3214 {
3215 signed char t=get_reg(i_regs->regmap,rt1[i]);
3216 char copr=(source[i]>>11)&0x1f;
3217 //assert(t>=0); // Why does this happen? OOT is weird
3218 if(t>=0) {
3d624f89 3219#ifdef MUPEN64 /// FIXME
57871462 3220 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3221 emit_movimm((source[i]>>11)&0x1f,1);
3222 emit_writeword(0,(int)&PC);
3223 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3d624f89 3224#endif
57871462 3225 if(copr==9) {
3226 emit_readword((int)&last_count,ECX);
3227 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3228 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3229 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3230 emit_writeword(HOST_CCREG,(int)&Count);
3231 }
3232 emit_call((int)MFC0);
3233 emit_readword((int)&readmem_dword,t);
3234 }
3235 }
3236 else if(opcode2[i]==4) // MTC0
3237 {
3238 signed char s=get_reg(i_regs->regmap,rs1[i]);
3239 char copr=(source[i]>>11)&0x1f;
3240 assert(s>=0);
3241 emit_writeword(s,(int)&readmem_dword);
3242 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3d624f89 3243#ifdef MUPEN64 /// FIXME
57871462 3244 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3245 emit_movimm((source[i]>>11)&0x1f,1);
3246 emit_writeword(0,(int)&PC);
3247 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3d624f89 3248#endif
57871462 3249 if(copr==9||copr==11||copr==12) {
3250 emit_readword((int)&last_count,ECX);
3251 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3252 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3253 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3254 emit_writeword(HOST_CCREG,(int)&Count);
3255 }
3256 // What a mess. The status register (12) can enable interrupts,
3257 // so needs a special case to handle a pending interrupt.
3258 // The interrupt must be taken immediately, because a subsequent
3259 // instruction might disable interrupts again.
3260 if(copr==12&&!is_delayslot) {
3261 emit_movimm(start+i*4+4,0);
3262 emit_movimm(0,1);
3263 emit_writeword(0,(int)&pcaddr);
3264 emit_writeword(1,(int)&pending_exception);
3265 }
3266 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3267 //else
3268 emit_call((int)MTC0);
3269 if(copr==9||copr==11||copr==12) {
3270 emit_readword((int)&Count,HOST_CCREG);
3271 emit_readword((int)&next_interupt,ECX);
3272 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3273 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3274 emit_writeword(ECX,(int)&last_count);
3275 emit_storereg(CCREG,HOST_CCREG);
3276 }
3277 if(copr==12) {
3278 assert(!is_delayslot);
3279 emit_readword((int)&pending_exception,14);
3280 }
3281 emit_loadreg(rs1[i],s);
3282 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3283 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3284 if(copr==12) {
3285 emit_test(14,14);
3286 emit_jne((int)&do_interrupt);
3287 }
3288 cop1_usable=0;
3289 }
3290 else
3291 {
3292 assert(opcode2[i]==0x10);
3d624f89 3293#ifndef DISABLE_TLB
57871462 3294 if((source[i]&0x3f)==0x01) // TLBR
3295 emit_call((int)TLBR);
3296 if((source[i]&0x3f)==0x02) // TLBWI
3297 emit_call((int)TLBWI_new);
3298 if((source[i]&0x3f)==0x06) { // TLBWR
3299 // The TLB entry written by TLBWR is dependent on the count,
3300 // so update the cycle count
3301 emit_readword((int)&last_count,ECX);
3302 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3303 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3304 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3305 emit_writeword(HOST_CCREG,(int)&Count);
3306 emit_call((int)TLBWR_new);
3307 }
3308 if((source[i]&0x3f)==0x08) // TLBP
3309 emit_call((int)TLBP);
3d624f89 3310#endif
57871462 3311 if((source[i]&0x3f)==0x18) // ERET
3312 {
3313 int count=ccadj[i];
3314 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3315 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3316 emit_jmp((int)jump_eret);
3317 }
3318 }
3319}
3320
3d624f89 3321void cop1_unusable(int i, struct regstat *i_regs)
3322{
3323 // XXX: should just just do the exception instead
3324 if(!cop1_usable) {
3325 int jaddr=(int)out;
3326 emit_jmp(0);
3327 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3328 cop1_usable=1;
3329 }
3330}
3331
57871462 3332void cop1_assemble(int i,struct regstat *i_regs)
3333{
3d624f89 3334#ifndef DISABLE_COP1
57871462 3335 // Check cop1 unusable
3336 if(!cop1_usable) {
3337 signed char rs=get_reg(i_regs->regmap,CSREG);
3338 assert(rs>=0);
3339 emit_testimm(rs,0x20000000);
3340 int jaddr=(int)out;
3341 emit_jeq(0);
3342 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3343 cop1_usable=1;
3344 }
3345 if (opcode2[i]==0) { // MFC1
3346 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3347 if(tl>=0) {
3348 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3349 emit_readword_indexed(0,tl,tl);
3350 }
3351 }
3352 else if (opcode2[i]==1) { // DMFC1
3353 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3354 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3355 if(tl>=0) {
3356 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3357 if(th>=0) emit_readword_indexed(4,tl,th);
3358 emit_readword_indexed(0,tl,tl);
3359 }
3360 }
3361 else if (opcode2[i]==4) { // MTC1
3362 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3363 signed char temp=get_reg(i_regs->regmap,-1);
3364 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3365 emit_writeword_indexed(sl,0,temp);
3366 }
3367 else if (opcode2[i]==5) { // DMTC1
3368 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3369 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3370 signed char temp=get_reg(i_regs->regmap,-1);
3371 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3372 emit_writeword_indexed(sh,4,temp);
3373 emit_writeword_indexed(sl,0,temp);
3374 }
3375 else if (opcode2[i]==2) // CFC1
3376 {
3377 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3378 if(tl>=0) {
3379 u_int copr=(source[i]>>11)&0x1f;
3380 if(copr==0) emit_readword((int)&FCR0,tl);
3381 if(copr==31) emit_readword((int)&FCR31,tl);
3382 }
3383 }
3384 else if (opcode2[i]==6) // CTC1
3385 {
3386 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3387 u_int copr=(source[i]>>11)&0x1f;
3388 assert(sl>=0);
3389 if(copr==31)
3390 {
3391 emit_writeword(sl,(int)&FCR31);
3392 // Set the rounding mode
3393 //FIXME
3394 //char temp=get_reg(i_regs->regmap,-1);
3395 //emit_andimm(sl,3,temp);
3396 //emit_fldcw_indexed((int)&rounding_modes,temp);
3397 }
3398 }
3d624f89 3399#else
3400 cop1_unusable(i, i_regs);
3401#endif
57871462 3402}
3403
3404void fconv_assemble_arm(int i,struct regstat *i_regs)
3405{
3d624f89 3406#ifndef DISABLE_COP1
57871462 3407 signed char temp=get_reg(i_regs->regmap,-1);
3408 assert(temp>=0);
3409 // Check cop1 unusable
3410 if(!cop1_usable) {
3411 signed char rs=get_reg(i_regs->regmap,CSREG);
3412 assert(rs>=0);
3413 emit_testimm(rs,0x20000000);
3414 int jaddr=(int)out;
3415 emit_jeq(0);
3416 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3417 cop1_usable=1;
3418 }
3419
3420 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3421 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3422 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3423 emit_flds(temp,15);
3424 emit_ftosizs(15,15); // float->int, truncate
3425 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3426 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3427 emit_fsts(15,temp);
3428 return;
3429 }
3430 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3431 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3432 emit_vldr(temp,7);
3433 emit_ftosizd(7,13); // double->int, truncate
3434 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3435 emit_fsts(13,temp);
3436 return;
3437 }
3438
3439 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3440 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3441 emit_flds(temp,13);
3442 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3443 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3444 emit_fsitos(13,15);
3445 emit_fsts(15,temp);
3446 return;
3447 }
3448 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3449 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3450 emit_flds(temp,13);
3451 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3452 emit_fsitod(13,7);
3453 emit_vstr(7,temp);
3454 return;
3455 }
3456
3457 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3458 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3459 emit_flds(temp,13);
3460 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3461 emit_fcvtds(13,7);
3462 emit_vstr(7,temp);
3463 return;
3464 }
3465 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3466 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3467 emit_vldr(temp,7);
3468 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3469 emit_fcvtsd(7,13);
3470 emit_fsts(13,temp);
3471 return;
3472 }
3473 #endif
3474
3475 // C emulation code
3476
3477 u_int hr,reglist=0;
3478 for(hr=0;hr<HOST_REGS;hr++) {
3479 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3480 }
3481 save_regs(reglist);
3482
3483 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3484 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3485 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3486 emit_call((int)cvt_s_w);
3487 }
3488 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3489 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3490 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3491 emit_call((int)cvt_d_w);
3492 }
3493 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3494 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3495 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3496 emit_call((int)cvt_s_l);
3497 }
3498 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3499 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3500 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3501 emit_call((int)cvt_d_l);
3502 }
3503
3504 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3505 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3506 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3507 emit_call((int)cvt_d_s);
3508 }
3509 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3510 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3511 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3512 emit_call((int)cvt_w_s);
3513 }
3514 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3515 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3516 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3517 emit_call((int)cvt_l_s);
3518 }
3519
3520 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3521 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3522 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3523 emit_call((int)cvt_s_d);
3524 }
3525 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3526 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3527 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3528 emit_call((int)cvt_w_d);
3529 }
3530 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3531 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3532 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3533 emit_call((int)cvt_l_d);
3534 }
3535
3536 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3537 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3538 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3539 emit_call((int)round_l_s);
3540 }
3541 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3542 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3543 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3544 emit_call((int)trunc_l_s);
3545 }
3546 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3547 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3548 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3549 emit_call((int)ceil_l_s);
3550 }
3551 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3552 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3553 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3554 emit_call((int)floor_l_s);
3555 }
3556 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3557 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3558 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3559 emit_call((int)round_w_s);
3560 }
3561 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3562 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3563 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3564 emit_call((int)trunc_w_s);
3565 }
3566 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3567 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3568 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3569 emit_call((int)ceil_w_s);
3570 }
3571 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3572 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3573 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3574 emit_call((int)floor_w_s);
3575 }
3576
3577 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3578 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3579 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3580 emit_call((int)round_l_d);
3581 }
3582 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3583 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3584 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3585 emit_call((int)trunc_l_d);
3586 }
3587 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3588 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3589 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3590 emit_call((int)ceil_l_d);
3591 }
3592 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3593 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3594 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3595 emit_call((int)floor_l_d);
3596 }
3597 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3598 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3599 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3600 emit_call((int)round_w_d);
3601 }
3602 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3603 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3604 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3605 emit_call((int)trunc_w_d);
3606 }
3607 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3608 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3609 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3610 emit_call((int)ceil_w_d);
3611 }
3612 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3613 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3614 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3615 emit_call((int)floor_w_d);
3616 }
3617
3618 restore_regs(reglist);
3d624f89 3619#else
3620 cop1_unusable(i, i_regs);
3621#endif
57871462 3622}
3623#define fconv_assemble fconv_assemble_arm
3624
3625void fcomp_assemble(int i,struct regstat *i_regs)
3626{
3d624f89 3627#ifndef DISABLE_COP1
57871462 3628 signed char fs=get_reg(i_regs->regmap,FSREG);
3629 signed char temp=get_reg(i_regs->regmap,-1);
3630 assert(temp>=0);
3631 // Check cop1 unusable
3632 if(!cop1_usable) {
3633 signed char cs=get_reg(i_regs->regmap,CSREG);
3634 assert(cs>=0);
3635 emit_testimm(cs,0x20000000);
3636 int jaddr=(int)out;
3637 emit_jeq(0);
3638 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3639 cop1_usable=1;
3640 }
3641
3642 if((source[i]&0x3f)==0x30) {
3643 emit_andimm(fs,~0x800000,fs);
3644 return;
3645 }
3646
3647 if((source[i]&0x3e)==0x38) {
3648 // sf/ngle - these should throw exceptions for NaNs
3649 emit_andimm(fs,~0x800000,fs);
3650 return;
3651 }
3652
3653 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3654 if(opcode2[i]==0x10) {
3655 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3656 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3657 emit_orimm(fs,0x800000,fs);
3658 emit_flds(temp,14);
3659 emit_flds(HOST_TEMPREG,15);
3660 emit_fcmps(14,15);
3661 emit_fmstat();
3662 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
3663 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
3664 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
3665 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
3666 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
3667 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
3668 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
3669 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
3670 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
3671 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
3672 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
3673 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
3674 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
3675 return;
3676 }
3677 if(opcode2[i]==0x11) {
3678 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3679 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
3680 emit_orimm(fs,0x800000,fs);
3681 emit_vldr(temp,6);
3682 emit_vldr(HOST_TEMPREG,7);
3683 emit_fcmpd(6,7);
3684 emit_fmstat();
3685 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
3686 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
3687 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
3688 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
3689 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
3690 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
3691 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
3692 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
3693 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
3694 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
3695 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
3696 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
3697 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
3698 return;
3699 }
3700 #endif
3701
3702 // C only
3703
3704 u_int hr,reglist=0;
3705 for(hr=0;hr<HOST_REGS;hr++) {
3706 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3707 }
3708 reglist&=~(1<<fs);
3709 save_regs(reglist);
3710 if(opcode2[i]==0x10) {
3711 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3712 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
3713 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3714 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3715 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3716 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3717 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3718 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3719 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3720 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3721 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3722 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3723 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3724 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3725 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3726 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3727 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3728 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3729 }
3730 if(opcode2[i]==0x11) {
3731 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3732 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
3733 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3734 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3735 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3736 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3737 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3738 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3739 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3740 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3741 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3742 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3743 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3744 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3745 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3746 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3747 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3748 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
3749 }
3750 restore_regs(reglist);
3751 emit_loadreg(FSREG,fs);
3d624f89 3752#else
3753 cop1_unusable(i, i_regs);
3754#endif
57871462 3755}
3756
3757void float_assemble(int i,struct regstat *i_regs)
3758{
3d624f89 3759#ifndef DISABLE_COP1
57871462 3760 signed char temp=get_reg(i_regs->regmap,-1);
3761 assert(temp>=0);
3762 // Check cop1 unusable
3763 if(!cop1_usable) {
3764 signed char cs=get_reg(i_regs->regmap,CSREG);
3765 assert(cs>=0);
3766 emit_testimm(cs,0x20000000);
3767 int jaddr=(int)out;
3768 emit_jeq(0);
3769 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3770 cop1_usable=1;
3771 }
3772
3773 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3774 if((source[i]&0x3f)==6) // mov
3775 {
3776 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3777 if(opcode2[i]==0x10) {
3778 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3779 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
3780 emit_readword_indexed(0,temp,temp);
3781 emit_writeword_indexed(temp,0,HOST_TEMPREG);
3782 }
3783 if(opcode2[i]==0x11) {
3784 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3785 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
3786 emit_vldr(temp,7);
3787 emit_vstr(7,HOST_TEMPREG);
3788 }
3789 }
3790 return;
3791 }
3792
3793 if((source[i]&0x3f)>3)
3794 {
3795 if(opcode2[i]==0x10) {
3796 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3797 emit_flds(temp,15);
3798 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3799 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3800 }
3801 if((source[i]&0x3f)==4) // sqrt
3802 emit_fsqrts(15,15);
3803 if((source[i]&0x3f)==5) // abs
3804 emit_fabss(15,15);
3805 if((source[i]&0x3f)==7) // neg
3806 emit_fnegs(15,15);
3807 emit_fsts(15,temp);
3808 }
3809 if(opcode2[i]==0x11) {
3810 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3811 emit_vldr(temp,7);
3812 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3813 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3814 }
3815 if((source[i]&0x3f)==4) // sqrt
3816 emit_fsqrtd(7,7);
3817 if((source[i]&0x3f)==5) // abs
3818 emit_fabsd(7,7);
3819 if((source[i]&0x3f)==7) // neg
3820 emit_fnegd(7,7);
3821 emit_vstr(7,temp);
3822 }
3823 return;
3824 }
3825 if((source[i]&0x3f)<4)
3826 {
3827 if(opcode2[i]==0x10) {
3828 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3829 }
3830 if(opcode2[i]==0x11) {
3831 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3832 }
3833 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
3834 if(opcode2[i]==0x10) {
3835 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3836 emit_flds(temp,15);
3837 emit_flds(HOST_TEMPREG,13);
3838 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3839 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
3840 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3841 }
3842 }
3843 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
3844 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
3845 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
3846 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
3847 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
3848 emit_fsts(15,HOST_TEMPREG);
3849 }else{
3850 emit_fsts(15,temp);
3851 }
3852 }
3853 else if(opcode2[i]==0x11) {
3854 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
3855 emit_vldr(temp,7);
3856 emit_vldr(HOST_TEMPREG,6);
3857 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3858 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
3859 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3860 }
3861 }
3862 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
3863 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
3864 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
3865 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
3866 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
3867 emit_vstr(7,HOST_TEMPREG);
3868 }else{
3869 emit_vstr(7,temp);
3870 }
3871 }
3872 }
3873 else {
3874 if(opcode2[i]==0x10) {
3875 emit_flds(temp,15);
3876 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3877 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3878 }
3879 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
3880 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
3881 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
3882 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
3883 emit_fsts(15,temp);
3884 }
3885 else if(opcode2[i]==0x11) {
3886 emit_vldr(temp,7);
3887 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3888 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3889 }
3890 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
3891 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
3892 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
3893 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
3894 emit_vstr(7,temp);
3895 }
3896 }
3897 return;
3898 }
3899 #endif
3900
3901 u_int hr,reglist=0;
3902 for(hr=0;hr<HOST_REGS;hr++) {
3903 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3904 }
3905 if(opcode2[i]==0x10) { // Single precision
3906 save_regs(reglist);
3907 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3908 if((source[i]&0x3f)<4) {
3909 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
3910 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
3911 }else{
3912 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3913 }
3914 switch(source[i]&0x3f)
3915 {
3916 case 0x00: emit_call((int)add_s);break;
3917 case 0x01: emit_call((int)sub_s);break;
3918 case 0x02: emit_call((int)mul_s);break;
3919 case 0x03: emit_call((int)div_s);break;
3920 case 0x04: emit_call((int)sqrt_s);break;
3921 case 0x05: emit_call((int)abs_s);break;
3922 case 0x06: emit_call((int)mov_s);break;
3923 case 0x07: emit_call((int)neg_s);break;
3924 }
3925 restore_regs(reglist);
3926 }
3927 if(opcode2[i]==0x11) { // Double precision
3928 save_regs(reglist);
3929 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3930 if((source[i]&0x3f)<4) {
3931 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
3932 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
3933 }else{
3934 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3935 }
3936 switch(source[i]&0x3f)
3937 {
3938 case 0x00: emit_call((int)add_d);break;
3939 case 0x01: emit_call((int)sub_d);break;
3940 case 0x02: emit_call((int)mul_d);break;
3941 case 0x03: emit_call((int)div_d);break;
3942 case 0x04: emit_call((int)sqrt_d);break;
3943 case 0x05: emit_call((int)abs_d);break;
3944 case 0x06: emit_call((int)mov_d);break;
3945 case 0x07: emit_call((int)neg_d);break;
3946 }
3947 restore_regs(reglist);
3948 }
3d624f89 3949#else
3950 cop1_unusable(i, i_regs);
3951#endif
57871462 3952}
3953
3954void multdiv_assemble_arm(int i,struct regstat *i_regs)
3955{
3956 // case 0x18: MULT
3957 // case 0x19: MULTU
3958 // case 0x1A: DIV
3959 // case 0x1B: DIVU
3960 // case 0x1C: DMULT
3961 // case 0x1D: DMULTU
3962 // case 0x1E: DDIV
3963 // case 0x1F: DDIVU
3964 if(rs1[i]&&rs2[i])
3965 {
3966 if((opcode2[i]&4)==0) // 32-bit
3967 {
3968 if(opcode2[i]==0x18) // MULT
3969 {
3970 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3971 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3972 signed char hi=get_reg(i_regs->regmap,HIREG);
3973 signed char lo=get_reg(i_regs->regmap,LOREG);
3974 assert(m1>=0);
3975 assert(m2>=0);
3976 assert(hi>=0);
3977 assert(lo>=0);
3978 emit_smull(m1,m2,hi,lo);
3979 }
3980 if(opcode2[i]==0x19) // MULTU
3981 {
3982 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3983 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3984 signed char hi=get_reg(i_regs->regmap,HIREG);
3985 signed char lo=get_reg(i_regs->regmap,LOREG);
3986 assert(m1>=0);
3987 assert(m2>=0);
3988 assert(hi>=0);
3989 assert(lo>=0);
3990 emit_umull(m1,m2,hi,lo);
3991 }
3992 if(opcode2[i]==0x1A) // DIV
3993 {
3994 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3995 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3996 assert(d1>=0);
3997 assert(d2>=0);
3998 signed char quotient=get_reg(i_regs->regmap,LOREG);
3999 signed char remainder=get_reg(i_regs->regmap,HIREG);
4000 assert(quotient>=0);
4001 assert(remainder>=0);
4002 emit_movs(d1,remainder);
4003 emit_negmi(remainder,remainder);
4004 emit_movs(d2,HOST_TEMPREG);
4005 emit_jeq((int)out+52); // Division by zero
4006 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4007 emit_clz(HOST_TEMPREG,quotient);
4008 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4009 emit_orimm(quotient,1<<31,quotient);
4010 emit_shr(quotient,quotient,quotient);
4011 emit_cmp(remainder,HOST_TEMPREG);
4012 emit_subcs(remainder,HOST_TEMPREG,remainder);
4013 emit_adcs(quotient,quotient,quotient);
4014 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4015 emit_jcc((int)out-16); // -4
4016 emit_teq(d1,d2);
4017 emit_negmi(quotient,quotient);
4018 emit_test(d1,d1);
4019 emit_negmi(remainder,remainder);
4020 }
4021 if(opcode2[i]==0x1B) // DIVU
4022 {
4023 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4024 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4025 assert(d1>=0);
4026 assert(d2>=0);
4027 signed char quotient=get_reg(i_regs->regmap,LOREG);
4028 signed char remainder=get_reg(i_regs->regmap,HIREG);
4029 assert(quotient>=0);
4030 assert(remainder>=0);
4031 emit_test(d2,d2);
4032 emit_jeq((int)out+44); // Division by zero
4033 emit_clz(d2,HOST_TEMPREG);
4034 emit_movimm(1<<31,quotient);
4035 emit_shl(d2,HOST_TEMPREG,d2);
4036 emit_mov(d1,remainder);
4037 emit_shr(quotient,HOST_TEMPREG,quotient);
4038 emit_cmp(remainder,d2);
4039 emit_subcs(remainder,d2,remainder);
4040 emit_adcs(quotient,quotient,quotient);
4041 emit_shrcc_imm(d2,1,d2);
4042 emit_jcc((int)out-16); // -4
4043 }
4044 }
4045 else // 64-bit
4046 {
4047 if(opcode2[i]==0x1C) // DMULT
4048 {
4049 assert(opcode2[i]!=0x1C);
4050 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4051 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4052 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4053 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4054 assert(m1h>=0);
4055 assert(m2h>=0);
4056 assert(m1l>=0);
4057 assert(m2l>=0);
4058 emit_pushreg(m2h);
4059 emit_pushreg(m2l);
4060 emit_pushreg(m1h);
4061 emit_pushreg(m1l);
4062 emit_call((int)&mult64);
4063 emit_popreg(m1l);
4064 emit_popreg(m1h);
4065 emit_popreg(m2l);
4066 emit_popreg(m2h);
4067 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4068 signed char hil=get_reg(i_regs->regmap,HIREG);
4069 if(hih>=0) emit_loadreg(HIREG|64,hih);
4070 if(hil>=0) emit_loadreg(HIREG,hil);
4071 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4072 signed char lol=get_reg(i_regs->regmap,LOREG);
4073 if(loh>=0) emit_loadreg(LOREG|64,loh);
4074 if(lol>=0) emit_loadreg(LOREG,lol);
4075 }
4076 if(opcode2[i]==0x1D) // DMULTU
4077 {
4078 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4079 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4080 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4081 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4082 assert(m1h>=0);
4083 assert(m2h>=0);
4084 assert(m1l>=0);
4085 assert(m2l>=0);
4086 save_regs(0x100f);
4087 if(m1l!=0) emit_mov(m1l,0);
4088 if(m1h==0) emit_readword((int)&dynarec_local,1);
4089 else if(m1h>1) emit_mov(m1h,1);
4090 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4091 else if(m2l>2) emit_mov(m2l,2);
4092 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4093 else if(m2h>3) emit_mov(m2h,3);
4094 emit_call((int)&multu64);
4095 restore_regs(0x100f);
4096 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4097 signed char hil=get_reg(i_regs->regmap,HIREG);
4098 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4099 signed char lol=get_reg(i_regs->regmap,LOREG);
4100 /*signed char temp=get_reg(i_regs->regmap,-1);
4101 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4102 signed char rl=get_reg(i_regs->regmap,HIREG);
4103 assert(m1h>=0);
4104 assert(m2h>=0);
4105 assert(m1l>=0);
4106 assert(m2l>=0);
4107 assert(temp>=0);
4108 //emit_mov(m1l,EAX);
4109 //emit_mul(m2l);
4110 emit_umull(rl,rh,m1l,m2l);
4111 emit_storereg(LOREG,rl);
4112 emit_mov(rh,temp);
4113 //emit_mov(m1h,EAX);
4114 //emit_mul(m2l);
4115 emit_umull(rl,rh,m1h,m2l);
4116 emit_adds(rl,temp,temp);
4117 emit_adcimm(rh,0,rh);
4118 emit_storereg(HIREG,rh);
4119 //emit_mov(m2h,EAX);
4120 //emit_mul(m1l);
4121 emit_umull(rl,rh,m1l,m2h);
4122 emit_adds(rl,temp,temp);
4123 emit_adcimm(rh,0,rh);
4124 emit_storereg(LOREG|64,temp);
4125 emit_mov(rh,temp);
4126 //emit_mov(m2h,EAX);
4127 //emit_mul(m1h);
4128 emit_umull(rl,rh,m1h,m2h);
4129 emit_adds(rl,temp,rl);
4130 emit_loadreg(HIREG,temp);
4131 emit_adcimm(rh,0,rh);
4132 emit_adds(rl,temp,rl);
4133 emit_adcimm(rh,0,rh);
4134 // DEBUG
4135 /*
4136 emit_pushreg(m2h);
4137 emit_pushreg(m2l);
4138 emit_pushreg(m1h);
4139 emit_pushreg(m1l);
4140 emit_call((int)&multu64);
4141 emit_popreg(m1l);
4142 emit_popreg(m1h);
4143 emit_popreg(m2l);
4144 emit_popreg(m2h);
4145 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4146 signed char hil=get_reg(i_regs->regmap,HIREG);
4147 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4148 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4149 */
4150 // Shouldn't be necessary
4151 //char loh=get_reg(i_regs->regmap,LOREG|64);
4152 //char lol=get_reg(i_regs->regmap,LOREG);
4153 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4154 //if(lol>=0) emit_loadreg(LOREG,lol);
4155 }
4156 if(opcode2[i]==0x1E) // DDIV
4157 {
4158 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4159 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4160 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4161 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4162 assert(d1h>=0);
4163 assert(d2h>=0);
4164 assert(d1l>=0);
4165 assert(d2l>=0);
4166 save_regs(0x100f);
4167 if(d1l!=0) emit_mov(d1l,0);
4168 if(d1h==0) emit_readword((int)&dynarec_local,1);
4169 else if(d1h>1) emit_mov(d1h,1);
4170 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4171 else if(d2l>2) emit_mov(d2l,2);
4172 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4173 else if(d2h>3) emit_mov(d2h,3);
4174 emit_call((int)&div64);
4175 restore_regs(0x100f);
4176 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4177 signed char hil=get_reg(i_regs->regmap,HIREG);
4178 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4179 signed char lol=get_reg(i_regs->regmap,LOREG);
4180 if(hih>=0) emit_loadreg(HIREG|64,hih);
4181 if(hil>=0) emit_loadreg(HIREG,hil);
4182 if(loh>=0) emit_loadreg(LOREG|64,loh);
4183 if(lol>=0) emit_loadreg(LOREG,lol);
4184 }
4185 if(opcode2[i]==0x1F) // DDIVU
4186 {
4187 //u_int hr,reglist=0;
4188 //for(hr=0;hr<HOST_REGS;hr++) {
4189 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4190 //}
4191 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4192 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4193 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4194 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4195 assert(d1h>=0);
4196 assert(d2h>=0);
4197 assert(d1l>=0);
4198 assert(d2l>=0);
4199 save_regs(0x100f);
4200 if(d1l!=0) emit_mov(d1l,0);
4201 if(d1h==0) emit_readword((int)&dynarec_local,1);
4202 else if(d1h>1) emit_mov(d1h,1);
4203 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4204 else if(d2l>2) emit_mov(d2l,2);
4205 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4206 else if(d2h>3) emit_mov(d2h,3);
4207 emit_call((int)&divu64);
4208 restore_regs(0x100f);
4209 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4210 signed char hil=get_reg(i_regs->regmap,HIREG);
4211 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4212 signed char lol=get_reg(i_regs->regmap,LOREG);
4213 if(hih>=0) emit_loadreg(HIREG|64,hih);
4214 if(hil>=0) emit_loadreg(HIREG,hil);
4215 if(loh>=0) emit_loadreg(LOREG|64,loh);
4216 if(lol>=0) emit_loadreg(LOREG,lol);
4217 }
4218 }
4219 }
4220 else
4221 {
4222 // Multiply by zero is zero.
4223 // MIPS does not have a divide by zero exception.
4224 // The result is undefined, we return zero.
4225 signed char hr=get_reg(i_regs->regmap,HIREG);
4226 signed char lr=get_reg(i_regs->regmap,LOREG);
4227 if(hr>=0) emit_zeroreg(hr);
4228 if(lr>=0) emit_zeroreg(lr);
4229 }
4230}
4231#define multdiv_assemble multdiv_assemble_arm
4232
// Hook for preloading the return-address hash mask into a host register.
// Deliberately emits nothing on ARM: on x86 this step puts the value 0xf8
// into the register, whereas on ARM the hash is formed with a single
// instruction (see do_rhash), so no preloaded mask is needed.
//   r: host register that would receive the mask (ignored here)
void do_preload_rhash(int r) {
  (void)r; // intentionally unused on ARM
}
4237
4238void do_preload_rhtbl(int ht) {
4239 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4240}
4241
// Emit the hash computation for a mini hash table lookup: rh = rs & 0xf8.
// The mask keeps bits 3..7, i.e. a multiple of 8 in the range 0..0xf8, which
// matches the 32 two-word entries of mini_ht when used as a byte offset.
//   rs: host register holding the address to hash
//   rh: host register that receives the hash
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4245
4246void do_miniht_load(int ht,int rh) {
4247 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4248 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4249}
4250
4251void do_miniht_jump(int rs,int rh,int ht) {
4252 emit_cmp(rh,rs);
4253 emit_ldreq_indexed(ht,4,15);
4254 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4255 emit_mov(rs,7);
4256 emit_jmp(jump_vaddr_reg[7]);
4257 #else
4258 emit_jmp(jump_vaddr_reg[rs]);
4259 #endif
4260}
4261
4262void do_miniht_insert(u_int return_address,int rt,int temp) {
4263 #ifdef ARMv5_ONLY
4264 emit_movimm(return_address,rt); // PC into link register
4265 add_to_linker((int)out,return_address,1);
4266 emit_pcreladdr(temp);
4267 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4268 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4269 #else
4270 emit_movw(return_address&0x0000FFFF,rt);
4271 add_to_linker((int)out,return_address,1);
4272 emit_pcreladdr(temp);
4273 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4274 emit_movt(return_address&0xFFFF0000,rt);
4275 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4276 #endif
4277}
4278
4279// Sign-extend to 64 bits and write out upper half of a register
4280// This is useful where we have a 32-bit value in a register, and want to
4281// keep it in a 32-bit register, but can't guarantee that it won't be read
4282// as a 64-bit value later.
4283void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4284{
4285 if(is32_pre==is32) return;
4286 int hr,reg;
4287 for(hr=0;hr<HOST_REGS;hr++) {
4288 if(hr!=EXCLUDE_REG) {
4289 //if(pre[hr]==entry[hr]) {
4290 if((reg=pre[hr])>=0) {
4291 if((dirty>>hr)&1) {
4292 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4293 emit_sarimm(hr,31,HOST_TEMPREG);
4294 emit_storereg(reg|64,HOST_TEMPREG);
4295 }
4296 }
4297 }
4298 //}
4299 }
4300 }
4301}
4302
4303void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4304{
4305 //if(dirty_pre==dirty) return;
4306 int hr,reg,new_hr;
4307 for(hr=0;hr<HOST_REGS;hr++) {
4308 if(hr!=EXCLUDE_REG) {
4309 reg=pre[hr];
4310 if(((~u)>>(reg&63))&1) {
4311 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4312 if(((dirty_pre&~dirty)>>hr)&1) {
4313 if(reg>0&&reg<34) {
4314 emit_storereg(reg,hr);
4315 if( ((is32_pre&~uu)>>reg)&1 ) {
4316 emit_sarimm(hr,31,HOST_TEMPREG);
4317 emit_storereg(reg|64,HOST_TEMPREG);
4318 }
4319 }
4320 else if(reg>=64) {
4321 emit_storereg(reg,hr);
4322 }
4323 }
4324 }
4325 else // Check if register moved to a different register
4326 if((new_hr=get_reg(entry,reg))>=0) {
4327 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4328 if(reg>0&&reg<34) {
4329 emit_storereg(reg,hr);
4330 if( ((is32_pre&~uu)>>reg)&1 ) {
4331 emit_sarimm(hr,31,HOST_TEMPREG);
4332 emit_storereg(reg|64,HOST_TEMPREG);
4333 }
4334 }
4335 else if(reg>=64) {
4336 emit_storereg(reg,hr);
4337 }
4338 }
4339 }
4340 }
4341 }
4342 }
4343}
4344
4345
4346/* using strd could possibly help but you'd have to allocate registers in pairs
4347void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4348{
4349 int hr;
4350 int wrote=-1;
4351 for(hr=HOST_REGS-1;hr>=0;hr--) {
4352 if(hr!=EXCLUDE_REG) {
4353 if(pre[hr]!=entry[hr]) {
4354 if(pre[hr]>=0) {
4355 if((dirty>>hr)&1) {
4356 if(get_reg(entry,pre[hr])<0) {
4357 if(pre[hr]<64) {
4358 if(!((u>>pre[hr])&1)) {
4359 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4360 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4361 emit_sarimm(hr,31,hr+1);
4362 emit_strdreg(pre[hr],hr);
4363 }
4364 else
4365 emit_storereg(pre[hr],hr);
4366 }else{
4367 emit_storereg(pre[hr],hr);
4368 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4369 emit_sarimm(hr,31,hr);
4370 emit_storereg(pre[hr]|64,hr);
4371 }
4372 }
4373 }
4374 }else{
4375 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4376 emit_storereg(pre[hr],hr);
4377 }
4378 }
4379 wrote=hr;
4380 }
4381 }
4382 }
4383 }
4384 }
4385 }
4386 for(hr=0;hr<HOST_REGS;hr++) {
4387 if(hr!=EXCLUDE_REG) {
4388 if(pre[hr]!=entry[hr]) {
4389 if(pre[hr]>=0) {
4390 int nr;
4391 if((nr=get_reg(entry,pre[hr]))>=0) {
4392 emit_mov(hr,nr);
4393 }
4394 }
4395 }
4396 }
4397 }
4398}
4399#define wb_invalidate wb_invalidate_arm
4400*/
4401
4402// CPU-architecture-specific initialization
4403void arch_init() {
3d624f89 4404#ifndef DISABLE_COP1
57871462 4405 rounding_modes[0]=0x0<<22; // round
4406 rounding_modes[1]=0x3<<22; // trunc
4407 rounding_modes[2]=0x1<<22; // ceil
4408 rounding_modes[3]=0x2<<22; // floor
3d624f89 4409#endif
57871462 4410}