drc: further hacks, hle handling
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
// Recompiler state referenced by this backend; none of these objects are
// defined in this file.
extern int cycle_count;
extern int last_count;
extern int pcaddr;
extern int pending_exception;
extern int branch_target;
extern uint64_t readmem_dword;
#ifdef MUPEN64
extern precomp_instr fake_pc;
#endif
// emit_loadreg/emit_storereg turn absolute addresses into offsets from
// &dynarec_local and assert that the offset is below 4096.
extern void *dynarec_local;
// Indexed by (address>>12) in verify_dirty() and get_bounds().
extern u_int memory_map[1048576];
extern u_int mini_ht[32][2];
extern u_int rounding_modes[4];
34
// Routines implemented outside this file (declared with empty parameter
// lists). There is one jump_vaddr_rN per host register number that has an
// entry in jump_vaddr_reg[]; no r11 variant is declared.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();
50
// Address of the jump_vaddr_rN routine for each of the 16 host register
// numbers. Slots 11, 13, 14 and 15 have no routine and hold 0.
const u_int jump_vaddr_reg[16] = {
  (int)jump_vaddr_r0,
  (int)jump_vaddr_r1,
  (int)jump_vaddr_r2,
  (int)jump_vaddr_r3,
  (int)jump_vaddr_r4,
  (int)jump_vaddr_r5,
  (int)jump_vaddr_r6,
  (int)jump_vaddr_r7,
  (int)jump_vaddr_r8,
  (int)jump_vaddr_r9,
  (int)jump_vaddr_r10,
  0,
  (int)jump_vaddr_r12,
  0,
  0,
  0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
145void kill_pointer(void *stub)
146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
153}
154
155int get_pointer(void *stub)
156{
157 //printf("get_pointer(%x)\n",(int)stub);
158 int *ptr=(int *)(stub+4);
159 assert((*ptr&0x0ff00000)==0x05900000);
160 u_int offset=*ptr&0xfff;
161 int **l_ptr=(void *)ptr+offset+8;
162 int *i_ptr=*l_ptr;
163 assert((*i_ptr&0x0f000000)==0x0a000000);
164 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
165}
166
167// Find the "clean" entry point from a "dirty" entry point
168// by skipping past the call to verify_code
169u_int get_clean_addr(int addr)
170{
171 int *ptr=(int *)addr;
172 #ifdef ARMv5_ONLY
173 ptr+=4;
174 #else
175 ptr+=6;
176 #endif
177 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
178 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
179 ptr++;
180 if((*ptr&0xFF000000)==0xea000000) {
181 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
182 }
183 return (u_int)ptr;
184}
185
186int verify_dirty(int addr)
187{
188 u_int *ptr=(u_int *)addr;
189 #ifdef ARMv5_ONLY
190 // get from literal pool
191 assert((*ptr&0xFFF00000)==0xe5900000);
192 u_int offset=*ptr&0xfff;
193 u_int *l_ptr=(void *)ptr+offset+8;
194 u_int source=l_ptr[0];
195 u_int copy=l_ptr[1];
196 u_int len=l_ptr[2];
197 ptr+=4;
198 #else
199 // ARMv7 movw/movt
200 assert((*ptr&0xFFF00000)==0xe3000000);
201 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
202 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
203 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
204 ptr+=6;
205 #endif
206 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
207 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 208 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 209 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
210 unsigned int page=source>>12;
211 unsigned int map_value=memory_map[page];
212 if(map_value>=0x80000000) return 0;
213 while(page<((source+len-1)>>12)) {
214 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
215 }
216 source = source+(map_value<<2);
217 }
218 //printf("verify_dirty: %x %x %x\n",source,copy,len);
219 return !memcmp((void *)source,(void *)copy,len);
220}
221
222// This doesn't necessarily find all clean entry points, just
223// guarantees that it's not dirty
224int isclean(int addr)
225{
226 #ifdef ARMv5_ONLY
227 int *ptr=((u_int *)addr)+4;
228 #else
229 int *ptr=((u_int *)addr)+6;
230 #endif
231 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
232 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
233 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
236 return 1;
237}
238
239void get_bounds(int addr,u_int *start,u_int *end)
240{
241 u_int *ptr=(u_int *)addr;
242 #ifdef ARMv5_ONLY
243 // get from literal pool
244 assert((*ptr&0xFFF00000)==0xe5900000);
245 u_int offset=*ptr&0xfff;
246 u_int *l_ptr=(void *)ptr+offset+8;
247 u_int source=l_ptr[0];
248 //u_int copy=l_ptr[1];
249 u_int len=l_ptr[2];
250 ptr+=4;
251 #else
252 // ARMv7 movw/movt
253 assert((*ptr&0xFFF00000)==0xe3000000);
254 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
255 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
256 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
257 ptr+=6;
258 #endif
259 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
260 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 261 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 262 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
263 if(memory_map[source>>12]>=0x80000000) source = 0;
264 else source = source+(memory_map[source>>12]<<2);
265 }
266 *start=source;
267 *end=source+len;
268}
269
270/* Register allocation */
271
272// Note: registers are allocated clean (unmodified state)
273// if you intend to modify the register, you must call dirty_reg().
274void alloc_reg(struct regstat *cur,int i,signed char reg)
275{
276 int r,hr;
277 int preferred_reg = (reg&7);
278 if(reg==CCREG) preferred_reg=HOST_CCREG;
279 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
280
281 // Don't allocate unused registers
282 if((cur->u>>reg)&1) return;
283
284 // see if it's already allocated
285 for(hr=0;hr<HOST_REGS;hr++)
286 {
287 if(cur->regmap[hr]==reg) return;
288 }
289
290 // Keep the same mapping if the register was already allocated in a loop
291 preferred_reg = loop_reg(i,reg,preferred_reg);
292
293 // Try to allocate the preferred register
294 if(cur->regmap[preferred_reg]==-1) {
295 cur->regmap[preferred_reg]=reg;
296 cur->dirty&=~(1<<preferred_reg);
297 cur->isconst&=~(1<<preferred_reg);
298 return;
299 }
300 r=cur->regmap[preferred_reg];
301 if(r<64&&((cur->u>>r)&1)) {
302 cur->regmap[preferred_reg]=reg;
303 cur->dirty&=~(1<<preferred_reg);
304 cur->isconst&=~(1<<preferred_reg);
305 return;
306 }
307 if(r>=64&&((cur->uu>>(r&63))&1)) {
308 cur->regmap[preferred_reg]=reg;
309 cur->dirty&=~(1<<preferred_reg);
310 cur->isconst&=~(1<<preferred_reg);
311 return;
312 }
313
314 // Clear any unneeded registers
315 // We try to keep the mapping consistent, if possible, because it
316 // makes branches easier (especially loops). So we try to allocate
317 // first (see above) before removing old mappings. If this is not
318 // possible then go ahead and clear out the registers that are no
319 // longer needed.
320 for(hr=0;hr<HOST_REGS;hr++)
321 {
322 r=cur->regmap[hr];
323 if(r>=0) {
324 if(r<64) {
325 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
326 }
327 else
328 {
329 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
330 }
331 }
332 }
333 // Try to allocate any available register, but prefer
334 // registers that have not been used recently.
335 if(i>0) {
336 for(hr=0;hr<HOST_REGS;hr++) {
337 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
338 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
339 cur->regmap[hr]=reg;
340 cur->dirty&=~(1<<hr);
341 cur->isconst&=~(1<<hr);
342 return;
343 }
344 }
345 }
346 }
347 // Try to allocate any available register
348 for(hr=0;hr<HOST_REGS;hr++) {
349 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
350 cur->regmap[hr]=reg;
351 cur->dirty&=~(1<<hr);
352 cur->isconst&=~(1<<hr);
353 return;
354 }
355 }
356
357 // Ok, now we have to evict someone
358 // Pick a register we hopefully won't need soon
359 u_char hsn[MAXREG+1];
360 memset(hsn,10,sizeof(hsn));
361 int j;
362 lsn(hsn,i,&preferred_reg);
363 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
364 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
365 if(i>0) {
366 // Don't evict the cycle count at entry points, otherwise the entry
367 // stub will have to write it.
368 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
369 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
370 for(j=10;j>=3;j--)
371 {
372 // Alloc preferred register if available
373 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
374 for(hr=0;hr<HOST_REGS;hr++) {
375 // Evict both parts of a 64-bit register
376 if((cur->regmap[hr]&63)==r) {
377 cur->regmap[hr]=-1;
378 cur->dirty&=~(1<<hr);
379 cur->isconst&=~(1<<hr);
380 }
381 }
382 cur->regmap[preferred_reg]=reg;
383 return;
384 }
385 for(r=1;r<=MAXREG;r++)
386 {
387 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
390 if(cur->regmap[hr]==r+64) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
397 }
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
400 if(cur->regmap[hr]==r) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 }
410 }
411 }
412 for(j=10;j>=0;j--)
413 {
414 for(r=1;r<=MAXREG;r++)
415 {
416 if(hsn[r]==j) {
417 for(hr=0;hr<HOST_REGS;hr++) {
418 if(cur->regmap[hr]==r+64) {
419 cur->regmap[hr]=reg;
420 cur->dirty&=~(1<<hr);
421 cur->isconst&=~(1<<hr);
422 return;
423 }
424 }
425 for(hr=0;hr<HOST_REGS;hr++) {
426 if(cur->regmap[hr]==r) {
427 cur->regmap[hr]=reg;
428 cur->dirty&=~(1<<hr);
429 cur->isconst&=~(1<<hr);
430 return;
431 }
432 }
433 }
434 }
435 }
436 printf("This shouldn't happen (alloc_reg)");exit(1);
437}
438
439void alloc_reg64(struct regstat *cur,int i,signed char reg)
440{
441 int preferred_reg = 8+(reg&1);
442 int r,hr;
443
444 // allocate the lower 32 bits
445 alloc_reg(cur,i,reg);
446
447 // Don't allocate unused registers
448 if((cur->uu>>reg)&1) return;
449
450 // see if the upper half is already allocated
451 for(hr=0;hr<HOST_REGS;hr++)
452 {
453 if(cur->regmap[hr]==reg+64) return;
454 }
455
456 // Keep the same mapping if the register was already allocated in a loop
457 preferred_reg = loop_reg(i,reg,preferred_reg);
458
459 // Try to allocate the preferred register
460 if(cur->regmap[preferred_reg]==-1) {
461 cur->regmap[preferred_reg]=reg|64;
462 cur->dirty&=~(1<<preferred_reg);
463 cur->isconst&=~(1<<preferred_reg);
464 return;
465 }
466 r=cur->regmap[preferred_reg];
467 if(r<64&&((cur->u>>r)&1)) {
468 cur->regmap[preferred_reg]=reg|64;
469 cur->dirty&=~(1<<preferred_reg);
470 cur->isconst&=~(1<<preferred_reg);
471 return;
472 }
473 if(r>=64&&((cur->uu>>(r&63))&1)) {
474 cur->regmap[preferred_reg]=reg|64;
475 cur->dirty&=~(1<<preferred_reg);
476 cur->isconst&=~(1<<preferred_reg);
477 return;
478 }
479
480 // Clear any unneeded registers
481 // We try to keep the mapping consistent, if possible, because it
482 // makes branches easier (especially loops). So we try to allocate
483 // first (see above) before removing old mappings. If this is not
484 // possible then go ahead and clear out the registers that are no
485 // longer needed.
486 for(hr=HOST_REGS-1;hr>=0;hr--)
487 {
488 r=cur->regmap[hr];
489 if(r>=0) {
490 if(r<64) {
491 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
492 }
493 else
494 {
495 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
496 }
497 }
498 }
499 // Try to allocate any available register, but prefer
500 // registers that have not been used recently.
501 if(i>0) {
502 for(hr=0;hr<HOST_REGS;hr++) {
503 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
504 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
505 cur->regmap[hr]=reg|64;
506 cur->dirty&=~(1<<hr);
507 cur->isconst&=~(1<<hr);
508 return;
509 }
510 }
511 }
512 }
513 // Try to allocate any available register
514 for(hr=0;hr<HOST_REGS;hr++) {
515 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
516 cur->regmap[hr]=reg|64;
517 cur->dirty&=~(1<<hr);
518 cur->isconst&=~(1<<hr);
519 return;
520 }
521 }
522
523 // Ok, now we have to evict someone
524 // Pick a register we hopefully won't need soon
525 u_char hsn[MAXREG+1];
526 memset(hsn,10,sizeof(hsn));
527 int j;
528 lsn(hsn,i,&preferred_reg);
529 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
530 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
531 if(i>0) {
532 // Don't evict the cycle count at entry points, otherwise the entry
533 // stub will have to write it.
534 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
535 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
536 for(j=10;j>=3;j--)
537 {
538 // Alloc preferred register if available
539 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
540 for(hr=0;hr<HOST_REGS;hr++) {
541 // Evict both parts of a 64-bit register
542 if((cur->regmap[hr]&63)==r) {
543 cur->regmap[hr]=-1;
544 cur->dirty&=~(1<<hr);
545 cur->isconst&=~(1<<hr);
546 }
547 }
548 cur->regmap[preferred_reg]=reg|64;
549 return;
550 }
551 for(r=1;r<=MAXREG;r++)
552 {
553 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
556 if(cur->regmap[hr]==r+64) {
557 cur->regmap[hr]=reg|64;
558 cur->dirty&=~(1<<hr);
559 cur->isconst&=~(1<<hr);
560 return;
561 }
562 }
563 }
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
566 if(cur->regmap[hr]==r) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 }
576 }
577 }
578 for(j=10;j>=0;j--)
579 {
580 for(r=1;r<=MAXREG;r++)
581 {
582 if(hsn[r]==j) {
583 for(hr=0;hr<HOST_REGS;hr++) {
584 if(cur->regmap[hr]==r+64) {
585 cur->regmap[hr]=reg|64;
586 cur->dirty&=~(1<<hr);
587 cur->isconst&=~(1<<hr);
588 return;
589 }
590 }
591 for(hr=0;hr<HOST_REGS;hr++) {
592 if(cur->regmap[hr]==r) {
593 cur->regmap[hr]=reg|64;
594 cur->dirty&=~(1<<hr);
595 cur->isconst&=~(1<<hr);
596 return;
597 }
598 }
599 }
600 }
601 }
602 printf("This shouldn't happen");exit(1);
603}
604
605// Allocate a temporary register. This is done without regard to
606// dirty status or whether the register we request is on the unneeded list
607// Note: This will only allocate one register, even if called multiple times
608void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
609{
610 int r,hr;
611 int preferred_reg = -1;
612
613 // see if it's already allocated
614 for(hr=0;hr<HOST_REGS;hr++)
615 {
616 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
617 }
618
619 // Try to allocate any available register
620 for(hr=HOST_REGS-1;hr>=0;hr--) {
621 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
622 cur->regmap[hr]=reg;
623 cur->dirty&=~(1<<hr);
624 cur->isconst&=~(1<<hr);
625 return;
626 }
627 }
628
629 // Find an unneeded register
630 for(hr=HOST_REGS-1;hr>=0;hr--)
631 {
632 r=cur->regmap[hr];
633 if(r>=0) {
634 if(r<64) {
635 if((cur->u>>r)&1) {
636 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
637 cur->regmap[hr]=reg;
638 cur->dirty&=~(1<<hr);
639 cur->isconst&=~(1<<hr);
640 return;
641 }
642 }
643 }
644 else
645 {
646 if((cur->uu>>(r&63))&1) {
647 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
648 cur->regmap[hr]=reg;
649 cur->dirty&=~(1<<hr);
650 cur->isconst&=~(1<<hr);
651 return;
652 }
653 }
654 }
655 }
656 }
657
658 // Ok, now we have to evict someone
659 // Pick a register we hopefully won't need soon
660 // TODO: we might want to follow unconditional jumps here
661 // TODO: get rid of dupe code and make this into a function
662 u_char hsn[MAXREG+1];
663 memset(hsn,10,sizeof(hsn));
664 int j;
665 lsn(hsn,i,&preferred_reg);
666 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
667 if(i>0) {
668 // Don't evict the cycle count at entry points, otherwise the entry
669 // stub will have to write it.
670 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
671 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
672 for(j=10;j>=3;j--)
673 {
674 for(r=1;r<=MAXREG;r++)
675 {
676 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
677 for(hr=0;hr<HOST_REGS;hr++) {
678 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
679 if(cur->regmap[hr]==r+64) {
680 cur->regmap[hr]=reg;
681 cur->dirty&=~(1<<hr);
682 cur->isconst&=~(1<<hr);
683 return;
684 }
685 }
686 }
687 for(hr=0;hr<HOST_REGS;hr++) {
688 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
689 if(cur->regmap[hr]==r) {
690 cur->regmap[hr]=reg;
691 cur->dirty&=~(1<<hr);
692 cur->isconst&=~(1<<hr);
693 return;
694 }
695 }
696 }
697 }
698 }
699 }
700 }
701 for(j=10;j>=0;j--)
702 {
703 for(r=1;r<=MAXREG;r++)
704 {
705 if(hsn[r]==j) {
706 for(hr=0;hr<HOST_REGS;hr++) {
707 if(cur->regmap[hr]==r+64) {
708 cur->regmap[hr]=reg;
709 cur->dirty&=~(1<<hr);
710 cur->isconst&=~(1<<hr);
711 return;
712 }
713 }
714 for(hr=0;hr<HOST_REGS;hr++) {
715 if(cur->regmap[hr]==r) {
716 cur->regmap[hr]=reg;
717 cur->dirty&=~(1<<hr);
718 cur->isconst&=~(1<<hr);
719 return;
720 }
721 }
722 }
723 }
724 }
725 printf("This shouldn't happen");exit(1);
726}
727// Allocate a specific ARM register.
728void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
729{
730 int n;
731
732 // see if it's already allocated (and dealloc it)
733 for(n=0;n<HOST_REGS;n++)
734 {
735 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
736 }
737
738 cur->regmap[hr]=reg;
739 cur->dirty&=~(1<<hr);
740 cur->isconst&=~(1<<hr);
741}
742
743// Alloc cycle count into dedicated register
744alloc_cc(struct regstat *cur,int i)
745{
746 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
747}
748
749/* Special alloc */
750
751
752/* Assembler */
753
// Printable name for each of the 16 host register numbers; used as the %s
// arguments of the assem_debug() trace strings in this file. Register 11
// is printed as "fp", 13-15 as "sp", "lr" and "pc".
char regname[16][4] = {
 "r0",
 "r1",
 "r2",
 "r3",
 "r4",
 "r5",
 "r6",
 "r7",
 "r8",
 "r9",
 "r10",
 "fp",
 "r12",
 "sp",
 "lr",
 "pc"};
771
772void output_byte(u_char byte)
773{
774 *(out++)=byte;
775}
776void output_modrm(u_char mod,u_char rm,u_char ext)
777{
778 assert(mod<4);
779 assert(rm<8);
780 assert(ext<8);
781 u_char byte=(mod<<6)|(ext<<3)|rm;
782 *(out++)=byte;
783}
784void output_sib(u_char scale,u_char index,u_char base)
785{
786 assert(scale<4);
787 assert(index<8);
788 assert(base<8);
789 u_char byte=(scale<<6)|(index<<3)|base;
790 *(out++)=byte;
791}
792void output_w32(u_int word)
793{
794 *((u_int *)out)=word;
795 out+=4;
796}
// Build the register fields of an instruction word: rn in bits 16-19,
// rd in bits 12-15 and rm in bits 0-3. Each register number must be below
// 16 (checked with assert()).
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  u_int fields;
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build register fields plus a shifted 8-bit immediate: rn in bits 16-19,
// rd in bits 12-15, imm in bits 0-7, and ((32-shift)&30) placed from bit 7
// upwards. `shift` must be even and `imm` below 256 (checked with
// assert()).
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  u_int rot_bits;
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  rot_bits=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rot_bits|imm;
}
// Try to express `imm` as an 8-bit value combined with an even rotation.
// On success stores the 12-bit field (rotation bits above the 8-bit value)
// in *encoded and returns 1; returns 0, leaving *encoded untouched, when no
// rotation brings the value below 256. Zero encodes as 0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  if(imm==0) {*encoded=0;return 1;}
  // Rotate right two bits at a time; rot counts down from 32 in step.
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
826u_int genjmp(u_int addr)
827{
828 int offset=addr-(int)out-8;
e80343e2 829 if(offset<-33554432||offset>=33554432) {
830 if (addr>2) {
831 printf("genjmp: out of range: %08x\n", offset);
832 exit(1);
833 }
834 return 0;
835 }
57871462 836 return ((u_int)offset>>2)&0xffffff;
837}
838
839void emit_mov(int rs,int rt)
840{
841 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
842 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
843}
844
845void emit_movs(int rs,int rt)
846{
847 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_add(int rs1,int rs2,int rt)
852{
853 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
854 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
855}
856
857void emit_adds(int rs1,int rs2,int rt)
858{
859 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adcs(int rs1,int rs2,int rt)
864{
865 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_sbc(int rs1,int rs2,int rt)
870{
871 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbcs(int rs1,int rs2,int rt)
876{
877 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_neg(int rs, int rt)
882{
883 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
884 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
885}
886
887void emit_negs(int rs, int rt)
888{
889 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_sub(int rs1,int rs2,int rt)
894{
895 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
896 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
897}
898
899void emit_subs(int rs1,int rs2,int rt)
900{
901 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_zeroreg(int rt)
906{
907 assem_debug("mov %s,#0\n",regname[rt]);
908 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
909}
910
911void emit_loadreg(int r, int hr)
912{
3d624f89 913#ifdef FORCE32
914 if(r&64) {
915 printf("64bit load in 32bit mode!\n");
916 exit(1);
917 }
918#endif
57871462 919 if((r&63)==0)
920 emit_zeroreg(hr);
921 else {
3d624f89 922 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 923 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
924 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
925 if(r==CCREG) addr=(int)&cycle_count;
926 if(r==CSREG) addr=(int)&Status;
927 if(r==FSREG) addr=(int)&FCR31;
928 if(r==INVCP) addr=(int)&invc_ptr;
929 u_int offset = addr-(u_int)&dynarec_local;
930 assert(offset<4096);
931 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
932 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
933 }
934}
935void emit_storereg(int r, int hr)
936{
3d624f89 937#ifdef FORCE32
938 if(r&64) {
939 printf("64bit store in 32bit mode!\n");
940 exit(1);
941 }
942#endif
943 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 944 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
945 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
946 if(r==CCREG) addr=(int)&cycle_count;
947 if(r==FSREG) addr=(int)&FCR31;
948 u_int offset = addr-(u_int)&dynarec_local;
949 assert(offset<4096);
950 assem_debug("str %s,fp+%d\n",regname[hr],offset);
951 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
952}
953
954void emit_test(int rs, int rt)
955{
956 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
957 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
958}
959
960void emit_testimm(int rs,int imm)
961{
962 u_int armval;
963 assem_debug("tst %s,$%d\n",regname[rs],imm);
964 assert(genimm(imm,&armval));
965 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
966}
967
968void emit_not(int rs,int rt)
969{
970 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
971 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
972}
973
974void emit_and(u_int rs1,u_int rs2,u_int rt)
975{
976 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
977 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
978}
979
980void emit_or(u_int rs1,u_int rs2,u_int rt)
981{
982 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
983 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
984}
985void emit_or_and_set_flags(int rs1,int rs2,int rt)
986{
987 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
988 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
989}
990
991void emit_xor(u_int rs1,u_int rs2,u_int rt)
992{
993 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
994 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
995}
996
997void emit_loadlp(u_int imm,u_int rt)
998{
999 add_literal((int)out,imm);
1000 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1001 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1002}
1003void emit_movw(u_int imm,u_int rt)
1004{
1005 assert(imm<65536);
1006 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1007 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1008}
1009void emit_movt(u_int imm,u_int rt)
1010{
1011 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1012 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1013}
1014void emit_movimm(u_int imm,u_int rt)
1015{
1016 u_int armval;
1017 if(genimm(imm,&armval)) {
1018 assem_debug("mov %s,#%d\n",regname[rt],imm);
1019 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1020 }else if(genimm(~imm,&armval)) {
1021 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1022 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1023 }else if(imm<65536) {
1024 #ifdef ARMv5_ONLY
1025 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1026 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1027 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1028 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1029 #else
1030 emit_movw(imm,rt);
1031 #endif
1032 }else{
1033 #ifdef ARMv5_ONLY
1034 emit_loadlp(imm,rt);
1035 #else
1036 emit_movw(imm&0x0000FFFF,rt);
1037 emit_movt(imm&0xFFFF0000,rt);
1038 #endif
1039 }
1040}
1041void emit_pcreladdr(u_int rt)
1042{
1043 assem_debug("add %s,pc,#?\n",regname[rt]);
1044 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1045}
1046
1047void emit_addimm(u_int rs,int imm,u_int rt)
1048{
1049 assert(rs<16);
1050 assert(rt<16);
1051 if(imm!=0) {
1052 assert(imm>-65536&&imm<65536);
1053 u_int armval;
1054 if(genimm(imm,&armval)) {
1055 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1056 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1057 }else if(genimm(-imm,&armval)) {
1058 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1059 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1060 }else if(imm<0) {
1061 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1062 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1063 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1064 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1065 }else{
1066 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1067 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1068 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1069 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1070 }
1071 }
1072 else if(rs!=rt) emit_mov(rs,rt);
1073}
1074
1075void emit_addimm_and_set_flags(int imm,int rt)
1076{
1077 assert(imm>-65536&&imm<65536);
1078 u_int armval;
1079 if(genimm(imm,&armval)) {
1080 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1081 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1082 }else if(genimm(-imm,&armval)) {
1083 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1084 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1085 }else if(imm<0) {
1086 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1087 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1088 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1089 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1090 }else{
1091 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1092 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1093 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1094 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1095 }
1096}
1097void emit_addimm_no_flags(u_int imm,u_int rt)
1098{
1099 emit_addimm(rt,imm,rt);
1100}
1101
1102void emit_addnop(u_int r)
1103{
1104 assert(r<16);
1105 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1106 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1107}
1108
1109void emit_adcimm(u_int rs,int imm,u_int rt)
1110{
1111 u_int armval;
1112 assert(genimm(imm,&armval));
1113 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1114 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1115}
1116/*void emit_sbcimm(int imm,u_int rt)
1117{
1118 u_int armval;
1119 assert(genimm(imm,&armval));
1120 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1121 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1122}*/
1123void emit_sbbimm(int imm,u_int rt)
1124{
1125 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1126 assert(rt<8);
1127 if(imm<128&&imm>=-128) {
1128 output_byte(0x83);
1129 output_modrm(3,rt,3);
1130 output_byte(imm);
1131 }
1132 else
1133 {
1134 output_byte(0x81);
1135 output_modrm(3,rt,3);
1136 output_w32(imm);
1137 }
1138}
1139void emit_rscimm(int rs,int imm,u_int rt)
1140{
1141 assert(0);
1142 u_int armval;
1143 assert(genimm(imm,&armval));
1144 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1145 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1146}
1147
1148void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1149{
1150 // TODO: if(genimm(imm,&armval)) ...
1151 // else
1152 emit_movimm(imm,HOST_TEMPREG);
1153 emit_adds(HOST_TEMPREG,rsl,rtl);
1154 emit_adcimm(rsh,0,rth);
1155}
1156
1157void emit_sbb(int rs1,int rs2)
1158{
1159 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1160 output_byte(0x19);
1161 output_modrm(3,rs1,rs2);
1162}
1163
1164void emit_andimm(int rs,int imm,int rt)
1165{
1166 u_int armval;
1167 if(genimm(imm,&armval)) {
1168 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1169 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1170 }else if(genimm(~imm,&armval)) {
1171 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1172 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1173 }else if(imm==65535) {
1174 #ifdef ARMv5_ONLY
1175 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1176 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1177 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1178 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1179 #else
1180 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1181 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1182 #endif
1183 }else{
1184 assert(imm>0&&imm<65535);
1185 #ifdef ARMv5_ONLY
1186 assem_debug("mov r14,#%d\n",imm&0xFF00);
1187 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1188 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1189 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1190 #else
1191 emit_movw(imm,HOST_TEMPREG);
1192 #endif
1193 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1194 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1195 }
1196}
1197
1198void emit_orimm(int rs,int imm,int rt)
1199{
1200 u_int armval;
1201 if(genimm(imm,&armval)) {
1202 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1203 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1204 }else{
1205 assert(imm>0&&imm<65536);
1206 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1207 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1208 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1209 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1210 }
1211}
1212
1213void emit_xorimm(int rs,int imm,int rt)
1214{
1215 assert(imm>0&&imm<65536);
1216 u_int armval;
1217 if(genimm(imm,&armval)) {
1218 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1219 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1220 }else{
1221 assert(imm>0);
1222 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1223 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1224 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1225 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1226 }
1227}
1228
1229void emit_shlimm(int rs,u_int imm,int rt)
1230{
1231 assert(imm>0);
1232 assert(imm<32);
1233 //if(imm==1) ...
1234 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1235 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1236}
1237
1238void emit_shrimm(int rs,u_int imm,int rt)
1239{
1240 assert(imm>0);
1241 assert(imm<32);
1242 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1243 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1244}
1245
1246void emit_sarimm(int rs,u_int imm,int rt)
1247{
1248 assert(imm>0);
1249 assert(imm<32);
1250 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1251 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1252}
1253
1254void emit_rorimm(int rs,u_int imm,int rt)
1255{
1256 assert(imm>0);
1257 assert(imm<32);
1258 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1259 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1260}
1261
1262void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1263{
1264 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1265 assert(imm>0);
1266 assert(imm<32);
1267 //if(imm==1) ...
1268 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1269 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1270 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1271 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1272}
1273
1274void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1275{
1276 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1277 assert(imm>0);
1278 assert(imm<32);
1279 //if(imm==1) ...
1280 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1281 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1282 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1283 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1284}
1285
1286void emit_shl(u_int rs,u_int shift,u_int rt)
1287{
1288 assert(rs<16);
1289 assert(rt<16);
1290 assert(shift<16);
1291 //if(imm==1) ...
1292 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1293 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1294}
1295void emit_shr(u_int rs,u_int shift,u_int rt)
1296{
1297 assert(rs<16);
1298 assert(rt<16);
1299 assert(shift<16);
1300 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1301 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1302}
1303void emit_sar(u_int rs,u_int shift,u_int rt)
1304{
1305 assert(rs<16);
1306 assert(rt<16);
1307 assert(shift<16);
1308 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1309 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1310}
1311void emit_shlcl(int r)
1312{
1313 assem_debug("shl %%%s,%%cl\n",regname[r]);
1314 assert(0);
1315}
1316void emit_shrcl(int r)
1317{
1318 assem_debug("shr %%%s,%%cl\n",regname[r]);
1319 assert(0);
1320}
1321void emit_sarcl(int r)
1322{
1323 assem_debug("sar %%%s,%%cl\n",regname[r]);
1324 assert(0);
1325}
1326
1327void emit_shldcl(int r1,int r2)
1328{
1329 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1330 assert(0);
1331}
1332void emit_shrdcl(int r1,int r2)
1333{
1334 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1335 assert(0);
1336}
1337void emit_orrshl(u_int rs,u_int shift,u_int rt)
1338{
1339 assert(rs<16);
1340 assert(rt<16);
1341 assert(shift<16);
1342 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1343 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1344}
1345void emit_orrshr(u_int rs,u_int shift,u_int rt)
1346{
1347 assert(rs<16);
1348 assert(rt<16);
1349 assert(shift<16);
1350 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1351 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1352}
1353
1354void emit_cmpimm(int rs,int imm)
1355{
1356 u_int armval;
1357 if(genimm(imm,&armval)) {
1358 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1359 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1360 }else if(genimm(-imm,&armval)) {
1361 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1362 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1363 }else if(imm>0) {
1364 assert(imm<65536);
1365 #ifdef ARMv5_ONLY
1366 emit_movimm(imm,HOST_TEMPREG);
1367 #else
1368 emit_movw(imm,HOST_TEMPREG);
1369 #endif
1370 assem_debug("cmp %s,r14\n",regname[rs]);
1371 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1372 }else{
1373 assert(imm>-65536);
1374 #ifdef ARMv5_ONLY
1375 emit_movimm(-imm,HOST_TEMPREG);
1376 #else
1377 emit_movw(-imm,HOST_TEMPREG);
1378 #endif
1379 assem_debug("cmn %s,r14\n",regname[rs]);
1380 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1381 }
1382}
1383
1384void emit_cmovne(u_int *addr,int rt)
1385{
1386 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1387 assert(0);
1388}
1389void emit_cmovl(u_int *addr,int rt)
1390{
1391 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1392 assert(0);
1393}
1394void emit_cmovs(u_int *addr,int rt)
1395{
1396 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1397 assert(0);
1398}
1399void emit_cmovne_imm(int imm,int rt)
1400{
1401 assem_debug("movne %s,#%d\n",regname[rt],imm);
1402 u_int armval;
1403 assert(genimm(imm,&armval));
1404 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1405}
1406void emit_cmovl_imm(int imm,int rt)
1407{
1408 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1409 u_int armval;
1410 assert(genimm(imm,&armval));
1411 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1412}
1413void emit_cmovb_imm(int imm,int rt)
1414{
1415 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1416 u_int armval;
1417 assert(genimm(imm,&armval));
1418 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1419}
1420void emit_cmovs_imm(int imm,int rt)
1421{
1422 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1423 u_int armval;
1424 assert(genimm(imm,&armval));
1425 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1426}
1427void emit_cmove_reg(int rs,int rt)
1428{
1429 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1430 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1431}
1432void emit_cmovne_reg(int rs,int rt)
1433{
1434 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1435 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1436}
1437void emit_cmovl_reg(int rs,int rt)
1438{
1439 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1440 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1441}
1442void emit_cmovs_reg(int rs,int rt)
1443{
1444 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1445 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1446}
1447
// rt = 1 if rs is less than imm under the "lt" condition, else 0.
// When rt aliases rs the zeroing is deferred until after the compare so the
// source value is still intact when it is read.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = 1 if rs is below imm under the "cc" condition, else 0.
// When rt aliases rs the zeroing is deferred until after the compare so the
// source value is still intact when it is read.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// rt = 1 if the signed 64-bit value rsh:rsl is less than the sign-extended
// immediate, else 0.  rsh must not be the same register as rt.
//
// The low words only decide the result when the high words are equal, and in
// that case they must be compared as unsigned quantities.  The previous code
// used the signed emit_slti32 here, which gave the wrong answer whenever bit
// 31 of the two low words differed (e.g. rsh:rsl = 0:0x80000000 against
// imm = 1 produced 1).
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  // Provisional result from the low words (valid when the high words match).
  emit_sltiu32(rsl,imm,rt);
  if(imm>=0)
  {
    // High word of the immediate is 0: any non-zero rsh overrides the
    // low-word result -- 1 if rsh is negative, 0 otherwise.
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
  else
  {
    // High word of the immediate is -1: any other rsh overrides the
    // low-word result -- 1 if rsh < -1, 0 otherwise.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
}
// Unsigned 64-bit "set if below immediate": starts from the 32-bit result on
// the low word (emit_sltiu32), then corrects it from the high word.
// rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // High word differs from -1: force the result to 1.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // Non-zero high word: force the result to 0.
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1494
1495void emit_cmp(int rs,int rt)
1496{
1497 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1498 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1499}
// rt = 1 unless rs compares "lt" against 1, in which case rt = 0.
// The compare is emitted before rt is written, so rt may alias rs.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);     // flags from rs vs 1
  emit_movimm(1,rt);     // default result
  emit_cmovl_imm(0,rt);  // override when rs < 1
}
// Flags are taken from rs (copying it into rt first when the registers
// differ), then rt is set to 1 under the "ne" condition.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// rt = 1 if the signed 64-bit value rsh:rsl is greater than zero, else 0.
//
// The low word only matters when rsh is zero, and then the value is positive
// exactly when rsl is non-zero.  The previous code seeded rt with the signed
// test emit_set_gz32(rsl), which reported 0 for values such as
// rsh:rsl = 0:0x80000000; seed with the non-zero test instead.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_nz32(rsl,rt);  // provisional: low word != 0
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);  // non-zero high word: assume positive...
  emit_cmovs_imm(0,rt);   // ...unless the high word is negative
}
// Combines rsh and rsl into rt with emit_or_and_set_flags, then sets rt to 1
// under the "ne" condition.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt); // rt = rsh|rsl, flags updated
  emit_cmovne_imm(1,rt);
}
// rt = 1 if rs1 compares "lt" against rs2, else 0.  When rt aliases either
// source, rt is cleared only after the compare has read both operands.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = 1 if rs1 compares "cc" against rs2, else 0.  When rt aliases either
// source, rt is cleared only after the compare has read both operands.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1544void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1545{
1546 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1547 assert(u1!=rt);
1548 assert(u2!=rt);
1549 emit_cmp(l1,l2);
1550 emit_movimm(0,rt);
1551 emit_sbcs(u1,u2,HOST_TEMPREG);
1552 emit_cmovl_imm(1,rt);
1553}
1554void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1555{
1556 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1557 assert(u1!=rt);
1558 assert(u2!=rt);
1559 emit_cmp(l1,l2);
1560 emit_movimm(0,rt);
1561 emit_sbcs(u1,u2,HOST_TEMPREG);
1562 emit_cmovb_imm(1,rt);
1563}
1564
1565void emit_call(int a)
1566{
1567 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1568 u_int offset=genjmp(a);
1569 output_w32(0xeb000000|offset);
1570}
1571void emit_jmp(int a)
1572{
1573 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1574 u_int offset=genjmp(a);
1575 output_w32(0xea000000|offset);
1576}
1577void emit_jne(int a)
1578{
1579 assem_debug("bne %x\n",a);
1580 u_int offset=genjmp(a);
1581 output_w32(0x1a000000|offset);
1582}
1583void emit_jeq(int a)
1584{
1585 assem_debug("beq %x\n",a);
1586 u_int offset=genjmp(a);
1587 output_w32(0x0a000000|offset);
1588}
1589void emit_js(int a)
1590{
1591 assem_debug("bmi %x\n",a);
1592 u_int offset=genjmp(a);
1593 output_w32(0x4a000000|offset);
1594}
1595void emit_jns(int a)
1596{
1597 assem_debug("bpl %x\n",a);
1598 u_int offset=genjmp(a);
1599 output_w32(0x5a000000|offset);
1600}
1601void emit_jl(int a)
1602{
1603 assem_debug("blt %x\n",a);
1604 u_int offset=genjmp(a);
1605 output_w32(0xba000000|offset);
1606}
1607void emit_jge(int a)
1608{
1609 assem_debug("bge %x\n",a);
1610 u_int offset=genjmp(a);
1611 output_w32(0xaa000000|offset);
1612}
1613void emit_jno(int a)
1614{
1615 assem_debug("bvc %x\n",a);
1616 u_int offset=genjmp(a);
1617 output_w32(0x7a000000|offset);
1618}
1619void emit_jc(int a)
1620{
1621 assem_debug("bcs %x\n",a);
1622 u_int offset=genjmp(a);
1623 output_w32(0x2a000000|offset);
1624}
1625void emit_jcc(int a)
1626{
1627 assem_debug("bcc %x\n",a);
1628 u_int offset=genjmp(a);
1629 output_w32(0x3a000000|offset);
1630}
1631
// Unimplemented stub: traces the request, then fails assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0); // not supported by this backend
}
// Unimplemented stub: traces the request, then fails assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0); // not supported by this backend
}
// Unimplemented stub: traces the request, then fails assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0); // not supported by this backend
}
1647void emit_pushreg(u_int r)
1648{
1649 assem_debug("push %%%s\n",regname[r]);
1650 assert(0);
1651}
1652void emit_popreg(u_int r)
1653{
1654 assem_debug("pop %%%s\n",regname[r]);
1655 assert(0);
1656}
1657void emit_callreg(u_int r)
1658{
1659 assem_debug("call *%%%s\n",regname[r]);
1660 assert(0);
1661}
1662void emit_jmpreg(u_int r)
1663{
1664 assem_debug("mov pc,%s\n",regname[r]);
1665 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1666}
1667
1668void emit_readword_indexed(int offset, int rs, int rt)
1669{
1670 assert(offset>-4096&&offset<4096);
1671 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1672 if(offset>=0) {
1673 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1674 }else{
1675 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1676 }
1677}
1678void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1679{
1680 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1681 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1682}
// Word load with optional map register.  A negative map means "no map": a
// plain offset load is emitted.  Otherwise addr must be 0 and the load uses
// rs plus map scaled by 4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load two consecutive words into rh (at addr) and rl (at addr+4).  The
// rh load is skipped when rh is negative.  With a map register (map>=0) the
// second word is reached by adding 1 to map -- which modifies map -- since
// the mapped load scales map by 4; rh must then differ from rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1703void emit_movsbl_indexed(int offset, int rs, int rt)
1704{
1705 assert(offset>-256&&offset<256);
1706 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1707 if(offset>=0) {
1708 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1709 }else{
1710 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1711 }
1712}
1713void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1714{
1715 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1716 else {
1717 if(addr==0) {
1718 emit_shlimm(map,2,map);
1719 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1720 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1721 }else{
1722 assert(addr>-256&&addr<256);
1723 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1724 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1725 emit_movsbl_indexed(addr, rt, rt);
1726 }
1727 }
1728}
1729void emit_movswl_indexed(int offset, int rs, int rt)
1730{
1731 assert(offset>-256&&offset<256);
1732 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1733 if(offset>=0) {
1734 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1735 }else{
1736 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1737 }
1738}
1739void emit_movzbl_indexed(int offset, int rs, int rt)
1740{
1741 assert(offset>-4096&&offset<4096);
1742 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1743 if(offset>=0) {
1744 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1745 }else{
1746 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1747 }
1748}
1749void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1750{
1751 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1752 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1753}
// Zero-extending byte load with optional map register.  Without a map
// (map<0) a plain offset load is emitted.  With a map, a non-zero addr is
// first folded into rt (rt = rs + addr) and the mapped load then goes
// through rt; a zero addr loads through rs directly.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
  }else{
    emit_movzbl_dualindexedx4(rs, map, rt);
  }
}
1766void emit_movzwl_indexed(int offset, int rs, int rt)
1767{
1768 assert(offset>-256&&offset<256);
1769 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1770 if(offset>=0) {
1771 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1772 }else{
1773 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1774 }
1775}
1776void emit_readword(int addr, int rt)
1777{
1778 u_int offset = addr-(u_int)&dynarec_local;
1779 assert(offset<4096);
1780 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1781 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1782}
1783void emit_movsbl(int addr, int rt)
1784{
1785 u_int offset = addr-(u_int)&dynarec_local;
1786 assert(offset<256);
1787 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1788 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1789}
1790void emit_movswl(int addr, int rt)
1791{
1792 u_int offset = addr-(u_int)&dynarec_local;
1793 assert(offset<256);
1794 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1795 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1796}
1797void emit_movzbl(int addr, int rt)
1798{
1799 u_int offset = addr-(u_int)&dynarec_local;
1800 assert(offset<4096);
1801 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1802 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1803}
1804void emit_movzwl(int addr, int rt)
1805{
1806 u_int offset = addr-(u_int)&dynarec_local;
1807 assert(offset<256);
1808 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1809 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1810}
1811void emit_movzwl_reg(int rs, int rt)
1812{
1813 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1814 assert(0);
1815}
1816
1817void emit_xchg(int rs, int rt)
1818{
1819 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1820 assert(0);
1821}
1822void emit_writeword_indexed(int rt, int offset, int rs)
1823{
1824 assert(offset>-4096&&offset<4096);
1825 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1826 if(offset>=0) {
1827 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1828 }else{
1829 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1830 }
1831}
1832void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1833{
1834 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1835 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1836}
// Word store with optional map register.  A negative map means "no map": a
// plain offset store is emitted.  Otherwise addr must be 0 and the store
// uses rs plus map scaled by 4.  The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store the register pair rh (at addr) and rl (at addr+4).
//  - map<0: two plain offset stores; the rh store is skipped if rh<0.
//  - map>=0: rh must be valid.  If temp is a register other than rs, temp
//    receives map+1 and indexes the second store; otherwise rs itself is
//    advanced by 4 between the two stores (modifying rs).
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1861void emit_writehword_indexed(int rt, int offset, int rs)
1862{
1863 assert(offset>-256&&offset<256);
1864 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1865 if(offset>=0) {
1866 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1867 }else{
1868 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1869 }
1870}
1871void emit_writebyte_indexed(int rt, int offset, int rs)
1872{
1873 assert(offset>-4096&&offset<4096);
1874 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1875 if(offset>=0) {
1876 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1877 }else{
1878 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1879 }
1880}
1881void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1882{
1883 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1884 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1885}
// Byte store with optional map register.  Without a map (map<0) a plain
// offset store is emitted.  With a map, a non-zero addr is first folded into
// temp (temp = rs + addr) and the mapped store goes through temp; a zero
// addr stores through rs directly.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
  }else{
    emit_writebyte_dualindexedx4(rt, rs, map);
  }
}
1898void emit_writeword(int rt, int addr)
1899{
1900 u_int offset = addr-(u_int)&dynarec_local;
1901 assert(offset<4096);
1902 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1903 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1904}
1905void emit_writehword(int rt, int addr)
1906{
1907 u_int offset = addr-(u_int)&dynarec_local;
1908 assert(offset<256);
1909 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1910 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1911}
1912void emit_writebyte(int rt, int addr)
1913{
1914 u_int offset = addr-(u_int)&dynarec_local;
1915 assert(offset<4096);
1916 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1917 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1918}
// Unimplemented stub: traces the request, then fails assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0); // not supported by this backend
}
// Unimplemented stub: traces the request, then fails assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0); // not supported by this backend
}
1929
1930void emit_mul(int rs)
1931{
1932 assem_debug("mul %%%s\n",regname[rs]);
1933 assert(0);
1934}
1935void emit_imul(int rs)
1936{
1937 assem_debug("imul %%%s\n",regname[rs]);
1938 assert(0);
1939}
1940void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1941{
1942 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1943 assert(rs1<16);
1944 assert(rs2<16);
1945 assert(hi<16);
1946 assert(lo<16);
1947 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1948}
1949void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1950{
1951 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1952 assert(rs1<16);
1953 assert(rs2<16);
1954 assert(hi<16);
1955 assert(lo<16);
1956 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1957}
1958
1959void emit_div(int rs)
1960{
1961 assem_debug("div %%%s\n",regname[rs]);
1962 assert(0);
1963}
1964void emit_idiv(int rs)
1965{
1966 assem_debug("idiv %%%s\n",regname[rs]);
1967 assert(0);
1968}
// Unimplemented stub: traces the request, then fails assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0); // not supported by this backend
}
1974
1975void emit_clz(int rs,int rt)
1976{
1977 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1978 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1979}
1980
1981void emit_subcs(int rs1,int rs2,int rt)
1982{
1983 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1984 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1985}
1986
1987void emit_shrcc_imm(int rs,u_int imm,int rt)
1988{
1989 assert(imm>0);
1990 assert(imm<32);
1991 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1992 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1993}
1994
1995void emit_negmi(int rs, int rt)
1996{
1997 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1998 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1999}
2000
2001void emit_negsmi(int rs, int rt)
2002{
2003 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2004 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2005}
2006
2007void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2008{
2009 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2010 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2011}
2012
2013void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2014{
2015 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2016 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2017}
2018
2019void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2020{
2021 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2022 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2023}
2024
2025void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2026{
2027 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2028 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2029}
2030
2031void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2032{
2033 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2034 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2035}
2036
2037void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2038{
2039 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2040 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2041}
2042
2043void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2044{
2045 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2046 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2047}
2048
2049void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2050{
2051 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2052 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2053}
2054
2055void emit_teq(int rs, int rt)
2056{
2057 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2058 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2059}
2060
2061void emit_rsbimm(int rs, int imm, int rt)
2062{
2063 u_int armval;
2064 assert(genimm(imm,&armval));
2065 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2066 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2067}
2068
2069// Load 2 immediates optimizing for small code size
2070void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2071{
2072 emit_movimm(imm1,rt1);
2073 u_int armval;
2074 if(genimm(imm2-imm1,&armval)) {
2075 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2076 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2077 }else if(genimm(imm1-imm2,&armval)) {
2078 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2079 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2080 }
2081 else emit_movimm(imm2,rt2);
2082}
2083
2084// Conditionally select one of two immediates, optimizing for small code size
2085// This will only be called if HAVE_CMOV_IMM is defined
2086void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2087{
2088 u_int armval;
2089 if(genimm(imm2-imm1,&armval)) {
2090 emit_movimm(imm1,rt);
2091 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2092 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2093 }else if(genimm(imm1-imm2,&armval)) {
2094 emit_movimm(imm1,rt);
2095 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2096 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2097 }
2098 else {
2099 #ifdef ARMv5_ONLY
2100 emit_movimm(imm1,rt);
2101 add_literal((int)out,imm2);
2102 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2103 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2104 #else
2105 emit_movw(imm1&0x0000FFFF,rt);
2106 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2107 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2108 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2109 }
2110 emit_movt(imm1&0xFFFF0000,rt);
2111 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2112 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2113 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2114 }
2115 #endif
2116 }
2117}
2118
// Special case for checking invalid_code.
// Unimplemented stub: always fails assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0); // not supported by this backend
}
2124
2125// special case for checking invalid_code
2126void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2127{
2128 assert(imm<128&&imm>=0);
2129 assert(r>=0&&r<16);
2130 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2131 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2132 emit_cmpimm(HOST_TEMPREG,imm);
2133}
2134
2135// special case for tlb mapping
2136void emit_addsr12(int rs1,int rs2,int rt)
2137{
2138 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2139 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2140}
2141
// Used to preload hash table entries.
// NOTE(review): emits two opcode bytes, a modrm byte and a 32-bit address
// rather than one 32-bit instruction word; looks like a leftover from an x86
// backend -- confirm whether it is still reachable.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2151void emit_prefetchreg(int r)
2152{
2153 assem_debug("pld %s\n",regname[r]);
2154 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2155}
2156
2157// Special case for mini_ht
2158void emit_ldreq_indexed(int rs, u_int offset, int rt)
2159{
2160 assert(offset<4096);
2161 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2162 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2163}
2164
2165void emit_flds(int r,int sr)
2166{
2167 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2168 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2169}
2170
2171void emit_vldr(int r,int vr)
2172{
2173 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2174 output_w32(0xed900b00|(vr<<12)|(r<<16));
2175}
2176
2177void emit_fsts(int sr,int r)
2178{
2179 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2180 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2181}
2182
2183void emit_vstr(int vr,int r)
2184{
2185 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2186 output_w32(0xed800b00|(vr<<12)|(r<<16));
2187}
2188
// Emits "ftosizs s<d>,s<s>": single-precision float to signed integer
// conversion (the "z" form of the mnemonic), source s<s>, destination s<d>.
void emit_ftosizs(int s,int d)
{
  int dst_hi=(d&14)<<11;
  int dst_lo=(d&1)<<22;
  int src_hi=(s&14)>>1;
  int src_lo=(s&1)<<5;
  assem_debug("ftosizs s%d,s%d\n",d,s);
  output_w32(0xeebd0ac0|dst_hi|dst_lo|src_hi|src_lo);
}
2194
// Emits "ftosizd s<d>,d<s>": double-precision float to signed integer
// conversion, source d<s> (low three bits encoded), destination s<d>.
void emit_ftosizd(int s,int d)
{
  int dst_hi=(d&14)<<11;
  int dst_lo=(d&1)<<22;
  int src=s&7;
  assem_debug("ftosizd s%d,d%d\n",d,s);
  output_w32(0xeebd0bc0|dst_hi|dst_lo|src);
}
2200
// Emits "fsitos s<d>,s<s>": signed integer to single-precision float
// conversion, source s<s>, destination s<d>.
void emit_fsitos(int s,int d)
{
  int dst_hi=(d&14)<<11;
  int dst_lo=(d&1)<<22;
  int src_hi=(s&14)>>1;
  int src_lo=(s&1)<<5;
  assem_debug("fsitos s%d,s%d\n",d,s);
  output_w32(0xeeb80ac0|dst_hi|dst_lo|src_hi|src_lo);
}
2206
// Emits "fsitod d<d>,s<s>": signed integer to double-precision float
// conversion, source s<s>, destination d<d> (low three bits encoded).
void emit_fsitod(int s,int d)
{
  int dst=(d&7)<<12;
  int src_hi=(s&14)>>1;
  int src_lo=(s&1)<<5;
  assem_debug("fsitod d%d,s%d\n",d,s);
  output_w32(0xeeb80bc0|dst|src_hi|src_lo);
}
2212
// Emits "fcvtds d<d>,s<s>": convert single precision (s<s>) to double
// precision (d<d>, low three bits encoded).
void emit_fcvtds(int s,int d)
{
  int dst=(d&7)<<12;
  int src_hi=(s&14)>>1;
  int src_lo=(s&1)<<5;
  assem_debug("fcvtds d%d,s%d\n",d,s);
  output_w32(0xeeb70ac0|dst|src_hi|src_lo);
}
2218
// Emits "fcvtsd s<d>,d<s>": convert double precision (d<s>, low three bits
// encoded) to single precision (s<d>).
void emit_fcvtsd(int s,int d)
{
  int dst_hi=(d&14)<<11;
  int dst_lo=(d&1)<<22;
  int src=s&7;
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  output_w32(0xeeb70bc0|dst_hi|dst_lo|src);
}
2224
// Emits "fsqrts s<d>,s<s>": single-precision square root of s<s> into s<d>.
// Both operands are encoded as single-precision registers, so the debug
// disassembly prints both with the "s" prefix.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2230
// Emits "fsqrtd d<d>,d<s>": double-precision square root of d<s> into d<d>.
// Both operands are encoded as double-precision registers (low three bits),
// so the debug disassembly prints both with the "d" prefix.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2236
// Emits "fabss s<d>,s<s>": single-precision absolute value of s<s> into s<d>.
// Both operands are encoded as single-precision registers, so the debug
// disassembly prints both with the "s" prefix.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2242
// Emits "fabsd d<d>,d<s>": double-precision absolute value of d<s> into d<d>.
// Both operands are encoded as double-precision registers (low three bits),
// so the debug disassembly prints both with the "d" prefix.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2248
// Emits "fnegs s<d>,s<s>": single-precision negation of s<s> into s<d>.
// Both operands are encoded as single-precision registers, so the debug
// disassembly prints both with the "s" prefix.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2254
// Emits "fnegd d<d>,d<s>": double-precision negation of d<s> into d<d>.
// Both operands are encoded as double-precision registers (low three bits),
// so the debug disassembly prints both with the "d" prefix.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2260
// Emits "fadds s<d>,s<s1>,s<s2>": single-precision add.
void emit_fadds(int s1,int s2,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22);
  int lhs=((s1&14)<<15)|((s1&1)<<7);
  int rhs=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a00|dst|lhs|rhs);
}
2266
// Emits "faddd d<d>,d<s1>,d<s2>": double-precision add. Only the low three
// bits of each register number are encoded.
void emit_faddd(int s1,int s2,int d)
{
  int dst=(d&7)<<12;
  int lhs=(s1&7)<<16;
  int rhs=s2&7;
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b00|dst|lhs|rhs);
}
2272
// Emits "fsubs s<d>,s<s1>,s<s2>": single-precision subtract.
void emit_fsubs(int s1,int s2,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22);
  int lhs=((s1&14)<<15)|((s1&1)<<7);
  int rhs=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a40|dst|lhs|rhs);
}
2278
// Emits "fsubd d<d>,d<s1>,d<s2>": double-precision subtract. Only the low
// three bits of each register number are encoded.
void emit_fsubd(int s1,int s2,int d)
{
  int dst=(d&7)<<12;
  int lhs=(s1&7)<<16;
  int rhs=s2&7;
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b40|dst|lhs|rhs);
}
2284
// Emits "fmuls s<d>,s<s1>,s<s2>": single-precision multiply.
void emit_fmuls(int s1,int s2,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22);
  int lhs=((s1&14)<<15)|((s1&1)<<7);
  int rhs=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee200a00|dst|lhs|rhs);
}
2290
// Emits "fmuld d<d>,d<s1>,d<s2>": double-precision multiply. Only the low
// three bits of each register number are encoded.
void emit_fmuld(int s1,int s2,int d)
{
  int dst=(d&7)<<12;
  int lhs=(s1&7)<<16;
  int rhs=s2&7;
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee200b00|dst|lhs|rhs);
}
2296
// Emits "fdivs s<d>,s<s1>,s<s2>": single-precision divide.
void emit_fdivs(int s1,int s2,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22);
  int lhs=((s1&14)<<15)|((s1&1)<<7);
  int rhs=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee800a00|dst|lhs|rhs);
}
2302
// Emits "fdivd d<d>,d<s1>,d<s2>": double-precision divide. Only the low
// three bits of each register number are encoded.
void emit_fdivd(int s1,int s2,int d)
{
  int dst=(d&7)<<12;
  int lhs=(s1&7)<<16;
  int rhs=s2&7;
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee800b00|dst|lhs|rhs);
}
2308
// Emits a fixed "fcmps s14, s15" compare. The x and y arguments are not
// used: the operand registers are hard-wired into the instruction word.
void emit_fcmps(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  output_w32(0xeeb47a67);
}
2314
// Emits a fixed "fcmpd d6, d7" compare. The x and y arguments are not
// used: the operand registers are hard-wired into the instruction word.
void emit_fcmpd(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  output_w32(0xeeb46b47);
}
2320
2321void emit_fmstat()
2322{
2323 assem_debug("fmstat\n");
2324 output_w32(0xeef1fa10);
2325}
2326
2327void emit_bicne_imm(int rs,int imm,int rt)
2328{
2329 u_int armval;
2330 assert(genimm(imm,&armval));
2331 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2332 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2333}
2334
2335void emit_biccs_imm(int rs,int imm,int rt)
2336{
2337 u_int armval;
2338 assert(genimm(imm,&armval));
2339 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2340 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2341}
2342
2343void emit_bicvc_imm(int rs,int imm,int rt)
2344{
2345 u_int armval;
2346 assert(genimm(imm,&armval));
2347 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2348 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2349}
2350
2351void emit_bichi_imm(int rs,int imm,int rt)
2352{
2353 u_int armval;
2354 assert(genimm(imm,&armval));
2355 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2356 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2357}
2358
2359void emit_orrvs_imm(int rs,int imm,int rt)
2360{
2361 u_int armval;
2362 assert(genimm(imm,&armval));
2363 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2364 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2365}
2366
2367void emit_jno_unlikely(int a)
2368{
2369 //emit_jno(a);
2370 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2371 output_w32(0x72800000|rd_rn_rm(15,15,0));
2372}
2373
2374// Save registers before function call
2375void save_regs(u_int reglist)
2376{
2377 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2378 if(!reglist) return;
2379 assem_debug("stmia fp,{");
2380 if(reglist&1) assem_debug("r0, ");
2381 if(reglist&2) assem_debug("r1, ");
2382 if(reglist&4) assem_debug("r2, ");
2383 if(reglist&8) assem_debug("r3, ");
2384 if(reglist&0x1000) assem_debug("r12");
2385 assem_debug("}\n");
2386 output_w32(0xe88b0000|reglist);
2387}
2388// Restore registers after function call
2389void restore_regs(u_int reglist)
2390{
2391 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2392 if(!reglist) return;
2393 assem_debug("ldmia fp,{");
2394 if(reglist&1) assem_debug("r0, ");
2395 if(reglist&2) assem_debug("r1, ");
2396 if(reglist&4) assem_debug("r2, ");
2397 if(reglist&8) assem_debug("r3, ");
2398 if(reglist&0x1000) assem_debug("r12");
2399 assem_debug("}\n");
2400 output_w32(0xe89b0000|reglist);
2401}
2402
2403// Write back consts using r14 so we don't disturb the other registers
2404void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2405{
2406 int hr;
2407 for(hr=0;hr<HOST_REGS;hr++) {
2408 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2409 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2410 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2411 int value=constmap[i][hr];
2412 if(value==0) {
2413 emit_zeroreg(HOST_TEMPREG);
2414 }
2415 else {
2416 emit_movimm(value,HOST_TEMPREG);
2417 }
2418 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2419#ifndef FORCE32
57871462 2420 if((i_is32>>i_regmap[hr])&1) {
2421 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2422 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2423 }
24385cae 2424#endif
57871462 2425 }
2426 }
2427 }
2428 }
2429}
2430
2431/* Stubs/epilogue */
2432
2433void literal_pool(int n)
2434{
2435 if(!literalcount) return;
2436 if(n) {
2437 if((int)out-literals[0][0]<4096-n) return;
2438 }
2439 u_int *ptr;
2440 int i;
2441 for(i=0;i<literalcount;i++)
2442 {
2443 ptr=(u_int *)literals[i][0];
2444 u_int offset=(u_int)out-(u_int)ptr-8;
2445 assert(offset<4096);
2446 assert(!(offset&3));
2447 *ptr|=offset;
2448 output_w32(literals[i][1]);
2449 }
2450 literalcount=0;
2451}
2452
2453void literal_pool_jumpover(int n)
2454{
2455 if(!literalcount) return;
2456 if(n) {
2457 if((int)out-literals[0][0]<4096-n) return;
2458 }
2459 int jaddr=(int)out;
2460 emit_jmp(0);
2461 literal_pool(0);
2462 set_jump_target(jaddr,(int)out);
2463}
2464
2465emit_extjump2(int addr, int target, int linker)
2466{
2467 u_char *ptr=(u_char *)addr;
2468 assert((ptr[3]&0x0e)==0xa);
2469 emit_loadlp(target,0);
2470 emit_loadlp(addr,1);
24385cae 2471 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2472 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2473//DEBUG >
2474#ifdef DEBUG_CYCLE_COUNT
2475 emit_readword((int)&last_count,ECX);
2476 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2477 emit_readword((int)&next_interupt,ECX);
2478 emit_writeword(HOST_CCREG,(int)&Count);
2479 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2480 emit_writeword(ECX,(int)&last_count);
2481#endif
2482//DEBUG <
2483 emit_jmp(linker);
2484}
2485
2486emit_extjump(int addr, int target)
2487{
2488 emit_extjump2(addr, target, (int)dyna_linker);
2489}
2490emit_extjump_ds(int addr, int target)
2491{
2492 emit_extjump2(addr, target, (int)dyna_linker_ds);
2493}
2494
// Emit the out-of-line slow path for a memory load recorded in stubs[n].
// Fields of stubs[n] as used here:
//   [0] stub type (LOADB/LOADBU/LOADH/LOADHU/LOADW/LOADD_STUB)
//   [1] address of the branch that must be pointed at this stub
//   [2] address to jump back to when the stub is done
//   [3] instruction index i
//   [4] host register holding the guest address (rs)
//   [5] pointer to the struct regstat in effect
//   [6] cycle adjustment (used as 2*stubs[n][6]+2)
//   [7] list of live host registers to save/restore
// The emitted code stores the address to `address`, saves registers, writes
// back dirty/constant registers, calls indirect_jump_indexed with r0=handler
// table, r2=cycle count, r3=PC plus flag bits, then recomputes the cycle
// count, restores registers and loads the result from readmem_dword.
2495do_readstub(int n)
2496{
2497 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2498 literal_pool(256);
2499 set_jump_target(stubs[n][1],(int)out);
2500 int type=stubs[n][0];
2501 int i=stubs[n][3];
2502 int rs=stubs[n][4];
2503 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2504 u_int reglist=stubs[n][7];
2505 signed char *i_regmap=i_regs->regmap;
2506 int addr=get_reg(i_regmap,AGEN1+(i&1));
2507 int rth,rt;
2508 int ds;
 // C1LS and LOADLR instructions receive the loaded value in FTEMP
2509 if(itype[i]==C1LS||itype[i]==LOADLR) {
2510 rth=get_reg(i_regmap,FTEMP|64);
2511 rt=get_reg(i_regmap,FTEMP);
2512 }else{
2513 rth=get_reg(i_regmap,rt1[i]|64);
2514 rt=get_reg(i_regmap,rt1[i]);
2515 }
2516 assert(rs>=0);
2517 assert(rt>=0);
2518 if(addr<0) addr=rt;
2519 assert(addr>=0);
 // Select the handler table for the access width
2520 int ftable=0;
2521 if(type==LOADB_STUB||type==LOADBU_STUB)
2522 ftable=(int)readmemb;
2523 if(type==LOADH_STUB||type==LOADHU_STUB)
2524 ftable=(int)readmemh;
2525 if(type==LOADW_STUB)
2526 ftable=(int)readmem;
24385cae 2527#ifndef FORCE32
57871462 2528 if(type==LOADD_STUB)
2529 ftable=(int)readmemd;
24385cae 2530#endif
 // With FORCE32 a LOADD_STUB leaves ftable at 0 and trips this assertion
2531 assert(ftable!=0);
57871462 2532 emit_writeword(rs,(int)&address);
2533 //emit_pusha();
2534 save_regs(reglist);
 // ds is nonzero when the register state passed in is not regs[i] itself
2535 ds=i_regs!=&regs[i];
2536 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2537 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
 // The address register and the real source register are excluded from
 // the write-back masks below
2538 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2539 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2540 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
 // Set up arguments for indirect_jump_indexed (presumably r1 = rs>>16 is
 // the index into the handler table passed in r0)
2541 emit_shrimm(rs,16,1);
2542 int cc=get_reg(i_regmap,CCREG);
2543 if(cc<0) {
2544 emit_loadreg(CCREG,2);
2545 }
2546 emit_movimm(ftable,0);
2547 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
 // r3 = instruction address, with the was32 bit of rs1 in bit 1 and ds in bit 0
2548 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2549 //emit_readword((int)&last_count,temp);
2550 //emit_add(cc,temp,cc);
2551 //emit_writeword(cc,(int)&Count);
2552 //emit_mov(15,14);
2553 emit_call((int)&indirect_jump_indexed);
2554 //emit_callreg(rs);
2555 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2556 // We really shouldn't need to update the count here,
2557 // but not doing so causes random crashes...
2558 emit_readword((int)&Count,HOST_TEMPREG);
2559 emit_readword((int)&next_interupt,2);
2560 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2561 emit_writeword(2,(int)&last_count);
2562 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2563 if(cc<0) {
2564 emit_storereg(CCREG,HOST_TEMPREG);
2565 }
2566 //emit_popa();
2567 restore_regs(reglist);
2568 //if((cc=get_reg(regmap,CCREG))>=0) {
2569 // emit_loadreg(CCREG,cc);
2570 //}
 // Fetch the result from readmem_dword using the load matching the stub type
2571 if(type==LOADB_STUB)
2572 emit_movsbl((int)&readmem_dword,rt);
2573 if(type==LOADBU_STUB)
2574 emit_movzbl((int)&readmem_dword,rt);
2575 if(type==LOADH_STUB)
2576 emit_movswl((int)&readmem_dword,rt);
2577 if(type==LOADHU_STUB)
2578 emit_movzwl((int)&readmem_dword,rt);
2579 if(type==LOADW_STUB)
2580 emit_readword((int)&readmem_dword,rt);
2581 if(type==LOADD_STUB) {
2582 emit_readword((int)&readmem_dword,rt);
2583 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2584 }
2585 emit_jmp(stubs[n][2]); // return address
2586}
2587
// Emit an inline (not out-of-line) call to a read handler for a load whose
// address is known when the code is generated.
//   type    - LOAD*_STUB constant selecting width and extension
//   i       - instruction index
//   addr    - the known guest address; the handler is taken from
//             ftable[addr>>16] when this code is generated
//   regmap  - host register mapping in effect
//   target  - guest register receiving the value (also used to locate rs)
//   adj     - cycle adjustment (scaled by CLOCK_DIVIDER)
//   reglist - live host registers to save/restore around the call
2588inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2589{
2590 int rs=get_reg(regmap,target);
2591 int rth=get_reg(regmap,target|64);
2592 int rt=get_reg(regmap,target);
2593 assert(rs>=0);
2594 assert(rt>=0);
 // Select the handler table for the access width
2595 int ftable=0;
2596 if(type==LOADB_STUB||type==LOADBU_STUB)
2597 ftable=(int)readmemb;
2598 if(type==LOADH_STUB||type==LOADHU_STUB)
2599 ftable=(int)readmemh;
2600 if(type==LOADW_STUB)
2601 ftable=(int)readmem;
24385cae 2602#ifndef FORCE32
57871462 2603 if(type==LOADD_STUB)
2604 ftable=(int)readmemd;
24385cae 2605#endif
 // With FORCE32 a LOADD_STUB leaves ftable at 0 and trips this assertion
2606 assert(ftable!=0);
57871462 2607 emit_writeword(rs,(int)&address);
2608 //emit_pusha();
2609 save_regs(reglist);
2610 //emit_shrimm(rs,16,1);
2611 int cc=get_reg(regmap,CCREG);
2612 if(cc<0) {
2613 emit_loadreg(CCREG,2);
2614 }
2615 //emit_movimm(ftable,0);
 // r0 = handler address, looked up in the table while generating code
2616 emit_movimm(((u_int *)ftable)[addr>>16],0);
2617 //emit_readword((int)&last_count,12);
2618 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
 // r3 (instruction address plus flag bits) is only set up for addresses
 // at or above 0xC0000000 when compared as signed values
2619 if((signed int)addr>=(signed int)0xC0000000) {
2620 // Pagefault address
2621 int ds=regmap!=regs[i].regmap;
2622 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2623 }
2624 //emit_add(12,2,2);
2625 //emit_writeword(2,(int)&Count);
2626 //emit_call(((u_int *)ftable)[addr>>16]);
2627 emit_call((int)&indirect_jump);
2628 // We really shouldn't need to update the count here,
2629 // but not doing so causes random crashes...
2630 emit_readword((int)&Count,HOST_TEMPREG);
2631 emit_readword((int)&next_interupt,2);
2632 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2633 emit_writeword(2,(int)&last_count);
2634 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2635 if(cc<0) {
2636 emit_storereg(CCREG,HOST_TEMPREG);
2637 }
2638 //emit_popa();
2639 restore_regs(reglist);
 // Fetch the result from readmem_dword using the load matching the stub type
2640 if(type==LOADB_STUB)
2641 emit_movsbl((int)&readmem_dword,rt);
2642 if(type==LOADBU_STUB)
2643 emit_movzbl((int)&readmem_dword,rt);
2644 if(type==LOADH_STUB)
2645 emit_movswl((int)&readmem_dword,rt);
2646 if(type==LOADHU_STUB)
2647 emit_movzwl((int)&readmem_dword,rt);
2648 if(type==LOADW_STUB)
2649 emit_readword((int)&readmem_dword,rt);
2650 if(type==LOADD_STUB) {
2651 emit_readword((int)&readmem_dword,rt);
2652 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2653 }
2654}
2655
// Emit the out-of-line slow path for a memory store recorded in stubs[n].
// Fields of stubs[n] as used here:
//   [0] stub type (STOREB/STOREH/STOREW/STORED_STUB)
//   [1] address of the branch that must be pointed at this stub
//   [2] address to jump back to when the stub is done
//   [3] instruction index i
//   [4] host register holding the guest address (rs)
//   [5] pointer to the struct regstat in effect
//   [6] cycle adjustment (used as 2*stubs[n][6]+2)
//   [7] list of live host registers to save/restore
// The emitted code stores the address to `address` and the value to
// byte/hword/word/dword, saves registers, writes back dirty/constant
// registers, calls indirect_jump_indexed with r0=handler table, r2=cycle
// count, r3=PC plus flag bits, then recomputes the cycle count and restores
// registers.
2656do_writestub(int n)
2657{
2658 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2659 literal_pool(256);
2660 set_jump_target(stubs[n][1],(int)out);
2661 int type=stubs[n][0];
2662 int i=stubs[n][3];
2663 int rs=stubs[n][4];
2664 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2665 u_int reglist=stubs[n][7];
2666 signed char *i_regmap=i_regs->regmap;
2667 int addr=get_reg(i_regmap,AGEN1+(i&1));
2668 int rth,rt,r;
2669 int ds;
 // C1LS instructions take the value to store from FTEMP; r remembers which
 // guest register was used
2670 if(itype[i]==C1LS) {
2671 rth=get_reg(i_regmap,FTEMP|64);
2672 rt=get_reg(i_regmap,r=FTEMP);
2673 }else{
2674 rth=get_reg(i_regmap,rs2[i]|64);
2675 rt=get_reg(i_regmap,r=rs2[i]);
2676 }
2677 assert(rs>=0);
2678 assert(rt>=0);
2679 if(addr<0) addr=get_reg(i_regmap,-1);
2680 assert(addr>=0);
 // Select the handler table for the access width
2681 int ftable=0;
2682 if(type==STOREB_STUB)
2683 ftable=(int)writememb;
2684 if(type==STOREH_STUB)
2685 ftable=(int)writememh;
2686 if(type==STOREW_STUB)
2687 ftable=(int)writemem;
24385cae 2688#ifndef FORCE32
57871462 2689 if(type==STORED_STUB)
2690 ftable=(int)writememd;
24385cae 2691#endif
 // With FORCE32 a STORED_STUB leaves ftable at 0 and trips this assertion
2692 assert(ftable!=0);
57871462 2693 emit_writeword(rs,(int)&address);
2694 //emit_shrimm(rs,16,rs);
2695 //emit_movmem_indexedx4(ftable,rs,rs);
 // Hand the value to the handler through the global matching the width
2696 if(type==STOREB_STUB)
2697 emit_writebyte(rt,(int)&byte);
2698 if(type==STOREH_STUB)
2699 emit_writehword(rt,(int)&hword);
2700 if(type==STOREW_STUB)
2701 emit_writeword(rt,(int)&word);
2702 if(type==STORED_STUB) {
3d624f89 2703#ifndef FORCE32
57871462 2704 emit_writeword(rt,(int)&dword);
 // when r is 0 the low-half register rt is written to both halves
2705 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2706#else
2707 printf("STORED_STUB\n");
2708#endif
57871462 2709 }
2710 //emit_pusha();
2711 save_regs(reglist);
 // ds is nonzero when the register state passed in is not regs[i] itself
2712 ds=i_regs!=&regs[i];
2713 int real_rs=get_reg(i_regmap,rs1[i]);
2714 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
 // The address register and the real source register are excluded from
 // the write-back masks below
2715 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2716 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2717 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
 // Set up arguments for indirect_jump_indexed (presumably r1 = rs>>16 is
 // the index into the handler table passed in r0)
2718 emit_shrimm(rs,16,1);
2719 int cc=get_reg(i_regmap,CCREG);
2720 if(cc<0) {
2721 emit_loadreg(CCREG,2);
2722 }
2723 emit_movimm(ftable,0);
2724 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
 // r3 = instruction address, with the was32 bit of rs1 in bit 1 and ds in bit 0
2725 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2726 //emit_readword((int)&last_count,temp);
2727 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2728 //emit_add(cc,temp,cc);
2729 //emit_writeword(cc,(int)&Count);
2730 emit_call((int)&indirect_jump_indexed);
2731 //emit_callreg(rs);
 // Recompute the cycle count from Count and next_interupt after the call
2732 emit_readword((int)&Count,HOST_TEMPREG);
2733 emit_readword((int)&next_interupt,2);
2734 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2735 emit_writeword(2,(int)&last_count);
2736 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2737 if(cc<0) {
2738 emit_storereg(CCREG,HOST_TEMPREG);
2739 }
2740 //emit_popa();
2741 restore_regs(reglist);
2742 //if((cc=get_reg(regmap,CCREG))>=0) {
2743 // emit_loadreg(CCREG,cc);
2744 //}
2745 emit_jmp(stubs[n][2]); // return address
2746}
2747
// Emit an inline (not out-of-line) call to a write handler for a store whose
// address is known when the code is generated.
//   type    - STORE*_STUB constant selecting the width
//   i       - instruction index
//   addr    - the known guest address; the handler is taken from
//             ftable[addr>>16] when this code is generated
//   regmap  - host register mapping in effect
//   target  - guest register holding the value to store
//   adj     - cycle adjustment (scaled by CLOCK_DIVIDER)
//   reglist - live host registers to save/restore around the call
// The host register holding the address is looked up as the mapping for -1.
2748inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2749{
2750 int rs=get_reg(regmap,-1);
2751 int rth=get_reg(regmap,target|64);
2752 int rt=get_reg(regmap,target);
2753 assert(rs>=0);
2754 assert(rt>=0);
 // Select the handler table for the access width
2755 int ftable=0;
2756 if(type==STOREB_STUB)
2757 ftable=(int)writememb;
2758 if(type==STOREH_STUB)
2759 ftable=(int)writememh;
2760 if(type==STOREW_STUB)
2761 ftable=(int)writemem;
24385cae 2762#ifndef FORCE32
57871462 2763 if(type==STORED_STUB)
2764 ftable=(int)writememd;
24385cae 2765#endif
 // With FORCE32 a STORED_STUB leaves ftable at 0 and trips this assertion
2766 assert(ftable!=0);
57871462 2767 emit_writeword(rs,(int)&address);
2768 //emit_shrimm(rs,16,rs);
2769 //emit_movmem_indexedx4(ftable,rs,rs);
 // Hand the value to the handler through the global matching the width
2770 if(type==STOREB_STUB)
2771 emit_writebyte(rt,(int)&byte);
2772 if(type==STOREH_STUB)
2773 emit_writehword(rt,(int)&hword);
2774 if(type==STOREW_STUB)
2775 emit_writeword(rt,(int)&word);
2776 if(type==STORED_STUB) {
3d624f89 2777#ifndef FORCE32
57871462 2778 emit_writeword(rt,(int)&dword);
 // when target is 0 the low-half register rt is written to both halves
2779 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2780#else
2781 printf("STORED_STUB\n");
2782#endif
57871462 2783 }
2784 //emit_pusha();
2785 save_regs(reglist);
2786 //emit_shrimm(rs,16,1);
2787 int cc=get_reg(regmap,CCREG);
2788 if(cc<0) {
2789 emit_loadreg(CCREG,2);
2790 }
2791 //emit_movimm(ftable,0);
 // r0 = handler address, looked up in the table while generating code
2792 emit_movimm(((u_int *)ftable)[addr>>16],0);
2793 //emit_readword((int)&last_count,12);
2794 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
 // r3 (instruction address plus flag bits) is only set up for addresses
 // at or above 0xC0000000 when compared as signed values
2795 if((signed int)addr>=(signed int)0xC0000000) {
2796 // Pagefault address
2797 int ds=regmap!=regs[i].regmap;
2798 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2799 }
2800 //emit_add(12,2,2);
2801 //emit_writeword(2,(int)&Count);
2802 //emit_call(((u_int *)ftable)[addr>>16]);
2803 emit_call((int)&indirect_jump);
 // Recompute the cycle count from Count and next_interupt after the call
2804 emit_readword((int)&Count,HOST_TEMPREG);
2805 emit_readword((int)&next_interupt,2);
2806 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2807 emit_writeword(2,(int)&last_count);
2808 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2809 if(cc<0) {
2810 emit_storereg(CCREG,HOST_TEMPREG);
2811 }
2812 //emit_popa();
2813 restore_regs(reglist);
2814}
2815
2816do_unalignedwritestub(int n)
2817{
2818 set_jump_target(stubs[n][1],(int)out);
2819 output_w32(0xef000000);
2820 emit_jmp(stubs[n][2]); // return address
2821}
2822
// Debug helper: prints the eight values it is passed in the order
// a,b,c,d,ebp,esi,edi, followed (in parentheses) by the int stored just
// before the first parameter in memory.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *before_first_arg=&edi-1;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,*before_first_arg);
}
2827
2828do_invstub(int n)
2829{
2830 literal_pool(20);
2831 u_int reglist=stubs[n][3];
2832 set_jump_target(stubs[n][1],(int)out);
2833 save_regs(reglist);
2834 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2835 emit_call((int)&invalidate_addr);
2836 restore_regs(reglist);
2837 emit_jmp(stubs[n][2]); // return address
2838}
2839
// Emit the code-verification prologue for instruction i of the current block.
// Loads r1 with the source address (source, or start when start is at or
// above 0xC0000000 as a signed value), r2 with copy, r3 with slen*4 and r0
// with the instruction's guest address, then calls verify_code (or
// verify_code_vm for the high address range).
// Returns the address of the code emitted by load_regs_entry(i), or
// instr_addr[i] if load_regs_entry emitted nothing.
2840int do_dirty_stub(int i)
2841{
2842 assem_debug("do_dirty_stub %x\n",start+i*4);
2843 // Careful about the code output here, verify_dirty needs to parse it.
2844 #ifdef ARMv5_ONLY
2845 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2846 emit_loadlp((int)copy,2);
2847 emit_loadlp(slen*4,3);
2848 #else
 // movw/movt pairs: low halves first, then high halves
2849 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2850 emit_movw(((u_int)copy)&0x0000FFFF,2);
2851 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2852 emit_movt(((u_int)copy)&0xFFFF0000,2);
2853 emit_movw(slen*4,3);
2854 #endif
2855 emit_movimm(start+i*4,0);
2856 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2857 int entry=(int)out;
2858 load_regs_entry(i);
 // If nothing was emitted, the entry point is the instruction itself
2859 if(entry==(int)out) entry=instr_addr[i];
2860 emit_jmp(instr_addr[i]);
2861 return entry;
2862}
2863
// Variant of the code-verification prologue that calls verify_code_ds.
// Loads r1 with the source address (source, or start when start is at or
// above 0xC0000000 as a signed value), r2 with copy, r3 with slen*4 and r0
// with start+1, then calls verify_code_ds. No jump is emitted afterwards.
2864void do_dirty_stub_ds()
2865{
2866 // Careful about the code output here, verify_dirty needs to parse it.
2867 #ifdef ARMv5_ONLY
2868 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2869 emit_loadlp((int)copy,2);
2870 emit_loadlp(slen*4,3);
2871 #else
 // movw/movt pairs: low halves first, then high halves
2872 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2873 emit_movw(((u_int)copy)&0x0000FFFF,2);
2874 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2875 emit_movt(((u_int)copy)&0xFFFF0000,2);
2876 emit_movw(slen*4,3);
2877 #endif
2878 emit_movimm(start+1,0);
2879 emit_call((int)&verify_code_ds);
2880}
2881
2882do_cop1stub(int n)
2883{
2884 literal_pool(256);
2885 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2886 set_jump_target(stubs[n][1],(int)out);
2887 int i=stubs[n][3];
3d624f89 2888// int rs=stubs[n][4];
57871462 2889 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2890 int ds=stubs[n][6];
2891 if(!ds) {
2892 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2893 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2894 }
2895 //else {printf("fp exception in delay slot\n");}
2896 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2897 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2898 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2899 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2900 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2901}
2902
2903/* TLB */
2904
2905int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
2906{
2907 if(c) {
2908 if((signed int)addr>=(signed int)0xC0000000) {
2909 // address_generation already loaded the const
2910 emit_readword_dualindexedx4(FP,map,map);
2911 }
2912 else
2913 return -1; // No mapping
2914 }
2915 else {
2916 assert(s!=map);
2917 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2918 emit_addsr12(map,s,map);
2919 // Schedule this while we wait on the load
2920 //if(x) emit_xorimm(s,x,ar);
2921 if(shift>=0) emit_shlimm(s,3,shift);
2922 if(~a) emit_andimm(s,a,ar);
2923 emit_readword_dualindexedx4(FP,map,map);
2924 }
2925 return map;
2926}
2927int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
2928{
2929 if(!c||(signed int)addr>=(signed int)0xC0000000) {
2930 emit_test(map,map);
2931 *jaddr=(int)out;
2932 emit_js(0);
2933 }
2934 return map;
2935}
2936
2937int gen_tlb_addr_r(int ar, int map) {
2938 if(map>=0) {
2939 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2940 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2941 }
2942}
2943
2944int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
2945{
2946 if(c) {
2947 if(addr<0x80800000||addr>=0xC0000000) {
2948 // address_generation already loaded the const
2949 emit_readword_dualindexedx4(FP,map,map);
2950 }
2951 else
2952 return -1; // No mapping
2953 }
2954 else {
2955 assert(s!=map);
2956 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2957 emit_addsr12(map,s,map);
2958 // Schedule this while we wait on the load
2959 //if(x) emit_xorimm(s,x,ar);
2960 emit_readword_dualindexedx4(FP,map,map);
2961 }
2962 return map;
2963}
2964int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
2965{
2966 if(!c||addr<0x80800000||addr>=0xC0000000) {
2967 emit_testimm(map,0x40000000);
2968 *jaddr=(int)out;
2969 emit_jne(0);
2970 }
2971}
2972
2973int gen_tlb_addr_w(int ar, int map) {
2974 if(map>=0) {
2975 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2976 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2977 }
2978}
2979
2980// Generate the address of the memory_map entry, relative to dynarec_local
2981generate_map_const(u_int addr,int reg) {
2982 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
2983 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
2984}
2985
2986/* Special assem */
2987
// Assemble a variable shift (shift amount taken from a register) for
// instruction i. Nothing is emitted when the destination rt1[i] is register 0.
// opcode2 <= 0x07 selects the 32-bit forms (SLLV=4, SRLV=6, SRAV=7);
// otherwise the 64-bit forms (DSLLV=0x14, DSRLV=0x16, DSRAV=0x17) are
// assembled from pairs of 32-bit host registers.
2988void shift_assemble_arm(int i,struct regstat *i_regs)
2989{
2990 if(rt1[i]) {
2991 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2992 {
2993 signed char s,t,shift;
2994 t=get_reg(i_regs->regmap,rt1[i]);
2995 s=get_reg(i_regs->regmap,rs1[i]);
2996 shift=get_reg(i_regs->regmap,rs2[i]);
2997 if(t>=0){
 // Source is register 0: result is 0
2998 if(rs1[i]==0)
2999 {
3000 emit_zeroreg(t);
3001 }
 // Shift amount is register 0: plain move
3002 else if(rs2[i]==0)
3003 {
3004 assert(s>=0);
3005 if(s!=t) emit_mov(s,t);
3006 }
3007 else
3008 {
 // Only the low 5 bits of the shift amount are used
3009 emit_andimm(shift,31,HOST_TEMPREG);
3010 if(opcode2[i]==4) // SLLV
3011 {
3012 emit_shl(s,HOST_TEMPREG,t);
3013 }
3014 if(opcode2[i]==6) // SRLV
3015 {
3016 emit_shr(s,HOST_TEMPREG,t);
3017 }
3018 if(opcode2[i]==7) // SRAV
3019 {
3020 emit_sar(s,HOST_TEMPREG,t);
3021 }
3022 }
3023 }
3024 } else { // DSLLV/DSRLV/DSRAV
 // h/l suffixes are the upper (reg|64) and lower halves of the 64-bit value
3025 signed char sh,sl,th,tl,shift;
3026 th=get_reg(i_regs->regmap,rt1[i]|64);
3027 tl=get_reg(i_regs->regmap,rt1[i]);
3028 sh=get_reg(i_regs->regmap,rs1[i]|64);
3029 sl=get_reg(i_regs->regmap,rs1[i]);
3030 shift=get_reg(i_regs->regmap,rs2[i]);
3031 if(tl>=0){
3032 if(rs1[i]==0)
3033 {
3034 emit_zeroreg(tl);
3035 if(th>=0) emit_zeroreg(th);
3036 }
3037 else if(rs2[i]==0)
3038 {
3039 assert(sl>=0);
3040 if(sl!=tl) emit_mov(sl,tl);
3041 if(th>=0&&sh!=th) emit_mov(sh,th);
3042 }
3043 else
3044 {
3045 // FIXME: What if shift==tl ?
3046 assert(shift!=tl);
3047 int temp=get_reg(i_regs->regmap,-1);
3048 int real_th=th;
3049 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3050 assert(sl>=0);
3051 assert(sh>=0);
3052 emit_andimm(shift,31,HOST_TEMPREG);
 // Each 64-bit form first computes the result for a shift of 0..31, then
 // tests bit 5 of the shift amount and conditionally fixes up the halves
3053 if(opcode2[i]==0x14) // DSLLV
3054 {
3055 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3056 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
 // NOTE(review): th can still be negative here (it is not replaced by temp
 // for DSLLV), yet emit_orrshr is called with it unconditionally - confirm
 // the register allocator always provides th for DSLLV.
3057 emit_orrshr(sl,HOST_TEMPREG,th);
3058 emit_andimm(shift,31,HOST_TEMPREG);
3059 emit_testimm(shift,32);
3060 emit_shl(sl,HOST_TEMPREG,tl);
3061 if(th>=0) emit_cmovne_reg(tl,th);
3062 emit_cmovne_imm(0,tl);
3063 }
3064 if(opcode2[i]==0x16) // DSRLV
3065 {
3066 assert(th>=0);
3067 emit_shr(sl,HOST_TEMPREG,tl);
3068 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3069 emit_orrshl(sh,HOST_TEMPREG,tl);
3070 emit_andimm(shift,31,HOST_TEMPREG);
3071 emit_testimm(shift,32);
3072 emit_shr(sh,HOST_TEMPREG,th);
3073 emit_cmovne_reg(th,tl);
3074 if(real_th>=0) emit_cmovne_imm(0,th);
3075 }
3076 if(opcode2[i]==0x17) // DSRAV
3077 {
3078 assert(th>=0);
3079 emit_shr(sl,HOST_TEMPREG,tl);
3080 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3081 if(real_th>=0) {
3082 assert(temp>=0);
3083 emit_sarimm(th,31,temp);
3084 }
3085 emit_orrshl(sh,HOST_TEMPREG,tl);
3086 emit_andimm(shift,31,HOST_TEMPREG);
3087 emit_testimm(shift,32);
3088 emit_sar(sh,HOST_TEMPREG,th);
3089 emit_cmovne_reg(th,tl);
3090 if(real_th>=0) emit_cmovne_reg(temp,th);
3091 }
3092 }
3093 }
3094 }
3095 }
3096}
3097#define shift_assemble shift_assemble_arm
3098
3099void loadlr_assemble_arm(int i,struct regstat *i_regs)
3100{
3101 int s,th,tl,temp,temp2,addr,map=-1;
3102 int offset;
3103 int jaddr=0;
3104 int memtarget,c=0;
3105 u_int hr,reglist=0;
3106 th=get_reg(i_regs->regmap,rt1[i]|64);
3107 tl=get_reg(i_regs->regmap,rt1[i]);
3108 s=get_reg(i_regs->regmap,rs1[i]);
3109 temp=get_reg(i_regs->regmap,-1);
3110 temp2=get_reg(i_regs->regmap,FTEMP);
3111 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3112 assert(addr<0);
3113 offset=imm[i];
3114 for(hr=0;hr<HOST_REGS;hr++) {
3115 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3116 }
3117 reglist|=1<<temp;
3118 if(offset||s<0||c) addr=temp2;
3119 else addr=s;
3120 if(s>=0) {
3121 c=(i_regs->wasconst>>s)&1;
3122 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3123 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3124 }
3125 if(tl>=0) {
3126 //assert(tl>=0);
3127 //assert(rt1[i]);
3128 if(!using_tlb) {
3129 if(!c) {
3130 emit_shlimm(addr,3,temp);
3131 if (opcode[i]==0x22||opcode[i]==0x26) {
3132 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3133 }else{
3134 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3135 }
3136 emit_cmpimm(addr,0x800000);
3137 jaddr=(int)out;
3138 emit_jno(0);
3139 }
3140 else {
3141 if (opcode[i]==0x22||opcode[i]==0x26) {
3142 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3143 }else{
3144 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3145 }
3146 }
3147 }else{ // using tlb
3148 int a;
3149 if(c) {
3150 a=-1;
3151 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3152 a=0xFFFFFFFC; // LWL/LWR
3153 }else{
3154 a=0xFFFFFFF8; // LDL/LDR
3155 }
3156 map=get_reg(i_regs->regmap,TLREG);
3157 assert(map>=0);
3158 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3159 if(c) {
3160 if (opcode[i]==0x22||opcode[i]==0x26) {
3161 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3162 }else{
3163 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3164 }
3165 }
3166 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3167 }
3168 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3169 if(!c||memtarget) {
3170 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3171 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3172 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3173 }
3174 else
3175 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3176 emit_andimm(temp,24,temp);
2002a1db 3177#ifdef BIG_ENDIAN_MIPS
3178 if (opcode[i]==0x26) // LWR
3179#else
3180 if (opcode[i]==0x22) // LWL
3181#endif
3182 emit_xorimm(temp,24,temp);
57871462 3183 emit_movimm(-1,HOST_TEMPREG);
3184 if (opcode[i]==0x26) {
3185 emit_shr(temp2,temp,temp2);
3186 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3187 }else{
3188 emit_shl(temp2,temp,temp2);
3189 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3190 }
3191 emit_or(temp2,tl,tl);
3192 //emit_storereg(rt1[i],tl); // DEBUG
3193 }
3194 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3195 // FIXME: little endian
57871462 3196 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3197 if(!c||memtarget) {
3198 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3199 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3200 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3201 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3202 }
3203 else
3204 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3205 emit_testimm(temp,32);
3206 emit_andimm(temp,24,temp);
3207 if (opcode[i]==0x1A) { // LDL
3208 emit_rsbimm(temp,32,HOST_TEMPREG);
3209 emit_shl(temp2h,temp,temp2h);
3210 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3211 emit_movimm(-1,HOST_TEMPREG);
3212 emit_shl(temp2,temp,temp2);
3213 emit_cmove_reg(temp2h,th);
3214 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3215 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3216 emit_orreq(temp2,tl,tl);
3217 emit_orrne(temp2,th,th);
3218 }
3219 if (opcode[i]==0x1B) { // LDR
3220 emit_xorimm(temp,24,temp);
3221 emit_rsbimm(temp,32,HOST_TEMPREG);
3222 emit_shr(temp2,temp,temp2);
3223 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3224 emit_movimm(-1,HOST_TEMPREG);
3225 emit_shr(temp2h,temp,temp2h);
3226 emit_cmovne_reg(temp2,tl);
3227 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3228 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3229 emit_orrne(temp2h,th,th);
3230 emit_orreq(temp2h,tl,tl);
3231 }
3232 }
3233 }
3234}
// Alias: the name loadlr_assemble resolves to the ARM-specific
// implementation loadlr_assemble_arm defined above.
#define loadlr_assemble loadlr_assemble_arm
3236
3237void cop0_assemble(int i,struct regstat *i_regs)
3238{
3239 if(opcode2[i]==0) // MFC0
3240 {
3241 signed char t=get_reg(i_regs->regmap,rt1[i]);
3242 char copr=(source[i]>>11)&0x1f;
3243 //assert(t>=0); // Why does this happen? OOT is weird
3244 if(t>=0) {
7139f3c8 3245#ifdef MUPEN64
57871462 3246 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3247 emit_movimm((source[i]>>11)&0x1f,1);
3248 emit_writeword(0,(int)&PC);
3249 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3250 if(copr==9) {
3251 emit_readword((int)&last_count,ECX);
3252 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3253 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3254 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3255 emit_writeword(HOST_CCREG,(int)&Count);
3256 }
3257 emit_call((int)MFC0);
3258 emit_readword((int)&readmem_dword,t);
7139f3c8 3259#else
3260 emit_readword((int)&reg_cop0+copr*4,t);
3261#endif
57871462 3262 }
3263 }
3264 else if(opcode2[i]==4) // MTC0
3265 {
3266 signed char s=get_reg(i_regs->regmap,rs1[i]);
3267 char copr=(source[i]>>11)&0x1f;
3268 assert(s>=0);
3269 emit_writeword(s,(int)&readmem_dword);
3270 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3d624f89 3271#ifdef MUPEN64 /// FIXME
57871462 3272 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3273 emit_movimm((source[i]>>11)&0x1f,1);
3274 emit_writeword(0,(int)&PC);
3275 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3d624f89 3276#endif
7139f3c8 3277#ifdef PCSX
3278 emit_movimm(source[i],0);
3279 emit_writeword(0,(int)&psxRegs.code);
3280#endif
3281 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3282 emit_readword((int)&last_count,ECX);
3283 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3284 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3285 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3286 emit_writeword(HOST_CCREG,(int)&Count);
3287 }
3288 // What a mess. The status register (12) can enable interrupts,
3289 // so needs a special case to handle a pending interrupt.
3290 // The interrupt must be taken immediately, because a subsequent
3291 // instruction might disable interrupts again.
7139f3c8 3292 if(copr==12||copr==13) {
57871462 3293 emit_movimm(start+i*4+4,0);
3294 emit_movimm(0,1);
3295 emit_writeword(0,(int)&pcaddr);
3296 emit_writeword(1,(int)&pending_exception);
3297 }
3298 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3299 //else
3300 emit_call((int)MTC0);
7139f3c8 3301 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3302 emit_readword((int)&Count,HOST_CCREG);
3303 emit_readword((int)&next_interupt,ECX);
3304 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3305 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3306 emit_writeword(ECX,(int)&last_count);
3307 emit_storereg(CCREG,HOST_CCREG);
3308 }
7139f3c8 3309 if(copr==12||copr==13) {
57871462 3310 assert(!is_delayslot);
3311 emit_readword((int)&pending_exception,14);
3312 }
3313 emit_loadreg(rs1[i],s);
3314 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3315 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3316 if(copr==12||copr==13) {
57871462 3317 emit_test(14,14);
3318 emit_jne((int)&do_interrupt);
3319 }
3320 cop1_usable=0;
3321 }
3322 else
3323 {
3324 assert(opcode2[i]==0x10);
3d624f89 3325#ifndef DISABLE_TLB
57871462 3326 if((source[i]&0x3f)==0x01) // TLBR
3327 emit_call((int)TLBR);
3328 if((source[i]&0x3f)==0x02) // TLBWI
3329 emit_call((int)TLBWI_new);
3330 if((source[i]&0x3f)==0x06) { // TLBWR
3331 // The TLB entry written by TLBWR is dependent on the count,
3332 // so update the cycle count
3333 emit_readword((int)&last_count,ECX);
3334 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3335 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3336 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3337 emit_writeword(HOST_CCREG,(int)&Count);
3338 emit_call((int)TLBWR_new);
3339 }
3340 if((source[i]&0x3f)==0x08) // TLBP
3341 emit_call((int)TLBP);
3d624f89 3342#endif
57871462 3343 if((source[i]&0x3f)==0x18) // ERET
3344 {
3345 int count=ccadj[i];
3346 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3347 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3348 emit_jmp((int)jump_eret);
3349 }
3350 }
3351}
3352
3d624f89 3353void cop1_unusable(int i, struct regstat *i_regs)
3354{
3355 // XXX: should just just do the exception instead
3356 if(!cop1_usable) {
3357 int jaddr=(int)out;
3358 emit_jmp(0);
3359 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3360 cop1_usable=1;
3361 }
3362}
3363
57871462 3364void cop1_assemble(int i,struct regstat *i_regs)
3365{
3d624f89 3366#ifndef DISABLE_COP1
57871462 3367 // Check cop1 unusable
3368 if(!cop1_usable) {
3369 signed char rs=get_reg(i_regs->regmap,CSREG);
3370 assert(rs>=0);
3371 emit_testimm(rs,0x20000000);
3372 int jaddr=(int)out;
3373 emit_jeq(0);
3374 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3375 cop1_usable=1;
3376 }
3377 if (opcode2[i]==0) { // MFC1
3378 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3379 if(tl>=0) {
3380 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3381 emit_readword_indexed(0,tl,tl);
3382 }
3383 }
3384 else if (opcode2[i]==1) { // DMFC1
3385 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3386 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3387 if(tl>=0) {
3388 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3389 if(th>=0) emit_readword_indexed(4,tl,th);
3390 emit_readword_indexed(0,tl,tl);
3391 }
3392 }
3393 else if (opcode2[i]==4) { // MTC1
3394 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3395 signed char temp=get_reg(i_regs->regmap,-1);
3396 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3397 emit_writeword_indexed(sl,0,temp);
3398 }
3399 else if (opcode2[i]==5) { // DMTC1
3400 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3401 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3402 signed char temp=get_reg(i_regs->regmap,-1);
3403 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3404 emit_writeword_indexed(sh,4,temp);
3405 emit_writeword_indexed(sl,0,temp);
3406 }
3407 else if (opcode2[i]==2) // CFC1
3408 {
3409 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3410 if(tl>=0) {
3411 u_int copr=(source[i]>>11)&0x1f;
3412 if(copr==0) emit_readword((int)&FCR0,tl);
3413 if(copr==31) emit_readword((int)&FCR31,tl);
3414 }
3415 }
3416 else if (opcode2[i]==6) // CTC1
3417 {
3418 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3419 u_int copr=(source[i]>>11)&0x1f;
3420 assert(sl>=0);
3421 if(copr==31)
3422 {
3423 emit_writeword(sl,(int)&FCR31);
3424 // Set the rounding mode
3425 //FIXME
3426 //char temp=get_reg(i_regs->regmap,-1);
3427 //emit_andimm(sl,3,temp);
3428 //emit_fldcw_indexed((int)&rounding_modes,temp);
3429 }
3430 }
3d624f89 3431#else
3432 cop1_unusable(i, i_regs);
3433#endif
57871462 3434}
3435
3436void fconv_assemble_arm(int i,struct regstat *i_regs)
3437{
3d624f89 3438#ifndef DISABLE_COP1
57871462 3439 signed char temp=get_reg(i_regs->regmap,-1);
3440 assert(temp>=0);
3441 // Check cop1 unusable
3442 if(!cop1_usable) {
3443 signed char rs=get_reg(i_regs->regmap,CSREG);
3444 assert(rs>=0);
3445 emit_testimm(rs,0x20000000);
3446 int jaddr=(int)out;
3447 emit_jeq(0);
3448 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3449 cop1_usable=1;
3450 }
3451
3452 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3453 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3454 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3455 emit_flds(temp,15);
3456 emit_ftosizs(15,15); // float->int, truncate
3457 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3458 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3459 emit_fsts(15,temp);
3460 return;
3461 }
3462 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3463 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3464 emit_vldr(temp,7);
3465 emit_ftosizd(7,13); // double->int, truncate
3466 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3467 emit_fsts(13,temp);
3468 return;
3469 }
3470
3471 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3472 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3473 emit_flds(temp,13);
3474 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3475 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3476 emit_fsitos(13,15);
3477 emit_fsts(15,temp);
3478 return;
3479 }
3480 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3481 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3482 emit_flds(temp,13);
3483 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3484 emit_fsitod(13,7);
3485 emit_vstr(7,temp);
3486 return;
3487 }
3488
3489 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3490 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3491 emit_flds(temp,13);
3492 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3493 emit_fcvtds(13,7);
3494 emit_vstr(7,temp);
3495 return;
3496 }
3497 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3498 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3499 emit_vldr(temp,7);
3500 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3501 emit_fcvtsd(7,13);
3502 emit_fsts(13,temp);
3503 return;
3504 }
3505 #endif
3506
3507 // C emulation code
3508
3509 u_int hr,reglist=0;
3510 for(hr=0;hr<HOST_REGS;hr++) {
3511 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3512 }
3513 save_regs(reglist);
3514
3515 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3516 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3517 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3518 emit_call((int)cvt_s_w);
3519 }
3520 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3521 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3522 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3523 emit_call((int)cvt_d_w);
3524 }
3525 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3526 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3527 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3528 emit_call((int)cvt_s_l);
3529 }
3530 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3531 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3532 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3533 emit_call((int)cvt_d_l);
3534 }
3535
3536 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3537 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3538 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3539 emit_call((int)cvt_d_s);
3540 }
3541 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3542 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3543 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3544 emit_call((int)cvt_w_s);
3545 }
3546 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3547 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3548 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3549 emit_call((int)cvt_l_s);
3550 }
3551
3552 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3553 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3554 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3555 emit_call((int)cvt_s_d);
3556 }
3557 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3558 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3559 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3560 emit_call((int)cvt_w_d);
3561 }
3562 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3563 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3564 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3565 emit_call((int)cvt_l_d);
3566 }
3567
3568 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3569 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3570 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3571 emit_call((int)round_l_s);
3572 }
3573 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3574 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3575 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3576 emit_call((int)trunc_l_s);
3577 }
3578 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3579 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3580 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3581 emit_call((int)ceil_l_s);
3582 }
3583 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3584 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3585 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3586 emit_call((int)floor_l_s);
3587 }
3588 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3589 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3590 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3591 emit_call((int)round_w_s);
3592 }
3593 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3594 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3595 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3596 emit_call((int)trunc_w_s);
3597 }
3598 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3599 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3600 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3601 emit_call((int)ceil_w_s);
3602 }
3603 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3604 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3605 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3606 emit_call((int)floor_w_s);
3607 }
3608
3609 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3610 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3611 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3612 emit_call((int)round_l_d);
3613 }
3614 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3615 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3616 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3617 emit_call((int)trunc_l_d);
3618 }
3619 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3620 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3621 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3622 emit_call((int)ceil_l_d);
3623 }
3624 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3625 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3626 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3627 emit_call((int)floor_l_d);
3628 }
3629 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3630 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3631 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3632 emit_call((int)round_w_d);
3633 }
3634 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3635 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3636 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3637 emit_call((int)trunc_w_d);
3638 }
3639 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3640 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3641 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3642 emit_call((int)ceil_w_d);
3643 }
3644 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3645 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3646 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3647 emit_call((int)floor_w_d);
3648 }
3649
3650 restore_regs(reglist);
3d624f89 3651#else
3652 cop1_unusable(i, i_regs);
3653#endif
57871462 3654}
3655#define fconv_assemble fconv_assemble_arm
3656
3657void fcomp_assemble(int i,struct regstat *i_regs)
3658{
3d624f89 3659#ifndef DISABLE_COP1
57871462 3660 signed char fs=get_reg(i_regs->regmap,FSREG);
3661 signed char temp=get_reg(i_regs->regmap,-1);
3662 assert(temp>=0);
3663 // Check cop1 unusable
3664 if(!cop1_usable) {
3665 signed char cs=get_reg(i_regs->regmap,CSREG);
3666 assert(cs>=0);
3667 emit_testimm(cs,0x20000000);
3668 int jaddr=(int)out;
3669 emit_jeq(0);
3670 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3671 cop1_usable=1;
3672 }
3673
3674 if((source[i]&0x3f)==0x30) {
3675 emit_andimm(fs,~0x800000,fs);
3676 return;
3677 }
3678
3679 if((source[i]&0x3e)==0x38) {
3680 // sf/ngle - these should throw exceptions for NaNs
3681 emit_andimm(fs,~0x800000,fs);
3682 return;
3683 }
3684
3685 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3686 if(opcode2[i]==0x10) {
3687 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3688 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3689 emit_orimm(fs,0x800000,fs);
3690 emit_flds(temp,14);
3691 emit_flds(HOST_TEMPREG,15);
3692 emit_fcmps(14,15);
3693 emit_fmstat();
3694 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
3695 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
3696 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
3697 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
3698 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
3699 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
3700 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
3701 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
3702 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
3703 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
3704 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
3705 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
3706 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
3707 return;
3708 }
3709 if(opcode2[i]==0x11) {
3710 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3711 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
3712 emit_orimm(fs,0x800000,fs);
3713 emit_vldr(temp,6);
3714 emit_vldr(HOST_TEMPREG,7);
3715 emit_fcmpd(6,7);
3716 emit_fmstat();
3717 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
3718 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
3719 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
3720 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
3721 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
3722 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
3723 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
3724 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
3725 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
3726 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
3727 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
3728 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
3729 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
3730 return;
3731 }
3732 #endif
3733
3734 // C only
3735
3736 u_int hr,reglist=0;
3737 for(hr=0;hr<HOST_REGS;hr++) {
3738 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3739 }
3740 reglist&=~(1<<fs);
3741 save_regs(reglist);
3742 if(opcode2[i]==0x10) {
3743 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3744 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
3745 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3746 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3747 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3748 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3749 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3750 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3751 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3752 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3753 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3754 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3755 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3756 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3757 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3758 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3759 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3760 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3761 }
3762 if(opcode2[i]==0x11) {
3763 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3764 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
3765 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3766 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3767 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3768 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3769 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3770 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3771 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3772 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3773 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3774 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3775 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3776 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3777 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3778 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3779 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3780 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
3781 }
3782 restore_regs(reglist);
3783 emit_loadreg(FSREG,fs);
3d624f89 3784#else
3785 cop1_unusable(i, i_regs);
3786#endif
57871462 3787}
3788
3789void float_assemble(int i,struct regstat *i_regs)
3790{
3d624f89 3791#ifndef DISABLE_COP1
57871462 3792 signed char temp=get_reg(i_regs->regmap,-1);
3793 assert(temp>=0);
3794 // Check cop1 unusable
3795 if(!cop1_usable) {
3796 signed char cs=get_reg(i_regs->regmap,CSREG);
3797 assert(cs>=0);
3798 emit_testimm(cs,0x20000000);
3799 int jaddr=(int)out;
3800 emit_jeq(0);
3801 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3802 cop1_usable=1;
3803 }
3804
3805 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3806 if((source[i]&0x3f)==6) // mov
3807 {
3808 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3809 if(opcode2[i]==0x10) {
3810 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3811 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
3812 emit_readword_indexed(0,temp,temp);
3813 emit_writeword_indexed(temp,0,HOST_TEMPREG);
3814 }
3815 if(opcode2[i]==0x11) {
3816 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3817 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
3818 emit_vldr(temp,7);
3819 emit_vstr(7,HOST_TEMPREG);
3820 }
3821 }
3822 return;
3823 }
3824
3825 if((source[i]&0x3f)>3)
3826 {
3827 if(opcode2[i]==0x10) {
3828 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3829 emit_flds(temp,15);
3830 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3831 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3832 }
3833 if((source[i]&0x3f)==4) // sqrt
3834 emit_fsqrts(15,15);
3835 if((source[i]&0x3f)==5) // abs
3836 emit_fabss(15,15);
3837 if((source[i]&0x3f)==7) // neg
3838 emit_fnegs(15,15);
3839 emit_fsts(15,temp);
3840 }
3841 if(opcode2[i]==0x11) {
3842 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3843 emit_vldr(temp,7);
3844 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3845 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3846 }
3847 if((source[i]&0x3f)==4) // sqrt
3848 emit_fsqrtd(7,7);
3849 if((source[i]&0x3f)==5) // abs
3850 emit_fabsd(7,7);
3851 if((source[i]&0x3f)==7) // neg
3852 emit_fnegd(7,7);
3853 emit_vstr(7,temp);
3854 }
3855 return;
3856 }
3857 if((source[i]&0x3f)<4)
3858 {
3859 if(opcode2[i]==0x10) {
3860 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3861 }
3862 if(opcode2[i]==0x11) {
3863 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3864 }
3865 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
3866 if(opcode2[i]==0x10) {
3867 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3868 emit_flds(temp,15);
3869 emit_flds(HOST_TEMPREG,13);
3870 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3871 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
3872 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3873 }
3874 }
3875 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
3876 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
3877 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
3878 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
3879 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
3880 emit_fsts(15,HOST_TEMPREG);
3881 }else{
3882 emit_fsts(15,temp);
3883 }
3884 }
3885 else if(opcode2[i]==0x11) {
3886 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
3887 emit_vldr(temp,7);
3888 emit_vldr(HOST_TEMPREG,6);
3889 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3890 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
3891 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3892 }
3893 }
3894 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
3895 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
3896 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
3897 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
3898 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
3899 emit_vstr(7,HOST_TEMPREG);
3900 }else{
3901 emit_vstr(7,temp);
3902 }
3903 }
3904 }
3905 else {
3906 if(opcode2[i]==0x10) {
3907 emit_flds(temp,15);
3908 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3909 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3910 }
3911 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
3912 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
3913 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
3914 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
3915 emit_fsts(15,temp);
3916 }
3917 else if(opcode2[i]==0x11) {
3918 emit_vldr(temp,7);
3919 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3920 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3921 }
3922 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
3923 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
3924 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
3925 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
3926 emit_vstr(7,temp);
3927 }
3928 }
3929 return;
3930 }
3931 #endif
3932
3933 u_int hr,reglist=0;
3934 for(hr=0;hr<HOST_REGS;hr++) {
3935 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3936 }
3937 if(opcode2[i]==0x10) { // Single precision
3938 save_regs(reglist);
3939 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3940 if((source[i]&0x3f)<4) {
3941 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
3942 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
3943 }else{
3944 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3945 }
3946 switch(source[i]&0x3f)
3947 {
3948 case 0x00: emit_call((int)add_s);break;
3949 case 0x01: emit_call((int)sub_s);break;
3950 case 0x02: emit_call((int)mul_s);break;
3951 case 0x03: emit_call((int)div_s);break;
3952 case 0x04: emit_call((int)sqrt_s);break;
3953 case 0x05: emit_call((int)abs_s);break;
3954 case 0x06: emit_call((int)mov_s);break;
3955 case 0x07: emit_call((int)neg_s);break;
3956 }
3957 restore_regs(reglist);
3958 }
3959 if(opcode2[i]==0x11) { // Double precision
3960 save_regs(reglist);
3961 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3962 if((source[i]&0x3f)<4) {
3963 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
3964 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
3965 }else{
3966 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3967 }
3968 switch(source[i]&0x3f)
3969 {
3970 case 0x00: emit_call((int)add_d);break;
3971 case 0x01: emit_call((int)sub_d);break;
3972 case 0x02: emit_call((int)mul_d);break;
3973 case 0x03: emit_call((int)div_d);break;
3974 case 0x04: emit_call((int)sqrt_d);break;
3975 case 0x05: emit_call((int)abs_d);break;
3976 case 0x06: emit_call((int)mov_d);break;
3977 case 0x07: emit_call((int)neg_d);break;
3978 }
3979 restore_regs(reglist);
3980 }
3d624f89 3981#else
3982 cop1_unusable(i, i_regs);
3983#endif
57871462 3984}
3985
3986void multdiv_assemble_arm(int i,struct regstat *i_regs)
3987{
3988 // case 0x18: MULT
3989 // case 0x19: MULTU
3990 // case 0x1A: DIV
3991 // case 0x1B: DIVU
3992 // case 0x1C: DMULT
3993 // case 0x1D: DMULTU
3994 // case 0x1E: DDIV
3995 // case 0x1F: DDIVU
3996 if(rs1[i]&&rs2[i])
3997 {
3998 if((opcode2[i]&4)==0) // 32-bit
3999 {
4000 if(opcode2[i]==0x18) // MULT
4001 {
4002 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4003 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4004 signed char hi=get_reg(i_regs->regmap,HIREG);
4005 signed char lo=get_reg(i_regs->regmap,LOREG);
4006 assert(m1>=0);
4007 assert(m2>=0);
4008 assert(hi>=0);
4009 assert(lo>=0);
4010 emit_smull(m1,m2,hi,lo);
4011 }
4012 if(opcode2[i]==0x19) // MULTU
4013 {
4014 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4015 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4016 signed char hi=get_reg(i_regs->regmap,HIREG);
4017 signed char lo=get_reg(i_regs->regmap,LOREG);
4018 assert(m1>=0);
4019 assert(m2>=0);
4020 assert(hi>=0);
4021 assert(lo>=0);
4022 emit_umull(m1,m2,hi,lo);
4023 }
4024 if(opcode2[i]==0x1A) // DIV
4025 {
4026 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4027 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4028 assert(d1>=0);
4029 assert(d2>=0);
4030 signed char quotient=get_reg(i_regs->regmap,LOREG);
4031 signed char remainder=get_reg(i_regs->regmap,HIREG);
4032 assert(quotient>=0);
4033 assert(remainder>=0);
4034 emit_movs(d1,remainder);
4035 emit_negmi(remainder,remainder);
4036 emit_movs(d2,HOST_TEMPREG);
4037 emit_jeq((int)out+52); // Division by zero
4038 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4039 emit_clz(HOST_TEMPREG,quotient);
4040 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4041 emit_orimm(quotient,1<<31,quotient);
4042 emit_shr(quotient,quotient,quotient);
4043 emit_cmp(remainder,HOST_TEMPREG);
4044 emit_subcs(remainder,HOST_TEMPREG,remainder);
4045 emit_adcs(quotient,quotient,quotient);
4046 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4047 emit_jcc((int)out-16); // -4
4048 emit_teq(d1,d2);
4049 emit_negmi(quotient,quotient);
4050 emit_test(d1,d1);
4051 emit_negmi(remainder,remainder);
4052 }
4053 if(opcode2[i]==0x1B) // DIVU
4054 {
4055 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4056 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4057 assert(d1>=0);
4058 assert(d2>=0);
4059 signed char quotient=get_reg(i_regs->regmap,LOREG);
4060 signed char remainder=get_reg(i_regs->regmap,HIREG);
4061 assert(quotient>=0);
4062 assert(remainder>=0);
4063 emit_test(d2,d2);
4064 emit_jeq((int)out+44); // Division by zero
4065 emit_clz(d2,HOST_TEMPREG);
4066 emit_movimm(1<<31,quotient);
4067 emit_shl(d2,HOST_TEMPREG,d2);
4068 emit_mov(d1,remainder);
4069 emit_shr(quotient,HOST_TEMPREG,quotient);
4070 emit_cmp(remainder,d2);
4071 emit_subcs(remainder,d2,remainder);
4072 emit_adcs(quotient,quotient,quotient);
4073 emit_shrcc_imm(d2,1,d2);
4074 emit_jcc((int)out-16); // -4
4075 }
4076 }
4077 else // 64-bit
4078 {
4079 if(opcode2[i]==0x1C) // DMULT
4080 {
4081 assert(opcode2[i]!=0x1C);
4082 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4083 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4084 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4085 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4086 assert(m1h>=0);
4087 assert(m2h>=0);
4088 assert(m1l>=0);
4089 assert(m2l>=0);
4090 emit_pushreg(m2h);
4091 emit_pushreg(m2l);
4092 emit_pushreg(m1h);
4093 emit_pushreg(m1l);
4094 emit_call((int)&mult64);
4095 emit_popreg(m1l);
4096 emit_popreg(m1h);
4097 emit_popreg(m2l);
4098 emit_popreg(m2h);
4099 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4100 signed char hil=get_reg(i_regs->regmap,HIREG);
4101 if(hih>=0) emit_loadreg(HIREG|64,hih);
4102 if(hil>=0) emit_loadreg(HIREG,hil);
4103 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4104 signed char lol=get_reg(i_regs->regmap,LOREG);
4105 if(loh>=0) emit_loadreg(LOREG|64,loh);
4106 if(lol>=0) emit_loadreg(LOREG,lol);
4107 }
4108 if(opcode2[i]==0x1D) // DMULTU
4109 {
4110 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4111 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4112 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4113 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4114 assert(m1h>=0);
4115 assert(m2h>=0);
4116 assert(m1l>=0);
4117 assert(m2l>=0);
4118 save_regs(0x100f);
4119 if(m1l!=0) emit_mov(m1l,0);
4120 if(m1h==0) emit_readword((int)&dynarec_local,1);
4121 else if(m1h>1) emit_mov(m1h,1);
4122 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4123 else if(m2l>2) emit_mov(m2l,2);
4124 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4125 else if(m2h>3) emit_mov(m2h,3);
4126 emit_call((int)&multu64);
4127 restore_regs(0x100f);
4128 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4129 signed char hil=get_reg(i_regs->regmap,HIREG);
4130 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4131 signed char lol=get_reg(i_regs->regmap,LOREG);
4132 /*signed char temp=get_reg(i_regs->regmap,-1);
4133 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4134 signed char rl=get_reg(i_regs->regmap,HIREG);
4135 assert(m1h>=0);
4136 assert(m2h>=0);
4137 assert(m1l>=0);
4138 assert(m2l>=0);
4139 assert(temp>=0);
4140 //emit_mov(m1l,EAX);
4141 //emit_mul(m2l);
4142 emit_umull(rl,rh,m1l,m2l);
4143 emit_storereg(LOREG,rl);
4144 emit_mov(rh,temp);
4145 //emit_mov(m1h,EAX);
4146 //emit_mul(m2l);
4147 emit_umull(rl,rh,m1h,m2l);
4148 emit_adds(rl,temp,temp);
4149 emit_adcimm(rh,0,rh);
4150 emit_storereg(HIREG,rh);
4151 //emit_mov(m2h,EAX);
4152 //emit_mul(m1l);
4153 emit_umull(rl,rh,m1l,m2h);
4154 emit_adds(rl,temp,temp);
4155 emit_adcimm(rh,0,rh);
4156 emit_storereg(LOREG|64,temp);
4157 emit_mov(rh,temp);
4158 //emit_mov(m2h,EAX);
4159 //emit_mul(m1h);
4160 emit_umull(rl,rh,m1h,m2h);
4161 emit_adds(rl,temp,rl);
4162 emit_loadreg(HIREG,temp);
4163 emit_adcimm(rh,0,rh);
4164 emit_adds(rl,temp,rl);
4165 emit_adcimm(rh,0,rh);
4166 // DEBUG
4167 /*
4168 emit_pushreg(m2h);
4169 emit_pushreg(m2l);
4170 emit_pushreg(m1h);
4171 emit_pushreg(m1l);
4172 emit_call((int)&multu64);
4173 emit_popreg(m1l);
4174 emit_popreg(m1h);
4175 emit_popreg(m2l);
4176 emit_popreg(m2h);
4177 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4178 signed char hil=get_reg(i_regs->regmap,HIREG);
4179 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4180 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4181 */
4182 // Shouldn't be necessary
4183 //char loh=get_reg(i_regs->regmap,LOREG|64);
4184 //char lol=get_reg(i_regs->regmap,LOREG);
4185 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4186 //if(lol>=0) emit_loadreg(LOREG,lol);
4187 }
4188 if(opcode2[i]==0x1E) // DDIV
4189 {
4190 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4191 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4192 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4193 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4194 assert(d1h>=0);
4195 assert(d2h>=0);
4196 assert(d1l>=0);
4197 assert(d2l>=0);
4198 save_regs(0x100f);
4199 if(d1l!=0) emit_mov(d1l,0);
4200 if(d1h==0) emit_readword((int)&dynarec_local,1);
4201 else if(d1h>1) emit_mov(d1h,1);
4202 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4203 else if(d2l>2) emit_mov(d2l,2);
4204 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4205 else if(d2h>3) emit_mov(d2h,3);
4206 emit_call((int)&div64);
4207 restore_regs(0x100f);
4208 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4209 signed char hil=get_reg(i_regs->regmap,HIREG);
4210 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4211 signed char lol=get_reg(i_regs->regmap,LOREG);
4212 if(hih>=0) emit_loadreg(HIREG|64,hih);
4213 if(hil>=0) emit_loadreg(HIREG,hil);
4214 if(loh>=0) emit_loadreg(LOREG|64,loh);
4215 if(lol>=0) emit_loadreg(LOREG,lol);
4216 }
4217 if(opcode2[i]==0x1F) // DDIVU
4218 {
4219 //u_int hr,reglist=0;
4220 //for(hr=0;hr<HOST_REGS;hr++) {
4221 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4222 //}
4223 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4224 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4225 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4226 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4227 assert(d1h>=0);
4228 assert(d2h>=0);
4229 assert(d1l>=0);
4230 assert(d2l>=0);
4231 save_regs(0x100f);
4232 if(d1l!=0) emit_mov(d1l,0);
4233 if(d1h==0) emit_readword((int)&dynarec_local,1);
4234 else if(d1h>1) emit_mov(d1h,1);
4235 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4236 else if(d2l>2) emit_mov(d2l,2);
4237 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4238 else if(d2h>3) emit_mov(d2h,3);
4239 emit_call((int)&divu64);
4240 restore_regs(0x100f);
4241 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4242 signed char hil=get_reg(i_regs->regmap,HIREG);
4243 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4244 signed char lol=get_reg(i_regs->regmap,LOREG);
4245 if(hih>=0) emit_loadreg(HIREG|64,hih);
4246 if(hil>=0) emit_loadreg(HIREG,hil);
4247 if(loh>=0) emit_loadreg(LOREG|64,loh);
4248 if(lol>=0) emit_loadreg(LOREG,lol);
4249 }
4250 }
4251 }
4252 else
4253 {
4254 // Multiply by zero is zero.
4255 // MIPS does not have a divide by zero exception.
4256 // The result is undefined, we return zero.
4257 signed char hr=get_reg(i_regs->regmap,HIREG);
4258 signed char lr=get_reg(i_regs->regmap,LOREG);
4259 if(hr>=0) emit_zeroreg(hr);
4260 if(lr>=0) emit_zeroreg(lr);
4261 }
4262}
4263#define multdiv_assemble multdiv_assemble_arm
4264
// Preload step for the return-address hash.
// Deliberately emits nothing on ARM: the x86 backend has to place the
// constant 0xf8 in register r here, whereas on ARM the hash is produced by
// a single instruction with an immediate operand (see do_rhash).
void do_preload_rhash(int r) {
  (void)r; // not needed on this architecture
}
4269
4270void do_preload_rhtbl(int ht) {
4271 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4272}
4273
// Emit code that hashes the address in host register rs into rh.
// The mask 0xf8 keeps bits 3..7, i.e. the same ((addr&0xFF)>>3) slot that
// do_miniht_insert uses, already scaled by the 8-byte size of a
// mini_ht[32][2] entry.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4277
4278void do_miniht_load(int ht,int rh) {
4279 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4280 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4281}
4282
4283void do_miniht_jump(int rs,int rh,int ht) {
4284 emit_cmp(rh,rs);
4285 emit_ldreq_indexed(ht,4,15);
4286 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4287 emit_mov(rs,7);
4288 emit_jmp(jump_vaddr_reg[7]);
4289 #else
4290 emit_jmp(jump_vaddr_reg[rs]);
4291 #endif
4292}
4293
4294void do_miniht_insert(u_int return_address,int rt,int temp) {
4295 #ifdef ARMv5_ONLY
4296 emit_movimm(return_address,rt); // PC into link register
4297 add_to_linker((int)out,return_address,1);
4298 emit_pcreladdr(temp);
4299 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4300 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4301 #else
4302 emit_movw(return_address&0x0000FFFF,rt);
4303 add_to_linker((int)out,return_address,1);
4304 emit_pcreladdr(temp);
4305 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4306 emit_movt(return_address&0xFFFF0000,rt);
4307 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4308 #endif
4309}
4310
4311// Sign-extend to 64 bits and write out upper half of a register
4312// This is useful where we have a 32-bit value in a register, and want to
4313// keep it in a 32-bit register, but can't guarantee that it won't be read
4314// as a 64-bit value later.
4315void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4316{
24385cae 4317#ifndef FORCE32
57871462 4318 if(is32_pre==is32) return;
4319 int hr,reg;
4320 for(hr=0;hr<HOST_REGS;hr++) {
4321 if(hr!=EXCLUDE_REG) {
4322 //if(pre[hr]==entry[hr]) {
4323 if((reg=pre[hr])>=0) {
4324 if((dirty>>hr)&1) {
4325 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4326 emit_sarimm(hr,31,HOST_TEMPREG);
4327 emit_storereg(reg|64,HOST_TEMPREG);
4328 }
4329 }
4330 }
4331 //}
4332 }
4333 }
24385cae 4334#endif
57871462 4335}
4336
4337void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4338{
4339 //if(dirty_pre==dirty) return;
4340 int hr,reg,new_hr;
4341 for(hr=0;hr<HOST_REGS;hr++) {
4342 if(hr!=EXCLUDE_REG) {
4343 reg=pre[hr];
4344 if(((~u)>>(reg&63))&1) {
4345 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4346 if(((dirty_pre&~dirty)>>hr)&1) {
4347 if(reg>0&&reg<34) {
4348 emit_storereg(reg,hr);
4349 if( ((is32_pre&~uu)>>reg)&1 ) {
4350 emit_sarimm(hr,31,HOST_TEMPREG);
4351 emit_storereg(reg|64,HOST_TEMPREG);
4352 }
4353 }
4354 else if(reg>=64) {
4355 emit_storereg(reg,hr);
4356 }
4357 }
4358 }
4359 else // Check if register moved to a different register
4360 if((new_hr=get_reg(entry,reg))>=0) {
4361 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4362 if(reg>0&&reg<34) {
4363 emit_storereg(reg,hr);
4364 if( ((is32_pre&~uu)>>reg)&1 ) {
4365 emit_sarimm(hr,31,HOST_TEMPREG);
4366 emit_storereg(reg|64,HOST_TEMPREG);
4367 }
4368 }
4369 else if(reg>=64) {
4370 emit_storereg(reg,hr);
4371 }
4372 }
4373 }
4374 }
4375 }
4376 }
4377}
4378
4379
/* Disabled: using strd could possibly help here, but host registers would have to be allocated in pairs.
4381void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4382{
4383 int hr;
4384 int wrote=-1;
4385 for(hr=HOST_REGS-1;hr>=0;hr--) {
4386 if(hr!=EXCLUDE_REG) {
4387 if(pre[hr]!=entry[hr]) {
4388 if(pre[hr]>=0) {
4389 if((dirty>>hr)&1) {
4390 if(get_reg(entry,pre[hr])<0) {
4391 if(pre[hr]<64) {
4392 if(!((u>>pre[hr])&1)) {
4393 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4394 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4395 emit_sarimm(hr,31,hr+1);
4396 emit_strdreg(pre[hr],hr);
4397 }
4398 else
4399 emit_storereg(pre[hr],hr);
4400 }else{
4401 emit_storereg(pre[hr],hr);
4402 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4403 emit_sarimm(hr,31,hr);
4404 emit_storereg(pre[hr]|64,hr);
4405 }
4406 }
4407 }
4408 }else{
4409 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4410 emit_storereg(pre[hr],hr);
4411 }
4412 }
4413 wrote=hr;
4414 }
4415 }
4416 }
4417 }
4418 }
4419 }
4420 for(hr=0;hr<HOST_REGS;hr++) {
4421 if(hr!=EXCLUDE_REG) {
4422 if(pre[hr]!=entry[hr]) {
4423 if(pre[hr]>=0) {
4424 int nr;
4425 if((nr=get_reg(entry,pre[hr]))>=0) {
4426 emit_mov(hr,nr);
4427 }
4428 }
4429 }
4430 }
4431 }
4432}
4433#define wb_invalidate wb_invalidate_arm
4434*/
4435
4436// CPU-architecture-specific initialization
4437void arch_init() {
3d624f89 4438#ifndef DISABLE_COP1
57871462 4439 rounding_modes[0]=0x0<<22; // round
4440 rounding_modes[1]=0x3<<22; // trunc
4441 rounding_modes[2]=0x1<<22; // ceil
4442 rounding_modes[3]=0x2<<22; // floor
3d624f89 4443#endif
57871462 4444}