drc: allow xor imm 0
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
145void kill_pointer(void *stub)
146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
153}
154
155int get_pointer(void *stub)
156{
157 //printf("get_pointer(%x)\n",(int)stub);
158 int *ptr=(int *)(stub+4);
159 assert((*ptr&0x0ff00000)==0x05900000);
160 u_int offset=*ptr&0xfff;
161 int **l_ptr=(void *)ptr+offset+8;
162 int *i_ptr=*l_ptr;
163 assert((*i_ptr&0x0f000000)==0x0a000000);
164 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
165}
166
167// Find the "clean" entry point from a "dirty" entry point
168// by skipping past the call to verify_code
169u_int get_clean_addr(int addr)
170{
171 int *ptr=(int *)addr;
172 #ifdef ARMv5_ONLY
173 ptr+=4;
174 #else
175 ptr+=6;
176 #endif
177 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
178 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
179 ptr++;
180 if((*ptr&0xFF000000)==0xea000000) {
181 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
182 }
183 return (u_int)ptr;
184}
185
186int verify_dirty(int addr)
187{
188 u_int *ptr=(u_int *)addr;
189 #ifdef ARMv5_ONLY
190 // get from literal pool
191 assert((*ptr&0xFFF00000)==0xe5900000);
192 u_int offset=*ptr&0xfff;
193 u_int *l_ptr=(void *)ptr+offset+8;
194 u_int source=l_ptr[0];
195 u_int copy=l_ptr[1];
196 u_int len=l_ptr[2];
197 ptr+=4;
198 #else
199 // ARMv7 movw/movt
200 assert((*ptr&0xFFF00000)==0xe3000000);
201 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
202 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
203 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
204 ptr+=6;
205 #endif
206 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
207 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 208 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 209 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
210 unsigned int page=source>>12;
211 unsigned int map_value=memory_map[page];
212 if(map_value>=0x80000000) return 0;
213 while(page<((source+len-1)>>12)) {
214 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
215 }
216 source = source+(map_value<<2);
217 }
218 //printf("verify_dirty: %x %x %x\n",source,copy,len);
219 return !memcmp((void *)source,(void *)copy,len);
220}
221
222// This doesn't necessarily find all clean entry points, just
223// guarantees that it's not dirty
224int isclean(int addr)
225{
226 #ifdef ARMv5_ONLY
227 int *ptr=((u_int *)addr)+4;
228 #else
229 int *ptr=((u_int *)addr)+6;
230 #endif
231 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
232 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
233 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
236 return 1;
237}
238
239void get_bounds(int addr,u_int *start,u_int *end)
240{
241 u_int *ptr=(u_int *)addr;
242 #ifdef ARMv5_ONLY
243 // get from literal pool
244 assert((*ptr&0xFFF00000)==0xe5900000);
245 u_int offset=*ptr&0xfff;
246 u_int *l_ptr=(void *)ptr+offset+8;
247 u_int source=l_ptr[0];
248 //u_int copy=l_ptr[1];
249 u_int len=l_ptr[2];
250 ptr+=4;
251 #else
252 // ARMv7 movw/movt
253 assert((*ptr&0xFFF00000)==0xe3000000);
254 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
255 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
256 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
257 ptr+=6;
258 #endif
259 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
260 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 261 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 262 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
263 if(memory_map[source>>12]>=0x80000000) source = 0;
264 else source = source+(memory_map[source>>12]<<2);
265 }
266 *start=source;
267 *end=source+len;
268}
269
270/* Register allocation */
271
272// Note: registers are allocated clean (unmodified state)
273// if you intend to modify the register, you must call dirty_reg().
274void alloc_reg(struct regstat *cur,int i,signed char reg)
275{
276 int r,hr;
277 int preferred_reg = (reg&7);
278 if(reg==CCREG) preferred_reg=HOST_CCREG;
279 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
280
281 // Don't allocate unused registers
282 if((cur->u>>reg)&1) return;
283
284 // see if it's already allocated
285 for(hr=0;hr<HOST_REGS;hr++)
286 {
287 if(cur->regmap[hr]==reg) return;
288 }
289
290 // Keep the same mapping if the register was already allocated in a loop
291 preferred_reg = loop_reg(i,reg,preferred_reg);
292
293 // Try to allocate the preferred register
294 if(cur->regmap[preferred_reg]==-1) {
295 cur->regmap[preferred_reg]=reg;
296 cur->dirty&=~(1<<preferred_reg);
297 cur->isconst&=~(1<<preferred_reg);
298 return;
299 }
300 r=cur->regmap[preferred_reg];
301 if(r<64&&((cur->u>>r)&1)) {
302 cur->regmap[preferred_reg]=reg;
303 cur->dirty&=~(1<<preferred_reg);
304 cur->isconst&=~(1<<preferred_reg);
305 return;
306 }
307 if(r>=64&&((cur->uu>>(r&63))&1)) {
308 cur->regmap[preferred_reg]=reg;
309 cur->dirty&=~(1<<preferred_reg);
310 cur->isconst&=~(1<<preferred_reg);
311 return;
312 }
313
314 // Clear any unneeded registers
315 // We try to keep the mapping consistent, if possible, because it
316 // makes branches easier (especially loops). So we try to allocate
317 // first (see above) before removing old mappings. If this is not
318 // possible then go ahead and clear out the registers that are no
319 // longer needed.
320 for(hr=0;hr<HOST_REGS;hr++)
321 {
322 r=cur->regmap[hr];
323 if(r>=0) {
324 if(r<64) {
325 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
326 }
327 else
328 {
329 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
330 }
331 }
332 }
333 // Try to allocate any available register, but prefer
334 // registers that have not been used recently.
335 if(i>0) {
336 for(hr=0;hr<HOST_REGS;hr++) {
337 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
338 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
339 cur->regmap[hr]=reg;
340 cur->dirty&=~(1<<hr);
341 cur->isconst&=~(1<<hr);
342 return;
343 }
344 }
345 }
346 }
347 // Try to allocate any available register
348 for(hr=0;hr<HOST_REGS;hr++) {
349 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
350 cur->regmap[hr]=reg;
351 cur->dirty&=~(1<<hr);
352 cur->isconst&=~(1<<hr);
353 return;
354 }
355 }
356
357 // Ok, now we have to evict someone
358 // Pick a register we hopefully won't need soon
359 u_char hsn[MAXREG+1];
360 memset(hsn,10,sizeof(hsn));
361 int j;
362 lsn(hsn,i,&preferred_reg);
363 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
364 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
365 if(i>0) {
366 // Don't evict the cycle count at entry points, otherwise the entry
367 // stub will have to write it.
368 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
369 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
370 for(j=10;j>=3;j--)
371 {
372 // Alloc preferred register if available
373 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
374 for(hr=0;hr<HOST_REGS;hr++) {
375 // Evict both parts of a 64-bit register
376 if((cur->regmap[hr]&63)==r) {
377 cur->regmap[hr]=-1;
378 cur->dirty&=~(1<<hr);
379 cur->isconst&=~(1<<hr);
380 }
381 }
382 cur->regmap[preferred_reg]=reg;
383 return;
384 }
385 for(r=1;r<=MAXREG;r++)
386 {
387 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
390 if(cur->regmap[hr]==r+64) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
397 }
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
400 if(cur->regmap[hr]==r) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 }
410 }
411 }
412 for(j=10;j>=0;j--)
413 {
414 for(r=1;r<=MAXREG;r++)
415 {
416 if(hsn[r]==j) {
417 for(hr=0;hr<HOST_REGS;hr++) {
418 if(cur->regmap[hr]==r+64) {
419 cur->regmap[hr]=reg;
420 cur->dirty&=~(1<<hr);
421 cur->isconst&=~(1<<hr);
422 return;
423 }
424 }
425 for(hr=0;hr<HOST_REGS;hr++) {
426 if(cur->regmap[hr]==r) {
427 cur->regmap[hr]=reg;
428 cur->dirty&=~(1<<hr);
429 cur->isconst&=~(1<<hr);
430 return;
431 }
432 }
433 }
434 }
435 }
436 printf("This shouldn't happen (alloc_reg)");exit(1);
437}
438
439void alloc_reg64(struct regstat *cur,int i,signed char reg)
440{
441 int preferred_reg = 8+(reg&1);
442 int r,hr;
443
444 // allocate the lower 32 bits
445 alloc_reg(cur,i,reg);
446
447 // Don't allocate unused registers
448 if((cur->uu>>reg)&1) return;
449
450 // see if the upper half is already allocated
451 for(hr=0;hr<HOST_REGS;hr++)
452 {
453 if(cur->regmap[hr]==reg+64) return;
454 }
455
456 // Keep the same mapping if the register was already allocated in a loop
457 preferred_reg = loop_reg(i,reg,preferred_reg);
458
459 // Try to allocate the preferred register
460 if(cur->regmap[preferred_reg]==-1) {
461 cur->regmap[preferred_reg]=reg|64;
462 cur->dirty&=~(1<<preferred_reg);
463 cur->isconst&=~(1<<preferred_reg);
464 return;
465 }
466 r=cur->regmap[preferred_reg];
467 if(r<64&&((cur->u>>r)&1)) {
468 cur->regmap[preferred_reg]=reg|64;
469 cur->dirty&=~(1<<preferred_reg);
470 cur->isconst&=~(1<<preferred_reg);
471 return;
472 }
473 if(r>=64&&((cur->uu>>(r&63))&1)) {
474 cur->regmap[preferred_reg]=reg|64;
475 cur->dirty&=~(1<<preferred_reg);
476 cur->isconst&=~(1<<preferred_reg);
477 return;
478 }
479
480 // Clear any unneeded registers
481 // We try to keep the mapping consistent, if possible, because it
482 // makes branches easier (especially loops). So we try to allocate
483 // first (see above) before removing old mappings. If this is not
484 // possible then go ahead and clear out the registers that are no
485 // longer needed.
486 for(hr=HOST_REGS-1;hr>=0;hr--)
487 {
488 r=cur->regmap[hr];
489 if(r>=0) {
490 if(r<64) {
491 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
492 }
493 else
494 {
495 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
496 }
497 }
498 }
499 // Try to allocate any available register, but prefer
500 // registers that have not been used recently.
501 if(i>0) {
502 for(hr=0;hr<HOST_REGS;hr++) {
503 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
504 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
505 cur->regmap[hr]=reg|64;
506 cur->dirty&=~(1<<hr);
507 cur->isconst&=~(1<<hr);
508 return;
509 }
510 }
511 }
512 }
513 // Try to allocate any available register
514 for(hr=0;hr<HOST_REGS;hr++) {
515 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
516 cur->regmap[hr]=reg|64;
517 cur->dirty&=~(1<<hr);
518 cur->isconst&=~(1<<hr);
519 return;
520 }
521 }
522
523 // Ok, now we have to evict someone
524 // Pick a register we hopefully won't need soon
525 u_char hsn[MAXREG+1];
526 memset(hsn,10,sizeof(hsn));
527 int j;
528 lsn(hsn,i,&preferred_reg);
529 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
530 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
531 if(i>0) {
532 // Don't evict the cycle count at entry points, otherwise the entry
533 // stub will have to write it.
534 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
535 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
536 for(j=10;j>=3;j--)
537 {
538 // Alloc preferred register if available
539 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
540 for(hr=0;hr<HOST_REGS;hr++) {
541 // Evict both parts of a 64-bit register
542 if((cur->regmap[hr]&63)==r) {
543 cur->regmap[hr]=-1;
544 cur->dirty&=~(1<<hr);
545 cur->isconst&=~(1<<hr);
546 }
547 }
548 cur->regmap[preferred_reg]=reg|64;
549 return;
550 }
551 for(r=1;r<=MAXREG;r++)
552 {
553 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
556 if(cur->regmap[hr]==r+64) {
557 cur->regmap[hr]=reg|64;
558 cur->dirty&=~(1<<hr);
559 cur->isconst&=~(1<<hr);
560 return;
561 }
562 }
563 }
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
566 if(cur->regmap[hr]==r) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 }
576 }
577 }
578 for(j=10;j>=0;j--)
579 {
580 for(r=1;r<=MAXREG;r++)
581 {
582 if(hsn[r]==j) {
583 for(hr=0;hr<HOST_REGS;hr++) {
584 if(cur->regmap[hr]==r+64) {
585 cur->regmap[hr]=reg|64;
586 cur->dirty&=~(1<<hr);
587 cur->isconst&=~(1<<hr);
588 return;
589 }
590 }
591 for(hr=0;hr<HOST_REGS;hr++) {
592 if(cur->regmap[hr]==r) {
593 cur->regmap[hr]=reg|64;
594 cur->dirty&=~(1<<hr);
595 cur->isconst&=~(1<<hr);
596 return;
597 }
598 }
599 }
600 }
601 }
602 printf("This shouldn't happen");exit(1);
603}
604
605// Allocate a temporary register. This is done without regard to
606// dirty status or whether the register we request is on the unneeded list
607// Note: This will only allocate one register, even if called multiple times
608void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
609{
610 int r,hr;
611 int preferred_reg = -1;
612
613 // see if it's already allocated
614 for(hr=0;hr<HOST_REGS;hr++)
615 {
616 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
617 }
618
619 // Try to allocate any available register
620 for(hr=HOST_REGS-1;hr>=0;hr--) {
621 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
622 cur->regmap[hr]=reg;
623 cur->dirty&=~(1<<hr);
624 cur->isconst&=~(1<<hr);
625 return;
626 }
627 }
628
629 // Find an unneeded register
630 for(hr=HOST_REGS-1;hr>=0;hr--)
631 {
632 r=cur->regmap[hr];
633 if(r>=0) {
634 if(r<64) {
635 if((cur->u>>r)&1) {
636 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
637 cur->regmap[hr]=reg;
638 cur->dirty&=~(1<<hr);
639 cur->isconst&=~(1<<hr);
640 return;
641 }
642 }
643 }
644 else
645 {
646 if((cur->uu>>(r&63))&1) {
647 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
648 cur->regmap[hr]=reg;
649 cur->dirty&=~(1<<hr);
650 cur->isconst&=~(1<<hr);
651 return;
652 }
653 }
654 }
655 }
656 }
657
658 // Ok, now we have to evict someone
659 // Pick a register we hopefully won't need soon
660 // TODO: we might want to follow unconditional jumps here
661 // TODO: get rid of dupe code and make this into a function
662 u_char hsn[MAXREG+1];
663 memset(hsn,10,sizeof(hsn));
664 int j;
665 lsn(hsn,i,&preferred_reg);
666 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
667 if(i>0) {
668 // Don't evict the cycle count at entry points, otherwise the entry
669 // stub will have to write it.
670 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
671 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
672 for(j=10;j>=3;j--)
673 {
674 for(r=1;r<=MAXREG;r++)
675 {
676 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
677 for(hr=0;hr<HOST_REGS;hr++) {
678 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
679 if(cur->regmap[hr]==r+64) {
680 cur->regmap[hr]=reg;
681 cur->dirty&=~(1<<hr);
682 cur->isconst&=~(1<<hr);
683 return;
684 }
685 }
686 }
687 for(hr=0;hr<HOST_REGS;hr++) {
688 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
689 if(cur->regmap[hr]==r) {
690 cur->regmap[hr]=reg;
691 cur->dirty&=~(1<<hr);
692 cur->isconst&=~(1<<hr);
693 return;
694 }
695 }
696 }
697 }
698 }
699 }
700 }
701 for(j=10;j>=0;j--)
702 {
703 for(r=1;r<=MAXREG;r++)
704 {
705 if(hsn[r]==j) {
706 for(hr=0;hr<HOST_REGS;hr++) {
707 if(cur->regmap[hr]==r+64) {
708 cur->regmap[hr]=reg;
709 cur->dirty&=~(1<<hr);
710 cur->isconst&=~(1<<hr);
711 return;
712 }
713 }
714 for(hr=0;hr<HOST_REGS;hr++) {
715 if(cur->regmap[hr]==r) {
716 cur->regmap[hr]=reg;
717 cur->dirty&=~(1<<hr);
718 cur->isconst&=~(1<<hr);
719 return;
720 }
721 }
722 }
723 }
724 }
725 printf("This shouldn't happen");exit(1);
726}
727// Allocate a specific ARM register.
728void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
729{
730 int n;
731
732 // see if it's already allocated (and dealloc it)
733 for(n=0;n<HOST_REGS;n++)
734 {
735 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
736 }
737
738 cur->regmap[hr]=reg;
739 cur->dirty&=~(1<<hr);
740 cur->isconst&=~(1<<hr);
741}
742
743// Alloc cycle count into dedicated register
744alloc_cc(struct regstat *cur,int i)
745{
746 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
747}
748
749/* Special alloc */
750
751
752/* Assembler */
753
// Printable names of the 16 ARM registers, indexed by register number
// (used by the assem_debug traces).
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"
};
771
772void output_byte(u_char byte)
773{
774 *(out++)=byte;
775}
776void output_modrm(u_char mod,u_char rm,u_char ext)
777{
778 assert(mod<4);
779 assert(rm<8);
780 assert(ext<8);
781 u_char byte=(mod<<6)|(ext<<3)|rm;
782 *(out++)=byte;
783}
784void output_sib(u_char scale,u_char index,u_char base)
785{
786 assert(scale<4);
787 assert(index<8);
788 assert(base<8);
789 u_char byte=(scale<<6)|(index<<3)|base;
790 *(out++)=byte;
791}
792void output_w32(u_int word)
793{
794 *((u_int *)out)=word;
795 out+=4;
796}
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the fields of an instruction word carrying an 8-bit immediate that
// is shifted left by an even amount: rn in bits 16-19, rd in bits 12-15,
// ((32-shift)&30)<<7 as the rotate field, and imm in bits 0-7.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate_field=((32-shift)&30)<<7;
  u_int reg_fields=(rn<<16)|(rd<<12);
  return reg_fields|rotate_field|imm;
}
// Try to express `imm` as an 8-bit value combined with an even rotation.
// On success the 12-bit field (rotation in bits 8-11, value in bits 0-7) is
// written to *encoded and 1 is returned. When no even rotation brings the
// value below 256, 0 is returned and *encoded is left untouched.
u_int genimm(u_int imm,u_int *encoded)
{
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  int rot;
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
826u_int genjmp(u_int addr)
827{
828 int offset=addr-(int)out-8;
e80343e2 829 if(offset<-33554432||offset>=33554432) {
830 if (addr>2) {
831 printf("genjmp: out of range: %08x\n", offset);
832 exit(1);
833 }
834 return 0;
835 }
57871462 836 return ((u_int)offset>>2)&0xffffff;
837}
838
839void emit_mov(int rs,int rt)
840{
841 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
842 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
843}
844
845void emit_movs(int rs,int rt)
846{
847 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_add(int rs1,int rs2,int rt)
852{
853 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
854 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
855}
856
857void emit_adds(int rs1,int rs2,int rt)
858{
859 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adcs(int rs1,int rs2,int rt)
864{
865 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_sbc(int rs1,int rs2,int rt)
870{
871 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbcs(int rs1,int rs2,int rt)
876{
877 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_neg(int rs, int rt)
882{
883 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
884 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
885}
886
887void emit_negs(int rs, int rt)
888{
889 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_sub(int rs1,int rs2,int rt)
894{
895 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
896 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
897}
898
899void emit_subs(int rs1,int rs2,int rt)
900{
901 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_zeroreg(int rt)
906{
907 assem_debug("mov %s,#0\n",regname[rt]);
908 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
909}
910
911void emit_loadreg(int r, int hr)
912{
3d624f89 913#ifdef FORCE32
914 if(r&64) {
915 printf("64bit load in 32bit mode!\n");
916 exit(1);
917 }
918#endif
57871462 919 if((r&63)==0)
920 emit_zeroreg(hr);
921 else {
3d624f89 922 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 923 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
924 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
925 if(r==CCREG) addr=(int)&cycle_count;
926 if(r==CSREG) addr=(int)&Status;
927 if(r==FSREG) addr=(int)&FCR31;
928 if(r==INVCP) addr=(int)&invc_ptr;
929 u_int offset = addr-(u_int)&dynarec_local;
930 assert(offset<4096);
931 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
932 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
933 }
934}
935void emit_storereg(int r, int hr)
936{
3d624f89 937#ifdef FORCE32
938 if(r&64) {
939 printf("64bit store in 32bit mode!\n");
940 exit(1);
941 }
942#endif
943 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 944 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
945 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
946 if(r==CCREG) addr=(int)&cycle_count;
947 if(r==FSREG) addr=(int)&FCR31;
948 u_int offset = addr-(u_int)&dynarec_local;
949 assert(offset<4096);
950 assem_debug("str %s,fp+%d\n",regname[hr],offset);
951 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
952}
953
954void emit_test(int rs, int rt)
955{
956 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
957 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
958}
959
960void emit_testimm(int rs,int imm)
961{
962 u_int armval;
963 assem_debug("tst %s,$%d\n",regname[rs],imm);
964 assert(genimm(imm,&armval));
965 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
966}
967
b9b61529 968void emit_testeqimm(int rs,int imm)
969{
970 u_int armval;
971 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
972 assert(genimm(imm,&armval));
973 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
974}
975
57871462 976void emit_not(int rs,int rt)
977{
978 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
979 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
980}
981
b9b61529 982void emit_mvnmi(int rs,int rt)
983{
984 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
985 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
986}
987
57871462 988void emit_and(u_int rs1,u_int rs2,u_int rt)
989{
990 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
991 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
992}
993
994void emit_or(u_int rs1,u_int rs2,u_int rt)
995{
996 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
997 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
998}
999void emit_or_and_set_flags(int rs1,int rs2,int rt)
1000{
1001 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1002 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1003}
1004
1005void emit_xor(u_int rs1,u_int rs2,u_int rt)
1006{
1007 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1008 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1009}
1010
1011void emit_loadlp(u_int imm,u_int rt)
1012{
1013 add_literal((int)out,imm);
1014 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1015 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1016}
1017void emit_movw(u_int imm,u_int rt)
1018{
1019 assert(imm<65536);
1020 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1021 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1022}
1023void emit_movt(u_int imm,u_int rt)
1024{
1025 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1026 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1027}
1028void emit_movimm(u_int imm,u_int rt)
1029{
1030 u_int armval;
1031 if(genimm(imm,&armval)) {
1032 assem_debug("mov %s,#%d\n",regname[rt],imm);
1033 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1034 }else if(genimm(~imm,&armval)) {
1035 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1036 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1037 }else if(imm<65536) {
1038 #ifdef ARMv5_ONLY
1039 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1040 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1041 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1042 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1043 #else
1044 emit_movw(imm,rt);
1045 #endif
1046 }else{
1047 #ifdef ARMv5_ONLY
1048 emit_loadlp(imm,rt);
1049 #else
1050 emit_movw(imm&0x0000FFFF,rt);
1051 emit_movt(imm&0xFFFF0000,rt);
1052 #endif
1053 }
1054}
1055void emit_pcreladdr(u_int rt)
1056{
1057 assem_debug("add %s,pc,#?\n",regname[rt]);
1058 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1059}
1060
1061void emit_addimm(u_int rs,int imm,u_int rt)
1062{
1063 assert(rs<16);
1064 assert(rt<16);
1065 if(imm!=0) {
1066 assert(imm>-65536&&imm<65536);
1067 u_int armval;
1068 if(genimm(imm,&armval)) {
1069 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1070 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1071 }else if(genimm(-imm,&armval)) {
1072 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1073 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1074 }else if(imm<0) {
1075 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1076 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1077 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1078 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1079 }else{
1080 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1081 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1082 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1083 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1084 }
1085 }
1086 else if(rs!=rt) emit_mov(rs,rt);
1087}
1088
1089void emit_addimm_and_set_flags(int imm,int rt)
1090{
1091 assert(imm>-65536&&imm<65536);
1092 u_int armval;
1093 if(genimm(imm,&armval)) {
1094 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1095 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1096 }else if(genimm(-imm,&armval)) {
1097 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1098 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1099 }else if(imm<0) {
1100 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1101 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1102 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1103 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1104 }else{
1105 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1106 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1107 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1108 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1109 }
1110}
1111void emit_addimm_no_flags(u_int imm,u_int rt)
1112{
1113 emit_addimm(rt,imm,rt);
1114}
1115
1116void emit_addnop(u_int r)
1117{
1118 assert(r<16);
1119 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1120 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1121}
1122
1123void emit_adcimm(u_int rs,int imm,u_int rt)
1124{
1125 u_int armval;
1126 assert(genimm(imm,&armval));
1127 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1128 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1129}
1130/*void emit_sbcimm(int imm,u_int rt)
1131{
1132 u_int armval;
1133 assert(genimm(imm,&armval));
1134 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1135 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1136}*/
1137void emit_sbbimm(int imm,u_int rt)
1138{
1139 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1140 assert(rt<8);
1141 if(imm<128&&imm>=-128) {
1142 output_byte(0x83);
1143 output_modrm(3,rt,3);
1144 output_byte(imm);
1145 }
1146 else
1147 {
1148 output_byte(0x81);
1149 output_modrm(3,rt,3);
1150 output_w32(imm);
1151 }
1152}
1153void emit_rscimm(int rs,int imm,u_int rt)
1154{
1155 assert(0);
1156 u_int armval;
1157 assert(genimm(imm,&armval));
1158 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1159 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1160}
1161
1162void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1163{
1164 // TODO: if(genimm(imm,&armval)) ...
1165 // else
1166 emit_movimm(imm,HOST_TEMPREG);
1167 emit_adds(HOST_TEMPREG,rsl,rtl);
1168 emit_adcimm(rsh,0,rth);
1169}
1170
1171void emit_sbb(int rs1,int rs2)
1172{
1173 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1174 output_byte(0x19);
1175 output_modrm(3,rs1,rs2);
1176}
1177
1178void emit_andimm(int rs,int imm,int rt)
1179{
1180 u_int armval;
1181 if(genimm(imm,&armval)) {
1182 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1183 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1184 }else if(genimm(~imm,&armval)) {
1185 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1186 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1187 }else if(imm==65535) {
1188 #ifdef ARMv5_ONLY
1189 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1190 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1191 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1192 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1193 #else
1194 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1195 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1196 #endif
1197 }else{
1198 assert(imm>0&&imm<65535);
1199 #ifdef ARMv5_ONLY
1200 assem_debug("mov r14,#%d\n",imm&0xFF00);
1201 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1202 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1203 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1204 #else
1205 emit_movw(imm,HOST_TEMPREG);
1206 #endif
1207 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1208 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1209 }
1210}
1211
1212void emit_orimm(int rs,int imm,int rt)
1213{
1214 u_int armval;
1215 if(genimm(imm,&armval)) {
1216 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1217 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1218 }else{
1219 assert(imm>0&&imm<65536);
1220 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1221 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1222 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1223 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1224 }
1225}
1226
1227void emit_xorimm(int rs,int imm,int rt)
1228{
57871462 1229 u_int armval;
1230 if(genimm(imm,&armval)) {
1231 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1232 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1233 }else{
514ed0d9 1234 assert(imm>0&&imm<65536);
57871462 1235 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1236 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1237 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1238 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1239 }
1240}
1241
1242void emit_shlimm(int rs,u_int imm,int rt)
1243{
1244 assert(imm>0);
1245 assert(imm<32);
1246 //if(imm==1) ...
1247 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1248 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1249}
1250
1251void emit_shrimm(int rs,u_int imm,int rt)
1252{
1253 assert(imm>0);
1254 assert(imm<32);
1255 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1256 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1257}
1258
1259void emit_sarimm(int rs,u_int imm,int rt)
1260{
1261 assert(imm>0);
1262 assert(imm<32);
1263 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1264 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1265}
1266
1267void emit_rorimm(int rs,u_int imm,int rt)
1268{
1269 assert(imm>0);
1270 assert(imm<32);
1271 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1272 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1273}
1274
1275void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1276{
1277 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1278 assert(imm>0);
1279 assert(imm<32);
1280 //if(imm==1) ...
1281 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1282 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1283 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1284 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1285}
1286
1287void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1288{
1289 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1290 assert(imm>0);
1291 assert(imm<32);
1292 //if(imm==1) ...
1293 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1294 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1295 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1296 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1297}
1298
b9b61529 1299void emit_signextend16(int rs,int rt)
1300{
1301 #ifdef ARMv5_ONLY
1302 emit_shlimm(rs,16,rt);
1303 emit_sarimm(rt,16,rt);
1304 #else
1305 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1306 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1307 #endif
1308}
1309
57871462 1310void emit_shl(u_int rs,u_int shift,u_int rt)
1311{
1312 assert(rs<16);
1313 assert(rt<16);
1314 assert(shift<16);
1315 //if(imm==1) ...
1316 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1317 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1318}
1319void emit_shr(u_int rs,u_int shift,u_int rt)
1320{
1321 assert(rs<16);
1322 assert(rt<16);
1323 assert(shift<16);
1324 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1325 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1326}
1327void emit_sar(u_int rs,u_int shift,u_int rt)
1328{
1329 assert(rs<16);
1330 assert(rt<16);
1331 assert(shift<16);
1332 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1333 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1334}
1335void emit_shlcl(int r)
1336{
1337 assem_debug("shl %%%s,%%cl\n",regname[r]);
1338 assert(0);
1339}
1340void emit_shrcl(int r)
1341{
1342 assem_debug("shr %%%s,%%cl\n",regname[r]);
1343 assert(0);
1344}
1345void emit_sarcl(int r)
1346{
1347 assem_debug("sar %%%s,%%cl\n",regname[r]);
1348 assert(0);
1349}
1350
1351void emit_shldcl(int r1,int r2)
1352{
1353 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1354 assert(0);
1355}
1356void emit_shrdcl(int r1,int r2)
1357{
1358 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1359 assert(0);
1360}
1361void emit_orrshl(u_int rs,u_int shift,u_int rt)
1362{
1363 assert(rs<16);
1364 assert(rt<16);
1365 assert(shift<16);
1366 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1367 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1368}
1369void emit_orrshr(u_int rs,u_int shift,u_int rt)
1370{
1371 assert(rs<16);
1372 assert(rt<16);
1373 assert(shift<16);
1374 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1375 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1376}
1377
1378void emit_cmpimm(int rs,int imm)
1379{
1380 u_int armval;
1381 if(genimm(imm,&armval)) {
1382 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1383 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1384 }else if(genimm(-imm,&armval)) {
1385 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1386 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1387 }else if(imm>0) {
1388 assert(imm<65536);
1389 #ifdef ARMv5_ONLY
1390 emit_movimm(imm,HOST_TEMPREG);
1391 #else
1392 emit_movw(imm,HOST_TEMPREG);
1393 #endif
1394 assem_debug("cmp %s,r14\n",regname[rs]);
1395 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1396 }else{
1397 assert(imm>-65536);
1398 #ifdef ARMv5_ONLY
1399 emit_movimm(-imm,HOST_TEMPREG);
1400 #else
1401 emit_movw(-imm,HOST_TEMPREG);
1402 #endif
1403 assem_debug("cmn %s,r14\n",regname[rs]);
1404 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1405 }
1406}
1407
1408void emit_cmovne(u_int *addr,int rt)
1409{
1410 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1411 assert(0);
1412}
1413void emit_cmovl(u_int *addr,int rt)
1414{
1415 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1416 assert(0);
1417}
1418void emit_cmovs(u_int *addr,int rt)
1419{
1420 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1421 assert(0);
1422}
1423void emit_cmovne_imm(int imm,int rt)
1424{
1425 assem_debug("movne %s,#%d\n",regname[rt],imm);
1426 u_int armval;
1427 assert(genimm(imm,&armval));
1428 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1429}
1430void emit_cmovl_imm(int imm,int rt)
1431{
1432 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1433 u_int armval;
1434 assert(genimm(imm,&armval));
1435 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1436}
1437void emit_cmovb_imm(int imm,int rt)
1438{
1439 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1440 u_int armval;
1441 assert(genimm(imm,&armval));
1442 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1443}
1444void emit_cmovs_imm(int imm,int rt)
1445{
1446 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1447 u_int armval;
1448 assert(genimm(imm,&armval));
1449 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1450}
1451void emit_cmove_reg(int rs,int rt)
1452{
1453 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1454 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1455}
1456void emit_cmovne_reg(int rs,int rt)
1457{
1458 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1459 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1460}
1461void emit_cmovl_reg(int rs,int rt)
1462{
1463 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1464 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1465}
1466void emit_cmovs_reg(int rs,int rt)
1467{
1468 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1469 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1470}
1471
// Set rt to 1 when the compare of rs with imm yields "lt", otherwise 0.
// rt is zeroed before the compare unless it aliases rs; in that case it is
// cleared after the compare so the value being compared is not destroyed.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 when the compare of rs with imm yields "cc", otherwise 0.
// rt is zeroed before the compare unless it aliases rs; in that case it is
// cleared after the compare so the value being compared is not destroyed.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set-less-than-immediate for a value held in the register pair rsh:rsl.
// The low word is handled by emit_slti32(); the result is then corrected
// from the high word, which is tested against 0 for imm>=0 and compared
// with -1 for imm<0.  rsh must not be the same register as rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned set-less-than-immediate for a value held in rsh:rsl.
// The low word is handled by emit_sltiu32(); the result is then corrected
// from the high word: forced to 0 if rsh is non-zero (imm>=0), or forced to
// 1 if rsh differs from -1 (imm<0).  rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1518
1519void emit_cmp(int rs,int rt)
1520{
1521 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1522 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1523}
// Set rt to 1 if rs is greater than zero, else 0: compare rs with 1, load 1
// into rt, then overwrite it with 0 on the "lt" condition.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);   // "lt" holds exactly when rs < 1
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Set rt to 1 if rs is non-zero.  When rs and rt are the same register the
// value is only tested; otherwise it is first copied to rt with emit_movs().
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// Greater-than-zero test for a value held in rsh:rsl, result in rt.
// Starts from the low-word result of emit_set_gz32(), then tests the high
// word: "ne" forces 1 and "mi" forces 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);  // high word non-zero
  emit_cmovs_imm(0,rt);   // high word negative
}
// Non-zero test for a value held in rsh:rsl: combine both halves into rt
// with emit_or_and_set_flags(), then replace rt with 1 on "ne".
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 when the compare of rs1 with rs2 yields "lt", otherwise 0.
// If rt aliases either source it is cleared after the compare instead of
// before it, so the operands are intact when compared.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 when the compare of rs1 with rs2 yields "cc", otherwise 0.
// If rt aliases either source it is cleared after the compare instead of
// before it, so the operands are intact when compared.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1568void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1569{
1570 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1571 assert(u1!=rt);
1572 assert(u2!=rt);
1573 emit_cmp(l1,l2);
1574 emit_movimm(0,rt);
1575 emit_sbcs(u1,u2,HOST_TEMPREG);
1576 emit_cmovl_imm(1,rt);
1577}
1578void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1579{
1580 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1581 assert(u1!=rt);
1582 assert(u2!=rt);
1583 emit_cmp(l1,l2);
1584 emit_movimm(0,rt);
1585 emit_sbcs(u1,u2,HOST_TEMPREG);
1586 emit_cmovb_imm(1,rt);
1587}
1588
1589void emit_call(int a)
1590{
1591 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1592 u_int offset=genjmp(a);
1593 output_w32(0xeb000000|offset);
1594}
1595void emit_jmp(int a)
1596{
1597 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1598 u_int offset=genjmp(a);
1599 output_w32(0xea000000|offset);
1600}
// Emit "bne" to address a.
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq" to address a.
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi" to address a.
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl" to address a.
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt" to address a.
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge" to address a.
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc" to address a.
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs" to address a.
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc" to address a.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1655
// Unimplemented placeholder: logs "push $imm" and then asserts
// unconditionally; no instruction is emitted.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
// Unimplemented placeholder: logs "pusha" and then asserts unconditionally;
// no instruction is emitted.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented placeholder: logs "popa" and then asserts unconditionally;
// no instruction is emitted.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1671void emit_pushreg(u_int r)
1672{
1673 assem_debug("push %%%s\n",regname[r]);
1674 assert(0);
1675}
1676void emit_popreg(u_int r)
1677{
1678 assem_debug("pop %%%s\n",regname[r]);
1679 assert(0);
1680}
1681void emit_callreg(u_int r)
1682{
1683 assem_debug("call *%%%s\n",regname[r]);
1684 assert(0);
1685}
1686void emit_jmpreg(u_int r)
1687{
1688 assem_debug("mov pc,%s\n",regname[r]);
1689 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1690}
1691
1692void emit_readword_indexed(int offset, int rs, int rt)
1693{
1694 assert(offset>-4096&&offset<4096);
1695 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1696 if(offset>=0) {
1697 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1698 }else{
1699 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1700 }
1701}
1702void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1703{
1704 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1705 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1706}
// Word load that optionally goes through a map register.
// map<0 means no map register: a plain indexed load at addr is emitted.
// Otherwise addr must be 0 and the load uses rs plus map scaled by four.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load a 64-bit value as two words: rh from addr, rl from the following word.
// The rh load is skipped when rh<0.  With a map register (map>=0), rh must
// not be rs, and map is incremented by one between the two loads (so map is
// modified by the generated code).
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1727void emit_movsbl_indexed(int offset, int rs, int rt)
1728{
1729 assert(offset>-256&&offset<256);
1730 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1731 if(offset>=0) {
1732 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1733 }else{
1734 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1735 }
1736}
1737void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1738{
1739 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1740 else {
1741 if(addr==0) {
1742 emit_shlimm(map,2,map);
1743 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1744 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1745 }else{
1746 assert(addr>-256&&addr<256);
1747 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1748 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1749 emit_movsbl_indexed(addr, rt, rt);
1750 }
1751 }
1752}
1753void emit_movswl_indexed(int offset, int rs, int rt)
1754{
1755 assert(offset>-256&&offset<256);
1756 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1757 if(offset>=0) {
1758 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1759 }else{
1760 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1761 }
1762}
1763void emit_movzbl_indexed(int offset, int rs, int rt)
1764{
1765 assert(offset>-4096&&offset<4096);
1766 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1767 if(offset>=0) {
1768 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1769 }else{
1770 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1771 }
1772}
1773void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1774{
1775 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1776 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1777}
// Zero-extending byte load that optionally goes through a map register.
// map<0: plain indexed load.  Otherwise the load uses map scaled by four;
// a non-zero addr is first added to rs with the sum placed in rt.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr==0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs,addr,rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1790void emit_movzwl_indexed(int offset, int rs, int rt)
1791{
1792 assert(offset>-256&&offset<256);
1793 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1794 if(offset>=0) {
1795 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1796 }else{
1797 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1798 }
1799}
1800void emit_readword(int addr, int rt)
1801{
1802 u_int offset = addr-(u_int)&dynarec_local;
1803 assert(offset<4096);
1804 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1805 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1806}
1807void emit_movsbl(int addr, int rt)
1808{
1809 u_int offset = addr-(u_int)&dynarec_local;
1810 assert(offset<256);
1811 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1812 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1813}
1814void emit_movswl(int addr, int rt)
1815{
1816 u_int offset = addr-(u_int)&dynarec_local;
1817 assert(offset<256);
1818 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1819 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1820}
1821void emit_movzbl(int addr, int rt)
1822{
1823 u_int offset = addr-(u_int)&dynarec_local;
1824 assert(offset<4096);
1825 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1826 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1827}
1828void emit_movzwl(int addr, int rt)
1829{
1830 u_int offset = addr-(u_int)&dynarec_local;
1831 assert(offset<256);
1832 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1833 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1834}
1835void emit_movzwl_reg(int rs, int rt)
1836{
1837 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1838 assert(0);
1839}
1840
1841void emit_xchg(int rs, int rt)
1842{
1843 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1844 assert(0);
1845}
1846void emit_writeword_indexed(int rt, int offset, int rs)
1847{
1848 assert(offset>-4096&&offset<4096);
1849 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1850 if(offset>=0) {
1851 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1852 }else{
1853 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1854 }
1855}
1856void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1857{
1858 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1859 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1860}
// Word store that optionally goes through a map register.
// map<0: plain indexed store at addr.  Otherwise addr must be 0 and the
// store uses rs plus map scaled by four.  temp is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store a 64-bit value as two words: rh at addr, rl at the following word.
// Without a map register (map<0) the rh store is skipped when rh<0.
// With a map register rh must be valid.  If temp is distinct from rs, the
// second store uses temp = map+1 as its map; otherwise rs itself is advanced
// by 4 before the second store (so rs is modified by the generated code).
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
    return;
  }
  emit_addimm(rs,4,rs);
  emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
}
1885void emit_writehword_indexed(int rt, int offset, int rs)
1886{
1887 assert(offset>-256&&offset<256);
1888 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1889 if(offset>=0) {
1890 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1891 }else{
1892 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1893 }
1894}
1895void emit_writebyte_indexed(int rt, int offset, int rs)
1896{
1897 assert(offset>-4096&&offset<4096);
1898 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1899 if(offset>=0) {
1900 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1901 }else{
1902 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1903 }
1904}
1905void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1906{
1907 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1908 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1909}
// Byte store that optionally goes through a map register.
// map<0: plain indexed store.  Otherwise the store uses map scaled by four;
// a non-zero addr is first added to rs with the sum placed in temp.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr==0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs,addr,temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
1922void emit_writeword(int rt, int addr)
1923{
1924 u_int offset = addr-(u_int)&dynarec_local;
1925 assert(offset<4096);
1926 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1927 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1928}
1929void emit_writehword(int rt, int addr)
1930{
1931 u_int offset = addr-(u_int)&dynarec_local;
1932 assert(offset<256);
1933 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1934 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1935}
1936void emit_writebyte(int rt, int addr)
1937{
1938 u_int offset = addr-(u_int)&dynarec_local;
1939 assert(offset<4096);
1940 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1941 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1942}
// Unimplemented placeholder: logs "movl $imm,addr" and then asserts
// unconditionally; no instruction is emitted.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// Unimplemented placeholder: logs "movb $imm,addr" and then asserts
// unconditionally; no instruction is emitted.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
1953
1954void emit_mul(int rs)
1955{
1956 assem_debug("mul %%%s\n",regname[rs]);
1957 assert(0);
1958}
1959void emit_imul(int rs)
1960{
1961 assem_debug("imul %%%s\n",regname[rs]);
1962 assert(0);
1963}
1964void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1965{
1966 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1967 assert(rs1<16);
1968 assert(rs2<16);
1969 assert(hi<16);
1970 assert(lo<16);
1971 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1972}
1973void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1974{
1975 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1976 assert(rs1<16);
1977 assert(rs2<16);
1978 assert(hi<16);
1979 assert(lo<16);
1980 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1981}
1982
1983void emit_div(int rs)
1984{
1985 assem_debug("div %%%s\n",regname[rs]);
1986 assert(0);
1987}
1988void emit_idiv(int rs)
1989{
1990 assem_debug("idiv %%%s\n",regname[rs]);
1991 assert(0);
1992}
// Unimplemented placeholder: logs "cdq" and then asserts unconditionally;
// no instruction is emitted.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
1998
1999void emit_clz(int rs,int rt)
2000{
2001 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2002 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2003}
2004
2005void emit_subcs(int rs1,int rs2,int rt)
2006{
2007 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2008 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2009}
2010
2011void emit_shrcc_imm(int rs,u_int imm,int rt)
2012{
2013 assert(imm>0);
2014 assert(imm<32);
2015 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2016 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2017}
2018
2019void emit_negmi(int rs, int rt)
2020{
2021 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2022 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2023}
2024
2025void emit_negsmi(int rs, int rt)
2026{
2027 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2028 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2029}
2030
2031void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2032{
2033 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2034 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2035}
2036
2037void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2038{
2039 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2040 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2041}
2042
2043void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2044{
2045 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2046 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2047}
2048
2049void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2050{
2051 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2052 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2053}
2054
2055void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2056{
2057 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2058 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2059}
2060
2061void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2062{
2063 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2064 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2065}
2066
2067void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2068{
2069 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2070 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2071}
2072
2073void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2074{
2075 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2076 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2077}
2078
2079void emit_teq(int rs, int rt)
2080{
2081 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2082 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2083}
2084
2085void emit_rsbimm(int rs, int imm, int rt)
2086{
2087 u_int armval;
2088 assert(genimm(imm,&armval));
2089 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2090 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2091}
2092
2093// Load 2 immediates optimizing for small code size
2094void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2095{
2096 emit_movimm(imm1,rt1);
2097 u_int armval;
2098 if(genimm(imm2-imm1,&armval)) {
2099 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2100 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2101 }else if(genimm(imm1-imm2,&armval)) {
2102 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2103 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2104 }
2105 else emit_movimm(imm2,rt2);
2106}
2107
2108// Conditionally select one of two immediates, optimizing for small code size
2109// This will only be called if HAVE_CMOV_IMM is defined
2110void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2111{
2112 u_int armval;
2113 if(genimm(imm2-imm1,&armval)) {
2114 emit_movimm(imm1,rt);
2115 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2116 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2117 }else if(genimm(imm1-imm2,&armval)) {
2118 emit_movimm(imm1,rt);
2119 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2120 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2121 }
2122 else {
2123 #ifdef ARMv5_ONLY
2124 emit_movimm(imm1,rt);
2125 add_literal((int)out,imm2);
2126 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2127 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2128 #else
2129 emit_movw(imm1&0x0000FFFF,rt);
2130 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2131 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2132 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2133 }
2134 emit_movt(imm1&0xFFFF0000,rt);
2135 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2136 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2137 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2138 }
2139 #endif
2140 }
2141}
2142
// special case for checking invalid_code
// Not implemented on this backend: asserts unconditionally and emits nothing.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2148
2149// special case for checking invalid_code
2150void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2151{
2152 assert(imm<128&&imm>=0);
2153 assert(r>=0&&r<16);
2154 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2155 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2156 emit_cmpimm(HOST_TEMPREG,imm);
2157}
2158
2159// special case for tlb mapping
2160void emit_addsr12(int rs1,int rs2,int rt)
2161{
2162 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2163 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2164}
2165
// Used to preload hash table entries
// NOTE(review): this emits two opcode bytes, a modrm byte and a 32-bit
// address, which looks like an x86 encoding rather than an ARM one --
// confirm whether this path is ever reached in the ARM backend.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2175void emit_prefetchreg(int r)
2176{
2177 assem_debug("pld %s\n",regname[r]);
2178 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2179}
2180
2181// Special case for mini_ht
2182void emit_ldreq_indexed(int rs, u_int offset, int rt)
2183{
2184 assert(offset<4096);
2185 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2186 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2187}
2188
2189void emit_flds(int r,int sr)
2190{
2191 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2192 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2193}
2194
2195void emit_vldr(int r,int vr)
2196{
2197 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2198 output_w32(0xed900b00|(vr<<12)|(r<<16));
2199}
2200
2201void emit_fsts(int sr,int r)
2202{
2203 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2204 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2205}
2206
2207void emit_vstr(int vr,int r)
2208{
2209 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2210 output_w32(0xed800b00|(vr<<12)|(r<<16));
2211}
2212
// Emits "ftosizs s<d>,s<s>".  Only the low four bits of each register
// number are encoded.
void emit_ftosizs(int s,int d)
{
  assem_debug("ftosizs s%d,s%d\n",d,s);
  int dst_bits=((d&14)<<11)|((d&1)<<22);
  int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeebd0ac0|dst_bits|src_bits);
}
2218
// Emits "ftosizd s<d>,d<s>".  The destination uses the low four bits of d,
// the source the low three bits of s.
void emit_ftosizd(int s,int d)
{
  assem_debug("ftosizd s%d,d%d\n",d,s);
  int dst_bits=((d&14)<<11)|((d&1)<<22);
  int src_bits=s&7;
  output_w32(0xeebd0bc0|dst_bits|src_bits);
}
2224
// Emits "fsitos s<d>,s<s>".  Only the low four bits of each register
// number are encoded.
void emit_fsitos(int s,int d)
{
  assem_debug("fsitos s%d,s%d\n",d,s);
  int dst_bits=((d&14)<<11)|((d&1)<<22);
  int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80ac0|dst_bits|src_bits);
}
2230
// Emits "fsitod d<d>,s<s>".  The destination uses the low three bits of d,
// the source the low four bits of s.
void emit_fsitod(int s,int d)
{
  assem_debug("fsitod d%d,s%d\n",d,s);
  int dst_bits=(d&7)<<12;
  int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80bc0|dst_bits|src_bits);
}
2236
// Emits "fcvtds d<d>,s<s>".  The destination uses the low three bits of d,
// the source the low four bits of s.
void emit_fcvtds(int s,int d)
{
  assem_debug("fcvtds d%d,s%d\n",d,s);
  int dst_bits=(d&7)<<12;
  int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb70ac0|dst_bits|src_bits);
}
2242
// Emits "fcvtsd s<d>,d<s>".  The destination uses the low four bits of d,
// the source the low three bits of s.
void emit_fcvtsd(int s,int d)
{
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  int dst_bits=((d&14)<<11)|((d&1)<<22);
  int src_bits=s&7;
  output_w32(0xeeb70bc0|dst_bits|src_bits);
}
2248
// Emits "fsqrts s<d>,s<s>".  Both operands are encoded as single-precision
// registers (low four bits of each number), so the trace prints both with
// an "s" prefix.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2254
// Emits "fsqrtd d<d>,d<s>".  Both operands are encoded as double-precision
// registers (low three bits of each number), so the trace prints both with
// a "d" prefix.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2260
// Emits "fabss s<d>,s<s>".  Both operands are encoded as single-precision
// registers (low four bits of each number), so the trace prints both with
// an "s" prefix.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2266
// Emits "fabsd d<d>,d<s>".  Both operands are encoded as double-precision
// registers (low three bits of each number), so the trace prints both with
// a "d" prefix.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2272
// Emits "fnegs s<d>,s<s>".  Both operands are encoded as single-precision
// registers (low four bits of each number), so the trace prints both with
// an "s" prefix.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2278
// Emits "fnegd d<d>,d<s>".  Both operands are encoded as double-precision
// registers (low three bits of each number), so the trace prints both with
// a "d" prefix.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2284
// Emits "fadds s<d>,s<s1>,s<s2>".  Only the low four bits of each register
// number are encoded.
void emit_fadds(int s1,int s2,int d)
{
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  int dst_bits=((d&14)<<11)|((d&1)<<22);
  int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a00|dst_bits|lhs_bits|rhs_bits);
}
2290
// Emits "faddd d<d>,d<s1>,d<s2>".  Only the low three bits of each register
// number are encoded.
void emit_faddd(int s1,int s2,int d)
{
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  int dst_bits=(d&7)<<12;
  int lhs_bits=(s1&7)<<16;
  int rhs_bits=s2&7;
  output_w32(0xee300b00|dst_bits|lhs_bits|rhs_bits);
}
2296
// Emits "fsubs s<d>,s<s1>,s<s2>".  Only the low four bits of each register
// number are encoded.
void emit_fsubs(int s1,int s2,int d)
{
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  int dst_bits=((d&14)<<11)|((d&1)<<22);
  int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a40|dst_bits|lhs_bits|rhs_bits);
}
2302
// Emits "fsubd d<d>,d<s1>,d<s2>".  Only the low three bits of each register
// number are encoded.
void emit_fsubd(int s1,int s2,int d)
{
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  int dst_bits=(d&7)<<12;
  int lhs_bits=(s1&7)<<16;
  int rhs_bits=s2&7;
  output_w32(0xee300b40|dst_bits|lhs_bits|rhs_bits);
}
2308
// Emits "fmuls s<d>,s<s1>,s<s2>".  Only the low four bits of each register
// number are encoded.
void emit_fmuls(int s1,int s2,int d)
{
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  int dst_bits=((d&14)<<11)|((d&1)<<22);
  int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee200a00|dst_bits|lhs_bits|rhs_bits);
}
2314
// Emits "fmuld d<d>,d<s1>,d<s2>".  Only the low three bits of each register
// number are encoded.
void emit_fmuld(int s1,int s2,int d)
{
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  int dst_bits=(d&7)<<12;
  int lhs_bits=(s1&7)<<16;
  int rhs_bits=s2&7;
  output_w32(0xee200b00|dst_bits|lhs_bits|rhs_bits);
}
2320
// Emits "fdivs s<d>,s<s1>,s<s2>".  Only the low four bits of each register
// number are encoded.
void emit_fdivs(int s1,int s2,int d)
{
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  int dst_bits=((d&14)<<11)|((d&1)<<22);
  int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee800a00|dst_bits|lhs_bits|rhs_bits);
}
2326
// Emits "fdivd d<d>,d<s1>,d<s2>".  Only the low three bits of each register
// number are encoded.
void emit_fdivd(int s1,int s2,int d)
{
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  int dst_bits=(d&7)<<12;
  int lhs_bits=(s1&7)<<16;
  int rhs_bits=s2&7;
  output_w32(0xee800b00|dst_bits|lhs_bits|rhs_bits);
}
2332
// Emits a fixed "fcmps s14, s15".  The x and y arguments are not used; the
// operands are hard-wired into the emitted word.
void emit_fcmps(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  output_w32(0xeeb47a67);
}
2338
// Emits a fixed "fcmpd d6, d7".  The x and y arguments are not used; the
// operands are hard-wired into the emitted word.
void emit_fcmpd(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  output_w32(0xeeb46b47);
}
2344
// Emits "fmstat" (a single fixed instruction word).
void emit_fmstat()
{
  const unsigned int insn=0xeef1fa10;
  assem_debug("fmstat\n");
  output_w32(insn);
}
2350
2351void emit_bicne_imm(int rs,int imm,int rt)
2352{
2353 u_int armval;
2354 assert(genimm(imm,&armval));
2355 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2356 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2357}
2358
2359void emit_biccs_imm(int rs,int imm,int rt)
2360{
2361 u_int armval;
2362 assert(genimm(imm,&armval));
2363 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2364 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2365}
2366
2367void emit_bicvc_imm(int rs,int imm,int rt)
2368{
2369 u_int armval;
2370 assert(genimm(imm,&armval));
2371 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2372 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2373}
2374
2375void emit_bichi_imm(int rs,int imm,int rt)
2376{
2377 u_int armval;
2378 assert(genimm(imm,&armval));
2379 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2380 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2381}
2382
2383void emit_orrvs_imm(int rs,int imm,int rt)
2384{
2385 u_int armval;
2386 assert(genimm(imm,&armval));
2387 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2388 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2389}
2390
b9b61529 2391void emit_orrne_imm(int rs,int imm,int rt)
2392{
2393 u_int armval;
2394 assert(genimm(imm,&armval));
2395 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2396 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2397}
2398
2399void emit_andne_imm(int rs,int imm,int rt)
2400{
2401 u_int armval;
2402 assert(genimm(imm,&armval));
2403 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2404 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2405}
2406
// Emits "addvc pc,pc,#..." with an empty immediate field; the argument a is
// only printed in the trace.
// NOTE(review): the "#?" in the trace suggests the offset is filled in
// later by the caller -- confirm.
void emit_jno_unlikely(int a)
{
  //emit_jno(a);
  assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
  {
    unsigned int insn=0x72800000|rd_rn_rm(15,15,0);
    output_w32(insn);
  }
}
2413
2414// Save registers before function call
2415void save_regs(u_int reglist)
2416{
2417 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2418 if(!reglist) return;
2419 assem_debug("stmia fp,{");
2420 if(reglist&1) assem_debug("r0, ");
2421 if(reglist&2) assem_debug("r1, ");
2422 if(reglist&4) assem_debug("r2, ");
2423 if(reglist&8) assem_debug("r3, ");
2424 if(reglist&0x1000) assem_debug("r12");
2425 assem_debug("}\n");
2426 output_w32(0xe88b0000|reglist);
2427}
2428// Restore registers after function call
2429void restore_regs(u_int reglist)
2430{
2431 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2432 if(!reglist) return;
2433 assem_debug("ldmia fp,{");
2434 if(reglist&1) assem_debug("r0, ");
2435 if(reglist&2) assem_debug("r1, ");
2436 if(reglist&4) assem_debug("r2, ");
2437 if(reglist&8) assem_debug("r3, ");
2438 if(reglist&0x1000) assem_debug("r12");
2439 assem_debug("}\n");
2440 output_w32(0xe89b0000|reglist);
2441}
2442
2443// Write back consts using r14 so we don't disturb the other registers
2444void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2445{
2446 int hr;
2447 for(hr=0;hr<HOST_REGS;hr++) {
2448 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2449 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2450 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2451 int value=constmap[i][hr];
2452 if(value==0) {
2453 emit_zeroreg(HOST_TEMPREG);
2454 }
2455 else {
2456 emit_movimm(value,HOST_TEMPREG);
2457 }
2458 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2459#ifndef FORCE32
57871462 2460 if((i_is32>>i_regmap[hr])&1) {
2461 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2462 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2463 }
24385cae 2464#endif
57871462 2465 }
2466 }
2467 }
2468 }
2469}
2470
2471/* Stubs/epilogue */
2472
2473void literal_pool(int n)
2474{
2475 if(!literalcount) return;
2476 if(n) {
2477 if((int)out-literals[0][0]<4096-n) return;
2478 }
2479 u_int *ptr;
2480 int i;
2481 for(i=0;i<literalcount;i++)
2482 {
2483 ptr=(u_int *)literals[i][0];
2484 u_int offset=(u_int)out-(u_int)ptr-8;
2485 assert(offset<4096);
2486 assert(!(offset&3));
2487 *ptr|=offset;
2488 output_w32(literals[i][1]);
2489 }
2490 literalcount=0;
2491}
2492
2493void literal_pool_jumpover(int n)
2494{
2495 if(!literalcount) return;
2496 if(n) {
2497 if((int)out-literals[0][0]<4096-n) return;
2498 }
2499 int jaddr=(int)out;
2500 emit_jmp(0);
2501 literal_pool(0);
2502 set_jump_target(jaddr,(int)out);
2503}
2504
2505emit_extjump2(int addr, int target, int linker)
2506{
2507 u_char *ptr=(u_char *)addr;
2508 assert((ptr[3]&0x0e)==0xa);
2509 emit_loadlp(target,0);
2510 emit_loadlp(addr,1);
24385cae 2511 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2512 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2513//DEBUG >
2514#ifdef DEBUG_CYCLE_COUNT
2515 emit_readword((int)&last_count,ECX);
2516 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2517 emit_readword((int)&next_interupt,ECX);
2518 emit_writeword(HOST_CCREG,(int)&Count);
2519 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2520 emit_writeword(ECX,(int)&last_count);
2521#endif
2522//DEBUG <
2523 emit_jmp(linker);
2524}
2525
2526emit_extjump(int addr, int target)
2527{
2528 emit_extjump2(addr, target, (int)dyna_linker);
2529}
2530emit_extjump_ds(int addr, int target)
2531{
2532 emit_extjump2(addr, target, (int)dyna_linker_ds);
2533}
2534
2535do_readstub(int n)
2536{
2537 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2538 literal_pool(256);
2539 set_jump_target(stubs[n][1],(int)out);
2540 int type=stubs[n][0];
2541 int i=stubs[n][3];
2542 int rs=stubs[n][4];
2543 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2544 u_int reglist=stubs[n][7];
2545 signed char *i_regmap=i_regs->regmap;
2546 int addr=get_reg(i_regmap,AGEN1+(i&1));
2547 int rth,rt;
2548 int ds;
b9b61529 2549 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2550 rth=get_reg(i_regmap,FTEMP|64);
2551 rt=get_reg(i_regmap,FTEMP);
2552 }else{
2553 rth=get_reg(i_regmap,rt1[i]|64);
2554 rt=get_reg(i_regmap,rt1[i]);
2555 }
2556 assert(rs>=0);
2557 assert(rt>=0);
2558 if(addr<0) addr=rt;
2559 assert(addr>=0);
2560 int ftable=0;
2561 if(type==LOADB_STUB||type==LOADBU_STUB)
2562 ftable=(int)readmemb;
2563 if(type==LOADH_STUB||type==LOADHU_STUB)
2564 ftable=(int)readmemh;
2565 if(type==LOADW_STUB)
2566 ftable=(int)readmem;
24385cae 2567#ifndef FORCE32
57871462 2568 if(type==LOADD_STUB)
2569 ftable=(int)readmemd;
24385cae 2570#endif
2571 assert(ftable!=0);
57871462 2572 emit_writeword(rs,(int)&address);
2573 //emit_pusha();
2574 save_regs(reglist);
2575 ds=i_regs!=&regs[i];
2576 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2577 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2578 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2579 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2580 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2581 emit_shrimm(rs,16,1);
2582 int cc=get_reg(i_regmap,CCREG);
2583 if(cc<0) {
2584 emit_loadreg(CCREG,2);
2585 }
2586 emit_movimm(ftable,0);
2587 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2588 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2589 //emit_readword((int)&last_count,temp);
2590 //emit_add(cc,temp,cc);
2591 //emit_writeword(cc,(int)&Count);
2592 //emit_mov(15,14);
2593 emit_call((int)&indirect_jump_indexed);
2594 //emit_callreg(rs);
2595 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2596 // We really shouldn't need to update the count here,
2597 // but not doing so causes random crashes...
2598 emit_readword((int)&Count,HOST_TEMPREG);
2599 emit_readword((int)&next_interupt,2);
2600 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2601 emit_writeword(2,(int)&last_count);
2602 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2603 if(cc<0) {
2604 emit_storereg(CCREG,HOST_TEMPREG);
2605 }
2606 //emit_popa();
2607 restore_regs(reglist);
2608 //if((cc=get_reg(regmap,CCREG))>=0) {
2609 // emit_loadreg(CCREG,cc);
2610 //}
2611 if(type==LOADB_STUB)
2612 emit_movsbl((int)&readmem_dword,rt);
2613 if(type==LOADBU_STUB)
2614 emit_movzbl((int)&readmem_dword,rt);
2615 if(type==LOADH_STUB)
2616 emit_movswl((int)&readmem_dword,rt);
2617 if(type==LOADHU_STUB)
2618 emit_movzwl((int)&readmem_dword,rt);
2619 if(type==LOADW_STUB)
2620 emit_readword((int)&readmem_dword,rt);
2621 if(type==LOADD_STUB) {
2622 emit_readword((int)&readmem_dword,rt);
2623 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2624 }
2625 emit_jmp(stubs[n][2]); // return address
2626}
2627
2628inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2629{
2630 int rs=get_reg(regmap,target);
2631 int rth=get_reg(regmap,target|64);
2632 int rt=get_reg(regmap,target);
2633 assert(rs>=0);
2634 assert(rt>=0);
2635 int ftable=0;
2636 if(type==LOADB_STUB||type==LOADBU_STUB)
2637 ftable=(int)readmemb;
2638 if(type==LOADH_STUB||type==LOADHU_STUB)
2639 ftable=(int)readmemh;
2640 if(type==LOADW_STUB)
2641 ftable=(int)readmem;
24385cae 2642#ifndef FORCE32
57871462 2643 if(type==LOADD_STUB)
2644 ftable=(int)readmemd;
24385cae 2645#endif
2646 assert(ftable!=0);
57871462 2647 emit_writeword(rs,(int)&address);
2648 //emit_pusha();
2649 save_regs(reglist);
2650 //emit_shrimm(rs,16,1);
2651 int cc=get_reg(regmap,CCREG);
2652 if(cc<0) {
2653 emit_loadreg(CCREG,2);
2654 }
2655 //emit_movimm(ftable,0);
2656 emit_movimm(((u_int *)ftable)[addr>>16],0);
2657 //emit_readword((int)&last_count,12);
2658 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2659 if((signed int)addr>=(signed int)0xC0000000) {
2660 // Pagefault address
2661 int ds=regmap!=regs[i].regmap;
2662 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2663 }
2664 //emit_add(12,2,2);
2665 //emit_writeword(2,(int)&Count);
2666 //emit_call(((u_int *)ftable)[addr>>16]);
2667 emit_call((int)&indirect_jump);
2668 // We really shouldn't need to update the count here,
2669 // but not doing so causes random crashes...
2670 emit_readword((int)&Count,HOST_TEMPREG);
2671 emit_readword((int)&next_interupt,2);
2672 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2673 emit_writeword(2,(int)&last_count);
2674 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2675 if(cc<0) {
2676 emit_storereg(CCREG,HOST_TEMPREG);
2677 }
2678 //emit_popa();
2679 restore_regs(reglist);
2680 if(type==LOADB_STUB)
2681 emit_movsbl((int)&readmem_dword,rt);
2682 if(type==LOADBU_STUB)
2683 emit_movzbl((int)&readmem_dword,rt);
2684 if(type==LOADH_STUB)
2685 emit_movswl((int)&readmem_dword,rt);
2686 if(type==LOADHU_STUB)
2687 emit_movzwl((int)&readmem_dword,rt);
2688 if(type==LOADW_STUB)
2689 emit_readword((int)&readmem_dword,rt);
2690 if(type==LOADD_STUB) {
2691 emit_readword((int)&readmem_dword,rt);
2692 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2693 }
2694}
2695
2696do_writestub(int n)
2697{
2698 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2699 literal_pool(256);
2700 set_jump_target(stubs[n][1],(int)out);
2701 int type=stubs[n][0];
2702 int i=stubs[n][3];
2703 int rs=stubs[n][4];
2704 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2705 u_int reglist=stubs[n][7];
2706 signed char *i_regmap=i_regs->regmap;
2707 int addr=get_reg(i_regmap,AGEN1+(i&1));
2708 int rth,rt,r;
2709 int ds;
b9b61529 2710 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2711 rth=get_reg(i_regmap,FTEMP|64);
2712 rt=get_reg(i_regmap,r=FTEMP);
2713 }else{
2714 rth=get_reg(i_regmap,rs2[i]|64);
2715 rt=get_reg(i_regmap,r=rs2[i]);
2716 }
2717 assert(rs>=0);
2718 assert(rt>=0);
2719 if(addr<0) addr=get_reg(i_regmap,-1);
2720 assert(addr>=0);
2721 int ftable=0;
2722 if(type==STOREB_STUB)
2723 ftable=(int)writememb;
2724 if(type==STOREH_STUB)
2725 ftable=(int)writememh;
2726 if(type==STOREW_STUB)
2727 ftable=(int)writemem;
24385cae 2728#ifndef FORCE32
57871462 2729 if(type==STORED_STUB)
2730 ftable=(int)writememd;
24385cae 2731#endif
2732 assert(ftable!=0);
57871462 2733 emit_writeword(rs,(int)&address);
2734 //emit_shrimm(rs,16,rs);
2735 //emit_movmem_indexedx4(ftable,rs,rs);
2736 if(type==STOREB_STUB)
2737 emit_writebyte(rt,(int)&byte);
2738 if(type==STOREH_STUB)
2739 emit_writehword(rt,(int)&hword);
2740 if(type==STOREW_STUB)
2741 emit_writeword(rt,(int)&word);
2742 if(type==STORED_STUB) {
3d624f89 2743#ifndef FORCE32
57871462 2744 emit_writeword(rt,(int)&dword);
2745 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2746#else
2747 printf("STORED_STUB\n");
2748#endif
57871462 2749 }
2750 //emit_pusha();
2751 save_regs(reglist);
2752 ds=i_regs!=&regs[i];
2753 int real_rs=get_reg(i_regmap,rs1[i]);
2754 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2755 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2756 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2757 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2758 emit_shrimm(rs,16,1);
2759 int cc=get_reg(i_regmap,CCREG);
2760 if(cc<0) {
2761 emit_loadreg(CCREG,2);
2762 }
2763 emit_movimm(ftable,0);
2764 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2765 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2766 //emit_readword((int)&last_count,temp);
2767 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2768 //emit_add(cc,temp,cc);
2769 //emit_writeword(cc,(int)&Count);
2770 emit_call((int)&indirect_jump_indexed);
2771 //emit_callreg(rs);
2772 emit_readword((int)&Count,HOST_TEMPREG);
2773 emit_readword((int)&next_interupt,2);
2774 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2775 emit_writeword(2,(int)&last_count);
2776 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2777 if(cc<0) {
2778 emit_storereg(CCREG,HOST_TEMPREG);
2779 }
2780 //emit_popa();
2781 restore_regs(reglist);
2782 //if((cc=get_reg(regmap,CCREG))>=0) {
2783 // emit_loadreg(CCREG,cc);
2784 //}
2785 emit_jmp(stubs[n][2]); // return address
2786}
2787
2788inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2789{
2790 int rs=get_reg(regmap,-1);
2791 int rth=get_reg(regmap,target|64);
2792 int rt=get_reg(regmap,target);
2793 assert(rs>=0);
2794 assert(rt>=0);
2795 int ftable=0;
2796 if(type==STOREB_STUB)
2797 ftable=(int)writememb;
2798 if(type==STOREH_STUB)
2799 ftable=(int)writememh;
2800 if(type==STOREW_STUB)
2801 ftable=(int)writemem;
24385cae 2802#ifndef FORCE32
57871462 2803 if(type==STORED_STUB)
2804 ftable=(int)writememd;
24385cae 2805#endif
2806 assert(ftable!=0);
57871462 2807 emit_writeword(rs,(int)&address);
2808 //emit_shrimm(rs,16,rs);
2809 //emit_movmem_indexedx4(ftable,rs,rs);
2810 if(type==STOREB_STUB)
2811 emit_writebyte(rt,(int)&byte);
2812 if(type==STOREH_STUB)
2813 emit_writehword(rt,(int)&hword);
2814 if(type==STOREW_STUB)
2815 emit_writeword(rt,(int)&word);
2816 if(type==STORED_STUB) {
3d624f89 2817#ifndef FORCE32
57871462 2818 emit_writeword(rt,(int)&dword);
2819 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2820#else
2821 printf("STORED_STUB\n");
2822#endif
57871462 2823 }
2824 //emit_pusha();
2825 save_regs(reglist);
2826 //emit_shrimm(rs,16,1);
2827 int cc=get_reg(regmap,CCREG);
2828 if(cc<0) {
2829 emit_loadreg(CCREG,2);
2830 }
2831 //emit_movimm(ftable,0);
2832 emit_movimm(((u_int *)ftable)[addr>>16],0);
2833 //emit_readword((int)&last_count,12);
2834 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2835 if((signed int)addr>=(signed int)0xC0000000) {
2836 // Pagefault address
2837 int ds=regmap!=regs[i].regmap;
2838 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2839 }
2840 //emit_add(12,2,2);
2841 //emit_writeword(2,(int)&Count);
2842 //emit_call(((u_int *)ftable)[addr>>16]);
2843 emit_call((int)&indirect_jump);
2844 emit_readword((int)&Count,HOST_TEMPREG);
2845 emit_readword((int)&next_interupt,2);
2846 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2847 emit_writeword(2,(int)&last_count);
2848 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2849 if(cc<0) {
2850 emit_storereg(CCREG,HOST_TEMPREG);
2851 }
2852 //emit_popa();
2853 restore_regs(reglist);
2854}
2855
2856do_unalignedwritestub(int n)
2857{
2858 set_jump_target(stubs[n][1],(int)out);
2859 output_w32(0xef000000);
2860 emit_jmp(stubs[n][2]); // return address
2861}
2862
// Debug helper: prints the eight argument values in the order
// a,b,c,d,ebp,esi,edi followed by the word just below the first parameter.
// NOTE(review): (&edi)[-1] reads outside the parameter object; the x86-style
// parameter names suggest this was meant to fetch a return address on
// another architecture -- confirm it is meaningful here.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int below_first=(&edi)[-1];
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,below_first);
}
2867
2868do_invstub(int n)
2869{
2870 literal_pool(20);
2871 u_int reglist=stubs[n][3];
2872 set_jump_target(stubs[n][1],(int)out);
2873 save_regs(reglist);
2874 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2875 emit_call((int)&invalidate_addr);
2876 restore_regs(reglist);
2877 emit_jmp(stubs[n][2]); // return address
2878}
2879
2880int do_dirty_stub(int i)
2881{
2882 assem_debug("do_dirty_stub %x\n",start+i*4);
2883 // Careful about the code output here, verify_dirty needs to parse it.
2884 #ifdef ARMv5_ONLY
2885 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2886 emit_loadlp((int)copy,2);
2887 emit_loadlp(slen*4,3);
2888 #else
2889 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2890 emit_movw(((u_int)copy)&0x0000FFFF,2);
2891 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2892 emit_movt(((u_int)copy)&0xFFFF0000,2);
2893 emit_movw(slen*4,3);
2894 #endif
2895 emit_movimm(start+i*4,0);
2896 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2897 int entry=(int)out;
2898 load_regs_entry(i);
2899 if(entry==(int)out) entry=instr_addr[i];
2900 emit_jmp(instr_addr[i]);
2901 return entry;
2902}
2903
2904void do_dirty_stub_ds()
2905{
2906 // Careful about the code output here, verify_dirty needs to parse it.
2907 #ifdef ARMv5_ONLY
2908 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2909 emit_loadlp((int)copy,2);
2910 emit_loadlp(slen*4,3);
2911 #else
2912 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2913 emit_movw(((u_int)copy)&0x0000FFFF,2);
2914 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2915 emit_movt(((u_int)copy)&0xFFFF0000,2);
2916 emit_movw(slen*4,3);
2917 #endif
2918 emit_movimm(start+1,0);
2919 emit_call((int)&verify_code_ds);
2920}
2921
2922do_cop1stub(int n)
2923{
2924 literal_pool(256);
2925 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2926 set_jump_target(stubs[n][1],(int)out);
2927 int i=stubs[n][3];
3d624f89 2928// int rs=stubs[n][4];
57871462 2929 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2930 int ds=stubs[n][6];
2931 if(!ds) {
2932 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2933 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2934 }
2935 //else {printf("fp exception in delay slot\n");}
2936 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2937 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2938 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2939 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2940 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2941}
2942
2943/* TLB */
2944
2945int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
2946{
2947 if(c) {
2948 if((signed int)addr>=(signed int)0xC0000000) {
2949 // address_generation already loaded the const
2950 emit_readword_dualindexedx4(FP,map,map);
2951 }
2952 else
2953 return -1; // No mapping
2954 }
2955 else {
2956 assert(s!=map);
2957 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2958 emit_addsr12(map,s,map);
2959 // Schedule this while we wait on the load
2960 //if(x) emit_xorimm(s,x,ar);
2961 if(shift>=0) emit_shlimm(s,3,shift);
2962 if(~a) emit_andimm(s,a,ar);
2963 emit_readword_dualindexedx4(FP,map,map);
2964 }
2965 return map;
2966}
2967int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
2968{
2969 if(!c||(signed int)addr>=(signed int)0xC0000000) {
2970 emit_test(map,map);
2971 *jaddr=(int)out;
2972 emit_js(0);
2973 }
2974 return map;
2975}
2976
2977int gen_tlb_addr_r(int ar, int map) {
2978 if(map>=0) {
2979 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2980 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2981 }
2982}
2983
2984int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
2985{
2986 if(c) {
2987 if(addr<0x80800000||addr>=0xC0000000) {
2988 // address_generation already loaded the const
2989 emit_readword_dualindexedx4(FP,map,map);
2990 }
2991 else
2992 return -1; // No mapping
2993 }
2994 else {
2995 assert(s!=map);
2996 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2997 emit_addsr12(map,s,map);
2998 // Schedule this while we wait on the load
2999 //if(x) emit_xorimm(s,x,ar);
3000 emit_readword_dualindexedx4(FP,map,map);
3001 }
3002 return map;
3003}
3004int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3005{
3006 if(!c||addr<0x80800000||addr>=0xC0000000) {
3007 emit_testimm(map,0x40000000);
3008 *jaddr=(int)out;
3009 emit_jne(0);
3010 }
3011}
3012
3013int gen_tlb_addr_w(int ar, int map) {
3014 if(map>=0) {
3015 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3016 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3017 }
3018}
3019
3020// Generate the address of the memory_map entry, relative to dynarec_local
3021generate_map_const(u_int addr,int reg) {
3022 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3023 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3024}
3025
3026/* Special assem */
3027
3028void shift_assemble_arm(int i,struct regstat *i_regs)
3029{
3030 if(rt1[i]) {
3031 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3032 {
3033 signed char s,t,shift;
3034 t=get_reg(i_regs->regmap,rt1[i]);
3035 s=get_reg(i_regs->regmap,rs1[i]);
3036 shift=get_reg(i_regs->regmap,rs2[i]);
3037 if(t>=0){
3038 if(rs1[i]==0)
3039 {
3040 emit_zeroreg(t);
3041 }
3042 else if(rs2[i]==0)
3043 {
3044 assert(s>=0);
3045 if(s!=t) emit_mov(s,t);
3046 }
3047 else
3048 {
3049 emit_andimm(shift,31,HOST_TEMPREG);
3050 if(opcode2[i]==4) // SLLV
3051 {
3052 emit_shl(s,HOST_TEMPREG,t);
3053 }
3054 if(opcode2[i]==6) // SRLV
3055 {
3056 emit_shr(s,HOST_TEMPREG,t);
3057 }
3058 if(opcode2[i]==7) // SRAV
3059 {
3060 emit_sar(s,HOST_TEMPREG,t);
3061 }
3062 }
3063 }
3064 } else { // DSLLV/DSRLV/DSRAV
3065 signed char sh,sl,th,tl,shift;
3066 th=get_reg(i_regs->regmap,rt1[i]|64);
3067 tl=get_reg(i_regs->regmap,rt1[i]);
3068 sh=get_reg(i_regs->regmap,rs1[i]|64);
3069 sl=get_reg(i_regs->regmap,rs1[i]);
3070 shift=get_reg(i_regs->regmap,rs2[i]);
3071 if(tl>=0){
3072 if(rs1[i]==0)
3073 {
3074 emit_zeroreg(tl);
3075 if(th>=0) emit_zeroreg(th);
3076 }
3077 else if(rs2[i]==0)
3078 {
3079 assert(sl>=0);
3080 if(sl!=tl) emit_mov(sl,tl);
3081 if(th>=0&&sh!=th) emit_mov(sh,th);
3082 }
3083 else
3084 {
3085 // FIXME: What if shift==tl ?
3086 assert(shift!=tl);
3087 int temp=get_reg(i_regs->regmap,-1);
3088 int real_th=th;
3089 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3090 assert(sl>=0);
3091 assert(sh>=0);
3092 emit_andimm(shift,31,HOST_TEMPREG);
3093 if(opcode2[i]==0x14) // DSLLV
3094 {
3095 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3096 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3097 emit_orrshr(sl,HOST_TEMPREG,th);
3098 emit_andimm(shift,31,HOST_TEMPREG);
3099 emit_testimm(shift,32);
3100 emit_shl(sl,HOST_TEMPREG,tl);
3101 if(th>=0) emit_cmovne_reg(tl,th);
3102 emit_cmovne_imm(0,tl);
3103 }
3104 if(opcode2[i]==0x16) // DSRLV
3105 {
3106 assert(th>=0);
3107 emit_shr(sl,HOST_TEMPREG,tl);
3108 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3109 emit_orrshl(sh,HOST_TEMPREG,tl);
3110 emit_andimm(shift,31,HOST_TEMPREG);
3111 emit_testimm(shift,32);
3112 emit_shr(sh,HOST_TEMPREG,th);
3113 emit_cmovne_reg(th,tl);
3114 if(real_th>=0) emit_cmovne_imm(0,th);
3115 }
3116 if(opcode2[i]==0x17) // DSRAV
3117 {
3118 assert(th>=0);
3119 emit_shr(sl,HOST_TEMPREG,tl);
3120 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3121 if(real_th>=0) {
3122 assert(temp>=0);
3123 emit_sarimm(th,31,temp);
3124 }
3125 emit_orrshl(sh,HOST_TEMPREG,tl);
3126 emit_andimm(shift,31,HOST_TEMPREG);
3127 emit_testimm(shift,32);
3128 emit_sar(sh,HOST_TEMPREG,th);
3129 emit_cmovne_reg(th,tl);
3130 if(real_th>=0) emit_cmovne_reg(temp,th);
3131 }
3132 }
3133 }
3134 }
3135 }
3136}
3137#define shift_assemble shift_assemble_arm
3138
3139void loadlr_assemble_arm(int i,struct regstat *i_regs)
3140{
3141 int s,th,tl,temp,temp2,addr,map=-1;
3142 int offset;
3143 int jaddr=0;
3144 int memtarget,c=0;
3145 u_int hr,reglist=0;
3146 th=get_reg(i_regs->regmap,rt1[i]|64);
3147 tl=get_reg(i_regs->regmap,rt1[i]);
3148 s=get_reg(i_regs->regmap,rs1[i]);
3149 temp=get_reg(i_regs->regmap,-1);
3150 temp2=get_reg(i_regs->regmap,FTEMP);
3151 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3152 assert(addr<0);
3153 offset=imm[i];
3154 for(hr=0;hr<HOST_REGS;hr++) {
3155 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3156 }
3157 reglist|=1<<temp;
3158 if(offset||s<0||c) addr=temp2;
3159 else addr=s;
3160 if(s>=0) {
3161 c=(i_regs->wasconst>>s)&1;
3162 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3163 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3164 }
3165 if(tl>=0) {
3166 //assert(tl>=0);
3167 //assert(rt1[i]);
3168 if(!using_tlb) {
3169 if(!c) {
3170 emit_shlimm(addr,3,temp);
3171 if (opcode[i]==0x22||opcode[i]==0x26) {
3172 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3173 }else{
3174 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3175 }
3176 emit_cmpimm(addr,0x800000);
3177 jaddr=(int)out;
3178 emit_jno(0);
3179 }
3180 else {
3181 if (opcode[i]==0x22||opcode[i]==0x26) {
3182 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3183 }else{
3184 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3185 }
3186 }
3187 }else{ // using tlb
3188 int a;
3189 if(c) {
3190 a=-1;
3191 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3192 a=0xFFFFFFFC; // LWL/LWR
3193 }else{
3194 a=0xFFFFFFF8; // LDL/LDR
3195 }
3196 map=get_reg(i_regs->regmap,TLREG);
3197 assert(map>=0);
3198 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3199 if(c) {
3200 if (opcode[i]==0x22||opcode[i]==0x26) {
3201 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3202 }else{
3203 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3204 }
3205 }
3206 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3207 }
3208 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3209 if(!c||memtarget) {
3210 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3211 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3212 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3213 }
3214 else
3215 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3216 emit_andimm(temp,24,temp);
2002a1db 3217#ifdef BIG_ENDIAN_MIPS
3218 if (opcode[i]==0x26) // LWR
3219#else
3220 if (opcode[i]==0x22) // LWL
3221#endif
3222 emit_xorimm(temp,24,temp);
57871462 3223 emit_movimm(-1,HOST_TEMPREG);
3224 if (opcode[i]==0x26) {
3225 emit_shr(temp2,temp,temp2);
3226 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3227 }else{
3228 emit_shl(temp2,temp,temp2);
3229 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3230 }
3231 emit_or(temp2,tl,tl);
3232 //emit_storereg(rt1[i],tl); // DEBUG
3233 }
3234 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3235 // FIXME: little endian
57871462 3236 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3237 if(!c||memtarget) {
3238 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3239 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3240 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3241 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3242 }
3243 else
3244 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3245 emit_testimm(temp,32);
3246 emit_andimm(temp,24,temp);
3247 if (opcode[i]==0x1A) { // LDL
3248 emit_rsbimm(temp,32,HOST_TEMPREG);
3249 emit_shl(temp2h,temp,temp2h);
3250 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3251 emit_movimm(-1,HOST_TEMPREG);
3252 emit_shl(temp2,temp,temp2);
3253 emit_cmove_reg(temp2h,th);
3254 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3255 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3256 emit_orreq(temp2,tl,tl);
3257 emit_orrne(temp2,th,th);
3258 }
3259 if (opcode[i]==0x1B) { // LDR
3260 emit_xorimm(temp,24,temp);
3261 emit_rsbimm(temp,32,HOST_TEMPREG);
3262 emit_shr(temp2,temp,temp2);
3263 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3264 emit_movimm(-1,HOST_TEMPREG);
3265 emit_shr(temp2h,temp,temp2h);
3266 emit_cmovne_reg(temp2,tl);
3267 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3268 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3269 emit_orrne(temp2h,th,th);
3270 emit_orreq(temp2h,tl,tl);
3271 }
3272 }
3273 }
3274}
3275#define loadlr_assemble loadlr_assemble_arm
3276
3277void cop0_assemble(int i,struct regstat *i_regs)
3278{
3279 if(opcode2[i]==0) // MFC0
3280 {
3281 signed char t=get_reg(i_regs->regmap,rt1[i]);
3282 char copr=(source[i]>>11)&0x1f;
3283 //assert(t>=0); // Why does this happen? OOT is weird
3284 if(t>=0) {
7139f3c8 3285#ifdef MUPEN64
57871462 3286 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3287 emit_movimm((source[i]>>11)&0x1f,1);
3288 emit_writeword(0,(int)&PC);
3289 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3290 if(copr==9) {
3291 emit_readword((int)&last_count,ECX);
3292 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3293 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3294 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3295 emit_writeword(HOST_CCREG,(int)&Count);
3296 }
3297 emit_call((int)MFC0);
3298 emit_readword((int)&readmem_dword,t);
7139f3c8 3299#else
3300 emit_readword((int)&reg_cop0+copr*4,t);
3301#endif
57871462 3302 }
3303 }
3304 else if(opcode2[i]==4) // MTC0
3305 {
3306 signed char s=get_reg(i_regs->regmap,rs1[i]);
3307 char copr=(source[i]>>11)&0x1f;
3308 assert(s>=0);
3309 emit_writeword(s,(int)&readmem_dword);
3310 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3d624f89 3311#ifdef MUPEN64 /// FIXME
57871462 3312 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3313 emit_movimm((source[i]>>11)&0x1f,1);
3314 emit_writeword(0,(int)&PC);
3315 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3d624f89 3316#endif
7139f3c8 3317#ifdef PCSX
3318 emit_movimm(source[i],0);
3319 emit_writeword(0,(int)&psxRegs.code);
3320#endif
3321 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3322 emit_readword((int)&last_count,ECX);
3323 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3324 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3325 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3326 emit_writeword(HOST_CCREG,(int)&Count);
3327 }
3328 // What a mess. The status register (12) can enable interrupts,
3329 // so needs a special case to handle a pending interrupt.
3330 // The interrupt must be taken immediately, because a subsequent
3331 // instruction might disable interrupts again.
7139f3c8 3332 if(copr==12||copr==13) {
57871462 3333 emit_movimm(start+i*4+4,0);
3334 emit_movimm(0,1);
3335 emit_writeword(0,(int)&pcaddr);
3336 emit_writeword(1,(int)&pending_exception);
3337 }
3338 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3339 //else
3340 emit_call((int)MTC0);
7139f3c8 3341 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3342 emit_readword((int)&Count,HOST_CCREG);
3343 emit_readword((int)&next_interupt,ECX);
3344 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3345 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3346 emit_writeword(ECX,(int)&last_count);
3347 emit_storereg(CCREG,HOST_CCREG);
3348 }
7139f3c8 3349 if(copr==12||copr==13) {
57871462 3350 assert(!is_delayslot);
3351 emit_readword((int)&pending_exception,14);
3352 }
3353 emit_loadreg(rs1[i],s);
3354 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3355 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3356 if(copr==12||copr==13) {
57871462 3357 emit_test(14,14);
3358 emit_jne((int)&do_interrupt);
3359 }
3360 cop1_usable=0;
3361 }
3362 else
3363 {
3364 assert(opcode2[i]==0x10);
3d624f89 3365#ifndef DISABLE_TLB
57871462 3366 if((source[i]&0x3f)==0x01) // TLBR
3367 emit_call((int)TLBR);
3368 if((source[i]&0x3f)==0x02) // TLBWI
3369 emit_call((int)TLBWI_new);
3370 if((source[i]&0x3f)==0x06) { // TLBWR
3371 // The TLB entry written by TLBWR is dependent on the count,
3372 // so update the cycle count
3373 emit_readword((int)&last_count,ECX);
3374 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3375 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3376 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3377 emit_writeword(HOST_CCREG,(int)&Count);
3378 emit_call((int)TLBWR_new);
3379 }
3380 if((source[i]&0x3f)==0x08) // TLBP
3381 emit_call((int)TLBP);
3d624f89 3382#endif
57871462 3383 if((source[i]&0x3f)==0x18) // ERET
3384 {
3385 int count=ccadj[i];
3386 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3387 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3388 emit_jmp((int)jump_eret);
3389 }
3390 }
3391}
3392
b9b61529 3393static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3394{
3395 switch (copr) {
3396 case 1:
3397 case 3:
3398 case 5:
3399 case 8:
3400 case 9:
3401 case 10:
3402 case 11:
3403 emit_readword((int)&reg_cop2d[copr],tl);
3404 emit_signextend16(tl,tl);
3405 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3406 break;
3407 case 7:
3408 case 16:
3409 case 17:
3410 case 18:
3411 case 19:
3412 emit_readword((int)&reg_cop2d[copr],tl);
3413 emit_andimm(tl,0xffff,tl);
3414 emit_writeword(tl,(int)&reg_cop2d[copr]);
3415 break;
3416 case 15:
3417 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3418 emit_writeword(tl,(int)&reg_cop2d[copr]);
3419 break;
3420 case 28:
3421 case 30:
3422 emit_movimm(0,tl);
3423 break;
3424 case 29:
3425 emit_readword((int)&reg_cop2d[9],temp);
3426 emit_testimm(temp,0x8000); // do we need this?
3427 emit_andimm(temp,0xf80,temp);
3428 emit_andne_imm(temp,0,temp);
3429 emit_shr(temp,7,tl);
3430 emit_readword((int)&reg_cop2d[10],temp);
3431 emit_testimm(temp,0x8000);
3432 emit_andimm(temp,0xf80,temp);
3433 emit_andne_imm(temp,0,temp);
3434 emit_orrshr(temp,2,tl);
3435 emit_readword((int)&reg_cop2d[11],temp);
3436 emit_testimm(temp,0x8000);
3437 emit_andimm(temp,0xf80,temp);
3438 emit_andne_imm(temp,0,temp);
3439 emit_orrshl(temp,3,tl);
3440 emit_writeword(tl,(int)&reg_cop2d[copr]);
3441 break;
3442 default:
3443 emit_readword((int)&reg_cop2d[copr],tl);
3444 break;
3445 }
3446}
3447
3448static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3449{
3450 switch (copr) {
3451 case 15:
3452 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3453 emit_writeword(sl,(int)&reg_cop2d[copr]);
3454 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3455 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3456 emit_writeword(sl,(int)&reg_cop2d[14]);
3457 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3458 break;
3459 case 28:
3460 emit_andimm(sl,0x001f,temp);
3461 emit_shl(temp,7,temp);
3462 emit_writeword(temp,(int)&reg_cop2d[9]);
3463 emit_andimm(sl,0x03e0,temp);
3464 emit_shl(temp,2,temp);
3465 emit_writeword(temp,(int)&reg_cop2d[10]);
3466 emit_andimm(sl,0x7c00,temp);
3467 emit_shr(temp,3,temp);
3468 emit_writeword(temp,(int)&reg_cop2d[11]);
3469 emit_writeword(sl,(int)&reg_cop2d[28]);
3470 break;
3471 case 30:
3472 emit_movs(sl,temp);
3473 emit_mvnmi(temp,temp);
3474 emit_clz(temp,temp);
3475 emit_writeword(sl,(int)&reg_cop2d[30]);
3476 emit_writeword(temp,(int)&reg_cop2d[31]);
3477 break;
3478 case 7:
3479 case 29:
3480 case 31:
3481 break;
3482 default:
3483 emit_writeword(sl,(int)&reg_cop2d[copr]);
3484 break;
3485 }
3486}
3487
3488void cop2_assemble(int i,struct regstat *i_regs)
3489{
3490 u_int copr=(source[i]>>11)&0x1f;
3491 signed char temp=get_reg(i_regs->regmap,-1);
3492 if (opcode2[i]==0) { // MFC2
3493 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3494 if(tl>=0)
3495 cop2_get_dreg(copr,tl,temp);
3496 }
3497 else if (opcode2[i]==4) { // MTC2
3498 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3499 cop2_put_dreg(copr,sl,temp);
3500 }
3501 else if (opcode2[i]==2) // CFC2
3502 {
3503 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3504 if(tl>=0)
3505 emit_readword((int)&reg_cop2c[copr],tl);
3506 }
3507 else if (opcode2[i]==6) // CTC2
3508 {
3509 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3510 switch(copr) {
3511 case 4:
3512 case 12:
3513 case 20:
3514 case 26:
3515 case 27:
3516 case 29:
3517 case 30:
3518 emit_signextend16(sl,temp);
3519 break;
3520 case 31:
3521 //value = value & 0x7ffff000;
3522 //if (value & 0x7f87e000) value |= 0x80000000;
3523 emit_shrimm(sl,12,temp);
3524 emit_shlimm(temp,12,temp);
3525 emit_testimm(temp,0x7f000000);
3526 emit_testeqimm(temp,0x00870000);
3527 emit_testeqimm(temp,0x0000e000);
3528 emit_orrne_imm(temp,0x80000000,temp);
3529 break;
3530 default:
3531 temp=sl;
3532 break;
3533 }
3534 emit_writeword(temp,(int)&reg_cop2c[copr]);
3535 assert(sl>=0);
3536 }
3537}
3538
3539void c2op_assemble(int i,struct regstat *i_regs)
3540{
3541 signed char temp=get_reg(i_regs->regmap,-1);
3542 u_int c2op=source[i]&0x3f;
3543 u_int hr,reglist=0;
3544 for(hr=0;hr<HOST_REGS;hr++) {
3545 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3546 }
3547 if(i==0||itype[i-1]!=C2OP)
3548 save_regs(reglist);
3549
3550 if (gte_handlers[c2op]!=NULL) {
3551 int cc=get_reg(i_regs->regmap,CCREG);
3552 emit_movimm(source[i],temp); // opcode
3553 if (cc>=0&&gte_cycletab[c2op])
3554 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3555 emit_writeword(temp,(int)&psxRegs.code);
3556 emit_call((int)gte_handlers[c2op]);
3557 }
3558
3559 if(i>=slen-1||itype[i+1]!=C2OP)
3560 restore_regs(reglist);
3561}
3562
3563void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3564{
3565 // XXX: should just just do the exception instead
3566 if(!cop1_usable) {
3567 int jaddr=(int)out;
3568 emit_jmp(0);
3569 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3570 cop1_usable=1;
3571 }
3572}
3573
57871462 3574void cop1_assemble(int i,struct regstat *i_regs)
3575{
3d624f89 3576#ifndef DISABLE_COP1
57871462 3577 // Check cop1 unusable
3578 if(!cop1_usable) {
3579 signed char rs=get_reg(i_regs->regmap,CSREG);
3580 assert(rs>=0);
3581 emit_testimm(rs,0x20000000);
3582 int jaddr=(int)out;
3583 emit_jeq(0);
3584 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3585 cop1_usable=1;
3586 }
3587 if (opcode2[i]==0) { // MFC1
3588 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3589 if(tl>=0) {
3590 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3591 emit_readword_indexed(0,tl,tl);
3592 }
3593 }
3594 else if (opcode2[i]==1) { // DMFC1
3595 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3596 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3597 if(tl>=0) {
3598 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3599 if(th>=0) emit_readword_indexed(4,tl,th);
3600 emit_readword_indexed(0,tl,tl);
3601 }
3602 }
3603 else if (opcode2[i]==4) { // MTC1
3604 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3605 signed char temp=get_reg(i_regs->regmap,-1);
3606 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3607 emit_writeword_indexed(sl,0,temp);
3608 }
3609 else if (opcode2[i]==5) { // DMTC1
3610 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3611 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3612 signed char temp=get_reg(i_regs->regmap,-1);
3613 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3614 emit_writeword_indexed(sh,4,temp);
3615 emit_writeword_indexed(sl,0,temp);
3616 }
3617 else if (opcode2[i]==2) // CFC1
3618 {
3619 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3620 if(tl>=0) {
3621 u_int copr=(source[i]>>11)&0x1f;
3622 if(copr==0) emit_readword((int)&FCR0,tl);
3623 if(copr==31) emit_readword((int)&FCR31,tl);
3624 }
3625 }
3626 else if (opcode2[i]==6) // CTC1
3627 {
3628 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3629 u_int copr=(source[i]>>11)&0x1f;
3630 assert(sl>=0);
3631 if(copr==31)
3632 {
3633 emit_writeword(sl,(int)&FCR31);
3634 // Set the rounding mode
3635 //FIXME
3636 //char temp=get_reg(i_regs->regmap,-1);
3637 //emit_andimm(sl,3,temp);
3638 //emit_fldcw_indexed((int)&rounding_modes,temp);
3639 }
3640 }
3d624f89 3641#else
3642 cop1_unusable(i, i_regs);
3643#endif
57871462 3644}
3645
3646void fconv_assemble_arm(int i,struct regstat *i_regs)
3647{
3d624f89 3648#ifndef DISABLE_COP1
57871462 3649 signed char temp=get_reg(i_regs->regmap,-1);
3650 assert(temp>=0);
3651 // Check cop1 unusable
3652 if(!cop1_usable) {
3653 signed char rs=get_reg(i_regs->regmap,CSREG);
3654 assert(rs>=0);
3655 emit_testimm(rs,0x20000000);
3656 int jaddr=(int)out;
3657 emit_jeq(0);
3658 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3659 cop1_usable=1;
3660 }
3661
3662 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3663 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3664 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3665 emit_flds(temp,15);
3666 emit_ftosizs(15,15); // float->int, truncate
3667 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3668 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3669 emit_fsts(15,temp);
3670 return;
3671 }
3672 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3673 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3674 emit_vldr(temp,7);
3675 emit_ftosizd(7,13); // double->int, truncate
3676 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3677 emit_fsts(13,temp);
3678 return;
3679 }
3680
3681 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3682 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3683 emit_flds(temp,13);
3684 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3685 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3686 emit_fsitos(13,15);
3687 emit_fsts(15,temp);
3688 return;
3689 }
3690 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3691 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3692 emit_flds(temp,13);
3693 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3694 emit_fsitod(13,7);
3695 emit_vstr(7,temp);
3696 return;
3697 }
3698
3699 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3700 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3701 emit_flds(temp,13);
3702 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3703 emit_fcvtds(13,7);
3704 emit_vstr(7,temp);
3705 return;
3706 }
3707 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3708 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3709 emit_vldr(temp,7);
3710 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3711 emit_fcvtsd(7,13);
3712 emit_fsts(13,temp);
3713 return;
3714 }
3715 #endif
3716
3717 // C emulation code
3718
3719 u_int hr,reglist=0;
3720 for(hr=0;hr<HOST_REGS;hr++) {
3721 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3722 }
3723 save_regs(reglist);
3724
3725 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3726 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3727 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3728 emit_call((int)cvt_s_w);
3729 }
3730 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3731 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3732 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3733 emit_call((int)cvt_d_w);
3734 }
3735 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3736 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3737 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3738 emit_call((int)cvt_s_l);
3739 }
3740 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3741 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3742 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3743 emit_call((int)cvt_d_l);
3744 }
3745
3746 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3747 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3748 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3749 emit_call((int)cvt_d_s);
3750 }
3751 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3752 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3753 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3754 emit_call((int)cvt_w_s);
3755 }
3756 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3757 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3758 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3759 emit_call((int)cvt_l_s);
3760 }
3761
3762 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3763 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3764 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3765 emit_call((int)cvt_s_d);
3766 }
3767 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3768 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3769 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3770 emit_call((int)cvt_w_d);
3771 }
3772 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3773 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3774 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3775 emit_call((int)cvt_l_d);
3776 }
3777
3778 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3779 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3780 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3781 emit_call((int)round_l_s);
3782 }
3783 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3784 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3785 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3786 emit_call((int)trunc_l_s);
3787 }
3788 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3789 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3790 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3791 emit_call((int)ceil_l_s);
3792 }
3793 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3794 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3795 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3796 emit_call((int)floor_l_s);
3797 }
3798 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3799 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3800 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3801 emit_call((int)round_w_s);
3802 }
3803 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3804 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3805 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3806 emit_call((int)trunc_w_s);
3807 }
3808 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3809 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3810 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3811 emit_call((int)ceil_w_s);
3812 }
3813 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3814 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3815 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3816 emit_call((int)floor_w_s);
3817 }
3818
3819 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3820 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3821 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3822 emit_call((int)round_l_d);
3823 }
3824 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3825 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3826 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3827 emit_call((int)trunc_l_d);
3828 }
3829 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3830 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3831 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3832 emit_call((int)ceil_l_d);
3833 }
3834 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3835 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3836 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3837 emit_call((int)floor_l_d);
3838 }
3839 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3840 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3841 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3842 emit_call((int)round_w_d);
3843 }
3844 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3845 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3846 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3847 emit_call((int)trunc_w_d);
3848 }
3849 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3850 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3851 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3852 emit_call((int)ceil_w_d);
3853 }
3854 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3855 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3856 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3857 emit_call((int)floor_w_d);
3858 }
3859
3860 restore_regs(reglist);
3d624f89 3861#else
3862 cop1_unusable(i, i_regs);
3863#endif
57871462 3864}
3865#define fconv_assemble fconv_assemble_arm
3866
3867void fcomp_assemble(int i,struct regstat *i_regs)
3868{
3d624f89 3869#ifndef DISABLE_COP1
57871462 3870 signed char fs=get_reg(i_regs->regmap,FSREG);
3871 signed char temp=get_reg(i_regs->regmap,-1);
3872 assert(temp>=0);
3873 // Check cop1 unusable
3874 if(!cop1_usable) {
3875 signed char cs=get_reg(i_regs->regmap,CSREG);
3876 assert(cs>=0);
3877 emit_testimm(cs,0x20000000);
3878 int jaddr=(int)out;
3879 emit_jeq(0);
3880 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3881 cop1_usable=1;
3882 }
3883
3884 if((source[i]&0x3f)==0x30) {
3885 emit_andimm(fs,~0x800000,fs);
3886 return;
3887 }
3888
3889 if((source[i]&0x3e)==0x38) {
3890 // sf/ngle - these should throw exceptions for NaNs
3891 emit_andimm(fs,~0x800000,fs);
3892 return;
3893 }
3894
3895 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3896 if(opcode2[i]==0x10) {
3897 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3898 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3899 emit_orimm(fs,0x800000,fs);
3900 emit_flds(temp,14);
3901 emit_flds(HOST_TEMPREG,15);
3902 emit_fcmps(14,15);
3903 emit_fmstat();
3904 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
3905 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
3906 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
3907 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
3908 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
3909 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
3910 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
3911 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
3912 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
3913 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
3914 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
3915 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
3916 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
3917 return;
3918 }
3919 if(opcode2[i]==0x11) {
3920 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3921 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
3922 emit_orimm(fs,0x800000,fs);
3923 emit_vldr(temp,6);
3924 emit_vldr(HOST_TEMPREG,7);
3925 emit_fcmpd(6,7);
3926 emit_fmstat();
3927 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
3928 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
3929 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
3930 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
3931 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
3932 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
3933 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
3934 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
3935 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
3936 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
3937 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
3938 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
3939 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
3940 return;
3941 }
3942 #endif
3943
3944 // C only
3945
3946 u_int hr,reglist=0;
3947 for(hr=0;hr<HOST_REGS;hr++) {
3948 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3949 }
3950 reglist&=~(1<<fs);
3951 save_regs(reglist);
3952 if(opcode2[i]==0x10) {
3953 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3954 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
3955 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3956 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3957 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3958 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3959 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3960 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3961 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3962 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3963 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3964 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3965 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3966 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3967 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3968 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3969 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3970 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3971 }
3972 if(opcode2[i]==0x11) {
3973 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3974 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
3975 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3976 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3977 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3978 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3979 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3980 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3981 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3982 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3983 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3984 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3985 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3986 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3987 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3988 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3989 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3990 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
3991 }
3992 restore_regs(reglist);
3993 emit_loadreg(FSREG,fs);
3d624f89 3994#else
3995 cop1_unusable(i, i_regs);
3996#endif
57871462 3997}
3998
3999void float_assemble(int i,struct regstat *i_regs)
4000{
3d624f89 4001#ifndef DISABLE_COP1
57871462 4002 signed char temp=get_reg(i_regs->regmap,-1);
4003 assert(temp>=0);
4004 // Check cop1 unusable
4005 if(!cop1_usable) {
4006 signed char cs=get_reg(i_regs->regmap,CSREG);
4007 assert(cs>=0);
4008 emit_testimm(cs,0x20000000);
4009 int jaddr=(int)out;
4010 emit_jeq(0);
4011 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4012 cop1_usable=1;
4013 }
4014
4015 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4016 if((source[i]&0x3f)==6) // mov
4017 {
4018 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4019 if(opcode2[i]==0x10) {
4020 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4021 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4022 emit_readword_indexed(0,temp,temp);
4023 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4024 }
4025 if(opcode2[i]==0x11) {
4026 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4027 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4028 emit_vldr(temp,7);
4029 emit_vstr(7,HOST_TEMPREG);
4030 }
4031 }
4032 return;
4033 }
4034
4035 if((source[i]&0x3f)>3)
4036 {
4037 if(opcode2[i]==0x10) {
4038 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4039 emit_flds(temp,15);
4040 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4041 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4042 }
4043 if((source[i]&0x3f)==4) // sqrt
4044 emit_fsqrts(15,15);
4045 if((source[i]&0x3f)==5) // abs
4046 emit_fabss(15,15);
4047 if((source[i]&0x3f)==7) // neg
4048 emit_fnegs(15,15);
4049 emit_fsts(15,temp);
4050 }
4051 if(opcode2[i]==0x11) {
4052 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4053 emit_vldr(temp,7);
4054 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4055 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4056 }
4057 if((source[i]&0x3f)==4) // sqrt
4058 emit_fsqrtd(7,7);
4059 if((source[i]&0x3f)==5) // abs
4060 emit_fabsd(7,7);
4061 if((source[i]&0x3f)==7) // neg
4062 emit_fnegd(7,7);
4063 emit_vstr(7,temp);
4064 }
4065 return;
4066 }
4067 if((source[i]&0x3f)<4)
4068 {
4069 if(opcode2[i]==0x10) {
4070 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4071 }
4072 if(opcode2[i]==0x11) {
4073 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4074 }
4075 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4076 if(opcode2[i]==0x10) {
4077 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4078 emit_flds(temp,15);
4079 emit_flds(HOST_TEMPREG,13);
4080 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4081 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4082 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4083 }
4084 }
4085 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4086 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4087 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4088 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4089 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4090 emit_fsts(15,HOST_TEMPREG);
4091 }else{
4092 emit_fsts(15,temp);
4093 }
4094 }
4095 else if(opcode2[i]==0x11) {
4096 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4097 emit_vldr(temp,7);
4098 emit_vldr(HOST_TEMPREG,6);
4099 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4100 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4101 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4102 }
4103 }
4104 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4105 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4106 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4107 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4108 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4109 emit_vstr(7,HOST_TEMPREG);
4110 }else{
4111 emit_vstr(7,temp);
4112 }
4113 }
4114 }
4115 else {
4116 if(opcode2[i]==0x10) {
4117 emit_flds(temp,15);
4118 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4119 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4120 }
4121 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4122 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4123 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4124 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4125 emit_fsts(15,temp);
4126 }
4127 else if(opcode2[i]==0x11) {
4128 emit_vldr(temp,7);
4129 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4130 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4131 }
4132 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4133 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4134 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4135 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4136 emit_vstr(7,temp);
4137 }
4138 }
4139 return;
4140 }
4141 #endif
4142
4143 u_int hr,reglist=0;
4144 for(hr=0;hr<HOST_REGS;hr++) {
4145 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4146 }
4147 if(opcode2[i]==0x10) { // Single precision
4148 save_regs(reglist);
4149 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4150 if((source[i]&0x3f)<4) {
4151 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4152 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4153 }else{
4154 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4155 }
4156 switch(source[i]&0x3f)
4157 {
4158 case 0x00: emit_call((int)add_s);break;
4159 case 0x01: emit_call((int)sub_s);break;
4160 case 0x02: emit_call((int)mul_s);break;
4161 case 0x03: emit_call((int)div_s);break;
4162 case 0x04: emit_call((int)sqrt_s);break;
4163 case 0x05: emit_call((int)abs_s);break;
4164 case 0x06: emit_call((int)mov_s);break;
4165 case 0x07: emit_call((int)neg_s);break;
4166 }
4167 restore_regs(reglist);
4168 }
4169 if(opcode2[i]==0x11) { // Double precision
4170 save_regs(reglist);
4171 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4172 if((source[i]&0x3f)<4) {
4173 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4174 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4175 }else{
4176 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4177 }
4178 switch(source[i]&0x3f)
4179 {
4180 case 0x00: emit_call((int)add_d);break;
4181 case 0x01: emit_call((int)sub_d);break;
4182 case 0x02: emit_call((int)mul_d);break;
4183 case 0x03: emit_call((int)div_d);break;
4184 case 0x04: emit_call((int)sqrt_d);break;
4185 case 0x05: emit_call((int)abs_d);break;
4186 case 0x06: emit_call((int)mov_d);break;
4187 case 0x07: emit_call((int)neg_d);break;
4188 }
4189 restore_regs(reglist);
4190 }
3d624f89 4191#else
4192 cop1_unusable(i, i_regs);
4193#endif
57871462 4194}
4195
4196void multdiv_assemble_arm(int i,struct regstat *i_regs)
4197{
4198 // case 0x18: MULT
4199 // case 0x19: MULTU
4200 // case 0x1A: DIV
4201 // case 0x1B: DIVU
4202 // case 0x1C: DMULT
4203 // case 0x1D: DMULTU
4204 // case 0x1E: DDIV
4205 // case 0x1F: DDIVU
4206 if(rs1[i]&&rs2[i])
4207 {
4208 if((opcode2[i]&4)==0) // 32-bit
4209 {
4210 if(opcode2[i]==0x18) // MULT
4211 {
4212 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4213 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4214 signed char hi=get_reg(i_regs->regmap,HIREG);
4215 signed char lo=get_reg(i_regs->regmap,LOREG);
4216 assert(m1>=0);
4217 assert(m2>=0);
4218 assert(hi>=0);
4219 assert(lo>=0);
4220 emit_smull(m1,m2,hi,lo);
4221 }
4222 if(opcode2[i]==0x19) // MULTU
4223 {
4224 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4225 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4226 signed char hi=get_reg(i_regs->regmap,HIREG);
4227 signed char lo=get_reg(i_regs->regmap,LOREG);
4228 assert(m1>=0);
4229 assert(m2>=0);
4230 assert(hi>=0);
4231 assert(lo>=0);
4232 emit_umull(m1,m2,hi,lo);
4233 }
4234 if(opcode2[i]==0x1A) // DIV
4235 {
4236 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4237 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4238 assert(d1>=0);
4239 assert(d2>=0);
4240 signed char quotient=get_reg(i_regs->regmap,LOREG);
4241 signed char remainder=get_reg(i_regs->regmap,HIREG);
4242 assert(quotient>=0);
4243 assert(remainder>=0);
4244 emit_movs(d1,remainder);
4245 emit_negmi(remainder,remainder);
4246 emit_movs(d2,HOST_TEMPREG);
4247 emit_jeq((int)out+52); // Division by zero
4248 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4249 emit_clz(HOST_TEMPREG,quotient);
4250 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4251 emit_orimm(quotient,1<<31,quotient);
4252 emit_shr(quotient,quotient,quotient);
4253 emit_cmp(remainder,HOST_TEMPREG);
4254 emit_subcs(remainder,HOST_TEMPREG,remainder);
4255 emit_adcs(quotient,quotient,quotient);
4256 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4257 emit_jcc((int)out-16); // -4
4258 emit_teq(d1,d2);
4259 emit_negmi(quotient,quotient);
4260 emit_test(d1,d1);
4261 emit_negmi(remainder,remainder);
4262 }
4263 if(opcode2[i]==0x1B) // DIVU
4264 {
4265 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4266 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4267 assert(d1>=0);
4268 assert(d2>=0);
4269 signed char quotient=get_reg(i_regs->regmap,LOREG);
4270 signed char remainder=get_reg(i_regs->regmap,HIREG);
4271 assert(quotient>=0);
4272 assert(remainder>=0);
4273 emit_test(d2,d2);
4274 emit_jeq((int)out+44); // Division by zero
4275 emit_clz(d2,HOST_TEMPREG);
4276 emit_movimm(1<<31,quotient);
4277 emit_shl(d2,HOST_TEMPREG,d2);
4278 emit_mov(d1,remainder);
4279 emit_shr(quotient,HOST_TEMPREG,quotient);
4280 emit_cmp(remainder,d2);
4281 emit_subcs(remainder,d2,remainder);
4282 emit_adcs(quotient,quotient,quotient);
4283 emit_shrcc_imm(d2,1,d2);
4284 emit_jcc((int)out-16); // -4
4285 }
4286 }
4287 else // 64-bit
4288 {
4289 if(opcode2[i]==0x1C) // DMULT
4290 {
4291 assert(opcode2[i]!=0x1C);
4292 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4293 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4294 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4295 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4296 assert(m1h>=0);
4297 assert(m2h>=0);
4298 assert(m1l>=0);
4299 assert(m2l>=0);
4300 emit_pushreg(m2h);
4301 emit_pushreg(m2l);
4302 emit_pushreg(m1h);
4303 emit_pushreg(m1l);
4304 emit_call((int)&mult64);
4305 emit_popreg(m1l);
4306 emit_popreg(m1h);
4307 emit_popreg(m2l);
4308 emit_popreg(m2h);
4309 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4310 signed char hil=get_reg(i_regs->regmap,HIREG);
4311 if(hih>=0) emit_loadreg(HIREG|64,hih);
4312 if(hil>=0) emit_loadreg(HIREG,hil);
4313 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4314 signed char lol=get_reg(i_regs->regmap,LOREG);
4315 if(loh>=0) emit_loadreg(LOREG|64,loh);
4316 if(lol>=0) emit_loadreg(LOREG,lol);
4317 }
4318 if(opcode2[i]==0x1D) // DMULTU
4319 {
4320 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4321 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4322 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4323 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4324 assert(m1h>=0);
4325 assert(m2h>=0);
4326 assert(m1l>=0);
4327 assert(m2l>=0);
4328 save_regs(0x100f);
4329 if(m1l!=0) emit_mov(m1l,0);
4330 if(m1h==0) emit_readword((int)&dynarec_local,1);
4331 else if(m1h>1) emit_mov(m1h,1);
4332 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4333 else if(m2l>2) emit_mov(m2l,2);
4334 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4335 else if(m2h>3) emit_mov(m2h,3);
4336 emit_call((int)&multu64);
4337 restore_regs(0x100f);
4338 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4339 signed char hil=get_reg(i_regs->regmap,HIREG);
4340 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4341 signed char lol=get_reg(i_regs->regmap,LOREG);
4342 /*signed char temp=get_reg(i_regs->regmap,-1);
4343 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4344 signed char rl=get_reg(i_regs->regmap,HIREG);
4345 assert(m1h>=0);
4346 assert(m2h>=0);
4347 assert(m1l>=0);
4348 assert(m2l>=0);
4349 assert(temp>=0);
4350 //emit_mov(m1l,EAX);
4351 //emit_mul(m2l);
4352 emit_umull(rl,rh,m1l,m2l);
4353 emit_storereg(LOREG,rl);
4354 emit_mov(rh,temp);
4355 //emit_mov(m1h,EAX);
4356 //emit_mul(m2l);
4357 emit_umull(rl,rh,m1h,m2l);
4358 emit_adds(rl,temp,temp);
4359 emit_adcimm(rh,0,rh);
4360 emit_storereg(HIREG,rh);
4361 //emit_mov(m2h,EAX);
4362 //emit_mul(m1l);
4363 emit_umull(rl,rh,m1l,m2h);
4364 emit_adds(rl,temp,temp);
4365 emit_adcimm(rh,0,rh);
4366 emit_storereg(LOREG|64,temp);
4367 emit_mov(rh,temp);
4368 //emit_mov(m2h,EAX);
4369 //emit_mul(m1h);
4370 emit_umull(rl,rh,m1h,m2h);
4371 emit_adds(rl,temp,rl);
4372 emit_loadreg(HIREG,temp);
4373 emit_adcimm(rh,0,rh);
4374 emit_adds(rl,temp,rl);
4375 emit_adcimm(rh,0,rh);
4376 // DEBUG
4377 /*
4378 emit_pushreg(m2h);
4379 emit_pushreg(m2l);
4380 emit_pushreg(m1h);
4381 emit_pushreg(m1l);
4382 emit_call((int)&multu64);
4383 emit_popreg(m1l);
4384 emit_popreg(m1h);
4385 emit_popreg(m2l);
4386 emit_popreg(m2h);
4387 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4388 signed char hil=get_reg(i_regs->regmap,HIREG);
4389 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4390 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4391 */
4392 // Shouldn't be necessary
4393 //char loh=get_reg(i_regs->regmap,LOREG|64);
4394 //char lol=get_reg(i_regs->regmap,LOREG);
4395 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4396 //if(lol>=0) emit_loadreg(LOREG,lol);
4397 }
4398 if(opcode2[i]==0x1E) // DDIV
4399 {
4400 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4401 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4402 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4403 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4404 assert(d1h>=0);
4405 assert(d2h>=0);
4406 assert(d1l>=0);
4407 assert(d2l>=0);
4408 save_regs(0x100f);
4409 if(d1l!=0) emit_mov(d1l,0);
4410 if(d1h==0) emit_readword((int)&dynarec_local,1);
4411 else if(d1h>1) emit_mov(d1h,1);
4412 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4413 else if(d2l>2) emit_mov(d2l,2);
4414 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4415 else if(d2h>3) emit_mov(d2h,3);
4416 emit_call((int)&div64);
4417 restore_regs(0x100f);
4418 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4419 signed char hil=get_reg(i_regs->regmap,HIREG);
4420 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4421 signed char lol=get_reg(i_regs->regmap,LOREG);
4422 if(hih>=0) emit_loadreg(HIREG|64,hih);
4423 if(hil>=0) emit_loadreg(HIREG,hil);
4424 if(loh>=0) emit_loadreg(LOREG|64,loh);
4425 if(lol>=0) emit_loadreg(LOREG,lol);
4426 }
4427 if(opcode2[i]==0x1F) // DDIVU
4428 {
4429 //u_int hr,reglist=0;
4430 //for(hr=0;hr<HOST_REGS;hr++) {
4431 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4432 //}
4433 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4434 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4435 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4436 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4437 assert(d1h>=0);
4438 assert(d2h>=0);
4439 assert(d1l>=0);
4440 assert(d2l>=0);
4441 save_regs(0x100f);
4442 if(d1l!=0) emit_mov(d1l,0);
4443 if(d1h==0) emit_readword((int)&dynarec_local,1);
4444 else if(d1h>1) emit_mov(d1h,1);
4445 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4446 else if(d2l>2) emit_mov(d2l,2);
4447 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4448 else if(d2h>3) emit_mov(d2h,3);
4449 emit_call((int)&divu64);
4450 restore_regs(0x100f);
4451 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4452 signed char hil=get_reg(i_regs->regmap,HIREG);
4453 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4454 signed char lol=get_reg(i_regs->regmap,LOREG);
4455 if(hih>=0) emit_loadreg(HIREG|64,hih);
4456 if(hil>=0) emit_loadreg(HIREG,hil);
4457 if(loh>=0) emit_loadreg(LOREG|64,loh);
4458 if(lol>=0) emit_loadreg(LOREG,lol);
4459 }
4460 }
4461 }
4462 else
4463 {
4464 // Multiply by zero is zero.
4465 // MIPS does not have a divide by zero exception.
4466 // The result is undefined, we return zero.
4467 signed char hr=get_reg(i_regs->regmap,HIREG);
4468 signed char lr=get_reg(i_regs->regmap,LOREG);
4469 if(hr>=0) emit_zeroreg(hr);
4470 if(lr>=0) emit_zeroreg(lr);
4471 }
4472}
4473#define multdiv_assemble multdiv_assemble_arm
4474
void do_preload_rhash(int r) {
  // Deliberately emits nothing on ARM: the return-address hash is computed
  // by a single instruction in do_rhash(), so no constant needs preloading.
  // (The x86 backend uses this hook to place the value 0xf8 in the register.)
  (void)r;
}
4479
4480void do_preload_rhtbl(int ht) {
4481 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4482}
4483
// Emit the hash of the address in rs into rh: the address masked with 0xf8.
// do_miniht_insert() selects its mini_ht row with (address&0xFF)>>3, i.e. the
// same bits, so the masked value doubles as an offset into the table.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4487
4488void do_miniht_load(int ht,int rh) {
4489 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4490 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4491}
4492
4493void do_miniht_jump(int rs,int rh,int ht) {
4494 emit_cmp(rh,rs);
4495 emit_ldreq_indexed(ht,4,15);
4496 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4497 emit_mov(rs,7);
4498 emit_jmp(jump_vaddr_reg[7]);
4499 #else
4500 emit_jmp(jump_vaddr_reg[rs]);
4501 #endif
4502}
4503
4504void do_miniht_insert(u_int return_address,int rt,int temp) {
4505 #ifdef ARMv5_ONLY
4506 emit_movimm(return_address,rt); // PC into link register
4507 add_to_linker((int)out,return_address,1);
4508 emit_pcreladdr(temp);
4509 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4510 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4511 #else
4512 emit_movw(return_address&0x0000FFFF,rt);
4513 add_to_linker((int)out,return_address,1);
4514 emit_pcreladdr(temp);
4515 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4516 emit_movt(return_address&0xFFFF0000,rt);
4517 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4518 #endif
4519}
4520
4521// Sign-extend to 64 bits and write out upper half of a register
4522// This is useful where we have a 32-bit value in a register, and want to
4523// keep it in a 32-bit register, but can't guarantee that it won't be read
4524// as a 64-bit value later.
4525void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4526{
24385cae 4527#ifndef FORCE32
57871462 4528 if(is32_pre==is32) return;
4529 int hr,reg;
4530 for(hr=0;hr<HOST_REGS;hr++) {
4531 if(hr!=EXCLUDE_REG) {
4532 //if(pre[hr]==entry[hr]) {
4533 if((reg=pre[hr])>=0) {
4534 if((dirty>>hr)&1) {
4535 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4536 emit_sarimm(hr,31,HOST_TEMPREG);
4537 emit_storereg(reg|64,HOST_TEMPREG);
4538 }
4539 }
4540 }
4541 //}
4542 }
4543 }
24385cae 4544#endif
57871462 4545}
4546
4547void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4548{
4549 //if(dirty_pre==dirty) return;
4550 int hr,reg,new_hr;
4551 for(hr=0;hr<HOST_REGS;hr++) {
4552 if(hr!=EXCLUDE_REG) {
4553 reg=pre[hr];
4554 if(((~u)>>(reg&63))&1) {
4555 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4556 if(((dirty_pre&~dirty)>>hr)&1) {
4557 if(reg>0&&reg<34) {
4558 emit_storereg(reg,hr);
4559 if( ((is32_pre&~uu)>>reg)&1 ) {
4560 emit_sarimm(hr,31,HOST_TEMPREG);
4561 emit_storereg(reg|64,HOST_TEMPREG);
4562 }
4563 }
4564 else if(reg>=64) {
4565 emit_storereg(reg,hr);
4566 }
4567 }
4568 }
4569 else // Check if register moved to a different register
4570 if((new_hr=get_reg(entry,reg))>=0) {
4571 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4572 if(reg>0&&reg<34) {
4573 emit_storereg(reg,hr);
4574 if( ((is32_pre&~uu)>>reg)&1 ) {
4575 emit_sarimm(hr,31,HOST_TEMPREG);
4576 emit_storereg(reg|64,HOST_TEMPREG);
4577 }
4578 }
4579 else if(reg>=64) {
4580 emit_storereg(reg,hr);
4581 }
4582 }
4583 }
4584 }
4585 }
4586 }
4587}
4588
4589
4590/* using strd could possibly help but you'd have to allocate registers in pairs
4591void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4592{
4593 int hr;
4594 int wrote=-1;
4595 for(hr=HOST_REGS-1;hr>=0;hr--) {
4596 if(hr!=EXCLUDE_REG) {
4597 if(pre[hr]!=entry[hr]) {
4598 if(pre[hr]>=0) {
4599 if((dirty>>hr)&1) {
4600 if(get_reg(entry,pre[hr])<0) {
4601 if(pre[hr]<64) {
4602 if(!((u>>pre[hr])&1)) {
4603 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4604 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4605 emit_sarimm(hr,31,hr+1);
4606 emit_strdreg(pre[hr],hr);
4607 }
4608 else
4609 emit_storereg(pre[hr],hr);
4610 }else{
4611 emit_storereg(pre[hr],hr);
4612 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4613 emit_sarimm(hr,31,hr);
4614 emit_storereg(pre[hr]|64,hr);
4615 }
4616 }
4617 }
4618 }else{
4619 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4620 emit_storereg(pre[hr],hr);
4621 }
4622 }
4623 wrote=hr;
4624 }
4625 }
4626 }
4627 }
4628 }
4629 }
4630 for(hr=0;hr<HOST_REGS;hr++) {
4631 if(hr!=EXCLUDE_REG) {
4632 if(pre[hr]!=entry[hr]) {
4633 if(pre[hr]>=0) {
4634 int nr;
4635 if((nr=get_reg(entry,pre[hr]))>=0) {
4636 emit_mov(hr,nr);
4637 }
4638 }
4639 }
4640 }
4641 }
4642}
4643#define wb_invalidate wb_invalidate_arm
4644*/
4645
4646// CPU-architecture-specific initialization
4647void arch_init() {
3d624f89 4648#ifndef DISABLE_COP1
57871462 4649 rounding_modes[0]=0x0<<22; // round
4650 rounding_modes[1]=0x3<<22; // trunc
4651 rounding_modes[2]=0x1<<22; // ceil
4652 rounding_modes[3]=0x2<<22; // floor
3d624f89 4653#endif
57871462 4654}
b9b61529 4655
4656// vim:shiftwidth=2:expandtab