drc: fix page get code in linkage_arm.s
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
0bbd1454 69void invalidate_addr_r0();
70void invalidate_addr_r1();
71void invalidate_addr_r2();
72void invalidate_addr_r3();
73void invalidate_addr_r4();
74void invalidate_addr_r5();
75void invalidate_addr_r6();
76void invalidate_addr_r7();
77void invalidate_addr_r8();
78void invalidate_addr_r9();
79void invalidate_addr_r10();
80void invalidate_addr_r12();
81
82const u_int invalidate_addr_reg[16] = {
83 (int)invalidate_addr_r0,
84 (int)invalidate_addr_r1,
85 (int)invalidate_addr_r2,
86 (int)invalidate_addr_r3,
87 (int)invalidate_addr_r4,
88 (int)invalidate_addr_r5,
89 (int)invalidate_addr_r6,
90 (int)invalidate_addr_r7,
91 (int)invalidate_addr_r8,
92 (int)invalidate_addr_r9,
93 (int)invalidate_addr_r10,
94 0,
95 (int)invalidate_addr_r12,
96 0,
97 0,
98 0};
99
57871462 100#include "fpu.h"
101
dd3a91a1 102unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
57871462 104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
173 literals[literalcount][0]=addr;
174 literals[literalcount][1]=val;
175 literalcount++;
176}
177
f76eeef9 178void *kill_pointer(void *stub)
57871462 179{
180 int *ptr=(int *)(stub+4);
181 assert((*ptr&0x0ff00000)==0x05900000);
182 u_int offset=*ptr&0xfff;
183 int **l_ptr=(void *)ptr+offset+8;
184 int *i_ptr=*l_ptr;
185 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 186 return i_ptr;
57871462 187}
188
f968d35d 189// find where external branch is liked to using addr of it's stub:
190// get address that insn one after stub loads (dyna_linker arg1),
191// treat it as a pointer to branch insn,
192// return addr where that branch jumps to
57871462 193int get_pointer(void *stub)
194{
195 //printf("get_pointer(%x)\n",(int)stub);
196 int *ptr=(int *)(stub+4);
f968d35d 197 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 198 u_int offset=*ptr&0xfff;
199 int **l_ptr=(void *)ptr+offset+8;
200 int *i_ptr=*l_ptr;
201 assert((*i_ptr&0x0f000000)==0x0a000000);
202 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
203}
204
205// Find the "clean" entry point from a "dirty" entry point
206// by skipping past the call to verify_code
207u_int get_clean_addr(int addr)
208{
209 int *ptr=(int *)addr;
210 #ifdef ARMv5_ONLY
211 ptr+=4;
212 #else
213 ptr+=6;
214 #endif
215 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
216 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
217 ptr++;
218 if((*ptr&0xFF000000)==0xea000000) {
219 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
220 }
221 return (u_int)ptr;
222}
223
224int verify_dirty(int addr)
225{
226 u_int *ptr=(u_int *)addr;
227 #ifdef ARMv5_ONLY
228 // get from literal pool
229 assert((*ptr&0xFFF00000)==0xe5900000);
230 u_int offset=*ptr&0xfff;
231 u_int *l_ptr=(void *)ptr+offset+8;
232 u_int source=l_ptr[0];
233 u_int copy=l_ptr[1];
234 u_int len=l_ptr[2];
235 ptr+=4;
236 #else
237 // ARMv7 movw/movt
238 assert((*ptr&0xFFF00000)==0xe3000000);
239 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
240 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
241 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
242 ptr+=6;
243 #endif
244 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
245 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 246 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 247 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
248 unsigned int page=source>>12;
249 unsigned int map_value=memory_map[page];
250 if(map_value>=0x80000000) return 0;
251 while(page<((source+len-1)>>12)) {
252 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
253 }
254 source = source+(map_value<<2);
255 }
256 //printf("verify_dirty: %x %x %x\n",source,copy,len);
257 return !memcmp((void *)source,(void *)copy,len);
258}
259
260// This doesn't necessarily find all clean entry points, just
261// guarantees that it's not dirty
262int isclean(int addr)
263{
264 #ifdef ARMv5_ONLY
265 int *ptr=((u_int *)addr)+4;
266 #else
267 int *ptr=((u_int *)addr)+6;
268 #endif
269 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
270 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
271 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
272 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
273 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
274 return 1;
275}
276
277void get_bounds(int addr,u_int *start,u_int *end)
278{
279 u_int *ptr=(u_int *)addr;
280 #ifdef ARMv5_ONLY
281 // get from literal pool
282 assert((*ptr&0xFFF00000)==0xe5900000);
283 u_int offset=*ptr&0xfff;
284 u_int *l_ptr=(void *)ptr+offset+8;
285 u_int source=l_ptr[0];
286 //u_int copy=l_ptr[1];
287 u_int len=l_ptr[2];
288 ptr+=4;
289 #else
290 // ARMv7 movw/movt
291 assert((*ptr&0xFFF00000)==0xe3000000);
292 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
293 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
294 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
295 ptr+=6;
296 #endif
297 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
298 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 299 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 300 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
301 if(memory_map[source>>12]>=0x80000000) source = 0;
302 else source = source+(memory_map[source>>12]<<2);
303 }
304 *start=source;
305 *end=source+len;
306}
307
308/* Register allocation */
309
310// Note: registers are allocated clean (unmodified state)
311// if you intend to modify the register, you must call dirty_reg().
312void alloc_reg(struct regstat *cur,int i,signed char reg)
313{
314 int r,hr;
315 int preferred_reg = (reg&7);
316 if(reg==CCREG) preferred_reg=HOST_CCREG;
317 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
318
319 // Don't allocate unused registers
320 if((cur->u>>reg)&1) return;
321
322 // see if it's already allocated
323 for(hr=0;hr<HOST_REGS;hr++)
324 {
325 if(cur->regmap[hr]==reg) return;
326 }
327
328 // Keep the same mapping if the register was already allocated in a loop
329 preferred_reg = loop_reg(i,reg,preferred_reg);
330
331 // Try to allocate the preferred register
332 if(cur->regmap[preferred_reg]==-1) {
333 cur->regmap[preferred_reg]=reg;
334 cur->dirty&=~(1<<preferred_reg);
335 cur->isconst&=~(1<<preferred_reg);
336 return;
337 }
338 r=cur->regmap[preferred_reg];
339 if(r<64&&((cur->u>>r)&1)) {
340 cur->regmap[preferred_reg]=reg;
341 cur->dirty&=~(1<<preferred_reg);
342 cur->isconst&=~(1<<preferred_reg);
343 return;
344 }
345 if(r>=64&&((cur->uu>>(r&63))&1)) {
346 cur->regmap[preferred_reg]=reg;
347 cur->dirty&=~(1<<preferred_reg);
348 cur->isconst&=~(1<<preferred_reg);
349 return;
350 }
351
352 // Clear any unneeded registers
353 // We try to keep the mapping consistent, if possible, because it
354 // makes branches easier (especially loops). So we try to allocate
355 // first (see above) before removing old mappings. If this is not
356 // possible then go ahead and clear out the registers that are no
357 // longer needed.
358 for(hr=0;hr<HOST_REGS;hr++)
359 {
360 r=cur->regmap[hr];
361 if(r>=0) {
362 if(r<64) {
363 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
364 }
365 else
366 {
367 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
368 }
369 }
370 }
371 // Try to allocate any available register, but prefer
372 // registers that have not been used recently.
373 if(i>0) {
374 for(hr=0;hr<HOST_REGS;hr++) {
375 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
376 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
377 cur->regmap[hr]=reg;
378 cur->dirty&=~(1<<hr);
379 cur->isconst&=~(1<<hr);
380 return;
381 }
382 }
383 }
384 }
385 // Try to allocate any available register
386 for(hr=0;hr<HOST_REGS;hr++) {
387 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
388 cur->regmap[hr]=reg;
389 cur->dirty&=~(1<<hr);
390 cur->isconst&=~(1<<hr);
391 return;
392 }
393 }
394
395 // Ok, now we have to evict someone
396 // Pick a register we hopefully won't need soon
397 u_char hsn[MAXREG+1];
398 memset(hsn,10,sizeof(hsn));
399 int j;
400 lsn(hsn,i,&preferred_reg);
401 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
402 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
403 if(i>0) {
404 // Don't evict the cycle count at entry points, otherwise the entry
405 // stub will have to write it.
406 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
407 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
408 for(j=10;j>=3;j--)
409 {
410 // Alloc preferred register if available
411 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
412 for(hr=0;hr<HOST_REGS;hr++) {
413 // Evict both parts of a 64-bit register
414 if((cur->regmap[hr]&63)==r) {
415 cur->regmap[hr]=-1;
416 cur->dirty&=~(1<<hr);
417 cur->isconst&=~(1<<hr);
418 }
419 }
420 cur->regmap[preferred_reg]=reg;
421 return;
422 }
423 for(r=1;r<=MAXREG;r++)
424 {
425 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
428 if(cur->regmap[hr]==r+64) {
429 cur->regmap[hr]=reg;
430 cur->dirty&=~(1<<hr);
431 cur->isconst&=~(1<<hr);
432 return;
433 }
434 }
435 }
436 for(hr=0;hr<HOST_REGS;hr++) {
437 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
438 if(cur->regmap[hr]==r) {
439 cur->regmap[hr]=reg;
440 cur->dirty&=~(1<<hr);
441 cur->isconst&=~(1<<hr);
442 return;
443 }
444 }
445 }
446 }
447 }
448 }
449 }
450 for(j=10;j>=0;j--)
451 {
452 for(r=1;r<=MAXREG;r++)
453 {
454 if(hsn[r]==j) {
455 for(hr=0;hr<HOST_REGS;hr++) {
456 if(cur->regmap[hr]==r+64) {
457 cur->regmap[hr]=reg;
458 cur->dirty&=~(1<<hr);
459 cur->isconst&=~(1<<hr);
460 return;
461 }
462 }
463 for(hr=0;hr<HOST_REGS;hr++) {
464 if(cur->regmap[hr]==r) {
465 cur->regmap[hr]=reg;
466 cur->dirty&=~(1<<hr);
467 cur->isconst&=~(1<<hr);
468 return;
469 }
470 }
471 }
472 }
473 }
474 printf("This shouldn't happen (alloc_reg)");exit(1);
475}
476
477void alloc_reg64(struct regstat *cur,int i,signed char reg)
478{
479 int preferred_reg = 8+(reg&1);
480 int r,hr;
481
482 // allocate the lower 32 bits
483 alloc_reg(cur,i,reg);
484
485 // Don't allocate unused registers
486 if((cur->uu>>reg)&1) return;
487
488 // see if the upper half is already allocated
489 for(hr=0;hr<HOST_REGS;hr++)
490 {
491 if(cur->regmap[hr]==reg+64) return;
492 }
493
494 // Keep the same mapping if the register was already allocated in a loop
495 preferred_reg = loop_reg(i,reg,preferred_reg);
496
497 // Try to allocate the preferred register
498 if(cur->regmap[preferred_reg]==-1) {
499 cur->regmap[preferred_reg]=reg|64;
500 cur->dirty&=~(1<<preferred_reg);
501 cur->isconst&=~(1<<preferred_reg);
502 return;
503 }
504 r=cur->regmap[preferred_reg];
505 if(r<64&&((cur->u>>r)&1)) {
506 cur->regmap[preferred_reg]=reg|64;
507 cur->dirty&=~(1<<preferred_reg);
508 cur->isconst&=~(1<<preferred_reg);
509 return;
510 }
511 if(r>=64&&((cur->uu>>(r&63))&1)) {
512 cur->regmap[preferred_reg]=reg|64;
513 cur->dirty&=~(1<<preferred_reg);
514 cur->isconst&=~(1<<preferred_reg);
515 return;
516 }
517
518 // Clear any unneeded registers
519 // We try to keep the mapping consistent, if possible, because it
520 // makes branches easier (especially loops). So we try to allocate
521 // first (see above) before removing old mappings. If this is not
522 // possible then go ahead and clear out the registers that are no
523 // longer needed.
524 for(hr=HOST_REGS-1;hr>=0;hr--)
525 {
526 r=cur->regmap[hr];
527 if(r>=0) {
528 if(r<64) {
529 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
530 }
531 else
532 {
533 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
534 }
535 }
536 }
537 // Try to allocate any available register, but prefer
538 // registers that have not been used recently.
539 if(i>0) {
540 for(hr=0;hr<HOST_REGS;hr++) {
541 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
542 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
543 cur->regmap[hr]=reg|64;
544 cur->dirty&=~(1<<hr);
545 cur->isconst&=~(1<<hr);
546 return;
547 }
548 }
549 }
550 }
551 // Try to allocate any available register
552 for(hr=0;hr<HOST_REGS;hr++) {
553 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
554 cur->regmap[hr]=reg|64;
555 cur->dirty&=~(1<<hr);
556 cur->isconst&=~(1<<hr);
557 return;
558 }
559 }
560
561 // Ok, now we have to evict someone
562 // Pick a register we hopefully won't need soon
563 u_char hsn[MAXREG+1];
564 memset(hsn,10,sizeof(hsn));
565 int j;
566 lsn(hsn,i,&preferred_reg);
567 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
568 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
569 if(i>0) {
570 // Don't evict the cycle count at entry points, otherwise the entry
571 // stub will have to write it.
572 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
573 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
574 for(j=10;j>=3;j--)
575 {
576 // Alloc preferred register if available
577 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
578 for(hr=0;hr<HOST_REGS;hr++) {
579 // Evict both parts of a 64-bit register
580 if((cur->regmap[hr]&63)==r) {
581 cur->regmap[hr]=-1;
582 cur->dirty&=~(1<<hr);
583 cur->isconst&=~(1<<hr);
584 }
585 }
586 cur->regmap[preferred_reg]=reg|64;
587 return;
588 }
589 for(r=1;r<=MAXREG;r++)
590 {
591 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
594 if(cur->regmap[hr]==r+64) {
595 cur->regmap[hr]=reg|64;
596 cur->dirty&=~(1<<hr);
597 cur->isconst&=~(1<<hr);
598 return;
599 }
600 }
601 }
602 for(hr=0;hr<HOST_REGS;hr++) {
603 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
604 if(cur->regmap[hr]==r) {
605 cur->regmap[hr]=reg|64;
606 cur->dirty&=~(1<<hr);
607 cur->isconst&=~(1<<hr);
608 return;
609 }
610 }
611 }
612 }
613 }
614 }
615 }
616 for(j=10;j>=0;j--)
617 {
618 for(r=1;r<=MAXREG;r++)
619 {
620 if(hsn[r]==j) {
621 for(hr=0;hr<HOST_REGS;hr++) {
622 if(cur->regmap[hr]==r+64) {
623 cur->regmap[hr]=reg|64;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629 for(hr=0;hr<HOST_REGS;hr++) {
630 if(cur->regmap[hr]==r) {
631 cur->regmap[hr]=reg|64;
632 cur->dirty&=~(1<<hr);
633 cur->isconst&=~(1<<hr);
634 return;
635 }
636 }
637 }
638 }
639 }
640 printf("This shouldn't happen");exit(1);
641}
642
643// Allocate a temporary register. This is done without regard to
644// dirty status or whether the register we request is on the unneeded list
645// Note: This will only allocate one register, even if called multiple times
646void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
647{
648 int r,hr;
649 int preferred_reg = -1;
650
651 // see if it's already allocated
652 for(hr=0;hr<HOST_REGS;hr++)
653 {
654 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
655 }
656
657 // Try to allocate any available register
658 for(hr=HOST_REGS-1;hr>=0;hr--) {
659 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
660 cur->regmap[hr]=reg;
661 cur->dirty&=~(1<<hr);
662 cur->isconst&=~(1<<hr);
663 return;
664 }
665 }
666
667 // Find an unneeded register
668 for(hr=HOST_REGS-1;hr>=0;hr--)
669 {
670 r=cur->regmap[hr];
671 if(r>=0) {
672 if(r<64) {
673 if((cur->u>>r)&1) {
674 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
675 cur->regmap[hr]=reg;
676 cur->dirty&=~(1<<hr);
677 cur->isconst&=~(1<<hr);
678 return;
679 }
680 }
681 }
682 else
683 {
684 if((cur->uu>>(r&63))&1) {
685 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
686 cur->regmap[hr]=reg;
687 cur->dirty&=~(1<<hr);
688 cur->isconst&=~(1<<hr);
689 return;
690 }
691 }
692 }
693 }
694 }
695
696 // Ok, now we have to evict someone
697 // Pick a register we hopefully won't need soon
698 // TODO: we might want to follow unconditional jumps here
699 // TODO: get rid of dupe code and make this into a function
700 u_char hsn[MAXREG+1];
701 memset(hsn,10,sizeof(hsn));
702 int j;
703 lsn(hsn,i,&preferred_reg);
704 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
705 if(i>0) {
706 // Don't evict the cycle count at entry points, otherwise the entry
707 // stub will have to write it.
708 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
709 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
710 for(j=10;j>=3;j--)
711 {
712 for(r=1;r<=MAXREG;r++)
713 {
714 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
717 if(cur->regmap[hr]==r+64) {
718 cur->regmap[hr]=reg;
719 cur->dirty&=~(1<<hr);
720 cur->isconst&=~(1<<hr);
721 return;
722 }
723 }
724 }
725 for(hr=0;hr<HOST_REGS;hr++) {
726 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
727 if(cur->regmap[hr]==r) {
728 cur->regmap[hr]=reg;
729 cur->dirty&=~(1<<hr);
730 cur->isconst&=~(1<<hr);
731 return;
732 }
733 }
734 }
735 }
736 }
737 }
738 }
739 for(j=10;j>=0;j--)
740 {
741 for(r=1;r<=MAXREG;r++)
742 {
743 if(hsn[r]==j) {
744 for(hr=0;hr<HOST_REGS;hr++) {
745 if(cur->regmap[hr]==r+64) {
746 cur->regmap[hr]=reg;
747 cur->dirty&=~(1<<hr);
748 cur->isconst&=~(1<<hr);
749 return;
750 }
751 }
752 for(hr=0;hr<HOST_REGS;hr++) {
753 if(cur->regmap[hr]==r) {
754 cur->regmap[hr]=reg;
755 cur->dirty&=~(1<<hr);
756 cur->isconst&=~(1<<hr);
757 return;
758 }
759 }
760 }
761 }
762 }
763 printf("This shouldn't happen");exit(1);
764}
765// Allocate a specific ARM register.
766void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
767{
768 int n;
f776eb14 769 int dirty=0;
57871462 770
771 // see if it's already allocated (and dealloc it)
772 for(n=0;n<HOST_REGS;n++)
773 {
f776eb14 774 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
775 dirty=(cur->dirty>>n)&1;
776 cur->regmap[n]=-1;
777 }
57871462 778 }
779
780 cur->regmap[hr]=reg;
781 cur->dirty&=~(1<<hr);
f776eb14 782 cur->dirty|=dirty<<hr;
57871462 783 cur->isconst&=~(1<<hr);
784}
785
786// Alloc cycle count into dedicated register
787alloc_cc(struct regstat *cur,int i)
788{
789 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
790}
791
792/* Special alloc */
793
794
795/* Assembler */
796
// Printable names of the 16 ARM registers, indexed by register number;
// used by the assem_debug() traces.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
814
815void output_byte(u_char byte)
816{
817 *(out++)=byte;
818}
819void output_modrm(u_char mod,u_char rm,u_char ext)
820{
821 assert(mod<4);
822 assert(rm<8);
823 assert(ext<8);
824 u_char byte=(mod<<6)|(ext<<3)|rm;
825 *(out++)=byte;
826}
827void output_sib(u_char scale,u_char index,u_char base)
828{
829 assert(scale<4);
830 assert(index<8);
831 assert(base<8);
832 u_char byte=(scale<<6)|(index<<3)|base;
833 *(out++)=byte;
834}
835void output_w32(u_int word)
836{
837 *((u_int *)out)=word;
838 out+=4;
839}
// Build the register fields of an ARM instruction word: rn in bits 16-19,
// rd in bits 12-15 and rm in bits 0-3.  Each register number must be < 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  const u_int fields=(rn<<16)|(rd<<12)|rm;
  return fields;
}
// Build the operand fields for an instruction with an 8-bit immediate that
// is shifted left by an even amount: rn in bits 16-19, rd in bits 12-15,
// ((32-shift)&30) placed from bit 7 upward, imm in bits 0-7.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express imm as an 8-bit value with an even rotation.
// On success returns 1 and stores the 12-bit rotate/immediate field in
// *encoded.  On failure returns 0 and leaves *encoded at 0.
// imm==0 succeeds with *encoded==0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // Rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 870void genimm_checked(u_int imm,u_int *encoded)
871{
872 u_int ret=genimm(imm,encoded);
873 assert(ret);
874}
57871462 875u_int genjmp(u_int addr)
876{
877 int offset=addr-(int)out-8;
e80343e2 878 if(offset<-33554432||offset>=33554432) {
879 if (addr>2) {
880 printf("genjmp: out of range: %08x\n", offset);
881 exit(1);
882 }
883 return 0;
884 }
57871462 885 return ((u_int)offset>>2)&0xffffff;
886}
887
888void emit_mov(int rs,int rt)
889{
890 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
891 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
892}
893
894void emit_movs(int rs,int rt)
895{
896 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
897 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
898}
899
900void emit_add(int rs1,int rs2,int rt)
901{
902 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
903 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
904}
905
906void emit_adds(int rs1,int rs2,int rt)
907{
908 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
909 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
910}
911
912void emit_adcs(int rs1,int rs2,int rt)
913{
914 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
915 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
916}
917
918void emit_sbc(int rs1,int rs2,int rt)
919{
920 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
921 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
922}
923
924void emit_sbcs(int rs1,int rs2,int rt)
925{
926 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
927 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
928}
929
930void emit_neg(int rs, int rt)
931{
932 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
933 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
934}
935
936void emit_negs(int rs, int rt)
937{
938 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
939 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
940}
941
942void emit_sub(int rs1,int rs2,int rt)
943{
944 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
945 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
946}
947
948void emit_subs(int rs1,int rs2,int rt)
949{
950 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
951 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
952}
953
954void emit_zeroreg(int rt)
955{
956 assem_debug("mov %s,#0\n",regname[rt]);
957 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
958}
959
790ee18e 960void emit_loadlp(u_int imm,u_int rt)
961{
962 add_literal((int)out,imm);
963 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
964 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
965}
966void emit_movw(u_int imm,u_int rt)
967{
968 assert(imm<65536);
969 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
970 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
971}
972void emit_movt(u_int imm,u_int rt)
973{
974 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
975 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
976}
977void emit_movimm(u_int imm,u_int rt)
978{
979 u_int armval;
980 if(genimm(imm,&armval)) {
981 assem_debug("mov %s,#%d\n",regname[rt],imm);
982 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
983 }else if(genimm(~imm,&armval)) {
984 assem_debug("mvn %s,#%d\n",regname[rt],imm);
985 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
986 }else if(imm<65536) {
987 #ifdef ARMv5_ONLY
988 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
989 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
990 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
991 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
992 #else
993 emit_movw(imm,rt);
994 #endif
995 }else{
996 #ifdef ARMv5_ONLY
997 emit_loadlp(imm,rt);
998 #else
999 emit_movw(imm&0x0000FFFF,rt);
1000 emit_movt(imm&0xFFFF0000,rt);
1001 #endif
1002 }
1003}
1004void emit_pcreladdr(u_int rt)
1005{
1006 assem_debug("add %s,pc,#?\n",regname[rt]);
1007 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1008}
1009
57871462 1010void emit_loadreg(int r, int hr)
1011{
3d624f89 1012#ifdef FORCE32
1013 if(r&64) {
1014 printf("64bit load in 32bit mode!\n");
7f2607ea 1015 assert(0);
1016 return;
3d624f89 1017 }
1018#endif
57871462 1019 if((r&63)==0)
1020 emit_zeroreg(hr);
1021 else {
3d624f89 1022 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1023 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1024 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1025 if(r==CCREG) addr=(int)&cycle_count;
1026 if(r==CSREG) addr=(int)&Status;
1027 if(r==FSREG) addr=(int)&FCR31;
1028 if(r==INVCP) addr=(int)&invc_ptr;
1029 u_int offset = addr-(u_int)&dynarec_local;
1030 assert(offset<4096);
1031 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1032 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1033 }
1034}
1035void emit_storereg(int r, int hr)
1036{
3d624f89 1037#ifdef FORCE32
1038 if(r&64) {
1039 printf("64bit store in 32bit mode!\n");
7f2607ea 1040 assert(0);
1041 return;
3d624f89 1042 }
1043#endif
1044 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1045 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1046 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1047 if(r==CCREG) addr=(int)&cycle_count;
1048 if(r==FSREG) addr=(int)&FCR31;
1049 u_int offset = addr-(u_int)&dynarec_local;
1050 assert(offset<4096);
1051 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1052 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1053}
1054
1055void emit_test(int rs, int rt)
1056{
1057 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1058 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1059}
1060
1061void emit_testimm(int rs,int imm)
1062{
1063 u_int armval;
5a05d80c 1064 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1065 genimm_checked(imm,&armval);
57871462 1066 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1067}
1068
b9b61529 1069void emit_testeqimm(int rs,int imm)
1070{
1071 u_int armval;
1072 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1073 genimm_checked(imm,&armval);
b9b61529 1074 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1075}
1076
57871462 1077void emit_not(int rs,int rt)
1078{
1079 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1080 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1081}
1082
b9b61529 1083void emit_mvnmi(int rs,int rt)
1084{
1085 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1086 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1087}
1088
57871462 1089void emit_and(u_int rs1,u_int rs2,u_int rt)
1090{
1091 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1092 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1093}
1094
1095void emit_or(u_int rs1,u_int rs2,u_int rt)
1096{
1097 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1098 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1099}
1100void emit_or_and_set_flags(int rs1,int rs2,int rt)
1101{
1102 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1103 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1104}
1105
f70d384d 1106void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1107{
1108 assert(rs<16);
1109 assert(rt<16);
1110 assert(imm<32);
1111 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1112 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1113}
1114
576bbd8f 1115void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1116{
1117 assert(rs<16);
1118 assert(rt<16);
1119 assert(imm<32);
1120 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1121 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1122}
1123
57871462 1124void emit_xor(u_int rs1,u_int rs2,u_int rt)
1125{
1126 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1127 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1128}
1129
57871462 1130void emit_addimm(u_int rs,int imm,u_int rt)
1131{
1132 assert(rs<16);
1133 assert(rt<16);
1134 if(imm!=0) {
1135 assert(imm>-65536&&imm<65536);
1136 u_int armval;
1137 if(genimm(imm,&armval)) {
1138 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1139 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1140 }else if(genimm(-imm,&armval)) {
1141 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1142 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1143 }else if(imm<0) {
1144 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1145 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1146 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1147 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1148 }else{
1149 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1150 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1151 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1152 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1153 }
1154 }
1155 else if(rs!=rt) emit_mov(rs,rt);
1156}
1157
1158void emit_addimm_and_set_flags(int imm,int rt)
1159{
1160 assert(imm>-65536&&imm<65536);
1161 u_int armval;
1162 if(genimm(imm,&armval)) {
1163 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1164 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1165 }else if(genimm(-imm,&armval)) {
1166 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1167 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1168 }else if(imm<0) {
1169 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1170 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1171 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1172 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1173 }else{
1174 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1175 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1176 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1177 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1178 }
1179}
1180void emit_addimm_no_flags(u_int imm,u_int rt)
1181{
1182 emit_addimm(rt,imm,rt);
1183}
1184
1185void emit_addnop(u_int r)
1186{
1187 assert(r<16);
1188 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1189 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1190}
1191
1192void emit_adcimm(u_int rs,int imm,u_int rt)
1193{
1194 u_int armval;
cfbd3c6e 1195 genimm_checked(imm,&armval);
57871462 1196 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1197 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1198}
1199/*void emit_sbcimm(int imm,u_int rt)
1200{
1201 u_int armval;
cfbd3c6e 1202 genimm_checked(imm,&armval);
57871462 1203 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1204 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1205}*/
1206void emit_sbbimm(int imm,u_int rt)
1207{
1208 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1209 assert(rt<8);
1210 if(imm<128&&imm>=-128) {
1211 output_byte(0x83);
1212 output_modrm(3,rt,3);
1213 output_byte(imm);
1214 }
1215 else
1216 {
1217 output_byte(0x81);
1218 output_modrm(3,rt,3);
1219 output_w32(imm);
1220 }
1221}
1222void emit_rscimm(int rs,int imm,u_int rt)
1223{
1224 assert(0);
1225 u_int armval;
cfbd3c6e 1226 genimm_checked(imm,&armval);
57871462 1227 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1228 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1229}
1230
1231void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1232{
1233 // TODO: if(genimm(imm,&armval)) ...
1234 // else
1235 emit_movimm(imm,HOST_TEMPREG);
1236 emit_adds(HOST_TEMPREG,rsl,rtl);
1237 emit_adcimm(rsh,0,rth);
1238}
1239
1240void emit_sbb(int rs1,int rs2)
1241{
1242 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1243 output_byte(0x19);
1244 output_modrm(3,rs1,rs2);
1245}
1246
1247void emit_andimm(int rs,int imm,int rt)
1248{
1249 u_int armval;
790ee18e 1250 if(imm==0) {
1251 emit_zeroreg(rt);
1252 }else if(genimm(imm,&armval)) {
57871462 1253 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1254 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1255 }else if(genimm(~imm,&armval)) {
1256 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1257 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1258 }else if(imm==65535) {
1259 #ifdef ARMv5_ONLY
1260 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1261 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1262 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1263 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1264 #else
1265 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1266 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1267 #endif
1268 }else{
1269 assert(imm>0&&imm<65535);
1270 #ifdef ARMv5_ONLY
1271 assem_debug("mov r14,#%d\n",imm&0xFF00);
1272 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1273 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1274 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1275 #else
1276 emit_movw(imm,HOST_TEMPREG);
1277 #endif
1278 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1279 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1280 }
1281}
1282
1283void emit_orimm(int rs,int imm,int rt)
1284{
1285 u_int armval;
790ee18e 1286 if(imm==0) {
1287 if(rs!=rt) emit_mov(rs,rt);
1288 }else if(genimm(imm,&armval)) {
57871462 1289 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1290 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1291 }else{
1292 assert(imm>0&&imm<65536);
1293 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1294 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1295 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1296 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1297 }
1298}
1299
1300void emit_xorimm(int rs,int imm,int rt)
1301{
57871462 1302 u_int armval;
790ee18e 1303 if(imm==0) {
1304 if(rs!=rt) emit_mov(rs,rt);
1305 }else if(genimm(imm,&armval)) {
57871462 1306 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1307 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1308 }else{
514ed0d9 1309 assert(imm>0&&imm<65536);
57871462 1310 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1311 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1312 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1313 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1314 }
1315}
1316
1317void emit_shlimm(int rs,u_int imm,int rt)
1318{
1319 assert(imm>0);
1320 assert(imm<32);
1321 //if(imm==1) ...
1322 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1323 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1324}
1325
1326void emit_shrimm(int rs,u_int imm,int rt)
1327{
1328 assert(imm>0);
1329 assert(imm<32);
1330 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1331 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1332}
1333
1334void emit_sarimm(int rs,u_int imm,int rt)
1335{
1336 assert(imm>0);
1337 assert(imm<32);
1338 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1339 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1340}
1341
1342void emit_rorimm(int rs,u_int imm,int rt)
1343{
1344 assert(imm>0);
1345 assert(imm<32);
1346 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1347 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1348}
1349
1350void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1351{
1352 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1353 assert(imm>0);
1354 assert(imm<32);
1355 //if(imm==1) ...
1356 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1357 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1358 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1359 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1360}
1361
1362void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1363{
1364 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1365 assert(imm>0);
1366 assert(imm<32);
1367 //if(imm==1) ...
1368 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1369 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1370 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1371 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1372}
1373
b9b61529 1374void emit_signextend16(int rs,int rt)
1375{
1376 #ifdef ARMv5_ONLY
1377 emit_shlimm(rs,16,rt);
1378 emit_sarimm(rt,16,rt);
1379 #else
1380 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1381 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1382 #endif
1383}
1384
57871462 1385void emit_shl(u_int rs,u_int shift,u_int rt)
1386{
1387 assert(rs<16);
1388 assert(rt<16);
1389 assert(shift<16);
1390 //if(imm==1) ...
1391 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1392 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1393}
1394void emit_shr(u_int rs,u_int shift,u_int rt)
1395{
1396 assert(rs<16);
1397 assert(rt<16);
1398 assert(shift<16);
1399 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1400 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1401}
1402void emit_sar(u_int rs,u_int shift,u_int rt)
1403{
1404 assert(rs<16);
1405 assert(rt<16);
1406 assert(shift<16);
1407 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1408 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1409}
1410void emit_shlcl(int r)
1411{
1412 assem_debug("shl %%%s,%%cl\n",regname[r]);
1413 assert(0);
1414}
1415void emit_shrcl(int r)
1416{
1417 assem_debug("shr %%%s,%%cl\n",regname[r]);
1418 assert(0);
1419}
1420void emit_sarcl(int r)
1421{
1422 assem_debug("sar %%%s,%%cl\n",regname[r]);
1423 assert(0);
1424}
1425
1426void emit_shldcl(int r1,int r2)
1427{
1428 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1429 assert(0);
1430}
1431void emit_shrdcl(int r1,int r2)
1432{
1433 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1434 assert(0);
1435}
1436void emit_orrshl(u_int rs,u_int shift,u_int rt)
1437{
1438 assert(rs<16);
1439 assert(rt<16);
1440 assert(shift<16);
1441 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1442 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1443}
1444void emit_orrshr(u_int rs,u_int shift,u_int rt)
1445{
1446 assert(rs<16);
1447 assert(rt<16);
1448 assert(shift<16);
1449 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1450 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1451}
1452
1453void emit_cmpimm(int rs,int imm)
1454{
1455 u_int armval;
1456 if(genimm(imm,&armval)) {
5a05d80c 1457 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1458 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1459 }else if(genimm(-imm,&armval)) {
5a05d80c 1460 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1461 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1462 }else if(imm>0) {
1463 assert(imm<65536);
1464 #ifdef ARMv5_ONLY
1465 emit_movimm(imm,HOST_TEMPREG);
1466 #else
1467 emit_movw(imm,HOST_TEMPREG);
1468 #endif
1469 assem_debug("cmp %s,r14\n",regname[rs]);
1470 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1471 }else{
1472 assert(imm>-65536);
1473 #ifdef ARMv5_ONLY
1474 emit_movimm(-imm,HOST_TEMPREG);
1475 #else
1476 emit_movw(-imm,HOST_TEMPREG);
1477 #endif
1478 assem_debug("cmn %s,r14\n",regname[rs]);
1479 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1480 }
1481}
1482
1483void emit_cmovne(u_int *addr,int rt)
1484{
1485 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1486 assert(0);
1487}
1488void emit_cmovl(u_int *addr,int rt)
1489{
1490 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1491 assert(0);
1492}
1493void emit_cmovs(u_int *addr,int rt)
1494{
1495 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1496 assert(0);
1497}
1498void emit_cmovne_imm(int imm,int rt)
1499{
1500 assem_debug("movne %s,#%d\n",regname[rt],imm);
1501 u_int armval;
cfbd3c6e 1502 genimm_checked(imm,&armval);
57871462 1503 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1504}
1505void emit_cmovl_imm(int imm,int rt)
1506{
1507 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1508 u_int armval;
cfbd3c6e 1509 genimm_checked(imm,&armval);
57871462 1510 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1511}
1512void emit_cmovb_imm(int imm,int rt)
1513{
1514 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1515 u_int armval;
cfbd3c6e 1516 genimm_checked(imm,&armval);
57871462 1517 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1518}
1519void emit_cmovs_imm(int imm,int rt)
1520{
1521 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1522 u_int armval;
cfbd3c6e 1523 genimm_checked(imm,&armval);
57871462 1524 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1525}
1526void emit_cmove_reg(int rs,int rt)
1527{
1528 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1529 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1530}
1531void emit_cmovne_reg(int rs,int rt)
1532{
1533 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1534 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1535}
1536void emit_cmovl_reg(int rs,int rt)
1537{
1538 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1539 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1540}
1541void emit_cmovs_reg(int rs,int rt)
1542{
1543 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1544 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1545}
1546
/* rt = 1 when the compare of rs against imm yields "lt", else 0.
 * rt is cleared before the compare when it is a different register from rs,
 * and after the compare (via emit_movimm) when it is the same register. */
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/* rt = 1 when the compare of rs against imm yields "cc", else 0.
 * rt is cleared before the compare when it is a different register from rs,
 * and after the compare (via emit_movimm) when it is the same register. */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
/* Set-less-than against an immediate for a 64-bit value held in rsh:rsl;
 * the result (0 or 1) goes to rt, which must differ from rsh.
 * The low-word result from emit_slti32() is then overridden according to
 * the high word: for imm>=0 the high word is tested against itself, for
 * imm<0 it is compared with -1. */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
/* Unsigned set-less-than against an immediate for a 64-bit value held in
 * rsh:rsl; the result goes to rt, which must differ from rsh.
 * The low-word result from emit_sltiu32() is overridden when the high word
 * is non-zero (imm>=0, result forced to 0) or differs from -1 (imm<0,
 * result forced to 1). */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1593
1594void emit_cmp(int rs,int rt)
1595{
1596 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1597 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1598}
/* rt = (rs > 0) for a signed 32-bit rs: compare rs with 1, load 1 into rt,
 * then replace it with 0 on "lt". */
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
/* rt = (rs != 0).  When rs and rt differ, rs is first copied to rt with a
 * flag-setting move; when they are the same register it is only tested.
 * A non-zero value is then replaced by 1. */
void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
/* rt = (value > 0) for a 64-bit value held in rsh:rsl.
 * Starts from emit_set_gz32() on the low word, then tests the high word:
 * non-zero forces 1, negative ("mi") forces 0. */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
/* rt = (value != 0) for a 64-bit value held in rsh:rsl: ORs the two halves
 * into rt with flags, then replaces a non-zero result with 1. */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
/* rt = 1 when "cmp rs1,rs2" yields "lt", else 0.
 * If rt aliases either source it is cleared only after the compare. */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/* rt = 1 when "cmp rs1,rs2" yields "cc", else 0.
 * If rt aliases either source it is cleared only after the compare. */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1643void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1644{
1645 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1646 assert(u1!=rt);
1647 assert(u2!=rt);
1648 emit_cmp(l1,l2);
1649 emit_movimm(0,rt);
1650 emit_sbcs(u1,u2,HOST_TEMPREG);
1651 emit_cmovl_imm(1,rt);
1652}
1653void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1654{
1655 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1656 assert(u1!=rt);
1657 assert(u2!=rt);
1658 emit_cmp(l1,l2);
1659 emit_movimm(0,rt);
1660 emit_sbcs(u1,u2,HOST_TEMPREG);
1661 emit_cmovb_imm(1,rt);
1662}
1663
1664void emit_call(int a)
1665{
1666 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1667 u_int offset=genjmp(a);
1668 output_w32(0xeb000000|offset);
1669}
1670void emit_jmp(int a)
1671{
1672 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1673 u_int offset=genjmp(a);
1674 output_w32(0xea000000|offset);
1675}
1676void emit_jne(int a)
1677{
1678 assem_debug("bne %x\n",a);
1679 u_int offset=genjmp(a);
1680 output_w32(0x1a000000|offset);
1681}
1682void emit_jeq(int a)
1683{
1684 assem_debug("beq %x\n",a);
1685 u_int offset=genjmp(a);
1686 output_w32(0x0a000000|offset);
1687}
1688void emit_js(int a)
1689{
1690 assem_debug("bmi %x\n",a);
1691 u_int offset=genjmp(a);
1692 output_w32(0x4a000000|offset);
1693}
1694void emit_jns(int a)
1695{
1696 assem_debug("bpl %x\n",a);
1697 u_int offset=genjmp(a);
1698 output_w32(0x5a000000|offset);
1699}
1700void emit_jl(int a)
1701{
1702 assem_debug("blt %x\n",a);
1703 u_int offset=genjmp(a);
1704 output_w32(0xba000000|offset);
1705}
1706void emit_jge(int a)
1707{
1708 assem_debug("bge %x\n",a);
1709 u_int offset=genjmp(a);
1710 output_w32(0xaa000000|offset);
1711}
1712void emit_jno(int a)
1713{
1714 assem_debug("bvc %x\n",a);
1715 u_int offset=genjmp(a);
1716 output_w32(0x7a000000|offset);
1717}
1718void emit_jc(int a)
1719{
1720 assem_debug("bcs %x\n",a);
1721 u_int offset=genjmp(a);
1722 output_w32(0x2a000000|offset);
1723}
1724void emit_jcc(int a)
1725{
1726 assem_debug("bcc %x\n",a);
1727 u_int offset=genjmp(a);
1728 output_w32(0x3a000000|offset);
1729}
1730
/* Unimplemented placeholder: logs "push $imm", emits nothing and then hits
 * assert(0). */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
/* Unimplemented placeholder: logs "pusha", emits nothing and then hits
 * assert(0). */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/* Unimplemented placeholder: logs "popa", emits nothing and then hits
 * assert(0). */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1746void emit_pushreg(u_int r)
1747{
1748 assem_debug("push %%%s\n",regname[r]);
1749 assert(0);
1750}
1751void emit_popreg(u_int r)
1752{
1753 assem_debug("pop %%%s\n",regname[r]);
1754 assert(0);
1755}
1756void emit_callreg(u_int r)
1757{
1758 assem_debug("call *%%%s\n",regname[r]);
1759 assert(0);
1760}
1761void emit_jmpreg(u_int r)
1762{
1763 assem_debug("mov pc,%s\n",regname[r]);
1764 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1765}
1766
1767void emit_readword_indexed(int offset, int rs, int rt)
1768{
1769 assert(offset>-4096&&offset<4096);
1770 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1771 if(offset>=0) {
1772 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1773 }else{
1774 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1775 }
1776}
1777void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1778{
1779 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1780 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1781}
/* Word load with an optional map register.
 * map<0: plain offset load from rs+addr.  Otherwise addr must be 0 and the
 * load uses rs plus map shifted left by 2. */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/* Doubleword load into rh:rl with an optional map register.
 * The first word (at addr) goes to rh and is skipped when rh<0; the second
 * word goes to rl.  In the mapped path the map register is incremented by 1
 * between the two loads (it is modified), and rh must not equal rs. */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1802void emit_movsbl_indexed(int offset, int rs, int rt)
1803{
1804 assert(offset>-256&&offset<256);
1805 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1806 if(offset>=0) {
1807 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1808 }else{
1809 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1810 }
1811}
1812void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1813{
1814 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1815 else {
1816 if(addr==0) {
1817 emit_shlimm(map,2,map);
1818 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1819 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1820 }else{
1821 assert(addr>-256&&addr<256);
1822 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1823 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1824 emit_movsbl_indexed(addr, rt, rt);
1825 }
1826 }
1827}
1828void emit_movswl_indexed(int offset, int rs, int rt)
1829{
1830 assert(offset>-256&&offset<256);
1831 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1832 if(offset>=0) {
1833 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1834 }else{
1835 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1836 }
1837}
1838void emit_movzbl_indexed(int offset, int rs, int rt)
1839{
1840 assert(offset>-4096&&offset<4096);
1841 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1842 if(offset>=0) {
1843 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1844 }else{
1845 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1846 }
1847}
1848void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1849{
1850 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1851 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1852}
/* Zero-extending byte load with an optional map register.
 * map<0: plain offset load.  Otherwise the load uses a base register plus
 * map shifted left by 2; a non-zero addr is first added to rs, with the sum
 * placed in rt and used as the base. */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1865void emit_movzwl_indexed(int offset, int rs, int rt)
1866{
1867 assert(offset>-256&&offset<256);
1868 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1869 if(offset>=0) {
1870 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1871 }else{
1872 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1873 }
1874}
1875void emit_readword(int addr, int rt)
1876{
1877 u_int offset = addr-(u_int)&dynarec_local;
1878 assert(offset<4096);
1879 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1880 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1881}
1882void emit_movsbl(int addr, int rt)
1883{
1884 u_int offset = addr-(u_int)&dynarec_local;
1885 assert(offset<256);
1886 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1887 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1888}
1889void emit_movswl(int addr, int rt)
1890{
1891 u_int offset = addr-(u_int)&dynarec_local;
1892 assert(offset<256);
1893 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1894 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1895}
1896void emit_movzbl(int addr, int rt)
1897{
1898 u_int offset = addr-(u_int)&dynarec_local;
1899 assert(offset<4096);
1900 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1901 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1902}
1903void emit_movzwl(int addr, int rt)
1904{
1905 u_int offset = addr-(u_int)&dynarec_local;
1906 assert(offset<256);
1907 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1908 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1909}
1910void emit_movzwl_reg(int rs, int rt)
1911{
1912 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1913 assert(0);
1914}
1915
1916void emit_xchg(int rs, int rt)
1917{
1918 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1919 assert(0);
1920}
1921void emit_writeword_indexed(int rt, int offset, int rs)
1922{
1923 assert(offset>-4096&&offset<4096);
1924 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1925 if(offset>=0) {
1926 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1927 }else{
1928 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1929 }
1930}
1931void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1932{
1933 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1934 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1935}
/* Word store with an optional map register.
 * map<0: plain offset store to rs+addr.  Otherwise addr must be 0 and the
 * store uses rs plus map shifted left by 2.  temp is not used here. */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/* Doubleword store of rh:rl with an optional map register.
 * Unmapped (map<0): rh (when >=0) is stored at addr and rl at addr+4.
 * Mapped: rh must be valid.  When a scratch register distinct from rs is
 * available, temp = map+1 is used for the second word; otherwise rs itself
 * is advanced by 4 (rs is modified) before the second store. */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_scratch=(temp!=rs);
  if(have_scratch) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_scratch) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1960void emit_writehword_indexed(int rt, int offset, int rs)
1961{
1962 assert(offset>-256&&offset<256);
1963 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1964 if(offset>=0) {
1965 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1966 }else{
1967 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1968 }
1969}
1970void emit_writebyte_indexed(int rt, int offset, int rs)
1971{
1972 assert(offset>-4096&&offset<4096);
1973 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1974 if(offset>=0) {
1975 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1976 }else{
1977 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1978 }
1979}
1980void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1981{
1982 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1983 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1984}
/* Byte store with an optional map register.
 * map<0: plain offset store.  Otherwise the store uses a base register plus
 * map shifted left by 2; a non-zero addr is first added to rs, with the sum
 * placed in temp and used as the base. */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1997void emit_writeword(int rt, int addr)
1998{
1999 u_int offset = addr-(u_int)&dynarec_local;
2000 assert(offset<4096);
2001 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2002 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2003}
2004void emit_writehword(int rt, int addr)
2005{
2006 u_int offset = addr-(u_int)&dynarec_local;
2007 assert(offset<256);
2008 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2009 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2010}
2011void emit_writebyte(int rt, int addr)
2012{
2013 u_int offset = addr-(u_int)&dynarec_local;
2014 assert(offset<4096);
74426039 2015 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2016 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2017}
/* Unimplemented placeholder: logs "movl $imm,addr", emits nothing and then
 * hits assert(0). */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
/* Unimplemented placeholder: logs "movb $imm,addr", emits nothing and then
 * hits assert(0). */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2028
2029void emit_mul(int rs)
2030{
2031 assem_debug("mul %%%s\n",regname[rs]);
2032 assert(0);
2033}
2034void emit_imul(int rs)
2035{
2036 assem_debug("imul %%%s\n",regname[rs]);
2037 assert(0);
2038}
2039void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2040{
2041 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2042 assert(rs1<16);
2043 assert(rs2<16);
2044 assert(hi<16);
2045 assert(lo<16);
2046 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2047}
2048void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2049{
2050 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2051 assert(rs1<16);
2052 assert(rs2<16);
2053 assert(hi<16);
2054 assert(lo<16);
2055 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2056}
2057
2058void emit_div(int rs)
2059{
2060 assem_debug("div %%%s\n",regname[rs]);
2061 assert(0);
2062}
2063void emit_idiv(int rs)
2064{
2065 assem_debug("idiv %%%s\n",regname[rs]);
2066 assert(0);
2067}
/* Unimplemented placeholder: logs "cdq", emits nothing and then hits
 * assert(0). */
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2073
2074void emit_clz(int rs,int rt)
2075{
2076 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2077 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2078}
2079
2080void emit_subcs(int rs1,int rs2,int rt)
2081{
2082 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2083 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2084}
2085
2086void emit_shrcc_imm(int rs,u_int imm,int rt)
2087{
2088 assert(imm>0);
2089 assert(imm<32);
2090 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2091 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2092}
2093
2094void emit_negmi(int rs, int rt)
2095{
2096 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2097 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2098}
2099
2100void emit_negsmi(int rs, int rt)
2101{
2102 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2103 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2104}
2105
2106void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2107{
2108 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2109 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2110}
2111
2112void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2113{
2114 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2115 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2116}
2117
2118void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2119{
2120 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2121 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2122}
2123
2124void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2125{
2126 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2127 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2128}
2129
2130void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2131{
2132 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2133 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2134}
2135
2136void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2137{
2138 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2139 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2140}
2141
2142void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2143{
2144 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2145 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2146}
2147
2148void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2149{
2150 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2151 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2152}
2153
2154void emit_teq(int rs, int rt)
2155{
2156 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2157 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2158}
2159
2160void emit_rsbimm(int rs, int imm, int rt)
2161{
2162 u_int armval;
cfbd3c6e 2163 genimm_checked(imm,&armval);
57871462 2164 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2165 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2166}
2167
2168// Load 2 immediates optimizing for small code size
2169void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2170{
2171 emit_movimm(imm1,rt1);
2172 u_int armval;
2173 if(genimm(imm2-imm1,&armval)) {
2174 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2175 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2176 }else if(genimm(imm1-imm2,&armval)) {
2177 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2178 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2179 }
2180 else emit_movimm(imm2,rt2);
2181}
2182
2183// Conditionally select one of two immediates, optimizing for small code size
2184// This will only be called if HAVE_CMOV_IMM is defined
2185void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2186{
2187 u_int armval;
2188 if(genimm(imm2-imm1,&armval)) {
2189 emit_movimm(imm1,rt);
2190 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2191 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2192 }else if(genimm(imm1-imm2,&armval)) {
2193 emit_movimm(imm1,rt);
2194 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2195 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2196 }
2197 else {
2198 #ifdef ARMv5_ONLY
2199 emit_movimm(imm1,rt);
2200 add_literal((int)out,imm2);
2201 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2202 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2203 #else
2204 emit_movw(imm1&0x0000FFFF,rt);
2205 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2206 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2207 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2208 }
2209 emit_movt(imm1&0xFFFF0000,rt);
2210 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2211 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2212 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2213 }
2214 #endif
2215 }
2216}
2217
// special case for checking invalid_code
// Not implemented in this backend: reaching it trips the assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2223
2224// special case for checking invalid_code
2225void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2226{
2227 assert(imm<128&&imm>=0);
2228 assert(r>=0&&r<16);
2229 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2230 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2231 emit_cmpimm(HOST_TEMPREG,imm);
2232}
2233
2234// special case for tlb mapping
2235void emit_addsr12(int rs1,int rs2,int rt)
2236{
2237 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2238 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2239}
2240
0bbd1454 2241void emit_callne(int a)
2242{
2243 assem_debug("blne %x\n",a);
2244 u_int offset=genjmp(a);
2245 output_w32(0x1b000000|offset);
2246}
2247
// Used to preload hash table entries
// NOTE(review): the byte sequence written here (0x0F 0x18 + modrm + 32-bit
// address) looks like an x86 encoding rather than an ARM one - presumably a
// leftover from the x86 backend; confirm whether anything still calls this.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2257void emit_prefetchreg(int r)
2258{
2259 assem_debug("pld %s\n",regname[r]);
2260 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2261}
2262
2263// Special case for mini_ht
2264void emit_ldreq_indexed(int rs, u_int offset, int rt)
2265{
2266 assert(offset<4096);
2267 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2268 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2269}
2270
2271void emit_flds(int r,int sr)
2272{
2273 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2274 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2275}
2276
2277void emit_vldr(int r,int vr)
2278{
2279 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2280 output_w32(0xed900b00|(vr<<12)|(r<<16));
2281}
2282
2283void emit_fsts(int sr,int r)
2284{
2285 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2286 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2287}
2288
2289void emit_vstr(int vr,int r)
2290{
2291 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2292 output_w32(0xed800b00|(vr<<12)|(r<<16));
2293}
2294
2295void emit_ftosizs(int s,int d)
2296{
2297 assem_debug("ftosizs s%d,s%d\n",d,s);
2298 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2299}
2300
2301void emit_ftosizd(int s,int d)
2302{
2303 assem_debug("ftosizd s%d,d%d\n",d,s);
2304 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2305}
2306
2307void emit_fsitos(int s,int d)
2308{
2309 assem_debug("fsitos s%d,s%d\n",d,s);
2310 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2311}
2312
2313void emit_fsitod(int s,int d)
2314{
2315 assem_debug("fsitod d%d,s%d\n",d,s);
2316 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2317}
2318
2319void emit_fcvtds(int s,int d)
2320{
2321 assem_debug("fcvtds d%d,s%d\n",d,s);
2322 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2323}
2324
2325void emit_fcvtsd(int s,int d)
2326{
2327 assem_debug("fcvtsd s%d,d%d\n",d,s);
2328 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2329}
2330
/*
 * Emit "fsqrts s<d>,s<s>": single-precision square root of s into d.
 * Both operands use the single-precision register encoding, so the trace
 * line prints both as s-registers (it previously showed the destination as
 * a d-register).
 */
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2336
/*
 * Emit "fsqrtd d<d>,d<s>": double-precision square root of s into d.
 * Both operands use the double-precision register encoding (low 3 bits of
 * each index), so the trace line prints both as d-registers (it previously
 * showed the destination as an s-register).
 */
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2342
/*
 * Emit "fabss s<d>,s<s>": single-precision absolute value of s into d.
 * Both operands use the single-precision register encoding, so the trace
 * line prints both as s-registers (it previously showed the destination as
 * a d-register).
 */
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2348
/*
 * Emit "fabsd d<d>,d<s>": double-precision absolute value of s into d.
 * Both operands use the double-precision register encoding (low 3 bits of
 * each index), so the trace line prints both as d-registers (it previously
 * showed the destination as an s-register).
 */
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2354
/*
 * Emit "fnegs s<d>,s<s>": single-precision negation of s into d.
 * Both operands use the single-precision register encoding, so the trace
 * line prints both as s-registers (it previously showed the destination as
 * a d-register).
 */
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2360
/*
 * Emit "fnegd d<d>,d<s>": double-precision negation of s into d.
 * Both operands use the double-precision register encoding (low 3 bits of
 * each index), so the trace line prints both as d-registers (it previously
 * showed the destination as an s-register).
 */
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2366
2367void emit_fadds(int s1,int s2,int d)
2368{
2369 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2370 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2371}
2372
2373void emit_faddd(int s1,int s2,int d)
2374{
2375 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2376 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2377}
2378
2379void emit_fsubs(int s1,int s2,int d)
2380{
2381 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2382 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2383}
2384
2385void emit_fsubd(int s1,int s2,int d)
2386{
2387 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2388 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2389}
2390
2391void emit_fmuls(int s1,int s2,int d)
2392{
2393 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2394 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2395}
2396
2397void emit_fmuld(int s1,int s2,int d)
2398{
2399 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2400 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2401}
2402
2403void emit_fdivs(int s1,int s2,int d)
2404{
2405 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2406 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2407}
2408
2409void emit_fdivd(int s1,int s2,int d)
2410{
2411 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2412 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2413}
2414
2415void emit_fcmps(int x,int y)
2416{
2417 assem_debug("fcmps s14, s15\n");
2418 output_w32(0xeeb47a67);
2419}
2420
2421void emit_fcmpd(int x,int y)
2422{
2423 assem_debug("fcmpd d6, d7\n");
2424 output_w32(0xeeb46b47);
2425}
2426
2427void emit_fmstat()
2428{
2429 assem_debug("fmstat\n");
2430 output_w32(0xeef1fa10);
2431}
2432
2433void emit_bicne_imm(int rs,int imm,int rt)
2434{
2435 u_int armval;
cfbd3c6e 2436 genimm_checked(imm,&armval);
57871462 2437 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2438 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2439}
2440
2441void emit_biccs_imm(int rs,int imm,int rt)
2442{
2443 u_int armval;
cfbd3c6e 2444 genimm_checked(imm,&armval);
57871462 2445 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2446 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2447}
2448
2449void emit_bicvc_imm(int rs,int imm,int rt)
2450{
2451 u_int armval;
cfbd3c6e 2452 genimm_checked(imm,&armval);
57871462 2453 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2454 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2455}
2456
2457void emit_bichi_imm(int rs,int imm,int rt)
2458{
2459 u_int armval;
cfbd3c6e 2460 genimm_checked(imm,&armval);
57871462 2461 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2462 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2463}
2464
2465void emit_orrvs_imm(int rs,int imm,int rt)
2466{
2467 u_int armval;
cfbd3c6e 2468 genimm_checked(imm,&armval);
57871462 2469 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2470 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2471}
2472
b9b61529 2473void emit_orrne_imm(int rs,int imm,int rt)
2474{
2475 u_int armval;
cfbd3c6e 2476 genimm_checked(imm,&armval);
b9b61529 2477 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2478 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2479}
2480
2481void emit_andne_imm(int rs,int imm,int rt)
2482{
2483 u_int armval;
cfbd3c6e 2484 genimm_checked(imm,&armval);
b9b61529 2485 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2486 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2487}
2488
57871462 2489void emit_jno_unlikely(int a)
2490{
2491 //emit_jno(a);
2492 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2493 output_w32(0x72800000|rd_rn_rm(15,15,0));
2494}
2495
2496// Save registers before function call
2497void save_regs(u_int reglist)
2498{
2499 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2500 if(!reglist) return;
2501 assem_debug("stmia fp,{");
2502 if(reglist&1) assem_debug("r0, ");
2503 if(reglist&2) assem_debug("r1, ");
2504 if(reglist&4) assem_debug("r2, ");
2505 if(reglist&8) assem_debug("r3, ");
2506 if(reglist&0x1000) assem_debug("r12");
2507 assem_debug("}\n");
2508 output_w32(0xe88b0000|reglist);
2509}
2510// Restore registers after function call
2511void restore_regs(u_int reglist)
2512{
2513 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2514 if(!reglist) return;
2515 assem_debug("ldmia fp,{");
2516 if(reglist&1) assem_debug("r0, ");
2517 if(reglist&2) assem_debug("r1, ");
2518 if(reglist&4) assem_debug("r2, ");
2519 if(reglist&8) assem_debug("r3, ");
2520 if(reglist&0x1000) assem_debug("r12");
2521 assem_debug("}\n");
2522 output_w32(0xe89b0000|reglist);
2523}
2524
2525// Write back consts using r14 so we don't disturb the other registers
2526void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2527{
2528 int hr;
2529 for(hr=0;hr<HOST_REGS;hr++) {
2530 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2531 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2532 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2533 int value=constmap[i][hr];
2534 if(value==0) {
2535 emit_zeroreg(HOST_TEMPREG);
2536 }
2537 else {
2538 emit_movimm(value,HOST_TEMPREG);
2539 }
2540 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2541#ifndef FORCE32
57871462 2542 if((i_is32>>i_regmap[hr])&1) {
2543 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2544 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2545 }
24385cae 2546#endif
57871462 2547 }
2548 }
2549 }
2550 }
2551}
2552
2553/* Stubs/epilogue */
2554
2555void literal_pool(int n)
2556{
2557 if(!literalcount) return;
2558 if(n) {
2559 if((int)out-literals[0][0]<4096-n) return;
2560 }
2561 u_int *ptr;
2562 int i;
2563 for(i=0;i<literalcount;i++)
2564 {
2565 ptr=(u_int *)literals[i][0];
2566 u_int offset=(u_int)out-(u_int)ptr-8;
2567 assert(offset<4096);
2568 assert(!(offset&3));
2569 *ptr|=offset;
2570 output_w32(literals[i][1]);
2571 }
2572 literalcount=0;
2573}
2574
2575void literal_pool_jumpover(int n)
2576{
2577 if(!literalcount) return;
2578 if(n) {
2579 if((int)out-literals[0][0]<4096-n) return;
2580 }
2581 int jaddr=(int)out;
2582 emit_jmp(0);
2583 literal_pool(0);
2584 set_jump_target(jaddr,(int)out);
2585}
2586
2587emit_extjump2(int addr, int target, int linker)
2588{
2589 u_char *ptr=(u_char *)addr;
2590 assert((ptr[3]&0x0e)==0xa);
2591 emit_loadlp(target,0);
2592 emit_loadlp(addr,1);
24385cae 2593 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2594 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2595//DEBUG >
2596#ifdef DEBUG_CYCLE_COUNT
2597 emit_readword((int)&last_count,ECX);
2598 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2599 emit_readword((int)&next_interupt,ECX);
2600 emit_writeword(HOST_CCREG,(int)&Count);
2601 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2602 emit_writeword(ECX,(int)&last_count);
2603#endif
2604//DEBUG <
2605 emit_jmp(linker);
2606}
2607
2608emit_extjump(int addr, int target)
2609{
2610 emit_extjump2(addr, target, (int)dyna_linker);
2611}
2612emit_extjump_ds(int addr, int target)
2613{
2614 emit_extjump2(addr, target, (int)dyna_linker_ds);
2615}
2616
cbbab9cd 2617#ifdef PCSX
2618#include "pcsxmem_inline.c"
2619#endif
2620
57871462 2621do_readstub(int n)
2622{
2623 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2624 literal_pool(256);
2625 set_jump_target(stubs[n][1],(int)out);
2626 int type=stubs[n][0];
2627 int i=stubs[n][3];
2628 int rs=stubs[n][4];
2629 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2630 u_int reglist=stubs[n][7];
2631 signed char *i_regmap=i_regs->regmap;
2632 int addr=get_reg(i_regmap,AGEN1+(i&1));
2633 int rth,rt;
2634 int ds;
b9b61529 2635 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2636 rth=get_reg(i_regmap,FTEMP|64);
2637 rt=get_reg(i_regmap,FTEMP);
2638 }else{
2639 rth=get_reg(i_regmap,rt1[i]|64);
2640 rt=get_reg(i_regmap,rt1[i]);
2641 }
2642 assert(rs>=0);
57871462 2643 if(addr<0) addr=rt;
535d208a 2644 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2645 assert(addr>=0);
2646 int ftable=0;
2647 if(type==LOADB_STUB||type==LOADBU_STUB)
2648 ftable=(int)readmemb;
2649 if(type==LOADH_STUB||type==LOADHU_STUB)
2650 ftable=(int)readmemh;
2651 if(type==LOADW_STUB)
2652 ftable=(int)readmem;
24385cae 2653#ifndef FORCE32
57871462 2654 if(type==LOADD_STUB)
2655 ftable=(int)readmemd;
24385cae 2656#endif
2657 assert(ftable!=0);
57871462 2658 emit_writeword(rs,(int)&address);
2659 //emit_pusha();
2660 save_regs(reglist);
97a238a6 2661#ifndef PCSX
57871462 2662 ds=i_regs!=&regs[i];
2663 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2664 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2665 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2666 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2667 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2668#endif
57871462 2669 emit_shrimm(rs,16,1);
2670 int cc=get_reg(i_regmap,CCREG);
2671 if(cc<0) {
2672 emit_loadreg(CCREG,2);
2673 }
2674 emit_movimm(ftable,0);
2675 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2676#ifndef PCSX
57871462 2677 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2678#endif
57871462 2679 //emit_readword((int)&last_count,temp);
2680 //emit_add(cc,temp,cc);
2681 //emit_writeword(cc,(int)&Count);
2682 //emit_mov(15,14);
2683 emit_call((int)&indirect_jump_indexed);
2684 //emit_callreg(rs);
2685 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2686#ifndef PCSX
57871462 2687 // We really shouldn't need to update the count here,
2688 // but not doing so causes random crashes...
2689 emit_readword((int)&Count,HOST_TEMPREG);
2690 emit_readword((int)&next_interupt,2);
2691 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2692 emit_writeword(2,(int)&last_count);
2693 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2694 if(cc<0) {
2695 emit_storereg(CCREG,HOST_TEMPREG);
2696 }
f51dc36c 2697#endif
57871462 2698 //emit_popa();
2699 restore_regs(reglist);
2700 //if((cc=get_reg(regmap,CCREG))>=0) {
2701 // emit_loadreg(CCREG,cc);
2702 //}
f18c0f46 2703 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2704 assert(rt>=0);
2705 if(type==LOADB_STUB)
2706 emit_movsbl((int)&readmem_dword,rt);
2707 if(type==LOADBU_STUB)
2708 emit_movzbl((int)&readmem_dword,rt);
2709 if(type==LOADH_STUB)
2710 emit_movswl((int)&readmem_dword,rt);
2711 if(type==LOADHU_STUB)
2712 emit_movzwl((int)&readmem_dword,rt);
2713 if(type==LOADW_STUB)
2714 emit_readword((int)&readmem_dword,rt);
2715 if(type==LOADD_STUB) {
2716 emit_readword((int)&readmem_dword,rt);
2717 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2718 }
57871462 2719 }
2720 emit_jmp(stubs[n][2]); // return address
2721}
2722
2723inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2724{
2725 int rs=get_reg(regmap,target);
2726 int rth=get_reg(regmap,target|64);
2727 int rt=get_reg(regmap,target);
535d208a 2728 if(rs<0) rs=get_reg(regmap,-1);
57871462 2729 assert(rs>=0);
57871462 2730 int ftable=0;
2731 if(type==LOADB_STUB||type==LOADBU_STUB)
2732 ftable=(int)readmemb;
2733 if(type==LOADH_STUB||type==LOADHU_STUB)
2734 ftable=(int)readmemh;
2735 if(type==LOADW_STUB)
2736 ftable=(int)readmem;
24385cae 2737#ifndef FORCE32
57871462 2738 if(type==LOADD_STUB)
2739 ftable=(int)readmemd;
24385cae 2740#endif
2741 assert(ftable!=0);
cbbab9cd 2742#ifdef PCSX
2743 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2744 return;
2745#endif
fd99c415 2746 if(target==0)
2747 emit_movimm(addr,rs);
57871462 2748 emit_writeword(rs,(int)&address);
2749 //emit_pusha();
2750 save_regs(reglist);
0c1fe38b 2751#ifndef PCSX
2752 if((signed int)addr>=(signed int)0xC0000000) {
2753 // Theoretically we can have a pagefault here, if the TLB has never
2754 // been enabled and the address is outside the range 80000000..BFFFFFFF
2755 // Write out the registers so the pagefault can be handled. This is
2756 // a very rare case and likely represents a bug.
2757 int ds=regmap!=regs[i].regmap;
2758 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2759 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2760 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2761 }
2762#endif
57871462 2763 //emit_shrimm(rs,16,1);
2764 int cc=get_reg(regmap,CCREG);
2765 if(cc<0) {
2766 emit_loadreg(CCREG,2);
2767 }
2768 //emit_movimm(ftable,0);
2769 emit_movimm(((u_int *)ftable)[addr>>16],0);
2770 //emit_readword((int)&last_count,12);
2771 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2772#ifndef PCSX
57871462 2773 if((signed int)addr>=(signed int)0xC0000000) {
2774 // Pagefault address
2775 int ds=regmap!=regs[i].regmap;
2776 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2777 }
f51dc36c 2778#endif
57871462 2779 //emit_add(12,2,2);
2780 //emit_writeword(2,(int)&Count);
2781 //emit_call(((u_int *)ftable)[addr>>16]);
2782 emit_call((int)&indirect_jump);
f51dc36c 2783#ifndef PCSX
57871462 2784 // We really shouldn't need to update the count here,
2785 // but not doing so causes random crashes...
2786 emit_readword((int)&Count,HOST_TEMPREG);
2787 emit_readword((int)&next_interupt,2);
2788 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2789 emit_writeword(2,(int)&last_count);
2790 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2791 if(cc<0) {
2792 emit_storereg(CCREG,HOST_TEMPREG);
2793 }
f51dc36c 2794#endif
57871462 2795 //emit_popa();
2796 restore_regs(reglist);
fd99c415 2797 if(rt>=0) {
2798 if(type==LOADB_STUB)
2799 emit_movsbl((int)&readmem_dword,rt);
2800 if(type==LOADBU_STUB)
2801 emit_movzbl((int)&readmem_dword,rt);
2802 if(type==LOADH_STUB)
2803 emit_movswl((int)&readmem_dword,rt);
2804 if(type==LOADHU_STUB)
2805 emit_movzwl((int)&readmem_dword,rt);
2806 if(type==LOADW_STUB)
2807 emit_readword((int)&readmem_dword,rt);
2808 if(type==LOADD_STUB) {
2809 emit_readword((int)&readmem_dword,rt);
2810 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2811 }
57871462 2812 }
2813}
2814
2815do_writestub(int n)
2816{
2817 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2818 literal_pool(256);
2819 set_jump_target(stubs[n][1],(int)out);
2820 int type=stubs[n][0];
2821 int i=stubs[n][3];
2822 int rs=stubs[n][4];
2823 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2824 u_int reglist=stubs[n][7];
2825 signed char *i_regmap=i_regs->regmap;
2826 int addr=get_reg(i_regmap,AGEN1+(i&1));
2827 int rth,rt,r;
2828 int ds;
b9b61529 2829 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2830 rth=get_reg(i_regmap,FTEMP|64);
2831 rt=get_reg(i_regmap,r=FTEMP);
2832 }else{
2833 rth=get_reg(i_regmap,rs2[i]|64);
2834 rt=get_reg(i_regmap,r=rs2[i]);
2835 }
2836 assert(rs>=0);
2837 assert(rt>=0);
2838 if(addr<0) addr=get_reg(i_regmap,-1);
2839 assert(addr>=0);
2840 int ftable=0;
2841 if(type==STOREB_STUB)
2842 ftable=(int)writememb;
2843 if(type==STOREH_STUB)
2844 ftable=(int)writememh;
2845 if(type==STOREW_STUB)
2846 ftable=(int)writemem;
24385cae 2847#ifndef FORCE32
57871462 2848 if(type==STORED_STUB)
2849 ftable=(int)writememd;
24385cae 2850#endif
2851 assert(ftable!=0);
57871462 2852 emit_writeword(rs,(int)&address);
2853 //emit_shrimm(rs,16,rs);
2854 //emit_movmem_indexedx4(ftable,rs,rs);
2855 if(type==STOREB_STUB)
2856 emit_writebyte(rt,(int)&byte);
2857 if(type==STOREH_STUB)
2858 emit_writehword(rt,(int)&hword);
2859 if(type==STOREW_STUB)
2860 emit_writeword(rt,(int)&word);
2861 if(type==STORED_STUB) {
3d624f89 2862#ifndef FORCE32
57871462 2863 emit_writeword(rt,(int)&dword);
2864 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2865#else
2866 printf("STORED_STUB\n");
2867#endif
57871462 2868 }
2869 //emit_pusha();
2870 save_regs(reglist);
97a238a6 2871#ifndef PCSX
57871462 2872 ds=i_regs!=&regs[i];
2873 int real_rs=get_reg(i_regmap,rs1[i]);
2874 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2875 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2876 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2877 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2878#endif
57871462 2879 emit_shrimm(rs,16,1);
2880 int cc=get_reg(i_regmap,CCREG);
2881 if(cc<0) {
2882 emit_loadreg(CCREG,2);
2883 }
2884 emit_movimm(ftable,0);
2885 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2886#ifndef PCSX
57871462 2887 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2888#endif
57871462 2889 //emit_readword((int)&last_count,temp);
2890 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2891 //emit_add(cc,temp,cc);
2892 //emit_writeword(cc,(int)&Count);
2893 emit_call((int)&indirect_jump_indexed);
2894 //emit_callreg(rs);
2895 emit_readword((int)&Count,HOST_TEMPREG);
2896 emit_readword((int)&next_interupt,2);
2897 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2898 emit_writeword(2,(int)&last_count);
2899 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2900 if(cc<0) {
2901 emit_storereg(CCREG,HOST_TEMPREG);
2902 }
2903 //emit_popa();
2904 restore_regs(reglist);
2905 //if((cc=get_reg(regmap,CCREG))>=0) {
2906 // emit_loadreg(CCREG,cc);
2907 //}
2908 emit_jmp(stubs[n][2]); // return address
2909}
2910
2911inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2912{
2913 int rs=get_reg(regmap,-1);
2914 int rth=get_reg(regmap,target|64);
2915 int rt=get_reg(regmap,target);
2916 assert(rs>=0);
2917 assert(rt>=0);
cbbab9cd 2918#ifdef PCSX
2919 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2920 return;
2921#endif
57871462 2922 int ftable=0;
2923 if(type==STOREB_STUB)
2924 ftable=(int)writememb;
2925 if(type==STOREH_STUB)
2926 ftable=(int)writememh;
2927 if(type==STOREW_STUB)
2928 ftable=(int)writemem;
24385cae 2929#ifndef FORCE32
57871462 2930 if(type==STORED_STUB)
2931 ftable=(int)writememd;
24385cae 2932#endif
2933 assert(ftable!=0);
57871462 2934 emit_writeword(rs,(int)&address);
2935 //emit_shrimm(rs,16,rs);
2936 //emit_movmem_indexedx4(ftable,rs,rs);
2937 if(type==STOREB_STUB)
2938 emit_writebyte(rt,(int)&byte);
2939 if(type==STOREH_STUB)
2940 emit_writehword(rt,(int)&hword);
2941 if(type==STOREW_STUB)
2942 emit_writeword(rt,(int)&word);
2943 if(type==STORED_STUB) {
3d624f89 2944#ifndef FORCE32
57871462 2945 emit_writeword(rt,(int)&dword);
2946 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2947#else
2948 printf("STORED_STUB\n");
2949#endif
57871462 2950 }
2951 //emit_pusha();
2952 save_regs(reglist);
0c1fe38b 2953#ifndef PCSX
2954 // rearmed note: load_all_consts prevents BIOS boot, some bug?
2955 if((signed int)addr>=(signed int)0xC0000000) {
2956 // Theoretically we can have a pagefault here, if the TLB has never
2957 // been enabled and the address is outside the range 80000000..BFFFFFFF
2958 // Write out the registers so the pagefault can be handled. This is
2959 // a very rare case and likely represents a bug.
2960 int ds=regmap!=regs[i].regmap;
2961 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2962 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2963 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2964 }
2965#endif
57871462 2966 //emit_shrimm(rs,16,1);
2967 int cc=get_reg(regmap,CCREG);
2968 if(cc<0) {
2969 emit_loadreg(CCREG,2);
2970 }
2971 //emit_movimm(ftable,0);
2972 emit_movimm(((u_int *)ftable)[addr>>16],0);
2973 //emit_readword((int)&last_count,12);
2974 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2975#ifndef PCSX
57871462 2976 if((signed int)addr>=(signed int)0xC0000000) {
2977 // Pagefault address
2978 int ds=regmap!=regs[i].regmap;
2979 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2980 }
f51dc36c 2981#endif
57871462 2982 //emit_add(12,2,2);
2983 //emit_writeword(2,(int)&Count);
2984 //emit_call(((u_int *)ftable)[addr>>16]);
2985 emit_call((int)&indirect_jump);
2986 emit_readword((int)&Count,HOST_TEMPREG);
2987 emit_readword((int)&next_interupt,2);
2988 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2989 emit_writeword(2,(int)&last_count);
2990 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2991 if(cc<0) {
2992 emit_storereg(CCREG,HOST_TEMPREG);
2993 }
2994 //emit_popa();
2995 restore_regs(reglist);
2996}
2997
2998do_unalignedwritestub(int n)
2999{
b7918751 3000 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3001 literal_pool(256);
57871462 3002 set_jump_target(stubs[n][1],(int)out);
b7918751 3003
3004 int i=stubs[n][3];
3005 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3006 int addr=stubs[n][5];
3007 u_int reglist=stubs[n][7];
3008 signed char *i_regmap=i_regs->regmap;
3009 int temp2=get_reg(i_regmap,FTEMP);
3010 int rt;
3011 int ds, real_rs;
3012 rt=get_reg(i_regmap,rs2[i]);
3013 assert(rt>=0);
3014 assert(addr>=0);
3015 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3016 reglist|=(1<<addr);
3017 reglist&=~(1<<temp2);
3018
3019 emit_andimm(addr,0xfffffffc,temp2);
3020 emit_writeword(temp2,(int)&address);
3021
3022 save_regs(reglist);
97a238a6 3023#ifndef PCSX
b7918751 3024 ds=i_regs!=&regs[i];
3025 real_rs=get_reg(i_regmap,rs1[i]);
3026 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3027 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3028 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3029 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3030#endif
b7918751 3031 emit_shrimm(addr,16,1);
3032 int cc=get_reg(i_regmap,CCREG);
3033 if(cc<0) {
3034 emit_loadreg(CCREG,2);
3035 }
3036 emit_movimm((u_int)readmem,0);
3037 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3038#ifndef PCSX
3039 // pagefault address
3040 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3041#endif
b7918751 3042 emit_call((int)&indirect_jump_indexed);
3043 restore_regs(reglist);
3044
3045 emit_readword((int)&readmem_dword,temp2);
3046 int temp=addr; //hmh
3047 emit_shlimm(addr,3,temp);
3048 emit_andimm(temp,24,temp);
3049#ifdef BIG_ENDIAN_MIPS
3050 if (opcode[i]==0x2e) // SWR
3051#else
3052 if (opcode[i]==0x2a) // SWL
3053#endif
3054 emit_xorimm(temp,24,temp);
3055 emit_movimm(-1,HOST_TEMPREG);
55439448 3056 if (opcode[i]==0x2a) { // SWL
b7918751 3057 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3058 emit_orrshr(rt,temp,temp2);
3059 }else{
3060 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3061 emit_orrshl(rt,temp,temp2);
3062 }
3063 emit_readword((int)&address,addr);
3064 emit_writeword(temp2,(int)&word);
3065 //save_regs(reglist); // don't need to, no state changes
3066 emit_shrimm(addr,16,1);
3067 emit_movimm((u_int)writemem,0);
3068 //emit_call((int)&indirect_jump_indexed);
3069 emit_mov(15,14);
3070 emit_readword_dualindexedx4(0,1,15);
3071 emit_readword((int)&Count,HOST_TEMPREG);
3072 emit_readword((int)&next_interupt,2);
3073 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3074 emit_writeword(2,(int)&last_count);
3075 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3076 if(cc<0) {
3077 emit_storereg(CCREG,HOST_TEMPREG);
3078 }
3079 restore_regs(reglist);
57871462 3080 emit_jmp(stubs[n][2]); // return address
3081}
3082
/*
 * Debug helper: print the eight register values passed in, followed by
 * the word located just below the first parameter in memory.
 * NOTE(review): the parameter names and the (&edi)[-1] access look like a
 * leftover from the x86 backend - confirm whether this is still used.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int below_first_arg=(&edi)[-1];
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,below_first_arg);
}
3087
3088do_invstub(int n)
3089{
3090 literal_pool(20);
3091 u_int reglist=stubs[n][3];
3092 set_jump_target(stubs[n][1],(int)out);
3093 save_regs(reglist);
3094 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3095 emit_call((int)&invalidate_addr);
3096 restore_regs(reglist);
3097 emit_jmp(stubs[n][2]); // return address
3098}
3099
3100int do_dirty_stub(int i)
3101{
3102 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3103 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3104 #ifdef PCSX
3105 addr=(u_int)source;
3106 #endif
57871462 3107 // Careful about the code output here, verify_dirty needs to parse it.
3108 #ifdef ARMv5_ONLY
ac545b3a 3109 emit_loadlp(addr,1);
57871462 3110 emit_loadlp((int)copy,2);
3111 emit_loadlp(slen*4,3);
3112 #else
ac545b3a 3113 emit_movw(addr&0x0000FFFF,1);
57871462 3114 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3115 emit_movt(addr&0xFFFF0000,1);
57871462 3116 emit_movt(((u_int)copy)&0xFFFF0000,2);
3117 emit_movw(slen*4,3);
3118 #endif
3119 emit_movimm(start+i*4,0);
3120 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3121 int entry=(int)out;
3122 load_regs_entry(i);
3123 if(entry==(int)out) entry=instr_addr[i];
3124 emit_jmp(instr_addr[i]);
3125 return entry;
3126}
3127
3128void do_dirty_stub_ds()
3129{
3130 // Careful about the code output here, verify_dirty needs to parse it.
3131 #ifdef ARMv5_ONLY
3132 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3133 emit_loadlp((int)copy,2);
3134 emit_loadlp(slen*4,3);
3135 #else
3136 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3137 emit_movw(((u_int)copy)&0x0000FFFF,2);
3138 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3139 emit_movt(((u_int)copy)&0xFFFF0000,2);
3140 emit_movw(slen*4,3);
3141 #endif
3142 emit_movimm(start+1,0);
3143 emit_call((int)&verify_code_ds);
3144}
3145
3146do_cop1stub(int n)
3147{
3148 literal_pool(256);
3149 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3150 set_jump_target(stubs[n][1],(int)out);
3151 int i=stubs[n][3];
3d624f89 3152// int rs=stubs[n][4];
57871462 3153 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3154 int ds=stubs[n][6];
3155 if(!ds) {
3156 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3157 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3158 }
3159 //else {printf("fp exception in delay slot\n");}
3160 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3161 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3162 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3163 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3164 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3165}
3166
3167/* TLB */
3168
3169int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3170{
3171 if(c) {
3172 if((signed int)addr>=(signed int)0xC0000000) {
3173 // address_generation already loaded the const
3174 emit_readword_dualindexedx4(FP,map,map);
3175 }
3176 else
3177 return -1; // No mapping
3178 }
3179 else {
3180 assert(s!=map);
3181 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3182 emit_addsr12(map,s,map);
3183 // Schedule this while we wait on the load
3184 //if(x) emit_xorimm(s,x,ar);
3185 if(shift>=0) emit_shlimm(s,3,shift);
3186 if(~a) emit_andimm(s,a,ar);
3187 emit_readword_dualindexedx4(FP,map,map);
3188 }
3189 return map;
3190}
3191int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3192{
3193 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3194 emit_test(map,map);
3195 *jaddr=(int)out;
3196 emit_js(0);
3197 }
3198 return map;
3199}
3200
3201int gen_tlb_addr_r(int ar, int map) {
3202 if(map>=0) {
3203 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3204 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3205 }
3206}
3207
3208int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3209{
3210 if(c) {
3211 if(addr<0x80800000||addr>=0xC0000000) {
3212 // address_generation already loaded the const
3213 emit_readword_dualindexedx4(FP,map,map);
3214 }
3215 else
3216 return -1; // No mapping
3217 }
3218 else {
3219 assert(s!=map);
3220 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3221 emit_addsr12(map,s,map);
3222 // Schedule this while we wait on the load
3223 //if(x) emit_xorimm(s,x,ar);
3224 emit_readword_dualindexedx4(FP,map,map);
3225 }
3226 return map;
3227}
3228int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3229{
3230 if(!c||addr<0x80800000||addr>=0xC0000000) {
3231 emit_testimm(map,0x40000000);
3232 *jaddr=(int)out;
3233 emit_jne(0);
3234 }
3235}
3236
3237int gen_tlb_addr_w(int ar, int map) {
3238 if(map>=0) {
3239 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3240 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3241 }
3242}
3243
3244// Generate the address of the memory_map entry, relative to dynarec_local
3245generate_map_const(u_int addr,int reg) {
3246 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3247 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3248}
3249
3250/* Special assem */
3251
3252void shift_assemble_arm(int i,struct regstat *i_regs)
3253{
3254 if(rt1[i]) {
3255 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3256 {
3257 signed char s,t,shift;
3258 t=get_reg(i_regs->regmap,rt1[i]);
3259 s=get_reg(i_regs->regmap,rs1[i]);
3260 shift=get_reg(i_regs->regmap,rs2[i]);
3261 if(t>=0){
3262 if(rs1[i]==0)
3263 {
3264 emit_zeroreg(t);
3265 }
3266 else if(rs2[i]==0)
3267 {
3268 assert(s>=0);
3269 if(s!=t) emit_mov(s,t);
3270 }
3271 else
3272 {
3273 emit_andimm(shift,31,HOST_TEMPREG);
3274 if(opcode2[i]==4) // SLLV
3275 {
3276 emit_shl(s,HOST_TEMPREG,t);
3277 }
3278 if(opcode2[i]==6) // SRLV
3279 {
3280 emit_shr(s,HOST_TEMPREG,t);
3281 }
3282 if(opcode2[i]==7) // SRAV
3283 {
3284 emit_sar(s,HOST_TEMPREG,t);
3285 }
3286 }
3287 }
3288 } else { // DSLLV/DSRLV/DSRAV
3289 signed char sh,sl,th,tl,shift;
3290 th=get_reg(i_regs->regmap,rt1[i]|64);
3291 tl=get_reg(i_regs->regmap,rt1[i]);
3292 sh=get_reg(i_regs->regmap,rs1[i]|64);
3293 sl=get_reg(i_regs->regmap,rs1[i]);
3294 shift=get_reg(i_regs->regmap,rs2[i]);
3295 if(tl>=0){
3296 if(rs1[i]==0)
3297 {
3298 emit_zeroreg(tl);
3299 if(th>=0) emit_zeroreg(th);
3300 }
3301 else if(rs2[i]==0)
3302 {
3303 assert(sl>=0);
3304 if(sl!=tl) emit_mov(sl,tl);
3305 if(th>=0&&sh!=th) emit_mov(sh,th);
3306 }
3307 else
3308 {
3309 // FIXME: What if shift==tl ?
3310 assert(shift!=tl);
3311 int temp=get_reg(i_regs->regmap,-1);
3312 int real_th=th;
3313 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3314 assert(sl>=0);
3315 assert(sh>=0);
3316 emit_andimm(shift,31,HOST_TEMPREG);
3317 if(opcode2[i]==0x14) // DSLLV
3318 {
3319 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3320 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3321 emit_orrshr(sl,HOST_TEMPREG,th);
3322 emit_andimm(shift,31,HOST_TEMPREG);
3323 emit_testimm(shift,32);
3324 emit_shl(sl,HOST_TEMPREG,tl);
3325 if(th>=0) emit_cmovne_reg(tl,th);
3326 emit_cmovne_imm(0,tl);
3327 }
3328 if(opcode2[i]==0x16) // DSRLV
3329 {
3330 assert(th>=0);
3331 emit_shr(sl,HOST_TEMPREG,tl);
3332 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3333 emit_orrshl(sh,HOST_TEMPREG,tl);
3334 emit_andimm(shift,31,HOST_TEMPREG);
3335 emit_testimm(shift,32);
3336 emit_shr(sh,HOST_TEMPREG,th);
3337 emit_cmovne_reg(th,tl);
3338 if(real_th>=0) emit_cmovne_imm(0,th);
3339 }
3340 if(opcode2[i]==0x17) // DSRAV
3341 {
3342 assert(th>=0);
3343 emit_shr(sl,HOST_TEMPREG,tl);
3344 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3345 if(real_th>=0) {
3346 assert(temp>=0);
3347 emit_sarimm(th,31,temp);
3348 }
3349 emit_orrshl(sh,HOST_TEMPREG,tl);
3350 emit_andimm(shift,31,HOST_TEMPREG);
3351 emit_testimm(shift,32);
3352 emit_sar(sh,HOST_TEMPREG,th);
3353 emit_cmovne_reg(th,tl);
3354 if(real_th>=0) emit_cmovne_reg(temp,th);
3355 }
3356 }
3357 }
3358 }
3359 }
3360}
3361#define shift_assemble shift_assemble_arm
3362
// Emit ARM code for the unaligned load instructions at index i:
// LWL/LWR (opcode 0x22/0x26) and LDL/LDR (opcode 0x1A/0x1B).
// The aligned word/doubleword containing the address is read into
// temp2 (and temp2h for the 64-bit forms), then shifted and merged into the
// target register(s) tl/th.  temp holds the bit offset derived from the
// low address bits.  Accesses that cannot be done inline go through a
// LOADW_STUB/LOADD_STUB (add_stub) or inline_readstub.
3363void loadlr_assemble_arm(int i,struct regstat *i_regs)
3364{
3365 int s,th,tl,temp,temp2,addr,map=-1;
3366 int offset;
3367 int jaddr=0;
af4ee1fe 3368 int memtarget=0,c=0;
57871462 3369 u_int hr,reglist=0;
3370 th=get_reg(i_regs->regmap,rt1[i]|64);
3371 tl=get_reg(i_regs->regmap,rt1[i]);
3372 s=get_reg(i_regs->regmap,rs1[i]);
3373 temp=get_reg(i_regs->regmap,-1);
3374 temp2=get_reg(i_regs->regmap,FTEMP);
3375 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3376 assert(addr<0);
3377 offset=imm[i];
 // reglist: one bit per host register whose regmap entry is >=0, plus temp.
 // It is passed to add_stub/inline_readstub below.
3378 for(hr=0;hr<HOST_REGS;hr++) {
3379 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3380 }
3381 reglist|=1<<temp;
 // NOTE(review): c is still 0 here, it is only assigned a few lines below,
 // so the "||c" term never selects temp2 - confirm whether that is intended.
3382 if(offset||s<0||c) addr=temp2;
3383 else addr=s;
 // c: the base register held a constant at this instruction (wasconst bit).
 // memtarget: the constant address compares below 0x80000000+RAM_SIZE
 // (signed), or is >=0xC0000000 (signed) while using_tlb.
3384 if(s>=0) {
3385 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3386 if(c) {
3387 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3388 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3389 }
57871462 3390 }
535d208a 3391 if(!using_tlb) {
3392 if(!c) {
 // Non-constant address: temp=addr<<3, temp2=addr aligned down to 4 or
 // 8 bytes, then a compare against RAM_SIZE with a conditional branch
 // whose location is kept in jaddr for the stub.
3393 #ifdef RAM_OFFSET
3394 map=get_reg(i_regs->regmap,ROREG);
3395 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3396 #endif
3397 emit_shlimm(addr,3,temp);
3398 if (opcode[i]==0x22||opcode[i]==0x26) {
3399 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3400 }else{
535d208a 3401 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3402 }
535d208a 3403 emit_cmpimm(addr,RAM_SIZE);
3404 jaddr=(int)out;
3405 emit_jno(0);
3406 }
3407 else {
 // Constant address: the bit offset is known at compile time.
3408 if (opcode[i]==0x22||opcode[i]==0x26) {
3409 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3410 }else{
3411 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3412 }
57871462 3413 }
535d208a 3414 }else{ // using tlb
 // a is the alignment mask handed to do_tlb_r; -1 (no masking) for a
 // constant address.
3415 int a;
3416 if(c) {
3417 a=-1;
3418 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3419 a=0xFFFFFFFC; // LWL/LWR
3420 }else{
3421 a=0xFFFFFFF8; // LDL/LDR
3422 }
3423 map=get_reg(i_regs->regmap,TLREG);
3424 assert(map>=0);
 // The map register is removed from the list handed to the stubs.
ea3d2e6e 3425 reglist&=~(1<<map);
535d208a 3426 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3427 if(c) {
3428 if (opcode[i]==0x22||opcode[i]==0x26) {
3429 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3430 }else{
3431 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3432 }
535d208a 3433 }
3434 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3435 }
3436 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3437 if(!c||memtarget) {
3438 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3439 emit_readword_indexed_tlb(0,temp2,map,temp2);
3440 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3441 }
3442 else
3443 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
 // Merge: shift the loaded word by temp, clear the bits of tl covered by
 // an all-ones mask shifted the same way, then OR the two together.
3444 if(rt1[i]) {
3445 assert(tl>=0);
57871462 3446 emit_andimm(temp,24,temp);
2002a1db 3447#ifdef BIG_ENDIAN_MIPS
3448 if (opcode[i]==0x26) // LWR
3449#else
3450 if (opcode[i]==0x22) // LWL
3451#endif
3452 emit_xorimm(temp,24,temp);
57871462 3453 emit_movimm(-1,HOST_TEMPREG);
3454 if (opcode[i]==0x26) {
3455 emit_shr(temp2,temp,temp2);
3456 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3457 }else{
3458 emit_shl(temp2,temp,temp2);
3459 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3460 }
3461 emit_or(temp2,tl,tl);
57871462 3462 }
535d208a 3463 //emit_storereg(rt1[i],tl); // DEBUG
3464 }
3465 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3466 // FIXME: little endian
3467 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3468 if(!c||memtarget) {
3469 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3470 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3471 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3472 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3473 }
3474 else
3475 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
 // 64-bit merge: bit 5 of the bit offset (tested here) selects between the
 // eq/ne conditional variants emitted below; the low bits give the shift.
3476 if(rt1[i]) {
3477 assert(th>=0);
3478 assert(tl>=0);
57871462 3479 emit_testimm(temp,32);
3480 emit_andimm(temp,24,temp);
3481 if (opcode[i]==0x1A) { // LDL
3482 emit_rsbimm(temp,32,HOST_TEMPREG);
3483 emit_shl(temp2h,temp,temp2h);
3484 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3485 emit_movimm(-1,HOST_TEMPREG);
3486 emit_shl(temp2,temp,temp2);
3487 emit_cmove_reg(temp2h,th);
3488 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3489 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3490 emit_orreq(temp2,tl,tl);
3491 emit_orrne(temp2,th,th);
3492 }
3493 if (opcode[i]==0x1B) { // LDR
3494 emit_xorimm(temp,24,temp);
3495 emit_rsbimm(temp,32,HOST_TEMPREG);
3496 emit_shr(temp2,temp,temp2);
3497 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3498 emit_movimm(-1,HOST_TEMPREG);
3499 emit_shr(temp2h,temp,temp2h);
3500 emit_cmovne_reg(temp2,tl);
3501 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3502 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3503 emit_orrne(temp2h,th,th);
3504 emit_orreq(temp2h,tl,tl);
3505 }
3506 }
3507 }
3508}
3509#define loadlr_assemble loadlr_assemble_arm
3510
// Emit ARM code for the coprocessor 0 instruction at index i:
//   opcode2==0    MFC0
//   opcode2==4    MTC0
//   opcode2==0x10 TLBR/TLBWI/TLBWR/TLBP (unless DISABLE_TLB) and
//                 RFE (PCSX builds) or ERET (other builds)
// The coprocessor register number is taken from bits 11..15 of source[i].
3511void cop0_assemble(int i,struct regstat *i_regs)
3512{
3513 if(opcode2[i]==0) // MFC0
3514 {
3515 signed char t=get_reg(i_regs->regmap,rt1[i]);
3516 char copr=(source[i]>>11)&0x1f;
3517 //assert(t>=0); // Why does this happen? OOT is weird
 // Nothing is emitted when the target has no host register or is r0.
f1b3b369 3518 if(t>=0&&rt1[i]!=0) {
7139f3c8 3519#ifdef MUPEN64
 // MUPEN64: set up fake_pc, bring Count up to date when register 9 is
 // read, call the C handler and fetch the result from readmem_dword.
57871462 3520 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3521 emit_movimm((source[i]>>11)&0x1f,1);
3522 emit_writeword(0,(int)&PC);
3523 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3524 if(copr==9) {
3525 emit_readword((int)&last_count,ECX);
3526 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3527 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3528 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3529 emit_writeword(HOST_CCREG,(int)&Count);
3530 }
3531 emit_call((int)MFC0);
3532 emit_readword((int)&readmem_dword,t);
7139f3c8 3533#else
 // Otherwise the register is read directly from reg_cop0.
3534 emit_readword((int)&reg_cop0+copr*4,t);
3535#endif
57871462 3536 }
3537 }
3538 else if(opcode2[i]==4) // MTC0
3539 {
3540 signed char s=get_reg(i_regs->regmap,rs1[i]);
3541 char copr=(source[i]>>11)&0x1f;
3542 assert(s>=0);
 // The value to write is handed to the C handler through readmem_dword.
3543 emit_writeword(s,(int)&readmem_dword);
3544 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3545#ifdef MUPEN64
57871462 3546 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3547 emit_movimm((source[i]>>11)&0x1f,1);
3548 emit_writeword(0,(int)&PC);
3549 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3550#endif
 // For registers 9, 11, 12 and 13, Count is written as
 // last_count + cycle count + adjustment before the handler runs.
3551 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3552 emit_readword((int)&last_count,ECX);
3553 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3554 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3555 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3556 emit_writeword(HOST_CCREG,(int)&Count);
3557 }
3558 // What a mess. The status register (12) can enable interrupts,
3559 // so needs a special case to handle a pending interrupt.
3560 // The interrupt must be taken immediately, because a subsequent
3561 // instruction might disable interrupts again.
7139f3c8 3562 if(copr==12||copr==13) {
fca1aef2 3563#ifdef PCSX
 // NOTE(review): this path returns before the register reload and the
 // cop1_usable=0 at the end of the MTC0 branch - confirm that is intended.
3564 if (is_delayslot) {
3565 // burn cycles to cause cc_interrupt, which will
3566 // reschedule next_interupt. Relies on CCREG from above.
3567 assem_debug("MTC0 DS %d\n", copr);
3568 emit_writeword(HOST_CCREG,(int)&last_count);
3569 emit_movimm(0,HOST_CCREG);
3570 emit_storereg(CCREG,HOST_CCREG);
3571 emit_movimm(copr,0);
3572 emit_call((int)pcsx_mtc0_ds);
3573 return;
3574 }
3575#endif
 // Store the address of the following instruction in pcaddr and clear
 // pending_exception before calling the handler.
57871462 3576 emit_movimm(start+i*4+4,0);
3577 emit_movimm(0,1);
3578 emit_writeword(0,(int)&pcaddr);
3579 emit_writeword(1,(int)&pending_exception);
3580 }
3581 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3582 //else
fca1aef2 3583#ifdef PCSX
3584 emit_movimm(copr,0);
3585 emit_call((int)pcsx_mtc0);
3586#else
57871462 3587 emit_call((int)MTC0);
fca1aef2 3588#endif
 // After the handler: recompute the cycle count register from Count and
 // next_interupt, and make next_interupt the new last_count.
7139f3c8 3589 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3590 emit_readword((int)&Count,HOST_CCREG);
3591 emit_readword((int)&next_interupt,ECX);
3592 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3593 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3594 emit_writeword(ECX,(int)&last_count);
3595 emit_storereg(CCREG,HOST_CCREG);
3596 }
 // pending_exception is read into host register 14 and tested after the
 // source register(s) have been reloaded.
7139f3c8 3597 if(copr==12||copr==13) {
57871462 3598 assert(!is_delayslot);
3599 emit_readword((int)&pending_exception,14);
3600 }
3601 emit_loadreg(rs1[i],s);
3602 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3603 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3604 if(copr==12||copr==13) {
57871462 3605 emit_test(14,14);
3606 emit_jne((int)&do_interrupt);
3607 }
3608 cop1_usable=0;
3609 }
3610 else
3611 {
3612 assert(opcode2[i]==0x10);
 // The low 6 bits of source[i] select the operation.
3d624f89 3613#ifndef DISABLE_TLB
57871462 3614 if((source[i]&0x3f)==0x01) // TLBR
3615 emit_call((int)TLBR);
3616 if((source[i]&0x3f)==0x02) // TLBWI
3617 emit_call((int)TLBWI_new);
3618 if((source[i]&0x3f)==0x06) { // TLBWR
3619 // The TLB entry written by TLBWR is dependent on the count,
3620 // so update the cycle count
3621 emit_readword((int)&last_count,ECX);
3622 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3623 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3624 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3625 emit_writeword(HOST_CCREG,(int)&Count);
3626 emit_call((int)TLBWR_new);
3627 }
3628 if((source[i]&0x3f)==0x08) // TLBP
3629 emit_call((int)TLBP);
3d624f89 3630#endif
576bbd8f 3631#ifdef PCSX
3632 if((source[i]&0x3f)==0x10) // RFE
3633 {
 // Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
3634 emit_readword((int)&Status,0);
3635 emit_andimm(0,0x3c,1);
3636 emit_andimm(0,~0xf,0);
3637 emit_orrshr_imm(1,2,0);
3638 emit_writeword(0,(int)&Status);
3639 }
3640#else
57871462 3641 if((source[i]&0x3f)==0x18) // ERET
3642 {
3643 int count=ccadj[i];
3644 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3645 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3646 emit_jmp((int)jump_eret);
3647 }
576bbd8f 3648#endif
57871462 3649 }
3650}
3651
b9b61529 3652static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3653{
3654 switch (copr) {
3655 case 1:
3656 case 3:
3657 case 5:
3658 case 8:
3659 case 9:
3660 case 10:
3661 case 11:
3662 emit_readword((int)&reg_cop2d[copr],tl);
3663 emit_signextend16(tl,tl);
3664 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3665 break;
3666 case 7:
3667 case 16:
3668 case 17:
3669 case 18:
3670 case 19:
3671 emit_readword((int)&reg_cop2d[copr],tl);
3672 emit_andimm(tl,0xffff,tl);
3673 emit_writeword(tl,(int)&reg_cop2d[copr]);
3674 break;
3675 case 15:
3676 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3677 emit_writeword(tl,(int)&reg_cop2d[copr]);
3678 break;
3679 case 28:
b9b61529 3680 case 29:
3681 emit_readword((int)&reg_cop2d[9],temp);
3682 emit_testimm(temp,0x8000); // do we need this?
3683 emit_andimm(temp,0xf80,temp);
3684 emit_andne_imm(temp,0,temp);
f70d384d 3685 emit_shrimm(temp,7,tl);
b9b61529 3686 emit_readword((int)&reg_cop2d[10],temp);
3687 emit_testimm(temp,0x8000);
3688 emit_andimm(temp,0xf80,temp);
3689 emit_andne_imm(temp,0,temp);
f70d384d 3690 emit_orrshr_imm(temp,2,tl);
b9b61529 3691 emit_readword((int)&reg_cop2d[11],temp);
3692 emit_testimm(temp,0x8000);
3693 emit_andimm(temp,0xf80,temp);
3694 emit_andne_imm(temp,0,temp);
f70d384d 3695 emit_orrshl_imm(temp,3,tl);
b9b61529 3696 emit_writeword(tl,(int)&reg_cop2d[copr]);
3697 break;
3698 default:
3699 emit_readword((int)&reg_cop2d[copr],tl);
3700 break;
3701 }
3702}
3703
3704static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3705{
3706 switch (copr) {
3707 case 15:
3708 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3709 emit_writeword(sl,(int)&reg_cop2d[copr]);
3710 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3711 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3712 emit_writeword(sl,(int)&reg_cop2d[14]);
3713 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3714 break;
3715 case 28:
3716 emit_andimm(sl,0x001f,temp);
f70d384d 3717 emit_shlimm(temp,7,temp);
b9b61529 3718 emit_writeword(temp,(int)&reg_cop2d[9]);
3719 emit_andimm(sl,0x03e0,temp);
f70d384d 3720 emit_shlimm(temp,2,temp);
b9b61529 3721 emit_writeword(temp,(int)&reg_cop2d[10]);
3722 emit_andimm(sl,0x7c00,temp);
f70d384d 3723 emit_shrimm(temp,3,temp);
b9b61529 3724 emit_writeword(temp,(int)&reg_cop2d[11]);
3725 emit_writeword(sl,(int)&reg_cop2d[28]);
3726 break;
3727 case 30:
3728 emit_movs(sl,temp);
3729 emit_mvnmi(temp,temp);
3730 emit_clz(temp,temp);
3731 emit_writeword(sl,(int)&reg_cop2d[30]);
3732 emit_writeword(temp,(int)&reg_cop2d[31]);
3733 break;
b9b61529 3734 case 31:
3735 break;
3736 default:
3737 emit_writeword(sl,(int)&reg_cop2d[copr]);
3738 break;
3739 }
3740}
3741
3742void cop2_assemble(int i,struct regstat *i_regs)
3743{
3744 u_int copr=(source[i]>>11)&0x1f;
3745 signed char temp=get_reg(i_regs->regmap,-1);
3746 if (opcode2[i]==0) { // MFC2
3747 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3748 if(tl>=0&&rt1[i]!=0)
b9b61529 3749 cop2_get_dreg(copr,tl,temp);
3750 }
3751 else if (opcode2[i]==4) { // MTC2
3752 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3753 cop2_put_dreg(copr,sl,temp);
3754 }
3755 else if (opcode2[i]==2) // CFC2
3756 {
3757 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3758 if(tl>=0&&rt1[i]!=0)
b9b61529 3759 emit_readword((int)&reg_cop2c[copr],tl);
3760 }
3761 else if (opcode2[i]==6) // CTC2
3762 {
3763 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3764 switch(copr) {
3765 case 4:
3766 case 12:
3767 case 20:
3768 case 26:
3769 case 27:
3770 case 29:
3771 case 30:
3772 emit_signextend16(sl,temp);
3773 break;
3774 case 31:
3775 //value = value & 0x7ffff000;
3776 //if (value & 0x7f87e000) value |= 0x80000000;
3777 emit_shrimm(sl,12,temp);
3778 emit_shlimm(temp,12,temp);
3779 emit_testimm(temp,0x7f000000);
3780 emit_testeqimm(temp,0x00870000);
3781 emit_testeqimm(temp,0x0000e000);
3782 emit_orrne_imm(temp,0x80000000,temp);
3783 break;
3784 default:
3785 temp=sl;
3786 break;
3787 }
3788 emit_writeword(temp,(int)&reg_cop2c[copr]);
3789 assert(sl>=0);
3790 }
3791}
3792
3793void c2op_assemble(int i,struct regstat *i_regs)
3794{
3795 signed char temp=get_reg(i_regs->regmap,-1);
3796 u_int c2op=source[i]&0x3f;
3797 u_int hr,reglist=0;
3798 for(hr=0;hr<HOST_REGS;hr++) {
3799 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3800 }
3801 if(i==0||itype[i-1]!=C2OP)
3802 save_regs(reglist);
3803
3804 if (gte_handlers[c2op]!=NULL) {
3805 int cc=get_reg(i_regs->regmap,CCREG);
009faf24 3806 emit_movimm(source[i],1); // opcode
b9b61529 3807 if (cc>=0&&gte_cycletab[c2op])
009faf24 3808 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
3809 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3810 emit_writeword(1,(int)&psxRegs.code);
b9b61529 3811 emit_call((int)gte_handlers[c2op]);
3812 }
3813
3814 if(i>=slen-1||itype[i+1]!=C2OP)
3815 restore_regs(reglist);
3816}
3817
3818void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3819{
3820 // XXX: should just just do the exception instead
3821 if(!cop1_usable) {
3822 int jaddr=(int)out;
3823 emit_jmp(0);
3824 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3825 cop1_usable=1;
3826 }
3827}
3828
57871462 3829void cop1_assemble(int i,struct regstat *i_regs)
3830{
3d624f89 3831#ifndef DISABLE_COP1
57871462 3832 // Check cop1 unusable
3833 if(!cop1_usable) {
3834 signed char rs=get_reg(i_regs->regmap,CSREG);
3835 assert(rs>=0);
3836 emit_testimm(rs,0x20000000);
3837 int jaddr=(int)out;
3838 emit_jeq(0);
3839 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3840 cop1_usable=1;
3841 }
3842 if (opcode2[i]==0) { // MFC1
3843 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3844 if(tl>=0) {
3845 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3846 emit_readword_indexed(0,tl,tl);
3847 }
3848 }
3849 else if (opcode2[i]==1) { // DMFC1
3850 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3851 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3852 if(tl>=0) {
3853 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3854 if(th>=0) emit_readword_indexed(4,tl,th);
3855 emit_readword_indexed(0,tl,tl);
3856 }
3857 }
3858 else if (opcode2[i]==4) { // MTC1
3859 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3860 signed char temp=get_reg(i_regs->regmap,-1);
3861 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3862 emit_writeword_indexed(sl,0,temp);
3863 }
3864 else if (opcode2[i]==5) { // DMTC1
3865 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3866 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3867 signed char temp=get_reg(i_regs->regmap,-1);
3868 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3869 emit_writeword_indexed(sh,4,temp);
3870 emit_writeword_indexed(sl,0,temp);
3871 }
3872 else if (opcode2[i]==2) // CFC1
3873 {
3874 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3875 if(tl>=0) {
3876 u_int copr=(source[i]>>11)&0x1f;
3877 if(copr==0) emit_readword((int)&FCR0,tl);
3878 if(copr==31) emit_readword((int)&FCR31,tl);
3879 }
3880 }
3881 else if (opcode2[i]==6) // CTC1
3882 {
3883 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3884 u_int copr=(source[i]>>11)&0x1f;
3885 assert(sl>=0);
3886 if(copr==31)
3887 {
3888 emit_writeword(sl,(int)&FCR31);
3889 // Set the rounding mode
3890 //FIXME
3891 //char temp=get_reg(i_regs->regmap,-1);
3892 //emit_andimm(sl,3,temp);
3893 //emit_fldcw_indexed((int)&rounding_modes,temp);
3894 }
3895 }
3d624f89 3896#else
3897 cop1_unusable(i, i_regs);
3898#endif
57871462 3899}
3900
3901void fconv_assemble_arm(int i,struct regstat *i_regs)
3902{
3d624f89 3903#ifndef DISABLE_COP1
57871462 3904 signed char temp=get_reg(i_regs->regmap,-1);
3905 assert(temp>=0);
3906 // Check cop1 unusable
3907 if(!cop1_usable) {
3908 signed char rs=get_reg(i_regs->regmap,CSREG);
3909 assert(rs>=0);
3910 emit_testimm(rs,0x20000000);
3911 int jaddr=(int)out;
3912 emit_jeq(0);
3913 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3914 cop1_usable=1;
3915 }
3916
3917 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3918 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3919 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3920 emit_flds(temp,15);
3921 emit_ftosizs(15,15); // float->int, truncate
3922 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3923 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3924 emit_fsts(15,temp);
3925 return;
3926 }
3927 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3928 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3929 emit_vldr(temp,7);
3930 emit_ftosizd(7,13); // double->int, truncate
3931 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3932 emit_fsts(13,temp);
3933 return;
3934 }
3935
3936 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3937 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3938 emit_flds(temp,13);
3939 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3940 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3941 emit_fsitos(13,15);
3942 emit_fsts(15,temp);
3943 return;
3944 }
3945 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3946 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3947 emit_flds(temp,13);
3948 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3949 emit_fsitod(13,7);
3950 emit_vstr(7,temp);
3951 return;
3952 }
3953
3954 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3955 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3956 emit_flds(temp,13);
3957 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3958 emit_fcvtds(13,7);
3959 emit_vstr(7,temp);
3960 return;
3961 }
3962 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3963 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3964 emit_vldr(temp,7);
3965 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3966 emit_fcvtsd(7,13);
3967 emit_fsts(13,temp);
3968 return;
3969 }
3970 #endif
3971
3972 // C emulation code
3973
3974 u_int hr,reglist=0;
3975 for(hr=0;hr<HOST_REGS;hr++) {
3976 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3977 }
3978 save_regs(reglist);
3979
3980 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3981 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3982 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3983 emit_call((int)cvt_s_w);
3984 }
3985 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3986 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3987 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3988 emit_call((int)cvt_d_w);
3989 }
3990 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3991 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3992 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3993 emit_call((int)cvt_s_l);
3994 }
3995 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3996 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3997 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3998 emit_call((int)cvt_d_l);
3999 }
4000
4001 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4002 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4003 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4004 emit_call((int)cvt_d_s);
4005 }
4006 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4007 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4008 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4009 emit_call((int)cvt_w_s);
4010 }
4011 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4012 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4013 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4014 emit_call((int)cvt_l_s);
4015 }
4016
4017 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4018 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4019 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4020 emit_call((int)cvt_s_d);
4021 }
4022 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4023 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4024 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4025 emit_call((int)cvt_w_d);
4026 }
4027 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4028 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4029 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4030 emit_call((int)cvt_l_d);
4031 }
4032
4033 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4034 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4035 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4036 emit_call((int)round_l_s);
4037 }
4038 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4039 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4040 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4041 emit_call((int)trunc_l_s);
4042 }
4043 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4044 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4045 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4046 emit_call((int)ceil_l_s);
4047 }
4048 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4049 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4050 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4051 emit_call((int)floor_l_s);
4052 }
4053 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4054 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4055 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4056 emit_call((int)round_w_s);
4057 }
4058 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4059 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4060 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4061 emit_call((int)trunc_w_s);
4062 }
4063 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4064 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4065 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4066 emit_call((int)ceil_w_s);
4067 }
4068 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4069 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4070 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4071 emit_call((int)floor_w_s);
4072 }
4073
4074 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4075 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4076 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4077 emit_call((int)round_l_d);
4078 }
4079 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4080 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4081 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4082 emit_call((int)trunc_l_d);
4083 }
4084 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4085 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4086 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4087 emit_call((int)ceil_l_d);
4088 }
4089 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4090 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4091 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4092 emit_call((int)floor_l_d);
4093 }
4094 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4095 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4096 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4097 emit_call((int)round_w_d);
4098 }
4099 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4100 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4101 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4102 emit_call((int)trunc_w_d);
4103 }
4104 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4105 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4106 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4107 emit_call((int)ceil_w_d);
4108 }
4109 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4110 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4111 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4112 emit_call((int)floor_w_d);
4113 }
4114
4115 restore_regs(reglist);
3d624f89 4116#else
4117 cop1_unusable(i, i_regs);
4118#endif
57871462 4119}
4120#define fconv_assemble fconv_assemble_arm
4121
// Emit host code for the COP1 floating-point compare at instruction index i.
//   i      - index into source[] / opcode2[] for the instruction being assembled
//   i_regs - register state; its regmap is searched for FSREG, CSREG and a
//            temporary (-1) host register
// The function code is (source[i]&0x3f) and the format is opcode2[i]
// (0x10 uses reg_cop1_simple[], 0x11 uses reg_cop1_double[]).
// The compare result is kept in bit 0x800000 of the host register mapped to
// FSREG.  When DISABLE_COP1 is defined only cop1_unusable() is called.
4122void fcomp_assemble(int i,struct regstat *i_regs)
4123{
3d624f89 4124#ifndef DISABLE_COP1
57871462 4125 signed char fs=get_reg(i_regs->regmap,FSREG);
4126 signed char temp=get_reg(i_regs->regmap,-1);
4127 assert(temp>=0);
4128 // Check cop1 unusable
  // Only emitted while cop1_usable is 0: tests bit 0x20000000 of the register
  // mapped to CSREG and registers an FP_STUB for the jeq emitted at jaddr
  // (presumably taken when the bit is clear - confirm against emit_jeq).
4129 if(!cop1_usable) {
4130 signed char cs=get_reg(i_regs->regmap,CSREG);
4131 assert(cs>=0);
4132 emit_testimm(cs,0x20000000);
4133 int jaddr=(int)out;
4134 emit_jeq(0);
4135 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4136 cop1_usable=1;
4137 }
4138
  // Function code 0x30: just clear the condition bit, no operands are read.
4139 if((source[i]&0x3f)==0x30) {
4140 emit_andimm(fs,~0x800000,fs);
4141 return;
4142 }
4143
  // Function codes 0x38 and 0x39 (mask 0x3e): also just clear the bit.
4144 if((source[i]&0x3e)==0x38) {
4145 // sf/ngle - these should throw exceptions for NaNs
4146 emit_andimm(fs,~0x800000,fs);
4147 return;
4148 }
4149
  // Hardware VFP path: the condition bit is set first, the two operands are
  // loaded and compared, and the bit is then cleared (or set again) by the
  // emit_bic*/emit_orr* helpers chosen per function code - presumably
  // conditionally executed BIC/ORR instructions, going by their names.
4150 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4151 if(opcode2[i]==0x10) {
4152 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4153 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4154 emit_orimm(fs,0x800000,fs);
4155 emit_flds(temp,14);
4156 emit_flds(HOST_TEMPREG,15);
4157 emit_fcmps(14,15);
4158 emit_fmstat();
4159 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4160 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4161 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4162 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4163 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4164 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4165 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4166 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4167 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4168 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4169 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4170 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4171 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4172 return;
4173 }
  // Same sequence for the 0x11 format, using reg_cop1_double[] and VFP
  // registers 6 and 7.
4174 if(opcode2[i]==0x11) {
4175 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4176 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4177 emit_orimm(fs,0x800000,fs);
4178 emit_vldr(temp,6);
4179 emit_vldr(HOST_TEMPREG,7);
4180 emit_fcmpd(6,7);
4181 emit_fmstat();
4182 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4183 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4184 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4185 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4186 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4187 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4188 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4189 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4190 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4191 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4192 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4193 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4194 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4195 return;
4196 }
4197 #endif
4198
4199 // C only
  // Fallback: call the C helper for the comparison.  Every mapped host
  // register except fs is passed to save_regs/restore_regs; fs is instead
  // reloaded from FSREG after the call.
  // Note: the 0x30, 0x38 and 0x39 cases below cannot be reached, because those
  // function codes already returned above.
4200
4201 u_int hr,reglist=0;
4202 for(hr=0;hr<HOST_REGS;hr++) {
4203 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4204 }
4205 reglist&=~(1<<fs);
4206 save_regs(reglist);
4207 if(opcode2[i]==0x10) {
4208 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4209 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4210 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4211 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4212 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4213 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4214 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4215 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4216 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4217 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4218 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4219 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4220 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4221 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4222 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4223 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4224 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4225 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4226 }
4227 if(opcode2[i]==0x11) {
4228 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4229 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4230 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4231 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4232 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4233 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4234 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4235 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4236 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4237 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4238 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4239 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4240 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4241 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4242 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4243 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4244 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4245 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4246 }
4247 restore_regs(reglist);
4248 emit_loadreg(FSREG,fs);
3d624f89 4249#else
4250 cop1_unusable(i, i_regs);
4251#endif
57871462 4252}
4253
// Emit host code for the COP1 arithmetic instruction at index i.
//   i      - index into source[] / opcode2[] for the instruction being assembled
//   i_regs - register state; its regmap is searched for CSREG and a temporary
//            (-1) host register
// Function code (source[i]&0x3f): 0 add, 1 sub, 2 mul, 3 div, 4 sqrt, 5 abs,
// 6 mov, 7 neg.  opcode2[i]==0x10 is single precision, 0x11 double precision.
// Operand fields: bits 11-15 first source, bits 16-20 second source (only for
// function codes 0-3), bits 6-10 destination.
// When DISABLE_COP1 is defined only cop1_unusable() is called.
4254void float_assemble(int i,struct regstat *i_regs)
4255{
3d624f89 4256#ifndef DISABLE_COP1
57871462 4257 signed char temp=get_reg(i_regs->regmap,-1);
4258 assert(temp>=0);
4259 // Check cop1 unusable
  // Only emitted while cop1_usable is 0: tests bit 0x20000000 of the register
  // mapped to CSREG and registers an FP_STUB for the jeq emitted at jaddr.
4260 if(!cop1_usable) {
4261 signed char cs=get_reg(i_regs->regmap,CSREG);
4262 assert(cs>=0);
4263 emit_testimm(cs,0x20000000);
4264 int jaddr=(int)out;
4265 emit_jeq(0);
4266 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4267 cop1_usable=1;
4268 }
4269
  // Hardware VFP path.  The values read from reg_cop1_simple[] /
  // reg_cop1_double[] are passed on as the address operand of the load/store
  // emitters (presumably pointers to the FP register storage - confirm).
4270 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4271 if((source[i]&0x3f)==6) // mov
4272 {
  // Nothing is emitted when source and destination fields are the same.
4273 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4274 if(opcode2[i]==0x10) {
4275 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4276 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4277 emit_readword_indexed(0,temp,temp);
4278 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4279 }
4280 if(opcode2[i]==0x11) {
4281 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4282 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4283 emit_vldr(temp,7);
4284 emit_vstr(7,HOST_TEMPREG);
4285 }
4286 }
4287 return;
4288 }
4289
  // Unary operations (function codes 4, 5, 7; 6 was handled above).
  // temp is re-read for the destination only when it differs from the source.
4290 if((source[i]&0x3f)>3)
4291 {
4292 if(opcode2[i]==0x10) {
4293 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4294 emit_flds(temp,15);
4295 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4296 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4297 }
4298 if((source[i]&0x3f)==4) // sqrt
4299 emit_fsqrts(15,15);
4300 if((source[i]&0x3f)==5) // abs
4301 emit_fabss(15,15);
4302 if((source[i]&0x3f)==7) // neg
4303 emit_fnegs(15,15);
4304 emit_fsts(15,temp);
4305 }
4306 if(opcode2[i]==0x11) {
4307 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4308 emit_vldr(temp,7);
4309 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4310 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4311 }
4312 if((source[i]&0x3f)==4) // sqrt
4313 emit_fsqrtd(7,7);
4314 if((source[i]&0x3f)==5) // abs
4315 emit_fabsd(7,7);
4316 if((source[i]&0x3f)==7) // neg
4317 emit_fnegd(7,7);
4318 emit_vstr(7,temp);
4319 }
4320 return;
4321 }
  // Binary operations (function codes 0-3).
4322 if((source[i]&0x3f)<4)
4323 {
4324 if(opcode2[i]==0x10) {
4325 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4326 }
4327 if(opcode2[i]==0x11) {
4328 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4329 }
  // Two different source fields: temp holds the first operand's entry and
  // HOST_TEMPREG the second's.  The destination entry is read into temp only
  // if it matches neither source; if it matches the second source the result
  // is stored through HOST_TEMPREG instead.
4330 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4331 if(opcode2[i]==0x10) {
4332 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4333 emit_flds(temp,15);
4334 emit_flds(HOST_TEMPREG,13);
4335 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4336 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4337 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4338 }
4339 }
4340 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4341 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4342 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4343 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4344 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4345 emit_fsts(15,HOST_TEMPREG);
4346 }else{
4347 emit_fsts(15,temp);
4348 }
4349 }
4350 else if(opcode2[i]==0x11) {
4351 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4352 emit_vldr(temp,7);
4353 emit_vldr(HOST_TEMPREG,6);
4354 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4355 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4356 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4357 }
4358 }
4359 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4360 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4361 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4362 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4363 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4364 emit_vstr(7,HOST_TEMPREG);
4365 }else{
4366 emit_vstr(7,temp);
4367 }
4368 }
4369 }
  // Both source fields name the same register: it is loaded once and used
  // for both operands.
4370 else {
4371 if(opcode2[i]==0x10) {
4372 emit_flds(temp,15);
4373 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4374 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4375 }
4376 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4377 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4378 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4379 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4380 emit_fsts(15,temp);
4381 }
4382 else if(opcode2[i]==0x11) {
4383 emit_vldr(temp,7);
4384 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4385 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4386 }
4387 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4388 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4389 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4390 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4391 emit_vstr(7,temp);
4392 }
4393 }
4394 return;
4395 }
4396 #endif
4397
  // Fallback: call the C helper.  All mapped host registers are passed to
  // save_regs/restore_regs around the call.  Binary helpers get the two
  // source entries and the destination entry in ARG1..ARG3; unary helpers
  // get source and destination in ARG1 and ARG2.
4398 u_int hr,reglist=0;
4399 for(hr=0;hr<HOST_REGS;hr++) {
4400 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4401 }
4402 if(opcode2[i]==0x10) { // Single precision
4403 save_regs(reglist);
4404 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4405 if((source[i]&0x3f)<4) {
4406 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4407 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4408 }else{
4409 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4410 }
4411 switch(source[i]&0x3f)
4412 {
4413 case 0x00: emit_call((int)add_s);break;
4414 case 0x01: emit_call((int)sub_s);break;
4415 case 0x02: emit_call((int)mul_s);break;
4416 case 0x03: emit_call((int)div_s);break;
4417 case 0x04: emit_call((int)sqrt_s);break;
4418 case 0x05: emit_call((int)abs_s);break;
4419 case 0x06: emit_call((int)mov_s);break;
4420 case 0x07: emit_call((int)neg_s);break;
4421 }
4422 restore_regs(reglist);
4423 }
4424 if(opcode2[i]==0x11) { // Double precision
4425 save_regs(reglist);
4426 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4427 if((source[i]&0x3f)<4) {
4428 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4429 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4430 }else{
4431 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4432 }
4433 switch(source[i]&0x3f)
4434 {
4435 case 0x00: emit_call((int)add_d);break;
4436 case 0x01: emit_call((int)sub_d);break;
4437 case 0x02: emit_call((int)mul_d);break;
4438 case 0x03: emit_call((int)div_d);break;
4439 case 0x04: emit_call((int)sqrt_d);break;
4440 case 0x05: emit_call((int)abs_d);break;
4441 case 0x06: emit_call((int)mov_d);break;
4442 case 0x07: emit_call((int)neg_d);break;
4443 }
4444 restore_regs(reglist);
4445 }
3d624f89 4446#else
4447 cop1_unusable(i, i_regs);
4448#endif
57871462 4449}
4450
// Emit host code for the multiply/divide instruction at index i.
//   i      - index into opcode2[], rs1[] and rs2[] for the instruction
//   i_regs - register state; its regmap is searched for the source registers
//            and for HIREG/LOREG (and their |64 upper halves in 64-bit ops)
// opcode2[i] selects the operation (see the list below); bit 2 of opcode2[i]
// distinguishes the 64-bit forms from the 32-bit ones.
// If either rs1[i] or rs2[i] is 0, the host registers mapped to HIREG and
// LOREG (when present) are simply zeroed.
4451void multdiv_assemble_arm(int i,struct regstat *i_regs)
4452{
4453 // case 0x18: MULT
4454 // case 0x19: MULTU
4455 // case 0x1A: DIV
4456 // case 0x1B: DIVU
4457 // case 0x1C: DMULT
4458 // case 0x1D: DMULTU
4459 // case 0x1E: DDIV
4460 // case 0x1F: DDIVU
4461 if(rs1[i]&&rs2[i])
4462 {
4463 if((opcode2[i]&4)==0) // 32-bit
4464 {
4465 if(opcode2[i]==0x18) // MULT
4466 {
4467 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4468 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4469 signed char hi=get_reg(i_regs->regmap,HIREG);
4470 signed char lo=get_reg(i_regs->regmap,LOREG);
4471 assert(m1>=0);
4472 assert(m2>=0);
4473 assert(hi>=0);
4474 assert(lo>=0);
4475 emit_smull(m1,m2,hi,lo);
4476 }
4477 if(opcode2[i]==0x19) // MULTU
4478 {
4479 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4480 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4481 signed char hi=get_reg(i_regs->regmap,HIREG);
4482 signed char lo=get_reg(i_regs->regmap,LOREG);
4483 assert(m1>=0);
4484 assert(m2>=0);
4485 assert(hi>=0);
4486 assert(lo>=0);
4487 emit_umull(m1,m2,hi,lo);
4488 }
4489 if(opcode2[i]==0x1A) // DIV
4490 {
4491 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4492 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4493 assert(d1>=0);
4494 assert(d2>=0);
4495 signed char quotient=get_reg(i_regs->regmap,LOREG);
4496 signed char remainder=get_reg(i_regs->regmap,HIREG);
4497 assert(quotient>=0);
4498 assert(remainder>=0);
  // Inline software division.  The branch targets below are fixed byte
  // offsets from the current output pointer, so the number and order of the
  // emitted instructions must not change without updating them.
  // NOTE(review): out+52 appears to land on the final emit_test/emit_negmi
  // pair and out-16 on the emit_cmp, assuming each emitter call produces
  // exactly one 4-byte instruction - confirm.
4499 emit_movs(d1,remainder);
44a80f6a 4500 emit_movimm(0xffffffff,quotient);
4501 emit_negmi(quotient,quotient); // .. quotient and ..
4502 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 4503 emit_movs(d2,HOST_TEMPREG);
4504 emit_jeq((int)out+52); // Division by zero
4505 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4506 emit_clz(HOST_TEMPREG,quotient);
4507 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4508 emit_orimm(quotient,1<<31,quotient);
4509 emit_shr(quotient,quotient,quotient);
4510 emit_cmp(remainder,HOST_TEMPREG);
4511 emit_subcs(remainder,HOST_TEMPREG,remainder);
4512 emit_adcs(quotient,quotient,quotient);
4513 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4514 emit_jcc((int)out-16); // -4
4515 emit_teq(d1,d2);
4516 emit_negmi(quotient,quotient);
4517 emit_test(d1,d1);
4518 emit_negmi(remainder,remainder);
4519 }
4520 if(opcode2[i]==0x1B) // DIVU
4521 {
4522 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4523 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4524 assert(d1>=0);
4525 assert(d2>=0);
4526 signed char quotient=get_reg(i_regs->regmap,LOREG);
4527 signed char remainder=get_reg(i_regs->regmap,HIREG);
4528 assert(quotient>=0);
4529 assert(remainder>=0);
  // Same scheme as DIV, unsigned.  On a zero divisor the jeq skips the rest
  // of the sequence, leaving remainder=d1 and quotient=0xffffffff.
  // The emitted code shifts the host register d2 in place.
  // NOTE(review): out+40 / out-16 again assume one 4-byte instruction per
  // emitter call - confirm.
44a80f6a 4530 emit_mov(d1,remainder);
4531 emit_movimm(0xffffffff,quotient); // div0 case
57871462 4532 emit_test(d2,d2);
44a80f6a 4533 emit_jeq((int)out+40); // Division by zero
57871462 4534 emit_clz(d2,HOST_TEMPREG);
4535 emit_movimm(1<<31,quotient);
4536 emit_shl(d2,HOST_TEMPREG,d2);
57871462 4537 emit_shr(quotient,HOST_TEMPREG,quotient);
4538 emit_cmp(remainder,d2);
4539 emit_subcs(remainder,d2,remainder);
4540 emit_adcs(quotient,quotient,quotient);
4541 emit_shrcc_imm(d2,1,d2);
4542 emit_jcc((int)out-16); // -4
4543 }
4544 }
4545 else // 64-bit
4546 {
4547 if(opcode2[i]==0x1C) // DMULT
4548 {
  // This assert fails whenever the branch is entered, so DMULT is
  // effectively rejected here when asserts are enabled.
4549 assert(opcode2[i]!=0x1C);
4550 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4551 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4552 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4553 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4554 assert(m1h>=0);
4555 assert(m2h>=0);
4556 assert(m1l>=0);
4557 assert(m2l>=0);
4558 emit_pushreg(m2h);
4559 emit_pushreg(m2l);
4560 emit_pushreg(m1h);
4561 emit_pushreg(m1l);
4562 emit_call((int)&mult64);
4563 emit_popreg(m1l);
4564 emit_popreg(m1h);
4565 emit_popreg(m2l);
4566 emit_popreg(m2h);
4567 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4568 signed char hil=get_reg(i_regs->regmap,HIREG);
4569 if(hih>=0) emit_loadreg(HIREG|64,hih);
4570 if(hil>=0) emit_loadreg(HIREG,hil);
4571 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4572 signed char lol=get_reg(i_regs->regmap,LOREG);
4573 if(loh>=0) emit_loadreg(LOREG|64,loh);
4574 if(lol>=0) emit_loadreg(LOREG,lol);
4575 }
4576 if(opcode2[i]==0x1D) // DMULTU
4577 {
4578 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4579 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4580 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4581 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4582 assert(m1h>=0);
4583 assert(m2h>=0);
4584 assert(m1l>=0);
4585 assert(m2l>=0);
  // The four operand halves are placed in host registers 0-3 for the call.
  // A half whose host register has already been overwritten by an earlier
  // move is re-read from dynarec_local+reg*4 (presumably where
  // save_regs(0x100f) stored it - confirm).
4586 save_regs(0x100f);
4587 if(m1l!=0) emit_mov(m1l,0);
4588 if(m1h==0) emit_readword((int)&dynarec_local,1);
4589 else if(m1h>1) emit_mov(m1h,1);
4590 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4591 else if(m2l>2) emit_mov(m2l,2);
4592 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4593 else if(m2h>3) emit_mov(m2h,3);
4594 emit_call((int)&multu64);
4595 restore_regs(0x100f);
  // NOTE(review): hih/hil/loh/lol are looked up here but never used in live
  // code; unlike DMULT, DDIV and DDIVU, no emit_loadreg follows the call.
  // Confirm whether HI/LO need to be reloaded after multu64.
4596 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4597 signed char hil=get_reg(i_regs->regmap,HIREG);
4598 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4599 signed char lol=get_reg(i_regs->regmap,LOREG);
4600 /*signed char temp=get_reg(i_regs->regmap,-1);
4601 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4602 signed char rl=get_reg(i_regs->regmap,HIREG);
4603 assert(m1h>=0);
4604 assert(m2h>=0);
4605 assert(m1l>=0);
4606 assert(m2l>=0);
4607 assert(temp>=0);
4608 //emit_mov(m1l,EAX);
4609 //emit_mul(m2l);
4610 emit_umull(rl,rh,m1l,m2l);
4611 emit_storereg(LOREG,rl);
4612 emit_mov(rh,temp);
4613 //emit_mov(m1h,EAX);
4614 //emit_mul(m2l);
4615 emit_umull(rl,rh,m1h,m2l);
4616 emit_adds(rl,temp,temp);
4617 emit_adcimm(rh,0,rh);
4618 emit_storereg(HIREG,rh);
4619 //emit_mov(m2h,EAX);
4620 //emit_mul(m1l);
4621 emit_umull(rl,rh,m1l,m2h);
4622 emit_adds(rl,temp,temp);
4623 emit_adcimm(rh,0,rh);
4624 emit_storereg(LOREG|64,temp);
4625 emit_mov(rh,temp);
4626 //emit_mov(m2h,EAX);
4627 //emit_mul(m1h);
4628 emit_umull(rl,rh,m1h,m2h);
4629 emit_adds(rl,temp,rl);
4630 emit_loadreg(HIREG,temp);
4631 emit_adcimm(rh,0,rh);
4632 emit_adds(rl,temp,rl);
4633 emit_adcimm(rh,0,rh);
4634 // DEBUG
4635 /*
4636 emit_pushreg(m2h);
4637 emit_pushreg(m2l);
4638 emit_pushreg(m1h);
4639 emit_pushreg(m1l);
4640 emit_call((int)&multu64);
4641 emit_popreg(m1l);
4642 emit_popreg(m1h);
4643 emit_popreg(m2l);
4644 emit_popreg(m2h);
4645 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4646 signed char hil=get_reg(i_regs->regmap,HIREG);
4647 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4648 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4649 */
4650 // Shouldn't be necessary
4651 //char loh=get_reg(i_regs->regmap,LOREG|64);
4652 //char lol=get_reg(i_regs->regmap,LOREG);
4653 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4654 //if(lol>=0) emit_loadreg(LOREG,lol);
4655 }
4656 if(opcode2[i]==0x1E) // DDIV
4657 {
4658 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4659 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4660 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4661 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4662 assert(d1h>=0);
4663 assert(d2h>=0);
4664 assert(d1l>=0);
4665 assert(d2l>=0);
  // Same argument shuffle into host registers 0-3 as in DMULTU, followed by
  // a call to div64 and a reload of whichever HI/LO halves are mapped.
4666 save_regs(0x100f);
4667 if(d1l!=0) emit_mov(d1l,0);
4668 if(d1h==0) emit_readword((int)&dynarec_local,1);
4669 else if(d1h>1) emit_mov(d1h,1);
4670 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4671 else if(d2l>2) emit_mov(d2l,2);
4672 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4673 else if(d2h>3) emit_mov(d2h,3);
4674 emit_call((int)&div64);
4675 restore_regs(0x100f);
4676 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4677 signed char hil=get_reg(i_regs->regmap,HIREG);
4678 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4679 signed char lol=get_reg(i_regs->regmap,LOREG);
4680 if(hih>=0) emit_loadreg(HIREG|64,hih);
4681 if(hil>=0) emit_loadreg(HIREG,hil);
4682 if(loh>=0) emit_loadreg(LOREG|64,loh);
4683 if(lol>=0) emit_loadreg(LOREG,lol);
4684 }
4685 if(opcode2[i]==0x1F) // DDIVU
4686 {
4687 //u_int hr,reglist=0;
4688 //for(hr=0;hr<HOST_REGS;hr++) {
4689 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4690 //}
4691 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4692 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4693 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4694 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4695 assert(d1h>=0);
4696 assert(d2h>=0);
4697 assert(d1l>=0);
4698 assert(d2l>=0);
  // Identical to DDIV except that divu64 is called.
4699 save_regs(0x100f);
4700 if(d1l!=0) emit_mov(d1l,0);
4701 if(d1h==0) emit_readword((int)&dynarec_local,1);
4702 else if(d1h>1) emit_mov(d1h,1);
4703 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4704 else if(d2l>2) emit_mov(d2l,2);
4705 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4706 else if(d2h>3) emit_mov(d2h,3);
4707 emit_call((int)&divu64);
4708 restore_regs(0x100f);
4709 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4710 signed char hil=get_reg(i_regs->regmap,HIREG);
4711 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4712 signed char lol=get_reg(i_regs->regmap,LOREG);
4713 if(hih>=0) emit_loadreg(HIREG|64,hih);
4714 if(hil>=0) emit_loadreg(HIREG,hil);
4715 if(loh>=0) emit_loadreg(LOREG|64,loh);
4716 if(lol>=0) emit_loadreg(LOREG,lol);
4717 }
4718 }
4719 }
4720 else
4721 {
4722 // Multiply by zero is zero.
4723 // MIPS does not have a divide by zero exception.
4724 // The result is undefined, we return zero.
4725 signed char hr=get_reg(i_regs->regmap,HIREG);
4726 signed char lr=get_reg(i_regs->regmap,LOREG);
4727 if(hr>=0) emit_zeroreg(hr);
4728 if(lr>=0) emit_zeroreg(lr);
4729 }
4730}
// Alias the generic name to this ARM implementation.
4731#define multdiv_assemble multdiv_assemble_arm
4732
/*
 * Hash-register preload hook: intentionally a no-op on ARM.
 * On x86 this step loads the constant 0xf8 into register r; on ARM the
 * hash is produced by a single instruction (see do_rhash), so nothing
 * needs to be emitted here.
 */
void do_preload_rhash(int r)
{
  (void)r; /* unused on this architecture */
}
4737
4738void do_preload_rhtbl(int ht) {
4739 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4740}
4741
/*
 * Emit the hash computation for the mini hash table:
 * rh = rs & 0xf8.
 */
void do_rhash(int rs,int rh)
{
  emit_andimm(rs,
              0xf8,
              rh);
}
4745
4746void do_miniht_load(int ht,int rh) {
4747 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4748 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4749}
4750
4751void do_miniht_jump(int rs,int rh,int ht) {
4752 emit_cmp(rh,rs);
4753 emit_ldreq_indexed(ht,4,15);
4754 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4755 emit_mov(rs,7);
4756 emit_jmp(jump_vaddr_reg[7]);
4757 #else
4758 emit_jmp(jump_vaddr_reg[rs]);
4759 #endif
4760}
4761
4762void do_miniht_insert(u_int return_address,int rt,int temp) {
4763 #ifdef ARMv5_ONLY
4764 emit_movimm(return_address,rt); // PC into link register
4765 add_to_linker((int)out,return_address,1);
4766 emit_pcreladdr(temp);
4767 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4768 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4769 #else
4770 emit_movw(return_address&0x0000FFFF,rt);
4771 add_to_linker((int)out,return_address,1);
4772 emit_pcreladdr(temp);
4773 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4774 emit_movt(return_address&0xFFFF0000,rt);
4775 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4776 #endif
4777}
4778
4779// Sign-extend to 64 bits and write out upper half of a register
4780// This is useful where we have a 32-bit value in a register, and want to
4781// keep it in a 32-bit register, but can't guarantee that it won't be read
4782// as a 64-bit value later.
4783void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4784{
24385cae 4785#ifndef FORCE32
57871462 4786 if(is32_pre==is32) return;
4787 int hr,reg;
4788 for(hr=0;hr<HOST_REGS;hr++) {
4789 if(hr!=EXCLUDE_REG) {
4790 //if(pre[hr]==entry[hr]) {
4791 if((reg=pre[hr])>=0) {
4792 if((dirty>>hr)&1) {
4793 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4794 emit_sarimm(hr,31,HOST_TEMPREG);
4795 emit_storereg(reg|64,HOST_TEMPREG);
4796 }
4797 }
4798 }
4799 //}
4800 }
4801 }
24385cae 4802#endif
57871462 4803}
4804
4805void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4806{
4807 //if(dirty_pre==dirty) return;
4808 int hr,reg,new_hr;
4809 for(hr=0;hr<HOST_REGS;hr++) {
4810 if(hr!=EXCLUDE_REG) {
4811 reg=pre[hr];
4812 if(((~u)>>(reg&63))&1) {
f776eb14 4813 if(reg>0) {
57871462 4814 if(((dirty_pre&~dirty)>>hr)&1) {
4815 if(reg>0&&reg<34) {
4816 emit_storereg(reg,hr);
4817 if( ((is32_pre&~uu)>>reg)&1 ) {
4818 emit_sarimm(hr,31,HOST_TEMPREG);
4819 emit_storereg(reg|64,HOST_TEMPREG);
4820 }
4821 }
4822 else if(reg>=64) {
4823 emit_storereg(reg,hr);
4824 }
4825 }
4826 }
57871462 4827 }
4828 }
4829 }
4830}
4831
4832
4833/* using strd could possibly help but you'd have to allocate registers in pairs
4834void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4835{
4836 int hr;
4837 int wrote=-1;
4838 for(hr=HOST_REGS-1;hr>=0;hr--) {
4839 if(hr!=EXCLUDE_REG) {
4840 if(pre[hr]!=entry[hr]) {
4841 if(pre[hr]>=0) {
4842 if((dirty>>hr)&1) {
4843 if(get_reg(entry,pre[hr])<0) {
4844 if(pre[hr]<64) {
4845 if(!((u>>pre[hr])&1)) {
4846 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4847 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4848 emit_sarimm(hr,31,hr+1);
4849 emit_strdreg(pre[hr],hr);
4850 }
4851 else
4852 emit_storereg(pre[hr],hr);
4853 }else{
4854 emit_storereg(pre[hr],hr);
4855 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4856 emit_sarimm(hr,31,hr);
4857 emit_storereg(pre[hr]|64,hr);
4858 }
4859 }
4860 }
4861 }else{
4862 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4863 emit_storereg(pre[hr],hr);
4864 }
4865 }
4866 wrote=hr;
4867 }
4868 }
4869 }
4870 }
4871 }
4872 }
4873 for(hr=0;hr<HOST_REGS;hr++) {
4874 if(hr!=EXCLUDE_REG) {
4875 if(pre[hr]!=entry[hr]) {
4876 if(pre[hr]>=0) {
4877 int nr;
4878 if((nr=get_reg(entry,pre[hr]))>=0) {
4879 emit_mov(hr,nr);
4880 }
4881 }
4882 }
4883 }
4884 }
4885}
4886#define wb_invalidate wb_invalidate_arm
4887*/
4888
dd3a91a1 4889// Clearing the cache is rather slow on ARM Linux, so mark the areas
4890// that need to be cleared, and then only clear these areas once.
4891void do_clear_cache()
4892{
4893 int i,j;
4894 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4895 {
4896 u_int bitmap=needs_clear_cache[i];
4897 if(bitmap) {
4898 u_int start,end;
4899 for(j=0;j<32;j++)
4900 {
4901 if(bitmap&(1<<j)) {
4902 start=BASE_ADDR+i*131072+j*4096;
4903 end=start+4095;
4904 j++;
4905 while(j<32) {
4906 if(bitmap&(1<<j)) {
4907 end+=4096;
4908 j++;
4909 }else{
4910 __clear_cache((void *)start,(void *)end);
4911 break;
4912 }
4913 }
4914 }
4915 }
4916 needs_clear_cache[i]=0;
4917 }
4918 }
4919}
4920
57871462 4921// CPU-architecture-specific initialization
4922void arch_init() {
3d624f89 4923#ifndef DISABLE_COP1
57871462 4924 rounding_modes[0]=0x0<<22; // round
4925 rounding_modes[1]=0x3<<22; // trunc
4926 rounding_modes[2]=0x1<<22; // ceil
4927 rounding_modes[3]=0x2<<22; // floor
3d624f89 4928#endif
57871462 4929}
b9b61529 4930
4931// vim:shiftwidth=2:expandtab