rewrite memhandlers (read)
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22extern int cycle_count;
23extern int last_count;
24extern int pcaddr;
25extern int pending_exception;
26extern int branch_target;
27extern uint64_t readmem_dword;
28#ifdef MUPEN64
29extern precomp_instr fake_pc;
30#endif
31extern void *dynarec_local;
32extern u_int memory_map[1048576];
33extern u_int mini_ht[32][2];
34extern u_int rounding_modes[4];
35
36void indirect_jump_indexed();
37void indirect_jump();
38void do_interrupt();
39void jump_vaddr_r0();
40void jump_vaddr_r1();
41void jump_vaddr_r2();
42void jump_vaddr_r3();
43void jump_vaddr_r4();
44void jump_vaddr_r5();
45void jump_vaddr_r6();
46void jump_vaddr_r7();
47void jump_vaddr_r8();
48void jump_vaddr_r9();
49void jump_vaddr_r10();
50void jump_vaddr_r12();
51
52const u_int jump_vaddr_reg[16] = {
53 (int)jump_vaddr_r0,
54 (int)jump_vaddr_r1,
55 (int)jump_vaddr_r2,
56 (int)jump_vaddr_r3,
57 (int)jump_vaddr_r4,
58 (int)jump_vaddr_r5,
59 (int)jump_vaddr_r6,
60 (int)jump_vaddr_r7,
61 (int)jump_vaddr_r8,
62 (int)jump_vaddr_r9,
63 (int)jump_vaddr_r10,
64 0,
65 (int)jump_vaddr_r12,
66 0,
67 0,
68 0};
69
70void invalidate_addr_r0();
71void invalidate_addr_r1();
72void invalidate_addr_r2();
73void invalidate_addr_r3();
74void invalidate_addr_r4();
75void invalidate_addr_r5();
76void invalidate_addr_r6();
77void invalidate_addr_r7();
78void invalidate_addr_r8();
79void invalidate_addr_r9();
80void invalidate_addr_r10();
81void invalidate_addr_r12();
82
83const u_int invalidate_addr_reg[16] = {
84 (int)invalidate_addr_r0,
85 (int)invalidate_addr_r1,
86 (int)invalidate_addr_r2,
87 (int)invalidate_addr_r3,
88 (int)invalidate_addr_r4,
89 (int)invalidate_addr_r5,
90 (int)invalidate_addr_r6,
91 (int)invalidate_addr_r7,
92 (int)invalidate_addr_r8,
93 (int)invalidate_addr_r9,
94 (int)invalidate_addr_r10,
95 0,
96 (int)invalidate_addr_r12,
97 0,
98 0,
99 0};
100
101#include "fpu.h"
102
103unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
104
105/* Linker */
106
107void set_jump_target(int addr,u_int target)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
113 assert((addr&3)==0);
114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
116 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
121 assert((addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
126 assert((addr&3)==0);
127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
138// This optionally copies the instruction from the target of the branch into
139// the space before the branch. Works, but the difference in speed is
140// usually insignificant.
141void set_jump_target_fillslot(int addr,u_int target,int copy)
142{
143 u_char *ptr=(u_char *)addr;
144 u_int *ptr2=(u_int *)ptr;
145 assert(!copy||ptr2[-1]==0xe28dd000);
146 if(ptr[3]==0xe2) {
147 assert(!copy);
148 assert((target-(u_int)ptr2-8)<4096);
149 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
150 }
151 else {
152 assert((ptr[3]&0x0e)==0xa);
153 u_int target_insn=*(u_int *)target;
154 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
155 copy=0;
156 }
157 if((target_insn&0x0c100000)==0x04100000) { // Load
158 copy=0;
159 }
160 if(target_insn&0x08000000) {
161 copy=0;
162 }
163 if(copy) {
164 ptr2[-1]=target_insn;
165 target+=4;
166 }
167 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
168 }
169}
170
171/* Literal pool */
172add_literal(int addr,int val)
173{
174 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
175 literals[literalcount][0]=addr;
176 literals[literalcount][1]=val;
177 literalcount++;
178}
179
180void *kill_pointer(void *stub)
181{
182 int *ptr=(int *)(stub+4);
183 assert((*ptr&0x0ff00000)==0x05900000);
184 u_int offset=*ptr&0xfff;
185 int **l_ptr=(void *)ptr+offset+8;
186 int *i_ptr=*l_ptr;
187 set_jump_target((int)i_ptr,(int)stub);
188 return i_ptr;
189}
190
191// find where external branch is liked to using addr of it's stub:
192// get address that insn one after stub loads (dyna_linker arg1),
193// treat it as a pointer to branch insn,
194// return addr where that branch jumps to
195int get_pointer(void *stub)
196{
197 //printf("get_pointer(%x)\n",(int)stub);
198 int *ptr=(int *)(stub+4);
199 assert((*ptr&0x0fff0000)==0x059f0000);
200 u_int offset=*ptr&0xfff;
201 int **l_ptr=(void *)ptr+offset+8;
202 int *i_ptr=*l_ptr;
203 assert((*i_ptr&0x0f000000)==0x0a000000);
204 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
205}
206
207// Find the "clean" entry point from a "dirty" entry point
208// by skipping past the call to verify_code
209u_int get_clean_addr(int addr)
210{
211 int *ptr=(int *)addr;
212 #ifdef ARMv5_ONLY
213 ptr+=4;
214 #else
215 ptr+=6;
216 #endif
217 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
218 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
219 ptr++;
220 if((*ptr&0xFF000000)==0xea000000) {
221 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
222 }
223 return (u_int)ptr;
224}
225
226int verify_dirty(int addr)
227{
228 u_int *ptr=(u_int *)addr;
229 #ifdef ARMv5_ONLY
230 // get from literal pool
231 assert((*ptr&0xFFFF0000)==0xe59f0000);
232 u_int offset=*ptr&0xfff;
233 u_int *l_ptr=(void *)ptr+offset+8;
234 u_int source=l_ptr[0];
235 u_int copy=l_ptr[1];
236 u_int len=l_ptr[2];
237 ptr+=4;
238 #else
239 // ARMv7 movw/movt
240 assert((*ptr&0xFFF00000)==0xe3000000);
241 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
242 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
243 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
244 ptr+=6;
245 #endif
246 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
247 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
248 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
249 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
250 unsigned int page=source>>12;
251 unsigned int map_value=memory_map[page];
252 if(map_value>=0x80000000) return 0;
253 while(page<((source+len-1)>>12)) {
254 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
255 }
256 source = source+(map_value<<2);
257 }
258 //printf("verify_dirty: %x %x %x\n",source,copy,len);
259 return !memcmp((void *)source,(void *)copy,len);
260}
261
262// This doesn't necessarily find all clean entry points, just
263// guarantees that it's not dirty
264int isclean(int addr)
265{
266 #ifdef ARMv5_ONLY
267 int *ptr=((u_int *)addr)+4;
268 #else
269 int *ptr=((u_int *)addr)+6;
270 #endif
271 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
272 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
273 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
274 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
276 return 1;
277}
278
279void get_bounds(int addr,u_int *start,u_int *end)
280{
281 u_int *ptr=(u_int *)addr;
282 #ifdef ARMv5_ONLY
283 // get from literal pool
284 assert((*ptr&0xFFFF0000)==0xe59f0000);
285 u_int offset=*ptr&0xfff;
286 u_int *l_ptr=(void *)ptr+offset+8;
287 u_int source=l_ptr[0];
288 //u_int copy=l_ptr[1];
289 u_int len=l_ptr[2];
290 ptr+=4;
291 #else
292 // ARMv7 movw/movt
293 assert((*ptr&0xFFF00000)==0xe3000000);
294 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
295 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
296 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
297 ptr+=6;
298 #endif
299 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
300 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
301 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
302 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
303 if(memory_map[source>>12]>=0x80000000) source = 0;
304 else source = source+(memory_map[source>>12]<<2);
305 }
306 *start=source;
307 *end=source+len;
308}
309
310/* Register allocation */
311
312// Note: registers are allocated clean (unmodified state)
313// if you intend to modify the register, you must call dirty_reg().
314void alloc_reg(struct regstat *cur,int i,signed char reg)
315{
316 int r,hr;
317 int preferred_reg = (reg&7);
318 if(reg==CCREG) preferred_reg=HOST_CCREG;
319 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
320
321 // Don't allocate unused registers
322 if((cur->u>>reg)&1) return;
323
324 // see if it's already allocated
325 for(hr=0;hr<HOST_REGS;hr++)
326 {
327 if(cur->regmap[hr]==reg) return;
328 }
329
330 // Keep the same mapping if the register was already allocated in a loop
331 preferred_reg = loop_reg(i,reg,preferred_reg);
332
333 // Try to allocate the preferred register
334 if(cur->regmap[preferred_reg]==-1) {
335 cur->regmap[preferred_reg]=reg;
336 cur->dirty&=~(1<<preferred_reg);
337 cur->isconst&=~(1<<preferred_reg);
338 return;
339 }
340 r=cur->regmap[preferred_reg];
341 if(r<64&&((cur->u>>r)&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347 if(r>=64&&((cur->uu>>(r&63))&1)) {
348 cur->regmap[preferred_reg]=reg;
349 cur->dirty&=~(1<<preferred_reg);
350 cur->isconst&=~(1<<preferred_reg);
351 return;
352 }
353
354 // Clear any unneeded registers
355 // We try to keep the mapping consistent, if possible, because it
356 // makes branches easier (especially loops). So we try to allocate
357 // first (see above) before removing old mappings. If this is not
358 // possible then go ahead and clear out the registers that are no
359 // longer needed.
360 for(hr=0;hr<HOST_REGS;hr++)
361 {
362 r=cur->regmap[hr];
363 if(r>=0) {
364 if(r<64) {
365 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
366 }
367 else
368 {
369 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
370 }
371 }
372 }
373 // Try to allocate any available register, but prefer
374 // registers that have not been used recently.
375 if(i>0) {
376 for(hr=0;hr<HOST_REGS;hr++) {
377 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
378 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
379 cur->regmap[hr]=reg;
380 cur->dirty&=~(1<<hr);
381 cur->isconst&=~(1<<hr);
382 return;
383 }
384 }
385 }
386 }
387 // Try to allocate any available register
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
390 cur->regmap[hr]=reg;
391 cur->dirty&=~(1<<hr);
392 cur->isconst&=~(1<<hr);
393 return;
394 }
395 }
396
397 // Ok, now we have to evict someone
398 // Pick a register we hopefully won't need soon
399 u_char hsn[MAXREG+1];
400 memset(hsn,10,sizeof(hsn));
401 int j;
402 lsn(hsn,i,&preferred_reg);
403 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
404 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
405 if(i>0) {
406 // Don't evict the cycle count at entry points, otherwise the entry
407 // stub will have to write it.
408 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
409 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
410 for(j=10;j>=3;j--)
411 {
412 // Alloc preferred register if available
413 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
414 for(hr=0;hr<HOST_REGS;hr++) {
415 // Evict both parts of a 64-bit register
416 if((cur->regmap[hr]&63)==r) {
417 cur->regmap[hr]=-1;
418 cur->dirty&=~(1<<hr);
419 cur->isconst&=~(1<<hr);
420 }
421 }
422 cur->regmap[preferred_reg]=reg;
423 return;
424 }
425 for(r=1;r<=MAXREG;r++)
426 {
427 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
428 for(hr=0;hr<HOST_REGS;hr++) {
429 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
430 if(cur->regmap[hr]==r+64) {
431 cur->regmap[hr]=reg;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 return;
435 }
436 }
437 }
438 for(hr=0;hr<HOST_REGS;hr++) {
439 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
440 if(cur->regmap[hr]==r) {
441 cur->regmap[hr]=reg;
442 cur->dirty&=~(1<<hr);
443 cur->isconst&=~(1<<hr);
444 return;
445 }
446 }
447 }
448 }
449 }
450 }
451 }
452 for(j=10;j>=0;j--)
453 {
454 for(r=1;r<=MAXREG;r++)
455 {
456 if(hsn[r]==j) {
457 for(hr=0;hr<HOST_REGS;hr++) {
458 if(cur->regmap[hr]==r+64) {
459 cur->regmap[hr]=reg;
460 cur->dirty&=~(1<<hr);
461 cur->isconst&=~(1<<hr);
462 return;
463 }
464 }
465 for(hr=0;hr<HOST_REGS;hr++) {
466 if(cur->regmap[hr]==r) {
467 cur->regmap[hr]=reg;
468 cur->dirty&=~(1<<hr);
469 cur->isconst&=~(1<<hr);
470 return;
471 }
472 }
473 }
474 }
475 }
476 printf("This shouldn't happen (alloc_reg)");exit(1);
477}
478
479void alloc_reg64(struct regstat *cur,int i,signed char reg)
480{
481 int preferred_reg = 8+(reg&1);
482 int r,hr;
483
484 // allocate the lower 32 bits
485 alloc_reg(cur,i,reg);
486
487 // Don't allocate unused registers
488 if((cur->uu>>reg)&1) return;
489
490 // see if the upper half is already allocated
491 for(hr=0;hr<HOST_REGS;hr++)
492 {
493 if(cur->regmap[hr]==reg+64) return;
494 }
495
496 // Keep the same mapping if the register was already allocated in a loop
497 preferred_reg = loop_reg(i,reg,preferred_reg);
498
499 // Try to allocate the preferred register
500 if(cur->regmap[preferred_reg]==-1) {
501 cur->regmap[preferred_reg]=reg|64;
502 cur->dirty&=~(1<<preferred_reg);
503 cur->isconst&=~(1<<preferred_reg);
504 return;
505 }
506 r=cur->regmap[preferred_reg];
507 if(r<64&&((cur->u>>r)&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513 if(r>=64&&((cur->uu>>(r&63))&1)) {
514 cur->regmap[preferred_reg]=reg|64;
515 cur->dirty&=~(1<<preferred_reg);
516 cur->isconst&=~(1<<preferred_reg);
517 return;
518 }
519
520 // Clear any unneeded registers
521 // We try to keep the mapping consistent, if possible, because it
522 // makes branches easier (especially loops). So we try to allocate
523 // first (see above) before removing old mappings. If this is not
524 // possible then go ahead and clear out the registers that are no
525 // longer needed.
526 for(hr=HOST_REGS-1;hr>=0;hr--)
527 {
528 r=cur->regmap[hr];
529 if(r>=0) {
530 if(r<64) {
531 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
532 }
533 else
534 {
535 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
536 }
537 }
538 }
539 // Try to allocate any available register, but prefer
540 // registers that have not been used recently.
541 if(i>0) {
542 for(hr=0;hr<HOST_REGS;hr++) {
543 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
544 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
545 cur->regmap[hr]=reg|64;
546 cur->dirty&=~(1<<hr);
547 cur->isconst&=~(1<<hr);
548 return;
549 }
550 }
551 }
552 }
553 // Try to allocate any available register
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
556 cur->regmap[hr]=reg|64;
557 cur->dirty&=~(1<<hr);
558 cur->isconst&=~(1<<hr);
559 return;
560 }
561 }
562
563 // Ok, now we have to evict someone
564 // Pick a register we hopefully won't need soon
565 u_char hsn[MAXREG+1];
566 memset(hsn,10,sizeof(hsn));
567 int j;
568 lsn(hsn,i,&preferred_reg);
569 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
570 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
571 if(i>0) {
572 // Don't evict the cycle count at entry points, otherwise the entry
573 // stub will have to write it.
574 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
575 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
576 for(j=10;j>=3;j--)
577 {
578 // Alloc preferred register if available
579 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
580 for(hr=0;hr<HOST_REGS;hr++) {
581 // Evict both parts of a 64-bit register
582 if((cur->regmap[hr]&63)==r) {
583 cur->regmap[hr]=-1;
584 cur->dirty&=~(1<<hr);
585 cur->isconst&=~(1<<hr);
586 }
587 }
588 cur->regmap[preferred_reg]=reg|64;
589 return;
590 }
591 for(r=1;r<=MAXREG;r++)
592 {
593 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
594 for(hr=0;hr<HOST_REGS;hr++) {
595 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
596 if(cur->regmap[hr]==r+64) {
597 cur->regmap[hr]=reg|64;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 return;
601 }
602 }
603 }
604 for(hr=0;hr<HOST_REGS;hr++) {
605 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
606 if(cur->regmap[hr]==r) {
607 cur->regmap[hr]=reg|64;
608 cur->dirty&=~(1<<hr);
609 cur->isconst&=~(1<<hr);
610 return;
611 }
612 }
613 }
614 }
615 }
616 }
617 }
618 for(j=10;j>=0;j--)
619 {
620 for(r=1;r<=MAXREG;r++)
621 {
622 if(hsn[r]==j) {
623 for(hr=0;hr<HOST_REGS;hr++) {
624 if(cur->regmap[hr]==r+64) {
625 cur->regmap[hr]=reg|64;
626 cur->dirty&=~(1<<hr);
627 cur->isconst&=~(1<<hr);
628 return;
629 }
630 }
631 for(hr=0;hr<HOST_REGS;hr++) {
632 if(cur->regmap[hr]==r) {
633 cur->regmap[hr]=reg|64;
634 cur->dirty&=~(1<<hr);
635 cur->isconst&=~(1<<hr);
636 return;
637 }
638 }
639 }
640 }
641 }
642 printf("This shouldn't happen");exit(1);
643}
644
645// Allocate a temporary register. This is done without regard to
646// dirty status or whether the register we request is on the unneeded list
647// Note: This will only allocate one register, even if called multiple times
648void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
649{
650 int r,hr;
651 int preferred_reg = -1;
652
653 // see if it's already allocated
654 for(hr=0;hr<HOST_REGS;hr++)
655 {
656 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
657 }
658
659 // Try to allocate any available register
660 for(hr=HOST_REGS-1;hr>=0;hr--) {
661 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
662 cur->regmap[hr]=reg;
663 cur->dirty&=~(1<<hr);
664 cur->isconst&=~(1<<hr);
665 return;
666 }
667 }
668
669 // Find an unneeded register
670 for(hr=HOST_REGS-1;hr>=0;hr--)
671 {
672 r=cur->regmap[hr];
673 if(r>=0) {
674 if(r<64) {
675 if((cur->u>>r)&1) {
676 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
677 cur->regmap[hr]=reg;
678 cur->dirty&=~(1<<hr);
679 cur->isconst&=~(1<<hr);
680 return;
681 }
682 }
683 }
684 else
685 {
686 if((cur->uu>>(r&63))&1) {
687 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
688 cur->regmap[hr]=reg;
689 cur->dirty&=~(1<<hr);
690 cur->isconst&=~(1<<hr);
691 return;
692 }
693 }
694 }
695 }
696 }
697
698 // Ok, now we have to evict someone
699 // Pick a register we hopefully won't need soon
700 // TODO: we might want to follow unconditional jumps here
701 // TODO: get rid of dupe code and make this into a function
702 u_char hsn[MAXREG+1];
703 memset(hsn,10,sizeof(hsn));
704 int j;
705 lsn(hsn,i,&preferred_reg);
706 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
707 if(i>0) {
708 // Don't evict the cycle count at entry points, otherwise the entry
709 // stub will have to write it.
710 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
711 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
712 for(j=10;j>=3;j--)
713 {
714 for(r=1;r<=MAXREG;r++)
715 {
716 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
717 for(hr=0;hr<HOST_REGS;hr++) {
718 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
719 if(cur->regmap[hr]==r+64) {
720 cur->regmap[hr]=reg;
721 cur->dirty&=~(1<<hr);
722 cur->isconst&=~(1<<hr);
723 return;
724 }
725 }
726 }
727 for(hr=0;hr<HOST_REGS;hr++) {
728 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
729 if(cur->regmap[hr]==r) {
730 cur->regmap[hr]=reg;
731 cur->dirty&=~(1<<hr);
732 cur->isconst&=~(1<<hr);
733 return;
734 }
735 }
736 }
737 }
738 }
739 }
740 }
741 for(j=10;j>=0;j--)
742 {
743 for(r=1;r<=MAXREG;r++)
744 {
745 if(hsn[r]==j) {
746 for(hr=0;hr<HOST_REGS;hr++) {
747 if(cur->regmap[hr]==r+64) {
748 cur->regmap[hr]=reg;
749 cur->dirty&=~(1<<hr);
750 cur->isconst&=~(1<<hr);
751 return;
752 }
753 }
754 for(hr=0;hr<HOST_REGS;hr++) {
755 if(cur->regmap[hr]==r) {
756 cur->regmap[hr]=reg;
757 cur->dirty&=~(1<<hr);
758 cur->isconst&=~(1<<hr);
759 return;
760 }
761 }
762 }
763 }
764 }
765 printf("This shouldn't happen");exit(1);
766}
767// Allocate a specific ARM register.
768void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
769{
770 int n;
771 int dirty=0;
772
773 // see if it's already allocated (and dealloc it)
774 for(n=0;n<HOST_REGS;n++)
775 {
776 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
777 dirty=(cur->dirty>>n)&1;
778 cur->regmap[n]=-1;
779 }
780 }
781
782 cur->regmap[hr]=reg;
783 cur->dirty&=~(1<<hr);
784 cur->dirty|=dirty<<hr;
785 cur->isconst&=~(1<<hr);
786}
787
788// Alloc cycle count into dedicated register
789alloc_cc(struct regstat *cur,int i)
790{
791 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
792}
793
794/* Special alloc */
795
796
797/* Assembler */
798
/* Printable names of the 16 ARM registers, indexed by register number. */
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
816
817void output_byte(u_char byte)
818{
819 *(out++)=byte;
820}
821void output_modrm(u_char mod,u_char rm,u_char ext)
822{
823 assert(mod<4);
824 assert(rm<8);
825 assert(ext<8);
826 u_char byte=(mod<<6)|(ext<<3)|rm;
827 *(out++)=byte;
828}
829void output_sib(u_char scale,u_char index,u_char base)
830{
831 assert(scale<4);
832 assert(index<8);
833 assert(base<8);
834 u_char byte=(scale<<6)|(index<<3)|base;
835 *(out++)=byte;
836}
837void output_w32(u_int word)
838{
839 *((u_int *)out)=word;
840 out+=4;
841}
/*
 * Build the register fields of an instruction word: rn in bits 19-16,
 * rd in bits 15-12, rm in bits 3-0.  All three must be below 16.
 */
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
/*
 * Build the register and immediate fields of an instruction word:
 * rn in bits 19-16, rd in bits 15-12, an 8-bit immediate in bits 7-0 and
 * ((32-shift)&30) placed at bit 7 upward.  'shift' must be even.
 */
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  u_int rotate_bits=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_bits|imm;
}
/*
 * Try to express imm as an 8-bit value with an even rotation.
 *
 * On success returns 1 and stores the 12-bit field (rotation in bits 11-8,
 * value in bits 7-0) in *encoded.  On failure returns 0 and *encoded is 0.
 * imm==0 succeeds with *encoded==0.
 */
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded=0;
  if(imm==0) return 1;

  // Rotate right two bits at a time until the value fits in 8 bits
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
872void genimm_checked(u_int imm,u_int *encoded)
873{
874 u_int ret=genimm(imm,encoded);
875 assert(ret);
876}
877u_int genjmp(u_int addr)
878{
879 int offset=addr-(int)out-8;
880 if(offset<-33554432||offset>=33554432) {
881 if (addr>2) {
882 printf("genjmp: out of range: %08x\n", offset);
883 exit(1);
884 }
885 return 0;
886 }
887 return ((u_int)offset>>2)&0xffffff;
888}
889
890void emit_mov(int rs,int rt)
891{
892 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
893 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
894}
895
896void emit_movs(int rs,int rt)
897{
898 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
899 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
900}
901
902void emit_add(int rs1,int rs2,int rt)
903{
904 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
905 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
906}
907
908void emit_adds(int rs1,int rs2,int rt)
909{
910 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
911 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
912}
913
914void emit_adcs(int rs1,int rs2,int rt)
915{
916 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
917 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
918}
919
920void emit_sbc(int rs1,int rs2,int rt)
921{
922 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
923 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
924}
925
926void emit_sbcs(int rs1,int rs2,int rt)
927{
928 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
929 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
930}
931
932void emit_neg(int rs, int rt)
933{
934 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
935 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
936}
937
938void emit_negs(int rs, int rt)
939{
940 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
941 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
942}
943
944void emit_sub(int rs1,int rs2,int rt)
945{
946 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
947 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
948}
949
950void emit_subs(int rs1,int rs2,int rt)
951{
952 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
953 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
954}
955
956void emit_zeroreg(int rt)
957{
958 assem_debug("mov %s,#0\n",regname[rt]);
959 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
960}
961
962void emit_loadlp(u_int imm,u_int rt)
963{
964 add_literal((int)out,imm);
965 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
966 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
967}
968void emit_movw(u_int imm,u_int rt)
969{
970 assert(imm<65536);
971 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
972 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
973}
974void emit_movt(u_int imm,u_int rt)
975{
976 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
977 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
978}
979void emit_movimm(u_int imm,u_int rt)
980{
981 u_int armval;
982 if(genimm(imm,&armval)) {
983 assem_debug("mov %s,#%d\n",regname[rt],imm);
984 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
985 }else if(genimm(~imm,&armval)) {
986 assem_debug("mvn %s,#%d\n",regname[rt],imm);
987 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
988 }else if(imm<65536) {
989 #ifdef ARMv5_ONLY
990 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
991 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
992 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
993 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
994 #else
995 emit_movw(imm,rt);
996 #endif
997 }else{
998 #ifdef ARMv5_ONLY
999 emit_loadlp(imm,rt);
1000 #else
1001 emit_movw(imm&0x0000FFFF,rt);
1002 emit_movt(imm&0xFFFF0000,rt);
1003 #endif
1004 }
1005}
1006void emit_pcreladdr(u_int rt)
1007{
1008 assem_debug("add %s,pc,#?\n",regname[rt]);
1009 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1010}
1011
1012void emit_loadreg(int r, int hr)
1013{
1014#ifdef FORCE32
1015 if(r&64) {
1016 printf("64bit load in 32bit mode!\n");
1017 assert(0);
1018 return;
1019 }
1020#endif
1021 if((r&63)==0)
1022 emit_zeroreg(hr);
1023 else {
1024 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1025 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1026 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1027 if(r==CCREG) addr=(int)&cycle_count;
1028 if(r==CSREG) addr=(int)&Status;
1029 if(r==FSREG) addr=(int)&FCR31;
1030 if(r==INVCP) addr=(int)&invc_ptr;
1031 u_int offset = addr-(u_int)&dynarec_local;
1032 assert(offset<4096);
1033 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1034 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1035 }
1036}
1037void emit_storereg(int r, int hr)
1038{
1039#ifdef FORCE32
1040 if(r&64) {
1041 printf("64bit store in 32bit mode!\n");
1042 assert(0);
1043 return;
1044 }
1045#endif
1046 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1047 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1048 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1049 if(r==CCREG) addr=(int)&cycle_count;
1050 if(r==FSREG) addr=(int)&FCR31;
1051 u_int offset = addr-(u_int)&dynarec_local;
1052 assert(offset<4096);
1053 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1054 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1055}
1056
1057void emit_test(int rs, int rt)
1058{
1059 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1060 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1061}
1062
1063void emit_testimm(int rs,int imm)
1064{
1065 u_int armval;
1066 assem_debug("tst %s,#%d\n",regname[rs],imm);
1067 genimm_checked(imm,&armval);
1068 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1069}
1070
1071void emit_testeqimm(int rs,int imm)
1072{
1073 u_int armval;
1074 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1075 genimm_checked(imm,&armval);
1076 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1077}
1078
1079void emit_not(int rs,int rt)
1080{
1081 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1082 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1083}
1084
1085void emit_mvnmi(int rs,int rt)
1086{
1087 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1088 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1089}
1090
1091void emit_and(u_int rs1,u_int rs2,u_int rt)
1092{
1093 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1095}
1096
1097void emit_or(u_int rs1,u_int rs2,u_int rt)
1098{
1099 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1100 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1101}
1102void emit_or_and_set_flags(int rs1,int rs2,int rt)
1103{
1104 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1105 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1106}
1107
1108void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1109{
1110 assert(rs<16);
1111 assert(rt<16);
1112 assert(imm<32);
1113 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1114 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1115}
1116
1117void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1118{
1119 assert(rs<16);
1120 assert(rt<16);
1121 assert(imm<32);
1122 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1123 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1124}
1125
1126void emit_xor(u_int rs1,u_int rs2,u_int rt)
1127{
1128 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1129 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1130}
1131
1132void emit_addimm(u_int rs,int imm,u_int rt)
1133{
1134 assert(rs<16);
1135 assert(rt<16);
1136 if(imm!=0) {
1137 assert(imm>-65536&&imm<65536);
1138 u_int armval;
1139 if(genimm(imm,&armval)) {
1140 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1141 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1142 }else if(genimm(-imm,&armval)) {
1143 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1144 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1145 }else if(imm<0) {
1146 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1147 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1148 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1149 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1150 }else{
1151 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1152 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1153 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1154 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1155 }
1156 }
1157 else if(rs!=rt) emit_mov(rs,rt);
1158}
1159
1160void emit_addimm_and_set_flags(int imm,int rt)
1161{
1162 assert(imm>-65536&&imm<65536);
1163 u_int armval;
1164 if(genimm(imm,&armval)) {
1165 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1166 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1167 }else if(genimm(-imm,&armval)) {
1168 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1169 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1170 }else if(imm<0) {
1171 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1172 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1173 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1174 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1175 }else{
1176 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1177 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1178 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1179 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1180 }
1181}
1182void emit_addimm_no_flags(u_int imm,u_int rt)
1183{
1184 emit_addimm(rt,imm,rt);
1185}
1186
1187void emit_addnop(u_int r)
1188{
1189 assert(r<16);
1190 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1191 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1192}
1193
1194void emit_adcimm(u_int rs,int imm,u_int rt)
1195{
1196 u_int armval;
1197 genimm_checked(imm,&armval);
1198 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1199 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1200}
1201/*void emit_sbcimm(int imm,u_int rt)
1202{
1203 u_int armval;
1204 genimm_checked(imm,&armval);
1205 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1206 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1207}*/
1208void emit_sbbimm(int imm,u_int rt)
1209{
1210 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1211 assert(rt<8);
1212 if(imm<128&&imm>=-128) {
1213 output_byte(0x83);
1214 output_modrm(3,rt,3);
1215 output_byte(imm);
1216 }
1217 else
1218 {
1219 output_byte(0x81);
1220 output_modrm(3,rt,3);
1221 output_w32(imm);
1222 }
1223}
1224void emit_rscimm(int rs,int imm,u_int rt)
1225{
1226 assert(0);
1227 u_int armval;
1228 genimm_checked(imm,&armval);
1229 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1230 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1231}
1232
1233void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1234{
1235 // TODO: if(genimm(imm,&armval)) ...
1236 // else
1237 emit_movimm(imm,HOST_TEMPREG);
1238 emit_adds(HOST_TEMPREG,rsl,rtl);
1239 emit_adcimm(rsh,0,rth);
1240}
1241
1242void emit_sbb(int rs1,int rs2)
1243{
1244 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1245 output_byte(0x19);
1246 output_modrm(3,rs1,rs2);
1247}
1248
1249void emit_andimm(int rs,int imm,int rt)
1250{
1251 u_int armval;
1252 if(imm==0) {
1253 emit_zeroreg(rt);
1254 }else if(genimm(imm,&armval)) {
1255 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1256 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1257 }else if(genimm(~imm,&armval)) {
1258 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1259 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1260 }else if(imm==65535) {
1261 #ifdef ARMv5_ONLY
1262 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1263 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1264 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1265 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1266 #else
1267 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1268 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1269 #endif
1270 }else{
1271 assert(imm>0&&imm<65535);
1272 #ifdef ARMv5_ONLY
1273 assem_debug("mov r14,#%d\n",imm&0xFF00);
1274 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1275 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1276 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1277 #else
1278 emit_movw(imm,HOST_TEMPREG);
1279 #endif
1280 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1281 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1282 }
1283}
1284
1285void emit_orimm(int rs,int imm,int rt)
1286{
1287 u_int armval;
1288 if(imm==0) {
1289 if(rs!=rt) emit_mov(rs,rt);
1290 }else if(genimm(imm,&armval)) {
1291 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1292 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1293 }else{
1294 assert(imm>0&&imm<65536);
1295 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1296 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1297 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1298 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1299 }
1300}
1301
1302void emit_xorimm(int rs,int imm,int rt)
1303{
1304 u_int armval;
1305 if(imm==0) {
1306 if(rs!=rt) emit_mov(rs,rt);
1307 }else if(genimm(imm,&armval)) {
1308 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1309 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1310 }else{
1311 assert(imm>0&&imm<65536);
1312 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1313 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1314 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1315 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1316 }
1317}
1318
1319void emit_shlimm(int rs,u_int imm,int rt)
1320{
1321 assert(imm>0);
1322 assert(imm<32);
1323 //if(imm==1) ...
1324 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1325 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1326}
1327
1328void emit_lsls_imm(int rs,int imm,int rt)
1329{
1330 assert(imm>0);
1331 assert(imm<32);
1332 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1333 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1334}
1335
1336void emit_shrimm(int rs,u_int imm,int rt)
1337{
1338 assert(imm>0);
1339 assert(imm<32);
1340 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1341 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1342}
1343
1344void emit_sarimm(int rs,u_int imm,int rt)
1345{
1346 assert(imm>0);
1347 assert(imm<32);
1348 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1349 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1350}
1351
1352void emit_rorimm(int rs,u_int imm,int rt)
1353{
1354 assert(imm>0);
1355 assert(imm<32);
1356 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1357 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1358}
1359
1360void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1361{
1362 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1363 assert(imm>0);
1364 assert(imm<32);
1365 //if(imm==1) ...
1366 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1367 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1368 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1369 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1370}
1371
1372void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1373{
1374 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1375 assert(imm>0);
1376 assert(imm<32);
1377 //if(imm==1) ...
1378 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1379 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1380 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1381 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1382}
1383
1384void emit_signextend16(int rs,int rt)
1385{
1386 #ifdef ARMv5_ONLY
1387 emit_shlimm(rs,16,rt);
1388 emit_sarimm(rt,16,rt);
1389 #else
1390 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1391 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1392 #endif
1393}
1394
1395void emit_signextend8(int rs,int rt)
1396{
1397 #ifdef ARMv5_ONLY
1398 emit_shlimm(rs,24,rt);
1399 emit_sarimm(rt,24,rt);
1400 #else
1401 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1402 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1403 #endif
1404}
1405
1406void emit_shl(u_int rs,u_int shift,u_int rt)
1407{
1408 assert(rs<16);
1409 assert(rt<16);
1410 assert(shift<16);
1411 //if(imm==1) ...
1412 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1413 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1414}
1415void emit_shr(u_int rs,u_int shift,u_int rt)
1416{
1417 assert(rs<16);
1418 assert(rt<16);
1419 assert(shift<16);
1420 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1421 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1422}
1423void emit_sar(u_int rs,u_int shift,u_int rt)
1424{
1425 assert(rs<16);
1426 assert(rt<16);
1427 assert(shift<16);
1428 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1429 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1430}
1431void emit_shlcl(int r)
1432{
1433 assem_debug("shl %%%s,%%cl\n",regname[r]);
1434 assert(0);
1435}
1436void emit_shrcl(int r)
1437{
1438 assem_debug("shr %%%s,%%cl\n",regname[r]);
1439 assert(0);
1440}
1441void emit_sarcl(int r)
1442{
1443 assem_debug("sar %%%s,%%cl\n",regname[r]);
1444 assert(0);
1445}
1446
1447void emit_shldcl(int r1,int r2)
1448{
1449 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1450 assert(0);
1451}
1452void emit_shrdcl(int r1,int r2)
1453{
1454 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1455 assert(0);
1456}
1457void emit_orrshl(u_int rs,u_int shift,u_int rt)
1458{
1459 assert(rs<16);
1460 assert(rt<16);
1461 assert(shift<16);
1462 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1463 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1464}
1465void emit_orrshr(u_int rs,u_int shift,u_int rt)
1466{
1467 assert(rs<16);
1468 assert(rt<16);
1469 assert(shift<16);
1470 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1471 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1472}
1473
1474void emit_cmpimm(int rs,int imm)
1475{
1476 u_int armval;
1477 if(genimm(imm,&armval)) {
1478 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1479 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1480 }else if(genimm(-imm,&armval)) {
1481 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1482 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1483 }else if(imm>0) {
1484 assert(imm<65536);
1485 #ifdef ARMv5_ONLY
1486 emit_movimm(imm,HOST_TEMPREG);
1487 #else
1488 emit_movw(imm,HOST_TEMPREG);
1489 #endif
1490 assem_debug("cmp %s,r14\n",regname[rs]);
1491 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1492 }else{
1493 assert(imm>-65536);
1494 #ifdef ARMv5_ONLY
1495 emit_movimm(-imm,HOST_TEMPREG);
1496 #else
1497 emit_movw(-imm,HOST_TEMPREG);
1498 #endif
1499 assem_debug("cmn %s,r14\n",regname[rs]);
1500 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1501 }
1502}
1503
1504void emit_cmovne(u_int *addr,int rt)
1505{
1506 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1507 assert(0);
1508}
1509void emit_cmovl(u_int *addr,int rt)
1510{
1511 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1512 assert(0);
1513}
1514void emit_cmovs(u_int *addr,int rt)
1515{
1516 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1517 assert(0);
1518}
1519void emit_cmovne_imm(int imm,int rt)
1520{
1521 assem_debug("movne %s,#%d\n",regname[rt],imm);
1522 u_int armval;
1523 genimm_checked(imm,&armval);
1524 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1525}
1526void emit_cmovl_imm(int imm,int rt)
1527{
1528 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1529 u_int armval;
1530 genimm_checked(imm,&armval);
1531 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1532}
1533void emit_cmovb_imm(int imm,int rt)
1534{
1535 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1536 u_int armval;
1537 genimm_checked(imm,&armval);
1538 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1539}
1540void emit_cmovs_imm(int imm,int rt)
1541{
1542 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1543 u_int armval;
1544 genimm_checked(imm,&armval);
1545 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1546}
1547void emit_cmove_reg(int rs,int rt)
1548{
1549 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1550 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1551}
1552void emit_cmovne_reg(int rs,int rt)
1553{
1554 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1555 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1556}
1557void emit_cmovl_reg(int rs,int rt)
1558{
1559 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1560 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1561}
1562void emit_cmovs_reg(int rs,int rt)
1563{
1564 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1565 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1566}
1567
/*
 * Emit code that sets rt to 1 if rs < imm (the "lt" condition after the
 * compare) and to 0 otherwise.
 *
 * When rt is a different register it is cleared before the compare; when
 * rt aliases rs the clear has to come after the compare has read rs.
 */
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  // presumably emit_movimm leaves the flags from the compare intact - confirm
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/*
 * Emit code that sets rt to 1 if the "cc" condition holds after comparing
 * rs with imm, and to 0 otherwise.
 *
 * When rt is a different register it is cleared before the compare; when
 * rt aliases rs the clear has to come after the compare has read rs.
 */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  // presumably emit_movimm leaves the flags from the compare intact - confirm
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
/*
 * Emit code that sets rt to 1 if the signed 64-bit value rsh:rsl is less
 * than imm sign-extended to 64 bits, and to 0 otherwise.
 *
 * rsh must not be the same register as rt (asserted), because rt is
 * written before rsh is examined.
 *
 * The result is first computed from the low words and then overridden
 * whenever the high word alone decides the comparison:
 *   - imm >= 0 (high word of imm is 0): a non-zero rsh forces 0, a
 *     negative rsh forces 1;
 *   - imm < 0 (high word of imm is -1): rsh != -1 forces 0, rsh < -1
 *     forces 1.
 */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  // The low-word result only survives when the high words are equal, and
  // in that case the low words are plain magnitudes: they must be compared
  // unsigned. A signed compare gives the wrong answer whenever bit 31 of
  // the two low words differs.
  emit_sltiu32(rsl,imm,rt);
  if(imm>=0)
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
  else
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
}
/*
 * 64-bit variant of emit_sltiu32 for the register pair rsh:rsl.
 *
 * The low-word result from emit_sltiu32 is kept unless the high word
 * decides the outcome: for imm >= 0 a non-zero rsh forces 0; for imm < 0
 * an rsh different from -1 forces 1. rsh must not be rt (asserted).
 */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1614
1615void emit_cmp(int rs,int rt)
1616{
1617 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1618 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1619}
/*
 * Emit code that sets rt to 1 when rs > 0 and to 0 otherwise: compare rs
 * with 1, load 1 into rt, then replace it with 0 on the "lt" condition.
 */
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);    // flags from rs - 1
  emit_movimm(1,rt);    // default result
  emit_cmovl_imm(0,rt); // rs < 1 -> 0
}
/*
 * Emit code that leaves rt == 0 when rs is zero and sets rt to 1
 * otherwise. If rt is a different register, rs is first copied into it
 * with a flag-setting move; if they alias, rs is only tested.
 */
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
/*
 * Emit code that sets rt to 1 when the signed 64-bit value rsh:rsl is
 * greater than zero and to 0 otherwise.
 *
 * rt is written before rsh is tested, so rt must not be the same
 * register as rsh (not asserted here).
 */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  // With a zero high word the value is positive exactly when the low word
  // is non-zero. The low word is a magnitude, so a signed "> 0" test would
  // wrongly report 0 for low words with bit 31 set.
  emit_set_nz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt); // non-zero high word: positive unless negative...
  emit_cmovs_imm(0,rt);  // ...which the sign check overrides
}
/*
 * Emit code that sets rt to 1 when either half of rsh:rsl is non-zero:
 * the two halves are OR-ed into rt with flags set, then rt is replaced
 * with 1 on the "ne" condition (rt stays 0 when both halves are zero).
 */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
/*
 * Emit code that sets rt to 1 if rs1 < rs2 (the "lt" condition after the
 * compare) and to 0 otherwise.
 *
 * If rt aliases one of the inputs it can only be cleared after the
 * compare has read them; otherwise it is cleared up front.
 */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/*
 * Emit code that sets rt to 1 if the "cc" condition holds after comparing
 * rs1 with rs2, and to 0 otherwise.
 *
 * If rt aliases one of the inputs it can only be cleared after the
 * compare has read them; otherwise it is cleared up front.
 */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1664void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1665{
1666 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1667 assert(u1!=rt);
1668 assert(u2!=rt);
1669 emit_cmp(l1,l2);
1670 emit_movimm(0,rt);
1671 emit_sbcs(u1,u2,HOST_TEMPREG);
1672 emit_cmovl_imm(1,rt);
1673}
1674void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1675{
1676 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1677 assert(u1!=rt);
1678 assert(u2!=rt);
1679 emit_cmp(l1,l2);
1680 emit_movimm(0,rt);
1681 emit_sbcs(u1,u2,HOST_TEMPREG);
1682 emit_cmovb_imm(1,rt);
1683}
1684
1685void emit_call(int a)
1686{
1687 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1688 u_int offset=genjmp(a);
1689 output_w32(0xeb000000|offset);
1690}
1691void emit_jmp(int a)
1692{
1693 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1694 u_int offset=genjmp(a);
1695 output_w32(0xea000000|offset);
1696}
1697void emit_jne(int a)
1698{
1699 assem_debug("bne %x\n",a);
1700 u_int offset=genjmp(a);
1701 output_w32(0x1a000000|offset);
1702}
1703void emit_jeq(int a)
1704{
1705 assem_debug("beq %x\n",a);
1706 u_int offset=genjmp(a);
1707 output_w32(0x0a000000|offset);
1708}
1709void emit_js(int a)
1710{
1711 assem_debug("bmi %x\n",a);
1712 u_int offset=genjmp(a);
1713 output_w32(0x4a000000|offset);
1714}
1715void emit_jns(int a)
1716{
1717 assem_debug("bpl %x\n",a);
1718 u_int offset=genjmp(a);
1719 output_w32(0x5a000000|offset);
1720}
1721void emit_jl(int a)
1722{
1723 assem_debug("blt %x\n",a);
1724 u_int offset=genjmp(a);
1725 output_w32(0xba000000|offset);
1726}
1727void emit_jge(int a)
1728{
1729 assem_debug("bge %x\n",a);
1730 u_int offset=genjmp(a);
1731 output_w32(0xaa000000|offset);
1732}
1733void emit_jno(int a)
1734{
1735 assem_debug("bvc %x\n",a);
1736 u_int offset=genjmp(a);
1737 output_w32(0x7a000000|offset);
1738}
1739void emit_jc(int a)
1740{
1741 assem_debug("bcs %x\n",a);
1742 u_int offset=genjmp(a);
1743 output_w32(0x2a000000|offset);
1744}
1745void emit_jcc(int a)
1746{
1747 assem_debug("bcc %x\n",a);
1748 u_int offset=genjmp(a);
1749 output_w32(0x3a000000|offset);
1750}
1751
/*
 * Unimplemented here: only traces an x86-style "push $imm" and then
 * asserts; no instruction is emitted.
 */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
/*
 * Unimplemented here: only traces "pusha" and then asserts; no
 * instruction is emitted.
 */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/*
 * Unimplemented here: only traces "popa" and then asserts; no
 * instruction is emitted.
 */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1767void emit_pushreg(u_int r)
1768{
1769 assem_debug("push %%%s\n",regname[r]);
1770 assert(0);
1771}
1772void emit_popreg(u_int r)
1773{
1774 assem_debug("pop %%%s\n",regname[r]);
1775 assert(0);
1776}
1777void emit_callreg(u_int r)
1778{
1779 assert(r<15);
1780 assem_debug("blx %s\n",regname[r]);
1781 output_w32(0xe12fff30|r);
1782}
1783void emit_jmpreg(u_int r)
1784{
1785 assem_debug("mov pc,%s\n",regname[r]);
1786 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1787}
1788
1789void emit_readword_indexed(int offset, int rs, int rt)
1790{
1791 assert(offset>-4096&&offset<4096);
1792 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1793 if(offset>=0) {
1794 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1795 }else{
1796 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1797 }
1798}
1799void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1800{
1801 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1802 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1803}
1804void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1805{
1806 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1807 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1808}
1809void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1810{
1811 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1812 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1813}
1814void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1815{
1816 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1817 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1818}
1819void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1820{
1821 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1822 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1823}
1824void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1825{
1826 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1827 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1828}
/*
 * Word load with an optional map register. A negative map means "no map
 * register": a plain immediate-offset load is emitted. Otherwise addr
 * must be 0 (asserted) and the load is indexed by map scaled by 4.
 */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/*
 * Load a 64-bit value as two words: rh from addr and rl from the word
 * after it. A negative rh skips the first load.
 *
 * Without a map register (map < 0) two immediate-offset loads are used.
 * With one, rh must differ from rs (asserted) and map itself is
 * incremented by 1 between the two loads, so the caller's map register
 * is modified.
 */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map<0) {
    if(rh>=0) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map); // one map unit is 4 bytes once scaled by the load
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1849void emit_movsbl_indexed(int offset, int rs, int rt)
1850{
1851 assert(offset>-256&&offset<256);
1852 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1853 if(offset>=0) {
1854 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1855 }else{
1856 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1857 }
1858}
1859void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1860{
1861 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1862 else {
1863 if(addr==0) {
1864 emit_shlimm(map,2,map);
1865 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1866 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1867 }else{
1868 assert(addr>-256&&addr<256);
1869 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1870 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1871 emit_movsbl_indexed(addr, rt, rt);
1872 }
1873 }
1874}
1875void emit_movswl_indexed(int offset, int rs, int rt)
1876{
1877 assert(offset>-256&&offset<256);
1878 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1879 if(offset>=0) {
1880 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1881 }else{
1882 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1883 }
1884}
1885void emit_movzbl_indexed(int offset, int rs, int rt)
1886{
1887 assert(offset>-4096&&offset<4096);
1888 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1889 if(offset>=0) {
1890 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1891 }else{
1892 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1893 }
1894}
1895void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1896{
1897 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1898 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1899}
/*
 * Unsigned byte load with an optional map register.
 *   - map < 0: plain immediate-offset "ldrb";
 *   - addr == 0: load indexed by map scaled by 4;
 *   - otherwise: rt = rs + addr is computed first, then the byte is
 *     loaded from rt indexed by map scaled by 4.
 */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr==0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs,addr,rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1912void emit_movzwl_indexed(int offset, int rs, int rt)
1913{
1914 assert(offset>-256&&offset<256);
1915 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1916 if(offset>=0) {
1917 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1918 }else{
1919 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1920 }
1921}
1922void emit_readword(int addr, int rt)
1923{
1924 u_int offset = addr-(u_int)&dynarec_local;
1925 assert(offset<4096);
1926 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1927 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1928}
1929void emit_movsbl(int addr, int rt)
1930{
1931 u_int offset = addr-(u_int)&dynarec_local;
1932 assert(offset<256);
1933 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1934 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1935}
1936void emit_movswl(int addr, int rt)
1937{
1938 u_int offset = addr-(u_int)&dynarec_local;
1939 assert(offset<256);
1940 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1941 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1942}
1943void emit_movzbl(int addr, int rt)
1944{
1945 u_int offset = addr-(u_int)&dynarec_local;
1946 assert(offset<4096);
1947 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1948 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1949}
1950void emit_movzwl(int addr, int rt)
1951{
1952 u_int offset = addr-(u_int)&dynarec_local;
1953 assert(offset<256);
1954 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1955 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1956}
1957void emit_movzwl_reg(int rs, int rt)
1958{
1959 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1960 assert(0);
1961}
1962
1963void emit_xchg(int rs, int rt)
1964{
1965 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1966 assert(0);
1967}
1968void emit_writeword_indexed(int rt, int offset, int rs)
1969{
1970 assert(offset>-4096&&offset<4096);
1971 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1972 if(offset>=0) {
1973 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1974 }else{
1975 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1976 }
1977}
1978void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1979{
1980 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1981 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1982}
/*
 * Word store with an optional map register. A negative map means "no map
 * register": a plain immediate-offset store is emitted. Otherwise addr
 * must be 0 (asserted) and the store is indexed by map scaled by 4.
 * The temp parameter is not used by this function.
 */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/*
 * Store a 64-bit value as two words: rh at addr and rl in the word after
 * it.
 *
 * Without a map register (map < 0) two immediate-offset stores are used
 * and a negative rh skips the first one. With a map register rh must be
 * valid (asserted). If temp is a distinct register from rs it receives
 * map+1 and indexes the second store; if temp is rs itself, rs is
 * advanced by 4 in place instead and map is reused.
 */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2007void emit_writehword_indexed(int rt, int offset, int rs)
2008{
2009 assert(offset>-256&&offset<256);
2010 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2011 if(offset>=0) {
2012 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2013 }else{
2014 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2015 }
2016}
2017void emit_writebyte_indexed(int rt, int offset, int rs)
2018{
2019 assert(offset>-4096&&offset<4096);
2020 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2021 if(offset>=0) {
2022 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2023 }else{
2024 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2025 }
2026}
2027void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2028{
2029 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2030 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2031}
/*
 * Byte store with an optional map register.
 *   - map < 0: plain immediate-offset "strb";
 *   - addr == 0: store indexed by map scaled by 4;
 *   - otherwise: temp = rs + addr is computed first, then the byte is
 *     stored at temp indexed by map scaled by 4 (temp is overwritten).
 */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr==0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs,addr,temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
2044void emit_writeword(int rt, int addr)
2045{
2046 u_int offset = addr-(u_int)&dynarec_local;
2047 assert(offset<4096);
2048 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2049 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2050}
2051void emit_writehword(int rt, int addr)
2052{
2053 u_int offset = addr-(u_int)&dynarec_local;
2054 assert(offset<256);
2055 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2056 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2057}
2058void emit_writebyte(int rt, int addr)
2059{
2060 u_int offset = addr-(u_int)&dynarec_local;
2061 assert(offset<4096);
2062 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2063 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2064}
/*
 * Unimplemented here: only traces an x86-style "movl $imm,addr" and then
 * asserts; no instruction is emitted.
 */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
/*
 * Unimplemented here: only traces an x86-style "movb $imm,addr" and then
 * asserts; no instruction is emitted.
 */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2075
2076void emit_mul(int rs)
2077{
2078 assem_debug("mul %%%s\n",regname[rs]);
2079 assert(0);
2080}
2081void emit_imul(int rs)
2082{
2083 assem_debug("imul %%%s\n",regname[rs]);
2084 assert(0);
2085}
2086void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2087{
2088 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2089 assert(rs1<16);
2090 assert(rs2<16);
2091 assert(hi<16);
2092 assert(lo<16);
2093 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2094}
2095void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2096{
2097 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2098 assert(rs1<16);
2099 assert(rs2<16);
2100 assert(hi<16);
2101 assert(lo<16);
2102 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2103}
2104
2105void emit_div(int rs)
2106{
2107 assem_debug("div %%%s\n",regname[rs]);
2108 assert(0);
2109}
2110void emit_idiv(int rs)
2111{
2112 assem_debug("idiv %%%s\n",regname[rs]);
2113 assert(0);
2114}
/*
 * Unimplemented here: only traces "cdq" and then asserts; no instruction
 * is emitted.
 */
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2120
2121void emit_clz(int rs,int rt)
2122{
2123 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2124 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2125}
2126
2127void emit_subcs(int rs1,int rs2,int rt)
2128{
2129 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2130 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2131}
2132
2133void emit_shrcc_imm(int rs,u_int imm,int rt)
2134{
2135 assert(imm>0);
2136 assert(imm<32);
2137 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2138 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2139}
2140
2141void emit_negmi(int rs, int rt)
2142{
2143 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2144 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2145}
2146
2147void emit_negsmi(int rs, int rt)
2148{
2149 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2150 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2151}
2152
2153void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2154{
2155 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2156 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2157}
2158
2159void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2160{
2161 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2162 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2163}
2164
2165void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2166{
2167 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2168 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2169}
2170
2171void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2172{
2173 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2174 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2175}
2176
2177void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2178{
2179 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2180 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2181}
2182
2183void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2184{
2185 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2186 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2187}
2188
2189void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2190{
2191 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2192 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2193}
2194
2195void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2196{
2197 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2198 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2199}
2200
2201void emit_teq(int rs, int rt)
2202{
2203 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2204 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2205}
2206
2207void emit_rsbimm(int rs, int imm, int rt)
2208{
2209 u_int armval;
2210 genimm_checked(imm,&armval);
2211 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2212 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2213}
2214
2215// Load 2 immediates optimizing for small code size
2216void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2217{
2218 emit_movimm(imm1,rt1);
2219 u_int armval;
2220 if(genimm(imm2-imm1,&armval)) {
2221 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2222 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2223 }else if(genimm(imm1-imm2,&armval)) {
2224 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2225 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2226 }
2227 else emit_movimm(imm2,rt2);
2228}
2229
2230// Conditionally select one of two immediates, optimizing for small code size
2231// This will only be called if HAVE_CMOV_IMM is defined
2232void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2233{
2234 u_int armval;
2235 if(genimm(imm2-imm1,&armval)) {
2236 emit_movimm(imm1,rt);
2237 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2238 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2239 }else if(genimm(imm1-imm2,&armval)) {
2240 emit_movimm(imm1,rt);
2241 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2242 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2243 }
2244 else {
2245 #ifdef ARMv5_ONLY
2246 emit_movimm(imm1,rt);
2247 add_literal((int)out,imm2);
2248 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2249 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2250 #else
2251 emit_movw(imm1&0x0000FFFF,rt);
2252 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2253 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2254 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2255 }
2256 emit_movt(imm1&0xFFFF0000,rt);
2257 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2258 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2259 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2260 }
2261 #endif
2262 }
2263}
2264
// special case for checking invalid_code
// Not implemented in this backend: any call trips the assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2270
2271// special case for checking invalid_code
2272void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2273{
2274 assert(imm<128&&imm>=0);
2275 assert(r>=0&&r<16);
2276 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2277 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2278 emit_cmpimm(HOST_TEMPREG,imm);
2279}
2280
2281// special case for tlb mapping
2282void emit_addsr12(int rs1,int rs2,int rt)
2283{
2284 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2285 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2286}
2287
// Emit "blne a": branch-with-link to address a, encoded with the NE condition.
// The branch offset field comes from genjmp().
void emit_callne(int a)
{
  assem_debug("blne %x\n", a);
  output_w32(0x1b000000 | genjmp(a));
}
2294
// Used to preload hash table entries
// NOTE(review): this writes two single bytes (0x0F, 0x18), a modrm byte and a
// 32-bit address, which does not look like an ARM instruction word -
// presumably carried over from an x86 backend; confirm it is never reached
// on ARM.
void emit_prefetch(void *addr)
{
  const int target = (int)addr;

  assem_debug("prefetch %x\n", target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0, 5, 1);
  output_w32(target);
}
2304void emit_prefetchreg(int r)
2305{
2306 assem_debug("pld %s\n",regname[r]);
2307 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2308}
2309
2310// Special case for mini_ht
2311void emit_ldreq_indexed(int rs, u_int offset, int rt)
2312{
2313 assert(offset<4096);
2314 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2315 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2316}
2317
2318void emit_flds(int r,int sr)
2319{
2320 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2321 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2322}
2323
2324void emit_vldr(int r,int vr)
2325{
2326 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2327 output_w32(0xed900b00|(vr<<12)|(r<<16));
2328}
2329
2330void emit_fsts(int sr,int r)
2331{
2332 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2333 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2334}
2335
2336void emit_vstr(int vr,int r)
2337{
2338 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2339 output_w32(0xed800b00|(vr<<12)|(r<<16));
2340}
2341
2342void emit_ftosizs(int s,int d)
2343{
2344 assem_debug("ftosizs s%d,s%d\n",d,s);
2345 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2346}
2347
2348void emit_ftosizd(int s,int d)
2349{
2350 assem_debug("ftosizd s%d,d%d\n",d,s);
2351 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2352}
2353
2354void emit_fsitos(int s,int d)
2355{
2356 assem_debug("fsitos s%d,s%d\n",d,s);
2357 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2358}
2359
2360void emit_fsitod(int s,int d)
2361{
2362 assem_debug("fsitod d%d,s%d\n",d,s);
2363 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2364}
2365
2366void emit_fcvtds(int s,int d)
2367{
2368 assem_debug("fcvtds d%d,s%d\n",d,s);
2369 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2370}
2371
2372void emit_fcvtsd(int s,int d)
2373{
2374 assem_debug("fcvtsd s%d,d%d\n",d,s);
2375 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2376}
2377
// Emit "fsqrts s<d>,s<s>": single-precision square root.
// Both operands are encoded as single-precision registers (split Vd:D / Vm:M
// fields), so the debug disassembly names both as s-registers.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2383
// Emit "fsqrtd d<d>,d<s>": double-precision square root.
// Both operands are encoded as double-precision registers (3-bit fields), so
// the debug disassembly names both as d-registers.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2389
// Emit "fabss s<d>,s<s>": single-precision absolute value.
// Both operands are encoded as single-precision registers, so the debug
// disassembly names both as s-registers.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2395
// Emit "fabsd d<d>,d<s>": double-precision absolute value.
// Both operands are encoded as double-precision registers, so the debug
// disassembly names both as d-registers.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2401
// Emit "fnegs s<d>,s<s>": single-precision negate.
// Both operands are encoded as single-precision registers, so the debug
// disassembly names both as s-registers.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2407
// Emit "fnegd d<d>,d<s>": double-precision negate.
// Both operands are encoded as double-precision registers, so the debug
// disassembly names both as d-registers.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2413
2414void emit_fadds(int s1,int s2,int d)
2415{
2416 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2417 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2418}
2419
2420void emit_faddd(int s1,int s2,int d)
2421{
2422 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2423 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2424}
2425
2426void emit_fsubs(int s1,int s2,int d)
2427{
2428 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2429 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2430}
2431
2432void emit_fsubd(int s1,int s2,int d)
2433{
2434 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2435 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2436}
2437
2438void emit_fmuls(int s1,int s2,int d)
2439{
2440 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2441 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2442}
2443
2444void emit_fmuld(int s1,int s2,int d)
2445{
2446 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2447 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2448}
2449
2450void emit_fdivs(int s1,int s2,int d)
2451{
2452 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2453 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2454}
2455
2456void emit_fdivd(int s1,int s2,int d)
2457{
2458 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2459 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2460}
2461
2462void emit_fcmps(int x,int y)
2463{
2464 assem_debug("fcmps s14, s15\n");
2465 output_w32(0xeeb47a67);
2466}
2467
2468void emit_fcmpd(int x,int y)
2469{
2470 assem_debug("fcmpd d6, d7\n");
2471 output_w32(0xeeb46b47);
2472}
2473
2474void emit_fmstat()
2475{
2476 assem_debug("fmstat\n");
2477 output_w32(0xeef1fa10);
2478}
2479
2480void emit_bicne_imm(int rs,int imm,int rt)
2481{
2482 u_int armval;
2483 genimm_checked(imm,&armval);
2484 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2485 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2486}
2487
2488void emit_biccs_imm(int rs,int imm,int rt)
2489{
2490 u_int armval;
2491 genimm_checked(imm,&armval);
2492 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2493 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2494}
2495
2496void emit_bicvc_imm(int rs,int imm,int rt)
2497{
2498 u_int armval;
2499 genimm_checked(imm,&armval);
2500 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2501 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2502}
2503
2504void emit_bichi_imm(int rs,int imm,int rt)
2505{
2506 u_int armval;
2507 genimm_checked(imm,&armval);
2508 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2509 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2510}
2511
2512void emit_orrvs_imm(int rs,int imm,int rt)
2513{
2514 u_int armval;
2515 genimm_checked(imm,&armval);
2516 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2517 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2518}
2519
2520void emit_orrne_imm(int rs,int imm,int rt)
2521{
2522 u_int armval;
2523 genimm_checked(imm,&armval);
2524 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2525 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2526}
2527
2528void emit_andne_imm(int rs,int imm,int rt)
2529{
2530 u_int armval;
2531 genimm_checked(imm,&armval);
2532 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2533 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2534}
2535
2536void emit_jno_unlikely(int a)
2537{
2538 //emit_jno(a);
2539 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2540 output_w32(0x72800000|rd_rn_rm(15,15,0));
2541}
2542
2543// Save registers before function call
2544void save_regs(u_int reglist)
2545{
2546 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2547 if(!reglist) return;
2548 assem_debug("stmia fp,{");
2549 if(reglist&1) assem_debug("r0, ");
2550 if(reglist&2) assem_debug("r1, ");
2551 if(reglist&4) assem_debug("r2, ");
2552 if(reglist&8) assem_debug("r3, ");
2553 if(reglist&0x1000) assem_debug("r12");
2554 assem_debug("}\n");
2555 output_w32(0xe88b0000|reglist);
2556}
2557// Restore registers after function call
2558void restore_regs(u_int reglist)
2559{
2560 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2561 if(!reglist) return;
2562 assem_debug("ldmia fp,{");
2563 if(reglist&1) assem_debug("r0, ");
2564 if(reglist&2) assem_debug("r1, ");
2565 if(reglist&4) assem_debug("r2, ");
2566 if(reglist&8) assem_debug("r3, ");
2567 if(reglist&0x1000) assem_debug("r12");
2568 assem_debug("}\n");
2569 output_w32(0xe89b0000|reglist);
2570}
2571
2572// Write back consts using r14 so we don't disturb the other registers
2573void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2574{
2575 int hr;
2576 for(hr=0;hr<HOST_REGS;hr++) {
2577 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2578 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2579 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2580 int value=constmap[i][hr];
2581 if(value==0) {
2582 emit_zeroreg(HOST_TEMPREG);
2583 }
2584 else {
2585 emit_movimm(value,HOST_TEMPREG);
2586 }
2587 emit_storereg(i_regmap[hr],HOST_TEMPREG);
2588#ifndef FORCE32
2589 if((i_is32>>i_regmap[hr])&1) {
2590 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2591 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2592 }
2593#endif
2594 }
2595 }
2596 }
2597 }
2598}
2599
2600/* Stubs/epilogue */
2601
2602void literal_pool(int n)
2603{
2604 if(!literalcount) return;
2605 if(n) {
2606 if((int)out-literals[0][0]<4096-n) return;
2607 }
2608 u_int *ptr;
2609 int i;
2610 for(i=0;i<literalcount;i++)
2611 {
2612 ptr=(u_int *)literals[i][0];
2613 u_int offset=(u_int)out-(u_int)ptr-8;
2614 assert(offset<4096);
2615 assert(!(offset&3));
2616 *ptr|=offset;
2617 output_w32(literals[i][1]);
2618 }
2619 literalcount=0;
2620}
2621
2622void literal_pool_jumpover(int n)
2623{
2624 if(!literalcount) return;
2625 if(n) {
2626 if((int)out-literals[0][0]<4096-n) return;
2627 }
2628 int jaddr=(int)out;
2629 emit_jmp(0);
2630 literal_pool(0);
2631 set_jump_target(jaddr,(int)out);
2632}
2633
2634emit_extjump2(int addr, int target, int linker)
2635{
2636 u_char *ptr=(u_char *)addr;
2637 assert((ptr[3]&0x0e)==0xa);
2638 emit_loadlp(target,0);
2639 emit_loadlp(addr,1);
2640 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2641 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2642//DEBUG >
2643#ifdef DEBUG_CYCLE_COUNT
2644 emit_readword((int)&last_count,ECX);
2645 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2646 emit_readword((int)&next_interupt,ECX);
2647 emit_writeword(HOST_CCREG,(int)&Count);
2648 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2649 emit_writeword(ECX,(int)&last_count);
2650#endif
2651//DEBUG <
2652 emit_jmp(linker);
2653}
2654
2655emit_extjump(int addr, int target)
2656{
2657 emit_extjump2(addr, target, (int)dyna_linker);
2658}
2659emit_extjump_ds(int addr, int target)
2660{
2661 emit_extjump2(addr, target, (int)dyna_linker_ds);
2662}
2663
2664#ifdef PCSX
2665#include "pcsxmem_inline.c"
2666#endif
2667
2668do_readstub(int n)
2669{
2670 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2671 literal_pool(256);
2672 set_jump_target(stubs[n][1],(int)out);
2673 int type=stubs[n][0];
2674 int i=stubs[n][3];
2675 int rs=stubs[n][4];
2676 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2677 u_int reglist=stubs[n][7];
2678 signed char *i_regmap=i_regs->regmap;
2679 int addr=get_reg(i_regmap,AGEN1+(i&1));
2680 int rth,rt;
2681 int ds;
2682 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2683 rth=get_reg(i_regmap,FTEMP|64);
2684 rt=get_reg(i_regmap,FTEMP);
2685 }else{
2686 rth=get_reg(i_regmap,rt1[i]|64);
2687 rt=get_reg(i_regmap,rt1[i]);
2688 }
2689 assert(rs>=0);
2690#ifdef PCSX
2691 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2692 reglist|=(1<<rs);
2693 for(r=0;r<=12;r++) {
2694 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2695 temp=r; break;
2696 }
2697 }
2698 if(rt>=0)
2699 reglist&=~(1<<rt);
2700 if(temp==-1) {
2701 save_regs(reglist);
2702 regs_saved=1;
2703 temp=(rs==0)?2:0;
2704 }
2705 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2706 temp2=1;
2707 emit_readword((int)&mem_rtab,temp);
2708 emit_shrimm(rs,12,temp2);
2709 emit_readword_dualindexedx4(temp,temp2,temp2);
2710 emit_lsls_imm(temp2,1,temp2);
2711 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2712 switch(type) {
2713 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2714 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2715 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2716 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2717 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2718 }
2719 }
2720 if(regs_saved) {
2721 restore_jump=(int)out;
2722 emit_jcc(0); // jump to reg restore
2723 }
2724 else
2725 emit_jcc(stubs[n][2]); // return address
2726
2727 if(!regs_saved)
2728 save_regs(reglist);
2729 int handler=0;
2730 if(type==LOADB_STUB||type==LOADBU_STUB)
2731 handler=(int)jump_handler_read8;
2732 if(type==LOADH_STUB||type==LOADHU_STUB)
2733 handler=(int)jump_handler_read16;
2734 if(type==LOADW_STUB)
2735 handler=(int)jump_handler_read32;
2736 assert(handler!=0);
2737 if(rs!=0)
2738 emit_mov(rs,0);
2739 if(temp2!=1)
2740 emit_mov(temp2,1);
2741 int cc=get_reg(i_regmap,CCREG);
2742 if(cc<0)
2743 emit_loadreg(CCREG,2);
2744 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*stubs[n][6]+2,2);
2745 emit_call(handler);
2746 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2747 switch(type) {
2748 case LOADB_STUB: emit_signextend8(0,rt); break;
2749 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2750 case LOADH_STUB: emit_signextend16(0,rt); break;
2751 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2752 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2753 }
2754 }
2755 if(restore_jump)
2756 set_jump_target(restore_jump,(int)out);
2757 restore_regs(reglist);
2758 emit_jmp(stubs[n][2]); // return address
2759#else // !PCSX
2760 if(addr<0) addr=rt;
2761 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
2762 assert(addr>=0);
2763 int ftable=0;
2764 if(type==LOADB_STUB||type==LOADBU_STUB)
2765 ftable=(int)readmemb;
2766 if(type==LOADH_STUB||type==LOADHU_STUB)
2767 ftable=(int)readmemh;
2768 if(type==LOADW_STUB)
2769 ftable=(int)readmem;
2770#ifndef FORCE32
2771 if(type==LOADD_STUB)
2772 ftable=(int)readmemd;
2773#endif
2774 assert(ftable!=0);
2775 emit_writeword(rs,(int)&address);
2776 //emit_pusha();
2777 save_regs(reglist);
2778#ifndef PCSX
2779 ds=i_regs!=&regs[i];
2780 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2781 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2782 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2783 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2784 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2785#endif
2786 emit_shrimm(rs,16,1);
2787 int cc=get_reg(i_regmap,CCREG);
2788 if(cc<0) {
2789 emit_loadreg(CCREG,2);
2790 }
2791 emit_movimm(ftable,0);
2792 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2793#ifndef PCSX
2794 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2795#endif
2796 //emit_readword((int)&last_count,temp);
2797 //emit_add(cc,temp,cc);
2798 //emit_writeword(cc,(int)&Count);
2799 //emit_mov(15,14);
2800 emit_call((int)&indirect_jump_indexed);
2801 //emit_callreg(rs);
2802 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2803#ifndef PCSX
2804 // We really shouldn't need to update the count here,
2805 // but not doing so causes random crashes...
2806 emit_readword((int)&Count,HOST_TEMPREG);
2807 emit_readword((int)&next_interupt,2);
2808 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2809 emit_writeword(2,(int)&last_count);
2810 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2811 if(cc<0) {
2812 emit_storereg(CCREG,HOST_TEMPREG);
2813 }
2814#endif
2815 //emit_popa();
2816 restore_regs(reglist);
2817 //if((cc=get_reg(regmap,CCREG))>=0) {
2818 // emit_loadreg(CCREG,cc);
2819 //}
2820 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2821 assert(rt>=0);
2822 if(type==LOADB_STUB)
2823 emit_movsbl((int)&readmem_dword,rt);
2824 if(type==LOADBU_STUB)
2825 emit_movzbl((int)&readmem_dword,rt);
2826 if(type==LOADH_STUB)
2827 emit_movswl((int)&readmem_dword,rt);
2828 if(type==LOADHU_STUB)
2829 emit_movzwl((int)&readmem_dword,rt);
2830 if(type==LOADW_STUB)
2831 emit_readword((int)&readmem_dword,rt);
2832 if(type==LOADD_STUB) {
2833 emit_readword((int)&readmem_dword,rt);
2834 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2835 }
2836 }
2837 emit_jmp(stubs[n][2]); // return address
2838#endif // !PCSX
2839}
2840
2841#ifdef PCSX
2842// return memhandler, or get directly accessable address and return 0
2843u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2844{
2845 u_int l1,l2=0;
2846 l1=((u_int *)table)[addr>>12];
2847 if((l1&(1<<31))==0) {
2848 u_int v=l1<<1;
2849 *addr_host=v+addr;
2850 return 0;
2851 }
2852 else {
2853 l1<<=1;
2854 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2855 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2856 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREW_STUB)
2857 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2858 else
2859 l2=((u_int *)l1)[(addr&0xfff)/4];
2860 if((l2&(1<<31))==0) {
2861 u_int v=l2<<1;
2862 *addr_host=v+(addr&0xfff);
2863 return 0;
2864 }
2865 return l2<<1;
2866 }
2867}
2868#endif
2869
2870inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2871{
2872 int rs=get_reg(regmap,target);
2873 int rth=get_reg(regmap,target|64);
2874 int rt=get_reg(regmap,target);
2875 if(rs<0) rs=get_reg(regmap,-1);
2876 assert(rs>=0);
2877#ifdef PCSX
2878 u_int handler,host_addr=0;
2879 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2880 return;
2881 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2882 if (handler==0) {
2883 if(rt<0)
2884 return;
2885 if(target==0||addr!=host_addr)
2886 emit_movimm(host_addr,rs);
2887 switch(type) {
2888 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2889 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2890 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2891 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2892 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2893 default: assert(0);
2894 }
2895 return;
2896 }
2897
2898 // call a memhandler
2899 if(rt>=0)
2900 reglist&=~(1<<rt);
2901 save_regs(reglist);
2902 if(target==0)
2903 emit_movimm(addr,0);
2904 else if(rs!=0)
2905 emit_mov(rs,0);
2906 int cc=get_reg(regmap,CCREG);
2907 if(cc<0)
2908 emit_loadreg(CCREG,2);
2909 emit_readword((int)&last_count,3);
2910 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2911 emit_add(2,3,3);
2912 emit_writeword(3,(int)&Count);
2913
2914 int offset=(int)handler-(int)out-8;
2915 if(offset<-33554432||offset>=33554432) {
2916 // unreachable memhandler, a plugin func perhaps
2917 emit_movimm(handler,1);
2918 emit_callreg(1);
2919 }
2920 else
2921 emit_call(handler);
2922 if(rt>=0) {
2923 switch(type) {
2924 case LOADB_STUB: emit_signextend8(0,rt); break;
2925 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2926 case LOADH_STUB: emit_signextend16(0,rt); break;
2927 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2928 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2929 default: assert(0);
2930 }
2931 }
2932 restore_regs(reglist);
2933#else // if !PCSX
2934 int ftable=0;
2935 if(type==LOADB_STUB||type==LOADBU_STUB)
2936 ftable=(int)readmemb;
2937 if(type==LOADH_STUB||type==LOADHU_STUB)
2938 ftable=(int)readmemh;
2939 if(type==LOADW_STUB)
2940 ftable=(int)readmem;
2941#ifndef FORCE32
2942 if(type==LOADD_STUB)
2943 ftable=(int)readmemd;
2944#endif
2945 assert(ftable!=0);
2946 if(target==0)
2947 emit_movimm(addr,rs);
2948 emit_writeword(rs,(int)&address);
2949 //emit_pusha();
2950 save_regs(reglist);
2951#ifndef PCSX
2952 if((signed int)addr>=(signed int)0xC0000000) {
2953 // Theoretically we can have a pagefault here, if the TLB has never
2954 // been enabled and the address is outside the range 80000000..BFFFFFFF
2955 // Write out the registers so the pagefault can be handled. This is
2956 // a very rare case and likely represents a bug.
2957 int ds=regmap!=regs[i].regmap;
2958 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2959 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2960 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2961 }
2962#endif
2963 //emit_shrimm(rs,16,1);
2964 int cc=get_reg(regmap,CCREG);
2965 if(cc<0) {
2966 emit_loadreg(CCREG,2);
2967 }
2968 //emit_movimm(ftable,0);
2969 emit_movimm(((u_int *)ftable)[addr>>16],0);
2970 //emit_readword((int)&last_count,12);
2971 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2972#ifndef PCSX
2973 if((signed int)addr>=(signed int)0xC0000000) {
2974 // Pagefault address
2975 int ds=regmap!=regs[i].regmap;
2976 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2977 }
2978#endif
2979 //emit_add(12,2,2);
2980 //emit_writeword(2,(int)&Count);
2981 //emit_call(((u_int *)ftable)[addr>>16]);
2982 emit_call((int)&indirect_jump);
2983#ifndef PCSX
2984 // We really shouldn't need to update the count here,
2985 // but not doing so causes random crashes...
2986 emit_readword((int)&Count,HOST_TEMPREG);
2987 emit_readword((int)&next_interupt,2);
2988 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2989 emit_writeword(2,(int)&last_count);
2990 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2991 if(cc<0) {
2992 emit_storereg(CCREG,HOST_TEMPREG);
2993 }
2994#endif
2995 //emit_popa();
2996 restore_regs(reglist);
2997 if(rt>=0) {
2998 if(type==LOADB_STUB)
2999 emit_movsbl((int)&readmem_dword,rt);
3000 if(type==LOADBU_STUB)
3001 emit_movzbl((int)&readmem_dword,rt);
3002 if(type==LOADH_STUB)
3003 emit_movswl((int)&readmem_dword,rt);
3004 if(type==LOADHU_STUB)
3005 emit_movzwl((int)&readmem_dword,rt);
3006 if(type==LOADW_STUB)
3007 emit_readword((int)&readmem_dword,rt);
3008 if(type==LOADD_STUB) {
3009 emit_readword((int)&readmem_dword,rt);
3010 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3011 }
3012 }
3013#endif // !PCSX
3014}
3015
3016do_writestub(int n)
3017{
3018 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3019 literal_pool(256);
3020 set_jump_target(stubs[n][1],(int)out);
3021 int type=stubs[n][0];
3022 int i=stubs[n][3];
3023 int rs=stubs[n][4];
3024 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3025 u_int reglist=stubs[n][7];
3026 signed char *i_regmap=i_regs->regmap;
3027 int addr=get_reg(i_regmap,AGEN1+(i&1));
3028 int rth,rt,r;
3029 int ds;
3030 if(itype[i]==C1LS||itype[i]==C2LS) {
3031 rth=get_reg(i_regmap,FTEMP|64);
3032 rt=get_reg(i_regmap,r=FTEMP);
3033 }else{
3034 rth=get_reg(i_regmap,rs2[i]|64);
3035 rt=get_reg(i_regmap,r=rs2[i]);
3036 }
3037 assert(rs>=0);
3038 assert(rt>=0);
3039 if(addr<0) addr=get_reg(i_regmap,-1);
3040 assert(addr>=0);
3041 int ftable=0;
3042 if(type==STOREB_STUB)
3043 ftable=(int)writememb;
3044 if(type==STOREH_STUB)
3045 ftable=(int)writememh;
3046 if(type==STOREW_STUB)
3047 ftable=(int)writemem;
3048#ifndef FORCE32
3049 if(type==STORED_STUB)
3050 ftable=(int)writememd;
3051#endif
3052 assert(ftable!=0);
3053 emit_writeword(rs,(int)&address);
3054 //emit_shrimm(rs,16,rs);
3055 //emit_movmem_indexedx4(ftable,rs,rs);
3056 if(type==STOREB_STUB)
3057 emit_writebyte(rt,(int)&byte);
3058 if(type==STOREH_STUB)
3059 emit_writehword(rt,(int)&hword);
3060 if(type==STOREW_STUB)
3061 emit_writeword(rt,(int)&word);
3062 if(type==STORED_STUB) {
3063#ifndef FORCE32
3064 emit_writeword(rt,(int)&dword);
3065 emit_writeword(r?rth:rt,(int)&dword+4);
3066#else
3067 printf("STORED_STUB\n");
3068#endif
3069 }
3070 //emit_pusha();
3071 save_regs(reglist);
3072#ifndef PCSX
3073 ds=i_regs!=&regs[i];
3074 int real_rs=get_reg(i_regmap,rs1[i]);
3075 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3076 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3077 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3078 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
3079#endif
3080 emit_shrimm(rs,16,1);
3081 int cc=get_reg(i_regmap,CCREG);
3082 if(cc<0) {
3083 emit_loadreg(CCREG,2);
3084 }
3085 emit_movimm(ftable,0);
3086 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
3087#ifndef PCSX
3088 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3089#endif
3090 //emit_readword((int)&last_count,temp);
3091 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3092 //emit_add(cc,temp,cc);
3093 //emit_writeword(cc,(int)&Count);
3094 emit_call((int)&indirect_jump_indexed);
3095 //emit_callreg(rs);
3096 emit_readword((int)&Count,HOST_TEMPREG);
3097 emit_readword((int)&next_interupt,2);
3098 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3099 emit_writeword(2,(int)&last_count);
3100 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3101 if(cc<0) {
3102 emit_storereg(CCREG,HOST_TEMPREG);
3103 }
3104 //emit_popa();
3105 restore_regs(reglist);
3106 //if((cc=get_reg(regmap,CCREG))>=0) {
3107 // emit_loadreg(CCREG,cc);
3108 //}
3109 emit_jmp(stubs[n][2]); // return address
3110}
3111
3112inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3113{
3114 int rs=get_reg(regmap,-1);
3115 int rth=get_reg(regmap,target|64);
3116 int rt=get_reg(regmap,target);
3117 assert(rs>=0);
3118 assert(rt>=0);
3119#ifdef PCSX
3120 if(pcsx_direct_write(type,addr,rs,rt,regmap))
3121 return;
3122#endif
3123 int ftable=0;
3124 if(type==STOREB_STUB)
3125 ftable=(int)writememb;
3126 if(type==STOREH_STUB)
3127 ftable=(int)writememh;
3128 if(type==STOREW_STUB)
3129 ftable=(int)writemem;
3130#ifndef FORCE32
3131 if(type==STORED_STUB)
3132 ftable=(int)writememd;
3133#endif
3134 assert(ftable!=0);
3135 emit_writeword(rs,(int)&address);
3136 //emit_shrimm(rs,16,rs);
3137 //emit_movmem_indexedx4(ftable,rs,rs);
3138 if(type==STOREB_STUB)
3139 emit_writebyte(rt,(int)&byte);
3140 if(type==STOREH_STUB)
3141 emit_writehword(rt,(int)&hword);
3142 if(type==STOREW_STUB)
3143 emit_writeword(rt,(int)&word);
3144 if(type==STORED_STUB) {
3145#ifndef FORCE32
3146 emit_writeword(rt,(int)&dword);
3147 emit_writeword(target?rth:rt,(int)&dword+4);
3148#else
3149 printf("STORED_STUB\n");
3150#endif
3151 }
3152 //emit_pusha();
3153 save_regs(reglist);
3154#ifndef PCSX
3155 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3156 if((signed int)addr>=(signed int)0xC0000000) {
3157 // Theoretically we can have a pagefault here, if the TLB has never
3158 // been enabled and the address is outside the range 80000000..BFFFFFFF
3159 // Write out the registers so the pagefault can be handled. This is
3160 // a very rare case and likely represents a bug.
3161 int ds=regmap!=regs[i].regmap;
3162 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3163 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3164 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3165 }
3166#endif
3167 //emit_shrimm(rs,16,1);
3168 int cc=get_reg(regmap,CCREG);
3169 if(cc<0) {
3170 emit_loadreg(CCREG,2);
3171 }
3172 //emit_movimm(ftable,0);
3173 emit_movimm(((u_int *)ftable)[addr>>16],0);
3174 //emit_readword((int)&last_count,12);
3175 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
3176#ifndef PCSX
3177 if((signed int)addr>=(signed int)0xC0000000) {
3178 // Pagefault address
3179 int ds=regmap!=regs[i].regmap;
3180 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3181 }
3182#endif
3183 //emit_add(12,2,2);
3184 //emit_writeword(2,(int)&Count);
3185 //emit_call(((u_int *)ftable)[addr>>16]);
3186 emit_call((int)&indirect_jump);
3187 emit_readword((int)&Count,HOST_TEMPREG);
3188 emit_readword((int)&next_interupt,2);
3189 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
3190 emit_writeword(2,(int)&last_count);
3191 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3192 if(cc<0) {
3193 emit_storereg(CCREG,HOST_TEMPREG);
3194 }
3195 //emit_popa();
3196 restore_regs(reglist);
3197}
3198
3199do_unalignedwritestub(int n)
3200{
3201 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3202 literal_pool(256);
3203 set_jump_target(stubs[n][1],(int)out);
3204
3205 int i=stubs[n][3];
3206 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3207 int addr=stubs[n][5];
3208 u_int reglist=stubs[n][7];
3209 signed char *i_regmap=i_regs->regmap;
3210 int temp2=get_reg(i_regmap,FTEMP);
3211 int rt;
3212 int ds, real_rs;
3213 rt=get_reg(i_regmap,rs2[i]);
3214 assert(rt>=0);
3215 assert(addr>=0);
3216 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3217 reglist|=(1<<addr);
3218 reglist&=~(1<<temp2);
3219
3220 emit_andimm(addr,0xfffffffc,temp2);
3221 emit_writeword(temp2,(int)&address);
3222
3223 save_regs(reglist);
3224#ifndef PCSX
3225 ds=i_regs!=&regs[i];
3226 real_rs=get_reg(i_regmap,rs1[i]);
3227 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3228 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3229 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3230 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
3231#endif
3232 emit_shrimm(addr,16,1);
3233 int cc=get_reg(i_regmap,CCREG);
3234 if(cc<0) {
3235 emit_loadreg(CCREG,2);
3236 }
3237 emit_movimm((u_int)readmem,0);
3238 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
3239#ifndef PCSX
3240 // pagefault address
3241 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3242#endif
3243 emit_call((int)&indirect_jump_indexed);
3244 restore_regs(reglist);
3245
3246 emit_readword((int)&readmem_dword,temp2);
3247 int temp=addr; //hmh
3248 emit_shlimm(addr,3,temp);
3249 emit_andimm(temp,24,temp);
3250#ifdef BIG_ENDIAN_MIPS
3251 if (opcode[i]==0x2e) // SWR
3252#else
3253 if (opcode[i]==0x2a) // SWL
3254#endif
3255 emit_xorimm(temp,24,temp);
3256 emit_movimm(-1,HOST_TEMPREG);
3257 if (opcode[i]==0x2a) { // SWL
3258 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3259 emit_orrshr(rt,temp,temp2);
3260 }else{
3261 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3262 emit_orrshl(rt,temp,temp2);
3263 }
3264 emit_readword((int)&address,addr);
3265 emit_writeword(temp2,(int)&word);
3266 //save_regs(reglist); // don't need to, no state changes
3267 emit_shrimm(addr,16,1);
3268 emit_movimm((u_int)writemem,0);
3269 //emit_call((int)&indirect_jump_indexed);
3270 emit_mov(15,14);
3271 emit_readword_dualindexedx4(0,1,15);
3272 emit_readword((int)&Count,HOST_TEMPREG);
3273 emit_readword((int)&next_interupt,2);
3274 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3275 emit_writeword(2,(int)&last_count);
3276 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3277 if(cc<0) {
3278 emit_storereg(CCREG,HOST_TEMPREG);
3279 }
3280 restore_regs(reglist);
3281 emit_jmp(stubs[n][2]); // return address
3282}
3283
/*
 * Debug helper: print the eight register values passed in, in the order
 * a, b, c, d, ebp, esi, edi, followed in parentheses by the int stored
 * immediately below the first parameter in memory.
 * NOTE(review): that last read goes outside the parameter object and depends
 * on the calling convention; treat the printed value as diagnostic only.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,
         ebp,esi,edi,
         (&edi)[-1]);
}
3288
3289do_invstub(int n)
3290{
3291 literal_pool(20);
3292 u_int reglist=stubs[n][3];
3293 set_jump_target(stubs[n][1],(int)out);
3294 save_regs(reglist);
3295 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3296 emit_call((int)&invalidate_addr);
3297 restore_regs(reglist);
3298 emit_jmp(stubs[n][2]); // return address
3299}
3300
3301int do_dirty_stub(int i)
3302{
3303 assem_debug("do_dirty_stub %x\n",start+i*4);
3304 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3305 #ifdef PCSX
3306 addr=(u_int)source;
3307 #endif
3308 // Careful about the code output here, verify_dirty needs to parse it.
3309 #ifdef ARMv5_ONLY
3310 emit_loadlp(addr,1);
3311 emit_loadlp((int)copy,2);
3312 emit_loadlp(slen*4,3);
3313 #else
3314 emit_movw(addr&0x0000FFFF,1);
3315 emit_movw(((u_int)copy)&0x0000FFFF,2);
3316 emit_movt(addr&0xFFFF0000,1);
3317 emit_movt(((u_int)copy)&0xFFFF0000,2);
3318 emit_movw(slen*4,3);
3319 #endif
3320 emit_movimm(start+i*4,0);
3321 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3322 int entry=(int)out;
3323 load_regs_entry(i);
3324 if(entry==(int)out) entry=instr_addr[i];
3325 emit_jmp(instr_addr[i]);
3326 return entry;
3327}
3328
3329void do_dirty_stub_ds()
3330{
3331 // Careful about the code output here, verify_dirty needs to parse it.
3332 #ifdef ARMv5_ONLY
3333 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3334 emit_loadlp((int)copy,2);
3335 emit_loadlp(slen*4,3);
3336 #else
3337 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3338 emit_movw(((u_int)copy)&0x0000FFFF,2);
3339 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3340 emit_movt(((u_int)copy)&0xFFFF0000,2);
3341 emit_movw(slen*4,3);
3342 #endif
3343 emit_movimm(start+1,0);
3344 emit_call((int)&verify_code_ds);
3345}
3346
3347do_cop1stub(int n)
3348{
3349 literal_pool(256);
3350 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3351 set_jump_target(stubs[n][1],(int)out);
3352 int i=stubs[n][3];
3353// int rs=stubs[n][4];
3354 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3355 int ds=stubs[n][6];
3356 if(!ds) {
3357 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3358 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3359 }
3360 //else {printf("fp exception in delay slot\n");}
3361 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3362 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3363 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3364 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3365 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3366}
3367
3368/* TLB */
3369
3370int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3371{
3372 if(c) {
3373 if((signed int)addr>=(signed int)0xC0000000) {
3374 // address_generation already loaded the const
3375 emit_readword_dualindexedx4(FP,map,map);
3376 }
3377 else
3378 return -1; // No mapping
3379 }
3380 else {
3381 assert(s!=map);
3382 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3383 emit_addsr12(map,s,map);
3384 // Schedule this while we wait on the load
3385 //if(x) emit_xorimm(s,x,ar);
3386 if(shift>=0) emit_shlimm(s,3,shift);
3387 if(~a) emit_andimm(s,a,ar);
3388 emit_readword_dualindexedx4(FP,map,map);
3389 }
3390 return map;
3391}
3392int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3393{
3394 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3395 emit_test(map,map);
3396 *jaddr=(int)out;
3397 emit_js(0);
3398 }
3399 return map;
3400}
3401
3402int gen_tlb_addr_r(int ar, int map) {
3403 if(map>=0) {
3404 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3405 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3406 }
3407}
3408
3409int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3410{
3411 if(c) {
3412 if(addr<0x80800000||addr>=0xC0000000) {
3413 // address_generation already loaded the const
3414 emit_readword_dualindexedx4(FP,map,map);
3415 }
3416 else
3417 return -1; // No mapping
3418 }
3419 else {
3420 assert(s!=map);
3421 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3422 emit_addsr12(map,s,map);
3423 // Schedule this while we wait on the load
3424 //if(x) emit_xorimm(s,x,ar);
3425 emit_readword_dualindexedx4(FP,map,map);
3426 }
3427 return map;
3428}
3429int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3430{
3431 if(!c||addr<0x80800000||addr>=0xC0000000) {
3432 emit_testimm(map,0x40000000);
3433 *jaddr=(int)out;
3434 emit_jne(0);
3435 }
3436}
3437
3438int gen_tlb_addr_w(int ar, int map) {
3439 if(map>=0) {
3440 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3441 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3442 }
3443}
3444
3445// Generate the address of the memory_map entry, relative to dynarec_local
3446generate_map_const(u_int addr,int reg) {
3447 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3448 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3449}
3450
3451/* Special assem */
3452
3453void shift_assemble_arm(int i,struct regstat *i_regs)
3454{
3455 if(rt1[i]) {
3456 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3457 {
3458 signed char s,t,shift;
3459 t=get_reg(i_regs->regmap,rt1[i]);
3460 s=get_reg(i_regs->regmap,rs1[i]);
3461 shift=get_reg(i_regs->regmap,rs2[i]);
3462 if(t>=0){
3463 if(rs1[i]==0)
3464 {
3465 emit_zeroreg(t);
3466 }
3467 else if(rs2[i]==0)
3468 {
3469 assert(s>=0);
3470 if(s!=t) emit_mov(s,t);
3471 }
3472 else
3473 {
3474 emit_andimm(shift,31,HOST_TEMPREG);
3475 if(opcode2[i]==4) // SLLV
3476 {
3477 emit_shl(s,HOST_TEMPREG,t);
3478 }
3479 if(opcode2[i]==6) // SRLV
3480 {
3481 emit_shr(s,HOST_TEMPREG,t);
3482 }
3483 if(opcode2[i]==7) // SRAV
3484 {
3485 emit_sar(s,HOST_TEMPREG,t);
3486 }
3487 }
3488 }
3489 } else { // DSLLV/DSRLV/DSRAV
3490 signed char sh,sl,th,tl,shift;
3491 th=get_reg(i_regs->regmap,rt1[i]|64);
3492 tl=get_reg(i_regs->regmap,rt1[i]);
3493 sh=get_reg(i_regs->regmap,rs1[i]|64);
3494 sl=get_reg(i_regs->regmap,rs1[i]);
3495 shift=get_reg(i_regs->regmap,rs2[i]);
3496 if(tl>=0){
3497 if(rs1[i]==0)
3498 {
3499 emit_zeroreg(tl);
3500 if(th>=0) emit_zeroreg(th);
3501 }
3502 else if(rs2[i]==0)
3503 {
3504 assert(sl>=0);
3505 if(sl!=tl) emit_mov(sl,tl);
3506 if(th>=0&&sh!=th) emit_mov(sh,th);
3507 }
3508 else
3509 {
3510 // FIXME: What if shift==tl ?
3511 assert(shift!=tl);
3512 int temp=get_reg(i_regs->regmap,-1);
3513 int real_th=th;
3514 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3515 assert(sl>=0);
3516 assert(sh>=0);
3517 emit_andimm(shift,31,HOST_TEMPREG);
3518 if(opcode2[i]==0x14) // DSLLV
3519 {
3520 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3521 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3522 emit_orrshr(sl,HOST_TEMPREG,th);
3523 emit_andimm(shift,31,HOST_TEMPREG);
3524 emit_testimm(shift,32);
3525 emit_shl(sl,HOST_TEMPREG,tl);
3526 if(th>=0) emit_cmovne_reg(tl,th);
3527 emit_cmovne_imm(0,tl);
3528 }
3529 if(opcode2[i]==0x16) // DSRLV
3530 {
3531 assert(th>=0);
3532 emit_shr(sl,HOST_TEMPREG,tl);
3533 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3534 emit_orrshl(sh,HOST_TEMPREG,tl);
3535 emit_andimm(shift,31,HOST_TEMPREG);
3536 emit_testimm(shift,32);
3537 emit_shr(sh,HOST_TEMPREG,th);
3538 emit_cmovne_reg(th,tl);
3539 if(real_th>=0) emit_cmovne_imm(0,th);
3540 }
3541 if(opcode2[i]==0x17) // DSRAV
3542 {
3543 assert(th>=0);
3544 emit_shr(sl,HOST_TEMPREG,tl);
3545 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3546 if(real_th>=0) {
3547 assert(temp>=0);
3548 emit_sarimm(th,31,temp);
3549 }
3550 emit_orrshl(sh,HOST_TEMPREG,tl);
3551 emit_andimm(shift,31,HOST_TEMPREG);
3552 emit_testimm(shift,32);
3553 emit_sar(sh,HOST_TEMPREG,th);
3554 emit_cmovne_reg(th,tl);
3555 if(real_th>=0) emit_cmovne_reg(temp,th);
3556 }
3557 }
3558 }
3559 }
3560 }
3561}
3562#define shift_assemble shift_assemble_arm
3563
3564void loadlr_assemble_arm(int i,struct regstat *i_regs)
3565{
3566 int s,th,tl,temp,temp2,addr,map=-1;
3567 int offset;
3568 int jaddr=0;
3569 int memtarget=0,c=0;
3570 u_int hr,reglist=0;
3571 th=get_reg(i_regs->regmap,rt1[i]|64);
3572 tl=get_reg(i_regs->regmap,rt1[i]);
3573 s=get_reg(i_regs->regmap,rs1[i]);
3574 temp=get_reg(i_regs->regmap,-1);
3575 temp2=get_reg(i_regs->regmap,FTEMP);
3576 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3577 assert(addr<0);
3578 offset=imm[i];
3579 for(hr=0;hr<HOST_REGS;hr++) {
3580 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3581 }
3582 reglist|=1<<temp;
3583 if(offset||s<0||c) addr=temp2;
3584 else addr=s;
3585 if(s>=0) {
3586 c=(i_regs->wasconst>>s)&1;
3587 if(c) {
3588 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3589 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3590 }
3591 }
3592 if(!using_tlb) {
3593 if(!c) {
3594 #ifdef RAM_OFFSET
3595 map=get_reg(i_regs->regmap,ROREG);
3596 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3597 #endif
3598 emit_shlimm(addr,3,temp);
3599 if (opcode[i]==0x22||opcode[i]==0x26) {
3600 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3601 }else{
3602 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3603 }
3604 emit_cmpimm(addr,RAM_SIZE);
3605 jaddr=(int)out;
3606 emit_jno(0);
3607 }
3608 else {
3609 if (opcode[i]==0x22||opcode[i]==0x26) {
3610 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3611 }else{
3612 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3613 }
3614 }
3615 }else{ // using tlb
3616 int a;
3617 if(c) {
3618 a=-1;
3619 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3620 a=0xFFFFFFFC; // LWL/LWR
3621 }else{
3622 a=0xFFFFFFF8; // LDL/LDR
3623 }
3624 map=get_reg(i_regs->regmap,TLREG);
3625 assert(map>=0);
3626 reglist&=~(1<<map);
3627 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3628 if(c) {
3629 if (opcode[i]==0x22||opcode[i]==0x26) {
3630 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3631 }else{
3632 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3633 }
3634 }
3635 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3636 }
3637 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3638 if(!c||memtarget) {
3639 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3640 emit_readword_indexed_tlb(0,temp2,map,temp2);
3641 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3642 }
3643 else
3644 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3645 if(rt1[i]) {
3646 assert(tl>=0);
3647 emit_andimm(temp,24,temp);
3648#ifdef BIG_ENDIAN_MIPS
3649 if (opcode[i]==0x26) // LWR
3650#else
3651 if (opcode[i]==0x22) // LWL
3652#endif
3653 emit_xorimm(temp,24,temp);
3654 emit_movimm(-1,HOST_TEMPREG);
3655 if (opcode[i]==0x26) {
3656 emit_shr(temp2,temp,temp2);
3657 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3658 }else{
3659 emit_shl(temp2,temp,temp2);
3660 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3661 }
3662 emit_or(temp2,tl,tl);
3663 }
3664 //emit_storereg(rt1[i],tl); // DEBUG
3665 }
3666 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3667 // FIXME: little endian
3668 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3669 if(!c||memtarget) {
3670 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3671 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3672 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3673 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3674 }
3675 else
3676 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3677 if(rt1[i]) {
3678 assert(th>=0);
3679 assert(tl>=0);
3680 emit_testimm(temp,32);
3681 emit_andimm(temp,24,temp);
3682 if (opcode[i]==0x1A) { // LDL
3683 emit_rsbimm(temp,32,HOST_TEMPREG);
3684 emit_shl(temp2h,temp,temp2h);
3685 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3686 emit_movimm(-1,HOST_TEMPREG);
3687 emit_shl(temp2,temp,temp2);
3688 emit_cmove_reg(temp2h,th);
3689 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3690 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3691 emit_orreq(temp2,tl,tl);
3692 emit_orrne(temp2,th,th);
3693 }
3694 if (opcode[i]==0x1B) { // LDR
3695 emit_xorimm(temp,24,temp);
3696 emit_rsbimm(temp,32,HOST_TEMPREG);
3697 emit_shr(temp2,temp,temp2);
3698 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3699 emit_movimm(-1,HOST_TEMPREG);
3700 emit_shr(temp2h,temp,temp2h);
3701 emit_cmovne_reg(temp2,tl);
3702 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3703 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3704 emit_orrne(temp2h,th,th);
3705 emit_orreq(temp2h,tl,tl);
3706 }
3707 }
3708 }
3709}
3710#define loadlr_assemble loadlr_assemble_arm
3711
3712void cop0_assemble(int i,struct regstat *i_regs)
3713{
3714 if(opcode2[i]==0) // MFC0
3715 {
3716 signed char t=get_reg(i_regs->regmap,rt1[i]);
3717 char copr=(source[i]>>11)&0x1f;
3718 //assert(t>=0); // Why does this happen? OOT is weird
3719 if(t>=0&&rt1[i]!=0) {
3720#ifdef MUPEN64
3721 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3722 emit_movimm((source[i]>>11)&0x1f,1);
3723 emit_writeword(0,(int)&PC);
3724 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3725 if(copr==9) {
3726 emit_readword((int)&last_count,ECX);
3727 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3728 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3729 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3730 emit_writeword(HOST_CCREG,(int)&Count);
3731 }
3732 emit_call((int)MFC0);
3733 emit_readword((int)&readmem_dword,t);
3734#else
3735 emit_readword((int)&reg_cop0+copr*4,t);
3736#endif
3737 }
3738 }
3739 else if(opcode2[i]==4) // MTC0
3740 {
3741 signed char s=get_reg(i_regs->regmap,rs1[i]);
3742 char copr=(source[i]>>11)&0x1f;
3743 assert(s>=0);
3744 emit_writeword(s,(int)&readmem_dword);
3745 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3746#ifdef MUPEN64
3747 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3748 emit_movimm((source[i]>>11)&0x1f,1);
3749 emit_writeword(0,(int)&PC);
3750 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3751#endif
3752 if(copr==9||copr==11||copr==12||copr==13) {
3753 emit_readword((int)&last_count,ECX);
3754 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3755 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3756 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3757 emit_writeword(HOST_CCREG,(int)&Count);
3758 }
3759 // What a mess. The status register (12) can enable interrupts,
3760 // so needs a special case to handle a pending interrupt.
3761 // The interrupt must be taken immediately, because a subsequent
3762 // instruction might disable interrupts again.
3763 if(copr==12||copr==13) {
3764#ifdef PCSX
3765 if (is_delayslot) {
3766 // burn cycles to cause cc_interrupt, which will
3767 // reschedule next_interupt. Relies on CCREG from above.
3768 assem_debug("MTC0 DS %d\n", copr);
3769 emit_writeword(HOST_CCREG,(int)&last_count);
3770 emit_movimm(0,HOST_CCREG);
3771 emit_storereg(CCREG,HOST_CCREG);
3772 emit_movimm(copr,0);
3773 emit_call((int)pcsx_mtc0_ds);
3774 return;
3775 }
3776#endif
3777 emit_movimm(start+i*4+4,0);
3778 emit_movimm(0,1);
3779 emit_writeword(0,(int)&pcaddr);
3780 emit_writeword(1,(int)&pending_exception);
3781 }
3782 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3783 //else
3784#ifdef PCSX
3785 emit_movimm(copr,0);
3786 emit_call((int)pcsx_mtc0);
3787#else
3788 emit_call((int)MTC0);
3789#endif
3790 if(copr==9||copr==11||copr==12||copr==13) {
3791 emit_readword((int)&Count,HOST_CCREG);
3792 emit_readword((int)&next_interupt,ECX);
3793 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3794 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3795 emit_writeword(ECX,(int)&last_count);
3796 emit_storereg(CCREG,HOST_CCREG);
3797 }
3798 if(copr==12||copr==13) {
3799 assert(!is_delayslot);
3800 emit_readword((int)&pending_exception,14);
3801 }
3802 emit_loadreg(rs1[i],s);
3803 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3804 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3805 if(copr==12||copr==13) {
3806 emit_test(14,14);
3807 emit_jne((int)&do_interrupt);
3808 }
3809 cop1_usable=0;
3810 }
3811 else
3812 {
3813 assert(opcode2[i]==0x10);
3814#ifndef DISABLE_TLB
3815 if((source[i]&0x3f)==0x01) // TLBR
3816 emit_call((int)TLBR);
3817 if((source[i]&0x3f)==0x02) // TLBWI
3818 emit_call((int)TLBWI_new);
3819 if((source[i]&0x3f)==0x06) { // TLBWR
3820 // The TLB entry written by TLBWR is dependent on the count,
3821 // so update the cycle count
3822 emit_readword((int)&last_count,ECX);
3823 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3824 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3825 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3826 emit_writeword(HOST_CCREG,(int)&Count);
3827 emit_call((int)TLBWR_new);
3828 }
3829 if((source[i]&0x3f)==0x08) // TLBP
3830 emit_call((int)TLBP);
3831#endif
3832#ifdef PCSX
3833 if((source[i]&0x3f)==0x10) // RFE
3834 {
3835 emit_readword((int)&Status,0);
3836 emit_andimm(0,0x3c,1);
3837 emit_andimm(0,~0xf,0);
3838 emit_orrshr_imm(1,2,0);
3839 emit_writeword(0,(int)&Status);
3840 }
3841#else
3842 if((source[i]&0x3f)==0x18) // ERET
3843 {
3844 int count=ccadj[i];
3845 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3846 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3847 emit_jmp((int)jump_eret);
3848 }
3849#endif
3850 }
3851}
3852
3853static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3854{
3855 switch (copr) {
3856 case 1:
3857 case 3:
3858 case 5:
3859 case 8:
3860 case 9:
3861 case 10:
3862 case 11:
3863 emit_readword((int)&reg_cop2d[copr],tl);
3864 emit_signextend16(tl,tl);
3865 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3866 break;
3867 case 7:
3868 case 16:
3869 case 17:
3870 case 18:
3871 case 19:
3872 emit_readword((int)&reg_cop2d[copr],tl);
3873 emit_andimm(tl,0xffff,tl);
3874 emit_writeword(tl,(int)&reg_cop2d[copr]);
3875 break;
3876 case 15:
3877 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3878 emit_writeword(tl,(int)&reg_cop2d[copr]);
3879 break;
3880 case 28:
3881 case 29:
3882 emit_readword((int)&reg_cop2d[9],temp);
3883 emit_testimm(temp,0x8000); // do we need this?
3884 emit_andimm(temp,0xf80,temp);
3885 emit_andne_imm(temp,0,temp);
3886 emit_shrimm(temp,7,tl);
3887 emit_readword((int)&reg_cop2d[10],temp);
3888 emit_testimm(temp,0x8000);
3889 emit_andimm(temp,0xf80,temp);
3890 emit_andne_imm(temp,0,temp);
3891 emit_orrshr_imm(temp,2,tl);
3892 emit_readword((int)&reg_cop2d[11],temp);
3893 emit_testimm(temp,0x8000);
3894 emit_andimm(temp,0xf80,temp);
3895 emit_andne_imm(temp,0,temp);
3896 emit_orrshl_imm(temp,3,tl);
3897 emit_writeword(tl,(int)&reg_cop2d[copr]);
3898 break;
3899 default:
3900 emit_readword((int)&reg_cop2d[copr],tl);
3901 break;
3902 }
3903}
3904
3905static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3906{
3907 switch (copr) {
3908 case 15:
3909 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3910 emit_writeword(sl,(int)&reg_cop2d[copr]);
3911 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3912 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3913 emit_writeword(sl,(int)&reg_cop2d[14]);
3914 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3915 break;
3916 case 28:
3917 emit_andimm(sl,0x001f,temp);
3918 emit_shlimm(temp,7,temp);
3919 emit_writeword(temp,(int)&reg_cop2d[9]);
3920 emit_andimm(sl,0x03e0,temp);
3921 emit_shlimm(temp,2,temp);
3922 emit_writeword(temp,(int)&reg_cop2d[10]);
3923 emit_andimm(sl,0x7c00,temp);
3924 emit_shrimm(temp,3,temp);
3925 emit_writeword(temp,(int)&reg_cop2d[11]);
3926 emit_writeword(sl,(int)&reg_cop2d[28]);
3927 break;
3928 case 30:
3929 emit_movs(sl,temp);
3930 emit_mvnmi(temp,temp);
3931 emit_clz(temp,temp);
3932 emit_writeword(sl,(int)&reg_cop2d[30]);
3933 emit_writeword(temp,(int)&reg_cop2d[31]);
3934 break;
3935 case 31:
3936 break;
3937 default:
3938 emit_writeword(sl,(int)&reg_cop2d[copr]);
3939 break;
3940 }
3941}
3942
3943void cop2_assemble(int i,struct regstat *i_regs)
3944{
3945 u_int copr=(source[i]>>11)&0x1f;
3946 signed char temp=get_reg(i_regs->regmap,-1);
3947 if (opcode2[i]==0) { // MFC2
3948 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3949 if(tl>=0&&rt1[i]!=0)
3950 cop2_get_dreg(copr,tl,temp);
3951 }
3952 else if (opcode2[i]==4) { // MTC2
3953 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3954 cop2_put_dreg(copr,sl,temp);
3955 }
3956 else if (opcode2[i]==2) // CFC2
3957 {
3958 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3959 if(tl>=0&&rt1[i]!=0)
3960 emit_readword((int)&reg_cop2c[copr],tl);
3961 }
3962 else if (opcode2[i]==6) // CTC2
3963 {
3964 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3965 switch(copr) {
3966 case 4:
3967 case 12:
3968 case 20:
3969 case 26:
3970 case 27:
3971 case 29:
3972 case 30:
3973 emit_signextend16(sl,temp);
3974 break;
3975 case 31:
3976 //value = value & 0x7ffff000;
3977 //if (value & 0x7f87e000) value |= 0x80000000;
3978 emit_shrimm(sl,12,temp);
3979 emit_shlimm(temp,12,temp);
3980 emit_testimm(temp,0x7f000000);
3981 emit_testeqimm(temp,0x00870000);
3982 emit_testeqimm(temp,0x0000e000);
3983 emit_orrne_imm(temp,0x80000000,temp);
3984 break;
3985 default:
3986 temp=sl;
3987 break;
3988 }
3989 emit_writeword(temp,(int)&reg_cop2c[copr]);
3990 assert(sl>=0);
3991 }
3992}
3993
3994void c2op_assemble(int i,struct regstat *i_regs)
3995{
3996 signed char temp=get_reg(i_regs->regmap,-1);
3997 u_int c2op=source[i]&0x3f;
3998 u_int hr,reglist=0;
3999 int need_flags;
4000 for(hr=0;hr<HOST_REGS;hr++) {
4001 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4002 }
4003 if(i==0||itype[i-1]!=C2OP)
4004 save_regs(reglist);
4005
4006 if (gte_handlers[c2op]!=NULL) {
4007 int cc=get_reg(i_regs->regmap,CCREG);
4008 emit_movimm(source[i],1); // opcode
4009 if (cc>=0&&gte_cycletab[c2op])
4010 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
4011 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4012 emit_writeword(1,(int)&psxRegs.code);
4013 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
4014 assem_debug("gte unneeded %016llx, need_flags %d\n",gte_unneeded[i+1],need_flags);
4015#ifdef ARMv5_ONLY
4016 // let's take more risk here
4017 need_flags=need_flags&&gte_reads_flags;
4018#endif
4019 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4020 }
4021
4022 if(i>=slen-1||itype[i+1]!=C2OP)
4023 restore_regs(reglist);
4024}
4025
4026void cop1_unusable(int i,struct regstat *i_regs)
4027{
4028 // XXX: should just just do the exception instead
4029 if(!cop1_usable) {
4030 int jaddr=(int)out;
4031 emit_jmp(0);
4032 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4033 cop1_usable=1;
4034 }
4035}
4036
4037void cop1_assemble(int i,struct regstat *i_regs)
4038{
4039#ifndef DISABLE_COP1
4040 // Check cop1 unusable
4041 if(!cop1_usable) {
4042 signed char rs=get_reg(i_regs->regmap,CSREG);
4043 assert(rs>=0);
4044 emit_testimm(rs,0x20000000);
4045 int jaddr=(int)out;
4046 emit_jeq(0);
4047 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4048 cop1_usable=1;
4049 }
4050 if (opcode2[i]==0) { // MFC1
4051 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4052 if(tl>=0) {
4053 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4054 emit_readword_indexed(0,tl,tl);
4055 }
4056 }
4057 else if (opcode2[i]==1) { // DMFC1
4058 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4059 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4060 if(tl>=0) {
4061 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4062 if(th>=0) emit_readword_indexed(4,tl,th);
4063 emit_readword_indexed(0,tl,tl);
4064 }
4065 }
4066 else if (opcode2[i]==4) { // MTC1
4067 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4068 signed char temp=get_reg(i_regs->regmap,-1);
4069 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4070 emit_writeword_indexed(sl,0,temp);
4071 }
4072 else if (opcode2[i]==5) { // DMTC1
4073 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4074 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4075 signed char temp=get_reg(i_regs->regmap,-1);
4076 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4077 emit_writeword_indexed(sh,4,temp);
4078 emit_writeword_indexed(sl,0,temp);
4079 }
4080 else if (opcode2[i]==2) // CFC1
4081 {
4082 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4083 if(tl>=0) {
4084 u_int copr=(source[i]>>11)&0x1f;
4085 if(copr==0) emit_readword((int)&FCR0,tl);
4086 if(copr==31) emit_readword((int)&FCR31,tl);
4087 }
4088 }
4089 else if (opcode2[i]==6) // CTC1
4090 {
4091 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4092 u_int copr=(source[i]>>11)&0x1f;
4093 assert(sl>=0);
4094 if(copr==31)
4095 {
4096 emit_writeword(sl,(int)&FCR31);
4097 // Set the rounding mode
4098 //FIXME
4099 //char temp=get_reg(i_regs->regmap,-1);
4100 //emit_andimm(sl,3,temp);
4101 //emit_fldcw_indexed((int)&rounding_modes,temp);
4102 }
4103 }
4104#else
4105 cop1_unusable(i, i_regs);
4106#endif
4107}
4108
4109void fconv_assemble_arm(int i,struct regstat *i_regs)
4110{
4111#ifndef DISABLE_COP1
4112 signed char temp=get_reg(i_regs->regmap,-1);
4113 assert(temp>=0);
4114 // Check cop1 unusable
4115 if(!cop1_usable) {
4116 signed char rs=get_reg(i_regs->regmap,CSREG);
4117 assert(rs>=0);
4118 emit_testimm(rs,0x20000000);
4119 int jaddr=(int)out;
4120 emit_jeq(0);
4121 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4122 cop1_usable=1;
4123 }
4124
4125 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4126 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4127 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4128 emit_flds(temp,15);
4129 emit_ftosizs(15,15); // float->int, truncate
4130 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4131 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4132 emit_fsts(15,temp);
4133 return;
4134 }
4135 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4136 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4137 emit_vldr(temp,7);
4138 emit_ftosizd(7,13); // double->int, truncate
4139 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4140 emit_fsts(13,temp);
4141 return;
4142 }
4143
4144 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4145 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4146 emit_flds(temp,13);
4147 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4148 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4149 emit_fsitos(13,15);
4150 emit_fsts(15,temp);
4151 return;
4152 }
4153 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4154 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4155 emit_flds(temp,13);
4156 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4157 emit_fsitod(13,7);
4158 emit_vstr(7,temp);
4159 return;
4160 }
4161
4162 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4163 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4164 emit_flds(temp,13);
4165 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4166 emit_fcvtds(13,7);
4167 emit_vstr(7,temp);
4168 return;
4169 }
4170 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4171 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4172 emit_vldr(temp,7);
4173 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4174 emit_fcvtsd(7,13);
4175 emit_fsts(13,temp);
4176 return;
4177 }
4178 #endif
4179
4180 // C emulation code
4181
4182 u_int hr,reglist=0;
4183 for(hr=0;hr<HOST_REGS;hr++) {
4184 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4185 }
4186 save_regs(reglist);
4187
4188 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4189 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4190 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4191 emit_call((int)cvt_s_w);
4192 }
4193 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4194 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4195 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4196 emit_call((int)cvt_d_w);
4197 }
4198 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4199 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4200 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4201 emit_call((int)cvt_s_l);
4202 }
4203 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4204 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4205 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4206 emit_call((int)cvt_d_l);
4207 }
4208
4209 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4210 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4211 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4212 emit_call((int)cvt_d_s);
4213 }
4214 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4215 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4216 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4217 emit_call((int)cvt_w_s);
4218 }
4219 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4220 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4221 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4222 emit_call((int)cvt_l_s);
4223 }
4224
4225 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4226 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4227 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4228 emit_call((int)cvt_s_d);
4229 }
4230 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4231 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4232 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4233 emit_call((int)cvt_w_d);
4234 }
4235 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4236 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4237 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4238 emit_call((int)cvt_l_d);
4239 }
4240
4241 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4242 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4243 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4244 emit_call((int)round_l_s);
4245 }
4246 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4247 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4248 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4249 emit_call((int)trunc_l_s);
4250 }
4251 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4252 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4253 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4254 emit_call((int)ceil_l_s);
4255 }
4256 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4257 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4258 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4259 emit_call((int)floor_l_s);
4260 }
4261 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4262 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4263 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4264 emit_call((int)round_w_s);
4265 }
4266 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4267 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4268 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4269 emit_call((int)trunc_w_s);
4270 }
4271 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4272 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4273 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4274 emit_call((int)ceil_w_s);
4275 }
4276 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4277 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4278 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4279 emit_call((int)floor_w_s);
4280 }
4281
4282 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4283 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4284 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4285 emit_call((int)round_l_d);
4286 }
4287 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4288 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4289 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4290 emit_call((int)trunc_l_d);
4291 }
4292 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4293 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4294 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4295 emit_call((int)ceil_l_d);
4296 }
4297 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4298 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4299 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4300 emit_call((int)floor_l_d);
4301 }
4302 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4303 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4304 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4305 emit_call((int)round_w_d);
4306 }
4307 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4308 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4309 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4310 emit_call((int)trunc_w_d);
4311 }
4312 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4313 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4314 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4315 emit_call((int)ceil_w_d);
4316 }
4317 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4318 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4319 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4320 emit_call((int)floor_w_d);
4321 }
4322
4323 restore_regs(reglist);
4324#else
4325 cop1_unusable(i, i_regs);
4326#endif
4327}
4328#define fconv_assemble fconv_assemble_arm
4329
4330void fcomp_assemble(int i,struct regstat *i_regs)
4331{
4332#ifndef DISABLE_COP1
4333 signed char fs=get_reg(i_regs->regmap,FSREG);
4334 signed char temp=get_reg(i_regs->regmap,-1);
4335 assert(temp>=0);
4336 // Check cop1 unusable
4337 if(!cop1_usable) {
4338 signed char cs=get_reg(i_regs->regmap,CSREG);
4339 assert(cs>=0);
4340 emit_testimm(cs,0x20000000);
4341 int jaddr=(int)out;
4342 emit_jeq(0);
4343 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4344 cop1_usable=1;
4345 }
4346
4347 if((source[i]&0x3f)==0x30) {
4348 emit_andimm(fs,~0x800000,fs);
4349 return;
4350 }
4351
4352 if((source[i]&0x3e)==0x38) {
4353 // sf/ngle - these should throw exceptions for NaNs
4354 emit_andimm(fs,~0x800000,fs);
4355 return;
4356 }
4357
4358 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4359 if(opcode2[i]==0x10) {
4360 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4361 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4362 emit_orimm(fs,0x800000,fs);
4363 emit_flds(temp,14);
4364 emit_flds(HOST_TEMPREG,15);
4365 emit_fcmps(14,15);
4366 emit_fmstat();
4367 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4368 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4369 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4370 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4371 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4372 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4373 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4374 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4375 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4376 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4377 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4378 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4379 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4380 return;
4381 }
4382 if(opcode2[i]==0x11) {
4383 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4384 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4385 emit_orimm(fs,0x800000,fs);
4386 emit_vldr(temp,6);
4387 emit_vldr(HOST_TEMPREG,7);
4388 emit_fcmpd(6,7);
4389 emit_fmstat();
4390 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4391 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4392 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4393 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4394 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4395 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4396 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4397 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4398 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4399 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4400 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4401 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4402 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4403 return;
4404 }
4405 #endif
4406
4407 // C only
4408
4409 u_int hr,reglist=0;
4410 for(hr=0;hr<HOST_REGS;hr++) {
4411 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4412 }
4413 reglist&=~(1<<fs);
4414 save_regs(reglist);
4415 if(opcode2[i]==0x10) {
4416 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4417 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4418 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4419 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4420 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4421 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4422 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4423 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4424 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4425 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4426 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4427 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4428 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4429 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4430 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4431 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4432 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4433 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4434 }
4435 if(opcode2[i]==0x11) {
4436 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4437 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4438 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4439 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4440 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4441 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4442 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4443 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4444 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4445 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4446 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4447 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4448 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4449 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4450 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4451 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4452 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4453 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4454 }
4455 restore_regs(reglist);
4456 emit_loadreg(FSREG,fs);
4457#else
4458 cop1_unusable(i, i_regs);
4459#endif
4460}
4461
4462void float_assemble(int i,struct regstat *i_regs)
4463{
4464#ifndef DISABLE_COP1
4465 signed char temp=get_reg(i_regs->regmap,-1);
4466 assert(temp>=0);
4467 // Check cop1 unusable
4468 if(!cop1_usable) {
4469 signed char cs=get_reg(i_regs->regmap,CSREG);
4470 assert(cs>=0);
4471 emit_testimm(cs,0x20000000);
4472 int jaddr=(int)out;
4473 emit_jeq(0);
4474 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4475 cop1_usable=1;
4476 }
4477
4478 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4479 if((source[i]&0x3f)==6) // mov
4480 {
4481 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4482 if(opcode2[i]==0x10) {
4483 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4484 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4485 emit_readword_indexed(0,temp,temp);
4486 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4487 }
4488 if(opcode2[i]==0x11) {
4489 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4490 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4491 emit_vldr(temp,7);
4492 emit_vstr(7,HOST_TEMPREG);
4493 }
4494 }
4495 return;
4496 }
4497
4498 if((source[i]&0x3f)>3)
4499 {
4500 if(opcode2[i]==0x10) {
4501 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4502 emit_flds(temp,15);
4503 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4504 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4505 }
4506 if((source[i]&0x3f)==4) // sqrt
4507 emit_fsqrts(15,15);
4508 if((source[i]&0x3f)==5) // abs
4509 emit_fabss(15,15);
4510 if((source[i]&0x3f)==7) // neg
4511 emit_fnegs(15,15);
4512 emit_fsts(15,temp);
4513 }
4514 if(opcode2[i]==0x11) {
4515 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4516 emit_vldr(temp,7);
4517 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4518 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4519 }
4520 if((source[i]&0x3f)==4) // sqrt
4521 emit_fsqrtd(7,7);
4522 if((source[i]&0x3f)==5) // abs
4523 emit_fabsd(7,7);
4524 if((source[i]&0x3f)==7) // neg
4525 emit_fnegd(7,7);
4526 emit_vstr(7,temp);
4527 }
4528 return;
4529 }
4530 if((source[i]&0x3f)<4)
4531 {
4532 if(opcode2[i]==0x10) {
4533 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4534 }
4535 if(opcode2[i]==0x11) {
4536 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4537 }
4538 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4539 if(opcode2[i]==0x10) {
4540 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4541 emit_flds(temp,15);
4542 emit_flds(HOST_TEMPREG,13);
4543 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4544 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4545 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4546 }
4547 }
4548 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4549 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4550 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4551 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4552 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4553 emit_fsts(15,HOST_TEMPREG);
4554 }else{
4555 emit_fsts(15,temp);
4556 }
4557 }
4558 else if(opcode2[i]==0x11) {
4559 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4560 emit_vldr(temp,7);
4561 emit_vldr(HOST_TEMPREG,6);
4562 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4563 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4564 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4565 }
4566 }
4567 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4568 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4569 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4570 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4571 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4572 emit_vstr(7,HOST_TEMPREG);
4573 }else{
4574 emit_vstr(7,temp);
4575 }
4576 }
4577 }
4578 else {
4579 if(opcode2[i]==0x10) {
4580 emit_flds(temp,15);
4581 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4582 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4583 }
4584 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4585 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4586 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4587 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4588 emit_fsts(15,temp);
4589 }
4590 else if(opcode2[i]==0x11) {
4591 emit_vldr(temp,7);
4592 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4593 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4594 }
4595 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4596 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4597 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4598 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4599 emit_vstr(7,temp);
4600 }
4601 }
4602 return;
4603 }
4604 #endif
4605
4606 u_int hr,reglist=0;
4607 for(hr=0;hr<HOST_REGS;hr++) {
4608 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4609 }
4610 if(opcode2[i]==0x10) { // Single precision
4611 save_regs(reglist);
4612 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4613 if((source[i]&0x3f)<4) {
4614 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4615 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4616 }else{
4617 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4618 }
4619 switch(source[i]&0x3f)
4620 {
4621 case 0x00: emit_call((int)add_s);break;
4622 case 0x01: emit_call((int)sub_s);break;
4623 case 0x02: emit_call((int)mul_s);break;
4624 case 0x03: emit_call((int)div_s);break;
4625 case 0x04: emit_call((int)sqrt_s);break;
4626 case 0x05: emit_call((int)abs_s);break;
4627 case 0x06: emit_call((int)mov_s);break;
4628 case 0x07: emit_call((int)neg_s);break;
4629 }
4630 restore_regs(reglist);
4631 }
4632 if(opcode2[i]==0x11) { // Double precision
4633 save_regs(reglist);
4634 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4635 if((source[i]&0x3f)<4) {
4636 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4637 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4638 }else{
4639 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4640 }
4641 switch(source[i]&0x3f)
4642 {
4643 case 0x00: emit_call((int)add_d);break;
4644 case 0x01: emit_call((int)sub_d);break;
4645 case 0x02: emit_call((int)mul_d);break;
4646 case 0x03: emit_call((int)div_d);break;
4647 case 0x04: emit_call((int)sqrt_d);break;
4648 case 0x05: emit_call((int)abs_d);break;
4649 case 0x06: emit_call((int)mov_d);break;
4650 case 0x07: emit_call((int)neg_d);break;
4651 }
4652 restore_regs(reglist);
4653 }
4654#else
4655 cop1_unusable(i, i_regs);
4656#endif
4657}
4658
4659void multdiv_assemble_arm(int i,struct regstat *i_regs)
4660{
4661 // case 0x18: MULT
4662 // case 0x19: MULTU
4663 // case 0x1A: DIV
4664 // case 0x1B: DIVU
4665 // case 0x1C: DMULT
4666 // case 0x1D: DMULTU
4667 // case 0x1E: DDIV
4668 // case 0x1F: DDIVU
4669 if(rs1[i]&&rs2[i])
4670 {
4671 if((opcode2[i]&4)==0) // 32-bit
4672 {
4673 if(opcode2[i]==0x18) // MULT
4674 {
4675 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4676 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4677 signed char hi=get_reg(i_regs->regmap,HIREG);
4678 signed char lo=get_reg(i_regs->regmap,LOREG);
4679 assert(m1>=0);
4680 assert(m2>=0);
4681 assert(hi>=0);
4682 assert(lo>=0);
4683 emit_smull(m1,m2,hi,lo);
4684 }
4685 if(opcode2[i]==0x19) // MULTU
4686 {
4687 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4688 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4689 signed char hi=get_reg(i_regs->regmap,HIREG);
4690 signed char lo=get_reg(i_regs->regmap,LOREG);
4691 assert(m1>=0);
4692 assert(m2>=0);
4693 assert(hi>=0);
4694 assert(lo>=0);
4695 emit_umull(m1,m2,hi,lo);
4696 }
4697 if(opcode2[i]==0x1A) // DIV
4698 {
4699 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4700 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4701 assert(d1>=0);
4702 assert(d2>=0);
4703 signed char quotient=get_reg(i_regs->regmap,LOREG);
4704 signed char remainder=get_reg(i_regs->regmap,HIREG);
4705 assert(quotient>=0);
4706 assert(remainder>=0);
4707 emit_movs(d1,remainder);
4708 emit_movimm(0xffffffff,quotient);
4709 emit_negmi(quotient,quotient); // .. quotient and ..
4710 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
4711 emit_movs(d2,HOST_TEMPREG);
4712 emit_jeq((int)out+52); // Division by zero
4713 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4714 emit_clz(HOST_TEMPREG,quotient);
4715 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4716 emit_orimm(quotient,1<<31,quotient);
4717 emit_shr(quotient,quotient,quotient);
4718 emit_cmp(remainder,HOST_TEMPREG);
4719 emit_subcs(remainder,HOST_TEMPREG,remainder);
4720 emit_adcs(quotient,quotient,quotient);
4721 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4722 emit_jcc((int)out-16); // -4
4723 emit_teq(d1,d2);
4724 emit_negmi(quotient,quotient);
4725 emit_test(d1,d1);
4726 emit_negmi(remainder,remainder);
4727 }
4728 if(opcode2[i]==0x1B) // DIVU
4729 {
4730 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4731 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4732 assert(d1>=0);
4733 assert(d2>=0);
4734 signed char quotient=get_reg(i_regs->regmap,LOREG);
4735 signed char remainder=get_reg(i_regs->regmap,HIREG);
4736 assert(quotient>=0);
4737 assert(remainder>=0);
4738 emit_mov(d1,remainder);
4739 emit_movimm(0xffffffff,quotient); // div0 case
4740 emit_test(d2,d2);
4741 emit_jeq((int)out+40); // Division by zero
4742 emit_clz(d2,HOST_TEMPREG);
4743 emit_movimm(1<<31,quotient);
4744 emit_shl(d2,HOST_TEMPREG,d2);
4745 emit_shr(quotient,HOST_TEMPREG,quotient);
4746 emit_cmp(remainder,d2);
4747 emit_subcs(remainder,d2,remainder);
4748 emit_adcs(quotient,quotient,quotient);
4749 emit_shrcc_imm(d2,1,d2);
4750 emit_jcc((int)out-16); // -4
4751 }
4752 }
4753 else // 64-bit
4754 {
4755 if(opcode2[i]==0x1C) // DMULT
4756 {
4757 assert(opcode2[i]!=0x1C);
4758 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4759 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4760 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4761 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4762 assert(m1h>=0);
4763 assert(m2h>=0);
4764 assert(m1l>=0);
4765 assert(m2l>=0);
4766 emit_pushreg(m2h);
4767 emit_pushreg(m2l);
4768 emit_pushreg(m1h);
4769 emit_pushreg(m1l);
4770 emit_call((int)&mult64);
4771 emit_popreg(m1l);
4772 emit_popreg(m1h);
4773 emit_popreg(m2l);
4774 emit_popreg(m2h);
4775 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4776 signed char hil=get_reg(i_regs->regmap,HIREG);
4777 if(hih>=0) emit_loadreg(HIREG|64,hih);
4778 if(hil>=0) emit_loadreg(HIREG,hil);
4779 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4780 signed char lol=get_reg(i_regs->regmap,LOREG);
4781 if(loh>=0) emit_loadreg(LOREG|64,loh);
4782 if(lol>=0) emit_loadreg(LOREG,lol);
4783 }
4784 if(opcode2[i]==0x1D) // DMULTU
4785 {
4786 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4787 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4788 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4789 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4790 assert(m1h>=0);
4791 assert(m2h>=0);
4792 assert(m1l>=0);
4793 assert(m2l>=0);
4794 save_regs(0x100f);
4795 if(m1l!=0) emit_mov(m1l,0);
4796 if(m1h==0) emit_readword((int)&dynarec_local,1);
4797 else if(m1h>1) emit_mov(m1h,1);
4798 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4799 else if(m2l>2) emit_mov(m2l,2);
4800 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4801 else if(m2h>3) emit_mov(m2h,3);
4802 emit_call((int)&multu64);
4803 restore_regs(0x100f);
4804 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4805 signed char hil=get_reg(i_regs->regmap,HIREG);
4806 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4807 signed char lol=get_reg(i_regs->regmap,LOREG);
4808 /*signed char temp=get_reg(i_regs->regmap,-1);
4809 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4810 signed char rl=get_reg(i_regs->regmap,HIREG);
4811 assert(m1h>=0);
4812 assert(m2h>=0);
4813 assert(m1l>=0);
4814 assert(m2l>=0);
4815 assert(temp>=0);
4816 //emit_mov(m1l,EAX);
4817 //emit_mul(m2l);
4818 emit_umull(rl,rh,m1l,m2l);
4819 emit_storereg(LOREG,rl);
4820 emit_mov(rh,temp);
4821 //emit_mov(m1h,EAX);
4822 //emit_mul(m2l);
4823 emit_umull(rl,rh,m1h,m2l);
4824 emit_adds(rl,temp,temp);
4825 emit_adcimm(rh,0,rh);
4826 emit_storereg(HIREG,rh);
4827 //emit_mov(m2h,EAX);
4828 //emit_mul(m1l);
4829 emit_umull(rl,rh,m1l,m2h);
4830 emit_adds(rl,temp,temp);
4831 emit_adcimm(rh,0,rh);
4832 emit_storereg(LOREG|64,temp);
4833 emit_mov(rh,temp);
4834 //emit_mov(m2h,EAX);
4835 //emit_mul(m1h);
4836 emit_umull(rl,rh,m1h,m2h);
4837 emit_adds(rl,temp,rl);
4838 emit_loadreg(HIREG,temp);
4839 emit_adcimm(rh,0,rh);
4840 emit_adds(rl,temp,rl);
4841 emit_adcimm(rh,0,rh);
4842 // DEBUG
4843 /*
4844 emit_pushreg(m2h);
4845 emit_pushreg(m2l);
4846 emit_pushreg(m1h);
4847 emit_pushreg(m1l);
4848 emit_call((int)&multu64);
4849 emit_popreg(m1l);
4850 emit_popreg(m1h);
4851 emit_popreg(m2l);
4852 emit_popreg(m2h);
4853 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4854 signed char hil=get_reg(i_regs->regmap,HIREG);
4855 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4856 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4857 */
4858 // Shouldn't be necessary
4859 //char loh=get_reg(i_regs->regmap,LOREG|64);
4860 //char lol=get_reg(i_regs->regmap,LOREG);
4861 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4862 //if(lol>=0) emit_loadreg(LOREG,lol);
4863 }
4864 if(opcode2[i]==0x1E) // DDIV
4865 {
4866 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4867 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4868 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4869 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4870 assert(d1h>=0);
4871 assert(d2h>=0);
4872 assert(d1l>=0);
4873 assert(d2l>=0);
4874 save_regs(0x100f);
4875 if(d1l!=0) emit_mov(d1l,0);
4876 if(d1h==0) emit_readword((int)&dynarec_local,1);
4877 else if(d1h>1) emit_mov(d1h,1);
4878 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4879 else if(d2l>2) emit_mov(d2l,2);
4880 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4881 else if(d2h>3) emit_mov(d2h,3);
4882 emit_call((int)&div64);
4883 restore_regs(0x100f);
4884 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4885 signed char hil=get_reg(i_regs->regmap,HIREG);
4886 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4887 signed char lol=get_reg(i_regs->regmap,LOREG);
4888 if(hih>=0) emit_loadreg(HIREG|64,hih);
4889 if(hil>=0) emit_loadreg(HIREG,hil);
4890 if(loh>=0) emit_loadreg(LOREG|64,loh);
4891 if(lol>=0) emit_loadreg(LOREG,lol);
4892 }
4893 if(opcode2[i]==0x1F) // DDIVU
4894 {
4895 //u_int hr,reglist=0;
4896 //for(hr=0;hr<HOST_REGS;hr++) {
4897 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4898 //}
4899 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4900 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4901 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4902 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4903 assert(d1h>=0);
4904 assert(d2h>=0);
4905 assert(d1l>=0);
4906 assert(d2l>=0);
4907 save_regs(0x100f);
4908 if(d1l!=0) emit_mov(d1l,0);
4909 if(d1h==0) emit_readword((int)&dynarec_local,1);
4910 else if(d1h>1) emit_mov(d1h,1);
4911 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4912 else if(d2l>2) emit_mov(d2l,2);
4913 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4914 else if(d2h>3) emit_mov(d2h,3);
4915 emit_call((int)&divu64);
4916 restore_regs(0x100f);
4917 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4918 signed char hil=get_reg(i_regs->regmap,HIREG);
4919 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4920 signed char lol=get_reg(i_regs->regmap,LOREG);
4921 if(hih>=0) emit_loadreg(HIREG|64,hih);
4922 if(hil>=0) emit_loadreg(HIREG,hil);
4923 if(loh>=0) emit_loadreg(LOREG|64,loh);
4924 if(lol>=0) emit_loadreg(LOREG,lol);
4925 }
4926 }
4927 }
4928 else
4929 {
4930 // Multiply by zero is zero.
4931 // MIPS does not have a divide by zero exception.
4932 // The result is undefined, we return zero.
4933 signed char hr=get_reg(i_regs->regmap,HIREG);
4934 signed char lr=get_reg(i_regs->regmap,LOREG);
4935 if(hr>=0) emit_zeroreg(hr);
4936 if(lr>=0) emit_zeroreg(lr);
4937 }
4938}
4939#define multdiv_assemble multdiv_assemble_arm
4940
// Intentionally emits nothing on ARM.  The x86 backend uses this hook to
// put the value 0xf8 into register r; on ARM the hash is computed with a
// single instruction in do_rhash() instead.
void do_preload_rhash(int r) {
  (void)r;
}
4945
4946void do_preload_rhtbl(int ht) {
4947 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4948}
4949
// Emits the hash computation for a mini_ht lookup: rh = rs & 0xf8.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4953
4954void do_miniht_load(int ht,int rh) {
4955 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4956 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4957}
4958
4959void do_miniht_jump(int rs,int rh,int ht) {
4960 emit_cmp(rh,rs);
4961 emit_ldreq_indexed(ht,4,15);
4962 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4963 emit_mov(rs,7);
4964 emit_jmp(jump_vaddr_reg[7]);
4965 #else
4966 emit_jmp(jump_vaddr_reg[rs]);
4967 #endif
4968}
4969
4970void do_miniht_insert(u_int return_address,int rt,int temp) {
4971 #ifdef ARMv5_ONLY
4972 emit_movimm(return_address,rt); // PC into link register
4973 add_to_linker((int)out,return_address,1);
4974 emit_pcreladdr(temp);
4975 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4976 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4977 #else
4978 emit_movw(return_address&0x0000FFFF,rt);
4979 add_to_linker((int)out,return_address,1);
4980 emit_pcreladdr(temp);
4981 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4982 emit_movt(return_address&0xFFFF0000,rt);
4983 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4984 #endif
4985}
4986
4987// Sign-extend to 64 bits and write out upper half of a register
4988// This is useful where we have a 32-bit value in a register, and want to
4989// keep it in a 32-bit register, but can't guarantee that it won't be read
4990// as a 64-bit value later.
4991void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4992{
4993#ifndef FORCE32
4994 if(is32_pre==is32) return;
4995 int hr,reg;
4996 for(hr=0;hr<HOST_REGS;hr++) {
4997 if(hr!=EXCLUDE_REG) {
4998 //if(pre[hr]==entry[hr]) {
4999 if((reg=pre[hr])>=0) {
5000 if((dirty>>hr)&1) {
5001 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5002 emit_sarimm(hr,31,HOST_TEMPREG);
5003 emit_storereg(reg|64,HOST_TEMPREG);
5004 }
5005 }
5006 }
5007 //}
5008 }
5009 }
5010#endif
5011}
5012
5013void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5014{
5015 //if(dirty_pre==dirty) return;
5016 int hr,reg,new_hr;
5017 for(hr=0;hr<HOST_REGS;hr++) {
5018 if(hr!=EXCLUDE_REG) {
5019 reg=pre[hr];
5020 if(((~u)>>(reg&63))&1) {
5021 if(reg>0) {
5022 if(((dirty_pre&~dirty)>>hr)&1) {
5023 if(reg>0&&reg<34) {
5024 emit_storereg(reg,hr);
5025 if( ((is32_pre&~uu)>>reg)&1 ) {
5026 emit_sarimm(hr,31,HOST_TEMPREG);
5027 emit_storereg(reg|64,HOST_TEMPREG);
5028 }
5029 }
5030 else if(reg>=64) {
5031 emit_storereg(reg,hr);
5032 }
5033 }
5034 }
5035 }
5036 }
5037 }
5038}
5039
5040
5041/* using strd could possibly help but you'd have to allocate registers in pairs
5042void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5043{
5044 int hr;
5045 int wrote=-1;
5046 for(hr=HOST_REGS-1;hr>=0;hr--) {
5047 if(hr!=EXCLUDE_REG) {
5048 if(pre[hr]!=entry[hr]) {
5049 if(pre[hr]>=0) {
5050 if((dirty>>hr)&1) {
5051 if(get_reg(entry,pre[hr])<0) {
5052 if(pre[hr]<64) {
5053 if(!((u>>pre[hr])&1)) {
5054 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5055 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5056 emit_sarimm(hr,31,hr+1);
5057 emit_strdreg(pre[hr],hr);
5058 }
5059 else
5060 emit_storereg(pre[hr],hr);
5061 }else{
5062 emit_storereg(pre[hr],hr);
5063 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5064 emit_sarimm(hr,31,hr);
5065 emit_storereg(pre[hr]|64,hr);
5066 }
5067 }
5068 }
5069 }else{
5070 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5071 emit_storereg(pre[hr],hr);
5072 }
5073 }
5074 wrote=hr;
5075 }
5076 }
5077 }
5078 }
5079 }
5080 }
5081 for(hr=0;hr<HOST_REGS;hr++) {
5082 if(hr!=EXCLUDE_REG) {
5083 if(pre[hr]!=entry[hr]) {
5084 if(pre[hr]>=0) {
5085 int nr;
5086 if((nr=get_reg(entry,pre[hr]))>=0) {
5087 emit_mov(hr,nr);
5088 }
5089 }
5090 }
5091 }
5092 }
5093}
5094#define wb_invalidate wb_invalidate_arm
5095*/
5096
5097// Clearing the cache is rather slow on ARM Linux, so mark the areas
5098// that need to be cleared, and then only clear these areas once.
5099void do_clear_cache()
5100{
5101 int i,j;
5102 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5103 {
5104 u_int bitmap=needs_clear_cache[i];
5105 if(bitmap) {
5106 u_int start,end;
5107 for(j=0;j<32;j++)
5108 {
5109 if(bitmap&(1<<j)) {
5110 start=BASE_ADDR+i*131072+j*4096;
5111 end=start+4095;
5112 j++;
5113 while(j<32) {
5114 if(bitmap&(1<<j)) {
5115 end+=4096;
5116 j++;
5117 }else{
5118 __clear_cache((void *)start,(void *)end);
5119 break;
5120 }
5121 }
5122 }
5123 }
5124 needs_clear_cache[i]=0;
5125 }
5126 }
5127}
5128
5129// CPU-architecture-specific initialization
5130void arch_init() {
5131#ifndef DISABLE_COP1
5132 rounding_modes[0]=0x0<<22; // round
5133 rounding_modes[1]=0x3<<22; // trunc
5134 rounding_modes[2]=0x1<<22; // ceil
5135 rounding_modes[3]=0x2<<22; // floor
5136#endif
5137}
5138
5139// vim:shiftwidth=2:expandtab