gte: split arm code for pollux, generate flagless handlers
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
0bbd1454 69void invalidate_addr_r0();
70void invalidate_addr_r1();
71void invalidate_addr_r2();
72void invalidate_addr_r3();
73void invalidate_addr_r4();
74void invalidate_addr_r5();
75void invalidate_addr_r6();
76void invalidate_addr_r7();
77void invalidate_addr_r8();
78void invalidate_addr_r9();
79void invalidate_addr_r10();
80void invalidate_addr_r12();
81
82const u_int invalidate_addr_reg[16] = {
83 (int)invalidate_addr_r0,
84 (int)invalidate_addr_r1,
85 (int)invalidate_addr_r2,
86 (int)invalidate_addr_r3,
87 (int)invalidate_addr_r4,
88 (int)invalidate_addr_r5,
89 (int)invalidate_addr_r6,
90 (int)invalidate_addr_r7,
91 (int)invalidate_addr_r8,
92 (int)invalidate_addr_r9,
93 (int)invalidate_addr_r10,
94 0,
95 (int)invalidate_addr_r12,
96 0,
97 0,
98 0};
99
57871462 100#include "fpu.h"
101
// Array of 1<<(TARGET_SIZE_2-17) words.
// NOTE(review): presumably a bitmap with one bit per 4 KiB of the translation
// cache (32 bits per word), marking regions whose instruction cache still has
// to be flushed - confirm against the code that sets and clears it.
dd3a91a1 102unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
57871462 104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
15776b68 173 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 174 literals[literalcount][0]=addr;
175 literals[literalcount][1]=val;
176 literalcount++;
177}
178
f76eeef9 179void *kill_pointer(void *stub)
57871462 180{
181 int *ptr=(int *)(stub+4);
182 assert((*ptr&0x0ff00000)==0x05900000);
183 u_int offset=*ptr&0xfff;
184 int **l_ptr=(void *)ptr+offset+8;
185 int *i_ptr=*l_ptr;
186 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 187 return i_ptr;
57871462 188}
189
f968d35d 190// find where external branch is liked to using addr of it's stub:
191// get address that insn one after stub loads (dyna_linker arg1),
192// treat it as a pointer to branch insn,
193// return addr where that branch jumps to
57871462 194int get_pointer(void *stub)
195{
196 //printf("get_pointer(%x)\n",(int)stub);
197 int *ptr=(int *)(stub+4);
f968d35d 198 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 199 u_int offset=*ptr&0xfff;
200 int **l_ptr=(void *)ptr+offset+8;
201 int *i_ptr=*l_ptr;
202 assert((*i_ptr&0x0f000000)==0x0a000000);
203 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
204}
205
206// Find the "clean" entry point from a "dirty" entry point
207// by skipping past the call to verify_code
208u_int get_clean_addr(int addr)
209{
210 int *ptr=(int *)addr;
211 #ifdef ARMv5_ONLY
212 ptr+=4;
213 #else
214 ptr+=6;
215 #endif
216 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
217 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
218 ptr++;
219 if((*ptr&0xFF000000)==0xea000000) {
220 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
221 }
222 return (u_int)ptr;
223}
224
225int verify_dirty(int addr)
226{
227 u_int *ptr=(u_int *)addr;
228 #ifdef ARMv5_ONLY
229 // get from literal pool
15776b68 230 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 231 u_int offset=*ptr&0xfff;
232 u_int *l_ptr=(void *)ptr+offset+8;
233 u_int source=l_ptr[0];
234 u_int copy=l_ptr[1];
235 u_int len=l_ptr[2];
236 ptr+=4;
237 #else
238 // ARMv7 movw/movt
239 assert((*ptr&0xFFF00000)==0xe3000000);
240 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
241 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
242 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
243 ptr+=6;
244 #endif
245 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
246 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 247 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 248 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
249 unsigned int page=source>>12;
250 unsigned int map_value=memory_map[page];
251 if(map_value>=0x80000000) return 0;
252 while(page<((source+len-1)>>12)) {
253 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
254 }
255 source = source+(map_value<<2);
256 }
257 //printf("verify_dirty: %x %x %x\n",source,copy,len);
258 return !memcmp((void *)source,(void *)copy,len);
259}
260
261// This doesn't necessarily find all clean entry points, just
262// guarantees that it's not dirty
263int isclean(int addr)
264{
265 #ifdef ARMv5_ONLY
266 int *ptr=((u_int *)addr)+4;
267 #else
268 int *ptr=((u_int *)addr)+6;
269 #endif
270 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
271 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
272 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
273 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
274 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
275 return 1;
276}
277
278void get_bounds(int addr,u_int *start,u_int *end)
279{
280 u_int *ptr=(u_int *)addr;
281 #ifdef ARMv5_ONLY
282 // get from literal pool
15776b68 283 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 284 u_int offset=*ptr&0xfff;
285 u_int *l_ptr=(void *)ptr+offset+8;
286 u_int source=l_ptr[0];
287 //u_int copy=l_ptr[1];
288 u_int len=l_ptr[2];
289 ptr+=4;
290 #else
291 // ARMv7 movw/movt
292 assert((*ptr&0xFFF00000)==0xe3000000);
293 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
294 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
295 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
296 ptr+=6;
297 #endif
298 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
299 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 300 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 301 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
302 if(memory_map[source>>12]>=0x80000000) source = 0;
303 else source = source+(memory_map[source>>12]<<2);
304 }
305 *start=source;
306 *end=source+len;
307}
308
309/* Register allocation */
310
311// Note: registers are allocated clean (unmodified state)
312// if you intend to modify the register, you must call dirty_reg().
313void alloc_reg(struct regstat *cur,int i,signed char reg)
314{
315 int r,hr;
316 int preferred_reg = (reg&7);
317 if(reg==CCREG) preferred_reg=HOST_CCREG;
318 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
319
320 // Don't allocate unused registers
321 if((cur->u>>reg)&1) return;
322
323 // see if it's already allocated
324 for(hr=0;hr<HOST_REGS;hr++)
325 {
326 if(cur->regmap[hr]==reg) return;
327 }
328
329 // Keep the same mapping if the register was already allocated in a loop
330 preferred_reg = loop_reg(i,reg,preferred_reg);
331
332 // Try to allocate the preferred register
333 if(cur->regmap[preferred_reg]==-1) {
334 cur->regmap[preferred_reg]=reg;
335 cur->dirty&=~(1<<preferred_reg);
336 cur->isconst&=~(1<<preferred_reg);
337 return;
338 }
339 r=cur->regmap[preferred_reg];
340 if(r<64&&((cur->u>>r)&1)) {
341 cur->regmap[preferred_reg]=reg;
342 cur->dirty&=~(1<<preferred_reg);
343 cur->isconst&=~(1<<preferred_reg);
344 return;
345 }
346 if(r>=64&&((cur->uu>>(r&63))&1)) {
347 cur->regmap[preferred_reg]=reg;
348 cur->dirty&=~(1<<preferred_reg);
349 cur->isconst&=~(1<<preferred_reg);
350 return;
351 }
352
353 // Clear any unneeded registers
354 // We try to keep the mapping consistent, if possible, because it
355 // makes branches easier (especially loops). So we try to allocate
356 // first (see above) before removing old mappings. If this is not
357 // possible then go ahead and clear out the registers that are no
358 // longer needed.
359 for(hr=0;hr<HOST_REGS;hr++)
360 {
361 r=cur->regmap[hr];
362 if(r>=0) {
363 if(r<64) {
364 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
365 }
366 else
367 {
368 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
369 }
370 }
371 }
372 // Try to allocate any available register, but prefer
373 // registers that have not been used recently.
374 if(i>0) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
377 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
378 cur->regmap[hr]=reg;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 return;
382 }
383 }
384 }
385 }
386 // Try to allocate any available register
387 for(hr=0;hr<HOST_REGS;hr++) {
388 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
389 cur->regmap[hr]=reg;
390 cur->dirty&=~(1<<hr);
391 cur->isconst&=~(1<<hr);
392 return;
393 }
394 }
395
396 // Ok, now we have to evict someone
397 // Pick a register we hopefully won't need soon
398 u_char hsn[MAXREG+1];
399 memset(hsn,10,sizeof(hsn));
400 int j;
401 lsn(hsn,i,&preferred_reg);
402 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
403 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
404 if(i>0) {
405 // Don't evict the cycle count at entry points, otherwise the entry
406 // stub will have to write it.
407 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
408 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
409 for(j=10;j>=3;j--)
410 {
411 // Alloc preferred register if available
412 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
413 for(hr=0;hr<HOST_REGS;hr++) {
414 // Evict both parts of a 64-bit register
415 if((cur->regmap[hr]&63)==r) {
416 cur->regmap[hr]=-1;
417 cur->dirty&=~(1<<hr);
418 cur->isconst&=~(1<<hr);
419 }
420 }
421 cur->regmap[preferred_reg]=reg;
422 return;
423 }
424 for(r=1;r<=MAXREG;r++)
425 {
426 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
427 for(hr=0;hr<HOST_REGS;hr++) {
428 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
429 if(cur->regmap[hr]==r+64) {
430 cur->regmap[hr]=reg;
431 cur->dirty&=~(1<<hr);
432 cur->isconst&=~(1<<hr);
433 return;
434 }
435 }
436 }
437 for(hr=0;hr<HOST_REGS;hr++) {
438 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
439 if(cur->regmap[hr]==r) {
440 cur->regmap[hr]=reg;
441 cur->dirty&=~(1<<hr);
442 cur->isconst&=~(1<<hr);
443 return;
444 }
445 }
446 }
447 }
448 }
449 }
450 }
451 for(j=10;j>=0;j--)
452 {
453 for(r=1;r<=MAXREG;r++)
454 {
455 if(hsn[r]==j) {
456 for(hr=0;hr<HOST_REGS;hr++) {
457 if(cur->regmap[hr]==r+64) {
458 cur->regmap[hr]=reg;
459 cur->dirty&=~(1<<hr);
460 cur->isconst&=~(1<<hr);
461 return;
462 }
463 }
464 for(hr=0;hr<HOST_REGS;hr++) {
465 if(cur->regmap[hr]==r) {
466 cur->regmap[hr]=reg;
467 cur->dirty&=~(1<<hr);
468 cur->isconst&=~(1<<hr);
469 return;
470 }
471 }
472 }
473 }
474 }
475 printf("This shouldn't happen (alloc_reg)");exit(1);
476}
477
478void alloc_reg64(struct regstat *cur,int i,signed char reg)
479{
480 int preferred_reg = 8+(reg&1);
481 int r,hr;
482
483 // allocate the lower 32 bits
484 alloc_reg(cur,i,reg);
485
486 // Don't allocate unused registers
487 if((cur->uu>>reg)&1) return;
488
489 // see if the upper half is already allocated
490 for(hr=0;hr<HOST_REGS;hr++)
491 {
492 if(cur->regmap[hr]==reg+64) return;
493 }
494
495 // Keep the same mapping if the register was already allocated in a loop
496 preferred_reg = loop_reg(i,reg,preferred_reg);
497
498 // Try to allocate the preferred register
499 if(cur->regmap[preferred_reg]==-1) {
500 cur->regmap[preferred_reg]=reg|64;
501 cur->dirty&=~(1<<preferred_reg);
502 cur->isconst&=~(1<<preferred_reg);
503 return;
504 }
505 r=cur->regmap[preferred_reg];
506 if(r<64&&((cur->u>>r)&1)) {
507 cur->regmap[preferred_reg]=reg|64;
508 cur->dirty&=~(1<<preferred_reg);
509 cur->isconst&=~(1<<preferred_reg);
510 return;
511 }
512 if(r>=64&&((cur->uu>>(r&63))&1)) {
513 cur->regmap[preferred_reg]=reg|64;
514 cur->dirty&=~(1<<preferred_reg);
515 cur->isconst&=~(1<<preferred_reg);
516 return;
517 }
518
519 // Clear any unneeded registers
520 // We try to keep the mapping consistent, if possible, because it
521 // makes branches easier (especially loops). So we try to allocate
522 // first (see above) before removing old mappings. If this is not
523 // possible then go ahead and clear out the registers that are no
524 // longer needed.
525 for(hr=HOST_REGS-1;hr>=0;hr--)
526 {
527 r=cur->regmap[hr];
528 if(r>=0) {
529 if(r<64) {
530 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
531 }
532 else
533 {
534 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
535 }
536 }
537 }
538 // Try to allocate any available register, but prefer
539 // registers that have not been used recently.
540 if(i>0) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
543 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
544 cur->regmap[hr]=reg|64;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 return;
548 }
549 }
550 }
551 }
552 // Try to allocate any available register
553 for(hr=0;hr<HOST_REGS;hr++) {
554 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
555 cur->regmap[hr]=reg|64;
556 cur->dirty&=~(1<<hr);
557 cur->isconst&=~(1<<hr);
558 return;
559 }
560 }
561
562 // Ok, now we have to evict someone
563 // Pick a register we hopefully won't need soon
564 u_char hsn[MAXREG+1];
565 memset(hsn,10,sizeof(hsn));
566 int j;
567 lsn(hsn,i,&preferred_reg);
568 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
569 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
570 if(i>0) {
571 // Don't evict the cycle count at entry points, otherwise the entry
572 // stub will have to write it.
573 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
574 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
575 for(j=10;j>=3;j--)
576 {
577 // Alloc preferred register if available
578 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
579 for(hr=0;hr<HOST_REGS;hr++) {
580 // Evict both parts of a 64-bit register
581 if((cur->regmap[hr]&63)==r) {
582 cur->regmap[hr]=-1;
583 cur->dirty&=~(1<<hr);
584 cur->isconst&=~(1<<hr);
585 }
586 }
587 cur->regmap[preferred_reg]=reg|64;
588 return;
589 }
590 for(r=1;r<=MAXREG;r++)
591 {
592 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
593 for(hr=0;hr<HOST_REGS;hr++) {
594 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
595 if(cur->regmap[hr]==r+64) {
596 cur->regmap[hr]=reg|64;
597 cur->dirty&=~(1<<hr);
598 cur->isconst&=~(1<<hr);
599 return;
600 }
601 }
602 }
603 for(hr=0;hr<HOST_REGS;hr++) {
604 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
605 if(cur->regmap[hr]==r) {
606 cur->regmap[hr]=reg|64;
607 cur->dirty&=~(1<<hr);
608 cur->isconst&=~(1<<hr);
609 return;
610 }
611 }
612 }
613 }
614 }
615 }
616 }
617 for(j=10;j>=0;j--)
618 {
619 for(r=1;r<=MAXREG;r++)
620 {
621 if(hsn[r]==j) {
622 for(hr=0;hr<HOST_REGS;hr++) {
623 if(cur->regmap[hr]==r+64) {
624 cur->regmap[hr]=reg|64;
625 cur->dirty&=~(1<<hr);
626 cur->isconst&=~(1<<hr);
627 return;
628 }
629 }
630 for(hr=0;hr<HOST_REGS;hr++) {
631 if(cur->regmap[hr]==r) {
632 cur->regmap[hr]=reg|64;
633 cur->dirty&=~(1<<hr);
634 cur->isconst&=~(1<<hr);
635 return;
636 }
637 }
638 }
639 }
640 }
641 printf("This shouldn't happen");exit(1);
642}
643
644// Allocate a temporary register. This is done without regard to
645// dirty status or whether the register we request is on the unneeded list
646// Note: This will only allocate one register, even if called multiple times
647void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
648{
649 int r,hr;
650 int preferred_reg = -1;
651
652 // see if it's already allocated
653 for(hr=0;hr<HOST_REGS;hr++)
654 {
655 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
656 }
657
658 // Try to allocate any available register
659 for(hr=HOST_REGS-1;hr>=0;hr--) {
660 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
661 cur->regmap[hr]=reg;
662 cur->dirty&=~(1<<hr);
663 cur->isconst&=~(1<<hr);
664 return;
665 }
666 }
667
668 // Find an unneeded register
669 for(hr=HOST_REGS-1;hr>=0;hr--)
670 {
671 r=cur->regmap[hr];
672 if(r>=0) {
673 if(r<64) {
674 if((cur->u>>r)&1) {
675 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
676 cur->regmap[hr]=reg;
677 cur->dirty&=~(1<<hr);
678 cur->isconst&=~(1<<hr);
679 return;
680 }
681 }
682 }
683 else
684 {
685 if((cur->uu>>(r&63))&1) {
686 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
687 cur->regmap[hr]=reg;
688 cur->dirty&=~(1<<hr);
689 cur->isconst&=~(1<<hr);
690 return;
691 }
692 }
693 }
694 }
695 }
696
697 // Ok, now we have to evict someone
698 // Pick a register we hopefully won't need soon
699 // TODO: we might want to follow unconditional jumps here
700 // TODO: get rid of dupe code and make this into a function
701 u_char hsn[MAXREG+1];
702 memset(hsn,10,sizeof(hsn));
703 int j;
704 lsn(hsn,i,&preferred_reg);
705 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
706 if(i>0) {
707 // Don't evict the cycle count at entry points, otherwise the entry
708 // stub will have to write it.
709 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
710 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
711 for(j=10;j>=3;j--)
712 {
713 for(r=1;r<=MAXREG;r++)
714 {
715 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
716 for(hr=0;hr<HOST_REGS;hr++) {
717 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
718 if(cur->regmap[hr]==r+64) {
719 cur->regmap[hr]=reg;
720 cur->dirty&=~(1<<hr);
721 cur->isconst&=~(1<<hr);
722 return;
723 }
724 }
725 }
726 for(hr=0;hr<HOST_REGS;hr++) {
727 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
728 if(cur->regmap[hr]==r) {
729 cur->regmap[hr]=reg;
730 cur->dirty&=~(1<<hr);
731 cur->isconst&=~(1<<hr);
732 return;
733 }
734 }
735 }
736 }
737 }
738 }
739 }
740 for(j=10;j>=0;j--)
741 {
742 for(r=1;r<=MAXREG;r++)
743 {
744 if(hsn[r]==j) {
745 for(hr=0;hr<HOST_REGS;hr++) {
746 if(cur->regmap[hr]==r+64) {
747 cur->regmap[hr]=reg;
748 cur->dirty&=~(1<<hr);
749 cur->isconst&=~(1<<hr);
750 return;
751 }
752 }
753 for(hr=0;hr<HOST_REGS;hr++) {
754 if(cur->regmap[hr]==r) {
755 cur->regmap[hr]=reg;
756 cur->dirty&=~(1<<hr);
757 cur->isconst&=~(1<<hr);
758 return;
759 }
760 }
761 }
762 }
763 }
764 printf("This shouldn't happen");exit(1);
765}
766// Allocate a specific ARM register.
767void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
768{
769 int n;
f776eb14 770 int dirty=0;
57871462 771
772 // see if it's already allocated (and dealloc it)
773 for(n=0;n<HOST_REGS;n++)
774 {
f776eb14 775 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
776 dirty=(cur->dirty>>n)&1;
777 cur->regmap[n]=-1;
778 }
57871462 779 }
780
781 cur->regmap[hr]=reg;
782 cur->dirty&=~(1<<hr);
f776eb14 783 cur->dirty|=dirty<<hr;
57871462 784 cur->isconst&=~(1<<hr);
785}
786
787// Alloc cycle count into dedicated register
788alloc_cc(struct regstat *cur,int i)
789{
790 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
791}
792
793/* Special alloc */
794
795
796/* Assembler */
797
// Printable names of the 16 host registers, indexed by register number;
// used by the assem_debug traces.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
815
816void output_byte(u_char byte)
817{
818 *(out++)=byte;
819}
820void output_modrm(u_char mod,u_char rm,u_char ext)
821{
822 assert(mod<4);
823 assert(rm<8);
824 assert(ext<8);
825 u_char byte=(mod<<6)|(ext<<3)|rm;
826 *(out++)=byte;
827}
828void output_sib(u_char scale,u_char index,u_char base)
829{
830 assert(scale<4);
831 assert(index<8);
832 assert(base<8);
833 u_char byte=(scale<<6)|(index<<3)|base;
834 *(out++)=byte;
835}
836void output_w32(u_int word)
837{
838 *((u_int *)out)=word;
839 out+=4;
840}
// Build the register fields of an instruction word: rn in bits 16-19,
// rd in bits 12-15 and rm in bits 0-3.  Each register number must be < 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the operand fields of an immediate-form instruction word: rn in bits
// 16-19, rd in bits 12-15, an 8-bit immediate in bits 0-7, and
// ((32-shift)&30)<<7 as the field above it.
// `shift` must be even, `imm` < 256 and both register numbers < 16.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rot_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rot_field|imm;
}
// Try to express `imm` as an 8-bit value combined with an even rotation.
// On success stores (rotation field << 7) | 8-bit value in *encoded and
// returns 1.  On failure returns 0 and leaves *encoded at 0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  // Rotate right two bits at a time until the value fits in eight bits.
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 871void genimm_checked(u_int imm,u_int *encoded)
872{
873 u_int ret=genimm(imm,encoded);
874 assert(ret);
875}
57871462 876u_int genjmp(u_int addr)
877{
878 int offset=addr-(int)out-8;
e80343e2 879 if(offset<-33554432||offset>=33554432) {
880 if (addr>2) {
881 printf("genjmp: out of range: %08x\n", offset);
882 exit(1);
883 }
884 return 0;
885 }
57871462 886 return ((u_int)offset>>2)&0xffffff;
887}
888
889void emit_mov(int rs,int rt)
890{
891 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
892 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
893}
894
895void emit_movs(int rs,int rt)
896{
897 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
898 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
899}
900
901void emit_add(int rs1,int rs2,int rt)
902{
903 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
904 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
905}
906
907void emit_adds(int rs1,int rs2,int rt)
908{
909 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
910 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
911}
912
913void emit_adcs(int rs1,int rs2,int rt)
914{
915 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
916 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
917}
918
919void emit_sbc(int rs1,int rs2,int rt)
920{
921 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
922 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
923}
924
925void emit_sbcs(int rs1,int rs2,int rt)
926{
927 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
928 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
929}
930
931void emit_neg(int rs, int rt)
932{
933 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
934 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
935}
936
937void emit_negs(int rs, int rt)
938{
939 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
940 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
941}
942
943void emit_sub(int rs1,int rs2,int rt)
944{
945 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
946 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
947}
948
949void emit_subs(int rs1,int rs2,int rt)
950{
951 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
952 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
953}
954
955void emit_zeroreg(int rt)
956{
957 assem_debug("mov %s,#0\n",regname[rt]);
958 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
959}
960
790ee18e 961void emit_loadlp(u_int imm,u_int rt)
962{
963 add_literal((int)out,imm);
964 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
965 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
966}
967void emit_movw(u_int imm,u_int rt)
968{
969 assert(imm<65536);
970 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
971 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
972}
973void emit_movt(u_int imm,u_int rt)
974{
975 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
976 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
977}
978void emit_movimm(u_int imm,u_int rt)
979{
980 u_int armval;
981 if(genimm(imm,&armval)) {
982 assem_debug("mov %s,#%d\n",regname[rt],imm);
983 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
984 }else if(genimm(~imm,&armval)) {
985 assem_debug("mvn %s,#%d\n",regname[rt],imm);
986 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
987 }else if(imm<65536) {
988 #ifdef ARMv5_ONLY
989 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
990 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
991 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
992 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
993 #else
994 emit_movw(imm,rt);
995 #endif
996 }else{
997 #ifdef ARMv5_ONLY
998 emit_loadlp(imm,rt);
999 #else
1000 emit_movw(imm&0x0000FFFF,rt);
1001 emit_movt(imm&0xFFFF0000,rt);
1002 #endif
1003 }
1004}
1005void emit_pcreladdr(u_int rt)
1006{
1007 assem_debug("add %s,pc,#?\n",regname[rt]);
1008 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1009}
1010
57871462 1011void emit_loadreg(int r, int hr)
1012{
3d624f89 1013#ifdef FORCE32
1014 if(r&64) {
1015 printf("64bit load in 32bit mode!\n");
7f2607ea 1016 assert(0);
1017 return;
3d624f89 1018 }
1019#endif
57871462 1020 if((r&63)==0)
1021 emit_zeroreg(hr);
1022 else {
3d624f89 1023 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1024 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1025 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1026 if(r==CCREG) addr=(int)&cycle_count;
1027 if(r==CSREG) addr=(int)&Status;
1028 if(r==FSREG) addr=(int)&FCR31;
1029 if(r==INVCP) addr=(int)&invc_ptr;
1030 u_int offset = addr-(u_int)&dynarec_local;
1031 assert(offset<4096);
1032 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1033 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1034 }
1035}
1036void emit_storereg(int r, int hr)
1037{
3d624f89 1038#ifdef FORCE32
1039 if(r&64) {
1040 printf("64bit store in 32bit mode!\n");
7f2607ea 1041 assert(0);
1042 return;
3d624f89 1043 }
1044#endif
1045 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1046 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1047 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1048 if(r==CCREG) addr=(int)&cycle_count;
1049 if(r==FSREG) addr=(int)&FCR31;
1050 u_int offset = addr-(u_int)&dynarec_local;
1051 assert(offset<4096);
1052 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1053 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1054}
1055
1056void emit_test(int rs, int rt)
1057{
1058 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1059 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1060}
1061
1062void emit_testimm(int rs,int imm)
1063{
1064 u_int armval;
5a05d80c 1065 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1066 genimm_checked(imm,&armval);
57871462 1067 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1068}
1069
b9b61529 1070void emit_testeqimm(int rs,int imm)
1071{
1072 u_int armval;
1073 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1074 genimm_checked(imm,&armval);
b9b61529 1075 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1076}
1077
57871462 1078void emit_not(int rs,int rt)
1079{
1080 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1081 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1082}
1083
b9b61529 1084void emit_mvnmi(int rs,int rt)
1085{
1086 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1087 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1088}
1089
57871462 1090void emit_and(u_int rs1,u_int rs2,u_int rt)
1091{
1092 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1093 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1094}
1095
1096void emit_or(u_int rs1,u_int rs2,u_int rt)
1097{
1098 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1099 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1100}
1101void emit_or_and_set_flags(int rs1,int rs2,int rt)
1102{
1103 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1105}
1106
f70d384d 1107void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1108{
1109 assert(rs<16);
1110 assert(rt<16);
1111 assert(imm<32);
1112 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1113 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1114}
1115
576bbd8f 1116void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1117{
1118 assert(rs<16);
1119 assert(rt<16);
1120 assert(imm<32);
1121 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1122 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1123}
1124
57871462 1125void emit_xor(u_int rs1,u_int rs2,u_int rt)
1126{
1127 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1128 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1129}
1130
57871462 1131void emit_addimm(u_int rs,int imm,u_int rt)
1132{
1133 assert(rs<16);
1134 assert(rt<16);
1135 if(imm!=0) {
1136 assert(imm>-65536&&imm<65536);
1137 u_int armval;
1138 if(genimm(imm,&armval)) {
1139 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1140 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1141 }else if(genimm(-imm,&armval)) {
1142 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1143 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1144 }else if(imm<0) {
1145 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1146 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1147 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1148 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1149 }else{
1150 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1151 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1152 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1153 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1154 }
1155 }
1156 else if(rs!=rt) emit_mov(rs,rt);
1157}
1158
1159void emit_addimm_and_set_flags(int imm,int rt)
1160{
1161 assert(imm>-65536&&imm<65536);
1162 u_int armval;
1163 if(genimm(imm,&armval)) {
1164 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1165 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1166 }else if(genimm(-imm,&armval)) {
1167 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1168 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1169 }else if(imm<0) {
1170 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1171 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1172 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1173 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1174 }else{
1175 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1176 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1177 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1178 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1179 }
1180}
1181void emit_addimm_no_flags(u_int imm,u_int rt)
1182{
1183 emit_addimm(rt,imm,rt);
1184}
1185
1186void emit_addnop(u_int r)
1187{
1188 assert(r<16);
1189 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1190 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1191}
1192
1193void emit_adcimm(u_int rs,int imm,u_int rt)
1194{
1195 u_int armval;
cfbd3c6e 1196 genimm_checked(imm,&armval);
57871462 1197 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1198 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1199}
1200/*void emit_sbcimm(int imm,u_int rt)
1201{
1202 u_int armval;
cfbd3c6e 1203 genimm_checked(imm,&armval);
57871462 1204 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1205 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1206}*/
1207void emit_sbbimm(int imm,u_int rt)
1208{
1209 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1210 assert(rt<8);
1211 if(imm<128&&imm>=-128) {
1212 output_byte(0x83);
1213 output_modrm(3,rt,3);
1214 output_byte(imm);
1215 }
1216 else
1217 {
1218 output_byte(0x81);
1219 output_modrm(3,rt,3);
1220 output_w32(imm);
1221 }
1222}
1223void emit_rscimm(int rs,int imm,u_int rt)
1224{
1225 assert(0);
1226 u_int armval;
cfbd3c6e 1227 genimm_checked(imm,&armval);
57871462 1228 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1229 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1230}
1231
1232void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1233{
1234 // TODO: if(genimm(imm,&armval)) ...
1235 // else
1236 emit_movimm(imm,HOST_TEMPREG);
1237 emit_adds(HOST_TEMPREG,rsl,rtl);
1238 emit_adcimm(rsh,0,rth);
1239}
1240
1241void emit_sbb(int rs1,int rs2)
1242{
1243 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1244 output_byte(0x19);
1245 output_modrm(3,rs1,rs2);
1246}
1247
1248void emit_andimm(int rs,int imm,int rt)
1249{
1250 u_int armval;
790ee18e 1251 if(imm==0) {
1252 emit_zeroreg(rt);
1253 }else if(genimm(imm,&armval)) {
57871462 1254 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1255 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1256 }else if(genimm(~imm,&armval)) {
1257 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1258 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1259 }else if(imm==65535) {
1260 #ifdef ARMv5_ONLY
1261 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1262 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1263 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1264 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1265 #else
1266 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1267 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1268 #endif
1269 }else{
1270 assert(imm>0&&imm<65535);
1271 #ifdef ARMv5_ONLY
1272 assem_debug("mov r14,#%d\n",imm&0xFF00);
1273 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1274 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1275 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1276 #else
1277 emit_movw(imm,HOST_TEMPREG);
1278 #endif
1279 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1280 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1281 }
1282}
1283
1284void emit_orimm(int rs,int imm,int rt)
1285{
1286 u_int armval;
790ee18e 1287 if(imm==0) {
1288 if(rs!=rt) emit_mov(rs,rt);
1289 }else if(genimm(imm,&armval)) {
57871462 1290 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1291 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1292 }else{
1293 assert(imm>0&&imm<65536);
1294 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1295 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1296 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1297 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1298 }
1299}
1300
1301void emit_xorimm(int rs,int imm,int rt)
1302{
57871462 1303 u_int armval;
790ee18e 1304 if(imm==0) {
1305 if(rs!=rt) emit_mov(rs,rt);
1306 }else if(genimm(imm,&armval)) {
57871462 1307 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1308 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1309 }else{
514ed0d9 1310 assert(imm>0&&imm<65536);
57871462 1311 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1312 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1313 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1314 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1315 }
1316}
1317
1318void emit_shlimm(int rs,u_int imm,int rt)
1319{
1320 assert(imm>0);
1321 assert(imm<32);
1322 //if(imm==1) ...
1323 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1324 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1325}
1326
1327void emit_shrimm(int rs,u_int imm,int rt)
1328{
1329 assert(imm>0);
1330 assert(imm<32);
1331 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1332 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1333}
1334
1335void emit_sarimm(int rs,u_int imm,int rt)
1336{
1337 assert(imm>0);
1338 assert(imm<32);
1339 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1340 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1341}
1342
1343void emit_rorimm(int rs,u_int imm,int rt)
1344{
1345 assert(imm>0);
1346 assert(imm<32);
1347 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1349}
1350
1351void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1352{
1353 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1354 assert(imm>0);
1355 assert(imm<32);
1356 //if(imm==1) ...
1357 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1358 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1359 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1360 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1361}
1362
1363void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1364{
1365 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1366 assert(imm>0);
1367 assert(imm<32);
1368 //if(imm==1) ...
1369 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1370 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1371 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1372 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1373}
1374
b9b61529 1375void emit_signextend16(int rs,int rt)
1376{
1377 #ifdef ARMv5_ONLY
1378 emit_shlimm(rs,16,rt);
1379 emit_sarimm(rt,16,rt);
1380 #else
1381 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1382 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1383 #endif
1384}
1385
57871462 1386void emit_shl(u_int rs,u_int shift,u_int rt)
1387{
1388 assert(rs<16);
1389 assert(rt<16);
1390 assert(shift<16);
1391 //if(imm==1) ...
1392 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1393 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1394}
1395void emit_shr(u_int rs,u_int shift,u_int rt)
1396{
1397 assert(rs<16);
1398 assert(rt<16);
1399 assert(shift<16);
1400 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1401 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1402}
1403void emit_sar(u_int rs,u_int shift,u_int rt)
1404{
1405 assert(rs<16);
1406 assert(rt<16);
1407 assert(shift<16);
1408 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1409 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1410}
1411void emit_shlcl(int r)
1412{
1413 assem_debug("shl %%%s,%%cl\n",regname[r]);
1414 assert(0);
1415}
1416void emit_shrcl(int r)
1417{
1418 assem_debug("shr %%%s,%%cl\n",regname[r]);
1419 assert(0);
1420}
1421void emit_sarcl(int r)
1422{
1423 assem_debug("sar %%%s,%%cl\n",regname[r]);
1424 assert(0);
1425}
1426
1427void emit_shldcl(int r1,int r2)
1428{
1429 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1430 assert(0);
1431}
1432void emit_shrdcl(int r1,int r2)
1433{
1434 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1435 assert(0);
1436}
1437void emit_orrshl(u_int rs,u_int shift,u_int rt)
1438{
1439 assert(rs<16);
1440 assert(rt<16);
1441 assert(shift<16);
1442 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1443 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1444}
1445void emit_orrshr(u_int rs,u_int shift,u_int rt)
1446{
1447 assert(rs<16);
1448 assert(rt<16);
1449 assert(shift<16);
1450 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1451 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1452}
1453
1454void emit_cmpimm(int rs,int imm)
1455{
1456 u_int armval;
1457 if(genimm(imm,&armval)) {
5a05d80c 1458 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1459 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1460 }else if(genimm(-imm,&armval)) {
5a05d80c 1461 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1462 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1463 }else if(imm>0) {
1464 assert(imm<65536);
1465 #ifdef ARMv5_ONLY
1466 emit_movimm(imm,HOST_TEMPREG);
1467 #else
1468 emit_movw(imm,HOST_TEMPREG);
1469 #endif
1470 assem_debug("cmp %s,r14\n",regname[rs]);
1471 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1472 }else{
1473 assert(imm>-65536);
1474 #ifdef ARMv5_ONLY
1475 emit_movimm(-imm,HOST_TEMPREG);
1476 #else
1477 emit_movw(-imm,HOST_TEMPREG);
1478 #endif
1479 assem_debug("cmn %s,r14\n",regname[rs]);
1480 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1481 }
1482}
1483
1484void emit_cmovne(u_int *addr,int rt)
1485{
1486 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1487 assert(0);
1488}
1489void emit_cmovl(u_int *addr,int rt)
1490{
1491 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1492 assert(0);
1493}
1494void emit_cmovs(u_int *addr,int rt)
1495{
1496 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1497 assert(0);
1498}
1499void emit_cmovne_imm(int imm,int rt)
1500{
1501 assem_debug("movne %s,#%d\n",regname[rt],imm);
1502 u_int armval;
cfbd3c6e 1503 genimm_checked(imm,&armval);
57871462 1504 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1505}
1506void emit_cmovl_imm(int imm,int rt)
1507{
1508 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1509 u_int armval;
cfbd3c6e 1510 genimm_checked(imm,&armval);
57871462 1511 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1512}
1513void emit_cmovb_imm(int imm,int rt)
1514{
1515 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1516 u_int armval;
cfbd3c6e 1517 genimm_checked(imm,&armval);
57871462 1518 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1519}
1520void emit_cmovs_imm(int imm,int rt)
1521{
1522 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1523 u_int armval;
cfbd3c6e 1524 genimm_checked(imm,&armval);
57871462 1525 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1526}
1527void emit_cmove_reg(int rs,int rt)
1528{
1529 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1530 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1531}
1532void emit_cmovne_reg(int rs,int rt)
1533{
1534 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1535 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1536}
1537void emit_cmovl_reg(int rs,int rt)
1538{
1539 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1540 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1541}
1542void emit_cmovs_reg(int rs,int rt)
1543{
1544 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1545 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1546}
1547
// rt = 1 if rs < imm (movlt after the compare), else 0.
// When rs and rt are the same register the zeroing is deferred until after
// the compare so the source value is still intact when it is read.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = 1 if the compare leaves carry clear (movcc), else 0.
// When rs and rt are the same register the zeroing is deferred until after
// the compare so the source value is still intact when it is read.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// rt = 1 if the signed 64-bit value rsh:rsl is less than the sign-extended
// immediate, else 0.  rsh must not be the same register as rt.
//
// The high word decides the result unless it equals the high word of the
// sign-extended immediate (0 for imm>=0, -1 for imm<0).  In that tie case
// the low words carry no sign information, so they must be compared as
// UNSIGNED values; a signed low-word compare would misorder values such as
// 0x0000000080000000 against a small positive immediate.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  // tie-break result: unsigned compare of the low word
  emit_sltiu32(rsl,imm,rt);
  if(imm>=0)
  {
    // high word nonzero -> not a tie: 0 if positive, 1 if negative
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
  else
  {
    // high word != -1 -> not a tie: 1 if below -1, otherwise 0
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
}
// 64-bit input variant of emit_sltiu32: the low word is compared first, then
// the result is overridden when the high word rsh differs from the expected
// high word (0 for imm>=0, -1 for imm<0).  rsh must not be rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1594
1595void emit_cmp(int rs,int rt)
1596{
1597 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1598 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1599}
// rt = 1, then forced to 0 by movlt when the compare of rs against 1 says
// "less than" (i.e. rt ends up 1 only for rs >= 1)
void emit_set_gz32(int rs, int rt)
{
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// rt = 1 when rs is nonzero.  Flags are produced either by copying rs into
// rt with emit_movs or, when both are the same register, by testing it
// against itself; movne then overwrites rt with 1.
void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) {
    emit_test(rs,rs);
  }else{
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
// rt = 1 if the signed 64-bit value rsh:rsl is greater than zero, else 0.
//
// The high word decides the result whenever it is nonzero (positive -> 1,
// negative -> 0).  When the high word is zero the value is positive exactly
// when the low word is nonzero; the low word has no sign of its own, so a
// signed ">0" test on it would wrongly reject values with bit 31 set
// (e.g. 0x0000000080000000).
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  // tie-break result for rsh==0: low word != 0
  emit_set_nz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// rt = rsh|rsl with flags set, then movne replaces any nonzero result with 1
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  const int dest=rt;
  emit_or_and_set_flags(rsh,rsl,dest);
  emit_cmovne_imm(1,dest);
}
// rt = 1 if the compare of rs1 with rs2 says "less than" (movlt), else 0.
// If rt overlaps either source, it is zeroed only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int overlaps=(rs1==rt||rs2==rt);
  if(!overlaps) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(overlaps) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = 1 if the compare of rs1 with rs2 leaves carry clear (movcc), else 0.
// If rt overlaps either source, it is zeroed only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int overlaps=(rs1==rt||rs2==rt);
  if(!overlaps) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(overlaps) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1644void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1645{
1646 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1647 assert(u1!=rt);
1648 assert(u2!=rt);
1649 emit_cmp(l1,l2);
1650 emit_movimm(0,rt);
1651 emit_sbcs(u1,u2,HOST_TEMPREG);
1652 emit_cmovl_imm(1,rt);
1653}
1654void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1655{
1656 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1657 assert(u1!=rt);
1658 assert(u2!=rt);
1659 emit_cmp(l1,l2);
1660 emit_movimm(0,rt);
1661 emit_sbcs(u1,u2,HOST_TEMPREG);
1662 emit_cmovb_imm(1,rt);
1663}
1664
1665void emit_call(int a)
1666{
1667 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1668 u_int offset=genjmp(a);
1669 output_w32(0xeb000000|offset);
1670}
1671void emit_jmp(int a)
1672{
1673 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1674 u_int offset=genjmp(a);
1675 output_w32(0xea000000|offset);
1676}
1677void emit_jne(int a)
1678{
1679 assem_debug("bne %x\n",a);
1680 u_int offset=genjmp(a);
1681 output_w32(0x1a000000|offset);
1682}
1683void emit_jeq(int a)
1684{
1685 assem_debug("beq %x\n",a);
1686 u_int offset=genjmp(a);
1687 output_w32(0x0a000000|offset);
1688}
1689void emit_js(int a)
1690{
1691 assem_debug("bmi %x\n",a);
1692 u_int offset=genjmp(a);
1693 output_w32(0x4a000000|offset);
1694}
1695void emit_jns(int a)
1696{
1697 assem_debug("bpl %x\n",a);
1698 u_int offset=genjmp(a);
1699 output_w32(0x5a000000|offset);
1700}
1701void emit_jl(int a)
1702{
1703 assem_debug("blt %x\n",a);
1704 u_int offset=genjmp(a);
1705 output_w32(0xba000000|offset);
1706}
1707void emit_jge(int a)
1708{
1709 assem_debug("bge %x\n",a);
1710 u_int offset=genjmp(a);
1711 output_w32(0xaa000000|offset);
1712}
1713void emit_jno(int a)
1714{
1715 assem_debug("bvc %x\n",a);
1716 u_int offset=genjmp(a);
1717 output_w32(0x7a000000|offset);
1718}
1719void emit_jc(int a)
1720{
1721 assem_debug("bcs %x\n",a);
1722 u_int offset=genjmp(a);
1723 output_w32(0x2a000000|offset);
1724}
1725void emit_jcc(int a)
1726{
1727 assem_debug("bcc %x\n",a);
1728 u_int offset=genjmp(a);
1729 output_w32(0x3a000000|offset);
1730}
1731
// x86-style "push imm": no ARM encoding is emitted, this only logs and traps
void emit_pushimm(int imm)
{
  const int value=imm;
  assem_debug("push $%x\n",value);
  assert(0);
}
// x86-style "pusha": no ARM encoding is emitted, this only logs and traps
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
  return;
}
// x86-style "popa": no ARM encoding is emitted, this only logs and traps
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
  return;
}
1747void emit_pushreg(u_int r)
1748{
1749 assem_debug("push %%%s\n",regname[r]);
1750 assert(0);
1751}
1752void emit_popreg(u_int r)
1753{
1754 assem_debug("pop %%%s\n",regname[r]);
1755 assert(0);
1756}
1757void emit_callreg(u_int r)
1758{
1759 assem_debug("call *%%%s\n",regname[r]);
1760 assert(0);
1761}
1762void emit_jmpreg(u_int r)
1763{
1764 assem_debug("mov pc,%s\n",regname[r]);
1765 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1766}
1767
1768void emit_readword_indexed(int offset, int rs, int rt)
1769{
1770 assert(offset>-4096&&offset<4096);
1771 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1772 if(offset>=0) {
1773 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1774 }else{
1775 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1776 }
1777}
1778void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1779{
1780 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1781 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1782}
// Word load that optionally goes through a map register.
// map<0: plain offset load.  Otherwise addr must be 0 and the scaled-index
// form with map as the index register is used.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Doubleword load into rh (first word, skipped when rh<0) and rl (second
// word).  Without a map register the two words are read at addr and addr+4.
// With a map register, map itself is incremented by 1 between the two loads
// (the index is scaled by 4 in the load), so map is modified; rh must not
// be rs in that case.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1803void emit_movsbl_indexed(int offset, int rs, int rt)
1804{
1805 assert(offset>-256&&offset<256);
1806 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1807 if(offset>=0) {
1808 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1809 }else{
1810 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1811 }
1812}
1813void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1814{
1815 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1816 else {
1817 if(addr==0) {
1818 emit_shlimm(map,2,map);
1819 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1820 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1821 }else{
1822 assert(addr>-256&&addr<256);
1823 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1824 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1825 emit_movsbl_indexed(addr, rt, rt);
1826 }
1827 }
1828}
1829void emit_movswl_indexed(int offset, int rs, int rt)
1830{
1831 assert(offset>-256&&offset<256);
1832 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1833 if(offset>=0) {
1834 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1835 }else{
1836 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1837 }
1838}
1839void emit_movzbl_indexed(int offset, int rs, int rt)
1840{
1841 assert(offset>-4096&&offset<4096);
1842 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1843 if(offset>=0) {
1844 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1845 }else{
1846 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1847 }
1848}
1849void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1850{
1851 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1852 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1853}
// Zero-extending byte load that optionally goes through a map register.
// With a map register and a nonzero addr, rs+addr is first formed in rt and
// rt then serves as the base of the scaled-index load.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1866void emit_movzwl_indexed(int offset, int rs, int rt)
1867{
1868 assert(offset>-256&&offset<256);
1869 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1870 if(offset>=0) {
1871 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1872 }else{
1873 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1874 }
1875}
1876void emit_readword(int addr, int rt)
1877{
1878 u_int offset = addr-(u_int)&dynarec_local;
1879 assert(offset<4096);
1880 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1881 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1882}
1883void emit_movsbl(int addr, int rt)
1884{
1885 u_int offset = addr-(u_int)&dynarec_local;
1886 assert(offset<256);
1887 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1888 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1889}
1890void emit_movswl(int addr, int rt)
1891{
1892 u_int offset = addr-(u_int)&dynarec_local;
1893 assert(offset<256);
1894 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1895 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1896}
1897void emit_movzbl(int addr, int rt)
1898{
1899 u_int offset = addr-(u_int)&dynarec_local;
1900 assert(offset<4096);
1901 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1902 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1903}
1904void emit_movzwl(int addr, int rt)
1905{
1906 u_int offset = addr-(u_int)&dynarec_local;
1907 assert(offset<256);
1908 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1909 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1910}
1911void emit_movzwl_reg(int rs, int rt)
1912{
1913 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1914 assert(0);
1915}
1916
1917void emit_xchg(int rs, int rt)
1918{
1919 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1920 assert(0);
1921}
1922void emit_writeword_indexed(int rt, int offset, int rs)
1923{
1924 assert(offset>-4096&&offset<4096);
1925 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1926 if(offset>=0) {
1927 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1928 }else{
1929 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1930 }
1931}
1932void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1933{
1934 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1935 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1936}
// Word store that optionally goes through a map register.
// map<0: plain offset store.  Otherwise addr must be 0 and the scaled-index
// form is used.  The temp parameter is not used by this function.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh (first word) and rl (second word).
// Without a map register the words go to addr and addr+4 (rh is skipped
// when negative).  With a map register rh is mandatory; the second word is
// reached either through temp = map+1 (when temp is a distinct register
// from rs) or by advancing rs itself by 4, which modifies rs.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1961void emit_writehword_indexed(int rt, int offset, int rs)
1962{
1963 assert(offset>-256&&offset<256);
1964 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1965 if(offset>=0) {
1966 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1967 }else{
1968 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1969 }
1970}
1971void emit_writebyte_indexed(int rt, int offset, int rs)
1972{
1973 assert(offset>-4096&&offset<4096);
1974 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1975 if(offset>=0) {
1976 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1977 }else{
1978 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1979 }
1980}
1981void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1982{
1983 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1984 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1985}
// Byte store that optionally goes through a map register.
// With a map register and a nonzero addr, rs+addr is first formed in temp
// and temp then serves as the base of the scaled-index store.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1998void emit_writeword(int rt, int addr)
1999{
2000 u_int offset = addr-(u_int)&dynarec_local;
2001 assert(offset<4096);
2002 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2003 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2004}
2005void emit_writehword(int rt, int addr)
2006{
2007 u_int offset = addr-(u_int)&dynarec_local;
2008 assert(offset<256);
2009 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2010 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2011}
2012void emit_writebyte(int rt, int addr)
2013{
2014 u_int offset = addr-(u_int)&dynarec_local;
2015 assert(offset<4096);
74426039 2016 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2017 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2018}
// x86-style "movl imm,mem": no ARM encoding is emitted, only logs and traps
void emit_writeword_imm(int imm, int addr)
{
  const int value=imm;
  assem_debug("movl $%x,%x\n",value,addr);
  assert(0);
}
// x86-style "movb imm,mem": no ARM encoding is emitted, only logs and traps
void emit_writebyte_imm(int imm, int addr)
{
  const int value=imm;
  assem_debug("movb $%x,%x\n",value,addr);
  assert(0);
}
2029
2030void emit_mul(int rs)
2031{
2032 assem_debug("mul %%%s\n",regname[rs]);
2033 assert(0);
2034}
2035void emit_imul(int rs)
2036{
2037 assem_debug("imul %%%s\n",regname[rs]);
2038 assert(0);
2039}
2040void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2041{
2042 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2043 assert(rs1<16);
2044 assert(rs2<16);
2045 assert(hi<16);
2046 assert(lo<16);
2047 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2048}
2049void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2050{
2051 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2052 assert(rs1<16);
2053 assert(rs2<16);
2054 assert(hi<16);
2055 assert(lo<16);
2056 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2057}
2058
2059void emit_div(int rs)
2060{
2061 assem_debug("div %%%s\n",regname[rs]);
2062 assert(0);
2063}
2064void emit_idiv(int rs)
2065{
2066 assem_debug("idiv %%%s\n",regname[rs]);
2067 assert(0);
2068}
// x86-style "cdq": no ARM encoding is emitted, this only logs and traps
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
  return;
}
2074
2075void emit_clz(int rs,int rt)
2076{
2077 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2078 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2079}
2080
2081void emit_subcs(int rs1,int rs2,int rt)
2082{
2083 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2084 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2085}
2086
2087void emit_shrcc_imm(int rs,u_int imm,int rt)
2088{
2089 assert(imm>0);
2090 assert(imm<32);
2091 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2092 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2093}
2094
2095void emit_negmi(int rs, int rt)
2096{
2097 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2098 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2099}
2100
2101void emit_negsmi(int rs, int rt)
2102{
2103 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2104 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2105}
2106
2107void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2108{
2109 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2110 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2111}
2112
2113void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2114{
2115 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2116 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2117}
2118
2119void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2120{
2121 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2122 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2123}
2124
2125void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2126{
2127 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2128 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2129}
2130
2131void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2132{
2133 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2134 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2135}
2136
2137void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2138{
2139 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2140 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2141}
2142
2143void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2144{
2145 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2146 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2147}
2148
2149void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2150{
2151 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2152 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2153}
2154
2155void emit_teq(int rs, int rt)
2156{
2157 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2158 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2159}
2160
2161void emit_rsbimm(int rs, int imm, int rt)
2162{
2163 u_int armval;
cfbd3c6e 2164 genimm_checked(imm,&armval);
57871462 2165 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2166 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2167}
2168
2169// Load 2 immediates optimizing for small code size
2170void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2171{
2172 emit_movimm(imm1,rt1);
2173 u_int armval;
2174 if(genimm(imm2-imm1,&armval)) {
2175 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2176 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2177 }else if(genimm(imm1-imm2,&armval)) {
2178 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2179 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2180 }
2181 else emit_movimm(imm2,rt2);
2182}
2183
2184// Conditionally select one of two immediates, optimizing for small code size
2185// This will only be called if HAVE_CMOV_IMM is defined
2186void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2187{
2188 u_int armval;
2189 if(genimm(imm2-imm1,&armval)) {
2190 emit_movimm(imm1,rt);
2191 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2192 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2193 }else if(genimm(imm1-imm2,&armval)) {
2194 emit_movimm(imm1,rt);
2195 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2196 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2197 }
2198 else {
2199 #ifdef ARMv5_ONLY
2200 emit_movimm(imm1,rt);
2201 add_literal((int)out,imm2);
2202 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2203 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2204 #else
2205 emit_movw(imm1&0x0000FFFF,rt);
2206 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2207 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2208 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2209 }
2210 emit_movt(imm1&0xFFFF0000,rt);
2211 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2212 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2213 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2214 }
2215 #endif
2216 }
2217}
2218
// special case for checking invalid_code
// Not implemented in this file: every call trips assert(0); the arguments
// are ignored.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2224
2225// special case for checking invalid_code
2226void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2227{
2228 assert(imm<128&&imm>=0);
2229 assert(r>=0&&r<16);
2230 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2231 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2232 emit_cmpimm(HOST_TEMPREG,imm);
2233}
2234
2235// special case for tlb mapping
2236void emit_addsr12(int rs1,int rs2,int rt)
2237{
2238 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2239 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2240}
2241
0bbd1454 2242void emit_callne(int a)
2243{
2244 assem_debug("blne %x\n",a);
2245 u_int offset=genjmp(a);
2246 output_w32(0x1b000000|offset);
2247}
2248
// Used to preload hash table entries
// NOTE(review): this emits two raw bytes, a modrm byte and a 32-bit address,
// which looks like an x86 encoding rather than an ARM instruction word --
// confirm whether this function is still called on ARM builds.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2258void emit_prefetchreg(int r)
2259{
2260 assem_debug("pld %s\n",regname[r]);
2261 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2262}
2263
2264// Special case for mini_ht
2265void emit_ldreq_indexed(int rs, u_int offset, int rt)
2266{
2267 assert(offset<4096);
2268 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2269 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2270}
2271
2272void emit_flds(int r,int sr)
2273{
2274 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2275 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2276}
2277
2278void emit_vldr(int r,int vr)
2279{
2280 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2281 output_w32(0xed900b00|(vr<<12)|(r<<16));
2282}
2283
2284void emit_fsts(int sr,int r)
2285{
2286 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2287 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2288}
2289
2290void emit_vstr(int vr,int r)
2291{
2292 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2293 output_w32(0xed800b00|(vr<<12)|(r<<16));
2294}
2295
// Emit "ftosizs s<d>,s<s>" (source s, destination d; both S registers).
void emit_ftosizs(int s,int d)
{
  assem_debug("ftosizs s%d,s%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeebd0ac0|dst_bits|src_bits);
}
2301
// Emit "ftosizd s<d>,d<s>" (D-register source s, S-register destination d).
void emit_ftosizd(int s,int d)
{
  assem_debug("ftosizd s%d,d%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=s&7;
  output_w32(0xeebd0bc0|dst_bits|src_bits);
}
2307
// Emit "fsitos s<d>,s<s>" (source s, destination d; both S registers).
void emit_fsitos(int s,int d)
{
  assem_debug("fsitos s%d,s%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80ac0|dst_bits|src_bits);
}
2313
// Emit "fsitod d<d>,s<s>" (S-register source s, D-register destination d).
void emit_fsitod(int s,int d)
{
  assem_debug("fsitod d%d,s%d\n",d,s);
  const int dst_bits=(d&7)<<12;
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80bc0|dst_bits|src_bits);
}
2319
// Emit "fcvtds d<d>,s<s>" (S-register source s, D-register destination d).
void emit_fcvtds(int s,int d)
{
  assem_debug("fcvtds d%d,s%d\n",d,s);
  const int dst_bits=(d&7)<<12;
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb70ac0|dst_bits|src_bits);
}
2325
// Emit "fcvtsd s<d>,d<s>" (D-register source s, S-register destination d).
void emit_fcvtsd(int s,int d)
{
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=s&7;
  output_w32(0xeeb70bc0|dst_bits|src_bits);
}
2331
// Emit "fsqrts s<d>,s<s>" (source s, destination d; both S registers).
// The debug text now names the destination as an S register, matching the
// split S-register encoding used for d below.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb10ac0|dst_bits|src_bits);
}
2337
// Emit "fsqrtd d<d>,d<s>" (source s, destination d; both D registers).
// The debug text now names the destination as a D register, matching the
// D-register field encoding used for d below.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  const int dst_bits=(d&7)<<12;
  const int src_bits=s&7;
  output_w32(0xeeb10bc0|dst_bits|src_bits);
}
2343
// Emit "fabss s<d>,s<s>" (source s, destination d; both S registers).
// The debug text now names the destination as an S register, matching the
// split S-register encoding used for d below.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb00ac0|dst_bits|src_bits);
}
2349
// Emit "fabsd d<d>,d<s>" (source s, destination d; both D registers).
// The debug text now names the destination as a D register, matching the
// D-register field encoding used for d below.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  const int dst_bits=(d&7)<<12;
  const int src_bits=s&7;
  output_w32(0xeeb00bc0|dst_bits|src_bits);
}
2355
// Emit "fnegs s<d>,s<s>" (source s, destination d; both S registers).
// The debug text now names the destination as an S register, matching the
// split S-register encoding used for d below.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb10a40|dst_bits|src_bits);
}
2361
// Emit "fnegd d<d>,d<s>" (source s, destination d; both D registers).
// The debug text now names the destination as a D register, matching the
// D-register field encoding used for d below.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  const int dst_bits=(d&7)<<12;
  const int src_bits=s&7;
  output_w32(0xeeb10b40|dst_bits|src_bits);
}
2367
// Emit "fadds s<d>,s<s1>,s<s2>" (all S registers).
void emit_fadds(int s1,int s2,int d)
{
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int op1_bits=((s1&14)<<15)|((s1&1)<<7);
  const int op2_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a00|dst_bits|op1_bits|op2_bits);
}
2373
// Emit "faddd d<d>,d<s1>,d<s2>" (all D registers).
void emit_faddd(int s1,int s2,int d)
{
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  const int dst_bits=(d&7)<<12;
  const int op1_bits=(s1&7)<<16;
  const int op2_bits=s2&7;
  output_w32(0xee300b00|dst_bits|op1_bits|op2_bits);
}
2379
// Emit "fsubs s<d>,s<s1>,s<s2>" (all S registers).
void emit_fsubs(int s1,int s2,int d)
{
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int op1_bits=((s1&14)<<15)|((s1&1)<<7);
  const int op2_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a40|dst_bits|op1_bits|op2_bits);
}
2385
// Emit "fsubd d<d>,d<s1>,d<s2>" (all D registers).
void emit_fsubd(int s1,int s2,int d)
{
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  const int dst_bits=(d&7)<<12;
  const int op1_bits=(s1&7)<<16;
  const int op2_bits=s2&7;
  output_w32(0xee300b40|dst_bits|op1_bits|op2_bits);
}
2391
// Emit "fmuls s<d>,s<s1>,s<s2>" (all S registers).
void emit_fmuls(int s1,int s2,int d)
{
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int op1_bits=((s1&14)<<15)|((s1&1)<<7);
  const int op2_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee200a00|dst_bits|op1_bits|op2_bits);
}
2397
// Emit "fmuld d<d>,d<s1>,d<s2>" (all D registers).
void emit_fmuld(int s1,int s2,int d)
{
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  const int dst_bits=(d&7)<<12;
  const int op1_bits=(s1&7)<<16;
  const int op2_bits=s2&7;
  output_w32(0xee200b00|dst_bits|op1_bits|op2_bits);
}
2403
// Emit "fdivs s<d>,s<s1>,s<s2>" (all S registers).
void emit_fdivs(int s1,int s2,int d)
{
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int op1_bits=((s1&14)<<15)|((s1&1)<<7);
  const int op2_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee800a00|dst_bits|op1_bits|op2_bits);
}
2409
// Emit "fdivd d<d>,d<s1>,d<s2>" (all D registers).
void emit_fdivd(int s1,int s2,int d)
{
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  const int dst_bits=(d&7)<<12;
  const int op1_bits=(s1&7)<<16;
  const int op2_bits=s2&7;
  output_w32(0xee800b00|dst_bits|op1_bits|op2_bits);
}
2415
2416void emit_fcmps(int x,int y)
2417{
2418 assem_debug("fcmps s14, s15\n");
2419 output_w32(0xeeb47a67);
2420}
2421
2422void emit_fcmpd(int x,int y)
2423{
2424 assem_debug("fcmpd d6, d7\n");
2425 output_w32(0xeeb46b47);
2426}
2427
2428void emit_fmstat()
2429{
2430 assem_debug("fmstat\n");
2431 output_w32(0xeef1fa10);
2432}
2433
2434void emit_bicne_imm(int rs,int imm,int rt)
2435{
2436 u_int armval;
cfbd3c6e 2437 genimm_checked(imm,&armval);
57871462 2438 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2439 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2440}
2441
2442void emit_biccs_imm(int rs,int imm,int rt)
2443{
2444 u_int armval;
cfbd3c6e 2445 genimm_checked(imm,&armval);
57871462 2446 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2447 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2448}
2449
2450void emit_bicvc_imm(int rs,int imm,int rt)
2451{
2452 u_int armval;
cfbd3c6e 2453 genimm_checked(imm,&armval);
57871462 2454 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2455 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2456}
2457
2458void emit_bichi_imm(int rs,int imm,int rt)
2459{
2460 u_int armval;
cfbd3c6e 2461 genimm_checked(imm,&armval);
57871462 2462 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2463 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2464}
2465
2466void emit_orrvs_imm(int rs,int imm,int rt)
2467{
2468 u_int armval;
cfbd3c6e 2469 genimm_checked(imm,&armval);
57871462 2470 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2471 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2472}
2473
b9b61529 2474void emit_orrne_imm(int rs,int imm,int rt)
2475{
2476 u_int armval;
cfbd3c6e 2477 genimm_checked(imm,&armval);
b9b61529 2478 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2479 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2480}
2481
2482void emit_andne_imm(int rs,int imm,int rt)
2483{
2484 u_int armval;
cfbd3c6e 2485 genimm_checked(imm,&armval);
b9b61529 2486 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2487 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2488}
2489
57871462 2490void emit_jno_unlikely(int a)
2491{
2492 //emit_jno(a);
2493 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2494 output_w32(0x72800000|rd_rn_rm(15,15,0));
2495}
2496
2497// Save registers before function call
2498void save_regs(u_int reglist)
2499{
2500 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2501 if(!reglist) return;
2502 assem_debug("stmia fp,{");
2503 if(reglist&1) assem_debug("r0, ");
2504 if(reglist&2) assem_debug("r1, ");
2505 if(reglist&4) assem_debug("r2, ");
2506 if(reglist&8) assem_debug("r3, ");
2507 if(reglist&0x1000) assem_debug("r12");
2508 assem_debug("}\n");
2509 output_w32(0xe88b0000|reglist);
2510}
2511// Restore registers after function call
2512void restore_regs(u_int reglist)
2513{
2514 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2515 if(!reglist) return;
2516 assem_debug("ldmia fp,{");
2517 if(reglist&1) assem_debug("r0, ");
2518 if(reglist&2) assem_debug("r1, ");
2519 if(reglist&4) assem_debug("r2, ");
2520 if(reglist&8) assem_debug("r3, ");
2521 if(reglist&0x1000) assem_debug("r12");
2522 assem_debug("}\n");
2523 output_w32(0xe89b0000|reglist);
2524}
2525
2526// Write back consts using r14 so we don't disturb the other registers
2527void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2528{
2529 int hr;
2530 for(hr=0;hr<HOST_REGS;hr++) {
2531 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2532 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2533 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2534 int value=constmap[i][hr];
2535 if(value==0) {
2536 emit_zeroreg(HOST_TEMPREG);
2537 }
2538 else {
2539 emit_movimm(value,HOST_TEMPREG);
2540 }
2541 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2542#ifndef FORCE32
57871462 2543 if((i_is32>>i_regmap[hr])&1) {
2544 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2545 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2546 }
24385cae 2547#endif
57871462 2548 }
2549 }
2550 }
2551 }
2552}
2553
2554/* Stubs/epilogue */
2555
2556void literal_pool(int n)
2557{
2558 if(!literalcount) return;
2559 if(n) {
2560 if((int)out-literals[0][0]<4096-n) return;
2561 }
2562 u_int *ptr;
2563 int i;
2564 for(i=0;i<literalcount;i++)
2565 {
2566 ptr=(u_int *)literals[i][0];
2567 u_int offset=(u_int)out-(u_int)ptr-8;
2568 assert(offset<4096);
2569 assert(!(offset&3));
2570 *ptr|=offset;
2571 output_w32(literals[i][1]);
2572 }
2573 literalcount=0;
2574}
2575
2576void literal_pool_jumpover(int n)
2577{
2578 if(!literalcount) return;
2579 if(n) {
2580 if((int)out-literals[0][0]<4096-n) return;
2581 }
2582 int jaddr=(int)out;
2583 emit_jmp(0);
2584 literal_pool(0);
2585 set_jump_target(jaddr,(int)out);
2586}
2587
2588emit_extjump2(int addr, int target, int linker)
2589{
2590 u_char *ptr=(u_char *)addr;
2591 assert((ptr[3]&0x0e)==0xa);
2592 emit_loadlp(target,0);
2593 emit_loadlp(addr,1);
24385cae 2594 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2595 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2596//DEBUG >
2597#ifdef DEBUG_CYCLE_COUNT
2598 emit_readword((int)&last_count,ECX);
2599 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2600 emit_readword((int)&next_interupt,ECX);
2601 emit_writeword(HOST_CCREG,(int)&Count);
2602 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2603 emit_writeword(ECX,(int)&last_count);
2604#endif
2605//DEBUG <
2606 emit_jmp(linker);
2607}
2608
2609emit_extjump(int addr, int target)
2610{
2611 emit_extjump2(addr, target, (int)dyna_linker);
2612}
2613emit_extjump_ds(int addr, int target)
2614{
2615 emit_extjump2(addr, target, (int)dyna_linker_ds);
2616}
2617
cbbab9cd 2618#ifdef PCSX
2619#include "pcsxmem_inline.c"
2620#endif
2621
57871462 2622do_readstub(int n)
2623{
2624 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2625 literal_pool(256);
2626 set_jump_target(stubs[n][1],(int)out);
2627 int type=stubs[n][0];
2628 int i=stubs[n][3];
2629 int rs=stubs[n][4];
2630 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2631 u_int reglist=stubs[n][7];
2632 signed char *i_regmap=i_regs->regmap;
2633 int addr=get_reg(i_regmap,AGEN1+(i&1));
2634 int rth,rt;
2635 int ds;
b9b61529 2636 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2637 rth=get_reg(i_regmap,FTEMP|64);
2638 rt=get_reg(i_regmap,FTEMP);
2639 }else{
2640 rth=get_reg(i_regmap,rt1[i]|64);
2641 rt=get_reg(i_regmap,rt1[i]);
2642 }
2643 assert(rs>=0);
57871462 2644 if(addr<0) addr=rt;
535d208a 2645 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2646 assert(addr>=0);
2647 int ftable=0;
2648 if(type==LOADB_STUB||type==LOADBU_STUB)
2649 ftable=(int)readmemb;
2650 if(type==LOADH_STUB||type==LOADHU_STUB)
2651 ftable=(int)readmemh;
2652 if(type==LOADW_STUB)
2653 ftable=(int)readmem;
24385cae 2654#ifndef FORCE32
57871462 2655 if(type==LOADD_STUB)
2656 ftable=(int)readmemd;
24385cae 2657#endif
2658 assert(ftable!=0);
57871462 2659 emit_writeword(rs,(int)&address);
2660 //emit_pusha();
2661 save_regs(reglist);
97a238a6 2662#ifndef PCSX
57871462 2663 ds=i_regs!=&regs[i];
2664 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2665 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2666 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2667 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2668 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2669#endif
57871462 2670 emit_shrimm(rs,16,1);
2671 int cc=get_reg(i_regmap,CCREG);
2672 if(cc<0) {
2673 emit_loadreg(CCREG,2);
2674 }
2675 emit_movimm(ftable,0);
2676 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2677#ifndef PCSX
57871462 2678 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2679#endif
57871462 2680 //emit_readword((int)&last_count,temp);
2681 //emit_add(cc,temp,cc);
2682 //emit_writeword(cc,(int)&Count);
2683 //emit_mov(15,14);
2684 emit_call((int)&indirect_jump_indexed);
2685 //emit_callreg(rs);
2686 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2687#ifndef PCSX
57871462 2688 // We really shouldn't need to update the count here,
2689 // but not doing so causes random crashes...
2690 emit_readword((int)&Count,HOST_TEMPREG);
2691 emit_readword((int)&next_interupt,2);
2692 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2693 emit_writeword(2,(int)&last_count);
2694 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2695 if(cc<0) {
2696 emit_storereg(CCREG,HOST_TEMPREG);
2697 }
f51dc36c 2698#endif
57871462 2699 //emit_popa();
2700 restore_regs(reglist);
2701 //if((cc=get_reg(regmap,CCREG))>=0) {
2702 // emit_loadreg(CCREG,cc);
2703 //}
f18c0f46 2704 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2705 assert(rt>=0);
2706 if(type==LOADB_STUB)
2707 emit_movsbl((int)&readmem_dword,rt);
2708 if(type==LOADBU_STUB)
2709 emit_movzbl((int)&readmem_dword,rt);
2710 if(type==LOADH_STUB)
2711 emit_movswl((int)&readmem_dword,rt);
2712 if(type==LOADHU_STUB)
2713 emit_movzwl((int)&readmem_dword,rt);
2714 if(type==LOADW_STUB)
2715 emit_readword((int)&readmem_dword,rt);
2716 if(type==LOADD_STUB) {
2717 emit_readword((int)&readmem_dword,rt);
2718 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2719 }
57871462 2720 }
2721 emit_jmp(stubs[n][2]); // return address
2722}
2723
2724inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2725{
2726 int rs=get_reg(regmap,target);
2727 int rth=get_reg(regmap,target|64);
2728 int rt=get_reg(regmap,target);
535d208a 2729 if(rs<0) rs=get_reg(regmap,-1);
57871462 2730 assert(rs>=0);
57871462 2731 int ftable=0;
2732 if(type==LOADB_STUB||type==LOADBU_STUB)
2733 ftable=(int)readmemb;
2734 if(type==LOADH_STUB||type==LOADHU_STUB)
2735 ftable=(int)readmemh;
2736 if(type==LOADW_STUB)
2737 ftable=(int)readmem;
24385cae 2738#ifndef FORCE32
57871462 2739 if(type==LOADD_STUB)
2740 ftable=(int)readmemd;
24385cae 2741#endif
2742 assert(ftable!=0);
cbbab9cd 2743#ifdef PCSX
2744 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2745 return;
2746#endif
fd99c415 2747 if(target==0)
2748 emit_movimm(addr,rs);
57871462 2749 emit_writeword(rs,(int)&address);
2750 //emit_pusha();
2751 save_regs(reglist);
0c1fe38b 2752#ifndef PCSX
2753 if((signed int)addr>=(signed int)0xC0000000) {
2754 // Theoretically we can have a pagefault here, if the TLB has never
2755 // been enabled and the address is outside the range 80000000..BFFFFFFF
2756 // Write out the registers so the pagefault can be handled. This is
2757 // a very rare case and likely represents a bug.
2758 int ds=regmap!=regs[i].regmap;
2759 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2760 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2761 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2762 }
2763#endif
57871462 2764 //emit_shrimm(rs,16,1);
2765 int cc=get_reg(regmap,CCREG);
2766 if(cc<0) {
2767 emit_loadreg(CCREG,2);
2768 }
2769 //emit_movimm(ftable,0);
2770 emit_movimm(((u_int *)ftable)[addr>>16],0);
2771 //emit_readword((int)&last_count,12);
2772 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2773#ifndef PCSX
57871462 2774 if((signed int)addr>=(signed int)0xC0000000) {
2775 // Pagefault address
2776 int ds=regmap!=regs[i].regmap;
2777 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2778 }
f51dc36c 2779#endif
57871462 2780 //emit_add(12,2,2);
2781 //emit_writeword(2,(int)&Count);
2782 //emit_call(((u_int *)ftable)[addr>>16]);
2783 emit_call((int)&indirect_jump);
f51dc36c 2784#ifndef PCSX
57871462 2785 // We really shouldn't need to update the count here,
2786 // but not doing so causes random crashes...
2787 emit_readword((int)&Count,HOST_TEMPREG);
2788 emit_readword((int)&next_interupt,2);
2789 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2790 emit_writeword(2,(int)&last_count);
2791 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2792 if(cc<0) {
2793 emit_storereg(CCREG,HOST_TEMPREG);
2794 }
f51dc36c 2795#endif
57871462 2796 //emit_popa();
2797 restore_regs(reglist);
fd99c415 2798 if(rt>=0) {
2799 if(type==LOADB_STUB)
2800 emit_movsbl((int)&readmem_dword,rt);
2801 if(type==LOADBU_STUB)
2802 emit_movzbl((int)&readmem_dword,rt);
2803 if(type==LOADH_STUB)
2804 emit_movswl((int)&readmem_dword,rt);
2805 if(type==LOADHU_STUB)
2806 emit_movzwl((int)&readmem_dword,rt);
2807 if(type==LOADW_STUB)
2808 emit_readword((int)&readmem_dword,rt);
2809 if(type==LOADD_STUB) {
2810 emit_readword((int)&readmem_dword,rt);
2811 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2812 }
57871462 2813 }
2814}
2815
2816do_writestub(int n)
2817{
2818 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2819 literal_pool(256);
2820 set_jump_target(stubs[n][1],(int)out);
2821 int type=stubs[n][0];
2822 int i=stubs[n][3];
2823 int rs=stubs[n][4];
2824 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2825 u_int reglist=stubs[n][7];
2826 signed char *i_regmap=i_regs->regmap;
2827 int addr=get_reg(i_regmap,AGEN1+(i&1));
2828 int rth,rt,r;
2829 int ds;
b9b61529 2830 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2831 rth=get_reg(i_regmap,FTEMP|64);
2832 rt=get_reg(i_regmap,r=FTEMP);
2833 }else{
2834 rth=get_reg(i_regmap,rs2[i]|64);
2835 rt=get_reg(i_regmap,r=rs2[i]);
2836 }
2837 assert(rs>=0);
2838 assert(rt>=0);
2839 if(addr<0) addr=get_reg(i_regmap,-1);
2840 assert(addr>=0);
2841 int ftable=0;
2842 if(type==STOREB_STUB)
2843 ftable=(int)writememb;
2844 if(type==STOREH_STUB)
2845 ftable=(int)writememh;
2846 if(type==STOREW_STUB)
2847 ftable=(int)writemem;
24385cae 2848#ifndef FORCE32
57871462 2849 if(type==STORED_STUB)
2850 ftable=(int)writememd;
24385cae 2851#endif
2852 assert(ftable!=0);
57871462 2853 emit_writeword(rs,(int)&address);
2854 //emit_shrimm(rs,16,rs);
2855 //emit_movmem_indexedx4(ftable,rs,rs);
2856 if(type==STOREB_STUB)
2857 emit_writebyte(rt,(int)&byte);
2858 if(type==STOREH_STUB)
2859 emit_writehword(rt,(int)&hword);
2860 if(type==STOREW_STUB)
2861 emit_writeword(rt,(int)&word);
2862 if(type==STORED_STUB) {
3d624f89 2863#ifndef FORCE32
57871462 2864 emit_writeword(rt,(int)&dword);
2865 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2866#else
2867 printf("STORED_STUB\n");
2868#endif
57871462 2869 }
2870 //emit_pusha();
2871 save_regs(reglist);
97a238a6 2872#ifndef PCSX
57871462 2873 ds=i_regs!=&regs[i];
2874 int real_rs=get_reg(i_regmap,rs1[i]);
2875 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2876 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2877 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2878 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2879#endif
57871462 2880 emit_shrimm(rs,16,1);
2881 int cc=get_reg(i_regmap,CCREG);
2882 if(cc<0) {
2883 emit_loadreg(CCREG,2);
2884 }
2885 emit_movimm(ftable,0);
2886 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2887#ifndef PCSX
57871462 2888 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2889#endif
57871462 2890 //emit_readword((int)&last_count,temp);
2891 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2892 //emit_add(cc,temp,cc);
2893 //emit_writeword(cc,(int)&Count);
2894 emit_call((int)&indirect_jump_indexed);
2895 //emit_callreg(rs);
2896 emit_readword((int)&Count,HOST_TEMPREG);
2897 emit_readword((int)&next_interupt,2);
2898 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2899 emit_writeword(2,(int)&last_count);
2900 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2901 if(cc<0) {
2902 emit_storereg(CCREG,HOST_TEMPREG);
2903 }
2904 //emit_popa();
2905 restore_regs(reglist);
2906 //if((cc=get_reg(regmap,CCREG))>=0) {
2907 // emit_loadreg(CCREG,cc);
2908 //}
2909 emit_jmp(stubs[n][2]); // return address
2910}
2911
2912inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2913{
2914 int rs=get_reg(regmap,-1);
2915 int rth=get_reg(regmap,target|64);
2916 int rt=get_reg(regmap,target);
2917 assert(rs>=0);
2918 assert(rt>=0);
cbbab9cd 2919#ifdef PCSX
2920 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2921 return;
2922#endif
57871462 2923 int ftable=0;
2924 if(type==STOREB_STUB)
2925 ftable=(int)writememb;
2926 if(type==STOREH_STUB)
2927 ftable=(int)writememh;
2928 if(type==STOREW_STUB)
2929 ftable=(int)writemem;
24385cae 2930#ifndef FORCE32
57871462 2931 if(type==STORED_STUB)
2932 ftable=(int)writememd;
24385cae 2933#endif
2934 assert(ftable!=0);
57871462 2935 emit_writeword(rs,(int)&address);
2936 //emit_shrimm(rs,16,rs);
2937 //emit_movmem_indexedx4(ftable,rs,rs);
2938 if(type==STOREB_STUB)
2939 emit_writebyte(rt,(int)&byte);
2940 if(type==STOREH_STUB)
2941 emit_writehword(rt,(int)&hword);
2942 if(type==STOREW_STUB)
2943 emit_writeword(rt,(int)&word);
2944 if(type==STORED_STUB) {
3d624f89 2945#ifndef FORCE32
57871462 2946 emit_writeword(rt,(int)&dword);
2947 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2948#else
2949 printf("STORED_STUB\n");
2950#endif
57871462 2951 }
2952 //emit_pusha();
2953 save_regs(reglist);
0c1fe38b 2954#ifndef PCSX
2955 // rearmed note: load_all_consts prevents BIOS boot, some bug?
2956 if((signed int)addr>=(signed int)0xC0000000) {
2957 // Theoretically we can have a pagefault here, if the TLB has never
2958 // been enabled and the address is outside the range 80000000..BFFFFFFF
2959 // Write out the registers so the pagefault can be handled. This is
2960 // a very rare case and likely represents a bug.
2961 int ds=regmap!=regs[i].regmap;
2962 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2963 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2964 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2965 }
2966#endif
57871462 2967 //emit_shrimm(rs,16,1);
2968 int cc=get_reg(regmap,CCREG);
2969 if(cc<0) {
2970 emit_loadreg(CCREG,2);
2971 }
2972 //emit_movimm(ftable,0);
2973 emit_movimm(((u_int *)ftable)[addr>>16],0);
2974 //emit_readword((int)&last_count,12);
2975 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2976#ifndef PCSX
57871462 2977 if((signed int)addr>=(signed int)0xC0000000) {
2978 // Pagefault address
2979 int ds=regmap!=regs[i].regmap;
2980 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2981 }
f51dc36c 2982#endif
57871462 2983 //emit_add(12,2,2);
2984 //emit_writeword(2,(int)&Count);
2985 //emit_call(((u_int *)ftable)[addr>>16]);
2986 emit_call((int)&indirect_jump);
2987 emit_readword((int)&Count,HOST_TEMPREG);
2988 emit_readword((int)&next_interupt,2);
2989 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2990 emit_writeword(2,(int)&last_count);
2991 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2992 if(cc<0) {
2993 emit_storereg(CCREG,HOST_TEMPREG);
2994 }
2995 //emit_popa();
2996 restore_regs(reglist);
2997}
2998
2999do_unalignedwritestub(int n)
3000{
b7918751 3001 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3002 literal_pool(256);
57871462 3003 set_jump_target(stubs[n][1],(int)out);
b7918751 3004
3005 int i=stubs[n][3];
3006 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3007 int addr=stubs[n][5];
3008 u_int reglist=stubs[n][7];
3009 signed char *i_regmap=i_regs->regmap;
3010 int temp2=get_reg(i_regmap,FTEMP);
3011 int rt;
3012 int ds, real_rs;
3013 rt=get_reg(i_regmap,rs2[i]);
3014 assert(rt>=0);
3015 assert(addr>=0);
3016 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3017 reglist|=(1<<addr);
3018 reglist&=~(1<<temp2);
3019
3020 emit_andimm(addr,0xfffffffc,temp2);
3021 emit_writeword(temp2,(int)&address);
3022
3023 save_regs(reglist);
97a238a6 3024#ifndef PCSX
b7918751 3025 ds=i_regs!=&regs[i];
3026 real_rs=get_reg(i_regmap,rs1[i]);
3027 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3028 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3029 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3030 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3031#endif
b7918751 3032 emit_shrimm(addr,16,1);
3033 int cc=get_reg(i_regmap,CCREG);
3034 if(cc<0) {
3035 emit_loadreg(CCREG,2);
3036 }
3037 emit_movimm((u_int)readmem,0);
3038 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3039#ifndef PCSX
3040 // pagefault address
3041 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3042#endif
b7918751 3043 emit_call((int)&indirect_jump_indexed);
3044 restore_regs(reglist);
3045
3046 emit_readword((int)&readmem_dword,temp2);
3047 int temp=addr; //hmh
3048 emit_shlimm(addr,3,temp);
3049 emit_andimm(temp,24,temp);
3050#ifdef BIG_ENDIAN_MIPS
3051 if (opcode[i]==0x2e) // SWR
3052#else
3053 if (opcode[i]==0x2a) // SWL
3054#endif
3055 emit_xorimm(temp,24,temp);
3056 emit_movimm(-1,HOST_TEMPREG);
55439448 3057 if (opcode[i]==0x2a) { // SWL
b7918751 3058 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3059 emit_orrshr(rt,temp,temp2);
3060 }else{
3061 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3062 emit_orrshl(rt,temp,temp2);
3063 }
3064 emit_readword((int)&address,addr);
3065 emit_writeword(temp2,(int)&word);
3066 //save_regs(reglist); // don't need to, no state changes
3067 emit_shrimm(addr,16,1);
3068 emit_movimm((u_int)writemem,0);
3069 //emit_call((int)&indirect_jump_indexed);
3070 emit_mov(15,14);
3071 emit_readword_dualindexedx4(0,1,15);
3072 emit_readword((int)&Count,HOST_TEMPREG);
3073 emit_readword((int)&next_interupt,2);
3074 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3075 emit_writeword(2,(int)&last_count);
3076 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3077 if(cc<0) {
3078 emit_storereg(CCREG,HOST_TEMPREG);
3079 }
3080 restore_regs(reglist);
57871462 3081 emit_jmp(stubs[n][2]); // return address
3082}
3083
// Debug helper: print the eight argument values plus the word stored just
// below the first parameter.
// NOTE(review): (&edi)[-1] reads outside the parameter object; the parameter
// names suggest this was written for an x86 calling convention -- confirm
// before relying on the last printed value.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *first_arg=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,first_arg[-1]);
}
3088
3089do_invstub(int n)
3090{
3091 literal_pool(20);
3092 u_int reglist=stubs[n][3];
3093 set_jump_target(stubs[n][1],(int)out);
3094 save_regs(reglist);
3095 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3096 emit_call((int)&invalidate_addr);
3097 restore_regs(reglist);
3098 emit_jmp(stubs[n][2]); // return address
3099}
3100
3101int do_dirty_stub(int i)
3102{
3103 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3104 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3105 #ifdef PCSX
3106 addr=(u_int)source;
3107 #endif
57871462 3108 // Careful about the code output here, verify_dirty needs to parse it.
3109 #ifdef ARMv5_ONLY
ac545b3a 3110 emit_loadlp(addr,1);
57871462 3111 emit_loadlp((int)copy,2);
3112 emit_loadlp(slen*4,3);
3113 #else
ac545b3a 3114 emit_movw(addr&0x0000FFFF,1);
57871462 3115 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3116 emit_movt(addr&0xFFFF0000,1);
57871462 3117 emit_movt(((u_int)copy)&0xFFFF0000,2);
3118 emit_movw(slen*4,3);
3119 #endif
3120 emit_movimm(start+i*4,0);
3121 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3122 int entry=(int)out;
3123 load_regs_entry(i);
3124 if(entry==(int)out) entry=instr_addr[i];
3125 emit_jmp(instr_addr[i]);
3126 return entry;
3127}
3128
3129void do_dirty_stub_ds()
3130{
3131 // Careful about the code output here, verify_dirty needs to parse it.
3132 #ifdef ARMv5_ONLY
3133 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3134 emit_loadlp((int)copy,2);
3135 emit_loadlp(slen*4,3);
3136 #else
3137 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3138 emit_movw(((u_int)copy)&0x0000FFFF,2);
3139 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3140 emit_movt(((u_int)copy)&0xFFFF0000,2);
3141 emit_movw(slen*4,3);
3142 #endif
3143 emit_movimm(start+1,0);
3144 emit_call((int)&verify_code_ds);
3145}
3146
3147do_cop1stub(int n)
3148{
3149 literal_pool(256);
3150 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3151 set_jump_target(stubs[n][1],(int)out);
3152 int i=stubs[n][3];
3d624f89 3153// int rs=stubs[n][4];
57871462 3154 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3155 int ds=stubs[n][6];
3156 if(!ds) {
3157 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3158 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3159 }
3160 //else {printf("fp exception in delay slot\n");}
3161 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3162 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3163 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3164 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3165 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3166}
3167
3168/* TLB */
3169
3170int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3171{
3172 if(c) {
3173 if((signed int)addr>=(signed int)0xC0000000) {
3174 // address_generation already loaded the const
3175 emit_readword_dualindexedx4(FP,map,map);
3176 }
3177 else
3178 return -1; // No mapping
3179 }
3180 else {
3181 assert(s!=map);
3182 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3183 emit_addsr12(map,s,map);
3184 // Schedule this while we wait on the load
3185 //if(x) emit_xorimm(s,x,ar);
3186 if(shift>=0) emit_shlimm(s,3,shift);
3187 if(~a) emit_andimm(s,a,ar);
3188 emit_readword_dualindexedx4(FP,map,map);
3189 }
3190 return map;
3191}
3192int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3193{
3194 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3195 emit_test(map,map);
3196 *jaddr=(int)out;
3197 emit_js(0);
3198 }
3199 return map;
3200}
3201
3202int gen_tlb_addr_r(int ar, int map) {
3203 if(map>=0) {
3204 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3205 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3206 }
3207}
3208
3209int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3210{
3211 if(c) {
3212 if(addr<0x80800000||addr>=0xC0000000) {
3213 // address_generation already loaded the const
3214 emit_readword_dualindexedx4(FP,map,map);
3215 }
3216 else
3217 return -1; // No mapping
3218 }
3219 else {
3220 assert(s!=map);
3221 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3222 emit_addsr12(map,s,map);
3223 // Schedule this while we wait on the load
3224 //if(x) emit_xorimm(s,x,ar);
3225 emit_readword_dualindexedx4(FP,map,map);
3226 }
3227 return map;
3228}
3229int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3230{
3231 if(!c||addr<0x80800000||addr>=0xC0000000) {
3232 emit_testimm(map,0x40000000);
3233 *jaddr=(int)out;
3234 emit_jne(0);
3235 }
3236}
3237
3238int gen_tlb_addr_w(int ar, int map) {
3239 if(map>=0) {
3240 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3241 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3242 }
3243}
3244
3245// Generate the address of the memory_map entry, relative to dynarec_local
3246generate_map_const(u_int addr,int reg) {
3247 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3248 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3249}
3250
3251/* Special assem */
3252
3253void shift_assemble_arm(int i,struct regstat *i_regs)
3254{
3255 if(rt1[i]) {
3256 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3257 {
3258 signed char s,t,shift;
3259 t=get_reg(i_regs->regmap,rt1[i]);
3260 s=get_reg(i_regs->regmap,rs1[i]);
3261 shift=get_reg(i_regs->regmap,rs2[i]);
3262 if(t>=0){
3263 if(rs1[i]==0)
3264 {
3265 emit_zeroreg(t);
3266 }
3267 else if(rs2[i]==0)
3268 {
3269 assert(s>=0);
3270 if(s!=t) emit_mov(s,t);
3271 }
3272 else
3273 {
3274 emit_andimm(shift,31,HOST_TEMPREG);
3275 if(opcode2[i]==4) // SLLV
3276 {
3277 emit_shl(s,HOST_TEMPREG,t);
3278 }
3279 if(opcode2[i]==6) // SRLV
3280 {
3281 emit_shr(s,HOST_TEMPREG,t);
3282 }
3283 if(opcode2[i]==7) // SRAV
3284 {
3285 emit_sar(s,HOST_TEMPREG,t);
3286 }
3287 }
3288 }
3289 } else { // DSLLV/DSRLV/DSRAV
3290 signed char sh,sl,th,tl,shift;
3291 th=get_reg(i_regs->regmap,rt1[i]|64);
3292 tl=get_reg(i_regs->regmap,rt1[i]);
3293 sh=get_reg(i_regs->regmap,rs1[i]|64);
3294 sl=get_reg(i_regs->regmap,rs1[i]);
3295 shift=get_reg(i_regs->regmap,rs2[i]);
3296 if(tl>=0){
3297 if(rs1[i]==0)
3298 {
3299 emit_zeroreg(tl);
3300 if(th>=0) emit_zeroreg(th);
3301 }
3302 else if(rs2[i]==0)
3303 {
3304 assert(sl>=0);
3305 if(sl!=tl) emit_mov(sl,tl);
3306 if(th>=0&&sh!=th) emit_mov(sh,th);
3307 }
3308 else
3309 {
3310 // FIXME: What if shift==tl ?
3311 assert(shift!=tl);
3312 int temp=get_reg(i_regs->regmap,-1);
3313 int real_th=th;
3314 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3315 assert(sl>=0);
3316 assert(sh>=0);
3317 emit_andimm(shift,31,HOST_TEMPREG);
3318 if(opcode2[i]==0x14) // DSLLV
3319 {
3320 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3321 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3322 emit_orrshr(sl,HOST_TEMPREG,th);
3323 emit_andimm(shift,31,HOST_TEMPREG);
3324 emit_testimm(shift,32);
3325 emit_shl(sl,HOST_TEMPREG,tl);
3326 if(th>=0) emit_cmovne_reg(tl,th);
3327 emit_cmovne_imm(0,tl);
3328 }
3329 if(opcode2[i]==0x16) // DSRLV
3330 {
3331 assert(th>=0);
3332 emit_shr(sl,HOST_TEMPREG,tl);
3333 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3334 emit_orrshl(sh,HOST_TEMPREG,tl);
3335 emit_andimm(shift,31,HOST_TEMPREG);
3336 emit_testimm(shift,32);
3337 emit_shr(sh,HOST_TEMPREG,th);
3338 emit_cmovne_reg(th,tl);
3339 if(real_th>=0) emit_cmovne_imm(0,th);
3340 }
3341 if(opcode2[i]==0x17) // DSRAV
3342 {
3343 assert(th>=0);
3344 emit_shr(sl,HOST_TEMPREG,tl);
3345 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3346 if(real_th>=0) {
3347 assert(temp>=0);
3348 emit_sarimm(th,31,temp);
3349 }
3350 emit_orrshl(sh,HOST_TEMPREG,tl);
3351 emit_andimm(shift,31,HOST_TEMPREG);
3352 emit_testimm(shift,32);
3353 emit_sar(sh,HOST_TEMPREG,th);
3354 emit_cmovne_reg(th,tl);
3355 if(real_th>=0) emit_cmovne_reg(temp,th);
3356 }
3357 }
3358 }
3359 }
3360 }
3361}
3362#define shift_assemble shift_assemble_arm
3363
3364void loadlr_assemble_arm(int i,struct regstat *i_regs)
3365{
3366 int s,th,tl,temp,temp2,addr,map=-1;
3367 int offset;
3368 int jaddr=0;
af4ee1fe 3369 int memtarget=0,c=0;
57871462 3370 u_int hr,reglist=0;
3371 th=get_reg(i_regs->regmap,rt1[i]|64);
3372 tl=get_reg(i_regs->regmap,rt1[i]);
3373 s=get_reg(i_regs->regmap,rs1[i]);
3374 temp=get_reg(i_regs->regmap,-1);
3375 temp2=get_reg(i_regs->regmap,FTEMP);
3376 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3377 assert(addr<0);
3378 offset=imm[i];
3379 for(hr=0;hr<HOST_REGS;hr++) {
3380 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3381 }
3382 reglist|=1<<temp;
3383 if(offset||s<0||c) addr=temp2;
3384 else addr=s;
3385 if(s>=0) {
3386 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3387 if(c) {
3388 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3389 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3390 }
57871462 3391 }
535d208a 3392 if(!using_tlb) {
3393 if(!c) {
3394 #ifdef RAM_OFFSET
3395 map=get_reg(i_regs->regmap,ROREG);
3396 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3397 #endif
3398 emit_shlimm(addr,3,temp);
3399 if (opcode[i]==0x22||opcode[i]==0x26) {
3400 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3401 }else{
535d208a 3402 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3403 }
535d208a 3404 emit_cmpimm(addr,RAM_SIZE);
3405 jaddr=(int)out;
3406 emit_jno(0);
3407 }
3408 else {
3409 if (opcode[i]==0x22||opcode[i]==0x26) {
3410 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3411 }else{
3412 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3413 }
57871462 3414 }
535d208a 3415 }else{ // using tlb
3416 int a;
3417 if(c) {
3418 a=-1;
3419 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3420 a=0xFFFFFFFC; // LWL/LWR
3421 }else{
3422 a=0xFFFFFFF8; // LDL/LDR
3423 }
3424 map=get_reg(i_regs->regmap,TLREG);
3425 assert(map>=0);
ea3d2e6e 3426 reglist&=~(1<<map);
535d208a 3427 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3428 if(c) {
3429 if (opcode[i]==0x22||opcode[i]==0x26) {
3430 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3431 }else{
3432 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3433 }
535d208a 3434 }
3435 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3436 }
3437 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3438 if(!c||memtarget) {
3439 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3440 emit_readword_indexed_tlb(0,temp2,map,temp2);
3441 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3442 }
3443 else
3444 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3445 if(rt1[i]) {
3446 assert(tl>=0);
57871462 3447 emit_andimm(temp,24,temp);
2002a1db 3448#ifdef BIG_ENDIAN_MIPS
3449 if (opcode[i]==0x26) // LWR
3450#else
3451 if (opcode[i]==0x22) // LWL
3452#endif
3453 emit_xorimm(temp,24,temp);
57871462 3454 emit_movimm(-1,HOST_TEMPREG);
3455 if (opcode[i]==0x26) {
3456 emit_shr(temp2,temp,temp2);
3457 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3458 }else{
3459 emit_shl(temp2,temp,temp2);
3460 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3461 }
3462 emit_or(temp2,tl,tl);
57871462 3463 }
535d208a 3464 //emit_storereg(rt1[i],tl); // DEBUG
3465 }
3466 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3467 // FIXME: little endian
3468 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3469 if(!c||memtarget) {
3470 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3471 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3472 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3473 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3474 }
3475 else
3476 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3477 if(rt1[i]) {
3478 assert(th>=0);
3479 assert(tl>=0);
57871462 3480 emit_testimm(temp,32);
3481 emit_andimm(temp,24,temp);
3482 if (opcode[i]==0x1A) { // LDL
3483 emit_rsbimm(temp,32,HOST_TEMPREG);
3484 emit_shl(temp2h,temp,temp2h);
3485 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3486 emit_movimm(-1,HOST_TEMPREG);
3487 emit_shl(temp2,temp,temp2);
3488 emit_cmove_reg(temp2h,th);
3489 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3490 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3491 emit_orreq(temp2,tl,tl);
3492 emit_orrne(temp2,th,th);
3493 }
3494 if (opcode[i]==0x1B) { // LDR
3495 emit_xorimm(temp,24,temp);
3496 emit_rsbimm(temp,32,HOST_TEMPREG);
3497 emit_shr(temp2,temp,temp2);
3498 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3499 emit_movimm(-1,HOST_TEMPREG);
3500 emit_shr(temp2h,temp,temp2h);
3501 emit_cmovne_reg(temp2,tl);
3502 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3503 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3504 emit_orrne(temp2h,th,th);
3505 emit_orreq(temp2h,tl,tl);
3506 }
3507 }
3508 }
3509}
3510#define loadlr_assemble loadlr_assemble_arm
3511
3512void cop0_assemble(int i,struct regstat *i_regs)
3513{
3514 if(opcode2[i]==0) // MFC0
3515 {
3516 signed char t=get_reg(i_regs->regmap,rt1[i]);
3517 char copr=(source[i]>>11)&0x1f;
3518 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3519 if(t>=0&&rt1[i]!=0) {
7139f3c8 3520#ifdef MUPEN64
57871462 3521 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3522 emit_movimm((source[i]>>11)&0x1f,1);
3523 emit_writeword(0,(int)&PC);
3524 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3525 if(copr==9) {
3526 emit_readword((int)&last_count,ECX);
3527 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3528 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3529 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3530 emit_writeword(HOST_CCREG,(int)&Count);
3531 }
3532 emit_call((int)MFC0);
3533 emit_readword((int)&readmem_dword,t);
7139f3c8 3534#else
3535 emit_readword((int)&reg_cop0+copr*4,t);
3536#endif
57871462 3537 }
3538 }
3539 else if(opcode2[i]==4) // MTC0
3540 {
3541 signed char s=get_reg(i_regs->regmap,rs1[i]);
3542 char copr=(source[i]>>11)&0x1f;
3543 assert(s>=0);
3544 emit_writeword(s,(int)&readmem_dword);
3545 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3546#ifdef MUPEN64
57871462 3547 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3548 emit_movimm((source[i]>>11)&0x1f,1);
3549 emit_writeword(0,(int)&PC);
3550 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3551#endif
3552 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3553 emit_readword((int)&last_count,ECX);
3554 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3555 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3556 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3557 emit_writeword(HOST_CCREG,(int)&Count);
3558 }
3559 // What a mess. The status register (12) can enable interrupts,
3560 // so needs a special case to handle a pending interrupt.
3561 // The interrupt must be taken immediately, because a subsequent
3562 // instruction might disable interrupts again.
7139f3c8 3563 if(copr==12||copr==13) {
fca1aef2 3564#ifdef PCSX
3565 if (is_delayslot) {
3566 // burn cycles to cause cc_interrupt, which will
3567 // reschedule next_interupt. Relies on CCREG from above.
3568 assem_debug("MTC0 DS %d\n", copr);
3569 emit_writeword(HOST_CCREG,(int)&last_count);
3570 emit_movimm(0,HOST_CCREG);
3571 emit_storereg(CCREG,HOST_CCREG);
3572 emit_movimm(copr,0);
3573 emit_call((int)pcsx_mtc0_ds);
3574 return;
3575 }
3576#endif
57871462 3577 emit_movimm(start+i*4+4,0);
3578 emit_movimm(0,1);
3579 emit_writeword(0,(int)&pcaddr);
3580 emit_writeword(1,(int)&pending_exception);
3581 }
3582 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3583 //else
fca1aef2 3584#ifdef PCSX
3585 emit_movimm(copr,0);
3586 emit_call((int)pcsx_mtc0);
3587#else
57871462 3588 emit_call((int)MTC0);
fca1aef2 3589#endif
7139f3c8 3590 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3591 emit_readword((int)&Count,HOST_CCREG);
3592 emit_readword((int)&next_interupt,ECX);
3593 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3594 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3595 emit_writeword(ECX,(int)&last_count);
3596 emit_storereg(CCREG,HOST_CCREG);
3597 }
7139f3c8 3598 if(copr==12||copr==13) {
57871462 3599 assert(!is_delayslot);
3600 emit_readword((int)&pending_exception,14);
3601 }
3602 emit_loadreg(rs1[i],s);
3603 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3604 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3605 if(copr==12||copr==13) {
57871462 3606 emit_test(14,14);
3607 emit_jne((int)&do_interrupt);
3608 }
3609 cop1_usable=0;
3610 }
3611 else
3612 {
3613 assert(opcode2[i]==0x10);
3d624f89 3614#ifndef DISABLE_TLB
57871462 3615 if((source[i]&0x3f)==0x01) // TLBR
3616 emit_call((int)TLBR);
3617 if((source[i]&0x3f)==0x02) // TLBWI
3618 emit_call((int)TLBWI_new);
3619 if((source[i]&0x3f)==0x06) { // TLBWR
3620 // The TLB entry written by TLBWR is dependent on the count,
3621 // so update the cycle count
3622 emit_readword((int)&last_count,ECX);
3623 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3624 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3625 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3626 emit_writeword(HOST_CCREG,(int)&Count);
3627 emit_call((int)TLBWR_new);
3628 }
3629 if((source[i]&0x3f)==0x08) // TLBP
3630 emit_call((int)TLBP);
3d624f89 3631#endif
576bbd8f 3632#ifdef PCSX
3633 if((source[i]&0x3f)==0x10) // RFE
3634 {
3635 emit_readword((int)&Status,0);
3636 emit_andimm(0,0x3c,1);
3637 emit_andimm(0,~0xf,0);
3638 emit_orrshr_imm(1,2,0);
3639 emit_writeword(0,(int)&Status);
3640 }
3641#else
57871462 3642 if((source[i]&0x3f)==0x18) // ERET
3643 {
3644 int count=ccadj[i];
3645 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3646 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3647 emit_jmp((int)jump_eret);
3648 }
576bbd8f 3649#endif
57871462 3650 }
3651}
3652
b9b61529 3653static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3654{
3655 switch (copr) {
3656 case 1:
3657 case 3:
3658 case 5:
3659 case 8:
3660 case 9:
3661 case 10:
3662 case 11:
3663 emit_readword((int)&reg_cop2d[copr],tl);
3664 emit_signextend16(tl,tl);
3665 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3666 break;
3667 case 7:
3668 case 16:
3669 case 17:
3670 case 18:
3671 case 19:
3672 emit_readword((int)&reg_cop2d[copr],tl);
3673 emit_andimm(tl,0xffff,tl);
3674 emit_writeword(tl,(int)&reg_cop2d[copr]);
3675 break;
3676 case 15:
3677 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3678 emit_writeword(tl,(int)&reg_cop2d[copr]);
3679 break;
3680 case 28:
b9b61529 3681 case 29:
3682 emit_readword((int)&reg_cop2d[9],temp);
3683 emit_testimm(temp,0x8000); // do we need this?
3684 emit_andimm(temp,0xf80,temp);
3685 emit_andne_imm(temp,0,temp);
f70d384d 3686 emit_shrimm(temp,7,tl);
b9b61529 3687 emit_readword((int)&reg_cop2d[10],temp);
3688 emit_testimm(temp,0x8000);
3689 emit_andimm(temp,0xf80,temp);
3690 emit_andne_imm(temp,0,temp);
f70d384d 3691 emit_orrshr_imm(temp,2,tl);
b9b61529 3692 emit_readword((int)&reg_cop2d[11],temp);
3693 emit_testimm(temp,0x8000);
3694 emit_andimm(temp,0xf80,temp);
3695 emit_andne_imm(temp,0,temp);
f70d384d 3696 emit_orrshl_imm(temp,3,tl);
b9b61529 3697 emit_writeword(tl,(int)&reg_cop2d[copr]);
3698 break;
3699 default:
3700 emit_readword((int)&reg_cop2d[copr],tl);
3701 break;
3702 }
3703}
3704
3705static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3706{
3707 switch (copr) {
3708 case 15:
3709 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3710 emit_writeword(sl,(int)&reg_cop2d[copr]);
3711 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3712 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3713 emit_writeword(sl,(int)&reg_cop2d[14]);
3714 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3715 break;
3716 case 28:
3717 emit_andimm(sl,0x001f,temp);
f70d384d 3718 emit_shlimm(temp,7,temp);
b9b61529 3719 emit_writeword(temp,(int)&reg_cop2d[9]);
3720 emit_andimm(sl,0x03e0,temp);
f70d384d 3721 emit_shlimm(temp,2,temp);
b9b61529 3722 emit_writeword(temp,(int)&reg_cop2d[10]);
3723 emit_andimm(sl,0x7c00,temp);
f70d384d 3724 emit_shrimm(temp,3,temp);
b9b61529 3725 emit_writeword(temp,(int)&reg_cop2d[11]);
3726 emit_writeword(sl,(int)&reg_cop2d[28]);
3727 break;
3728 case 30:
3729 emit_movs(sl,temp);
3730 emit_mvnmi(temp,temp);
3731 emit_clz(temp,temp);
3732 emit_writeword(sl,(int)&reg_cop2d[30]);
3733 emit_writeword(temp,(int)&reg_cop2d[31]);
3734 break;
b9b61529 3735 case 31:
3736 break;
3737 default:
3738 emit_writeword(sl,(int)&reg_cop2d[copr]);
3739 break;
3740 }
3741}
3742
3743void cop2_assemble(int i,struct regstat *i_regs)
3744{
3745 u_int copr=(source[i]>>11)&0x1f;
3746 signed char temp=get_reg(i_regs->regmap,-1);
3747 if (opcode2[i]==0) { // MFC2
3748 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3749 if(tl>=0&&rt1[i]!=0)
b9b61529 3750 cop2_get_dreg(copr,tl,temp);
3751 }
3752 else if (opcode2[i]==4) { // MTC2
3753 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3754 cop2_put_dreg(copr,sl,temp);
3755 }
3756 else if (opcode2[i]==2) // CFC2
3757 {
3758 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3759 if(tl>=0&&rt1[i]!=0)
b9b61529 3760 emit_readword((int)&reg_cop2c[copr],tl);
3761 }
3762 else if (opcode2[i]==6) // CTC2
3763 {
3764 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3765 switch(copr) {
3766 case 4:
3767 case 12:
3768 case 20:
3769 case 26:
3770 case 27:
3771 case 29:
3772 case 30:
3773 emit_signextend16(sl,temp);
3774 break;
3775 case 31:
3776 //value = value & 0x7ffff000;
3777 //if (value & 0x7f87e000) value |= 0x80000000;
3778 emit_shrimm(sl,12,temp);
3779 emit_shlimm(temp,12,temp);
3780 emit_testimm(temp,0x7f000000);
3781 emit_testeqimm(temp,0x00870000);
3782 emit_testeqimm(temp,0x0000e000);
3783 emit_orrne_imm(temp,0x80000000,temp);
3784 break;
3785 default:
3786 temp=sl;
3787 break;
3788 }
3789 emit_writeword(temp,(int)&reg_cop2c[copr]);
3790 assert(sl>=0);
3791 }
3792}
3793
3794void c2op_assemble(int i,struct regstat *i_regs)
3795{
3796 signed char temp=get_reg(i_regs->regmap,-1);
3797 u_int c2op=source[i]&0x3f;
3798 u_int hr,reglist=0;
3799 for(hr=0;hr<HOST_REGS;hr++) {
3800 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3801 }
3802 if(i==0||itype[i-1]!=C2OP)
3803 save_regs(reglist);
3804
3805 if (gte_handlers[c2op]!=NULL) {
3806 int cc=get_reg(i_regs->regmap,CCREG);
009faf24 3807 emit_movimm(source[i],1); // opcode
b9b61529 3808 if (cc>=0&&gte_cycletab[c2op])
009faf24 3809 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
3810 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3811 emit_writeword(1,(int)&psxRegs.code);
b9b61529 3812 emit_call((int)gte_handlers[c2op]);
3813 }
3814
3815 if(i>=slen-1||itype[i+1]!=C2OP)
3816 restore_regs(reglist);
3817}
3818
3819void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3820{
3821 // XXX: should just just do the exception instead
3822 if(!cop1_usable) {
3823 int jaddr=(int)out;
3824 emit_jmp(0);
3825 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3826 cop1_usable=1;
3827 }
3828}
3829
57871462 3830void cop1_assemble(int i,struct regstat *i_regs)
3831{
3d624f89 3832#ifndef DISABLE_COP1
57871462 3833 // Check cop1 unusable
3834 if(!cop1_usable) {
3835 signed char rs=get_reg(i_regs->regmap,CSREG);
3836 assert(rs>=0);
3837 emit_testimm(rs,0x20000000);
3838 int jaddr=(int)out;
3839 emit_jeq(0);
3840 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3841 cop1_usable=1;
3842 }
3843 if (opcode2[i]==0) { // MFC1
3844 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3845 if(tl>=0) {
3846 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3847 emit_readword_indexed(0,tl,tl);
3848 }
3849 }
3850 else if (opcode2[i]==1) { // DMFC1
3851 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3852 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3853 if(tl>=0) {
3854 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3855 if(th>=0) emit_readword_indexed(4,tl,th);
3856 emit_readword_indexed(0,tl,tl);
3857 }
3858 }
3859 else if (opcode2[i]==4) { // MTC1
3860 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3861 signed char temp=get_reg(i_regs->regmap,-1);
3862 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3863 emit_writeword_indexed(sl,0,temp);
3864 }
3865 else if (opcode2[i]==5) { // DMTC1
3866 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3867 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3868 signed char temp=get_reg(i_regs->regmap,-1);
3869 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3870 emit_writeword_indexed(sh,4,temp);
3871 emit_writeword_indexed(sl,0,temp);
3872 }
3873 else if (opcode2[i]==2) // CFC1
3874 {
3875 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3876 if(tl>=0) {
3877 u_int copr=(source[i]>>11)&0x1f;
3878 if(copr==0) emit_readword((int)&FCR0,tl);
3879 if(copr==31) emit_readword((int)&FCR31,tl);
3880 }
3881 }
3882 else if (opcode2[i]==6) // CTC1
3883 {
3884 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3885 u_int copr=(source[i]>>11)&0x1f;
3886 assert(sl>=0);
3887 if(copr==31)
3888 {
3889 emit_writeword(sl,(int)&FCR31);
3890 // Set the rounding mode
3891 //FIXME
3892 //char temp=get_reg(i_regs->regmap,-1);
3893 //emit_andimm(sl,3,temp);
3894 //emit_fldcw_indexed((int)&rounding_modes,temp);
3895 }
3896 }
3d624f89 3897#else
3898 cop1_unusable(i, i_regs);
3899#endif
57871462 3900}
3901
3902void fconv_assemble_arm(int i,struct regstat *i_regs)
3903{
3d624f89 3904#ifndef DISABLE_COP1
57871462 3905 signed char temp=get_reg(i_regs->regmap,-1);
3906 assert(temp>=0);
3907 // Check cop1 unusable
3908 if(!cop1_usable) {
3909 signed char rs=get_reg(i_regs->regmap,CSREG);
3910 assert(rs>=0);
3911 emit_testimm(rs,0x20000000);
3912 int jaddr=(int)out;
3913 emit_jeq(0);
3914 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3915 cop1_usable=1;
3916 }
3917
3918 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3919 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3920 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3921 emit_flds(temp,15);
3922 emit_ftosizs(15,15); // float->int, truncate
3923 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3924 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3925 emit_fsts(15,temp);
3926 return;
3927 }
3928 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3929 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3930 emit_vldr(temp,7);
3931 emit_ftosizd(7,13); // double->int, truncate
3932 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3933 emit_fsts(13,temp);
3934 return;
3935 }
3936
3937 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3938 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3939 emit_flds(temp,13);
3940 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3941 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3942 emit_fsitos(13,15);
3943 emit_fsts(15,temp);
3944 return;
3945 }
3946 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3947 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3948 emit_flds(temp,13);
3949 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3950 emit_fsitod(13,7);
3951 emit_vstr(7,temp);
3952 return;
3953 }
3954
3955 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3956 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3957 emit_flds(temp,13);
3958 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3959 emit_fcvtds(13,7);
3960 emit_vstr(7,temp);
3961 return;
3962 }
3963 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3964 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3965 emit_vldr(temp,7);
3966 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3967 emit_fcvtsd(7,13);
3968 emit_fsts(13,temp);
3969 return;
3970 }
3971 #endif
3972
3973 // C emulation code
3974
3975 u_int hr,reglist=0;
3976 for(hr=0;hr<HOST_REGS;hr++) {
3977 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3978 }
3979 save_regs(reglist);
3980
3981 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3982 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3983 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3984 emit_call((int)cvt_s_w);
3985 }
3986 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3987 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3988 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3989 emit_call((int)cvt_d_w);
3990 }
3991 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3992 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3993 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3994 emit_call((int)cvt_s_l);
3995 }
3996 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3997 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3998 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3999 emit_call((int)cvt_d_l);
4000 }
4001
4002 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4003 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4004 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4005 emit_call((int)cvt_d_s);
4006 }
4007 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4008 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4009 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4010 emit_call((int)cvt_w_s);
4011 }
4012 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4013 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4014 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4015 emit_call((int)cvt_l_s);
4016 }
4017
4018 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4019 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4020 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4021 emit_call((int)cvt_s_d);
4022 }
4023 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4024 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4025 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4026 emit_call((int)cvt_w_d);
4027 }
4028 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4029 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4030 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4031 emit_call((int)cvt_l_d);
4032 }
4033
4034 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4035 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4036 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4037 emit_call((int)round_l_s);
4038 }
4039 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4040 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4041 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4042 emit_call((int)trunc_l_s);
4043 }
4044 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4045 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4046 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4047 emit_call((int)ceil_l_s);
4048 }
4049 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4050 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4051 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4052 emit_call((int)floor_l_s);
4053 }
4054 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4055 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4056 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4057 emit_call((int)round_w_s);
4058 }
4059 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4060 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4061 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4062 emit_call((int)trunc_w_s);
4063 }
4064 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4065 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4066 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4067 emit_call((int)ceil_w_s);
4068 }
4069 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4070 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4071 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4072 emit_call((int)floor_w_s);
4073 }
4074
4075 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4076 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4077 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4078 emit_call((int)round_l_d);
4079 }
4080 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4081 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4082 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4083 emit_call((int)trunc_l_d);
4084 }
4085 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4086 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4087 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4088 emit_call((int)ceil_l_d);
4089 }
4090 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4091 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4092 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4093 emit_call((int)floor_l_d);
4094 }
4095 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4096 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4097 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4098 emit_call((int)round_w_d);
4099 }
4100 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4101 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4102 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4103 emit_call((int)trunc_w_d);
4104 }
4105 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4106 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4107 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4108 emit_call((int)ceil_w_d);
4109 }
4110 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4111 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4112 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4113 emit_call((int)floor_w_d);
4114 }
4115
4116 restore_regs(reglist);
3d624f89 4117#else
4118 cop1_unusable(i, i_regs);
4119#endif
57871462 4120}
4121#define fconv_assemble fconv_assemble_arm
4122
4123void fcomp_assemble(int i,struct regstat *i_regs)
4124{
3d624f89 4125#ifndef DISABLE_COP1
57871462 4126 signed char fs=get_reg(i_regs->regmap,FSREG);
4127 signed char temp=get_reg(i_regs->regmap,-1);
4128 assert(temp>=0);
4129 // Check cop1 unusable
4130 if(!cop1_usable) {
4131 signed char cs=get_reg(i_regs->regmap,CSREG);
4132 assert(cs>=0);
4133 emit_testimm(cs,0x20000000);
4134 int jaddr=(int)out;
4135 emit_jeq(0);
4136 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4137 cop1_usable=1;
4138 }
4139
4140 if((source[i]&0x3f)==0x30) {
4141 emit_andimm(fs,~0x800000,fs);
4142 return;
4143 }
4144
4145 if((source[i]&0x3e)==0x38) {
4146 // sf/ngle - these should throw exceptions for NaNs
4147 emit_andimm(fs,~0x800000,fs);
4148 return;
4149 }
4150
4151 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4152 if(opcode2[i]==0x10) {
4153 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4154 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4155 emit_orimm(fs,0x800000,fs);
4156 emit_flds(temp,14);
4157 emit_flds(HOST_TEMPREG,15);
4158 emit_fcmps(14,15);
4159 emit_fmstat();
4160 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4161 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4162 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4163 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4164 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4165 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4166 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4167 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4168 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4169 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4170 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4171 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4172 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4173 return;
4174 }
4175 if(opcode2[i]==0x11) {
4176 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4177 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4178 emit_orimm(fs,0x800000,fs);
4179 emit_vldr(temp,6);
4180 emit_vldr(HOST_TEMPREG,7);
4181 emit_fcmpd(6,7);
4182 emit_fmstat();
4183 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4184 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4185 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4186 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4187 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4188 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4189 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4190 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4191 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4192 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4193 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4194 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4195 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4196 return;
4197 }
4198 #endif
4199
4200 // C only
4201
4202 u_int hr,reglist=0;
4203 for(hr=0;hr<HOST_REGS;hr++) {
4204 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4205 }
4206 reglist&=~(1<<fs);
4207 save_regs(reglist);
4208 if(opcode2[i]==0x10) {
4209 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4210 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4211 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4212 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4213 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4214 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4215 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4216 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4217 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4218 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4219 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4220 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4221 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4222 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4223 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4224 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4225 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4226 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4227 }
4228 if(opcode2[i]==0x11) {
4229 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4230 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4231 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4232 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4233 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4234 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4235 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4236 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4237 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4238 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4239 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4240 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4241 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4242 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4243 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4244 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4245 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4246 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4247 }
4248 restore_regs(reglist);
4249 emit_loadreg(FSREG,fs);
3d624f89 4250#else
4251 cop1_unusable(i, i_regs);
4252#endif
57871462 4253}
4254
4255void float_assemble(int i,struct regstat *i_regs)
4256{
3d624f89 4257#ifndef DISABLE_COP1
57871462 4258 signed char temp=get_reg(i_regs->regmap,-1);
4259 assert(temp>=0);
4260 // Check cop1 unusable
4261 if(!cop1_usable) {
4262 signed char cs=get_reg(i_regs->regmap,CSREG);
4263 assert(cs>=0);
4264 emit_testimm(cs,0x20000000);
4265 int jaddr=(int)out;
4266 emit_jeq(0);
4267 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4268 cop1_usable=1;
4269 }
4270
4271 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4272 if((source[i]&0x3f)==6) // mov
4273 {
4274 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4275 if(opcode2[i]==0x10) {
4276 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4277 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4278 emit_readword_indexed(0,temp,temp);
4279 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4280 }
4281 if(opcode2[i]==0x11) {
4282 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4283 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4284 emit_vldr(temp,7);
4285 emit_vstr(7,HOST_TEMPREG);
4286 }
4287 }
4288 return;
4289 }
4290
4291 if((source[i]&0x3f)>3)
4292 {
4293 if(opcode2[i]==0x10) {
4294 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4295 emit_flds(temp,15);
4296 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4297 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4298 }
4299 if((source[i]&0x3f)==4) // sqrt
4300 emit_fsqrts(15,15);
4301 if((source[i]&0x3f)==5) // abs
4302 emit_fabss(15,15);
4303 if((source[i]&0x3f)==7) // neg
4304 emit_fnegs(15,15);
4305 emit_fsts(15,temp);
4306 }
4307 if(opcode2[i]==0x11) {
4308 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4309 emit_vldr(temp,7);
4310 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4311 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4312 }
4313 if((source[i]&0x3f)==4) // sqrt
4314 emit_fsqrtd(7,7);
4315 if((source[i]&0x3f)==5) // abs
4316 emit_fabsd(7,7);
4317 if((source[i]&0x3f)==7) // neg
4318 emit_fnegd(7,7);
4319 emit_vstr(7,temp);
4320 }
4321 return;
4322 }
4323 if((source[i]&0x3f)<4)
4324 {
4325 if(opcode2[i]==0x10) {
4326 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4327 }
4328 if(opcode2[i]==0x11) {
4329 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4330 }
4331 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4332 if(opcode2[i]==0x10) {
4333 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4334 emit_flds(temp,15);
4335 emit_flds(HOST_TEMPREG,13);
4336 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4337 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4338 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4339 }
4340 }
4341 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4342 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4343 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4344 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4345 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4346 emit_fsts(15,HOST_TEMPREG);
4347 }else{
4348 emit_fsts(15,temp);
4349 }
4350 }
4351 else if(opcode2[i]==0x11) {
4352 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4353 emit_vldr(temp,7);
4354 emit_vldr(HOST_TEMPREG,6);
4355 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4356 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4357 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4358 }
4359 }
4360 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4361 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4362 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4363 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4364 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4365 emit_vstr(7,HOST_TEMPREG);
4366 }else{
4367 emit_vstr(7,temp);
4368 }
4369 }
4370 }
4371 else {
4372 if(opcode2[i]==0x10) {
4373 emit_flds(temp,15);
4374 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4375 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4376 }
4377 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4378 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4379 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4380 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4381 emit_fsts(15,temp);
4382 }
4383 else if(opcode2[i]==0x11) {
4384 emit_vldr(temp,7);
4385 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4386 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4387 }
4388 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4389 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4390 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4391 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4392 emit_vstr(7,temp);
4393 }
4394 }
4395 return;
4396 }
4397 #endif
4398
4399 u_int hr,reglist=0;
4400 for(hr=0;hr<HOST_REGS;hr++) {
4401 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4402 }
4403 if(opcode2[i]==0x10) { // Single precision
4404 save_regs(reglist);
4405 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4406 if((source[i]&0x3f)<4) {
4407 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4408 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4409 }else{
4410 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4411 }
4412 switch(source[i]&0x3f)
4413 {
4414 case 0x00: emit_call((int)add_s);break;
4415 case 0x01: emit_call((int)sub_s);break;
4416 case 0x02: emit_call((int)mul_s);break;
4417 case 0x03: emit_call((int)div_s);break;
4418 case 0x04: emit_call((int)sqrt_s);break;
4419 case 0x05: emit_call((int)abs_s);break;
4420 case 0x06: emit_call((int)mov_s);break;
4421 case 0x07: emit_call((int)neg_s);break;
4422 }
4423 restore_regs(reglist);
4424 }
4425 if(opcode2[i]==0x11) { // Double precision
4426 save_regs(reglist);
4427 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4428 if((source[i]&0x3f)<4) {
4429 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4430 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4431 }else{
4432 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4433 }
4434 switch(source[i]&0x3f)
4435 {
4436 case 0x00: emit_call((int)add_d);break;
4437 case 0x01: emit_call((int)sub_d);break;
4438 case 0x02: emit_call((int)mul_d);break;
4439 case 0x03: emit_call((int)div_d);break;
4440 case 0x04: emit_call((int)sqrt_d);break;
4441 case 0x05: emit_call((int)abs_d);break;
4442 case 0x06: emit_call((int)mov_d);break;
4443 case 0x07: emit_call((int)neg_d);break;
4444 }
4445 restore_regs(reglist);
4446 }
3d624f89 4447#else
4448 cop1_unusable(i, i_regs);
4449#endif
57871462 4450}
4451
4452void multdiv_assemble_arm(int i,struct regstat *i_regs)
4453{
4454 // case 0x18: MULT
4455 // case 0x19: MULTU
4456 // case 0x1A: DIV
4457 // case 0x1B: DIVU
4458 // case 0x1C: DMULT
4459 // case 0x1D: DMULTU
4460 // case 0x1E: DDIV
4461 // case 0x1F: DDIVU
4462 if(rs1[i]&&rs2[i])
4463 {
4464 if((opcode2[i]&4)==0) // 32-bit
4465 {
4466 if(opcode2[i]==0x18) // MULT
4467 {
4468 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4469 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4470 signed char hi=get_reg(i_regs->regmap,HIREG);
4471 signed char lo=get_reg(i_regs->regmap,LOREG);
4472 assert(m1>=0);
4473 assert(m2>=0);
4474 assert(hi>=0);
4475 assert(lo>=0);
4476 emit_smull(m1,m2,hi,lo);
4477 }
4478 if(opcode2[i]==0x19) // MULTU
4479 {
4480 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4481 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4482 signed char hi=get_reg(i_regs->regmap,HIREG);
4483 signed char lo=get_reg(i_regs->regmap,LOREG);
4484 assert(m1>=0);
4485 assert(m2>=0);
4486 assert(hi>=0);
4487 assert(lo>=0);
4488 emit_umull(m1,m2,hi,lo);
4489 }
4490 if(opcode2[i]==0x1A) // DIV
4491 {
4492 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4493 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4494 assert(d1>=0);
4495 assert(d2>=0);
4496 signed char quotient=get_reg(i_regs->regmap,LOREG);
4497 signed char remainder=get_reg(i_regs->regmap,HIREG);
4498 assert(quotient>=0);
4499 assert(remainder>=0);
4500 emit_movs(d1,remainder);
44a80f6a 4501 emit_movimm(0xffffffff,quotient);
4502 emit_negmi(quotient,quotient); // .. quotient and ..
4503 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 4504 emit_movs(d2,HOST_TEMPREG);
4505 emit_jeq((int)out+52); // Division by zero
4506 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4507 emit_clz(HOST_TEMPREG,quotient);
4508 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4509 emit_orimm(quotient,1<<31,quotient);
4510 emit_shr(quotient,quotient,quotient);
4511 emit_cmp(remainder,HOST_TEMPREG);
4512 emit_subcs(remainder,HOST_TEMPREG,remainder);
4513 emit_adcs(quotient,quotient,quotient);
4514 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4515 emit_jcc((int)out-16); // -4
4516 emit_teq(d1,d2);
4517 emit_negmi(quotient,quotient);
4518 emit_test(d1,d1);
4519 emit_negmi(remainder,remainder);
4520 }
4521 if(opcode2[i]==0x1B) // DIVU
4522 {
4523 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4524 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4525 assert(d1>=0);
4526 assert(d2>=0);
4527 signed char quotient=get_reg(i_regs->regmap,LOREG);
4528 signed char remainder=get_reg(i_regs->regmap,HIREG);
4529 assert(quotient>=0);
4530 assert(remainder>=0);
44a80f6a 4531 emit_mov(d1,remainder);
4532 emit_movimm(0xffffffff,quotient); // div0 case
57871462 4533 emit_test(d2,d2);
44a80f6a 4534 emit_jeq((int)out+40); // Division by zero
57871462 4535 emit_clz(d2,HOST_TEMPREG);
4536 emit_movimm(1<<31,quotient);
4537 emit_shl(d2,HOST_TEMPREG,d2);
57871462 4538 emit_shr(quotient,HOST_TEMPREG,quotient);
4539 emit_cmp(remainder,d2);
4540 emit_subcs(remainder,d2,remainder);
4541 emit_adcs(quotient,quotient,quotient);
4542 emit_shrcc_imm(d2,1,d2);
4543 emit_jcc((int)out-16); // -4
4544 }
4545 }
4546 else // 64-bit
4547 {
4548 if(opcode2[i]==0x1C) // DMULT
4549 {
4550 assert(opcode2[i]!=0x1C);
4551 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4552 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4553 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4554 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4555 assert(m1h>=0);
4556 assert(m2h>=0);
4557 assert(m1l>=0);
4558 assert(m2l>=0);
4559 emit_pushreg(m2h);
4560 emit_pushreg(m2l);
4561 emit_pushreg(m1h);
4562 emit_pushreg(m1l);
4563 emit_call((int)&mult64);
4564 emit_popreg(m1l);
4565 emit_popreg(m1h);
4566 emit_popreg(m2l);
4567 emit_popreg(m2h);
4568 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4569 signed char hil=get_reg(i_regs->regmap,HIREG);
4570 if(hih>=0) emit_loadreg(HIREG|64,hih);
4571 if(hil>=0) emit_loadreg(HIREG,hil);
4572 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4573 signed char lol=get_reg(i_regs->regmap,LOREG);
4574 if(loh>=0) emit_loadreg(LOREG|64,loh);
4575 if(lol>=0) emit_loadreg(LOREG,lol);
4576 }
4577 if(opcode2[i]==0x1D) // DMULTU
4578 {
4579 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4580 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4581 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4582 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4583 assert(m1h>=0);
4584 assert(m2h>=0);
4585 assert(m1l>=0);
4586 assert(m2l>=0);
4587 save_regs(0x100f);
4588 if(m1l!=0) emit_mov(m1l,0);
4589 if(m1h==0) emit_readword((int)&dynarec_local,1);
4590 else if(m1h>1) emit_mov(m1h,1);
4591 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4592 else if(m2l>2) emit_mov(m2l,2);
4593 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4594 else if(m2h>3) emit_mov(m2h,3);
4595 emit_call((int)&multu64);
4596 restore_regs(0x100f);
4597 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4598 signed char hil=get_reg(i_regs->regmap,HIREG);
4599 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4600 signed char lol=get_reg(i_regs->regmap,LOREG);
4601 /*signed char temp=get_reg(i_regs->regmap,-1);
4602 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4603 signed char rl=get_reg(i_regs->regmap,HIREG);
4604 assert(m1h>=0);
4605 assert(m2h>=0);
4606 assert(m1l>=0);
4607 assert(m2l>=0);
4608 assert(temp>=0);
4609 //emit_mov(m1l,EAX);
4610 //emit_mul(m2l);
4611 emit_umull(rl,rh,m1l,m2l);
4612 emit_storereg(LOREG,rl);
4613 emit_mov(rh,temp);
4614 //emit_mov(m1h,EAX);
4615 //emit_mul(m2l);
4616 emit_umull(rl,rh,m1h,m2l);
4617 emit_adds(rl,temp,temp);
4618 emit_adcimm(rh,0,rh);
4619 emit_storereg(HIREG,rh);
4620 //emit_mov(m2h,EAX);
4621 //emit_mul(m1l);
4622 emit_umull(rl,rh,m1l,m2h);
4623 emit_adds(rl,temp,temp);
4624 emit_adcimm(rh,0,rh);
4625 emit_storereg(LOREG|64,temp);
4626 emit_mov(rh,temp);
4627 //emit_mov(m2h,EAX);
4628 //emit_mul(m1h);
4629 emit_umull(rl,rh,m1h,m2h);
4630 emit_adds(rl,temp,rl);
4631 emit_loadreg(HIREG,temp);
4632 emit_adcimm(rh,0,rh);
4633 emit_adds(rl,temp,rl);
4634 emit_adcimm(rh,0,rh);
4635 // DEBUG
4636 /*
4637 emit_pushreg(m2h);
4638 emit_pushreg(m2l);
4639 emit_pushreg(m1h);
4640 emit_pushreg(m1l);
4641 emit_call((int)&multu64);
4642 emit_popreg(m1l);
4643 emit_popreg(m1h);
4644 emit_popreg(m2l);
4645 emit_popreg(m2h);
4646 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4647 signed char hil=get_reg(i_regs->regmap,HIREG);
4648 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4649 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4650 */
4651 // Shouldn't be necessary
4652 //char loh=get_reg(i_regs->regmap,LOREG|64);
4653 //char lol=get_reg(i_regs->regmap,LOREG);
4654 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4655 //if(lol>=0) emit_loadreg(LOREG,lol);
4656 }
4657 if(opcode2[i]==0x1E) // DDIV
4658 {
4659 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4660 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4661 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4662 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4663 assert(d1h>=0);
4664 assert(d2h>=0);
4665 assert(d1l>=0);
4666 assert(d2l>=0);
4667 save_regs(0x100f);
4668 if(d1l!=0) emit_mov(d1l,0);
4669 if(d1h==0) emit_readword((int)&dynarec_local,1);
4670 else if(d1h>1) emit_mov(d1h,1);
4671 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4672 else if(d2l>2) emit_mov(d2l,2);
4673 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4674 else if(d2h>3) emit_mov(d2h,3);
4675 emit_call((int)&div64);
4676 restore_regs(0x100f);
4677 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4678 signed char hil=get_reg(i_regs->regmap,HIREG);
4679 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4680 signed char lol=get_reg(i_regs->regmap,LOREG);
4681 if(hih>=0) emit_loadreg(HIREG|64,hih);
4682 if(hil>=0) emit_loadreg(HIREG,hil);
4683 if(loh>=0) emit_loadreg(LOREG|64,loh);
4684 if(lol>=0) emit_loadreg(LOREG,lol);
4685 }
4686 if(opcode2[i]==0x1F) // DDIVU
4687 {
4688 //u_int hr,reglist=0;
4689 //for(hr=0;hr<HOST_REGS;hr++) {
4690 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4691 //}
4692 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4693 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4694 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4695 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4696 assert(d1h>=0);
4697 assert(d2h>=0);
4698 assert(d1l>=0);
4699 assert(d2l>=0);
4700 save_regs(0x100f);
4701 if(d1l!=0) emit_mov(d1l,0);
4702 if(d1h==0) emit_readword((int)&dynarec_local,1);
4703 else if(d1h>1) emit_mov(d1h,1);
4704 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4705 else if(d2l>2) emit_mov(d2l,2);
4706 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4707 else if(d2h>3) emit_mov(d2h,3);
4708 emit_call((int)&divu64);
4709 restore_regs(0x100f);
4710 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4711 signed char hil=get_reg(i_regs->regmap,HIREG);
4712 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4713 signed char lol=get_reg(i_regs->regmap,LOREG);
4714 if(hih>=0) emit_loadreg(HIREG|64,hih);
4715 if(hil>=0) emit_loadreg(HIREG,hil);
4716 if(loh>=0) emit_loadreg(LOREG|64,loh);
4717 if(lol>=0) emit_loadreg(LOREG,lol);
4718 }
4719 }
4720 }
4721 else
4722 {
4723 // Multiply by zero is zero.
4724 // MIPS does not have a divide by zero exception.
4725 // The result is undefined, we return zero.
4726 signed char hr=get_reg(i_regs->regmap,HIREG);
4727 signed char lr=get_reg(i_regs->regmap,LOREG);
4728 if(hr>=0) emit_zeroreg(hr);
4729 if(lr>=0) emit_zeroreg(lr);
4730 }
4731}
4732#define multdiv_assemble multdiv_assemble_arm
4733
// Intentionally emits nothing on ARM.
// On x86 this step loads the hash mask 0xf8 into register `r`; on ARM the
// hash is computed with a single instruction in do_rhash(), so no preload
// is needed.
void do_preload_rhash(int r) {
  (void)r;
}
4738
4739void do_preload_rhtbl(int ht) {
4740 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4741}
4742
// Compute the mini hash table offset for the address in `rs`:
// rh = rs & 0xf8.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4746
4747void do_miniht_load(int ht,int rh) {
4748 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4749 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4750}
4751
4752void do_miniht_jump(int rs,int rh,int ht) {
4753 emit_cmp(rh,rs);
4754 emit_ldreq_indexed(ht,4,15);
4755 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4756 emit_mov(rs,7);
4757 emit_jmp(jump_vaddr_reg[7]);
4758 #else
4759 emit_jmp(jump_vaddr_reg[rs]);
4760 #endif
4761}
4762
4763void do_miniht_insert(u_int return_address,int rt,int temp) {
4764 #ifdef ARMv5_ONLY
4765 emit_movimm(return_address,rt); // PC into link register
4766 add_to_linker((int)out,return_address,1);
4767 emit_pcreladdr(temp);
4768 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4769 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4770 #else
4771 emit_movw(return_address&0x0000FFFF,rt);
4772 add_to_linker((int)out,return_address,1);
4773 emit_pcreladdr(temp);
4774 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4775 emit_movt(return_address&0xFFFF0000,rt);
4776 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4777 #endif
4778}
4779
4780// Sign-extend to 64 bits and write out upper half of a register
4781// This is useful where we have a 32-bit value in a register, and want to
4782// keep it in a 32-bit register, but can't guarantee that it won't be read
4783// as a 64-bit value later.
4784void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4785{
24385cae 4786#ifndef FORCE32
57871462 4787 if(is32_pre==is32) return;
4788 int hr,reg;
4789 for(hr=0;hr<HOST_REGS;hr++) {
4790 if(hr!=EXCLUDE_REG) {
4791 //if(pre[hr]==entry[hr]) {
4792 if((reg=pre[hr])>=0) {
4793 if((dirty>>hr)&1) {
4794 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4795 emit_sarimm(hr,31,HOST_TEMPREG);
4796 emit_storereg(reg|64,HOST_TEMPREG);
4797 }
4798 }
4799 }
4800 //}
4801 }
4802 }
24385cae 4803#endif
57871462 4804}
4805
4806void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4807{
4808 //if(dirty_pre==dirty) return;
4809 int hr,reg,new_hr;
4810 for(hr=0;hr<HOST_REGS;hr++) {
4811 if(hr!=EXCLUDE_REG) {
4812 reg=pre[hr];
4813 if(((~u)>>(reg&63))&1) {
f776eb14 4814 if(reg>0) {
57871462 4815 if(((dirty_pre&~dirty)>>hr)&1) {
4816 if(reg>0&&reg<34) {
4817 emit_storereg(reg,hr);
4818 if( ((is32_pre&~uu)>>reg)&1 ) {
4819 emit_sarimm(hr,31,HOST_TEMPREG);
4820 emit_storereg(reg|64,HOST_TEMPREG);
4821 }
4822 }
4823 else if(reg>=64) {
4824 emit_storereg(reg,hr);
4825 }
4826 }
4827 }
57871462 4828 }
4829 }
4830 }
4831}
4832
4833
4834/* using strd could possibly help but you'd have to allocate registers in pairs
4835void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4836{
4837 int hr;
4838 int wrote=-1;
4839 for(hr=HOST_REGS-1;hr>=0;hr--) {
4840 if(hr!=EXCLUDE_REG) {
4841 if(pre[hr]!=entry[hr]) {
4842 if(pre[hr]>=0) {
4843 if((dirty>>hr)&1) {
4844 if(get_reg(entry,pre[hr])<0) {
4845 if(pre[hr]<64) {
4846 if(!((u>>pre[hr])&1)) {
4847 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4848 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4849 emit_sarimm(hr,31,hr+1);
4850 emit_strdreg(pre[hr],hr);
4851 }
4852 else
4853 emit_storereg(pre[hr],hr);
4854 }else{
4855 emit_storereg(pre[hr],hr);
4856 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4857 emit_sarimm(hr,31,hr);
4858 emit_storereg(pre[hr]|64,hr);
4859 }
4860 }
4861 }
4862 }else{
4863 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4864 emit_storereg(pre[hr],hr);
4865 }
4866 }
4867 wrote=hr;
4868 }
4869 }
4870 }
4871 }
4872 }
4873 }
4874 for(hr=0;hr<HOST_REGS;hr++) {
4875 if(hr!=EXCLUDE_REG) {
4876 if(pre[hr]!=entry[hr]) {
4877 if(pre[hr]>=0) {
4878 int nr;
4879 if((nr=get_reg(entry,pre[hr]))>=0) {
4880 emit_mov(hr,nr);
4881 }
4882 }
4883 }
4884 }
4885 }
4886}
4887#define wb_invalidate wb_invalidate_arm
4888*/
4889
dd3a91a1 4890// Clearing the cache is rather slow on ARM Linux, so mark the areas
4891// that need to be cleared, and then only clear these areas once.
4892void do_clear_cache()
4893{
4894 int i,j;
4895 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4896 {
4897 u_int bitmap=needs_clear_cache[i];
4898 if(bitmap) {
4899 u_int start,end;
4900 for(j=0;j<32;j++)
4901 {
4902 if(bitmap&(1<<j)) {
4903 start=BASE_ADDR+i*131072+j*4096;
4904 end=start+4095;
4905 j++;
4906 while(j<32) {
4907 if(bitmap&(1<<j)) {
4908 end+=4096;
4909 j++;
4910 }else{
4911 __clear_cache((void *)start,(void *)end);
4912 break;
4913 }
4914 }
4915 }
4916 }
4917 needs_clear_cache[i]=0;
4918 }
4919 }
4920}
4921
57871462 4922// CPU-architecture-specific initialization
4923void arch_init() {
3d624f89 4924#ifndef DISABLE_COP1
57871462 4925 rounding_modes[0]=0x0<<22; // round
4926 rounding_modes[1]=0x3<<22; // trunc
4927 rounding_modes[2]=0x1<<22; // ceil
4928 rounding_modes[3]=0x2<<22; // floor
3d624f89 4929#endif
57871462 4930}
b9b61529 4931
4932// vim:shiftwidth=2:expandtab