/* Source: pcsx_rearmed.git / libpcsxcore / new_dynarec / assem_arm.c
 * (repository browser view at commit "psxcounters: avoid doing excessive updates") */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Mupen64plus/PCSX - assem_arm.c *
 * Copyright (C) 2009-2011 Ari64 *
 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
 * *
 * This program is free software; you can redistribute it and/or modify *
 * it under the terms of the GNU General Public License as published by *
 * the Free Software Foundation; either version 2 of the License, or *
 * (at your option) any later version. *
 * *
 * This program is distributed in the hope that it will be useful, *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
 * GNU General Public License for more details. *
 * *
 * You should have received a copy of the GNU General Public License *
 * along with this program; if not, write to the *
 * Free Software Foundation, Inc., *
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22extern int cycle_count;
23extern int last_count;
24extern int pcaddr;
25extern int pending_exception;
26extern int branch_target;
27extern uint64_t readmem_dword;
28#ifdef MUPEN64
29extern precomp_instr fake_pc;
30#endif
31extern void *dynarec_local;
32extern u_int memory_map[1048576];
33extern u_int mini_ht[32][2];
34extern u_int rounding_modes[4];
35
36void indirect_jump_indexed();
37void indirect_jump();
38void do_interrupt();
39void jump_vaddr_r0();
40void jump_vaddr_r1();
41void jump_vaddr_r2();
42void jump_vaddr_r3();
43void jump_vaddr_r4();
44void jump_vaddr_r5();
45void jump_vaddr_r6();
46void jump_vaddr_r7();
47void jump_vaddr_r8();
48void jump_vaddr_r9();
49void jump_vaddr_r10();
50void jump_vaddr_r12();
51
52const u_int jump_vaddr_reg[16] = {
53 (int)jump_vaddr_r0,
54 (int)jump_vaddr_r1,
55 (int)jump_vaddr_r2,
56 (int)jump_vaddr_r3,
57 (int)jump_vaddr_r4,
58 (int)jump_vaddr_r5,
59 (int)jump_vaddr_r6,
60 (int)jump_vaddr_r7,
61 (int)jump_vaddr_r8,
62 (int)jump_vaddr_r9,
63 (int)jump_vaddr_r10,
64 0,
65 (int)jump_vaddr_r12,
66 0,
67 0,
68 0};
69
70void invalidate_addr_r0();
71void invalidate_addr_r1();
72void invalidate_addr_r2();
73void invalidate_addr_r3();
74void invalidate_addr_r4();
75void invalidate_addr_r5();
76void invalidate_addr_r6();
77void invalidate_addr_r7();
78void invalidate_addr_r8();
79void invalidate_addr_r9();
80void invalidate_addr_r10();
81void invalidate_addr_r12();
82
83const u_int invalidate_addr_reg[16] = {
84 (int)invalidate_addr_r0,
85 (int)invalidate_addr_r1,
86 (int)invalidate_addr_r2,
87 (int)invalidate_addr_r3,
88 (int)invalidate_addr_r4,
89 (int)invalidate_addr_r5,
90 (int)invalidate_addr_r6,
91 (int)invalidate_addr_r7,
92 (int)invalidate_addr_r8,
93 (int)invalidate_addr_r9,
94 (int)invalidate_addr_r10,
95 0,
96 (int)invalidate_addr_r12,
97 0,
98 0,
99 0};
100
101#include "fpu.h"
102
103unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
104
105/* Linker */
106
107void set_jump_target(int addr,u_int target)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
113 assert((addr&3)==0);
114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
116 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
121 assert((addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
126 assert((addr&3)==0);
127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
138// This optionally copies the instruction from the target of the branch into
139// the space before the branch. Works, but the difference in speed is
140// usually insignificant.
141void set_jump_target_fillslot(int addr,u_int target,int copy)
142{
143 u_char *ptr=(u_char *)addr;
144 u_int *ptr2=(u_int *)ptr;
145 assert(!copy||ptr2[-1]==0xe28dd000);
146 if(ptr[3]==0xe2) {
147 assert(!copy);
148 assert((target-(u_int)ptr2-8)<4096);
149 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
150 }
151 else {
152 assert((ptr[3]&0x0e)==0xa);
153 u_int target_insn=*(u_int *)target;
154 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
155 copy=0;
156 }
157 if((target_insn&0x0c100000)==0x04100000) { // Load
158 copy=0;
159 }
160 if(target_insn&0x08000000) {
161 copy=0;
162 }
163 if(copy) {
164 ptr2[-1]=target_insn;
165 target+=4;
166 }
167 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
168 }
169}
170
171/* Literal pool */
172add_literal(int addr,int val)
173{
174 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
175 literals[literalcount][0]=addr;
176 literals[literalcount][1]=val;
177 literalcount++;
178}
179
180void *kill_pointer(void *stub)
181{
182 int *ptr=(int *)(stub+4);
183 assert((*ptr&0x0ff00000)==0x05900000);
184 u_int offset=*ptr&0xfff;
185 int **l_ptr=(void *)ptr+offset+8;
186 int *i_ptr=*l_ptr;
187 set_jump_target((int)i_ptr,(int)stub);
188 return i_ptr;
189}
190
191// find where external branch is liked to using addr of it's stub:
192// get address that insn one after stub loads (dyna_linker arg1),
193// treat it as a pointer to branch insn,
194// return addr where that branch jumps to
195int get_pointer(void *stub)
196{
197 //printf("get_pointer(%x)\n",(int)stub);
198 int *ptr=(int *)(stub+4);
199 assert((*ptr&0x0fff0000)==0x059f0000);
200 u_int offset=*ptr&0xfff;
201 int **l_ptr=(void *)ptr+offset+8;
202 int *i_ptr=*l_ptr;
203 assert((*i_ptr&0x0f000000)==0x0a000000);
204 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
205}
206
207// Find the "clean" entry point from a "dirty" entry point
208// by skipping past the call to verify_code
209u_int get_clean_addr(int addr)
210{
211 int *ptr=(int *)addr;
212 #ifdef ARMv5_ONLY
213 ptr+=4;
214 #else
215 ptr+=6;
216 #endif
217 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
218 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
219 ptr++;
220 if((*ptr&0xFF000000)==0xea000000) {
221 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
222 }
223 return (u_int)ptr;
224}
225
226int verify_dirty(int addr)
227{
228 u_int *ptr=(u_int *)addr;
229 #ifdef ARMv5_ONLY
230 // get from literal pool
231 assert((*ptr&0xFFFF0000)==0xe59f0000);
232 u_int offset=*ptr&0xfff;
233 u_int *l_ptr=(void *)ptr+offset+8;
234 u_int source=l_ptr[0];
235 u_int copy=l_ptr[1];
236 u_int len=l_ptr[2];
237 ptr+=4;
238 #else
239 // ARMv7 movw/movt
240 assert((*ptr&0xFFF00000)==0xe3000000);
241 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
242 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
243 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
244 ptr+=6;
245 #endif
246 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
247 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
248#ifndef DISABLE_TLB
249 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
250 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
251 unsigned int page=source>>12;
252 unsigned int map_value=memory_map[page];
253 if(map_value>=0x80000000) return 0;
254 while(page<((source+len-1)>>12)) {
255 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
256 }
257 source = source+(map_value<<2);
258 }
259#endif
260 //printf("verify_dirty: %x %x %x\n",source,copy,len);
261 return !memcmp((void *)source,(void *)copy,len);
262}
263
264// This doesn't necessarily find all clean entry points, just
265// guarantees that it's not dirty
266int isclean(int addr)
267{
268 #ifdef ARMv5_ONLY
269 int *ptr=((u_int *)addr)+4;
270 #else
271 int *ptr=((u_int *)addr)+6;
272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
277 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
278 return 1;
279}
280
281void get_bounds(int addr,u_int *start,u_int *end)
282{
283 u_int *ptr=(u_int *)addr;
284 #ifdef ARMv5_ONLY
285 // get from literal pool
286 assert((*ptr&0xFFFF0000)==0xe59f0000);
287 u_int offset=*ptr&0xfff;
288 u_int *l_ptr=(void *)ptr+offset+8;
289 u_int source=l_ptr[0];
290 //u_int copy=l_ptr[1];
291 u_int len=l_ptr[2];
292 ptr+=4;
293 #else
294 // ARMv7 movw/movt
295 assert((*ptr&0xFFF00000)==0xe3000000);
296 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
297 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
298 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
299 ptr+=6;
300 #endif
301 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
302 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
303#ifndef DISABLE_TLB
304 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
305 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
306 if(memory_map[source>>12]>=0x80000000) source = 0;
307 else source = source+(memory_map[source>>12]<<2);
308 }
309#endif
310 *start=source;
311 *end=source+len;
312}
313
314/* Register allocation */
315
316// Note: registers are allocated clean (unmodified state)
317// if you intend to modify the register, you must call dirty_reg().
318void alloc_reg(struct regstat *cur,int i,signed char reg)
319{
320 int r,hr;
321 int preferred_reg = (reg&7);
322 if(reg==CCREG) preferred_reg=HOST_CCREG;
323 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
324
325 // Don't allocate unused registers
326 if((cur->u>>reg)&1) return;
327
328 // see if it's already allocated
329 for(hr=0;hr<HOST_REGS;hr++)
330 {
331 if(cur->regmap[hr]==reg) return;
332 }
333
334 // Keep the same mapping if the register was already allocated in a loop
335 preferred_reg = loop_reg(i,reg,preferred_reg);
336
337 // Try to allocate the preferred register
338 if(cur->regmap[preferred_reg]==-1) {
339 cur->regmap[preferred_reg]=reg;
340 cur->dirty&=~(1<<preferred_reg);
341 cur->isconst&=~(1<<preferred_reg);
342 return;
343 }
344 r=cur->regmap[preferred_reg];
345 if(r<64&&((cur->u>>r)&1)) {
346 cur->regmap[preferred_reg]=reg;
347 cur->dirty&=~(1<<preferred_reg);
348 cur->isconst&=~(1<<preferred_reg);
349 return;
350 }
351 if(r>=64&&((cur->uu>>(r&63))&1)) {
352 cur->regmap[preferred_reg]=reg;
353 cur->dirty&=~(1<<preferred_reg);
354 cur->isconst&=~(1<<preferred_reg);
355 return;
356 }
357
358 // Clear any unneeded registers
359 // We try to keep the mapping consistent, if possible, because it
360 // makes branches easier (especially loops). So we try to allocate
361 // first (see above) before removing old mappings. If this is not
362 // possible then go ahead and clear out the registers that are no
363 // longer needed.
364 for(hr=0;hr<HOST_REGS;hr++)
365 {
366 r=cur->regmap[hr];
367 if(r>=0) {
368 if(r<64) {
369 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
370 }
371 else
372 {
373 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
374 }
375 }
376 }
377 // Try to allocate any available register, but prefer
378 // registers that have not been used recently.
379 if(i>0) {
380 for(hr=0;hr<HOST_REGS;hr++) {
381 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
382 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
383 cur->regmap[hr]=reg;
384 cur->dirty&=~(1<<hr);
385 cur->isconst&=~(1<<hr);
386 return;
387 }
388 }
389 }
390 }
391 // Try to allocate any available register
392 for(hr=0;hr<HOST_REGS;hr++) {
393 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
394 cur->regmap[hr]=reg;
395 cur->dirty&=~(1<<hr);
396 cur->isconst&=~(1<<hr);
397 return;
398 }
399 }
400
401 // Ok, now we have to evict someone
402 // Pick a register we hopefully won't need soon
403 u_char hsn[MAXREG+1];
404 memset(hsn,10,sizeof(hsn));
405 int j;
406 lsn(hsn,i,&preferred_reg);
407 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
408 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
409 if(i>0) {
410 // Don't evict the cycle count at entry points, otherwise the entry
411 // stub will have to write it.
412 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
413 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
414 for(j=10;j>=3;j--)
415 {
416 // Alloc preferred register if available
417 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 // Evict both parts of a 64-bit register
420 if((cur->regmap[hr]&63)==r) {
421 cur->regmap[hr]=-1;
422 cur->dirty&=~(1<<hr);
423 cur->isconst&=~(1<<hr);
424 }
425 }
426 cur->regmap[preferred_reg]=reg;
427 return;
428 }
429 for(r=1;r<=MAXREG;r++)
430 {
431 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
432 for(hr=0;hr<HOST_REGS;hr++) {
433 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
434 if(cur->regmap[hr]==r+64) {
435 cur->regmap[hr]=reg;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 return;
439 }
440 }
441 }
442 for(hr=0;hr<HOST_REGS;hr++) {
443 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
444 if(cur->regmap[hr]==r) {
445 cur->regmap[hr]=reg;
446 cur->dirty&=~(1<<hr);
447 cur->isconst&=~(1<<hr);
448 return;
449 }
450 }
451 }
452 }
453 }
454 }
455 }
456 for(j=10;j>=0;j--)
457 {
458 for(r=1;r<=MAXREG;r++)
459 {
460 if(hsn[r]==j) {
461 for(hr=0;hr<HOST_REGS;hr++) {
462 if(cur->regmap[hr]==r+64) {
463 cur->regmap[hr]=reg;
464 cur->dirty&=~(1<<hr);
465 cur->isconst&=~(1<<hr);
466 return;
467 }
468 }
469 for(hr=0;hr<HOST_REGS;hr++) {
470 if(cur->regmap[hr]==r) {
471 cur->regmap[hr]=reg;
472 cur->dirty&=~(1<<hr);
473 cur->isconst&=~(1<<hr);
474 return;
475 }
476 }
477 }
478 }
479 }
480 printf("This shouldn't happen (alloc_reg)");exit(1);
481}
482
483void alloc_reg64(struct regstat *cur,int i,signed char reg)
484{
485 int preferred_reg = 8+(reg&1);
486 int r,hr;
487
488 // allocate the lower 32 bits
489 alloc_reg(cur,i,reg);
490
491 // Don't allocate unused registers
492 if((cur->uu>>reg)&1) return;
493
494 // see if the upper half is already allocated
495 for(hr=0;hr<HOST_REGS;hr++)
496 {
497 if(cur->regmap[hr]==reg+64) return;
498 }
499
500 // Keep the same mapping if the register was already allocated in a loop
501 preferred_reg = loop_reg(i,reg,preferred_reg);
502
503 // Try to allocate the preferred register
504 if(cur->regmap[preferred_reg]==-1) {
505 cur->regmap[preferred_reg]=reg|64;
506 cur->dirty&=~(1<<preferred_reg);
507 cur->isconst&=~(1<<preferred_reg);
508 return;
509 }
510 r=cur->regmap[preferred_reg];
511 if(r<64&&((cur->u>>r)&1)) {
512 cur->regmap[preferred_reg]=reg|64;
513 cur->dirty&=~(1<<preferred_reg);
514 cur->isconst&=~(1<<preferred_reg);
515 return;
516 }
517 if(r>=64&&((cur->uu>>(r&63))&1)) {
518 cur->regmap[preferred_reg]=reg|64;
519 cur->dirty&=~(1<<preferred_reg);
520 cur->isconst&=~(1<<preferred_reg);
521 return;
522 }
523
524 // Clear any unneeded registers
525 // We try to keep the mapping consistent, if possible, because it
526 // makes branches easier (especially loops). So we try to allocate
527 // first (see above) before removing old mappings. If this is not
528 // possible then go ahead and clear out the registers that are no
529 // longer needed.
530 for(hr=HOST_REGS-1;hr>=0;hr--)
531 {
532 r=cur->regmap[hr];
533 if(r>=0) {
534 if(r<64) {
535 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
536 }
537 else
538 {
539 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
540 }
541 }
542 }
543 // Try to allocate any available register, but prefer
544 // registers that have not been used recently.
545 if(i>0) {
546 for(hr=0;hr<HOST_REGS;hr++) {
547 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
548 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
549 cur->regmap[hr]=reg|64;
550 cur->dirty&=~(1<<hr);
551 cur->isconst&=~(1<<hr);
552 return;
553 }
554 }
555 }
556 }
557 // Try to allocate any available register
558 for(hr=0;hr<HOST_REGS;hr++) {
559 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
560 cur->regmap[hr]=reg|64;
561 cur->dirty&=~(1<<hr);
562 cur->isconst&=~(1<<hr);
563 return;
564 }
565 }
566
567 // Ok, now we have to evict someone
568 // Pick a register we hopefully won't need soon
569 u_char hsn[MAXREG+1];
570 memset(hsn,10,sizeof(hsn));
571 int j;
572 lsn(hsn,i,&preferred_reg);
573 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
574 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
575 if(i>0) {
576 // Don't evict the cycle count at entry points, otherwise the entry
577 // stub will have to write it.
578 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
579 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
580 for(j=10;j>=3;j--)
581 {
582 // Alloc preferred register if available
583 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 // Evict both parts of a 64-bit register
586 if((cur->regmap[hr]&63)==r) {
587 cur->regmap[hr]=-1;
588 cur->dirty&=~(1<<hr);
589 cur->isconst&=~(1<<hr);
590 }
591 }
592 cur->regmap[preferred_reg]=reg|64;
593 return;
594 }
595 for(r=1;r<=MAXREG;r++)
596 {
597 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
600 if(cur->regmap[hr]==r+64) {
601 cur->regmap[hr]=reg|64;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 for(hr=0;hr<HOST_REGS;hr++) {
609 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
610 if(cur->regmap[hr]==r) {
611 cur->regmap[hr]=reg|64;
612 cur->dirty&=~(1<<hr);
613 cur->isconst&=~(1<<hr);
614 return;
615 }
616 }
617 }
618 }
619 }
620 }
621 }
622 for(j=10;j>=0;j--)
623 {
624 for(r=1;r<=MAXREG;r++)
625 {
626 if(hsn[r]==j) {
627 for(hr=0;hr<HOST_REGS;hr++) {
628 if(cur->regmap[hr]==r+64) {
629 cur->regmap[hr]=reg|64;
630 cur->dirty&=~(1<<hr);
631 cur->isconst&=~(1<<hr);
632 return;
633 }
634 }
635 for(hr=0;hr<HOST_REGS;hr++) {
636 if(cur->regmap[hr]==r) {
637 cur->regmap[hr]=reg|64;
638 cur->dirty&=~(1<<hr);
639 cur->isconst&=~(1<<hr);
640 return;
641 }
642 }
643 }
644 }
645 }
646 printf("This shouldn't happen");exit(1);
647}
648
649// Allocate a temporary register. This is done without regard to
650// dirty status or whether the register we request is on the unneeded list
651// Note: This will only allocate one register, even if called multiple times
652void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
653{
654 int r,hr;
655 int preferred_reg = -1;
656
657 // see if it's already allocated
658 for(hr=0;hr<HOST_REGS;hr++)
659 {
660 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
661 }
662
663 // Try to allocate any available register
664 for(hr=HOST_REGS-1;hr>=0;hr--) {
665 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
666 cur->regmap[hr]=reg;
667 cur->dirty&=~(1<<hr);
668 cur->isconst&=~(1<<hr);
669 return;
670 }
671 }
672
673 // Find an unneeded register
674 for(hr=HOST_REGS-1;hr>=0;hr--)
675 {
676 r=cur->regmap[hr];
677 if(r>=0) {
678 if(r<64) {
679 if((cur->u>>r)&1) {
680 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 else
689 {
690 if((cur->uu>>(r&63))&1) {
691 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
692 cur->regmap[hr]=reg;
693 cur->dirty&=~(1<<hr);
694 cur->isconst&=~(1<<hr);
695 return;
696 }
697 }
698 }
699 }
700 }
701
702 // Ok, now we have to evict someone
703 // Pick a register we hopefully won't need soon
704 // TODO: we might want to follow unconditional jumps here
705 // TODO: get rid of dupe code and make this into a function
706 u_char hsn[MAXREG+1];
707 memset(hsn,10,sizeof(hsn));
708 int j;
709 lsn(hsn,i,&preferred_reg);
710 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
711 if(i>0) {
712 // Don't evict the cycle count at entry points, otherwise the entry
713 // stub will have to write it.
714 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
715 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
716 for(j=10;j>=3;j--)
717 {
718 for(r=1;r<=MAXREG;r++)
719 {
720 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
721 for(hr=0;hr<HOST_REGS;hr++) {
722 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
723 if(cur->regmap[hr]==r+64) {
724 cur->regmap[hr]=reg;
725 cur->dirty&=~(1<<hr);
726 cur->isconst&=~(1<<hr);
727 return;
728 }
729 }
730 }
731 for(hr=0;hr<HOST_REGS;hr++) {
732 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
733 if(cur->regmap[hr]==r) {
734 cur->regmap[hr]=reg;
735 cur->dirty&=~(1<<hr);
736 cur->isconst&=~(1<<hr);
737 return;
738 }
739 }
740 }
741 }
742 }
743 }
744 }
745 for(j=10;j>=0;j--)
746 {
747 for(r=1;r<=MAXREG;r++)
748 {
749 if(hsn[r]==j) {
750 for(hr=0;hr<HOST_REGS;hr++) {
751 if(cur->regmap[hr]==r+64) {
752 cur->regmap[hr]=reg;
753 cur->dirty&=~(1<<hr);
754 cur->isconst&=~(1<<hr);
755 return;
756 }
757 }
758 for(hr=0;hr<HOST_REGS;hr++) {
759 if(cur->regmap[hr]==r) {
760 cur->regmap[hr]=reg;
761 cur->dirty&=~(1<<hr);
762 cur->isconst&=~(1<<hr);
763 return;
764 }
765 }
766 }
767 }
768 }
769 printf("This shouldn't happen");exit(1);
770}
771// Allocate a specific ARM register.
772void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
773{
774 int n;
775 int dirty=0;
776
777 // see if it's already allocated (and dealloc it)
778 for(n=0;n<HOST_REGS;n++)
779 {
780 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
781 dirty=(cur->dirty>>n)&1;
782 cur->regmap[n]=-1;
783 }
784 }
785
786 cur->regmap[hr]=reg;
787 cur->dirty&=~(1<<hr);
788 cur->dirty|=dirty<<hr;
789 cur->isconst&=~(1<<hr);
790}
791
792// Alloc cycle count into dedicated register
793alloc_cc(struct regstat *cur,int i)
794{
795 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
796}
797
798/* Special alloc */
799
800
801/* Assembler */
802
// Printable names of the 16 ARM core registers, indexed by register number.
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"
};
820
821void output_byte(u_char byte)
822{
823 *(out++)=byte;
824}
825void output_modrm(u_char mod,u_char rm,u_char ext)
826{
827 assert(mod<4);
828 assert(rm<8);
829 assert(ext<8);
830 u_char byte=(mod<<6)|(ext<<3)|rm;
831 *(out++)=byte;
832}
833void output_sib(u_char scale,u_char index,u_char base)
834{
835 assert(scale<4);
836 assert(index<8);
837 assert(base<8);
838 u_char byte=(scale<<6)|(index<<3)|base;
839 *(out++)=byte;
840}
841void output_w32(u_int word)
842{
843 *((u_int *)out)=word;
844 out+=4;
845}
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// All three register numbers must be below 16 (asserted).
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the rn/rd fields plus an 8-bit immediate that is to be shifted left
// by `shift` bits. `shift` must be even; it is stored in the rotation field
// (bits 8-11) as (32-shift)/2.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate_bits=((32-shift)&30)<<7;
  u_int fields=imm;
  fields|=rotate_bits;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Try to express `imm` as an 8-bit value rotated right by an even amount.
// On success *encoded receives the 12-bit field (rotation in bits 8-11,
// value in bits 0-7) and 1 is returned; otherwise *encoded is 0 and the
// result is 0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30); // rotate right by two and try again
  }
  return 0;
}
876void genimm_checked(u_int imm,u_int *encoded)
877{
878 u_int ret=genimm(imm,encoded);
879 assert(ret);
880}
881u_int genjmp(u_int addr)
882{
883 int offset=addr-(int)out-8;
884 if(offset<-33554432||offset>=33554432) {
885 if (addr>2) {
886 printf("genjmp: out of range: %08x\n", offset);
887 exit(1);
888 }
889 return 0;
890 }
891 return ((u_int)offset>>2)&0xffffff;
892}
893
894void emit_mov(int rs,int rt)
895{
896 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
897 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
898}
899
900void emit_movs(int rs,int rt)
901{
902 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
903 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
904}
905
906void emit_add(int rs1,int rs2,int rt)
907{
908 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
909 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
910}
911
912void emit_adds(int rs1,int rs2,int rt)
913{
914 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
915 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
916}
917
918void emit_adcs(int rs1,int rs2,int rt)
919{
920 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
921 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
922}
923
924void emit_sbc(int rs1,int rs2,int rt)
925{
926 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
927 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
928}
929
930void emit_sbcs(int rs1,int rs2,int rt)
931{
932 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
933 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
934}
935
936void emit_neg(int rs, int rt)
937{
938 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
939 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
940}
941
942void emit_negs(int rs, int rt)
943{
944 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
945 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
946}
947
948void emit_sub(int rs1,int rs2,int rt)
949{
950 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
951 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
952}
953
954void emit_subs(int rs1,int rs2,int rt)
955{
956 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
957 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
958}
959
960void emit_zeroreg(int rt)
961{
962 assem_debug("mov %s,#0\n",regname[rt]);
963 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
964}
965
966void emit_loadlp(u_int imm,u_int rt)
967{
968 add_literal((int)out,imm);
969 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
970 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
971}
972void emit_movw(u_int imm,u_int rt)
973{
974 assert(imm<65536);
975 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
976 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
977}
978void emit_movt(u_int imm,u_int rt)
979{
980 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
981 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
982}
983void emit_movimm(u_int imm,u_int rt)
984{
985 u_int armval;
986 if(genimm(imm,&armval)) {
987 assem_debug("mov %s,#%d\n",regname[rt],imm);
988 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
989 }else if(genimm(~imm,&armval)) {
990 assem_debug("mvn %s,#%d\n",regname[rt],imm);
991 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
992 }else if(imm<65536) {
993 #ifdef ARMv5_ONLY
994 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
995 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
996 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
997 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
998 #else
999 emit_movw(imm,rt);
1000 #endif
1001 }else{
1002 #ifdef ARMv5_ONLY
1003 emit_loadlp(imm,rt);
1004 #else
1005 emit_movw(imm&0x0000FFFF,rt);
1006 emit_movt(imm&0xFFFF0000,rt);
1007 #endif
1008 }
1009}
1010void emit_pcreladdr(u_int rt)
1011{
1012 assem_debug("add %s,pc,#?\n",regname[rt]);
1013 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1014}
1015
1016void emit_loadreg(int r, int hr)
1017{
1018#ifdef FORCE32
1019 if(r&64) {
1020 printf("64bit load in 32bit mode!\n");
1021 assert(0);
1022 return;
1023 }
1024#endif
1025 if((r&63)==0)
1026 emit_zeroreg(hr);
1027 else {
1028 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1029 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1030 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1031 if(r==CCREG) addr=(int)&cycle_count;
1032 if(r==CSREG) addr=(int)&Status;
1033 if(r==FSREG) addr=(int)&FCR31;
1034 if(r==INVCP) addr=(int)&invc_ptr;
1035 u_int offset = addr-(u_int)&dynarec_local;
1036 assert(offset<4096);
1037 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1038 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1039 }
1040}
1041void emit_storereg(int r, int hr)
1042{
1043#ifdef FORCE32
1044 if(r&64) {
1045 printf("64bit store in 32bit mode!\n");
1046 assert(0);
1047 return;
1048 }
1049#endif
1050 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1051 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1052 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1053 if(r==CCREG) addr=(int)&cycle_count;
1054 if(r==FSREG) addr=(int)&FCR31;
1055 u_int offset = addr-(u_int)&dynarec_local;
1056 assert(offset<4096);
1057 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1058 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1059}
1060
1061void emit_test(int rs, int rt)
1062{
1063 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1064 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1065}
1066
1067void emit_testimm(int rs,int imm)
1068{
1069 u_int armval;
1070 assem_debug("tst %s,#%d\n",regname[rs],imm);
1071 genimm_checked(imm,&armval);
1072 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1073}
1074
1075void emit_testeqimm(int rs,int imm)
1076{
1077 u_int armval;
1078 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1079 genimm_checked(imm,&armval);
1080 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1081}
1082
1083void emit_not(int rs,int rt)
1084{
1085 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1086 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1087}
1088
1089void emit_mvnmi(int rs,int rt)
1090{
1091 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1092 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1093}
1094
1095void emit_and(u_int rs1,u_int rs2,u_int rt)
1096{
1097 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1098 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1099}
1100
1101void emit_or(u_int rs1,u_int rs2,u_int rt)
1102{
1103 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1105}
1106void emit_or_and_set_flags(int rs1,int rs2,int rt)
1107{
1108 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1109 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1110}
1111
1112void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1113{
1114 assert(rs<16);
1115 assert(rt<16);
1116 assert(imm<32);
1117 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1118 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1119}
1120
1121void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1122{
1123 assert(rs<16);
1124 assert(rt<16);
1125 assert(imm<32);
1126 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1127 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1128}
1129
1130void emit_xor(u_int rs1,u_int rs2,u_int rt)
1131{
1132 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1133 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1134}
1135
1136void emit_addimm(u_int rs,int imm,u_int rt)
1137{
1138 assert(rs<16);
1139 assert(rt<16);
1140 if(imm!=0) {
1141 u_int armval;
1142 if(genimm(imm,&armval)) {
1143 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1144 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1145 }else if(genimm(-imm,&armval)) {
1146 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1147 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1148 }else if(imm<0) {
1149 assert(imm>-65536);
1150 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1151 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1152 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1153 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1154 }else{
1155 assert(imm<65536);
1156 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1157 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1158 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1159 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1160 }
1161 }
1162 else if(rs!=rt) emit_mov(rs,rt);
1163}
1164
1165void emit_addimm_and_set_flags(int imm,int rt)
1166{
1167 assert(imm>-65536&&imm<65536);
1168 u_int armval;
1169 if(genimm(imm,&armval)) {
1170 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1171 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1172 }else if(genimm(-imm,&armval)) {
1173 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1174 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1175 }else if(imm<0) {
1176 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1177 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1178 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1179 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1180 }else{
1181 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1182 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1183 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1184 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1185 }
1186}
1187void emit_addimm_no_flags(u_int imm,u_int rt)
1188{
1189 emit_addimm(rt,imm,rt);
1190}
1191
1192void emit_addnop(u_int r)
1193{
1194 assert(r<16);
1195 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1196 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1197}
1198
1199void emit_adcimm(u_int rs,int imm,u_int rt)
1200{
1201 u_int armval;
1202 genimm_checked(imm,&armval);
1203 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1204 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1205}
1206/*void emit_sbcimm(int imm,u_int rt)
1207{
1208 u_int armval;
1209 genimm_checked(imm,&armval);
1210 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1211 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1212}*/
1213void emit_sbbimm(int imm,u_int rt)
1214{
1215 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1216 assert(rt<8);
1217 if(imm<128&&imm>=-128) {
1218 output_byte(0x83);
1219 output_modrm(3,rt,3);
1220 output_byte(imm);
1221 }
1222 else
1223 {
1224 output_byte(0x81);
1225 output_modrm(3,rt,3);
1226 output_w32(imm);
1227 }
1228}
1229void emit_rscimm(int rs,int imm,u_int rt)
1230{
1231 assert(0);
1232 u_int armval;
1233 genimm_checked(imm,&armval);
1234 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1235 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1236}
1237
// 64-bit add of a 32-bit constant: (rth:rtl) = (rsh:rsl) + imm.
// The constant is loaded into HOST_TEMPREG, added to the low word with
// emit_adds(), and emit_adcimm(rsh,0,rth) then adds 0 with carry to the
// high word.
void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
{
  // TODO: if(genimm(imm,&armval)) ...
  // else
  emit_movimm(imm,HOST_TEMPREG);
  emit_adds(HOST_TEMPREG,rsl,rtl);
  emit_adcimm(rsh,0,rth);
}
1246
1247void emit_sbb(int rs1,int rs2)
1248{
1249 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1250 output_byte(0x19);
1251 output_modrm(3,rs1,rs2);
1252}
1253
1254void emit_andimm(int rs,int imm,int rt)
1255{
1256 u_int armval;
1257 if(imm==0) {
1258 emit_zeroreg(rt);
1259 }else if(genimm(imm,&armval)) {
1260 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1261 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1262 }else if(genimm(~imm,&armval)) {
1263 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1264 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1265 }else if(imm==65535) {
1266 #ifdef ARMv5_ONLY
1267 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1268 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1269 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1270 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1271 #else
1272 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1273 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1274 #endif
1275 }else{
1276 assert(imm>0&&imm<65535);
1277 #ifdef ARMv5_ONLY
1278 assem_debug("mov r14,#%d\n",imm&0xFF00);
1279 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1280 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1281 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1282 #else
1283 emit_movw(imm,HOST_TEMPREG);
1284 #endif
1285 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1286 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1287 }
1288}
1289
1290void emit_orimm(int rs,int imm,int rt)
1291{
1292 u_int armval;
1293 if(imm==0) {
1294 if(rs!=rt) emit_mov(rs,rt);
1295 }else if(genimm(imm,&armval)) {
1296 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1297 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1298 }else{
1299 assert(imm>0&&imm<65536);
1300 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1301 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1302 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1303 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1304 }
1305}
1306
1307void emit_xorimm(int rs,int imm,int rt)
1308{
1309 u_int armval;
1310 if(imm==0) {
1311 if(rs!=rt) emit_mov(rs,rt);
1312 }else if(genimm(imm,&armval)) {
1313 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1314 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1315 }else{
1316 assert(imm>0&&imm<65536);
1317 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1318 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1319 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1320 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1321 }
1322}
1323
1324void emit_shlimm(int rs,u_int imm,int rt)
1325{
1326 assert(imm>0);
1327 assert(imm<32);
1328 //if(imm==1) ...
1329 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1330 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1331}
1332
1333void emit_lsls_imm(int rs,int imm,int rt)
1334{
1335 assert(imm>0);
1336 assert(imm<32);
1337 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1338 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1339}
1340
1341void emit_shrimm(int rs,u_int imm,int rt)
1342{
1343 assert(imm>0);
1344 assert(imm<32);
1345 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1346 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1347}
1348
1349void emit_sarimm(int rs,u_int imm,int rt)
1350{
1351 assert(imm>0);
1352 assert(imm<32);
1353 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1354 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1355}
1356
1357void emit_rorimm(int rs,u_int imm,int rt)
1358{
1359 assert(imm>0);
1360 assert(imm<32);
1361 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1362 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1363}
1364
1365void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1366{
1367 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1368 assert(imm>0);
1369 assert(imm<32);
1370 //if(imm==1) ...
1371 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1372 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1373 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1374 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1375}
1376
1377void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1378{
1379 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1380 assert(imm>0);
1381 assert(imm<32);
1382 //if(imm==1) ...
1383 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1384 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1385 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1386 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1387}
1388
1389void emit_signextend16(int rs,int rt)
1390{
1391 #ifdef ARMv5_ONLY
1392 emit_shlimm(rs,16,rt);
1393 emit_sarimm(rt,16,rt);
1394 #else
1395 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1396 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1397 #endif
1398}
1399
1400void emit_signextend8(int rs,int rt)
1401{
1402 #ifdef ARMv5_ONLY
1403 emit_shlimm(rs,24,rt);
1404 emit_sarimm(rt,24,rt);
1405 #else
1406 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1407 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1408 #endif
1409}
1410
1411void emit_shl(u_int rs,u_int shift,u_int rt)
1412{
1413 assert(rs<16);
1414 assert(rt<16);
1415 assert(shift<16);
1416 //if(imm==1) ...
1417 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1418 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1419}
1420void emit_shr(u_int rs,u_int shift,u_int rt)
1421{
1422 assert(rs<16);
1423 assert(rt<16);
1424 assert(shift<16);
1425 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1426 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1427}
1428void emit_sar(u_int rs,u_int shift,u_int rt)
1429{
1430 assert(rs<16);
1431 assert(rt<16);
1432 assert(shift<16);
1433 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1434 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1435}
// Unimplemented stub: logs an x86-style "shl r,%cl" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_shlcl(int r)
{
  assem_debug("shl %%%s,%%cl\n",regname[r]);
  assert(0);
}
// Unimplemented stub: logs an x86-style "shr r,%cl" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_shrcl(int r)
{
  assem_debug("shr %%%s,%%cl\n",regname[r]);
  assert(0);
}
// Unimplemented stub: logs an x86-style "sar r,%cl" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_sarcl(int r)
{
  assem_debug("sar %%%s,%%cl\n",regname[r]);
  assert(0);
}
1451
// Unimplemented stub: logs an x86-style "shld r1,r2,%cl" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_shldcl(int r1,int r2)
{
  assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
  assert(0);
}
// Unimplemented stub: logs an x86-style "shrd r1,r2,%cl" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_shrdcl(int r1,int r2)
{
  assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
  assert(0);
}
1462void emit_orrshl(u_int rs,u_int shift,u_int rt)
1463{
1464 assert(rs<16);
1465 assert(rt<16);
1466 assert(shift<16);
1467 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1468 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1469}
1470void emit_orrshr(u_int rs,u_int shift,u_int rt)
1471{
1472 assert(rs<16);
1473 assert(rt<16);
1474 assert(shift<16);
1475 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1476 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1477}
1478
1479void emit_cmpimm(int rs,int imm)
1480{
1481 u_int armval;
1482 if(genimm(imm,&armval)) {
1483 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1484 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1485 }else if(genimm(-imm,&armval)) {
1486 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1487 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1488 }else if(imm>0) {
1489 assert(imm<65536);
1490 #ifdef ARMv5_ONLY
1491 emit_movimm(imm,HOST_TEMPREG);
1492 #else
1493 emit_movw(imm,HOST_TEMPREG);
1494 #endif
1495 assem_debug("cmp %s,r14\n",regname[rs]);
1496 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1497 }else{
1498 assert(imm>-65536);
1499 #ifdef ARMv5_ONLY
1500 emit_movimm(-imm,HOST_TEMPREG);
1501 #else
1502 emit_movw(-imm,HOST_TEMPREG);
1503 #endif
1504 assem_debug("cmn %s,r14\n",regname[rs]);
1505 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1506 }
1507}
1508
1509void emit_cmovne(u_int *addr,int rt)
1510{
1511 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1512 assert(0);
1513}
1514void emit_cmovl(u_int *addr,int rt)
1515{
1516 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1517 assert(0);
1518}
1519void emit_cmovs(u_int *addr,int rt)
1520{
1521 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1522 assert(0);
1523}
1524void emit_cmovne_imm(int imm,int rt)
1525{
1526 assem_debug("movne %s,#%d\n",regname[rt],imm);
1527 u_int armval;
1528 genimm_checked(imm,&armval);
1529 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1530}
1531void emit_cmovl_imm(int imm,int rt)
1532{
1533 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1534 u_int armval;
1535 genimm_checked(imm,&armval);
1536 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1537}
1538void emit_cmovb_imm(int imm,int rt)
1539{
1540 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1541 u_int armval;
1542 genimm_checked(imm,&armval);
1543 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1544}
1545void emit_cmovs_imm(int imm,int rt)
1546{
1547 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1548 u_int armval;
1549 genimm_checked(imm,&armval);
1550 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1551}
1552void emit_cmove_reg(int rs,int rt)
1553{
1554 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1555 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1556}
1557void emit_cmovne_reg(int rs,int rt)
1558{
1559 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1560 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1561}
1562void emit_cmovl_reg(int rs,int rt)
1563{
1564 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1565 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1566}
1567void emit_cmovs_reg(int rs,int rt)
1568{
1569 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1570 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1571}
1572
// rt = (rs < imm) using the LT condition: rt is zeroed, rs is compared with
// imm, and rt is conditionally set to 1. When rt aliases rs the zeroing is
// done after the compare so the compared value is not lost.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs < imm) using the CC condition: rt is zeroed, rs is compared with
// imm, and rt is conditionally set to 1. When rt aliases rs the zeroing is
// done after the compare so the compared value is not lost.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set-less-than-immediate for a 64-bit value held in (rsh:rsl), result in rt.
// The low word is compared first via emit_slti32(); the high word then
// overrides the result: it is tested against 0 for imm>=0, or compared with
// -1 for imm<0. rsh must differ from rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Carry-based set-less-than-immediate for a 64-bit value held in (rsh:rsl),
// result in rt. The low word is compared via emit_sltiu32(); then for imm>=0
// a non-zero high word forces rt to 0, and for imm<0 a high word different
// from -1 forces rt to 1. rsh must differ from rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1619
1620void emit_cmp(int rs,int rt)
1621{
1622 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1623 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1624}
// rt = (rs > 0): compares rs with 1, loads 1 into rt, then replaces it with 0
// when the LT condition holds (i.e. rs < 1).
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// rt = (rs != 0): the flags are set from rs (via movs into rt, or a test when
// rs and rt are the same register) and rt becomes 1 on NE.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// rt = ((rsh:rsl) > 0) for a 64-bit value: start from the low-word result of
// emit_set_gz32(), then test the high word - non-zero forces 1, negative
// (MI) forces 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// rt = ((rsh:rsl) != 0): ORs the two halves into rt with flags set, then
// writes 1 into rt when the result is non-zero.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
// rt = (rs1 < rs2) using the LT condition. rt is zeroed before the compare
// unless it aliases one of the operands, in which case it is cleared
// afterwards.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand=(rs1==rt||rs2==rt);
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs1 < rs2) using the CC condition. rt is zeroed before the compare
// unless it aliases one of the operands, in which case it is cleared
// afterwards.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand=(rs1==rt||rs2==rt);
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// rt = ((u1:l1) < (u2:l2)) using the LT condition: the low words are
// compared, rt is cleared, the high words are subtracted with borrow into
// HOST_TEMPREG (emit_sbcs), and rt becomes 1 on LT.
// Neither high-word register may be rt.
void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
{
  //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
  assert(u1!=rt);
  assert(u2!=rt);
  emit_cmp(l1,l2);
  emit_movimm(0,rt);
  emit_sbcs(u1,u2,HOST_TEMPREG);
  emit_cmovl_imm(1,rt);
}
// rt = ((u1:l1) < (u2:l2)) using the CC condition: the low words are
// compared, rt is cleared, the high words are subtracted with borrow into
// HOST_TEMPREG (emit_sbcs), and rt becomes 1 on CC.
// Neither high-word register may be rt.
void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
{
  //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
  assert(u1!=rt);
  assert(u2!=rt);
  emit_cmp(l1,l2);
  emit_movimm(0,rt);
  emit_sbcs(u1,u2,HOST_TEMPREG);
  emit_cmovb_imm(1,rt);
}
1689
1690void emit_call(int a)
1691{
1692 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1693 u_int offset=genjmp(a);
1694 output_w32(0xeb000000|offset);
1695}
1696void emit_jmp(int a)
1697{
1698 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1699 u_int offset=genjmp(a);
1700 output_w32(0xea000000|offset);
1701}
1702void emit_jne(int a)
1703{
1704 assem_debug("bne %x\n",a);
1705 u_int offset=genjmp(a);
1706 output_w32(0x1a000000|offset);
1707}
1708void emit_jeq(int a)
1709{
1710 assem_debug("beq %x\n",a);
1711 u_int offset=genjmp(a);
1712 output_w32(0x0a000000|offset);
1713}
1714void emit_js(int a)
1715{
1716 assem_debug("bmi %x\n",a);
1717 u_int offset=genjmp(a);
1718 output_w32(0x4a000000|offset);
1719}
1720void emit_jns(int a)
1721{
1722 assem_debug("bpl %x\n",a);
1723 u_int offset=genjmp(a);
1724 output_w32(0x5a000000|offset);
1725}
1726void emit_jl(int a)
1727{
1728 assem_debug("blt %x\n",a);
1729 u_int offset=genjmp(a);
1730 output_w32(0xba000000|offset);
1731}
1732void emit_jge(int a)
1733{
1734 assem_debug("bge %x\n",a);
1735 u_int offset=genjmp(a);
1736 output_w32(0xaa000000|offset);
1737}
1738void emit_jno(int a)
1739{
1740 assem_debug("bvc %x\n",a);
1741 u_int offset=genjmp(a);
1742 output_w32(0x7a000000|offset);
1743}
1744void emit_jc(int a)
1745{
1746 assem_debug("bcs %x\n",a);
1747 u_int offset=genjmp(a);
1748 output_w32(0x2a000000|offset);
1749}
1750void emit_jcc(int a)
1751{
1752 assem_debug("bcc %x\n",a);
1753 u_int offset=genjmp(a);
1754 output_w32(0x3a000000|offset);
1755}
1756
// Unimplemented stub: logs an x86-style "push $imm" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
// Unimplemented stub: logs "pusha" and trips assert(0); no instruction is emitted.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented stub: logs "popa" and trips assert(0); no instruction is emitted.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
// Unimplemented stub: logs an x86-style "push reg" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_pushreg(u_int r)
{
  assem_debug("push %%%s\n",regname[r]);
  assert(0);
}
// Unimplemented stub: logs an x86-style "pop reg" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_popreg(u_int r)
{
  assem_debug("pop %%%s\n",regname[r]);
  assert(0);
}
1782void emit_callreg(u_int r)
1783{
1784 assert(r<15);
1785 assem_debug("blx %s\n",regname[r]);
1786 output_w32(0xe12fff30|r);
1787}
1788void emit_jmpreg(u_int r)
1789{
1790 assem_debug("mov pc,%s\n",regname[r]);
1791 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1792}
1793
1794void emit_readword_indexed(int offset, int rs, int rt)
1795{
1796 assert(offset>-4096&&offset<4096);
1797 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1798 if(offset>=0) {
1799 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1800 }else{
1801 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1802 }
1803}
1804void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1805{
1806 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1807 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1808}
1809void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1810{
1811 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1812 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1813}
1814void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1815{
1816 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1817 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1818}
1819void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1820{
1821 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1822 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1823}
1824void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1825{
1826 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1827 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1828}
1829void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1830{
1831 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1832 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1833}
// Word load with an optional map register. map<0 means no map: a plain
// offset load is emitted. Otherwise addr must be 0 and the load is indexed
// by map scaled by 4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map>=0) {
    assert(addr==0);
    emit_readword_dualindexedx4(rs, map, rt);
    return;
  }
  emit_readword_indexed(addr, rs, rt);
}
// 64-bit load into (rh:rl): the word at addr goes to rh (skipped when rh<0)
// and the following word to rl. With a map register (map>=0) the second
// word is reached by incrementing map by 1 in place (the map index is
// scaled by 4 in the load); rh must then differ from rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map>=0) {
    assert(rh!=rs);
    if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map,1,map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if(want_high) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr+4, rs, rl);
}
1854void emit_movsbl_indexed(int offset, int rs, int rt)
1855{
1856 assert(offset>-256&&offset<256);
1857 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1858 if(offset>=0) {
1859 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1860 }else{
1861 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1862 }
1863}
1864void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1865{
1866 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1867 else {
1868 if(addr==0) {
1869 emit_shlimm(map,2,map);
1870 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1871 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1872 }else{
1873 assert(addr>-256&&addr<256);
1874 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1875 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1876 emit_movsbl_indexed(addr, rt, rt);
1877 }
1878 }
1879}
1880void emit_movswl_indexed(int offset, int rs, int rt)
1881{
1882 assert(offset>-256&&offset<256);
1883 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1884 if(offset>=0) {
1885 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1886 }else{
1887 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1888 }
1889}
1890void emit_movzbl_indexed(int offset, int rs, int rt)
1891{
1892 assert(offset>-4096&&offset<4096);
1893 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1894 if(offset>=0) {
1895 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1896 }else{
1897 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1898 }
1899}
1900void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1901{
1902 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1903 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1904}
// Unsigned byte load with an optional map register.
//  - map<0: plain offset load.
//  - addr==0: load indexed by map scaled by 4.
//  - otherwise: rt = rs + addr first, then the scaled-index load from rt.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1917void emit_movzwl_indexed(int offset, int rs, int rt)
1918{
1919 assert(offset>-256&&offset<256);
1920 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1921 if(offset>=0) {
1922 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1923 }else{
1924 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1925 }
1926}
1927void emit_readword(int addr, int rt)
1928{
1929 u_int offset = addr-(u_int)&dynarec_local;
1930 assert(offset<4096);
1931 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1932 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1933}
1934void emit_movsbl(int addr, int rt)
1935{
1936 u_int offset = addr-(u_int)&dynarec_local;
1937 assert(offset<256);
1938 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1939 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1940}
1941void emit_movswl(int addr, int rt)
1942{
1943 u_int offset = addr-(u_int)&dynarec_local;
1944 assert(offset<256);
1945 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1946 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1947}
1948void emit_movzbl(int addr, int rt)
1949{
1950 u_int offset = addr-(u_int)&dynarec_local;
1951 assert(offset<4096);
1952 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1953 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1954}
1955void emit_movzwl(int addr, int rt)
1956{
1957 u_int offset = addr-(u_int)&dynarec_local;
1958 assert(offset<256);
1959 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1960 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1961}
// Unimplemented stub: logs an x86-style "movzwl" mnemonic (the source register
// name is printed from its second character) and trips assert(0).
void emit_movzwl_reg(int rs, int rt)
{
  assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
  assert(0);
}
1967
// Unimplemented stub: logs an x86-style "xchg" mnemonic and trips assert(0);
// no instruction is emitted.
void emit_xchg(int rs, int rt)
{
  assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
  assert(0);
}
1973void emit_writeword_indexed(int rt, int offset, int rs)
1974{
1975 assert(offset>-4096&&offset<4096);
1976 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1977 if(offset>=0) {
1978 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1979 }else{
1980 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1981 }
1982}
1983void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1984{
1985 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1986 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1987}
// Word store with an optional map register. map<0 means no map: a plain
// offset store is emitted. Otherwise addr must be 0 and the store is indexed
// by map scaled by 4. The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map>=0) {
    assert(addr==0);
    emit_writeword_dualindexedx4(rt, rs, map);
    return;
  }
  emit_writeword_indexed(rt, addr, rs);
}
// 64-bit store of (rh:rl): rh goes to addr and rl to the following word.
// Without a map (map<0) two offset stores are used, and the rh store is
// skipped when rh<0. With a map, rh must be valid; the second word is
// addressed through temp = map+1 when temp is a separate register, or by
// advancing rs by 4 in place when temp is rs itself.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int separate_temp=(temp!=rs);
  if(separate_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(separate_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
    return;
  }
  emit_addimm(rs,4,rs);
  emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
}
2012void emit_writehword_indexed(int rt, int offset, int rs)
2013{
2014 assert(offset>-256&&offset<256);
2015 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2016 if(offset>=0) {
2017 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2018 }else{
2019 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2020 }
2021}
2022void emit_writebyte_indexed(int rt, int offset, int rs)
2023{
2024 assert(offset>-4096&&offset<4096);
2025 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2026 if(offset>=0) {
2027 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2028 }else{
2029 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2030 }
2031}
2032void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2033{
2034 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2035 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2036}
// Byte store with an optional map register.
//  - map<0: plain offset store.
//  - addr==0: store indexed by map scaled by 4.
//  - otherwise: temp = rs + addr first, then the scaled-index store from temp.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
2049void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2050{
2051 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2052 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2053}
2054void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2055{
2056 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2057 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2058}
2059void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2060{
2061 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2062 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2063}
2064void emit_writeword(int rt, int addr)
2065{
2066 u_int offset = addr-(u_int)&dynarec_local;
2067 assert(offset<4096);
2068 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2069 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2070}
2071void emit_writehword(int rt, int addr)
2072{
2073 u_int offset = addr-(u_int)&dynarec_local;
2074 assert(offset<256);
2075 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2076 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2077}
2078void emit_writebyte(int rt, int addr)
2079{
2080 u_int offset = addr-(u_int)&dynarec_local;
2081 assert(offset<4096);
2082 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2083 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2084}
// Unimplemented stub: logs an x86-style "movl $imm,addr" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// Unimplemented stub: logs an x86-style "movb $imm,addr" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2095
// Unimplemented stub: logs an x86-style "mul reg" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_mul(int rs)
{
  assem_debug("mul %%%s\n",regname[rs]);
  assert(0);
}
// Unimplemented stub: logs an x86-style "imul reg" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_imul(int rs)
{
  assem_debug("imul %%%s\n",regname[rs]);
  assert(0);
}
2106void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2107{
2108 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2109 assert(rs1<16);
2110 assert(rs2<16);
2111 assert(hi<16);
2112 assert(lo<16);
2113 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2114}
2115void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2116{
2117 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2118 assert(rs1<16);
2119 assert(rs2<16);
2120 assert(hi<16);
2121 assert(lo<16);
2122 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2123}
2124
// Unimplemented stub: logs an x86-style "div reg" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_div(int rs)
{
  assem_debug("div %%%s\n",regname[rs]);
  assert(0);
}
// Unimplemented stub: logs an x86-style "idiv reg" mnemonic and trips
// assert(0); no instruction is emitted.
void emit_idiv(int rs)
{
  assem_debug("idiv %%%s\n",regname[rs]);
  assert(0);
}
// Unimplemented stub: logs "cdq" and trips assert(0); no instruction is emitted.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2140
2141void emit_clz(int rs,int rt)
2142{
2143 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2144 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2145}
2146
2147void emit_subcs(int rs1,int rs2,int rt)
2148{
2149 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2150 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2151}
2152
2153void emit_shrcc_imm(int rs,u_int imm,int rt)
2154{
2155 assert(imm>0);
2156 assert(imm<32);
2157 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2158 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2159}
2160
2161void emit_negmi(int rs, int rt)
2162{
2163 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2164 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2165}
2166
2167void emit_negsmi(int rs, int rt)
2168{
2169 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2170 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2171}
2172
2173void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2174{
2175 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2176 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2177}
2178
2179void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2180{
2181 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2182 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2183}
2184
2185void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2186{
2187 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2188 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2189}
2190
2191void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2192{
2193 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2194 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2195}
2196
2197void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2198{
2199 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2200 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2201}
2202
2203void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2204{
2205 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2206 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2207}
2208
2209void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2210{
2211 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2212 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2213}
2214
2215void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2216{
2217 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2218 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2219}
2220
2221void emit_teq(int rs, int rt)
2222{
2223 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2224 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2225}
2226
2227void emit_rsbimm(int rs, int imm, int rt)
2228{
2229 u_int armval;
2230 genimm_checked(imm,&armval);
2231 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2232 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2233}
2234
2235// Load 2 immediates optimizing for small code size
2236void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2237{
2238 emit_movimm(imm1,rt1);
2239 u_int armval;
2240 if(genimm(imm2-imm1,&armval)) {
2241 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2242 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2243 }else if(genimm(imm1-imm2,&armval)) {
2244 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2245 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2246 }
2247 else emit_movimm(imm2,rt2);
2248}
2249
2250// Conditionally select one of two immediates, optimizing for small code size
2251// This will only be called if HAVE_CMOV_IMM is defined
2252void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2253{
2254 u_int armval;
2255 if(genimm(imm2-imm1,&armval)) {
2256 emit_movimm(imm1,rt);
2257 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2258 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2259 }else if(genimm(imm1-imm2,&armval)) {
2260 emit_movimm(imm1,rt);
2261 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2262 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2263 }
2264 else {
2265 #ifdef ARMv5_ONLY
2266 emit_movimm(imm1,rt);
2267 add_literal((int)out,imm2);
2268 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2269 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2270 #else
2271 emit_movw(imm1&0x0000FFFF,rt);
2272 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2273 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2274 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2275 }
2276 emit_movt(imm1&0xFFFF0000,rt);
2277 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2278 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2279 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2280 }
2281 #endif
2282 }
2283}
2284
// special case for checking invalid_code
// Not implemented in this backend: always aborts.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2290
2291// special case for checking invalid_code
2292void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2293{
2294 assert(imm<128&&imm>=0);
2295 assert(r>=0&&r<16);
2296 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2297 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2298 emit_cmpimm(HOST_TEMPREG,imm);
2299}
2300
2301// special case for tlb mapping
2302void emit_addsr12(int rs1,int rs2,int rt)
2303{
2304 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2305 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2306}
2307
2308void emit_callne(int a)
2309{
2310 assem_debug("blne %x\n",a);
2311 u_int offset=genjmp(a);
2312 output_w32(0x1b000000|offset);
2313}
2314
// Used to preload hash table entries
// NOTE(review): the byte sequence below (0x0F 0x18 + modrm + 32-bit address)
// looks like an x86 prefetch encoding carried over from another backend;
// confirm this function is never reached on ARM.
void emit_prefetch(void *addr)
{
  int target = (int)addr;
  assem_debug("prefetch %x\n", target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0, 5, 1);
  output_w32(target);
}
2324void emit_prefetchreg(int r)
2325{
2326 assem_debug("pld %s\n",regname[r]);
2327 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2328}
2329
2330// Special case for mini_ht
2331void emit_ldreq_indexed(int rs, u_int offset, int rt)
2332{
2333 assert(offset<4096);
2334 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2335 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2336}
2337
2338void emit_flds(int r,int sr)
2339{
2340 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2341 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2342}
2343
2344void emit_vldr(int r,int vr)
2345{
2346 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2347 output_w32(0xed900b00|(vr<<12)|(r<<16));
2348}
2349
2350void emit_fsts(int sr,int r)
2351{
2352 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2353 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2354}
2355
2356void emit_vstr(int vr,int r)
2357{
2358 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2359 output_w32(0xed800b00|(vr<<12)|(r<<16));
2360}
2361
2362void emit_ftosizs(int s,int d)
2363{
2364 assem_debug("ftosizs s%d,s%d\n",d,s);
2365 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2366}
2367
2368void emit_ftosizd(int s,int d)
2369{
2370 assem_debug("ftosizd s%d,d%d\n",d,s);
2371 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2372}
2373
2374void emit_fsitos(int s,int d)
2375{
2376 assem_debug("fsitos s%d,s%d\n",d,s);
2377 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2378}
2379
2380void emit_fsitod(int s,int d)
2381{
2382 assem_debug("fsitod d%d,s%d\n",d,s);
2383 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2384}
2385
2386void emit_fcvtds(int s,int d)
2387{
2388 assem_debug("fcvtds d%d,s%d\n",d,s);
2389 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2390}
2391
2392void emit_fcvtsd(int s,int d)
2393{
2394 assem_debug("fcvtsd s%d,d%d\n",d,s);
2395 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2396}
2397
/*
 * Emits "fsqrts s<d>,s<s>": single-precision square root.
 * Both operands are encoded as single-precision registers, so the debug
 * disassembly labels both with "s" (it previously printed the destination
 * as a "d" register).
 */
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2403
/*
 * Emits "fsqrtd d<d>,d<s>": double-precision square root.
 * Both operands are encoded as double-precision registers, so the debug
 * disassembly labels both with "d" (it previously printed the destination
 * as an "s" register).
 */
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2409
/*
 * Emits "fabss s<d>,s<s>": single-precision absolute value.
 * Both operands are encoded as single-precision registers, so the debug
 * disassembly labels both with "s" (it previously printed the destination
 * as a "d" register).
 */
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2415
/*
 * Emits "fabsd d<d>,d<s>": double-precision absolute value.
 * Both operands are encoded as double-precision registers, so the debug
 * disassembly labels both with "d" (it previously printed the destination
 * as an "s" register).
 */
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2421
/*
 * Emits "fnegs s<d>,s<s>": single-precision negate.
 * Both operands are encoded as single-precision registers, so the debug
 * disassembly labels both with "s" (it previously printed the destination
 * as a "d" register).
 */
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2427
/*
 * Emits "fnegd d<d>,d<s>": double-precision negate.
 * Both operands are encoded as double-precision registers, so the debug
 * disassembly labels both with "d" (it previously printed the destination
 * as an "s" register).
 */
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2433
2434void emit_fadds(int s1,int s2,int d)
2435{
2436 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2437 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2438}
2439
2440void emit_faddd(int s1,int s2,int d)
2441{
2442 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2443 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2444}
2445
2446void emit_fsubs(int s1,int s2,int d)
2447{
2448 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2449 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2450}
2451
2452void emit_fsubd(int s1,int s2,int d)
2453{
2454 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2455 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2456}
2457
2458void emit_fmuls(int s1,int s2,int d)
2459{
2460 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2461 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2462}
2463
2464void emit_fmuld(int s1,int s2,int d)
2465{
2466 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2467 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2468}
2469
2470void emit_fdivs(int s1,int s2,int d)
2471{
2472 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2473 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2474}
2475
2476void emit_fdivd(int s1,int s2,int d)
2477{
2478 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2479 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2480}
2481
2482void emit_fcmps(int x,int y)
2483{
2484 assem_debug("fcmps s14, s15\n");
2485 output_w32(0xeeb47a67);
2486}
2487
2488void emit_fcmpd(int x,int y)
2489{
2490 assem_debug("fcmpd d6, d7\n");
2491 output_w32(0xeeb46b47);
2492}
2493
2494void emit_fmstat()
2495{
2496 assem_debug("fmstat\n");
2497 output_w32(0xeef1fa10);
2498}
2499
2500void emit_bicne_imm(int rs,int imm,int rt)
2501{
2502 u_int armval;
2503 genimm_checked(imm,&armval);
2504 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2505 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2506}
2507
2508void emit_biccs_imm(int rs,int imm,int rt)
2509{
2510 u_int armval;
2511 genimm_checked(imm,&armval);
2512 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2513 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2514}
2515
2516void emit_bicvc_imm(int rs,int imm,int rt)
2517{
2518 u_int armval;
2519 genimm_checked(imm,&armval);
2520 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2521 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2522}
2523
2524void emit_bichi_imm(int rs,int imm,int rt)
2525{
2526 u_int armval;
2527 genimm_checked(imm,&armval);
2528 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2529 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2530}
2531
2532void emit_orrvs_imm(int rs,int imm,int rt)
2533{
2534 u_int armval;
2535 genimm_checked(imm,&armval);
2536 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2537 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2538}
2539
2540void emit_orrne_imm(int rs,int imm,int rt)
2541{
2542 u_int armval;
2543 genimm_checked(imm,&armval);
2544 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2545 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2546}
2547
2548void emit_andne_imm(int rs,int imm,int rt)
2549{
2550 u_int armval;
2551 genimm_checked(imm,&armval);
2552 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2553 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2554}
2555
2556void emit_jno_unlikely(int a)
2557{
2558 //emit_jno(a);
2559 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2560 output_w32(0x72800000|rd_rn_rm(15,15,0));
2561}
2562
2563// Save registers before function call
2564void save_regs(u_int reglist)
2565{
2566 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2567 if(!reglist) return;
2568 assem_debug("stmia fp,{");
2569 if(reglist&1) assem_debug("r0, ");
2570 if(reglist&2) assem_debug("r1, ");
2571 if(reglist&4) assem_debug("r2, ");
2572 if(reglist&8) assem_debug("r3, ");
2573 if(reglist&0x1000) assem_debug("r12");
2574 assem_debug("}\n");
2575 output_w32(0xe88b0000|reglist);
2576}
2577// Restore registers after function call
2578void restore_regs(u_int reglist)
2579{
2580 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2581 if(!reglist) return;
2582 assem_debug("ldmia fp,{");
2583 if(reglist&1) assem_debug("r0, ");
2584 if(reglist&2) assem_debug("r1, ");
2585 if(reglist&4) assem_debug("r2, ");
2586 if(reglist&8) assem_debug("r3, ");
2587 if(reglist&0x1000) assem_debug("r12");
2588 assem_debug("}\n");
2589 output_w32(0xe89b0000|reglist);
2590}
2591
2592// Write back consts using r14 so we don't disturb the other registers
2593void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2594{
2595 int hr;
2596 for(hr=0;hr<HOST_REGS;hr++) {
2597 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2598 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2599 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2600 int value=constmap[i][hr];
2601 if(value==0) {
2602 emit_zeroreg(HOST_TEMPREG);
2603 }
2604 else {
2605 emit_movimm(value,HOST_TEMPREG);
2606 }
2607 emit_storereg(i_regmap[hr],HOST_TEMPREG);
2608#ifndef FORCE32
2609 if((i_is32>>i_regmap[hr])&1) {
2610 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2611 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2612 }
2613#endif
2614 }
2615 }
2616 }
2617 }
2618}
2619
2620/* Stubs/epilogue */
2621
2622void literal_pool(int n)
2623{
2624 if(!literalcount) return;
2625 if(n) {
2626 if((int)out-literals[0][0]<4096-n) return;
2627 }
2628 u_int *ptr;
2629 int i;
2630 for(i=0;i<literalcount;i++)
2631 {
2632 ptr=(u_int *)literals[i][0];
2633 u_int offset=(u_int)out-(u_int)ptr-8;
2634 assert(offset<4096);
2635 assert(!(offset&3));
2636 *ptr|=offset;
2637 output_w32(literals[i][1]);
2638 }
2639 literalcount=0;
2640}
2641
2642void literal_pool_jumpover(int n)
2643{
2644 if(!literalcount) return;
2645 if(n) {
2646 if((int)out-literals[0][0]<4096-n) return;
2647 }
2648 int jaddr=(int)out;
2649 emit_jmp(0);
2650 literal_pool(0);
2651 set_jump_target(jaddr,(int)out);
2652}
2653
2654emit_extjump2(int addr, int target, int linker)
2655{
2656 u_char *ptr=(u_char *)addr;
2657 assert((ptr[3]&0x0e)==0xa);
2658 emit_loadlp(target,0);
2659 emit_loadlp(addr,1);
2660 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2661 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2662//DEBUG >
2663#ifdef DEBUG_CYCLE_COUNT
2664 emit_readword((int)&last_count,ECX);
2665 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2666 emit_readword((int)&next_interupt,ECX);
2667 emit_writeword(HOST_CCREG,(int)&Count);
2668 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2669 emit_writeword(ECX,(int)&last_count);
2670#endif
2671//DEBUG <
2672 emit_jmp(linker);
2673}
2674
2675emit_extjump(int addr, int target)
2676{
2677 emit_extjump2(addr, target, (int)dyna_linker);
2678}
2679emit_extjump_ds(int addr, int target)
2680{
2681 emit_extjump2(addr, target, (int)dyna_linker_ds);
2682}
2683
2684// put rt_val into rt, potentially making use of rs with value rs_val
2685static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2686{
2687 u_int xor=rs_val^rt_val;
2688 u_int xs;
2689 for(xs=xor;xs!=0&&(xs&3)==0;xs>>=2)
2690 ;
2691 if(xs<0x100)
2692 emit_xorimm(rs,xor,rt);
2693 else
2694 emit_movimm(rt_val,rt);
2695}
2696
// trashes r2
// Moves the values held in host registers a0 and a1 into r0 and r1,
// ordering the moves so neither source is overwritten before it is read.
// A negative register number means "nothing to move" for that argument.
static void pass_args(int a0, int a1)
{
  if (a1 == 0) {
    if (a0 == 1) {
      // must swap
      emit_mov(a0, 2);
      emit_mov(a1, 1);
      emit_mov(2, 0);
      return;
    }
    if (a0 != 0) {
      // a1 lives in r0: move it out before r0 is overwritten
      emit_mov(a1, 1);
      if (a0 >= 0) emit_mov(a0, 0);
      return;
    }
  }
  if (a0 > 0) emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1) emit_mov(a1, 1);
}
2713
/*
 * Emits the out-of-line slow path for load stub n.
 *
 * Fields of stubs[n] as read here:
 *   [0] stub type (LOADB_STUB, LOADBU_STUB, LOADH_STUB, LOADHU_STUB, LOADW_STUB, ...)
 *   [1] address of the branch that is retargeted to this stub
 *   [2] address to jump back to when the stub is done
 *   [3] instruction index i
 *   [4] host register holding the address (rs)
 *   [5] pointer to the struct regstat in effect
 *   [6] cycle-count adjustment
 *   [7] mask of host registers to preserve around the call
 */
do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  // Point the originating branch at the code emitted from here on.
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt;
  int ds;
  // Coprocessor and unaligned loads go through FTEMP; others load into rt1[i].
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rth=get_reg(i_regmap,rt1[i]|64);
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
#ifdef PCSX
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
  reglist|=(1<<rs);
  // Look for a scratch register among r0-r9,r12 (mask 0x13ff) that is not in reglist.
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // The destination register does not need to be preserved.
  if(rt>=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // No free register: save early and pick r0 (or r2 when rs is r0).
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // Use r1 as the second scratch register when it is free to clobber.
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // Look up the mem_rtab entry for page rs>>12, then shift it left by one
  // with flags set; the cc-conditional load/jump below depend on that result.
  emit_readword((int)&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    switch(type) {
      case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
    }
  }
  if(regs_saved) {
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address

  // Handler path: r0 = address, r1 = shifted table entry, r2 = adjusted cycle count.
  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=(int)jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=(int)jump_handler_read16;
  if(type==LOADW_STUB)
    handler=(int)jump_handler_read32;
  assert(handler!=0);
  pass_args(rs,temp2);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  emit_call(handler);
  // The handler result arrives in r0; extend/mask it into rt according to the load type.
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    switch(type) {
      case LOADB_STUB: emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB: emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
    }
  }
  // The early-saved fast path rejoins here so its registers get restored too.
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
#else // !PCSX
  if(addr<0) addr=rt;
  if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Select the handler table for the access width.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  // ds is nonzero when the register state passed in is not regs[i].
  ds=i_regs!=&regs[i];
  int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
#endif
  // r0 = table base, r1 = address>>16, r2 = adjusted cycle count.
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
#ifndef PCSX
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
#endif
  //emit_readword((int)&last_count,temp);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  //emit_mov(15,14);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  // Fetch the result from readmem_dword with the width/signedness of the load.
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    assert(rt>=0);
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
  emit_jmp(stubs[n][2]); // return address
#endif // !PCSX
}
2883
#ifdef PCSX
// return memhandler, or get directly accessible address and return 0
//
// table:     first-level lookup table, indexed by addr>>12
// addr:      address being accessed
// type:      stub type; selects the byte / halfword / word part of a
//            second-level table
// addr_host: written only when 0 is returned
//
// Table entries are stored shifted right by one bit. Bit 31 of an entry
// marks it as something other than a direct mapping: at the first level it
// refers to a second-level table, at the second level to a handler.
// NOTE(review): entries are cast back to pointers from u_int, so this
// assumes 32-bit host pointers - confirm for any other target.
u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
{
  u_int l1,l2=0;
  l1=((u_int *)table)[addr>>12];
  // 1u: shifting a signed 1 into the sign bit is undefined behaviour.
  if((l1&(1u<<31))==0) {
    u_int v=l1<<1;
    *addr_host=v+addr;
    return 0;
  }
  else {
    l1<<=1;
    if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
      l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
    else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
      l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
    else
      l2=((u_int *)l1)[(addr&0xfff)/4];
    if((l2&(1u<<31))==0) {
      u_int v=l2<<1;
      *addr_host=v+(addr&0xfff);
      return 0;
    }
    return l2<<1;
  }
}
#endif
2912
/*
 * Emits an inline load from the address `addr`, which is already known
 * when the code is generated.
 *
 * type:    stub type selecting width/signedness
 * i:       instruction index (only used in the !PCSX pagefault path)
 * regmap:  host register mapping in effect
 * target:  guest register being loaded
 * adj:     cycle-count adjustment
 * reglist: host registers to preserve around a handler call
 */
inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  // Fall back to the temporary (-1) register when the target is not mapped.
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);
#ifdef PCSX
  u_int handler,host_addr=0;
  handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
  if (handler==0) {
    // Directly accessible memory: load straight from the host address.
    if(rt<0)
      return;
    if(addr!=host_addr)
      emit_movimm_from(addr,rs,host_addr,rs);
    switch(type) {
      case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
      default: assert(0);
    }
    return;
  }

  // call a memhandler
  if(rt>=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // r0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  int cc=get_reg(regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  // Count = adjusted cycle count + last_count, stored before the call.
  emit_readword((int)&last_count,3);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
  emit_add(2,3,3);
  emit_writeword(3,(int)&Count);

  // Displacement is measured from the current output position + 8;
  // anything outside +/-32MB is called through a register instead.
  int offset=(int)handler-(int)out-8;
  if(offset<-33554432||offset>=33554432) {
    // unreachable memhandler, a plugin func perhaps
    emit_movimm(handler,1);
    emit_callreg(1);
  }
  else
    emit_call(handler);
  // The handler result arrives in r0; extend/mask it into rt.
  if(rt>=0) {
    switch(type) {
      case LOADB_STUB: emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB: emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
      default: assert(0);
    }
  }
  restore_regs(reglist);
#else // if !PCSX
  // Select the handler table for the access width.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  if(target==0)
    emit_movimm(addr,rs);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Theoretically we can have a pagefault here, if the TLB has never
    // been enabled and the address is outside the range 80000000..BFFFFFFF
    // Write out the registers so the pagefault can be handled. This is
    // a very rare case and likely represents a bug.
    int ds=regmap!=regs[i].regmap;
    if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
    if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
    else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
  }
#endif
  //emit_shrimm(rs,16,1);
  int cc=get_reg(regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  //emit_movimm(ftable,0);
  // The handler is looked up in the table now, while generating code,
  // and its address is loaded into r0.
  emit_movimm(((u_int *)ftable)[addr>>16],0);
  //emit_readword((int)&last_count,12);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Pagefault address
    int ds=regmap!=regs[i].regmap;
    emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  }
#endif
  //emit_add(12,2,2);
  //emit_writeword(2,(int)&Count);
  //emit_call(((u_int *)ftable)[addr>>16]);
  emit_call((int)&indirect_jump);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  // Fetch the result from readmem_dword with the width/signedness of the load.
  if(rt>=0) {
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
#endif // !PCSX
}
3056
/*
 * Emits the out-of-line slow path for store stub n.
 *
 * Fields of stubs[n] as read here:
 *   [0] stub type (STOREB_STUB, STOREH_STUB, STOREW_STUB, ...)
 *   [1] address of the branch that is retargeted to this stub
 *   [2] address to jump back to when the stub is done
 *   [3] instruction index i
 *   [4] host register holding the address (rs)
 *   [5] pointer to the struct regstat in effect
 *   [6] cycle-count adjustment
 *   [7] mask of host registers to preserve around the call
 */
do_writestub(int n)
{
  assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  // Point the originating branch at the code emitted from here on.
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt,r;
  int ds;
  // Coprocessor stores take their data from FTEMP; others from rs2[i].
  if(itype[i]==C1LS||itype[i]==C2LS) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,r=FTEMP);
  }else{
    rth=get_reg(i_regmap,rs2[i]|64);
    rt=get_reg(i_regmap,r=rs2[i]);
  }
  assert(rs>=0);
  assert(rt>=0);
#ifdef PCSX
  int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
  int reglist2=reglist|(1<<rs)|(1<<rt);
  // Look for a scratch register among r0-r9,r12 (mask 0x13ff) that is
  // neither preserved nor used for the address or data.
  for(rtmp=0;rtmp<=12;rtmp++) {
    if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
      temp=rtmp; break;
    }
  }
  if(temp==-1) {
    // No free register: save early and take the first of r0-r3 that is not rs or rt.
    save_regs(reglist);
    regs_saved=1;
    for(rtmp=0;rtmp<=3;rtmp++)
      if(rtmp!=rs&&rtmp!=rt)
        {temp=rtmp;break;}
  }
  // Use r3 as the second scratch register when it is free to clobber.
  if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
    temp2=3;
  // Look up the mem_wtab entry for page rs>>12, then shift it left by one
  // with flags set; the cc-conditional store/jump below depend on that result.
  emit_readword((int)&mem_wtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  switch(type) {
    case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
    case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
    case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
    default: assert(0);
  }
  if(regs_saved) {
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address (invcode check)

  // Handler path: r0 = address, r1 = data, r2 = adjusted cycle count,
  // r3 = shifted table entry.
  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  switch(type) {
    case STOREB_STUB: handler=(int)jump_handler_write8; break;
    case STOREH_STUB: handler=(int)jump_handler_write16; break;
    case STOREW_STUB: handler=(int)jump_handler_write32; break;
  }
  assert(handler!=0);
  pass_args(rs,rt);
  if(temp2!=3)
    emit_mov(temp2,3);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  // returns new cycle_count
  emit_call(handler);
  // Undo the adjustment applied before the call on the value returned in r0.
  emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  // The early-saved fast path rejoins here so its registers get restored too.
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  ra=stubs[n][2];
  if(!restore_jump) ra+=4*3; // skip invcode check
  emit_jmp(ra);
#else // if !PCSX
  if(addr<0) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Select the handler table for the access width.
  int ftable=0;
  if(type==STOREB_STUB)
    ftable=(int)writememb;
  if(type==STOREH_STUB)
    ftable=(int)writememh;
  if(type==STOREW_STUB)
    ftable=(int)writemem;
#ifndef FORCE32
  if(type==STORED_STUB)
    ftable=(int)writememd;
#endif
  assert(ftable!=0);
  // Address and data are handed over through the address/byte/hword/word/dword globals.
  emit_writeword(rs,(int)&address);
  //emit_shrimm(rs,16,rs);
  //emit_movmem_indexedx4(ftable,rs,rs);
  if(type==STOREB_STUB)
    emit_writebyte(rt,(int)&byte);
  if(type==STOREH_STUB)
    emit_writehword(rt,(int)&hword);
  if(type==STOREW_STUB)
    emit_writeword(rt,(int)&word);
  if(type==STORED_STUB) {
#ifndef FORCE32
    emit_writeword(rt,(int)&dword);
    emit_writeword(r?rth:rt,(int)&dword+4);
#else
    printf("STORED_STUB\n");
#endif
  }
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  // ds is nonzero when the register state passed in is not regs[i].
  ds=i_regs!=&regs[i];
  int real_rs=get_reg(i_regmap,rs1[i]);
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
#endif
  // r0 = table base, r1 = address>>16, r2 = adjusted cycle count.
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
#ifndef PCSX
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
#endif
  //emit_readword((int)&last_count,temp);
  //emit_addimm(cc,2*stubs[n][5]+2,cc);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  // Recompute the cycle counter from Count and next_interupt after the call.
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  emit_jmp(stubs[n][2]); // return address
#endif // !PCSX
}
3215
/*
 * Emit, inline at the current output position, the code for a store whose
 * target address `addr` is known while assembling.
 *
 *   type    - STOREB_STUB / STOREH_STUB / STOREW_STUB (STORED_STUB is only
 *             handled in the non-PCSX build without FORCE32)
 *   i       - index of the instruction being assembled
 *   addr    - the store address
 *   regmap  - host register map used to look up the registers below
 *   target  - guest register whose value is stored
 *   adj     - cycle adjustment; CLOCK_ADJUST(adj+1) is added to the cycle
 *             count around the handler call
 *   reglist - passed to save_regs()/restore_regs() around the call
 *
 * Both the register mapped to -1 (rs) and the one mapped to `target` (rt)
 * must be allocated; this is asserted.
 */
3216inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3217{
3218 int rs=get_reg(regmap,-1);
3219 int rth=get_reg(regmap,target|64);
3220 int rt=get_reg(regmap,target);
3221 assert(rs>=0);
3222 assert(rt>=0);
3223#ifdef PCSX
3224 u_int handler,host_addr=0;
3225 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
// handler==0: no handler call is needed, store straight through rs.
3226 if (handler==0) {
// rs is rewritten only when the looked-up host_addr differs from addr.
3227 if(addr!=host_addr)
3228 emit_movimm_from(addr,rs,host_addr,rs);
3229 switch(type) {
3230 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3231 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3232 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3233 default: assert(0);
3234 }
3235 return;
3236 }
3237
3238 // call a memhandler
3239 save_regs(reglist);
3240 pass_args(rs,rt);
// Cycle count goes to host register 2: taken from cc when CCREG is mapped,
// otherwise loaded from CCREG first. The handler address goes to register 3.
3241 int cc=get_reg(regmap,CCREG);
3242 if(cc<0)
3243 emit_loadreg(CCREG,2);
3244 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3245 emit_movimm(handler,3);
3246 // returns new cycle_count
3247 emit_call((int)jump_handler_write_h);
// Undo the adjustment on the value in register 0 and put it back into
// cc, or into CCREG via register 2 when CCREG has no host register.
3248 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
3249 if(cc<0)
3250 emit_storereg(CCREG,2);
3251 restore_regs(reglist);
3252#else // if !pcsx
// Pick the handler table that matches the store width.
3253 int ftable=0;
3254 if(type==STOREB_STUB)
3255 ftable=(int)writememb;
3256 if(type==STOREH_STUB)
3257 ftable=(int)writememh;
3258 if(type==STOREW_STUB)
3259 ftable=(int)writemem;
3260#ifndef FORCE32
3261 if(type==STORED_STUB)
3262 ftable=(int)writememd;
3263#endif
3264 assert(ftable!=0);
// Address and data are handed over through the globals
// address / byte / hword / word / dword.
3265 emit_writeword(rs,(int)&address);
3266 //emit_shrimm(rs,16,rs);
3267 //emit_movmem_indexedx4(ftable,rs,rs);
3268 if(type==STOREB_STUB)
3269 emit_writebyte(rt,(int)&byte);
3270 if(type==STOREH_STUB)
3271 emit_writehword(rt,(int)&hword);
3272 if(type==STOREW_STUB)
3273 emit_writeword(rt,(int)&word);
3274 if(type==STORED_STUB) {
3275#ifndef FORCE32
3276 emit_writeword(rt,(int)&dword);
3277 emit_writeword(target?rth:rt,(int)&dword+4);
3278#else
3279 printf("STORED_STUB\n");
3280#endif
3281 }
3282 //emit_pusha();
3283 save_regs(reglist);
3284#ifndef PCSX
3285 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3286 if((signed int)addr>=(signed int)0xC0000000) {
3287 // Theoretically we can have a pagefault here, if the TLB has never
3288 // been enabled and the address is outside the range 80000000..BFFFFFFF
3289 // Write out the registers so the pagefault can be handled. This is
3290 // a very rare case and likely represents a bug.
// ds is nonzero when regmap is not the instruction's own regs[i].regmap.
3291 int ds=regmap!=regs[i].regmap;
3292 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3293 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3294 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3295 }
3296#endif
3297 //emit_shrimm(rs,16,1);
3298 int cc=get_reg(regmap,CCREG);
3299 if(cc<0) {
3300 emit_loadreg(CCREG,2);
3301 }
3302 //emit_movimm(ftable,0);
// The handler is looked up now, indexing the table by the top 16 address
// bits, and its value is loaded into register 0.
3303 emit_movimm(((u_int *)ftable)[addr>>16],0);
3304 //emit_readword((int)&last_count,12);
3305 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3306#ifndef PCSX
3307 if((signed int)addr>=(signed int)0xC0000000) {
3308 // Pagefault address
3309 int ds=regmap!=regs[i].regmap;
3310 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3311 }
3312#endif
3313 //emit_add(12,2,2);
3314 //emit_writeword(2,(int)&Count);
3315 //emit_call(((u_int *)ftable)[addr>>16]);
3316 emit_call((int)&indirect_jump);
// After the call: last_count is set to next_interupt, and the cycle
// register becomes (Count - adjustment) - next_interupt.
3317 emit_readword((int)&Count,HOST_TEMPREG);
3318 emit_readword((int)&next_interupt,2);
3319 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
3320 emit_writeword(2,(int)&last_count);
3321 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3322 if(cc<0) {
3323 emit_storereg(CCREG,HOST_TEMPREG);
3324 }
3325 //emit_popa();
3326 restore_regs(reglist);
3327#endif
3328}
3329
3330do_unalignedwritestub(int n)
3331{
3332 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3333 literal_pool(256);
3334 set_jump_target(stubs[n][1],(int)out);
3335
3336 int i=stubs[n][3];
3337 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3338 int addr=stubs[n][5];
3339 u_int reglist=stubs[n][7];
3340 signed char *i_regmap=i_regs->regmap;
3341 int temp2=get_reg(i_regmap,FTEMP);
3342 int rt;
3343 int ds, real_rs;
3344 rt=get_reg(i_regmap,rs2[i]);
3345 assert(rt>=0);
3346 assert(addr>=0);
3347 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3348 reglist|=(1<<addr);
3349 reglist&=~(1<<temp2);
3350
3351#if 1
3352 // don't bother with it and call write handler
3353 save_regs(reglist);
3354 pass_args(addr,rt);
3355 int cc=get_reg(i_regmap,CCREG);
3356 if(cc<0)
3357 emit_loadreg(CCREG,2);
3358 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
3359 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
3360 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
3361 if(cc<0)
3362 emit_storereg(CCREG,2);
3363 restore_regs(reglist);
3364 emit_jmp(stubs[n][2]); // return address
3365#else
3366 emit_andimm(addr,0xfffffffc,temp2);
3367 emit_writeword(temp2,(int)&address);
3368
3369 save_regs(reglist);
3370#ifndef PCSX
3371 ds=i_regs!=&regs[i];
3372 real_rs=get_reg(i_regmap,rs1[i]);
3373 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3374 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3375 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3376 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
3377#endif
3378 emit_shrimm(addr,16,1);
3379 int cc=get_reg(i_regmap,CCREG);
3380 if(cc<0) {
3381 emit_loadreg(CCREG,2);
3382 }
3383 emit_movimm((u_int)readmem,0);
3384 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
3385#ifndef PCSX
3386 // pagefault address
3387 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3388#endif
3389 emit_call((int)&indirect_jump_indexed);
3390 restore_regs(reglist);
3391
3392 emit_readword((int)&readmem_dword,temp2);
3393 int temp=addr; //hmh
3394 emit_shlimm(addr,3,temp);
3395 emit_andimm(temp,24,temp);
3396#ifdef BIG_ENDIAN_MIPS
3397 if (opcode[i]==0x2e) // SWR
3398#else
3399 if (opcode[i]==0x2a) // SWL
3400#endif
3401 emit_xorimm(temp,24,temp);
3402 emit_movimm(-1,HOST_TEMPREG);
3403 if (opcode[i]==0x2a) { // SWL
3404 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3405 emit_orrshr(rt,temp,temp2);
3406 }else{
3407 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3408 emit_orrshl(rt,temp,temp2);
3409 }
3410 emit_readword((int)&address,addr);
3411 emit_writeword(temp2,(int)&word);
3412 //save_regs(reglist); // don't need to, no state changes
3413 emit_shrimm(addr,16,1);
3414 emit_movimm((u_int)writemem,0);
3415 //emit_call((int)&indirect_jump_indexed);
3416 emit_mov(15,14);
3417 emit_readword_dualindexedx4(0,1,15);
3418 emit_readword((int)&Count,HOST_TEMPREG);
3419 emit_readword((int)&next_interupt,2);
3420 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3421 emit_writeword(2,(int)&last_count);
3422 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3423 if(cc<0) {
3424 emit_storereg(CCREG,HOST_TEMPREG);
3425 }
3426 restore_regs(reglist);
3427 emit_jmp(stubs[n][2]); // return address
3428#endif
3429}
3430
/*
 * Debug helper: print seven of the eight arguments in hex (esp is accepted
 * but not printed), followed by the int stored immediately before `edi`.
 * NOTE(review): (&edi)[-1] reads outside the parameter object, so what it
 * yields depends on calling convention and stack layout. It looks like an
 * x86 debugging leftover - confirm before relying on it.
 */
3431void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
3432{
3433 printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
3434}
3435
3436do_invstub(int n)
3437{
3438 literal_pool(20);
3439 u_int reglist=stubs[n][3];
3440 set_jump_target(stubs[n][1],(int)out);
3441 save_regs(reglist);
3442 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3443 emit_call((int)&invalidate_addr);
3444 restore_regs(reglist);
3445 emit_jmp(stubs[n][2]); // return address
3446}
3447
3448int do_dirty_stub(int i)
3449{
3450 assem_debug("do_dirty_stub %x\n",start+i*4);
3451 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3452 #ifdef PCSX
3453 addr=(u_int)source;
3454 #endif
3455 // Careful about the code output here, verify_dirty needs to parse it.
3456 #ifdef ARMv5_ONLY
3457 emit_loadlp(addr,1);
3458 emit_loadlp((int)copy,2);
3459 emit_loadlp(slen*4,3);
3460 #else
3461 emit_movw(addr&0x0000FFFF,1);
3462 emit_movw(((u_int)copy)&0x0000FFFF,2);
3463 emit_movt(addr&0xFFFF0000,1);
3464 emit_movt(((u_int)copy)&0xFFFF0000,2);
3465 emit_movw(slen*4,3);
3466 #endif
3467 emit_movimm(start+i*4,0);
3468 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3469 int entry=(int)out;
3470 load_regs_entry(i);
3471 if(entry==(int)out) entry=instr_addr[i];
3472 emit_jmp(instr_addr[i]);
3473 return entry;
3474}
3475
3476void do_dirty_stub_ds()
3477{
3478 // Careful about the code output here, verify_dirty needs to parse it.
3479 #ifdef ARMv5_ONLY
3480 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3481 emit_loadlp((int)copy,2);
3482 emit_loadlp(slen*4,3);
3483 #else
3484 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3485 emit_movw(((u_int)copy)&0x0000FFFF,2);
3486 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3487 emit_movt(((u_int)copy)&0xFFFF0000,2);
3488 emit_movw(slen*4,3);
3489 #endif
3490 emit_movimm(start+1,0);
3491 emit_call((int)&verify_code_ds);
3492}
3493
3494do_cop1stub(int n)
3495{
3496 literal_pool(256);
3497 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3498 set_jump_target(stubs[n][1],(int)out);
3499 int i=stubs[n][3];
3500// int rs=stubs[n][4];
3501 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3502 int ds=stubs[n][6];
3503 if(!ds) {
3504 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3505 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3506 }
3507 //else {printf("fp exception in delay slot\n");}
3508 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3509 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3510 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3511 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3512 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3513}
3514
3515#ifndef DISABLE_TLB
3516
3517/* TLB */
3518
3519int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3520{
3521 if(c) {
3522 if((signed int)addr>=(signed int)0xC0000000) {
3523 // address_generation already loaded the const
3524 emit_readword_dualindexedx4(FP,map,map);
3525 }
3526 else
3527 return -1; // No mapping
3528 }
3529 else {
3530 assert(s!=map);
3531 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3532 emit_addsr12(map,s,map);
3533 // Schedule this while we wait on the load
3534 //if(x) emit_xorimm(s,x,ar);
3535 if(shift>=0) emit_shlimm(s,3,shift);
3536 if(~a) emit_andimm(s,a,ar);
3537 emit_readword_dualindexedx4(FP,map,map);
3538 }
3539 return map;
3540}
3541int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3542{
3543 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3544 emit_test(map,map);
3545 *jaddr=(int)out;
3546 emit_js(0);
3547 }
3548 return map;
3549}
3550
3551int gen_tlb_addr_r(int ar, int map) {
3552 if(map>=0) {
3553 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3554 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3555 }
3556}
3557
3558int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3559{
3560 if(c) {
3561 if(addr<0x80800000||addr>=0xC0000000) {
3562 // address_generation already loaded the const
3563 emit_readword_dualindexedx4(FP,map,map);
3564 }
3565 else
3566 return -1; // No mapping
3567 }
3568 else {
3569 assert(s!=map);
3570 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3571 emit_addsr12(map,s,map);
3572 // Schedule this while we wait on the load
3573 //if(x) emit_xorimm(s,x,ar);
3574 emit_readword_dualindexedx4(FP,map,map);
3575 }
3576 return map;
3577}
3578int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3579{
3580 if(!c||addr<0x80800000||addr>=0xC0000000) {
3581 emit_testimm(map,0x40000000);
3582 *jaddr=(int)out;
3583 emit_jne(0);
3584 }
3585}
3586
3587int gen_tlb_addr_w(int ar, int map) {
3588 if(map>=0) {
3589 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3590 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3591 }
3592}
3593
3594// Generate the address of the memory_map entry, relative to dynarec_local
3595generate_map_const(u_int addr,int reg) {
3596 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3597 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3598}
3599
3600#else
3601
// Placeholders used when DISABLE_TLB is defined: each does nothing and
// returns 0. The parameter lists are left empty (unprototyped) so these can
// be called with the same arguments as the real TLB helpers.
3602static int do_tlb_r() { return 0; }
3603static int do_tlb_r_branch() { return 0; }
3604static int gen_tlb_addr_r() { return 0; }
3605static int do_tlb_w() { return 0; }
3606static int do_tlb_w_branch() { return 0; }
3607static int gen_tlb_addr_w() { return 0; }
3608
3609#endif // DISABLE_TLB
3610
3611/* Special assem */
3612
/*
 * Assemble a variable shift for instruction i using the register
 * allocation in i_regs.
 *
 * opcode2 <= 0x07 selects the 32-bit forms (SLLV/SRLV/SRAV); everything
 * else is treated as the 64-bit forms (DSLLV/DSRLV/DSRAV), which work on
 * low/high register pairs. Nothing is emitted when rt1[i] is 0 or the
 * destination has no host register.
 */
3613void shift_assemble_arm(int i,struct regstat *i_regs)
3614{
3615 if(rt1[i]) {
3616 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3617 {
3618 signed char s,t,shift;
3619 t=get_reg(i_regs->regmap,rt1[i]);
3620 s=get_reg(i_regs->regmap,rs1[i]);
3621 shift=get_reg(i_regs->regmap,rs2[i]);
3622 if(t>=0){
// Source register 0: the result is zero.
3623 if(rs1[i]==0)
3624 {
3625 emit_zeroreg(t);
3626 }
// Shift-amount register 0: plain move.
3627 else if(rs2[i]==0)
3628 {
3629 assert(s>=0);
3630 if(s!=t) emit_mov(s,t);
3631 }
3632 else
3633 {
// The shift amount is reduced to its low five bits in HOST_TEMPREG.
3634 emit_andimm(shift,31,HOST_TEMPREG);
3635 if(opcode2[i]==4) // SLLV
3636 {
3637 emit_shl(s,HOST_TEMPREG,t);
3638 }
3639 if(opcode2[i]==6) // SRLV
3640 {
3641 emit_shr(s,HOST_TEMPREG,t);
3642 }
3643 if(opcode2[i]==7) // SRAV
3644 {
3645 emit_sar(s,HOST_TEMPREG,t);
3646 }
3647 }
3648 }
3649 } else { // DSLLV/DSRLV/DSRAV
// th/tl and sh/sl are the high/low halves of destination and source
// (the high half is looked up with |64).
3650 signed char sh,sl,th,tl,shift;
3651 th=get_reg(i_regs->regmap,rt1[i]|64);
3652 tl=get_reg(i_regs->regmap,rt1[i]);
3653 sh=get_reg(i_regs->regmap,rs1[i]|64);
3654 sl=get_reg(i_regs->regmap,rs1[i]);
3655 shift=get_reg(i_regs->regmap,rs2[i]);
3656 if(tl>=0){
3657 if(rs1[i]==0)
3658 {
3659 emit_zeroreg(tl);
3660 if(th>=0) emit_zeroreg(th);
3661 }
3662 else if(rs2[i]==0)
3663 {
3664 assert(sl>=0);
3665 if(sl!=tl) emit_mov(sl,tl);
3666 if(th>=0&&sh!=th) emit_mov(sh,th);
3667 }
3668 else
3669 {
3670 // FIXME: What if shift==tl ?
3671 assert(shift!=tl);
3672 int temp=get_reg(i_regs->regmap,-1);
3673 int real_th=th;
3674 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3675 assert(sl>=0);
3676 assert(sh>=0);
3677 emit_andimm(shift,31,HOST_TEMPREG);
3678 if(opcode2[i]==0x14) // DSLLV
3679 {
// NOTE(review): the emit_orrshr below targets th even when th<0, while
// the neighbouring uses of th are guarded - confirm this is intended.
3680 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3681 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3682 emit_orrshr(sl,HOST_TEMPREG,th);
3683 emit_andimm(shift,31,HOST_TEMPREG);
// Bit 5 of the shift amount selects between the two results via the
// conditional moves that follow.
3684 emit_testimm(shift,32);
3685 emit_shl(sl,HOST_TEMPREG,tl);
3686 if(th>=0) emit_cmovne_reg(tl,th);
3687 emit_cmovne_imm(0,tl);
3688 }
3689 if(opcode2[i]==0x16) // DSRLV
3690 {
3691 assert(th>=0);
3692 emit_shr(sl,HOST_TEMPREG,tl);
3693 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3694 emit_orrshl(sh,HOST_TEMPREG,tl);
3695 emit_andimm(shift,31,HOST_TEMPREG);
3696 emit_testimm(shift,32);
3697 emit_shr(sh,HOST_TEMPREG,th);
3698 emit_cmovne_reg(th,tl);
3699 if(real_th>=0) emit_cmovne_imm(0,th);
3700 }
3701 if(opcode2[i]==0x17) // DSRAV
3702 {
3703 assert(th>=0);
3704 emit_shr(sl,HOST_TEMPREG,tl);
3705 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3706 if(real_th>=0) {
3707 assert(temp>=0);
// NOTE(review): this reads th before this block has written it; confirm
// whether the sign was meant to come from sh.
3708 emit_sarimm(th,31,temp);
3709 }
3710 emit_orrshl(sh,HOST_TEMPREG,tl);
3711 emit_andimm(shift,31,HOST_TEMPREG);
3712 emit_testimm(shift,32);
3713 emit_sar(sh,HOST_TEMPREG,th);
3714 emit_cmovne_reg(th,tl);
3715 if(real_th>=0) emit_cmovne_reg(temp,th);
3716 }
3717 }
3718 }
3719 }
3720 }
3721}
3722
3723#ifdef PCSX
3724static void speculate_mov(int rs,int rt)
3725{
3726 if(rt!=0) {
3727 smrv_strong_next|=1<<rt;
3728 smrv[rt]=smrv[rs];
3729 }
3730}
3731
3732static void speculate_mov_weak(int rs,int rt)
3733{
3734 if(rt!=0) {
3735 smrv_weak_next|=1<<rt;
3736 smrv[rt]=smrv[rs];
3737 }
3738}
3739
/*
 * Advance the register-value speculation (smrv[] plus the strong/weak
 * bitmasks) over instruction i.
 *
 * For i==0 the values are seeded from psxRegs.GPR.r, with registers 28, 29
 * and 30 marked strong and all others weak. The *_next masks computed for
 * the previous instruction then become current, and the switch below
 * updates the *_next masks and smrv[] according to the instruction type.
 */
3740static void speculate_register_values(int i)
3741{
3742 if(i==0) {
3743 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3744 // gp,sp are likely to stay the same throughout the block
3745 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3746 smrv_weak_next=~smrv_strong_next;
3747 //printf(" llr %08x\n", smrv[4]);
3748 }
3749 smrv_strong=smrv_strong_next;
3750 smrv_weak=smrv_weak_next;
3751 switch(itype[i]) {
3752 case ALU:
// The destination inherits the speculation of the first source that has
// one, strong sources taking priority; otherwise it becomes unknown.
3753 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3754 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3755 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3756 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3757 else {
3758 smrv_strong_next&=~(1<<rt1[i]);
3759 smrv_weak_next&=~(1<<rt1[i]);
3760 }
3761 break;
3762 case SHIFTIMM:
3763 smrv_strong_next&=~(1<<rt1[i]);
3764 smrv_weak_next&=~(1<<rt1[i]);
3765 // fallthrough
3766 case IMM16:
// A destination known to be constant gets that constant as a strong
// value; otherwise it inherits from rs1.
3767 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3768 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3769 if(hr>=0) {
3770 if(get_final_value(hr,i,&value))
3771 smrv[rt1[i]]=value;
3772 else smrv[rt1[i]]=constmap[i][hr];
3773 smrv_strong_next|=1<<rt1[i];
3774 }
3775 }
3776 else {
3777 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3778 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3779 }
3780 break;
3781 case LOAD:
3782 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3783 // special case for BIOS
3784 smrv[rt1[i]]=0xa0000000;
3785 smrv_strong_next|=1<<rt1[i];
3786 break;
3787 }
3788 // fallthrough
3789 case SHIFT:
3790 case LOADLR:
3791 case MOV:
// The destination value is no longer known.
3792 smrv_strong_next&=~(1<<rt1[i]);
3793 smrv_weak_next&=~(1<<rt1[i]);
3794 break;
3795 case COP0:
3796 case COP2:
3797 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3798 smrv_strong_next&=~(1<<rt1[i]);
3799 smrv_weak_next&=~(1<<rt1[i]);
3800 }
3801 break;
3802 case C2LS:
3803 if (opcode[i]==0x32) { // LWC2
3804 smrv_strong_next&=~(1<<rt1[i]);
3805 smrv_weak_next&=~(1<<rt1[i]);
3806 }
3807 break;
3808 }
3809#if 0
3810 int r=4;
3811 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3812 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3813#endif
3814}
3815
/* Memory region classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, /* default class; needs no address adjustment */
  MTYPE_8020 = 1, /* 0x80200000..0x807fffff */
  MTYPE_0000 = 2, /* below 0x00200000 */
  MTYPE_A000 = 3, /* 0xa0000000..0xa01fffff */
  MTYPE_1F80 = 4, /* 0x1f800000..0x1f800fff */
};
3823
3824static int get_ptr_mem_type(u_int a)
3825{
3826 if(a < 0x00200000) {
3827 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3828 // return wrong, must use memhandler for BIOS self-test to pass
3829 // 007 does similar stuff from a00 mirror, weird stuff
3830 return MTYPE_8000;
3831 return MTYPE_0000;
3832 }
3833 if(0x1f800000 <= a && a < 0x1f801000)
3834 return MTYPE_1F80;
3835 if(0x80200000 <= a && a < 0x80800000)
3836 return MTYPE_8020;
3837 if(0xa0000000 <= a && a < 0xa0200000)
3838 return MTYPE_A000;
3839 return MTYPE_8000;
3840}
3841#endif
3842
3843static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3844{
3845 int jaddr,type=0;
3846
3847#ifdef PCSX
3848 int mr=rs1[i];
3849 if(((smrv_strong|smrv_weak)>>mr)&1) {
3850 type=get_ptr_mem_type(smrv[mr]);
3851 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3852 }
3853 else {
3854 // use the mirror we are running on
3855 type=get_ptr_mem_type(start);
3856 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3857 }
3858
3859 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3860 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3861 addr=*addr_reg_override=HOST_TEMPREG;
3862 type=0;
3863 }
3864 else if(type==MTYPE_0000) { // RAM 0 mirror
3865 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3866 addr=*addr_reg_override=HOST_TEMPREG;
3867 type=0;
3868 }
3869 else if(type==MTYPE_A000) { // RAM A mirror
3870 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3871 addr=*addr_reg_override=HOST_TEMPREG;
3872 type=0;
3873 }
3874 else if(type==MTYPE_1F80) { // scratchpad
3875 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3876 emit_cmpimm(HOST_TEMPREG,0x1000);
3877 jaddr=(int)out;
3878 emit_jc(0);
3879 }
3880#endif
3881
3882 if(type==0)
3883 {
3884 emit_cmpimm(addr,RAM_SIZE);
3885 jaddr=(int)out;
3886 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3887 // Hint to branch predictor that the branch is unlikely to be taken
3888 if(rs1[i]>=28)
3889 emit_jno_unlikely(0);
3890 else
3891 #endif
3892 emit_jno(0);
3893 }
3894
3895 return jaddr;
3896}
3897
3898#define shift_assemble shift_assemble_arm
3899
/*
 * Assemble an unaligned load for instruction i: LWL/LWR (opcode 0x22/0x26)
 * or LDL/LDR (opcode 0x1A/0x1B).
 *
 * The aligned word (or doubleword) containing the address is loaded, either
 * on the fast path or through a stub, and then merged into the destination
 * register(s) with a shift/mask derived from the low address bits.
 */
3900void loadlr_assemble_arm(int i,struct regstat *i_regs)
3901{
3902 int s,th,tl,temp,temp2,addr,map=-1;
3903 int offset;
3904 int jaddr=0;
3905 int memtarget=0,c=0;
3906 int fastload_reg_override=0;
3907 u_int hr,reglist=0;
3908 th=get_reg(i_regs->regmap,rt1[i]|64);
3909 tl=get_reg(i_regs->regmap,rt1[i]);
3910 s=get_reg(i_regs->regmap,rs1[i]);
3911 temp=get_reg(i_regs->regmap,-1);
3912 temp2=get_reg(i_regs->regmap,FTEMP);
3913 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3914 assert(addr<0);
3915 offset=imm[i];
// reglist: every host register that currently has a mapping, plus temp.
3916 for(hr=0;hr<HOST_REGS;hr++) {
3917 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3918 }
3919 reglist|=1<<temp;
// NOTE(review): c is still 0 here; it is only assigned a few lines below,
// so the "||c" term never contributes - confirm this is intended.
3920 if(offset||s<0||c) addr=temp2;
3921 else addr=s;
3922 if(s>=0) {
3923 c=(i_regs->wasconst>>s)&1;
3924 if(c) {
3925 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3926 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3927 }
3928 }
3929 if(!using_tlb) {
3930 if(!c) {
3931 #ifdef RAM_OFFSET
3932 map=get_reg(i_regs->regmap,ROREG);
3933 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3934 #endif
// temp = address*8 (bit offset, masked later); temp2 = aligned address.
3935 emit_shlimm(addr,3,temp);
3936 if (opcode[i]==0x22||opcode[i]==0x26) {
3937 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3938 }else{
3939 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3940 }
3941 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3942 }
3943 else {
// Constant address: the bit offset is computed at assembly time.
3944 if (opcode[i]==0x22||opcode[i]==0x26) {
3945 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3946 }else{
3947 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3948 }
3949 }
3950 }else{ // using tlb
3951 int a;
3952 if(c) {
3953 a=-1;
3954 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3955 a=0xFFFFFFFC; // LWL/LWR
3956 }else{
3957 a=0xFFFFFFF8; // LDL/LDR
3958 }
3959 map=get_reg(i_regs->regmap,TLREG);
3960 assert(map>=0);
3961 reglist&=~(1<<map);
3962 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3963 if(c) {
3964 if (opcode[i]==0x22||opcode[i]==0x26) {
3965 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3966 }else{
3967 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3968 }
3969 }
3970 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3971 }
3972 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3973 if(!c||memtarget) {
3974 int a=temp2;
3975 if(fastload_reg_override) a=fastload_reg_override;
3976 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3977 emit_readword_indexed_tlb(0,a,map,temp2);
3978 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3979 }
3980 else
3981 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3982 if(rt1[i]) {
3983 assert(tl>=0);
// Merge: shift the loaded word into place, clear the matching bits of
// tl using an all-ones mask shifted the same way, then OR the two.
3984 emit_andimm(temp,24,temp);
3985#ifdef BIG_ENDIAN_MIPS
3986 if (opcode[i]==0x26) // LWR
3987#else
3988 if (opcode[i]==0x22) // LWL
3989#endif
3990 emit_xorimm(temp,24,temp);
3991 emit_movimm(-1,HOST_TEMPREG);
3992 if (opcode[i]==0x26) {
3993 emit_shr(temp2,temp,temp2);
3994 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3995 }else{
3996 emit_shl(temp2,temp,temp2);
3997 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3998 }
3999 emit_or(temp2,tl,tl);
4000 }
4001 //emit_storereg(rt1[i],tl); // DEBUG
4002 }
4003 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
4004 // FIXME: little endian, fastload_reg_override
4005 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4006 if(!c||memtarget) {
4007 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4008 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4009 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4010 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4011 }
4012 else
4013 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4014 if(rt1[i]) {
4015 assert(th>=0);
4016 assert(tl>=0);
// Bit 5 of the bit offset sets the flags consumed by the conditional
// (eq/ne) instructions below; the low bits give the in-word shift.
4017 emit_testimm(temp,32);
4018 emit_andimm(temp,24,temp);
4019 if (opcode[i]==0x1A) { // LDL
4020 emit_rsbimm(temp,32,HOST_TEMPREG);
4021 emit_shl(temp2h,temp,temp2h);
4022 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4023 emit_movimm(-1,HOST_TEMPREG);
4024 emit_shl(temp2,temp,temp2);
4025 emit_cmove_reg(temp2h,th);
4026 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4027 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4028 emit_orreq(temp2,tl,tl);
4029 emit_orrne(temp2,th,th);
4030 }
4031 if (opcode[i]==0x1B) { // LDR
4032 emit_xorimm(temp,24,temp);
4033 emit_rsbimm(temp,32,HOST_TEMPREG);
4034 emit_shr(temp2,temp,temp2);
4035 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4036 emit_movimm(-1,HOST_TEMPREG);
4037 emit_shr(temp2h,temp,temp2h);
4038 emit_cmovne_reg(temp2,tl);
4039 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4040 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4041 emit_orrne(temp2h,th,th);
4042 emit_orreq(temp2h,tl,tl);
4043 }
4044 }
4045 }
4046}
4047#define loadlr_assemble loadlr_assemble_arm
4048
/*
 * Assemble a coprocessor-0 instruction for instruction i.
 *
 *   opcode2 == 0    MFC0: read a COP0 register into the destination
 *   opcode2 == 4    MTC0: write a COP0 register through a helper call
 *   opcode2 == 0x10 (asserted): TLB operations, RFE (PCSX) or ERET
 */
4049void cop0_assemble(int i,struct regstat *i_regs)
4050{
4051 if(opcode2[i]==0) // MFC0
4052 {
4053 signed char t=get_reg(i_regs->regmap,rt1[i]);
// The COP0 register number is bits 11..15 of the instruction word.
4054 char copr=(source[i]>>11)&0x1f;
4055 //assert(t>=0); // Why does this happen? OOT is weird
4056 if(t>=0&&rt1[i]!=0) {
4057#ifdef MUPEN64
4058 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4059 emit_movimm((source[i]>>11)&0x1f,1);
4060 emit_writeword(0,(int)&PC);
4061 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4062 if(copr==9) {
4063 emit_readword((int)&last_count,ECX);
4064 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4065 emit_add(HOST_CCREG,ECX,HOST_CCREG);
4066 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
4067 emit_writeword(HOST_CCREG,(int)&Count);
4068 }
4069 emit_call((int)MFC0);
4070 emit_readword((int)&readmem_dword,t);
4071#else
4072 emit_readword((int)&reg_cop0+copr*4,t);
4073#endif
4074 }
4075 }
4076 else if(opcode2[i]==4) // MTC0
4077 {
4078 signed char s=get_reg(i_regs->regmap,rs1[i]);
4079 char copr=(source[i]>>11)&0x1f;
4080 assert(s>=0);
4081#ifdef MUPEN64
4082 emit_writeword(s,(int)&readmem_dword);
4083 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4084 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4085 emit_movimm((source[i]>>11)&0x1f,1);
4086 emit_writeword(0,(int)&PC);
4087 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4088#else
4089 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4090#endif
// For registers 9, 11, 12 and 13 the Count variable is brought up to date
// (last_count + cycle register + adjustment) before the helper is called.
4091 if(copr==9||copr==11||copr==12||copr==13) {
4092 emit_readword((int)&last_count,HOST_TEMPREG);
4093 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4094 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
4095 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
4096 emit_writeword(HOST_CCREG,(int)&Count);
4097 }
4098 // What a mess. The status register (12) can enable interrupts,
4099 // so needs a special case to handle a pending interrupt.
4100 // The interrupt must be taken immediately, because a subsequent
4101 // instruction might disable interrupts again.
4102 if(copr==12||copr==13) {
4103#ifdef PCSX
4104 if (is_delayslot) {
4105 // burn cycles to cause cc_interrupt, which will
4106 // reschedule next_interupt. Relies on CCREG from above.
4107 assem_debug("MTC0 DS %d\n", copr);
4108 emit_writeword(HOST_CCREG,(int)&last_count);
4109 emit_movimm(0,HOST_CCREG);
4110 emit_storereg(CCREG,HOST_CCREG);
4111 if(s!=1)
4112 emit_mov(s,1);
4113 emit_movimm(copr,0);
4114 emit_call((int)pcsx_mtc0_ds);
// Early exit: none of the code below is emitted for this case.
4115 return;
4116 }
4117#endif
// Record the address of the following instruction in pcaddr and clear
// pending_exception before the helper runs.
4118 emit_movimm(start+i*4+4,HOST_TEMPREG);
4119 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4120 emit_movimm(0,HOST_TEMPREG);
4121 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
4122 }
4123 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4124 //else
4125#ifdef PCSX
// Helper arguments: register number in r0, value in r1.
4126 if(s!=1)
4127 emit_mov(s,1);
4128 emit_movimm(copr,0);
4129 emit_call((int)pcsx_mtc0);
4130#else
4131 emit_call((int)MTC0);
4132#endif
// Rebuild the cycle register from Count and next_interupt after the call.
4133 if(copr==9||copr==11||copr==12||copr==13) {
4134 emit_readword((int)&Count,HOST_CCREG);
4135 emit_readword((int)&next_interupt,ECX);
4136 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
4137 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
4138 emit_writeword(ECX,(int)&last_count);
4139 emit_storereg(CCREG,HOST_CCREG);
4140 }
4141 if(copr==12||copr==13) {
4142 assert(!is_delayslot);
4143 emit_readword((int)&pending_exception,14);
4144 }
// Reload the source register (and its upper half, if mapped).
4145 emit_loadreg(rs1[i],s);
4146 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4147 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
// Branch to do_interrupt when pending_exception (read into r14) is nonzero.
4148 if(copr==12||copr==13) {
4149 emit_test(14,14);
4150 emit_jne((int)&do_interrupt);
4151 }
4152 cop1_usable=0;
4153 }
4154 else
4155 {
4156 assert(opcode2[i]==0x10);
// The operation is selected by the low 6 bits of the instruction word.
4157#ifndef DISABLE_TLB
4158 if((source[i]&0x3f)==0x01) // TLBR
4159 emit_call((int)TLBR);
4160 if((source[i]&0x3f)==0x02) // TLBWI
4161 emit_call((int)TLBWI_new);
4162 if((source[i]&0x3f)==0x06) { // TLBWR
4163 // The TLB entry written by TLBWR is dependent on the count,
4164 // so update the cycle count
4165 emit_readword((int)&last_count,ECX);
4166 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4167 emit_add(HOST_CCREG,ECX,HOST_CCREG);
4168 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
4169 emit_writeword(HOST_CCREG,(int)&Count);
4170 emit_call((int)TLBWR_new);
4171 }
4172 if((source[i]&0x3f)==0x08) // TLBP
4173 emit_call((int)TLBP);
4174#endif
4175#ifdef PCSX
4176 if((source[i]&0x3f)==0x10) // RFE
4177 {
// Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
4178 emit_readword((int)&Status,0);
4179 emit_andimm(0,0x3c,1);
4180 emit_andimm(0,~0xf,0);
4181 emit_orrshr_imm(1,2,0);
4182 emit_writeword(0,(int)&Status);
4183 }
4184#else
4185 if((source[i]&0x3f)==0x18) // ERET
4186 {
4187 int count=ccadj[i];
4188 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4189 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
4190 emit_jmp((int)jump_eret);
4191 }
4192#endif
4193 }
4194}
4195
/*
 * Emit code that reads COP2 data register `copr` into host register tl.
 * `temp` is a scratch host register, used only for registers 28 and 29.
 *
 * Except in the default case, the value read is also written back to
 * reg_cop2d[copr] after adjustment.
 */
4196static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4197{
4198 switch (copr) {
// Registers read back sign-extended from 16 bits.
4199 case 1:
4200 case 3:
4201 case 5:
4202 case 8:
4203 case 9:
4204 case 10:
4205 case 11:
4206 emit_readword((int)&reg_cop2d[copr],tl);
4207 emit_signextend16(tl,tl);
4208 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4209 break;
// Registers read back masked to their low 16 bits.
4210 case 7:
4211 case 16:
4212 case 17:
4213 case 18:
4214 case 19:
4215 emit_readword((int)&reg_cop2d[copr],tl);
4216 emit_andimm(tl,0xffff,tl);
4217 emit_writeword(tl,(int)&reg_cop2d[copr]);
4218 break;
// Register 15 takes its value from register 14.
4219 case 15:
4220 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4221 emit_writeword(tl,(int)&reg_cop2d[copr]);
4222 break;
// Registers 28/29 are assembled from registers 9, 10 and 11: each
// contributes (value & 0xf80), shifted into its own position.
// The testimm/andne pair presumably clears a component whose bit 0x8000 is
// set - confirm against the emit_andne_imm definition.
4223 case 28:
4224 case 29:
4225 emit_readword((int)&reg_cop2d[9],temp);
4226 emit_testimm(temp,0x8000); // do we need this?
4227 emit_andimm(temp,0xf80,temp);
4228 emit_andne_imm(temp,0,temp);
4229 emit_shrimm(temp,7,tl);
4230 emit_readword((int)&reg_cop2d[10],temp);
4231 emit_testimm(temp,0x8000);
4232 emit_andimm(temp,0xf80,temp);
4233 emit_andne_imm(temp,0,temp);
4234 emit_orrshr_imm(temp,2,tl);
4235 emit_readword((int)&reg_cop2d[11],temp);
4236 emit_testimm(temp,0x8000);
4237 emit_andimm(temp,0xf80,temp);
4238 emit_andne_imm(temp,0,temp);
4239 emit_orrshl_imm(temp,3,tl);
4240 emit_writeword(tl,(int)&reg_cop2d[copr]);
4241 break;
4242 default:
4243 emit_readword((int)&reg_cop2d[copr],tl);
4244 break;
4245 }
4246}
4247
/*
 * Emit code that writes host register sl to COP2 data register `copr`.
 * `temp` is a scratch host register used by the special cases.
 */
4248static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4249{
4250 switch (copr) {
// Register 15: registers 13 and 14 move down to 12 and 13, and the new
// value is stored to both 15 and 14.
4251 case 15:
4252 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4253 emit_writeword(sl,(int)&reg_cop2d[copr]);
4254 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4255 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4256 emit_writeword(sl,(int)&reg_cop2d[14]);
4257 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4258 break;
// Register 28: the three 5-bit fields of the value are spread into
// registers 9, 10 and 11 (each ends up at bits 7..11), and the raw value
// is stored to register 28.
4259 case 28:
4260 emit_andimm(sl,0x001f,temp);
4261 emit_shlimm(temp,7,temp);
4262 emit_writeword(temp,(int)&reg_cop2d[9]);
4263 emit_andimm(sl,0x03e0,temp);
4264 emit_shlimm(temp,2,temp);
4265 emit_writeword(temp,(int)&reg_cop2d[10]);
4266 emit_andimm(sl,0x7c00,temp);
4267 emit_shrimm(temp,3,temp);
4268 emit_writeword(temp,(int)&reg_cop2d[11]);
4269 emit_writeword(sl,(int)&reg_cop2d[28]);
4270 break;
// Register 30: the value is stored, and register 31 receives the result of
// the movs/mvnmi/clz sequence - presumably a count of leading bits equal to
// the sign bit; confirm against the emit_mvnmi definition.
4271 case 30:
4272 emit_movs(sl,temp);
4273 emit_mvnmi(temp,temp);
4274 emit_clz(temp,temp);
4275 emit_writeword(sl,(int)&reg_cop2d[30]);
4276 emit_writeword(temp,(int)&reg_cop2d[31]);
4277 break;
// Writes to register 31 are ignored.
4278 case 31:
4279 break;
4280 default:
4281 emit_writeword(sl,(int)&reg_cop2d[copr]);
4282 break;
4283 }
4284}
4285
4286void cop2_assemble(int i,struct regstat *i_regs)
4287{
4288 u_int copr=(source[i]>>11)&0x1f;
4289 signed char temp=get_reg(i_regs->regmap,-1);
4290 if (opcode2[i]==0) { // MFC2
4291 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4292 if(tl>=0&&rt1[i]!=0)
4293 cop2_get_dreg(copr,tl,temp);
4294 }
4295 else if (opcode2[i]==4) { // MTC2
4296 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4297 cop2_put_dreg(copr,sl,temp);
4298 }
4299 else if (opcode2[i]==2) // CFC2
4300 {
4301 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4302 if(tl>=0&&rt1[i]!=0)
4303 emit_readword((int)&reg_cop2c[copr],tl);
4304 }
4305 else if (opcode2[i]==6) // CTC2
4306 {
4307 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4308 switch(copr) {
4309 case 4:
4310 case 12:
4311 case 20:
4312 case 26:
4313 case 27:
4314 case 29:
4315 case 30:
4316 emit_signextend16(sl,temp);
4317 break;
4318 case 31:
4319 //value = value & 0x7ffff000;
4320 //if (value & 0x7f87e000) value |= 0x80000000;
4321 emit_shrimm(sl,12,temp);
4322 emit_shlimm(temp,12,temp);
4323 emit_testimm(temp,0x7f000000);
4324 emit_testeqimm(temp,0x00870000);
4325 emit_testeqimm(temp,0x0000e000);
4326 emit_orrne_imm(temp,0x80000000,temp);
4327 break;
4328 default:
4329 temp=sl;
4330 break;
4331 }
4332 emit_writeword(temp,(int)&reg_cop2c[copr]);
4333 assert(sl>=0);
4334 }
4335}
4336
4337void c2op_assemble(int i,struct regstat *i_regs)
4338{
4339 signed char temp=get_reg(i_regs->regmap,-1);
4340 u_int c2op=source[i]&0x3f;
4341 u_int hr,reglist=0;
4342 int need_flags;
4343 for(hr=0;hr<HOST_REGS;hr++) {
4344 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4345 }
4346 if(i==0||itype[i-1]!=C2OP)
4347 save_regs(reglist);
4348
4349 if (gte_handlers[c2op]!=NULL) {
4350 int cc=get_reg(i_regs->regmap,CCREG);
4351 emit_movimm(source[i],1); // opcode
4352 if (cc>=0&&gte_cycletab[c2op])
4353 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
4354 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4355 emit_writeword(1,(int)&psxRegs.code);
4356 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
4357 assem_debug("gte unneeded %016llx, need_flags %d\n",gte_unneeded[i+1],need_flags);
4358#ifdef ARMv5_ONLY
4359 // let's take more risk here
4360 need_flags=need_flags&&gte_reads_flags;
4361#endif
4362 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4363 }
4364
4365 if(i>=slen-1||itype[i+1]!=C2OP)
4366 restore_regs(reglist);
4367}
4368
4369void cop1_unusable(int i,struct regstat *i_regs)
4370{
4371 // XXX: should just just do the exception instead
4372 if(!cop1_usable) {
4373 int jaddr=(int)out;
4374 emit_jmp(0);
4375 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4376 cop1_usable=1;
4377 }
4378}
4379
4380void cop1_assemble(int i,struct regstat *i_regs)
4381{
4382#ifndef DISABLE_COP1
4383 // Check cop1 unusable
4384 if(!cop1_usable) {
4385 signed char rs=get_reg(i_regs->regmap,CSREG);
4386 assert(rs>=0);
4387 emit_testimm(rs,0x20000000);
4388 int jaddr=(int)out;
4389 emit_jeq(0);
4390 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4391 cop1_usable=1;
4392 }
4393 if (opcode2[i]==0) { // MFC1
4394 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4395 if(tl>=0) {
4396 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4397 emit_readword_indexed(0,tl,tl);
4398 }
4399 }
4400 else if (opcode2[i]==1) { // DMFC1
4401 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4402 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4403 if(tl>=0) {
4404 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4405 if(th>=0) emit_readword_indexed(4,tl,th);
4406 emit_readword_indexed(0,tl,tl);
4407 }
4408 }
4409 else if (opcode2[i]==4) { // MTC1
4410 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4411 signed char temp=get_reg(i_regs->regmap,-1);
4412 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4413 emit_writeword_indexed(sl,0,temp);
4414 }
4415 else if (opcode2[i]==5) { // DMTC1
4416 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4417 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4418 signed char temp=get_reg(i_regs->regmap,-1);
4419 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4420 emit_writeword_indexed(sh,4,temp);
4421 emit_writeword_indexed(sl,0,temp);
4422 }
4423 else if (opcode2[i]==2) // CFC1
4424 {
4425 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4426 if(tl>=0) {
4427 u_int copr=(source[i]>>11)&0x1f;
4428 if(copr==0) emit_readword((int)&FCR0,tl);
4429 if(copr==31) emit_readword((int)&FCR31,tl);
4430 }
4431 }
4432 else if (opcode2[i]==6) // CTC1
4433 {
4434 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4435 u_int copr=(source[i]>>11)&0x1f;
4436 assert(sl>=0);
4437 if(copr==31)
4438 {
4439 emit_writeword(sl,(int)&FCR31);
4440 // Set the rounding mode
4441 //FIXME
4442 //char temp=get_reg(i_regs->regmap,-1);
4443 //emit_andimm(sl,3,temp);
4444 //emit_fldcw_indexed((int)&rounding_modes,temp);
4445 }
4446 }
4447#else
4448 cop1_unusable(i, i_regs);
4449#endif
4450}
4451
4452void fconv_assemble_arm(int i,struct regstat *i_regs)
4453{
4454#ifndef DISABLE_COP1
4455 signed char temp=get_reg(i_regs->regmap,-1);
4456 assert(temp>=0);
4457 // Check cop1 unusable
4458 if(!cop1_usable) {
4459 signed char rs=get_reg(i_regs->regmap,CSREG);
4460 assert(rs>=0);
4461 emit_testimm(rs,0x20000000);
4462 int jaddr=(int)out;
4463 emit_jeq(0);
4464 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4465 cop1_usable=1;
4466 }
4467
4468 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4469 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4470 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4471 emit_flds(temp,15);
4472 emit_ftosizs(15,15); // float->int, truncate
4473 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4474 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4475 emit_fsts(15,temp);
4476 return;
4477 }
4478 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4479 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4480 emit_vldr(temp,7);
4481 emit_ftosizd(7,13); // double->int, truncate
4482 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4483 emit_fsts(13,temp);
4484 return;
4485 }
4486
4487 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4488 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4489 emit_flds(temp,13);
4490 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4491 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4492 emit_fsitos(13,15);
4493 emit_fsts(15,temp);
4494 return;
4495 }
4496 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4497 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4498 emit_flds(temp,13);
4499 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4500 emit_fsitod(13,7);
4501 emit_vstr(7,temp);
4502 return;
4503 }
4504
4505 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4506 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4507 emit_flds(temp,13);
4508 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4509 emit_fcvtds(13,7);
4510 emit_vstr(7,temp);
4511 return;
4512 }
4513 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4514 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4515 emit_vldr(temp,7);
4516 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4517 emit_fcvtsd(7,13);
4518 emit_fsts(13,temp);
4519 return;
4520 }
4521 #endif
4522
4523 // C emulation code
4524
4525 u_int hr,reglist=0;
4526 for(hr=0;hr<HOST_REGS;hr++) {
4527 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4528 }
4529 save_regs(reglist);
4530
4531 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4532 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4533 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4534 emit_call((int)cvt_s_w);
4535 }
4536 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4537 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4538 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4539 emit_call((int)cvt_d_w);
4540 }
4541 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4542 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4543 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4544 emit_call((int)cvt_s_l);
4545 }
4546 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4547 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4548 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4549 emit_call((int)cvt_d_l);
4550 }
4551
4552 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4553 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4554 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4555 emit_call((int)cvt_d_s);
4556 }
4557 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4558 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4559 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4560 emit_call((int)cvt_w_s);
4561 }
4562 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4563 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4564 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4565 emit_call((int)cvt_l_s);
4566 }
4567
4568 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4569 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4570 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4571 emit_call((int)cvt_s_d);
4572 }
4573 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4574 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4575 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4576 emit_call((int)cvt_w_d);
4577 }
4578 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4579 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4580 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4581 emit_call((int)cvt_l_d);
4582 }
4583
4584 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4585 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4586 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4587 emit_call((int)round_l_s);
4588 }
4589 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4590 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4591 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4592 emit_call((int)trunc_l_s);
4593 }
4594 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4595 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4596 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4597 emit_call((int)ceil_l_s);
4598 }
4599 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4600 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4601 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4602 emit_call((int)floor_l_s);
4603 }
4604 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4605 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4606 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4607 emit_call((int)round_w_s);
4608 }
4609 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4610 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4611 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4612 emit_call((int)trunc_w_s);
4613 }
4614 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4615 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4616 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4617 emit_call((int)ceil_w_s);
4618 }
4619 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4620 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4621 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4622 emit_call((int)floor_w_s);
4623 }
4624
4625 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4626 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4627 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4628 emit_call((int)round_l_d);
4629 }
4630 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4631 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4632 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4633 emit_call((int)trunc_l_d);
4634 }
4635 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4636 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4637 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4638 emit_call((int)ceil_l_d);
4639 }
4640 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4641 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4642 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4643 emit_call((int)floor_l_d);
4644 }
4645 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4646 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4647 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4648 emit_call((int)round_w_d);
4649 }
4650 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4651 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4652 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4653 emit_call((int)trunc_w_d);
4654 }
4655 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4656 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4657 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4658 emit_call((int)ceil_w_d);
4659 }
4660 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4661 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4662 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4663 emit_call((int)floor_w_d);
4664 }
4665
4666 restore_regs(reglist);
4667#else
4668 cop1_unusable(i, i_regs);
4669#endif
4670}
4671#define fconv_assemble fconv_assemble_arm
4672
4673void fcomp_assemble(int i,struct regstat *i_regs)
4674{
4675#ifndef DISABLE_COP1
4676 signed char fs=get_reg(i_regs->regmap,FSREG);
4677 signed char temp=get_reg(i_regs->regmap,-1);
4678 assert(temp>=0);
4679 // Check cop1 unusable
4680 if(!cop1_usable) {
4681 signed char cs=get_reg(i_regs->regmap,CSREG);
4682 assert(cs>=0);
4683 emit_testimm(cs,0x20000000);
4684 int jaddr=(int)out;
4685 emit_jeq(0);
4686 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4687 cop1_usable=1;
4688 }
4689
4690 if((source[i]&0x3f)==0x30) {
4691 emit_andimm(fs,~0x800000,fs);
4692 return;
4693 }
4694
4695 if((source[i]&0x3e)==0x38) {
4696 // sf/ngle - these should throw exceptions for NaNs
4697 emit_andimm(fs,~0x800000,fs);
4698 return;
4699 }
4700
4701 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4702 if(opcode2[i]==0x10) {
4703 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4704 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4705 emit_orimm(fs,0x800000,fs);
4706 emit_flds(temp,14);
4707 emit_flds(HOST_TEMPREG,15);
4708 emit_fcmps(14,15);
4709 emit_fmstat();
4710 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4711 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4712 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4713 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4714 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4715 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4716 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4717 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4718 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4719 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4720 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4721 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4722 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4723 return;
4724 }
4725 if(opcode2[i]==0x11) {
4726 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4727 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4728 emit_orimm(fs,0x800000,fs);
4729 emit_vldr(temp,6);
4730 emit_vldr(HOST_TEMPREG,7);
4731 emit_fcmpd(6,7);
4732 emit_fmstat();
4733 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4734 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4735 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4736 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4737 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4738 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4739 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4740 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4741 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4742 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4743 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4744 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4745 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4746 return;
4747 }
4748 #endif
4749
4750 // C only
4751
4752 u_int hr,reglist=0;
4753 for(hr=0;hr<HOST_REGS;hr++) {
4754 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4755 }
4756 reglist&=~(1<<fs);
4757 save_regs(reglist);
4758 if(opcode2[i]==0x10) {
4759 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4760 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4761 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4762 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4763 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4764 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4765 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4766 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4767 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4768 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4769 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4770 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4771 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4772 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4773 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4774 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4775 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4776 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4777 }
4778 if(opcode2[i]==0x11) {
4779 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4780 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4781 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4782 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4783 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4784 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4785 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4786 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4787 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4788 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4789 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4790 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4791 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4792 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4793 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4794 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4795 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4796 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4797 }
4798 restore_regs(reglist);
4799 emit_loadreg(FSREG,fs);
4800#else
4801 cop1_unusable(i, i_regs);
4802#endif
4803}
4804
4805void float_assemble(int i,struct regstat *i_regs)
4806{
4807#ifndef DISABLE_COP1
4808 signed char temp=get_reg(i_regs->regmap,-1);
4809 assert(temp>=0);
4810 // Check cop1 unusable
4811 if(!cop1_usable) {
4812 signed char cs=get_reg(i_regs->regmap,CSREG);
4813 assert(cs>=0);
4814 emit_testimm(cs,0x20000000);
4815 int jaddr=(int)out;
4816 emit_jeq(0);
4817 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4818 cop1_usable=1;
4819 }
4820
4821 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4822 if((source[i]&0x3f)==6) // mov
4823 {
4824 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4825 if(opcode2[i]==0x10) {
4826 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4827 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4828 emit_readword_indexed(0,temp,temp);
4829 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4830 }
4831 if(opcode2[i]==0x11) {
4832 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4833 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4834 emit_vldr(temp,7);
4835 emit_vstr(7,HOST_TEMPREG);
4836 }
4837 }
4838 return;
4839 }
4840
4841 if((source[i]&0x3f)>3)
4842 {
4843 if(opcode2[i]==0x10) {
4844 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4845 emit_flds(temp,15);
4846 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4847 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4848 }
4849 if((source[i]&0x3f)==4) // sqrt
4850 emit_fsqrts(15,15);
4851 if((source[i]&0x3f)==5) // abs
4852 emit_fabss(15,15);
4853 if((source[i]&0x3f)==7) // neg
4854 emit_fnegs(15,15);
4855 emit_fsts(15,temp);
4856 }
4857 if(opcode2[i]==0x11) {
4858 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4859 emit_vldr(temp,7);
4860 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4861 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4862 }
4863 if((source[i]&0x3f)==4) // sqrt
4864 emit_fsqrtd(7,7);
4865 if((source[i]&0x3f)==5) // abs
4866 emit_fabsd(7,7);
4867 if((source[i]&0x3f)==7) // neg
4868 emit_fnegd(7,7);
4869 emit_vstr(7,temp);
4870 }
4871 return;
4872 }
4873 if((source[i]&0x3f)<4)
4874 {
4875 if(opcode2[i]==0x10) {
4876 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4877 }
4878 if(opcode2[i]==0x11) {
4879 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4880 }
4881 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4882 if(opcode2[i]==0x10) {
4883 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4884 emit_flds(temp,15);
4885 emit_flds(HOST_TEMPREG,13);
4886 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4887 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4888 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4889 }
4890 }
4891 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4892 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4893 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4894 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4895 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4896 emit_fsts(15,HOST_TEMPREG);
4897 }else{
4898 emit_fsts(15,temp);
4899 }
4900 }
4901 else if(opcode2[i]==0x11) {
4902 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4903 emit_vldr(temp,7);
4904 emit_vldr(HOST_TEMPREG,6);
4905 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4906 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4907 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4908 }
4909 }
4910 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4911 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4912 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4913 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4914 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4915 emit_vstr(7,HOST_TEMPREG);
4916 }else{
4917 emit_vstr(7,temp);
4918 }
4919 }
4920 }
4921 else {
4922 if(opcode2[i]==0x10) {
4923 emit_flds(temp,15);
4924 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4925 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4926 }
4927 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4928 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4929 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4930 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4931 emit_fsts(15,temp);
4932 }
4933 else if(opcode2[i]==0x11) {
4934 emit_vldr(temp,7);
4935 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4936 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4937 }
4938 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4939 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4940 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4941 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4942 emit_vstr(7,temp);
4943 }
4944 }
4945 return;
4946 }
4947 #endif
4948
4949 u_int hr,reglist=0;
4950 for(hr=0;hr<HOST_REGS;hr++) {
4951 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4952 }
4953 if(opcode2[i]==0x10) { // Single precision
4954 save_regs(reglist);
4955 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4956 if((source[i]&0x3f)<4) {
4957 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4958 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4959 }else{
4960 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4961 }
4962 switch(source[i]&0x3f)
4963 {
4964 case 0x00: emit_call((int)add_s);break;
4965 case 0x01: emit_call((int)sub_s);break;
4966 case 0x02: emit_call((int)mul_s);break;
4967 case 0x03: emit_call((int)div_s);break;
4968 case 0x04: emit_call((int)sqrt_s);break;
4969 case 0x05: emit_call((int)abs_s);break;
4970 case 0x06: emit_call((int)mov_s);break;
4971 case 0x07: emit_call((int)neg_s);break;
4972 }
4973 restore_regs(reglist);
4974 }
4975 if(opcode2[i]==0x11) { // Double precision
4976 save_regs(reglist);
4977 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4978 if((source[i]&0x3f)<4) {
4979 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4980 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4981 }else{
4982 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4983 }
4984 switch(source[i]&0x3f)
4985 {
4986 case 0x00: emit_call((int)add_d);break;
4987 case 0x01: emit_call((int)sub_d);break;
4988 case 0x02: emit_call((int)mul_d);break;
4989 case 0x03: emit_call((int)div_d);break;
4990 case 0x04: emit_call((int)sqrt_d);break;
4991 case 0x05: emit_call((int)abs_d);break;
4992 case 0x06: emit_call((int)mov_d);break;
4993 case 0x07: emit_call((int)neg_d);break;
4994 }
4995 restore_regs(reglist);
4996 }
4997#else
4998 cop1_unusable(i, i_regs);
4999#endif
5000}
5001
5002void multdiv_assemble_arm(int i,struct regstat *i_regs)
5003{
5004 // case 0x18: MULT
5005 // case 0x19: MULTU
5006 // case 0x1A: DIV
5007 // case 0x1B: DIVU
5008 // case 0x1C: DMULT
5009 // case 0x1D: DMULTU
5010 // case 0x1E: DDIV
5011 // case 0x1F: DDIVU
5012 if(rs1[i]&&rs2[i])
5013 {
5014 if((opcode2[i]&4)==0) // 32-bit
5015 {
5016 if(opcode2[i]==0x18) // MULT
5017 {
5018 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5019 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5020 signed char hi=get_reg(i_regs->regmap,HIREG);
5021 signed char lo=get_reg(i_regs->regmap,LOREG);
5022 assert(m1>=0);
5023 assert(m2>=0);
5024 assert(hi>=0);
5025 assert(lo>=0);
5026 emit_smull(m1,m2,hi,lo);
5027 }
5028 if(opcode2[i]==0x19) // MULTU
5029 {
5030 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5031 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5032 signed char hi=get_reg(i_regs->regmap,HIREG);
5033 signed char lo=get_reg(i_regs->regmap,LOREG);
5034 assert(m1>=0);
5035 assert(m2>=0);
5036 assert(hi>=0);
5037 assert(lo>=0);
5038 emit_umull(m1,m2,hi,lo);
5039 }
5040 if(opcode2[i]==0x1A) // DIV
5041 {
5042 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5043 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5044 assert(d1>=0);
5045 assert(d2>=0);
5046 signed char quotient=get_reg(i_regs->regmap,LOREG);
5047 signed char remainder=get_reg(i_regs->regmap,HIREG);
5048 assert(quotient>=0);
5049 assert(remainder>=0);
5050 emit_movs(d1,remainder);
5051 emit_movimm(0xffffffff,quotient);
5052 emit_negmi(quotient,quotient); // .. quotient and ..
5053 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
5054 emit_movs(d2,HOST_TEMPREG);
5055 emit_jeq((int)out+52); // Division by zero
5056 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5057 emit_clz(HOST_TEMPREG,quotient);
5058 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5059 emit_orimm(quotient,1<<31,quotient);
5060 emit_shr(quotient,quotient,quotient);
5061 emit_cmp(remainder,HOST_TEMPREG);
5062 emit_subcs(remainder,HOST_TEMPREG,remainder);
5063 emit_adcs(quotient,quotient,quotient);
5064 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5065 emit_jcc((int)out-16); // -4
5066 emit_teq(d1,d2);
5067 emit_negmi(quotient,quotient);
5068 emit_test(d1,d1);
5069 emit_negmi(remainder,remainder);
5070 }
5071 if(opcode2[i]==0x1B) // DIVU
5072 {
5073 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5074 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5075 assert(d1>=0);
5076 assert(d2>=0);
5077 signed char quotient=get_reg(i_regs->regmap,LOREG);
5078 signed char remainder=get_reg(i_regs->regmap,HIREG);
5079 assert(quotient>=0);
5080 assert(remainder>=0);
5081 emit_mov(d1,remainder);
5082 emit_movimm(0xffffffff,quotient); // div0 case
5083 emit_test(d2,d2);
5084 emit_jeq((int)out+40); // Division by zero
5085 emit_clz(d2,HOST_TEMPREG);
5086 emit_movimm(1<<31,quotient);
5087 emit_shl(d2,HOST_TEMPREG,d2);
5088 emit_shr(quotient,HOST_TEMPREG,quotient);
5089 emit_cmp(remainder,d2);
5090 emit_subcs(remainder,d2,remainder);
5091 emit_adcs(quotient,quotient,quotient);
5092 emit_shrcc_imm(d2,1,d2);
5093 emit_jcc((int)out-16); // -4
5094 }
5095 }
5096 else // 64-bit
5097#ifndef FORCE32
5098 {
5099 if(opcode2[i]==0x1C) // DMULT
5100 {
5101 assert(opcode2[i]!=0x1C);
5102 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5103 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5104 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5105 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5106 assert(m1h>=0);
5107 assert(m2h>=0);
5108 assert(m1l>=0);
5109 assert(m2l>=0);
5110 emit_pushreg(m2h);
5111 emit_pushreg(m2l);
5112 emit_pushreg(m1h);
5113 emit_pushreg(m1l);
5114 emit_call((int)&mult64);
5115 emit_popreg(m1l);
5116 emit_popreg(m1h);
5117 emit_popreg(m2l);
5118 emit_popreg(m2h);
5119 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5120 signed char hil=get_reg(i_regs->regmap,HIREG);
5121 if(hih>=0) emit_loadreg(HIREG|64,hih);
5122 if(hil>=0) emit_loadreg(HIREG,hil);
5123 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5124 signed char lol=get_reg(i_regs->regmap,LOREG);
5125 if(loh>=0) emit_loadreg(LOREG|64,loh);
5126 if(lol>=0) emit_loadreg(LOREG,lol);
5127 }
5128 if(opcode2[i]==0x1D) // DMULTU
5129 {
5130 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5131 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5132 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5133 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5134 assert(m1h>=0);
5135 assert(m2h>=0);
5136 assert(m1l>=0);
5137 assert(m2l>=0);
5138 save_regs(0x100f);
5139 if(m1l!=0) emit_mov(m1l,0);
5140 if(m1h==0) emit_readword((int)&dynarec_local,1);
5141 else if(m1h>1) emit_mov(m1h,1);
5142 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5143 else if(m2l>2) emit_mov(m2l,2);
5144 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5145 else if(m2h>3) emit_mov(m2h,3);
5146 emit_call((int)&multu64);
5147 restore_regs(0x100f);
5148 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5149 signed char hil=get_reg(i_regs->regmap,HIREG);
5150 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5151 signed char lol=get_reg(i_regs->regmap,LOREG);
5152 /*signed char temp=get_reg(i_regs->regmap,-1);
5153 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5154 signed char rl=get_reg(i_regs->regmap,HIREG);
5155 assert(m1h>=0);
5156 assert(m2h>=0);
5157 assert(m1l>=0);
5158 assert(m2l>=0);
5159 assert(temp>=0);
5160 //emit_mov(m1l,EAX);
5161 //emit_mul(m2l);
5162 emit_umull(rl,rh,m1l,m2l);
5163 emit_storereg(LOREG,rl);
5164 emit_mov(rh,temp);
5165 //emit_mov(m1h,EAX);
5166 //emit_mul(m2l);
5167 emit_umull(rl,rh,m1h,m2l);
5168 emit_adds(rl,temp,temp);
5169 emit_adcimm(rh,0,rh);
5170 emit_storereg(HIREG,rh);
5171 //emit_mov(m2h,EAX);
5172 //emit_mul(m1l);
5173 emit_umull(rl,rh,m1l,m2h);
5174 emit_adds(rl,temp,temp);
5175 emit_adcimm(rh,0,rh);
5176 emit_storereg(LOREG|64,temp);
5177 emit_mov(rh,temp);
5178 //emit_mov(m2h,EAX);
5179 //emit_mul(m1h);
5180 emit_umull(rl,rh,m1h,m2h);
5181 emit_adds(rl,temp,rl);
5182 emit_loadreg(HIREG,temp);
5183 emit_adcimm(rh,0,rh);
5184 emit_adds(rl,temp,rl);
5185 emit_adcimm(rh,0,rh);
5186 // DEBUG
5187 /*
5188 emit_pushreg(m2h);
5189 emit_pushreg(m2l);
5190 emit_pushreg(m1h);
5191 emit_pushreg(m1l);
5192 emit_call((int)&multu64);
5193 emit_popreg(m1l);
5194 emit_popreg(m1h);
5195 emit_popreg(m2l);
5196 emit_popreg(m2h);
5197 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5198 signed char hil=get_reg(i_regs->regmap,HIREG);
5199 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5200 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5201 */
5202 // Shouldn't be necessary
5203 //char loh=get_reg(i_regs->regmap,LOREG|64);
5204 //char lol=get_reg(i_regs->regmap,LOREG);
5205 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5206 //if(lol>=0) emit_loadreg(LOREG,lol);
5207 }
5208 if(opcode2[i]==0x1E) // DDIV
5209 {
5210 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5211 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5212 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5213 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5214 assert(d1h>=0);
5215 assert(d2h>=0);
5216 assert(d1l>=0);
5217 assert(d2l>=0);
5218 save_regs(0x100f);
5219 if(d1l!=0) emit_mov(d1l,0);
5220 if(d1h==0) emit_readword((int)&dynarec_local,1);
5221 else if(d1h>1) emit_mov(d1h,1);
5222 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5223 else if(d2l>2) emit_mov(d2l,2);
5224 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5225 else if(d2h>3) emit_mov(d2h,3);
5226 emit_call((int)&div64);
5227 restore_regs(0x100f);
5228 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5229 signed char hil=get_reg(i_regs->regmap,HIREG);
5230 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5231 signed char lol=get_reg(i_regs->regmap,LOREG);
5232 if(hih>=0) emit_loadreg(HIREG|64,hih);
5233 if(hil>=0) emit_loadreg(HIREG,hil);
5234 if(loh>=0) emit_loadreg(LOREG|64,loh);
5235 if(lol>=0) emit_loadreg(LOREG,lol);
5236 }
5237 if(opcode2[i]==0x1F) // DDIVU
5238 {
5239 //u_int hr,reglist=0;
5240 //for(hr=0;hr<HOST_REGS;hr++) {
5241 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5242 //}
5243 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5244 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5245 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5246 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5247 assert(d1h>=0);
5248 assert(d2h>=0);
5249 assert(d1l>=0);
5250 assert(d2l>=0);
5251 save_regs(0x100f);
5252 if(d1l!=0) emit_mov(d1l,0);
5253 if(d1h==0) emit_readword((int)&dynarec_local,1);
5254 else if(d1h>1) emit_mov(d1h,1);
5255 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5256 else if(d2l>2) emit_mov(d2l,2);
5257 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5258 else if(d2h>3) emit_mov(d2h,3);
5259 emit_call((int)&divu64);
5260 restore_regs(0x100f);
5261 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5262 signed char hil=get_reg(i_regs->regmap,HIREG);
5263 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5264 signed char lol=get_reg(i_regs->regmap,LOREG);
5265 if(hih>=0) emit_loadreg(HIREG|64,hih);
5266 if(hil>=0) emit_loadreg(HIREG,hil);
5267 if(loh>=0) emit_loadreg(LOREG|64,loh);
5268 if(lol>=0) emit_loadreg(LOREG,lol);
5269 }
5270 }
5271#else
5272 assert(0);
5273#endif
5274 }
5275 else
5276 {
5277 // Multiply by zero is zero.
5278 // MIPS does not have a divide by zero exception.
5279 // The result is undefined, we return zero.
5280 signed char hr=get_reg(i_regs->regmap,HIREG);
5281 signed char lr=get_reg(i_regs->regmap,LOREG);
5282 if(hr>=0) emit_zeroreg(hr);
5283 if(lr>=0) emit_zeroreg(lr);
5284 }
5285}
5286#define multdiv_assemble multdiv_assemble_arm
5287
// Intentionally emits nothing on ARM.  The x86 backend uses this hook to
// load the constant 0xf8 into register r; here the hash is produced by a
// single instruction in do_rhash(), so no preloaded constant is required.
void do_preload_rhash(int r) {
  (void)r; // register argument is unused on this architecture
}
5292
5293void do_preload_rhtbl(int ht) {
5294 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5295}
5296
// Emit code that computes the mini hash table index for the address held in
// host register rs: rs is masked with 0xf8 and the result is placed in rh.
// The mask keeps bits 3..7, i.e. the same bits selected by
// (addr&0xFF)>>3 in do_miniht_insert(), left as a byte offset in 8-byte steps.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5300
5301void do_miniht_load(int ht,int rh) {
5302 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5303 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5304}
5305
5306void do_miniht_jump(int rs,int rh,int ht) {
5307 emit_cmp(rh,rs);
5308 emit_ldreq_indexed(ht,4,15);
5309 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5310 emit_mov(rs,7);
5311 emit_jmp(jump_vaddr_reg[7]);
5312 #else
5313 emit_jmp(jump_vaddr_reg[rs]);
5314 #endif
5315}
5316
5317void do_miniht_insert(u_int return_address,int rt,int temp) {
5318 #ifdef ARMv5_ONLY
5319 emit_movimm(return_address,rt); // PC into link register
5320 add_to_linker((int)out,return_address,1);
5321 emit_pcreladdr(temp);
5322 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5323 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5324 #else
5325 emit_movw(return_address&0x0000FFFF,rt);
5326 add_to_linker((int)out,return_address,1);
5327 emit_pcreladdr(temp);
5328 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5329 emit_movt(return_address&0xFFFF0000,rt);
5330 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5331 #endif
5332}
5333
5334// Sign-extend to 64 bits and write out upper half of a register
5335// This is useful where we have a 32-bit value in a register, and want to
5336// keep it in a 32-bit register, but can't guarantee that it won't be read
5337// as a 64-bit value later.
5338void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5339{
5340#ifndef FORCE32
5341 if(is32_pre==is32) return;
5342 int hr,reg;
5343 for(hr=0;hr<HOST_REGS;hr++) {
5344 if(hr!=EXCLUDE_REG) {
5345 //if(pre[hr]==entry[hr]) {
5346 if((reg=pre[hr])>=0) {
5347 if((dirty>>hr)&1) {
5348 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5349 emit_sarimm(hr,31,HOST_TEMPREG);
5350 emit_storereg(reg|64,HOST_TEMPREG);
5351 }
5352 }
5353 }
5354 //}
5355 }
5356 }
5357#endif
5358}
5359
5360void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5361{
5362 //if(dirty_pre==dirty) return;
5363 int hr,reg,new_hr;
5364 for(hr=0;hr<HOST_REGS;hr++) {
5365 if(hr!=EXCLUDE_REG) {
5366 reg=pre[hr];
5367 if(((~u)>>(reg&63))&1) {
5368 if(reg>0) {
5369 if(((dirty_pre&~dirty)>>hr)&1) {
5370 if(reg>0&&reg<34) {
5371 emit_storereg(reg,hr);
5372 if( ((is32_pre&~uu)>>reg)&1 ) {
5373 emit_sarimm(hr,31,HOST_TEMPREG);
5374 emit_storereg(reg|64,HOST_TEMPREG);
5375 }
5376 }
5377 else if(reg>=64) {
5378 emit_storereg(reg,hr);
5379 }
5380 }
5381 }
5382 }
5383 }
5384 }
5385}
5386
5387
5388/* using strd could possibly help but you'd have to allocate registers in pairs
5389void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5390{
5391 int hr;
5392 int wrote=-1;
5393 for(hr=HOST_REGS-1;hr>=0;hr--) {
5394 if(hr!=EXCLUDE_REG) {
5395 if(pre[hr]!=entry[hr]) {
5396 if(pre[hr]>=0) {
5397 if((dirty>>hr)&1) {
5398 if(get_reg(entry,pre[hr])<0) {
5399 if(pre[hr]<64) {
5400 if(!((u>>pre[hr])&1)) {
5401 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5402 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5403 emit_sarimm(hr,31,hr+1);
5404 emit_strdreg(pre[hr],hr);
5405 }
5406 else
5407 emit_storereg(pre[hr],hr);
5408 }else{
5409 emit_storereg(pre[hr],hr);
5410 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5411 emit_sarimm(hr,31,hr);
5412 emit_storereg(pre[hr]|64,hr);
5413 }
5414 }
5415 }
5416 }else{
5417 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5418 emit_storereg(pre[hr],hr);
5419 }
5420 }
5421 wrote=hr;
5422 }
5423 }
5424 }
5425 }
5426 }
5427 }
5428 for(hr=0;hr<HOST_REGS;hr++) {
5429 if(hr!=EXCLUDE_REG) {
5430 if(pre[hr]!=entry[hr]) {
5431 if(pre[hr]>=0) {
5432 int nr;
5433 if((nr=get_reg(entry,pre[hr]))>=0) {
5434 emit_mov(hr,nr);
5435 }
5436 }
5437 }
5438 }
5439 }
5440}
5441#define wb_invalidate wb_invalidate_arm
5442*/
5443
5444// Clearing the cache is rather slow on ARM Linux, so mark the areas
5445// that need to be cleared, and then only clear these areas once.
5446void do_clear_cache()
5447{
5448 int i,j;
5449 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5450 {
5451 u_int bitmap=needs_clear_cache[i];
5452 if(bitmap) {
5453 u_int start,end;
5454 for(j=0;j<32;j++)
5455 {
5456 if(bitmap&(1<<j)) {
5457 start=BASE_ADDR+i*131072+j*4096;
5458 end=start+4095;
5459 j++;
5460 while(j<32) {
5461 if(bitmap&(1<<j)) {
5462 end+=4096;
5463 j++;
5464 }else{
5465 __clear_cache((void *)start,(void *)end);
5466 break;
5467 }
5468 }
5469 }
5470 }
5471 needs_clear_cache[i]=0;
5472 }
5473 }
5474}
5475
5476// CPU-architecture-specific initialization
5477void arch_init() {
5478#ifndef DISABLE_COP1
5479 rounding_modes[0]=0x0<<22; // round
5480 rounding_modes[1]=0x3<<22; // trunc
5481 rounding_modes[2]=0x1<<22; // ceil
5482 rounding_modes[3]=0x2<<22; // floor
5483#endif
5484}
5485
5486// vim:shiftwidth=2:expandtab