drc: initial cop2/gte implementation (works, mostly)
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
// Routines that are declared here but not defined in this part of the file.
// They are declared without a parameter list.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

// One jump_vaddr_rN routine per host register number used in the table
// below; no r11, r13, r14 or r15 variant is declared.
void jump_vaddr_r0(), jump_vaddr_r1(), jump_vaddr_r2(), jump_vaddr_r3();
void jump_vaddr_r4(), jump_vaddr_r5(), jump_vaddr_r6(), jump_vaddr_r7();
void jump_vaddr_r8(), jump_vaddr_r9(), jump_vaddr_r10();
void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
145void kill_pointer(void *stub)
146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
153}
154
155int get_pointer(void *stub)
156{
157 //printf("get_pointer(%x)\n",(int)stub);
158 int *ptr=(int *)(stub+4);
159 assert((*ptr&0x0ff00000)==0x05900000);
160 u_int offset=*ptr&0xfff;
161 int **l_ptr=(void *)ptr+offset+8;
162 int *i_ptr=*l_ptr;
163 assert((*i_ptr&0x0f000000)==0x0a000000);
164 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
165}
166
167// Find the "clean" entry point from a "dirty" entry point
168// by skipping past the call to verify_code
169u_int get_clean_addr(int addr)
170{
171 int *ptr=(int *)addr;
172 #ifdef ARMv5_ONLY
173 ptr+=4;
174 #else
175 ptr+=6;
176 #endif
177 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
178 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
179 ptr++;
180 if((*ptr&0xFF000000)==0xea000000) {
181 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
182 }
183 return (u_int)ptr;
184}
185
186int verify_dirty(int addr)
187{
188 u_int *ptr=(u_int *)addr;
189 #ifdef ARMv5_ONLY
190 // get from literal pool
191 assert((*ptr&0xFFF00000)==0xe5900000);
192 u_int offset=*ptr&0xfff;
193 u_int *l_ptr=(void *)ptr+offset+8;
194 u_int source=l_ptr[0];
195 u_int copy=l_ptr[1];
196 u_int len=l_ptr[2];
197 ptr+=4;
198 #else
199 // ARMv7 movw/movt
200 assert((*ptr&0xFFF00000)==0xe3000000);
201 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
202 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
203 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
204 ptr+=6;
205 #endif
206 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
207 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 208 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 209 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
210 unsigned int page=source>>12;
211 unsigned int map_value=memory_map[page];
212 if(map_value>=0x80000000) return 0;
213 while(page<((source+len-1)>>12)) {
214 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
215 }
216 source = source+(map_value<<2);
217 }
218 //printf("verify_dirty: %x %x %x\n",source,copy,len);
219 return !memcmp((void *)source,(void *)copy,len);
220}
221
222// This doesn't necessarily find all clean entry points, just
223// guarantees that it's not dirty
224int isclean(int addr)
225{
226 #ifdef ARMv5_ONLY
227 int *ptr=((u_int *)addr)+4;
228 #else
229 int *ptr=((u_int *)addr)+6;
230 #endif
231 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
232 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
233 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
236 return 1;
237}
238
239void get_bounds(int addr,u_int *start,u_int *end)
240{
241 u_int *ptr=(u_int *)addr;
242 #ifdef ARMv5_ONLY
243 // get from literal pool
244 assert((*ptr&0xFFF00000)==0xe5900000);
245 u_int offset=*ptr&0xfff;
246 u_int *l_ptr=(void *)ptr+offset+8;
247 u_int source=l_ptr[0];
248 //u_int copy=l_ptr[1];
249 u_int len=l_ptr[2];
250 ptr+=4;
251 #else
252 // ARMv7 movw/movt
253 assert((*ptr&0xFFF00000)==0xe3000000);
254 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
255 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
256 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
257 ptr+=6;
258 #endif
259 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
260 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 261 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 262 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
263 if(memory_map[source>>12]>=0x80000000) source = 0;
264 else source = source+(memory_map[source>>12]<<2);
265 }
266 *start=source;
267 *end=source+len;
268}
269
270/* Register allocation */
271
272// Note: registers are allocated clean (unmodified state)
273// if you intend to modify the register, you must call dirty_reg().
274void alloc_reg(struct regstat *cur,int i,signed char reg)
275{
276 int r,hr;
277 int preferred_reg = (reg&7);
278 if(reg==CCREG) preferred_reg=HOST_CCREG;
279 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
280
281 // Don't allocate unused registers
282 if((cur->u>>reg)&1) return;
283
284 // see if it's already allocated
285 for(hr=0;hr<HOST_REGS;hr++)
286 {
287 if(cur->regmap[hr]==reg) return;
288 }
289
290 // Keep the same mapping if the register was already allocated in a loop
291 preferred_reg = loop_reg(i,reg,preferred_reg);
292
293 // Try to allocate the preferred register
294 if(cur->regmap[preferred_reg]==-1) {
295 cur->regmap[preferred_reg]=reg;
296 cur->dirty&=~(1<<preferred_reg);
297 cur->isconst&=~(1<<preferred_reg);
298 return;
299 }
300 r=cur->regmap[preferred_reg];
301 if(r<64&&((cur->u>>r)&1)) {
302 cur->regmap[preferred_reg]=reg;
303 cur->dirty&=~(1<<preferred_reg);
304 cur->isconst&=~(1<<preferred_reg);
305 return;
306 }
307 if(r>=64&&((cur->uu>>(r&63))&1)) {
308 cur->regmap[preferred_reg]=reg;
309 cur->dirty&=~(1<<preferred_reg);
310 cur->isconst&=~(1<<preferred_reg);
311 return;
312 }
313
314 // Clear any unneeded registers
315 // We try to keep the mapping consistent, if possible, because it
316 // makes branches easier (especially loops). So we try to allocate
317 // first (see above) before removing old mappings. If this is not
318 // possible then go ahead and clear out the registers that are no
319 // longer needed.
320 for(hr=0;hr<HOST_REGS;hr++)
321 {
322 r=cur->regmap[hr];
323 if(r>=0) {
324 if(r<64) {
325 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
326 }
327 else
328 {
329 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
330 }
331 }
332 }
333 // Try to allocate any available register, but prefer
334 // registers that have not been used recently.
335 if(i>0) {
336 for(hr=0;hr<HOST_REGS;hr++) {
337 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
338 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
339 cur->regmap[hr]=reg;
340 cur->dirty&=~(1<<hr);
341 cur->isconst&=~(1<<hr);
342 return;
343 }
344 }
345 }
346 }
347 // Try to allocate any available register
348 for(hr=0;hr<HOST_REGS;hr++) {
349 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
350 cur->regmap[hr]=reg;
351 cur->dirty&=~(1<<hr);
352 cur->isconst&=~(1<<hr);
353 return;
354 }
355 }
356
357 // Ok, now we have to evict someone
358 // Pick a register we hopefully won't need soon
359 u_char hsn[MAXREG+1];
360 memset(hsn,10,sizeof(hsn));
361 int j;
362 lsn(hsn,i,&preferred_reg);
363 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
364 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
365 if(i>0) {
366 // Don't evict the cycle count at entry points, otherwise the entry
367 // stub will have to write it.
368 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
369 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
370 for(j=10;j>=3;j--)
371 {
372 // Alloc preferred register if available
373 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
374 for(hr=0;hr<HOST_REGS;hr++) {
375 // Evict both parts of a 64-bit register
376 if((cur->regmap[hr]&63)==r) {
377 cur->regmap[hr]=-1;
378 cur->dirty&=~(1<<hr);
379 cur->isconst&=~(1<<hr);
380 }
381 }
382 cur->regmap[preferred_reg]=reg;
383 return;
384 }
385 for(r=1;r<=MAXREG;r++)
386 {
387 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
390 if(cur->regmap[hr]==r+64) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
397 }
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
400 if(cur->regmap[hr]==r) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 }
410 }
411 }
412 for(j=10;j>=0;j--)
413 {
414 for(r=1;r<=MAXREG;r++)
415 {
416 if(hsn[r]==j) {
417 for(hr=0;hr<HOST_REGS;hr++) {
418 if(cur->regmap[hr]==r+64) {
419 cur->regmap[hr]=reg;
420 cur->dirty&=~(1<<hr);
421 cur->isconst&=~(1<<hr);
422 return;
423 }
424 }
425 for(hr=0;hr<HOST_REGS;hr++) {
426 if(cur->regmap[hr]==r) {
427 cur->regmap[hr]=reg;
428 cur->dirty&=~(1<<hr);
429 cur->isconst&=~(1<<hr);
430 return;
431 }
432 }
433 }
434 }
435 }
436 printf("This shouldn't happen (alloc_reg)");exit(1);
437}
438
439void alloc_reg64(struct regstat *cur,int i,signed char reg)
440{
441 int preferred_reg = 8+(reg&1);
442 int r,hr;
443
444 // allocate the lower 32 bits
445 alloc_reg(cur,i,reg);
446
447 // Don't allocate unused registers
448 if((cur->uu>>reg)&1) return;
449
450 // see if the upper half is already allocated
451 for(hr=0;hr<HOST_REGS;hr++)
452 {
453 if(cur->regmap[hr]==reg+64) return;
454 }
455
456 // Keep the same mapping if the register was already allocated in a loop
457 preferred_reg = loop_reg(i,reg,preferred_reg);
458
459 // Try to allocate the preferred register
460 if(cur->regmap[preferred_reg]==-1) {
461 cur->regmap[preferred_reg]=reg|64;
462 cur->dirty&=~(1<<preferred_reg);
463 cur->isconst&=~(1<<preferred_reg);
464 return;
465 }
466 r=cur->regmap[preferred_reg];
467 if(r<64&&((cur->u>>r)&1)) {
468 cur->regmap[preferred_reg]=reg|64;
469 cur->dirty&=~(1<<preferred_reg);
470 cur->isconst&=~(1<<preferred_reg);
471 return;
472 }
473 if(r>=64&&((cur->uu>>(r&63))&1)) {
474 cur->regmap[preferred_reg]=reg|64;
475 cur->dirty&=~(1<<preferred_reg);
476 cur->isconst&=~(1<<preferred_reg);
477 return;
478 }
479
480 // Clear any unneeded registers
481 // We try to keep the mapping consistent, if possible, because it
482 // makes branches easier (especially loops). So we try to allocate
483 // first (see above) before removing old mappings. If this is not
484 // possible then go ahead and clear out the registers that are no
485 // longer needed.
486 for(hr=HOST_REGS-1;hr>=0;hr--)
487 {
488 r=cur->regmap[hr];
489 if(r>=0) {
490 if(r<64) {
491 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
492 }
493 else
494 {
495 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
496 }
497 }
498 }
499 // Try to allocate any available register, but prefer
500 // registers that have not been used recently.
501 if(i>0) {
502 for(hr=0;hr<HOST_REGS;hr++) {
503 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
504 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
505 cur->regmap[hr]=reg|64;
506 cur->dirty&=~(1<<hr);
507 cur->isconst&=~(1<<hr);
508 return;
509 }
510 }
511 }
512 }
513 // Try to allocate any available register
514 for(hr=0;hr<HOST_REGS;hr++) {
515 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
516 cur->regmap[hr]=reg|64;
517 cur->dirty&=~(1<<hr);
518 cur->isconst&=~(1<<hr);
519 return;
520 }
521 }
522
523 // Ok, now we have to evict someone
524 // Pick a register we hopefully won't need soon
525 u_char hsn[MAXREG+1];
526 memset(hsn,10,sizeof(hsn));
527 int j;
528 lsn(hsn,i,&preferred_reg);
529 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
530 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
531 if(i>0) {
532 // Don't evict the cycle count at entry points, otherwise the entry
533 // stub will have to write it.
534 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
535 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
536 for(j=10;j>=3;j--)
537 {
538 // Alloc preferred register if available
539 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
540 for(hr=0;hr<HOST_REGS;hr++) {
541 // Evict both parts of a 64-bit register
542 if((cur->regmap[hr]&63)==r) {
543 cur->regmap[hr]=-1;
544 cur->dirty&=~(1<<hr);
545 cur->isconst&=~(1<<hr);
546 }
547 }
548 cur->regmap[preferred_reg]=reg|64;
549 return;
550 }
551 for(r=1;r<=MAXREG;r++)
552 {
553 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
556 if(cur->regmap[hr]==r+64) {
557 cur->regmap[hr]=reg|64;
558 cur->dirty&=~(1<<hr);
559 cur->isconst&=~(1<<hr);
560 return;
561 }
562 }
563 }
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
566 if(cur->regmap[hr]==r) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 }
576 }
577 }
578 for(j=10;j>=0;j--)
579 {
580 for(r=1;r<=MAXREG;r++)
581 {
582 if(hsn[r]==j) {
583 for(hr=0;hr<HOST_REGS;hr++) {
584 if(cur->regmap[hr]==r+64) {
585 cur->regmap[hr]=reg|64;
586 cur->dirty&=~(1<<hr);
587 cur->isconst&=~(1<<hr);
588 return;
589 }
590 }
591 for(hr=0;hr<HOST_REGS;hr++) {
592 if(cur->regmap[hr]==r) {
593 cur->regmap[hr]=reg|64;
594 cur->dirty&=~(1<<hr);
595 cur->isconst&=~(1<<hr);
596 return;
597 }
598 }
599 }
600 }
601 }
602 printf("This shouldn't happen");exit(1);
603}
604
605// Allocate a temporary register. This is done without regard to
606// dirty status or whether the register we request is on the unneeded list
607// Note: This will only allocate one register, even if called multiple times
608void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
609{
610 int r,hr;
611 int preferred_reg = -1;
612
613 // see if it's already allocated
614 for(hr=0;hr<HOST_REGS;hr++)
615 {
616 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
617 }
618
619 // Try to allocate any available register
620 for(hr=HOST_REGS-1;hr>=0;hr--) {
621 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
622 cur->regmap[hr]=reg;
623 cur->dirty&=~(1<<hr);
624 cur->isconst&=~(1<<hr);
625 return;
626 }
627 }
628
629 // Find an unneeded register
630 for(hr=HOST_REGS-1;hr>=0;hr--)
631 {
632 r=cur->regmap[hr];
633 if(r>=0) {
634 if(r<64) {
635 if((cur->u>>r)&1) {
636 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
637 cur->regmap[hr]=reg;
638 cur->dirty&=~(1<<hr);
639 cur->isconst&=~(1<<hr);
640 return;
641 }
642 }
643 }
644 else
645 {
646 if((cur->uu>>(r&63))&1) {
647 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
648 cur->regmap[hr]=reg;
649 cur->dirty&=~(1<<hr);
650 cur->isconst&=~(1<<hr);
651 return;
652 }
653 }
654 }
655 }
656 }
657
658 // Ok, now we have to evict someone
659 // Pick a register we hopefully won't need soon
660 // TODO: we might want to follow unconditional jumps here
661 // TODO: get rid of dupe code and make this into a function
662 u_char hsn[MAXREG+1];
663 memset(hsn,10,sizeof(hsn));
664 int j;
665 lsn(hsn,i,&preferred_reg);
666 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
667 if(i>0) {
668 // Don't evict the cycle count at entry points, otherwise the entry
669 // stub will have to write it.
670 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
671 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
672 for(j=10;j>=3;j--)
673 {
674 for(r=1;r<=MAXREG;r++)
675 {
676 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
677 for(hr=0;hr<HOST_REGS;hr++) {
678 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
679 if(cur->regmap[hr]==r+64) {
680 cur->regmap[hr]=reg;
681 cur->dirty&=~(1<<hr);
682 cur->isconst&=~(1<<hr);
683 return;
684 }
685 }
686 }
687 for(hr=0;hr<HOST_REGS;hr++) {
688 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
689 if(cur->regmap[hr]==r) {
690 cur->regmap[hr]=reg;
691 cur->dirty&=~(1<<hr);
692 cur->isconst&=~(1<<hr);
693 return;
694 }
695 }
696 }
697 }
698 }
699 }
700 }
701 for(j=10;j>=0;j--)
702 {
703 for(r=1;r<=MAXREG;r++)
704 {
705 if(hsn[r]==j) {
706 for(hr=0;hr<HOST_REGS;hr++) {
707 if(cur->regmap[hr]==r+64) {
708 cur->regmap[hr]=reg;
709 cur->dirty&=~(1<<hr);
710 cur->isconst&=~(1<<hr);
711 return;
712 }
713 }
714 for(hr=0;hr<HOST_REGS;hr++) {
715 if(cur->regmap[hr]==r) {
716 cur->regmap[hr]=reg;
717 cur->dirty&=~(1<<hr);
718 cur->isconst&=~(1<<hr);
719 return;
720 }
721 }
722 }
723 }
724 }
725 printf("This shouldn't happen");exit(1);
726}
727// Allocate a specific ARM register.
728void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
729{
730 int n;
731
732 // see if it's already allocated (and dealloc it)
733 for(n=0;n<HOST_REGS;n++)
734 {
735 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
736 }
737
738 cur->regmap[hr]=reg;
739 cur->dirty&=~(1<<hr);
740 cur->isconst&=~(1<<hr);
741}
742
743// Alloc cycle count into dedicated register
744alloc_cc(struct regstat *cur,int i)
745{
746 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
747}
748
749/* Special alloc */
750
751
752/* Assembler */
753
// Printable name of each of the 16 host registers, indexed by register
// number; used by the assem_debug() traces. Register 11 is shown as "fp",
// 13 as "sp", 14 as "lr" and 15 as "pc".
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"};
771
772void output_byte(u_char byte)
773{
774 *(out++)=byte;
775}
776void output_modrm(u_char mod,u_char rm,u_char ext)
777{
778 assert(mod<4);
779 assert(rm<8);
780 assert(ext<8);
781 u_char byte=(mod<<6)|(ext<<3)|rm;
782 *(out++)=byte;
783}
784void output_sib(u_char scale,u_char index,u_char base)
785{
786 assert(scale<4);
787 assert(index<8);
788 assert(base<8);
789 u_char byte=(scale<<6)|(index<<3)|base;
790 *(out++)=byte;
791}
792void output_w32(u_int word)
793{
794 *((u_int *)out)=word;
795 out+=4;
796}
// Build the register fields of an instruction word: rn in bits 16-19,
// rd in bits 12-15 and rm in bits 0-3. Each register number must be below
// 16 (asserted).
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  const u_int rn_field=rn<<16;
  const u_int rd_field=rd<<12;
  return rn_field|rd_field|rm;
}
// Build the operand fields of an immediate-form instruction word: rn in
// bits 16-19, rd in bits 12-15, the 8-bit immediate in bits 0-7, and
// ((32-shift)&30) placed at bit 7 upwards. `shift` must be even and imm
// must be below 256 (both asserted).
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_field|imm;
}
// Try to express imm as an 8-bit value plus an even rotation.
// On success the 12-bit encoding (rotation field in bits 8-11, 8-bit value
// in bits 0-7) is stored in *encoded and 1 is returned. Returns 0, leaving
// *encoded untouched, when no rotation brings imm below 256.
u_int genimm(u_int imm,u_int *encoded)
{
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  // Rotate imm right two bits at a time; `rot` tracks the matching
  // rotation count that goes into the encoding.
  for(int rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
826u_int genjmp(u_int addr)
827{
828 int offset=addr-(int)out-8;
e80343e2 829 if(offset<-33554432||offset>=33554432) {
830 if (addr>2) {
831 printf("genjmp: out of range: %08x\n", offset);
832 exit(1);
833 }
834 return 0;
835 }
57871462 836 return ((u_int)offset>>2)&0xffffff;
837}
838
839void emit_mov(int rs,int rt)
840{
841 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
842 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
843}
844
845void emit_movs(int rs,int rt)
846{
847 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_add(int rs1,int rs2,int rt)
852{
853 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
854 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
855}
856
857void emit_adds(int rs1,int rs2,int rt)
858{
859 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adcs(int rs1,int rs2,int rt)
864{
865 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_sbc(int rs1,int rs2,int rt)
870{
871 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbcs(int rs1,int rs2,int rt)
876{
877 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_neg(int rs, int rt)
882{
883 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
884 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
885}
886
887void emit_negs(int rs, int rt)
888{
889 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_sub(int rs1,int rs2,int rt)
894{
895 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
896 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
897}
898
899void emit_subs(int rs1,int rs2,int rt)
900{
901 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_zeroreg(int rt)
906{
907 assem_debug("mov %s,#0\n",regname[rt]);
908 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
909}
910
911void emit_loadreg(int r, int hr)
912{
3d624f89 913#ifdef FORCE32
914 if(r&64) {
915 printf("64bit load in 32bit mode!\n");
916 exit(1);
917 }
918#endif
57871462 919 if((r&63)==0)
920 emit_zeroreg(hr);
921 else {
3d624f89 922 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 923 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
924 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
925 if(r==CCREG) addr=(int)&cycle_count;
926 if(r==CSREG) addr=(int)&Status;
927 if(r==FSREG) addr=(int)&FCR31;
928 if(r==INVCP) addr=(int)&invc_ptr;
929 u_int offset = addr-(u_int)&dynarec_local;
930 assert(offset<4096);
931 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
932 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
933 }
934}
935void emit_storereg(int r, int hr)
936{
3d624f89 937#ifdef FORCE32
938 if(r&64) {
939 printf("64bit store in 32bit mode!\n");
940 exit(1);
941 }
942#endif
943 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 944 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
945 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
946 if(r==CCREG) addr=(int)&cycle_count;
947 if(r==FSREG) addr=(int)&FCR31;
948 u_int offset = addr-(u_int)&dynarec_local;
949 assert(offset<4096);
950 assem_debug("str %s,fp+%d\n",regname[hr],offset);
951 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
952}
953
954void emit_test(int rs, int rt)
955{
956 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
957 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
958}
959
960void emit_testimm(int rs,int imm)
961{
962 u_int armval;
963 assem_debug("tst %s,$%d\n",regname[rs],imm);
964 assert(genimm(imm,&armval));
965 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
966}
967
b9b61529 968void emit_testeqimm(int rs,int imm)
969{
970 u_int armval;
971 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
972 assert(genimm(imm,&armval));
973 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
974}
975
57871462 976void emit_not(int rs,int rt)
977{
978 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
979 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
980}
981
b9b61529 982void emit_mvnmi(int rs,int rt)
983{
984 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
985 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
986}
987
57871462 988void emit_and(u_int rs1,u_int rs2,u_int rt)
989{
990 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
991 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
992}
993
994void emit_or(u_int rs1,u_int rs2,u_int rt)
995{
996 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
997 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
998}
999void emit_or_and_set_flags(int rs1,int rs2,int rt)
1000{
1001 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1002 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1003}
1004
1005void emit_xor(u_int rs1,u_int rs2,u_int rt)
1006{
1007 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1008 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1009}
1010
1011void emit_loadlp(u_int imm,u_int rt)
1012{
1013 add_literal((int)out,imm);
1014 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1015 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1016}
1017void emit_movw(u_int imm,u_int rt)
1018{
1019 assert(imm<65536);
1020 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1021 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1022}
1023void emit_movt(u_int imm,u_int rt)
1024{
1025 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1026 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1027}
1028void emit_movimm(u_int imm,u_int rt)
1029{
1030 u_int armval;
1031 if(genimm(imm,&armval)) {
1032 assem_debug("mov %s,#%d\n",regname[rt],imm);
1033 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1034 }else if(genimm(~imm,&armval)) {
1035 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1036 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1037 }else if(imm<65536) {
1038 #ifdef ARMv5_ONLY
1039 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1040 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1041 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1042 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1043 #else
1044 emit_movw(imm,rt);
1045 #endif
1046 }else{
1047 #ifdef ARMv5_ONLY
1048 emit_loadlp(imm,rt);
1049 #else
1050 emit_movw(imm&0x0000FFFF,rt);
1051 emit_movt(imm&0xFFFF0000,rt);
1052 #endif
1053 }
1054}
1055void emit_pcreladdr(u_int rt)
1056{
1057 assem_debug("add %s,pc,#?\n",regname[rt]);
1058 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1059}
1060
1061void emit_addimm(u_int rs,int imm,u_int rt)
1062{
1063 assert(rs<16);
1064 assert(rt<16);
1065 if(imm!=0) {
1066 assert(imm>-65536&&imm<65536);
1067 u_int armval;
1068 if(genimm(imm,&armval)) {
1069 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1070 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1071 }else if(genimm(-imm,&armval)) {
1072 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1073 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1074 }else if(imm<0) {
1075 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1076 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1077 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1078 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1079 }else{
1080 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1081 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1082 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1083 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1084 }
1085 }
1086 else if(rs!=rt) emit_mov(rs,rt);
1087}
1088
1089void emit_addimm_and_set_flags(int imm,int rt)
1090{
1091 assert(imm>-65536&&imm<65536);
1092 u_int armval;
1093 if(genimm(imm,&armval)) {
1094 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1095 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1096 }else if(genimm(-imm,&armval)) {
1097 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1098 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1099 }else if(imm<0) {
1100 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1101 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1102 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1103 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1104 }else{
1105 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1106 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1107 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1108 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1109 }
1110}
1111void emit_addimm_no_flags(u_int imm,u_int rt)
1112{
1113 emit_addimm(rt,imm,rt);
1114}
1115
1116void emit_addnop(u_int r)
1117{
1118 assert(r<16);
1119 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1120 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1121}
1122
1123void emit_adcimm(u_int rs,int imm,u_int rt)
1124{
1125 u_int armval;
1126 assert(genimm(imm,&armval));
1127 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1128 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1129}
1130/*void emit_sbcimm(int imm,u_int rt)
1131{
1132 u_int armval;
1133 assert(genimm(imm,&armval));
1134 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1135 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1136}*/
1137void emit_sbbimm(int imm,u_int rt)
1138{
1139 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1140 assert(rt<8);
1141 if(imm<128&&imm>=-128) {
1142 output_byte(0x83);
1143 output_modrm(3,rt,3);
1144 output_byte(imm);
1145 }
1146 else
1147 {
1148 output_byte(0x81);
1149 output_modrm(3,rt,3);
1150 output_w32(imm);
1151 }
1152}
1153void emit_rscimm(int rs,int imm,u_int rt)
1154{
1155 assert(0);
1156 u_int armval;
1157 assert(genimm(imm,&armval));
1158 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1159 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1160}
1161
1162void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1163{
1164 // TODO: if(genimm(imm,&armval)) ...
1165 // else
1166 emit_movimm(imm,HOST_TEMPREG);
1167 emit_adds(HOST_TEMPREG,rsl,rtl);
1168 emit_adcimm(rsh,0,rth);
1169}
1170
1171void emit_sbb(int rs1,int rs2)
1172{
1173 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1174 output_byte(0x19);
1175 output_modrm(3,rs1,rs2);
1176}
1177
1178void emit_andimm(int rs,int imm,int rt)
1179{
1180 u_int armval;
1181 if(genimm(imm,&armval)) {
1182 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1183 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1184 }else if(genimm(~imm,&armval)) {
1185 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1186 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1187 }else if(imm==65535) {
1188 #ifdef ARMv5_ONLY
1189 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1190 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1191 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1192 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1193 #else
1194 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1195 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1196 #endif
1197 }else{
1198 assert(imm>0&&imm<65535);
1199 #ifdef ARMv5_ONLY
1200 assem_debug("mov r14,#%d\n",imm&0xFF00);
1201 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1202 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1203 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1204 #else
1205 emit_movw(imm,HOST_TEMPREG);
1206 #endif
1207 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1208 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1209 }
1210}
1211
1212void emit_orimm(int rs,int imm,int rt)
1213{
1214 u_int armval;
1215 if(genimm(imm,&armval)) {
1216 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1217 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1218 }else{
1219 assert(imm>0&&imm<65536);
1220 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1221 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1222 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1223 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1224 }
1225}
1226
1227void emit_xorimm(int rs,int imm,int rt)
1228{
1229 assert(imm>0&&imm<65536);
1230 u_int armval;
1231 if(genimm(imm,&armval)) {
1232 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1234 }else{
1235 assert(imm>0);
1236 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1237 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1238 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1239 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1240 }
1241}
1242
1243void emit_shlimm(int rs,u_int imm,int rt)
1244{
1245 assert(imm>0);
1246 assert(imm<32);
1247 //if(imm==1) ...
1248 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1249 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1250}
1251
1252void emit_shrimm(int rs,u_int imm,int rt)
1253{
1254 assert(imm>0);
1255 assert(imm<32);
1256 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1257 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1258}
1259
1260void emit_sarimm(int rs,u_int imm,int rt)
1261{
1262 assert(imm>0);
1263 assert(imm<32);
1264 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1265 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1266}
1267
1268void emit_rorimm(int rs,u_int imm,int rt)
1269{
1270 assert(imm>0);
1271 assert(imm<32);
1272 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1273 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1274}
1275
1276void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1277{
1278 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1279 assert(imm>0);
1280 assert(imm<32);
1281 //if(imm==1) ...
1282 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1283 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1284 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1285 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1286}
1287
1288void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1289{
1290 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1291 assert(imm>0);
1292 assert(imm<32);
1293 //if(imm==1) ...
1294 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1295 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1296 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1297 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1298}
1299
b9b61529 1300void emit_signextend16(int rs,int rt)
1301{
1302 #ifdef ARMv5_ONLY
1303 emit_shlimm(rs,16,rt);
1304 emit_sarimm(rt,16,rt);
1305 #else
1306 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1307 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1308 #endif
1309}
1310
57871462 1311void emit_shl(u_int rs,u_int shift,u_int rt)
1312{
1313 assert(rs<16);
1314 assert(rt<16);
1315 assert(shift<16);
1316 //if(imm==1) ...
1317 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1318 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1319}
1320void emit_shr(u_int rs,u_int shift,u_int rt)
1321{
1322 assert(rs<16);
1323 assert(rt<16);
1324 assert(shift<16);
1325 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1326 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1327}
1328void emit_sar(u_int rs,u_int shift,u_int rt)
1329{
1330 assert(rs<16);
1331 assert(rt<16);
1332 assert(shift<16);
1333 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1334 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1335}
1336void emit_shlcl(int r)
1337{
1338 assem_debug("shl %%%s,%%cl\n",regname[r]);
1339 assert(0);
1340}
1341void emit_shrcl(int r)
1342{
1343 assem_debug("shr %%%s,%%cl\n",regname[r]);
1344 assert(0);
1345}
1346void emit_sarcl(int r)
1347{
1348 assem_debug("sar %%%s,%%cl\n",regname[r]);
1349 assert(0);
1350}
1351
1352void emit_shldcl(int r1,int r2)
1353{
1354 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1355 assert(0);
1356}
1357void emit_shrdcl(int r1,int r2)
1358{
1359 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1360 assert(0);
1361}
1362void emit_orrshl(u_int rs,u_int shift,u_int rt)
1363{
1364 assert(rs<16);
1365 assert(rt<16);
1366 assert(shift<16);
1367 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1368 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1369}
1370void emit_orrshr(u_int rs,u_int shift,u_int rt)
1371{
1372 assert(rs<16);
1373 assert(rt<16);
1374 assert(shift<16);
1375 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1376 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1377}
1378
1379void emit_cmpimm(int rs,int imm)
1380{
1381 u_int armval;
1382 if(genimm(imm,&armval)) {
1383 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1384 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1385 }else if(genimm(-imm,&armval)) {
1386 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1387 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1388 }else if(imm>0) {
1389 assert(imm<65536);
1390 #ifdef ARMv5_ONLY
1391 emit_movimm(imm,HOST_TEMPREG);
1392 #else
1393 emit_movw(imm,HOST_TEMPREG);
1394 #endif
1395 assem_debug("cmp %s,r14\n",regname[rs]);
1396 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1397 }else{
1398 assert(imm>-65536);
1399 #ifdef ARMv5_ONLY
1400 emit_movimm(-imm,HOST_TEMPREG);
1401 #else
1402 emit_movw(-imm,HOST_TEMPREG);
1403 #endif
1404 assem_debug("cmn %s,r14\n",regname[rs]);
1405 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1406 }
1407}
1408
1409void emit_cmovne(u_int *addr,int rt)
1410{
1411 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1412 assert(0);
1413}
1414void emit_cmovl(u_int *addr,int rt)
1415{
1416 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1417 assert(0);
1418}
1419void emit_cmovs(u_int *addr,int rt)
1420{
1421 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1422 assert(0);
1423}
1424void emit_cmovne_imm(int imm,int rt)
1425{
1426 assem_debug("movne %s,#%d\n",regname[rt],imm);
1427 u_int armval;
1428 assert(genimm(imm,&armval));
1429 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1430}
1431void emit_cmovl_imm(int imm,int rt)
1432{
1433 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1434 u_int armval;
1435 assert(genimm(imm,&armval));
1436 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1437}
1438void emit_cmovb_imm(int imm,int rt)
1439{
1440 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1441 u_int armval;
1442 assert(genimm(imm,&armval));
1443 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1444}
1445void emit_cmovs_imm(int imm,int rt)
1446{
1447 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1448 u_int armval;
1449 assert(genimm(imm,&armval));
1450 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1451}
1452void emit_cmove_reg(int rs,int rt)
1453{
1454 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1455 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1456}
1457void emit_cmovne_reg(int rs,int rt)
1458{
1459 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1460 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1461}
1462void emit_cmovl_reg(int rs,int rt)
1463{
1464 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1465 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1466}
1467void emit_cmovs_reg(int rs,int rt)
1468{
1469 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1470 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1471}
1472
// Emit a sequence leaving 1 in rt when the compare of rs with imm is
// "lt", and 0 otherwise.  rt is cleared before the compare unless it is
// the same register as rs, in which case it is cleared afterwards.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  // presumably emit_movimm leaves the flags untouched - TODO confirm
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Emit a sequence leaving 1 in rt when the compare of rs with imm is
// "cc" (carry clear), and 0 otherwise.  rt is cleared before the compare
// unless it is the same register as rs, in which case it is cleared after.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  // presumably emit_movimm leaves the flags untouched - TODO confirm
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Signed "less than constant" on a 64-bit value held in rsh:rsl, result
// in rt.  Starts from the 32-bit result on the low word, then corrects it
// from the high word (tested against 0 for imm>=0, against -1 otherwise).
// rsh must not be the same register as rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned "less than constant" on a 64-bit value held in rsh:rsl, result
// in rt.  Starts from the 32-bit result on the low word, then overrides it
// when the high word differs from 0 (imm>=0) or from -1 (imm<0).
// rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1519
1520void emit_cmp(int rs,int rt)
1521{
1522 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1523 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1524}
// Emit: compare rs with 1, load 1 into rt, then replace it with 0 on "lt".
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);
  // presumably emit_movimm leaves the flags untouched - TODO confirm
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Emit: copy rs to rt with flags (or just test rs when rs==rt), then
// overwrite rt with 1 on "ne".
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) {
    emit_test(rs,rs);
  }else{
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
// 64-bit variant of emit_set_gz32 for a value in rsh:rsl: take the 32-bit
// result for the low word, then test the high word and force rt to 1 on
// "ne" and to 0 on "mi".
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt); // high word differs from zero
  emit_cmovs_imm(0,rt);  // high word has its sign bit set
}
// Emit: rt = rsh | rsl with flags, then overwrite rt with 1 on "ne".
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
// Emit a sequence leaving 1 in rt when "cmp rs1,rs2" yields "lt", else 0.
// rt is cleared before the compare unless it aliases one of the sources,
// in which case it is cleared afterwards.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Emit a sequence leaving 1 in rt when "cmp rs1,rs2" yields "cc", else 0.
// rt is cleared before the compare unless it aliases one of the sources,
// in which case it is cleared afterwards.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1569void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1570{
1571 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1572 assert(u1!=rt);
1573 assert(u2!=rt);
1574 emit_cmp(l1,l2);
1575 emit_movimm(0,rt);
1576 emit_sbcs(u1,u2,HOST_TEMPREG);
1577 emit_cmovl_imm(1,rt);
1578}
1579void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1580{
1581 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1582 assert(u1!=rt);
1583 assert(u2!=rt);
1584 emit_cmp(l1,l2);
1585 emit_movimm(0,rt);
1586 emit_sbcs(u1,u2,HOST_TEMPREG);
1587 emit_cmovb_imm(1,rt);
1588}
1589
1590void emit_call(int a)
1591{
1592 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1593 u_int offset=genjmp(a);
1594 output_w32(0xeb000000|offset);
1595}
1596void emit_jmp(int a)
1597{
1598 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1599 u_int offset=genjmp(a);
1600 output_w32(0xea000000|offset);
1601}
// Emit "bne" to address a.
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq" to address a.
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi" to address a.
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl" to address a.
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt" to address a.
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge" to address a.
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc" to address a.
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs" to address a.
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc" to address a.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1656
// x86-style "push immediate" placeholder: logs the request, then traps.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",
              imm);
  assert(0); // no ARM implementation
}
// x86-style "pusha" placeholder: logs the request, then traps.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0); // no ARM implementation
}
// x86-style "popa" placeholder: logs the request, then traps.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0); // no ARM implementation
}
1672void emit_pushreg(u_int r)
1673{
1674 assem_debug("push %%%s\n",regname[r]);
1675 assert(0);
1676}
1677void emit_popreg(u_int r)
1678{
1679 assem_debug("pop %%%s\n",regname[r]);
1680 assert(0);
1681}
1682void emit_callreg(u_int r)
1683{
1684 assem_debug("call *%%%s\n",regname[r]);
1685 assert(0);
1686}
1687void emit_jmpreg(u_int r)
1688{
1689 assem_debug("mov pc,%s\n",regname[r]);
1690 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1691}
1692
1693void emit_readword_indexed(int offset, int rs, int rt)
1694{
1695 assert(offset>-4096&&offset<4096);
1696 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1697 if(offset>=0) {
1698 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1699 }else{
1700 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1701 }
1702}
1703void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1704{
1705 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1706 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1707}
// Word load, optionally through a map register.  With map<0 this is a
// plain offset load; otherwise addr must be 0 and the load is indexed by
// map (scaled by 4).
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load two consecutive words into rh (skipped when rh<0) and rl.
// With map<0 the words come from addr and addr+4 relative to rs.
// With a map register, map is incremented by 1 between the two loads
// (map is modified) and rh must not be rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1728void emit_movsbl_indexed(int offset, int rs, int rt)
1729{
1730 assert(offset>-256&&offset<256);
1731 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1732 if(offset>=0) {
1733 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1734 }else{
1735 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1736 }
1737}
1738void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1739{
1740 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1741 else {
1742 if(addr==0) {
1743 emit_shlimm(map,2,map);
1744 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1745 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1746 }else{
1747 assert(addr>-256&&addr<256);
1748 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1749 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1750 emit_movsbl_indexed(addr, rt, rt);
1751 }
1752 }
1753}
1754void emit_movswl_indexed(int offset, int rs, int rt)
1755{
1756 assert(offset>-256&&offset<256);
1757 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1758 if(offset>=0) {
1759 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1760 }else{
1761 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1762 }
1763}
1764void emit_movzbl_indexed(int offset, int rs, int rt)
1765{
1766 assert(offset>-4096&&offset<4096);
1767 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1768 if(offset>=0) {
1769 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1770 }else{
1771 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1772 }
1773}
1774void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1775{
1776 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1777 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1778}
// Zero-extending byte load, optionally through a map register.
// map<0: plain offset load.  Otherwise the load is indexed by map (scaled
// by 4); a non-zero addr is first added to rs, using rt as the temporary.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1791void emit_movzwl_indexed(int offset, int rs, int rt)
1792{
1793 assert(offset>-256&&offset<256);
1794 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1795 if(offset>=0) {
1796 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1797 }else{
1798 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1799 }
1800}
1801void emit_readword(int addr, int rt)
1802{
1803 u_int offset = addr-(u_int)&dynarec_local;
1804 assert(offset<4096);
1805 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1806 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1807}
1808void emit_movsbl(int addr, int rt)
1809{
1810 u_int offset = addr-(u_int)&dynarec_local;
1811 assert(offset<256);
1812 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1813 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1814}
1815void emit_movswl(int addr, int rt)
1816{
1817 u_int offset = addr-(u_int)&dynarec_local;
1818 assert(offset<256);
1819 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1820 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1821}
1822void emit_movzbl(int addr, int rt)
1823{
1824 u_int offset = addr-(u_int)&dynarec_local;
1825 assert(offset<4096);
1826 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1827 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1828}
1829void emit_movzwl(int addr, int rt)
1830{
1831 u_int offset = addr-(u_int)&dynarec_local;
1832 assert(offset<256);
1833 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1834 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1835}
1836void emit_movzwl_reg(int rs, int rt)
1837{
1838 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1839 assert(0);
1840}
1841
1842void emit_xchg(int rs, int rt)
1843{
1844 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1845 assert(0);
1846}
1847void emit_writeword_indexed(int rt, int offset, int rs)
1848{
1849 assert(offset>-4096&&offset<4096);
1850 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1851 if(offset>=0) {
1852 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1853 }else{
1854 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1855 }
1856}
1857void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1858{
1859 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1860 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1861}
// Word store, optionally through a map register.  With map<0 this is a
// plain offset store; otherwise addr must be 0 and the store is indexed by
// map (scaled by 4).  temp is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store rh and rl as two consecutive words.
// map<0: plain stores at addr and addr+4 (rh skipped when negative).
// With a map register rh must be valid; the second word is reached either
// through temp = map+1 (when temp differs from rs) or by advancing rs by 4
// in place.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1886void emit_writehword_indexed(int rt, int offset, int rs)
1887{
1888 assert(offset>-256&&offset<256);
1889 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1890 if(offset>=0) {
1891 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1892 }else{
1893 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1894 }
1895}
1896void emit_writebyte_indexed(int rt, int offset, int rs)
1897{
1898 assert(offset>-4096&&offset<4096);
1899 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1900 if(offset>=0) {
1901 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1902 }else{
1903 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1904 }
1905}
1906void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1907{
1908 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1909 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1910}
// Byte store, optionally through a map register.
// map<0: plain offset store.  Otherwise the store is indexed by map
// (scaled by 4); a non-zero addr is first added to rs into temp.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1923void emit_writeword(int rt, int addr)
1924{
1925 u_int offset = addr-(u_int)&dynarec_local;
1926 assert(offset<4096);
1927 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1928 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1929}
1930void emit_writehword(int rt, int addr)
1931{
1932 u_int offset = addr-(u_int)&dynarec_local;
1933 assert(offset<256);
1934 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1935 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1936}
1937void emit_writebyte(int rt, int addr)
1938{
1939 u_int offset = addr-(u_int)&dynarec_local;
1940 assert(offset<4096);
1941 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1942 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1943}
// x86-style "store immediate word" placeholder: logs the request, then traps.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",
              imm,addr);
  assert(0); // no ARM implementation
}
// x86-style "store immediate byte" placeholder: logs the request, then traps.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",
              imm,addr);
  assert(0); // no ARM implementation
}
1954
1955void emit_mul(int rs)
1956{
1957 assem_debug("mul %%%s\n",regname[rs]);
1958 assert(0);
1959}
1960void emit_imul(int rs)
1961{
1962 assem_debug("imul %%%s\n",regname[rs]);
1963 assert(0);
1964}
1965void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1966{
1967 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1968 assert(rs1<16);
1969 assert(rs2<16);
1970 assert(hi<16);
1971 assert(lo<16);
1972 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1973}
1974void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1975{
1976 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1977 assert(rs1<16);
1978 assert(rs2<16);
1979 assert(hi<16);
1980 assert(lo<16);
1981 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1982}
1983
1984void emit_div(int rs)
1985{
1986 assem_debug("div %%%s\n",regname[rs]);
1987 assert(0);
1988}
1989void emit_idiv(int rs)
1990{
1991 assem_debug("idiv %%%s\n",regname[rs]);
1992 assert(0);
1993}
// x86-style "cdq" placeholder: logs the request, then traps.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0); // no ARM implementation
}
1999
2000void emit_clz(int rs,int rt)
2001{
2002 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2003 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2004}
2005
2006void emit_subcs(int rs1,int rs2,int rt)
2007{
2008 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2009 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2010}
2011
2012void emit_shrcc_imm(int rs,u_int imm,int rt)
2013{
2014 assert(imm>0);
2015 assert(imm<32);
2016 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2017 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2018}
2019
2020void emit_negmi(int rs, int rt)
2021{
2022 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2023 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2024}
2025
2026void emit_negsmi(int rs, int rt)
2027{
2028 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2029 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2030}
2031
2032void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2033{
2034 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2035 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2036}
2037
2038void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2039{
2040 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2041 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2042}
2043
2044void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2045{
2046 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2047 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2048}
2049
2050void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2051{
2052 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2053 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2054}
2055
2056void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2057{
2058 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2059 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2060}
2061
2062void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2063{
2064 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2065 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2066}
2067
2068void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2069{
2070 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2071 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2072}
2073
2074void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2075{
2076 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2077 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2078}
2079
2080void emit_teq(int rs, int rt)
2081{
2082 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2083 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2084}
2085
2086void emit_rsbimm(int rs, int imm, int rt)
2087{
2088 u_int armval;
2089 assert(genimm(imm,&armval));
2090 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2091 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2092}
2093
2094// Load 2 immediates optimizing for small code size
2095void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2096{
2097 emit_movimm(imm1,rt1);
2098 u_int armval;
2099 if(genimm(imm2-imm1,&armval)) {
2100 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2101 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2102 }else if(genimm(imm1-imm2,&armval)) {
2103 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2104 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2105 }
2106 else emit_movimm(imm2,rt2);
2107}
2108
2109// Conditionally select one of two immediates, optimizing for small code size
2110// This will only be called if HAVE_CMOV_IMM is defined
2111void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2112{
2113 u_int armval;
2114 if(genimm(imm2-imm1,&armval)) {
2115 emit_movimm(imm1,rt);
2116 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2117 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2118 }else if(genimm(imm1-imm2,&armval)) {
2119 emit_movimm(imm1,rt);
2120 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2121 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2122 }
2123 else {
2124 #ifdef ARMv5_ONLY
2125 emit_movimm(imm1,rt);
2126 add_literal((int)out,imm2);
2127 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2128 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2129 #else
2130 emit_movw(imm1&0x0000FFFF,rt);
2131 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2132 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2133 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2134 }
2135 emit_movt(imm1&0xFFFF0000,rt);
2136 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2137 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2138 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2139 }
2140 #endif
2141 }
2142}
2143
// Special case for checking invalid_code (immediate-address form).
// Not implemented in this file: always traps.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0); // unsupported
}
2149
2150// special case for checking invalid_code
2151void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2152{
2153 assert(imm<128&&imm>=0);
2154 assert(r>=0&&r<16);
2155 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2156 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2157 emit_cmpimm(HOST_TEMPREG,imm);
2158}
2159
2160// special case for tlb mapping
2161void emit_addsr12(int rs1,int rs2,int rt)
2162{
2163 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2164 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2165}
2166
// Used to preload hash table entries.
// NOTE(review): the bytes emitted here (0x0F 0x18, a ModRM byte, then a
// 32-bit absolute address) look like an x86 prefetch encoding rather than
// an ARM instruction -- confirm this is never called on ARM.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2176void emit_prefetchreg(int r)
2177{
2178 assem_debug("pld %s\n",regname[r]);
2179 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2180}
2181
2182// Special case for mini_ht
2183void emit_ldreq_indexed(int rs, u_int offset, int rt)
2184{
2185 assert(offset<4096);
2186 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2187 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2188}
2189
2190void emit_flds(int r,int sr)
2191{
2192 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2193 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2194}
2195
2196void emit_vldr(int r,int vr)
2197{
2198 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2199 output_w32(0xed900b00|(vr<<12)|(r<<16));
2200}
2201
2202void emit_fsts(int sr,int r)
2203{
2204 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2205 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2206}
2207
2208void emit_vstr(int vr,int r)
2209{
2210 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2211 output_w32(0xed800b00|(vr<<12)|(r<<16));
2212}
2213
// Emit "ftosizs s<d>,s<s>" (single source s, single destination d).
void emit_ftosizs(int s,int d)
{
  assem_debug("ftosizs s%d,s%d\n",d,s);
  {
    const unsigned int dst=((d&14)<<11)|((d&1)<<22);
    const unsigned int src=((s&14)>>1)|((s&1)<<5);
    output_w32(0xeebd0ac0|dst|src);
  }
}
2219
// Emit "ftosizd s<d>,d<s>" (double source s, single destination d).
void emit_ftosizd(int s,int d)
{
  assem_debug("ftosizd s%d,d%d\n",d,s);
  {
    const unsigned int dst=((d&14)<<11)|((d&1)<<22);
    const unsigned int src=s&7;
    output_w32(0xeebd0bc0|dst|src);
  }
}
2225
// Emit "fsitos s<d>,s<s>" (single source s, single destination d).
void emit_fsitos(int s,int d)
{
  assem_debug("fsitos s%d,s%d\n",d,s);
  {
    const unsigned int dst=((d&14)<<11)|((d&1)<<22);
    const unsigned int src=((s&14)>>1)|((s&1)<<5);
    output_w32(0xeeb80ac0|dst|src);
  }
}
2231
// Emit "fsitod d<d>,s<s>" (single source s, double destination d).
void emit_fsitod(int s,int d)
{
  assem_debug("fsitod d%d,s%d\n",d,s);
  {
    const unsigned int dst=(d&7)<<12;
    const unsigned int src=((s&14)>>1)|((s&1)<<5);
    output_w32(0xeeb80bc0|dst|src);
  }
}
2237
// Emit "fcvtds d<d>,s<s>" (single source s, double destination d).
void emit_fcvtds(int s,int d)
{
  assem_debug("fcvtds d%d,s%d\n",d,s);
  {
    const unsigned int dst=(d&7)<<12;
    const unsigned int src=((s&14)>>1)|((s&1)<<5);
    output_w32(0xeeb70ac0|dst|src);
  }
}
2243
// Emit "fcvtsd s<d>,d<s>" (double source s, single destination d).
void emit_fcvtsd(int s,int d)
{
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  {
    const unsigned int dst=((d&14)<<11)|((d&1)<<22);
    const unsigned int src=s&7;
    output_w32(0xeeb70bc0|dst|src);
  }
}
2249
// Emit "fsqrts s<d>,s<s>": single-precision square root.
// Both operands use the single-register encoding, so the debug text
// prints both as s-registers.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2255
// Emit "fsqrtd d<d>,d<s>": double-precision square root.
// Both operands use the double-register encoding, so the debug text
// prints both as d-registers.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2261
// Emit "fabss s<d>,s<s>": single-precision absolute value.
// Both operands use the single-register encoding, so the debug text
// prints both as s-registers.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2267
// Emit "fabsd d<d>,d<s>": double-precision absolute value.
// Both operands use the double-register encoding, so the debug text
// prints both as d-registers.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2273
// Emit "fnegs s<d>,s<s>": single-precision negate.
// Both operands use the single-register encoding, so the debug text
// prints both as s-registers.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2279
// Emit "fnegd d<d>,d<s>": double-precision negate.
// Both operands use the double-register encoding, so the debug text
// prints both as d-registers.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2285
// Emit "fadds s<d>,s<s1>,s<s2>": single-precision add.
void emit_fadds(int s1,int s2,int d)
{
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  {
    const unsigned int sd=((d&14)<<11)|((d&1)<<22);
    const unsigned int sn=((s1&14)<<15)|((s1&1)<<7);
    const unsigned int sm=((s2&14)>>1)|((s2&1)<<5);
    output_w32(0xee300a00|sd|sn|sm);
  }
}
2291
// Emit "faddd d<d>,d<s1>,d<s2>": double-precision add.
void emit_faddd(int s1,int s2,int d)
{
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  {
    const unsigned int dd=(d&7)<<12;
    const unsigned int dn=(s1&7)<<16;
    const unsigned int dm=s2&7;
    output_w32(0xee300b00|dd|dn|dm);
  }
}
2297
// Emit "fsubs s<d>,s<s1>,s<s2>": single-precision subtract.
void emit_fsubs(int s1,int s2,int d)
{
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  {
    const unsigned int sd=((d&14)<<11)|((d&1)<<22);
    const unsigned int sn=((s1&14)<<15)|((s1&1)<<7);
    const unsigned int sm=((s2&14)>>1)|((s2&1)<<5);
    output_w32(0xee300a40|sd|sn|sm);
  }
}
2303
// Emit "fsubd d<d>,d<s1>,d<s2>": double-precision subtract.
void emit_fsubd(int s1,int s2,int d)
{
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  {
    const unsigned int dd=(d&7)<<12;
    const unsigned int dn=(s1&7)<<16;
    const unsigned int dm=s2&7;
    output_w32(0xee300b40|dd|dn|dm);
  }
}
2309
// Emit "fmuls s<d>,s<s1>,s<s2>": single-precision multiply.
void emit_fmuls(int s1,int s2,int d)
{
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  {
    const unsigned int sd=((d&14)<<11)|((d&1)<<22);
    const unsigned int sn=((s1&14)<<15)|((s1&1)<<7);
    const unsigned int sm=((s2&14)>>1)|((s2&1)<<5);
    output_w32(0xee200a00|sd|sn|sm);
  }
}
2315
// Emit "fmuld d<d>,d<s1>,d<s2>": double-precision multiply.
void emit_fmuld(int s1,int s2,int d)
{
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  {
    const unsigned int dd=(d&7)<<12;
    const unsigned int dn=(s1&7)<<16;
    const unsigned int dm=s2&7;
    output_w32(0xee200b00|dd|dn|dm);
  }
}
2321
// Emit "fdivs s<d>,s<s1>,s<s2>": single-precision divide.
void emit_fdivs(int s1,int s2,int d)
{
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  {
    const unsigned int sd=((d&14)<<11)|((d&1)<<22);
    const unsigned int sn=((s1&14)<<15)|((s1&1)<<7);
    const unsigned int sm=((s2&14)>>1)|((s2&1)<<5);
    output_w32(0xee800a00|sd|sn|sm);
  }
}
2327
// Emit "fdivd d<d>,d<s1>,d<s2>": double-precision divide.
void emit_fdivd(int s1,int s2,int d)
{
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  {
    const unsigned int dd=(d&7)<<12;
    const unsigned int dn=(s1&7)<<16;
    const unsigned int dm=s2&7;
    output_w32(0xee800b00|dd|dn|dm);
  }
}
2333
// Emit a single-precision compare. The x and y arguments are ignored:
// the emitted word is fixed, and the debug text labels it "fcmps s14, s15".
void emit_fcmps(int x,int y)
{
  const unsigned int fcmps_s14_s15=0xeeb47a67;
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  output_w32(fcmps_s14_s15);
}
2339
// Emit a double-precision compare. The x and y arguments are ignored:
// the emitted word is fixed, and the debug text labels it "fcmpd d6, d7".
void emit_fcmpd(int x,int y)
{
  const unsigned int fcmpd_d6_d7=0xeeb46b47;
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  output_w32(fcmpd_d6_d7);
}
2345
// Emit the fixed instruction word the debug text labels "fmstat".
void emit_fmstat()
{
  const unsigned int fmstat=0xeef1fa10;
  assem_debug("fmstat\n");
  output_w32(fmstat);
}
2351
2352void emit_bicne_imm(int rs,int imm,int rt)
2353{
2354 u_int armval;
2355 assert(genimm(imm,&armval));
2356 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2357 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2358}
2359
2360void emit_biccs_imm(int rs,int imm,int rt)
2361{
2362 u_int armval;
2363 assert(genimm(imm,&armval));
2364 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2365 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2366}
2367
2368void emit_bicvc_imm(int rs,int imm,int rt)
2369{
2370 u_int armval;
2371 assert(genimm(imm,&armval));
2372 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2373 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2374}
2375
2376void emit_bichi_imm(int rs,int imm,int rt)
2377{
2378 u_int armval;
2379 assert(genimm(imm,&armval));
2380 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2381 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2382}
2383
2384void emit_orrvs_imm(int rs,int imm,int rt)
2385{
2386 u_int armval;
2387 assert(genimm(imm,&armval));
2388 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2389 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2390}
2391
b9b61529 2392void emit_orrne_imm(int rs,int imm,int rt)
2393{
2394 u_int armval;
2395 assert(genimm(imm,&armval));
2396 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2397 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2398}
2399
2400void emit_andne_imm(int rs,int imm,int rt)
2401{
2402 u_int armval;
2403 assert(genimm(imm,&armval));
2404 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2405 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2406}
2407
// Emit "addvc pc,pc,#0". The target a is only printed in the debug
// output; the immediate field of the emitted word is left as zero.
void emit_jno_unlikely(int a)
{
  const unsigned int addvc_pc_pc=0x72800000;
  assem_debug("addvc pc,pc,#? (%x)\n",a);
  output_w32(addvc_pc_pc|rd_rn_rm(15,15,0));
}
2414
2415// Save registers before function call
2416void save_regs(u_int reglist)
2417{
2418 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2419 if(!reglist) return;
2420 assem_debug("stmia fp,{");
2421 if(reglist&1) assem_debug("r0, ");
2422 if(reglist&2) assem_debug("r1, ");
2423 if(reglist&4) assem_debug("r2, ");
2424 if(reglist&8) assem_debug("r3, ");
2425 if(reglist&0x1000) assem_debug("r12");
2426 assem_debug("}\n");
2427 output_w32(0xe88b0000|reglist);
2428}
2429// Restore registers after function call
2430void restore_regs(u_int reglist)
2431{
2432 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2433 if(!reglist) return;
2434 assem_debug("ldmia fp,{");
2435 if(reglist&1) assem_debug("r0, ");
2436 if(reglist&2) assem_debug("r1, ");
2437 if(reglist&4) assem_debug("r2, ");
2438 if(reglist&8) assem_debug("r3, ");
2439 if(reglist&0x1000) assem_debug("r12");
2440 assem_debug("}\n");
2441 output_w32(0xe89b0000|reglist);
2442}
2443
2444// Write back consts using r14 so we don't disturb the other registers
2445void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2446{
2447 int hr;
2448 for(hr=0;hr<HOST_REGS;hr++) {
2449 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2450 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2451 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2452 int value=constmap[i][hr];
2453 if(value==0) {
2454 emit_zeroreg(HOST_TEMPREG);
2455 }
2456 else {
2457 emit_movimm(value,HOST_TEMPREG);
2458 }
2459 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2460#ifndef FORCE32
57871462 2461 if((i_is32>>i_regmap[hr])&1) {
2462 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2463 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2464 }
24385cae 2465#endif
57871462 2466 }
2467 }
2468 }
2469 }
2470}
2471
2472/* Stubs/epilogue */
2473
2474void literal_pool(int n)
2475{
2476 if(!literalcount) return;
2477 if(n) {
2478 if((int)out-literals[0][0]<4096-n) return;
2479 }
2480 u_int *ptr;
2481 int i;
2482 for(i=0;i<literalcount;i++)
2483 {
2484 ptr=(u_int *)literals[i][0];
2485 u_int offset=(u_int)out-(u_int)ptr-8;
2486 assert(offset<4096);
2487 assert(!(offset&3));
2488 *ptr|=offset;
2489 output_w32(literals[i][1]);
2490 }
2491 literalcount=0;
2492}
2493
2494void literal_pool_jumpover(int n)
2495{
2496 if(!literalcount) return;
2497 if(n) {
2498 if((int)out-literals[0][0]<4096-n) return;
2499 }
2500 int jaddr=(int)out;
2501 emit_jmp(0);
2502 literal_pool(0);
2503 set_jump_target(jaddr,(int)out);
2504}
2505
2506emit_extjump2(int addr, int target, int linker)
2507{
2508 u_char *ptr=(u_char *)addr;
2509 assert((ptr[3]&0x0e)==0xa);
2510 emit_loadlp(target,0);
2511 emit_loadlp(addr,1);
24385cae 2512 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2513 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2514//DEBUG >
2515#ifdef DEBUG_CYCLE_COUNT
2516 emit_readword((int)&last_count,ECX);
2517 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2518 emit_readword((int)&next_interupt,ECX);
2519 emit_writeword(HOST_CCREG,(int)&Count);
2520 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2521 emit_writeword(ECX,(int)&last_count);
2522#endif
2523//DEBUG <
2524 emit_jmp(linker);
2525}
2526
2527emit_extjump(int addr, int target)
2528{
2529 emit_extjump2(addr, target, (int)dyna_linker);
2530}
2531emit_extjump_ds(int addr, int target)
2532{
2533 emit_extjump2(addr, target, (int)dyna_linker_ds);
2534}
2535
2536do_readstub(int n)
2537{
2538 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2539 literal_pool(256);
2540 set_jump_target(stubs[n][1],(int)out);
2541 int type=stubs[n][0];
2542 int i=stubs[n][3];
2543 int rs=stubs[n][4];
2544 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2545 u_int reglist=stubs[n][7];
2546 signed char *i_regmap=i_regs->regmap;
2547 int addr=get_reg(i_regmap,AGEN1+(i&1));
2548 int rth,rt;
2549 int ds;
b9b61529 2550 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2551 rth=get_reg(i_regmap,FTEMP|64);
2552 rt=get_reg(i_regmap,FTEMP);
2553 }else{
2554 rth=get_reg(i_regmap,rt1[i]|64);
2555 rt=get_reg(i_regmap,rt1[i]);
2556 }
2557 assert(rs>=0);
2558 assert(rt>=0);
2559 if(addr<0) addr=rt;
2560 assert(addr>=0);
2561 int ftable=0;
2562 if(type==LOADB_STUB||type==LOADBU_STUB)
2563 ftable=(int)readmemb;
2564 if(type==LOADH_STUB||type==LOADHU_STUB)
2565 ftable=(int)readmemh;
2566 if(type==LOADW_STUB)
2567 ftable=(int)readmem;
24385cae 2568#ifndef FORCE32
57871462 2569 if(type==LOADD_STUB)
2570 ftable=(int)readmemd;
24385cae 2571#endif
2572 assert(ftable!=0);
57871462 2573 emit_writeword(rs,(int)&address);
2574 //emit_pusha();
2575 save_regs(reglist);
2576 ds=i_regs!=&regs[i];
2577 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2578 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2579 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2580 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2581 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2582 emit_shrimm(rs,16,1);
2583 int cc=get_reg(i_regmap,CCREG);
2584 if(cc<0) {
2585 emit_loadreg(CCREG,2);
2586 }
2587 emit_movimm(ftable,0);
2588 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2589 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2590 //emit_readword((int)&last_count,temp);
2591 //emit_add(cc,temp,cc);
2592 //emit_writeword(cc,(int)&Count);
2593 //emit_mov(15,14);
2594 emit_call((int)&indirect_jump_indexed);
2595 //emit_callreg(rs);
2596 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2597 // We really shouldn't need to update the count here,
2598 // but not doing so causes random crashes...
2599 emit_readword((int)&Count,HOST_TEMPREG);
2600 emit_readword((int)&next_interupt,2);
2601 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2602 emit_writeword(2,(int)&last_count);
2603 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2604 if(cc<0) {
2605 emit_storereg(CCREG,HOST_TEMPREG);
2606 }
2607 //emit_popa();
2608 restore_regs(reglist);
2609 //if((cc=get_reg(regmap,CCREG))>=0) {
2610 // emit_loadreg(CCREG,cc);
2611 //}
2612 if(type==LOADB_STUB)
2613 emit_movsbl((int)&readmem_dword,rt);
2614 if(type==LOADBU_STUB)
2615 emit_movzbl((int)&readmem_dword,rt);
2616 if(type==LOADH_STUB)
2617 emit_movswl((int)&readmem_dword,rt);
2618 if(type==LOADHU_STUB)
2619 emit_movzwl((int)&readmem_dword,rt);
2620 if(type==LOADW_STUB)
2621 emit_readword((int)&readmem_dword,rt);
2622 if(type==LOADD_STUB) {
2623 emit_readword((int)&readmem_dword,rt);
2624 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2625 }
2626 emit_jmp(stubs[n][2]); // return address
2627}
2628
2629inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2630{
2631 int rs=get_reg(regmap,target);
2632 int rth=get_reg(regmap,target|64);
2633 int rt=get_reg(regmap,target);
2634 assert(rs>=0);
2635 assert(rt>=0);
2636 int ftable=0;
2637 if(type==LOADB_STUB||type==LOADBU_STUB)
2638 ftable=(int)readmemb;
2639 if(type==LOADH_STUB||type==LOADHU_STUB)
2640 ftable=(int)readmemh;
2641 if(type==LOADW_STUB)
2642 ftable=(int)readmem;
24385cae 2643#ifndef FORCE32
57871462 2644 if(type==LOADD_STUB)
2645 ftable=(int)readmemd;
24385cae 2646#endif
2647 assert(ftable!=0);
57871462 2648 emit_writeword(rs,(int)&address);
2649 //emit_pusha();
2650 save_regs(reglist);
2651 //emit_shrimm(rs,16,1);
2652 int cc=get_reg(regmap,CCREG);
2653 if(cc<0) {
2654 emit_loadreg(CCREG,2);
2655 }
2656 //emit_movimm(ftable,0);
2657 emit_movimm(((u_int *)ftable)[addr>>16],0);
2658 //emit_readword((int)&last_count,12);
2659 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2660 if((signed int)addr>=(signed int)0xC0000000) {
2661 // Pagefault address
2662 int ds=regmap!=regs[i].regmap;
2663 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2664 }
2665 //emit_add(12,2,2);
2666 //emit_writeword(2,(int)&Count);
2667 //emit_call(((u_int *)ftable)[addr>>16]);
2668 emit_call((int)&indirect_jump);
2669 // We really shouldn't need to update the count here,
2670 // but not doing so causes random crashes...
2671 emit_readword((int)&Count,HOST_TEMPREG);
2672 emit_readword((int)&next_interupt,2);
2673 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2674 emit_writeword(2,(int)&last_count);
2675 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2676 if(cc<0) {
2677 emit_storereg(CCREG,HOST_TEMPREG);
2678 }
2679 //emit_popa();
2680 restore_regs(reglist);
2681 if(type==LOADB_STUB)
2682 emit_movsbl((int)&readmem_dword,rt);
2683 if(type==LOADBU_STUB)
2684 emit_movzbl((int)&readmem_dword,rt);
2685 if(type==LOADH_STUB)
2686 emit_movswl((int)&readmem_dword,rt);
2687 if(type==LOADHU_STUB)
2688 emit_movzwl((int)&readmem_dword,rt);
2689 if(type==LOADW_STUB)
2690 emit_readword((int)&readmem_dword,rt);
2691 if(type==LOADD_STUB) {
2692 emit_readword((int)&readmem_dword,rt);
2693 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2694 }
2695}
2696
2697do_writestub(int n)
2698{
2699 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2700 literal_pool(256);
2701 set_jump_target(stubs[n][1],(int)out);
2702 int type=stubs[n][0];
2703 int i=stubs[n][3];
2704 int rs=stubs[n][4];
2705 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2706 u_int reglist=stubs[n][7];
2707 signed char *i_regmap=i_regs->regmap;
2708 int addr=get_reg(i_regmap,AGEN1+(i&1));
2709 int rth,rt,r;
2710 int ds;
b9b61529 2711 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2712 rth=get_reg(i_regmap,FTEMP|64);
2713 rt=get_reg(i_regmap,r=FTEMP);
2714 }else{
2715 rth=get_reg(i_regmap,rs2[i]|64);
2716 rt=get_reg(i_regmap,r=rs2[i]);
2717 }
2718 assert(rs>=0);
2719 assert(rt>=0);
2720 if(addr<0) addr=get_reg(i_regmap,-1);
2721 assert(addr>=0);
2722 int ftable=0;
2723 if(type==STOREB_STUB)
2724 ftable=(int)writememb;
2725 if(type==STOREH_STUB)
2726 ftable=(int)writememh;
2727 if(type==STOREW_STUB)
2728 ftable=(int)writemem;
24385cae 2729#ifndef FORCE32
57871462 2730 if(type==STORED_STUB)
2731 ftable=(int)writememd;
24385cae 2732#endif
2733 assert(ftable!=0);
57871462 2734 emit_writeword(rs,(int)&address);
2735 //emit_shrimm(rs,16,rs);
2736 //emit_movmem_indexedx4(ftable,rs,rs);
2737 if(type==STOREB_STUB)
2738 emit_writebyte(rt,(int)&byte);
2739 if(type==STOREH_STUB)
2740 emit_writehword(rt,(int)&hword);
2741 if(type==STOREW_STUB)
2742 emit_writeword(rt,(int)&word);
2743 if(type==STORED_STUB) {
3d624f89 2744#ifndef FORCE32
57871462 2745 emit_writeword(rt,(int)&dword);
2746 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2747#else
2748 printf("STORED_STUB\n");
2749#endif
57871462 2750 }
2751 //emit_pusha();
2752 save_regs(reglist);
2753 ds=i_regs!=&regs[i];
2754 int real_rs=get_reg(i_regmap,rs1[i]);
2755 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2756 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2757 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2758 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2759 emit_shrimm(rs,16,1);
2760 int cc=get_reg(i_regmap,CCREG);
2761 if(cc<0) {
2762 emit_loadreg(CCREG,2);
2763 }
2764 emit_movimm(ftable,0);
2765 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2766 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2767 //emit_readword((int)&last_count,temp);
2768 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2769 //emit_add(cc,temp,cc);
2770 //emit_writeword(cc,(int)&Count);
2771 emit_call((int)&indirect_jump_indexed);
2772 //emit_callreg(rs);
2773 emit_readword((int)&Count,HOST_TEMPREG);
2774 emit_readword((int)&next_interupt,2);
2775 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2776 emit_writeword(2,(int)&last_count);
2777 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2778 if(cc<0) {
2779 emit_storereg(CCREG,HOST_TEMPREG);
2780 }
2781 //emit_popa();
2782 restore_regs(reglist);
2783 //if((cc=get_reg(regmap,CCREG))>=0) {
2784 // emit_loadreg(CCREG,cc);
2785 //}
2786 emit_jmp(stubs[n][2]); // return address
2787}
2788
2789inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2790{
2791 int rs=get_reg(regmap,-1);
2792 int rth=get_reg(regmap,target|64);
2793 int rt=get_reg(regmap,target);
2794 assert(rs>=0);
2795 assert(rt>=0);
2796 int ftable=0;
2797 if(type==STOREB_STUB)
2798 ftable=(int)writememb;
2799 if(type==STOREH_STUB)
2800 ftable=(int)writememh;
2801 if(type==STOREW_STUB)
2802 ftable=(int)writemem;
24385cae 2803#ifndef FORCE32
57871462 2804 if(type==STORED_STUB)
2805 ftable=(int)writememd;
24385cae 2806#endif
2807 assert(ftable!=0);
57871462 2808 emit_writeword(rs,(int)&address);
2809 //emit_shrimm(rs,16,rs);
2810 //emit_movmem_indexedx4(ftable,rs,rs);
2811 if(type==STOREB_STUB)
2812 emit_writebyte(rt,(int)&byte);
2813 if(type==STOREH_STUB)
2814 emit_writehword(rt,(int)&hword);
2815 if(type==STOREW_STUB)
2816 emit_writeword(rt,(int)&word);
2817 if(type==STORED_STUB) {
3d624f89 2818#ifndef FORCE32
57871462 2819 emit_writeword(rt,(int)&dword);
2820 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2821#else
2822 printf("STORED_STUB\n");
2823#endif
57871462 2824 }
2825 //emit_pusha();
2826 save_regs(reglist);
2827 //emit_shrimm(rs,16,1);
2828 int cc=get_reg(regmap,CCREG);
2829 if(cc<0) {
2830 emit_loadreg(CCREG,2);
2831 }
2832 //emit_movimm(ftable,0);
2833 emit_movimm(((u_int *)ftable)[addr>>16],0);
2834 //emit_readword((int)&last_count,12);
2835 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2836 if((signed int)addr>=(signed int)0xC0000000) {
2837 // Pagefault address
2838 int ds=regmap!=regs[i].regmap;
2839 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2840 }
2841 //emit_add(12,2,2);
2842 //emit_writeword(2,(int)&Count);
2843 //emit_call(((u_int *)ftable)[addr>>16]);
2844 emit_call((int)&indirect_jump);
2845 emit_readword((int)&Count,HOST_TEMPREG);
2846 emit_readword((int)&next_interupt,2);
2847 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2848 emit_writeword(2,(int)&last_count);
2849 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2850 if(cc<0) {
2851 emit_storereg(CCREG,HOST_TEMPREG);
2852 }
2853 //emit_popa();
2854 restore_regs(reglist);
2855}
2856
2857do_unalignedwritestub(int n)
2858{
2859 set_jump_target(stubs[n][1],(int)out);
2860 output_w32(0xef000000);
2861 emit_jmp(stubs[n][2]); // return address
2862}
2863
// Debug helper: print the register values passed in, followed by the word
// located just below the first argument in memory. esp is not printed.
// NOTE(review): indexing one int below &edi reads outside the parameter
// object and depends on the calling convention.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *first_arg=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,first_arg[-1]);
}
2868
2869do_invstub(int n)
2870{
2871 literal_pool(20);
2872 u_int reglist=stubs[n][3];
2873 set_jump_target(stubs[n][1],(int)out);
2874 save_regs(reglist);
2875 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2876 emit_call((int)&invalidate_addr);
2877 restore_regs(reglist);
2878 emit_jmp(stubs[n][2]); // return address
2879}
2880
2881int do_dirty_stub(int i)
2882{
2883 assem_debug("do_dirty_stub %x\n",start+i*4);
2884 // Careful about the code output here, verify_dirty needs to parse it.
2885 #ifdef ARMv5_ONLY
2886 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2887 emit_loadlp((int)copy,2);
2888 emit_loadlp(slen*4,3);
2889 #else
2890 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2891 emit_movw(((u_int)copy)&0x0000FFFF,2);
2892 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2893 emit_movt(((u_int)copy)&0xFFFF0000,2);
2894 emit_movw(slen*4,3);
2895 #endif
2896 emit_movimm(start+i*4,0);
2897 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2898 int entry=(int)out;
2899 load_regs_entry(i);
2900 if(entry==(int)out) entry=instr_addr[i];
2901 emit_jmp(instr_addr[i]);
2902 return entry;
2903}
2904
2905void do_dirty_stub_ds()
2906{
2907 // Careful about the code output here, verify_dirty needs to parse it.
2908 #ifdef ARMv5_ONLY
2909 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2910 emit_loadlp((int)copy,2);
2911 emit_loadlp(slen*4,3);
2912 #else
2913 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2914 emit_movw(((u_int)copy)&0x0000FFFF,2);
2915 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2916 emit_movt(((u_int)copy)&0xFFFF0000,2);
2917 emit_movw(slen*4,3);
2918 #endif
2919 emit_movimm(start+1,0);
2920 emit_call((int)&verify_code_ds);
2921}
2922
2923do_cop1stub(int n)
2924{
2925 literal_pool(256);
2926 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2927 set_jump_target(stubs[n][1],(int)out);
2928 int i=stubs[n][3];
3d624f89 2929// int rs=stubs[n][4];
57871462 2930 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2931 int ds=stubs[n][6];
2932 if(!ds) {
2933 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2934 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2935 }
2936 //else {printf("fp exception in delay slot\n");}
2937 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2938 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2939 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2940 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2941 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2942}
2943
2944/* TLB */
2945
2946int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
2947{
2948 if(c) {
2949 if((signed int)addr>=(signed int)0xC0000000) {
2950 // address_generation already loaded the const
2951 emit_readword_dualindexedx4(FP,map,map);
2952 }
2953 else
2954 return -1; // No mapping
2955 }
2956 else {
2957 assert(s!=map);
2958 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2959 emit_addsr12(map,s,map);
2960 // Schedule this while we wait on the load
2961 //if(x) emit_xorimm(s,x,ar);
2962 if(shift>=0) emit_shlimm(s,3,shift);
2963 if(~a) emit_andimm(s,a,ar);
2964 emit_readword_dualindexedx4(FP,map,map);
2965 }
2966 return map;
2967}
2968int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
2969{
2970 if(!c||(signed int)addr>=(signed int)0xC0000000) {
2971 emit_test(map,map);
2972 *jaddr=(int)out;
2973 emit_js(0);
2974 }
2975 return map;
2976}
2977
2978int gen_tlb_addr_r(int ar, int map) {
2979 if(map>=0) {
2980 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2981 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2982 }
2983}
2984
2985int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
2986{
2987 if(c) {
2988 if(addr<0x80800000||addr>=0xC0000000) {
2989 // address_generation already loaded the const
2990 emit_readword_dualindexedx4(FP,map,map);
2991 }
2992 else
2993 return -1; // No mapping
2994 }
2995 else {
2996 assert(s!=map);
2997 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2998 emit_addsr12(map,s,map);
2999 // Schedule this while we wait on the load
3000 //if(x) emit_xorimm(s,x,ar);
3001 emit_readword_dualindexedx4(FP,map,map);
3002 }
3003 return map;
3004}
3005int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3006{
3007 if(!c||addr<0x80800000||addr>=0xC0000000) {
3008 emit_testimm(map,0x40000000);
3009 *jaddr=(int)out;
3010 emit_jne(0);
3011 }
3012}
3013
3014int gen_tlb_addr_w(int ar, int map) {
3015 if(map>=0) {
3016 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3017 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3018 }
3019}
3020
3021// Generate the address of the memory_map entry, relative to dynarec_local
3022generate_map_const(u_int addr,int reg) {
3023 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3024 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3025}
3026
3027/* Special assem */
3028
3029void shift_assemble_arm(int i,struct regstat *i_regs)
3030{
3031 if(rt1[i]) {
3032 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3033 {
3034 signed char s,t,shift;
3035 t=get_reg(i_regs->regmap,rt1[i]);
3036 s=get_reg(i_regs->regmap,rs1[i]);
3037 shift=get_reg(i_regs->regmap,rs2[i]);
3038 if(t>=0){
3039 if(rs1[i]==0)
3040 {
3041 emit_zeroreg(t);
3042 }
3043 else if(rs2[i]==0)
3044 {
3045 assert(s>=0);
3046 if(s!=t) emit_mov(s,t);
3047 }
3048 else
3049 {
3050 emit_andimm(shift,31,HOST_TEMPREG);
3051 if(opcode2[i]==4) // SLLV
3052 {
3053 emit_shl(s,HOST_TEMPREG,t);
3054 }
3055 if(opcode2[i]==6) // SRLV
3056 {
3057 emit_shr(s,HOST_TEMPREG,t);
3058 }
3059 if(opcode2[i]==7) // SRAV
3060 {
3061 emit_sar(s,HOST_TEMPREG,t);
3062 }
3063 }
3064 }
3065 } else { // DSLLV/DSRLV/DSRAV
3066 signed char sh,sl,th,tl,shift;
3067 th=get_reg(i_regs->regmap,rt1[i]|64);
3068 tl=get_reg(i_regs->regmap,rt1[i]);
3069 sh=get_reg(i_regs->regmap,rs1[i]|64);
3070 sl=get_reg(i_regs->regmap,rs1[i]);
3071 shift=get_reg(i_regs->regmap,rs2[i]);
3072 if(tl>=0){
3073 if(rs1[i]==0)
3074 {
3075 emit_zeroreg(tl);
3076 if(th>=0) emit_zeroreg(th);
3077 }
3078 else if(rs2[i]==0)
3079 {
3080 assert(sl>=0);
3081 if(sl!=tl) emit_mov(sl,tl);
3082 if(th>=0&&sh!=th) emit_mov(sh,th);
3083 }
3084 else
3085 {
3086 // FIXME: What if shift==tl ?
3087 assert(shift!=tl);
3088 int temp=get_reg(i_regs->regmap,-1);
3089 int real_th=th;
3090 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3091 assert(sl>=0);
3092 assert(sh>=0);
3093 emit_andimm(shift,31,HOST_TEMPREG);
3094 if(opcode2[i]==0x14) // DSLLV
3095 {
3096 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3097 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3098 emit_orrshr(sl,HOST_TEMPREG,th);
3099 emit_andimm(shift,31,HOST_TEMPREG);
3100 emit_testimm(shift,32);
3101 emit_shl(sl,HOST_TEMPREG,tl);
3102 if(th>=0) emit_cmovne_reg(tl,th);
3103 emit_cmovne_imm(0,tl);
3104 }
3105 if(opcode2[i]==0x16) // DSRLV
3106 {
3107 assert(th>=0);
3108 emit_shr(sl,HOST_TEMPREG,tl);
3109 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3110 emit_orrshl(sh,HOST_TEMPREG,tl);
3111 emit_andimm(shift,31,HOST_TEMPREG);
3112 emit_testimm(shift,32);
3113 emit_shr(sh,HOST_TEMPREG,th);
3114 emit_cmovne_reg(th,tl);
3115 if(real_th>=0) emit_cmovne_imm(0,th);
3116 }
3117 if(opcode2[i]==0x17) // DSRAV
3118 {
3119 assert(th>=0);
3120 emit_shr(sl,HOST_TEMPREG,tl);
3121 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3122 if(real_th>=0) {
3123 assert(temp>=0);
3124 emit_sarimm(th,31,temp);
3125 }
3126 emit_orrshl(sh,HOST_TEMPREG,tl);
3127 emit_andimm(shift,31,HOST_TEMPREG);
3128 emit_testimm(shift,32);
3129 emit_sar(sh,HOST_TEMPREG,th);
3130 emit_cmovne_reg(th,tl);
3131 if(real_th>=0) emit_cmovne_reg(temp,th);
3132 }
3133 }
3134 }
3135 }
3136 }
3137}
3138#define shift_assemble shift_assemble_arm
3139
3140void loadlr_assemble_arm(int i,struct regstat *i_regs)
3141{
3142 int s,th,tl,temp,temp2,addr,map=-1;
3143 int offset;
3144 int jaddr=0;
3145 int memtarget,c=0;
3146 u_int hr,reglist=0;
3147 th=get_reg(i_regs->regmap,rt1[i]|64);
3148 tl=get_reg(i_regs->regmap,rt1[i]);
3149 s=get_reg(i_regs->regmap,rs1[i]);
3150 temp=get_reg(i_regs->regmap,-1);
3151 temp2=get_reg(i_regs->regmap,FTEMP);
3152 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3153 assert(addr<0);
3154 offset=imm[i];
3155 for(hr=0;hr<HOST_REGS;hr++) {
3156 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3157 }
3158 reglist|=1<<temp;
3159 if(offset||s<0||c) addr=temp2;
3160 else addr=s;
3161 if(s>=0) {
3162 c=(i_regs->wasconst>>s)&1;
3163 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3164 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3165 }
3166 if(tl>=0) {
3167 //assert(tl>=0);
3168 //assert(rt1[i]);
3169 if(!using_tlb) {
3170 if(!c) {
3171 emit_shlimm(addr,3,temp);
3172 if (opcode[i]==0x22||opcode[i]==0x26) {
3173 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3174 }else{
3175 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3176 }
3177 emit_cmpimm(addr,0x800000);
3178 jaddr=(int)out;
3179 emit_jno(0);
3180 }
3181 else {
3182 if (opcode[i]==0x22||opcode[i]==0x26) {
3183 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3184 }else{
3185 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3186 }
3187 }
3188 }else{ // using tlb
3189 int a;
3190 if(c) {
3191 a=-1;
3192 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3193 a=0xFFFFFFFC; // LWL/LWR
3194 }else{
3195 a=0xFFFFFFF8; // LDL/LDR
3196 }
3197 map=get_reg(i_regs->regmap,TLREG);
3198 assert(map>=0);
3199 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3200 if(c) {
3201 if (opcode[i]==0x22||opcode[i]==0x26) {
3202 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3203 }else{
3204 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3205 }
3206 }
3207 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3208 }
3209 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3210 if(!c||memtarget) {
3211 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3212 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3213 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3214 }
3215 else
3216 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3217 emit_andimm(temp,24,temp);
2002a1db 3218#ifdef BIG_ENDIAN_MIPS
3219 if (opcode[i]==0x26) // LWR
3220#else
3221 if (opcode[i]==0x22) // LWL
3222#endif
3223 emit_xorimm(temp,24,temp);
57871462 3224 emit_movimm(-1,HOST_TEMPREG);
3225 if (opcode[i]==0x26) {
3226 emit_shr(temp2,temp,temp2);
3227 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3228 }else{
3229 emit_shl(temp2,temp,temp2);
3230 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3231 }
3232 emit_or(temp2,tl,tl);
3233 //emit_storereg(rt1[i],tl); // DEBUG
3234 }
3235 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3236 // FIXME: little endian
57871462 3237 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3238 if(!c||memtarget) {
3239 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3240 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3241 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3242 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3243 }
3244 else
3245 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3246 emit_testimm(temp,32);
3247 emit_andimm(temp,24,temp);
3248 if (opcode[i]==0x1A) { // LDL
3249 emit_rsbimm(temp,32,HOST_TEMPREG);
3250 emit_shl(temp2h,temp,temp2h);
3251 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3252 emit_movimm(-1,HOST_TEMPREG);
3253 emit_shl(temp2,temp,temp2);
3254 emit_cmove_reg(temp2h,th);
3255 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3256 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3257 emit_orreq(temp2,tl,tl);
3258 emit_orrne(temp2,th,th);
3259 }
3260 if (opcode[i]==0x1B) { // LDR
3261 emit_xorimm(temp,24,temp);
3262 emit_rsbimm(temp,32,HOST_TEMPREG);
3263 emit_shr(temp2,temp,temp2);
3264 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3265 emit_movimm(-1,HOST_TEMPREG);
3266 emit_shr(temp2h,temp,temp2h);
3267 emit_cmovne_reg(temp2,tl);
3268 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3269 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3270 emit_orrne(temp2h,th,th);
3271 emit_orreq(temp2h,tl,tl);
3272 }
3273 }
3274 }
3275}
// Make the generic name loadlr_assemble resolve to the ARM implementation.
#define loadlr_assemble loadlr_assemble_arm
3277
3278void cop0_assemble(int i,struct regstat *i_regs)
3279{
3280 if(opcode2[i]==0) // MFC0
3281 {
3282 signed char t=get_reg(i_regs->regmap,rt1[i]);
3283 char copr=(source[i]>>11)&0x1f;
3284 //assert(t>=0); // Why does this happen? OOT is weird
3285 if(t>=0) {
7139f3c8 3286#ifdef MUPEN64
57871462 3287 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3288 emit_movimm((source[i]>>11)&0x1f,1);
3289 emit_writeword(0,(int)&PC);
3290 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3291 if(copr==9) {
3292 emit_readword((int)&last_count,ECX);
3293 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3294 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3295 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3296 emit_writeword(HOST_CCREG,(int)&Count);
3297 }
3298 emit_call((int)MFC0);
3299 emit_readword((int)&readmem_dword,t);
7139f3c8 3300#else
3301 emit_readword((int)&reg_cop0+copr*4,t);
3302#endif
57871462 3303 }
3304 }
3305 else if(opcode2[i]==4) // MTC0
3306 {
3307 signed char s=get_reg(i_regs->regmap,rs1[i]);
3308 char copr=(source[i]>>11)&0x1f;
3309 assert(s>=0);
3310 emit_writeword(s,(int)&readmem_dword);
3311 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3d624f89 3312#ifdef MUPEN64 /// FIXME
57871462 3313 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3314 emit_movimm((source[i]>>11)&0x1f,1);
3315 emit_writeword(0,(int)&PC);
3316 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3d624f89 3317#endif
7139f3c8 3318#ifdef PCSX
3319 emit_movimm(source[i],0);
3320 emit_writeword(0,(int)&psxRegs.code);
3321#endif
3322 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3323 emit_readword((int)&last_count,ECX);
3324 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3325 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3326 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3327 emit_writeword(HOST_CCREG,(int)&Count);
3328 }
3329 // What a mess. The status register (12) can enable interrupts,
3330 // so needs a special case to handle a pending interrupt.
3331 // The interrupt must be taken immediately, because a subsequent
3332 // instruction might disable interrupts again.
7139f3c8 3333 if(copr==12||copr==13) {
57871462 3334 emit_movimm(start+i*4+4,0);
3335 emit_movimm(0,1);
3336 emit_writeword(0,(int)&pcaddr);
3337 emit_writeword(1,(int)&pending_exception);
3338 }
3339 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3340 //else
3341 emit_call((int)MTC0);
7139f3c8 3342 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3343 emit_readword((int)&Count,HOST_CCREG);
3344 emit_readword((int)&next_interupt,ECX);
3345 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3346 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3347 emit_writeword(ECX,(int)&last_count);
3348 emit_storereg(CCREG,HOST_CCREG);
3349 }
7139f3c8 3350 if(copr==12||copr==13) {
57871462 3351 assert(!is_delayslot);
3352 emit_readword((int)&pending_exception,14);
3353 }
3354 emit_loadreg(rs1[i],s);
3355 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3356 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3357 if(copr==12||copr==13) {
57871462 3358 emit_test(14,14);
3359 emit_jne((int)&do_interrupt);
3360 }
3361 cop1_usable=0;
3362 }
3363 else
3364 {
3365 assert(opcode2[i]==0x10);
3d624f89 3366#ifndef DISABLE_TLB
57871462 3367 if((source[i]&0x3f)==0x01) // TLBR
3368 emit_call((int)TLBR);
3369 if((source[i]&0x3f)==0x02) // TLBWI
3370 emit_call((int)TLBWI_new);
3371 if((source[i]&0x3f)==0x06) { // TLBWR
3372 // The TLB entry written by TLBWR is dependent on the count,
3373 // so update the cycle count
3374 emit_readword((int)&last_count,ECX);
3375 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3376 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3377 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3378 emit_writeword(HOST_CCREG,(int)&Count);
3379 emit_call((int)TLBWR_new);
3380 }
3381 if((source[i]&0x3f)==0x08) // TLBP
3382 emit_call((int)TLBP);
3d624f89 3383#endif
57871462 3384 if((source[i]&0x3f)==0x18) // ERET
3385 {
3386 int count=ccadj[i];
3387 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3388 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3389 emit_jmp((int)jump_eret);
3390 }
3391 }
3392}
3393
b9b61529 3394static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3395{
3396 switch (copr) {
3397 case 1:
3398 case 3:
3399 case 5:
3400 case 8:
3401 case 9:
3402 case 10:
3403 case 11:
3404 emit_readword((int)&reg_cop2d[copr],tl);
3405 emit_signextend16(tl,tl);
3406 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3407 break;
3408 case 7:
3409 case 16:
3410 case 17:
3411 case 18:
3412 case 19:
3413 emit_readword((int)&reg_cop2d[copr],tl);
3414 emit_andimm(tl,0xffff,tl);
3415 emit_writeword(tl,(int)&reg_cop2d[copr]);
3416 break;
3417 case 15:
3418 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3419 emit_writeword(tl,(int)&reg_cop2d[copr]);
3420 break;
3421 case 28:
3422 case 30:
3423 emit_movimm(0,tl);
3424 break;
3425 case 29:
3426 emit_readword((int)&reg_cop2d[9],temp);
3427 emit_testimm(temp,0x8000); // do we need this?
3428 emit_andimm(temp,0xf80,temp);
3429 emit_andne_imm(temp,0,temp);
3430 emit_shr(temp,7,tl);
3431 emit_readword((int)&reg_cop2d[10],temp);
3432 emit_testimm(temp,0x8000);
3433 emit_andimm(temp,0xf80,temp);
3434 emit_andne_imm(temp,0,temp);
3435 emit_orrshr(temp,2,tl);
3436 emit_readword((int)&reg_cop2d[11],temp);
3437 emit_testimm(temp,0x8000);
3438 emit_andimm(temp,0xf80,temp);
3439 emit_andne_imm(temp,0,temp);
3440 emit_orrshl(temp,3,tl);
3441 emit_writeword(tl,(int)&reg_cop2d[copr]);
3442 break;
3443 default:
3444 emit_readword((int)&reg_cop2d[copr],tl);
3445 break;
3446 }
3447}
3448
3449static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3450{
3451 switch (copr) {
3452 case 15:
3453 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3454 emit_writeword(sl,(int)&reg_cop2d[copr]);
3455 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3456 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3457 emit_writeword(sl,(int)&reg_cop2d[14]);
3458 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3459 break;
3460 case 28:
3461 emit_andimm(sl,0x001f,temp);
3462 emit_shl(temp,7,temp);
3463 emit_writeword(temp,(int)&reg_cop2d[9]);
3464 emit_andimm(sl,0x03e0,temp);
3465 emit_shl(temp,2,temp);
3466 emit_writeword(temp,(int)&reg_cop2d[10]);
3467 emit_andimm(sl,0x7c00,temp);
3468 emit_shr(temp,3,temp);
3469 emit_writeword(temp,(int)&reg_cop2d[11]);
3470 emit_writeword(sl,(int)&reg_cop2d[28]);
3471 break;
3472 case 30:
3473 emit_movs(sl,temp);
3474 emit_mvnmi(temp,temp);
3475 emit_clz(temp,temp);
3476 emit_writeword(sl,(int)&reg_cop2d[30]);
3477 emit_writeword(temp,(int)&reg_cop2d[31]);
3478 break;
3479 case 7:
3480 case 29:
3481 case 31:
3482 break;
3483 default:
3484 emit_writeword(sl,(int)&reg_cop2d[copr]);
3485 break;
3486 }
3487}
3488
3489void cop2_assemble(int i,struct regstat *i_regs)
3490{
3491 u_int copr=(source[i]>>11)&0x1f;
3492 signed char temp=get_reg(i_regs->regmap,-1);
3493 if (opcode2[i]==0) { // MFC2
3494 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3495 if(tl>=0)
3496 cop2_get_dreg(copr,tl,temp);
3497 }
3498 else if (opcode2[i]==4) { // MTC2
3499 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3500 cop2_put_dreg(copr,sl,temp);
3501 }
3502 else if (opcode2[i]==2) // CFC2
3503 {
3504 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3505 if(tl>=0)
3506 emit_readword((int)&reg_cop2c[copr],tl);
3507 }
3508 else if (opcode2[i]==6) // CTC2
3509 {
3510 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3511 switch(copr) {
3512 case 4:
3513 case 12:
3514 case 20:
3515 case 26:
3516 case 27:
3517 case 29:
3518 case 30:
3519 emit_signextend16(sl,temp);
3520 break;
3521 case 31:
3522 //value = value & 0x7ffff000;
3523 //if (value & 0x7f87e000) value |= 0x80000000;
3524 emit_shrimm(sl,12,temp);
3525 emit_shlimm(temp,12,temp);
3526 emit_testimm(temp,0x7f000000);
3527 emit_testeqimm(temp,0x00870000);
3528 emit_testeqimm(temp,0x0000e000);
3529 emit_orrne_imm(temp,0x80000000,temp);
3530 break;
3531 default:
3532 temp=sl;
3533 break;
3534 }
3535 emit_writeword(temp,(int)&reg_cop2c[copr]);
3536 assert(sl>=0);
3537 }
3538}
3539