DYNAREC: More work on DIV, no effect
[mupen64plus-pandora.git] / source / mupen64plus-core / src / r4300 / new_dynarec / assem_arm.c
CommitLineData
451ab91e 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
// State defined outside this file and shared with the generated code.
extern int cycle_count;        // backing store for CCREG (see emit_loadreg/emit_storereg)
extern int last_count;
extern int pcaddr;
extern int pending_exception;
extern int branch_target;
extern int ram_offset;         // backing store for ROREG (see emit_loadreg)
extern uint64_t readmem_dword;
extern precomp_instr fake_pc;
extern void *dynarec_local;    // base address for the fp-relative loads/stores emitted below
// Indexed by address>>12. verify_dirty/get_bounds reject entries >=0x80000000
// and otherwise add (entry<<2) to the address.
extern u_int memory_map[1048576];
extern u_int mini_ht[32][2];
extern u_int rounding_modes[4];

// Pending literals: [n][0] is the address passed to add_literal (emit_loadlp
// passes the location of the load instruction), [n][1] is the value.
static u_int literals[1024][2];
35
// Routines defined outside this file (presumably in the assembly linkage
// code - TODO confirm). Their addresses are stored in jump_vaddr_reg and
// jump_table_symbols below. There is no jump_vaddr_r11: index 11 is "fp"
// in regname[].
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();
52
53const u_int jump_vaddr_reg[16] = {
54 (int)jump_vaddr_r0,
55 (int)jump_vaddr_r1,
56 (int)jump_vaddr_r2,
57 (int)jump_vaddr_r3,
58 (int)jump_vaddr_r4,
59 (int)jump_vaddr_r5,
60 (int)jump_vaddr_r6,
61 (int)jump_vaddr_r7,
62 (int)jump_vaddr_r8,
63 (int)jump_vaddr_r9,
64 (int)jump_vaddr_r10,
65 0,
66 (int)jump_vaddr_r12,
67 0,
68 0,
69 0};
70
// Routines defined outside this file (presumably in the assembly linkage
// code - TODO confirm), one per usable host register. Their addresses are
// stored in invalidate_addr_reg and jump_table_symbols below.
void invalidate_addr_r0();
void invalidate_addr_r1();
void invalidate_addr_r2();
void invalidate_addr_r3();
void invalidate_addr_r4();
void invalidate_addr_r5();
void invalidate_addr_r6();
void invalidate_addr_r7();
void invalidate_addr_r8();
void invalidate_addr_r9();
void invalidate_addr_r10();
void invalidate_addr_r12();
83
84const u_int invalidate_addr_reg[16] = {
85 (int)invalidate_addr_r0,
86 (int)invalidate_addr_r1,
87 (int)invalidate_addr_r2,
88 (int)invalidate_addr_r3,
89 (int)invalidate_addr_r4,
90 (int)invalidate_addr_r5,
91 (int)invalidate_addr_r6,
92 (int)invalidate_addr_r7,
93 (int)invalidate_addr_r8,
94 (int)invalidate_addr_r9,
95 (int)invalidate_addr_r10,
96 0,
97 (int)invalidate_addr_r12,
98 0,
99 0,
100 0};
101
102#include "../fpu.h"
103
// Addresses of routines that generated code may branch to. When a target is
// out of direct branch range, genjmp() looks it up here and branches to
// slot n of the jump table instead, so the ORDER of the entries defines the
// slot numbers and must stay in sync with whatever fills that table
// (not visible in this file section).
static u_int jump_table_symbols[] = {
  (int)invalidate_addr,
  (int)jump_vaddr,
  (int)dyna_linker,
  (int)dyna_linker_ds,
  (int)verify_code,
  (int)verify_code_vm,
  (int)verify_code_ds,
  (int)cc_interrupt,
  (int)fp_exception,
  (int)fp_exception_ds,
  (int)jump_syscall,
  (int)jump_eret,
  (int)indirect_jump_indexed,
  (int)indirect_jump,
  (int)do_interrupt,
  (int)NULL /*MFC0*/,
  (int)NULL /*MTC0*/,
  (int)NULL /*TLBR*/,
  (int)NULL /*TLBP*/,
  (int)TLBWI_new,
  (int)TLBWR_new,
  (int)jump_vaddr_r0,
  (int)jump_vaddr_r1,
  (int)jump_vaddr_r2,
  (int)jump_vaddr_r3,
  (int)jump_vaddr_r4,
  (int)jump_vaddr_r5,
  (int)jump_vaddr_r6,
  (int)jump_vaddr_r7,
  (int)jump_vaddr_r8,
  (int)jump_vaddr_r9,
  (int)jump_vaddr_r10,
  (int)jump_vaddr_r12,
  (int)invalidate_addr_r0,
  (int)invalidate_addr_r1,
  (int)invalidate_addr_r2,
  (int)invalidate_addr_r3,
  (int)invalidate_addr_r4,
  (int)invalidate_addr_r5,
  (int)invalidate_addr_r6,
  (int)invalidate_addr_r7,
  (int)invalidate_addr_r8,
  (int)invalidate_addr_r9,
  (int)invalidate_addr_r10,
  (int)invalidate_addr_r12,
  (int)mult64,
  (int)multu64,
  (int)div64,
  (int)divu64,
  (int)cvt_s_w,
  (int)cvt_d_w,
  (int)cvt_s_l,
  (int)cvt_d_l,
  (int)cvt_w_s,
  (int)cvt_w_d,
  (int)cvt_l_s,
  (int)cvt_l_d,
  (int)cvt_d_s,
  (int)cvt_s_d,
  (int)round_l_s,
  (int)round_w_s,
  (int)trunc_l_s,
  (int)trunc_w_s,
  (int)ceil_l_s,
  (int)ceil_w_s,
  (int)floor_l_s,
  (int)floor_w_s,
  (int)round_l_d,
  (int)round_w_d,
  (int)trunc_l_d,
  (int)trunc_w_d,
  (int)ceil_l_d,
  (int)ceil_w_d,
  (int)floor_l_d,
  (int)floor_w_d,
  (int)c_f_s,
  (int)c_un_s,
  (int)c_eq_s,
  (int)c_ueq_s,
  (int)c_olt_s,
  (int)c_ult_s,
  (int)c_ole_s,
  (int)c_ule_s,
  (int)c_sf_s,
  (int)c_ngle_s,
  (int)c_seq_s,
  (int)c_ngl_s,
  (int)c_lt_s,
  (int)c_nge_s,
  (int)c_le_s,
  (int)c_ngt_s,
  (int)c_f_d,
  (int)c_un_d,
  (int)c_eq_d,
  (int)c_ueq_d,
  (int)c_olt_d,
  (int)c_ult_d,
  (int)c_ole_d,
  (int)c_ule_d,
  (int)c_sf_d,
  (int)c_ngle_d,
  (int)c_seq_d,
  (int)c_ngl_d,
  (int)c_lt_d,
  (int)c_nge_d,
  (int)c_le_d,
  (int)c_ngt_d,
  (int)add_s,
  (int)sub_s,
  (int)mul_s,
  (int)div_s,
  (int)sqrt_s,
  (int)abs_s,
  (int)mov_s,
  (int)neg_s,
  (int)add_d,
  (int)sub_d,
  (int)mul_d,
  (int)div_d,
  (int)sqrt_d,
  (int)abs_d,
  (int)mov_d,
  (int)neg_d
};
229
// Bitmap with 1<<(TARGET_SIZE_2-12) bits in total; presumably one bit per
// 4KB of the translation cache - TODO confirm against the code that sets it.
static unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];

// Size in bytes of the jump table that genjmp() addresses at the end of the
// translation cache: 8 bytes per (4-byte) entry of jump_table_symbols.
#define JUMP_TABLE_SIZE (sizeof(jump_table_symbols)*2)
233
234/* Linker */
235
236static void set_jump_target(int addr,u_int target)
237{
238 u_char *ptr=(u_char *)addr;
239 u_int *ptr2=(u_int *)ptr;
240 if(ptr[3]==0xe2) {
241 assert((target-(u_int)ptr2-8)<1024);
242 assert((addr&3)==0);
243 assert((target&3)==0);
244 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
245 //DebugMessage(M64MSG_VERBOSE, "target=%x addr=%x insn=%x",target,addr,*ptr2);
246 }
247 else if(ptr[3]==0x72) {
248 // generated by emit_jno_unlikely
249 if((target-(u_int)ptr2-8)<1024) {
250 assert((addr&3)==0);
251 assert((target&3)==0);
252 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
253 }
254 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
255 assert((addr&3)==0);
256 assert((target&3)==0);
257 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
258 }
259 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
260 }
261 else {
262 assert((ptr[3]&0x0e)==0xa);
263 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
264 }
265}
266
267// This optionally copies the instruction from the target of the branch into
268// the space before the branch. Works, but the difference in speed is
269// usually insignificant.
270/*
271static void set_jump_target_fillslot(int addr,u_int target,int copy)
272{
273 u_char *ptr=(u_char *)addr;
274 u_int *ptr2=(u_int *)ptr;
275 assert(!copy||ptr2[-1]==0xe28dd000);
276 if(ptr[3]==0xe2) {
277 assert(!copy);
278 assert((target-(u_int)ptr2-8)<4096);
279 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
280 }
281 else {
282 assert((ptr[3]&0x0e)==0xa);
283 u_int target_insn=*(u_int *)target;
284 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
285 copy=0;
286 }
287 if((target_insn&0x0c100000)==0x04100000) { // Load
288 copy=0;
289 }
290 if(target_insn&0x08000000) {
291 copy=0;
292 }
293 if(copy) {
294 ptr2[-1]=target_insn;
295 target+=4;
296 }
297 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
298 }
299}
300*/
301
302/* Literal pool */
303static void add_literal(int addr,int val)
304{
305 literals[literalcount][0]=addr;
306 literals[literalcount][1]=val;
307 literalcount++;
308}
309
310static void *kill_pointer(void *stub)
311{
312 int *ptr=(int *)(stub+4);
313// assert((*ptr&0x0ff00000)==0x05900000); //*SEB* disabled, just to see...
314 u_int offset=*ptr&0xfff;
315 int **l_ptr=(void *)ptr+offset+8;
316 int *i_ptr=*l_ptr;
317 set_jump_target((int)i_ptr,(int)stub);
318 return i_ptr;
319}
320
// Return the current destination of the branch that a stub records.
// The low 12 bits of the stub's second word are used as a pc-relative
// offset to a literal slot holding the branch instruction's address; the
// branch's 24-bit word offset is sign-extended, scaled by 4 and added to
// the branch address + 8.
static int get_pointer(void *stub)
{
  //DebugMessage(M64MSG_VERBOSE, "get_pointer(%x)",(int)stub);
  char *second_insn=(char *)stub+4;
  // (the ldr / branch encoding sanity asserts are disabled upstream)
  u_int lit_offset=*(int *)second_insn&0xfff;
  int *branch=*(int **)(second_insn+lit_offset+8);
  int displacement=(*branch<<8)>>6;
  return (int)branch+displacement+8;
}
332
333// Find the "clean" entry point from a "dirty" entry point
334// by skipping past the call to verify_code
335static u_int get_clean_addr(int addr)
336{
337 int *ptr=(int *)addr;
338 #ifdef ARMv5_ONLY
339 ptr+=4;
340 #else
341 ptr+=6;
342 #endif
343 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
344 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
345 ptr++;
346 if((*ptr&0xFF000000)==0xea000000) {
347 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
348 }
349 return (u_int)ptr;
350}
351
352static int verify_dirty(void *addr)
353{
354 u_int *ptr=(u_int *)addr;
355 #ifdef ARMv5_ONLY
356 // get from literal pool
357 assert((*ptr&0xFFF00000)==0xe5900000);
358 u_int offset=*ptr&0xfff;
359 u_int *l_ptr=(void *)ptr+offset+8;
360 u_int source=l_ptr[0];
361 u_int copy=l_ptr[1];
362 u_int len=l_ptr[2];
363 ptr+=4;
364 #else
365 // ARMv7 movw/movt
366 assert((*ptr&0xFFF00000)==0xe3000000);
367 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
368 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
369 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
370 ptr+=6;
371 #endif
372 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
373 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
374 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
375 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
376 unsigned int page=source>>12;
377 unsigned int map_value=memory_map[page];
378 if(map_value>=0x80000000) return 0;
379 while(page<((source+len-1)>>12)) {
380 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
381 }
382 source = source+(map_value<<2);
383 }
384 //DebugMessage(M64MSG_VERBOSE, "verify_dirty: %x %x %x",source,copy,len);
385 return !memcmp((void *)source,(void *)copy,len);
386}
387
388// This doesn't necessarily find all clean entry points, just
389// guarantees that it's not dirty
390static int isclean(int addr)
391{
392 #ifdef ARMv5_ONLY
393 int *ptr=((u_int *)addr)+4;
394 #else
395 int *ptr=((u_int *)addr)+6;
396 #endif
397 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
398 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
399 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
400 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
401 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
402 return 1;
403}
404
405static void get_bounds(int addr,u_int *start,u_int *end)
406{
407 u_int *ptr=(u_int *)addr;
408 #ifdef ARMv5_ONLY
409 // get from literal pool
410 assert((*ptr&0xFFF00000)==0xe5900000);
411 u_int offset=*ptr&0xfff;
412 u_int *l_ptr=(void *)ptr+offset+8;
413 u_int source=l_ptr[0];
414 //u_int copy=l_ptr[1];
415 u_int len=l_ptr[2];
416 ptr+=4;
417 #else
418 // ARMv7 movw/movt
419 assert((*ptr&0xFFF00000)==0xe3000000);
420 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
421 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
422 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
423 ptr+=6;
424 #endif
425 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
426 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
427 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
428 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
429 if(memory_map[source>>12]>=0x80000000) source = 0;
430 else source = source+(memory_map[source>>12]<<2);
431 }
432 *start=source;
433 *end=source+len;
434}
435
436/* Register allocation */
437
438// Note: registers are allocated clean (unmodified state)
439// if you intend to modify the register, you must call dirty_reg().
440static void alloc_reg(struct regstat *cur,int i,signed char reg)
441{
442 int r,hr;
443 int preferred_reg = (reg&7);
444 if(reg==CCREG) preferred_reg=HOST_CCREG;
445 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
446
447 // Don't allocate unused registers
448 if((cur->u>>reg)&1) return;
449
450 // see if it's already allocated
451 for(hr=0;hr<HOST_REGS;hr++)
452 {
453 if(cur->regmap[hr]==reg) return;
454 }
455
456 // Keep the same mapping if the register was already allocated in a loop
457 preferred_reg = loop_reg(i,reg,preferred_reg);
458
459 // Try to allocate the preferred register
460 if(cur->regmap[preferred_reg]==-1) {
461 cur->regmap[preferred_reg]=reg;
462 cur->dirty&=~(1<<preferred_reg);
463 cur->isconst&=~(1<<preferred_reg);
464 return;
465 }
466 r=cur->regmap[preferred_reg];
467 if(r<64&&((cur->u>>r)&1)) {
468 cur->regmap[preferred_reg]=reg;
469 cur->dirty&=~(1<<preferred_reg);
470 cur->isconst&=~(1<<preferred_reg);
471 return;
472 }
473 if(r>=64&&((cur->uu>>(r&63))&1)) {
474 cur->regmap[preferred_reg]=reg;
475 cur->dirty&=~(1<<preferred_reg);
476 cur->isconst&=~(1<<preferred_reg);
477 return;
478 }
479
480 // Clear any unneeded registers
481 // We try to keep the mapping consistent, if possible, because it
482 // makes branches easier (especially loops). So we try to allocate
483 // first (see above) before removing old mappings. If this is not
484 // possible then go ahead and clear out the registers that are no
485 // longer needed.
486 for(hr=0;hr<HOST_REGS;hr++)
487 {
488 r=cur->regmap[hr];
489 if(r>=0) {
490 if(r<64) {
491 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
492 }
493 else
494 {
495 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
496 }
497 }
498 }
499 // Try to allocate any available register, but prefer
500 // registers that have not been used recently.
501 if(i>0) {
502 for(hr=0;hr<HOST_REGS;hr++) {
503 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
504 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
505 cur->regmap[hr]=reg;
506 cur->dirty&=~(1<<hr);
507 cur->isconst&=~(1<<hr);
508 return;
509 }
510 }
511 }
512 }
513 // Try to allocate any available register
514 for(hr=0;hr<HOST_REGS;hr++) {
515 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
516 cur->regmap[hr]=reg;
517 cur->dirty&=~(1<<hr);
518 cur->isconst&=~(1<<hr);
519 return;
520 }
521 }
522
523 // Ok, now we have to evict someone
524 // Pick a register we hopefully won't need soon
525 u_char hsn[MAXREG+1];
526 memset(hsn,10,sizeof(hsn));
527 int j;
528 lsn(hsn,i,&preferred_reg);
529 //DebugMessage(M64MSG_VERBOSE, "eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
530 //DebugMessage(M64MSG_VERBOSE, "hsn(%x): %d %d %d %d %d %d %d",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
531 if(i>0) {
532 // Don't evict the cycle count at entry points, otherwise the entry
533 // stub will have to write it.
534 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
535 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
536 for(j=10;j>=3;j--)
537 {
538 // Alloc preferred register if available
539 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
540 for(hr=0;hr<HOST_REGS;hr++) {
541 // Evict both parts of a 64-bit register
542 if((cur->regmap[hr]&63)==r) {
543 cur->regmap[hr]=-1;
544 cur->dirty&=~(1<<hr);
545 cur->isconst&=~(1<<hr);
546 }
547 }
548 cur->regmap[preferred_reg]=reg;
549 return;
550 }
551 for(r=1;r<=MAXREG;r++)
552 {
553 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
556 if(cur->regmap[hr]==r+64) {
557 cur->regmap[hr]=reg;
558 cur->dirty&=~(1<<hr);
559 cur->isconst&=~(1<<hr);
560 return;
561 }
562 }
563 }
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
566 if(cur->regmap[hr]==r) {
567 cur->regmap[hr]=reg;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 }
576 }
577 }
578 for(j=10;j>=0;j--)
579 {
580 for(r=1;r<=MAXREG;r++)
581 {
582 if(hsn[r]==j) {
583 for(hr=0;hr<HOST_REGS;hr++) {
584 if(cur->regmap[hr]==r+64) {
585 cur->regmap[hr]=reg;
586 cur->dirty&=~(1<<hr);
587 cur->isconst&=~(1<<hr);
588 return;
589 }
590 }
591 for(hr=0;hr<HOST_REGS;hr++) {
592 if(cur->regmap[hr]==r) {
593 cur->regmap[hr]=reg;
594 cur->dirty&=~(1<<hr);
595 cur->isconst&=~(1<<hr);
596 return;
597 }
598 }
599 }
600 }
601 }
602 DebugMessage(M64MSG_ERROR, "This shouldn't happen (alloc_reg)");exit(1);
603}
604
605static void alloc_reg64(struct regstat *cur,int i,signed char reg)
606{
607 int preferred_reg = 8+(reg&1);
608 int r,hr;
609
610 // allocate the lower 32 bits
611 alloc_reg(cur,i,reg);
612
613 // Don't allocate unused registers
614 if((cur->uu>>reg)&1) return;
615
616 // see if the upper half is already allocated
617 for(hr=0;hr<HOST_REGS;hr++)
618 {
619 if(cur->regmap[hr]==reg+64) return;
620 }
621
622 // Keep the same mapping if the register was already allocated in a loop
623 preferred_reg = loop_reg(i,reg,preferred_reg);
624
625 // Try to allocate the preferred register
626 if(cur->regmap[preferred_reg]==-1) {
627 cur->regmap[preferred_reg]=reg|64;
628 cur->dirty&=~(1<<preferred_reg);
629 cur->isconst&=~(1<<preferred_reg);
630 return;
631 }
632 r=cur->regmap[preferred_reg];
633 if(r<64&&((cur->u>>r)&1)) {
634 cur->regmap[preferred_reg]=reg|64;
635 cur->dirty&=~(1<<preferred_reg);
636 cur->isconst&=~(1<<preferred_reg);
637 return;
638 }
639 if(r>=64&&((cur->uu>>(r&63))&1)) {
640 cur->regmap[preferred_reg]=reg|64;
641 cur->dirty&=~(1<<preferred_reg);
642 cur->isconst&=~(1<<preferred_reg);
643 return;
644 }
645
646 // Clear any unneeded registers
647 // We try to keep the mapping consistent, if possible, because it
648 // makes branches easier (especially loops). So we try to allocate
649 // first (see above) before removing old mappings. If this is not
650 // possible then go ahead and clear out the registers that are no
651 // longer needed.
652 for(hr=HOST_REGS-1;hr>=0;hr--)
653 {
654 r=cur->regmap[hr];
655 if(r>=0) {
656 if(r<64) {
657 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
658 }
659 else
660 {
661 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
662 }
663 }
664 }
665 // Try to allocate any available register, but prefer
666 // registers that have not been used recently.
667 if(i>0) {
668 for(hr=0;hr<HOST_REGS;hr++) {
669 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
670 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
671 cur->regmap[hr]=reg|64;
672 cur->dirty&=~(1<<hr);
673 cur->isconst&=~(1<<hr);
674 return;
675 }
676 }
677 }
678 }
679 // Try to allocate any available register
680 for(hr=0;hr<HOST_REGS;hr++) {
681 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
682 cur->regmap[hr]=reg|64;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
688
689 // Ok, now we have to evict someone
690 // Pick a register we hopefully won't need soon
691 u_char hsn[MAXREG+1];
692 memset(hsn,10,sizeof(hsn));
693 int j;
694 lsn(hsn,i,&preferred_reg);
695 //DebugMessage(M64MSG_VERBOSE, "eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
696 //DebugMessage(M64MSG_VERBOSE, "hsn(%x): %d %d %d %d %d %d %d",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
697 if(i>0) {
698 // Don't evict the cycle count at entry points, otherwise the entry
699 // stub will have to write it.
700 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
701 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
702 for(j=10;j>=3;j--)
703 {
704 // Alloc preferred register if available
705 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
706 for(hr=0;hr<HOST_REGS;hr++) {
707 // Evict both parts of a 64-bit register
708 if((cur->regmap[hr]&63)==r) {
709 cur->regmap[hr]=-1;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 }
713 }
714 cur->regmap[preferred_reg]=reg|64;
715 return;
716 }
717 for(r=1;r<=MAXREG;r++)
718 {
719 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
720 for(hr=0;hr<HOST_REGS;hr++) {
721 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
722 if(cur->regmap[hr]==r+64) {
723 cur->regmap[hr]=reg|64;
724 cur->dirty&=~(1<<hr);
725 cur->isconst&=~(1<<hr);
726 return;
727 }
728 }
729 }
730 for(hr=0;hr<HOST_REGS;hr++) {
731 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
732 if(cur->regmap[hr]==r) {
733 cur->regmap[hr]=reg|64;
734 cur->dirty&=~(1<<hr);
735 cur->isconst&=~(1<<hr);
736 return;
737 }
738 }
739 }
740 }
741 }
742 }
743 }
744 for(j=10;j>=0;j--)
745 {
746 for(r=1;r<=MAXREG;r++)
747 {
748 if(hsn[r]==j) {
749 for(hr=0;hr<HOST_REGS;hr++) {
750 if(cur->regmap[hr]==r+64) {
751 cur->regmap[hr]=reg|64;
752 cur->dirty&=~(1<<hr);
753 cur->isconst&=~(1<<hr);
754 return;
755 }
756 }
757 for(hr=0;hr<HOST_REGS;hr++) {
758 if(cur->regmap[hr]==r) {
759 cur->regmap[hr]=reg|64;
760 cur->dirty&=~(1<<hr);
761 cur->isconst&=~(1<<hr);
762 return;
763 }
764 }
765 }
766 }
767 }
768 DebugMessage(M64MSG_ERROR, "This shouldn't happen");exit(1);
769}
770
771// Allocate a temporary register. This is done without regard to
772// dirty status or whether the register we request is on the unneeded list
773// Note: This will only allocate one register, even if called multiple times
774static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
775{
776 int r,hr;
777 int preferred_reg = -1;
778
779 // see if it's already allocated
780 for(hr=0;hr<HOST_REGS;hr++)
781 {
782 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
783 }
784
785 // Try to allocate any available register
786 for(hr=HOST_REGS-1;hr>=0;hr--) {
787 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
788 cur->regmap[hr]=reg;
789 cur->dirty&=~(1<<hr);
790 cur->isconst&=~(1<<hr);
791 return;
792 }
793 }
794
795 // Find an unneeded register
796 for(hr=HOST_REGS-1;hr>=0;hr--)
797 {
798 r=cur->regmap[hr];
799 if(r>=0) {
800 if(r<64) {
801 if((cur->u>>r)&1) {
802 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
803 cur->regmap[hr]=reg;
804 cur->dirty&=~(1<<hr);
805 cur->isconst&=~(1<<hr);
806 return;
807 }
808 }
809 }
810 else
811 {
812 if((cur->uu>>(r&63))&1) {
813 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
814 cur->regmap[hr]=reg;
815 cur->dirty&=~(1<<hr);
816 cur->isconst&=~(1<<hr);
817 return;
818 }
819 }
820 }
821 }
822 }
823
824 // Ok, now we have to evict someone
825 // Pick a register we hopefully won't need soon
826 // TODO: we might want to follow unconditional jumps here
827 // TODO: get rid of dupe code and make this into a function
828 u_char hsn[MAXREG+1];
829 memset(hsn,10,sizeof(hsn));
830 int j;
831 lsn(hsn,i,&preferred_reg);
832 //DebugMessage(M64MSG_VERBOSE, "hsn: %d %d %d %d %d %d %d",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
833 if(i>0) {
834 // Don't evict the cycle count at entry points, otherwise the entry
835 // stub will have to write it.
836 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
837 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
838 for(j=10;j>=3;j--)
839 {
840 for(r=1;r<=MAXREG;r++)
841 {
842 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
843 for(hr=0;hr<HOST_REGS;hr++) {
844 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
845 if(cur->regmap[hr]==r+64) {
846 cur->regmap[hr]=reg;
847 cur->dirty&=~(1<<hr);
848 cur->isconst&=~(1<<hr);
849 return;
850 }
851 }
852 }
853 for(hr=0;hr<HOST_REGS;hr++) {
854 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
855 if(cur->regmap[hr]==r) {
856 cur->regmap[hr]=reg;
857 cur->dirty&=~(1<<hr);
858 cur->isconst&=~(1<<hr);
859 return;
860 }
861 }
862 }
863 }
864 }
865 }
866 }
867 for(j=10;j>=0;j--)
868 {
869 for(r=1;r<=MAXREG;r++)
870 {
871 if(hsn[r]==j) {
872 for(hr=0;hr<HOST_REGS;hr++) {
873 if(cur->regmap[hr]==r+64) {
874 cur->regmap[hr]=reg;
875 cur->dirty&=~(1<<hr);
876 cur->isconst&=~(1<<hr);
877 return;
878 }
879 }
880 for(hr=0;hr<HOST_REGS;hr++) {
881 if(cur->regmap[hr]==r) {
882 cur->regmap[hr]=reg;
883 cur->dirty&=~(1<<hr);
884 cur->isconst&=~(1<<hr);
885 return;
886 }
887 }
888 }
889 }
890 }
891 DebugMessage(M64MSG_ERROR, "This shouldn't happen");exit(1);
892}
893// Allocate a specific ARM register.
894static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
895{
896 int n;
897 int dirty=0;
898
899 // see if it's already allocated (and dealloc it)
900 for(n=0;n<HOST_REGS;n++)
901 {
902 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
903 dirty=(cur->dirty>>n)&1;
904 cur->regmap[n]=-1;
905 }
906 }
907
908 cur->regmap[hr]=reg;
909 cur->dirty&=~(1<<hr);
910 cur->dirty|=dirty<<hr;
911 cur->isconst&=~(1<<hr);
912}
913
914// Alloc cycle count into dedicated register
// Pins the cycle count (CCREG) to its dedicated host register HOST_CCREG
// by delegating to alloc_arm_reg, which also carries over the dirty state
// of any previous CCREG mapping.
static void alloc_cc(struct regstat *cur,int i)
{
  alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
}
919
920/* Special alloc */
921
922
923/* Assembler */
924
// Printable names of the 16 ARM core registers, indexed by register
// number; used only in the assem_debug() disassembly strings below.
static char regname[16][4] = {
 "r0",
 "r1",
 "r2",
 "r3",
 "r4",
 "r5",
 "r6",
 "r7",
 "r8",
 "r9",
 "r10",
 "fp",
 "r12",
 "sp",
 "lr",
 "pc"};
942
943static void output_byte(u_char byte)
944{
945 *(out++)=byte;
946}
947static void output_modrm(u_char mod,u_char rm,u_char ext)
948{
949 assert(mod<4);
950 assert(rm<8);
951 assert(ext<8);
952 u_char byte=(mod<<6)|(ext<<3)|rm;
953 *(out++)=byte;
954}
955
956static void output_w32(u_int word)
957{
958 *((u_int *)out)=word;
959 out+=4;
960}
// Build the register fields of an ARM instruction word: Rn in bits 16-19,
// Rd in bits 12-15 and Rm in bits 0-3. Each register number must be < 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  u_int fields;
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the Rn/Rd fields plus an immediate operand equal to imm<<shift.
// 'imm' must fit in 8 bits and 'shift' must be even; the shift is encoded
// in bits 8-11 as ((32-shift)&30)<<7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  u_int rotate_bits;
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  rotate_bits=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_bits|imm;
}
// Try to express 'imm' as an ARM immediate operand (an 8-bit value plus an
// even rotation). On success stores the 12-bit operand field in *encoded
// and returns 1; returns 0 (leaving *encoded untouched) if no rotation
// fits.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  if(imm==0) {*encoded=0;return 1;}
  // Each pass rotates the value right by two bits and lowers 'rot' by 2,
  // until the value fits in 8 bits.
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
990static u_int genjmp(u_int addr)
991{
992 if(addr<4) return 0;
993 int offset=addr-(int)out-8;
994 if(offset<-33554432||offset>=33554432) {
995 int n;
996 for (n=0;n<sizeof(jump_table_symbols)/4;n++)
997 {
998 if(addr==jump_table_symbols[n])
999 {
1000 offset=BASE_ADDR+(1<<TARGET_SIZE_2)-JUMP_TABLE_SIZE+n*8-(int)out-8;
1001 break;
1002 }
1003 }
1004 }
1005 assert(offset>=-33554432&&offset<33554432);
1006 return ((u_int)offset>>2)&0xffffff;
1007}
1008
1009static void emit_mov(int rs,int rt)
1010{
1011 assem_debug("mov %s,%s",regname[rt],regname[rs]);
1012 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
1013}
1014
1015static void emit_movs(int rs,int rt)
1016{
1017 assem_debug("movs %s,%s",regname[rt],regname[rs]);
1018 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
1019}
1020
1021static void emit_add(int rs1,int rs2,int rt)
1022{
1023 assem_debug("add %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1024 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
1025}
1026
1027static void emit_adds(int rs1,int rs2,int rt)
1028{
1029 assem_debug("adds %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1030 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
1031}
1032
1033static void emit_adcs(int rs1,int rs2,int rt)
1034{
1035 assem_debug("adcs %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1036 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
1037}
1038
1039static void emit_sbc(int rs1,int rs2,int rt)
1040{
1041 assem_debug("sbc %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1042 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
1043}
1044
1045static void emit_sbcs(int rs1,int rs2,int rt)
1046{
1047 assem_debug("sbcs %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1048 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
1049}
1050
1051static void emit_neg(int rs, int rt)
1052{
1053 assem_debug("rsb %s,%s,#0",regname[rt],regname[rs]);
1054 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
1055}
1056
1057static void emit_negs(int rs, int rt)
1058{
1059 assem_debug("rsbs %s,%s,#0",regname[rt],regname[rs]);
1060 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
1061}
1062
1063static void emit_sub(int rs1,int rs2,int rt)
1064{
1065 assem_debug("sub %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1066 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
1067}
1068
1069static void emit_subs(int rs1,int rs2,int rt)
1070{
1071 assem_debug("subs %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1072 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
1073}
1074
1075static void emit_zeroreg(int rt)
1076{
1077 assem_debug("mov %s,#0",regname[rt]);
1078 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
1079}
1080
1081static void emit_loadlp(u_int imm,u_int rt)
1082{
1083 add_literal((int)out,imm);
1084 assem_debug("ldr %s,pc+? [=%x]",regname[rt],imm);
1085 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1086}
1087static void emit_movw(u_int imm,u_int rt)
1088{
1089 assert(imm<65536);
1090 assem_debug("movw %s,#%d (0x%x)",regname[rt],imm,imm);
1091 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1092}
1093static void emit_movt(u_int imm,u_int rt)
1094{
1095 assem_debug("movt %s,#%d (0x%x)",regname[rt],imm&0xffff0000,imm&0xffff0000);
1096 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1097}
1098static void emit_movimm(u_int imm,u_int rt)
1099{
1100 u_int armval;
1101 if(genimm(imm,&armval)) {
1102 assem_debug("mov %s,#%d",regname[rt],imm);
1103 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1104 }else if(genimm(~imm,&armval)) {
1105 assem_debug("mvn %s,#%d",regname[rt],imm);
1106 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1107 }else if(imm<65536) {
1108 #ifdef ARMv5_ONLY
1109 assem_debug("mov %s,#%d",regname[rt],imm&0xFF00);
1110 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1111 assem_debug("add %s,%s,#%d",regname[rt],regname[rt],imm&0xFF);
1112 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1113 #else
1114 emit_movw(imm,rt);
1115 #endif
1116 }else{
1117 #ifdef ARMv5_ONLY
1118 emit_loadlp(imm,rt);
1119 #else
1120 emit_movw(imm&0x0000FFFF,rt);
1121 emit_movt(imm&0xFFFF0000,rt);
1122 #endif
1123 }
1124}
1125static void emit_pcreladdr(u_int rt)
1126{
1127 assem_debug("add %s,pc,#?",regname[rt]);
1128 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1129}
1130
1131static void emit_loadreg(int r, int hr)
1132{
1133 if((r&63)==0)
1134 emit_zeroreg(hr);
1135 else if(r==MMREG)
1136 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,hr);
1137 else {
1138 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
1139 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1140 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1141 if(r==CCREG) addr=(int)&cycle_count;
1142 if(r==CSREG) addr=(int)&Status;
1143 if(r==FSREG) addr=(int)&FCR31;
1144 if(r==INVCP) addr=(int)&invc_ptr;
1145 if(r==ROREG) addr=(int)&ram_offset;
1146 u_int offset = addr-(u_int)&dynarec_local;
1147 assert(offset<4096);
1148 assem_debug("ldr %s,fp+%d",regname[hr],offset);
1149 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1150 }
1151}
1152static void emit_storereg(int r, int hr)
1153{
1154 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
1155 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1156 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1157 if(r==CCREG) addr=(int)&cycle_count;
1158 if(r==FSREG) addr=(int)&FCR31;
1159 u_int offset = addr-(u_int)&dynarec_local;
1160 assert(offset<4096);
1161 assem_debug("str %s,fp+%d",regname[hr],offset);
1162 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1163}
1164
1165static void emit_test(int rs, int rt)
1166{
1167 assem_debug("tst %s,%s",regname[rs],regname[rt]);
1168 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1169}
1170
1171static void emit_testimm(int rs,int imm)
1172{
1173 u_int armval, ret;
1174 assem_debug("tst %s,#%d",regname[rs],imm);
1175 ret = genimm(imm,&armval);
1176 assert(ret);
1177 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1178}
1179
1180static void emit_not(int rs,int rt)
1181{
1182 assem_debug("mvn %s,%s",regname[rt],regname[rs]);
1183 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1184}
1185
1186static void emit_and(u_int rs1,u_int rs2,u_int rt)
1187{
1188 assem_debug("and %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1189 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1190}
1191
1192static void emit_or(u_int rs1,u_int rs2,u_int rt)
1193{
1194 assem_debug("orr %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1195 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1196}
1197static void emit_or_and_set_flags(int rs1,int rs2,int rt)
1198{
1199 assem_debug("orrs %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1200 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1201}
1202
1203static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1204{
1205 assem_debug("eor %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
1206 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1207}
1208
1209static void emit_addimm(u_int rs,int imm,u_int rt)
1210{
1211 assert(rs<16);
1212 assert(rt<16);
1213 if(imm!=0) {
1214 assert(imm>-65536&&imm<65536);
1215 u_int armval;
1216 if(genimm(imm,&armval)) {
1217 assem_debug("add %s,%s,#%d",regname[rt],regname[rs],imm);
1218 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1219 }else if(genimm(-imm,&armval)) {
1220 assem_debug("sub %s,%s,#%d",regname[rt],regname[rs],imm);
1221 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1222 }else if(imm<0) {
1223 assem_debug("sub %s,%s,#%d",regname[rt],regname[rs],(-imm)&0xFF00);
1224 assem_debug("sub %s,%s,#%d",regname[rt],regname[rt],(-imm)&0xFF);
1225 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1226 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1227 }else{
1228 assem_debug("add %s,%s,#%d",regname[rt],regname[rs],imm&0xFF00);
1229 assem_debug("add %s,%s,#%d",regname[rt],regname[rt],imm&0xFF);
1230 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1231 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1232 }
1233 }
1234 else if(rs!=rt) emit_mov(rs,rt);
1235}
1236
1237static void emit_addimm_and_set_flags(int imm,int rt)
1238{
1239 assert(imm>-65536&&imm<65536);
1240 u_int armval;
1241 if(genimm(imm,&armval)) {
1242 assem_debug("adds %s,%s,#%d",regname[rt],regname[rt],imm);
1243 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1244 }else if(genimm(-imm,&armval)) {
1245 assem_debug("subs %s,%s,#%d",regname[rt],regname[rt],imm);
1246 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1247 }else if(imm<0) {
1248 assem_debug("sub %s,%s,#%d",regname[rt],regname[rt],(-imm)&0xFF00);
1249 assem_debug("subs %s,%s,#%d",regname[rt],regname[rt],(-imm)&0xFF);
1250 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1251 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1252 }else{
1253 assem_debug("add %s,%s,#%d",regname[rt],regname[rt],imm&0xFF00);
1254 assem_debug("adds %s,%s,#%d",regname[rt],regname[rt],imm&0xFF);
1255 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1256 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1257 }
1258}
1259
1260#ifndef RAM_OFFSET
1261static void emit_addimm_no_flags(u_int imm,u_int rt)
1262{
1263 emit_addimm(rt,imm,rt);
1264}
1265#endif
1266
1267static void emit_addnop(u_int r)
1268{
1269 assert(r<16);
1270 assem_debug("add %s,%s,#0 (nop)",regname[r],regname[r]);
1271 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1272}
1273
1274static void emit_adcimm(u_int rs,int imm,u_int rt)
1275{
1276 u_int armval, ret;
1277 ret = genimm(imm,&armval);
1278 assert(ret);
1279 assem_debug("adc %s,%s,#%d",regname[rt],regname[rs],imm);
1280 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1281}
1282/*static void emit_sbcimm(int imm,u_int rt)
1283{
1284 u_int armval, ret;
1285 ret = genimm(imm,&armval);
1286 assert(ret);
1287 assem_debug("sbc %s,%s,#%d",regname[rt],regname[rt],imm);
1288 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1289}*/
1290
ce68e3b9 1291static void emit_rscimm(int rs,int imm,u_int rt) //*SEB* why the assert(0) here?
451ab91e 1292{
ce68e3b9 1293// assert(0);
451ab91e 1294 u_int armval, ret;
1295 ret = genimm(imm,&armval);
1296 assert(ret);
1297 assem_debug("rsc %s,%s,#%d",regname[rt],regname[rs],imm);
1298 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1299}
1300
1301static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1302{
1303 // TODO: if(genimm(imm,&armval)) ...
1304 // else
1305 emit_movimm(imm,HOST_TEMPREG);
1306 emit_adds(HOST_TEMPREG,rsl,rtl);
1307 emit_adcimm(rsh,0,rth);
1308}
#ifdef INVERTED_CARRY
// Emit "sbb" as opcode byte 0x19 followed by a ModRM byte.
// NOTE(review): this looks like an x86 encoding left in the ARM backend;
// it is only compiled when INVERTED_CARRY is defined.
static void emit_sbb(int rs1,int rs2)
{
  assem_debug("sbb %%%s,%%%s",regname[rs2],regname[rs1]);
  const int opcode=0x19;
  output_byte(opcode);
  output_modrm(3,rs1,rs2);
}
#endif
1317
1318static void emit_andimm(int rs,int imm,int rt)
1319{
1320 u_int armval;
1321 if(imm==0) {
1322 emit_zeroreg(rt);
1323 }else if(genimm(imm,&armval)) {
1324 assem_debug("and %s,%s,#%d",regname[rt],regname[rs],imm);
1325 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1326 }else if(genimm(~imm,&armval)) {
1327 assem_debug("bic %s,%s,#%d",regname[rt],regname[rs],imm);
1328 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1329 }else if(imm==65535) {
1330 #ifdef ARMv5_ONLY
1331 assem_debug("bic %s,%s,#FF000000",regname[rt],regname[rs]);
1332 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1333 assem_debug("bic %s,%s,#00FF0000",regname[rt],regname[rt]);
1334 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1335 #else
1336 assem_debug("uxth %s,%s",regname[rt],regname[rs]);
1337 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1338 #endif
1339 }else{
1340 assert(imm>0&&imm<65535);
1341 #ifdef ARMv5_ONLY
1342 assem_debug("mov r14,#%d",imm&0xFF00);
1343 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1344 assem_debug("add r14,r14,#%d",imm&0xFF);
1345 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1346 #else
1347 emit_movw(imm,HOST_TEMPREG);
1348 #endif
1349 assem_debug("and %s,%s,r14",regname[rt],regname[rs]);
1350 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1351 }
1352}
1353
1354static void emit_orimm(int rs,int imm,int rt)
1355{
1356 u_int armval;
1357 if(imm==0) {
1358 if(rs!=rt) emit_mov(rs,rt);
1359 }else if(genimm(imm,&armval)) {
1360 assem_debug("orr %s,%s,#%d",regname[rt],regname[rs],imm);
1361 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1362 }else{
1363 assert(imm>0&&imm<65536);
1364 assem_debug("orr %s,%s,#%d",regname[rt],regname[rs],imm&0xFF00);
1365 assem_debug("orr %s,%s,#%d",regname[rt],regname[rs],imm&0xFF);
1366 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1367 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1368 }
1369}
1370
1371static void emit_xorimm(int rs,int imm,int rt)
1372{
1373 u_int armval;
1374 if(imm==0) {
1375 if(rs!=rt) emit_mov(rs,rt);
1376 }else if(genimm(imm,&armval)) {
1377 assem_debug("eor %s,%s,#%d",regname[rt],regname[rs],imm);
1378 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1379 }else{
1380 assert(imm>0&&imm<65536);
1381 assem_debug("eor %s,%s,#%d",regname[rt],regname[rs],imm&0xFF00);
1382 assem_debug("eor %s,%s,#%d",regname[rt],regname[rs],imm&0xFF);
1383 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1384 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1385 }
1386}
1387
1388static void emit_shlimm(int rs,u_int imm,int rt)
1389{
1390 assert(imm>0);
1391 assert(imm<32);
1392 //if(imm==1) ...
1393 assem_debug("lsl %s,%s,#%d",regname[rt],regname[rs],imm);
1394 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1395}
1396
1397static void emit_shrimm(int rs,u_int imm,int rt)
1398{
1399 assert(imm>0);
1400 assert(imm<32);
1401 assem_debug("lsr %s,%s,#%d",regname[rt],regname[rs],imm);
1402 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1403}
1404
1405static void emit_sarimm(int rs,u_int imm,int rt)
1406{
1407 assert(imm>0);
1408 assert(imm<32);
1409 assem_debug("asr %s,%s,#%d",regname[rt],regname[rs],imm);
1410 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1411}
1412
1413static void emit_rorimm(int rs,u_int imm,int rt)
1414{
1415 assert(imm>0);
1416 assert(imm<32);
1417 assem_debug("ror %s,%s,#%d",regname[rt],regname[rs],imm);
1418 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1419}
1420
1421static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1422{
1423 assem_debug("shld %%%s,%%%s,%d",regname[rt],regname[rs2],imm);
1424 assert(imm>0);
1425 assert(imm<32);
1426 //if(imm==1) ...
1427 assem_debug("lsl %s,%s,#%d",regname[rt],regname[rs],imm);
1428 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1429 assem_debug("orr %s,%s,%s,lsr #%d",regname[rt],regname[rt],regname[rs2],32-imm);
1430 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1431}
1432
1433static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1434{
1435 assem_debug("shrd %%%s,%%%s,%d",regname[rt],regname[rs2],imm);
1436 assert(imm>0);
1437 assert(imm<32);
1438 //if(imm==1) ...
1439 assem_debug("lsr %s,%s,#%d",regname[rt],regname[rs],imm);
1440 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1441 assem_debug("orr %s,%s,%s,lsl #%d",regname[rt],regname[rt],regname[rs2],32-imm);
1442 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1443}
1444
1445static void emit_shl(u_int rs,u_int shift,u_int rt)
1446{
1447 assert(rs<16);
1448 assert(rt<16);
1449 assert(shift<16);
1450 //if(imm==1) ...
1451 assem_debug("lsl %s,%s,%s",regname[rt],regname[rs],regname[shift]);
1452 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1453}
1454static void emit_shr(u_int rs,u_int shift,u_int rt)
1455{
1456 assert(rs<16);
1457 assert(rt<16);
1458 assert(shift<16);
1459 assem_debug("lsr %s,%s,%s",regname[rt],regname[rs],regname[shift]);
1460 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1461}
1462static void emit_sar(u_int rs,u_int shift,u_int rt)
1463{
1464 assert(rs<16);
1465 assert(rt<16);
1466 assert(shift<16);
1467 assem_debug("asr %s,%s,%s",regname[rt],regname[rs],regname[shift]);
1468 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1469}
1470
1471static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1472{
1473 assert(rs<16);
1474 assert(rt<16);
1475 assert(shift<16);
1476 assem_debug("orr %s,%s,%s,lsl %s",regname[rt],regname[rt],regname[rs],regname[shift]);
1477 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1478}
1479static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1480{
1481 assert(rs<16);
1482 assert(rt<16);
1483 assert(shift<16);
1484 assem_debug("orr %s,%s,%s,lsr %s",regname[rt],regname[rt],regname[rs],regname[shift]);
1485 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1486}
1487
1488static void emit_cmpimm(int rs,int imm)
1489{
1490 u_int armval;
1491 if(genimm(imm,&armval)) {
1492 assem_debug("cmp %s,#%d",regname[rs],imm);
1493 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1494 }else if(genimm(-imm,&armval)) {
1495 assem_debug("cmn %s,#%d",regname[rs],imm);
1496 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1497 }else if(imm>0) {
1498 assert(imm<65536);
1499 #ifdef ARMv5_ONLY
1500 emit_movimm(imm,HOST_TEMPREG);
1501 #else
1502 emit_movw(imm,HOST_TEMPREG);
1503 #endif
1504 assem_debug("cmp %s,r14",regname[rs]);
1505 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1506 }else{
1507 assert(imm>-65536);
1508 #ifdef ARMv5_ONLY
1509 emit_movimm(-imm,HOST_TEMPREG);
1510 #else
1511 emit_movw(-imm,HOST_TEMPREG);
1512 #endif
1513 assem_debug("cmn %s,r14",regname[rs]);
1514 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1515 }
1516}
1517
1518static void emit_cmovne_imm(int imm,int rt)
1519{
1520 assem_debug("movne %s,#%d",regname[rt],imm);
1521 u_int armval, ret;
1522 ret = genimm(imm,&armval);
1523 assert(ret);
1524 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1525}
1526static void emit_cmovl_imm(int imm,int rt)
1527{
1528 assem_debug("movlt %s,#%d",regname[rt],imm);
1529 u_int armval, ret;
1530 ret = genimm(imm,&armval);
1531 assert(ret);
1532 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1533}
1534static void emit_cmovb_imm(int imm,int rt)
1535{
1536 assem_debug("movcc %s,#%d",regname[rt],imm);
1537 u_int armval, ret;
1538 ret = genimm(imm,&armval);
1539 assert(ret);
1540 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1541}
1542static void emit_cmovs_imm(int imm,int rt)
1543{
1544 assem_debug("movmi %s,#%d",regname[rt],imm);
1545 u_int armval, ret;
1546 ret = genimm(imm,&armval);
1547 assert(ret);
1548 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1549}
1550static void emit_cmove_reg(int rs,int rt)
1551{
1552 assem_debug("moveq %s,%s",regname[rt],regname[rs]);
1553 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1554}
1555static void emit_cmovne_reg(int rs,int rt)
1556{
1557 assem_debug("movne %s,%s",regname[rt],regname[rs]);
1558 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1559}
1560static void emit_cmovl_reg(int rs,int rt)
1561{
1562 assem_debug("movlt %s,%s",regname[rt],regname[rs]);
1563 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1564}
1565static void emit_cmovs_reg(int rs,int rt)
1566{
1567 assem_debug("movmi %s,%s",regname[rt],regname[rs]);
1568 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1569}
1570
// Set rt to 1 if rs < imm (movlt after the compare), else 0.
// When rt is a different register it is zeroed before the compare; when
// rt aliases rs the zero is loaded after the compare instead.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if the compare of rs with imm leaves condition "cc"
// (movcc after the compare), else 0.
// When rt is a different register it is zeroed before the compare; when
// rt aliases rs the zero is loaded after the compare instead.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Less-than test of the register pair rsh:rsl against imm, result in rt.
// The low-word result from emit_slti32 is corrected using the high word:
// tested against itself when imm>=0, compared with -1 when imm<0.
// rt must not be the same register as rsh.
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Counterpart of emit_slti64_32 built on emit_sltiu32: test of the register
// pair rsh:rsl against imm, result in rt.
// For imm>=0 a non-zero high word forces the result to 0; for imm<0 a high
// word different from -1 forces it to 1. rt must not be rsh.
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1617
1618static void emit_cmp(int rs,int rt)
1619{
1620 assem_debug("cmp %s,%s",regname[rs],regname[rt]);
1621 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1622}
// Set rt to 1 unless rs < 1 (in which case movlt writes 0): compares rs
// with 1, preloads rt with 1, then conditionally clears it.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32");
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Set rt to 1 when rs is non-zero. Flags come from "tst rs,rs" when rt
// aliases rs, otherwise from a flag-setting move of rs into rt; movne then
// overwrites rt with 1.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// 64-bit variant of emit_set_gz32 for the pair rsh:rsl, result in rt.
// Starts from the low-word result, then the high word overrides it:
// non-zero high word -> 1, negative high word -> 0.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64");
  emit_set_gz32(rsl,rt);   // low word first
  emit_test(rsh,rsh);      // flags from the high word
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// Set rt to 1 when the pair rsh:rsl is non-zero: rt receives rsh|rsl via a
// flag-setting orr, and movne replaces any non-zero result with 1.
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64");
  emit_or_and_set_flags(rsh,rsl,rt); // rt = rsh|rsl, flags updated
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (movlt after "cmp rs1,rs2"), else 0.
// rt is zeroed before the compare unless it aliases one of the operands,
// in which case the zero is loaded after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if "cmp rs1,rs2" leaves condition "cc" (movcc), else 0.
// rt is zeroed before the compare unless it aliases one of the operands,
// in which case the zero is loaded after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1667static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1668{
1669 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1670 assert(u1!=rt);
1671 assert(u2!=rt);
1672 emit_cmp(l1,l2);
1673 emit_movimm(0,rt);
1674 emit_sbcs(u1,u2,HOST_TEMPREG);
1675 emit_cmovl_imm(1,rt);
1676}
1677static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1678{
1679 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1680 assert(u1!=rt);
1681 assert(u2!=rt);
1682 emit_cmp(l1,l2);
1683 emit_movimm(0,rt);
1684 emit_sbcs(u1,u2,HOST_TEMPREG);
1685 emit_cmovb_imm(1,rt);
1686}
1687
1688static void emit_call(int a)
1689{
1690 assem_debug("bl %x (%x+%x)",a,(int)out,a-(int)out-8);
1691 u_int offset=genjmp(a);
1692 output_w32(0xeb000000|offset);
1693}
1694static void emit_jmp(int a)
1695{
1696 assem_debug("b %x (%x+%x)",a,(int)out,a-(int)out-8);
1697 u_int offset=genjmp(a);
1698 output_w32(0xea000000|offset);
1699}
1700static void emit_jne(int a)
1701{
1702 assem_debug("bne %x",a);
1703 u_int offset=genjmp(a);
1704 output_w32(0x1a000000|offset);
1705}
1706static void emit_jeq(int a)
1707{
1708 assem_debug("beq %x",a);
1709 u_int offset=genjmp(a);
1710 output_w32(0x0a000000|offset);
1711}
1712static void emit_js(int a)
1713{
1714 assem_debug("bmi %x",a);
1715 u_int offset=genjmp(a);
1716 output_w32(0x4a000000|offset);
1717}
1718static void emit_jns(int a)
1719{
1720 assem_debug("bpl %x",a);
1721 u_int offset=genjmp(a);
1722 output_w32(0x5a000000|offset);
1723}
1724static void emit_jl(int a)
1725{
1726 assem_debug("blt %x",a);
1727 u_int offset=genjmp(a);
1728 output_w32(0xba000000|offset);
1729}
1730static void emit_jge(int a)
1731{
1732 assem_debug("bge %x",a);
1733 u_int offset=genjmp(a);
1734 output_w32(0xaa000000|offset);
1735}
1736static void emit_jno(int a)
1737{
1738 assem_debug("bvc %x",a);
1739 u_int offset=genjmp(a);
1740 output_w32(0x7a000000|offset);
1741}
1742
1743static void emit_jcc(int a)
1744{
1745 assem_debug("bcc %x",a);
1746 u_int offset=genjmp(a);
1747 output_w32(0x3a000000|offset);
1748}
1749
1750static void emit_pushreg(u_int r)
1751{
1752 assem_debug("push %%%s",regname[r]);
1753 assert(0);
1754}
1755static void emit_popreg(u_int r)
1756{
1757 assem_debug("pop %%%s",regname[r]);
1758 assert(0);
1759}
1760/*
1761static void emit_callreg(u_int r)
1762{
1763 assem_debug("call *%%%s",regname[r]);
1764 assert(0);
1765}
1766static void emit_jmpreg(u_int r)
1767{
1768 assem_debug("mov pc,%s",regname[r]);
1769 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1770}
1771*/
1772static void emit_readword_indexed(int offset, int rs, int rt)
1773{
1774 assert(offset>-4096&&offset<4096);
1775 assem_debug("ldr %s,%s+%d",regname[rt],regname[rs],offset);
1776 if(offset>=0) {
1777 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1778 }else{
1779 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1780 }
1781}
1782static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1783{
1784 assem_debug("ldr %s,%s,%s lsl #2",regname[rt],regname[rs1],regname[rs2]);
1785 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1786}
// Word load that optionally goes through a map register.
// map<0: plain rs+addr load. Otherwise addr must be 0 and the load is
// indexed by map scaled by 4.
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
1795static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
1796{
1797 if(map<0) {
1798 if(rh>=0) emit_readword_indexed(addr, rs, rh);
1799 emit_readword_indexed(addr+4, rs, rl);
1800 }else{
1801 assert(rh!=rs);
1802 if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
1803 emit_addimm(map,1,HOST_TEMPREG);
1804 emit_readword_indexed_tlb(addr, rs, HOST_TEMPREG, rl);
1805 }
1806}
1807static void emit_movsbl_indexed(int offset, int rs, int rt)
1808{
1809 assert(offset>-256&&offset<256);
1810 assem_debug("ldrsb %s,%s+%d",regname[rt],regname[rs],offset);
1811 if(offset>=0) {
1812 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1813 }else{
1814 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1815 }
1816}
1817static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1818{
1819 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1820 else {
1821 if(addr==0) {
1822 emit_shlimm(map,2,HOST_TEMPREG);
1823 assem_debug("ldrsb %s,%s+%s",regname[rt],regname[rs],regname[HOST_TEMPREG]);
1824 output_w32(0xe19000d0|rd_rn_rm(rt,rs,HOST_TEMPREG));
1825 }else{
1826 assert(addr>-256&&addr<256);
1827 assem_debug("add %s,%s,%s,lsl #2",regname[rt],regname[rs],regname[map]);
1828 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1829 emit_movsbl_indexed(addr, rt, rt);
1830 }
1831 }
1832}
1833static void emit_movswl_indexed(int offset, int rs, int rt)
1834{
1835 assert(offset>-256&&offset<256);
1836 assem_debug("ldrsh %s,%s+%d",regname[rt],regname[rs],offset);
1837 if(offset>=0) {
1838 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1839 }else{
1840 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1841 }
1842}
1843static void emit_movzbl_indexed(int offset, int rs, int rt)
1844{
1845 assert(offset>-4096&&offset<4096);
1846 assem_debug("ldrb %s,%s+%d",regname[rt],regname[rs],offset);
1847 if(offset>=0) {
1848 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1849 }else{
1850 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1851 }
1852}
1853static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1854{
1855 assem_debug("ldrb %s,%s,%s lsl #2",regname[rt],regname[rs1],regname[rs2]);
1856 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1857}
// Zero-extending byte load that optionally goes through a map register.
// map<0: plain rs+addr load. Otherwise the load is indexed by map scaled
// by 4; a non-zero addr is first added to rs, using rt as the temporary.
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1870static void emit_movzwl_indexed(int offset, int rs, int rt)
1871{
1872 assert(offset>-256&&offset<256);
1873 assem_debug("ldrh %s,%s+%d",regname[rt],regname[rs],offset);
1874 if(offset>=0) {
1875 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1876 }else{
1877 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1878 }
1879}
1880static void emit_readword(int addr, int rt)
1881{
1882 u_int offset = addr-(u_int)&dynarec_local;
1883 assert(offset<4096);
1884 assem_debug("ldr %s,fp+%d",regname[rt],offset);
1885 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1886}
1887static void emit_movsbl(int addr, int rt)
1888{
1889 u_int offset = addr-(u_int)&dynarec_local;
1890 assert(offset<256);
1891 assem_debug("ldrsb %s,fp+%d",regname[rt],offset);
1892 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1893}
1894static void emit_movswl(int addr, int rt)
1895{
1896 u_int offset = addr-(u_int)&dynarec_local;
1897 assert(offset<256);
1898 assem_debug("ldrsh %s,fp+%d",regname[rt],offset);
1899 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1900}
1901static void emit_movzbl(int addr, int rt)
1902{
1903 u_int offset = addr-(u_int)&dynarec_local;
1904 assert(offset<4096);
1905 assem_debug("ldrb %s,fp+%d",regname[rt],offset);
1906 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1907}
1908static void emit_movzwl(int addr, int rt)
1909{
1910 u_int offset = addr-(u_int)&dynarec_local;
1911 assert(offset<256);
1912 assem_debug("ldrh %s,fp+%d",regname[rt],offset);
1913 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1914}
1915
1916/*
1917static void emit_movzwl_reg(int rs, int rt)
1918{
1919 assem_debug("movzwl %%%s,%%%s",regname[rs]+1,regname[rt]);
1920 assert(0);
1921}
1922*/
1923
1924static void emit_writeword_indexed(int rt, int offset, int rs)
1925{
1926 assert(offset>-4096&&offset<4096);
1927 assem_debug("str %s,%s+%d",regname[rt],regname[rs],offset);
1928 if(offset>=0) {
1929 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1930 }else{
1931 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1932 }
1933}
1934static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1935{
1936 assem_debug("str %s,%s,%s lsl #2",regname[rt],regname[rs1],regname[rs2]);
1937 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1938}
// Word store that optionally goes through a map register.
// map<0: plain rs+addr store. Otherwise addr must be 0 and the store is
// indexed by map scaled by 4. The temp argument is not used here.
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh (first word) and rl (second word).
// map<0: two plain stores at addr and addr+4 (rh is skipped when negative).
// Otherwise rh is required. If temp is a register distinct from rs, it
// receives map+1 and indexes the second store; if temp==rs, rs itself is
// advanced by 4 between the two stores.
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1963static void emit_writehword_indexed(int rt, int offset, int rs)
1964{
1965 assert(offset>-256&&offset<256);
1966 assem_debug("strh %s,%s+%d",regname[rt],regname[rs],offset);
1967 if(offset>=0) {
1968 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1969 }else{
1970 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1971 }
1972}
1973static void emit_writebyte_indexed(int rt, int offset, int rs)
1974{
1975 assert(offset>-4096&&offset<4096);
1976 assem_debug("strb %s,%s+%d",regname[rt],regname[rs],offset);
1977 if(offset>=0) {
1978 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1979 }else{
1980 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1981 }
1982}
1983static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1984{
1985 assem_debug("strb %s,%s,%s lsl #2",regname[rt],regname[rs1],regname[rs2]);
1986 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1987}
// Byte store that optionally goes through a map register.
// map<0: plain rs+addr store. Otherwise the store is indexed by map scaled
// by 4; a non-zero addr is first added to rs, with the sum placed in temp.
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
2000static void emit_writeword(int rt, int addr)
2001{
2002 u_int offset = addr-(u_int)&dynarec_local;
2003 assert(offset<4096);
2004 assem_debug("str %s,fp+%d",regname[rt],offset);
2005 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2006}
2007static void emit_writehword(int rt, int addr)
2008{
2009 u_int offset = addr-(u_int)&dynarec_local;
2010 assert(offset<256);
2011 assem_debug("strh %s,fp+%d",regname[rt],offset);
2012 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2013}
2014static void emit_writebyte(int rt, int addr)
2015{
2016 u_int offset = addr-(u_int)&dynarec_local;
2017 assert(offset<4096);
2018 assem_debug("strb %s,fp+%d",regname[rt],offset);
2019 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2020}
2021
2022/*
2023static void emit_mul(int rs)
2024{
2025 assem_debug("mul %%%s",regname[rs]);
2026 assert(0);
2027}
2028*/
2029
2030static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2031{
2032 assem_debug("umull %s, %s, %s, %s",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2033 assert(rs1<16);
2034 assert(rs2<16);
2035 assert(hi<16);
2036 assert(lo<16);
2037 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2038}
2039static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2040{
2041 assem_debug("smull %s, %s, %s, %s",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2042 assert(rs1<16);
2043 assert(rs2<16);
2044 assert(hi<16);
2045 assert(lo<16);
2046 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2047}
2048
2049static void emit_clz(int rs,int rt)
2050{
2051 assem_debug("clz %s,%s",regname[rt],regname[rs]);
2052 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2053}
2054
2055static void emit_subcs(int rs1,int rs2,int rt)
2056{
2057 assem_debug("subcs %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
2058 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2059}
2060
2061static void emit_shrcc_imm(int rs,u_int imm,int rt)
2062{
2063 assert(imm>0);
2064 assert(imm<32);
2065 assem_debug("lsrcc %s,%s,#%d",regname[rt],regname[rs],imm);
2066 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2067}
2068
2069static void emit_negmi(int rs, int rt)
2070{
2071 assem_debug("rsbmi %s,%s,#0",regname[rt],regname[rs]);
2072 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2073}
2074
2075static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2076{
2077 assem_debug("orreq %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
2078 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2079}
2080
2081static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2082{
2083 assem_debug("orrne %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
2084 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2085}
2086
2087static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2088{
2089 assem_debug("bic %s,%s,%s lsl %s",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2090 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2091}
2092
2093static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2094{
2095 assem_debug("biceq %s,%s,%s lsl %s",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2096 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2097}
2098
2099static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2100{
2101 assem_debug("bicne %s,%s,%s lsl %s",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2102 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2103}
2104
2105static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2106{
2107 assem_debug("bic %s,%s,%s lsr %s",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2108 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2109}
2110
2111static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2112{
2113 assem_debug("biceq %s,%s,%s lsr %s",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2114 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2115}
2116
2117static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2118{
2119 assem_debug("bicne %s,%s,%s lsr %s",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2120 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2121}
2122
2123static void emit_teq(int rs, int rt)
2124{
2125 assem_debug("teq %s,%s",regname[rs],regname[rt]);
2126 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2127}
2128
2129static void emit_rsbimm(int rs, int imm, int rt)
2130{
2131 u_int armval, ret;
2132 ret = genimm(imm,&armval);
2133 assert(ret);
2134 assem_debug("rsb %s,%s,#%d",regname[rt],regname[rs],imm);
2135 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2136}
2137
2138// Load 2 immediates optimizing for small code size
2139static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2140{
2141 emit_movimm(imm1,rt1);
2142 u_int armval;
2143 if(genimm(imm2-imm1,&armval)) {
2144 assem_debug("add %s,%s,#%d",regname[rt2],regname[rt1],imm2-imm1);
2145 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2146 }else if(genimm(imm1-imm2,&armval)) {
2147 assem_debug("sub %s,%s,#%d",regname[rt2],regname[rt1],imm1-imm2);
2148 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2149 }
2150 else emit_movimm(imm2,rt2);
2151}
2152
2153// Conditionally select one of two immediates, optimizing for small code size
2154// This will only be called if HAVE_CMOV_IMM is defined
2155static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2156{
2157 u_int armval;
2158 if(genimm(imm2-imm1,&armval)) {
2159 emit_movimm(imm1,rt);
2160 assem_debug("addne %s,%s,#%d",regname[rt],regname[rt],imm2-imm1);
2161 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2162 }else if(genimm(imm1-imm2,&armval)) {
2163 emit_movimm(imm1,rt);
2164 assem_debug("subne %s,%s,#%d",regname[rt],regname[rt],imm1-imm2);
2165 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2166 }
2167 else {
2168 #ifdef ARMv5_ONLY
2169 emit_movimm(imm1,rt);
2170 add_literal((int)out,imm2);
2171 assem_debug("ldrne %s,pc+? [=%x]",regname[rt],imm2);
2172 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2173 #else
2174 emit_movw(imm1&0x0000FFFF,rt);
2175 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2176 assem_debug("movwne %s,#%d (0x%x)",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2177 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2178 }
2179 emit_movt(imm1&0xFFFF0000,rt);
2180 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2181 assem_debug("movtne %s,#%d (0x%x)",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2182 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2183 }
2184 #endif
2185 }
2186}
2187
2188#if !defined(HOST_IMM8)
// Special case for checking invalid_code.
// Placeholder only: this variant must never be reached, so it asserts
// unconditionally.
static void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2194#endif
2195
2196// special case for checking invalid_code
2197static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2198{
2199 assert(imm<128&&imm>=0);
2200 assert(r>=0&&r<16);
2201 assem_debug("ldrb lr,%s,%s lsr #12",regname[base],regname[r]);
2202 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2203 emit_cmpimm(HOST_TEMPREG,imm);
2204}
2205
2206// special case for tlb mapping
2207static void emit_addsr12(int rs1,int rs2,int rt)
2208{
2209 assem_debug("add %s,%s,%s lsr #12",regname[rt],regname[rs1],regname[rs2]);
2210 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2211}
2212
2213static void emit_callne(int a)
2214{
2215 assem_debug("blne %x",a);
2216 u_int offset=genjmp(a);
2217 output_w32(0x1b000000|offset);
2218}
2219
2220#ifdef IMM_PREFETCH
// Used to preload hash table entries.
// NOTE(review): only built when IMM_PREFETCH is defined; the byte/modrm
// output helpers used here look like leftovers from another back end - confirm.
static void emit_prefetch(void *addr)
{
  const int target = (int)addr;
  assem_debug("prefetch %x",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2230#endif
2231
2232#ifdef REG_PREFETCH
2233static void emit_prefetchreg(int r)
2234{
2235 assem_debug("pld %s",regname[r]);
2236 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2237}
2238#endif
2239
2240// Special case for mini_ht
2241static void emit_ldreq_indexed(int rs, u_int offset, int rt)
2242{
2243 assert(offset<4096);
2244 assem_debug("ldreq %s,[%s, #%d]",regname[rt],regname[rs],offset);
2245 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2246}
2247
2248static void emit_flds(int r,int sr)
2249{
2250 assem_debug("flds s%d,[%s]",sr,regname[r]);
2251 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2252}
2253
2254static void emit_vldr(int r,int vr)
2255{
2256 assem_debug("vldr d%d,[%s]",vr,regname[r]);
2257 output_w32(0xed900b00|(vr<<12)|(r<<16));
2258}
2259
2260static void emit_fsts(int sr,int r)
2261{
2262 assem_debug("fsts s%d,[%s]",sr,regname[r]);
2263 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2264}
2265
2266static void emit_vstr(int vr,int r)
2267{
2268 assem_debug("vstr d%d,[%s]",vr,regname[r]);
2269 output_w32(0xed800b00|(vr<<12)|(r<<16));
2270}
2271
// Emit "ftosizs s<d>,s<s>": single-precision source, single-register result.
static void emit_ftosizs(int s,int d)
{
  assem_debug("ftosizs s%d,s%d",d,s);
  const int dest_bits = ((d & 14) << 11) | ((d & 1) << 22);
  const int src_bits = ((s & 14) >> 1) | ((s & 1) << 5);
  output_w32(0xeebd0ac0 | dest_bits | src_bits);
}
2277
// Emit "ftosizd s<d>,d<s>": double-precision source, single-register result.
static void emit_ftosizd(int s,int d)
{
  assem_debug("ftosizd s%d,d%d",d,s);
  const int dest_bits = ((d & 14) << 11) | ((d & 1) << 22);
  const int src_bits = s & 7;
  output_w32(0xeebd0bc0 | dest_bits | src_bits);
}
2283
// Emit "fsitos s<d>,s<s>": both operands are single-precision registers.
static void emit_fsitos(int s,int d)
{
  assem_debug("fsitos s%d,s%d",d,s);
  const int dest_bits = ((d & 14) << 11) | ((d & 1) << 22);
  const int src_bits = ((s & 14) >> 1) | ((s & 1) << 5);
  output_w32(0xeeb80ac0 | dest_bits | src_bits);
}
2289
// Emit "fsitod d<d>,s<s>": single-register source, double-precision result.
static void emit_fsitod(int s,int d)
{
  assem_debug("fsitod d%d,s%d",d,s);
  const int dest_bits = (d & 7) << 12;
  const int src_bits = ((s & 14) >> 1) | ((s & 1) << 5);
  output_w32(0xeeb80bc0 | dest_bits | src_bits);
}
2295
// Emit "fcvtds d<d>,s<s>": single-precision source, double-precision result.
static void emit_fcvtds(int s,int d)
{
  assem_debug("fcvtds d%d,s%d",d,s);
  const int dest_bits = (d & 7) << 12;
  const int src_bits = ((s & 14) >> 1) | ((s & 1) << 5);
  output_w32(0xeeb70ac0 | dest_bits | src_bits);
}
2301
// Emit "fcvtsd s<d>,d<s>": double-precision source, single-precision result.
static void emit_fcvtsd(int s,int d)
{
  assem_debug("fcvtsd s%d,d%d",d,s);
  const int dest_bits = ((d & 14) << 11) | ((d & 1) << 22);
  const int src_bits = s & 7;
  output_w32(0xeeb70bc0 | dest_bits | src_bits);
}
2307
// Emit "fsqrts s<d>,s<s>": single-precision square root.
// Both operands are encoded as single-precision registers (bits 3:1 of the
// number in the 4-bit field, bit 0 in the extra D/M bit), so the debug
// trace names them s<n>; it previously printed the destination as d<n>.
static void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2313
// Emit "fsqrtd d<d>,d<s>": double-precision square root.
// Both operands are encoded as double-precision registers, so the debug
// trace names them d<n>; it previously printed the destination as s<n>.
static void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2319
// Emit "fabss s<d>,s<s>": single-precision absolute value.
// Both operands are encoded as single-precision registers, so the debug
// trace names them s<n>; it previously printed the destination as d<n>.
static void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2325
// Emit "fabsd d<d>,d<s>": double-precision absolute value.
// Both operands are encoded as double-precision registers, so the debug
// trace names them d<n>; it previously printed the destination as s<n>.
static void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2331
// Emit "fnegs s<d>,s<s>": single-precision negate.
// Both operands are encoded as single-precision registers, so the debug
// trace names them s<n>; it previously printed the destination as d<n>.
static void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2337
// Emit "fnegd d<d>,d<s>": double-precision negate.
// Both operands are encoded as double-precision registers, so the debug
// trace names them d<n>; it previously printed the destination as s<n>.
static void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2343
// Emit "fadds s<d>,s<s1>,s<s2>": single-precision add.
static void emit_fadds(int s1,int s2,int d)
{
  assem_debug("fadds s%d,s%d,s%d",d,s1,s2);
  const int dest_bits = ((d & 14) << 11) | ((d & 1) << 22);
  const int lhs_bits = ((s1 & 14) << 15) | ((s1 & 1) << 7);
  const int rhs_bits = ((s2 & 14) >> 1) | ((s2 & 1) << 5);
  output_w32(0xee300a00 | dest_bits | lhs_bits | rhs_bits);
}
2349
// Emit "faddd d<d>,d<s1>,d<s2>": double-precision add.
static void emit_faddd(int s1,int s2,int d)
{
  assem_debug("faddd d%d,d%d,d%d",d,s1,s2);
  const int dest_bits = (d & 7) << 12;
  const int lhs_bits = (s1 & 7) << 16;
  const int rhs_bits = s2 & 7;
  output_w32(0xee300b00 | dest_bits | lhs_bits | rhs_bits);
}
2355
// Emit "fsubs s<d>,s<s1>,s<s2>": single-precision subtract.
static void emit_fsubs(int s1,int s2,int d)
{
  assem_debug("fsubs s%d,s%d,s%d",d,s1,s2);
  const int dest_bits = ((d & 14) << 11) | ((d & 1) << 22);
  const int lhs_bits = ((s1 & 14) << 15) | ((s1 & 1) << 7);
  const int rhs_bits = ((s2 & 14) >> 1) | ((s2 & 1) << 5);
  output_w32(0xee300a40 | dest_bits | lhs_bits | rhs_bits);
}
2361
// Emit "fsubd d<d>,d<s1>,d<s2>": double-precision subtract.
static void emit_fsubd(int s1,int s2,int d)
{
  assem_debug("fsubd d%d,d%d,d%d",d,s1,s2);
  const int dest_bits = (d & 7) << 12;
  const int lhs_bits = (s1 & 7) << 16;
  const int rhs_bits = s2 & 7;
  output_w32(0xee300b40 | dest_bits | lhs_bits | rhs_bits);
}
2367
// Emit "fmuls s<d>,s<s1>,s<s2>": single-precision multiply.
static void emit_fmuls(int s1,int s2,int d)
{
  assem_debug("fmuls s%d,s%d,s%d",d,s1,s2);
  const int dest_bits = ((d & 14) << 11) | ((d & 1) << 22);
  const int lhs_bits = ((s1 & 14) << 15) | ((s1 & 1) << 7);
  const int rhs_bits = ((s2 & 14) >> 1) | ((s2 & 1) << 5);
  output_w32(0xee200a00 | dest_bits | lhs_bits | rhs_bits);
}
2373
// Emit "fmuld d<d>,d<s1>,d<s2>": double-precision multiply.
static void emit_fmuld(int s1,int s2,int d)
{
  assem_debug("fmuld d%d,d%d,d%d",d,s1,s2);
  const int dest_bits = (d & 7) << 12;
  const int lhs_bits = (s1 & 7) << 16;
  const int rhs_bits = s2 & 7;
  output_w32(0xee200b00 | dest_bits | lhs_bits | rhs_bits);
}
2379
// Emit "fdivs s<d>,s<s1>,s<s2>": single-precision divide.
static void emit_fdivs(int s1,int s2,int d)
{
  assem_debug("fdivs s%d,s%d,s%d",d,s1,s2);
  const int dest_bits = ((d & 14) << 11) | ((d & 1) << 22);
  const int lhs_bits = ((s1 & 14) << 15) | ((s1 & 1) << 7);
  const int rhs_bits = ((s2 & 14) >> 1) | ((s2 & 1) << 5);
  output_w32(0xee800a00 | dest_bits | lhs_bits | rhs_bits);
}
2385
// Emit "fdivd d<d>,d<s1>,d<s2>": double-precision divide.
static void emit_fdivd(int s1,int s2,int d)
{
  assem_debug("fdivd d%d,d%d,d%d",d,s1,s2);
  const int dest_bits = (d & 7) << 12;
  const int lhs_bits = (s1 & 7) << 16;
  const int rhs_bits = s2 & 7;
  output_w32(0xee800b00 | dest_bits | lhs_bits | rhs_bits);
}
2391
2392static void emit_fcmps(int x,int y)
2393{
2394 assem_debug("fcmps s14, s15");
2395 output_w32(0xeeb47a67);
2396}
2397
2398static void emit_fcmpd(int x,int y)
2399{
2400 assem_debug("fcmpd d6, d7");
2401 output_w32(0xeeb46b47);
2402}
2403
2404static void emit_fmstat()
2405{
2406 assem_debug("fmstat");
2407 output_w32(0xeef1fa10);
2408}
2409
2410static void emit_bicne_imm(int rs,int imm,int rt)
2411{
2412 u_int armval, ret;
2413 ret = genimm(imm,&armval);
2414 assert(ret);
2415 assem_debug("bicne %s,%s,#%d",regname[rt],regname[rs],imm);
2416 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2417}
2418
2419static void emit_biccs_imm(int rs,int imm,int rt)
2420{
2421 u_int armval, ret;
2422 ret = genimm(imm,&armval);
2423 assert(ret);
2424 assem_debug("biccs %s,%s,#%d",regname[rt],regname[rs],imm);
2425 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2426}
2427
2428static void emit_bicvc_imm(int rs,int imm,int rt)
2429{
2430 u_int armval, ret;
2431 ret = genimm(imm,&armval);
2432 assert(ret);
2433 assem_debug("bicvc %s,%s,#%d",regname[rt],regname[rs],imm);
2434 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2435}
2436
2437static void emit_bichi_imm(int rs,int imm,int rt)
2438{
2439 u_int armval, ret;
2440 ret = genimm(imm,&armval);
2441 assert(ret);
2442 assem_debug("bichi %s,%s,#%d",regname[rt],regname[rs],imm);
2443 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2444}
2445
2446static void emit_orrvs_imm(int rs,int imm,int rt)
2447{
2448 u_int armval, ret;
2449 ret = genimm(imm,&armval);
2450 assert(ret);
2451 assem_debug("orrvs %s,%s,#%d",regname[rt],regname[rs],imm);
2452 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2453}
2454
2455static void emit_jno_unlikely(int a)
2456{
2457 //emit_jno(a);
2458 assem_debug("addvc pc,pc,#? (%x)",/*a-(int)out-8,*/a);
2459 output_w32(0x72800000|rd_rn_rm(15,15,0));
2460}
2461
2462// Save registers before function call
2463static void save_regs(u_int reglist)
2464{
2465 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2466 if(!reglist) return;
2467 assem_debug("stmia fp,{");
2468 if(reglist&1) assem_debug("r0, ");
2469 if(reglist&2) assem_debug("r1, ");
2470 if(reglist&4) assem_debug("r2, ");
2471 if(reglist&8) assem_debug("r3, ");
2472 if(reglist&0x1000) assem_debug("r12");
2473 assem_debug("}");
2474 output_w32(0xe88b0000|reglist);
2475}
2476// Restore registers after function call
2477static void restore_regs(u_int reglist)
2478{
2479 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2480 if(!reglist) return;
2481 assem_debug("ldmia fp,{");
2482 if(reglist&1) assem_debug("r0, ");
2483 if(reglist&2) assem_debug("r1, ");
2484 if(reglist&4) assem_debug("r2, ");
2485 if(reglist&8) assem_debug("r3, ");
2486 if(reglist&0x1000) assem_debug("r12");
2487 assem_debug("}");
2488 output_w32(0xe89b0000|reglist);
2489}
2490
2491// Write back consts using r14 so we don't disturb the other registers
2492static void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2493{
2494 int hr;
2495 for(hr=0;hr<HOST_REGS;hr++) {
2496 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2497 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2498 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2499 int value=constmap[i][hr];
2500 if(value==0) {
2501 emit_zeroreg(HOST_TEMPREG);
2502 }
2503 else {
2504 emit_movimm(value,HOST_TEMPREG);
2505 }
2506 emit_storereg(i_regmap[hr],HOST_TEMPREG);
2507 if((i_is32>>i_regmap[hr])&1) {
2508 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2509 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2510 }
2511 }
2512 }
2513 }
2514 }
2515}
2516
2517/* Stubs/epilogue */
2518
2519static void literal_pool(int n)
2520{
2521 if(!literalcount) return;
2522 if(n) {
2523 if((int)out-literals[0][0]<4096-n) return;
2524 }
2525 u_int *ptr;
2526 int i;
2527 for(i=0;i<literalcount;i++)
2528 {
2529 ptr=(u_int *)literals[i][0];
2530 u_int offset=(u_int)out-(u_int)ptr-8;
2531 assert(offset<4096);
2532 assert(!(offset&3));
2533 *ptr|=offset;
2534 output_w32(literals[i][1]);
2535 }
2536 literalcount=0;
2537}
2538
2539static void literal_pool_jumpover(int n)
2540{
2541 if(!literalcount) return;
2542 if(n) {
2543 if((int)out-literals[0][0]<4096-n) return;
2544 }
2545 int jaddr=(int)out;
2546 emit_jmp(0);
2547 literal_pool(0);
2548 set_jump_target(jaddr,(int)out);
2549}
2550
2551static void emit_extjump2(int addr, int target, int linker)
2552{
2553 u_char *ptr=(u_char *)addr;
2554 assert((ptr[3]&0x0e)==0xa);
2555 emit_loadlp(target,0);
2556 emit_loadlp(addr,1);
2557 //assert(addr>=0x7000000&&addr<0x7FFFFFF);
2558 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2559//DEBUG >
2560#ifdef DEBUG_CYCLE_COUNT
2561 emit_readword((int)&last_count,ECX);
2562 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2563 emit_readword((int)&next_interupt,ECX);
2564 emit_writeword(HOST_CCREG,(int)&Count);
2565 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2566 emit_writeword(ECX,(int)&last_count);
2567#endif
2568//DEBUG <
2569 emit_jmp(linker);
2570}
2571
2572static void emit_extjump(int addr, int target)
2573{
2574 emit_extjump2(addr, target, (int)dyna_linker);
2575}
2576static void emit_extjump_ds(int addr, int target)
2577{
2578 emit_extjump2(addr, target, (int)dyna_linker_ds);
2579}
2580
2581static void do_readstub(int n)
2582{
2583 assem_debug("do_readstub %x",start+stubs[n][3]*4);
2584 literal_pool(256);
2585 set_jump_target(stubs[n][1],(int)out);
2586 int type=stubs[n][0];
2587 int i=stubs[n][3];
2588 int rs=stubs[n][4];
2589 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2590 u_int reglist=stubs[n][7];
2591 signed char *i_regmap=i_regs->regmap;
2592 int addr=get_reg(i_regmap,AGEN1+(i&1));
2593 int rth,rt;
2594 int ds;
2595 if(itype[i]==C1LS||itype[i]==LOADLR) {
2596 rth=get_reg(i_regmap,FTEMP|64);
2597 rt=get_reg(i_regmap,FTEMP);
2598 }else{
2599 rth=get_reg(i_regmap,rt1[i]|64);
2600 rt=get_reg(i_regmap,rt1[i]);
2601 }
2602 assert(rs>=0);
2603 if(addr<0) addr=rt;
2604 if(addr<0&&itype[i]!=C1LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
2605 assert(addr>=0);
2606 int ftable=0;
2607 if(type==LOADB_STUB||type==LOADBU_STUB)
2608 ftable=(int)readmemb;
2609 if(type==LOADH_STUB||type==LOADHU_STUB)
2610 ftable=(int)readmemh;
2611 if(type==LOADW_STUB)
2612 ftable=(int)readmem;
2613 if(type==LOADD_STUB)
2614 ftable=(int)readmemd;
2615 emit_writeword(rs,(int)&address);
2616 //emit_pusha();
2617 save_regs(reglist);
2618 ds=i_regs!=&regs[i];
2619 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2620 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2621 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2622 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2623 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2624 emit_shrimm(rs,16,1);
2625 int cc=get_reg(i_regmap,CCREG);
2626 if(cc<0) {
2627 emit_loadreg(CCREG,2);
2628 }
2629 emit_movimm(ftable,0);
2630 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2631 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2632 //emit_readword((int)&last_count,temp);
2633 //emit_add(cc,temp,cc);
2634 //emit_writeword(cc,(int)&Count);
2635 //emit_mov(15,14);
2636 emit_call((int)&indirect_jump_indexed);
2637 //emit_callreg(rs);
2638 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2639 // We really shouldn't need to update the count here,
2640 // but not doing so causes random crashes...
2641 emit_readword((int)&Count,HOST_TEMPREG);
2642 emit_readword((int)&next_interupt,2);
2643 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2644 emit_writeword(2,(int)&last_count);
2645 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2646 if(cc<0) {
2647 emit_storereg(CCREG,HOST_TEMPREG);
2648 }
2649 //emit_popa();
2650 restore_regs(reglist);
2651 //if((cc=get_reg(regmap,CCREG))>=0) {
2652 // emit_loadreg(CCREG,cc);
2653 //}
2654 if(rt>=0) {
2655 if(type==LOADB_STUB)
2656 emit_movsbl((int)&readmem_dword,rt);
2657 if(type==LOADBU_STUB)
2658 emit_movzbl((int)&readmem_dword,rt);
2659 if(type==LOADH_STUB)
2660 emit_movswl((int)&readmem_dword,rt);
2661 if(type==LOADHU_STUB)
2662 emit_movzwl((int)&readmem_dword,rt);
2663 if(type==LOADW_STUB)
2664 emit_readword((int)&readmem_dword,rt);
2665 if(type==LOADD_STUB) {
2666 emit_readword((int)&readmem_dword,rt);
2667 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2668 }
2669 }
2670 emit_jmp(stubs[n][2]); // return address
2671}
2672
2673static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2674{
2675 int rs=get_reg(regmap,target);
2676 int rth=get_reg(regmap,target|64);
2677 int rt=get_reg(regmap,target);
2678 if(rs<0) rs=get_reg(regmap,-1);
2679 assert(rs>=0);
2680 int ftable=0;
2681 if(type==LOADB_STUB||type==LOADBU_STUB)
2682 ftable=(int)readmemb;
2683 if(type==LOADH_STUB||type==LOADHU_STUB)
2684 ftable=(int)readmemh;
2685 if(type==LOADW_STUB)
2686 ftable=(int)readmem;
2687 if(type==LOADD_STUB)
2688 ftable=(int)readmemd;
2689 emit_writeword(rs,(int)&address);
2690 //emit_pusha();
2691 save_regs(reglist);
2692 if((signed int)addr>=(signed int)0xC0000000) {
2693 // Theoretically we can have a pagefault here, if the TLB has never
2694 // been enabled and the address is outside the range 80000000..BFFFFFFF
2695 // Write out the registers so the pagefault can be handled. This is
2696 // a very rare case and likely represents a bug.
2697 int ds=regmap!=regs[i].regmap;
2698 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2699 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2700 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2701 }
2702 //emit_shrimm(rs,16,1);
2703 int cc=get_reg(regmap,CCREG);
2704 if(cc<0) {
2705 emit_loadreg(CCREG,2);
2706 }
2707 //emit_movimm(ftable,0);
2708 emit_movimm(((u_int *)ftable)[addr>>16],0);
2709 //emit_readword((int)&last_count,12);
2710 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2711 if((signed int)addr>=(signed int)0xC0000000) {
2712 // Pagefault address
2713 int ds=regmap!=regs[i].regmap;
2714 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2715 }
2716 //emit_add(12,2,2);
2717 //emit_writeword(2,(int)&Count);
2718 //emit_call(((u_int *)ftable)[addr>>16]);
2719 emit_call((int)&indirect_jump);
2720 // We really shouldn't need to update the count here,
2721 // but not doing so causes random crashes...
2722 emit_readword((int)&Count,HOST_TEMPREG);
2723 emit_readword((int)&next_interupt,2);
2724 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2725 emit_writeword(2,(int)&last_count);
2726 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2727 if(cc<0) {
2728 emit_storereg(CCREG,HOST_TEMPREG);
2729 }
2730 //emit_popa();
2731 restore_regs(reglist);
2732 if(rt>=0) {
2733 if(type==LOADB_STUB)
2734 emit_movsbl((int)&readmem_dword,rt);
2735 if(type==LOADBU_STUB)
2736 emit_movzbl((int)&readmem_dword,rt);
2737 if(type==LOADH_STUB)
2738 emit_movswl((int)&readmem_dword,rt);
2739 if(type==LOADHU_STUB)
2740 emit_movzwl((int)&readmem_dword,rt);
2741 if(type==LOADW_STUB)
2742 emit_readword((int)&readmem_dword,rt);
2743 if(type==LOADD_STUB) {
2744 emit_readword((int)&readmem_dword,rt);
2745 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2746 }
2747 }
2748}
2749
2750static void do_writestub(int n)
2751{
2752 assem_debug("do_writestub %x",start+stubs[n][3]*4);
2753 literal_pool(256);
2754 set_jump_target(stubs[n][1],(int)out);
2755 int type=stubs[n][0];
2756 int i=stubs[n][3];
2757 int rs=stubs[n][4];
2758 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2759 u_int reglist=stubs[n][7];
2760 signed char *i_regmap=i_regs->regmap;
2761 int addr=get_reg(i_regmap,AGEN1+(i&1));
2762 int rth,rt,r;
2763 int ds;
2764 if(itype[i]==C1LS) {
2765 rth=get_reg(i_regmap,FTEMP|64);
2766 rt=get_reg(i_regmap,r=FTEMP);
2767 }else{
2768 rth=get_reg(i_regmap,rs2[i]|64);
2769 rt=get_reg(i_regmap,r=rs2[i]);
2770 }
2771 assert(rs>=0);
2772 assert(rt>=0);
2773 if(addr<0) addr=get_reg(i_regmap,-1);
2774 assert(addr>=0);
2775 int ftable=0;
2776 if(type==STOREB_STUB)
2777 ftable=(int)writememb;
2778 if(type==STOREH_STUB)
2779 ftable=(int)writememh;
2780 if(type==STOREW_STUB)
2781 ftable=(int)writemem;
2782 if(type==STORED_STUB)
2783 ftable=(int)writememd;
2784 emit_writeword(rs,(int)&address);
2785 //emit_shrimm(rs,16,rs);
2786 //emit_movmem_indexedx4(ftable,rs,rs);
2787 if(type==STOREB_STUB)
2788 emit_writebyte(rt,(int)&cpu_byte);
2789 if(type==STOREH_STUB)
2790 emit_writehword(rt,(int)&hword);
2791 if(type==STOREW_STUB)
2792 emit_writeword(rt,(int)&word);
2793 if(type==STORED_STUB) {
2794 emit_writeword(rt,(int)&dword);
2795 emit_writeword(r?rth:rt,(int)&dword+4);
2796 }
2797 //emit_pusha();
2798 save_regs(reglist);
2799 ds=i_regs!=&regs[i];
2800 int real_rs=get_reg(i_regmap,rs1[i]);
2801 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2802 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2803 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2804 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2805 emit_shrimm(rs,16,1);
2806 int cc=get_reg(i_regmap,CCREG);
2807 if(cc<0) {
2808 emit_loadreg(CCREG,2);
2809 }
2810 emit_movimm(ftable,0);
2811 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2812 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2813 //emit_readword((int)&last_count,temp);
2814 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2815 //emit_add(cc,temp,cc);
2816 //emit_writeword(cc,(int)&Count);
2817 emit_call((int)&indirect_jump_indexed);
2818 //emit_callreg(rs);
2819 emit_readword((int)&Count,HOST_TEMPREG);
2820 emit_readword((int)&next_interupt,2);
2821 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2822 emit_writeword(2,(int)&last_count);
2823 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2824 if(cc<0) {
2825 emit_storereg(CCREG,HOST_TEMPREG);
2826 }
2827 //emit_popa();
2828 restore_regs(reglist);
2829 //if((cc=get_reg(regmap,CCREG))>=0) {
2830 // emit_loadreg(CCREG,cc);
2831 //}
2832 emit_jmp(stubs[n][2]); // return address
2833}
2834
2835static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2836{
2837 int rs=get_reg(regmap,-1);
2838 int rth=get_reg(regmap,target|64);
2839 int rt=get_reg(regmap,target);
2840 assert(rs>=0);
2841 assert(rt>=0);
2842 int ftable=0;
2843 if(type==STOREB_STUB)
2844 ftable=(int)writememb;
2845 if(type==STOREH_STUB)
2846 ftable=(int)writememh;
2847 if(type==STOREW_STUB)
2848 ftable=(int)writemem;
2849 if(type==STORED_STUB)
2850 ftable=(int)writememd;
2851 emit_writeword(rs,(int)&address);
2852 //emit_shrimm(rs,16,rs);
2853 //emit_movmem_indexedx4(ftable,rs,rs);
2854 if(type==STOREB_STUB)
2855 emit_writebyte(rt,(int)&cpu_byte);
2856 if(type==STOREH_STUB)
2857 emit_writehword(rt,(int)&hword);
2858 if(type==STOREW_STUB)
2859 emit_writeword(rt,(int)&word);
2860 if(type==STORED_STUB) {
2861 emit_writeword(rt,(int)&dword);
2862 emit_writeword(target?rth:rt,(int)&dword+4);
2863 }
2864 //emit_pusha();
2865 save_regs(reglist);
2866 if((signed int)addr>=(signed int)0xC0000000) {
2867 // Theoretically we can have a pagefault here, if the TLB has never
2868 // been enabled and the address is outside the range 80000000..BFFFFFFF
2869 // Write out the registers so the pagefault can be handled. This is
2870 // a very rare case and likely represents a bug.
2871 int ds=regmap!=regs[i].regmap;
2872 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2873 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2874 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2875 }
2876 //emit_shrimm(rs,16,1);
2877 int cc=get_reg(regmap,CCREG);
2878 if(cc<0) {
2879 emit_loadreg(CCREG,2);
2880 }
2881 //emit_movimm(ftable,0);
2882 emit_movimm(((u_int *)ftable)[addr>>16],0);
2883 //emit_readword((int)&last_count,12);
2884 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2885 if((signed int)addr>=(signed int)0xC0000000) {
2886 // Pagefault address
2887 int ds=regmap!=regs[i].regmap;
2888 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2889 }
2890 //emit_add(12,2,2);
2891 //emit_writeword(2,(int)&Count);
2892 //emit_call(((u_int *)ftable)[addr>>16]);
2893 emit_call((int)&indirect_jump);
2894 emit_readword((int)&Count,HOST_TEMPREG);
2895 emit_readword((int)&next_interupt,2);
2896 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2897 emit_writeword(2,(int)&last_count);
2898 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2899 if(cc<0) {
2900 emit_storereg(CCREG,HOST_TEMPREG);
2901 }
2902 //emit_popa();
2903 restore_regs(reglist);
2904}
2905
2906static void do_unalignedwritestub(int n)
2907{
2908 set_jump_target(stubs[n][1],(int)out);
2909 output_w32(0xef000000);
2910 emit_jmp(stubs[n][2]); // return address
2911}
2912
2913void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
2914{
2915 DebugMessage(M64MSG_VERBOSE, "regs: %x %x %x %x %x %x %x (%x)",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
2916}
2917
2918static void do_invstub(int n)
2919{
2920 literal_pool(20);
2921 u_int reglist=stubs[n][3];
2922 set_jump_target(stubs[n][1],(int)out);
2923 save_regs(reglist);
2924 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2925 emit_call((int)&invalidate_addr);
2926 restore_regs(reglist);
2927 emit_jmp(stubs[n][2]); // return address
2928}
2929
2930static int do_dirty_stub(int i)
2931{
2932 assem_debug("do_dirty_stub %x",start+i*4);
2933 // Careful about the code output here, verify_dirty needs to parse it.
2934 #ifdef ARMv5_ONLY
2935 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2936 emit_loadlp((int)copy,2);
2937 emit_loadlp(slen*4,3);
2938 #else
2939 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2940 emit_movw(((u_int)copy)&0x0000FFFF,2);
2941 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2942 emit_movt(((u_int)copy)&0xFFFF0000,2);
2943 emit_movw(slen*4,3);
2944 #endif
2945 emit_movimm(start+i*4,0);
2946 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2947 int entry=(int)out;
2948 load_regs_entry(i);
2949 if(entry==(int)out) entry=instr_addr[i];
2950 emit_jmp(instr_addr[i]);
2951 return entry;
2952}
2953
2954static void do_dirty_stub_ds()
2955{
2956 // Careful about the code output here, verify_dirty needs to parse it.
2957 #ifdef ARMv5_ONLY
2958 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2959 emit_loadlp((int)copy,2);
2960 emit_loadlp(slen*4,3);
2961 #else
2962 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2963 emit_movw(((u_int)copy)&0x0000FFFF,2);
2964 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2965 emit_movt(((u_int)copy)&0xFFFF0000,2);
2966 emit_movw(slen*4,3);
2967 #endif
2968 emit_movimm(start+1,0);
2969 emit_call((int)&verify_code_ds);
2970}
2971
2972static void do_cop1stub(int n)
2973{
2974 literal_pool(256);
2975 assem_debug("do_cop1stub %x",start+stubs[n][3]*4);
2976 set_jump_target(stubs[n][1],(int)out);
2977 int i=stubs[n][3];
2978 int rs=stubs[n][4];
2979 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2980 int ds=stubs[n][6];
2981 if(!ds) {
2982 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2983 //if(i_regs!=&regs[i]) DebugMessage(M64MSG_VERBOSE, "oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2984 }
2985 //else {DebugMessage(M64MSG_ERROR, "fp exception in delay slot");}
2986 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2987 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2988 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2989 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2990 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2991}
2992
2993/* TLB */
2994
2995static int do_tlb_r(int s,int ar,int map,int cache,int x,int a,int shift,int c,u_int addr)
2996{
2997 if(c) {
2998 if((signed int)addr>=(signed int)0xC0000000) {
2999 // address_generation already loaded the const
3000 emit_readword_dualindexedx4(FP,map,map);
3001 }
3002 else
3003 return -1; // No mapping
3004 }
3005 else {
3006 assert(s!=map);
3007 if(cache>=0) {
3008 // Use cached offset to memory map
3009 emit_addsr12(cache,s,map);
3010 }else{
3011 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3012 emit_addsr12(map,s,map);
3013 }
3014 // Schedule this while we wait on the load
3015 //if(x) emit_xorimm(s,x,ar);
3016 if(shift>=0) emit_shlimm(s,3,shift);
3017 if(~a) emit_andimm(s,a,ar);
3018 emit_readword_dualindexedx4(FP,map,map);
3019 }
3020 return map;
3021}
3022static int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3023{
3024 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3025 emit_test(map,map);
3026 *jaddr=(int)out;
3027 emit_js(0);
3028 }
3029 return map;
3030}
3031
3032static void gen_tlb_addr_r(int ar, int map) {
3033 if(map>=0) {
3034 assem_debug("add %s,%s,%s lsl #2",regname[ar],regname[ar],regname[map]);
3035 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3036 }
3037}
3038
3039static int do_tlb_w(int s,int ar,int map,int cache,int x,int c,u_int addr)
3040{
3041 if(c) {
3042 if(addr<0x80800000||addr>=0xC0000000) {
3043 // address_generation already loaded the const
3044 emit_readword_dualindexedx4(FP,map,map);
3045 }
3046 else
3047 return -1; // No mapping
3048 }
3049 else {
3050 assert(s!=map);
3051 if(cache>=0) {
3052 // Use cached offset to memory map
3053 emit_addsr12(cache,s,map);
3054 }else{
3055 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3056 emit_addsr12(map,s,map);
3057 }
3058 // Schedule this while we wait on the load
3059 //if(x) emit_xorimm(s,x,ar);
3060 emit_readword_dualindexedx4(FP,map,map);
3061 }
3062 return map;
3063}
3064static void do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3065{
3066 if(!c||addr<0x80800000||addr>=0xC0000000) {
3067 emit_testimm(map,0x40000000);
3068 *jaddr=(int)out;
3069 emit_jne(0);
3070 }
3071}
3072
3073static void gen_tlb_addr_w(int ar, int map) {
3074 if(map>=0) {
3075 assem_debug("add %s,%s,%s lsl #2",regname[ar],regname[ar],regname[map]);
3076 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3077 }
3078}
3079
3080// This reverses the above operation
3081static void gen_orig_addr_w(int ar, int map) {
3082 if(map>=0) {
3083 assem_debug("sub %s,%s,%s lsl #2",regname[ar],regname[ar],regname[map]);
3084 output_w32(0xe0400100|rd_rn_rm(ar,ar,map));
3085 }
3086}
3087
3088// Generate the address of the memory_map entry, relative to dynarec_local
3089static void generate_map_const(u_int addr,int reg) {
3090 //DebugMessage(M64MSG_VERBOSE, "generate_map_const(%x,%s)",addr,regname[reg]);
3091 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3092}
3093
3094/* Special assem */
3095
3096static void shift_assemble_arm(int i,struct regstat *i_regs)
3097{
3098 if(rt1[i]) {
3099 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3100 {
3101 signed char s,t,shift;
3102 t=get_reg(i_regs->regmap,rt1[i]);
3103 s=get_reg(i_regs->regmap,rs1[i]);
3104 shift=get_reg(i_regs->regmap,rs2[i]);
3105 if(t>=0){
3106 if(rs1[i]==0)
3107 {
3108 emit_zeroreg(t);
3109 }
3110 else if(rs2[i]==0)
3111 {
3112 assert(s>=0);
3113 if(s!=t) emit_mov(s,t);
3114 }
3115 else
3116 {
3117 emit_andimm(shift,31,HOST_TEMPREG);
3118 if(opcode2[i]==4) // SLLV
3119 {
3120 emit_shl(s,HOST_TEMPREG,t);
3121 }
3122 if(opcode2[i]==6) // SRLV
3123 {
3124 emit_shr(s,HOST_TEMPREG,t);
3125 }
3126 if(opcode2[i]==7) // SRAV
3127 {
3128 emit_sar(s,HOST_TEMPREG,t);
3129 }
3130 }
3131 }
3132 } else { // DSLLV/DSRLV/DSRAV
3133 signed char sh,sl,th,tl,shift;
3134 th=get_reg(i_regs->regmap,rt1[i]|64);
3135 tl=get_reg(i_regs->regmap,rt1[i]);
3136 sh=get_reg(i_regs->regmap,rs1[i]|64);
3137 sl=get_reg(i_regs->regmap,rs1[i]);
3138 shift=get_reg(i_regs->regmap,rs2[i]);
3139 if(tl>=0){
3140 if(rs1[i]==0)
3141 {
3142 emit_zeroreg(tl);
3143 if(th>=0) emit_zeroreg(th);
3144 }
3145 else if(rs2[i]==0)
3146 {
3147 assert(sl>=0);
3148 if(sl!=tl) emit_mov(sl,tl);
3149 if(th>=0&&sh!=th) emit_mov(sh,th);
3150 }
3151 else
3152 {
3153 // FIXME: What if shift==tl ?
3154 assert(shift!=tl);
3155 int temp=get_reg(i_regs->regmap,-1);
3156 int real_th=th;
3157 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3158 assert(sl>=0);
3159 assert(sh>=0);
3160 emit_andimm(shift,31,HOST_TEMPREG);
3161 if(opcode2[i]==0x14) // DSLLV
3162 {
3163 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3164 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3165 emit_orrshr(sl,HOST_TEMPREG,th);
3166 emit_andimm(shift,31,HOST_TEMPREG);
3167 emit_testimm(shift,32);
3168 emit_shl(sl,HOST_TEMPREG,tl);
3169 if(th>=0) emit_cmovne_reg(tl,th);
3170 emit_cmovne_imm(0,tl);
3171 }
3172 if(opcode2[i]==0x16) // DSRLV
3173 {
3174 assert(th>=0);
3175 emit_shr(sl,HOST_TEMPREG,tl);
3176 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3177 emit_orrshl(sh,HOST_TEMPREG,tl);
3178 emit_andimm(shift,31,HOST_TEMPREG);
3179 emit_testimm(shift,32);
3180 emit_shr(sh,HOST_TEMPREG,th);
3181 emit_cmovne_reg(th,tl);
3182 if(real_th>=0) emit_cmovne_imm(0,th);
3183 }
3184 if(opcode2[i]==0x17) // DSRAV
3185 {
3186 assert(th>=0);
3187 emit_shr(sl,HOST_TEMPREG,tl);
3188 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3189 if(real_th>=0) {
3190 assert(temp>=0);
3191 emit_sarimm(th,31,temp);
3192 }
3193 emit_orrshl(sh,HOST_TEMPREG,tl);
3194 emit_andimm(shift,31,HOST_TEMPREG);
3195 emit_testimm(shift,32);
3196 emit_sar(sh,HOST_TEMPREG,th);
3197 emit_cmovne_reg(th,tl);
3198 if(real_th>=0) emit_cmovne_reg(temp,th);
3199 }
3200 }
3201 }
3202 }
3203 }
3204}
3205#define shift_assemble shift_assemble_arm
3206
// Assemble the unaligned loads LWL/LWR (opcode 0x22/0x26) and LDL/LDR
// (opcode 0x1A/0x1B) for instruction i: the aligned word/dword is read and
// then merged into the current value of rt1[i].
// i_regs supplies the host register map (regmap) and the constant flags
// (wasconst) for this instruction.
3207 static void loadlr_assemble_arm(int i,struct regstat *i_regs)
3208 {
3209 int s,th,tl,temp,temp2,addr,map=-1,cache=-1;
3210 int offset;
3211 int jaddr=0;
3212 int memtarget,c=0;
3213 u_int hr,reglist=0;
3214 th=get_reg(i_regs->regmap,rt1[i]|64);
3215 tl=get_reg(i_regs->regmap,rt1[i]);
3216 s=get_reg(i_regs->regmap,rs1[i]);
3217 temp=get_reg(i_regs->regmap,-1);
3218 temp2=get_reg(i_regs->regmap,FTEMP);
3219 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3220 assert(addr<0);
3221 offset=imm[i];
// reglist: bit mask of every host register that currently holds a mapping
3222 for(hr=0;hr<HOST_REGS;hr++) {
3223 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3224 }
3225 reglist|=1<<temp;
// NOTE(review): c is still 0 here (it is only assigned a few lines below),
// so the "||c" term currently has no effect.
3226 if(offset||s<0||c) addr=temp2;
3227 else addr=s;
// memtarget is only assigned when s>=0; its later reads sit behind "!c||",
// and c stays 0 when s<0.
3228 if(s>=0) {
3229 c=(i_regs->wasconst>>s)&1;
3230 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3231 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3232 }
3233 if(!using_tlb) {
3234 if(!c) {
3235 #ifdef RAM_OFFSET
3236 map=get_reg(i_regs->regmap,ROREG);
3237 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3238 #endif
// temp receives the address shifted left by 3 (a bit offset, masked later);
// temp2 receives the address aligned down to a word (LWL/LWR) or dword (LDL/LDR).
3239 emit_shlimm(addr,3,temp);
3240 if (opcode[i]==0x22||opcode[i]==0x26) {
3241 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3242 }else{
3243 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3244 }
// The branch emitted here starts at jaddr; it is registered with add_stub
// further down once the load itself has been emitted.
3245 emit_cmpimm(addr,0x800000);
3246 jaddr=(int)out;
3247 emit_jno(0);
3248 }
3249 else {
// Constant address: the bit offset is known at assembly time
3250 if (opcode[i]==0x22||opcode[i]==0x26) {
3251 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3252 }else{
3253 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3254 }
3255 }
3256 }else{ // using tlb
// a: alignment mask handed to do_tlb_r (-1 when the address is constant)
3257 int a;
3258 if(c) {
3259 a=-1;
3260 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3261 a=0xFFFFFFFC; // LWL/LWR
3262 }else{
3263 a=0xFFFFFFF8; // LDL/LDR
3264 }
3265 map=get_reg(i_regs->regmap,TLREG);
3266 cache=get_reg(i_regs->regmap,MMREG); // Get cached offset to memory_map
3267 assert(map>=0);
3268 reglist&=~(1<<map);
// NOTE(review): constmap[i][s] is evaluated here and in the do_tlb_r_branch
// call below even when s<0 - confirm that this is benign.
3269 map=do_tlb_r(addr,temp2,map,cache,0,a,c?-1:temp,c,constmap[i][s]+offset);
3270 if(c) {
3271 if (opcode[i]==0x22||opcode[i]==0x26) {
3272 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3273 }else{
3274 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3275 }
3276 }
3277 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3278 }
3279 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
// Direct read when the address is not constant or is a known memory target;
// otherwise the read goes through an inline stub.
3280 if(!c||memtarget) {
3281 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3282 emit_readword_indexed_tlb(0,temp2,map,temp2);
3283 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3284 }
3285 else
3286 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3287 if(rt1[i]) {
3288 assert(tl>=0);
// Merge: shift the loaded word by the bit offset in temp, clear the bits of
// tl covered by an equally shifted all-ones mask, then OR the two together.
3289 emit_andimm(temp,24,temp);
3290 if (opcode[i]==0x26) emit_xorimm(temp,24,temp); // LWR
3291 emit_movimm(-1,HOST_TEMPREG);
3292 if (opcode[i]==0x26) {
3293 emit_shr(temp2,temp,temp2);
3294 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3295 }else{
3296 emit_shl(temp2,temp,temp2);
3297 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3298 }
3299 emit_or(temp2,tl,tl);
3300 }
3301 //emit_storereg(rt1[i],tl); // DEBUG
3302 }
3303 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
// temp2h/temp2 receive the high/low halves of the loaded dword
3304 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3305 if(!c||memtarget) {
3306 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3307 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3308 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3309 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3310 }
3311 else
3312 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3313 if(rt1[i]) {
3314 assert(th>=0);
3315 assert(tl>=0);
// The test of bit 5 (value 32) of the bit offset sets the flags consumed by
// the eq/ne conditional emitters below; presumably none of the emitters in
// between alter the flags - TODO confirm.
3316 emit_testimm(temp,32);
3317 emit_andimm(temp,24,temp);
3318 if (opcode[i]==0x1A) { // LDL
3319 emit_rsbimm(temp,32,HOST_TEMPREG);
3320 emit_shl(temp2h,temp,temp2h);
3321 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3322 emit_movimm(-1,HOST_TEMPREG);
3323 emit_shl(temp2,temp,temp2);
3324 emit_cmove_reg(temp2h,th);
3325 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3326 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3327 emit_orreq(temp2,tl,tl);
3328 emit_orrne(temp2,th,th);
3329 }
3330 if (opcode[i]==0x1B) { // LDR
3331 emit_xorimm(temp,24,temp);
3332 emit_rsbimm(temp,32,HOST_TEMPREG);
3333 emit_shr(temp2,temp,temp2);
3334 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3335 emit_movimm(-1,HOST_TEMPREG);
3336 emit_shr(temp2h,temp,temp2h);
3337 emit_cmovne_reg(temp2,tl);
3338 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3339 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3340 emit_orrne(temp2h,th,th);
3341 emit_orreq(temp2h,tl,tl);
3342 }
3343 }
3344 }
3345 }
3346 #define loadlr_assemble loadlr_assemble_arm
3347
3348static void cop0_assemble(int i,struct regstat *i_regs)
3349{
3350 if(opcode2[i]==0) // MFC0
3351 {
3352 if(rt1[i]) {
3353 signed char t=get_reg(i_regs->regmap,rt1[i]);
3354 char copr=(source[i]>>11)&0x1f;
3355 if(t>=0) {
3356 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3357 emit_movimm((source[i]>>11)&0x1f,1);
3358 emit_writeword(0,(int)&PC);
3359 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3360 if(copr==9) {
3361 emit_readword((int)&last_count,ECX);
3362 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3363 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3364 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3365 emit_writeword(HOST_CCREG,(int)&Count);
3366 }
3367 emit_call((int)cached_interpreter_table.MFC0);
3368 emit_readword((int)&readmem_dword,t);
3369 }
3370 }
3371 }
3372 else if(opcode2[i]==4) // MTC0
3373 {
3374 signed char s=get_reg(i_regs->regmap,rs1[i]);
3375 char copr=(source[i]>>11)&0x1f;
3376 assert(s>=0);
3377 emit_writeword(s,(int)&readmem_dword);
3378 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3379 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3380 emit_movimm((source[i]>>11)&0x1f,1);
3381 emit_writeword(0,(int)&PC);
3382 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3383 if(copr==9||copr==11||copr==12) {
3384 emit_readword((int)&last_count,ECX);
3385 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3386 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3387 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3388 emit_writeword(HOST_CCREG,(int)&Count);
3389 }
3390 // What a mess. The status register (12) can enable interrupts,
3391 // so needs a special case to handle a pending interrupt.
3392 // The interrupt must be taken immediately, because a subsequent
3393 // instruction might disable interrupts again.
3394 if(copr==12&&!is_delayslot) {
3395 emit_movimm(start+i*4+4,0);
3396 emit_movimm(0,1);
3397 emit_writeword(0,(int)&pcaddr);
3398 emit_writeword(1,(int)&pending_exception);
3399 }
3400 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3401 //else
3402 emit_call((int)cached_interpreter_table.MTC0);
3403 if(copr==9||copr==11||copr==12) {
3404 emit_readword((int)&Count,HOST_CCREG);
3405 emit_readword((int)&next_interupt,ECX);
3406 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3407 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3408 emit_writeword(ECX,(int)&last_count);
3409 emit_storereg(CCREG,HOST_CCREG);
3410 }
3411 if(copr==12) {
3412 assert(!is_delayslot);
3413 emit_readword((int)&pending_exception,14);
3414 }
3415 emit_loadreg(rs1[i],s);
3416 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3417 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3418 if(copr==12) {
3419 emit_test(14,14);
3420 emit_jne((int)&do_interrupt);
3421 }
3422 cop1_usable=0;
3423 }
3424 else
3425 {
3426 assert(opcode2[i]==0x10);
3427 if((source[i]&0x3f)==0x01) // TLBR
3428 emit_call((int)cached_interpreter_table.TLBR);
3429 if((source[i]&0x3f)==0x02) // TLBWI
3430 emit_call((int)TLBWI_new);
3431 if((source[i]&0x3f)==0x06) { // TLBWR
3432 // The TLB entry written by TLBWR is dependent on the count,
3433 // so update the cycle count
3434 emit_readword((int)&last_count,ECX);
3435 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3436 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3437 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3438 emit_writeword(HOST_CCREG,(int)&Count);
3439 emit_call((int)TLBWR_new);
3440 }
3441 if((source[i]&0x3f)==0x08) // TLBP
3442 emit_call((int)cached_interpreter_table.TLBP);
3443 if((source[i]&0x3f)==0x18) // ERET
3444 {
3445 int count=ccadj[i];
3446 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3447 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3448 emit_jmp((int)jump_eret);
3449 }
3450 }
3451}
3452
3453static void cop1_assemble(int i,struct regstat *i_regs)
3454{
3455 // Check cop1 unusable
3456 if(!cop1_usable) {
3457 signed char rs=get_reg(i_regs->regmap,CSREG);
3458 assert(rs>=0);
3459 emit_testimm(rs,0x20000000);
3460 int jaddr=(int)out;
3461 emit_jeq(0);
3462 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3463 cop1_usable=1;
3464 }
3465 if (opcode2[i]==0) { // MFC1
3466 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3467 if(tl>=0) {
3468 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3469 emit_readword_indexed(0,tl,tl);
3470 }
3471 }
3472 else if (opcode2[i]==1) { // DMFC1
3473 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3474 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3475 if(tl>=0) {
3476 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3477 if(th>=0) emit_readword_indexed(4,tl,th);
3478 emit_readword_indexed(0,tl,tl);
3479 }
3480 }
3481 else if (opcode2[i]==4) { // MTC1
3482 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3483 signed char temp=get_reg(i_regs->regmap,-1);
3484 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3485 emit_writeword_indexed(sl,0,temp);
3486 }
3487 else if (opcode2[i]==5) { // DMTC1
3488 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3489 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3490 signed char temp=get_reg(i_regs->regmap,-1);
3491 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3492 emit_writeword_indexed(sh,4,temp);
3493 emit_writeword_indexed(sl,0,temp);
3494 }
3495 else if (opcode2[i]==2) // CFC1
3496 {
3497 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3498 if(tl>=0) {
3499 u_int copr=(source[i]>>11)&0x1f;
3500 if(copr==0) emit_readword((int)&FCR0,tl);
3501 if(copr==31) emit_readword((int)&FCR31,tl);
3502 }
3503 }
3504 else if (opcode2[i]==6) // CTC1
3505 {
3506 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3507 u_int copr=(source[i]>>11)&0x1f;
3508 assert(sl>=0);
3509 if(copr==31)
3510 {
3511 emit_writeword(sl,(int)&FCR31);
3512 // Set the rounding mode
3513 //FIXME
3514 //char temp=get_reg(i_regs->regmap,-1);
3515 //emit_andimm(sl,3,temp);
3516 //emit_fldcw_indexed((int)&rounding_modes,temp);
3517 }
3518 }
3519}
3520
3521static void fconv_assemble_arm(int i,struct regstat *i_regs)
3522{
3523 signed char temp=get_reg(i_regs->regmap,-1);
3524 assert(temp>=0);
3525 // Check cop1 unusable
3526 if(!cop1_usable) {
3527 signed char rs=get_reg(i_regs->regmap,CSREG);
3528 assert(rs>=0);
3529 emit_testimm(rs,0x20000000);
3530 int jaddr=(int)out;
3531 emit_jeq(0);
3532 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3533 cop1_usable=1;
3534 }
3535
3536 #if (defined(__VFP_FP__) && !defined(__SOFTFP__))
3537 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3538 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3539 emit_flds(temp,15);
3540 emit_ftosizs(15,15); // float->int, truncate
3541 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3542 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3543 emit_fsts(15,temp);
3544 return;
3545 }
3546 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3547 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3548 emit_vldr(temp,7);
3549 emit_ftosizd(7,13); // double->int, truncate
3550 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3551 emit_fsts(13,temp);
3552 return;
3553 }
3554
3555 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3556 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3557 emit_flds(temp,13);
3558 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3559 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3560 emit_fsitos(13,15);
3561 emit_fsts(15,temp);
3562 return;
3563 }
3564 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3565 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3566 emit_flds(temp,13);
3567 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3568 emit_fsitod(13,7);
3569 emit_vstr(7,temp);
3570 return;
3571 }
3572
3573 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3574 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3575 emit_flds(temp,13);
3576 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3577 emit_fcvtds(13,7);
3578 emit_vstr(7,temp);
3579 return;
3580 }
3581 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3582 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3583 emit_vldr(temp,7);
3584 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3585 emit_fcvtsd(7,13);
3586 emit_fsts(13,temp);
3587 return;
3588 }
3589 #endif
3590
3591 // C emulation code
3592
3593 u_int hr,reglist=0;
3594 for(hr=0;hr<HOST_REGS;hr++) {
3595 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3596 }
3597 save_regs(reglist);
3598
3599 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3600 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3601 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3602 emit_call((int)cvt_s_w);
3603 }
3604 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3605 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3606 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3607 emit_call((int)cvt_d_w);
3608 }
3609 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3610 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3611 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3612 emit_call((int)cvt_s_l);
3613 }
3614 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3615 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3616 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3617 emit_call((int)cvt_d_l);
3618 }
3619
3620 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3621 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3622 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3623 emit_call((int)cvt_d_s);
3624 }
3625 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3626 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3627 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3628 emit_call((int)cvt_w_s);
3629 }
3630 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3631 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3632 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3633 emit_call((int)cvt_l_s);
3634 }
3635
3636 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3637 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3638 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3639 emit_call((int)cvt_s_d);
3640 }
3641 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3642 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3643 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3644 emit_call((int)cvt_w_d);
3645 }
3646 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3647 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3648 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3649 emit_call((int)cvt_l_d);
3650 }
3651
3652 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3653 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3654 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3655 emit_call((int)round_l_s);
3656 }
3657 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3658 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3659 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3660 emit_call((int)trunc_l_s);
3661 }
3662 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3663 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3664 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3665 emit_call((int)ceil_l_s);
3666 }
3667 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3668 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3669 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3670 emit_call((int)floor_l_s);
3671 }
3672 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3673 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3674 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3675 emit_call((int)round_w_s);
3676 }
3677 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3678 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3679 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3680 emit_call((int)trunc_w_s);
3681 }
3682 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3683 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3684 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3685 emit_call((int)ceil_w_s);
3686 }
3687 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3688 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3689 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3690 emit_call((int)floor_w_s);
3691 }
3692
3693 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3694 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3695 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3696 emit_call((int)round_l_d);
3697 }
3698 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3699 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3700 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3701 emit_call((int)trunc_l_d);
3702 }
3703 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3704 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3705 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3706 emit_call((int)ceil_l_d);
3707 }
3708 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3709 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3710 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3711 emit_call((int)floor_l_d);
3712 }
3713 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3714 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3715 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3716 emit_call((int)round_w_d);
3717 }
3718 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3719 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3720 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3721 emit_call((int)trunc_w_d);
3722 }
3723 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3724 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3725 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3726 emit_call((int)ceil_w_d);
3727 }
3728 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3729 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3730 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3731 emit_call((int)floor_w_d);
3732 }
3733
3734 restore_regs(reglist);
3735}
3736#define fconv_assemble fconv_assemble_arm
3737
3738static void fcomp_assemble(int i,struct regstat *i_regs)
3739{
3740 signed char fs=get_reg(i_regs->regmap,FSREG);
3741 signed char temp=get_reg(i_regs->regmap,-1);
3742 assert(temp>=0);
3743 // Check cop1 unusable
3744 if(!cop1_usable) {
3745 signed char cs=get_reg(i_regs->regmap,CSREG);
3746 assert(cs>=0);
3747 emit_testimm(cs,0x20000000);
3748 int jaddr=(int)out;
3749 emit_jeq(0);
3750 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3751 cop1_usable=1;
3752 }
3753
3754 if((source[i]&0x3f)==0x30) {
3755 emit_andimm(fs,~0x800000,fs);
3756 return;
3757 }
3758
3759 if((source[i]&0x3e)==0x38) {
3760 // sf/ngle - these should throw exceptions for NaNs
3761 emit_andimm(fs,~0x800000,fs);
3762 return;
3763 }
3764
3765 #if (defined(__VFP_FP__) && !defined(__SOFTFP__))
3766 if(opcode2[i]==0x10) {
3767 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3768 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3769 emit_orimm(fs,0x800000,fs);
3770 emit_flds(temp,14);
3771 emit_flds(HOST_TEMPREG,15);
3772 emit_fcmps(14,15);
3773 emit_fmstat();
3774 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
3775 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
3776 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
3777 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
3778 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
3779 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
3780 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
3781 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
3782 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
3783 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
3784 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
3785 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
3786 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
3787 return;
3788 }
3789 if(opcode2[i]==0x11) {
3790 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3791 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
3792 emit_orimm(fs,0x800000,fs);
3793 emit_vldr(temp,6);
3794 emit_vldr(HOST_TEMPREG,7);
3795 emit_fcmpd(6,7);
3796 emit_fmstat();
3797 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
3798 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
3799 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
3800 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
3801 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
3802 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
3803 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
3804 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
3805 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
3806 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
3807 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
3808 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
3809 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
3810 return;
3811 }
3812 #endif
3813
3814 // C only
3815
3816 u_int hr,reglist=0;
3817 for(hr=0;hr<HOST_REGS;hr++) {
3818 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3819 }
3820 reglist&=~(1<<fs);
3821 save_regs(reglist);
3822 if(opcode2[i]==0x10) {
3823 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3824 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
3825 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3826 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3827 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3828 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3829 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3830 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3831 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3832 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3833 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3834 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3835 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3836 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3837 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3838 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3839 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3840 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3841 }
3842 if(opcode2[i]==0x11) {
3843 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3844 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
3845 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3846 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3847 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3848 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3849 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3850 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3851 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3852 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3853 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3854 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3855 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3856 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3857 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3858 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3859 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3860 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
3861 }
3862 restore_regs(reglist);
3863 emit_loadreg(FSREG,fs);
3864}
3865
3866static void float_assemble(int i,struct regstat *i_regs)
3867{
3868 signed char temp=get_reg(i_regs->regmap,-1);
3869 assert(temp>=0);
3870 // Check cop1 unusable
3871 if(!cop1_usable) {
3872 signed char cs=get_reg(i_regs->regmap,CSREG);
3873 assert(cs>=0);
3874 emit_testimm(cs,0x20000000);
3875 int jaddr=(int)out;
3876 emit_jeq(0);
3877 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3878 cop1_usable=1;
3879 }
3880
3881 #if (defined(__VFP_FP__) && !defined(__SOFTFP__))
3882 if((source[i]&0x3f)==6) // mov
3883 {
3884 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3885 if(opcode2[i]==0x10) {
3886 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3887 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
3888 emit_readword_indexed(0,temp,temp);
3889 emit_writeword_indexed(temp,0,HOST_TEMPREG);
3890 }
3891 if(opcode2[i]==0x11) {
3892 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3893 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
3894 emit_vldr(temp,7);
3895 emit_vstr(7,HOST_TEMPREG);
3896 }
3897 }
3898 return;
3899 }
3900
3901 if((source[i]&0x3f)>3)
3902 {
3903 if(opcode2[i]==0x10) {
3904 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3905 emit_flds(temp,15);
3906 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3907 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3908 }
3909 if((source[i]&0x3f)==4) // sqrt
3910 emit_fsqrts(15,15);
3911 if((source[i]&0x3f)==5) // abs
3912 emit_fabss(15,15);
3913 if((source[i]&0x3f)==7) // neg
3914 emit_fnegs(15,15);
3915 emit_fsts(15,temp);
3916 }
3917 if(opcode2[i]==0x11) {
3918 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3919 emit_vldr(temp,7);
3920 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3921 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3922 }
3923 if((source[i]&0x3f)==4) // sqrt
3924 emit_fsqrtd(7,7);
3925 if((source[i]&0x3f)==5) // abs
3926 emit_fabsd(7,7);
3927 if((source[i]&0x3f)==7) // neg
3928 emit_fnegd(7,7);
3929 emit_vstr(7,temp);
3930 }
3931 return;
3932 }
3933 if((source[i]&0x3f)<4)
3934 {
3935 if(opcode2[i]==0x10) {
3936 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3937 }
3938 if(opcode2[i]==0x11) {
3939 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3940 }
3941 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
3942 if(opcode2[i]==0x10) {
3943 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3944 emit_flds(temp,15);
3945 emit_flds(HOST_TEMPREG,13);
3946 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3947 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
3948 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3949 }
3950 }
3951 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
3952 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
3953 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
3954 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
3955 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
3956 emit_fsts(15,HOST_TEMPREG);
3957 }else{
3958 emit_fsts(15,temp);
3959 }
3960 }
3961 else if(opcode2[i]==0x11) {
3962 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
3963 emit_vldr(temp,7);
3964 emit_vldr(HOST_TEMPREG,6);
3965 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3966 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
3967 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3968 }
3969 }
3970 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
3971 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
3972 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
3973 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
3974 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
3975 emit_vstr(7,HOST_TEMPREG);
3976 }else{
3977 emit_vstr(7,temp);
3978 }
3979 }
3980 }
3981 else {
3982 if(opcode2[i]==0x10) {
3983 emit_flds(temp,15);
3984 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3985 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3986 }
3987 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
3988 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
3989 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
3990 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
3991 emit_fsts(15,temp);
3992 }
3993 else if(opcode2[i]==0x11) {
3994 emit_vldr(temp,7);
3995 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3996 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3997 }
3998 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
3999 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4000 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4001 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4002 emit_vstr(7,temp);
4003 }
4004 }
4005 return;
4006 }
4007 #endif
4008
4009 u_int hr,reglist=0;
4010 for(hr=0;hr<HOST_REGS;hr++) {
4011 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4012 }
4013 if(opcode2[i]==0x10) { // Single precision
4014 save_regs(reglist);
4015 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4016 if((source[i]&0x3f)<4) {
4017 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4018 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4019 }else{
4020 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4021 }
4022 switch(source[i]&0x3f)
4023 {
4024 case 0x00: emit_call((int)add_s);break;
4025 case 0x01: emit_call((int)sub_s);break;
4026 case 0x02: emit_call((int)mul_s);break;
4027 case 0x03: emit_call((int)div_s);break;
4028 case 0x04: emit_call((int)sqrt_s);break;
4029 case 0x05: emit_call((int)abs_s);break;
4030 case 0x06: emit_call((int)mov_s);break;
4031 case 0x07: emit_call((int)neg_s);break;
4032 }
4033 restore_regs(reglist);
4034 }
4035 if(opcode2[i]==0x11) { // Double precision
4036 save_regs(reglist);
4037 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4038 if((source[i]&0x3f)<4) {
4039 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4040 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4041 }else{
4042 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4043 }
4044 switch(source[i]&0x3f)
4045 {
4046 case 0x00: emit_call((int)add_d);break;
4047 case 0x01: emit_call((int)sub_d);break;
4048 case 0x02: emit_call((int)mul_d);break;
4049 case 0x03: emit_call((int)div_d);break;
4050 case 0x04: emit_call((int)sqrt_d);break;
4051 case 0x05: emit_call((int)abs_d);break;
4052 case 0x06: emit_call((int)mov_d);break;
4053 case 0x07: emit_call((int)neg_d);break;
4054 }
4055 restore_regs(reglist);
4056 }
4057}
4058
4059static void multdiv_assemble_arm(int i,struct regstat *i_regs)
4060{
4061 // case 0x18: MULT
4062 // case 0x19: MULTU
4063 // case 0x1A: DIV
4064 // case 0x1B: DIVU
4065 // case 0x1C: DMULT
4066 // case 0x1D: DMULTU
4067 // case 0x1E: DDIV
4068 // case 0x1F: DDIVU
4069 if(rs1[i]&&rs2[i])
4070 {
4071 if((opcode2[i]&4)==0) // 32-bit
4072 {
4073 if(opcode2[i]==0x18) // MULT
4074 {
4075 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4076 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4077 signed char hi=get_reg(i_regs->regmap,HIREG);
4078 signed char lo=get_reg(i_regs->regmap,LOREG);
4079 assert(m1>=0);
4080 assert(m2>=0);
4081 assert(hi>=0);
4082 assert(lo>=0);
4083 emit_smull(m1,m2,hi,lo);
4084 }
4085 if(opcode2[i]==0x19) // MULTU
4086 {
4087 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4088 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4089 signed char hi=get_reg(i_regs->regmap,HIREG);
4090 signed char lo=get_reg(i_regs->regmap,LOREG);
4091 assert(m1>=0);
4092 assert(m2>=0);
4093 assert(hi>=0);
4094 assert(lo>=0);
4095 emit_umull(m1,m2,hi,lo);
4096 }
4097 if(opcode2[i]==0x1A) // DIV
4098 {
e87f91c8 4099 #if 0
587ca588 4100 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4101 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4102 assert(m1l>=0);
4103 assert(m2l>=0);
4104 save_regs(0x100f);
4105 if(m1l!=0) emit_mov(m1l,0);
4106 if(m2l<1) emit_readword((int)&dynarec_local,1);
4107 else if(m2l>1) emit_mov(m2l,1);
4108 emit_call((int)&div32);
4109 restore_regs(0x100f);
4110 signed char hil=get_reg(i_regs->regmap,HIREG);
4111 if(hil>=0) emit_loadreg(HIREG,hil);
4112 signed char lol=get_reg(i_regs->regmap,LOREG);
4113 if(lol>=0) emit_loadreg(LOREG,lol);
4114 #else
451ab91e 4115 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4116 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4117 assert(d1>=0);
4118 assert(d2>=0);
4119 signed char quotient=get_reg(i_regs->regmap,LOREG);
4120 signed char remainder=get_reg(i_regs->regmap,HIREG);
4121 assert(quotient>=0);
4122 assert(remainder>=0);
4123 emit_movs(d1,remainder);
4124 emit_negmi(remainder,remainder);
4125 emit_movs(d2,HOST_TEMPREG);
4126 emit_jeq((int)out+52); // Division by zero
4127 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4128 emit_clz(HOST_TEMPREG,quotient);
4129 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4130 emit_orimm(quotient,1<<31,quotient);
4131 emit_shr(quotient,quotient,quotient);
4132 emit_cmp(remainder,HOST_TEMPREG);
4133 emit_subcs(remainder,HOST_TEMPREG,remainder);
4134 emit_adcs(quotient,quotient,quotient);
4135 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4136 emit_jcc((int)out-16); // -4
4137 emit_teq(d1,d2);
4138 emit_negmi(quotient,quotient);
4139 emit_test(d1,d1);
4140 emit_negmi(remainder,remainder);
587ca588 4141 #endif
451ab91e 4142 }
4143 if(opcode2[i]==0x1B) // DIVU
4144 {
e87f91c8 4145 #if 0
4146 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4147 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4148 assert(m1l>=0);
4149 assert(m2l>=0);
4150 save_regs(0x100f);
4151 if(m1l!=0) emit_mov(m1l,0);
4152 if(m2l<1) emit_readword((int)&dynarec_local,1);
4153 else if(m2l>1) emit_mov(m2l,1);
4154 emit_call((int)&divu32);
4155 restore_regs(0x100f);
4156 signed char hil=get_reg(i_regs->regmap,HIREG);
4157 if(hil>=0) emit_loadreg(HIREG,hil);
4158 signed char lol=get_reg(i_regs->regmap,LOREG);
4159 if(lol>=0) emit_loadreg(LOREG,lol);
4160 #else
4161 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
451ab91e 4162 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4163 assert(d1>=0);
4164 assert(d2>=0);
4165 signed char quotient=get_reg(i_regs->regmap,LOREG);
4166 signed char remainder=get_reg(i_regs->regmap,HIREG);
4167 assert(quotient>=0);
4168 assert(remainder>=0);
4169 emit_test(d2,d2);
4170 emit_jeq((int)out+44); // Division by zero
4171 emit_clz(d2,HOST_TEMPREG);
4172 emit_movimm(1<<31,quotient);
4173 emit_shl(d2,HOST_TEMPREG,d2);
4174 emit_mov(d1,remainder);
4175 emit_shr(quotient,HOST_TEMPREG,quotient);
4176 emit_cmp(remainder,d2);
4177 emit_subcs(remainder,d2,remainder);
4178 emit_adcs(quotient,quotient,quotient);
4179 emit_shrcc_imm(d2,1,d2);
4180 emit_jcc((int)out-16); // -4
e87f91c8 4181 #endif
451ab91e 4182 }
4183 }
4184 else // 64-bit
4185 {
4186 if(opcode2[i]==0x1C) // DMULT
4187 {
ce68e3b9 4188 //assert(opcode2[i]!=0x1C);
451ab91e 4189 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4190 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4191 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4192 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4193 assert(m1h>=0);
4194 assert(m2h>=0);
4195 assert(m1l>=0);
4196 assert(m2l>=0);
ce68e3b9 4197 save_regs(0x100f);
4198 if(m1l!=0) emit_mov(m1l,0);
4199 if(m1h==0) emit_readword((int)&dynarec_local,1);
4200 else if(m1h>1) emit_mov(m1h,1);
4201 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4202 else if(m2l>2) emit_mov(m2l,2);
4203 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4204 else if(m2h>3) emit_mov(m2h,3);
451ab91e 4205 emit_call((int)&mult64);
ce68e3b9 4206 restore_regs(0x100f);
451ab91e 4207 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4208 signed char hil=get_reg(i_regs->regmap,HIREG);
4209 if(hih>=0) emit_loadreg(HIREG|64,hih);
4210 if(hil>=0) emit_loadreg(HIREG,hil);
4211 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4212 signed char lol=get_reg(i_regs->regmap,LOREG);
4213 if(loh>=0) emit_loadreg(LOREG|64,loh);
4214 if(lol>=0) emit_loadreg(LOREG,lol);
4215 }
4216 if(opcode2[i]==0x1D) // DMULTU
4217 {
4218 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4219 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4220 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4221 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4222 assert(m1h>=0);
4223 assert(m2h>=0);
4224 assert(m1l>=0);
4225 assert(m2l>=0);
4226 save_regs(0x100f);
4227 if(m1l!=0) emit_mov(m1l,0);
4228 if(m1h==0) emit_readword((int)&dynarec_local,1);
4229 else if(m1h>1) emit_mov(m1h,1);
4230 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4231 else if(m2l>2) emit_mov(m2l,2);
4232 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4233 else if(m2h>3) emit_mov(m2h,3);
4234 emit_call((int)&multu64);
4235 restore_regs(0x100f);
4236 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4237 signed char hil=get_reg(i_regs->regmap,HIREG);
587ca588 4238 if(hih>=0) emit_loadreg(HIREG|64,hih);
4239 if(hil>=0) emit_loadreg(HIREG,hil);
451ab91e 4240 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4241 signed char lol=get_reg(i_regs->regmap,LOREG);
587ca588 4242 if(loh>=0) emit_loadreg(LOREG|64,loh);
4243 if(lol>=0) emit_loadreg(LOREG,lol);
451ab91e 4244 /*signed char temp=get_reg(i_regs->regmap,-1);
4245 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4246 signed char rl=get_reg(i_regs->regmap,HIREG);
4247 assert(m1h>=0);
4248 assert(m2h>=0);
4249 assert(m1l>=0);
4250 assert(m2l>=0);
4251 assert(temp>=0);
4252 //emit_mov(m1l,EAX);
4253 //emit_mul(m2l);
4254 emit_umull(rl,rh,m1l,m2l);
4255 emit_storereg(LOREG,rl);
4256 emit_mov(rh,temp);
4257 //emit_mov(m1h,EAX);
4258 //emit_mul(m2l);
4259 emit_umull(rl,rh,m1h,m2l);
4260 emit_adds(rl,temp,temp);
4261 emit_adcimm(rh,0,rh);
4262 emit_storereg(HIREG,rh);
4263 //emit_mov(m2h,EAX);
4264 //emit_mul(m1l);
4265 emit_umull(rl,rh,m1l,m2h);
4266 emit_adds(rl,temp,temp);
4267 emit_adcimm(rh,0,rh);
4268 emit_storereg(LOREG|64,temp);
4269 emit_mov(rh,temp);
4270 //emit_mov(m2h,EAX);
4271 //emit_mul(m1h);
4272 emit_umull(rl,rh,m1h,m2h);
4273 emit_adds(rl,temp,rl);
4274 emit_loadreg(HIREG,temp);
4275 emit_adcimm(rh,0,rh);
4276 emit_adds(rl,temp,rl);
4277 emit_adcimm(rh,0,rh);
4278 // DEBUG
4279 /*
4280 emit_pushreg(m2h);
4281 emit_pushreg(m2l);
4282 emit_pushreg(m1h);
4283 emit_pushreg(m1l);
4284 emit_call((int)&multu64);
4285 emit_popreg(m1l);
4286 emit_popreg(m1h);
4287 emit_popreg(m2l);
4288 emit_popreg(m2h);
4289 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4290 signed char hil=get_reg(i_regs->regmap,HIREG);
4291 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4292 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4293 */
4294 // Shouldn't be necessary
4295 //char loh=get_reg(i_regs->regmap,LOREG|64);
4296 //char lol=get_reg(i_regs->regmap,LOREG);
4297 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4298 //if(lol>=0) emit_loadreg(LOREG,lol);
4299 }
4300 if(opcode2[i]==0x1E) // DDIV
4301 {
4302 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4303 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4304 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4305 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4306 assert(d1h>=0);
4307 assert(d2h>=0);
4308 assert(d1l>=0);
4309 assert(d2l>=0);
4310 save_regs(0x100f);
4311 if(d1l!=0) emit_mov(d1l,0);
4312 if(d1h==0) emit_readword((int)&dynarec_local,1);
4313 else if(d1h>1) emit_mov(d1h,1);
4314 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4315 else if(d2l>2) emit_mov(d2l,2);
4316 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4317 else if(d2h>3) emit_mov(d2h,3);
4318 emit_call((int)&div64);
4319 restore_regs(0x100f);
4320 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4321 signed char hil=get_reg(i_regs->regmap,HIREG);
4322 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4323 signed char lol=get_reg(i_regs->regmap,LOREG);
4324 if(hih>=0) emit_loadreg(HIREG|64,hih);
4325 if(hil>=0) emit_loadreg(HIREG,hil);
4326 if(loh>=0) emit_loadreg(LOREG|64,loh);
4327 if(lol>=0) emit_loadreg(LOREG,lol);
4328 }
4329 if(opcode2[i]==0x1F) // DDIVU
4330 {
4331 //u_int hr,reglist=0;
4332 //for(hr=0;hr<HOST_REGS;hr++) {
4333 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4334 //}
4335 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4336 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4337 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4338 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4339 assert(d1h>=0);
4340 assert(d2h>=0);
4341 assert(d1l>=0);
4342 assert(d2l>=0);
4343 save_regs(0x100f);
4344 if(d1l!=0) emit_mov(d1l,0);
4345 if(d1h==0) emit_readword((int)&dynarec_local,1);
4346 else if(d1h>1) emit_mov(d1h,1);
4347 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4348 else if(d2l>2) emit_mov(d2l,2);
4349 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4350 else if(d2h>3) emit_mov(d2h,3);
4351 emit_call((int)&divu64);
4352 restore_regs(0x100f);
4353 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4354 signed char hil=get_reg(i_regs->regmap,HIREG);
4355 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4356 signed char lol=get_reg(i_regs->regmap,LOREG);
4357 if(hih>=0) emit_loadreg(HIREG|64,hih);
4358 if(hil>=0) emit_loadreg(HIREG,hil);
4359 if(loh>=0) emit_loadreg(LOREG|64,loh);
4360 if(lol>=0) emit_loadreg(LOREG,lol);
4361 }
4362 }
4363 }
4364 else
4365 {
4366 // Multiply by zero is zero.
4367 // MIPS does not have a divide by zero exception.
4368 // The result is undefined, we return zero.
587ca588 4369 if((opcode2[i]&4)!=0) // 64-bit
ce68e3b9 4370 {
4371 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4372 signed char hr=get_reg(i_regs->regmap,HIREG);
4373 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4374 signed char lr=get_reg(i_regs->regmap,LOREG);
4375 if(hih>=0) emit_zeroreg(hih);
4376 if(hr>=0) emit_zeroreg(hr);
4377 if(loh>=0) emit_zeroreg(loh);
4378 if(lr>=0) emit_zeroreg(lr);
587ca588 4379 } else
ce68e3b9 4380 {
4381 signed char hr=get_reg(i_regs->regmap,HIREG);
4382 signed char lr=get_reg(i_regs->regmap,LOREG);
4383 if(hr>=0) emit_zeroreg(hr);
4384 if(lr>=0) emit_zeroreg(lr);
4385 }
451ab91e 4386 }
4387}
4388#define multdiv_assemble multdiv_assemble_arm
4389
// Intentionally a no-op on ARM.
// On x86 this step loads the value 0xf8 into register r; on ARM the hash is
// computed with a single instruction in do_rhash(), so nothing is preloaded.
static void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
4394
4395static void do_preload_rhtbl(int ht) {
4396 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4397}
4398
// Emit an and-immediate of rs with 0xf8 (bits 3..7), with rh as the
// remaining operand.
// NOTE(review): presumably the result is a byte offset into mini_ht, whose
// entries are two words (8 bytes) each - confirm.
static void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4402
4403static void do_miniht_load(int ht,int rh) {
4404 assem_debug("ldr %s,[%s,%s]!",regname[rh],regname[ht],regname[rh]);
4405 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4406}
4407
4408static void do_miniht_jump(int rs,int rh,int ht) {
4409 emit_cmp(rh,rs);
4410 emit_ldreq_indexed(ht,4,15);
4411 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4412 emit_mov(rs,7);
4413 emit_jmp(jump_vaddr_reg[7]);
4414 #else
4415 emit_jmp(jump_vaddr_reg[rs]);
4416 #endif
4417}
4418
4419static void do_miniht_insert(u_int return_address,int rt,int temp) {
4420 #ifdef ARMv5_ONLY
4421 emit_movimm(return_address,rt); // PC into link register
4422 add_to_linker((int)out,return_address,1);
4423 emit_pcreladdr(temp);
4424 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4425 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4426 #else
4427 emit_movw(return_address&0x0000FFFF,rt);
4428 add_to_linker((int)out,return_address,1);
4429 emit_pcreladdr(temp);
4430 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4431 emit_movt(return_address&0xFFFF0000,rt);
4432 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4433 #endif
4434}
4435
4436// Sign-extend to 64 bits and write out upper half of a register
4437// This is useful where we have a 32-bit value in a register, and want to
4438// keep it in a 32-bit register, but can't guarantee that it won't be read
4439// as a 64-bit value later.
4440static void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4441{
4442 if(is32_pre==is32) return;
4443 int hr,reg;
4444 for(hr=0;hr<HOST_REGS;hr++) {
4445 if(hr!=EXCLUDE_REG) {
4446 //if(pre[hr]==entry[hr]) {
4447 if((reg=pre[hr])>=0) {
4448 if((dirty>>hr)&1) {
4449 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4450 emit_sarimm(hr,31,HOST_TEMPREG);
4451 emit_storereg(reg|64,HOST_TEMPREG);
4452 }
4453 }
4454 }
4455 //}
4456 }
4457 }
4458}
4459
4460static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4461{
4462 //if(dirty_pre==dirty) return;
4463 int hr,reg,new_hr;
4464 for(hr=0;hr<HOST_REGS;hr++) {
4465 if(hr!=EXCLUDE_REG) {
4466 reg=pre[hr];
4467 if(((~u)>>(reg&63))&1) {
4468 if(reg>0) {
4469 if(((dirty_pre&~dirty)>>hr)&1) {
4470 if(reg>0&&reg<34) {
4471 emit_storereg(reg,hr);
4472 if( ((is32_pre&~uu)>>reg)&1 ) {
4473 emit_sarimm(hr,31,HOST_TEMPREG);
4474 emit_storereg(reg|64,HOST_TEMPREG);
4475 }
4476 }
4477 else if(reg>=64) {
4478 emit_storereg(reg,hr);
4479 }
4480 }
4481 }
4482 }
4483 }
4484 }
4485}
4486
4487
4488/* using strd could possibly help but you'd have to allocate registers in pairs
4489static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4490{
4491 int hr;
4492 int wrote=-1;
4493 for(hr=HOST_REGS-1;hr>=0;hr--) {
4494 if(hr!=EXCLUDE_REG) {
4495 if(pre[hr]!=entry[hr]) {
4496 if(pre[hr]>=0) {
4497 if((dirty>>hr)&1) {
4498 if(get_reg(entry,pre[hr])<0) {
4499 if(pre[hr]<64) {
4500 if(!((u>>pre[hr])&1)) {
4501 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4502 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4503 emit_sarimm(hr,31,hr+1);
4504 emit_strdreg(pre[hr],hr);
4505 }
4506 else
4507 emit_storereg(pre[hr],hr);
4508 }else{
4509 emit_storereg(pre[hr],hr);
4510 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4511 emit_sarimm(hr,31,hr);
4512 emit_storereg(pre[hr]|64,hr);
4513 }
4514 }
4515 }
4516 }else{
4517 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4518 emit_storereg(pre[hr],hr);
4519 }
4520 }
4521 wrote=hr;
4522 }
4523 }
4524 }
4525 }
4526 }
4527 }
4528 for(hr=0;hr<HOST_REGS;hr++) {
4529 if(hr!=EXCLUDE_REG) {
4530 if(pre[hr]!=entry[hr]) {
4531 if(pre[hr]>=0) {
4532 int nr;
4533 if((nr=get_reg(entry,pre[hr]))>=0) {
4534 emit_mov(hr,nr);
4535 }
4536 }
4537 }
4538 }
4539 }
4540}
4541#define wb_invalidate wb_invalidate_arm
4542*/
4543
4544// Clearing the cache is rather slow on ARM Linux, so mark the areas
4545// that need to be cleared, and then only clear these areas once.
4546static void do_clear_cache()
4547{
4548 int i,j;
4549 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4550 {
4551 u_int bitmap=needs_clear_cache[i];
4552 if(bitmap) {
4553 u_int start,end;
4554 for(j=0;j<32;j++)
4555 {
4556 if(bitmap&(1<<j)) {
4557 start=BASE_ADDR+i*131072+j*4096;
4558 end=start+4095;
4559 j++;
4560 while(j<32) {
4561 if(bitmap&(1<<j)) {
4562 end+=4096;
4563 j++;
4564 }else{
4565 __clear_cache((void *)start,(void *)end);
4566 //cacheflush((void *)start,(void *)end,0);
4567 break;
4568 }
4569 }
4570 }
4571 }
4572 needs_clear_cache[i]=0;
4573 }
4574 }
4575}
4576
4577// CPU-architecture-specific initialization
4578static void arch_init() {
4579 rounding_modes[0]=0x0<<22; // round
4580 rounding_modes[1]=0x3<<22; // trunc
4581 rounding_modes[2]=0x1<<22; // ceil
4582 rounding_modes[3]=0x2<<22; // floor
4583
4584 jump_table_symbols[15] = (int) cached_interpreter_table.MFC0;
4585 jump_table_symbols[16] = (int) cached_interpreter_table.MTC0;
4586 jump_table_symbols[17] = (int) cached_interpreter_table.TLBR;
4587 jump_table_symbols[18] = (int) cached_interpreter_table.TLBP;
4588
4589 #ifdef RAM_OFFSET
4590 ram_offset=((int)rdram-(int)0x80000000)>>2;
4591 #endif
4592
4593 // Trampolines for jumps >32M
4594 int *ptr,*ptr2;
4595 ptr=(int *)jump_table_symbols;
4596 ptr2=(int *)((void *)BASE_ADDR+(1<<TARGET_SIZE_2)-JUMP_TABLE_SIZE);
4597 while((void *)ptr<(void *)jump_table_symbols+sizeof(jump_table_symbols))
4598 {
4599 int offset=*ptr-(int)ptr2-8;
4600 if(offset>=-33554432&&offset<33554432) {
4601 *ptr2=0xea000000|((offset>>2)&0xffffff); // direct branch
4602 }else{
4603 *ptr2=0xe51ff004; // ldr pc,[pc,#-4]
4604 }
4605 ptr2++;
4606 *ptr2=*ptr;
4607 ptr++;
4608 ptr2++;
4609 }
4610
4611 // Jumping thru the trampolines created above slows things down by about 1%.
4612 // If part of the cache is beyond the 32M limit, avoid using this area
4613 // initially. It will be used later if the cache gets full.
4614 if((u_int)dyna_linker-33554432>(u_int)BASE_ADDR) {
4615 if((u_int)dyna_linker-33554432<(u_int)BASE_ADDR+(1<<(TARGET_SIZE_2-1))) {
4616 out=(u_char *)(((u_int)dyna_linker-33554432)&~4095);
4617 expirep=((((int)out-BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535;
4618 }
4619 }
4620}