drc: remove yet yet more n64 stuff
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33u_char *translation_cache;
34#else
35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// CALLER_SAVE_REGS: bits 0-3 and 12 are always set; Mach targets
// additionally set bit 9.
// NOTE(review): presumably a mask of host registers to save around calls.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

// Marks a definition that may legitimately go unreferenced.
#define unused __attribute__((unused))

// DRC_DBG builds silence the "unused" family of diagnostics.
#ifdef DRC_DBG
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
51
// Variables defined outside this file.
extern int cycle_count, last_count, pcaddr, pending_exception, branch_target;
extern uint64_t readmem_dword;
extern void *dynarec_local;
extern u_int mini_ht[32][2];
60
// Entry points defined outside this file; only their addresses are
// needed here (see jump_vaddr_reg). There is no r11 variant.
void indirect_jump_indexed(), indirect_jump(), do_interrupt();
void jump_vaddr_r0(), jump_vaddr_r1(), jump_vaddr_r2(), jump_vaddr_r3();
void jump_vaddr_r4(), jump_vaddr_r5(), jump_vaddr_r6(), jump_vaddr_r7();
void jump_vaddr_r8(), jump_vaddr_r9(), jump_vaddr_r10(), jump_vaddr_r12();
76
77void * const jump_vaddr_reg[16] = {
78 jump_vaddr_r0,
79 jump_vaddr_r1,
80 jump_vaddr_r2,
81 jump_vaddr_r3,
82 jump_vaddr_r4,
83 jump_vaddr_r5,
84 jump_vaddr_r6,
85 jump_vaddr_r7,
86 jump_vaddr_r8,
87 jump_vaddr_r9,
88 jump_vaddr_r10,
89 0,
90 jump_vaddr_r12,
91 0,
92 0,
93 0
94};
95
// Per-register invalidate entry points defined outside this file
// (see invalidate_addr_reg). There is no r11 variant.
void invalidate_addr_r0(), invalidate_addr_r1(), invalidate_addr_r2();
void invalidate_addr_r3(), invalidate_addr_r4(), invalidate_addr_r5();
void invalidate_addr_r6(), invalidate_addr_r7(), invalidate_addr_r8();
void invalidate_addr_r9(), invalidate_addr_r10(), invalidate_addr_r12();
108
109const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
121 0,
122 (int)invalidate_addr_r12,
123 0,
124 0,
125 0};
126
127static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
128
129/* Linker */
130
// Patch the instruction at addr so that it refers to target_.
// The displacement is target_ - addr - 8. How it is stored depends on
// the top byte of the instruction:
//   0xe2  - displacement must be below 1024; displacement/4 goes into
//           the low 12 bits together with 0xF00
//   0x72  - generated by emit_jno_unlikely; stored as displacement/4 with
//           0xF00, or displacement/16 with 0xE00 when it fits, otherwise
//           the whole word becomes 0x7A000000 plus a 24-bit word offset
//   other - must be a branch; only its 24-bit word offset is replaced
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = (u_int *)addr;
  u_int target = (u_int)target_;
  u_int disp = target - (u_int)insn - 8;
  u_char top = ((u_char *)addr)[3];

  if (top == 0xe2) {
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    return;
  }

  if (top != 0x72) {
    assert((top & 0x0e) == 0xa);
    *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
    return;
  }

  // generated by emit_jno_unlikely
  if (disp < 1024) {
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
  }
  else if (disp < 4096 && !(disp & 15)) {
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
  }
  else {
    *insn = (0x7A000000) | ((disp << 6) >> 8);
  }
}
162
// Variant of set_jump_target that can also copy the instruction found at
// the branch target into the slot just before the branch. Works, but the
// difference in speed is usually insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes=(u_char *)addr;
  u_int *insn=(u_int *)bytes;
  assert(!copy||insn[-1]==0xe28dd000);
  if(bytes[3]==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }
  assert((bytes[3]&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  // Never copy: ALU op without immediate/flags, a load, or bit 27 set
  if((target_insn&0x0e100000)==0
     ||(target_insn&0x0c100000)==0x04100000
     ||(target_insn&0x08000000))
    copy=0;
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
197
198/* Literal pool */
199static void add_literal(int addr,int val)
200{
201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
204 literalcount++;
205}
206
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the address of the jumping instruction. The second word of the
// stub must be a pc-relative ldr; the pointer-sized value it loads is
// what gets returned.
static void *find_extjump_insn(void *stub)
{
  char *ldr=(char *)stub+4;
  int insn=*(int *)ldr;
  assert((insn&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
  u_int offset=insn&0xfff;
  return *(void **)(ldr+offset+8);
}
217
218// find where external branch is liked to using addr of it's stub:
219// get address that insn one after stub loads (dyna_linker arg1),
220// treat it as a pointer to branch insn,
221// return addr where that branch jumps to
222static void *get_pointer(void *stub)
223{
224 //printf("get_pointer(%x)\n",(int)stub);
225 int *i_ptr=find_extjump_insn(stub);
226 assert((*i_ptr&0x0f000000)==0x0a000000);
227 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
228}
229
// Find the "clean" entry point from a "dirty" entry point by skipping
// past the call to verify_code. The bl is expected 4 words in (6 with
// HAVE_ARMV7), or one word later. If the word after the bl is an
// unconditional branch, its destination is returned instead.
static void *get_clean_addr(void *addr)
{
  signed int *insn = addr;
  #ifdef HAVE_ARMV7
  insn += 6;
  #else
  insn += 4;
  #endif
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
248
// Compare the source a block was compiled from against its saved copy.
// ptr points at the block header, which loads three values: source
// address, copy address and length. They come from the literal pool, or
// from movw/movt immediates with HAVE_ARMV7. A bl must follow the header.
// Returns 1 when both ranges are byte-identical, 0 otherwise.
static int verify_dirty(u_int *ptr)
{
  u_int source, copy, len;
  #ifndef HAVE_ARMV7
  // get from literal pool
  u_int lit[3];
  int n;
  for(n=0;n<3;n++) {
    assert((*ptr&0xFFFF0000)==0xe59f0000);
    lit[n]=*(u_int *)((char *)ptr+(*ptr&0xfff)+8);
    ptr++;
  }
  source=lit[0];
  copy=lit[1];
  len=lit[2];
  ptr++;
  #else
  // ARMv7 movw/movt
  assert((*ptr&0xFFF00000)==0xe3000000);
  source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
  copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
  len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
  ptr+=6;
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  return !memcmp((void *)source,(void *)copy,len);
}
280
281// This doesn't necessarily find all clean entry points, just
282// guarantees that it's not dirty
283static int isclean(void *addr)
284{
285 #ifndef HAVE_ARMV7
286 u_int *ptr=((u_int *)addr)+4;
287 #else
288 u_int *ptr=((u_int *)addr)+6;
289 #endif
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
295 return 1;
296}
297
// Get the source range (host pointers) that the block at addr was
// compiled from. The start address and length are read from the block
// header (literal pool loads, or movw/movt with HAVE_ARMV7); the copy
// address in between is skipped. A bl must follow the header.
// *start receives the first byte, *end one past the last.
static void get_bounds(void *addr, u_char **start, u_char **end)
{
  u_int *ptr = addr;
  u_int source, len;
  #ifndef HAVE_ARMV7
  // get from literal pool
  assert((*ptr&0xFFFF0000)==0xe59f0000);
  source=*(u_int *)((char *)ptr+(*ptr&0xfff)+8);
  ptr+=2; // also steps over the load of the copy address
  assert((*ptr&0xFFFF0000)==0xe59f0000);
  len=*(u_int *)((char *)ptr+(*ptr&0xfff)+8);
  ptr+=2;
  #else
  // ARMv7 movw/movt
  assert((*ptr&0xFFF00000)==0xe3000000);
  source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
  len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
  ptr+=6;
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  *start=(u_char *)source;
  *end=(u_char *)source+len;
}
331
332/* Register allocation */
333
334// Note: registers are allocated clean (unmodified state)
335// if you intend to modify the register, you must call dirty_reg().
336static void alloc_reg(struct regstat *cur,int i,signed char reg)
337{
338 int r,hr;
339 int preferred_reg = (reg&7);
340 if(reg==CCREG) preferred_reg=HOST_CCREG;
341 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
342
343 // Don't allocate unused registers
344 if((cur->u>>reg)&1) return;
345
346 // see if it's already allocated
347 for(hr=0;hr<HOST_REGS;hr++)
348 {
349 if(cur->regmap[hr]==reg) return;
350 }
351
352 // Keep the same mapping if the register was already allocated in a loop
353 preferred_reg = loop_reg(i,reg,preferred_reg);
354
355 // Try to allocate the preferred register
356 if(cur->regmap[preferred_reg]==-1) {
357 cur->regmap[preferred_reg]=reg;
358 cur->dirty&=~(1<<preferred_reg);
359 cur->isconst&=~(1<<preferred_reg);
360 return;
361 }
362 r=cur->regmap[preferred_reg];
363 assert(r < 64);
364 if((cur->u>>r)&1) {
365 cur->regmap[preferred_reg]=reg;
366 cur->dirty&=~(1<<preferred_reg);
367 cur->isconst&=~(1<<preferred_reg);
368 return;
369 }
370
371 // Clear any unneeded registers
372 // We try to keep the mapping consistent, if possible, because it
373 // makes branches easier (especially loops). So we try to allocate
374 // first (see above) before removing old mappings. If this is not
375 // possible then go ahead and clear out the registers that are no
376 // longer needed.
377 for(hr=0;hr<HOST_REGS;hr++)
378 {
379 r=cur->regmap[hr];
380 if(r>=0) {
381 assert(r < 64);
382 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
383 }
384 }
385 // Try to allocate any available register, but prefer
386 // registers that have not been used recently.
387 if(i>0) {
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
390 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
397 }
398 }
399 // Try to allocate any available register
400 for(hr=0;hr<HOST_REGS;hr++) {
401 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408
409 // Ok, now we have to evict someone
410 // Pick a register we hopefully won't need soon
411 u_char hsn[MAXREG+1];
412 memset(hsn,10,sizeof(hsn));
413 int j;
414 lsn(hsn,i,&preferred_reg);
415 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
416 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
417 if(i>0) {
418 // Don't evict the cycle count at entry points, otherwise the entry
419 // stub will have to write it.
420 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
421 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2;
422 for(j=10;j>=3;j--)
423 {
424 // Alloc preferred register if available
425 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
426 for(hr=0;hr<HOST_REGS;hr++) {
427 // Evict both parts of a 64-bit register
428 if((cur->regmap[hr]&63)==r) {
429 cur->regmap[hr]=-1;
430 cur->dirty&=~(1<<hr);
431 cur->isconst&=~(1<<hr);
432 }
433 }
434 cur->regmap[preferred_reg]=reg;
435 return;
436 }
437 for(r=1;r<=MAXREG;r++)
438 {
439 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
440 for(hr=0;hr<HOST_REGS;hr++) {
441 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
442 if(cur->regmap[hr]==r+64) {
443 cur->regmap[hr]=reg;
444 cur->dirty&=~(1<<hr);
445 cur->isconst&=~(1<<hr);
446 return;
447 }
448 }
449 }
450 for(hr=0;hr<HOST_REGS;hr++) {
451 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
452 if(cur->regmap[hr]==r) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 }
460 }
461 }
462 }
463 }
464 for(j=10;j>=0;j--)
465 {
466 for(r=1;r<=MAXREG;r++)
467 {
468 if(hsn[r]==j) {
469 for(hr=0;hr<HOST_REGS;hr++) {
470 if(cur->regmap[hr]==r+64) {
471 cur->regmap[hr]=reg;
472 cur->dirty&=~(1<<hr);
473 cur->isconst&=~(1<<hr);
474 return;
475 }
476 }
477 for(hr=0;hr<HOST_REGS;hr++) {
478 if(cur->regmap[hr]==r) {
479 cur->regmap[hr]=reg;
480 cur->dirty&=~(1<<hr);
481 cur->isconst&=~(1<<hr);
482 return;
483 }
484 }
485 }
486 }
487 }
488 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
489}
490
491// Allocate a temporary register. This is done without regard to
492// dirty status or whether the register we request is on the unneeded list
493// Note: This will only allocate one register, even if called multiple times
494static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
495{
496 int r,hr;
497 int preferred_reg = -1;
498
499 // see if it's already allocated
500 for(hr=0;hr<HOST_REGS;hr++)
501 {
502 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
503 }
504
505 // Try to allocate any available register
506 for(hr=HOST_REGS-1;hr>=0;hr--) {
507 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
508 cur->regmap[hr]=reg;
509 cur->dirty&=~(1<<hr);
510 cur->isconst&=~(1<<hr);
511 return;
512 }
513 }
514
515 // Find an unneeded register
516 for(hr=HOST_REGS-1;hr>=0;hr--)
517 {
518 r=cur->regmap[hr];
519 if(r>=0) {
520 assert(r < 64);
521 if((cur->u>>r)&1) {
522 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
523 cur->regmap[hr]=reg;
524 cur->dirty&=~(1<<hr);
525 cur->isconst&=~(1<<hr);
526 return;
527 }
528 }
529 }
530 }
531
532 // Ok, now we have to evict someone
533 // Pick a register we hopefully won't need soon
534 // TODO: we might want to follow unconditional jumps here
535 // TODO: get rid of dupe code and make this into a function
536 u_char hsn[MAXREG+1];
537 memset(hsn,10,sizeof(hsn));
538 int j;
539 lsn(hsn,i,&preferred_reg);
540 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
541 if(i>0) {
542 // Don't evict the cycle count at entry points, otherwise the entry
543 // stub will have to write it.
544 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
545 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2;
546 for(j=10;j>=3;j--)
547 {
548 for(r=1;r<=MAXREG;r++)
549 {
550 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
551 for(hr=0;hr<HOST_REGS;hr++) {
552 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
553 if(cur->regmap[hr]==r+64) {
554 cur->regmap[hr]=reg;
555 cur->dirty&=~(1<<hr);
556 cur->isconst&=~(1<<hr);
557 return;
558 }
559 }
560 }
561 for(hr=0;hr<HOST_REGS;hr++) {
562 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
563 if(cur->regmap[hr]==r) {
564 cur->regmap[hr]=reg;
565 cur->dirty&=~(1<<hr);
566 cur->isconst&=~(1<<hr);
567 return;
568 }
569 }
570 }
571 }
572 }
573 }
574 }
575 for(j=10;j>=0;j--)
576 {
577 for(r=1;r<=MAXREG;r++)
578 {
579 if(hsn[r]==j) {
580 for(hr=0;hr<HOST_REGS;hr++) {
581 if(cur->regmap[hr]==r+64) {
582 cur->regmap[hr]=reg;
583 cur->dirty&=~(1<<hr);
584 cur->isconst&=~(1<<hr);
585 return;
586 }
587 }
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(cur->regmap[hr]==r) {
590 cur->regmap[hr]=reg;
591 cur->dirty&=~(1<<hr);
592 cur->isconst&=~(1<<hr);
593 return;
594 }
595 }
596 }
597 }
598 }
599 SysPrintf("This shouldn't happen");exit(1);
600}
601
602// Allocate a specific ARM register.
603static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
604{
605 int n;
606 int dirty=0;
607
608 // see if it's already allocated (and dealloc it)
609 for(n=0;n<HOST_REGS;n++)
610 {
611 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
612 dirty=(cur->dirty>>n)&1;
613 cur->regmap[n]=-1;
614 }
615 }
616
617 cur->regmap[hr]=reg;
618 cur->dirty&=~(1<<hr);
619 cur->dirty|=dirty<<hr;
620 cur->isconst&=~(1<<hr);
621}
622
623// Alloc cycle count into dedicated register
624static void alloc_cc(struct regstat *cur,int i)
625{
626 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
627}
628
629/* Special alloc */
630
631
632/* Assembler */
633
634static unused char regname[16][4] = {
635 "r0",
636 "r1",
637 "r2",
638 "r3",
639 "r4",
640 "r5",
641 "r6",
642 "r7",
643 "r8",
644 "r9",
645 "r10",
646 "fp",
647 "r12",
648 "sp",
649 "lr",
650 "pc"};
651
652static void output_w32(u_int word)
653{
654 *((u_int *)out)=word;
655 out+=4;
656}
657
// Build the register fields of an instruction word: rn in bits 16-19,
// rd in bits 12-15, rm in bits 0-3. Each must be below 16 (asserted).
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  u_int fields;
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
665
// Build rn (bits 16-19), rd (bits 12-15) and an 8-bit immediate that is
// meant to be shifted left by an even amount. The shift is stored as a
// rotate-right of (32-shift), placed from bit 7 upwards.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  u_int rotate;
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  rotate=(32-shift)&30;
  return (rn<<16)|(rd<<12)|(rotate<<7)|imm;
}
674
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success stores the 12-bit field (rotate bits above the 8-bit value)
// in *encoded and returns 1. Otherwise returns 0 with *encoded set to 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
690
691static void genimm_checked(u_int imm,u_int *encoded)
692{
693 u_int ret=genimm(imm,encoded);
694 assert(ret);
695 (void)ret;
696}
697
698static u_int genjmp(u_int addr)
699{
700 int offset=addr-(int)out-8;
701 if(offset<-33554432||offset>=33554432) {
702 if (addr>2) {
703 SysPrintf("genjmp: out of range: %08x\n", offset);
704 exit(1);
705 }
706 return 0;
707 }
708 return ((u_int)offset>>2)&0xffffff;
709}
710
711static void emit_mov(int rs,int rt)
712{
713 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
714 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
715}
716
717static void emit_movs(int rs,int rt)
718{
719 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
720 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
721}
722
723static void emit_add(int rs1,int rs2,int rt)
724{
725 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
726 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
727}
728
729static void emit_adds(int rs1,int rs2,int rt)
730{
731 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
732 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
733}
734
735static void emit_adcs(int rs1,int rs2,int rt)
736{
737 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
738 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
739}
740
741static void emit_neg(int rs, int rt)
742{
743 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
744 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
745}
746
747static void emit_sub(int rs1,int rs2,int rt)
748{
749 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
750 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
751}
752
753static void emit_zeroreg(int rt)
754{
755 assem_debug("mov %s,#0\n",regname[rt]);
756 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
757}
758
759static void emit_loadlp(u_int imm,u_int rt)
760{
761 add_literal((int)out,imm);
762 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
763 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
764}
765
766static void emit_movw(u_int imm,u_int rt)
767{
768 assert(imm<65536);
769 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
770 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
771}
772
773static void emit_movt(u_int imm,u_int rt)
774{
775 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
776 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
777}
778
779static void emit_movimm(u_int imm,u_int rt)
780{
781 u_int armval;
782 if(genimm(imm,&armval)) {
783 assem_debug("mov %s,#%d\n",regname[rt],imm);
784 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
785 }else if(genimm(~imm,&armval)) {
786 assem_debug("mvn %s,#%d\n",regname[rt],imm);
787 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
788 }else if(imm<65536) {
789 #ifndef HAVE_ARMV7
790 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
791 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
792 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
793 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
794 #else
795 emit_movw(imm,rt);
796 #endif
797 }else{
798 #ifndef HAVE_ARMV7
799 emit_loadlp(imm,rt);
800 #else
801 emit_movw(imm&0x0000FFFF,rt);
802 emit_movt(imm&0xFFFF0000,rt);
803 #endif
804 }
805}
806
807static void emit_pcreladdr(u_int rt)
808{
809 assem_debug("add %s,pc,#?\n",regname[rt]);
810 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
811}
812
813static void emit_loadreg(int r, int hr)
814{
815 if(r&64) {
816 SysPrintf("64bit load in 32bit mode!\n");
817 assert(0);
818 return;
819 }
820 if((r&63)==0)
821 emit_zeroreg(hr);
822 else {
823 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
824 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
825 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
826 if(r==CCREG) addr=(int)&cycle_count;
827 if(r==CSREG) addr=(int)&Status;
828 if(r==INVCP) addr=(int)&invc_ptr;
829 u_int offset = addr-(u_int)&dynarec_local;
830 assert(offset<4096);
831 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
832 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
833 }
834}
835
836static void emit_storereg(int r, int hr)
837{
838 if(r&64) {
839 SysPrintf("64bit store in 32bit mode!\n");
840 assert(0);
841 return;
842 }
843 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
844 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
845 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
846 if(r==CCREG) addr=(int)&cycle_count;
847 u_int offset = addr-(u_int)&dynarec_local;
848 assert(offset<4096);
849 assem_debug("str %s,fp+%d\n",regname[hr],offset);
850 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
851}
852
853static void emit_test(int rs, int rt)
854{
855 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
856 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
857}
858
859static void emit_testimm(int rs,int imm)
860{
861 u_int armval;
862 assem_debug("tst %s,#%d\n",regname[rs],imm);
863 genimm_checked(imm,&armval);
864 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
865}
866
867static void emit_testeqimm(int rs,int imm)
868{
869 u_int armval;
870 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
871 genimm_checked(imm,&armval);
872 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
873}
874
875static void emit_not(int rs,int rt)
876{
877 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
878 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
879}
880
881static void emit_mvnmi(int rs,int rt)
882{
883 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
884 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
885}
886
887static void emit_and(u_int rs1,u_int rs2,u_int rt)
888{
889 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
890 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
891}
892
893static void emit_or(u_int rs1,u_int rs2,u_int rt)
894{
895 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
896 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
897}
898
899static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
900{
901 assert(rs<16);
902 assert(rt<16);
903 assert(imm<32);
904 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
905 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
906}
907
908static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
909{
910 assert(rs<16);
911 assert(rt<16);
912 assert(imm<32);
913 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
914 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
915}
916
917static void emit_xor(u_int rs1,u_int rs2,u_int rt)
918{
919 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
920 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
921}
922
923static void emit_addimm(u_int rs,int imm,u_int rt)
924{
925 assert(rs<16);
926 assert(rt<16);
927 if(imm!=0) {
928 u_int armval;
929 if(genimm(imm,&armval)) {
930 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
931 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
932 }else if(genimm(-imm,&armval)) {
933 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
934 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
935 #ifdef HAVE_ARMV7
936 }else if(rt!=rs&&(u_int)imm<65536) {
937 emit_movw(imm&0x0000ffff,rt);
938 emit_add(rs,rt,rt);
939 }else if(rt!=rs&&(u_int)-imm<65536) {
940 emit_movw(-imm&0x0000ffff,rt);
941 emit_sub(rs,rt,rt);
942 #endif
943 }else if((u_int)-imm<65536) {
944 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
945 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
946 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
947 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
948 }else {
949 do {
950 int shift = (ffs(imm) - 1) & ~1;
951 int imm8 = imm & (0xff << shift);
952 genimm_checked(imm8,&armval);
953 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
954 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
955 rs = rt;
956 imm &= ~imm8;
957 }
958 while (imm != 0);
959 }
960 }
961 else if(rs!=rt) emit_mov(rs,rt);
962}
963
964static void emit_addimm_and_set_flags(int imm,int rt)
965{
966 assert(imm>-65536&&imm<65536);
967 u_int armval;
968 if(genimm(imm,&armval)) {
969 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
970 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
971 }else if(genimm(-imm,&armval)) {
972 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
973 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
974 }else if(imm<0) {
975 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
976 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
977 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
978 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
979 }else{
980 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
981 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
982 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
983 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
984 }
985}
986
987static void emit_addimm_no_flags(u_int imm,u_int rt)
988{
989 emit_addimm(rt,imm,rt);
990}
991
992static void emit_addnop(u_int r)
993{
994 assert(r<16);
995 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
996 output_w32(0xe2800000|rd_rn_rm(r,r,0));
997}
998
999static void emit_adcimm(u_int rs,int imm,u_int rt)
1000{
1001 u_int armval;
1002 genimm_checked(imm,&armval);
1003 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1004 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1005}
1006
1007static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1008{
1009 // TODO: if(genimm(imm,&armval)) ...
1010 // else
1011 emit_movimm(imm,HOST_TEMPREG);
1012 emit_adds(HOST_TEMPREG,rsl,rtl);
1013 emit_adcimm(rsh,0,rth);
1014}
1015
1016static void emit_andimm(int rs,int imm,int rt)
1017{
1018 u_int armval;
1019 if(imm==0) {
1020 emit_zeroreg(rt);
1021 }else if(genimm(imm,&armval)) {
1022 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1023 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1024 }else if(genimm(~imm,&armval)) {
1025 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1026 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1027 }else if(imm==65535) {
1028 #ifndef HAVE_ARMV6
1029 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1030 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1031 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1032 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1033 #else
1034 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1035 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1036 #endif
1037 }else{
1038 assert(imm>0&&imm<65535);
1039 #ifndef HAVE_ARMV7
1040 assem_debug("mov r14,#%d\n",imm&0xFF00);
1041 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1042 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1043 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1044 #else
1045 emit_movw(imm,HOST_TEMPREG);
1046 #endif
1047 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1048 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1049 }
1050}
1051
1052static void emit_orimm(int rs,int imm,int rt)
1053{
1054 u_int armval;
1055 if(imm==0) {
1056 if(rs!=rt) emit_mov(rs,rt);
1057 }else if(genimm(imm,&armval)) {
1058 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1059 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1060 }else{
1061 assert(imm>0&&imm<65536);
1062 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1063 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1064 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1065 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1066 }
1067}
1068
1069static void emit_xorimm(int rs,int imm,int rt)
1070{
1071 u_int armval;
1072 if(imm==0) {
1073 if(rs!=rt) emit_mov(rs,rt);
1074 }else if(genimm(imm,&armval)) {
1075 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1076 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1077 }else{
1078 assert(imm>0&&imm<65536);
1079 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1080 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1081 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1082 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1083 }
1084}
1085
1086static void emit_shlimm(int rs,u_int imm,int rt)
1087{
1088 assert(imm>0);
1089 assert(imm<32);
1090 //if(imm==1) ...
1091 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1092 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1093}
1094
1095static void emit_lsls_imm(int rs,int imm,int rt)
1096{
1097 assert(imm>0);
1098 assert(imm<32);
1099 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1100 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1101}
1102
1103static unused void emit_lslpls_imm(int rs,int imm,int rt)
1104{
1105 assert(imm>0);
1106 assert(imm<32);
1107 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1108 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1109}
1110
1111static void emit_shrimm(int rs,u_int imm,int rt)
1112{
1113 assert(imm>0);
1114 assert(imm<32);
1115 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1116 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1117}
1118
1119static void emit_sarimm(int rs,u_int imm,int rt)
1120{
1121 assert(imm>0);
1122 assert(imm<32);
1123 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1124 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1125}
1126
1127static void emit_rorimm(int rs,u_int imm,int rt)
1128{
1129 assert(imm>0);
1130 assert(imm<32);
1131 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1132 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1133}
1134
// Sign-extend the low 16 bits of rs into rt.
// With HAVE_ARMV6 a single "sxth" is emitted; otherwise a shift-left /
// arithmetic-shift-right pair by 16 is used.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
#else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
#endif
}
1145
// Sign-extend the low 8 bits of rs into rt.
// With HAVE_ARMV6 a single "sxtb" is emitted; otherwise a shift-left /
// arithmetic-shift-right pair by 24 is used.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
#else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
#endif
}
1156
1157static void emit_shl(u_int rs,u_int shift,u_int rt)
1158{
1159 assert(rs<16);
1160 assert(rt<16);
1161 assert(shift<16);
1162 //if(imm==1) ...
1163 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1164 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1165}
1166
1167static void emit_shr(u_int rs,u_int shift,u_int rt)
1168{
1169 assert(rs<16);
1170 assert(rt<16);
1171 assert(shift<16);
1172 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1173 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1174}
1175
1176static void emit_sar(u_int rs,u_int shift,u_int rt)
1177{
1178 assert(rs<16);
1179 assert(rt<16);
1180 assert(shift<16);
1181 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1182 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1183}
1184
1185static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1186{
1187 assert(rs<16);
1188 assert(rt<16);
1189 assert(shift<16);
1190 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1191 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1192}
1193
1194static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1195{
1196 assert(rs<16);
1197 assert(rt<16);
1198 assert(shift<16);
1199 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1200 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1201}
1202
1203static void emit_cmpimm(int rs,int imm)
1204{
1205 u_int armval;
1206 if(genimm(imm,&armval)) {
1207 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1208 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1209 }else if(genimm(-imm,&armval)) {
1210 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1211 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1212 }else if(imm>0) {
1213 assert(imm<65536);
1214 emit_movimm(imm,HOST_TEMPREG);
1215 assem_debug("cmp %s,r14\n",regname[rs]);
1216 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1217 }else{
1218 assert(imm>-65536);
1219 emit_movimm(-imm,HOST_TEMPREG);
1220 assem_debug("cmn %s,r14\n",regname[rs]);
1221 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1222 }
1223}
1224
1225static void emit_cmovne_imm(int imm,int rt)
1226{
1227 assem_debug("movne %s,#%d\n",regname[rt],imm);
1228 u_int armval;
1229 genimm_checked(imm,&armval);
1230 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1231}
1232
1233static void emit_cmovl_imm(int imm,int rt)
1234{
1235 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1236 u_int armval;
1237 genimm_checked(imm,&armval);
1238 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1239}
1240
1241static void emit_cmovb_imm(int imm,int rt)
1242{
1243 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1244 u_int armval;
1245 genimm_checked(imm,&armval);
1246 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1247}
1248
1249static void emit_cmovne_reg(int rs,int rt)
1250{
1251 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1252 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1253}
1254
1255static void emit_cmovl_reg(int rs,int rt)
1256{
1257 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1258 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1259}
1260
1261static void emit_cmovs_reg(int rs,int rt)
1262{
1263 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1264 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1265}
1266
// Set rt to 1 when rs < imm (movlt after the compare), else 0.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  // rt can be cleared up front only when it is not also the compare operand
  if(!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  // otherwise clear it once the compare has consumed rs
  if(aliased)
    emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1274
// Set rt to 1 when the compare of rs with imm leaves 'cc' true (movcc), else 0.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  // rt can be cleared up front only when it is not also the compare operand
  if(!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  // otherwise clear it once the compare has consumed rs
  if(aliased)
    emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1282
1283static void emit_cmp(int rs,int rt)
1284{
1285 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1286 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1287}
1288
// Set rt to 1 when rs > 0, else 0: compare rs with 1, preload 1 and
// overwrite it with 0 under the 'lt' condition.
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);   // default result
  emit_cmovl_imm(0,rt); // rs < 1
}
1296
// Set rt to 1 when rs is non-zero.
// With distinct registers rs is copied to rt via emit_movs(); when they are
// the same register emit_test() is used instead.  A "movne rt,#1" follows.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
1304
// Set rt to 1 when "cmp rs1,rs2" leaves 'lt' true, else 0.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_source=(rs1==rt||rs2==rt);
  // rt may only be cleared before the compare if it is not one of the operands
  if(!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source)
    emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1313
// Set rt to 1 when "cmp rs1,rs2" leaves 'cc' true (movcc), else 0.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_source=(rs1==rt||rs2==rt);
  // rt may only be cleared before the compare if it is not one of the operands
  if(!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source)
    emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1322
#ifdef DRC_DBG
extern void gen_interupt();
extern void do_insn_cmp();

// Builds one table row: the function's address plus its name preceded by a
// space, ready to be appended to a trace line.
#define FUNCNAME(f) { (intptr_t)f, " " #f }

// Address -> name table used to annotate branch targets in debug traces.
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

// Look up address a in function_names; returns the matching name, or an
// empty string when the address is not in the table.
static const char *func_name(intptr_t a)
{
  const int entries = sizeof(function_names)/sizeof(function_names[0]);
  int idx = 0;
  while (idx < entries) {
    if (function_names[idx].addr == a)
      return function_names[idx].name;
    idx++;
  }
  return "";
}
#else
// Without DRC_DBG no names are available; every lookup yields "".
#define func_name(x) ""
#endif
1363
1364static void emit_call(const void *a_)
1365{
1366 int a = (int)a_;
1367 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1368 u_int offset=genjmp(a);
1369 output_w32(0xeb000000|offset);
1370}
1371
1372static void emit_jmp(const void *a_)
1373{
1374 int a = (int)a_;
1375 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1376 u_int offset=genjmp(a);
1377 output_w32(0xea000000|offset);
1378}
1379
1380static void emit_jne(const void *a_)
1381{
1382 int a = (int)a_;
1383 assem_debug("bne %x\n",a);
1384 u_int offset=genjmp(a);
1385 output_w32(0x1a000000|offset);
1386}
1387
1388static void emit_jeq(int a)
1389{
1390 assem_debug("beq %x\n",a);
1391 u_int offset=genjmp(a);
1392 output_w32(0x0a000000|offset);
1393}
1394
1395static void emit_js(int a)
1396{
1397 assem_debug("bmi %x\n",a);
1398 u_int offset=genjmp(a);
1399 output_w32(0x4a000000|offset);
1400}
1401
1402static void emit_jns(int a)
1403{
1404 assem_debug("bpl %x\n",a);
1405 u_int offset=genjmp(a);
1406 output_w32(0x5a000000|offset);
1407}
1408
1409static void emit_jl(int a)
1410{
1411 assem_debug("blt %x\n",a);
1412 u_int offset=genjmp(a);
1413 output_w32(0xba000000|offset);
1414}
1415
1416static void emit_jge(int a)
1417{
1418 assem_debug("bge %x\n",a);
1419 u_int offset=genjmp(a);
1420 output_w32(0xaa000000|offset);
1421}
1422
1423static void emit_jno(int a)
1424{
1425 assem_debug("bvc %x\n",a);
1426 u_int offset=genjmp(a);
1427 output_w32(0x7a000000|offset);
1428}
1429
1430static void emit_jc(int a)
1431{
1432 assem_debug("bcs %x\n",a);
1433 u_int offset=genjmp(a);
1434 output_w32(0x2a000000|offset);
1435}
1436
1437static void emit_jcc(void *a_)
1438{
1439 int a = (int)a_;
1440 assem_debug("bcc %x\n",a);
1441 u_int offset=genjmp(a);
1442 output_w32(0x3a000000|offset);
1443}
1444
1445static void emit_callreg(u_int r)
1446{
1447 assert(r<15);
1448 assem_debug("blx %s\n",regname[r]);
1449 output_w32(0xe12fff30|r);
1450}
1451
1452static void emit_jmpreg(u_int r)
1453{
1454 assem_debug("mov pc,%s\n",regname[r]);
1455 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1456}
1457
1458static void emit_readword_indexed(int offset, int rs, int rt)
1459{
1460 assert(offset>-4096&&offset<4096);
1461 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1462 if(offset>=0) {
1463 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1464 }else{
1465 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1466 }
1467}
1468
1469static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1470{
1471 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1472 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1473}
1474
1475static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1476{
1477 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1478 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1479}
1480
1481static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1482{
1483 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1484 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1485}
1486
1487static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1488{
1489 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1490 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1491}
1492
1493static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1494{
1495 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1496 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1497}
1498
1499static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1500{
1501 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1502 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1503}
1504
1505static void emit_movsbl_indexed(int offset, int rs, int rt)
1506{
1507 assert(offset>-256&&offset<256);
1508 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1509 if(offset>=0) {
1510 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1511 }else{
1512 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1513 }
1514}
1515
1516static void emit_movswl_indexed(int offset, int rs, int rt)
1517{
1518 assert(offset>-256&&offset<256);
1519 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1520 if(offset>=0) {
1521 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1522 }else{
1523 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1524 }
1525}
1526
1527static void emit_movzbl_indexed(int offset, int rs, int rt)
1528{
1529 assert(offset>-4096&&offset<4096);
1530 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1531 if(offset>=0) {
1532 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1533 }else{
1534 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1535 }
1536}
1537
1538static void emit_movzwl_indexed(int offset, int rs, int rt)
1539{
1540 assert(offset>-256&&offset<256);
1541 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1542 if(offset>=0) {
1543 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1544 }else{
1545 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1546 }
1547}
1548
1549static void emit_ldrd(int offset, int rs, int rt)
1550{
1551 assert(offset>-256&&offset<256);
1552 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1553 if(offset>=0) {
1554 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1555 }else{
1556 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1557 }
1558}
1559
1560static void emit_readword(void *addr, int rt)
1561{
1562 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1563 assert(offset<4096);
1564 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1565 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1566}
1567
1568static void emit_writeword_indexed(int rt, int offset, int rs)
1569{
1570 assert(offset>-4096&&offset<4096);
1571 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1572 if(offset>=0) {
1573 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1574 }else{
1575 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1576 }
1577}
1578
1579static void emit_writehword_indexed(int rt, int offset, int rs)
1580{
1581 assert(offset>-256&&offset<256);
1582 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1583 if(offset>=0) {
1584 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1585 }else{
1586 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1587 }
1588}
1589
1590static void emit_writebyte_indexed(int rt, int offset, int rs)
1591{
1592 assert(offset>-4096&&offset<4096);
1593 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1594 if(offset>=0) {
1595 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1596 }else{
1597 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1598 }
1599}
1600
1601static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1602{
1603 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1604 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1605}
1606
1607static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1608{
1609 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1610 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1611}
1612
1613static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1614{
1615 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1616 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1617}
1618
1619static void emit_writeword(int rt, void *addr)
1620{
1621 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1622 assert(offset<4096);
1623 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1624 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1625}
1626
1627static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1628{
1629 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1630 assert(rs1<16);
1631 assert(rs2<16);
1632 assert(hi<16);
1633 assert(lo<16);
1634 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1635}
1636
1637static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1638{
1639 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1640 assert(rs1<16);
1641 assert(rs2<16);
1642 assert(hi<16);
1643 assert(lo<16);
1644 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1645}
1646
1647static void emit_clz(int rs,int rt)
1648{
1649 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1650 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1651}
1652
1653static void emit_subcs(int rs1,int rs2,int rt)
1654{
1655 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1656 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1657}
1658
1659static void emit_shrcc_imm(int rs,u_int imm,int rt)
1660{
1661 assert(imm>0);
1662 assert(imm<32);
1663 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1664 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1665}
1666
1667static void emit_shrne_imm(int rs,u_int imm,int rt)
1668{
1669 assert(imm>0);
1670 assert(imm<32);
1671 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1672 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1673}
1674
1675static void emit_negmi(int rs, int rt)
1676{
1677 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1678 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1679}
1680
1681static void emit_negsmi(int rs, int rt)
1682{
1683 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1684 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1685}
1686
1687static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1688{
1689 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1690 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1691}
1692
1693static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1694{
1695 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1696 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1697}
1698
1699static void emit_teq(int rs, int rt)
1700{
1701 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1702 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1703}
1704
1705static void emit_rsbimm(int rs, int imm, int rt)
1706{
1707 u_int armval;
1708 genimm_checked(imm,&armval);
1709 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1710 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1711}
1712
1713// Load 2 immediates optimizing for small code size
1714static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
1715{
1716 emit_movimm(imm1,rt1);
1717 u_int armval;
1718 if(genimm(imm2-imm1,&armval)) {
1719 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
1720 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
1721 }else if(genimm(imm1-imm2,&armval)) {
1722 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
1723 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
1724 }
1725 else emit_movimm(imm2,rt2);
1726}
1727
1728// Conditionally select one of two immediates, optimizing for small code size
1729// This will only be called if HAVE_CMOV_IMM is defined
1730static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1731{
1732 u_int armval;
1733 if(genimm(imm2-imm1,&armval)) {
1734 emit_movimm(imm1,rt);
1735 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1736 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1737 }else if(genimm(imm1-imm2,&armval)) {
1738 emit_movimm(imm1,rt);
1739 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1740 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1741 }
1742 else {
1743 #ifndef HAVE_ARMV7
1744 emit_movimm(imm1,rt);
1745 add_literal((int)out,imm2);
1746 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1747 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1748 #else
1749 emit_movw(imm1&0x0000FFFF,rt);
1750 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1751 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1752 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1753 }
1754 emit_movt(imm1&0xFFFF0000,rt);
1755 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1756 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1757 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1758 }
1759 #endif
1760 }
1761}
1762
1763// special case for checking invalid_code
1764static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1765{
1766 assert(imm<128&&imm>=0);
1767 assert(r>=0&&r<16);
1768 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1769 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1770 emit_cmpimm(HOST_TEMPREG,imm);
1771}
1772
1773static void emit_callne(int a)
1774{
1775 assem_debug("blne %x\n",a);
1776 u_int offset=genjmp(a);
1777 output_w32(0x1b000000|offset);
1778}
1779
1780// Used to preload hash table entries
1781static unused void emit_prefetchreg(int r)
1782{
1783 assem_debug("pld %s\n",regname[r]);
1784 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1785}
1786
1787// Special case for mini_ht
1788static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1789{
1790 assert(offset<4096);
1791 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1792 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1793}
1794
1795static void emit_orrne_imm(int rs,int imm,int rt)
1796{
1797 u_int armval;
1798 genimm_checked(imm,&armval);
1799 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1800 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1801}
1802
1803static void emit_andne_imm(int rs,int imm,int rt)
1804{
1805 u_int armval;
1806 genimm_checked(imm,&armval);
1807 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1808 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1809}
1810
1811static unused void emit_addpl_imm(int rs,int imm,int rt)
1812{
1813 u_int armval;
1814 genimm_checked(imm,&armval);
1815 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1816 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1817}
1818
1819static void emit_jno_unlikely(int a)
1820{
1821 //emit_jno(a);
1822 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1823 output_w32(0x72800000|rd_rn_rm(15,15,0));
1824}
1825
1826static void save_regs_all(u_int reglist)
1827{
1828 int i;
1829 if(!reglist) return;
1830 assem_debug("stmia fp,{");
1831 for(i=0;i<16;i++)
1832 if(reglist&(1<<i))
1833 assem_debug("r%d,",i);
1834 assem_debug("}\n");
1835 output_w32(0xe88b0000|reglist);
1836}
1837
1838static void restore_regs_all(u_int reglist)
1839{
1840 int i;
1841 if(!reglist) return;
1842 assem_debug("ldmia fp,{");
1843 for(i=0;i<16;i++)
1844 if(reglist&(1<<i))
1845 assem_debug("r%d,",i);
1846 assem_debug("}\n");
1847 output_w32(0xe89b0000|reglist);
1848}
1849
1850// Save registers before function call
1851static void save_regs(u_int reglist)
1852{
1853 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1854 save_regs_all(reglist);
1855}
1856
1857// Restore registers after function call
1858static void restore_regs(u_int reglist)
1859{
1860 reglist&=CALLER_SAVE_REGS;
1861 restore_regs_all(reglist);
1862}
1863
1864/* Stubs/epilogue */
1865
1866static void literal_pool(int n)
1867{
1868 if(!literalcount) return;
1869 if(n) {
1870 if((int)out-literals[0][0]<4096-n) return;
1871 }
1872 u_int *ptr;
1873 int i;
1874 for(i=0;i<literalcount;i++)
1875 {
1876 u_int l_addr=(u_int)out;
1877 int j;
1878 for(j=0;j<i;j++) {
1879 if(literals[j][1]==literals[i][1]) {
1880 //printf("dup %08x\n",literals[i][1]);
1881 l_addr=literals[j][0];
1882 break;
1883 }
1884 }
1885 ptr=(u_int *)literals[i][0];
1886 u_int offset=l_addr-(u_int)ptr-8;
1887 assert(offset<4096);
1888 assert(!(offset&3));
1889 *ptr|=offset;
1890 if(l_addr==(u_int)out) {
1891 literals[i][0]=l_addr; // remember for dupes
1892 output_w32(literals[i][1]);
1893 }
1894 }
1895 literalcount=0;
1896}
1897
1898static void literal_pool_jumpover(int n)
1899{
1900 if(!literalcount) return;
1901 if(n) {
1902 if((int)out-literals[0][0]<4096-n) return;
1903 }
1904 void *jaddr = out;
1905 emit_jmp(0);
1906 literal_pool(0);
1907 set_jump_target(jaddr, out);
1908}
1909
1910static void emit_extjump2(u_char *addr, int target, void *linker)
1911{
1912 u_char *ptr=(u_char *)addr;
1913 assert((ptr[3]&0x0e)==0xa);
1914 (void)ptr;
1915
1916 emit_loadlp(target,0);
1917 emit_loadlp((u_int)addr,1);
1918 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
1919 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1920//DEBUG >
1921#ifdef DEBUG_CYCLE_COUNT
1922 emit_readword(&last_count,ECX);
1923 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1924 emit_readword(&next_interupt,ECX);
1925 emit_writeword(HOST_CCREG,&Count);
1926 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1927 emit_writeword(ECX,&last_count);
1928#endif
1929//DEBUG <
1930 emit_jmp(linker);
1931}
1932
1933static void emit_extjump(void *addr, int target)
1934{
1935 emit_extjump2(addr, target, dyna_linker);
1936}
1937
1938static void emit_extjump_ds(void *addr, int target)
1939{
1940 emit_extjump2(addr, target, dyna_linker_ds);
1941}
1942
1943// put rt_val into rt, potentially making use of rs with value rs_val
1944static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1945{
1946 u_int armval;
1947 int diff;
1948 if(genimm(rt_val,&armval)) {
1949 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1950 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1951 return;
1952 }
1953 if(genimm(~rt_val,&armval)) {
1954 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1955 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1956 return;
1957 }
1958 diff=rt_val-rs_val;
1959 if(genimm(diff,&armval)) {
1960 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1961 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1962 return;
1963 }else if(genimm(-diff,&armval)) {
1964 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1965 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1966 return;
1967 }
1968 emit_movimm(rt_val,rt);
1969}
1970
// return 1 if emit_movimm_from() can do its job cheaply.
// That is the case when v1==v2, or when the difference v2-v1 (or its
// negation) fits in 8 bits once trailing pairs of zero bits are dropped.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if(v1==v2)
    return 1;

  delta=v2-v1;

  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100;
}
1986
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "no value to move" for that argument.
// trashes r2 (only in the swap case)
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // a1 sits in r0: get it out of the way before r0 is overwritten
      emit_mov(a1,1);
      if(a0>=0)
        emit_mov(a0,0);
      return;
    }
  }
  // no conflict: move each argument that is not already in place
  if(a0>0)
    emit_mov(a0,0);
  if(a1>=0&&a1!=1)
    emit_mov(a1,1);
}
2003
2004static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
2005{
2006 switch(type) {
2007 case LOADB_STUB: emit_signextend8(rs,rt); break;
2008 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2009 case LOADH_STUB: emit_signextend16(rs,rt); break;
2010 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2011 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2012 default: assert(0);
2013 }
2014}
2015
2016#include "pcsxmem.h"
2017#include "pcsxmem_inline.c"
2018
2019static void do_readstub(int n)
2020{
2021 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
2022 literal_pool(256);
2023 set_jump_target(stubs[n].addr, out);
2024 enum stub_type type=stubs[n].type;
2025 int i=stubs[n].a;
2026 int rs=stubs[n].b;
2027 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2028 u_int reglist=stubs[n].e;
2029 signed char *i_regmap=i_regs->regmap;
2030 int rt;
2031 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2032 rt=get_reg(i_regmap,FTEMP);
2033 }else{
2034 rt=get_reg(i_regmap,rt1[i]);
2035 }
2036 assert(rs>=0);
2037 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2038 void *restore_jump = NULL;
2039 reglist|=(1<<rs);
2040 for(r=0;r<=12;r++) {
2041 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2042 temp=r; break;
2043 }
2044 }
2045 if(rt>=0&&rt1[i]!=0)
2046 reglist&=~(1<<rt);
2047 if(temp==-1) {
2048 save_regs(reglist);
2049 regs_saved=1;
2050 temp=(rs==0)?2:0;
2051 }
2052 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2053 temp2=1;
2054 emit_readword(&mem_rtab,temp);
2055 emit_shrimm(rs,12,temp2);
2056 emit_readword_dualindexedx4(temp,temp2,temp2);
2057 emit_lsls_imm(temp2,1,temp2);
2058 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2059 switch(type) {
2060 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2061 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2062 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2063 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2064 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2065 default: assert(0);
2066 }
2067 }
2068 if(regs_saved) {
2069 restore_jump=out;
2070 emit_jcc(0); // jump to reg restore
2071 }
2072 else
2073 emit_jcc(stubs[n].retaddr); // return address
2074
2075 if(!regs_saved)
2076 save_regs(reglist);
2077 void *handler=NULL;
2078 if(type==LOADB_STUB||type==LOADBU_STUB)
2079 handler=jump_handler_read8;
2080 if(type==LOADH_STUB||type==LOADHU_STUB)
2081 handler=jump_handler_read16;
2082 if(type==LOADW_STUB)
2083 handler=jump_handler_read32;
2084 assert(handler);
2085 pass_args(rs,temp2);
2086 int cc=get_reg(i_regmap,CCREG);
2087 if(cc<0)
2088 emit_loadreg(CCREG,2);
2089 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2090 emit_call(handler);
2091 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2092 mov_loadtype_adj(type,0,rt);
2093 }
2094 if(restore_jump)
2095 set_jump_target(restore_jump, out);
2096 restore_regs(reglist);
2097 emit_jmp(stubs[n].retaddr); // return address
2098}
2099
2100// return memhandler, or get directly accessable address and return 0
2101static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
2102{
2103 u_int l1,l2=0;
2104 l1=((u_int *)table)[addr>>12];
2105 if((l1&(1<<31))==0) {
2106 u_int v=l1<<1;
2107 *addr_host=v+addr;
2108 return NULL;
2109 }
2110 else {
2111 l1<<=1;
2112 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2113 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2114 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
2115 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2116 else
2117 l2=((u_int *)l1)[(addr&0xfff)/4];
2118 if((l2&(1<<31))==0) {
2119 u_int v=l2<<1;
2120 *addr_host=v+(addr&0xfff);
2121 return NULL;
2122 }
2123 return (void *)(l2<<1);
2124 }
2125}
2126
/*
 * Emit code for a load whose guest address is known at compile time.
 *
 *  type    - LOADB/LOADBU/LOADH/LOADHU/LOADW stub type (width and extension)
 *  i       - instruction index; rt1[i]==0 means the loaded value is discarded
 *  addr    - constant guest address
 *  regmap  - host register mapping in effect for the instruction
 *  target  - guest register id that receives the value (looked up in regmap)
 *  adj     - cycle adjustment; CLOCK_ADJUST(adj+1) is what gets added to the
 *            cycle count handed to handlers
 *  reglist - host registers to preserve around a handler call
 *
 * Three strategies are tried in order: pcsx_direct_read(), a plain host
 * memory access when get_direct_memhandler() reports no handler, and finally
 * a call into the memory handler.
 */
static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  // Both rs (address/scratch) and rt (destination) start as the host register
  // mapped to target; rs falls back to the temporary (-1) register.
  int rs=get_reg(regmap,target);
  int rt=get_reg(regmap,target);
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);
  u_int host_addr=0,is_dynamic,far_call=0;
  void *handler;
  int cc=get_reg(regmap,CCREG);
  // A nonzero return means pcsx_direct_read() already emitted everything.
  if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
    return;
  handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
  if (handler == NULL) {
    // No handler: the address maps to host memory at host_addr.
    if(rt<0||rt1[i]==0)
      return; // result is not kept anywhere, nothing to emit
    if(addr!=host_addr)
      emit_movimm_from(addr,rs,host_addr,rs);
    switch(type) {
      case LOADB_STUB:  emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB:  emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB:  emit_readword_indexed(0,rs,rt); break;
      default:          assert(0);
    }
    return;
  }
  // For "dynamic" handlers the generic jump_handler_read* routine is called
  // instead of the handler found in the table.
  is_dynamic=pcsxmem_is_handler_dynamic(addr);
  if(is_dynamic) {
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=jump_handler_read16;
    if(type==LOADW_STUB)
      handler=jump_handler_read32;
  }

  // call a memhandler
  // The destination register is overwritten anyway, so it is not preserved.
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // Host register 0 receives the address.
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  int offset=(u_char *)handler-out-8;
  if(offset<-33554432||offset>=33554432) {
    // unreachable memhandler, a plugin func perhaps
    // (outside the +/-32 MiB window checked above, so call through r12)
    emit_movimm((u_int)handler,12);
    far_call=1;
  }
  if(cc<0)
    emit_loadreg(CCREG,2);
  if(is_dynamic) {
    // r1 = read-table entry for this page shifted left by one,
    // r2 = cycle count plus adjustment
    emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
  }
  else {
    // Store last_count + cycle count + adjustment into Count before the call.
    emit_readword(&last_count,3);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
    emit_add(2,3,2);
    emit_writeword(2,&Count);
  }

  if(far_call)
    emit_callreg(12);
  else
    emit_call(handler);

  // The value comes back in host register 0; extend it into rt.
  if(rt>=0&&rt1[i]!=0) {
    switch(type) {
      case LOADB_STUB:  emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB:  emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB:  if(rt!=0) emit_mov(0,rt); break;
      default:          assert(0);
    }
  }
  restore_regs(reglist);
}
2208
/*
 * Emit the out-of-line slow path for store stub n.
 *
 * The stub first looks the address up in mem_wtab and attempts a conditional
 * direct store; if that does not apply it calls jump_handler_write8/16/32 and
 * then jumps back to stubs[n].retaddr.
 */
static void do_writestub(int n)
{
  assem_debug("do_writestub %x\n",start+stubs[n].a*4);
  literal_pool(256);
  set_jump_target(stubs[n].addr, out);
  enum stub_type type=stubs[n].type;
  int i=stubs[n].a;        // instruction index
  int rs=stubs[n].b;       // host register holding the address
  struct regstat *i_regs=(struct regstat *)stubs[n].c;
  u_int reglist=stubs[n].e; // host registers to preserve
  signed char *i_regmap=i_regs->regmap;
  // r is assigned but not read anywhere else in this function.
  int rt,r;
  if(itype[i]==C1LS||itype[i]==C2LS) {
    rt=get_reg(i_regmap,r=FTEMP);
  }else{
    rt=get_reg(i_regmap,r=rs2[i]);
  }
  assert(rs>=0);
  assert(rt>=0);
  int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL;
  int reglist2=reglist|(1<<rs)|(1<<rt);
  // Look for a free scratch register among r0-r9 and r12 (mask 0x13ff).
  for(rtmp=0;rtmp<=12;rtmp++) {
    if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
      temp=rtmp; break;
    }
  }
  if(temp==-1) {
    // Nothing free: save registers now and take one of r0-r3 that is
    // neither the address nor the data register.
    save_regs(reglist);
    regs_saved=1;
    for(rtmp=0;rtmp<=3;rtmp++)
      if(rtmp!=rs&&rtmp!=rt)
        {temp=rtmp;break;}
  }
  // Prefer r3 as the second temporary when it is available.
  if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
    temp2=3;
  // temp2 = (mem_wtab entry for page rs>>12) << 1, with flags updated by the
  // shift. The store and the following jump are conditional on those flags
  // (presumably "carry clear", i.e. bit 31 of the entry was 0 - compare the
  // bit-31 test in get_direct_memhandler()).
  emit_readword(&mem_wtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  switch(type) {
    case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
    case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
    case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
    default:          assert(0);
  }
  if(regs_saved) {
    // Registers were already saved, so the fast path has to pass through
    // restore_regs(); the target is filled in further down.
    restore_jump=out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n].retaddr); // return address (invcode check)

  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  switch(type) {
    case STOREB_STUB: handler=jump_handler_write8; break;
    case STOREH_STUB: handler=jump_handler_write16; break;
    case STOREW_STUB: handler=jump_handler_write32; break;
    default: assert(0);
  }
  assert(handler);
  pass_args(rs,rt);
  // r3 carries the shifted table entry, r2 the adjusted cycle count.
  if(temp2!=3)
    emit_mov(temp2,3);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
  // returns new cycle_count
  emit_call(handler);
  // Undo the adjustment on the value returned in r0.
  emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr);
}
2289
2290static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2291{
2292 int rs=get_reg(regmap,-1);
2293 int rt=get_reg(regmap,target);
2294 assert(rs>=0);
2295 assert(rt>=0);
2296 u_int host_addr=0;
2297 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
2298 if (handler == NULL) {
2299 if(addr!=host_addr)
2300 emit_movimm_from(addr,rs,host_addr,rs);
2301 switch(type) {
2302 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2303 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2304 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2305 default: assert(0);
2306 }
2307 return;
2308 }
2309
2310 // call a memhandler
2311 save_regs(reglist);
2312 pass_args(rs,rt);
2313 int cc=get_reg(regmap,CCREG);
2314 if(cc<0)
2315 emit_loadreg(CCREG,2);
2316 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2317 emit_movimm((u_int)handler,3);
2318 // returns new cycle_count
2319 emit_call(jump_handler_write_h);
2320 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2321 if(cc<0)
2322 emit_storereg(CCREG,2);
2323 restore_regs(reglist);
2324}
2325
/*
 * Emit the out-of-line path for SWL/SWR stub n.
 *
 * The active (#if 1) variant simply calls jump_handle_swl / jump_handle_swr
 * with the address and data registers and the adjusted cycle count, then
 * returns to stubs[n].retaddr. The #else variant is disabled code.
 */
static void do_unalignedwritestub(int n)
{
  assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
  literal_pool(256);
  set_jump_target(stubs[n].addr, out);

  int i=stubs[n].a;        // instruction index
  struct regstat *i_regs=(struct regstat *)stubs[n].c;
  int addr=stubs[n].b;     // host register holding the address
  u_int reglist=stubs[n].e;
  signed char *i_regmap=i_regs->regmap;
  int temp2=get_reg(i_regmap,FTEMP);
  int rt;
  rt=get_reg(i_regmap,rs2[i]);
  assert(rt>=0);
  assert(addr>=0);
  assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
  // Preserve the address register, but not the FTEMP scratch register.
  // NOTE(review): temp2 is not checked for <0 before being used as a shift
  // count here - confirm FTEMP is always allocated for these instructions.
  reglist|=(1<<addr);
  reglist&=~(1<<temp2);

#if 1
  // don't bother with it and call write handler
  save_regs(reglist);
  pass_args(addr,rt);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
  emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
  // Undo the cycle adjustment on the value left in r0 by the handler.
  emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr); // return address
#else
  // Disabled alternative: read-modify-write through readmem/writemem.
  emit_andimm(addr,0xfffffffc,temp2);
  emit_writeword(temp2,&address);

  save_regs(reglist);
  emit_shrimm(addr,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm((u_int)readmem,0);
  emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
  emit_call((int)&indirect_jump_indexed);
  restore_regs(reglist);

  emit_readword(&readmem_dword,temp2);
  int temp=addr; //hmh
  emit_shlimm(addr,3,temp);
  emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
  if (opcode[i]==0x2e) // SWR
#else
  if (opcode[i]==0x2a) // SWL
#endif
    emit_xorimm(temp,24,temp);
  emit_movimm(-1,HOST_TEMPREG);
  if (opcode[i]==0x2a) { // SWL
    emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
    emit_orrshr(rt,temp,temp2);
  }else{
    emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
    emit_orrshl(rt,temp,temp2);
  }
  emit_readword(&address,addr);
  emit_writeword(temp2,&word);
  //save_regs(reglist); // don't need to, no state changes
  emit_shrimm(addr,16,1);
  emit_movimm((u_int)writemem,0);
  //emit_call((int)&indirect_jump_indexed);
  emit_mov(15,14);
  emit_readword_dualindexedx4(0,1,15);
  emit_readword(&Count,HOST_TEMPREG);
  emit_readword(&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
  emit_writeword(2,&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr); // return address
#endif
}
2413
2414static void do_invstub(int n)
2415{
2416 literal_pool(20);
2417 u_int reglist=stubs[n].a;
2418 set_jump_target(stubs[n].addr, out);
2419 save_regs(reglist);
2420 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2421 emit_call(&invalidate_addr);
2422 restore_regs(reglist);
2423 emit_jmp(stubs[n].retaddr); // return address
2424}
2425
/*
 * Emit the "dirty" entry stub for instruction i of the current block.
 *
 * The stub loads source, copy and slen*4 into host registers 1-3 and the
 * guest address start+i*4 into register 0, calls verify_code (or
 * verify_code_vm when start is not below 0xC0000000 as a signed value),
 * reloads the entry registers and jumps to instr_addr[i].
 *
 * Returns the address where the register-reload code begins, or
 * instr_addr[i] if load_regs_entry() emitted nothing.
 */
void *do_dirty_stub(int i)
{
  assem_debug("do_dirty_stub %x\n",start+i*4);
  u_int addr=(u_int)source;
  // Careful about the code output here, verify_dirty needs to parse it.
  #ifndef HAVE_ARMV7
  // Without ARMv7 the three values are loaded via emit_loadlp().
  emit_loadlp(addr,1);
  emit_loadlp((int)copy,2);
  emit_loadlp(slen*4,3);
  #else
  // With ARMv7 each pointer is built from a movw (low half) / movt (high half) pair.
  emit_movw(addr&0x0000FFFF,1);
  emit_movw(((u_int)copy)&0x0000FFFF,2);
  emit_movt(addr&0xFFFF0000,1);
  emit_movt(((u_int)copy)&0xFFFF0000,2);
  emit_movw(slen*4,3);
  #endif
  emit_movimm(start+i*4,0);
  emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
  void *entry = out;
  load_regs_entry(i);
  // If no reload code was emitted, the clean entry point can be used directly.
  if (entry == out)
    entry = instr_addr[i];
  emit_jmp(instr_addr[i]);
  return entry;
}
2451
/*
 * Emit the dirty-check sequence that calls verify_code_ds.
 *
 * Host registers 1-3 receive the address to compare (source, or start when
 * start is not below 0xC0000000 as a signed value), copy and slen*4;
 * register 0 receives start+1.
 */
static void do_dirty_stub_ds()
{
  // Careful about the code output here, verify_dirty needs to parse it.
  #ifndef HAVE_ARMV7
  emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
  emit_loadlp((int)copy,2);
  emit_loadlp(slen*4,3);
  #else
  // movw/movt pairs build each 32-bit value from its low and high halves.
  emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
  emit_movw(((u_int)copy)&0x0000FFFF,2);
  emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
  emit_movt(((u_int)copy)&0xFFFF0000,2);
  emit_movw(slen*4,3);
  #endif
  emit_movimm(start+1,0);
  emit_call(&verify_code_ds);
}
2469
2470// FP_STUB
2471static void do_cop1stub(int n)
2472{
2473 literal_pool(256);
2474 assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
2475 set_jump_target(stubs[n].addr, out);
2476 int i=stubs[n].a;
2477// int rs=stubs[n].b;
2478 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2479 int ds=stubs[n].d;
2480 if(!ds) {
2481 load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i);
2482 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2483 }
2484 //else {printf("fp exception in delay slot\n");}
2485 wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty);
2486 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2487 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2488 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2489 emit_jmp(ds?fp_exception_ds:fp_exception);
2490}
2491
2492/* Special assem */
2493
/*
 * Emit code for a variable shift instruction (shift amount in a register).
 *
 * opcode2[i] <= 0x07 selects the 32-bit forms SLLV/SRLV/SRAV; anything else
 * is treated as the 64-bit forms DSLLV/DSRLV/DSRAV operating on a high/low
 * register pair. Nothing is emitted when the destination is register 0 or
 * has no host register allocated.
 */
static void shift_assemble_arm(int i,struct regstat *i_regs)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      signed char s,t,shift;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(t>=0){
        if(rs1[i]==0)
        {
          // shifting register 0 always yields 0
          emit_zeroreg(t);
        }
        else if(rs2[i]==0)
        {
          // shift amount is register 0: plain move
          assert(s>=0);
          if(s!=t) emit_mov(s,t);
        }
        else
        {
          // Only the low 5 bits of the shift register are used.
          emit_andimm(shift,31,HOST_TEMPREG);
          if(opcode2[i]==4) // SLLV
          {
            emit_shl(s,HOST_TEMPREG,t);
          }
          if(opcode2[i]==6) // SRLV
          {
            emit_shr(s,HOST_TEMPREG,t);
          }
          if(opcode2[i]==7) // SRAV
          {
            emit_sar(s,HOST_TEMPREG,t);
          }
        }
      }
    } else { // DSLLV/DSRLV/DSRAV
      signed char sh,sl,th,tl,shift;
      // The |64 variants look up the upper-half mapping of the register.
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(tl>=0){
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else if(rs2[i]==0)
        {
          assert(sl>=0);
          if(sl!=tl) emit_mov(sl,tl);
          if(th>=0&&sh!=th) emit_mov(sh,th);
        }
        else
        {
          // FIXME: What if shift==tl ?
          assert(shift!=tl);
          int temp=get_reg(i_regs->regmap,-1);
          int real_th=th; // remembers whether the upper half is really wanted
          if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
          assert(sl>=0);
          assert(sh>=0);
          // HOST_TEMPREG = shift & 31; bit 5 of the shift amount is tested
          // separately below to handle shifts of 32 or more.
          emit_andimm(shift,31,HOST_TEMPREG);
          if(opcode2[i]==0x14) // DSLLV
          {
            if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            // NOTE(review): th is used here without the th>=0 guard applied
            // to the neighbouring lines - confirm th is always allocated.
            emit_orrshr(sl,HOST_TEMPREG,th);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32);
            emit_shl(sl,HOST_TEMPREG,tl);
            if(th>=0) emit_cmovne_reg(tl,th);
            emit_cmovne_imm(0,tl);
          }
          if(opcode2[i]==0x16) // DSRLV
          {
            assert(th>=0);
            emit_shr(sl,HOST_TEMPREG,tl);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            emit_orrshl(sh,HOST_TEMPREG,tl);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32);
            emit_shr(sh,HOST_TEMPREG,th);
            emit_cmovne_reg(th,tl);
            if(real_th>=0) emit_cmovne_imm(0,th);
          }
          if(opcode2[i]==0x17) // DSRAV
          {
            assert(th>=0);
            emit_shr(sl,HOST_TEMPREG,tl);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            if(real_th>=0) {
              assert(temp>=0);
              emit_sarimm(th,31,temp);
            }
            emit_orrshl(sh,HOST_TEMPREG,tl);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32);
            emit_sar(sh,HOST_TEMPREG,th);
            emit_cmovne_reg(th,tl);
            if(real_th>=0) emit_cmovne_reg(temp,th);
          }
        }
      }
    }
  }
}
2603
2604static void speculate_mov(int rs,int rt)
2605{
2606 if(rt!=0) {
2607 smrv_strong_next|=1<<rt;
2608 smrv[rt]=smrv[rs];
2609 }
2610}
2611
2612static void speculate_mov_weak(int rs,int rt)
2613{
2614 if(rt!=0) {
2615 smrv_weak_next|=1<<rt;
2616 smrv[rt]=smrv[rs];
2617 }
2618}
2619
2620static void speculate_register_values(int i)
2621{
2622 if(i==0) {
2623 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
2624 // gp,sp are likely to stay the same throughout the block
2625 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
2626 smrv_weak_next=~smrv_strong_next;
2627 //printf(" llr %08x\n", smrv[4]);
2628 }
2629 smrv_strong=smrv_strong_next;
2630 smrv_weak=smrv_weak_next;
2631 switch(itype[i]) {
2632 case ALU:
2633 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
2634 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
2635 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
2636 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
2637 else {
2638 smrv_strong_next&=~(1<<rt1[i]);
2639 smrv_weak_next&=~(1<<rt1[i]);
2640 }
2641 break;
2642 case SHIFTIMM:
2643 smrv_strong_next&=~(1<<rt1[i]);
2644 smrv_weak_next&=~(1<<rt1[i]);
2645 // fallthrough
2646 case IMM16:
2647 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
2648 int value,hr=get_reg(regs[i].regmap,rt1[i]);
2649 if(hr>=0) {
2650 if(get_final_value(hr,i,&value))
2651 smrv[rt1[i]]=value;
2652 else smrv[rt1[i]]=constmap[i][hr];
2653 smrv_strong_next|=1<<rt1[i];
2654 }
2655 }
2656 else {
2657 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
2658 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
2659 }
2660 break;
2661 case LOAD:
2662 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
2663 // special case for BIOS
2664 smrv[rt1[i]]=0xa0000000;
2665 smrv_strong_next|=1<<rt1[i];
2666 break;
2667 }
2668 // fallthrough
2669 case SHIFT:
2670 case LOADLR:
2671 case MOV:
2672 smrv_strong_next&=~(1<<rt1[i]);
2673 smrv_weak_next&=~(1<<rt1[i]);
2674 break;
2675 case COP0:
2676 case COP2:
2677 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
2678 smrv_strong_next&=~(1<<rt1[i]);
2679 smrv_weak_next&=~(1<<rt1[i]);
2680 }
2681 break;
2682 case C2LS:
2683 if (opcode[i]==0x32) { // LWC2
2684 smrv_strong_next&=~(1<<rt1[i]);
2685 smrv_weak_next&=~(1<<rt1[i]);
2686 }
2687 break;
2688 }
2689#if 0
2690 int r=4;
2691 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
2692 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
2693#endif
2694}
2695
// Memory region classes returned by get_ptr_mem_type() and consumed by
// emit_fastpath_cmp_jump().
enum {
  MTYPE_8000 = 0, // default class: any address not matched by the ranges below
  MTYPE_8020,     // 0x80200000 .. 0x807fffff (RAM 80200000+ mirror)
  MTYPE_0000,     // below 0x00200000 (RAM 0 mirror)
  MTYPE_A000,     // 0xa0000000 .. 0xa01fffff (RAM A mirror)
  MTYPE_1F80,     // 0x1f800000 .. 0x1f800fff (scratchpad)
};
2703
2704static int get_ptr_mem_type(u_int a)
2705{
2706 if(a < 0x00200000) {
2707 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
2708 // return wrong, must use memhandler for BIOS self-test to pass
2709 // 007 does similar stuff from a00 mirror, weird stuff
2710 return MTYPE_8000;
2711 return MTYPE_0000;
2712 }
2713 if(0x1f800000 <= a && a < 0x1f801000)
2714 return MTYPE_1F80;
2715 if(0x80200000 <= a && a < 0x80800000)
2716 return MTYPE_8020;
2717 if(0xa0000000 <= a && a < 0xa0200000)
2718 return MTYPE_A000;
2719 return MTYPE_8000;
2720}
2721
/*
 * Emit the range check that guards a direct (fast path) memory access.
 *
 *  i                 - instruction index; rs1[i] is the base register whose
 *                      speculated value picks the expected memory region
 *  addr              - host register holding the guest address
 *  addr_reg_override - set to HOST_TEMPREG when the address had to be
 *                      adjusted into that register; left untouched otherwise
 *
 * Returns the location of the emitted conditional jump (target 0, to be
 * patched by the caller), or NULL if no jump was emitted.
 */
static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
{
  void *jaddr = NULL;
  int type=0;
  int mr=rs1[i];
  if(((smrv_strong|smrv_weak)>>mr)&1) {
    // A speculated value exists for the base register: classify it.
    type=get_ptr_mem_type(smrv[mr]);
    //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
  }
  else {
    // use the mirror we are running on
    type=get_ptr_mem_type(start);
    //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
  }

  // For the RAM mirrors the address is adjusted into HOST_TEMPREG and the
  // ordinary RAM check (type 0) is then used.
  if(type==MTYPE_8020) { // RAM 80200000+ mirror
    emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_0000) { // RAM 0 mirror
    emit_orimm(addr,0x80000000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_A000) { // RAM A mirror
    emit_andimm(addr,~0x20000000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_1F80) { // scratchpad
    if (psxH == (void *)0x1f800000) {
      // Scratchpad is mapped at its guest address: check the 4 KiB window
      // directly. type stays nonzero, so the RAM check below is skipped.
      emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
      emit_cmpimm(HOST_TEMPREG,0x1000);
      jaddr=out;
      emit_jc(0);
    }
    else {
      // do usual RAM check, jump will go to the right handler
      type=0;
    }
  }

  if(type==0)
  {
    emit_cmpimm(addr,RAM_SIZE);
    jaddr=out;
    #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
    // Hint to branch predictor that the branch is unlikely to be taken
    if(rs1[i]>=28)
      emit_jno_unlikely(0);
    else
    #endif
    emit_jno(0);
    // When RAM is not mapped at its guest address, rebase into HOST_TEMPREG.
    if(ram_offset!=0) {
      emit_addimm(addr,ram_offset,HOST_TEMPREG);
      addr=*addr_reg_override=HOST_TEMPREG;
    }
  }

  return jaddr;
}
2784
// Bind the generic shift_assemble name to the ARM implementation.
#define shift_assemble shift_assemble_arm
2786
/*
 * Emit code for the unaligned loads LWL/LWR (opcodes 0x22/0x26).
 *
 * The aligned word containing the address is read (directly, through a
 * LOADW stub, or via inline_readstub() for constant non-RAM addresses) and
 * merged into the destination register using a shift/mask derived from the
 * low address bits. LDL/LDR (0x1A/0x1B) are not supported and assert.
 */
static void loadlr_assemble_arm(int i,struct regstat *i_regs)
{
  int s,tl,temp,temp2,addr;
  int offset;
  void *jaddr=0;
  int memtarget=0,c=0;
  int fastload_reg_override=0;
  u_int hr,reglist=0;
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,-1);
  temp2=get_reg(i_regs->regmap,FTEMP);
  addr=get_reg(i_regs->regmap,AGEN1+(i&1));
  assert(addr<0);
  offset=imm[i];
  // Every allocated host register (plus temp) must survive a stub call.
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  reglist|=1<<temp;
  // c is still 0 at this point, so only offset and s decide the register.
  if(offset||s<0||c) addr=temp2;
  else addr=s;
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1;
    if(c) {
      // Constant address: decide at compile time whether it targets RAM.
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
    }
  }
  if(!c) {
    // temp = address*8 (bit offset, masked later), temp2 = aligned address
    emit_shlimm(addr,3,temp);
    if (opcode[i]==0x22||opcode[i]==0x26) {
      emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
    }else{
      emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
    }
    jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
  }
  else {
    if(ram_offset&&memtarget) {
      emit_addimm(temp2,ram_offset,HOST_TEMPREG);
      fastload_reg_override=HOST_TEMPREG;
    }
    if (opcode[i]==0x22||opcode[i]==0x26) {
      emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
    }else{
      emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
    }
  }
  if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
    if(!c||memtarget) {
      int a=temp2;
      if(fastload_reg_override) a=fastload_reg_override;
      emit_readword_indexed(0,a,temp2);
      if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(tl>=0);
      emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
      if (opcode[i]==0x26) // LWR
#else
      if (opcode[i]==0x22) // LWL
#endif
        emit_xorimm(temp,24,temp);
      // Shift the loaded word into place, clear the bits of tl it replaces
      // (mask built from -1 shifted by the same amount), then OR them together.
      emit_movimm(-1,HOST_TEMPREG);
      if (opcode[i]==0x26) {
        emit_shr(temp2,temp,temp2);
        emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
      }else{
        emit_shl(temp2,temp,temp2);
        emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
      }
      emit_or(temp2,tl,tl);
    }
    //emit_storereg(rt1[i],tl); // DEBUG
  }
  if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
    assert(0);
  }
}
// Bind the generic loadlr_assemble name to the ARM implementation.
#define loadlr_assemble loadlr_assemble_arm
2869
/*
 * Emit code for coprocessor 0 instructions: MFC0 (opcode2 0), MTC0
 * (opcode2 4) and RFE (opcode2 0x10, asserted).
 *
 * MFC0 reads reg_cop0[copr] directly. MTC0 calls pcsx_mtc0 (or
 * pcsx_mtc0_ds in a delay slot for registers 12/13) and, for registers
 * 9/11/12/13, synchronises Count/last_count with the cycle counter around
 * the call. RFE rewrites the low bits of Status in place.
 */
static void cop0_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]==0) // MFC0
  {
    signed char t=get_reg(i_regs->regmap,rt1[i]);
    u_int copr=(source[i]>>11)&0x1f;
    //assert(t>=0); // Why does this happen?  OOT is weird
    if(t>=0&&rt1[i]!=0) {
      emit_readword(&reg_cop0[copr],t);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    signed char s=get_reg(i_regs->regmap,rs1[i]);
    char copr=(source[i]>>11)&0x1f;
    assert(s>=0);
    wb_register(rs1[i],i_regs->regmap,i_regs->dirty);
    if(copr==9||copr==11||copr==12||copr==13) {
      // Count = last_count + cycle counter + adjustment, so the C handler
      // sees an up-to-date value.
      emit_readword(&last_count,HOST_TEMPREG);
      emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
      emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
      emit_writeword(HOST_CCREG,&Count);
    }
    // What a mess.  The status register (12) can enable interrupts,
    // so needs a special case to handle a pending interrupt.
    // The interrupt must be taken immediately, because a subsequent
    // instruction might disable interrupts again.
    if(copr==12||copr==13) {
      if (is_delayslot) {
        // burn cycles to cause cc_interrupt, which will
        // reschedule next_interupt. Relies on CCREG from above.
        assem_debug("MTC0 DS %d\n", copr);
        emit_writeword(HOST_CCREG,&last_count);
        emit_movimm(0,HOST_CCREG);
        emit_storereg(CCREG,HOST_CCREG);
        emit_loadreg(rs1[i],1);
        emit_movimm(copr,0);
        emit_call(pcsx_mtc0_ds);
        emit_loadreg(rs1[i],s);
        return;
      }
      // Record the address of the next instruction and clear
      // pending_exception before the call.
      emit_movimm(start+i*4+4,HOST_TEMPREG);
      emit_writeword(HOST_TEMPREG,&pcaddr);
      emit_movimm(0,HOST_TEMPREG);
      emit_writeword(HOST_TEMPREG,&pending_exception);
    }
    //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
    //else
    // Arguments: register 0 = cop0 register number, register 1 = value.
    // If s is HOST_CCREG it was overwritten above, so reload from memory.
    if(s==HOST_CCREG)
      emit_loadreg(rs1[i],1);
    else if(s!=1)
      emit_mov(s,1);
    emit_movimm(copr,0);
    emit_call(pcsx_mtc0);
    if(copr==9||copr==11||copr==12||copr==13) {
      // Recompute the cycle counter relative to next_interupt and update
      // last_count.
      emit_readword(&Count,HOST_CCREG);
      emit_readword(&next_interupt,HOST_TEMPREG);
      emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
      emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
      emit_writeword(HOST_TEMPREG,&last_count);
      emit_storereg(CCREG,HOST_CCREG);
    }
    if(copr==12||copr==13) {
      assert(!is_delayslot);
      // Branch to do_interrupt if the handler flagged a pending exception.
      emit_readword(&pending_exception,14);
      emit_test(14,14);
      emit_jne(&do_interrupt);
    }
    // Restore the source register (and its upper-half mapping, if any).
    emit_loadreg(rs1[i],s);
    if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
      emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
  }
  else
  {
    assert(opcode2[i]==0x10);
    //if((source[i]&0x3f)==0x10) // RFE
    {
      // Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
      emit_readword(&Status,0);
      emit_andimm(0,0x3c,1);
      emit_andimm(0,~0xf,0);
      emit_orrshr_imm(1,2,0);
      emit_writeword(0,&Status);
    }
  }
}
2956
2957static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
2958{
2959 switch (copr) {
2960 case 1:
2961 case 3:
2962 case 5:
2963 case 8:
2964 case 9:
2965 case 10:
2966 case 11:
2967 emit_readword(&reg_cop2d[copr],tl);
2968 emit_signextend16(tl,tl);
2969 emit_writeword(tl,&reg_cop2d[copr]); // hmh
2970 break;
2971 case 7:
2972 case 16:
2973 case 17:
2974 case 18:
2975 case 19:
2976 emit_readword(&reg_cop2d[copr],tl);
2977 emit_andimm(tl,0xffff,tl);
2978 emit_writeword(tl,&reg_cop2d[copr]);
2979 break;
2980 case 15:
2981 emit_readword(&reg_cop2d[14],tl); // SXY2
2982 emit_writeword(tl,&reg_cop2d[copr]);
2983 break;
2984 case 28:
2985 case 29:
2986 emit_readword(&reg_cop2d[9],temp);
2987 emit_testimm(temp,0x8000); // do we need this?
2988 emit_andimm(temp,0xf80,temp);
2989 emit_andne_imm(temp,0,temp);
2990 emit_shrimm(temp,7,tl);
2991 emit_readword(&reg_cop2d[10],temp);
2992 emit_testimm(temp,0x8000);
2993 emit_andimm(temp,0xf80,temp);
2994 emit_andne_imm(temp,0,temp);
2995 emit_orrshr_imm(temp,2,tl);
2996 emit_readword(&reg_cop2d[11],temp);
2997 emit_testimm(temp,0x8000);
2998 emit_andimm(temp,0xf80,temp);
2999 emit_andne_imm(temp,0,temp);
3000 emit_orrshl_imm(temp,3,tl);
3001 emit_writeword(tl,&reg_cop2d[copr]);
3002 break;
3003 default:
3004 emit_readword(&reg_cop2d[copr],tl);
3005 break;
3006 }
3007}
3008
/*
 * Emit code that writes host register sl to COP2 data register copr.
 *
 *  - 15: shifts registers 13->12 and 14->13, then stores sl to 14 and 15
 *  - 28: splits sl into three 5-bit fields stored to registers 9-11, and
 *        stores sl itself to 28
 *  - 30: stores sl to 30 and a leading-bit count derived from it to 31
 *  - 31: ignored
 *  - others: plain store
 *
 * temp is a scratch host register.
 */
static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
{
  switch (copr) {
    case 15:
      emit_readword(&reg_cop2d[13],temp);  // SXY1
      emit_writeword(sl,&reg_cop2d[copr]);
      emit_writeword(temp,&reg_cop2d[12]); // SXY0
      emit_readword(&reg_cop2d[14],temp);  // SXY2
      emit_writeword(sl,&reg_cop2d[14]);
      emit_writeword(temp,&reg_cop2d[13]); // SXY1
      break;
    case 28:
      // bits 0-4 -> reg 9 (<<7), bits 5-9 -> reg 10 (<<2), bits 10-14 -> reg 11 (>>3)
      emit_andimm(sl,0x001f,temp);
      emit_shlimm(temp,7,temp);
      emit_writeword(temp,&reg_cop2d[9]);
      emit_andimm(sl,0x03e0,temp);
      emit_shlimm(temp,2,temp);
      emit_writeword(temp,&reg_cop2d[10]);
      emit_andimm(sl,0x7c00,temp);
      emit_shrimm(temp,3,temp);
      emit_writeword(temp,&reg_cop2d[11]);
      emit_writeword(sl,&reg_cop2d[28]);
      break;
    case 30:
      // Invert negative inputs so that leading ones are counted as zeros.
      emit_movs(sl,temp);
      emit_mvnmi(temp,temp);
#ifdef HAVE_ARMV5
      emit_clz(temp,temp);
#else
      // Hand-written counting loop for CPUs without clz. The branch targets
      // are computed relative to `out`, so the number of instructions emitted
      // here must not change.
      emit_movs(temp,HOST_TEMPREG);
      emit_movimm(0,temp);
      emit_jeq((int)out+4*4);
      emit_addpl_imm(temp,1,temp);
      emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
      emit_jns((int)out-2*4);
#endif
      emit_writeword(sl,&reg_cop2d[30]);
      emit_writeword(temp,&reg_cop2d[31]);
      break;
    case 31:
      // writes to register 31 are dropped
      break;
    default:
      emit_writeword(sl,&reg_cop2d[copr]);
      break;
  }
}
3055
3056static void cop2_assemble(int i,struct regstat *i_regs)
3057{
3058 u_int copr=(source[i]>>11)&0x1f;
3059 signed char temp=get_reg(i_regs->regmap,-1);
3060 if (opcode2[i]==0) { // MFC2
3061 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3062 if(tl>=0&&rt1[i]!=0)
3063 cop2_get_dreg(copr,tl,temp);
3064 }
3065 else if (opcode2[i]==4) { // MTC2
3066 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3067 cop2_put_dreg(copr,sl,temp);
3068 }
3069 else if (opcode2[i]==2) // CFC2
3070 {
3071 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3072 if(tl>=0&&rt1[i]!=0)
3073 emit_readword(&reg_cop2c[copr],tl);
3074 }
3075 else if (opcode2[i]==6) // CTC2
3076 {
3077 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3078 switch(copr) {
3079 case 4:
3080 case 12:
3081 case 20:
3082 case 26:
3083 case 27:
3084 case 29:
3085 case 30:
3086 emit_signextend16(sl,temp);
3087 break;
3088 case 31:
3089 //value = value & 0x7ffff000;
3090 //if (value & 0x7f87e000) value |= 0x80000000;
3091 emit_shrimm(sl,12,temp);
3092 emit_shlimm(temp,12,temp);
3093 emit_testimm(temp,0x7f000000);
3094 emit_testeqimm(temp,0x00870000);
3095 emit_testeqimm(temp,0x0000e000);
3096 emit_orrne_imm(temp,0x80000000,temp);
3097 break;
3098 default:
3099 temp=sl;
3100 break;
3101 }
3102 emit_writeword(temp,&reg_cop2c[copr]);
3103 assert(sl>=0);
3104 }
3105}
3106
3107static void c2op_prologue(u_int op,u_int reglist)
3108{
3109 save_regs_all(reglist);
3110#ifdef PCNT
3111 emit_movimm(op,0);
3112 emit_call((int)pcnt_gte_start);
3113#endif
3114 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3115}
3116
3117static void c2op_epilogue(u_int op,u_int reglist)
3118{
3119#ifdef PCNT
3120 emit_movimm(op,0);
3121 emit_call((int)pcnt_gte_end);
3122#endif
3123 restore_regs_all(reglist);
3124}
3125
3126static void c2op_call_MACtoIR(int lm,int need_flags)
3127{
3128 if(need_flags)
3129 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
3130 else
3131 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
3132}
3133
3134static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3135{
3136 emit_call(func);
3137 // func is C code and trashes r0
3138 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3139 if(need_flags||need_ir)
3140 c2op_call_MACtoIR(lm,need_flags);
3141 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
3142}
3143
/*
 * Emit code for a GTE (COP2) operation.
 *
 * The operation number is the low 6 bits of the instruction. Nothing is
 * emitted when gte_handlers[] has no entry for it. Several operations have
 * specialised assembly paths (disabled under DRC_DBG); everything else calls
 * the generic handler, using the "_nf" table when flags are not needed.
 */
static void c2op_assemble(int i,struct regstat *i_regs)
{
  u_int c2op=source[i]&0x3f;
  u_int hr,reglist_full=0,reglist;
  int need_flags,need_ir;
  // Collect all allocated host registers, then keep only caller-saved ones.
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
  }
  reglist=reglist_full&CALLER_SAVE_REGS;

  if (gte_handlers[c2op]!=NULL) {
    // Bit 63 of gte_unneeded marks the flags as unneeded; bits 9-11 (0xe00)
    // mark the IR values.
    need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
    need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
    assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
      source[i],gte_unneeded[i+1],need_flags,need_ir);
    if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
      need_flags=0;
    // Instruction fields passed on to the helpers.
    int shift = (source[i] >> 19) & 1;
    int lm = (source[i] >> 10) & 1;
    switch(c2op) {
#ifndef DRC_DBG
      case GTE_MVMVA: {
#ifdef HAVE_ARMV5
        int v  = (source[i] >> 15) & 3;
        int cv = (source[i] >> 13) & 3;
        int mx = (source[i] >> 17) & 3;
        reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
        c2op_prologue(c2op,reglist);
        /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
        if(v<3)
          emit_ldrd(v*8,0,4);
        else {
          // v==3: build the packed vector from the 16-bit halves of
          // registers 9-11
          emit_movzwl_indexed(9*4,0,4);  // gteIR
          emit_movzwl_indexed(10*4,0,6);
          emit_movzwl_indexed(11*4,0,5);
          emit_orrshl_imm(6,16,4);
        }
        // mx/cv == 3 select zeromem_ptr instead of a register block.
        if(mx<3)
          emit_addimm(0,32*4+mx*8*4,6);
        else
          emit_readword(&zeromem_ptr,6);
        if(cv<3)
          emit_addimm(0,32*4+(cv*8+5)*4,7);
        else
          emit_readword(&zeromem_ptr,7);
#ifdef __ARM_NEON__
        emit_movimm(source[i],1); // opcode
        emit_call(gteMVMVA_part_neon);
        if(need_flags) {
          emit_movimm(lm,1);
          emit_call(gteMACtoIR_flags_neon);
        }
#else
        if(cv==3&&shift)
          emit_call((int)gteMVMVA_part_cv3sh12_arm);
        else {
          emit_movimm(shift,1);
          emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
        }
        if(need_flags||need_ir)
          c2op_call_MACtoIR(lm,need_flags);
#endif
#else /* if not HAVE_ARMV5 */
        c2op_prologue(c2op,reglist);
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,&psxRegs.code);
        emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
#endif
        break;
      }
      case GTE_OP:
        c2op_prologue(c2op,reglist);
        emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
        if(need_flags||need_ir) {
          // reload the COP2 register base into r0 before the next helper
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DPCS:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_INTPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_SQR:
        c2op_prologue(c2op,reglist);
        emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
        if(need_flags||need_ir) {
          // reload the COP2 register base into r0 before the next helper
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DCPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
        break;
      case GTE_GPF:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_GPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
        break;
#endif
      default:
        // Generic path: call the handler from the table.
        c2op_prologue(c2op,reglist);
#ifdef DRC_DBG
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,&psxRegs.code);
#endif
        emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
        break;
    }
    c2op_epilogue(c2op,reglist);
  }
}
3263
3264static void cop1_unusable(int i,struct regstat *i_regs)
3265{
3266 // XXX: should just just do the exception instead
3267 //if(!cop1_usable)
3268 {
3269 void *jaddr=out;
3270 emit_jmp(0);
3271 add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
3272 }
3273}
3274
/*
 * Assemble a COP1 instruction at index i.
 *
 * Nothing is translated here: every such instruction is handed straight to
 * cop1_unusable(), which emits the jump to the FP_STUB handler.
 */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3279
3280static void multdiv_assemble_arm(int i,struct regstat *i_regs)
3281{
3282 // case 0x18: MULT
3283 // case 0x19: MULTU
3284 // case 0x1A: DIV
3285 // case 0x1B: DIVU
3286 // case 0x1C: DMULT
3287 // case 0x1D: DMULTU
3288 // case 0x1E: DDIV
3289 // case 0x1F: DDIVU
3290 if(rs1[i]&&rs2[i])
3291 {
3292 if((opcode2[i]&4)==0) // 32-bit
3293 {
3294 if(opcode2[i]==0x18) // MULT
3295 {
3296 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3297 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3298 signed char hi=get_reg(i_regs->regmap,HIREG);
3299 signed char lo=get_reg(i_regs->regmap,LOREG);
3300 assert(m1>=0);
3301 assert(m2>=0);
3302 assert(hi>=0);
3303 assert(lo>=0);
3304 emit_smull(m1,m2,hi,lo);
3305 }
3306 if(opcode2[i]==0x19) // MULTU
3307 {
3308 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3309 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3310 signed char hi=get_reg(i_regs->regmap,HIREG);
3311 signed char lo=get_reg(i_regs->regmap,LOREG);
3312 assert(m1>=0);
3313 assert(m2>=0);
3314 assert(hi>=0);
3315 assert(lo>=0);
3316 emit_umull(m1,m2,hi,lo);
3317 }
3318 if(opcode2[i]==0x1A) // DIV
3319 {
3320 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3321 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3322 assert(d1>=0);
3323 assert(d2>=0);
3324 signed char quotient=get_reg(i_regs->regmap,LOREG);
3325 signed char remainder=get_reg(i_regs->regmap,HIREG);
3326 assert(quotient>=0);
3327 assert(remainder>=0);
3328 emit_movs(d1,remainder);
3329 emit_movimm(0xffffffff,quotient);
3330 emit_negmi(quotient,quotient); // .. quotient and ..
3331 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
3332 emit_movs(d2,HOST_TEMPREG);
3333 emit_jeq((int)out+52); // Division by zero
3334 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
3335#ifdef HAVE_ARMV5
3336 emit_clz(HOST_TEMPREG,quotient);
3337 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
3338#else
3339 emit_movimm(0,quotient);
3340 emit_addpl_imm(quotient,1,quotient);
3341 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3342 emit_jns((int)out-2*4);
3343#endif
3344 emit_orimm(quotient,1<<31,quotient);
3345 emit_shr(quotient,quotient,quotient);
3346 emit_cmp(remainder,HOST_TEMPREG);
3347 emit_subcs(remainder,HOST_TEMPREG,remainder);
3348 emit_adcs(quotient,quotient,quotient);
3349 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3350 emit_jcc(out-16); // -4
3351 emit_teq(d1,d2);
3352 emit_negmi(quotient,quotient);
3353 emit_test(d1,d1);
3354 emit_negmi(remainder,remainder);
3355 }
3356 if(opcode2[i]==0x1B) // DIVU
3357 {
3358 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3359 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3360 assert(d1>=0);
3361 assert(d2>=0);
3362 signed char quotient=get_reg(i_regs->regmap,LOREG);
3363 signed char remainder=get_reg(i_regs->regmap,HIREG);
3364 assert(quotient>=0);
3365 assert(remainder>=0);
3366 emit_mov(d1,remainder);
3367 emit_movimm(0xffffffff,quotient); // div0 case
3368 emit_test(d2,d2);
3369 emit_jeq((int)out+40); // Division by zero
3370#ifdef HAVE_ARMV5
3371 emit_clz(d2,HOST_TEMPREG);
3372 emit_movimm(1<<31,quotient);
3373 emit_shl(d2,HOST_TEMPREG,d2);
3374#else
3375 emit_movimm(0,HOST_TEMPREG);
3376 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3377 emit_lslpls_imm(d2,1,d2);
3378 emit_jns((int)out-2*4);
3379 emit_movimm(1<<31,quotient);
3380#endif
3381 emit_shr(quotient,HOST_TEMPREG,quotient);
3382 emit_cmp(remainder,d2);
3383 emit_subcs(remainder,d2,remainder);
3384 emit_adcs(quotient,quotient,quotient);
3385 emit_shrcc_imm(d2,1,d2);
3386 emit_jcc(out-16); // -4
3387 }
3388 }
3389 else // 64-bit
3390 assert(0);
3391 }
3392 else
3393 {
3394 // Multiply by zero is zero.
3395 // MIPS does not have a divide by zero exception.
3396 // The result is undefined, we return zero.
3397 signed char hr=get_reg(i_regs->regmap,HIREG);
3398 signed char lr=get_reg(i_regs->regmap,LOREG);
3399 if(hr>=0) emit_zeroreg(hr);
3400 if(lr>=0) emit_zeroreg(lr);
3401 }
3402}
3403#define multdiv_assemble multdiv_assemble_arm
3404
/*
 * Intentionally empty on ARM.
 *
 * On x86 this step puts the value 0xf8 into register r; on ARM the hash is
 * produced by a single instruction (see do_rhash), so there is nothing to
 * preload and r is ignored.
 */
static void do_preload_rhash(int r) {
  (void)r;
}
3409
3410static void do_preload_rhtbl(int ht) {
3411 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3412}
3413
/*
 * Emit "rh = rs & 0xf8".
 * do_miniht_insert indexes mini_ht with (address&0xFF)>>3, so this mask
 * keeps the same address bits; presumably the result is used directly as a
 * byte offset into mini_ht (8-byte entries) -- confirm the entry size.
 */
static void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
3417
3418static void do_miniht_load(int ht,int rh) {
3419 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3420 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3421}
3422
3423static void do_miniht_jump(int rs,int rh,int ht) {
3424 emit_cmp(rh,rs);
3425 emit_ldreq_indexed(ht,4,15);
3426 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3427 emit_mov(rs,7);
3428 emit_jmp(jump_vaddr_reg[7]);
3429 #else
3430 emit_jmp(jump_vaddr_reg[rs]);
3431 #endif
3432}
3433
3434static void do_miniht_insert(u_int return_address,int rt,int temp) {
3435 #ifndef HAVE_ARMV7
3436 emit_movimm(return_address,rt); // PC into link register
3437 add_to_linker(out,return_address,1);
3438 emit_pcreladdr(temp);
3439 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
3440 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
3441 #else
3442 emit_movw(return_address&0x0000FFFF,rt);
3443 add_to_linker(out,return_address,1);
3444 emit_pcreladdr(temp);
3445 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
3446 emit_movt(return_address&0xFFFF0000,rt);
3447 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
3448 #endif
3449}
3450
3451static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t u)
3452{
3453 //if(dirty_pre==dirty) return;
3454 int hr,reg;
3455 for(hr=0;hr<HOST_REGS;hr++) {
3456 if(hr!=EXCLUDE_REG) {
3457 reg=pre[hr];
3458 if(((~u)>>(reg&63))&1) {
3459 if(reg>0) {
3460 if(((dirty_pre&~dirty)>>hr)&1) {
3461 if(reg>0&&reg<34) {
3462 emit_storereg(reg,hr);
3463 }
3464 else if(reg>=64) {
3465 assert(0);
3466 }
3467 }
3468 }
3469 }
3470 }
3471 }
3472}
3473
3474static void mark_clear_cache(void *target)
3475{
3476 u_long offset = (u_char *)target - translation_cache;
3477 u_int mask = 1u << ((offset >> 12) & 31);
3478 if (!(needs_clear_cache[offset >> 17] & mask)) {
3479 char *start = (char *)((u_long)target & ~4095ul);
3480 start_tcache_write(start, start + 4096);
3481 needs_clear_cache[offset >> 17] |= mask;
3482 }
3483}
3484
3485// Clearing the cache is rather slow on ARM Linux, so mark the areas
3486// that need to be cleared, and then only clear these areas once.
3487static void do_clear_cache()
3488{
3489 int i,j;
3490 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
3491 {
3492 u_int bitmap=needs_clear_cache[i];
3493 if(bitmap) {
3494 u_char *start, *end;
3495 for(j=0;j<32;j++)
3496 {
3497 if(bitmap&(1<<j)) {
3498 start=translation_cache+i*131072+j*4096;
3499 end=start+4095;
3500 j++;
3501 while(j<32) {
3502 if(bitmap&(1<<j)) {
3503 end+=4096;
3504 j++;
3505 }else{
3506 end_tcache_write(start, end);
3507 break;
3508 }
3509 }
3510 }
3511 }
3512 needs_clear_cache[i]=0;
3513 }
3514 }
3515}
3516
// CPU-architecture-specific initialization.
// The ARM backend needs none, so this hook is deliberately empty.
static void arch_init()
{
}
3520
3521// vim:shiftwidth=2:expandtab