451ab91e |
1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
2 | * Mupen64plus - regcache.c * |
3 | * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * |
4 | * Copyright (C) 2007 Richard Goedeken (Richard42) * |
5 | * Copyright (C) 2002 Hacktarux * |
6 | * * |
7 | * This program is free software; you can redistribute it and/or modify * |
8 | * it under the terms of the GNU General Public License as published by * |
9 | * the Free Software Foundation; either version 2 of the License, or * |
10 | * (at your option) any later version. * |
11 | * * |
12 | * This program is distributed in the hope that it will be useful, * |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
15 | * GNU General Public License for more details. * |
16 | * * |
17 | * You should have received a copy of the GNU General Public License * |
18 | * along with this program; if not, write to the * |
19 | * Free Software Foundation, Inc., * |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
21 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
22 | |
23 | #include <stdio.h> |
24 | |
25 | #include "regcache.h" |
26 | |
27 | #include "api/m64p_types.h" |
28 | #include "api/callbacks.h" |
29 | #include "r4300/recomp.h" |
30 | #include "r4300/r4300.h" |
31 | #include "r4300/recomph.h" |
32 | |
/* Register cache bookkeeping. Every array below is indexed by x86 GPR number (0-7). */

/* Address of the value currently cached in each x86 register (NULL when none). */
static unsigned long long * reg_content[8];
/* Instruction at which each register was last accessed; NULL marks a free
 * register, and lock_register() stores an all-ones sentinel here. */
static precomp_instr* last_access[8];
/* For a free register: first instruction whose needed_registers[] entry has
 * not yet been filled in for that register. */
static precomp_instr* free_since[8];
/* Non-zero when free_register() must write the cached value back. */
static int dirty[8];
/* Non-zero when the register is tracked as holding a 64-bit value; when zero,
 * free_register() calls movsxd_reg64_reg32() before the write-back. */
static int is64bits[8];
/* Set by init_cache() to the address of `reg`; the allocators compare against
 * it to zero a register with xor instead of loading from memory. */
static unsigned long long *r0;
39 | |
40 | void init_cache(precomp_instr* start) |
41 | { |
42 | int i; |
43 | for (i=0; i<8; i++) |
44 | { |
45 | reg_content[i] = NULL; |
46 | last_access[i] = NULL; |
47 | free_since[i] = start; |
48 | dirty[i] = 0; |
49 | is64bits[i] = 0; |
50 | } |
51 | r0 = (unsigned long long *) reg; |
52 | } |
53 | |
54 | void free_all_registers(void) |
55 | { |
56 | #if defined(PROFILE_R4300) |
57 | int freestart = code_length; |
58 | int flushed = 0; |
59 | #endif |
60 | |
61 | int i; |
62 | for (i=0; i<8; i++) |
63 | { |
64 | #if defined(PROFILE_R4300) |
65 | if (last_access[i] && dirty[i]) flushed = 1; |
66 | #endif |
67 | if (last_access[i]) |
68 | { |
69 | free_register(i); |
70 | } |
71 | else |
72 | { |
73 | while (free_since[i] <= dst) |
74 | { |
75 | free_since[i]->reg_cache_infos.needed_registers[i] = NULL; |
76 | free_since[i]++; |
77 | } |
78 | } |
79 | } |
80 | |
81 | #if defined(PROFILE_R4300) |
82 | if (flushed == 1) |
83 | { |
84 | long long x86addr = (long long) ((*inst_pointer) + freestart); |
85 | int mipsop = -5; |
86 | if (fwrite(&mipsop, 1, 4, pfProfile) != 4 || /* -5 = regcache flushing */ |
87 | fwrite(&x86addr, 1, sizeof(char *), pfProfile) != sizeof(char *)) // write pointer to start of register cache flushing instructions |
88 | DebugMessage(M64MSG_ERROR, "Error writing R4300 instruction address profiling data"); |
89 | x86addr = (long long) ((*inst_pointer) + code_length); |
90 | if (fwrite(&src, 1, 4, pfProfile) != 4 || // write 4-byte MIPS opcode for current instruction |
91 | fwrite(&x86addr, 1, sizeof(char *), pfProfile) != sizeof(char *)) // write pointer to dynamically generated x86 code for this MIPS instruction |
92 | DebugMessage(M64MSG_ERROR, "Error writing R4300 instruction address profiling data"); |
93 | } |
94 | #endif |
95 | } |
96 | |
97 | static void simplify_access(void) |
98 | { |
99 | int i; |
100 | dst->local_addr = code_length; |
101 | for(i=0; i<8; i++) dst->reg_cache_infos.needed_registers[i] = NULL; |
102 | } |
103 | |
/* Flush the whole register cache, then make the current instruction start
 * after the flushing code that was just generated. */
void free_registers_move_start(void)
{
    /* step 1: write back dirty registers and clear the needed_registers table */
    free_all_registers();

    /* step 2: move the instruction's local_addr past the flushing instructions */
    simplify_access();
}
113 | |
114 | // this function frees a specific X86 GPR |
115 | void free_register(int reg) |
116 | { |
117 | precomp_instr *last; |
118 | |
119 | if (last_access[reg] != NULL) |
120 | last = last_access[reg]+1; |
121 | else |
122 | last = free_since[reg]; |
123 | |
124 | while (last <= dst) |
125 | { |
126 | if (last_access[reg] != NULL && dirty[reg]) |
127 | last->reg_cache_infos.needed_registers[reg] = reg_content[reg]; |
128 | else |
129 | last->reg_cache_infos.needed_registers[reg] = NULL; |
130 | last++; |
131 | } |
132 | if (last_access[reg] == NULL) |
133 | { |
134 | free_since[reg] = dst+1; |
135 | return; |
136 | } |
137 | |
138 | if (dirty[reg]) |
139 | { |
140 | if (is64bits[reg]) |
141 | { |
142 | mov_m64rel_xreg64((unsigned long long *) reg_content[reg], reg); |
143 | } |
144 | else |
145 | { |
146 | movsxd_reg64_reg32(reg, reg); |
147 | mov_m64rel_xreg64((unsigned long long *) reg_content[reg], reg); |
148 | } |
149 | } |
150 | |
151 | last_access[reg] = NULL; |
152 | free_since[reg] = dst+1; |
153 | } |
154 | |
155 | int lru_register(void) |
156 | { |
157 | unsigned long long oldest_access = 0xFFFFFFFFFFFFFFFFULL; |
158 | int i, reg = 0; |
159 | for (i=0; i<8; i++) |
160 | { |
161 | if (i != ESP && (unsigned long long) last_access[i] < oldest_access) |
162 | { |
163 | oldest_access = (unsigned long long) last_access[i]; |
164 | reg = i; |
165 | } |
166 | } |
167 | return reg; |
168 | } |
169 | |
170 | int lru_base_register(void) /* EBP cannot be used as a base register for SIB addressing byte */ |
171 | { |
172 | unsigned long long oldest_access = 0xFFFFFFFFFFFFFFFFULL; |
173 | int i, reg = 0; |
174 | for (i=0; i<8; i++) |
175 | { |
176 | if (i != ESP && i != EBP && (unsigned long long) last_access[i] < oldest_access) |
177 | { |
178 | oldest_access = (unsigned long long) last_access[i]; |
179 | reg = i; |
180 | } |
181 | } |
182 | return reg; |
183 | } |
184 | |
185 | void set_register_state(int reg, unsigned int *addr, int _dirty, int _is64bits) |
186 | { |
187 | if (addr == NULL) |
188 | last_access[reg] = NULL; |
189 | else |
190 | last_access[reg] = dst; |
191 | reg_content[reg] = (unsigned long long *) addr; |
192 | is64bits[reg] = _is64bits; |
193 | dirty[reg] = _dirty; |
194 | } |
195 | |
196 | int lock_register(int reg) |
197 | { |
198 | free_register(reg); |
199 | last_access[reg] = (precomp_instr *) 0xFFFFFFFFFFFFFFFFULL; |
200 | reg_content[reg] = NULL; |
201 | return reg; |
202 | } |
203 | |
204 | void unlock_register(int reg) |
205 | { |
206 | last_access[reg] = NULL; |
207 | } |
208 | |
209 | // this function finds a register to put the data contained in addr, |
210 | // if there was another value before it's cleanly removed of the |
211 | // register cache. After that, the register number is returned. |
212 | // If data are already cached, the function only returns the register number |
213 | int allocate_register_32(unsigned int *addr) |
214 | { |
215 | int reg = 0, i; |
216 | |
217 | // is it already cached ? |
218 | if (addr != NULL) |
219 | { |
220 | for (i = 0; i < 8; i++) |
221 | { |
222 | if (last_access[i] != NULL && (unsigned int *) reg_content[i] == addr) |
223 | { |
224 | precomp_instr *last = last_access[i]+1; |
225 | |
226 | while (last <= dst) |
227 | { |
228 | last->reg_cache_infos.needed_registers[i] = reg_content[i]; |
229 | last++; |
230 | } |
231 | last_access[i] = dst; |
232 | is64bits[i] = 0; |
233 | return i; |
234 | } |
235 | } |
236 | } |
237 | |
238 | // it's not cached, so take the least recently used register |
239 | reg = lru_register(); |
240 | |
241 | if (last_access[reg]) |
242 | free_register(reg); |
243 | else |
244 | { |
245 | while (free_since[reg] <= dst) |
246 | { |
247 | free_since[reg]->reg_cache_infos.needed_registers[reg] = NULL; |
248 | free_since[reg]++; |
249 | } |
250 | } |
251 | |
252 | last_access[reg] = dst; |
253 | reg_content[reg] = (unsigned long long *) addr; |
254 | dirty[reg] = 0; |
255 | is64bits[reg] = 0; |
256 | |
257 | if (addr != NULL) |
258 | { |
259 | if (addr == (unsigned int *) r0) |
260 | xor_reg32_reg32(reg, reg); |
261 | else |
262 | mov_xreg32_m32rel(reg, addr); |
263 | } |
264 | |
265 | return reg; |
266 | } |
267 | |
268 | // this function is similar to allocate_register except it loads |
269 | // a 64 bits value, and return the register number of the LSB part |
270 | int allocate_register_64(unsigned long long *addr) |
271 | { |
272 | int reg, i; |
273 | |
274 | // is it already cached? |
275 | if (addr != NULL) |
276 | { |
277 | for (i = 0; i < 8; i++) |
278 | { |
279 | if (last_access[i] != NULL && reg_content[i] == addr) |
280 | { |
281 | precomp_instr *last = last_access[i]+1; |
282 | |
283 | while (last <= dst) |
284 | { |
285 | last->reg_cache_infos.needed_registers[i] = reg_content[i]; |
286 | last++; |
287 | } |
288 | last_access[i] = dst; |
289 | if (is64bits[i] == 0) |
290 | { |
291 | movsxd_reg64_reg32(i, i); |
292 | is64bits[i] = 1; |
293 | } |
294 | return i; |
295 | } |
296 | } |
297 | } |
298 | |
299 | // it's not cached, so take the least recently used register |
300 | reg = lru_register(); |
301 | |
302 | if (last_access[reg]) |
303 | free_register(reg); |
304 | else |
305 | { |
306 | while (free_since[reg] <= dst) |
307 | { |
308 | free_since[reg]->reg_cache_infos.needed_registers[reg] = NULL; |
309 | free_since[reg]++; |
310 | } |
311 | } |
312 | |
313 | last_access[reg] = dst; |
314 | reg_content[reg] = addr; |
315 | dirty[reg] = 0; |
316 | is64bits[reg] = 1; |
317 | |
318 | if (addr != NULL) |
319 | { |
320 | if (addr == r0) |
321 | xor_reg64_reg64(reg, reg); |
322 | else |
323 | mov_xreg64_m64rel(reg, addr); |
324 | } |
325 | |
326 | return reg; |
327 | } |
328 | |
329 | // this function checks if the data located at addr are cached in a register |
330 | // and then, it returns 1 if it's a 64 bit value |
331 | // 0 if it's a 32 bit value |
332 | // -1 if it's not cached |
333 | int is64(unsigned int *addr) |
334 | { |
335 | int i; |
336 | for (i = 0; i < 8; i++) |
337 | { |
338 | if (last_access[i] != NULL && reg_content[i] == (unsigned long long *) addr) |
339 | { |
340 | return is64bits[i]; |
341 | } |
342 | } |
343 | return -1; |
344 | } |
345 | |
346 | int allocate_register_32_w(unsigned int *addr) |
347 | { |
348 | int reg = 0, i; |
349 | |
350 | // is it already cached ? |
351 | for (i = 0; i < 8; i++) |
352 | { |
353 | if (last_access[i] != NULL && reg_content[i] == (unsigned long long *) addr) |
354 | { |
355 | precomp_instr *last = last_access[i] + 1; |
356 | |
357 | while (last <= dst) |
358 | { |
359 | last->reg_cache_infos.needed_registers[i] = NULL; |
360 | last++; |
361 | } |
362 | last_access[i] = dst; |
363 | dirty[i] = 1; |
364 | is64bits[i] = 0; |
365 | return i; |
366 | } |
367 | } |
368 | |
369 | // it's not cached, so take the least recently used register |
370 | reg = lru_register(); |
371 | |
372 | if (last_access[reg]) |
373 | free_register(reg); |
374 | else |
375 | { |
376 | while (free_since[reg] <= dst) |
377 | { |
378 | free_since[reg]->reg_cache_infos.needed_registers[reg] = NULL; |
379 | free_since[reg]++; |
380 | } |
381 | } |
382 | |
383 | last_access[reg] = dst; |
384 | reg_content[reg] = (unsigned long long *) addr; |
385 | dirty[reg] = 1; |
386 | is64bits[reg] = 0; |
387 | |
388 | return reg; |
389 | } |
390 | |
391 | int allocate_register_64_w(unsigned long long *addr) |
392 | { |
393 | int reg, i; |
394 | |
395 | // is it already cached? |
396 | for (i = 0; i < 8; i++) |
397 | { |
398 | if (last_access[i] != NULL && reg_content[i] == addr) |
399 | { |
400 | precomp_instr *last = last_access[i] + 1; |
401 | |
402 | while (last <= dst) |
403 | { |
404 | last->reg_cache_infos.needed_registers[i] = NULL; |
405 | last++; |
406 | } |
407 | last_access[i] = dst; |
408 | is64bits[i] = 1; |
409 | dirty[i] = 1; |
410 | return i; |
411 | } |
412 | } |
413 | |
414 | // it's not cached, so take the least recently used register |
415 | reg = lru_register(); |
416 | |
417 | if (last_access[reg]) |
418 | free_register(reg); |
419 | else |
420 | { |
421 | while (free_since[reg] <= dst) |
422 | { |
423 | free_since[reg]->reg_cache_infos.needed_registers[reg] = NULL; |
424 | free_since[reg]++; |
425 | } |
426 | } |
427 | |
428 | last_access[reg] = dst; |
429 | reg_content[reg] = addr; |
430 | dirty[reg] = 1; |
431 | is64bits[reg] = 1; |
432 | |
433 | return reg; |
434 | } |
435 | |
436 | void allocate_register_32_manually(int reg, unsigned int *addr) |
437 | { |
438 | int i; |
439 | |
440 | /* check if we just happen to already have this r4300 reg cached in the requested x86 reg */ |
441 | if (last_access[reg] != NULL && reg_content[reg] == (unsigned long long *) addr) |
442 | { |
443 | precomp_instr *last = last_access[reg] + 1; |
444 | while (last <= dst) |
445 | { |
446 | last->reg_cache_infos.needed_registers[reg] = reg_content[reg]; |
447 | last++; |
448 | } |
449 | last_access[reg] = dst; |
450 | /* we won't touch is64bits or dirty; the register returned is "read-only" */ |
451 | return; |
452 | } |
453 | |
454 | /* otherwise free up the requested x86 register */ |
455 | if (last_access[reg]) |
456 | free_register(reg); |
457 | else |
458 | { |
459 | while (free_since[reg] <= dst) |
460 | { |
461 | free_since[reg]->reg_cache_infos.needed_registers[reg] = NULL; |
462 | free_since[reg]++; |
463 | } |
464 | } |
465 | |
466 | /* if the r4300 register is already cached in a different x86 register, then copy it to the requested x86 register */ |
467 | for (i=0; i<8; i++) |
468 | { |
469 | if (last_access[i] != NULL && reg_content[i] == (unsigned long long *) addr) |
470 | { |
471 | precomp_instr *last = last_access[i]+1; |
472 | while (last <= dst) |
473 | { |
474 | last->reg_cache_infos.needed_registers[i] = reg_content[i]; |
475 | last++; |
476 | } |
477 | last_access[i] = dst; |
478 | if (is64bits[i]) |
479 | mov_reg64_reg64(reg, i); |
480 | else |
481 | mov_reg32_reg32(reg, i); |
482 | last_access[reg] = dst; |
483 | is64bits[reg] = is64bits[i]; |
484 | dirty[reg] = dirty[i]; |
485 | reg_content[reg] = reg_content[i]; |
486 | /* free the previous x86 register used to cache this r4300 register */ |
487 | free_since[i] = dst + 1; |
488 | last_access[i] = NULL; |
489 | return; |
490 | } |
491 | } |
492 | |
493 | /* otherwise just load the 32-bit value into the requested register */ |
494 | last_access[reg] = dst; |
495 | reg_content[reg] = (unsigned long long *) addr; |
496 | dirty[reg] = 0; |
497 | is64bits[reg] = 0; |
498 | |
499 | if ((unsigned long long *) addr == r0) |
500 | xor_reg32_reg32(reg, reg); |
501 | else |
502 | mov_xreg32_m32rel(reg, addr); |
503 | } |
504 | |
505 | void allocate_register_32_manually_w(int reg, unsigned int *addr) |
506 | { |
507 | int i; |
508 | |
509 | /* check if we just happen to already have this r4300 reg cached in the requested x86 reg */ |
510 | if (last_access[reg] != NULL && reg_content[reg] == (unsigned long long *) addr) |
511 | { |
512 | precomp_instr *last = last_access[reg]+1; |
513 | while (last <= dst) |
514 | { |
515 | last->reg_cache_infos.needed_registers[reg] = NULL; |
516 | last++; |
517 | } |
518 | last_access[reg] = dst; |
519 | is64bits[reg] = 0; |
520 | dirty[reg] = 1; |
521 | return; |
522 | } |
523 | |
524 | /* otherwise free up the requested x86 register */ |
525 | if (last_access[reg]) |
526 | free_register(reg); |
527 | else |
528 | { |
529 | while (free_since[reg] <= dst) |
530 | { |
531 | free_since[reg]->reg_cache_infos.needed_registers[reg] = NULL; |
532 | free_since[reg]++; |
533 | } |
534 | } |
535 | |
536 | /* if the r4300 register is already cached in a different x86 register, then free it and bind to the requested x86 register */ |
537 | for (i = 0; i < 8; i++) |
538 | { |
539 | if (last_access[i] != NULL && reg_content[i] == (unsigned long long *) addr) |
540 | { |
541 | precomp_instr *last = last_access[i] + 1; |
542 | while (last <= dst) |
543 | { |
544 | last->reg_cache_infos.needed_registers[i] = NULL; |
545 | last++; |
546 | } |
547 | last_access[reg] = dst; |
548 | reg_content[reg] = reg_content[i]; |
549 | dirty[reg] = 1; |
550 | is64bits[reg] = 0; |
551 | /* free the previous x86 register used to cache this r4300 register */ |
552 | free_since[i] = dst+1; |
553 | last_access[i] = NULL; |
554 | return; |
555 | } |
556 | } |
557 | |
558 | /* otherwise just set up the requested register as 32-bit */ |
559 | last_access[reg] = dst; |
560 | reg_content[reg] = (unsigned long long *) addr; |
561 | dirty[reg] = 1; |
562 | is64bits[reg] = 0; |
563 | } |
564 | |
565 | |
566 | // 0x48 0x83 0xEC 0x8 sub rsp, byte 8 |
567 | // 0x48 0xA1 0xXXXXXXXXXXXXXXXX mov rax, qword (&code start) |
568 | // 0x48 0x05 0xXXXXXXXX add rax, dword (local_addr) |
569 | // 0x48 0x89 0x04 0x24 mov [rsp], rax |
// 0x48 0xB8 0xXXXXXXXXXXXXXXXX mov rax, &reg[0]
571 | // 0x48 0x8B (reg<<3)|0x80 0xXXXXXXXX mov rdi, [rax + XXXXXXXX] |
572 | // 0x48 0x8B (reg<<3)|0x80 0xXXXXXXXX mov rsi, [rax + XXXXXXXX] |
573 | // 0x48 0x8B (reg<<3)|0x80 0xXXXXXXXX mov rbp, [rax + XXXXXXXX] |
574 | // 0x48 0x8B (reg<<3)|0x80 0xXXXXXXXX mov rdx, [rax + XXXXXXXX] |
575 | // 0x48 0x8B (reg<<3)|0x80 0xXXXXXXXX mov rcx, [rax + XXXXXXXX] |
576 | // 0x48 0x8B (reg<<3)|0x80 0xXXXXXXXX mov rbx, [rax + XXXXXXXX] |
577 | // 0x48 0x8B (reg<<3)|0x80 0xXXXXXXXX mov rax, [rax + XXXXXXXX] |
578 | // 0xC3 ret |
579 | // total : 84 bytes |
580 | |
581 | static void build_wrapper(precomp_instr *instr, unsigned char* pCode, precomp_block* block) |
582 | { |
583 | int i; |
584 | |
585 | #if defined(PROFILE_R4300) |
586 | long long x86addr = (long long) pCode; |
587 | int mipsop = -4; |
588 | if (fwrite(&mipsop, 1, 4, pfProfile) != 4 || // write 4-byte MIPS opcode |
589 | fwrite(&x86addr, 1, sizeof(char *), pfProfile) != sizeof(char *)) // write pointer to dynamically generated x86 code for this MIPS instruction |
590 | DebugMessage(M64MSG_ERROR, "Error writing R4300 instruction address profiling data"); |
591 | #endif |
592 | |
593 | *pCode++ = 0x48; |
594 | *pCode++ = 0x83; |
595 | *pCode++ = 0xEC; |
596 | *pCode++ = 0x08; |
597 | |
598 | *pCode++ = 0x48; |
599 | *pCode++ = 0xA1; |
600 | *((unsigned long long *) pCode) = (unsigned long long) (&block->code); |
601 | pCode += 8; |
602 | |
603 | *pCode++ = 0x48; |
604 | *pCode++ = 0x05; |
605 | *((unsigned int *) pCode) = (unsigned int) instr->local_addr; |
606 | pCode += 4; |
607 | |
608 | *pCode++ = 0x48; |
609 | *pCode++ = 0x89; |
610 | *pCode++ = 0x04; |
611 | *pCode++ = 0x24; |
612 | |
613 | *pCode++ = 0x48; |
614 | *pCode++ = 0xB8; |
615 | *((unsigned long long *) pCode) = (unsigned long long) ®[0]; |
616 | pCode += 8; |
617 | |
618 | for (i=7; i>=0; i--) |
619 | { |
620 | long long riprel; |
621 | if (instr->reg_cache_infos.needed_registers[i] != NULL) |
622 | { |
623 | *pCode++ = 0x48; |
624 | *pCode++ = 0x8B; |
625 | *pCode++ = 0x80 | (i << 3); |
626 | riprel = (long long) ((unsigned char *) instr->reg_cache_infos.needed_registers[i] - (unsigned char *) ®[0]); |
627 | *((int *) pCode) = (int) riprel; |
628 | pCode += 4; |
629 | if (riprel >= 0x7fffffffLL || riprel < -0x80000000LL) |
630 | { |
631 | DebugMessage(M64MSG_ERROR, "build_wrapper error: reg[%i] offset too big for relative address from %p to %p", |
632 | i, (®[0]), instr->reg_cache_infos.needed_registers[i]); |
633 | asm(" int $3; "); |
634 | } |
635 | } |
636 | } |
637 | *pCode++ = 0xC3; |
638 | } |
639 | |
640 | void build_wrappers(precomp_instr *instr, int start, int end, precomp_block* block) |
641 | { |
642 | int i, reg; |
643 | for (i=start; i<end; i++) |
644 | { |
645 | instr[i].reg_cache_infos.need_map = 0; |
646 | for (reg=0; reg<8; reg++) |
647 | { |
648 | if (instr[i].reg_cache_infos.needed_registers[reg] != NULL) |
649 | { |
650 | instr[i].reg_cache_infos.need_map = 1; |
651 | build_wrapper(&instr[i], instr[i].reg_cache_infos.jump_wrapper, block); |
652 | break; |
653 | } |
654 | } |
655 | } |
656 | } |
657 | |