gpu_neon: fix some missing ebuf updates
[pcsx_rearmed.git] / deps / libretro-common / libco / ppc.c
CommitLineData
3719602c
PC
/*
 libco.ppc (2010-10-17)
 author: blargg
 license: public domain
*/

/* PowerPC 32/64 using embedded or external asm, with optional
floating-point and AltiVec save/restore */
9
#define LIBCO_C
#include <libco.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
15
/* LIBCO_MPROTECT is 1 when the embedded swap code must be made executable
with mprotect() at start-up, 0 otherwise. It is only needed when the code
lives in the in-file array (no external assembly) on a Unix-like system.

LIBCO_PPC_ASM is tested for defined-ness here, the same way the rest of the
file tests it with #ifdef, so that an empty or zero-valued definition cannot
enable the mprotect path while the code array is compiled out. */
#if defined(__unix__) && !defined(LIBCO_PPC_ASM)
   #define LIBCO_MPROTECT 1
#else
   #define LIBCO_MPROTECT 0
#endif

#if LIBCO_MPROTECT
   #include <unistd.h>
   #include <sys/mman.h>
#endif
22
/* State format (offsets in 32-bit words)

 +0 Pointer to swap code
    Rest of function descriptor for entry function
 +8 PC
+10 SP
    Special regs
    GPRs
    FPRs
    VRs
    stack
*/
35
/* Bytes reserved at the start of every thread block for the saved state;
the main thread's block is exactly this large. */
enum { state_size = 1024 };

/* Bytes left between the initial stack pointer and the end of the block. */
enum { above_stack = 2048 };

/* The initial stack pointer is rounded down to a multiple of this. */
enum { stack_align = 256 };
39
40static thread_local cothread_t co_active_handle = 0;
41
/**** Determine environment ****/

/* Non-zero (inside #if) when any of the usual 64-bit PowerPC compiler
macros is set. Only meaningful in preprocessor conditionals. */
#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__)

/* Whether function calls are indirect through a descriptor,
or are directly to function. May be predefined by the build to override
the detection below. */
#ifndef LIBCO_PPCDESC
   #if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64)
      #define LIBCO_PPCDESC 1
   #endif
#endif
53
#ifdef LIBCO_PPC_ASM

   #ifdef __cplusplus
      extern "C"
   #endif

   /* Swap code is in ppc.S */
   void co_swap_asm(cothread_t, cothread_t);
   #define CO_SWAP_ASM(x, y) co_swap_asm((x), (y))

#else

/* Swap code is here in array. Please leave disassembly comments,
as they make it easy to see what it does, and reorder instructions
if one wants to see whether that improves performance.

Calling convention, as used by CO_SWAP_ASM(x, y): r3 = state block to
switch to (all loads use r3), r4 = state block to save the current
context into (all stores use r4). The routine ends by branching to the
PC loaded from the r3 block via CTR. The "bl 1" leaves LR pointing at the
trap word; presumably this makes an entry function that returns hit the
trap - confirm against the libco documentation. */
static const uint32_t libco_ppc_code [] = {
#if LIBCO_PPC64
   0x7d000026, /* mfcr    r8 */
   0xf8240028, /* std     r1,40(r4) */
   0x7d2802a6, /* mflr    r9 */
   0xf9c40048, /* std     r14,72(r4) */
   0xf9e40050, /* std     r15,80(r4) */
   0xfa040058, /* std     r16,88(r4) */
   0xfa240060, /* std     r17,96(r4) */
   0xfa440068, /* std     r18,104(r4) */
   0xfa640070, /* std     r19,112(r4) */
   0xfa840078, /* std     r20,120(r4) */
   0xfaa40080, /* std     r21,128(r4) */
   0xfac40088, /* std     r22,136(r4) */
   0xfae40090, /* std     r23,144(r4) */
   0xfb040098, /* std     r24,152(r4) */
   0xfb2400a0, /* std     r25,160(r4) */
   0xfb4400a8, /* std     r26,168(r4) */
   0xfb6400b0, /* std     r27,176(r4) */
   0xfb8400b8, /* std     r28,184(r4) */
   0xfba400c0, /* std     r29,192(r4) */
   0xfbc400c8, /* std     r30,200(r4) */
   0xfbe400d0, /* std     r31,208(r4) */
   0xf9240020, /* std     r9,32(r4) */
   0xe8e30020, /* ld      r7,32(r3) */
   0xe8230028, /* ld      r1,40(r3) */
   0x48000009, /* bl      1 */
   0x7fe00008, /* trap */
   0x91040030,/*1:stw     r8,48(r4) */
   0x80c30030, /* lwz     r6,48(r3) */
   0x7ce903a6, /* mtctr   r7 */
   0xe9c30048, /* ld      r14,72(r3) */
   0xe9e30050, /* ld      r15,80(r3) */
   0xea030058, /* ld      r16,88(r3) */
   0xea230060, /* ld      r17,96(r3) */
   0xea430068, /* ld      r18,104(r3) */
   0xea630070, /* ld      r19,112(r3) */
   0xea830078, /* ld      r20,120(r3) */
   0xeaa30080, /* ld      r21,128(r3) */
   0xeac30088, /* ld      r22,136(r3) */
   0xeae30090, /* ld      r23,144(r3) */
   0xeb030098, /* ld      r24,152(r3) */
   0xeb2300a0, /* ld      r25,160(r3) */
   0xeb4300a8, /* ld      r26,168(r3) */
   0xeb6300b0, /* ld      r27,176(r3) */
   0xeb8300b8, /* ld      r28,184(r3) */
   0xeba300c0, /* ld      r29,192(r3) */
   0xebc300c8, /* ld      r30,200(r3) */
   0xebe300d0, /* ld      r31,208(r3) */
   0x7ccff120, /* mtcr    r6 */
#else
   0x7d000026, /* mfcr    r8 */
   0x90240028, /* stw     r1,40(r4) */
   0x7d2802a6, /* mflr    r9 */
   0x91a4003c, /* stw     r13,60(r4) */
   0x91c40040, /* stw     r14,64(r4) */
   0x91e40044, /* stw     r15,68(r4) */
   0x92040048, /* stw     r16,72(r4) */
   0x9224004c, /* stw     r17,76(r4) */
   0x92440050, /* stw     r18,80(r4) */
   0x92640054, /* stw     r19,84(r4) */
   0x92840058, /* stw     r20,88(r4) */
   0x92a4005c, /* stw     r21,92(r4) */
   0x92c40060, /* stw     r22,96(r4) */
   0x92e40064, /* stw     r23,100(r4) */
   0x93040068, /* stw     r24,104(r4) */
   0x9324006c, /* stw     r25,108(r4) */
   0x93440070, /* stw     r26,112(r4) */
   0x93640074, /* stw     r27,116(r4) */
   0x93840078, /* stw     r28,120(r4) */
   0x93a4007c, /* stw     r29,124(r4) */
   0x93c40080, /* stw     r30,128(r4) */
   0x93e40084, /* stw     r31,132(r4) */
   0x91240020, /* stw     r9,32(r4) */
   0x80e30020, /* lwz     r7,32(r3) */
   0x80230028, /* lwz     r1,40(r3) */
   0x48000009, /* bl      1 */
   0x7fe00008, /* trap */
   0x91040030,/*1:stw     r8,48(r4) */
   0x80c30030, /* lwz     r6,48(r3) */
   0x7ce903a6, /* mtctr   r7 */
   0x81a3003c, /* lwz     r13,60(r3) */
   0x81c30040, /* lwz     r14,64(r3) */
   0x81e30044, /* lwz     r15,68(r3) */
   0x82030048, /* lwz     r16,72(r3) */
   0x8223004c, /* lwz     r17,76(r3) */
   0x82430050, /* lwz     r18,80(r3) */
   0x82630054, /* lwz     r19,84(r3) */
   0x82830058, /* lwz     r20,88(r3) */
   0x82a3005c, /* lwz     r21,92(r3) */
   0x82c30060, /* lwz     r22,96(r3) */
   0x82e30064, /* lwz     r23,100(r3) */
   0x83030068, /* lwz     r24,104(r3) */
   0x8323006c, /* lwz     r25,108(r3) */
   0x83430070, /* lwz     r26,112(r3) */
   0x83630074, /* lwz     r27,116(r3) */
   0x83830078, /* lwz     r28,120(r3) */
   0x83a3007c, /* lwz     r29,124(r3) */
   0x83c30080, /* lwz     r30,128(r3) */
   0x83e30084, /* lwz     r31,132(r3) */
   0x7ccff120, /* mtcr    r6 */
#endif

#ifndef LIBCO_PPC_NOFP
   0xd9c400e0, /* stfd    f14,224(r4) */
   0xd9e400e8, /* stfd    f15,232(r4) */
   0xda0400f0, /* stfd    f16,240(r4) */
   0xda2400f8, /* stfd    f17,248(r4) */
   0xda440100, /* stfd    f18,256(r4) */
   0xda640108, /* stfd    f19,264(r4) */
   0xda840110, /* stfd    f20,272(r4) */
   0xdaa40118, /* stfd    f21,280(r4) */
   0xdac40120, /* stfd    f22,288(r4) */
   0xdae40128, /* stfd    f23,296(r4) */
   0xdb040130, /* stfd    f24,304(r4) */
   0xdb240138, /* stfd    f25,312(r4) */
   0xdb440140, /* stfd    f26,320(r4) */
   0xdb640148, /* stfd    f27,328(r4) */
   0xdb840150, /* stfd    f28,336(r4) */
   0xdba40158, /* stfd    f29,344(r4) */
   0xdbc40160, /* stfd    f30,352(r4) */
   0xdbe40168, /* stfd    f31,360(r4) */
   0xc9c300e0, /* lfd     f14,224(r3) */
   0xc9e300e8, /* lfd     f15,232(r3) */
   0xca0300f0, /* lfd     f16,240(r3) */
   0xca2300f8, /* lfd     f17,248(r3) */
   0xca430100, /* lfd     f18,256(r3) */
   0xca630108, /* lfd     f19,264(r3) */
   0xca830110, /* lfd     f20,272(r3) */
   0xcaa30118, /* lfd     f21,280(r3) */
   0xcac30120, /* lfd     f22,288(r3) */
   0xcae30128, /* lfd     f23,296(r3) */
   0xcb030130, /* lfd     f24,304(r3) */
   0xcb230138, /* lfd     f25,312(r3) */
   0xcb430140, /* lfd     f26,320(r3) */
   0xcb630148, /* lfd     f27,328(r3) */
   0xcb830150, /* lfd     f28,336(r3) */
   0xcba30158, /* lfd     f29,344(r3) */
   0xcbc30160, /* lfd     f30,352(r3) */
   0xcbe30168, /* lfd     f31,360(r3) */
#endif

#ifdef __ALTIVEC__
   0x7ca042a6, /* mfvrsave r5 */
   0x39040180, /* addi    r8,r4,384 */
   0x39240190, /* addi    r9,r4,400 */
   0x70a00fff, /* andi.   r0,r5,4095 */
   0x90a40034, /* stw     r5,52(r4) */
   0x4182005c, /* beq-    2 */
   0x7e8041ce, /* stvx    v20,r0,r8 */
   0x39080020, /* addi    r8,r8,32 */
   0x7ea049ce, /* stvx    v21,r0,r9 */
   0x39290020, /* addi    r9,r9,32 */
   0x7ec041ce, /* stvx    v22,r0,r8 */
   0x39080020, /* addi    r8,r8,32 */
   0x7ee049ce, /* stvx    v23,r0,r9 */
   0x39290020, /* addi    r9,r9,32 */
   0x7f0041ce, /* stvx    v24,r0,r8 */
   0x39080020, /* addi    r8,r8,32 */
   0x7f2049ce, /* stvx    v25,r0,r9 */
   0x39290020, /* addi    r9,r9,32 */
   0x7f4041ce, /* stvx    v26,r0,r8 */
   0x39080020, /* addi    r8,r8,32 */
   0x7f6049ce, /* stvx    v27,r0,r9 */
   0x39290020, /* addi    r9,r9,32 */
   0x7f8041ce, /* stvx    v28,r0,r8 */
   0x39080020, /* addi    r8,r8,32 */
   0x7fa049ce, /* stvx    v29,r0,r9 */
   0x39290020, /* addi    r9,r9,32 */
   0x7fc041ce, /* stvx    v30,r0,r8 */
   0x7fe049ce, /* stvx    v31,r0,r9 */
   0x80a30034,/*2:lwz     r5,52(r3) */
   0x39030180, /* addi    r8,r3,384 */
   0x39230190, /* addi    r9,r3,400 */
   0x70a00fff, /* andi.   r0,r5,4095 */
   0x7ca043a6, /* mtvrsave r5 */
   0x4d820420, /* beqctr */
   0x7e8040ce, /* lvx     v20,r0,r8 */
   0x39080020, /* addi    r8,r8,32 */
   0x7ea048ce, /* lvx     v21,r0,r9 */
   0x39290020, /* addi    r9,r9,32 */
   0x7ec040ce, /* lvx     v22,r0,r8 */
   0x39080020, /* addi    r8,r8,32 */
   0x7ee048ce, /* lvx     v23,r0,r9 */
   0x39290020, /* addi    r9,r9,32 */
   0x7f0040ce, /* lvx     v24,r0,r8 */
   0x39080020, /* addi    r8,r8,32 */
   0x7f2048ce, /* lvx     v25,r0,r9 */
   0x39290020, /* addi    r9,r9,32 */
   0x7f4040ce, /* lvx     v26,r0,r8 */
   0x39080020, /* addi    r8,r8,32 */
   0x7f6048ce, /* lvx     v27,r0,r9 */
   0x39290020, /* addi    r9,r9,32 */
   0x7f8040ce, /* lvx     v28,r0,r8 */
   0x39080020, /* addi    r8,r8,32 */
   0x7fa048ce, /* lvx     v29,r0,r9 */
   0x39290020, /* addi    r9,r9,32 */
   0x7fc040ce, /* lvx     v30,r0,r8 */
   0x7fe048ce, /* lvx     v31,r0,r9 */
#endif

   0x4e800420, /* bctr */
};

   /* CO_SWAP_ASM(x, y): save the current context into y and resume x.
   Arguments are parenthesised so that any expression can be passed; note
   that x may be evaluated more than once, so it must be side-effect free. */
   #if LIBCO_PPCDESC
      /* Function call goes through indirect descriptor */
      #define CO_SWAP_ASM(x, y) \
         ((void (*)(cothread_t, cothread_t)) (uintptr_t) (x))((x), (y))
   #else
      /* Function call goes directly to code */
      #define CO_SWAP_ASM(x, y) \
         ((void (*)(cothread_t, cothread_t)) (uintptr_t) libco_ppc_code)((x), (y))
   #endif

#endif
284
/* Allocates an uninitialised state block of `size` bytes.

When calls go through function descriptors (LIBCO_PPCDESC), the first three
pointers of the block are filled with a copy of the descriptor that `entry`
points at, and the first pointer is then redirected to the swap routine, so
that the block itself can be called as a function. Without descriptors,
`entry` is not used here.

Returns the block, or NULL if allocation failed. The caller owns the block
and releases it with free(). */
static uint32_t* co_create_(unsigned size, uintptr_t entry)
{
   /* Cast kept because the file also supports being built as C++ */
   uint32_t *t = (uint32_t*) malloc(size);

#if LIBCO_PPCDESC
   if (t)
   {
      /* Copy entry's descriptor */
      memcpy(t, (void*) entry, sizeof(void*) * 3);

      /* Set function pointer to swap routine */
   #ifdef LIBCO_PPC_ASM
      *(const void**) t = *(void**) &co_swap_asm;
   #else
      *(const void**) t = libco_ppc_code;
   #endif
   }
#else
   (void) entry;
#endif

   return t;
}
306
307cothread_t co_create(unsigned int size, void (*entry_)(void))
308{
309 uintptr_t entry = (uintptr_t) entry_;
310 uint32_t *t = NULL;
311
312 /* Be sure main thread was successfully allocated */
313 if (co_active())
314 {
315 size += state_size + above_stack + stack_align;
316 t = co_create_(size, entry);
317 }
318
319 if (t)
320 {
321 uintptr_t sp;
322#if LIBCO_PPC64
323 int shift = 16;
324#else
325 int shift = 0;
326#endif
327 /* Save current registers into new thread, so that any special ones will
328 have proper values when thread is begun */
329 CO_SWAP_ASM(t, t);
330
331#if LIBCO_PPCDESC
332 /* Get real address */
333 entry = (uintptr_t) *(void**)entry;
334#endif
335
336 /* Put stack near end of block, and align */
337 sp = (uintptr_t) t + size - above_stack;
338 sp -= sp % stack_align;
339
340 /* On PPC32, we save and restore GPRs as 32 bits. For PPC64, we
341 save and restore them as 64 bits, regardless of the size the ABI
342 uses. So, we manually write pointers at the proper size. We always
343 save and restore at the same address, and since PPC is big-endian,
344 we must put the low byte first on PPC32. */
345
346 /* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts
347 and don't have to care how many bits uintptr_t is. */
348
349 /* Set up so entry will be called on next swap */
350 t [8] = (uint32_t) (entry >> shift >> shift);
351 t [9] = (uint32_t) entry;
352
353 t [10] = (uint32_t) (sp >> shift >> shift);
354 t [11] = (uint32_t) sp;
355 }
356
357 return t;
358}
359
360void co_delete(cothread_t t)
361{
362 free(t);
363}
364
365static void co_init_(void)
366{
367#if LIBCO_MPROTECT
368 /* TODO: pre- and post-pad PPC code so that this doesn't make other
369 data executable and writable */
370 long page_size = sysconf(_SC_PAGESIZE);
371 if (page_size > 0)
372 {
373 uintptr_t align = page_size;
374 uintptr_t begin = (uintptr_t) libco_ppc_code;
375 uintptr_t end = begin + sizeof libco_ppc_code;
376
377 /* Align beginning and end */
378 end += align - 1;
379 end -= end % align;
380 begin -= begin % align;
381
382 mprotect((void*)begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC);
383 }
384#endif
385
386 co_active_handle = co_create_(state_size, (uintptr_t) &co_switch);
387}
388
389cothread_t co_active(void)
390{
391 if (!co_active_handle)
392 co_init_();
393
394 return co_active_handle;
395}
396
397void co_switch(cothread_t t)
398{
399 cothread_t old = co_active_handle;
400 co_active_handle = t;
401 CO_SWAP_ASM(t, old);
402}