Commit | Line | Data |
---|---|---|
3719602c PC |
1 | /* |
2 | libco.ppc (2010-10-17) | |
3 | author: blargg | |
4 | license: public domain | |
5 | */ | |
6 | ||
7 | /* PowerPC 32/64 using embedded or external asm, with optional | |
8 | floating-point and AltiVec save/restore */ | |
9 | ||
10 | #define LIBCO_C | |
11 | #include <libco.h> | |
12 | #include <stdlib.h> | |
13 | #include <stdint.h> | |
14 | #include <string.h> | |
15 | ||
16 | #define LIBCO_MPROTECT (__unix__ && !LIBCO_PPC_ASM) | |
17 | ||
18 | #if LIBCO_MPROTECT | |
19 | #include <unistd.h> | |
20 | #include <sys/mman.h> | |
21 | #endif | |
22 | ||
23 | /* State format (offsets in 32-bit words) | |
24 | ||
25 | +0 Pointer to swap code | |
26 | Rest of function descriptor for entry function | |
27 | +8 PC | |
28 | +10 SP | |
29 | Special regs | |
30 | GPRs | |
31 | FPRs | |
32 | VRs | |
33 | stack | |
34 | */ | |
35 | ||
36 | enum { state_size = 1024 }; /* bytes malloc'd for the main cothread's state block; also added to every co_create() request */ | |
37 | enum { above_stack = 2048 }; /* bytes between a new cothread's initial stack pointer and the end of its allocation */ | |
38 | enum { stack_align = 256 }; /* a new cothread's initial stack pointer is rounded down to a multiple of this many bytes */ | |
39 | ||
40 | static thread_local cothread_t co_active_handle = 0; /* per-thread handle of the running cothread; 0 until co_active() has run co_init_() */ | |
41 | ||
42 | /**** Determine environment ****/ | |
43 | ||
44 | #define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__) /* meant for #if tests: nonzero when any of these 64-bit PowerPC macros is nonzero */ | |
45 | ||
46 | /* Whether function calls are indirect through a descriptor, | |
47 | or are directly to function. A LIBCO_PPCDESC that is already defined at this point is left untouched. */ | |
48 | #ifndef LIBCO_PPCDESC | |
49 | #if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64) | |
50 | #define LIBCO_PPCDESC 1 | |
51 | #endif | |
52 | #endif | |
53 | ||
54 | #ifdef LIBCO_PPC_ASM | |
55 | ||
56 | #ifdef __cplusplus | |
57 | extern "C" | |
58 | #endif | |
59 | ||
60 | /* Swap code is in ppc.S; CO_SWAP_ASM hands both cothread handles straight to it */ | |
61 | void co_swap_asm(cothread_t, cothread_t); | |
62 | #define CO_SWAP_ASM(x, y) co_swap_asm(x, y) | |
63 | ||
64 | #else | |
65 | ||
66 | /* Swap code is here in array. Please leave disassembly comments, | |
67 | as they make it easy to see what it does, and reorder instructions | |
68 | if one wants to see whether that improves performance. Per the disassembly, the routine stores CR, LR, r1 (SP) and the listed GPRs (plus FPRs and VRs when those sections are compiled in) into the state addressed by r4, loads the same set from the state addressed by r3, and finally branches through CTR to the PC word loaded from byte offset 32 of the r3 state. */ | |
69 | static const uint32_t libco_ppc_code [] = { | |
70 | #if LIBCO_PPC64 | |
71 | 0x7d000026, /* mfcr r8 */ | |
72 | 0xf8240028, /* std r1,40(r4) */ | |
73 | 0x7d2802a6, /* mflr r9 */ | |
74 | 0xf9c40048, /* std r14,72(r4) */ | |
75 | 0xf9e40050, /* std r15,80(r4) */ | |
76 | 0xfa040058, /* std r16,88(r4) */ | |
77 | 0xfa240060, /* std r17,96(r4) */ | |
78 | 0xfa440068, /* std r18,104(r4) */ | |
79 | 0xfa640070, /* std r19,112(r4) */ | |
80 | 0xfa840078, /* std r20,120(r4) */ | |
81 | 0xfaa40080, /* std r21,128(r4) */ | |
82 | 0xfac40088, /* std r22,136(r4) */ | |
83 | 0xfae40090, /* std r23,144(r4) */ | |
84 | 0xfb040098, /* std r24,152(r4) */ | |
85 | 0xfb2400a0, /* std r25,160(r4) */ | |
86 | 0xfb4400a8, /* std r26,168(r4) */ | |
87 | 0xfb6400b0, /* std r27,176(r4) */ | |
88 | 0xfb8400b8, /* std r28,184(r4) */ | |
89 | 0xfba400c0, /* std r29,192(r4) */ | |
90 | 0xfbc400c8, /* std r30,200(r4) */ | |
91 | 0xfbe400d0, /* std r31,208(r4) */ | |
92 | 0xf9240020, /* std r9,32(r4) */ | |
93 | 0xe8e30020, /* ld r7,32(r3) */ | |
94 | 0xe8230028, /* ld r1,40(r3) */ | |
95 | 0x48000009, /* bl 1 ; leaves LR pointing at the trap below */ | |
96 | 0x7fe00008, /* trap ; hit if a cothread's entry function ever returns */ | |
97 | 0x91040030,/*1:stw r8,48(r4) */ | |
98 | 0x80c30030, /* lwz r6,48(r3) */ | |
99 | 0x7ce903a6, /* mtctr r7 */ | |
100 | 0xe9c30048, /* ld r14,72(r3) */ | |
101 | 0xe9e30050, /* ld r15,80(r3) */ | |
102 | 0xea030058, /* ld r16,88(r3) */ | |
103 | 0xea230060, /* ld r17,96(r3) */ | |
104 | 0xea430068, /* ld r18,104(r3) */ | |
105 | 0xea630070, /* ld r19,112(r3) */ | |
106 | 0xea830078, /* ld r20,120(r3) */ | |
107 | 0xeaa30080, /* ld r21,128(r3) */ | |
108 | 0xeac30088, /* ld r22,136(r3) */ | |
109 | 0xeae30090, /* ld r23,144(r3) */ | |
110 | 0xeb030098, /* ld r24,152(r3) */ | |
111 | 0xeb2300a0, /* ld r25,160(r3) */ | |
112 | 0xeb4300a8, /* ld r26,168(r3) */ | |
113 | 0xeb6300b0, /* ld r27,176(r3) */ | |
114 | 0xeb8300b8, /* ld r28,184(r3) */ | |
115 | 0xeba300c0, /* ld r29,192(r3) */ | |
116 | 0xebc300c8, /* ld r30,200(r3) */ | |
117 | 0xebe300d0, /* ld r31,208(r3) */ | |
118 | 0x7ccff120, /* mtcr r6 */ | |
119 | #else | |
120 | 0x7d000026, /* mfcr r8 */ | |
121 | 0x90240028, /* stw r1,40(r4) */ | |
122 | 0x7d2802a6, /* mflr r9 */ | |
123 | 0x91a4003c, /* stw r13,60(r4) */ | |
124 | 0x91c40040, /* stw r14,64(r4) */ | |
125 | 0x91e40044, /* stw r15,68(r4) */ | |
126 | 0x92040048, /* stw r16,72(r4) */ | |
127 | 0x9224004c, /* stw r17,76(r4) */ | |
128 | 0x92440050, /* stw r18,80(r4) */ | |
129 | 0x92640054, /* stw r19,84(r4) */ | |
130 | 0x92840058, /* stw r20,88(r4) */ | |
131 | 0x92a4005c, /* stw r21,92(r4) */ | |
132 | 0x92c40060, /* stw r22,96(r4) */ | |
133 | 0x92e40064, /* stw r23,100(r4) */ | |
134 | 0x93040068, /* stw r24,104(r4) */ | |
135 | 0x9324006c, /* stw r25,108(r4) */ | |
136 | 0x93440070, /* stw r26,112(r4) */ | |
137 | 0x93640074, /* stw r27,116(r4) */ | |
138 | 0x93840078, /* stw r28,120(r4) */ | |
139 | 0x93a4007c, /* stw r29,124(r4) */ | |
140 | 0x93c40080, /* stw r30,128(r4) */ | |
141 | 0x93e40084, /* stw r31,132(r4) */ | |
142 | 0x91240020, /* stw r9,32(r4) */ | |
143 | 0x80e30020, /* lwz r7,32(r3) */ | |
144 | 0x80230028, /* lwz r1,40(r3) */ | |
145 | 0x48000009, /* bl 1 ; leaves LR pointing at the trap below */ | |
146 | 0x7fe00008, /* trap ; hit if a cothread's entry function ever returns */ | |
147 | 0x91040030,/*1:stw r8,48(r4) */ | |
148 | 0x80c30030, /* lwz r6,48(r3) */ | |
149 | 0x7ce903a6, /* mtctr r7 */ | |
150 | 0x81a3003c, /* lwz r13,60(r3) */ | |
151 | 0x81c30040, /* lwz r14,64(r3) */ | |
152 | 0x81e30044, /* lwz r15,68(r3) */ | |
153 | 0x82030048, /* lwz r16,72(r3) */ | |
154 | 0x8223004c, /* lwz r17,76(r3) */ | |
155 | 0x82430050, /* lwz r18,80(r3) */ | |
156 | 0x82630054, /* lwz r19,84(r3) */ | |
157 | 0x82830058, /* lwz r20,88(r3) */ | |
158 | 0x82a3005c, /* lwz r21,92(r3) */ | |
159 | 0x82c30060, /* lwz r22,96(r3) */ | |
160 | 0x82e30064, /* lwz r23,100(r3) */ | |
161 | 0x83030068, /* lwz r24,104(r3) */ | |
162 | 0x8323006c, /* lwz r25,108(r3) */ | |
163 | 0x83430070, /* lwz r26,112(r3) */ | |
164 | 0x83630074, /* lwz r27,116(r3) */ | |
165 | 0x83830078, /* lwz r28,120(r3) */ | |
166 | 0x83a3007c, /* lwz r29,124(r3) */ | |
167 | 0x83c30080, /* lwz r30,128(r3) */ | |
168 | 0x83e30084, /* lwz r31,132(r3) */ | |
169 | 0x7ccff120, /* mtcr r6 */ | |
170 | #endif | |
171 | ||
172 | #ifndef LIBCO_PPC_NOFP | |
173 | 0xd9c400e0, /* stfd f14,224(r4) */ | |
174 | 0xd9e400e8, /* stfd f15,232(r4) */ | |
175 | 0xda0400f0, /* stfd f16,240(r4) */ | |
176 | 0xda2400f8, /* stfd f17,248(r4) */ | |
177 | 0xda440100, /* stfd f18,256(r4) */ | |
178 | 0xda640108, /* stfd f19,264(r4) */ | |
179 | 0xda840110, /* stfd f20,272(r4) */ | |
180 | 0xdaa40118, /* stfd f21,280(r4) */ | |
181 | 0xdac40120, /* stfd f22,288(r4) */ | |
182 | 0xdae40128, /* stfd f23,296(r4) */ | |
183 | 0xdb040130, /* stfd f24,304(r4) */ | |
184 | 0xdb240138, /* stfd f25,312(r4) */ | |
185 | 0xdb440140, /* stfd f26,320(r4) */ | |
186 | 0xdb640148, /* stfd f27,328(r4) */ | |
187 | 0xdb840150, /* stfd f28,336(r4) */ | |
188 | 0xdba40158, /* stfd f29,344(r4) */ | |
189 | 0xdbc40160, /* stfd f30,352(r4) */ | |
190 | 0xdbe40168, /* stfd f31,360(r4) */ | |
191 | 0xc9c300e0, /* lfd f14,224(r3) */ | |
192 | 0xc9e300e8, /* lfd f15,232(r3) */ | |
193 | 0xca0300f0, /* lfd f16,240(r3) */ | |
194 | 0xca2300f8, /* lfd f17,248(r3) */ | |
195 | 0xca430100, /* lfd f18,256(r3) */ | |
196 | 0xca630108, /* lfd f19,264(r3) */ | |
197 | 0xca830110, /* lfd f20,272(r3) */ | |
198 | 0xcaa30118, /* lfd f21,280(r3) */ | |
199 | 0xcac30120, /* lfd f22,288(r3) */ | |
200 | 0xcae30128, /* lfd f23,296(r3) */ | |
201 | 0xcb030130, /* lfd f24,304(r3) */ | |
202 | 0xcb230138, /* lfd f25,312(r3) */ | |
203 | 0xcb430140, /* lfd f26,320(r3) */ | |
204 | 0xcb630148, /* lfd f27,328(r3) */ | |
205 | 0xcb830150, /* lfd f28,336(r3) */ | |
206 | 0xcba30158, /* lfd f29,344(r3) */ | |
207 | 0xcbc30160, /* lfd f30,352(r3) */ | |
208 | 0xcbe30168, /* lfd f31,360(r3) */ | |
209 | #endif | |
210 | ||
211 | #ifdef __ALTIVEC__ | |
212 | 0x7ca042a6, /* mfvrsave r5 */ | |
213 | 0x39040180, /* addi r8,r4,384 */ | |
214 | 0x39240190, /* addi r9,r4,400 */ | |
215 | 0x70a00fff, /* andi. r0,r5,4095 ; low 12 VRSAVE bits cover v20-v31 */ | |
216 | 0x90a40034, /* stw r5,52(r4) */ | |
217 | 0x4182005c, /* beq- 2 ; none marked in use: skip the vector stores */ | |
218 | 0x7e8041ce, /* stvx v20,r0,r8 */ | |
219 | 0x39080020, /* addi r8,r8,32 */ | |
220 | 0x7ea049ce, /* stvx v21,r0,r9 */ | |
221 | 0x39290020, /* addi r9,r9,32 */ | |
222 | 0x7ec041ce, /* stvx v22,r0,r8 */ | |
223 | 0x39080020, /* addi r8,r8,32 */ | |
224 | 0x7ee049ce, /* stvx v23,r0,r9 */ | |
225 | 0x39290020, /* addi r9,r9,32 */ | |
226 | 0x7f0041ce, /* stvx v24,r0,r8 */ | |
227 | 0x39080020, /* addi r8,r8,32 */ | |
228 | 0x7f2049ce, /* stvx v25,r0,r9 */ | |
229 | 0x39290020, /* addi r9,r9,32 */ | |
230 | 0x7f4041ce, /* stvx v26,r0,r8 */ | |
231 | 0x39080020, /* addi r8,r8,32 */ | |
232 | 0x7f6049ce, /* stvx v27,r0,r9 */ | |
233 | 0x39290020, /* addi r9,r9,32 */ | |
234 | 0x7f8041ce, /* stvx v28,r0,r8 */ | |
235 | 0x39080020, /* addi r8,r8,32 */ | |
236 | 0x7fa049ce, /* stvx v29,r0,r9 */ | |
237 | 0x39290020, /* addi r9,r9,32 */ | |
238 | 0x7fc041ce, /* stvx v30,r0,r8 */ | |
239 | 0x7fe049ce, /* stvx v31,r0,r9 */ | |
240 | 0x80a30034,/*2:lwz r5,52(r3) */ | |
241 | 0x39030180, /* addi r8,r3,384 */ | |
242 | 0x39230190, /* addi r9,r3,400 */ | |
243 | 0x70a00fff, /* andi. r0,r5,4095 */ | |
244 | 0x7ca043a6, /* mtvrsave r5 */ | |
245 | 0x4d820420, /* beqctr ; none marked in use: jump to the new PC right away */ | |
246 | 0x7e8040ce, /* lvx v20,r0,r8 */ | |
247 | 0x39080020, /* addi r8,r8,32 */ | |
248 | 0x7ea048ce, /* lvx v21,r0,r9 */ | |
249 | 0x39290020, /* addi r9,r9,32 */ | |
250 | 0x7ec040ce, /* lvx v22,r0,r8 */ | |
251 | 0x39080020, /* addi r8,r8,32 */ | |
252 | 0x7ee048ce, /* lvx v23,r0,r9 */ | |
253 | 0x39290020, /* addi r9,r9,32 */ | |
254 | 0x7f0040ce, /* lvx v24,r0,r8 */ | |
255 | 0x39080020, /* addi r8,r8,32 */ | |
256 | 0x7f2048ce, /* lvx v25,r0,r9 */ | |
257 | 0x39290020, /* addi r9,r9,32 */ | |
258 | 0x7f4040ce, /* lvx v26,r0,r8 */ | |
259 | 0x39080020, /* addi r8,r8,32 */ | |
260 | 0x7f6048ce, /* lvx v27,r0,r9 */ | |
261 | 0x39290020, /* addi r9,r9,32 */ | |
262 | 0x7f8040ce, /* lvx v28,r0,r8 */ | |
263 | 0x39080020, /* addi r8,r8,32 */ | |
264 | 0x7fa048ce, /* lvx v29,r0,r9 */ | |
265 | 0x39290020, /* addi r9,r9,32 */ | |
266 | 0x7fc040ce, /* lvx v30,r0,r8 */ | |
267 | 0x7fe048ce, /* lvx v31,r0,r9 */ | |
268 | #endif | |
269 | ||
270 | 0x4e800420, /* bctr */ | |
271 | }; | |
272 | ||
/* CO_SWAP_ASM(x, y): run the swap routine with cothread handles x and y as
   its two arguments. Both parameters are parenthesised so that a compound
   argument expression is cast and passed as a whole. */
#if LIBCO_PPCDESC
/* Function call goes through indirect descriptor: the state block x starts
   with a descriptor whose code pointer is the swap routine (see
   co_create_), so x itself is called. Note that x is evaluated twice. */
#define CO_SWAP_ASM(x, y) \
   ((void (*)(cothread_t, cothread_t)) (uintptr_t) (x))((x), (y))
#else
/* Function call goes directly to code */
#define CO_SWAP_ASM(x, y) \
   ((void (*)(cothread_t, cothread_t)) (uintptr_t) libco_ppc_code)((x), (y))
#endif
282 | ||
283 | #endif | |
284 | ||
/* Allocate a raw cothread state block of `size` bytes.

   When calls go through function descriptors (LIBCO_PPCDESC), the block is
   seeded with a copy of the three-pointer descriptor that `entry` points
   at, and the first pointer of that copy is then replaced with the swap
   routine's address. Without LIBCO_PPCDESC, `entry` is not used here.

   Returns the block, or a null pointer if malloc() failed. */
static uint32_t* co_create_(unsigned size, uintptr_t entry)
{
   uint32_t *state = (uint32_t*) malloc(size);

#if LIBCO_PPCDESC
   if (state != NULL)
   {
      const void **code_slot = (const void**) state;

      /* Start from the entry function's own descriptor */
      memcpy(state, (void*) entry, 3 * sizeof(void*));

      /* Then redirect its code pointer to the swap routine */
#ifdef LIBCO_PPC_ASM
      *code_slot = *(void**) &co_swap_asm;
#else
      *code_slot = libco_ppc_code;
#endif
   }
#endif

   return state;
}
306 | ||
307 | cothread_t co_create(unsigned int size, void (*entry_)(void)) | |
308 | { | |
309 | uintptr_t entry = (uintptr_t) entry_; | |
310 | uint32_t *t = NULL; | |
311 | ||
312 | /* Be sure main thread was successfully allocated */ | |
313 | if (co_active()) | |
314 | { | |
315 | size += state_size + above_stack + stack_align; | |
316 | t = co_create_(size, entry); | |
317 | } | |
318 | ||
319 | if (t) | |
320 | { | |
321 | uintptr_t sp; | |
322 | #if LIBCO_PPC64 | |
323 | int shift = 16; | |
324 | #else | |
325 | int shift = 0; | |
326 | #endif | |
327 | /* Save current registers into new thread, so that any special ones will | |
328 | have proper values when thread is begun */ | |
329 | CO_SWAP_ASM(t, t); | |
330 | ||
331 | #if LIBCO_PPCDESC | |
332 | /* Get real address */ | |
333 | entry = (uintptr_t) *(void**)entry; | |
334 | #endif | |
335 | ||
336 | /* Put stack near end of block, and align */ | |
337 | sp = (uintptr_t) t + size - above_stack; | |
338 | sp -= sp % stack_align; | |
339 | ||
340 | /* On PPC32, we save and restore GPRs as 32 bits. For PPC64, we | |
341 | save and restore them as 64 bits, regardless of the size the ABI | |
342 | uses. So, we manually write pointers at the proper size. We always | |
343 | save and restore at the same address, and since PPC is big-endian, | |
344 | we must put the low byte first on PPC32. */ | |
345 | ||
346 | /* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts | |
347 | and don't have to care how many bits uintptr_t is. */ | |
348 | ||
349 | /* Set up so entry will be called on next swap */ | |
350 | t [8] = (uint32_t) (entry >> shift >> shift); | |
351 | t [9] = (uint32_t) entry; | |
352 | ||
353 | t [10] = (uint32_t) (sp >> shift >> shift); | |
354 | t [11] = (uint32_t) sp; | |
355 | } | |
356 | ||
357 | return t; | |
358 | } | |
359 | ||
360 | void co_delete(cothread_t t) | |
361 | { | |
362 | free(t); | |
363 | } | |
364 | ||
365 | static void co_init_(void) | |
366 | { | |
367 | #if LIBCO_MPROTECT | |
368 | /* TODO: pre- and post-pad PPC code so that this doesn't make other | |
369 | data executable and writable */ | |
370 | long page_size = sysconf(_SC_PAGESIZE); | |
371 | if (page_size > 0) | |
372 | { | |
373 | uintptr_t align = page_size; | |
374 | uintptr_t begin = (uintptr_t) libco_ppc_code; | |
375 | uintptr_t end = begin + sizeof libco_ppc_code; | |
376 | ||
377 | /* Align beginning and end */ | |
378 | end += align - 1; | |
379 | end -= end % align; | |
380 | begin -= begin % align; | |
381 | ||
382 | mprotect((void*)begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC); | |
383 | } | |
384 | #endif | |
385 | ||
386 | co_active_handle = co_create_(state_size, (uintptr_t) &co_switch); | |
387 | } | |
388 | ||
389 | cothread_t co_active(void) | |
390 | { | |
391 | if (!co_active_handle) | |
392 | co_init_(); | |
393 | ||
394 | return co_active_handle; | |
395 | } | |
396 | ||
397 | void co_switch(cothread_t t) | |
398 | { | |
399 | cothread_t old = co_active_handle; | |
400 | co_active_handle = t; | |
401 | CO_SWAP_ASM(t, old); | |
402 | } |