| 1 | /* |
| 2 | libco.ppc (2010-10-17) |
| 3 | author: blargg |
| 4 | license: public domain |
| 5 | */ |
| 6 | |
| 7 | /* PowerPC 32/64 using embedded or external asm, with optional |
| 8 | floating-point and AltiVec save/restore */ |
| 9 | |
| 10 | #define LIBCO_C |
| 11 | #include <libco.h> |
| 12 | #include <stdlib.h> |
| 13 | #include <stdint.h> |
| 14 | #include <string.h> |
| 15 | |
/* 1 when the swap code is the embedded libco_ppc_code array (no external
   asm) on a Unix target, in which case co_init_() uses mprotect() to make
   that array executable; 0 otherwise.

   Tested with defined() so that it agrees with the "#ifdef LIBCO_PPC_ASM"
   checks used by the rest of this file (an empty or zero definition of
   LIBCO_PPC_ASM still selects the external asm) and so that the macro always
   expands to a plain integer constant. */
#if defined(__unix__) && !defined(LIBCO_PPC_ASM)
#define LIBCO_MPROTECT 1
#else
#define LIBCO_MPROTECT 0
#endif
| 17 | |
| 18 | #if LIBCO_MPROTECT |
| 19 | #include <unistd.h> |
| 20 | #include <sys/mman.h> |
| 21 | #endif |
| 22 | |
/* State format (offsets in 32-bit words)

    +0  Pointer to swap code                            (LIBCO_PPCDESC only)
        Rest of function descriptor for entry function  (LIBCO_PPCDESC only)
    +8  PC (resume address; two words on PPC64)
   +10  SP (two words on PPC64)
        Special regs: CR at +12, VRSAVE at +13
        GPRs: r13-r31 from +15 on PPC32, r14-r31 from +18 on PPC64
        FPRs: f14-f31 from +56
        VRs:  v20-v31 from +96
        stack
*/
| 35 | |
/* Layout constants for a thread block, all in bytes */
enum {
    /* Room reserved at the start of every block for saved registers; also
       the whole allocation for the main thread's block */
    state_size = 1024,

    /* Distance kept between the initial stack pointer and the end of the
       block */
    above_stack = 2048,

    /* The initial stack pointer is rounded down to a multiple of this */
    stack_align = 256
};
| 39 | |
/* Handle of the cothread that is currently running. Starts out as 0;
   co_active() fills it in on first use through co_init_(), and co_switch()
   overwrites it on every switch. Declared thread_local, so presumably there
   is one per OS thread -- that depends on how libco.h defines thread_local. */
static thread_local cothread_t co_active_handle = 0;
| 41 | |
| 42 | /**** Determine environment ****/ |
| 43 | |
/* Evaluates to non-zero in an #if when any of the listed 64-bit PowerPC
   compiler macros is set. Only meaningful inside #if, where identifiers
   that are not defined count as 0. */
#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__)

/* Whether function calls are indirect through a descriptor,
   or are directly to function.
   The build may predefine LIBCO_PPCDESC to override the detection below.
   When detection does not select descriptors the macro is left undefined,
   which "#if LIBCO_PPCDESC" treats as 0. */
#ifndef LIBCO_PPCDESC
#if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64)
#define LIBCO_PPCDESC 1
#endif
#endif
| 53 | |
| 54 | #ifdef LIBCO_PPC_ASM |
| 55 | |
| 56 | #ifdef __cplusplus |
| 57 | extern "C" |
| 58 | #endif |
| 59 | |
| 60 | /* Swap code is in ppc.S */ |
| 61 | void co_swap_asm(cothread_t, cothread_t); |
| 62 | #define CO_SWAP_ASM(x, y) co_swap_asm(x, y) |
| 63 | |
| 64 | #else |
| 65 | |
/* Swap code is here in array. Please leave disassembly comments,
   as they make it easy to see what it does, and reorder instructions
   if one wants to see whether that improves performance.

   The code is entered as a function of two thread-state pointers. Every
   store goes through r4 and every load through r3, which matches
   co_switch() passing the thread to resume first and the thread being
   suspended second.

   Byte offsets used within a state block:
      32  saved LR of the suspended thread / address the resumed thread
          continues at (loaded into CTR)
      40  r1 (stack pointer)
      48  CR
      52  VRSAVE (AltiVec builds only)
      60  r13-r31, 4 bytes each (32-bit build)
      72  r14-r31, 8 bytes each (64-bit build; r13 is not saved there)
     224  f14-f31, 8 bytes each (unless LIBCO_PPC_NOFP)
     384  v20-v31, 16 bytes each (AltiVec builds only)

   The routine ends by branching to CTR, i.e. to the address read from
   offset 32 of the thread being resumed. */
static const uint32_t libco_ppc_code [] = {
#if LIBCO_PPC64
    /* 64-bit: save CR (in r8), SP, LR (in r9) and r14-r31 into r4 */
    0x7d000026, /* mfcr r8 */
    0xf8240028, /* std r1,40(r4) */
    0x7d2802a6, /* mflr r9 */
    0xf9c40048, /* std r14,72(r4) */
    0xf9e40050, /* std r15,80(r4) */
    0xfa040058, /* std r16,88(r4) */
    0xfa240060, /* std r17,96(r4) */
    0xfa440068, /* std r18,104(r4) */
    0xfa640070, /* std r19,112(r4) */
    0xfa840078, /* std r20,120(r4) */
    0xfaa40080, /* std r21,128(r4) */
    0xfac40088, /* std r22,136(r4) */
    0xfae40090, /* std r23,144(r4) */
    0xfb040098, /* std r24,152(r4) */
    0xfb2400a0, /* std r25,160(r4) */
    0xfb4400a8, /* std r26,168(r4) */
    0xfb6400b0, /* std r27,176(r4) */
    0xfb8400b8, /* std r28,184(r4) */
    0xfba400c0, /* std r29,192(r4) */
    0xfbc400c8, /* std r30,200(r4) */
    0xfbe400d0, /* std r31,208(r4) */
    0xf9240020, /* std r9,32(r4) */
    /* Fetch the resume address (r7) and stack pointer of the thread in r3 */
    0xe8e30020, /* ld r7,32(r3) */
    0xe8230028, /* ld r1,40(r3) */
    /* Branch-and-link over the trap to label 1; LR was already copied to r9
       and stored above */
    0x48000009, /* bl 1 */
    0x7fe00008, /* trap */
    /* Store the old CR, then load CR (via r6), CTR and r14-r31 from r3 */
    0x91040030,/*1:stw r8,48(r4) */
    0x80c30030, /* lwz r6,48(r3) */
    0x7ce903a6, /* mtctr r7 */
    0xe9c30048, /* ld r14,72(r3) */
    0xe9e30050, /* ld r15,80(r3) */
    0xea030058, /* ld r16,88(r3) */
    0xea230060, /* ld r17,96(r3) */
    0xea430068, /* ld r18,104(r3) */
    0xea630070, /* ld r19,112(r3) */
    0xea830078, /* ld r20,120(r3) */
    0xeaa30080, /* ld r21,128(r3) */
    0xeac30088, /* ld r22,136(r3) */
    0xeae30090, /* ld r23,144(r3) */
    0xeb030098, /* ld r24,152(r3) */
    0xeb2300a0, /* ld r25,160(r3) */
    0xeb4300a8, /* ld r26,168(r3) */
    0xeb6300b0, /* ld r27,176(r3) */
    0xeb8300b8, /* ld r28,184(r3) */
    0xeba300c0, /* ld r29,192(r3) */
    0xebc300c8, /* ld r30,200(r3) */
    0xebe300d0, /* ld r31,208(r3) */
    0x7ccff120, /* mtcr r6 */
#else
    /* 32-bit: same sequence with word-sized accesses; r13 is saved as well */
    0x7d000026, /* mfcr r8 */
    0x90240028, /* stw r1,40(r4) */
    0x7d2802a6, /* mflr r9 */
    0x91a4003c, /* stw r13,60(r4) */
    0x91c40040, /* stw r14,64(r4) */
    0x91e40044, /* stw r15,68(r4) */
    0x92040048, /* stw r16,72(r4) */
    0x9224004c, /* stw r17,76(r4) */
    0x92440050, /* stw r18,80(r4) */
    0x92640054, /* stw r19,84(r4) */
    0x92840058, /* stw r20,88(r4) */
    0x92a4005c, /* stw r21,92(r4) */
    0x92c40060, /* stw r22,96(r4) */
    0x92e40064, /* stw r23,100(r4) */
    0x93040068, /* stw r24,104(r4) */
    0x9324006c, /* stw r25,108(r4) */
    0x93440070, /* stw r26,112(r4) */
    0x93640074, /* stw r27,116(r4) */
    0x93840078, /* stw r28,120(r4) */
    0x93a4007c, /* stw r29,124(r4) */
    0x93c40080, /* stw r30,128(r4) */
    0x93e40084, /* stw r31,132(r4) */
    0x91240020, /* stw r9,32(r4) */
    /* Fetch the resume address (r7) and stack pointer of the thread in r3 */
    0x80e30020, /* lwz r7,32(r3) */
    0x80230028, /* lwz r1,40(r3) */
    /* Branch-and-link over the trap to label 1; LR was already copied to r9
       and stored above */
    0x48000009, /* bl 1 */
    0x7fe00008, /* trap */
    /* Store the old CR, then load CR (via r6), CTR and r13-r31 from r3 */
    0x91040030,/*1:stw r8,48(r4) */
    0x80c30030, /* lwz r6,48(r3) */
    0x7ce903a6, /* mtctr r7 */
    0x81a3003c, /* lwz r13,60(r3) */
    0x81c30040, /* lwz r14,64(r3) */
    0x81e30044, /* lwz r15,68(r3) */
    0x82030048, /* lwz r16,72(r3) */
    0x8223004c, /* lwz r17,76(r3) */
    0x82430050, /* lwz r18,80(r3) */
    0x82630054, /* lwz r19,84(r3) */
    0x82830058, /* lwz r20,88(r3) */
    0x82a3005c, /* lwz r21,92(r3) */
    0x82c30060, /* lwz r22,96(r3) */
    0x82e30064, /* lwz r23,100(r3) */
    0x83030068, /* lwz r24,104(r3) */
    0x8323006c, /* lwz r25,108(r3) */
    0x83430070, /* lwz r26,112(r3) */
    0x83630074, /* lwz r27,116(r3) */
    0x83830078, /* lwz r28,120(r3) */
    0x83a3007c, /* lwz r29,124(r3) */
    0x83c30080, /* lwz r30,128(r3) */
    0x83e30084, /* lwz r31,132(r3) */
    0x7ccff120, /* mtcr r6 */
#endif

#ifndef LIBCO_PPC_NOFP
    /* Floating point: store f14-f31 into r4, then load them from r3.
       Define LIBCO_PPC_NOFP to leave this section out. */
    0xd9c400e0, /* stfd f14,224(r4) */
    0xd9e400e8, /* stfd f15,232(r4) */
    0xda0400f0, /* stfd f16,240(r4) */
    0xda2400f8, /* stfd f17,248(r4) */
    0xda440100, /* stfd f18,256(r4) */
    0xda640108, /* stfd f19,264(r4) */
    0xda840110, /* stfd f20,272(r4) */
    0xdaa40118, /* stfd f21,280(r4) */
    0xdac40120, /* stfd f22,288(r4) */
    0xdae40128, /* stfd f23,296(r4) */
    0xdb040130, /* stfd f24,304(r4) */
    0xdb240138, /* stfd f25,312(r4) */
    0xdb440140, /* stfd f26,320(r4) */
    0xdb640148, /* stfd f27,328(r4) */
    0xdb840150, /* stfd f28,336(r4) */
    0xdba40158, /* stfd f29,344(r4) */
    0xdbc40160, /* stfd f30,352(r4) */
    0xdbe40168, /* stfd f31,360(r4) */
    0xc9c300e0, /* lfd f14,224(r3) */
    0xc9e300e8, /* lfd f15,232(r3) */
    0xca0300f0, /* lfd f16,240(r3) */
    0xca2300f8, /* lfd f17,248(r3) */
    0xca430100, /* lfd f18,256(r3) */
    0xca630108, /* lfd f19,264(r3) */
    0xca830110, /* lfd f20,272(r3) */
    0xcaa30118, /* lfd f21,280(r3) */
    0xcac30120, /* lfd f22,288(r3) */
    0xcae30128, /* lfd f23,296(r3) */
    0xcb030130, /* lfd f24,304(r3) */
    0xcb230138, /* lfd f25,312(r3) */
    0xcb430140, /* lfd f26,320(r3) */
    0xcb630148, /* lfd f27,328(r3) */
    0xcb830150, /* lfd f28,336(r3) */
    0xcba30158, /* lfd f29,344(r3) */
    0xcbc30160, /* lfd f30,352(r3) */
    0xcbe30168, /* lfd f31,360(r3) */
#endif

#ifdef __ALTIVEC__
    /* AltiVec: VRSAVE is always stored at 52(r4). v20-v31 are stored only if
       one of the low 12 bits of VRSAVE is set; otherwise branch to label 2.
       r8 and r9 walk the save area 32 bytes at a time, 16 bytes apart. */
    0x7ca042a6, /* mfvrsave r5 */
    0x39040180, /* addi r8,r4,384 */
    0x39240190, /* addi r9,r4,400 */
    0x70a00fff, /* andi. r0,r5,4095 */
    0x90a40034, /* stw r5,52(r4) */
    0x4182005c, /* beq- 2 */
    0x7e8041ce, /* stvx v20,r0,r8 */
    0x39080020, /* addi r8,r8,32 */
    0x7ea049ce, /* stvx v21,r0,r9 */
    0x39290020, /* addi r9,r9,32 */
    0x7ec041ce, /* stvx v22,r0,r8 */
    0x39080020, /* addi r8,r8,32 */
    0x7ee049ce, /* stvx v23,r0,r9 */
    0x39290020, /* addi r9,r9,32 */
    0x7f0041ce, /* stvx v24,r0,r8 */
    0x39080020, /* addi r8,r8,32 */
    0x7f2049ce, /* stvx v25,r0,r9 */
    0x39290020, /* addi r9,r9,32 */
    0x7f4041ce, /* stvx v26,r0,r8 */
    0x39080020, /* addi r8,r8,32 */
    0x7f6049ce, /* stvx v27,r0,r9 */
    0x39290020, /* addi r9,r9,32 */
    0x7f8041ce, /* stvx v28,r0,r8 */
    0x39080020, /* addi r8,r8,32 */
    0x7fa049ce, /* stvx v29,r0,r9 */
    0x39290020, /* addi r9,r9,32 */
    0x7fc041ce, /* stvx v30,r0,r8 */
    0x7fe049ce, /* stvx v31,r0,r9 */
    /* Restore VRSAVE from 52(r3). If none of its low 12 bits is set, jump
       straight to the resumed thread (beqctr); otherwise reload v20-v31. */
    0x80a30034,/*2:lwz r5,52(r3) */
    0x39030180, /* addi r8,r3,384 */
    0x39230190, /* addi r9,r3,400 */
    0x70a00fff, /* andi. r0,r5,4095 */
    0x7ca043a6, /* mtvrsave r5 */
    0x4d820420, /* beqctr */
    0x7e8040ce, /* lvx v20,r0,r8 */
    0x39080020, /* addi r8,r8,32 */
    0x7ea048ce, /* lvx v21,r0,r9 */
    0x39290020, /* addi r9,r9,32 */
    0x7ec040ce, /* lvx v22,r0,r8 */
    0x39080020, /* addi r8,r8,32 */
    0x7ee048ce, /* lvx v23,r0,r9 */
    0x39290020, /* addi r9,r9,32 */
    0x7f0040ce, /* lvx v24,r0,r8 */
    0x39080020, /* addi r8,r8,32 */
    0x7f2048ce, /* lvx v25,r0,r9 */
    0x39290020, /* addi r9,r9,32 */
    0x7f4040ce, /* lvx v26,r0,r8 */
    0x39080020, /* addi r8,r8,32 */
    0x7f6048ce, /* lvx v27,r0,r9 */
    0x39290020, /* addi r9,r9,32 */
    0x7f8040ce, /* lvx v28,r0,r8 */
    0x39080020, /* addi r8,r8,32 */
    0x7fa048ce, /* lvx v29,r0,r9 */
    0x39290020, /* addi r9,r9,32 */
    0x7fc040ce, /* lvx v30,r0,r8 */
    0x7fe048ce, /* lvx v31,r0,r9 */
#endif

    /* Continue in the resumed thread at the address loaded into CTR */
    0x4e800420, /* bctr */
};
| 272 | |
/* CO_SWAP_ASM(x, y): run the embedded swap code with x as its first argument
   and y as its second. co_switch() passes the thread to resume as x and the
   thread being suspended as y.

   Arguments are parenthesized so that any expression can be passed safely.
   Note that the descriptor variant evaluates x twice (once as the thing being
   called, once as the argument), so x must not have side effects. */
#if LIBCO_PPCDESC
/* Function call goes through indirect descriptor: co_create_() places a
   descriptor whose code pointer is the swap code at the start of every
   thread block, so the block itself is called as if it were the function. */
#define CO_SWAP_ASM(x, y) \
    ((void (*)(cothread_t, cothread_t)) (uintptr_t) (x))((x), (y))
#else
/* Function call goes directly to code */
#define CO_SWAP_ASM(x, y) \
    ((void (*)(cothread_t, cothread_t)) (uintptr_t) libco_ppc_code)((x), (y))
#endif
| 282 | |
| 283 | #endif |
| 284 | |
/* Allocate a raw thread block of `size` bytes.

   size  - number of bytes to malloc
   entry - address of the entry function; only read when LIBCO_PPCDESC is set,
           where it is treated as the address of a function descriptor

   Returns the block, or NULL if malloc failed. With LIBCO_PPCDESC the first
   three pointer-sized slots of the block are initialized as a function
   descriptor: a copy of entry's descriptor whose code pointer is replaced by
   the swap routine. Nothing else in the block is initialized. */
static uint32_t* co_create_(unsigned size, uintptr_t entry)
{
    uint32_t *state = (uint32_t*) malloc(size);

    if (!state)
        return NULL;

#if LIBCO_PPCDESC
    /* Start from a copy of the entry function's descriptor... */
    memcpy(state, (void*) entry, 3 * sizeof(void*));

    /* ...and point its first slot at the swap routine instead */
#ifdef LIBCO_PPC_ASM
    *(const void**) state = *(void**) &co_swap_asm;
#else
    *(const void**) state = libco_ppc_code;
#endif
#endif

    return state;
}
| 306 | |
| 307 | cothread_t co_create(unsigned int size, void (*entry_)(void)) |
| 308 | { |
| 309 | uintptr_t entry = (uintptr_t) entry_; |
| 310 | uint32_t *t = NULL; |
| 311 | |
| 312 | /* Be sure main thread was successfully allocated */ |
| 313 | if (co_active()) |
| 314 | { |
| 315 | size += state_size + above_stack + stack_align; |
| 316 | t = co_create_(size, entry); |
| 317 | } |
| 318 | |
| 319 | if (t) |
| 320 | { |
| 321 | uintptr_t sp; |
| 322 | #if LIBCO_PPC64 |
| 323 | int shift = 16; |
| 324 | #else |
| 325 | int shift = 0; |
| 326 | #endif |
| 327 | /* Save current registers into new thread, so that any special ones will |
| 328 | have proper values when thread is begun */ |
| 329 | CO_SWAP_ASM(t, t); |
| 330 | |
| 331 | #if LIBCO_PPCDESC |
| 332 | /* Get real address */ |
| 333 | entry = (uintptr_t) *(void**)entry; |
| 334 | #endif |
| 335 | |
| 336 | /* Put stack near end of block, and align */ |
| 337 | sp = (uintptr_t) t + size - above_stack; |
| 338 | sp -= sp % stack_align; |
| 339 | |
| 340 | /* On PPC32, we save and restore GPRs as 32 bits. For PPC64, we |
| 341 | save and restore them as 64 bits, regardless of the size the ABI |
| 342 | uses. So, we manually write pointers at the proper size. We always |
| 343 | save and restore at the same address, and since PPC is big-endian, |
| 344 | we must put the low byte first on PPC32. */ |
| 345 | |
| 346 | /* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts |
| 347 | and don't have to care how many bits uintptr_t is. */ |
| 348 | |
| 349 | /* Set up so entry will be called on next swap */ |
| 350 | t [8] = (uint32_t) (entry >> shift >> shift); |
| 351 | t [9] = (uint32_t) entry; |
| 352 | |
| 353 | t [10] = (uint32_t) (sp >> shift >> shift); |
| 354 | t [11] = (uint32_t) sp; |
| 355 | } |
| 356 | |
| 357 | return t; |
| 358 | } |
| 359 | |
/* Release a cothread's block. The handle is passed straight to free(), so it
   must refer to storage obtained from malloc(), which is what co_create()
   returns. No check is made that t is not the cothread currently running. */
void co_delete(cothread_t t)
{
    free(t);
}
| 364 | |
| 365 | static void co_init_(void) |
| 366 | { |
| 367 | #if LIBCO_MPROTECT |
| 368 | /* TODO: pre- and post-pad PPC code so that this doesn't make other |
| 369 | data executable and writable */ |
| 370 | long page_size = sysconf(_SC_PAGESIZE); |
| 371 | if (page_size > 0) |
| 372 | { |
| 373 | uintptr_t align = page_size; |
| 374 | uintptr_t begin = (uintptr_t) libco_ppc_code; |
| 375 | uintptr_t end = begin + sizeof libco_ppc_code; |
| 376 | |
| 377 | /* Align beginning and end */ |
| 378 | end += align - 1; |
| 379 | end -= end % align; |
| 380 | begin -= begin % align; |
| 381 | |
| 382 | mprotect((void*)begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC); |
| 383 | } |
| 384 | #endif |
| 385 | |
| 386 | co_active_handle = co_create_(state_size, (uintptr_t) &co_switch); |
| 387 | } |
| 388 | |
| 389 | cothread_t co_active(void) |
| 390 | { |
| 391 | if (!co_active_handle) |
| 392 | co_init_(); |
| 393 | |
| 394 | return co_active_handle; |
| 395 | } |
| 396 | |
| 397 | void co_switch(cothread_t t) |
| 398 | { |
| 399 | cothread_t old = co_active_handle; |
| 400 | co_active_handle = t; |
| 401 | CO_SWAP_ASM(t, old); |
| 402 | } |