/* deps/lightrec/tlsf/tlsf.c */

#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "tlsf.h"

#if defined(__cplusplus)
#define tlsf_decl inline
#else
#define tlsf_decl static
#endif

/*
** Architecture-specific bit manipulation routines.
**
** TLSF achieves O(1) cost for malloc and free operations by limiting
** the search for a free block to a free list of guaranteed size
** adequate to fulfill the request, combined with efficient free list
** queries using bitmasks and architecture-specific bit-manipulation
** routines.
**
** Most modern processors provide instructions to count leading zeroes
** in a word, find the lowest and highest set bit, etc. These
** specific implementations will be used when available, falling back
** to a reasonably efficient generic implementation.
**
** NOTE: the TLSF spec relies on ffs/fls returning a value in 0..31.
** The standard ffs/fls return 1..32, with 0 meaning "no bit set";
** the versions defined here return 0..31, and -1 when no bit is set.
*/
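
/*
** Illustrative note (editorial addition, not part of the original source):
** with the 0..31 convention used here, the helpers behave as follows:
**
**   tlsf_ffs(0x00000100) == 8     lowest set bit is bit 8
**   tlsf_fls(0x00000100) == 8     highest set bit is bit 8
**   tlsf_fls(0x80000001) == 31
**   tlsf_ffs(0) == -1             no bit set
**   tlsf_fls(0) == -1
*/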

/*
** Detect whether or not we are building for a 32- or 64-bit (LP/LLP)
** architecture. There is no reliable portable method at compile-time.
*/
#if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) \
	|| defined (_WIN64) || defined (__LP64__) || defined (__LLP64__)
#define TLSF_64BIT
#endif

/*
** gcc 3.4 and above have builtin support, specialized for architecture.
** Some compilers masquerade as gcc; patchlevel test filters them out.
*/
#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \
	&& defined (__GNUC_PATCHLEVEL__)

#if defined (__SNC__)
/* SNC for Playstation 3. */

tlsf_decl int tlsf_ffs(unsigned int word)
{
	const unsigned int reverse = word & (~word + 1);
	const int bit = 32 - __builtin_clz(reverse);
	return bit - 1;
}

#else

tlsf_decl int tlsf_ffs(unsigned int word)
{
	return __builtin_ffs(word) - 1;
}

#endif

tlsf_decl int tlsf_fls(unsigned int word)
{
	const int bit = word ? 32 - __builtin_clz(word) : 0;
	return bit - 1;
}

#elif defined (_MSC_VER) && (_MSC_VER >= 1400) && (defined (_M_IX86) || defined (_M_X64))
/* Microsoft Visual C++ support on x86/X64 architectures. */

#include <intrin.h>

#pragma intrinsic(_BitScanReverse)
#pragma intrinsic(_BitScanForward)

tlsf_decl int tlsf_fls(unsigned int word)
{
	unsigned long index;
	return _BitScanReverse(&index, word) ? index : -1;
}

tlsf_decl int tlsf_ffs(unsigned int word)
{
	unsigned long index;
	return _BitScanForward(&index, word) ? index : -1;
}

#elif defined (_MSC_VER) && defined (_M_PPC)
/* Microsoft Visual C++ support on PowerPC architectures. */

#include <ppcintrinsics.h>

tlsf_decl int tlsf_fls(unsigned int word)
{
	const int bit = 32 - _CountLeadingZeros(word);
	return bit - 1;
}

tlsf_decl int tlsf_ffs(unsigned int word)
{
	const unsigned int reverse = word & (~word + 1);
	const int bit = 32 - _CountLeadingZeros(reverse);
	return bit - 1;
}

#elif defined (__ARMCC_VERSION)
/* RealView Compilation Tools for ARM */

tlsf_decl int tlsf_ffs(unsigned int word)
{
	const unsigned int reverse = word & (~word + 1);
	const int bit = 32 - __clz(reverse);
	return bit - 1;
}

tlsf_decl int tlsf_fls(unsigned int word)
{
	const int bit = word ? 32 - __clz(word) : 0;
	return bit - 1;
}

#elif defined (__ghs__)
/* Green Hills support for PowerPC */

#include <ppc_ghs.h>

tlsf_decl int tlsf_ffs(unsigned int word)
{
	const unsigned int reverse = word & (~word + 1);
	const int bit = 32 - __CLZ32(reverse);
	return bit - 1;
}

tlsf_decl int tlsf_fls(unsigned int word)
{
	const int bit = word ? 32 - __CLZ32(word) : 0;
	return bit - 1;
}

#else
/* Fall back to generic implementation. */

tlsf_decl int tlsf_fls_generic(unsigned int word)
{
	int bit = 32;

	if (!word) bit -= 1;
	if (!(word & 0xffff0000)) { word <<= 16; bit -= 16; }
	if (!(word & 0xff000000)) { word <<= 8; bit -= 8; }
	if (!(word & 0xf0000000)) { word <<= 4; bit -= 4; }
	if (!(word & 0xc0000000)) { word <<= 2; bit -= 2; }
	if (!(word & 0x80000000)) { word <<= 1; bit -= 1; }

	return bit;
}

/* Implement ffs in terms of fls. */
tlsf_decl int tlsf_ffs(unsigned int word)
{
	return tlsf_fls_generic(word & (~word + 1)) - 1;
}

tlsf_decl int tlsf_fls(unsigned int word)
{
	return tlsf_fls_generic(word) - 1;
}

#endif

/* Possibly 64-bit version of tlsf_fls. */
#if defined (TLSF_64BIT)
tlsf_decl int tlsf_fls_sizet(size_t size)
{
	int high = (int)(size >> 32);
	int bits = 0;
	if (high)
	{
		bits = 32 + tlsf_fls(high);
	}
	else
	{
		bits = tlsf_fls((int)size & 0xffffffff);

	}
	return bits;
}
#else
#define tlsf_fls_sizet tlsf_fls
#endif

#undef tlsf_decl

/*
** Constants.
*/

/* Public constants: may be modified. */
enum tlsf_public
{
	/* log2 of number of linear subdivisions of block sizes. Larger
	** values require more memory in the control structure. Values of
	** 4 or 5 are typical.
	*/
	SL_INDEX_COUNT_LOG2 = 5,
};

/* Private constants: do not modify. */
enum tlsf_private
{
#if defined (TLSF_64BIT)
	/* All allocation sizes and addresses are aligned to 8 bytes. */
	ALIGN_SIZE_LOG2 = 3,
#else
	/* All allocation sizes and addresses are aligned to 4 bytes. */
	ALIGN_SIZE_LOG2 = 2,
#endif
	ALIGN_SIZE = (1 << ALIGN_SIZE_LOG2),

	/*
	** We support allocations of sizes up to (1 << FL_INDEX_MAX) bytes.
	** However, because we linearly subdivide the second-level lists, and
	** our minimum size granularity is 4 bytes, it doesn't make sense to
	** create first-level lists for sizes smaller than SL_INDEX_COUNT * 4,
	** or (1 << (SL_INDEX_COUNT_LOG2 + 2)) bytes, as there we will be
	** trying to split size ranges into more slots than we have available.
	** Instead, we calculate the minimum threshold size, and place all
	** blocks below that size into the 0th first-level list.
	*/

#if defined (TLSF_64BIT)
	/*
	** TODO: We can increase this to support larger sizes, at the expense
	** of more overhead in the TLSF structure.
	*/
	FL_INDEX_MAX = 32,
#else
	FL_INDEX_MAX = 30,
#endif
	SL_INDEX_COUNT = (1 << SL_INDEX_COUNT_LOG2),
	FL_INDEX_SHIFT = (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2),
	FL_INDEX_COUNT = (FL_INDEX_MAX - FL_INDEX_SHIFT + 1),

	SMALL_BLOCK_SIZE = (1 << FL_INDEX_SHIFT),
};
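
/*
** Worked example (editorial note, not in the original source): on a
** 32-bit build with SL_INDEX_COUNT_LOG2 = 5 and ALIGN_SIZE_LOG2 = 2,
** the derived constants come out as:
**
**   ALIGN_SIZE       = 4
**   SL_INDEX_COUNT   = 32
**   FL_INDEX_SHIFT   = 7
**   FL_INDEX_COUNT   = 30 - 7 + 1 = 24
**   SMALL_BLOCK_SIZE = 128
**
** so any block smaller than 128 bytes maps to first-level list 0.
*/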

/*
** Cast and min/max macros.
*/

#define tlsf_cast(t, exp)	((t) (exp))
#define tlsf_min(a, b)		((a) < (b) ? (a) : (b))
#define tlsf_max(a, b)		((a) > (b) ? (a) : (b))

/*
** Set assert macro, if it has not been provided by the user.
*/
#if !defined (tlsf_assert)
#define tlsf_assert assert
#endif

/*
** Static assertion mechanism.
*/

#define _tlsf_glue2(x, y) x ## y
#define _tlsf_glue(x, y) _tlsf_glue2(x, y)
#define tlsf_static_assert(exp) \
	typedef char _tlsf_glue(static_assert, __LINE__) [(exp) ? 1 : -1]

/* This code has been tested on 32- and 64-bit (LP/LLP) architectures. */
tlsf_static_assert(sizeof(int) * CHAR_BIT == 32);
tlsf_static_assert(sizeof(size_t) * CHAR_BIT >= 32);
tlsf_static_assert(sizeof(size_t) * CHAR_BIT <= 64);

/* SL_INDEX_COUNT must be <= number of bits in sl_bitmap's storage type. */
tlsf_static_assert(sizeof(unsigned int) * CHAR_BIT >= SL_INDEX_COUNT);

/* Ensure we've properly tuned our sizes. */
tlsf_static_assert(ALIGN_SIZE == SMALL_BLOCK_SIZE / SL_INDEX_COUNT);

/*
** Data structures and associated constants.
*/

/*
** Block header structure.
**
** There are several implementation subtleties involved:
** - The prev_phys_block field is only valid if the previous block is free.
** - The prev_phys_block field is actually stored at the end of the
**   previous block. It appears at the beginning of this structure only to
**   simplify the implementation.
** - The next_free / prev_free fields are only valid if the block is free.
*/
typedef struct block_header_t
{
	/* Points to the previous physical block. */
	struct block_header_t* prev_phys_block;

	/* The size of this block, excluding the block header. */
	size_t size;

	/* Next and previous free blocks. */
	struct block_header_t* next_free;
	struct block_header_t* prev_free;
} block_header_t;

/*
** Since block sizes are always at least a multiple of 4, the two least
** significant bits of the size field are used to store the block status:
** - bit 0: whether block is busy or free
** - bit 1: whether previous block is busy or free
*/
static const size_t block_header_free_bit = 1 << 0;
static const size_t block_header_prev_free_bit = 1 << 1;
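
/*
** Editorial example (not in the original source): because the low two bits
** of 'size' carry status flags, a free 64-byte block whose previous
** physical block is in use stores
**
**   block->size == 64 | block_header_free_bit == 0x41
**
** and block_size() masks the flags off again to recover 64.
*/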

/*
** The size of the block header exposed to used blocks is the size field.
** The prev_phys_block field is stored *inside* the previous free block.
*/
static const size_t block_header_overhead = sizeof(size_t);

/* User data starts directly after the size field in a used block. */
static const size_t block_start_offset =
	offsetof(block_header_t, size) + sizeof(size_t);

/*
** A free block must be large enough to store its header minus the size of
** the prev_phys_block field, and no larger than the number of addressable
** bits for FL_INDEX.
*/
static const size_t block_size_min =
	sizeof(block_header_t) - sizeof(block_header_t*);
static const size_t block_size_max = tlsf_cast(size_t, 1) << FL_INDEX_MAX;


/* The TLSF control structure. */
typedef struct control_t
{
	/* Empty lists point at this block to indicate they are free. */
	block_header_t block_null;

	/* Bitmaps for free lists. */
	unsigned int fl_bitmap;
	unsigned int sl_bitmap[FL_INDEX_COUNT];

	/* Head of free lists. */
	block_header_t* blocks[FL_INDEX_COUNT][SL_INDEX_COUNT];
} control_t;

/* A type used for casting when doing pointer arithmetic. */
typedef ptrdiff_t tlsfptr_t;

/*
** block_header_t member functions.
*/

static size_t block_size(const block_header_t* block)
{
	return block->size & ~(block_header_free_bit | block_header_prev_free_bit);
}

static void block_set_size(block_header_t* block, size_t size)
{
	const size_t oldsize = block->size;
	block->size = size | (oldsize & (block_header_free_bit | block_header_prev_free_bit));
}

static int block_is_last(const block_header_t* block)
{
	return block_size(block) == 0;
}

static int block_is_free(const block_header_t* block)
{
	return tlsf_cast(int, block->size & block_header_free_bit);
}

static void block_set_free(block_header_t* block)
{
	block->size |= block_header_free_bit;
}

static void block_set_used(block_header_t* block)
{
	block->size &= ~block_header_free_bit;
}

static int block_is_prev_free(const block_header_t* block)
{
	return tlsf_cast(int, block->size & block_header_prev_free_bit);
}

static void block_set_prev_free(block_header_t* block)
{
	block->size |= block_header_prev_free_bit;
}

static void block_set_prev_used(block_header_t* block)
{
	block->size &= ~block_header_prev_free_bit;
}

static block_header_t* block_from_ptr(const void* ptr)
{
	return tlsf_cast(block_header_t*,
		tlsf_cast(unsigned char*, ptr) - block_start_offset);
}

static void* block_to_ptr(const block_header_t* block)
{
	return tlsf_cast(void*,
		tlsf_cast(unsigned char*, block) + block_start_offset);
}

/* Return location of next block after block of given size. */
static block_header_t* offset_to_block(const void* ptr, size_t size)
{
	return tlsf_cast(block_header_t*, tlsf_cast(tlsfptr_t, ptr) + size);
}

/* Return location of previous block. */
static block_header_t* block_prev(const block_header_t* block)
{
	tlsf_assert(block_is_prev_free(block) && "previous block must be free");
	return block->prev_phys_block;
}

/* Return location of next existing block. */
static block_header_t* block_next(const block_header_t* block)
{
	block_header_t* next = offset_to_block(block_to_ptr(block),
		block_size(block) - block_header_overhead);
	tlsf_assert(!block_is_last(block));
	return next;
}

/* Link a new block with its physical neighbor, return the neighbor. */
static block_header_t* block_link_next(block_header_t* block)
{
	block_header_t* next = block_next(block);
	next->prev_phys_block = block;
	return next;
}

static void block_mark_as_free(block_header_t* block)
{
	/* Link the block to the next block, first. */
	block_header_t* next = block_link_next(block);
	block_set_prev_free(next);
	block_set_free(block);
}

static void block_mark_as_used(block_header_t* block)
{
	block_header_t* next = block_next(block);
	block_set_prev_used(next);
	block_set_used(block);
}

static size_t align_up(size_t x, size_t align)
{
	tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");
	return (x + (align - 1)) & ~(align - 1);
}

static size_t align_down(size_t x, size_t align)
{
	tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");
	return x - (x & (align - 1));
}

static void* align_ptr(const void* ptr, size_t align)
{
	const tlsfptr_t aligned =
		(tlsf_cast(tlsfptr_t, ptr) + (align - 1)) & ~(align - 1);
	tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");
	return tlsf_cast(void*, aligned);
}

/*
** Adjust an allocation size to be aligned to word size, and no smaller
** than internal minimum.
*/
static size_t adjust_request_size(size_t size, size_t align)
{
	size_t adjust = 0;
	if (size)
	{
		const size_t aligned = align_up(size, align);

		/* The aligned size must not exceed block_size_max or we'll go out of bounds on sl_bitmap. */
		if (aligned < block_size_max)
		{
			adjust = tlsf_max(aligned, block_size_min);
		}
	}
	return adjust;
}
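
/*
** Editorial example (not in the original source): on a typical 32-bit
** build, sizeof(block_header_t) is 16 and block_size_min is therefore
** 16 - sizeof(block_header_t*) = 12, so
**
**   adjust_request_size(1, ALIGN_SIZE)   -> align_up(1, 4) = 4   -> max(4, 12)   = 12
**   adjust_request_size(100, ALIGN_SIZE) -> align_up(100, 4) = 100 -> max(100, 12) = 100
**
** i.e. every request is rounded up to the alignment and never drops below
** the minimum block size.
*/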

/*
** TLSF utility functions. In most cases, these are direct translations of
** the documentation found in the white paper.
*/

static void mapping_insert(size_t size, int* fli, int* sli)
{
	int fl, sl;
	if (size < SMALL_BLOCK_SIZE)
	{
		/* Store small blocks in first list. */
		fl = 0;
		sl = tlsf_cast(int, size) / (SMALL_BLOCK_SIZE / SL_INDEX_COUNT);
	}
	else
	{
		fl = tlsf_fls_sizet(size);
		sl = tlsf_cast(int, size >> (fl - SL_INDEX_COUNT_LOG2)) ^ (1 << SL_INDEX_COUNT_LOG2);
		fl -= (FL_INDEX_SHIFT - 1);
	}
	*fli = fl;
	*sli = sl;
}
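
/*
** Worked example (editorial note, not in the original source), using the
** 32-bit constants above (SMALL_BLOCK_SIZE = 128, SL_INDEX_COUNT_LOG2 = 5,
** FL_INDEX_SHIFT = 7): for size = 460,
**
**   fl = fls(460) = 8
**   sl = (460 >> (8 - 5)) ^ (1 << 5) = 57 ^ 32 = 25
**   fl -= (7 - 1)  ->  fl = 2
**
** so a 460-byte block lives in free list blocks[2][25]. Sizes below 128
** bytes go to fl = 0 with sl = size / 4.
*/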

/* This version rounds up to the next block size (for allocations) */
static void mapping_search(size_t size, int* fli, int* sli)
{
	if (size >= SMALL_BLOCK_SIZE)
	{
		const size_t round = (1 << (tlsf_fls_sizet(size) - SL_INDEX_COUNT_LOG2)) - 1;
		size += round;
	}
	mapping_insert(size, fli, sli);
}
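
/*
** Editorial example (not in the original source): continuing with
** size = 460, mapping_search() first rounds the size up so that the
** chosen list is guaranteed to hold blocks large enough:
**
**   round = (1 << (fls(460) - 5)) - 1 = (1 << 3) - 1 = 7
**   size  = 460 + 7 = 467  ->  mapping_insert(467) gives fl = 2, sl = 26
**
** i.e. the search starts one second-level slot above the slot a 460-byte
** free block would be inserted into.
*/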

static block_header_t* search_suitable_block(control_t* control, int* fli, int* sli)
{
	int fl = *fli;
	int sl = *sli;

	/*
	** First, search for a block in the list associated with the given
	** fl/sl index.
	*/
	unsigned int sl_map = control->sl_bitmap[fl] & (~0U << sl);
	if (!sl_map)
	{
		/* No block exists. Search in the next largest first-level list. */
		const unsigned int fl_map = control->fl_bitmap & (~0U << (fl + 1));
		if (!fl_map)
		{
			/* No free blocks available, memory has been exhausted. */
			return 0;
		}

		fl = tlsf_ffs(fl_map);
		*fli = fl;
		sl_map = control->sl_bitmap[fl];
	}
	tlsf_assert(sl_map && "internal error - second level bitmap is null");
	sl = tlsf_ffs(sl_map);
	*sli = sl;

	/* Return the first block in the free list. */
	return control->blocks[fl][sl];
}

/* Remove a free block from the free list. */
static void remove_free_block(control_t* control, block_header_t* block, int fl, int sl)
{
	block_header_t* prev = block->prev_free;
	block_header_t* next = block->next_free;
	tlsf_assert(prev && "prev_free field can not be null");
	tlsf_assert(next && "next_free field can not be null");
	next->prev_free = prev;
	prev->next_free = next;

	/* If this block is the head of the free list, set new head. */
	if (control->blocks[fl][sl] == block)
	{
		control->blocks[fl][sl] = next;

		/* If the new head is null, clear the bitmap. */
		if (next == &control->block_null)
		{
			control->sl_bitmap[fl] &= ~(1U << sl);

			/* If the second bitmap is now empty, clear the fl bitmap. */
			if (!control->sl_bitmap[fl])
			{
				control->fl_bitmap &= ~(1U << fl);
			}
		}
	}
}

/* Insert a free block into the free block list. */
static void insert_free_block(control_t* control, block_header_t* block, int fl, int sl)
{
	block_header_t* current = control->blocks[fl][sl];
	tlsf_assert(current && "free list cannot have a null entry");
	tlsf_assert(block && "cannot insert a null entry into the free list");
	block->next_free = current;
	block->prev_free = &control->block_null;
	current->prev_free = block;

	tlsf_assert(block_to_ptr(block) == align_ptr(block_to_ptr(block), ALIGN_SIZE)
		&& "block not aligned properly");
	/*
	** Insert the new block at the head of the list, and mark the first-
	** and second-level bitmaps appropriately.
	*/
	control->blocks[fl][sl] = block;
	control->fl_bitmap |= (1U << fl);
	control->sl_bitmap[fl] |= (1U << sl);
}

/* Remove a given block from the free list. */
static void block_remove(control_t* control, block_header_t* block)
{
	int fl, sl;
	mapping_insert(block_size(block), &fl, &sl);
	remove_free_block(control, block, fl, sl);
}

/* Insert a given block into the free list. */
static void block_insert(control_t* control, block_header_t* block)
{
	int fl, sl;
	mapping_insert(block_size(block), &fl, &sl);
	insert_free_block(control, block, fl, sl);
}

static int block_can_split(block_header_t* block, size_t size)
{
	return block_size(block) >= sizeof(block_header_t) + size;
}

/* Split a block into two, the second of which is free. */
static block_header_t* block_split(block_header_t* block, size_t size)
{
	/* Calculate the amount of space left in the remaining block. */
	block_header_t* remaining =
		offset_to_block(block_to_ptr(block), size - block_header_overhead);

	const size_t remain_size = block_size(block) - (size + block_header_overhead);

	tlsf_assert(block_to_ptr(remaining) == align_ptr(block_to_ptr(remaining), ALIGN_SIZE)
		&& "remaining block not aligned properly");

	tlsf_assert(block_size(block) == remain_size + size + block_header_overhead);
	block_set_size(remaining, remain_size);
	tlsf_assert(block_size(remaining) >= block_size_min && "block split with invalid size");

	block_set_size(block, size);
	block_mark_as_free(remaining);

	return remaining;
}

/* Absorb a free block's storage into an adjacent previous free block. */
static block_header_t* block_absorb(block_header_t* prev, block_header_t* block)
{
	tlsf_assert(!block_is_last(prev) && "previous block can't be last");
	/* Note: Leaves flags untouched. */
	prev->size += block_size(block) + block_header_overhead;
	block_link_next(prev);
	return prev;
}

/* Merge a just-freed block with an adjacent previous free block. */
static block_header_t* block_merge_prev(control_t* control, block_header_t* block)
{
	if (block_is_prev_free(block))
	{
		block_header_t* prev = block_prev(block);
		tlsf_assert(prev && "prev physical block can't be null");
		tlsf_assert(block_is_free(prev) && "prev block is not free though marked as such");
		block_remove(control, prev);
		block = block_absorb(prev, block);
	}

	return block;
}

/* Merge a just-freed block with an adjacent free block. */
static block_header_t* block_merge_next(control_t* control, block_header_t* block)
{
	block_header_t* next = block_next(block);
	tlsf_assert(next && "next physical block can't be null");

	if (block_is_free(next))
	{
		tlsf_assert(!block_is_last(block) && "previous block can't be last");
		block_remove(control, next);
		block = block_absorb(block, next);
	}

	return block;
}

/* Trim any trailing block space off the end of a block, return to pool. */
static void block_trim_free(control_t* control, block_header_t* block, size_t size)
{
	tlsf_assert(block_is_free(block) && "block must be free");
	if (block_can_split(block, size))
	{
		block_header_t* remaining_block = block_split(block, size);
		block_link_next(block);
		block_set_prev_free(remaining_block);
		block_insert(control, remaining_block);
	}
}

/* Trim any trailing block space off the end of a used block, return to pool. */
static void block_trim_used(control_t* control, block_header_t* block, size_t size)
{
	tlsf_assert(!block_is_free(block) && "block must be used");
	if (block_can_split(block, size))
	{
		/* If the next block is free, we must coalesce. */
		block_header_t* remaining_block = block_split(block, size);
		block_set_prev_used(remaining_block);

		remaining_block = block_merge_next(control, remaining_block);
		block_insert(control, remaining_block);
	}
}

static block_header_t* block_trim_free_leading(control_t* control, block_header_t* block, size_t size)
{
	block_header_t* remaining_block = block;
	if (block_can_split(block, size))
	{
		/* We want the 2nd block. */
		remaining_block = block_split(block, size - block_header_overhead);
		block_set_prev_free(remaining_block);

		block_link_next(block);
		block_insert(control, block);
	}

	return remaining_block;
}

static block_header_t* block_locate_free(control_t* control, size_t size)
{
	int fl = 0, sl = 0;
	block_header_t* block = 0;

	if (size)
	{
		mapping_search(size, &fl, &sl);

		/*
		** mapping_search can futz with the size, so for excessively large sizes it can sometimes wind up
		** with indices that are off the end of the block array.
		** So, we protect against that here, since this is the only callsite of mapping_search.
		** Note that we don't need to check sl, since it comes from a modulo operation that guarantees it's always in range.
		*/
		if (fl < FL_INDEX_COUNT)
		{
			block = search_suitable_block(control, &fl, &sl);
		}
	}

	if (block)
	{
		tlsf_assert(block_size(block) >= size);
		remove_free_block(control, block, fl, sl);
	}

	return block;
}

static void* block_prepare_used(control_t* control, block_header_t* block, size_t size)
{
	void* p = 0;
	if (block)
	{
		tlsf_assert(size && "size must be non-zero");
		block_trim_free(control, block, size);
		block_mark_as_used(block);
		p = block_to_ptr(block);
	}
	return p;
}

/* Clear structure and point all empty lists at the null block. */
static void control_construct(control_t* control)
{
	int i, j;

	control->block_null.next_free = &control->block_null;
	control->block_null.prev_free = &control->block_null;

	control->fl_bitmap = 0;
	for (i = 0; i < FL_INDEX_COUNT; ++i)
	{
		control->sl_bitmap[i] = 0;
		for (j = 0; j < SL_INDEX_COUNT; ++j)
		{
			control->blocks[i][j] = &control->block_null;
		}
	}
}

/*
** Debugging utilities.
*/

typedef struct integrity_t
{
	int prev_status;
	int status;
} integrity_t;

#define tlsf_insist(x) { tlsf_assert(x); if (!(x)) { status--; } }

static void integrity_walker(void* ptr, size_t size, int used, void* user)
{
	block_header_t* block = block_from_ptr(ptr);
	integrity_t* integ = tlsf_cast(integrity_t*, user);
	const int this_prev_status = block_is_prev_free(block) ? 1 : 0;
	const int this_status = block_is_free(block) ? 1 : 0;
	const size_t this_block_size = block_size(block);

	int status = 0;
	(void)used;
	tlsf_insist(integ->prev_status == this_prev_status && "prev status incorrect");
	tlsf_insist(size == this_block_size && "block size incorrect");

	integ->prev_status = this_status;
	integ->status += status;
}

int tlsf_check(tlsf_t tlsf)
{
	int i, j;

	control_t* control = tlsf_cast(control_t*, tlsf);
	int status = 0;

	/* Check that the free lists and bitmaps are accurate. */
	for (i = 0; i < FL_INDEX_COUNT; ++i)
	{
		for (j = 0; j < SL_INDEX_COUNT; ++j)
		{
			const int fl_map = control->fl_bitmap & (1U << i);
			const int sl_list = control->sl_bitmap[i];
			const int sl_map = sl_list & (1U << j);
			const block_header_t* block = control->blocks[i][j];

			/* Check that first- and second-level lists agree. */
			if (!fl_map)
			{
				tlsf_insist(!sl_map && "second-level map must be null");
			}

			if (!sl_map)
			{
				tlsf_insist(block == &control->block_null && "block list must be null");
				continue;
			}

			/* Check that there is at least one free block. */
			tlsf_insist(sl_list && "no free blocks in second-level map");
			tlsf_insist(block != &control->block_null && "block should not be null");

			while (block != &control->block_null)
			{
				int fli, sli;
				tlsf_insist(block_is_free(block) && "block should be free");
				tlsf_insist(!block_is_prev_free(block) && "blocks should have coalesced");
				tlsf_insist(!block_is_free(block_next(block)) && "blocks should have coalesced");
				tlsf_insist(block_is_prev_free(block_next(block)) && "block should be free");
				tlsf_insist(block_size(block) >= block_size_min && "block not minimum size");

				mapping_insert(block_size(block), &fli, &sli);
				tlsf_insist(fli == i && sli == j && "block size indexed in wrong list");
				block = block->next_free;
			}
		}
	}

	return status;
}

#undef tlsf_insist

static void default_walker(void* ptr, size_t size, int used, void* user)
{
	(void)user;
	printf("\t%p %s size: %x (%p)\n", ptr, used ? "used" : "free", (unsigned int)size, block_from_ptr(ptr));
}

void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user)
{
	tlsf_walker pool_walker = walker ? walker : default_walker;
	block_header_t* block =
		offset_to_block(pool, -(int)block_header_overhead);

	while (block && !block_is_last(block))
	{
		pool_walker(
			block_to_ptr(block),
			block_size(block),
			!block_is_free(block),
			user);
		block = block_next(block);
	}
}

size_t tlsf_block_size(void* ptr)
{
	size_t size = 0;
	if (ptr)
	{
		const block_header_t* block = block_from_ptr(ptr);
		size = block_size(block);
	}
	return size;
}

int tlsf_check_pool(pool_t pool)
{
	/* Check that the blocks are physically correct. */
	integrity_t integ = { 0, 0 };
	tlsf_walk_pool(pool, integrity_walker, &integ);

	return integ.status;
}

/*
** Size of the TLSF structures in a given memory block passed to
** tlsf_create, equal to the size of a control_t
*/
size_t tlsf_size(void)
{
	return sizeof(control_t);
}

size_t tlsf_align_size(void)
{
	return ALIGN_SIZE;
}

size_t tlsf_block_size_min(void)
{
	return block_size_min;
}

size_t tlsf_block_size_max(void)
{
	return block_size_max;
}

/*
** Overhead of the TLSF structures in a given memory block passed to
** tlsf_add_pool, equal to the overhead of a free block and the
** sentinel block.
*/
size_t tlsf_pool_overhead(void)
{
	return 2 * block_header_overhead;
}

size_t tlsf_alloc_overhead(void)
{
	return block_header_overhead;
}

pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes)
{
	block_header_t* block;
	block_header_t* next;

	const size_t pool_overhead = tlsf_pool_overhead();
	const size_t pool_bytes = align_down(bytes - pool_overhead, ALIGN_SIZE);

	if (((ptrdiff_t)mem % ALIGN_SIZE) != 0)
	{
		printf("tlsf_add_pool: Memory must be aligned by %u bytes.\n",
			(unsigned int)ALIGN_SIZE);
		return 0;
	}

	if (pool_bytes < block_size_min || pool_bytes > block_size_max)
	{
#if defined (TLSF_64BIT)
		printf("tlsf_add_pool: Memory size must be between 0x%x and 0x%x00 bytes.\n",
			(unsigned int)(pool_overhead + block_size_min),
			(unsigned int)((pool_overhead + block_size_max) / 256));
#else
		printf("tlsf_add_pool: Memory size must be between %u and %u bytes.\n",
			(unsigned int)(pool_overhead + block_size_min),
			(unsigned int)(pool_overhead + block_size_max));
#endif
		return 0;
	}

	/*
	** Create the main free block. Offset the start of the block slightly
	** so that the prev_phys_block field falls outside of the pool -
	** it will never be used.
	*/
	block = offset_to_block(mem, -(tlsfptr_t)block_header_overhead);
	block_set_size(block, pool_bytes);
	block_set_free(block);
	block_set_prev_used(block);
	block_insert(tlsf_cast(control_t*, tlsf), block);

	/* Split the block to create a zero-size sentinel block. */
	next = block_link_next(block);
	block_set_size(next, 0);
	block_set_used(next);
	block_set_prev_free(next);

	return mem;
}

void tlsf_remove_pool(tlsf_t tlsf, pool_t pool)
{
	control_t* control = tlsf_cast(control_t*, tlsf);
	block_header_t* block = offset_to_block(pool, -(int)block_header_overhead);

	int fl = 0, sl = 0;

	tlsf_assert(block_is_free(block) && "block should be free");
	tlsf_assert(!block_is_free(block_next(block)) && "next block should not be free");
	tlsf_assert(block_size(block_next(block)) == 0 && "next block size should be zero");

	mapping_insert(block_size(block), &fl, &sl);
	remove_free_block(control, block, fl, sl);
}

/*
** TLSF main interface.
*/

#if _DEBUG
int test_ffs_fls()
{
	/* Verify ffs/fls work properly. */
	int rv = 0;
	rv += (tlsf_ffs(0) == -1) ? 0 : 0x1;
	rv += (tlsf_fls(0) == -1) ? 0 : 0x2;
	rv += (tlsf_ffs(1) == 0) ? 0 : 0x4;
	rv += (tlsf_fls(1) == 0) ? 0 : 0x8;
	rv += (tlsf_ffs(0x80000000) == 31) ? 0 : 0x10;
	rv += (tlsf_ffs(0x80008000) == 15) ? 0 : 0x20;
	rv += (tlsf_fls(0x80000008) == 31) ? 0 : 0x40;
	rv += (tlsf_fls(0x7FFFFFFF) == 30) ? 0 : 0x80;

#if defined (TLSF_64BIT)
	rv += (tlsf_fls_sizet(0x80000000) == 31) ? 0 : 0x100;
	rv += (tlsf_fls_sizet(0x100000000) == 32) ? 0 : 0x200;
	rv += (tlsf_fls_sizet(0xffffffffffffffff) == 63) ? 0 : 0x400;
#endif

	if (rv)
	{
		printf("test_ffs_fls: %x ffs/fls tests failed.\n", rv);
	}
	return rv;
}
#endif

tlsf_t tlsf_create(void* mem)
{
#if _DEBUG
	if (test_ffs_fls())
	{
		return 0;
	}
#endif

	if (((tlsfptr_t)mem % ALIGN_SIZE) != 0)
	{
		printf("tlsf_create: Memory must be aligned to %u bytes.\n",
			(unsigned int)ALIGN_SIZE);
		return 0;
	}

	control_construct(tlsf_cast(control_t*, mem));

	return tlsf_cast(tlsf_t, mem);
}

tlsf_t tlsf_create_with_pool(void* mem, size_t bytes)
{
	tlsf_t tlsf = tlsf_create(mem);
	tlsf_add_pool(tlsf, (char*)mem + tlsf_size(), bytes - tlsf_size());
	return tlsf;
}
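
/*
** Editorial usage sketch (not part of the original source), guarded out so
** it never affects the build. The buffer name and size are illustrative;
** the first tlsf_size() bytes of the buffer hold the control structure and
** the remainder becomes the initial pool returned by tlsf_get_pool().
*/
#if 0
static void tlsf_usage_example(void)
{
	/* size_t elements keep the buffer aligned to ALIGN_SIZE. */
	static size_t arena[(1 << 20) / sizeof(size_t)];

	tlsf_t t = tlsf_create_with_pool(arena, sizeof(arena));
	void* p = tlsf_malloc(t, 256);

	tlsf_free(t, p);
	tlsf_destroy(t);
}
#endif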

void tlsf_destroy(tlsf_t tlsf)
{
	/* Nothing to do. */
	(void)tlsf;
}

pool_t tlsf_get_pool(tlsf_t tlsf)
{
	return tlsf_cast(pool_t, (char*)tlsf + tlsf_size());
}

void* tlsf_malloc(tlsf_t tlsf, size_t size)
{
	control_t* control = tlsf_cast(control_t*, tlsf);
	const size_t adjust = adjust_request_size(size, ALIGN_SIZE);
	block_header_t* block = block_locate_free(control, adjust);
	return block_prepare_used(control, block, adjust);
}

void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size)
{
	control_t* control = tlsf_cast(control_t*, tlsf);
	const size_t adjust = adjust_request_size(size, ALIGN_SIZE);

	/*
	** We must allocate an additional minimum block size bytes so that if
	** our free block will leave an alignment gap which is smaller, we can
	** trim a leading free block and release it back to the pool. We must
	** do this because the previous physical block is in use, therefore
	** the prev_phys_block field is not valid, and we can't simply adjust
	** the size of that block.
	*/
	const size_t gap_minimum = sizeof(block_header_t);
	const size_t size_with_gap = adjust_request_size(adjust + align + gap_minimum, align);

	/*
	** If alignment is less than or equal to the base alignment, we're done.
	** If we requested 0 bytes, return null, as tlsf_malloc(0) does.
	*/
	const size_t aligned_size = (adjust && align > ALIGN_SIZE) ? size_with_gap : adjust;

	block_header_t* block = block_locate_free(control, aligned_size);

	/* This can't be a static assert. */
	tlsf_assert(sizeof(block_header_t) == block_size_min + block_header_overhead);

	if (block)
	{
		void* ptr = block_to_ptr(block);
		void* aligned = align_ptr(ptr, align);
		size_t gap = tlsf_cast(size_t,
			tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr));

		/* If gap size is too small, offset to next aligned boundary. */
		if (gap && gap < gap_minimum)
		{
			const size_t gap_remain = gap_minimum - gap;
			const size_t offset = tlsf_max(gap_remain, align);
			const void* next_aligned = tlsf_cast(void*,
				tlsf_cast(tlsfptr_t, aligned) + offset);

			aligned = align_ptr(next_aligned, align);
			gap = tlsf_cast(size_t,
				tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr));
		}

		if (gap)
		{
			tlsf_assert(gap >= gap_minimum && "gap size too small");
			block = block_trim_free_leading(control, block, gap);
		}
	}

	return block_prepare_used(control, block, adjust);
}
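
/*
** Editorial example (not in the original source): tlsf_memalign() is used
** like tlsf_malloc() but with an explicit alignment, e.g.
**
**   void* p = tlsf_memalign(t, 64, 100);   100 bytes, 64-byte aligned
**
** Alignments at or below ALIGN_SIZE simply fall through to the normal
** allocation path; larger alignments may trim a leading free block to
** close the alignment gap, as described above.
*/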

void tlsf_free(tlsf_t tlsf, void* ptr)
{
	/* Don't attempt to free a NULL pointer. */
	if (ptr)
	{
		control_t* control = tlsf_cast(control_t*, tlsf);
		block_header_t* block = block_from_ptr(ptr);
		tlsf_assert(!block_is_free(block) && "block already marked as free");
		block_mark_as_free(block);
		block = block_merge_prev(control, block);
		block = block_merge_next(control, block);
		block_insert(control, block);
	}
}

/*
** The TLSF block information provides us with enough information to
** provide a reasonably intelligent implementation of realloc, growing or
** shrinking the currently allocated block as required.
**
** This routine handles the somewhat esoteric edge cases of realloc:
** - a non-zero size with a null pointer will behave like malloc
** - a zero size with a non-null pointer will behave like free
** - a request that cannot be satisfied will leave the original buffer
**   untouched
** - an extended buffer size will leave the newly-allocated area with
**   contents undefined
*/
void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size)
{
	control_t* control = tlsf_cast(control_t*, tlsf);
	void* p = 0;

	/* Zero-size requests are treated as free. */
	if (ptr && size == 0)
	{
		tlsf_free(tlsf, ptr);
	}
	/* Requests with NULL pointers are treated as malloc. */
	else if (!ptr)
	{
		p = tlsf_malloc(tlsf, size);
	}
	else
	{
		block_header_t* block = block_from_ptr(ptr);
		block_header_t* next = block_next(block);

		const size_t cursize = block_size(block);
		const size_t combined = cursize + block_size(next) + block_header_overhead;
		const size_t adjust = adjust_request_size(size, ALIGN_SIZE);

		tlsf_assert(!block_is_free(block) && "block already marked as free");

		/*
		** If the next block is used, or when combined with the current
		** block, does not offer enough space, we must reallocate and copy.
		*/
		if (adjust > cursize && (!block_is_free(next) || adjust > combined))
		{
			p = tlsf_malloc(tlsf, size);
			if (p)
			{
				const size_t minsize = tlsf_min(cursize, size);
				memcpy(p, ptr, minsize);
				tlsf_free(tlsf, ptr);
			}
		}
		else
		{
			/* Do we need to expand to the next block? */
			if (adjust > cursize)
			{
				block_merge_next(control, block);
				block_mark_as_used(block);
			}

			/* Trim the resulting block and return the original pointer. */
			block_trim_used(control, block, adjust);
			p = ptr;
		}
	}

	return p;
}
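
/*
** Editorial sketch (not in the original source) of the realloc edge cases
** documented above; guarded out of the build.
*/
#if 0
static void tlsf_realloc_example(tlsf_t t)
{
	void* q = tlsf_realloc(t, NULL, 128);  /* behaves like tlsf_malloc(t, 128) */

	/* Grows in place when the next physical block is free and large enough,
	** otherwise allocates a new block, copies and frees the old one. */
	q = tlsf_realloc(t, q, 256);

	q = tlsf_realloc(t, q, 0);             /* behaves like tlsf_free; returns NULL */
	(void)q;
}
#endif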