2 * divsufsort.c for libdivsufsort-lite
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
27 /*- Compiler specifics -*/
29 #pragma clang diagnostic ignored "-Wshorten-64-to-32"
33 # pragma warning(disable : 4244)
34 # pragma warning(disable : 4127) /* C4127 : Condition expression is constant */
43 #include "divsufsort.h"
50 # define INLINE __inline
52 #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
55 #if !defined(ALPHABET_SIZE)
56 # define ALPHABET_SIZE (256)
58 #define BUCKET_A_SIZE (ALPHABET_SIZE)
59 #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
60 #if defined(SS_INSERTIONSORT_THRESHOLD)
61 # if SS_INSERTIONSORT_THRESHOLD < 1
62 # undef SS_INSERTIONSORT_THRESHOLD
63 # define SS_INSERTIONSORT_THRESHOLD (1)
66 # define SS_INSERTIONSORT_THRESHOLD (8)
68 #if defined(SS_BLOCKSIZE)
71 # define SS_BLOCKSIZE (0)
72 # elif 32768 <= SS_BLOCKSIZE
74 # define SS_BLOCKSIZE (32767)
77 # define SS_BLOCKSIZE (1024)
79 /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
81 # define SS_MISORT_STACKSIZE (96)
82 #elif SS_BLOCKSIZE <= 4096
83 # define SS_MISORT_STACKSIZE (16)
85 # define SS_MISORT_STACKSIZE (24)
87 #define SS_SMERGE_STACKSIZE (32)
88 #define TR_INSERTIONSORT_THRESHOLD (8)
89 #define TR_STACKSIZE (64)
94 # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
97 # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
100 # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
102 #define STACK_PUSH(_a, _b, _c, _d)\
104 assert(ssize < STACK_SIZE);\
105 stack[ssize].a = (_a), stack[ssize].b = (_b),\
106 stack[ssize].c = (_c), stack[ssize++].d = (_d);\
108 #define STACK_PUSH5(_a, _b, _c, _d, _e)\
110 assert(ssize < STACK_SIZE);\
111 stack[ssize].a = (_a), stack[ssize].b = (_b),\
112 stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
114 #define STACK_POP(_a, _b, _c, _d)\
117 if(ssize == 0) { return; }\
118 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
119 (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
121 #define STACK_POP5(_a, _b, _c, _d, _e)\
124 if(ssize == 0) { return; }\
125 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
126 (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
128 #define BUCKET_A(_c0) bucket_A[(_c0)]
129 #if ALPHABET_SIZE == 256
130 #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
131 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
133 #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
134 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
138 /*- Private Functions -*/
140 static const int lg_table[256]= {
141 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
142 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
143 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
144 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
145 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
146 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
147 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
148 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
151 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
156 #if SS_BLOCKSIZE == 0
157 return (n & 0xffff0000) ?
159 24 + lg_table[(n >> 24) & 0xff] :
160 16 + lg_table[(n >> 16) & 0xff]) :
162 8 + lg_table[(n >> 8) & 0xff] :
163 0 + lg_table[(n >> 0) & 0xff]);
164 #elif SS_BLOCKSIZE < 256
167 return (n & 0xff00) ?
168 8 + lg_table[(n >> 8) & 0xff] :
169 0 + lg_table[(n >> 0) & 0xff];
173 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
175 #if SS_BLOCKSIZE != 0
177 static const int sqq_table[256] = {
178 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
179 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
180 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
181 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
182 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
183 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
184 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
185 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
186 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
187 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
188 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
189 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
190 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
191 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
192 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
193 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
201 if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
202 e = (x & 0xffff0000) ?
204 24 + lg_table[(x >> 24) & 0xff] :
205 16 + lg_table[(x >> 16) & 0xff]) :
207 8 + lg_table[(x >> 8) & 0xff] :
208 0 + lg_table[(x >> 0) & 0xff]);
211 y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
212 if(e >= 24) { y = (y + 1 + x / y) >> 1; }
213 y = (y + 1 + x / y) >> 1;
215 y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
217 return sqq_table[x] >> 4;
220 return (x < (y * y)) ? y - 1 : y;
223 #endif /* SS_BLOCKSIZE != 0 */
226 /*---------------------------------------------------------------------------*/
228 /* Compares two suffixes. */
231 ss_compare(const unsigned char *T,
232 const int *p1, const int *p2,
234 const unsigned char *U1, *U2, *U1n, *U2n;
236 for(U1 = T + depth + *p1,
237 U2 = T + depth + *p2,
238 U1n = T + *(p1 + 1) + 2,
239 U2n = T + *(p2 + 1) + 2;
240 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
245 (U2 < U2n ? *U1 - *U2 : 1) :
250 /*---------------------------------------------------------------------------*/
252 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
254 /* Insertionsort for small size groups */
257 ss_insertionsort(const unsigned char *T, const int *PA,
258 int *first, int *last, int depth) {
263 for(i = last - 2; first <= i; --i) {
264 for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
265 do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
266 if(last <= j) { break; }
268 if(r == 0) { *j = ~*j; }
273 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
276 /*---------------------------------------------------------------------------*/
278 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
282 ss_fixdown(const unsigned char *Td, const int *PA,
283 int *SA, int i, int size) {
288 for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
289 d = Td[PA[SA[k = j++]]];
290 if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
291 if(d <= c) { break; }
296 /* Simple top-down heapsort. */
299 ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
304 if((size % 2) == 0) {
306 if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
309 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
310 if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
311 for(i = m - 1; 0 < i; --i) {
312 t = SA[0], SA[0] = SA[i];
313 ss_fixdown(Td, PA, SA, 0, i);
319 /*---------------------------------------------------------------------------*/
321 /* Returns the median of three elements. */
324 ss_median3(const unsigned char *Td, const int *PA,
325 int *v1, int *v2, int *v3) {
327 if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
328 if(Td[PA[*v2]] > Td[PA[*v3]]) {
329 if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
335 /* Returns the median of five elements. */
338 ss_median5(const unsigned char *Td, const int *PA,
339 int *v1, int *v2, int *v3, int *v4, int *v5) {
341 if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
342 if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
343 if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
344 if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
345 if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
346 if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
350 /* Returns the pivot element. */
353 ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
358 middle = first + t / 2;
362 return ss_median3(Td, PA, first, middle, last - 1);
365 return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
369 first = ss_median3(Td, PA, first, first + t, first + (t << 1));
370 middle = ss_median3(Td, PA, middle - t, middle, middle + t);
371 last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
372 return ss_median3(Td, PA, first, middle, last);
376 /*---------------------------------------------------------------------------*/
378 /* Binary partition for substrings. */
381 ss_partition(const int *PA,
382 int *first, int *last, int depth) {
385 for(a = first - 1, b = last;;) {
386 for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
387 for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
388 if(b <= a) { break; }
393 if(first < a) { *first = ~*first; }
397 /* Multikey introsort for medium size groups. */
400 ss_mintrosort(const unsigned char *T, const int *PA,
401 int *first, int *last,
403 #define STACK_SIZE SS_MISORT_STACKSIZE
404 struct { int *a, *b, c; int d; } stack[STACK_SIZE];
405 const unsigned char *Td;
406 int *a, *b, *c, *d, *e, *f;
412 for(ssize = 0, limit = ss_ilg(last - first);;) {
414 if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
415 #if 1 < SS_INSERTIONSORT_THRESHOLD
416 if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
418 STACK_POP(first, last, depth, limit);
423 if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
425 for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
426 if((x = Td[PA[*a]]) != v) {
427 if(1 < (a - first)) { break; }
432 if(Td[PA[*first] - 1] < v) {
433 first = ss_partition(PA, first, a, depth);
435 if((a - first) <= (last - a)) {
436 if(1 < (a - first)) {
437 STACK_PUSH(a, last, depth, -1);
438 last = a, depth += 1, limit = ss_ilg(a - first);
440 first = a, limit = -1;
444 STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
445 first = a, limit = -1;
447 last = a, depth += 1, limit = ss_ilg(a - first);
454 a = ss_pivot(Td, PA, first, last);
459 for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
460 if(((a = b) < last) && (x < v)) {
461 for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
462 if(x == v) { SWAP(*b, *a); ++a; }
465 for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
466 if((b < (d = c)) && (x > v)) {
467 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
468 if(x == v) { SWAP(*c, *d); --d; }
473 for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
474 if(x == v) { SWAP(*b, *a); ++a; }
476 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
477 if(x == v) { SWAP(*c, *d); --d; }
484 if((s = a - first) > (t = b - a)) { s = t; }
485 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
486 if((s = d - c) > (t = last - d - 1)) { s = t; }
487 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
489 a = first + (b - a), c = last - (d - c);
490 b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
492 if((a - first) <= (last - c)) {
493 if((last - c) <= (c - b)) {
494 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
495 STACK_PUSH(c, last, depth, limit);
497 } else if((a - first) <= (c - b)) {
498 STACK_PUSH(c, last, depth, limit);
499 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
502 STACK_PUSH(c, last, depth, limit);
503 STACK_PUSH(first, a, depth, limit);
504 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
507 if((a - first) <= (c - b)) {
508 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
509 STACK_PUSH(first, a, depth, limit);
511 } else if((last - c) <= (c - b)) {
512 STACK_PUSH(first, a, depth, limit);
513 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
516 STACK_PUSH(first, a, depth, limit);
517 STACK_PUSH(c, last, depth, limit);
518 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
523 if(Td[PA[*first] - 1] < v) {
524 first = ss_partition(PA, first, last, depth);
525 limit = ss_ilg(last - first);
533 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
536 /*---------------------------------------------------------------------------*/
538 #if SS_BLOCKSIZE != 0
542 ss_blockswap(int *a, int *b, int n) {
544 for(; 0 < n; --n, ++a, ++b) {
545 t = *a, *a = *b, *b = t;
551 ss_rotate(int *first, int *middle, int *last) {
554 l = middle - first, r = last - middle;
555 for(; (0 < l) && (0 < r);) {
556 if(l == r) { ss_blockswap(first, middle, l); break; }
558 a = last - 1, b = middle - 1;
561 *a-- = *b, *b-- = *a;
565 if((r -= l + 1) <= l) { break; }
566 a -= 1, b = middle - 1;
571 a = first, b = middle;
574 *a++ = *b, *b++ = *a;
578 if((l -= r + 1) <= r) { break; }
588 /*---------------------------------------------------------------------------*/
592 ss_inplacemerge(const unsigned char *T, const int *PA,
593 int *first, int *middle, int *last,
602 if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
603 else { x = 0; p = PA + *(last - 1); }
604 for(a = first, len = middle - first, half = len >> 1, r = -1;
606 len = half, half >>= 1) {
608 q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
611 half -= (len & 1) ^ 1;
617 if(r == 0) { *a = ~*a; }
618 ss_rotate(a, middle, last);
621 if(first == middle) { break; }
624 if(x != 0) { while(*--last < 0) { } }
625 if(middle == last) { break; }
630 /*---------------------------------------------------------------------------*/
632 /* Merge-forward with internal buffer. */
635 ss_mergeforward(const unsigned char *T, const int *PA,
636 int *first, int *middle, int *last,
637 int *buf, int depth) {
638 int *a, *b, *c, *bufend;
642 bufend = buf + (middle - first) - 1;
643 ss_blockswap(buf, first, middle - first);
645 for(t = *(a = first), b = buf, c = middle;;) {
646 r = ss_compare(T, PA + *b, PA + *c, depth);
650 if(bufend <= b) { *bufend = t; return; }
655 *a++ = *c, *c++ = *a;
657 while(b < bufend) { *a++ = *b, *b++ = *a; }
666 if(bufend <= b) { *bufend = t; return; }
671 *a++ = *c, *c++ = *a;
673 while(b < bufend) { *a++ = *b, *b++ = *a; }
682 /* Merge-backward with internal buffer. */
685 ss_mergebackward(const unsigned char *T, const int *PA,
686 int *first, int *middle, int *last,
687 int *buf, int depth) {
689 int *a, *b, *c, *bufend;
694 bufend = buf + (last - middle) - 1;
695 ss_blockswap(buf, middle, last - middle);
698 if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
699 else { p1 = PA + *bufend; }
700 if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
701 else { p2 = PA + *(middle - 1); }
702 for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
703 r = ss_compare(T, p1, p2, depth);
705 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
707 if(b <= buf) { *buf = t; break; }
709 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
710 else { p1 = PA + *b; }
712 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
713 *a-- = *c, *c-- = *a;
715 while(buf < b) { *a-- = *b, *b-- = *a; }
719 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
720 else { p2 = PA + *c; }
722 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
724 if(b <= buf) { *buf = t; break; }
726 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
727 *a-- = *c, *c-- = *a;
729 while(buf < b) { *a-- = *b, *b-- = *a; }
733 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
734 else { p1 = PA + *b; }
735 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
736 else { p2 = PA + *c; }
741 /* D&C based merge. */
744 ss_swapmerge(const unsigned char *T, const int *PA,
745 int *first, int *middle, int *last,
746 int *buf, int bufsize, int depth) {
747 #define STACK_SIZE SS_SMERGE_STACKSIZE
748 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
749 #define MERGE_CHECK(a, b, c)\
752 (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
755 if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
759 struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
760 int *l, *r, *lm, *rm;
765 for(check = 0, ssize = 0;;) {
766 if((last - middle) <= bufsize) {
767 if((first < middle) && (middle < last)) {
768 ss_mergebackward(T, PA, first, middle, last, buf, depth);
770 MERGE_CHECK(first, last, check);
771 STACK_POP(first, middle, last, check);
775 if((middle - first) <= bufsize) {
777 ss_mergeforward(T, PA, first, middle, last, buf, depth);
779 MERGE_CHECK(first, last, check);
780 STACK_POP(first, middle, last, check);
784 for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
786 len = half, half >>= 1) {
787 if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
788 PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
790 half -= (len & 1) ^ 1;
795 lm = middle - m, rm = middle + m;
796 ss_blockswap(lm, middle, m);
797 l = r = middle, next = 0;
801 if(first < lm) { for(; *--l < 0;) { } next |= 4; }
803 } else if(first < lm) {
804 for(; *r < 0; ++r) { }
809 if((l - first) <= (last - r)) {
810 STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
811 middle = lm, last = l, check = (check & 3) | (next & 4);
813 if((next & 2) && (r == middle)) { next ^= 6; }
814 STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
815 first = r, middle = rm, check = (next & 3) | (check & 4);
818 if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
821 MERGE_CHECK(first, last, check);
822 STACK_POP(first, middle, last, check);
828 #endif /* SS_BLOCKSIZE != 0 */
831 /*---------------------------------------------------------------------------*/
836 sssort(const unsigned char *T, const int *PA,
837 int *first, int *last,
838 int *buf, int bufsize,
839 int depth, int n, int lastsuffix) {
841 #if SS_BLOCKSIZE != 0
842 int *b, *middle, *curbuf;
843 int j, k, curbufsize, limit;
847 if(lastsuffix != 0) { ++first; }
849 #if SS_BLOCKSIZE == 0
850 ss_mintrosort(T, PA, first, last, depth);
852 if((bufsize < SS_BLOCKSIZE) &&
853 (bufsize < (last - first)) &&
854 (bufsize < (limit = ss_isqrt(last - first)))) {
855 if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
856 buf = middle = last - limit, bufsize = limit;
858 middle = last, limit = 0;
860 for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
861 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
862 ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
863 #elif 1 < SS_BLOCKSIZE
864 ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
866 curbufsize = last - (a + SS_BLOCKSIZE);
867 curbuf = a + SS_BLOCKSIZE;
868 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
869 for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
870 ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
873 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
874 ss_mintrosort(T, PA, a, middle, depth);
875 #elif 1 < SS_BLOCKSIZE
876 ss_insertionsort(T, PA, a, middle, depth);
878 for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
880 ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
885 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
886 ss_mintrosort(T, PA, middle, last, depth);
887 #elif 1 < SS_BLOCKSIZE
888 ss_insertionsort(T, PA, middle, last, depth);
890 ss_inplacemerge(T, PA, first, middle, last, depth);
894 if(lastsuffix != 0) {
895 /* Insert last type B* suffix. */
896 int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
897 for(a = first, i = *(first - 1);
898 (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
907 /*---------------------------------------------------------------------------*/
912 return (n & 0xffff0000) ?
914 24 + lg_table[(n >> 24) & 0xff] :
915 16 + lg_table[(n >> 16) & 0xff]) :
917 8 + lg_table[(n >> 8) & 0xff] :
918 0 + lg_table[(n >> 0) & 0xff]);
922 /*---------------------------------------------------------------------------*/
924 /* Simple insertionsort for small size groups. */
927 tr_insertionsort(const int *ISAd, int *first, int *last) {
931 for(a = first + 1; a < last; ++a) {
932 for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
933 do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
934 if(b < first) { break; }
936 if(r == 0) { *b = ~*b; }
942 /*---------------------------------------------------------------------------*/
946 tr_fixdown(const int *ISAd, int *SA, int i, int size) {
951 for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
952 d = ISAd[SA[k = j++]];
953 if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
954 if(d <= c) { break; }
959 /* Simple top-down heapsort. */
962 tr_heapsort(const int *ISAd, int *SA, int size) {
967 if((size % 2) == 0) {
969 if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
972 for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
973 if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
974 for(i = m - 1; 0 < i; --i) {
975 t = SA[0], SA[0] = SA[i];
976 tr_fixdown(ISAd, SA, 0, i);
982 /*---------------------------------------------------------------------------*/
984 /* Returns the median of three elements. */
987 tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
989 if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
990 if(ISAd[*v2] > ISAd[*v3]) {
991 if(ISAd[*v1] > ISAd[*v3]) { return v1; }
997 /* Returns the median of five elements. */
1000 tr_median5(const int *ISAd,
1001 int *v1, int *v2, int *v3, int *v4, int *v5) {
1003 if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
1004 if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
1005 if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
1006 if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
1007 if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
1008 if(ISAd[*v3] > ISAd[*v4]) { return v4; }
1012 /* Returns the pivot element. */
1015 tr_pivot(const int *ISAd, int *first, int *last) {
1020 middle = first + t / 2;
1024 return tr_median3(ISAd, first, middle, last - 1);
1027 return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
1031 first = tr_median3(ISAd, first, first + t, first + (t << 1));
1032 middle = tr_median3(ISAd, middle - t, middle, middle + t);
1033 last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
1034 return tr_median3(ISAd, first, middle, last);
1038 /*---------------------------------------------------------------------------*/
1040 typedef struct _trbudget_t trbudget_t;
1041 struct _trbudget_t {
1050 trbudget_init(trbudget_t *budget, int chance, int incval) {
1051 budget->chance = chance;
1052 budget->remain = budget->incval = incval;
1057 trbudget_check(trbudget_t *budget, int size) {
1058 if(size <= budget->remain) { budget->remain -= size; return 1; }
1059 if(budget->chance == 0) { budget->count += size; return 0; }
1060 budget->remain += budget->incval - size;
1061 budget->chance -= 1;
1066 /*---------------------------------------------------------------------------*/
1070 tr_partition(const int *ISAd,
1071 int *first, int *middle, int *last,
1072 int **pa, int **pb, int v) {
1073 int *a, *b, *c, *d, *e, *f;
1077 for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
1078 if(((a = b) < last) && (x < v)) {
1079 for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
1080 if(x == v) { SWAP(*b, *a); ++a; }
1083 for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
1084 if((b < (d = c)) && (x > v)) {
1085 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1086 if(x == v) { SWAP(*c, *d); --d; }
1091 for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1092 if(x == v) { SWAP(*b, *a); ++a; }
1094 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1095 if(x == v) { SWAP(*c, *d); --d; }
1101 if((s = a - first) > (t = b - a)) { s = t; }
1102 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1103 if((s = d - c) > (t = last - d - 1)) { s = t; }
1104 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1105 first += (b - a), last -= (d - c);
1107 *pa = first, *pb = last;
1112 tr_copy(int *ISA, const int *SA,
1113 int *first, int *a, int *b, int *last,
1115 /* sort suffixes of middle partition
1116 by using sorted order of suffixes of left and right partition. */
1121 for(c = first, d = a - 1; c <= d; ++c) {
1122 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1127 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1128 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1137 tr_partialcopy(int *ISA, const int *SA,
1138 int *first, int *a, int *b, int *last,
1142 int rank, lastrank, newrank = -1;
1146 for(c = first, d = a - 1; c <= d; ++c) {
1147 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1149 rank = ISA[s + depth];
1150 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1156 for(e = d; first <= e; --e) {
1158 if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
1159 if(newrank != rank) { ISA[*e] = newrank; }
1163 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1164 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1166 rank = ISA[s + depth];
1167 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1175 tr_introsort(int *ISA, const int *ISAd,
1176 int *SA, int *first, int *last,
1177 trbudget_t *budget) {
1178 #define STACK_SIZE TR_STACKSIZE
1179 struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
1183 int incr = ISAd - ISA;
1185 int ssize, trlink = -1;
1187 for(ssize = 0, limit = tr_ilg(last - first);;) {
1191 /* tandem repeat partition */
1192 tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
1196 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1199 for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1204 STACK_PUSH5(NULL, a, b, 0, 0);
1205 STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1208 if((a - first) <= (last - b)) {
1209 if(1 < (a - first)) {
1210 STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1211 last = a, limit = tr_ilg(a - first);
1212 } else if(1 < (last - b)) {
1213 first = b, limit = tr_ilg(last - b);
1215 STACK_POP5(ISAd, first, last, limit, trlink);
1218 if(1 < (last - b)) {
1219 STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1220 first = b, limit = tr_ilg(last - b);
1221 } else if(1 < (a - first)) {
1222 last = a, limit = tr_ilg(a - first);
1224 STACK_POP5(ISAd, first, last, limit, trlink);
1227 } else if(limit == -2) {
1228 /* tandem repeat copy */
1229 a = stack[--ssize].b, b = stack[ssize].c;
1230 if(stack[ssize].d == 0) {
1231 tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
1233 if(0 <= trlink) { stack[trlink].d = -1; }
1234 tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
1236 STACK_POP5(ISAd, first, last, limit, trlink);
1238 /* sorted partition */
1241 do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
1245 a = first; do { *a = ~*a; } while(*++a < 0);
1246 next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1247 if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
1250 if(trbudget_check(budget, a - first)) {
1251 if((a - first) <= (last - a)) {
1252 STACK_PUSH5(ISAd, a, last, -3, trlink);
1253 ISAd += incr, last = a, limit = next;
1255 if(1 < (last - a)) {
1256 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
1257 first = a, limit = -3;
1259 ISAd += incr, last = a, limit = next;
1263 if(0 <= trlink) { stack[trlink].d = -1; }
1264 if(1 < (last - a)) {
1265 first = a, limit = -3;
1267 STACK_POP5(ISAd, first, last, limit, trlink);
1271 STACK_POP5(ISAd, first, last, limit, trlink);
1277 if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
1278 tr_insertionsort(ISAd, first, last);
1284 tr_heapsort(ISAd, first, last - first);
1285 for(a = last - 1; first < a; a = b) {
1286 for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1293 a = tr_pivot(ISAd, first, last);
1298 tr_partition(ISAd, first, first + 1, last, &a, &b, v);
1299 if((last - first) != (b - a)) {
1300 next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1303 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1304 if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
1307 if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1308 if((a - first) <= (last - b)) {
1309 if((last - b) <= (b - a)) {
1310 if(1 < (a - first)) {
1311 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1312 STACK_PUSH5(ISAd, b, last, limit, trlink);
1314 } else if(1 < (last - b)) {
1315 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1318 ISAd += incr, first = a, last = b, limit = next;
1320 } else if((a - first) <= (b - a)) {
1321 if(1 < (a - first)) {
1322 STACK_PUSH5(ISAd, b, last, limit, trlink);
1323 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1326 STACK_PUSH5(ISAd, b, last, limit, trlink);
1327 ISAd += incr, first = a, last = b, limit = next;
1330 STACK_PUSH5(ISAd, b, last, limit, trlink);
1331 STACK_PUSH5(ISAd, first, a, limit, trlink);
1332 ISAd += incr, first = a, last = b, limit = next;
1335 if((a - first) <= (b - a)) {
1336 if(1 < (last - b)) {
1337 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1338 STACK_PUSH5(ISAd, first, a, limit, trlink);
1340 } else if(1 < (a - first)) {
1341 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1344 ISAd += incr, first = a, last = b, limit = next;
1346 } else if((last - b) <= (b - a)) {
1347 if(1 < (last - b)) {
1348 STACK_PUSH5(ISAd, first, a, limit, trlink);
1349 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1352 STACK_PUSH5(ISAd, first, a, limit, trlink);
1353 ISAd += incr, first = a, last = b, limit = next;
1356 STACK_PUSH5(ISAd, first, a, limit, trlink);
1357 STACK_PUSH5(ISAd, b, last, limit, trlink);
1358 ISAd += incr, first = a, last = b, limit = next;
1362 if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1363 if((a - first) <= (last - b)) {
1364 if(1 < (a - first)) {
1365 STACK_PUSH5(ISAd, b, last, limit, trlink);
1367 } else if(1 < (last - b)) {
1370 STACK_POP5(ISAd, first, last, limit, trlink);
1373 if(1 < (last - b)) {
1374 STACK_PUSH5(ISAd, first, a, limit, trlink);
1376 } else if(1 < (a - first)) {
1379 STACK_POP5(ISAd, first, last, limit, trlink);
1384 if(trbudget_check(budget, last - first)) {
1385 limit = tr_ilg(last - first), ISAd += incr;
1387 if(0 <= trlink) { stack[trlink].d = -1; }
1388 STACK_POP5(ISAd, first, last, limit, trlink);
1397 /*---------------------------------------------------------------------------*/
1399 /* Tandem repeat sort */
1402 trsort(int *ISA, int *SA, int n, int depth) {
1406 int t, skip, unsorted;
1408 trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1409 /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
1410 for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
1415 if((t = *first) < 0) { first -= t; skip += t; }
1417 if(skip != 0) { *(first + skip) = skip; skip = 0; }
1418 last = SA + ISA[t] + 1;
1419 if(1 < (last - first)) {
1421 tr_introsort(ISA, ISAd, SA, first, last, &budget);
1422 if(budget.count != 0) { unsorted += budget.count; }
1423 else { skip = first - last; }
1424 } else if((last - first) == 1) {
1429 } while(first < (SA + n));
1430 if(skip != 0) { *(first + skip) = skip; }
1431 if(unsorted == 0) { break; }
1436 /*---------------------------------------------------------------------------*/
1438 /* Sorts suffixes of type B*. */
1441 sort_typeBstar(const unsigned char *T, int *SA,
1442 int *bucket_A, int *bucket_B,
1443 int n, int openMP) {
1444 int *PAb, *ISAb, *buf;
1445 #ifdef LIBBSC_OPENMP
1449 int i, j, k, t, m, bufsize;
1451 #ifdef LIBBSC_OPENMP
1456 /* Initialize bucket arrays. */
1457 for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
1458 for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
1460 /* Count the number of occurrences of the first one or two characters of each
1461 type A, B and B* suffix. Moreover, store the beginning position of all
1462 type B* suffixes into the array SA. */
1463 for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1464 /* type A suffix. */
1465 do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
1467 /* type B* suffix. */
1468 ++BUCKET_BSTAR(c0, c1);
1470 /* type B suffix. */
1471 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1479 A type B* suffix is lexicographically smaller than a type B suffix that
1480 begins with the same first two characters.
1483 /* Calculate the index of start/end point of each bucket. */
1484 for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
1485 t = i + BUCKET_A(c0);
1486 BUCKET_A(c0) = i + j; /* start point */
1487 i = t + BUCKET_B(c0, c0);
1488 for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
1489 j += BUCKET_BSTAR(c0, c1);
1490 BUCKET_BSTAR(c0, c1) = j; /* end point */
1491 i += BUCKET_B(c0, c1);
1496 /* Sort the type B* suffixes by their first two characters. */
1497 PAb = SA + n - m; ISAb = SA + m;
1498 for(i = m - 2; 0 <= i; --i) {
1499 t = PAb[i], c0 = T[t], c1 = T[t + 1];
1500 SA[--BUCKET_BSTAR(c0, c1)] = i;
1502 t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1503 SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1505 /* Sort the type B* substrings using sssort. */
1506 #ifdef LIBBSC_OPENMP
1510 c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1511 #pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
1513 bufsize = (n - (2 * m)) / omp_get_num_threads();
1514 curbuf = buf + omp_get_thread_num() * bufsize;
1517 #pragma omp critical(sssort_lock)
1522 k = BUCKET_BSTAR(d0, d1);
1524 d1 = ALPHABET_SIZE - 1;
1525 if(--d0 < 0) { break; }
1527 } while(((l - k) <= 1) && (0 < (l = k)));
1528 c0 = d0, c1 = d1, j = k;
1531 if(l == 0) { break; }
1532 sssort(T, PAb, SA + k, SA + l,
1533 curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
1539 buf = SA + m, bufsize = n - (2 * m);
1540 for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1541 for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1542 i = BUCKET_BSTAR(c0, c1);
1544 sssort(T, PAb, SA + i, SA + j,
1545 buf, bufsize, 2, n, *(SA + i) == (m - 1));
1551 buf = SA + m, bufsize = n - (2 * m);
1552 for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1553 for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1554 i = BUCKET_BSTAR(c0, c1);
1556 sssort(T, PAb, SA + i, SA + j,
1557 buf, bufsize, 2, n, *(SA + i) == (m - 1));
1563 /* Compute ranks of type B* substrings. */
1564 for(i = m - 1; 0 <= i; --i) {
1567 do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
1569 if(i <= 0) { break; }
1572 do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
1576 /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577 trsort(ISAb, SA, m, 1);
1579 /* Set the sorted order of type B* suffixes. */
1580 for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1584 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1585 SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1589 /* Calculate the index of start/end point of each bucket. */
1590 BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
1591 for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
1592 i = BUCKET_A(c0 + 1) - 1;
1593 for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
1594 t = i - BUCKET_B(c0, c1);
1595 BUCKET_B(c0, c1) = i; /* end point */
1597 /* Move all type B* suffixes to the correct position. */
1598 for(i = t, j = BUCKET_BSTAR(c0, c1);
1600 --i, --k) { SA[i] = SA[k]; }
1602 BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
1603 BUCKET_B(c0, c0) = i; /* end point */
1610 /* Constructs the suffix array by using the sorted order of type B* suffixes. */
1613 construct_SA(const unsigned char *T, int *SA,
1614 int *bucket_A, int *bucket_B,
1621 /* Construct the sorted order of type B suffixes by using
1622 the sorted order of type B* suffixes. */
1623 for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1624 /* Scan the suffix array from right to left. */
1625 for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1626 j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1631 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1632 assert(T[s - 1] <= T[s]);
1635 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1637 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1638 k = SA + BUCKET_B(c2 = c0, c1);
1640 assert(k < j); assert(k != NULL);
1643 assert(((s == 0) && (T[s] == c1)) || (s < 0));
1650 /* Construct the suffix array by using
1651 the sorted order of type B suffixes. */
1652 k = SA + BUCKET_A(c2 = T[n - 1]);
1653 *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
1654 /* Scan the suffix array from left to right. */
1655 for(i = SA, j = SA + n; i < j; ++i) {
1657 assert(T[s - 1] >= T[s]);
1659 if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1661 BUCKET_A(c2) = k - SA;
1662 k = SA + BUCKET_A(c2 = c0);
1673 /* Constructs the burrows-wheeler transformed string directly
1674 by using the sorted order of type B* suffixes. */
1677 construct_BWT(const unsigned char *T, int *SA,
1678 int *bucket_A, int *bucket_B,
1680 int *i, *j, *k, *orig;
1685 /* Construct the sorted order of type B suffixes by using
1686 the sorted order of type B* suffixes. */
1687 for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1688 /* Scan the suffix array from right to left. */
1689 for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1690 j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1695 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1696 assert(T[s - 1] <= T[s]);
1699 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1701 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1702 k = SA + BUCKET_B(c2 = c0, c1);
1704 assert(k < j); assert(k != NULL);
1717 /* Construct the BWTed string by using
1718 the sorted order of type B suffixes. */
1719 k = SA + BUCKET_A(c2 = T[n - 1]);
1720 *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
1721 /* Scan the suffix array from left to right. */
1722 for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1724 assert(T[s - 1] >= T[s]);
1727 if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
1729 BUCKET_A(c2) = k - SA;
1730 k = SA + BUCKET_A(c2 = c0);
1744 /* Constructs the burrows-wheeler transformed string directly
1745 by using the sorted order of type B* suffixes. */
1748 construct_BWT_indexes(const unsigned char *T, int *SA,
1749 int *bucket_A, int *bucket_B,
1751 unsigned char * num_indexes, int * indexes) {
1752 int *i, *j, *k, *orig;
1758 mod |= mod >> 1; mod |= mod >> 2;
1759 mod |= mod >> 4; mod |= mod >> 8;
1760 mod |= mod >> 16; mod >>= 1;
1762 *num_indexes = (unsigned char)((n - 1) / (mod + 1));
1766 /* Construct the sorted order of type B suffixes by using
1767 the sorted order of type B* suffixes. */
1768 for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1769 /* Scan the suffix array from right to left. */
1770 for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1771 j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1776 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1777 assert(T[s - 1] <= T[s]);
1779 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
1783 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1785 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1786 k = SA + BUCKET_B(c2 = c0, c1);
1788 assert(k < j); assert(k != NULL);
1801 /* Construct the BWTed string by using
1802 the sorted order of type B suffixes. */
1803 k = SA + BUCKET_A(c2 = T[n - 1]);
1804 if (T[n - 2] < c2) {
1805 if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
1806 *k++ = ~((int)T[n - 2]);
1812 /* Scan the suffix array from left to right. */
1813 for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1815 assert(T[s - 1] >= T[s]);
1817 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
1822 BUCKET_A(c2) = k - SA;
1823 k = SA + BUCKET_A(c2 = c0);
1826 if((0 < s) && (T[s - 1] < c0)) {
1827 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
1828 *k++ = ~((int)T[s - 1]);
1842 /*---------------------------------------------------------------------------*/
1847 divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
1848 int *bucket_A, *bucket_B;
1852 /* Check arguments. */
1853 if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
1854 else if(n == 0) { return 0; }
1855 else if(n == 1) { SA[0] = 0; return 0; }
1856 else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
1858 bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1859 bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1862 if((bucket_A != NULL) && (bucket_B != NULL)) {
1863 m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
1864 construct_SA(T, SA, bucket_A, bucket_B, n, m);
1876 divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
1878 int *bucket_A, *bucket_B;
1881 /* Check arguments. */
1882 if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
1883 else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
1885 if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
1886 bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1887 bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1889 /* Burrows-Wheeler Transform. */
1890 if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
1891 m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
1893 if (num_indexes == NULL || indexes == NULL) {
1894 pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
1896 pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
1899 /* Copy to output string. */
1901 for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
1902 for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
1910 if(A == NULL) { free(B); }