sais.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604
  1. /*
  2. * sais.hxx for sais-lite
  3. * Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
  4. *
  5. * Permission is hereby granted, free of charge, to any person
  6. * obtaining a copy of this software and associated documentation
  7. * files (the "Software"), to deal in the Software without
  8. * restriction, including without limitation the rights to use,
  9. * copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the
  11. * Software is furnished to do so, subject to the following
  12. * conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be
  15. * included in all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  19. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  21. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  22. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. * OTHER DEALINGS IN THE SOFTWARE.
  25. */
  26. #ifndef _SAIS_HXX
  27. #define _SAIS_HXX 1
  28. #ifdef __cplusplus
  29. #include <cassert>
  30. #include <iterator>
  31. #include <limits>
  32. #ifdef __INTEL_COMPILER
  33. #pragma warning(disable : 383 981 1418)
  34. #endif
  35. #ifdef _MSC_VER
  36. #pragma warning(push)
  37. #pragma warning(disable : 4365)
  38. #endif
  39. namespace saisxx_private {
  40. /* find the start or end of each bucket */
  41. template<typename string_type, typename bucket_type, typename index_type>
  42. void
  43. getCounts(const string_type T, bucket_type C, index_type n, index_type k) {
  44. index_type i;
  45. for(i = 0; i < k; ++i) { C[i] = 0; }
  46. for(i = 0; i < n; ++i) { ++C[T[i]]; }
  47. }
  48. template<typename bucketC_type, typename bucketB_type, typename index_type>
  49. void
  50. getBuckets(const bucketC_type C, bucketB_type B, index_type k, bool end) {
  51. index_type i, sum = 0;
  52. if(end != false) { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum; } }
  53. else { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum - C[i]; } }
  54. }
  55. template<typename string_type, typename sarray_type,
  56. typename bucketC_type, typename bucketB_type, typename index_type>
  57. void
  58. LMSsort1(string_type T, sarray_type SA,
  59. bucketC_type C, bucketB_type B,
  60. index_type n, index_type k, bool recount) {
  61. typedef typename std::iterator_traits<string_type>::value_type char_type;
  62. typedef typename std::iterator_traits<bucketB_type>::value_type bucket_type;
  63. sarray_type b;
  64. index_type i, j;
  65. char_type c0, c1;
  66. /* compute SAl */
  67. if(recount != false) { getCounts(T, C, n, k); }
  68. getBuckets(C, B, k, false); /* find starts of buckets */
  69. j = n - 1;
  70. b = SA + B[c1 = T[j]];
  71. --j;
  72. *b++ = (T[j] < c1) ? ~j : j;
  73. for(i = 0; i < n; ++i) {
  74. if(0 < (j = SA[i])) {
  75. assert(T[j] >= T[j + 1]);
  76. if((c0 = T[j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
  77. assert(i < (b - SA));
  78. --j;
  79. *b++ = (T[j] < c1) ? ~j : j;
  80. SA[i] = 0;
  81. } else if(j < 0) {
  82. SA[i] = ~j;
  83. }
  84. }
  85. /* compute SAs */
  86. if(recount != false) { getCounts(T, C, n, k); }
  87. getBuckets(C, B, k, true); /* find ends of buckets */
  88. for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
  89. if(0 < (j = SA[i])) {
  90. assert(T[j] <= T[j + 1]);
  91. if((c0 = T[j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
  92. assert((b - SA) <= i);
  93. --j;
  94. *--b = (T[j] > c1) ? ~(j + 1) : j;
  95. SA[i] = 0;
  96. }
  97. }
  98. }
  99. template<typename string_type, typename sarray_type, typename index_type>
  100. index_type
  101. LMSpostproc1(string_type T, sarray_type SA, index_type n, index_type m) {
  102. typedef typename std::iterator_traits<string_type>::value_type char_type;
  103. index_type i, j, p, q, plen, qlen, name;
  104. char_type c0, c1;
  105. bool diff;
  106. /* compact all the sorted substrings into the first m items of SA
  107. 2*m must be not larger than n (proveable) */
  108. assert(0 < n);
  109. for(i = 0; (p = SA[i]) < 0; ++i) { SA[i] = ~p; assert((i + 1) < n); }
  110. if(i < m) {
  111. for(j = i, ++i;; ++i) {
  112. assert(i < n);
  113. if((p = SA[i]) < 0) {
  114. SA[j++] = ~p; SA[i] = 0;
  115. if(j == m) { break; }
  116. }
  117. }
  118. }
  119. /* store the length of all substrings */
  120. i = n - 1; j = n - 1; c0 = T[n - 1];
  121. do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
  122. for(; 0 <= i;) {
  123. do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) <= c1));
  124. if(0 <= i) {
  125. SA[m + ((i + 1) >> 1)] = j - i; j = i + 1;
  126. do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
  127. }
  128. }
  129. /* find the lexicographic names of all substrings */
  130. for(i = 0, name = 0, q = n, qlen = 0; i < m; ++i) {
  131. p = SA[i], plen = SA[m + (p >> 1)], diff = true;
  132. if((plen == qlen) && ((q + plen) < n)) {
  133. for(j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j) { }
  134. if(j == plen) { diff = false; }
  135. }
  136. if(diff != false) { ++name, q = p, qlen = plen; }
  137. SA[m + (p >> 1)] = name;
  138. }
  139. return name;
  140. }
  141. template<typename string_type, typename sarray_type,
  142. typename bucketC_type, typename bucketB_type, typename bucketD_type,
  143. typename index_type>
  144. void
  145. LMSsort2(string_type T, sarray_type SA,
  146. bucketC_type C, bucketB_type B, bucketD_type D,
  147. index_type n, index_type k) {
  148. typedef typename std::iterator_traits<string_type>::value_type char_type;
  149. typedef typename std::iterator_traits<bucketB_type>::value_type bucket_type;
  150. sarray_type b;
  151. index_type i, j, t, d;
  152. char_type c0, c1;
  153. /* compute SAl */
  154. getBuckets(C, B, k, false); /* find starts of buckets */
  155. j = n - 1;
  156. b = SA + B[c1 = T[j]];
  157. --j;
  158. t = (T[j] < c1);
  159. j += n;
  160. *b++ = (t & 1) ? ~j : j;
  161. for(i = 0, d = 0; i < n; ++i) {
  162. if(0 < (j = SA[i])) {
  163. if(n <= j) { d += 1; j -= n; }
  164. assert(T[j] >= T[j + 1]);
  165. if((c0 = T[j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
  166. assert(i < (b - SA));
  167. --j;
  168. t = c0; t = (t << 1) | (T[j] < c1);
  169. if(D[t] != d) { j += n; D[t] = d; }
  170. *b++ = (t & 1) ? ~j : j;
  171. SA[i] = 0;
  172. } else if(j < 0) {
  173. SA[i] = ~j;
  174. }
  175. }
  176. for(i = n - 1; 0 <= i; --i) {
  177. if(0 < SA[i]) {
  178. if(SA[i] < n) {
  179. SA[i] += n;
  180. for(j = i - 1; SA[j] < n; --j) { }
  181. SA[j] -= n;
  182. i = j;
  183. }
  184. }
  185. }
  186. /* compute SAs */
  187. getBuckets(C, B, k, true); /* find ends of buckets */
  188. for(i = n - 1, d += 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
  189. if(0 < (j = SA[i])) {
  190. if(n <= j) { d += 1; j -= n; }
  191. assert(T[j] <= T[j + 1]);
  192. if((c0 = T[j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
  193. assert((b - SA) <= i);
  194. --j;
  195. t = c0; t = (t << 1) | (T[j] > c1);
  196. if(D[t] != d) { j += n; D[t] = d; }
  197. *--b = (t & 1) ? ~(j + 1) : j;
  198. SA[i] = 0;
  199. }
  200. }
  201. }
  202. template<typename sarray_type, typename index_type>
  203. index_type
  204. LMSpostproc2(sarray_type SA, index_type n, index_type m) {
  205. index_type i, j, d, name;
  206. /* compact all the sorted LMS substrings into the first m items of SA */
  207. assert(0 < n);
  208. for(i = 0, name = 0; (j = SA[i]) < 0; ++i) {
  209. j = ~j;
  210. if(n <= j) { name += 1; }
  211. SA[i] = j;
  212. assert((i + 1) < n);
  213. }
  214. if(i < m) {
  215. for(d = i, ++i;; ++i) {
  216. assert(i < n);
  217. if((j = SA[i]) < 0) {
  218. j = ~j;
  219. if(n <= j) { name += 1; }
  220. SA[d++] = j; SA[i] = 0;
  221. if(d == m) { break; }
  222. }
  223. }
  224. }
  225. if(name < m) {
  226. /* store the lexicographic names */
  227. for(i = m - 1, d = name + 1; 0 <= i; --i) {
  228. if(n <= (j = SA[i])) { j -= n; --d; }
  229. SA[m + (j >> 1)] = d;
  230. }
  231. } else {
  232. /* unset flags */
  233. for(i = 0; i < m; ++i) {
  234. if(n <= (j = SA[i])) { j -= n; SA[i] = j; }
  235. }
  236. }
  237. return name;
  238. }
  239. /* compute SA and BWT */
  240. template<typename string_type, typename sarray_type,
  241. typename bucketC_type, typename bucketB_type, typename index_type>
  242. void
  243. induceSA(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
  244. index_type n, index_type k, bool recount) {
  245. typedef typename std::iterator_traits<string_type>::value_type char_type;
  246. typedef typename std::iterator_traits<bucketB_type>::value_type bucket_type;
  247. sarray_type b;
  248. index_type i, j;
  249. char_type c0, c1;
  250. /* compute SAl */
  251. if(recount != false) { getCounts(T, C, n, k); }
  252. getBuckets(C, B, k, false); /* find starts of buckets */
  253. b = SA + B[c1 = T[j = n - 1]];
  254. *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
  255. for(i = 0; i < n; ++i) {
  256. j = SA[i], SA[i] = ~j;
  257. if(0 < j) {
  258. if((c0 = T[--j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
  259. *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
  260. }
  261. }
  262. /* compute SAs */
  263. if(recount != false) { getCounts(T, C, n, k); }
  264. getBuckets(C, B, k, true); /* find ends of buckets */
  265. for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
  266. if(0 < (j = SA[i])) {
  267. if((c0 = T[--j]) != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
  268. *--b = ((j == 0) || (T[j - 1] > c1)) ? ~j : j;
  269. } else {
  270. SA[i] = ~j;
  271. }
  272. }
  273. }
  274. template<typename string_type, typename sarray_type,
  275. typename bucketC_type, typename bucketB_type, typename index_type>
  276. index_type
  277. computeBWT(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
  278. index_type n, index_type k, bool recount) {
  279. typedef typename std::iterator_traits<string_type>::value_type char_type;
  280. typedef typename std::iterator_traits<bucketB_type>::value_type bucket_type;
  281. sarray_type b;
  282. index_type i, j, pidx = -1;
  283. char_type c0, c1;
  284. /* compute SAl */
  285. if(recount != false) { getCounts(T, C, n, k); }
  286. getBuckets(C, B, k, false); /* find starts of buckets */
  287. b = SA + B[c1 = T[j = n - 1]];
  288. *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
  289. for(i = 0; i < n; ++i) {
  290. if(0 < (j = SA[i])) {
  291. SA[i] = ~((index_type)(c0 = T[--j]));
  292. if(c0 != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
  293. *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
  294. } else if(j != 0) {
  295. SA[i] = ~j;
  296. }
  297. }
  298. /* compute SAs */
  299. if(recount != false) { getCounts(T, C, n, k); }
  300. getBuckets(C, B, k, true); /* find ends of buckets */
  301. for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
  302. if(0 < (j = SA[i])) {
  303. SA[i] = (c0 = T[--j]);
  304. if(c0 != c1) { B[c1] = bucket_type(b - SA); b = SA + B[c1 = c0]; }
  305. *--b = ((0 < j) && (T[j - 1] > c1)) ? ~((index_type)T[j - 1]) : j;
  306. } else if(j != 0) {
  307. SA[i] = ~j;
  308. } else {
  309. pidx = i;
  310. }
  311. }
  312. return pidx;
  313. }
  314. template<typename string_type, typename sarray_type,
  315. typename bucketC_type, typename bucketB_type,
  316. typename index_type>
  317. std::pair<index_type, index_type>
  318. stage1sort(string_type T, sarray_type SA,
  319. bucketC_type C, bucketB_type B,
  320. index_type n, index_type k, unsigned flags) {
  321. typedef typename std::iterator_traits<string_type>::value_type char_type;
  322. sarray_type b;
  323. index_type i, j, name, m;
  324. char_type c0, c1;
  325. getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
  326. for(i = 0; i < n; ++i) { SA[i] = 0; }
  327. b = SA + n - 1; i = n - 1; j = n; m = 0; c0 = T[n - 1];
  328. do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
  329. for(; 0 <= i;) {
  330. do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) <= c1));
  331. if(0 <= i) {
  332. *b = j; b = SA + --B[c1]; j = i; ++m; assert(B[c1] != (n - 1));
  333. do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
  334. }
  335. }
  336. SA[n - 1] = 0;
  337. if(1 < m) {
  338. if(flags & (16 | 32)) {
  339. assert((j + 1) < n);
  340. ++B[T[j + 1]];
  341. if(flags & 16) {
  342. index_type *D;
  343. try { D = new index_type[k * 2]; } catch(...) { D = 0; }
  344. if(D == 0) { return std::make_pair(-2, -2); }
  345. for(i = 0, j = 0; i < k; ++i) {
  346. j += C[i];
  347. if(B[i] != j) { assert(SA[B[i]] != 0); SA[B[i]] += n; }
  348. D[i] = D[i + k] = 0;
  349. }
  350. LMSsort2(T, SA, C, B, D, n, k);
  351. delete[] D;
  352. } else {
  353. bucketB_type D = B - k * 2;
  354. for(i = 0, j = 0; i < k; ++i) {
  355. j += C[i];
  356. if(B[i] != j) { assert(SA[B[i]] != 0); SA[B[i]] += n; }
  357. D[i] = D[i + k] = 0;
  358. }
  359. LMSsort2(T, SA, C, B, D, n, k);
  360. }
  361. name = LMSpostproc2(SA, n, m);
  362. } else {
  363. LMSsort1(T, SA, C, B, n, k, (flags & (4 | 64)) != 0);
  364. name = LMSpostproc1(T, SA, n, m);
  365. }
  366. } else if(m == 1) {
  367. *b = j + 1;
  368. name = 1;
  369. } else {
  370. name = 0;
  371. }
  372. return std::make_pair(m, name);
  373. }
  374. template<typename string_type, typename sarray_type,
  375. typename bucketC_type, typename bucketB_type, typename index_type>
  376. index_type
  377. stage3sort(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
  378. index_type n, index_type m, index_type k,
  379. unsigned flags, bool isbwt) {
  380. typedef typename std::iterator_traits<string_type>::value_type char_type;
  381. index_type i, j, p, q, pidx = 0;
  382. char_type c0, c1;
  383. if((flags & 8) != 0) { getCounts(T, C, n, k); }
  384. /* put all left-most S characters into their buckets */
  385. if(1 < m) {
  386. getBuckets(C, B, k, 1); /* find ends of buckets */
  387. i = m - 1, j = n, p = SA[m - 1], c1 = T[p];
  388. do {
  389. q = B[c0 = c1];
  390. while(q < j) { SA[--j] = 0; }
  391. do {
  392. SA[--j] = p;
  393. if(--i < 0) { break; }
  394. p = SA[i];
  395. } while((c1 = T[p]) == c0);
  396. } while(0 <= i);
  397. while(0 < j) { SA[--j] = 0; }
  398. }
  399. if(isbwt == false) { induceSA(T, SA, C, B, n, k, (flags & (4 | 64)) != 0); }
  400. else { pidx = computeBWT(T, SA, C, B, n, k, (flags & (4 | 64)) != 0); }
  401. return pidx;
  402. }
  403. /* find the suffix array SA of T[0..n-1] in {0..k}^n
  404. use a working space (excluding s and SA) of at most 2n+O(1) for a constant alphabet */
  405. template<typename string_type, typename sarray_type, typename index_type>
  406. index_type
  407. suffixsort(string_type T, sarray_type SA,
  408. index_type fs, index_type n, index_type k,
  409. bool isbwt) {
  410. typedef typename std::iterator_traits<string_type>::value_type char_type;
  411. sarray_type RA, C, B;
  412. index_type *Cp, *Bp;
  413. index_type i, j, m, name, pidx, newfs;
  414. unsigned flags = 0;
  415. char_type c0, c1;
  416. /* stage 1: reduce the problem by at least 1/2
  417. sort all the S-substrings */
  418. C = B = SA; /* for warnings */
  419. Cp = 0, Bp = 0;
  420. if(k <= 256) {
  421. try { Cp = new index_type[k]; } catch(...) { Cp = 0; }
  422. if(Cp == 0) { return -2; }
  423. if(k <= fs) {
  424. B = SA + (n + fs - k);
  425. flags = 1;
  426. } else {
  427. try { Bp = new index_type[k]; } catch(...) { Bp = 0; }
  428. if(Bp == 0) { return -2; }
  429. flags = 3;
  430. }
  431. } else if(k <= fs) {
  432. C = SA + (n + fs - k);
  433. if(k <= (fs - k)) {
  434. B = C - k;
  435. flags = 0;
  436. } else if(k <= 1024) {
  437. try { Bp = new index_type[k]; } catch(...) { Bp = 0; }
  438. if(Bp == 0) { return -2; }
  439. flags = 2;
  440. } else {
  441. B = C;
  442. flags = 64 | 8;
  443. }
  444. } else {
  445. try { Cp = new index_type[k]; } catch(...) { Cp = 0; }
  446. if(Cp == 0) { return -2; }
  447. Bp = Cp;
  448. flags = 4 | 8;
  449. }
  450. if((n <= ((std::numeric_limits<index_type>::max)() / 2)) && (2 <= (n / k))) {
  451. if(flags & 1) { flags |= ((k * 2) <= (fs - k)) ? 32 : 16; }
  452. else if((flags == 0) && ((k * 2) <= (fs - k * 2))) { flags |= 32; }
  453. }
  454. {
  455. std::pair<index_type, index_type> r;
  456. if(Cp != 0) {
  457. if(Bp != 0) { r = stage1sort(T, SA, Cp, Bp, n, k, flags); }
  458. else { r = stage1sort(T, SA, Cp, B, n, k, flags); }
  459. } else {
  460. if(Bp != 0) { r = stage1sort(T, SA, C, Bp, n, k, flags); }
  461. else { r = stage1sort(T, SA, C, B, n, k, flags); }
  462. }
  463. m = r.first, name = r.second;
  464. }
  465. if(m < 0) {
  466. if(flags & (1 | 4)) { delete[] Cp; }
  467. if(flags & 2) { delete[] Bp; }
  468. return -2;
  469. }
  470. /* stage 2: solve the reduced problem
  471. recurse if names are not yet unique */
  472. if(name < m) {
  473. if(flags & 4) { delete[] Cp; }
  474. if(flags & 2) { delete[] Bp; }
  475. newfs = (n + fs) - (m * 2);
  476. if((flags & (1 | 4 | 64)) == 0) {
  477. if((k + name) <= newfs) { newfs -= k; }
  478. else { flags |= 8; }
  479. }
  480. assert((n >> 1) <= (newfs + m));
  481. RA = SA + m + newfs;
  482. for(i = m + (n >> 1) - 1, j = m - 1; m <= i; --i) {
  483. if(SA[i] != 0) { RA[j--] = SA[i] - 1; }
  484. }
  485. if(suffixsort(RA, SA, newfs, m, name, false) != 0) { if(flags & 1) { delete[] Cp; } return -2; }
  486. i = n - 1; j = m - 1; c0 = T[n - 1];
  487. do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
  488. for(; 0 <= i;) {
  489. do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) <= c1));
  490. if(0 <= i) {
  491. RA[j--] = i + 1;
  492. do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
  493. }
  494. }
  495. for(i = 0; i < m; ++i) { SA[i] = RA[SA[i]]; }
  496. if(flags & 4) {
  497. try { Cp = new index_type[k]; } catch(...) { Cp = 0; }
  498. if(Cp == 0) { return -2; }
  499. Bp = Cp;
  500. }
  501. if(flags & 2) {
  502. try { Bp = new index_type[k]; } catch(...) { Bp = 0; }
  503. if(Bp == 0) { if(flags & 1) { delete[] Cp; } return -2; }
  504. }
  505. }
  506. /* stage 3: induce the result for the original problem */
  507. if(Cp != 0) {
  508. if(Bp != 0) { pidx = stage3sort(T, SA, Cp, Bp, n, m, k, flags, isbwt); }
  509. else { pidx = stage3sort(T, SA, Cp, B, n, m, k, flags, isbwt); }
  510. } else {
  511. if(Bp != 0) { pidx = stage3sort(T, SA, C, Bp, n, m, k, flags, isbwt); }
  512. else { pidx = stage3sort(T, SA, C, B, n, m, k, flags, isbwt); }
  513. }
  514. if(flags & (1 | 4)) { delete[] Cp; }
  515. if(flags & 2) { delete[] Bp; }
  516. return pidx;
  517. }
  518. } /* namespace saisxx_private */
  519. /**
  520. * @brief Constructs the suffix array of a given string in linear time.
  521. * @param T[0..n-1] The input string. (random access iterator)
  522. * @param SA[0..n-1] The output array of suffixes. (random access iterator)
  523. * @param n The length of the given string.
  524. * @param k The alphabet size.
  525. * @return 0 if no error occurred, -1 or -2 otherwise.
  526. */
  527. template<typename string_type, typename sarray_type, typename index_type>
  528. index_type
  529. saisxx(string_type T, sarray_type SA, index_type n, index_type k = 256) {
  530. typedef typename std::iterator_traits<sarray_type>::value_type savalue_type;
  531. assert((std::numeric_limits<index_type>::min)() < 0);
  532. assert((std::numeric_limits<savalue_type>::min)() < 0);
  533. assert((std::numeric_limits<savalue_type>::max)() == (std::numeric_limits<index_type>::max)());
  534. assert((std::numeric_limits<savalue_type>::min)() == (std::numeric_limits<index_type>::min)());
  535. if((n < 0) || (k <= 0)) { return -1; }
  536. if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; }
  537. return saisxx_private::suffixsort(T, SA, 0, n, k, false);
  538. }
  539. /**
  540. * @brief Constructs the burrows-wheeler transformed string of a given string in linear time.
  541. * @param T[0..n-1] The input string. (random access iterator)
  542. * @param U[0..n-1] The output string. (random access iterator)
  543. * @param A[0..n-1] The temporary array. (random access iterator)
  544. * @param n The length of the given string.
  545. * @param k The alphabet size.
  546. * @return The primary index if no error occurred, -1 or -2 otherwise.
  547. */
  548. template<typename string_type, typename string_type2, typename sarray_type, typename index_type>
  549. index_type
  550. saisxx_bwt(string_type T, string_type2 U, sarray_type A, index_type n, index_type k = 256) {
  551. typedef typename std::iterator_traits<sarray_type>::value_type savalue_type;
  552. typedef typename std::iterator_traits<string_type>::value_type char_type;
  553. index_type i, pidx;
  554. assert((std::numeric_limits<index_type>::min)() < 0);
  555. assert((std::numeric_limits<savalue_type>::min)() < 0);
  556. assert((std::numeric_limits<savalue_type>::max)() == (std::numeric_limits<index_type>::max)());
  557. assert((std::numeric_limits<savalue_type>::min)() == (std::numeric_limits<index_type>::min)());
  558. if((n < 0) || (k <= 0)) { return -1; }
  559. if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
  560. pidx = saisxx_private::suffixsort(T, A, index_type(0), n, k, true);
  561. if(0 <= pidx) {
  562. U[index_type(0)] = T[n - 1];
  563. for(i = 0; i < pidx; ++i) { U[i + 1] = (char_type)A[i]; }
  564. for(i += 1; i < n; ++i) { U[i] = (char_type)A[i]; }
  565. pidx += 1;
  566. }
  567. return pidx;
  568. }
  569. #ifdef _MSC_VER
  570. #pragma warning(pop)
  571. #endif
  572. #endif /* __cplusplus */
  573. #endif /* _SAIS_HXX */