SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
wt_int.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
10#ifndef INCLUDED_SDSL_INT_WAVELET_TREE
11#define INCLUDED_SDSL_INT_WAVELET_TREE
12
13#include <algorithm> // for std::swap
14#include <array>
15#include <assert.h>
16#include <cstdint>
17#include <functional>
18#include <iterator>
19#include <ostream>
20#include <stddef.h>
21#include <stdexcept>
22#include <string>
23#include <tuple>
24#include <type_traits>
25#include <utility>
26#include <vector>
27
28#include <sdsl/bits.hpp>
29#include <sdsl/cereal.hpp>
30#include <sdsl/int_vector.hpp>
32#include <sdsl/io.hpp>
33#include <sdsl/iterators.hpp>
34#include <sdsl/ram_fs.hpp>
36#include <sdsl/sfstream.hpp>
38#include <sdsl/util.hpp>
39#include <sdsl/wt_helper.hpp>
40
42namespace sdsl
43{
44
46
57template <class t_bitvector = bit_vector,
58 class t_rank = typename t_bitvector::rank_1_type,
59 class t_select = typename t_bitvector::select_1_type,
60 class t_select_zero = typename t_bitvector::select_0_type>
61class wt_int
62{
63public:
66 typedef typename t_bitvector::difference_type difference_type;
69 typedef t_bitvector bit_vector_type;
70 typedef t_rank rank_1_type;
71 typedef t_select select_1_type;
72 typedef t_select_zero select_0_type;
75 enum
76 {
77 lex_ordered = 1
78 };
79
80 typedef std::pair<value_type, size_type> point_type;
81 typedef std::vector<point_type> point_vec_type;
82 typedef std::pair<size_type, point_vec_type> r2d_res_type;
83
84protected:
86 size_type m_sigma = 0; //<- \f$ |\Sigma| \f$
87 bit_vector_type m_tree; // bit vector to store the wavelet tree
88 rank_1_type m_tree_rank; // rank support for the wavelet tree bit vector
89 select_1_type m_tree_select1; // select support for the wavelet tree bit vector
91 uint32_t m_max_level = 0;
92
93private:
94 // recursive internal version of the method interval_symbols
95 void _interval_symbols(size_type i,
96 size_type j,
97 size_type & k,
98 std::vector<value_type> & cs,
99 std::vector<size_type> & rank_c_i,
100 std::vector<size_type> & rank_c_j,
101 size_type level,
103 size_type node_size,
104 size_type offset) const
105 {
106 // invariant: j>i
107
108 if (level >= m_max_level)
109 {
110 rank_c_i[k] = i;
111 rank_c_j[k] = j;
112 cs[k++] = path;
113 return;
114 }
115
116 size_type ones_before_o = m_tree_rank(offset);
117 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
118 size_type ones_before_j = m_tree_rank(offset + j) - ones_before_o;
119 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
120
121 // goto left child
122 if ((j - i) - (ones_before_j - ones_before_i) > 0)
123 {
124 size_type new_offset = offset + m_size;
125 size_type new_node_size = node_size - ones_before_end;
126 size_type new_i = i - ones_before_i;
127 size_type new_j = j - ones_before_j;
128 _interval_symbols(new_i, new_j, k, cs, rank_c_i, rank_c_j, level + 1, path << 1, new_node_size, new_offset);
129 }
130
131 // goto right child
132 if ((ones_before_j - ones_before_i) > 0)
133 {
134 size_type new_offset = offset + (node_size - ones_before_end) + m_size;
135 size_type new_node_size = ones_before_end;
136 size_type new_i = ones_before_i;
137 size_type new_j = ones_before_j;
138 _interval_symbols(new_i,
139 new_j,
140 k,
141 cs,
142 rank_c_i,
143 rank_c_j,
144 level + 1,
145 (path << 1) | 1,
146 new_node_size,
147 new_offset);
148 }
149 }
150
151public:
154 uint32_t const & max_level = m_max_level;
155
157 wt_int() = default;
158
160
168 template <typename t_it>
169 wt_int(t_it begin, t_it end, std::string tmp_dir = ram_file_name("")) : m_size(std::distance(begin, end))
170 {
171 if (0 == m_size)
172 return;
173 m_sigma = 0;
174
175 value_type max_elem = 1; // variable for the biggest value in rac
176 for (auto it = begin; it != end; ++it)
177 {
178 value_type value = *it;
179 if (value > max_elem)
180 max_elem = value;
181 }
182 m_max_level = bits::hi(max_elem) + 1;
183 std::string tree_out_buf_file_name = tmp_file(tmp_dir, "_m_tree");
184 {
186 std::copy(begin, end, rac.begin());
187
188 // buffer for elements in the right node
189 int_vector_buffer<> buf1(tmp_file(tmp_dir, "_wt_constr_buf"), std::ios::out, 10 * (1 << 20), m_max_level);
190 osfstream tree_out_buf(tree_out_buf_file_name, std::ios::binary | std::ios::trunc | std::ios::out);
191
192 size_type bit_size = m_size * m_max_level;
193 int_vector<1>::write_header(bit_size, 1, tree_out_buf); // write bv header
194
195 size_type tree_pos = 0;
196 uint64_t tree_word = 0;
197
198 uint64_t mask_old = 1ULL << (m_max_level);
199 for (uint32_t k = 0; k < m_max_level; ++k)
200 {
201 size_type start = 0;
202 const uint64_t mask_new = 1ULL << (m_max_level - k - 1);
203 do
204 {
205 size_type i = start;
206 size_type cnt0 = 0;
207 size_type cnt1 = 0;
208 uint64_t start_value = (rac[i] & mask_old);
209 uint64_t x;
210 while (i < m_size and ((x = rac[i]) & mask_old) == start_value)
211 {
212 if (x & mask_new)
213 {
214 tree_word |= (1ULL << (tree_pos & 0x3FULL));
215 buf1[cnt1++] = x;
216 }
217 else
218 {
219 rac[start + cnt0++] = x;
220 }
221 ++tree_pos;
222 if ((tree_pos & 0x3FULL) == 0)
223 { // if tree_pos % 64 == 0 write old word
224 tree_out_buf.write((char *)&tree_word, sizeof(tree_word));
225 tree_word = 0;
226 }
227 ++i;
228 }
229 if (k + 1 < m_max_level)
230 { // inner node
231 for (size_type j = 0; j < cnt1; ++j)
232 {
233 rac[start + cnt0 + j] = buf1[j];
234 }
235 }
236 else
237 { // leaf node
238 m_sigma += (cnt0 > 0) + (cnt1 > 0); // increase sigma for each leaf
239 }
240 start += cnt0 + cnt1;
241 }
242 while (start < m_size);
243 mask_old += mask_new;
244 }
245 if ((tree_pos & 0x3FULL) != 0)
246 { // if tree_pos % 64 > 0 => there are remaining entries we have to write
247 tree_out_buf.write((char *)&tree_word, sizeof(tree_word));
248 }
249 buf1.close(true); // remove temporary file
250 tree_out_buf.close();
251 } // destruct rac
252
254 load_from_file(tree, tree_out_buf_file_name);
255 sdsl::remove(tree_out_buf_file_name);
256 m_tree = bit_vector_type(std::move(tree));
257 util::init_support(m_tree_rank, &m_tree);
258 util::init_support(m_tree_select0, &m_tree);
259 util::init_support(m_tree_select1, &m_tree);
260 }
261
263 wt_int(wt_int const & wt)
264 {
265 m_size = wt.m_size;
266 m_sigma = wt.m_sigma;
267 m_tree = wt.m_tree;
269 m_tree_rank.set_vector(&m_tree);
271 m_tree_select1.set_vector(&m_tree);
273 m_tree_select0.set_vector(&m_tree);
275 }
276
278 wt_int(wt_int && wt) :
279 m_size(wt.m_size),
280 m_sigma(wt.m_sigma),
281 m_tree(std::move(wt.m_tree)),
282 m_tree_rank(std::move(wt.m_tree_rank)),
283 m_tree_select1(std::move(wt.m_tree_select1)),
284 m_tree_select0(std::move(wt.m_tree_select0)),
286 {
287 m_tree_rank.set_vector(&m_tree);
288 m_tree_select1.set_vector(&m_tree);
289 m_tree_select0.set_vector(&m_tree);
290 }
291
293 wt_int & operator=(wt_int const & wt)
294 {
295 if (this != &wt)
296 {
297 wt_int tmp(wt); // re-use copy-constructor
298 *this = std::move(tmp); // re-use move-assignment
299 }
300 return *this;
301 }
302
305 {
306 if (this != &wt)
307 {
308 m_size = wt.m_size;
309 m_sigma = wt.m_sigma;
310 m_tree = std::move(wt.m_tree);
311 m_tree_rank = std::move(wt.m_tree_rank);
312 m_tree_rank.set_vector(&m_tree);
313 m_tree_select1 = std::move(wt.m_tree_select1);
314 m_tree_select1.set_vector(&m_tree);
315 m_tree_select0 = std::move(wt.m_tree_select0);
316 m_tree_select0.set_vector(&m_tree);
317 m_max_level = std::move(wt.m_max_level);
318 }
319 return *this;
320 }
321
324 {
325 return m_size;
326 }
327
329 bool empty() const
330 {
331 return m_size == 0;
332 }
333
335
341 {
342 assert(i < size());
343 size_type offset = 0;
344 value_type res = 0;
345 size_type node_size = m_size;
346 for (uint32_t k = 0; k < m_max_level; ++k)
347 {
348 res <<= 1;
349 size_type ones_before_o = m_tree_rank(offset);
350 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
351 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
352 if (m_tree[offset + i])
353 { // one at position i => follow right child
354 offset += (node_size - ones_before_end);
355 node_size = ones_before_end;
356 i = ones_before_i;
357 res |= 1;
358 }
359 else
360 { // zero at position i => follow left child
361 node_size = (node_size - ones_before_end);
362 i = (i - ones_before_i);
363 }
364 offset += m_size;
365 }
366 return res;
367 };
368
370
380 {
381 assert(i <= size());
382 if (((1ULL) << (m_max_level)) <= c)
383 { // c is greater than any symbol in wt
384 return 0;
385 }
386 size_type offset = 0;
387 uint64_t mask = (1ULL) << (m_max_level - 1);
388 size_type node_size = m_size;
389 for (uint32_t k = 0; k < m_max_level and i; ++k)
390 {
391 size_type ones_before_o = m_tree_rank(offset);
392 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
393 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
394 if (c & mask)
395 { // search for a one at this level
396 offset += (node_size - ones_before_end);
397 node_size = ones_before_end;
398 i = ones_before_i;
399 }
400 else
401 { // search for a zero at this level
402 node_size = (node_size - ones_before_end);
403 i = (i - ones_before_i);
404 }
405 offset += m_size;
406 mask >>= 1;
407 }
408 return i;
409 };
410
412
418 std::pair<size_type, value_type> inverse_select(size_type i) const
419 {
420 assert(i < size());
421
422 value_type c = 0;
423 size_type node_size = m_size, offset = 0;
424 for (uint32_t k = 0; k < m_max_level; ++k)
425 {
426 size_type ones_before_o = m_tree_rank(offset);
427 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
428 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
429 c <<= 1;
430 if (m_tree[offset + i])
431 { // go to the right child
432 offset += (node_size - ones_before_end);
433 node_size = ones_before_end;
434 i = ones_before_i;
435 c |= 1;
436 }
437 else
438 { // go to the left child
439 node_size = (node_size - ones_before_end);
440 i = (i - ones_before_i);
441 }
442 offset += m_size;
443 }
444 return std::make_pair(i, c);
445 }
446
448
457 {
458 assert(1 <= i and i <= rank(size(), c));
459 // possible optimization: if the array is a permutation we can start at the bottom of the tree
460 size_type offset = 0;
461 uint64_t mask = (1ULL) << (m_max_level - 1);
462 size_type node_size = m_size;
463 int_vector<64> m_path_off(max_level + 1);
464 int_vector<64> m_path_rank_off(max_level + 1);
465 m_path_off[0] = m_path_rank_off[0] = 0;
466
467 for (uint32_t k = 0; k < m_max_level and node_size; ++k)
468 {
469 size_type ones_before_o = m_tree_rank(offset);
470 m_path_rank_off[k] = ones_before_o;
471 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
472 if (c & mask)
473 { // search for a one at this level
474 offset += (node_size - ones_before_end);
475 node_size = ones_before_end;
476 }
477 else
478 { // search for a zero at this level
479 node_size = (node_size - ones_before_end);
480 }
481 offset += m_size;
482 m_path_off[k + 1] = offset;
483 mask >>= 1;
484 }
485 if (0ULL == node_size or node_size < i)
486 {
487 throw std::logic_error("select(" + util::to_string(i) + "," + util::to_string(c)
488 + "): c does not occur i times in the WT");
489 return m_size;
490 }
491 mask = 1ULL;
492 for (uint32_t k = m_max_level; k > 0; --k)
493 {
494 offset = m_path_off[k - 1];
495 size_type ones_before_o = m_path_rank_off[k - 1];
496 if (c & mask)
497 { // right child => search i'th
498 i = m_tree_select1(ones_before_o + i) - offset + 1;
499 }
500 else
501 { // left child => search i'th zero
502 i = m_tree_select0(offset - ones_before_o + i) - offset + 1;
503 }
504 mask <<= 1;
505 }
506 return i - 1;
507 };
508
510
531 size_type j,
532 size_type & k,
533 std::vector<value_type> & cs,
534 std::vector<size_type> & rank_c_i,
535 std::vector<size_type> & rank_c_j) const
536 {
537 assert(i <= j and j <= size());
538 k = 0;
539 if (i == j)
540 {
541 return;
542 }
543 if ((i + 1) == j)
544 {
545 auto res = inverse_select(i);
546 cs[0] = res.second;
547 rank_c_i[0] = res.first;
548 rank_c_j[0] = res.first + 1;
549 k = 1;
550 return;
551 }
552
553 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0, 0, m_size, 0);
554 }
555
557
569 template <class t_ret_type = std::tuple<size_type, size_type, size_type>>
570 t_ret_type lex_count(size_type i, size_type j, value_type c) const
571 {
572 assert(i <= j and j <= size());
573 if (((1ULL) << (m_max_level)) <= c)
574 { // c is greater than any symbol in wt
575 return t_ret_type{0, j - i, 0};
576 }
577 size_type offset = 0;
578 size_type smaller = 0;
579 size_type greater = 0;
580 uint64_t mask = (1ULL) << (m_max_level - 1);
581 size_type node_size = m_size;
582 for (uint32_t k = 0; k < m_max_level; ++k)
583 {
584 size_type ones_before_o = m_tree_rank(offset);
585 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
586 size_type ones_before_j = m_tree_rank(offset + j) - ones_before_o;
587 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
588 if (c & mask)
589 { // search for a one at this level
590 offset += (node_size - ones_before_end);
591 node_size = ones_before_end;
592 smaller += j - i - ones_before_j + ones_before_i;
593 i = ones_before_i;
594 j = ones_before_j;
595 }
596 else
597 { // search for a zero at this level
598 node_size -= ones_before_end;
599 greater += ones_before_j - ones_before_i;
600 i -= ones_before_i;
601 j -= ones_before_j;
602 }
603 offset += m_size;
604 mask >>= 1;
605 }
606 return t_ret_type{i, smaller, greater};
607 }
608
610
619 template <class t_ret_type = std::tuple<size_type, size_type>>
621 {
622 assert(i <= size());
623 if (((1ULL) << (m_max_level)) <= c)
624 { // c is greater than any symbol in wt
625 return t_ret_type{0, i};
626 }
627 size_type offset = 0;
628 size_type result = 0;
629 uint64_t mask = (1ULL) << (m_max_level - 1);
630 size_type node_size = m_size;
631 for (uint32_t k = 0; k < m_max_level and i; ++k)
632 {
633 size_type ones_before_o = m_tree_rank(offset);
634 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
635 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
636 if (c & mask)
637 { // search for a one at this level
638 offset += (node_size - ones_before_end);
639 node_size = ones_before_end;
640 result += i - ones_before_i;
641 i = ones_before_i;
642 }
643 else
644 { // search for a zero at this level
645 node_size = (node_size - ones_before_end);
646 i -= ones_before_i;
647 }
648 offset += m_size;
649 mask >>= 1;
650 }
651 return t_ret_type{i, result};
652 }
653
655
663 std::pair<size_type, std::vector<std::pair<value_type, size_type>>>
664 range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report = true) const
665 {
666 std::vector<size_type> offsets(m_max_level + 1);
667 std::vector<size_type> ones_before_os(m_max_level + 1);
668 offsets[0] = 0;
669 if (vrb > (1ULL << m_max_level))
670 vrb = (1ULL << m_max_level);
671 if (vlb > vrb)
672 return make_pair(0, point_vec_type());
673 size_type cnt_answers = 0;
674 point_vec_type point_vec;
675 _range_search_2d(lb, rb, vlb, vrb, 0, 0, m_size, offsets, ones_before_os, 0, point_vec, report, cnt_answers);
676 return make_pair(cnt_answers, point_vec);
677 }
678
680 size_type rb,
681 value_type vlb,
682 value_type vrb,
683 size_type level,
684 size_type ilb,
685 size_type node_size,
686 std::vector<size_type> & offsets,
687 std::vector<size_type> & ones_before_os,
689 point_vec_type & point_vec,
690 bool report,
691 size_type & cnt_answers) const
692 {
693 if (lb > rb)
694 return;
695 if (level == m_max_level)
696 {
697 if (report)
698 {
699 for (size_type j = lb + 1; j <= rb + 1; ++j)
700 {
701 size_type i = j;
702 size_type c = path;
703 for (uint32_t k = m_max_level; k > 0; --k)
704 {
705 size_type offset = offsets[k - 1];
706 size_type ones_before_o = ones_before_os[k - 1];
707 if (c & 1)
708 {
709 i = m_tree_select1(ones_before_o + i) - offset + 1;
710 }
711 else
712 {
713 i = m_tree_select0(offset - ones_before_o + i) - offset + 1;
714 }
715 c >>= 1;
716 }
717 point_vec.emplace_back(i - 1, path);
718 }
719 }
720 cnt_answers += rb - lb + 1;
721 return;
722 }
723 size_type irb = ilb + (1ULL << (m_max_level - level));
724 size_type mid = (irb + ilb) >> 1;
725
726 size_type offset = offsets[level];
727
728 size_type ones_before_o = m_tree_rank(offset);
729 ones_before_os[level] = ones_before_o;
730 size_type ones_before_lb = m_tree_rank(offset + lb);
731 size_type ones_before_rb = m_tree_rank(offset + rb + 1);
732 size_type ones_before_end = m_tree_rank(offset + node_size);
733 size_type zeros_before_o = offset - ones_before_o;
734 size_type zeros_before_lb = offset + lb - ones_before_lb;
735 size_type zeros_before_rb = offset + rb + 1 - ones_before_rb;
736 size_type zeros_before_end = offset + node_size - ones_before_end;
737 if (vlb < mid and mid)
738 {
739 size_type nlb = zeros_before_lb - zeros_before_o;
740 size_type nrb = zeros_before_rb - zeros_before_o;
741 offsets[level + 1] = offset + m_size;
742 if (nrb)
744 nrb - 1,
745 vlb,
746 std::min(vrb, mid - 1),
747 level + 1,
748 ilb,
749 zeros_before_end - zeros_before_o,
750 offsets,
751 ones_before_os,
752 path << 1,
753 point_vec,
754 report,
755 cnt_answers);
756 }
757 if (vrb >= mid)
758 {
759 size_type nlb = ones_before_lb - ones_before_o;
760 size_type nrb = ones_before_rb - ones_before_o;
761 offsets[level + 1] = offset + m_size + (zeros_before_end - zeros_before_o);
762 if (nrb)
764 nrb - 1,
765 std::max(mid, vlb),
766 vrb,
767 level + 1,
768 mid,
769 ones_before_end - ones_before_o,
770 offsets,
771 ones_before_os,
772 (path << 1) + 1,
773 point_vec,
774 report,
775 cnt_answers);
776 }
777 }
778
781 {
782 return const_iterator(this, 0);
783 }
784
787 {
788 return const_iterator(this, size());
789 }
790
792 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
793 {
794 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
795 size_type written_bytes = 0;
796 written_bytes += write_member(m_size, out, child, "size");
797 written_bytes += write_member(m_sigma, out, child, "sigma");
798 written_bytes += m_tree.serialize(out, child, "tree");
799 written_bytes += m_tree_rank.serialize(out, child, "tree_rank");
800 written_bytes += m_tree_select1.serialize(out, child, "tree_select_1");
801 written_bytes += m_tree_select0.serialize(out, child, "tree_select_0");
802 written_bytes += write_member(m_max_level, out, child, "max_level");
803 structure_tree::add_size(child, written_bytes);
804 return written_bytes;
805 }
806
808 void load(std::istream & in)
809 {
810 read_member(m_size, in);
811 read_member(m_sigma, in);
812 m_tree.load(in);
813 m_tree_rank.load(in, &m_tree);
814 m_tree_select1.load(in, &m_tree);
815 m_tree_select0.load(in, &m_tree);
817 }
818
819 template <typename archive_t>
820 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
821 {
822 ar(CEREAL_NVP(m_size));
823 ar(CEREAL_NVP(m_sigma));
825 ar(CEREAL_NVP(m_tree));
829 }
830
831 template <typename archive_t>
832 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
833 {
834 ar(CEREAL_NVP(m_size));
835 ar(CEREAL_NVP(m_sigma));
837 ar(CEREAL_NVP(m_tree));
839 m_tree_rank.set_vector(&m_tree);
841 m_tree_select1.set_vector(&m_tree);
843 m_tree_select0.set_vector(&m_tree);
844 }
845
847 bool operator==(wt_int const & other) const noexcept
848 {
849 return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_tree == other.m_tree)
850 && (m_tree_rank == other.m_tree_rank) && (m_tree_select1 == other.m_tree_select1)
851 && (m_tree_select0 == other.m_tree_select0) && (m_max_level == other.m_max_level);
852 }
853
855 bool operator!=(wt_int const & other) const noexcept
856 {
857 return !(*this == other);
858 }
859
862 {
867
868 // Default constructor
869 node_type(size_type o = 0, size_type sz = 0, size_type l = 0, value_type sy = 0) :
870 offset(o),
871 size(sz),
872 level(l),
873 sym(sy)
874 {}
875
876 // Copy constructor
877 node_type(node_type const &) = default;
878
879 // Move copy constructor
880 node_type(node_type &&) = default;
881
882 // Assignment operator
883 node_type & operator=(node_type const &) = default;
884
885 // Move assignment operator
887
888 // Comparator operator
889 bool operator==(node_type const & v) const
890 {
891 return offset == v.offset;
892 }
893
894 // Smaller operator
895 bool operator<(node_type const & v) const
896 {
897 return offset < v.offset;
898 }
899
900 // Greater operator
901 bool operator>(node_type const & v) const
902 {
903 return offset > v.offset;
904 }
905 };
906
908 bool is_leaf(node_type const & v) const
909 {
910 return v.level == m_max_level;
911 }
912
914 value_type sym(node_type const & v) const
915 {
916 return v.sym;
917 }
918
921 {
923 }
924
926 auto seq(node_type const & v) const -> random_access_container<std::function<value_type(size_type)>>
927 {
928 return random_access_container<std::function<value_type(size_type)>>(
929 [&v, this](size_type i)
930 {
931 node_type vv = v;
932 while (!is_leaf(vv))
933 {
934 auto vs = expand(vv);
935 auto rs = expand(vv, range_type{{0, i}});
936 bool bit = *(begin(vv) + i);
937 i = std::get<1>(rs[bit]);
938 vv = vs[bit];
939 }
940 return sym(vv);
941 },
942 size(v));
943 }
944
946 bool empty(node_type const & v) const
947 {
948 return v.size == (size_type)0;
949 }
950
952 auto size(node_type const & v) const -> decltype(v.size)
953 {
954 return v.size;
955 }
956
959 {
960 return node_type(0, m_size, 0, 0);
961 }
962
964
968 std::array<node_type, 2> expand(node_type const & v) const
969 {
970 node_type v_right = v;
971 return expand(std::move(v_right));
972 }
973
975
979 std::array<node_type, 2> expand(node_type && v) const
980 {
981 node_type v_left;
982 size_type offset_rank = m_tree_rank(v.offset);
983 size_type ones = m_tree_rank(v.offset + v.size) - offset_rank;
984
985 v_left.offset = v.offset + m_size;
986 v_left.size = v.size - ones;
987 v_left.level = v.level + 1;
988 v_left.sym = v.sym << 1;
989
990 v.offset = v.offset + m_size + v_left.size;
991 v.size = ones;
992 v.level = v.level + 1;
993 v.sym = (v.sym << 1) | 1;
994
995 return {{std::move(v_left), v}};
996 }
997
999
1008 std::array<range_vec_type, 2> expand(node_type const & v, range_vec_type const & ranges) const
1009 {
1010 auto ranges_copy = ranges;
1011 return expand(v, std::move(ranges_copy));
1012 }
1013
1015
1024 std::array<range_vec_type, 2> expand(node_type const & v, range_vec_type && ranges) const
1025 {
1026 auto v_sp_rank = m_tree_rank(v.offset); // this is already calculated in expand(v)
1027 range_vec_type res(ranges.size());
1028 size_t i = 0;
1029 for (auto & r : ranges)
1030 {
1031 auto sp_rank = m_tree_rank(v.offset + r[0]);
1032 auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank;
1033 auto left_size = (r[1] - r[0] + 1) - right_size;
1034
1035 auto right_sp = sp_rank - v_sp_rank;
1036 auto left_sp = r[0] - right_sp;
1037
1038 r = {{left_sp, left_sp + left_size - 1}};
1039 res[i++] = {{right_sp, right_sp + right_size - 1}};
1040 }
1041 return {{ranges, std::move(res)}};
1042 }
1043
1045
1054 std::array<range_type, 2> expand(node_type const & v, range_type const & r) const
1055 {
1056 auto v_sp_rank = m_tree_rank(v.offset); // this is already calculated in expand(v)
1057 auto sp_rank = m_tree_rank(v.offset + r[0]);
1058 auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank;
1059 auto left_size = (r[1] - r[0] + 1) - right_size;
1060
1061 auto right_sp = sp_rank - v_sp_rank;
1062 auto left_sp = r[0] - right_sp;
1063
1064 return {{{{left_sp, left_sp + left_size - 1}}, {{right_sp, right_sp + right_size - 1}}}};
1065 }
1066
1068 std::pair<uint64_t, uint64_t> path(value_type c) const
1069 {
1070 return {m_max_level, c};
1071 }
1072
1073private:
1075 auto begin(node_type const & v) const -> decltype(m_tree.begin() + v.offset)
1076 {
1077 return m_tree.begin() + v.offset;
1078 }
1079
1081 auto end(node_type const & v) const -> decltype(m_tree.begin() + v.offset + v.size)
1082 {
1083 return m_tree.begin() + v.offset + v.size;
1084 }
1085};
1086
1087} // end namespace sdsl
1088#endif
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
void close(bool remove_file=false)
Close the int_vector_buffer.
A generic vector class for integers of width .
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
void close()
Close the stream.
Definition sfstream.hpp:91
Generic iterator for a random access container.
Definition iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
Definition wt_int.hpp:62
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
Definition wt_int.hpp:340
wt_tag index_category
Definition wt_int.hpp:73
bool operator!=(wt_int const &other) const noexcept
Inequality operator.
Definition wt_int.hpp:855
bool operator==(wt_int const &other) const noexcept
Equality operator.
Definition wt_int.hpp:847
select_0_type m_tree_select0
Definition wt_int.hpp:90
uint32_t const & max_level
Maximal level of the wavelet tree.
Definition wt_int.hpp:154
std::array< node_type, 2 > expand(node_type &&v) const
Returns the two child nodes of an inner node.
Definition wt_int.hpp:979
node_type root() const
Return the root node.
Definition wt_int.hpp:958
wt_int & operator=(wt_int &&wt)
Assignment move operator.
Definition wt_int.hpp:304
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition wt_int.hpp:820
wt_int(wt_int const &wt)
Copy constructor.
Definition wt_int.hpp:263
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition wt_int.hpp:792
size_type size() const
Returns the size of the original vector.
Definition wt_int.hpp:323
std::pair< size_type, point_vec_type > r2d_res_type
Definition wt_int.hpp:82
bool is_leaf(node_type const &v) const
Checks if the node is a leaf node.
Definition wt_int.hpp:908
auto seq(node_type const &v) const -> random_access_container< std::function< value_type(size_type)> >
Random access container to sequence of node v.
Definition wt_int.hpp:926
t_select_zero select_0_type
Definition wt_int.hpp:72
uint32_t m_max_level
Definition wt_int.hpp:91
std::pair< value_type, size_type > point_type
Definition wt_int.hpp:80
wt_int()=default
Default constructor.
bit_vector_type const & tree
A concatenation of all bit vectors of the wavelet tree.
Definition wt_int.hpp:153
size_type m_size
Definition wt_int.hpp:85
int_vector ::value_type value_type
Definition wt_int.hpp:65
const_iterator iterator
Definition wt_int.hpp:68
std::pair< size_type, std::vector< std::pair< value_type, size_type > > > range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report=true) const
range_search_2d searches points in the index interval [lb..rb] and value interval [vlb....
Definition wt_int.hpp:664
std::array< range_vec_type, 2 > expand(node_type const &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
Definition wt_int.hpp:1024
t_rank rank_1_type
Definition wt_int.hpp:70
t_bitvector bit_vector_type
Definition wt_int.hpp:69
auto bit_vec(node_type const &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
Definition wt_int.hpp:920
rank_1_type m_tree_rank
Definition wt_int.hpp:88
size_type m_sigma
Definition wt_int.hpp:86
std::array< range_vec_type, 2 > expand(node_type const &v, range_vec_type const &ranges) const
Returns for each range its left and right child ranges.
Definition wt_int.hpp:1008
void interval_symbols(size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
Definition wt_int.hpp:530
std::array< range_type, 2 > expand(node_type const &v, range_type const &r) const
Returns for a range its left and right child ranges.
Definition wt_int.hpp:1054
bool empty(node_type const &v) const
Indicates if node v is empty.
Definition wt_int.hpp:946
wt_int & operator=(wt_int const &wt)
Assignment operator.
Definition wt_int.hpp:293
random_access_const_iterator< wt_int > const_iterator
Definition wt_int.hpp:67
wt_int(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
Definition wt_int.hpp:169
int_alphabet_tag alphabet_category
Definition wt_int.hpp:74
size_type const & sigma
Effective alphabet size of the wavelet tree.
Definition wt_int.hpp:152
wt_int(wt_int &&wt)
Move constructor.
Definition wt_int.hpp:278
std::vector< point_type > point_vec_type
Definition wt_int.hpp:81
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
Definition wt_int.hpp:456
int_vector ::size_type size_type
Definition wt_int.hpp:64
t_select select_1_type
Definition wt_int.hpp:71
value_type sym(node_type const &v) const
Returns the symbol of leaf node v.
Definition wt_int.hpp:914
void _range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, size_type level, size_type ilb, size_type node_size, std::vector< size_type > &offsets, std::vector< size_type > &ones_before_os, size_type path, point_vec_type &point_vec, bool report, size_type &cnt_answers) const
Definition wt_int.hpp:679
std::array< node_type, 2 > expand(node_type const &v) const
Returns the two child nodes of an inner node.
Definition wt_int.hpp:968
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition wt_int.hpp:832
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1] of the supported vector.
Definition wt_int.hpp:379
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition wt_int.hpp:786
auto size(node_type const &v) const -> decltype(v.size)
Return the size of node v.
Definition wt_int.hpp:952
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
Definition wt_int.hpp:1068
t_bitvector::difference_type difference_type
Definition wt_int.hpp:66
const_iterator begin() const
Returns a const_iterator to the first element.
Definition wt_int.hpp:780
bit_vector_type m_tree
Definition wt_int.hpp:87
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many occurrences of symbol wt[i] are in the prefix [0..i-1] of the original sequence.
Definition wt_int.hpp:418
void load(std::istream &in)
Loads the data structure from the given istream.
Definition wt_int.hpp:808
t_ret_type lex_count(size_type i, size_type j, value_type c) const
How many symbols are lexicographic smaller/greater than c in [i..j-1].
Definition wt_int.hpp:570
t_ret_type lex_smaller_count(size_type i, value_type c) const
How many symbols are lexicographic smaller than c in [0..i-1].
Definition wt_int.hpp:620
select_1_type m_tree_select1
Definition wt_int.hpp:89
bool empty() const
Returns whether the wavelet tree contains no data.
Definition wt_int.hpp:329
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
std::string to_string(T const &t, int w=1)
Namespace for the succinct data structure library.
int remove(std::string const &)
Remove a file.
Definition ram_fs.hpp:221
std::string tmp_file(cache_config const &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
Definition io.hpp:759
std::string ram_file_name(std::string const &file)
Returns the corresponding RAM-file name for file.
Definition ram_fs.hpp:195
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition io.hpp:94
bool load_from_file(T &v, std::string const &file)
Load sdsl-object v from a file.
Definition io.hpp:989
void read_member(T &t, std::istream &in)
Definition io.hpp:119
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
std::array< int_vector<>::size_type, 2 > range_type
Definition wt_helper.hpp:27
std::vector< range_type > range_vec_type
Definition wt_helper.hpp:28
ram_fs.hpp
Contains declarations and definitions of data structure concepts.
sfstream.hpp contains a two stream class which can be used to read/write from/to files or strings.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:653
Represents a node in the wavelet tree.
Definition wt_int.hpp:862
node_type(size_type o=0, size_type sz=0, size_type l=0, value_type sy=0)
Definition wt_int.hpp:869
node_type(node_type &&)=default
bool operator<(node_type const &v) const
Definition wt_int.hpp:895
bool operator>(node_type const &v) const
Definition wt_int.hpp:901
node_type(node_type const &)=default
node_type & operator=(node_type &&)=default
bool operator==(node_type const &v) const
Definition wt_int.hpp:889
node_type & operator=(node_type const &)=default
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.