SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
wt_pc.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_SDSL_WT_PC
10#define INCLUDED_SDSL_WT_PC
11
12#include <tuple>
13#include <utility>
14#include <vector>
15
16#include <sdsl/bit_vectors.hpp>
17#include <sdsl/rank_support.hpp>
19#include <sdsl/wt_helper.hpp>
20
22namespace sdsl
23{
24
26
37template <class t_shape,
38 class t_bitvector = bit_vector,
39 class t_rank = typename t_bitvector::rank_1_type,
40 class t_select = typename t_bitvector::select_1_type,
41 class t_select_zero = typename t_bitvector::select_0_type,
42 class t_tree_strat = byte_tree<>>
43class wt_pc
44{
45 public:
46 typedef typename t_tree_strat::template type<wt_pc> tree_strat_type;
48 typedef typename tree_strat_type::value_type value_type;
49 typedef typename t_bitvector::difference_type difference_type;
52 typedef t_bitvector bit_vector_type;
53 typedef t_rank rank_1_type;
54 typedef t_select select_1_type;
55 typedef t_select_zero select_0_type;
57 typedef typename tree_strat_type::alphabet_category alphabet_category;
58 typedef typename t_shape::template type<wt_pc> shape_type;
59 enum
60 {
61 lex_ordered = shape_type::lex_ordered
62 };
63 using node_type = typename tree_strat_type::node_type;
64
65 private:
66#ifdef WT_PC_CACHE
67 mutable value_type m_last_access_answer;
68 mutable size_type m_last_access_i;
69 mutable size_type m_last_access_rl;
70#endif
71
72 size_type m_size = 0; // original text size
73 size_type m_sigma = 0; // alphabet size
74 bit_vector_type m_bv; // bit vector to store the wavelet tree
75 rank_1_type m_bv_rank; // rank support for the wavelet tree bit vector
76 select_1_type m_bv_select1; // select support for the wavelet tree bit vector
77 select_0_type m_bv_select0;
78 tree_strat_type m_tree;
79
80 // insert a character into the wavelet tree, see construct method
81 void insert_char(value_type old_chr, std::vector<uint64_t> & bv_node_pos, size_type times, bit_vector & bv)
82 {
83 uint64_t p = m_tree.bit_path(old_chr);
84 uint32_t path_len = p >> 56;
85 node_type v = m_tree.root();
86 for (uint32_t l = 0; l < path_len; ++l, p >>= 1)
87 {
88 if (p & 1) { bv.set_int(bv_node_pos[v], 0xFFFFFFFFFFFFFFFFULL, times); }
89 bv_node_pos[v] += times;
90 v = m_tree.child(v, p & 1);
91 }
92 }
93
94 // calculates the tree shape returns the size of the WT bit vector
95 size_type construct_tree_shape(const std::vector<size_type> & C)
96 {
97 // vector for node of the tree
98 std::vector<pc_node> temp_nodes; //(2*m_sigma-1);
99 shape_type::construct_tree(C, temp_nodes);
100 // Convert code tree into BFS order in memory and
101 // calculate bv_pos values
102 size_type bv_size = 0;
103 m_tree = tree_strat_type(temp_nodes, bv_size, this);
104 return bv_size;
105 }
106
107 void construct_init_rank_select()
108 {
109 util::init_support(m_bv_rank, &m_bv);
110 util::init_support(m_bv_select0, &m_bv);
111 util::init_support(m_bv_select1, &m_bv);
112 }
113
114 // recursive internal version of the method interval_symbols
115 void _interval_symbols(size_type i,
116 size_type j,
117 size_type & k,
118 std::vector<value_type> & cs,
119 std::vector<size_type> & rank_c_i,
120 std::vector<size_type> & rank_c_j,
121 node_type v) const
122 {
123 // invariant: j>i
124 size_type i_new = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
125 size_type j_new = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v));
126 // goto left child
127 i -= i_new;
128 j -= j_new;
129 if (i != j)
130 {
131 node_type v_new = m_tree.child(v, 0);
132 if (!m_tree.is_leaf(v_new)) { _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, v_new); }
133 else
134 {
135 rank_c_i[k] = i;
136 rank_c_j[k] = j;
137 cs[k++] = m_tree.bv_pos_rank(v_new);
138 }
139 }
140 // goto right child
141 if (i_new != j_new)
142 {
143 node_type v_new = m_tree.child(v, 1);
144 if (!m_tree.is_leaf(v_new)) { _interval_symbols(i_new, j_new, k, cs, rank_c_i, rank_c_j, v_new); }
145 else
146 {
147 rank_c_i[k] = i_new;
148 rank_c_j[k] = j_new;
149 cs[k++] = m_tree.bv_pos_rank(v_new);
150 }
151 }
152 }
153
154 public:
155 const size_type & sigma = m_sigma;
156 const bit_vector_type & bv = m_bv;
157
158 // Default constructor
159 wt_pc(){};
160
162
168 template <typename t_it>
169 wt_pc(t_it begin, t_it end)
170 : m_size(std::distance(begin, end))
171 {
172 if (0 == m_size) return;
173 // O(n + |\Sigma|\log|\Sigma|) algorithm for calculating node sizes
174 // TODO: C should also depend on the tree_strategy. C is just a mapping
175 // from a symbol to its frequency. So a map<uint64_t,uint64_t> could be
176 // used for integer alphabets...
177 std::vector<size_type> C;
178 // 1. Count occurrences of characters
180 // 2. Calculate effective alphabet size
182 // 3. Generate tree shape
183 size_type tree_size = construct_tree_shape(C);
184 // 4. Generate wavelet tree bit sequence m_bv
185 bit_vector temp_bv(tree_size, 0);
186
187 // Initializing starting position of wavelet tree nodes
188 std::vector<uint64_t> bv_node_pos(m_tree.size(), 0);
189 for (size_type v = 0; v < m_tree.size(); ++v) { bv_node_pos[v] = m_tree.bv_pos(v); }
190 value_type old_chr = *begin;
191 uint32_t times = 0;
192 for (auto it = begin; it != end; ++it)
193 {
194 value_type chr = *it;
195 if (chr != old_chr)
196 {
197 insert_char(old_chr, bv_node_pos, times, temp_bv);
198 times = 1;
199 old_chr = chr;
200 }
201 else
202 { // chr == old_chr
203 ++times;
204 if (times == 64)
205 {
206 insert_char(old_chr, bv_node_pos, times, temp_bv);
207 times = 0;
208 }
209 }
210 }
211 if (times > 0) { insert_char(old_chr, bv_node_pos, times, temp_bv); }
212 m_bv = bit_vector_type(std::move(temp_bv));
213 // 5. Initialize rank and select data structures for m_bv
214 construct_init_rank_select();
215 // 6. Finish inner nodes by precalculating the bv_pos_rank values
216 m_tree.init_node_ranks(m_bv_rank);
217 }
218
219 template <typename t_it>
220 wt_pc(t_it begin, t_it end, std::string)
221 : wt_pc(begin, end)
222 {}
223
225 wt_pc(const wt_pc & wt)
226 : m_size(wt.m_size)
227 , m_sigma(wt.m_sigma)
228 , m_bv(wt.m_bv)
229 , m_bv_rank(wt.m_bv_rank)
230 , m_bv_select1(wt.m_bv_select1)
231 , m_bv_select0(wt.m_bv_select0)
232 , m_tree(wt.m_tree)
233 {
234 m_bv_rank.set_vector(&m_bv);
235 m_bv_select1.set_vector(&m_bv);
236 m_bv_select0.set_vector(&m_bv);
237 }
238
240 : m_size(wt.m_size)
241 , m_sigma(wt.m_sigma)
242 , m_bv(std::move(wt.m_bv))
243 , m_bv_rank(std::move(wt.m_bv_rank))
244 , m_bv_select1(std::move(wt.m_bv_select1))
245 , m_bv_select0(std::move(wt.m_bv_select0))
246 , m_tree(std::move(wt.m_tree))
247 {
248 m_bv_rank.set_vector(&m_bv);
249 m_bv_select1.set_vector(&m_bv);
250 m_bv_select0.set_vector(&m_bv);
251 }
252
254 wt_pc & operator=(const wt_pc & wt)
255 {
256 if (this != &wt)
257 {
258 wt_pc tmp(wt); // re-use copy-constructor
259 *this = std::move(tmp); // re-use move-assignment
260 }
261 return *this;
262 }
263
266 {
267 if (this != &wt)
268 {
269 m_size = wt.m_size;
270 m_sigma = wt.m_sigma;
271 m_bv = std::move(wt.m_bv);
272 m_bv_rank = std::move(wt.m_bv_rank);
273 m_bv_rank.set_vector(&m_bv);
274 m_bv_select1 = std::move(wt.m_bv_select1);
275 m_bv_select1.set_vector(&m_bv);
276 m_bv_select0 = std::move(wt.m_bv_select0);
277 m_bv_select0.set_vector(&m_bv);
278 m_tree = std::move(wt.m_tree);
279 }
280 return *this;
281 }
282
284 size_type size() const { return m_size; }
285
287 bool empty() const { return m_size == 0; }
288
290
301 {
302 assert(i < size());
303 // which stores how many of the next symbols are equal
304 // with the current char
305 node_type v = m_tree.root(); // start at root node
306 while (!m_tree.is_leaf(v))
307 { // while not a leaf
308 if (m_bv[m_tree.bv_pos(v) + i])
309 { // goto right child
310 i = m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v);
311 v = m_tree.child(v, 1);
312 }
313 else
314 { // goto the left child
315 i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
316 v = m_tree.child(v, 0);
317 }
318 }
319 // if v is a leaf bv_pos_rank returns symbol itself
320 return m_tree.bv_pos_rank(v);
321 };
322
324
336 {
337 assert(i <= size());
338 if (!m_tree.is_valid(m_tree.c_to_leaf(c)))
339 {
340 return 0; // if `c` was not in the text
341 }
342 if (m_sigma == 1)
343 {
344 return i; // if m_sigma == 1 answer is trivial
345 }
346 uint64_t p = m_tree.bit_path(c);
347 uint32_t path_len = (p >> 56);
348 size_type result = i;
349 node_type v = m_tree.root();
350 for (uint32_t l = 0; l < path_len and result; ++l, p >>= 1)
351 {
352 if (p & 1) { result = (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v)); }
353 else
354 {
355 result -= (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v));
356 }
357 v = m_tree.child(v, p & 1); // goto child
358 }
359 return result;
360 };
361
363
372 std::pair<size_type, value_type> inverse_select(size_type i) const
373 {
374 assert(i < size());
375 node_type v = m_tree.root();
376 while (!m_tree.is_leaf(v))
377 { // while not a leaf
378 if (m_bv[m_tree.bv_pos(v) + i])
379 { // goto right child
380 i = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
381 v = m_tree.child(v, 1);
382 }
383 else
384 { // goto left child
385 i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
386 v = m_tree.child(v, 0);
387 }
388 }
389 // if v is a leaf bv_pos_rank returns symbol itself
390 return std::make_pair(i, (value_type)m_tree.bv_pos_rank(v));
391 }
392
394
405 {
406 assert(1 <= i and i <= rank(size(), c));
407 node_type v = m_tree.c_to_leaf(c);
408 if (!m_tree.is_valid(v))
409 { // if c was not in the text
410 return m_size; // -> return a position right to the end
411 }
412 if (m_sigma == 1) { return std::min(i - 1, m_size); }
413 size_type result = i - 1; // otherwise
414 uint64_t p = m_tree.bit_path(c);
415 uint32_t path_len = (p >> 56);
416 // path_len > 0, since we have handled m_sigma = 1.
417 p <<= (64 - path_len);
418 for (uint32_t l = 0; l < path_len; ++l, p <<= 1)
419 {
420 if ((p & 0x8000000000000000ULL) == 0)
421 { // node was a left child
422 v = m_tree.parent(v);
423 result = m_bv_select0(m_tree.bv_pos(v) - m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v);
424 }
425 else
426 { // node was a right child
427 v = m_tree.parent(v);
428 result = m_bv_select1(m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v);
429 }
430 }
431 return result;
432 };
433
435
457 size_type j,
458 size_type & k,
459 std::vector<value_type> & cs,
460 std::vector<size_type> & rank_c_i,
461 std::vector<size_type> & rank_c_j) const
462 {
463 assert(i <= j and j <= size());
464 if (i == j) { k = 0; }
465 else if (1 == m_sigma)
466 {
467 k = 1;
468 cs[0] = m_tree.bv_pos_rank(m_tree.root());
469 rank_c_i[0] = std::min(i, m_size);
470 rank_c_j[0] = std::min(j, m_size);
471 }
472 else if ((j - i) == 1)
473 {
474 k = 1;
475 auto rc = inverse_select(i);
476 rank_c_i[0] = rc.first;
477 cs[0] = rc.second;
478 rank_c_j[0] = rank_c_i[0] + 1;
479 }
480 else if ((j - i) == 2)
481 {
482 auto rc = inverse_select(i);
483 rank_c_i[0] = rc.first;
484 cs[0] = rc.second;
485 rc = inverse_select(i + 1);
486 rank_c_i[1] = rc.first;
487 cs[1] = rc.second;
488
489 if (cs[0] == cs[1])
490 {
491 k = 1;
492 rank_c_j[0] = rank_c_i[0] + 2;
493 }
494 else
495 {
496 k = 2;
497 if (lex_ordered and cs[0] > cs[1])
498 {
499 std::swap(cs[0], cs[1]);
500 std::swap(rank_c_i[0], rank_c_i[1]);
501 }
502 rank_c_j[0] = rank_c_i[0] + 1;
503 rank_c_j[1] = rank_c_i[1] + 1;
504 }
505 }
506 else
507 {
508 k = 0;
509 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0);
510 }
511 }
512
514
528 template <class t_ret_type = std::tuple<size_type, size_type, size_type>>
529 typename std::enable_if<shape_type::lex_ordered, t_ret_type>::type lex_count(size_type i,
530 size_type j,
531 value_type c) const
532 {
533 assert(i <= j and j <= size());
534 if (1 == m_sigma)
535 {
536 value_type _c = m_tree.bv_pos_rank(m_tree.root());
537 if (c == _c)
538 { // c is the only symbol in the wt
539 return t_ret_type{ i, 0, 0 };
540 }
541 else if (c < _c)
542 {
543 return t_ret_type{ 0, 0, j - i };
544 }
545 else
546 {
547 return t_ret_type{ 0, j - i, 0 };
548 }
549 }
550 if (i == j) { return t_ret_type{ rank(i, c), 0, 0 }; }
551 uint64_t p = m_tree.bit_path(c);
552 uint32_t path_len = p >> 56;
553 if (path_len == 0)
554 { // path_len=0: => c is not present
555 value_type _c = (value_type)p;
556 if (c == _c)
557 { // c is smaller than any symbol in wt
558 return t_ret_type{ 0, 0, j - i };
559 }
560 auto res = lex_count(i, j, _c);
561 return t_ret_type{ 0, j - i - std::get<2>(res), std::get<2>(res) };
562 }
563 size_type smaller = 0, greater = 0;
564 node_type v = m_tree.root();
565 for (uint32_t l = 0; l < path_len; ++l, p >>= 1)
566 {
567 size_type r1_1 = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
568 size_type r1_2 = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v));
569
570 if (p & 1)
571 {
572 smaller += j - r1_2 - i + r1_1;
573 i = r1_1;
574 j = r1_2;
575 }
576 else
577 {
578 greater += r1_2 - r1_1;
579 i -= r1_1;
580 j -= r1_2;
581 }
582 v = m_tree.child(v, p & 1);
583 }
584 return t_ret_type{ i, smaller, greater };
585 }
586
588
599 template <class t_ret_type = std::tuple<size_type, size_type>>
600 typename std::enable_if<shape_type::lex_ordered, t_ret_type>::type lex_smaller_count(size_type i,
601 value_type c) const
602 {
603 assert(i <= size());
604 if (1 == m_sigma)
605 {
606 value_type _c = m_tree.bv_pos_rank(m_tree.root());
607 if (c == _c)
608 { // c is the only symbol in the wt
609 return t_ret_type{ i, 0 };
610 }
611 else if (c < _c)
612 {
613 return t_ret_type{ 0, 0 };
614 }
615 else
616 {
617 return t_ret_type{ 0, i };
618 }
619 }
620
621 uint64_t p = m_tree.bit_path(c);
622 uint32_t path_len = p >> 56;
623 if (path_len == 0)
624 { // path_len=0: => c is not present
625 value_type _c = (value_type)p;
626 if (c == _c)
627 { // c is smaller than any symbol in wt
628 return t_ret_type{ 0, 0 };
629 }
630 auto res = lex_smaller_count(i, _c);
631 return t_ret_type{ 0, std::get<0>(res) + std::get<1>(res) };
632 }
633 size_type result = 0;
634 size_type all = i; // possible occurrences of c
635 node_type v = m_tree.root();
636 for (uint32_t l = 0; l < path_len and all; ++l, p >>= 1)
637 {
638 size_type ones = (m_bv_rank(m_tree.bv_pos(v) + all) - m_tree.bv_pos_rank(v));
639 if (p & 1)
640 {
641 result += all - ones;
642 all = ones;
643 }
644 else
645 {
646 all -= ones;
647 }
648 v = m_tree.child(v, p & 1);
649 }
650 return t_ret_type{ all, result };
651 }
652
654 const_iterator begin() const { return const_iterator(this, 0); }
655
657 const_iterator end() const { return const_iterator(this, size()); }
658
660 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
661 {
662 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
663 size_type written_bytes = 0;
664 written_bytes += write_member(m_size, out, child, "size");
665 written_bytes += write_member(m_sigma, out, child, "sigma");
666 written_bytes += m_bv.serialize(out, child, "bv");
667 written_bytes += m_bv_rank.serialize(out, child, "bv_rank");
668 written_bytes += m_bv_select1.serialize(out, child, "bv_select_1");
669 written_bytes += m_bv_select0.serialize(out, child, "bv_select_0");
670 written_bytes += m_tree.serialize(out, child, "tree");
671 structure_tree::add_size(child, written_bytes);
672 return written_bytes;
673 }
674
676 void load(std::istream & in)
677 {
678 read_member(m_size, in);
679 read_member(m_sigma, in);
680 m_bv.load(in);
681 m_bv_rank.load(in, &m_bv);
682 m_bv_select1.load(in, &m_bv);
683 m_bv_select0.load(in, &m_bv);
684 m_tree.load(in);
685 }
686
688 bool operator==(wt_pc const & other) const noexcept
689 {
690 return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_bv == other.m_bv) &&
691 (m_bv_rank == other.m_bv_rank) && (m_bv_select1 == other.m_bv_select1) &&
692 (m_bv_select0 == other.m_bv_select0) && (m_tree == other.m_tree);
693 }
694
696 bool operator!=(wt_pc const & other) const noexcept { return !(*this == other); }
697
698 template <typename archive_t>
699 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
700 {
701 ar(CEREAL_NVP(m_size));
702 ar(CEREAL_NVP(m_sigma));
703 ar(CEREAL_NVP(m_bv));
704 ar(CEREAL_NVP(m_bv_rank));
705 ar(CEREAL_NVP(m_bv_select1));
706 ar(CEREAL_NVP(m_bv_select0));
707 ar(CEREAL_NVP(m_tree));
708 }
709
710 template <typename archive_t>
711 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
712 {
713 ar(CEREAL_NVP(m_size));
714 ar(CEREAL_NVP(m_sigma));
715 ar(CEREAL_NVP(m_bv));
716 ar(CEREAL_NVP(m_bv_rank));
717 m_bv_rank.set_vector(&m_bv);
718 ar(CEREAL_NVP(m_bv_select1));
719 m_bv_select1.set_vector(&m_bv);
720 ar(CEREAL_NVP(m_bv_select0));
721 m_bv_select0.set_vector(&m_bv);
722 ar(CEREAL_NVP(m_tree));
723 }
724
727 {
729 }
730
732 auto seq(const node_type & v) const -> random_access_container<std::function<value_type(size_type)>>
733 {
734 return random_access_container<std::function<value_type(size_type)>>(
735 [&v, this](size_type i) {
736 node_type vv = v;
737 while (!is_leaf(vv))
738 {
739 auto vs = expand(vv);
740 auto rs = expand(vv, range_type{ { 0, i } });
741 bool bit = *(begin(vv) + i);
742 i = std::get<1>(rs[bit]);
743 vv = vs[bit];
744 }
745 return sym(vv);
746 },
747 size(v));
748 }
749
751 bool is_leaf(const node_type & v) const { return m_tree.is_leaf(v); }
752
754 value_type sym(const node_type & v) const { return m_tree.bv_pos_rank(v); }
755
757 bool empty(const node_type & v) const { return size(v) == 0; }
758
760 auto size(const node_type & v) const -> decltype(m_tree.size(v))
761 {
762 if (is_leaf(v))
763 {
764 if (v == root())
765 return size();
766 else
767 {
768 auto parent = m_tree.parent(v);
769 auto rs = expand(parent, range_type{ { 0, size(parent) - 1 } });
770 if (m_tree.child(parent, 0) == v)
771 return std::get<1>(std::get<0>(rs)) - std::get<0>((std::get<0>(rs))) + 1;
772 else
773 return std::get<1>(std::get<1>(rs)) - std::get<0>((std::get<1>(rs))) + 1;
774 }
775 }
776 else
777 {
778 return m_tree.size(v);
779 }
780 }
781
783 node_type root() const { return m_tree.root(); }
784
786
790 std::array<node_type, 2> expand(const node_type & v) const
791 {
792 return { { m_tree.child(v, 0), m_tree.child(v, 1) } };
793 }
794
796
805 std::array<range_vec_type, 2> expand(const node_type & v, const range_vec_type & ranges) const
806 {
807 auto ranges_copy = ranges;
808 return expand(v, std::move(ranges_copy));
809 }
810
812
821 std::array<range_vec_type, 2> expand(const node_type & v, range_vec_type && ranges) const
822 {
823 auto v_sp_rank = m_tree.bv_pos_rank(v);
824 range_vec_type res(ranges.size());
825 size_t i = 0;
826 for (auto & r : ranges)
827 {
828 auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]);
829 auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank;
830 auto left_size = (r[1] - r[0] + 1) - right_size;
831
832 auto right_sp = sp_rank - v_sp_rank;
833 auto left_sp = r[0] - right_sp;
834
835 r = { { left_sp, left_sp + left_size - 1 } };
836 res[i++] = { { right_sp, right_sp + right_size - 1 } };
837 }
838 return { { ranges, std::move(res) } };
839 }
840
842
851 std::array<range_type, 2> expand(const node_type & v, const range_type & r) const
852 {
853 auto v_sp_rank = m_tree.bv_pos_rank(v);
854 auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]);
855 auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank;
856 auto left_size = (r[1] - r[0] + 1) - right_size;
857
858 auto right_sp = sp_rank - v_sp_rank;
859 auto left_sp = r[0] - right_sp;
860
861 return { { { { left_sp, left_sp + left_size - 1 } }, { { right_sp, right_sp + right_size - 1 } } } };
862 }
863
865 std::pair<uint64_t, uint64_t> path(value_type c) const
866 {
867 uint64_t path = m_tree.bit_path(c);
868 uint64_t path_len = path >> 56;
869 // reverse the path till we fix the ordering
871 path = path >> (64 - path_len); // remove the length
872 return { path_len, path };
873 }
874
876
881 std::pair<bool, value_type> symbol_gte(value_type c) const { return m_tree.symbol_gte(c); }
882
884
889 std::pair<bool, value_type> symbol_lte(value_type c) const { return m_tree.symbol_lte(c); }
890
891 private:
893 auto begin(const node_type & v) const -> decltype(m_bv.begin() + m_tree.bv_pos(v))
894 {
895 return m_bv.begin() + m_tree.bv_pos(v);
896 }
897
899 auto end(const node_type & v) const -> decltype(m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v))
900 {
901 return m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v);
902 }
903};
904} // namespace sdsl
905
906#endif
bit_vectors.hpp contains classes for uncompressed and compressed bit vector representations.
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
A generic vector class for integers of width .
Definition: int_vector.hpp:216
Generic iterator for a random access container.
Definition: iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
A prefix code-shaped wavelet.
Definition: wt_pc.hpp:44
void interval_symbols(size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
Definition: wt_pc.hpp:456
wt_pc & operator=(wt_pc &&wt)
Move assignment operator.
Definition: wt_pc.hpp:265
value_type sym(const node_type &v) const
Symbol for a leaf.
Definition: wt_pc.hpp:754
std::enable_if< shape_type::lex_ordered, t_ret_type >::type lex_smaller_count(size_type i, value_type c) const
How many symbols are lexicographic smaller than c in [0..i-1].
Definition: wt_pc.hpp:600
wt_pc(const wt_pc &wt)
Copy constructor.
Definition: wt_pc.hpp:225
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
Definition: wt_pc.hpp:335
t_bitvector::difference_type difference_type
Definition: wt_pc.hpp:49
std::pair< bool, value_type > symbol_gte(value_type c) const
Returns for a symbol c the next larger or equal symbol in the WT.
Definition: wt_pc.hpp:881
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_pc.hpp:676
int_vector ::size_type size_type
Definition: wt_pc.hpp:47
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
Definition: wt_pc.hpp:300
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: wt_pc.hpp:711
tree_strat_type::alphabet_category alphabet_category
Definition: wt_pc.hpp:57
t_bitvector bit_vector_type
Definition: wt_pc.hpp:52
const_iterator iterator
Definition: wt_pc.hpp:51
bool is_leaf(const node_type &v) const
Checks if the node is a leaf node.
Definition: wt_pc.hpp:751
wt_pc(t_it begin, t_it end, std::string)
Definition: wt_pc.hpp:220
wt_tag index_category
Definition: wt_pc.hpp:56
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: wt_pc.hpp:699
wt_pc & operator=(const wt_pc &wt)
Assignment operator.
Definition: wt_pc.hpp:254
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: wt_pc.hpp:654
std::array< range_type, 2 > expand(const node_type &v, const range_type &r) const
Returns for a range its left and right child ranges.
Definition: wt_pc.hpp:851
size_type size() const
Returns the size of the original vector.
Definition: wt_pc.hpp:284
typename tree_strat_type::node_type node_type
Definition: wt_pc.hpp:63
node_type root() const
Returns the root node.
Definition: wt_pc.hpp:783
auto bit_vec(const node_type &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
Definition: wt_pc.hpp:726
bool operator==(wt_pc const &other) const noexcept
Equality operator.
Definition: wt_pc.hpp:688
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: wt_pc.hpp:657
std::pair< bool, value_type > symbol_lte(value_type c) const
Returns for a symbol c the previous smaller or equal symbol in the WT.
Definition: wt_pc.hpp:889
wt_pc(t_it begin, t_it end)
Construct the wavelet tree from a sequence defined by two interators.
Definition: wt_pc.hpp:169
t_select_zero select_0_type
Definition: wt_pc.hpp:55
t_tree_strat::template type< wt_pc > tree_strat_type
Definition: wt_pc.hpp:46
bool empty(const node_type &v) const
Indicates if node v is empty.
Definition: wt_pc.hpp:757
t_select select_1_type
Definition: wt_pc.hpp:54
wt_pc(wt_pc &&wt)
Definition: wt_pc.hpp:239
const bit_vector_type & bv
Definition: wt_pc.hpp:156
auto seq(const node_type &v) const -> random_access_container< std::function< value_type(size_type)> >
Random access container to sequence of node v.
Definition: wt_pc.hpp:732
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many times symbol wt[i] occurs in the prefix [0..i-1].
Definition: wt_pc.hpp:372
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_pc.hpp:660
tree_strat_type::value_type value_type
Definition: wt_pc.hpp:48
std::enable_if< shape_type::lex_ordered, t_ret_type >::type lex_count(size_type i, size_type j, value_type c) const
How many symbols are lexicographic smaller/greater than c in [i..j-1].
Definition: wt_pc.hpp:529
bool empty() const
Returns whether the wavelet tree contains no data.
Definition: wt_pc.hpp:287
t_shape::template type< wt_pc > shape_type
Definition: wt_pc.hpp:58
std::array< range_vec_type, 2 > expand(const node_type &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
Definition: wt_pc.hpp:821
std::array< range_vec_type, 2 > expand(const node_type &v, const range_vec_type &ranges) const
Returns for each range its left and right child ranges.
Definition: wt_pc.hpp:805
auto size(const node_type &v) const -> decltype(m_tree.size(v))
Return the size of node v.
Definition: wt_pc.hpp:760
bool operator!=(wt_pc const &other) const noexcept
Inequality operator.
Definition: wt_pc.hpp:696
@ lex_ordered
Definition: wt_pc.hpp:61
size_type select(size_type i, value_type c) const
Calculates the ith occurrence of the symbol c in the supported vector.
Definition: wt_pc.hpp:404
t_rank rank_1_type
Definition: wt_pc.hpp:53
const size_type & sigma
Definition: wt_pc.hpp:155
random_access_const_iterator< wt_pc > const_iterator
Definition: wt_pc.hpp:50
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
Definition: wt_pc.hpp:865
std::array< node_type, 2 > expand(const node_type &v) const
Returns the two child nodes of an inner node.
Definition: wt_pc.hpp:790
Namespace for the succinct data structure library.
void calculate_effective_alphabet_size(const t_rac &C, sigma_type &sigma)
Definition: wt_helper.hpp:52
void calculate_character_occurences(t_it begin, t_it end, t_rac &C)
Count for each character the number of occurrences in rac[0..size-1].
Definition: wt_helper.hpp:40
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:53
std::array< int_vector<>::size_type, 2 > range_type
Definition: wt_helper.hpp:20
std::vector< range_type > range_vec_type
Definition: wt_helper.hpp:21
rank_support.hpp contains classes that support a sdsl::bit_vector with constant time rank information...
select_support.hpp contains classes that support a sdsl::bit_vector with constant time select informa...
static constexpr uint64_t rev(uint64_t x)
reverses a given 64 bit word
Definition: bits.hpp:927