SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
wt_helper.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4#ifndef INCLUDED_SDSL_WT_HELPER
5#define INCLUDED_SDSL_WT_HELPER
6
7#include <algorithm>
8#include <array>
9#include <deque>
10#include <limits>
11#include <queue>
12#include <utility>
13#include <vector>
14
15#include <sdsl/int_vector.hpp>
16
17namespace sdsl
18{
19
20typedef std::array<int_vector<>::size_type, 2> range_type;
21typedef std::vector<range_type> range_vec_type;
22
24
27bool empty(const range_type & r);
28
30
34
36
39template <typename t_it, typename t_rac>
40void calculate_character_occurences(t_it begin, t_it end, t_rac & C)
41{
42 C = t_rac();
43 for (auto it = begin; it != end; ++it)
44 {
45 uint64_t c = *it;
46 if (c >= C.size()) { C.resize(c + 1, 0); }
47 ++C[c];
48 }
49}
50
51template <typename t_rac, typename sigma_type>
52void calculate_effective_alphabet_size(const t_rac & C, sigma_type & sigma)
53{
54 sigma = std::count_if(begin(C), end(C), [](decltype(*begin(C)) & x) { return x > 0; });
55}
56
57struct pc_node
58{
59 uint64_t freq; // frequency of symbol sym
60 uint64_t sym; // symbol
61 uint64_t parent; // pointer to the parent
62 uint64_t child[2]; // pointer to the children
63
64 enum : uint64_t
65 {
66 undef = 0xFFFFFFFFFFFFFFFFULL
67 }; // max uint64_t value
68
69 pc_node(uint64_t freq = 0,
70 uint64_t sym = 0,
71 uint64_t parent = undef,
72 uint64_t child_left = undef,
73 uint64_t child_right = undef);
74};
75
76template <typename t_tree_strat_fat>
77struct _node
78{
79 using node_type = typename t_tree_strat_fat::node_type;
80 typedef uint64_t size_type;
81 uint64_t bv_pos = 0; // pointer into the bit_vector, which represents the wavelet tree
82 uint64_t bv_pos_rank = 0; // pre-calculated rank for the prefix up to but not including bv_pos
83 node_type parent = t_tree_strat_fat::undef; // pointer to the parent
84 node_type child[2] = { t_tree_strat_fat::undef, t_tree_strat_fat::undef }; // pointer to the children
85
86 _node(uint64_t bv_pos = 0,
87 uint64_t bv_pos_rank = 0,
88 node_type parent = t_tree_strat_fat::undef,
89 node_type child_left = t_tree_strat_fat::undef,
90 node_type child_right = t_tree_strat_fat::undef)
91 : bv_pos(bv_pos)
93 , parent(parent)
94 {
95 child[0] = child_left;
96 child[1] = child_right;
97 }
98
99 _node(const _node &) = default;
100
101 _node & operator=(const _node & v)
102 {
103 if (this != &v)
104 {
105 bv_pos = v.bv_pos;
107 parent = v.parent;
108 child[0] = v.child[0];
109 child[1] = v.child[1];
110 }
111 return *this;
112 }
113
115 {
116 bv_pos = v.freq;
117 bv_pos_rank = v.sym;
118 parent = v.parent;
119 child[0] = v.child[0];
120 child[1] = v.child[1];
121 return *this;
122 }
123
124 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
125 {
126 structure_tree_node * st_child = structure_tree::add_child(v, name, util::class_name(*this));
127 uint64_t written_bytes = 0;
128 written_bytes += write_member(bv_pos, out);
129 written_bytes += write_member(bv_pos_rank, out);
130 written_bytes += write_member(parent, out);
131 out.write((char *)child, 2 * sizeof(child[0]));
132 written_bytes += 2 * sizeof(child[0]);
133 structure_tree::add_size(st_child, written_bytes);
134 return written_bytes;
135 }
136
137 void load(std::istream & in)
138 {
139 read_member(bv_pos, in);
141 read_member(parent, in);
142 in.read((char *)child, 2 * sizeof(child[0]));
143 }
144
145 template <typename archive_t>
146 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
147 {
148 ar(CEREAL_NVP(bv_pos));
150 ar(CEREAL_NVP(parent));
151 ar(CEREAL_NVP(child[0]));
152 ar(CEREAL_NVP(child[1]));
153 }
154
155 template <typename archive_t>
156 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
157 {
158 ar(CEREAL_NVP(bv_pos));
160 ar(CEREAL_NVP(parent));
161 ar(CEREAL_NVP(child[0]));
162 ar(CEREAL_NVP(child[1]));
163 }
164
166 bool operator==(_node const & other) const noexcept
167 {
168 return (bv_pos == other.bv_pos) && (bv_pos_rank == other.bv_pos_rank) && (parent == other.parent) &&
169 (child[0] == other.child[0]) && (child[1] == other.child[1]);
170 }
171
173 bool operator!=(_node const & other) const noexcept { return !(*this == other); }
174};
175
176// TODO: version of _byte_tree for lex_ordered tree shapes
177// m_c_to_leaf can be compressed and
178// m_path is only needed for sigma chars
179
180// Strategy class for tree representation of a WT
181template <bool t_dfs_shape, typename t_wt>
183{
185 using value_type = uint8_t;
186 using node_type = uint16_t; // node is represented by index in m_nodes
188 enum : uint16_t
189 {
190 undef = 0xFFFF
191 }; // max uint16_t value
192 enum : uint32_t
193 {
194 fixed_sigma = 256
195 };
196 enum : uint8_t
197 {
198 int_width = 8
199 }; // width of the input integers
200
201 std::vector<data_node> m_nodes; // nodes for the prefix code tree structure
202 node_type m_c_to_leaf[fixed_sigma]; // map symbol c to a leaf in the tree structure
203 // if m_c_to_leaf[c] == undef the char does
204 // not exists in the text
205 uint64_t m_path[fixed_sigma]; // path information for each char; the bits at position
206 // 0..55 hold path information; bits 56..63 the length
207 // of the path in binary representation
208
210
211 _byte_tree(const std::vector<pc_node> & temp_nodes, uint64_t & bv_size, const t_wt *)
212 {
213 m_nodes.resize(temp_nodes.size());
214 m_nodes[0] = temp_nodes.back(); // insert root at index 0
215 bv_size = 0;
216 size_t node_cnt = 1;
217 node_type last_parent = undef;
218 std::deque<node_type> q;
219 q.push_back(0);
220 while (!q.empty())
221 {
222 node_type idx;
223 if (!t_dfs_shape)
224 {
225 idx = q.front();
226 q.pop_front();
227 }
228 else
229 {
230 idx = q.back();
231 q.pop_back();
232 }
233 // frq_sum is store in bv_pos value
234 uint64_t frq = m_nodes[idx].bv_pos;
235 m_nodes[idx].bv_pos = bv_size;
236 if (m_nodes[idx].child[0] != undef) // if node is not a leaf
237 bv_size += frq; // add frequency
238 if (idx > 0)
239 { // node is not the root
240 if (last_parent != m_nodes[idx].parent)
241 m_nodes[m_nodes[idx].parent].child[0] = idx;
242 else
243 m_nodes[m_nodes[idx].parent].child[1] = idx;
244 last_parent = m_nodes[idx].parent;
245 }
246 if (m_nodes[idx].child[0] != undef)
247 { // if node is not a leaf
248 for (uint32_t k = 0; k < 2; ++k)
249 { // add children to tree
250 m_nodes[node_cnt] = temp_nodes[m_nodes[idx].child[k]];
251 m_nodes[node_cnt].parent = idx;
252 q.push_back(node_cnt);
253 m_nodes[idx].child[k] = node_cnt++;
254 }
255 }
256 }
257 // initialize m_c_to_leaf
258 for (uint32_t i = 0; i < fixed_sigma; ++i)
259 m_c_to_leaf[i] = undef; // if c is not in the alphabet m_c_to_leaf[c] = undef
260 for (node_type v = 0; v < m_nodes.size(); ++v)
261 {
262 if (m_nodes[v].child[0] == undef) // if node is a leaf
263 m_c_to_leaf[(uint8_t)m_nodes[v].bv_pos_rank] = v; // calculate value
264 }
265 // initialize path information
266 // Note: In the case of a bfs search order,
267 // we can classify nodes as right child and left child with an easy criterion:
268 // node is a left child, if node%2==1
269 // node is a right child, if node%2==0
270 for (uint32_t c = 0, prev_c = 0; c < fixed_sigma; ++c)
271 {
272 if (m_c_to_leaf[c] != undef)
273 { // if char exists in the alphabet
274 node_type v = m_c_to_leaf[c];
275 uint64_t pw = 0; // path
276 uint64_t pl = 0; // path len
277 while (v != root())
278 { // while node is not the root
279 pw <<= 1;
280 if (m_nodes[m_nodes[v].parent].child[1] == v) // if the node is a right child
281 pw |= 1ULL;
282 ++pl;
283 v = m_nodes[v].parent; // go up the tree
284 }
285 if (pl > 56) { throw std::logic_error("Code depth greater than 56!!!"); }
286 m_path[c] = pw | (pl << 56);
287 prev_c = c;
288 }
289 else
290 {
291 uint64_t pl = 0; // len is 0, good for special case in rank
292 m_path[c] = prev_c | (pl << 56);
293 }
294 }
295 }
296
297 template <typename t_rank_type>
298 void init_node_ranks(const t_rank_type & rank)
299 {
300 for (uint64_t i = 0; i < m_nodes.size(); ++i)
301 {
302 if (m_nodes[i].child[0] != undef) // if node is not a leaf
303 m_nodes[i].bv_pos_rank = rank.rank(m_nodes[i].bv_pos);
304 }
305 }
306
308 : m_nodes(bt.m_nodes)
309 {
310
311 for (uint32_t i = 0; i < fixed_sigma; ++i) m_c_to_leaf[i] = bt.m_c_to_leaf[i];
312 for (uint32_t i = 0; i < fixed_sigma; ++i) m_path[i] = bt.m_path[i];
313 }
314
316 {
317 if (this != &bt)
318 {
319 _byte_tree tmp(bt);
320 *this = std::move(tmp);
321 }
322 return *this;
323 }
324
326 {
327 if (this != &bt)
328 {
329 m_nodes = std::move(bt.m_nodes);
330 for (uint32_t i = 0; i < fixed_sigma; ++i) m_c_to_leaf[i] = bt.m_c_to_leaf[i];
331 for (uint32_t i = 0; i < fixed_sigma; ++i) m_path[i] = bt.m_path[i];
332 }
333 return *this;
334 }
335
337 uint64_t serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
338 {
339 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
340 uint64_t written_bytes = 0;
341 uint64_t m_nodes_size = m_nodes.size();
342 written_bytes += write_member(m_nodes_size, out, child, "m_nodes.size()");
343 written_bytes += serialize_vector(m_nodes, out, child, "m_nodes");
344 out.write((char *)m_c_to_leaf, fixed_sigma * sizeof(m_c_to_leaf[0]));
345 written_bytes += fixed_sigma * sizeof(m_c_to_leaf[0]); // bytes from previous loop
346 out.write((char *)m_path, fixed_sigma * sizeof(m_path[0]));
347 written_bytes += fixed_sigma * sizeof(m_path[0]); // bytes from previous loop
348 structure_tree::add_size(child, written_bytes);
349 return written_bytes;
350 }
351
353 void load(std::istream & in)
354 {
355 uint64_t m_nodes_size = 0;
356 read_member(m_nodes_size, in);
357 m_nodes = std::vector<data_node>(m_nodes_size);
358 load_vector(m_nodes, in);
359 in.read((char *)m_c_to_leaf, fixed_sigma * sizeof(m_c_to_leaf[0]));
360 in.read((char *)m_path, fixed_sigma * sizeof(m_path[0]));
361 }
362
363 template <typename archive_t>
364 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
365 {
366 ar(CEREAL_NVP(m_nodes));
368 ar(CEREAL_NVP(m_path));
369 }
370
371 template <typename archive_t>
372 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
373 {
374 ar(CEREAL_NVP(m_nodes));
376 ar(CEREAL_NVP(m_path));
377 }
378
380 bool operator==(_byte_tree const & other) const noexcept
381 {
382 return (m_nodes == other.m_nodes) /* && (m_c_to_leaf == other.m_c_to_leaf) &&
383 (m_path == other.m_path)*/;
384 }
385
387 bool operator!=(_byte_tree const & other) const noexcept { return !(*this == other); }
388
390 inline node_type c_to_leaf(value_type c) const { return m_c_to_leaf[c]; }
392 inline static node_type root() { return 0; }
393
395 uint64_t size() const { return m_nodes.size(); }
396
398 inline node_type parent(node_type v) const { return m_nodes[v].parent; }
400 inline node_type child(node_type v, uint8_t i) const { return m_nodes[v].child[i]; }
401
403 inline bool is_leaf(node_type v) const { return m_nodes[v].child[0] == undef; }
404
406 inline uint64_t size(node_type v) const
407 {
408 auto next_v = t_dfs_shape ? m_nodes[v].child[0] : v + 1;
409 return bv_pos(next_v) - bv_pos(v);
410 }
411
413 inline uint64_t bit_path(value_type c) const { return m_path[c]; }
414
416 inline uint64_t bv_pos(node_type v) const { return m_nodes[v].bv_pos; }
417
419 inline uint64_t bv_pos_rank(node_type v) const { return m_nodes[v].bv_pos_rank; }
420
422 inline bool is_valid(node_type v) const { return v != undef; }
423
425 inline std::pair<bool, value_type> symbol_gte(value_type c) const
426 {
427 for (uint32_t i = c; i < fixed_sigma; i++)
428 {
429 if (m_c_to_leaf[i] != undef) { return { true, i }; }
430 }
431 return { false, 0 };
432 }
433
435 inline std::pair<bool, value_type> symbol_lte(value_type c) const
436 {
437 for (uint32_t i = c; i > 0; i--)
438 {
439 if (m_c_to_leaf[i] != undef) { return { true, i }; }
440 }
441 if (m_c_to_leaf[0] != undef) return { true, 0 };
442 return { false, 0 };
443 }
444};
445
446// Strategy class for tree representation of a WT
447template <bool t_dfs_shape = false>
449{
450 template <typename t_wt>
452};
453
454// Strategy class for tree representation of a WT
455template <bool t_dfs_shape, typename t_wt>
457{
459 using value_type = uint64_t;
460 using node_type = uint64_t; // node is represented by index in m_nodes
462 enum : uint64_t
463 {
464 undef = 0xFFFFFFFFFFFFFFFFULL
465 }; // max uint64_t value
466 enum : uint8_t
467 {
468 int_width = 0
469 }; // width of the input integers is variable
470
471 std::vector<data_node> m_nodes; // nodes for the prefix code tree structure
472 std::vector<node_type> m_c_to_leaf; // map symbol c to a leaf in the tree structure
473 // if m_c_to_leaf[c] == undef the char does
474 // not exists in the text
475 std::vector<uint64_t> m_path; // path information for each char; the bits at position
476 // 0..55 hold path information; bits 56..63 the length
477 // of the path in binary representation
478
479 _int_tree() = default;
480
481 _int_tree(const std::vector<pc_node> & temp_nodes, uint64_t & bv_size, const t_wt *)
482 {
483 m_nodes.resize(temp_nodes.size());
484 m_nodes[0] = temp_nodes.back(); // insert root at index 0
485 bv_size = 0;
486 size_t node_cnt = 1;
487 node_type last_parent = undef;
488 std::deque<node_type> q;
489 q.push_back(0);
490 uint64_t max_c = 0;
491 while (!q.empty())
492 {
493 node_type idx;
494 if (!t_dfs_shape)
495 {
496 idx = q.front();
497 q.pop_front();
498 }
499 else
500 {
501 idx = q.back();
502 q.pop_back();
503 }
504 // frq_sum is store in bv_pos value
505 uint64_t frq = m_nodes[idx].bv_pos;
506 m_nodes[idx].bv_pos = bv_size;
507 if (m_nodes[idx].child[0] != undef)
508 { // if node is not a leaf
509 bv_size += frq; // add frequency
510 }
511 else if (max_c < m_nodes[idx].bv_pos_rank)
512 { // node is leaf and contains large symbol
513 max_c = m_nodes[idx].bv_pos_rank;
514 }
515 if (idx > 0)
516 { // node is not the root
517 if (last_parent != m_nodes[idx].parent)
518 m_nodes[m_nodes[idx].parent].child[0] = idx;
519 else
520 m_nodes[m_nodes[idx].parent].child[1] = idx;
521 last_parent = m_nodes[idx].parent;
522 }
523 if (m_nodes[idx].child[0] != undef)
524 { // if node is not a leaf
525 for (uint32_t k = 0; k < 2; ++k)
526 { // add children to tree
527 m_nodes[node_cnt] = temp_nodes[m_nodes[idx].child[k]];
528 m_nodes[node_cnt].parent = idx;
529 q.push_back(node_cnt);
530 m_nodes[idx].child[k] = node_cnt++;
531 }
532 }
533 }
534 // initialize m_c_to_leaf
535 // if c is not in the alphabet m_c_to_leaf[c] = undef
536 m_c_to_leaf.resize(max_c + 1, undef);
537 for (node_type v = 0; v < m_nodes.size(); ++v)
538 {
539 if (m_nodes[v].child[0] == undef)
540 { // if node is a leaf
541 uint64_t c = m_nodes[v].bv_pos_rank;
542 m_c_to_leaf[c] = v; // calculate value
543 if (c > max_c) max_c = c;
544 }
545 }
546 m_path = std::vector<uint64_t>(m_c_to_leaf.size(), 0);
547 // initialize path information
548 // Note: In the case of a bfs search order,
549 // we can classify nodes as right child and left child with an easy criterion:
550 // node is a left child, if node%2==1
551 // node is a right child, if node%2==0
552 for (value_type c = 0, prev_c = 0; c < m_c_to_leaf.size(); ++c)
553 {
554 if (m_c_to_leaf[c] != undef)
555 { // if char exists in the alphabet
556 node_type v = m_c_to_leaf[c];
557 uint64_t w = 0; // path
558 uint64_t l = 0; // path len
559 while (v != root())
560 { // while node is not the root
561 w <<= 1;
562 if (m_nodes[m_nodes[v].parent].child[1] == v) // if the node is a right child
563 w |= 1ULL;
564 ++l;
565 v = m_nodes[v].parent; // go up the tree
566 }
567 if (l > 56) { throw std::logic_error("Code depth greater than 56!!!"); }
568 m_path[c] = w | (l << 56);
569 prev_c = c;
570 }
571 else
572 {
573 uint64_t pl = 0; // len is 0, good for special case in rank
574 m_path[c] = prev_c | (pl << 56);
575 }
576 }
577 }
578
579 template <typename t_rank_type>
580 void init_node_ranks(const t_rank_type & rank)
581 {
582 for (uint64_t i = 0; i < m_nodes.size(); ++i)
583 {
584 if (m_nodes[i].child[0] != undef) // if node is not a leaf
585 m_nodes[i].bv_pos_rank = rank.rank(m_nodes[i].bv_pos);
586 }
587 }
588
589 _int_tree(const _int_tree & bt) = default;
590 _int_tree(_int_tree && bt) = default;
591
592 _int_tree & operator=(const _int_tree & bt) = default;
593 _int_tree & operator=(_int_tree && bt) = default;
594
596 uint64_t serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
597 {
598 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
599 uint64_t written_bytes = 0;
600 uint64_t m_nodes_size = m_nodes.size();
601 written_bytes += write_member(m_nodes_size, out, child, "m_nodes.size()");
602 written_bytes += serialize_vector(m_nodes, out, child, "m_nodes");
603 uint64_t m_c_to_leaf_size = m_c_to_leaf.size();
604 written_bytes += write_member(m_c_to_leaf_size, out, child, "m_c_to_leaf.size()");
605 written_bytes += serialize_vector(m_c_to_leaf, out, child, "m_c_to_leaf");
606 uint64_t m_path_size = m_path.size();
607 written_bytes += write_member(m_path_size, out, child, "m_path.size()");
608 written_bytes += serialize_vector(m_path, out, child, "m_path");
609 structure_tree::add_size(child, written_bytes);
610 return written_bytes;
611 }
612
614 void load(std::istream & in)
615 {
616 uint64_t m_nodes_size = 0;
617 read_member(m_nodes_size, in);
618 m_nodes = std::vector<data_node>(m_nodes_size);
619 load_vector(m_nodes, in);
620 uint64_t m_c_to_leaf_size = 0;
621 read_member(m_c_to_leaf_size, in);
622 m_c_to_leaf = std::vector<node_type>(m_c_to_leaf_size);
624 uint64_t m_path_size = 0;
625 read_member(m_path_size, in);
626 m_path = std::vector<uint64_t>(m_path_size);
627 load_vector(m_path, in);
628 }
629
631 bool operator==(_int_tree const & other) const noexcept
632 {
633 return (m_nodes == other.m_nodes) && (m_c_to_leaf == other.m_c_to_leaf) && (m_path == other.m_path);
634 }
635
637 bool operator!=(_int_tree const & other) const noexcept { return !(*this == other); }
638
639 template <typename archive_t>
640 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
641 {
642 ar(CEREAL_NVP(m_nodes));
644 ar(CEREAL_NVP(m_path));
645 }
646
647 template <typename archive_t>
648 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
649 {
650 ar(CEREAL_NVP(m_nodes));
652 ar(CEREAL_NVP(m_path));
653 }
654
657 {
658 if (c >= m_c_to_leaf.size())
659 return undef;
660 else
661 return m_c_to_leaf[c];
662 }
664 inline static node_type root() { return 0; }
665
667 uint64_t size() const { return m_nodes.size(); }
668
670 inline node_type parent(node_type v) const { return m_nodes[v].parent; }
672 inline node_type child(node_type v, uint8_t i) const { return m_nodes[v].child[i]; }
673
675 inline bool is_leaf(node_type v) const { return m_nodes[v].child[0] == undef; }
676
678 inline uint64_t size(node_type v) const
679 {
680 auto next_v = t_dfs_shape ? m_nodes[v].child[0] : v + 1;
681 return bv_pos(next_v) - bv_pos(v);
682 }
683
685 inline uint64_t bit_path(value_type c) const
686 {
687 if (c >= m_path.size()) { return m_path.size() - 1; }
688 return m_path[c];
689 }
690
692 inline uint64_t bv_pos(node_type v) const { return m_nodes[v].bv_pos; }
693
695 inline uint64_t bv_pos_rank(node_type v) const { return m_nodes[v].bv_pos_rank; }
696
698 inline bool is_valid(node_type v) const { return v != undef; }
699
701 inline std::pair<bool, value_type> symbol_gte(value_type c) const
702 {
703 if (c >= m_c_to_leaf.size()) { return { false, 0 }; }
704 for (value_type i = c; i < m_c_to_leaf.size(); i++)
705 {
706 if (m_c_to_leaf[i] != undef) { return { true, i }; }
707 }
708 return { false, 0 };
709 }
710
712 inline std::pair<bool, value_type> symbol_lte(value_type c) const
713 {
714 if (c >= m_c_to_leaf.size())
715 {
716 // return the largest symbol
717 c = m_c_to_leaf.size() - 1;
718 }
719 for (value_type i = c; i > 0; i--)
720 {
721 if (m_c_to_leaf[i] != undef) { return { true, i }; }
722 }
723 if (m_c_to_leaf[0] != undef) return { true, 0 };
724 return { false, 0 };
725 }
726};
727
728// Strategy class for tree representation of a WT
729template <bool t_dfs_shape = false>
731{
732 template <typename t_wt>
734};
735
736template <typename t_bv>
738{
739 public:
740 typedef typename t_bv::value_type value_type;
741 typedef typename t_bv::size_type size_type;
742 typedef typename t_bv::difference_type difference_type;
743 typedef typename t_bv::const_iterator iterator;
744
745 private:
746 iterator m_begin, m_end;
747
748 public:
750 : m_begin(b)
751 , m_end(e)
752 {}
753 value_type operator[](size_type i) const { return *(m_begin + i); }
754 size_type size() const { return m_end - m_begin; }
755 iterator begin() const { return m_begin; }
756 iterator end() const { return m_end; }
757};
758
759template <typename t_bv>
761{
762 public:
763 typedef typename t_bv::value_type value_type;
764 typedef typename t_bv::size_type size_type;
765 typedef typename t_bv::difference_type difference_type;
766 typedef typename t_bv::const_iterator iterator;
767
768 private:
769 iterator m_begin, m_end;
770
771 public:
773 : m_begin(b)
774 , m_end(e)
775 {}
776 value_type operator[](size_type i) const { return *(m_begin + i); }
777 size_type size() const { return m_end - m_begin; }
778 iterator begin() const { return m_begin; }
779 iterator end() const { return m_end; }
780};
781
782inline bool empty(const range_type & r)
783{
784 return std::get<0>(r) == (std::get<1>(r) + 1);
785}
786
788{
789 return std::get<1>(r) - std::get<0>(r) + 1;
790}
791
792inline pc_node::pc_node(uint64_t freq, uint64_t sym, uint64_t parent, uint64_t child_left, uint64_t child_right)
793 : freq(freq)
794 , sym(sym)
795 , parent(parent)
796{
797 child[0] = child_left;
798 child[1] = child_right;
799}
800
801} // end namespace sdsl
802#endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
A generic vector class for integers of width .
Definition: int_vector.hpp:216
size_type size() const
Definition: wt_helper.hpp:754
iterator begin() const
Definition: wt_helper.hpp:755
node_bv_container(iterator b, iterator e)
Definition: wt_helper.hpp:749
t_bv::size_type size_type
Definition: wt_helper.hpp:741
t_bv::const_iterator iterator
Definition: wt_helper.hpp:743
value_type operator[](size_type i) const
Definition: wt_helper.hpp:753
iterator end() const
Definition: wt_helper.hpp:756
t_bv::value_type value_type
Definition: wt_helper.hpp:740
t_bv::difference_type difference_type
Definition: wt_helper.hpp:742
iterator end() const
Definition: wt_helper.hpp:779
t_bv::size_type size_type
Definition: wt_helper.hpp:764
value_type operator[](size_type i) const
Definition: wt_helper.hpp:776
node_seq_container(iterator b, iterator e)
Definition: wt_helper.hpp:772
iterator begin() const
Definition: wt_helper.hpp:778
t_bv::difference_type difference_type
Definition: wt_helper.hpp:765
t_bv::value_type value_type
Definition: wt_helper.hpp:763
t_bv::const_iterator iterator
Definition: wt_helper.hpp:766
size_type size() const
Definition: wt_helper.hpp:777
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
void load_vector(std::vector< T > &, std::istream &)
Load all elements of a vector from a input stream.
Definition: io.hpp:404
void calculate_effective_alphabet_size(const t_rac &C, sigma_type &sigma)
Definition: wt_helper.hpp:52
uint64_t serialize_vector(const std::vector< T > &, std::ostream &, sdsl::structure_tree_node *v=nullptr, std::string="")
Serialize each element of an std::vector.
Definition: io.hpp:376
void calculate_character_occurences(t_it begin, t_it end, t_rac &C)
Count for each character the number of occurrences in rac[0..size-1].
Definition: wt_helper.hpp:40
bool empty(const range_type &r)
Empty range check.
Definition: wt_helper.hpp:782
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
std::array< int_vector<>::size_type, 2 > range_type
Definition: wt_helper.hpp:20
std::vector< range_type > range_vec_type
Definition: wt_helper.hpp:21
int_vector ::size_type size(const range_type &r)
Size of a range.
Definition: wt_helper.hpp:787
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: wt_helper.hpp:364
uint64_t bv_pos(node_type v) const
Return the start of the node in the WT's bit vector.
Definition: wt_helper.hpp:416
uint8_t value_type
Definition: wt_helper.hpp:185
void init_node_ranks(const t_rank_type &rank)
Definition: wt_helper.hpp:298
node_type m_c_to_leaf[fixed_sigma]
Definition: wt_helper.hpp:202
_byte_tree(const _byte_tree &bt)
Definition: wt_helper.hpp:307
_byte_tree(const std::vector< pc_node > &temp_nodes, uint64_t &bv_size, const t_wt *)
Definition: wt_helper.hpp:211
uint64_t bv_pos_rank(node_type v) const
Returns for node v the rank of 1's up to bv_pos(v)
Definition: wt_helper.hpp:419
bool is_valid(node_type v) const
Return if the node is a valid node.
Definition: wt_helper.hpp:422
_byte_tree & operator=(_byte_tree &&bt)
Definition: wt_helper.hpp:325
std::pair< bool, value_type > symbol_lte(value_type c) const
Return symbol c or the next smaller symbol in the wt.
Definition: wt_helper.hpp:435
_byte_tree & operator=(const _byte_tree &bt)
Definition: wt_helper.hpp:315
static node_type root()
Return the root node of the tree.
Definition: wt_helper.hpp:392
uint16_t node_type
Definition: wt_helper.hpp:186
uint64_t bit_path(value_type c) const
Return the path as left/right bit sequence in a uint64_t.
Definition: wt_helper.hpp:413
node_type child(node_type v, uint8_t i) const
Return left (i=0) or right (i=1) child node of v.
Definition: wt_helper.hpp:400
bool operator==(_byte_tree const &other) const noexcept
Equality operator.
Definition: wt_helper.hpp:380
node_type parent(node_type v) const
Return the parent node of v.
Definition: wt_helper.hpp:398
uint64_t size(node_type v) const
Return size of an inner node.
Definition: wt_helper.hpp:406
uint64_t size() const
Return the number of nodes in the tree.
Definition: wt_helper.hpp:395
uint64_t m_path[fixed_sigma]
Definition: wt_helper.hpp:205
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: wt_helper.hpp:372
bool operator!=(_byte_tree const &other) const noexcept
Inequality operator.
Definition: wt_helper.hpp:387
bool is_leaf(node_type v) const
Return if v is a leaf node.
Definition: wt_helper.hpp:403
std::vector< data_node > m_nodes
Definition: wt_helper.hpp:201
node_type c_to_leaf(value_type c) const
Get corresponding leaf for symbol c.
Definition: wt_helper.hpp:390
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_helper.hpp:353
std::pair< bool, value_type > symbol_gte(value_type c) const
Return symbol c or the next larger symbol in the wt.
Definition: wt_helper.hpp:425
uint64_t serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_helper.hpp:337
_int_tree & operator=(const _int_tree &bt)=default
_int_tree(const std::vector< pc_node > &temp_nodes, uint64_t &bv_size, const t_wt *)
Definition: wt_helper.hpp:481
_int_tree(_int_tree &&bt)=default
uint64_t bv_pos_rank(node_type v) const
Returns for node v the rank of 1's up to bv_pos(v)
Definition: wt_helper.hpp:695
void init_node_ranks(const t_rank_type &rank)
Definition: wt_helper.hpp:580
std::vector< data_node > m_nodes
Definition: wt_helper.hpp:471
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: wt_helper.hpp:648
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_helper.hpp:614
uint64_t bit_path(value_type c) const
Return the path as left/right bit sequence in a uint64_t.
Definition: wt_helper.hpp:685
static node_type root()
Return the root node of the tree.
Definition: wt_helper.hpp:664
_int_tree()=default
bool operator!=(_int_tree const &other) const noexcept
Inequality operator.
Definition: wt_helper.hpp:637
uint64_t value_type
Definition: wt_helper.hpp:459
uint64_t bv_pos(node_type v) const
Return the start of the node in the WT's bit vector.
Definition: wt_helper.hpp:692
node_type child(node_type v, uint8_t i) const
Return left (i=0) or right (i=1) child node of v.
Definition: wt_helper.hpp:672
_int_tree(const _int_tree &bt)=default
uint64_t size(node_type v) const
Return size of an inner node.
Definition: wt_helper.hpp:678
node_type parent(node_type v) const
Return the parent node of v.
Definition: wt_helper.hpp:670
bool is_leaf(node_type v) const
Return if v is a leaf node.
Definition: wt_helper.hpp:675
std::vector< uint64_t > m_path
Definition: wt_helper.hpp:475
node_type c_to_leaf(value_type c) const
Get corresponding leaf for symbol c.
Definition: wt_helper.hpp:656
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: wt_helper.hpp:640
bool operator==(_int_tree const &other) const noexcept
Equality operator.
Definition: wt_helper.hpp:631
std::pair< bool, value_type > symbol_gte(value_type c) const
Return symbol c or the next larger symbol in the wt.
Definition: wt_helper.hpp:701
uint64_t node_type
Definition: wt_helper.hpp:460
std::pair< bool, value_type > symbol_lte(value_type c) const
Return symbol c or the next smaller symbol in the wt.
Definition: wt_helper.hpp:712
uint64_t size() const
Return the number of nodes in the tree.
Definition: wt_helper.hpp:667
std::vector< node_type > m_c_to_leaf
Definition: wt_helper.hpp:472
_int_tree & operator=(_int_tree &&bt)=default
uint64_t serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_helper.hpp:596
bool is_valid(node_type v) const
Return if the node is a valid node.
Definition: wt_helper.hpp:698
bool operator==(_node const &other) const noexcept
Equality operator.
Definition: wt_helper.hpp:166
_node & operator=(const _node &v)
Definition: wt_helper.hpp:101
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: wt_helper.hpp:146
_node(const _node &)=default
uint64_t size_type
Definition: wt_helper.hpp:80
uint64_t bv_pos_rank
Definition: wt_helper.hpp:82
uint64_t bv_pos
Definition: wt_helper.hpp:81
node_type child[2]
Definition: wt_helper.hpp:84
bool operator!=(_node const &other) const noexcept
Inequality operator.
Definition: wt_helper.hpp:173
node_type parent
Definition: wt_helper.hpp:83
_node(uint64_t bv_pos=0, uint64_t bv_pos_rank=0, node_type parent=t_tree_strat_fat::undef, node_type child_left=t_tree_strat_fat::undef, node_type child_right=t_tree_strat_fat::undef)
Definition: wt_helper.hpp:86
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: wt_helper.hpp:156
typename t_tree_strat_fat::node_type node_type
Definition: wt_helper.hpp:79
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Definition: wt_helper.hpp:124
_node & operator=(const pc_node &v)
Definition: wt_helper.hpp:114
void load(std::istream &in)
Definition: wt_helper.hpp:137
uint64_t freq
Definition: wt_helper.hpp:59
uint64_t parent
Definition: wt_helper.hpp:61
uint64_t child[2]
Definition: wt_helper.hpp:62
uint64_t sym
Definition: wt_helper.hpp:60
pc_node(uint64_t freq=0, uint64_t sym=0, uint64_t parent=undef, uint64_t child_left=undef, uint64_t child_right=undef)
Definition: wt_helper.hpp:792