10#ifndef INCLUDED_SDSL_INT_WAVELET_TREE
11#define INCLUDED_SDSL_INT_WAVELET_TREE
45 class t_rank =
typename t_bitvector::rank_1_type,
46 class t_select =
typename t_bitvector::select_1_type,
47 class t_select_zero =
typename t_bitvector::select_0_type>
85 std::vector<value_type> & cs,
86 std::vector<size_type> & rank_c_i,
87 std::vector<size_type> & rank_c_j,
109 if ((j - i) - (ones_before_j - ones_before_i) > 0)
112 size_type new_node_size = node_size - ones_before_end;
115 _interval_symbols(new_i, new_j, k, cs, rank_c_i, rank_c_j, level + 1,
path << 1, new_node_size, new_offset);
119 if ((ones_before_j - ones_before_i) > 0)
121 size_type new_offset = offset + (node_size - ones_before_end) +
m_size;
122 size_type new_node_size = ones_before_end;
125 _interval_symbols(new_i,
155 template <
typename t_it>
163 for (
auto it =
begin; it !=
end; ++it)
166 if (value > max_elem) max_elem = value;
169 std::string tree_out_buf_file_name =
tmp_file(tmp_dir,
"_m_tree");
176 osfstream tree_out_buf(tree_out_buf_file_name, std::ios::binary | std::ios::trunc | std::ios::out);
182 uint64_t tree_word = 0;
188 const uint64_t mask_new = 1ULL << (
m_max_level - k - 1);
193 uint64_t start_value = (rac[i] & mask_old);
195 while (i <
m_size and ((x = rac[i]) & mask_old) == start_value)
199 tree_word |= (1ULL << (tree_pos & 0x3FULL));
204 rac[start + cnt0++] = x;
207 if ((tree_pos & 0x3FULL) == 0)
209 tree_out_buf.write((
char *)&tree_word,
sizeof(tree_word));
216 for (
size_type j = 0; j < cnt1; ++j) { rac[start + cnt0 + j] = buf1[j]; }
220 m_sigma += (cnt0 > 0) + (cnt1 > 0);
222 start += cnt0 + cnt1;
224 mask_old += mask_new;
226 if ((tree_pos & 0x3FULL) != 0)
228 tree_out_buf.write((
char *)&tree_word,
sizeof(tree_word));
231 tree_out_buf.
close();
279 *
this = std::move(tmp);
291 m_tree = std::move(wt.m_tree);
329 offset += (node_size - ones_before_end);
330 node_size = ones_before_end;
336 node_size = (node_size - ones_before_end);
337 i = (i - ones_before_i);
371 offset += (node_size - ones_before_end);
372 node_size = ones_before_end;
377 node_size = (node_size - ones_before_end);
378 i = (i - ones_before_i);
407 offset += (node_size - ones_before_end);
408 node_size = ones_before_end;
414 node_size = (node_size - ones_before_end);
415 i = (i - ones_before_i);
419 return std::make_pair(i, c);
433 assert(1 <= i and i <=
rank(
size(), c));
440 m_path_off[0] = m_path_rank_off[0] = 0;
442 for (uint32_t k = 0; k <
m_max_level and node_size; ++k)
445 m_path_rank_off[k] = ones_before_o;
449 offset += (node_size - ones_before_end);
450 node_size = ones_before_end;
454 node_size = (node_size - ones_before_end);
457 m_path_off[k + 1] = offset;
460 if (0ULL == node_size or node_size < i)
463 "): c does not occur i times in the WT");
469 offset = m_path_off[k - 1];
470 size_type ones_before_o = m_path_rank_off[k - 1];
508 std::vector<value_type> & cs,
509 std::vector<size_type> & rank_c_i,
510 std::vector<size_type> & rank_c_j)
const
512 assert(i <= j and j <=
size());
514 if (i == j) {
return; }
519 rank_c_i[0] = res.first;
520 rank_c_j[0] = res.first + 1;
525 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0, 0,
m_size, 0);
541 template <
class t_ret_type = std::tuple<
size_type,
size_type,
size_type>>
544 assert(i <= j and j <=
size());
547 return t_ret_type{ 0, j - i, 0 };
562 offset += (node_size - ones_before_end);
563 node_size = ones_before_end;
564 smaller += j - i - ones_before_j + ones_before_i;
570 node_size -= ones_before_end;
571 greater += ones_before_j - ones_before_i;
578 return t_ret_type{ i, smaller, greater };
591 template <
class t_ret_type = std::tuple<
size_type,
size_type>>
597 return t_ret_type{ 0, i };
610 offset += (node_size - ones_before_end);
611 node_size = ones_before_end;
612 result += i - ones_before_i;
617 node_size = (node_size - ones_before_end);
623 return t_ret_type{ i, result };
639 bool report =
true)
const
642 std::vector<size_type> ones_before_os(
m_max_level + 1);
648 _range_search_2d(lb, rb, vlb, vrb, 0, 0,
m_size, offsets, ones_before_os, 0, point_vec, report, cnt_answers);
649 return make_pair(cnt_answers, point_vec);
659 std::vector<size_type> & offsets,
660 std::vector<size_type> & ones_before_os,
671 for (
size_type j = lb + 1; j <= rb + 1; ++j)
678 size_type ones_before_o = ones_before_os[k - 1];
679 if (c & 1) { i =
m_tree_select1(ones_before_o + i) - offset + 1; }
686 point_vec.emplace_back(i - 1,
path);
689 cnt_answers += rb - lb + 1;
698 ones_before_os[level] = ones_before_o;
702 size_type zeros_before_o = offset - ones_before_o;
703 size_type zeros_before_lb = offset + lb - ones_before_lb;
704 size_type zeros_before_rb = offset + rb + 1 - ones_before_rb;
705 size_type zeros_before_end = offset + node_size - ones_before_end;
706 if (vlb < mid and mid)
708 size_type nlb = zeros_before_lb - zeros_before_o;
709 size_type nrb = zeros_before_rb - zeros_before_o;
710 offsets[level + 1] = offset +
m_size;
715 std::min(vrb, mid - 1),
718 zeros_before_end - zeros_before_o,
728 size_type nlb = ones_before_lb - ones_before_o;
729 size_type nrb = ones_before_rb - ones_before_o;
730 offsets[level + 1] = offset +
m_size + (zeros_before_end - zeros_before_o);
738 ones_before_end - ones_before_o,
761 written_bytes +=
m_tree.serialize(out, child,
"tree");
762 written_bytes +=
m_tree_rank.serialize(out, child,
"tree_rank");
763 written_bytes +=
m_tree_select1.serialize(out, child,
"tree_select_1");
764 written_bytes +=
m_tree_select0.serialize(out, child,
"tree_select_0");
767 return written_bytes;
782 template <
typename archive_t>
794 template <
typename archive_t>
812 return (
m_size == other.m_size) && (
m_sigma == other.m_sigma) && (
m_tree == other.m_tree) &&
880 bool bit = *(
begin(vv) + i);
881 i = std::get<1>(rs[bit]);
906 return expand(std::move(v_right));
921 v_left.
size = v.size - ones;
922 v_left.
level = v.level + 1;
923 v_left.
sym = v.sym << 1;
927 v.level = v.level + 1;
928 v.sym = (v.sym << 1) | 1;
930 return { { std::move(v_left), v } };
945 auto ranges_copy = ranges;
946 return expand(v, std::move(ranges_copy));
964 for (
auto & r : ranges)
968 auto left_size = (r[1] - r[0] + 1) - right_size;
970 auto right_sp = sp_rank - v_sp_rank;
971 auto left_sp = r[0] - right_sp;
973 r = { { left_sp, left_sp + left_size - 1 } };
974 res[i++] = { { right_sp, right_sp + right_size - 1 } };
976 return { { ranges, std::move(res) } };
994 auto left_size = (r[1] - r[0] + 1) - right_size;
996 auto right_sp = sp_rank - v_sp_rank;
997 auto left_sp = r[0] - right_sp;
999 return { { { { left_sp, left_sp + left_size - 1 } }, { { right_sp, right_sp + right_size - 1 } } } };
1007 auto begin(
const node_type & v)
const ->
decltype(
m_tree.begin() + v.offset) {
return m_tree.begin() + v.offset; }
1010 auto end(
const node_type & v)
const ->
decltype(
m_tree.begin() + v.offset + v.size)
1012 return m_tree.begin() + v.offset + v.size;
void close(bool remove_file=false)
Close the int_vector_buffer.
A generic vector class for integers of width .
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
void close()
Close the stream.
Generic iterator for a random access container.
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
bool operator!=(wt_int const &other) const noexcept
Inequality operator.
bool operator==(wt_int const &other) const noexcept
Equality operator.
std::array< range_type, 2 > expand(const node_type &v, const range_type &r) const
Returns for a range its left and right child ranges.
std::array< range_vec_type, 2 > expand(const node_type &v, const range_vec_type &ranges) const
Returns for each range its left and right child ranges.
select_0_type m_tree_select0
std::array< node_type, 2 > expand(node_type &&v) const
Returns the two child nodes of an inner node.
auto bit_vec(const node_type &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
node_type root() const
Return the root node.
wt_int & operator=(wt_int &&wt)
Assignment move operator.
const bit_vector_type & tree
A concatenation of all bit vectors of the wavelet tree.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
size_type size() const
Returns the size of the original vector.
std::pair< size_type, point_vec_type > r2d_res_type
const size_type & sigma
Effective alphabet size of the wavelet tree.
t_select_zero select_0_type
std::pair< value_type, size_type > point_type
wt_int()=default
Default constructor.
value_type sym(const node_type &v) const
Returns the symbol of leaf node v.
int_vector ::value_type value_type
wt_int & operator=(const wt_int &wt)
Assignment operator.
const uint32_t & max_level
Maximal level of the wavelet tree.
std::pair< size_type, std::vector< std::pair< value_type, size_type > > > range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report=true) const
range_search_2d searches points in the index interval [lb..rb] and value interval [vlb....
t_bitvector bit_vector_type
void interval_symbols(size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
wt_int(const wt_int &wt)
Copy constructor.
random_access_const_iterator< wt_int > const_iterator
wt_int(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
int_alphabet_tag alphabet_category
wt_int(wt_int &&wt)
Move constructor.
auto seq(const node_type &v) const -> random_access_container< std::function< value_type(size_type)> >
Random access container to sequence of node v.
std::vector< point_type > point_vec_type
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
int_vector ::size_type size_type
std::array< range_vec_type, 2 > expand(const node_type &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
bool empty(const node_type &v) const
Indicates if node v is empty.
void _range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, size_type level, size_type ilb, size_type node_size, std::vector< size_type > &offsets, std::vector< size_type > &ones_before_os, size_type path, point_vec_type &point_vec, bool report, size_type &cnt_answers) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1] of the supported vector.
const_iterator end() const
Returns a const_iterator to the element after the last element.
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
t_bitvector::difference_type difference_type
const_iterator begin() const
Returns a const_iterator to the first element.
std::array< node_type, 2 > expand(const node_type &v) const
Returns the two child nodes of an inner node.
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many occurrences of symbol wt[i] are in the prefix [0..i-1] of the original sequence.
bool is_leaf(const node_type &v) const
Checks if the node is a leaf node.
void load(std::istream &in)
Loads the data structure from the given istream.
auto size(const node_type &v) const -> decltype(v.size)
Return the size of node v.
t_ret_type lex_count(size_type i, size_type j, value_type c) const
How many symbols are lexicographic smaller/greater than c in [i..j-1].
t_ret_type lex_smaller_count(size_type i, value_type c) const
How many symbols are lexicographic smaller than c in [0..i-1].
select_1_type m_tree_select1
bool empty() const
Returns whether the wavelet tree contains no data.
int_vector.hpp contains the sdsl::int_vector class.
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
std::string ram_file_name(const std::string &file)
Returns the corresponding RAM-file name for file.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void read_member(T &t, std::istream &in)
std::string tmp_file(const cache_config &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
std::array< int_vector<>::size_type, 2 > range_type
bool load_from_file(T &v, const std::string &file)
Load sdsl-object v from a file.
std::vector< range_type > range_vec_type
int remove(const std::string &)
Remove a file.
rank_support_v.hpp contains rank_support_v.
Contains declarations and definitions of data structure concepts.
select_support_mcl.hpp contains classes that support a sdsl::bit_vector with constant time select inf...
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Represents a node in the wavelet tree.
bool operator>(const node_type &v) const
node_type(size_type o=0, size_type sz=0, size_type l=0, value_type sy=0)
node_type(node_type &&)=default
node_type & operator=(node_type &&)=default
node_type & operator=(const node_type &)=default
node_type(const node_type &)=default
bool operator==(const node_type &v) const
bool operator<(const node_type &v) const
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.