SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
lcp_support_tree2.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4#ifndef INCLUDED_SDSL_SUPPORT_TREE2
5#define INCLUDED_SDSL_SUPPORT_TREE2
6
7#include <iostream>
8#include <stdint.h>
9#include <string>
10
11#include <sdsl/bits.hpp>
12#include <sdsl/cereal.hpp>
13#include <sdsl/config.hpp>
15#include <sdsl/int_vector.hpp>
17#include <sdsl/io.hpp>
18#include <sdsl/iterators.hpp>
19#include <sdsl/ram_fs.hpp>
23#include <sdsl/sfstream.hpp>
26#include <sdsl/util.hpp>
27#include <sdsl/wt_huff.hpp>
28
29namespace sdsl
30{
31
32// Forward declaration of helper method
33template <uint32_t t_dens, uint8_t t_bwt_width>
34void construct_first_child_and_lf_lcp(int_vector_buffer<> &,
35 int_vector_buffer<t_bwt_width> &,
36 std::string const &,
37 std::string const &,
38 int_vector<> &);
39
49template <uint32_t t_dens, class t_cst>
51{
52public:
59 typedef const pointer const_pointer;
62 typedef t_cst cst_type;
64
66
67 enum
68 {
71 sa_order = 0
72 };
73
74 template <class CST>
75 struct type
76 {
78 };
79
80private:
81 cst_type const * m_cst;
82 small_lcp_type m_small_lcp; // vector for lcp values < 254
83 int_vector<> m_big_lcp; // vector for lcp values >= 254
84
85public:
89
95
97
100 _lcp_support_tree2(cache_config & config, cst_type const * cst = nullptr)
101 {
102 m_cst = cst;
103
105 std::string bwt_file = cache_file_name(key_bwt<t_cst::csa_type::alphabet_type::int_width>(), config);
107
108 std::string sml_lcp_file = tmp_file(config, "_fc_lf_lcp_sml");
109 std::string big_lcp_file = tmp_file(config, "_fc_lf_lcp_big");
110
111 construct_first_child_and_lf_lcp<t_dens>(lcp_buf, bwt_buf, sml_lcp_file, big_lcp_file, m_big_lcp);
112 int_vector_buffer<8> sml_lcp_buf(sml_lcp_file);
113
114 m_small_lcp = small_lcp_type(sml_lcp_buf.begin(), sml_lcp_buf.end(), config.dir);
115 sml_lcp_buf.close(true);
116 sdsl::remove(big_lcp_file);
117 }
118
119 void set_cst(cst_type const * cst)
120 {
121 m_cst = cst;
122 }
123
125 {
126 return m_cst->size();
127 }
128
130 {
131 return int_vector<>::max_size();
132 }
133
135 {
136 return m_small_lcp.empty();
137 }
138
141 {
142 return const_iterator(this, 0);
143 }
144
147 {
148 return const_iterator(this, size());
149 }
150
152
157 {
158 size_type idx, offset = 0;
159 uint8_t val;
160 start:
161 idx = m_cst->tlcp_idx(i);
162 val = m_small_lcp[idx];
163 if (val < 254)
164 {
165 return val; // - offset;
166 }
167 else if (val == 254)
168 { // if lcp value is >= 254 and position i is reducible
169 i = m_cst->csa.lf[i]; // i = LF[i] // (*m_psi)(i);
170 ++offset; // goto lcp value, which is one bigger
171 goto start;
172 }
173 else
174 { // if lcp value is >= 254 and (not reducable or sampled)
175 return m_big_lcp[m_small_lcp.rank(idx, 255)] - offset;
176 }
177 }
178
180 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
181 {
182 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
183 size_type written_bytes = 0;
184 written_bytes += m_small_lcp.serialize(out, child, "small_lcp");
185 written_bytes += m_big_lcp.serialize(out, child, "large_lcp");
186 structure_tree::add_size(child, written_bytes);
187 return written_bytes;
188 }
189
191 void load(std::istream & in, t_cst const * cst = nullptr)
192 {
193 m_small_lcp.load(in);
194 m_big_lcp.load(in);
195 m_cst = cst;
196 }
197
198 template <typename archive_t>
199 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
200 {
201 ar(CEREAL_NVP(m_small_lcp));
202 ar(CEREAL_NVP(m_big_lcp));
203 }
204
205 template <typename archive_t>
206 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
207 {
208 ar(CEREAL_NVP(m_small_lcp));
209 ar(CEREAL_NVP(m_big_lcp));
210 }
211
213 bool operator==(_lcp_support_tree2 const & other) const noexcept
214 {
215 return (m_small_lcp == other.m_small_lcp) && (m_big_lcp == other.m_big_lcp);
216 }
217
219 bool operator!=(_lcp_support_tree2 const & other) const noexcept
220 {
221 return !(*this == other);
222 }
223};
224
226template <uint32_t t_dens = 16>
228{
229 template <class t_cst>
231};
232
238template <uint32_t t_dens, uint8_t t_bwt_width>
241 std::string const & small_lcp_file,
242 std::string const & big_lcp_file,
243 int_vector<> & big_lcp)
244{
245 typedef int_vector<>::size_type size_type;
246 const size_type M = 255; // limit for values represented in the small LCP part
247 size_type buf_len = 1000000;
248 lcp_buf.buffersize(buf_len);
249 bwt_buf.buffersize(buf_len);
250 size_type n = lcp_buf.size();
251
252 int_vector_buffer<8> sml_lcp_out(small_lcp_file, std::ios::out);
253
254 osfstream big_lcp_out(big_lcp_file, std::ios::out | std::ios::trunc | std::ios::binary);
255
256 size_type fc_cnt_big = 0; // number of lcp values at the first child which are big and not reducible
257 sorted_multi_stack_support vec_stack(n); // occupies 2n bits
258 bit_vector is_big_and_not_reducable(n, 0); // initialized with 0s
259 bool is_one_big_and_not_reducable = false; // all positions have to be reducible
260
261 size_type y, max_lcp = 0;
262 uint64_t last_bwti = 0, val;
263 for (size_type i = 0, x; i < n; ++i)
264 {
265 x = lcp_buf[i];
266 is_one_big_and_not_reducable = false;
267
268 while (!vec_stack.empty() and x < vec_stack.top())
269 {
270 y = vec_stack.top();
271 is_one_big_and_not_reducable |= is_big_and_not_reducable[vec_stack.size() - 1];
272 if (vec_stack.pop())
273 { // if y was the last copy of y on the stack
274 if (y > M - 2)
275 {
276 if (is_one_big_and_not_reducable)
277 {
278 val = M;
279 big_lcp_out.write((char *)&y, sizeof(y));
280 ++fc_cnt_big;
281 if (y > max_lcp)
282 max_lcp = y;
283 }
284 else
285 {
286 val = M - 1;
287 }
288 }
289 else
290 {
291 val = y;
292 }
293 sml_lcp_out.push_back(val);
294 is_one_big_and_not_reducable = false;
295 }
296 }
297 if (x > M - 2 and (0 == i or last_bwti != bwt_buf[i] or x % t_dens == 0))
298 {
299 is_big_and_not_reducable[vec_stack.size()] = 1;
300 }
301 else
302 {
303 is_big_and_not_reducable[vec_stack.size()] = 0;
304 }
305 vec_stack.push(x);
306 last_bwti = bwt_buf[i];
307 }
308
309 while (!vec_stack.empty())
310 {
311 y = vec_stack.top();
312 if (vec_stack.pop())
313 {
314 if (y > M - 2)
315 {
316 if (is_big_and_not_reducable[vec_stack.size()])
317 {
318 val = M;
319 big_lcp_out.write((char *)&y, sizeof(y));
320 ++fc_cnt_big;
321 if (y > max_lcp)
322 max_lcp = y;
323 }
324 else
325 {
326 val = M - 1;
327 }
328 }
329 else
330 {
331 val = y;
332 }
333 sml_lcp_out.push_back(val);
334 }
335 }
336
337 big_lcp_out.close();
338 isfstream big_lcp_in(big_lcp_file, std::ios::in | std::ios::binary);
339 big_lcp.width(bits::hi(max_lcp) + 1);
340 big_lcp.resize(fc_cnt_big);
341
342 for (size_type i = 0; i < fc_cnt_big; ++i)
343 {
344 big_lcp_in.read((char *)&y, sizeof(y));
345 big_lcp[i] = y;
346 }
347}
348
349} // namespace sdsl
350#endif
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
An lcp array class for cst_sct3 and cst_sada.
_lcp_support_tree2(_lcp_support_tree2 const &)=default
Copy / Move constructor.
const_iterator end() const
Returns a const_iterator to the element after the last element.
void load(std::istream &in, t_cst const *cst=nullptr)
Load from a stream.
void set_cst(cst_type const *cst)
_lcp_support_tree2()
Default constructor.
random_access_const_iterator< _lcp_support_tree2 > const_iterator
_lcp_support_tree2(cache_config &config, cst_type const *cst=nullptr)
Constructor.
int_vector ::size_type size_type
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
_lcp_support_tree2 & operator=(_lcp_support_tree2 &&)=default
lcp_tree_and_lf_compressed_tag lcp_category
bool operator!=(_lcp_support_tree2 const &other) const noexcept
Inequality operator.
wt_huff< bit_vector, rank_support_v5<>, select_support_scan< 1 >, select_support_scan< 0 > > small_lcp_type
_lcp_support_tree2(_lcp_support_tree2 &&)=default
int_vector ::value_type value_type
value_type operator[](size_type i) const
[]-operator
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_lcp_support_tree2 & operator=(_lcp_support_tree2 const &)=default
bool operator==(_lcp_support_tree2 const &other) const noexcept
Equality operator.
const_iterator begin() const
Returns a const_iterator to the first element.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
int_vector ::difference_type difference_type
uint64_t size() const
Returns the number of elements currently stored.
void close(bool remove_file=false)
Close the int_vector_buffer.
uint64_t buffersize() const
Returns the buffersize in bytes.
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
void load(std::istream &in)
Load the int_vector for a stream.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
static size_type max_size() noexcept
Maximum size of the int_vector.
void resize(const size_type size)
Resize the int_vector in terms of elements.
void close()
Close the stream.
Definition sfstream.hpp:91
Generic iterator for a random access container.
Definition iterators.hpp:24
A class supporting linear time select queries.
Stack which contains elements from [0..n] in sorted order. Duplicates are possible.
size_type size() const
Returns the number of element is the stack.
bool empty() const
Returns if the stack is empty.
size_type top() const
Returns the topmost index on the stack.
bool pop()
Pop the topmost index of the stack.
bool push(size_type x)
Push the index x of vector vec onto the stack.
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
A prefix code-shaped wavelet.
Definition wt_pc.hpp:60
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
Definition wt_pc.hpp:371
void load(std::istream &in)
Loads the data structure from the given istream.
Definition wt_pc.hpp:729
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition wt_pc.hpp:713
bool empty() const
Returns whether the wavelet tree contains no data.
Definition wt_pc.hpp:320
csa_alphabet_strategy.hpp includes different strategy classes for representing an alphabet of a CSA.
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
constexpr char KEY_LCP[]
Definition config.hpp:43
Namespace for the succinct data structure library.
int remove(std::string const &)
Remove a file.
Definition ram_fs.hpp:221
std::string tmp_file(cache_config const &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
Definition io.hpp:759
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
Definition io.hpp:688
void construct_first_child_and_lf_lcp(int_vector_buffer<> &, int_vector_buffer< t_bwt_width > &, std::string const &, std::string const &, int_vector<> &)
ram_fs.hpp
rank_support_v5.hpp contains rank_support_v5.5
Contains declarations and definitions of data structure concepts.
select_support_scan.hpp contains classes that support a sdsl::bit_vector with linear time select.
sfstream.hpp contains a two stream class which can be used to read/write from/to files or strings.
sorted_multi_stack_support.hpp contains a data structure for a stack which contains elements from [0....
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:653
Helper class for construction process.
Definition config.hpp:66
std::string dir
Definition config.hpp:70
Helper class which provides _lcp_support_tree2 the context of a CST.
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_huff.hpp contains a class for a Huffman shaped wavelet tree over byte sequences.