SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
lcp_wt.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_LCP_WT
9#define INCLUDED_SDSL_LCP_WT
10
11#include <algorithm> // for lower_bound
12#include <cassert>
13#include <cstring> // for strlen
14#include <iomanip>
15#include <iostream>
16#include <iterator>
17#include <stdexcept>
18#include <utility> // for pair
19#include <vector>
20
21#include <sdsl/int_vector.hpp>
22#include <sdsl/iterators.hpp>
23#include <sdsl/util.hpp>
24#include <sdsl/wt_huff.hpp>
25
26namespace sdsl
27{
28
30
37template <uint8_t t_width = 0>
38class lcp_wt
39{
40 public:
47 typedef const pointer const_pointer;
49 typedef ptrdiff_t difference_type;
51
54
55 enum
56 {
59 sa_order = 1
60 }; // as the lcp_wt is not fast for texts with long repetition
61
62 template <class Cst>
63 using type = lcp_wt;
64
65 private:
66 small_lcp_type m_small_lcp; // vector for lcp values < 255
67 int_vector<t_width> m_big_lcp; // vector for lcp values > 254
68
69 typedef std::pair<size_type, size_type> tPII;
70 typedef std::vector<tPII> tVPII;
71
72 public:
74 lcp_wt() = default;
76 lcp_wt(const lcp_wt &) = default;
77 lcp_wt(lcp_wt &&) = default;
78 lcp_wt & operator=(const lcp_wt &) = default;
79 lcp_wt & operator=(lcp_wt &&) = default;
80
82 lcp_wt(cache_config & config, std::string other_key = "")
83 {
84 std::string temp_file = tmp_file(config, "_lcp_sml");
85 std::string lcp_key = conf::KEY_LCP;
86 if ("" != other_key) { lcp_key = other_key; }
87 int_vector_buffer<> lcp_buf(cache_file_name(lcp_key, config));
88 size_type l = 0, max_l = 0, big_sum = 0, n = lcp_buf.size();
89 {
90 int_vector<8> small_lcp = int_vector<8>(n);
91 for (size_type i = 0; i < n; ++i)
92 {
93 if ((l = lcp_buf[i]) < 255) { small_lcp[i] = l; }
94 else
95 {
96 small_lcp[i] = 255;
97 if (l > max_l) max_l = l;
98 ++big_sum;
99 }
100 }
101 store_to_file(small_lcp, temp_file);
102 }
103 {
104 int_vector_buffer<8> lcp_sml_buf(temp_file);
105 m_small_lcp = small_lcp_type(lcp_sml_buf.begin(), lcp_sml_buf.end(), config.dir);
106 }
107 sdsl::remove(temp_file);
108 m_big_lcp = int_vector<>(big_sum, 0, bits::hi(max_l) + 1);
109 {
110 for (size_type i = 0, ii = 0; i < n; ++i)
111 {
112 if (lcp_buf[i] >= 255) { m_big_lcp[ii++] = lcp_buf[i]; }
113 }
114 }
115 }
116
118 size_type size() const { return m_small_lcp.size(); }
119
122
124 bool empty() const { return 0 == m_small_lcp.size(); }
125
127 const_iterator begin() const { return const_iterator(this, 0); }
128
130 const_iterator end() const { return const_iterator(this, size()); }
131
133
136 {
137 if (m_small_lcp[i] != 255) { return m_small_lcp[i]; }
138 else
139 {
140 return m_big_lcp[m_small_lcp.rank(i, 255)];
141 }
142 }
143
145 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
146 {
147 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
148 size_type written_bytes = 0;
149 written_bytes += m_small_lcp.serialize(out, child, "small_lcp");
150 written_bytes += m_big_lcp.serialize(out, child, "large_lcp");
151 structure_tree::add_size(child, written_bytes);
152 return written_bytes;
153 }
154
156 void load(std::istream & in)
157 {
158 m_small_lcp.load(in);
159 m_big_lcp.load(in);
160 }
161
162 template <typename archive_t>
163 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
164 {
165 ar(CEREAL_NVP(m_small_lcp));
166 ar(CEREAL_NVP(m_big_lcp));
167 }
168
169 template <typename archive_t>
170 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
171 {
172 ar(CEREAL_NVP(m_small_lcp));
173 ar(CEREAL_NVP(m_big_lcp));
174 }
175
177 bool operator==(lcp_wt const & other) const noexcept
178 {
179 return (m_small_lcp == other.m_small_lcp) && (m_big_lcp == other.m_big_lcp);
180 }
181
183 bool operator!=(lcp_wt const & other) const noexcept { return !(*this == other); }
184};
185
186} // end namespace sdsl
187#endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
Definition: int_vector.hpp:216
void load(std::istream &in)
Load the int_vector for a stream.
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:221
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
static size_type max_size() noexcept
Maximum size of the int_vector.
Definition: int_vector.hpp:542
A class for the compressed version of lcp information of an suffix array.
Definition: lcp_wt.hpp:39
random_access_const_iterator< lcp_wt > const_iterator
Definition: lcp_wt.hpp:42
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: lcp_wt.hpp:163
lcp_plain_tag lcp_category
Definition: lcp_wt.hpp:52
bool operator!=(lcp_wt const &other) const noexcept
Inequality operator.
Definition: lcp_wt.hpp:183
const value_type const_reference
Definition: lcp_wt.hpp:44
const_iterator iterator
Definition: lcp_wt.hpp:43
static size_type max_size()
Returns the largest size that lcp_wt can ever have.
Definition: lcp_wt.hpp:121
lcp_wt()=default
Default Constructor.
int_vector< t_width >::value_type value_type
Definition: lcp_wt.hpp:41
lcp_wt & operator=(lcp_wt &&)=default
lcp_tag index_category
Definition: lcp_wt.hpp:53
lcp_wt & operator=(const lcp_wt &)=default
int_vector ::size_type size_type
Definition: lcp_wt.hpp:48
void load(std::istream &in)
Load from a stream.
Definition: lcp_wt.hpp:156
ptrdiff_t difference_type
Definition: lcp_wt.hpp:49
const pointer const_pointer
Definition: lcp_wt.hpp:47
value_type operator[](size_type i) const
[]-operator
Definition: lcp_wt.hpp:135
bool operator==(lcp_wt const &other) const noexcept
Equality operator.
Definition: lcp_wt.hpp:177
bool empty() const
Returns if the data structure is empty.
Definition: lcp_wt.hpp:124
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: lcp_wt.hpp:127
size_type size() const
Number of elements in the instance.
Definition: lcp_wt.hpp:118
const_reference * pointer
Definition: lcp_wt.hpp:46
const_reference reference
Definition: lcp_wt.hpp:45
lcp_wt(cache_config &config, std::string other_key="")
Constructor.
Definition: lcp_wt.hpp:82
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
Definition: lcp_wt.hpp:145
lcp_wt(const lcp_wt &)=default
Copy / Move constructor.
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: lcp_wt.hpp:130
wt_huff< bit_vector, rank_support_v<>, select_support_scan< 1 >, select_support_scan< 0 > > small_lcp_type
Definition: lcp_wt.hpp:50
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: lcp_wt.hpp:170
lcp_wt(lcp_wt &&)=default
Generic iterator for a random access container.
Definition: iterators.hpp:24
A class supporting linear time select queries.
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
A prefix code-shaped wavelet.
Definition: wt_pc.hpp:44
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
Definition: wt_pc.hpp:335
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_pc.hpp:676
size_type size() const
Returns the size of the original vector.
Definition: wt_pc.hpp:284
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_pc.hpp:660
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
constexpr char KEY_LCP[]
Definition: config.hpp:44
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
std::string tmp_file(const cache_config &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
Definition: io.hpp:698
bool store_to_file(const T &v, const std::string &file)
Store a data structure to a file.
Definition: io.hpp:798
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
Helper class for construction process.
Definition: config.hpp:67
std::string dir
Definition: config.hpp:71
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_huff.hpp contains a class for a Huffman shaped wavelet tree over byte sequences.