SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
wt_hutu.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_SDSL_WT_HUTU
10#define INCLUDED_SDSL_WT_HUTU
11
12#include <vector>
13
14#include <sdsl/wt_pc.hpp>
15
17namespace sdsl
18{
19
20// forward declaration
21struct hutu_shape;
22
24
37template <class t_bitvector = bit_vector,
38 class t_rank = typename t_bitvector::rank_1_type,
39 class t_select = typename t_bitvector::select_1_type,
40 class t_select_zero = typename t_bitvector::select_0_type,
41 class t_tree_strat = byte_tree<>>
43
44// Hu Tucker shape for wt_pc
45template <class t_wt>
47{
48 typedef typename t_wt::size_type size_type;
49 enum
50 {
51 lex_ordered = 1
52 };
53
55 template <class t_element>
56 struct heap_node
57 {
58 t_element * item; // pointer to the represented item
59 heap_node *left, *right, *parent; // pointer to left/right child, parent
60 int64_t rank; // rank of the heap node
62 heap_node(t_element * it = nullptr)
63 : item(it)
64 , left(nullptr)
65 , right(nullptr)
66 , parent(nullptr)
67 , rank(0)
68 {}
70 bool operator<(const heap_node & other) { return *item < *(other.item); }
71 };
72
73 // Implementation of a leftist heap as needed in the first phase of
74 // Hu-Tucker Code construction
75 template <class t_element>
76 class l_heap
77 {
78 private:
79 heap_node<t_element> * m_root; // pointer to the root
80
81 // fixes node information after the deletion of elements
82 void fix_node(heap_node<t_element> * item)
83 {
84 if (item != nullptr)
85 {
86 if (!item->left || !item->right)
87 { // if node has only one child
88 // only go on fixing if the node information needs to be changed
89 if (item->rank != 0)
90 {
91 item->rank = 0;
92 if (item->parent) fix_node(item->parent);
93 }
94 }
95 else
96 { // node information has to be adapted
97 int64_t nn = (item->left->rank > item->right->rank) ? item->right->rank : item->left->rank;
98 if (item->rank != nn && item->parent != 0)
99 {
100 item->rank = nn;
101 fix_node(item->parent);
102 }
103 }
104 }
105 }
106
107 // helper function to remove the data structure from memory
108 void free_node(heap_node<t_element> * item)
109 {
110 if (item->left)
111 {
112 free_node(item->left);
113 delete item->left;
114 item->left = nullptr;
115 }
116 if (item->right)
117 {
118 free_node(item->right);
119 delete item->right;
120 item->right = nullptr;
121 }
122 }
123
124 // internal merge function
126 {
127 if (!h1) return h2;
128 if (!h2) return h1;
129 if (*(h1->item) < *(h2->item))
130 return merge1(h1, h2);
131 else
132 return merge1(h2, h1);
133 }
134 // internal merge function
136 {
137 if (!h1->left)
138 { // if h1 has no children, the merge is simple
139 h1->left = h2;
140 h2->parent = h1; // adjust the parent pointer
141 }
142 else
143 {
144 h1->right = merge(h1->right, h2);
145 if (h1->right) { h1->right->parent = h1; }
146
147 if ((h1->left->rank) < (h1->right->rank))
148 {
149 heap_node<t_element> * tmp = h1->left;
150 h1->left = h1->right;
151 h1->right = tmp;
152 }
153 h1->rank = h1->right->rank + 1;
154 }
155 return h1;
156 }
157
158 public:
161 : m_root(nullptr)
162 {}
163
165 bool empty() const { return (m_root == nullptr); }
166
168
171 heap_node<t_element> * find_min() const { return m_root; }
172
174
178 {
179 if (m_root == nullptr) return nullptr;
180 if (m_root->left == nullptr) return m_root->right;
181 if (m_root->right == nullptr) return m_root->left;
182
183 if (m_root->left->operator<(*m_root->right))
184 return m_root->left;
185 else
186 return m_root->right;
187 }
188
190
194 {
197 lh.m_root = n;
198 merge(&lh);
199 return n;
200 }
201
204 {
205 heap_node<t_element> * old_root = m_root;
206 m_root = merge(m_root->left, m_root->right);
207 if (m_root) m_root->parent = nullptr;
208 delete old_root;
209 }
210
211 // deletes an arbitrary element from the heap
212 // this function assumes, that item is an element of the heap
214 {
215 if (item != nullptr)
216 {
217 if (m_root == item)
218 { // deleting the root is trivial
219 delete_min();
220 }
221 else
222 {
223 // otherwise we have to adapt the parent node and
224 // the children of item
225 heap_node<t_element> * h1 = merge(item->left, item->right);
226 if (h1) h1->parent = item->parent;
227 if (item == item->parent->left) { item->parent->left = h1; }
228 else if (item == item->parent->right)
229 {
230 item->parent->right = h1;
231 }
232 // fix node information considering rank
233 fix_node(item->parent);
234 delete item; // remove the item from memory
235 }
236 }
237 }
238
239 // public merge function
241 {
242 m_root = merge(m_root, rhs->m_root);
243 rhs->m_root = nullptr;
244 }
245
246 // removes the whole data structure from memory
248 {
249 if (m_root != nullptr)
250 {
251 free_node(m_root);
252 delete m_root;
253 m_root = nullptr;
254 }
255 }
256 };
257
258 // forward declaration of node classes
259 struct ht_node;
260
261 // Master node as used in the first phase of the Hu-Tucker algorithm
262 struct m_node
263 {
264 // min sum of the two min elements of the hpq this node points to
266 int64_t i; // position of the left node in the working sequence
267 int64_t j; // position of the right node in the working sequence
268 // pointer to the corresponding heap element (used for deletion)
270 l_heap<ht_node> * myhpq; // pointer to the hpq
271
272 ht_node * lt; // pointer to the left- and rightmost leafs of the hpq
273 ht_node * rt; // need for merge operations
274
276 : qel(0)
277 , myhpq(0)
278 , lt(0)
279 , rt(0)
280 {}
281
282 bool operator<(const m_node other)
283 {
284 if (min_sum != other.min_sum) { return min_sum < other.min_sum; }
285 if (i != other.i) { return i < other.i; }
286 return j < other.j;
287 }
288
289 bool operator>(const m_node other) { return other < *this; }
290 };
291
292 // Hu-Tucker node as used in the first phase of the Hu-Tucker algorithm
293 struct ht_node
294 {
295 int64_t pos; // position of the node
296 uint64_t c; // the represented letter
297 size_type w; // frequency of the node
298 bool t; // whether the node is a leaf
299 int64_t level; // level in the tree
300
301 // pointer to the two master nodes
302 // (as a node can belong to up to two hpqs)
304 m_node * mpqr; // only mpql is used for inner nodes
305 // pointer to the two heap nodes (as a node can belong to up to two hpqs)
307 heap_node<ht_node> * qr; // only ql is used for inner nodes
308 ht_node * left; // left child
309 ht_node * right; // right child
310
312 : mpql(0)
313 , mpqr(0)
314 , ql(0)
315 , qr(0)
316 , left(nullptr)
317 , right(nullptr)
318 {}
319
320 bool operator<(const ht_node & other)
321 {
322 if (w != other.w) { return w < other.w; }
323 return pos < other.pos;
324 }
325
326 bool operator>(const ht_node & other) { return other < *this; }
327 };
328
329 template <class t_rac>
330 static void construct_tree(t_rac & C, std::vector<pc_node> & temp_nodes)
331 {
332 // create a leaf for every letter
333 std::vector<ht_node> node_vector;
334 for (size_t i = 0; i < C.size(); i++)
335 {
336 if (C[i])
337 {
338 ht_node n;
339 n.c = (uint64_t)i;
340 n.w = C[i];
341 n.t = true;
342 n.pos = node_vector.size();
343 node_vector.push_back(n);
344 }
345 }
346 if (node_vector.size() == 1)
347 {
348 // special case of an alphabet of size 1:
349 // just instantly create the tree and return it
350 temp_nodes.emplace_back(pc_node(node_vector[0].w, (size_type)node_vector[0].c));
351 return;
352 }
353 size_type sigma = node_vector.size();
354 std::vector<ht_node> T(sigma); // physical leaves
355 std::vector<ht_node *> A(sigma); // the current working sequence
356 // Priority Queues, containing the Huffman Sequences
357 std::vector<l_heap<ht_node>> HPQ(sigma);
358 l_heap<m_node> MPQ; // Master Priority Queue
359
360 // init T, A, HPQs and MPQ
361 T[0] = node_vector[0];
362 A[0] = &T[0];
363
364 // initialization needed for every leaf
365 for (size_type i = 1; i < sigma; i++)
366 {
367 T[i] = node_vector[i];
368 A[i] = &T[i];
369
370 T[i - 1].qr = HPQ[i - 1].insert(&T[i - 1]);
371 T[i].ql = HPQ[i - 1].insert(&T[i]);
372
373 m_node * m = new m_node();
374 m->min_sum = T[i - 1].w + T[i].w;
375 m->i = i - 1;
376 m->j = i;
377 m->lt = &T[i - 1];
378 m->rt = &T[i];
379 m->myhpq = &HPQ[i - 1];
380
381 m->qel = MPQ.insert(m);
382
383 T[i - 1].mpqr = m;
384 T[i].mpql = m;
385 }
386
387 // main action loop
388 for (size_type k = 1; k < sigma; k++)
389 {
390 m_node * m = MPQ.find_min()->item;
391 ht_node * l = A[m->i];
392 ht_node * r = A[m->j];
393 int64_t lpos = m->i;
394 int64_t rpos = m->j;
395
396 l_heap<ht_node> * n_hpq = nullptr;
397 ht_node * n_rt = nullptr;
398 ht_node * n_lt = nullptr;
399
400 // create a new master priority queue
401 m_node * n_m = new m_node();
402 // delete old nodes from all hpqs
403 if (l->t)
404 {
405 if (l->mpql) l->mpql->myhpq->delete_element(l->ql);
406 l->ql = nullptr;
407 if (l->mpqr) l->mpqr->myhpq->delete_element(l->qr);
408 l->qr = nullptr;
409 }
410 else
411 {
412 m->myhpq->delete_element(l->ql);
413 l->ql = nullptr;
414 }
415 if (r->t)
416 {
417 if (r->mpql) r->mpql->myhpq->delete_element(r->ql);
418 l->ql = nullptr;
419
420 if (r->mpqr) r->mpqr->myhpq->delete_element(r->qr);
421 r->qr = nullptr;
422 }
423 else
424 {
425 m->myhpq->delete_element(r->ql);
426 r->ql = nullptr;
427 }
428 // handle the merge of hpqs
429 if (l->t && r->t)
430 {
431 // both nodes are leaves
432 l_heap<ht_node> * h1 = nullptr;
433 l_heap<ht_node> * h2 = nullptr;
434 l_heap<ht_node> * h3 = nullptr;
435 if (l->mpql)
436 {
437 n_lt = l->mpql->lt;
438 if (n_lt == l) n_lt = nullptr;
439 if (n_lt) n_lt->mpqr = n_m;
440
441 h1 = l->mpql->myhpq;
442 h2 = l->mpqr->myhpq;
443
444 h1->merge(h2);
445 MPQ.delete_element(l->mpql->qel);
446 MPQ.delete_element(l->mpqr->qel);
447 delete l->mpql;
448 delete l->mpqr;
449 }
450 else
451 {
452 h1 = l->mpqr->myhpq;
453 h2 = l->mpqr->myhpq;
454 n_lt = nullptr;
455
456 MPQ.delete_element(l->mpqr->qel);
457 delete l->mpqr;
458 }
459 if (r->mpqr)
460 {
461 n_rt = r->mpqr->rt;
462 if (n_rt == r) n_rt = nullptr;
463 if (n_rt) n_rt->mpql = n_m;
464
465 h3 = r->mpqr->myhpq;
466 h1->merge(h3);
467 MPQ.delete_element(r->mpqr->qel);
468 delete r->mpqr;
469
470 n_hpq = h1;
471 if (n_rt) n_rt->mpql = n_m;
472 }
473 else
474 {
475 n_rt = nullptr;
476 n_hpq = h1;
477 }
478 }
479 else if (l->t)
480 { // the left node is a leaf
481 if (l->mpql)
482 {
483 n_lt = l->mpql->lt;
484 if (n_lt) n_lt->mpqr = n_m;
485 n_rt = l->mpqr->rt;
486 if (n_rt) n_rt->mpql = n_m;
487
488 l->mpql->myhpq->merge(l->mpqr->myhpq);
489 n_hpq = l->mpql->myhpq;
490 MPQ.delete_element(l->mpql->qel);
491 MPQ.delete_element(l->mpqr->qel);
492 delete l->mpql;
493 delete l->mpqr;
494 }
495 else
496 {
497 n_lt = nullptr;
498 n_rt = l->mpqr->rt;
499 if (n_rt) n_rt->mpql = n_m;
500
501 n_hpq = l->mpqr->myhpq;
502 MPQ.delete_element(l->mpqr->qel);
503 delete l->mpqr;
504 }
505 }
506 else if (r->t)
507 { // right node is a leaf
508 if (r->mpqr)
509 {
510 n_lt = r->mpql->lt;
511 if (n_lt) n_lt->mpqr = n_m;
512 n_rt = r->mpqr->rt;
513 if (n_rt) n_rt->mpql = n_m;
514
515 r->mpql->myhpq->merge(r->mpqr->myhpq);
516 n_hpq = r->mpql->myhpq;
517 MPQ.delete_element(r->mpql->qel);
518 MPQ.delete_element(r->mpqr->qel);
519 delete r->mpql;
520 delete r->mpqr;
521 }
522 else
523 {
524 n_lt = r->mpql->lt;
525 if (n_lt) n_lt->mpqr = n_m;
526 n_rt = nullptr;
527
528 n_hpq = r->mpql->myhpq;
529 MPQ.delete_element(r->mpql->qel);
530 delete r->mpql;
531 }
532 }
533 else
534 {
535 // merge of two inner nodes
536 // no need to merge hpqs
537 MPQ.delete_element(m->qel);
538
539 n_hpq = m->myhpq;
540 n_lt = m->lt;
541 n_rt = m->rt;
542
543 if (n_lt) n_lt->mpqr = n_m;
544 if (n_rt) n_rt->mpql = n_m;
545
546 delete m;
547 }
548
549 // create a new node with the information gained above
550 ht_node * new_node = new ht_node();
551 new_node->c = ' ';
552 new_node->w = l->w + r->w;
553 new_node->t = false;
554 new_node->pos = lpos;
555 new_node->left = l;
556 new_node->right = r;
557 // insert node to the correct hpq
558 new_node->ql = n_hpq->insert(new_node);
559
560 // update working sequence
561 A[lpos] = new_node;
562 A[rpos] = nullptr;
563 // update information in the new master node and reinsert it to mpq
564 ht_node * tmp_min = n_hpq->find_min()->item;
565 heap_node<ht_node> * tmpsnd = n_hpq->find_snd_min();
566 if (tmpsnd)
567 {
568 ht_node * tmp_snd = n_hpq->find_snd_min()->item;
569 n_m->min_sum = tmp_min->w + tmp_snd->w;
570
571 if (tmp_min->pos < tmp_snd->pos)
572 {
573 n_m->i = tmp_min->pos;
574 n_m->j = tmp_snd->pos;
575 }
576 else
577 {
578 n_m->i = tmp_snd->pos;
579 n_m->j = tmp_min->pos;
580 }
581 n_m->qel = MPQ.insert(n_m);
582 n_m->myhpq = n_hpq;
583 n_m->lt = n_lt;
584 n_m->rt = n_rt;
585 }
586 else
587 {
588 // free the last remaining hpq
589 n_hpq->free_memory();
590 delete n_m;
591 }
592 }
593
594 // level assignment and deletion of unneeded nodes
595 assign_level(A[0], 0);
596
597 // reconstruction phase using the stack algorithm
598 std::vector<ht_node *> stack(sigma, nullptr);
599
600 for (size_type i = 0; i < sigma; i++)
601 {
602 temp_nodes.emplace_back(pc_node(T[i].w, (size_type)T[i].c));
603 T[i].pos = i;
604 }
605
606 int64_t spointer = -1;
607 uint64_t qpointer = 0; // use the Array T as a stack
608 int64_t max_nodes = sigma;
609 while (qpointer < sigma or spointer >= 1LL)
610 {
611 if (spointer >= 1LL and (stack[spointer]->level == stack[spointer - 1]->level))
612 {
613 ht_node * n_node = new ht_node();
614 max_nodes++;
615 n_node->t = false;
616 n_node->left = stack[spointer - 1];
617 n_node->right = stack[spointer];
618 n_node->level = stack[spointer]->level - 1;
619 n_node->w = stack[spointer]->w + stack[spointer - 1]->w;
620 n_node->c = '|';
621
622 n_node->pos = temp_nodes.size();
623 temp_nodes[stack[spointer - 1]->pos].parent = temp_nodes.size();
624 temp_nodes[stack[spointer]->pos].parent = temp_nodes.size();
625 temp_nodes.emplace_back(pc_node(n_node->w,
626 0,
628 stack[spointer - 1]->pos,
629 stack[spointer]->pos));
630
631 if (!stack[spointer - 1]->t) delete stack[spointer - 1];
632 if (!stack[spointer]->t) delete stack[spointer];
633
634 stack[--spointer] = n_node;
635 }
636 else
637 {
638 stack[++spointer] = &T[qpointer++];
639 }
640 }
641 delete stack[0];
642 }
643
644 static void assign_level(ht_node * n, int64_t lvl)
645 {
646 if (n)
647 {
648 n->level = lvl;
649 assign_level(n->left, lvl + 1);
650 assign_level(n->right, lvl + 1);
651
652 if (!n->t) { delete n; }
653 }
654 }
655};
656
658{
659 template <class t_wt>
661};
662
663} // end namespace sdsl
664
665#endif // end file
void merge(l_heap< t_element > *rhs)
Definition: wt_hutu.hpp:240
heap_node< t_element > * insert(t_element *x)
Insert an element into the heap.
Definition: wt_hutu.hpp:193
void delete_element(heap_node< t_element > *item)
Definition: wt_hutu.hpp:213
l_heap()
Default constructor.
Definition: wt_hutu.hpp:160
bool empty() const
Indicates if the heap is empty.
Definition: wt_hutu.hpp:165
heap_node< t_element > * find_min() const
Get the smallest element.
Definition: wt_hutu.hpp:171
void delete_min()
Delete the smallest element in the heap.
Definition: wt_hutu.hpp:203
heap_node< t_element > * find_snd_min() const
Get the second smallest element.
Definition: wt_hutu.hpp:177
A prefix code-shaped wavelet.
Definition: wt_pc.hpp:44
Namespace for the succinct data structure library.
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:53
Node class used by the leftist heap.
Definition: wt_hutu.hpp:57
heap_node(t_element *it=nullptr)
Constructor.
Definition: wt_hutu.hpp:62
bool operator<(const heap_node &other)
Less then operator.
Definition: wt_hutu.hpp:70
heap_node< ht_node > * qr
Definition: wt_hutu.hpp:307
bool operator>(const ht_node &other)
Definition: wt_hutu.hpp:326
heap_node< ht_node > * ql
Definition: wt_hutu.hpp:306
bool operator<(const ht_node &other)
Definition: wt_hutu.hpp:320
bool operator<(const m_node other)
Definition: wt_hutu.hpp:282
l_heap< ht_node > * myhpq
Definition: wt_hutu.hpp:270
bool operator>(const m_node other)
Definition: wt_hutu.hpp:289
heap_node< m_node > * qel
Definition: wt_hutu.hpp:269
static void construct_tree(t_rac &C, std::vector< pc_node > &temp_nodes)
Definition: wt_hutu.hpp:330
t_wt::size_type size_type
Definition: wt_hutu.hpp:48
static void assign_level(ht_node *n, int64_t lvl)
Definition: wt_hutu.hpp:644
wt_pc.hpp contains a class for the wavelet tree of byte sequences.