MMTF-C++
The C++ language MMTF libraries
binary_decoder.hpp
Go to the documentation of this file.
1// *************************************************************************
2//
3// Licensed under the MIT License (see accompanying LICENSE file).
4//
5// The authors of this code are: Gerardo Tauriello
6//
7// Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
8// Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
9// Gazal Kalyan, Alexander Rose.
10//
11// *************************************************************************
12
13#ifndef MMTF_BINARY_DECODER_H
14#define MMTF_BINARY_DECODER_H
15
16#include "structure_data.hpp"
17
18#include <msgpack.hpp>
19#include <cstring> // low level mem
20#include <sstream>
21#include <limits>
22#include <algorithm>
23
24namespace mmtf {
25
30public:
38 BinaryDecoder(const msgpack::object& obj,
39 const std::string& key = "UNNAMED_BINARY");
40
56 template<typename T>
57 void decode(T& target);
58
59private:
60 // for error reporting
61 std::string key_;
62 // data from binary header
63 int32_t strategy_;
64 int32_t length_;
65 int32_t parameter_;
66 const char* encodedData_;
67 uint32_t encodedDataLength_; // max. size for binary is 2^32 - 1
68
69 // check length consistency (throws)
70 void checkLength_(int32_t exp_length);
71 // check if binary data is divisible by x (throws)
72 void checkDivisibleBy_(int32_t item_size);
73
74 // byte decoders
75 void decodeFromBytes_(std::vector<float>& output);
76 void decodeFromBytes_(std::vector<int8_t>& output);
77 void decodeFromBytes_(std::vector<int16_t>& output);
78 void decodeFromBytes_(std::vector<int32_t>& output);
79 // special one: decode to vector of strings
80 void decodeFromBytes_(std::vector<std::string>& output);
81
82 // run length decoding
83 // -> Int and IntOut can be any integer types
84 // -> Int values are blindly converted to IntOut
85 template<typename Int, typename IntOut>
86 void runLengthDecode_(const std::vector<Int>& input,
87 std::vector<IntOut>& output);
88
89 // delta decoding -> Int can be any integer type
90 template<typename Int>
91 void deltaDecode_(const std::vector<Int>& input, std::vector<Int>& output);
92 // variant doing it in-place
93 template<typename Int>
94 void deltaDecode_(std::vector<Int>& in_out);
95
96 // recursive indexing decode -> SmallInt must be smaller than Int
97 template<typename SmallInt, typename Int>
98 void recursiveIndexDecode_(const std::vector<SmallInt>& input,
99 std::vector<Int>& output);
100
101 // decode integer to float -> Int can be any integer type
102 template<typename Int>
103 void decodeDivide_(const std::vector<Int>& input, float divisor,
104 std::vector<float>& output);
105};
106
107// *************************************************************************
108// IMPLEMENTATION
109// *************************************************************************
110
111// helpers in anonymous namespace (only visible in this file)
112namespace {
113
114// byteorder functions ("ntohl" etc.)
115#ifdef WIN32
116#include <winsock2.h>
117#else
118#include <arpa/inet.h>
119#endif
120
121#ifndef __EMSCRIPTEN__
// Read a big-endian 32-bit value from src and store it in host byte order
// at dst.
// -> src must provide at least 4 readable bytes, dst at least 4 writable ones
// -> neither pointer needs to be aligned: the value is assembled byte by byte
//    and stored with memcpy, which avoids the misaligned, type-punned
//    accesses of a direct uint32_t* cast (undefined behaviour)
// -> works independently of the byte order of the host
inline void assignBigendian4(void* dst, const char* src) {
    const unsigned char* in = reinterpret_cast<const unsigned char*>(src);
    const uint32_t value = (static_cast<uint32_t>(in[0]) << 24) |
                           (static_cast<uint32_t>(in[1]) << 16) |
                           (static_cast<uint32_t>(in[2]) << 8) |
                           static_cast<uint32_t>(in[3]);
    std::memcpy(dst, &value, sizeof(value));
}
125
// Read a big-endian 16-bit value from src and store it in host byte order
// at dst.
// -> src must provide at least 2 readable bytes, dst at least 2 writable ones
// -> neither pointer needs to be aligned: the value is assembled byte by byte
//    and stored with memcpy, which avoids the misaligned, type-punned
//    accesses of a direct uint16_t* cast (undefined behaviour)
// -> works independently of the byte order of the host
inline void assignBigendian2(void* dst, const char* src) {
    const unsigned char* in = reinterpret_cast<const unsigned char*>(src);
    const uint16_t value = static_cast<uint16_t>((in[0] << 8) | in[1]);
    std::memcpy(dst, &value, sizeof(value));
}
129#else
130// Need to avoid how emscripten handles memory
131// Note that this will only work on little-endian machines, but this should not be a major
132// issue as Emscripten only supports little-endian hardware.
133// see: https://kripken.github.io/emscripten-site/docs/porting/guidelines/portability_guidelines.html
134
// Copy 4 bytes from src to dst in reversed order (last source byte first).
void assignBigendian4(void* dst, const char* src) {
    uint8_t* out = static_cast<uint8_t*>(dst);
    for (int i = 0; i < 4; ++i) {
        out[i] = static_cast<uint8_t>(src[3 - i]);
    }
}
141
// Copy 2 bytes from src to dst in swapped order.
void assignBigendian2(void* dst, const char* src) {
    uint8_t* out = static_cast<uint8_t*>(dst);
    const uint8_t first = static_cast<uint8_t>(src[1]);
    const uint8_t second = static_cast<uint8_t>(src[0]);
    out[0] = first;
    out[1] = second;
}
146#endif
147
148void arrayCopyBigendian4(void* dst, const char* src, size_t n) {
149 for (size_t i = 0; i < n; i += 4) {
150 assignBigendian4(((char*)dst) + i, src + i);
151 }
152}
153
154void arrayCopyBigendian2(void* dst, const char* src, size_t n) {
155 for (size_t i = 0; i < n; i += 2) {
156 assignBigendian2(((char*)dst) + i, src + i);
157 }
158}
159
160} // anon ns
161
162inline BinaryDecoder::BinaryDecoder(const msgpack::object& obj,
163 const std::string& key)
164 : key_(key) {
165 // sanity checks
166 if (obj.type != msgpack::type::BIN) {
167 throw DecodeError("The '" + key + "' entry is not binary data");
168 }
169 if (obj.via.bin.size < 12) {
170 std::stringstream err;
171 err << "The '" + key + "' entry is too short " << obj.via.bin.size;
172 throw DecodeError(err.str());
173 }
174 // get data (encoded data is only pointed to and not parsed here)
175 const char* bytes = obj.via.bin.ptr;
176
177 assignBigendian4(&strategy_, bytes);
178 assignBigendian4(&length_, bytes + 4);
179 assignBigendian4(&parameter_, bytes + 8);
180 encodedData_ = bytes + 12;
181 encodedDataLength_ = obj.via.bin.size - 12;
182}
183
184template<typename T>
185void BinaryDecoder::decode(T& target) {
186 throw mmtf::DecodeError("Invalid target type for binary '" + key_ + "'");
187}
188
189template<>
190inline void BinaryDecoder::decode(std::vector<float>& output) {
191
192 // check strategy to parse
193 switch (strategy_) {
194 case 1: {
195 decodeFromBytes_(output);
196 break;
197 }
198 case 9: {
199 std::vector<int32_t> step1;
200 std::vector<int32_t> step2;
201 decodeFromBytes_(step1);
202 runLengthDecode_(step1, step2);
203 decodeDivide_(step2, static_cast<float>(parameter_), output);
204 break;
205 }
206 case 10: {
207 std::vector<int16_t> step1;
208 std::vector<int32_t> step2;
209 decodeFromBytes_(step1);
210 recursiveIndexDecode_(step1, step2);
211 deltaDecode_(step2);
212 decodeDivide_(step2, static_cast<float>(parameter_), output);
213 break;
214 }
215 case 11: {
216 std::vector<int16_t> step1;
217 decodeFromBytes_(step1);
218 decodeDivide_(step1, static_cast<float>(parameter_), output);
219 break;
220 }
221 case 12: {
222 std::vector<int16_t> step1;
223 std::vector<int32_t> step2;
224 decodeFromBytes_(step1);
225 recursiveIndexDecode_(step1, step2);
226 decodeDivide_(step2, static_cast<float>(parameter_), output);
227 break;
228 }
229 case 13: {
230 std::vector<int8_t> step1;
231 std::vector<int32_t> step2;
232 decodeFromBytes_(step1);
233 recursiveIndexDecode_(step1, step2);
234 decodeDivide_(step2, static_cast<float>(parameter_), output);
235 break;
236 }
237 default: {
238 std::stringstream err;
239 err << "Invalid strategy " << strategy_ << " for binary '" + key_
240 << "': does not decode to float array";
241 throw DecodeError(err.str());
242 }
243 }
244
245 // check size
246 checkLength_(output.size());
247}
248
249template<>
250inline void BinaryDecoder::decode(std::vector<int8_t>& output) {
251
252 // check strategy to parse
253 switch (strategy_) {
254 case 2: {
255 decodeFromBytes_(output);
256 break;
257 }
258 default: {
259 std::stringstream err;
260 err << "Invalid strategy " << strategy_ << " for binary '" + key_
261 << "': does not decode to int8 array";
262 throw DecodeError(err.str());
263 }
264 }
265
266 // check size
267 checkLength_(output.size());
268}
269
270template<>
271inline void BinaryDecoder::decode(std::vector<int16_t>& output) {
272
273 // check strategy to parse
274 switch (strategy_) {
275 case 3: {
276 decodeFromBytes_(output);
277 break;
278 }
279 default: {
280 std::stringstream err;
281 err << "Invalid strategy " << strategy_ << " for binary '" + key_
282 << "': does not decode to int16 array";
283 throw DecodeError(err.str());
284 }
285 }
286
287 // check size
288 checkLength_(output.size());
289}
290
291template<>
292inline void BinaryDecoder::decode(std::vector<int32_t>& output) {
293
294 // check strategy to parse
295 switch (strategy_) {
296 case 4: {
297 decodeFromBytes_(output);
298 break;
299 }
300 case 7: {
301 std::vector<int32_t> step1;
302 decodeFromBytes_(step1);
303 runLengthDecode_(step1, output);
304 break;
305 }
306 case 8: {
307 std::vector<int32_t> step1;
308 decodeFromBytes_(step1);
309 runLengthDecode_(step1, output);
310 deltaDecode_(output);
311 break;
312 }
313 case 14: {
314 std::vector<int16_t> step1;
315 decodeFromBytes_(step1);
316 recursiveIndexDecode_(step1, output);
317 break;
318 }
319 case 15: {
320 std::vector<int8_t> step1;
321 decodeFromBytes_(step1);
322 recursiveIndexDecode_(step1, output);
323 break;
324 }
325 default: {
326 std::stringstream err;
327 err << "Invalid strategy " << strategy_ << " for binary '" + key_
328 << "': does not decode to int32 array";
329 throw DecodeError(err.str());
330 }
331 }
332
333 // check size
334 checkLength_(output.size());
335}
336
337template<>
338inline void BinaryDecoder::decode(std::vector<std::string>& output) {
339
340 // check strategy to parse
341 switch (strategy_) {
342 case 5: {
343 decodeFromBytes_(output);
344 break;
345 }
346 default: {
347 std::stringstream err;
348 err << "Invalid strategy " << strategy_ << " for binary '" + key_
349 << "': does not decode to string array";
350 throw DecodeError(err.str());
351 }
352 }
353
354 // check size
355 checkLength_(output.size());
356}
357
358template<>
359inline void BinaryDecoder::decode(std::vector<char>& output) {
360
361 // check strategy to parse
362 switch (strategy_) {
363 case 6: {
364 std::vector<int32_t> step1;
365 decodeFromBytes_(step1);
366 runLengthDecode_(step1, output);
367 break;
368 }
369 default: {
370 std::stringstream err;
371 err << "Invalid strategy " << strategy_ << " for binary '" + key_
372 << "': does not decode to string array";
373 throw DecodeError(err.str());
374 }
375 }
376
377 // check size
378 checkLength_(output.size());
379}
380
381// checks
382inline void BinaryDecoder::checkLength_(int32_t exp_length) {
383 if (length_ != exp_length) {
384 std::stringstream err;
385 err << "Length mismatch for binary '" + key_ + "': "
386 << length_ << " vs " << exp_length;
387 throw DecodeError(err.str());
388 }
389}
390
391inline void BinaryDecoder::checkDivisibleBy_(int32_t item_size) {
392 if (encodedDataLength_ % item_size != 0) {
393 std::stringstream err;
394 err << "Binary length of '" + key_ + "': "
395 << encodedDataLength_ << " is not a multiple of " << item_size;
396 throw DecodeError(err.str());
397 }
398}
399
400// byte decoders
401inline void BinaryDecoder::decodeFromBytes_(std::vector<float>& output) {
402 checkDivisibleBy_(4);
403 // prepare memory
404 output.resize(encodedDataLength_ / 4);
405 // get data
406 if(!output.empty()) {
407 arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
408 }
409}
410inline void BinaryDecoder::decodeFromBytes_(std::vector<int8_t>& output) {
411 // prepare memory
412 output.resize(encodedDataLength_);
413 // get data
414 if (!output.empty()) {
415 memcpy(&output[0], encodedData_, encodedDataLength_);
416 }
417}
418inline void BinaryDecoder::decodeFromBytes_(std::vector<int16_t>& output) {
419 checkDivisibleBy_(2);
420 // prepare memory
421 output.resize(encodedDataLength_ / 2);
422 // get data
423 if (!output.empty()) {
424 arrayCopyBigendian2(&output[0], encodedData_, encodedDataLength_);
425 }
426}
427inline void BinaryDecoder::decodeFromBytes_(std::vector<int32_t>& output) {
428 checkDivisibleBy_(4);
429 // prepare memory
430 output.resize(encodedDataLength_ / 4);
431 // get data
432 if (!output.empty()) {
433 arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
434 }
435}
436// special one: decode to vector of strings
437inline void BinaryDecoder::decodeFromBytes_(std::vector<std::string>& output) {
438 char NULL_BYTE = 0x00;
439 // check parameter
440 const int32_t str_len = parameter_;
441 checkDivisibleBy_(str_len);
442 // prepare memory
443 output.resize(encodedDataLength_ / str_len);
444 // get data
445 for (size_t i = 0; i < output.size(); ++i) {
446 output[i].assign(encodedData_ + i * str_len, str_len);
447 output[i].erase(std::remove(output[i].begin(), output[i].end(), NULL_BYTE), output[i].end());
448 }
449}
450
451// run length decoding
452template<typename Int, typename IntOut>
453void BinaryDecoder::runLengthDecode_(const std::vector<Int>& input,
454 std::vector<IntOut>& output) {
455 // we work with pairs of numbers
456 checkDivisibleBy_(2);
457 // find out size of resulting vector (for speed)
458 size_t out_size = 0;
459 for (size_t i = 0; i < input.size(); i += 2) {
460 out_size += input[i + 1];
461 }
462 // reserve space (for speed)
463 output.clear();
464 output.reserve(out_size);
465 // get data
466 for (size_t i = 0; i < input.size(); i += 2) {
467 const IntOut value = IntOut(input[i]);
468 const Int number = input[i+1];
469 for (Int j = 0; j < number; ++j) {
470 output.push_back(value);
471 }
472 }
473}
474
475// delta decoding
476template<typename Int>
477void BinaryDecoder::deltaDecode_(const std::vector<Int>& input,
478 std::vector<Int>& output) {
479 // reserve space (for speed)
480 output.clear();
481 if (input.empty()) return; // ensure we have some values
482 output.reserve(input.size());
483 // get data
484 output.push_back(input[0]);
485 for (size_t i = 1; i < input.size(); ++i) {
486 output.push_back(output[i - 1] + input[i]);
487 }
488}
489template<typename Int>
490void BinaryDecoder::deltaDecode_(std::vector<Int>& in_out) {
491 for (size_t i = 1; i < in_out.size(); ++i) {
492 in_out[i] = in_out[i - 1] + in_out[i];
493 }
494}
495
496// recursive indexing decode
497template<typename SmallInt, typename Int>
498void BinaryDecoder::recursiveIndexDecode_(const std::vector<SmallInt>& input,
499 std::vector<Int>& output) {
500 // get limits
501 const SmallInt min_int = std::numeric_limits<SmallInt>::min();
502 const SmallInt max_int = std::numeric_limits<SmallInt>::max();
503 // find out size of resulting vector (for speed)
504 size_t out_size = 0;
505 for (size_t i = 0; i < input.size(); ++i) {
506 if (input[i] != min_int && input[i] != max_int) ++out_size;
507 }
508 // reserve space (for speed)
509 output.clear();
510 output.reserve(out_size);
511 // get data
512 Int cur_val = 0;
513 for (size_t i = 0; i < input.size(); ++i) {
514 cur_val += input[i];
515 if (input[i] != min_int && input[i] != max_int) {
516 output.push_back(cur_val);
517 cur_val = 0;
518 }
519 }
520}
521
522// decode integer to float
523template<typename Int>
524void BinaryDecoder::decodeDivide_(const std::vector<Int>& input, float divisor,
525 std::vector<float>& output) {
526 // reserve space and get inverted divisor (for speed)
527 output.clear();
528 output.reserve(input.size());
529 float inv_div = float(1) / divisor;
530 // get data
531 for (size_t i = 0; i < input.size(); ++i) {
532 output.push_back(float(input[i]) * inv_div);
533 }
534}
535
536} // mmtf namespace
537
538#endif
Helper class to decode msgpack binary into a vector.
Definition: binary_decoder.hpp:29
void decode(T &target)
Decode binary msgpack object into the given target.
Definition: binary_decoder.hpp:185
BinaryDecoder(const msgpack::object &obj, const std::string &key="UNNAMED_BINARY")
Initialize object given a msgpack object. Reads out binary header to prepare for call of decode.
Definition: binary_decoder.hpp:162
Exception thrown when failing during decoding.
Definition: errors.hpp:23
Definition: binary_decoder.hpp:24