MMTF-C++
The C++ language MMTF libraries
binary_encoder.hpp
Go to the documentation of this file.
1 // *************************************************************************
2 //
3 // Licensed under the MIT License (see accompanying LICENSE file).
4 //
5 // The author of this code is: Daniel Farrell
6 //
7 // Based on mmtf_python, adapted to c++ standards 2018
8 //
9 // *************************************************************************
10 
11 
12 #ifndef MMTF_BINARY_ENCODER_H
13 #define MMTF_BINARY_ENCODER_H
#include <math.h>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
18 
19 // byteorder functions
20 #ifdef WIN32
21 #include <winsock2.h>
22 #else
23 #include <arpa/inet.h>
24 #endif
25 
26 namespace mmtf {
27 
28 // *************************************************************************
29 // PRIVATE FUNCTIONS (only visible in this header)
30 // *************************************************************************
31 
32 namespace { // private helpers
33 
40 inline std::vector<int32_t> convertFloatsToInts(std::vector<float> const & vec_in,
41  int multiplier);
42 
48 inline std::vector<int32_t> deltaEncode(std::vector<int32_t> const & vec_in);
49 
50 
56 template<typename Int>
57 inline std::vector<int32_t> runLengthEncode(std::vector<Int> const & vec_in );
58 
66 inline std::vector<int32_t> recursiveIndexEncode(std::vector<int32_t> const & vec_in,
67  int max=32767, int min=-32768);
68 
76 inline void add_header(std::stringstream & ss, uint32_t array_size, uint32_t codec, uint32_t param=0);
77 
83 inline std::vector<char> stringstreamToCharVector(std::stringstream & ss);
84 
85 } // anon ns
86 
87 // *************************************************************************
88 // PUBLIC FUNCTIONS
89 // *************************************************************************
90 
95 inline std::vector<char> encodeInt8ToByte(std::vector<int8_t> vec_in);
96 
101 inline std::vector<char> encodeFourByteInt(std::vector<int32_t> const & vec_in);
102 
108 inline std::vector<char> encodeStringVector(std::vector<std::string> const & in_sv, int32_t const CHAIN_LEN);
109 
110 
115 inline std::vector<char> encodeRunLengthChar(std::vector<char> const & in_cv);
116 
117 
122 inline std::vector<char> encodeRunLengthDeltaInt(std::vector<int32_t> int_vec);
123 
129 inline std::vector<char> encodeRunLengthFloat(std::vector<float> const & floats_in, int32_t const multiplier);
130 
136 inline std::vector<char> encodeDeltaRecursiveFloat(std::vector<float> const & floats_in, int32_t const multiplier);
137 
142 inline std::vector<char> encodeRunLengthInt8(std::vector<int8_t> const & int8_vec);
143 
144 // *************************************************************************
145 // IMPLEMENTATION
146 // *************************************************************************
147 
148 namespace { // private helpers
149 
/**
 * @brief Scale each float by an integer multiplier and round to int32.
 * @param vec_in     Floating-point values to convert.
 * @param multiplier Factor applied to every value before rounding.
 * @return One int32 per input element, i.e. round(value * multiplier)
 *         converted with static_cast<int32_t>.
 */
inline std::vector<int32_t> convertFloatsToInts(std::vector<float> const & vec_in,
        int const multiplier) {
    std::vector<int32_t> vec_out;
    // The output has exactly one entry per input value, so allocate once.
    vec_out.reserve(vec_in.size());
    for (std::size_t i = 0; i < vec_in.size(); ++i) {
        vec_out.push_back(static_cast<int32_t>(round(vec_in[i] * multiplier)));
    }
    return vec_out;
}
158 
159 
/**
 * @brief Delta-encode a vector of 32-bit integers.
 * @param vec_in Values to encode.
 * @return A vector of the same length: the first element is copied
 *         unchanged, every later element is the difference to its
 *         predecessor in vec_in. An empty input yields an empty output.
 */
inline std::vector<int32_t> deltaEncode(std::vector<int32_t> const & vec_in) {
    std::vector<int32_t> vec_out;
    if (vec_in.empty()) return vec_out;
    vec_out.reserve(vec_in.size());
    vec_out.push_back(vec_in[0]);
    // Index with size_t: narrowing size() to int would silently skip the
    // loop for inputs longer than INT_MAX elements.
    for (std::size_t i = 1; i < vec_in.size(); ++i) {
        vec_out.push_back(vec_in[i] - vec_in[i-1]);
    }
    return vec_out;
}
169 
170 
/**
 * @brief Run-length encode a vector of integer-like values.
 * @tparam Int Element type of the input; each run value is cast to int32_t.
 * @param vec_in Values to encode.
 * @return Flat sequence of (value, run length) pairs in input order.
 *         An empty input yields an empty output.
 */
template<typename Int>
inline std::vector<int32_t> runLengthEncode(std::vector<Int> const & vec_in ) {
    std::vector<int32_t> encoded;
    if (vec_in.empty()) return encoded;

    Int run_value = vec_in[0];
    int32_t run_length = 0;
    for (std::size_t pos = 0; pos < vec_in.size(); ++pos) {
        if (!(vec_in[pos] == run_value)) {
            // The current run ended: emit it and start a new one.
            encoded.push_back(static_cast<int32_t>(run_value));
            encoded.push_back(run_length);
            run_value = vec_in[pos];
            run_length = 0;
        }
        ++run_length;
    }
    // Emit the final, still-open run.
    encoded.push_back(static_cast<int32_t>(run_value));
    encoded.push_back(run_length);
    return encoded;
}
191 
192 
/**
 * @brief Recursive-index encode a vector of 32-bit integers.
 *
 * Each non-negative value is split into as many copies of max as fit
 * (while value >= max), each negative value into copies of min (while
 * value <= min); the remainder is always emitted afterwards, so a value
 * equal to max or min is followed by a 0.
 *
 * @param vec_in Values to encode.
 * @param max    Upper limit of a single output element.
 * @param min    Lower limit of a single output element.
 * @return The encoded sequence; it has at least as many elements as vec_in.
 */
inline std::vector<int32_t> recursiveIndexEncode(
        std::vector<int32_t> const & vec_in,
        int max /* =32767 */, int min /*=-32768 */) {
    std::vector<int32_t> vec_out;
    // Every input contributes at least one output element.
    vec_out.reserve(vec_in.size());
    // Index with size_t: narrowing size() to int would silently skip the
    // loop for inputs longer than INT_MAX elements.
    for (std::size_t i = 0; i < vec_in.size(); ++i) {
        int32_t x = vec_in[i];
        if ( x >= 0 ) {
            while (x >= max) {
                vec_out.push_back(max);
                x -= max;
            }
        } else {
            while (x <= min) {
                vec_out.push_back(min);
                x += std::abs(min);
            }
        }
        vec_out.push_back(x);
    }
    return vec_out;
}
214 
215 
/**
 * @brief Append a 12-byte array header to a stream.
 *
 * The header consists of three 32-bit fields written in this order:
 * codec, array_size, param. Each is converted with htonl() first.
 *
 * @param ss         Stream that receives the header bytes.
 * @param array_size Value written as the second field.
 * @param codec      Value written as the first field.
 * @param param      Value written as the third field.
 */
inline void add_header(std::stringstream & ss, uint32_t array_size, uint32_t codec, uint32_t param /* =0 */) {
    uint32_t const fields[3] = { htonl(codec), htonl(array_size), htonl(param) };
    ss.write(reinterpret_cast< char const * >(fields), sizeof(fields));
}
224 
225 
/**
 * @brief Copy the full contents of a stringstream into a char vector.
 * @param ss Stream whose buffer (as returned by ss.str()) is copied.
 * @return Every byte of the stream's string, in order.
 */
inline std::vector<char> stringstreamToCharVector(std::stringstream & ss) {
    std::string const buffer(ss.str());
    return std::vector<char>(buffer.begin(), buffer.end());
}
231 
232 } // anon ns
233 
234 
235 inline std::vector<char> encodeInt8ToByte(std::vector<int8_t> vec_in) {
236  std::stringstream ss;
237  add_header(ss, vec_in.size(), 2, 0);
238  for (size_t i=0; i<vec_in.size(); ++i) {
239  ss.write(reinterpret_cast< char * >(&vec_in[i]), sizeof(vec_in[i]));
240  }
241  return stringstreamToCharVector(ss);
242 }
243 
244 
245 inline std::vector<char> encodeFourByteInt(std::vector<int32_t> const & vec_in) {
246  std::stringstream ss;
247  add_header(ss, vec_in.size(), 4, 0);
248  for (size_t i=0; i<vec_in.size(); ++i) {
249  int32_t be_x = htonl(vec_in[i]);
250  ss.write(reinterpret_cast< char * >(&be_x), sizeof(be_x));
251  }
252  return stringstreamToCharVector(ss);
253 }
254 
255 
256 inline std::vector<char> encodeStringVector(std::vector<std::string> const & in_sv, int32_t const CHAIN_LEN) {
257  char NULL_BYTE = 0x00;
258  std::stringstream ss;
259  add_header(ss, in_sv.size(), 5, CHAIN_LEN);
260  std::vector<char> char_vec;
261  for (size_t i=0; i<in_sv.size(); ++i) {
262  char_vec.insert(char_vec.end(), in_sv[i].begin(), in_sv[i].end());
263  for (size_t j=0; j<CHAIN_LEN-in_sv[i].size(); ++j) {
264  char_vec.push_back(NULL_BYTE);
265  }
266  }
267  for (size_t i=0; i<char_vec.size(); ++i) {
268  ss.write(reinterpret_cast< char * >(&char_vec[i]), sizeof(char_vec[i]));
269  }
270  return stringstreamToCharVector(ss);
271 }
272 
273 
274 inline std::vector<char> encodeRunLengthChar(std::vector<char> const & in_cv) {
275  std::stringstream ss;
276  add_header(ss, in_cv.size(), 6, 0);
277  std::vector<int32_t> int_vec = runLengthEncode(in_cv);
278  for (size_t i=0; i<int_vec.size(); ++i) {
279  int32_t temp = htonl(int_vec[i]);
280  ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
281  }
282  return stringstreamToCharVector(ss);
283 }
284 
285 
286 inline std::vector<char> encodeRunLengthDeltaInt(std::vector<int32_t> int_vec) {
287  std::stringstream ss;
288  add_header(ss, int_vec.size(), 8, 0);
289  int_vec = deltaEncode(int_vec);
290  int_vec = runLengthEncode(int_vec);
291  for (size_t i=0; i<int_vec.size(); ++i) {
292  int32_t temp = htonl(int_vec[i]);
293  ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
294  }
295  return stringstreamToCharVector(ss);
296 }
297 
298 inline std::vector<char> encodeRunLengthFloat(std::vector<float> const & floats_in, int32_t const multiplier) {
299  std::stringstream ss;
300  add_header(ss, floats_in.size(), 9, multiplier);
301  std::vector<int32_t> int_vec = convertFloatsToInts(floats_in, multiplier);
302  int_vec = runLengthEncode(int_vec);
303  for (size_t i=0; i<int_vec.size(); ++i) {
304  int32_t temp = htonl(int_vec[i]);
305  ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
306  }
307  return stringstreamToCharVector(ss);
308 }
309 
310 
311 inline std::vector<char> encodeDeltaRecursiveFloat(std::vector<float> const & floats_in, int32_t const multiplier) {
312  std::stringstream ss;
313  add_header(ss, floats_in.size(), 10, multiplier);
314  std::vector<int32_t> int_vec = convertFloatsToInts(floats_in, multiplier);
315  int_vec = deltaEncode(int_vec);
316  int_vec = recursiveIndexEncode(int_vec);
317  for (size_t i=0; i<int_vec.size(); ++i) {
318  int16_t temp = htons(int_vec[i]);
319  ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
320  }
321  return stringstreamToCharVector(ss);
322 }
323 
324 
325 inline std::vector<char> encodeRunLengthInt8(std::vector<int8_t> const & int8_vec) {
326  std::stringstream ss;
327  add_header(ss, int8_vec.size(), 16, 0);
328  std::vector<int32_t> const int_vec = runLengthEncode(int8_vec);
329  for (size_t i=0; i<int_vec.size(); ++i) {
330  int32_t temp = htonl(int_vec[i]);
331  ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
332  }
333  return stringstreamToCharVector(ss);
334 }
335 
336 } // mmtf namespace
337 #endif
std::vector< char > encodeInt8ToByte(std::vector< int8_t > vec_in)
Definition: binary_encoder.hpp:235
std::vector< char > encodeRunLengthInt8(std::vector< int8_t > const &int8_vec)
Definition: binary_encoder.hpp:325
Definition: binary_decoder.hpp:25
std::vector< char > encodeDeltaRecursiveFloat(std::vector< float > const &floats_in, int32_t const multiplier)
Definition: binary_encoder.hpp:311
std::vector< char > encodeRunLengthFloat(std::vector< float > const &floats_in, int32_t const multiplier)
Definition: binary_encoder.hpp:298
std::vector< char > encodeRunLengthChar(std::vector< char > const &in_cv)
Definition: binary_encoder.hpp:274
std::vector< char > encodeRunLengthDeltaInt(std::vector< int32_t > int_vec)
Definition: binary_encoder.hpp:286
std::vector< char > encodeFourByteInt(std::vector< int32_t > const &vec_in)
Definition: binary_encoder.hpp:245
std::vector< char > encodeStringVector(std::vector< std::string > const &in_sv, int32_t const CHAIN_LEN)
Definition: binary_encoder.hpp:256