MMTF-C++
The C++ language MMTF libraries
structure_data.hpp
Go to the documentation of this file.
1 // *************************************************************************
2 //
3 // Licensed under the MIT License (see accompanying LICENSE file).
4 //
5 // The authors of this code are: Gabriel Studer, Gerardo Tauriello,
6 // Daniel Farrell.
7 //
8 // Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
9 // Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
10 // Gazal Kalyan, Alexander Rose.
11 //
12 // *************************************************************************
13 
14 #ifndef MMTF_STRUCTURE_DATA_H
15 #define MMTF_STRUCTURE_DATA_H
16 
17 #include "errors.hpp"
18 
19 #include <string>
20 #include <vector>
21 #include <algorithm>
22 #include <stdint.h>
23 #include <sstream>
24 #include <limits>
25 #include <msgpack.hpp>
26 #include <iostream>
27 #include <iomanip>
28 
29 namespace mmtf {
30 
34 #define MMTF_SPEC_VERSION_MAJOR 1
35 #define MMTF_SPEC_VERSION_MINOR 1
36 
40 inline std::string getVersionString();
41 
46 inline bool isVersionSupported(const std::string& version_string);
47 
53 struct GroupType { // NOTE: can't use MSGPACK_DEFINE_MAP due to char
54  std::vector<int32_t> formalChargeList;
55  std::vector<std::string> atomNameList;
56  std::vector<std::string> elementList;
57  std::vector<int32_t> bondAtomList;
58  std::vector<int8_t> bondOrderList;
59  std::vector<int8_t> bondResonanceList;
60  std::string groupName;
62  std::string chemCompType;
63 
64  bool operator==(GroupType const & c) const {
65  return(
68  elementList == c.elementList &&
72  groupName == c.groupName &&
75  }
76 };
77 
83 struct Entity {
84  std::vector<int32_t> chainIndexList;
85  std::string description;
86  std::string type;
87  std::string sequence;
88 
89  bool operator==(Entity const & c) const {
90  return(
92  description == c.description &&
93  type == c.type &&
94  sequence == c.sequence);
95  }
96 
100  type,
101  sequence);
102 };
103 
109 struct Transform {
110  std::vector<int32_t> chainIndexList;
111  float matrix[16];
112 
113  bool operator==(Transform const & c) const {
114  bool comp = true;
115  for(size_t i = 16; i--;) {
116  if ( matrix[i] != c.matrix[i] ) {
117  comp = false;
118  break;
119  }
120  }
121  return (chainIndexList == c.chainIndexList && comp);
122  }
123 
125 };
126 
132 struct BioAssembly {
133  std::vector<Transform> transformList;
134  std::string name;
135 
136  bool operator==(BioAssembly const & c) const {
137  return (
139  name == c.name);
140  }
141 
143 };
144 
158  std::string mmtfVersion;
159  std::string mmtfProducer;
160  std::vector<float> unitCell;
161  std::string spaceGroup;
162  std::string structureId;
163  std::string title;
164  std::string depositionDate;
165  std::string releaseDate;
166  std::vector<std::vector<float> > ncsOperatorList;
167  std::vector<BioAssembly> bioAssemblyList;
168  std::vector<Entity> entityList;
169  std::vector<std::string> experimentalMethods;
170  float resolution;
171  float rFree;
172  float rWork;
173  int32_t numBonds;
174  int32_t numAtoms;
175  int32_t numGroups;
176  int32_t numChains;
177  int32_t numModels;
178  std::vector<GroupType> groupList;
179  std::vector<int32_t> bondAtomList;
180  std::vector<int8_t> bondOrderList;
181  std::vector<int8_t> bondResonanceList;
182  std::vector<float> xCoordList;
183  std::vector<float> yCoordList;
184  std::vector<float> zCoordList;
185  std::vector<float> bFactorList;
186  std::vector<int32_t> atomIdList;
187  std::vector<char> altLocList;
188  std::vector<float> occupancyList;
189  std::vector<int32_t> groupIdList;
190  std::vector<int32_t> groupTypeList;
191  std::vector<int8_t> secStructList;
192  std::vector<char> insCodeList;
193  std::vector<int32_t> sequenceIndexList;
194  std::vector<std::string> chainIdList;
195  std::vector<std::string> chainNameList;
196  std::vector<int32_t> groupsPerChain;
197  std::vector<int32_t> chainsPerModel;
198  mutable msgpack::zone msgpack_zone;
199  std::map<std::string, msgpack::object> bondProperties;
200  std::map<std::string, msgpack::object> atomProperties;
201  std::map<std::string, msgpack::object> groupProperties;
202  std::map<std::string, msgpack::object> chainProperties;
203  std::map<std::string, msgpack::object> modelProperties;
204  std::map<std::string, msgpack::object> extraProperties;
205 
209  StructureData();
210 
214  StructureData(const StructureData& obj);
215 
220 
228  bool hasConsistentData(bool verbose=false, uint32_t chain_name_max_length = 4) const;
229 
237  std::string print(std::string delim="\t") const;
238 
243  bool operator==(const StructureData& c) const;
244 
249  bool operator!=(const StructureData& c) const {
250  return !(*this == c);
251  }
252 
253 private:
254  // Helper to copy map data
255  void copyMapData_(std::map<std::string, msgpack::object>& target,
256  const std::map<std::string, msgpack::object>& source);
257  // Helper to copy all data
258  void copyAllData_(const StructureData& obj);
259 };
260 
261 
265 template <typename T>
266 inline T getDefaultValue();
267 
268 
273 template <typename T>
274 inline bool isDefaultValue(const T& value);
275 template <typename T>
276 inline bool isDefaultValue(const std::vector<T>& value);
277 template <>
278 inline bool isDefaultValue(const std::string& value);
279 template <>
280 inline bool isDefaultValue(const std::map<std::string, msgpack::object>& value);
281 
282 
287 template <typename T>
288 inline void setDefaultValue(T& value);
289 
290 
298 inline bool is_polymer(const unsigned int chain_index,
299  const std::vector<Entity>& entity_list);
300 
314 inline bool is_hetatm(const char* type);
315 
334 inline bool is_hetatm(const unsigned int chain_index,
335  const std::vector<Entity>& entity_list,
336  const GroupType& group_type);
337 
338 
339 // *************************************************************************
340 // IMPLEMENTATION
341 // *************************************************************************
342 
343 // helpers in anonymous namespace (only visible in this file)
344 namespace {
345 
346 // check optional date string
347 // -> either default or "YYYY-MM-DD" (only string format checked, not date)
348 bool isValidDateFormatOptional(const std::string& s) {
349  // default?
350  if (isDefaultValue(s)) return true;
351  // check length
352  if (s.length() != 10) return false;
353  // check delimiters
354  if (s[4] != '-' || s[7] != '-') return false;
355  // check format
356  std::istringstream is(s);
357  int d, m, y;
358  char dash1, dash2;
359  if (is >> y >> dash1 >> m >> dash2 >> d) {
360  return (dash1 == '-' && dash2 == '-');
361  } else {
362  return false;
363  }
364 }
365 
366 // check if optional vector has right size
367 template<typename T>
368 bool hasRightSizeOptional(const std::vector<T>& v, int exp_size) {
369  return (isDefaultValue(v) || (int)v.size() == exp_size);
370 }
371 
372 // check if all indices in vector are in [0, num-1] (T = integer type)
373 template<typename T, typename Tnum>
374 bool hasValidIndices(const T* v, size_t size, Tnum num) {
375  T tnum = T(num);
376  for (size_t i = 0; i < size; ++i) {
377  if (v[i] < T(0) || v[i] >= tnum) return false;
378  }
379  return true;
380 }
381 template<typename T, typename Tnum>
382 bool hasValidIndices(const std::vector<T>& v, Tnum num) {
383  if (v.empty()) return true;
384  else return hasValidIndices(&v[0], v.size(), num);
385 }
386 
387 } // anon ns
388 
389 // VERSIONING
390 
391 inline std::string getVersionString() {
392  std::stringstream version;
394  return version.str();
395 }
396 
397 inline bool isVersionSupported(const std::string& version_string) {
398  std::stringstream ss(version_string);
399  int major = -1;
400  return ((ss >> major) && (major <= MMTF_SPEC_VERSION_MAJOR));
401 }
402 
403 // DEFAULT VALUES
404 
405 template <typename T>
406 inline T getDefaultValue() { return std::numeric_limits<T>::max(); }
407 
408 template <typename T>
409 inline bool isDefaultValue(const T& value) {
410  return (value == getDefaultValue<T>());
411 }
412 template <typename T>
413 inline bool isDefaultValue(const std::vector<T>& value) {
414  return value.empty();
415 }
416 template <>
417 inline bool isDefaultValue(const std::string& value) {
418  return value.empty();
419 }
420 template <>
421 inline bool isDefaultValue(const std::map<std::string, msgpack::object>& value) {
422  return value.empty();
423 }
424 
425 template <typename T>
426 inline void setDefaultValue(T& value) {
427  value = getDefaultValue<T>();
428 }
429 
430 // HELPERS
431 
432 bool is_polymer(const unsigned int chain_index,
433  const std::vector<Entity>& entity_list) {
434  for (std::size_t i = 0; i < entity_list.size(); ++i) {
435  if ( std::find(entity_list[i].chainIndexList.begin(),
436  entity_list[i].chainIndexList.end(),
437  chain_index)
438  != entity_list[i].chainIndexList.end()) {
439  return ( entity_list[i].type == "polymer"
440  || entity_list[i].type == "POLYMER");
441  }
442  }
443  std::stringstream err;
444  err << "'is_polymer' unable to find chain_index: " << chain_index
445  << " in entity list";
446  throw DecodeError(err.str());
447 }
448 
449 bool is_hetatm(const char* type) {
450  const char* hetatm_type[] = {
451  "D-BETA-PEPTIDE, C-GAMMA LINKING",
452  "D-GAMMA-PEPTIDE, C-DELTA LINKING",
453  "D-PEPTIDE COOH CARBOXY TERMINUS",
454  "D-PEPTIDE NH3 AMINO TERMINUS",
455  "D-PEPTIDE LINKING",
456  "D-SACCHARIDE",
457  "D-SACCHARIDE 1,4 AND 1,4 LINKING",
458  "D-SACCHARIDE 1,4 AND 1,6 LINKING",
459  "DNA OH 3 PRIME TERMINUS",
460  "DNA OH 5 PRIME TERMINUS",
461  "DNA LINKING",
462  "L-DNA LINKING",
463  "L-RNA LINKING",
464  "L-BETA-PEPTIDE, C-GAMMA LINKING",
465  "L-GAMMA-PEPTIDE, C-DELTA LINKING",
466  "L-PEPTIDE COOH CARBOXY TERMINUS",
467  "L-PEPTIDE NH3 AMINO TERMINUS",
468  //"L-PEPTIDE LINKING", // All canonical L AA
469  "L-SACCHARIDE",
470  "L-SACCHARIDE 1,4 AND 1,4 LINKING",
471  "L-SACCHARIDE 1,4 AND 1,6 LINKING",
472  "RNA OH 3 PRIME TERMINUS",
473  "RNA OH 5 PRIME TERMINUS",
474  "RNA LINKING",
475  "NON-POLYMER",
476  "OTHER",
477  //"PEPTIDE LINKING", // GLY
478  "PEPTIDE-LIKE",
479  "SACCHARIDE",
480  0 };
481  for (int i=0; hetatm_type[i]; ++i) {
482  if (strcmp(type,hetatm_type[i]) == 0) return true;
483  }
484  return false;
485 }
486 
487 bool is_hetatm(const unsigned int chain_index,
488  const std::vector<Entity>& entity_list,
489  const GroupType& group_type) {
490  return ( is_hetatm(group_type.chemCompType.c_str())
491  || !is_polymer(chain_index, entity_list));
492 }
493 
494 
495 // CLASS StructureData
496 
498  // no need to do anything with strings and vectors
502  // numXX set to 0 to have consistent data
503  numBonds = 0;
504  numAtoms = 0;
505  numGroups = 0;
506  numChains = 0;
507  numModels = 0;
508  // set version and producer
510  mmtfProducer = "mmtf-cpp library (github.com/rcsb/mmtf-cpp)";
511 }
512 
514  copyAllData_(obj);
515 }
516 
518  if (this != &obj) copyAllData_(obj);
519  return *this;
520 }
521 
522 inline bool StructureData::operator==(const StructureData& c) const {
523  return (
524  mmtfVersion == c.mmtfVersion &&
525  mmtfProducer == c.mmtfProducer &&
526  unitCell == c.unitCell &&
527  spaceGroup == c.spaceGroup &&
528  structureId == c.structureId &&
529  title == c.title &&
531  releaseDate == c.releaseDate &&
534  entityList == c.entityList &&
536  resolution == c.resolution &&
537  rFree == c.rFree &&
538  rWork == c.rWork &&
539  numBonds == c.numBonds &&
540  numAtoms == c.numAtoms &&
541  numGroups == c.numGroups &&
542  numChains == c.numChains &&
543  numModels == c.numModels &&
544  groupList == c.groupList &&
545  bondAtomList == c.bondAtomList &&
547  xCoordList == c.xCoordList &&
548  yCoordList == c.yCoordList &&
549  zCoordList == c.zCoordList &&
550  bFactorList == c.bFactorList &&
551  atomIdList == c.atomIdList &&
552  altLocList == c.altLocList &&
554  groupIdList == c.groupIdList &&
557  insCodeList == c.insCodeList &&
559  chainIdList == c.chainIdList &&
569 }
570 
571 
572 inline bool StructureData::hasConsistentData(bool verbose, uint32_t chain_name_max_length) const {
573  std::vector<int8_t> allowed_bond_orders;
574  allowed_bond_orders.push_back(-1);
575  allowed_bond_orders.push_back(1);
576  allowed_bond_orders.push_back(2);
577  allowed_bond_orders.push_back(3);
578  allowed_bond_orders.push_back(4);
579 
580  // check unitCell: if given, must be of length 6
581  if (!hasRightSizeOptional(unitCell, 6)) {
582  if (verbose) {
583  std::cout << "inconsistent unitCell (unitCell length != 6)" << std::endl;
584  }
585  return false;
586  }
587  // check dates
588  if (!isValidDateFormatOptional(depositionDate)) {
589  if (verbose) {
590  std::cout << "inconsistent depositionDate (does not match 'YYYY-MM-DD' "
591  "or empty)" << std::endl;
592  }
593  return false;
594  }
595  if (!isValidDateFormatOptional(releaseDate)) {
596  if (verbose) {
597  std::cout << "inconsistent releaseDate (does not match 'YYYY-MM-DD' "
598  "or empty)" << std::endl;
599  }
600  return false;
601  }
602  // check ncsOperatorList: all elements must have length 16
603  for (size_t i = 0; i < ncsOperatorList.size(); ++i) {
604  if ((int)ncsOperatorList[i].size() != 16) {
605  if (verbose) {
606  std::cout << "inconsistent ncsOperatorList idx: " << i << " found size: "
607  << ncsOperatorList[i].size() << " != 16" << std::endl;
608  }
609  return false;
610  }
611  }
612  // check chain indices in bioAssembly-transforms and entities
613  for (size_t i = 0; i < bioAssemblyList.size(); ++i) {
614  const BioAssembly& ba = bioAssemblyList[i];
615  for (size_t j = 0; j < ba.transformList.size(); ++j) {
616  const Transform & t = ba.transformList[j];
617  if (!hasValidIndices(t.chainIndexList, numChains)) {
618  if (verbose) {
619  std::cout << "inconsistent BioAssemby transform i j: " << i
620  << " " << j << std::endl;
621  }
622  return false;
623  }
624  }
625  }
626  for (size_t i = 0; i < entityList.size(); ++i) {
627  const Entity& ent = entityList[i];
628  if (!hasValidIndices(ent.chainIndexList, numChains)) {
629  if (verbose) {
630  std::cout << "inconsistent entity idx: " << i << std::endl;
631  }
632  return false;
633  }
634  }
635  // check groups
636  for (size_t i = 0; i < groupList.size(); ++i) {
637  const GroupType& g = groupList[i];
638  const size_t num_atoms = g.formalChargeList.size();
639  if (g.atomNameList.size() != num_atoms) {
640  if (verbose) {
641  std::cout << "inconsistent group::atomNameList size at idx: "
642  << i << std::endl;
643  }
644  return false;
645  }
646  if (g.elementList.size() != num_atoms) {
647  if (verbose) {
648  std::cout << "inconsistent group::elementList size at idx: "
649  << i << std::endl;
650  }
651  return false;
652  }
653  if (!isDefaultValue(g.bondOrderList)) {
654  if (g.bondAtomList.size() != g.bondOrderList.size() * 2) {
655  if (verbose) {
656  std::cout << "inconsistent group::bondAtomList size: " <<
657  g.bondAtomList.size() << " != group::bondOrderList size(*2): " <<
658  g.bondOrderList.size()*2 << " at idx: " << i << std::endl;
659  }
660  return false;
661  }
662  for (size_t j = 0; j < g.bondOrderList.size(); ++j) {
663  if (std::find(allowed_bond_orders.begin(), allowed_bond_orders.end(),
664  g.bondOrderList[j]) == allowed_bond_orders.end()) {
665  if (verbose) {
666  std::cout << "Cannot have bond order of: " << (int)g.bondOrderList[j]
667  << " allowed bond orders are: -1, 1, 2, 3 or 4. at idx: " << j << std::endl;
668  }
669  return false;
670  }
671  }
672  }
675  if (verbose) {
676  std::cout << "Cannot have bondResonanceList without both " <<
677  "bondOrderList and bondAtomList! at idx: " << i << std::endl;
678  }
679  return false;
680  }
681  if (g.bondOrderList.size() != g.bondResonanceList.size()) {
682  if (verbose) {
683  std::cout << "inconsistent group::bondOrderSize size: " <<
684  g.bondOrderList.size() << " != group::bondResonanceList size: " <<
685  g.bondResonanceList.size() << " at idx: " << i << std::endl;
686  }
687  return false;
688  }
689  if (g.bondAtomList.size() != g.bondResonanceList.size() * 2) {
690  if (verbose) {
691  std::cout << "inconsistent group::bondAtomList size: " <<
692  g.bondAtomList.size() << " != group::bondResonanceList size(*2): " <<
693  g.bondResonanceList.size()*2 << " at idx: " << i << std::endl;
694  }
695  return false;
696  }
697  // Check bond resonance matches
698  for (size_t j = 0; j < g.bondResonanceList.size(); ++j) {
699  if (g.bondResonanceList[j] < -1 || g.bondResonanceList[j] > 1) {
700  if (verbose) {
701  std::cout << "group::bondResonanceList had a Resonance of: "
702  << (int)g.bondResonanceList[j] << " and only -1, 0, or 1 are allowed"
703  << std::endl;
704  }
705  return false;
706  }
707  if (g.bondOrderList[j] == -1 && g.bondResonanceList[j] != 1) {
708  if (verbose) {
709  std::cout << "group::bondResonanceList had a Resonance of: "
710  << (int)g.bondResonanceList[j] << " and group::bondOrderList had an order of "
711  << (int)g.bondOrderList[j] << " we require unknown bondOrders to have resonance"
712  << std::endl;
713  }
714  return false;
715  }
716  }
717  }
718  if (!hasValidIndices(g.bondAtomList, num_atoms)) {
719  if (verbose) {
720  std::cout << "inconsistent group::bondAtomList indices (not all in [0, "
721  << num_atoms - 1 << "]) at idx: " << i << std::endl;
722  }
723  return false;
724  }
725  }
726  // check global bonds
728  if (bondAtomList.size() != bondOrderList.size() * 2) {
729  if (verbose) {
730  std::cout << "inconsistent bondAtomList size: " <<
731  bondAtomList.size() << " != bondOrderList size(*2): " <<
732  bondOrderList.size()*2 << std::endl;
733  }
734  return false;
735  }
736  for (size_t i = 0; i < bondOrderList.size(); ++i) {
737  if (std::find(allowed_bond_orders.begin(), allowed_bond_orders.end(),
738  bondOrderList[i]) == allowed_bond_orders.end()) {
739  if (verbose) {
740  std::cout << "Cannot have bond order of: " << (int)bondOrderList[i]
741  << " allowed bond orders are: -1, 1, 2, 3 or 4. at idx: " << i << std::endl;
742  }
743  return false;
744  }
745  }
746  }
749  if (verbose) {
750  std::cout << "Cannot have bondResonanceList without both " <<
751  "bondOrderList and bondAtomList!" << std::endl;
752  }
753  return false;
754  }
755  if (bondAtomList.size() != bondResonanceList.size() * 2) {
756  if (verbose) {
757  std::cout << "inconsistent bondAtomList size: " <<
758  bondAtomList.size() << " != bondResonanceList size(*2): " <<
759  bondResonanceList.size()*2 << std::endl;
760  }
761  return false;
762  }
763  for (size_t i = 0; i < bondResonanceList.size(); ++i) {
764  if (bondResonanceList[i] < -1 || bondResonanceList[i] > 1) {
765  if (verbose) {
766  std::cout << "bondResonanceList had a Resonance of: "
767  << (int)bondResonanceList[i] << " and only -1, 0, or 1 are allowed"
768  << std::endl;
769  }
770  return false;
771  }
772  if (bondOrderList[i] == -1 && bondResonanceList[i] != 1) {
773  if (verbose) {
774  std::cout << "bondResonanceList had a Resonance of: "
775  << (int)bondResonanceList[i] << " and bondOrderList had an order of "
776  << (int)bondOrderList[i] << " we require unknown bondOrders to have resonance"
777  << std::endl;
778  }
779  return false;
780  }
781  }
782  }
783  if (!hasValidIndices(bondAtomList, numAtoms)) {
784  if (verbose) {
785  std::cout << "inconsistent bondAtomList indices (not all in [0, "
786  << numAtoms - 1 << "])" << std::endl;
787  }
788  return false;
789  }
790  // check vector sizes
791  if ((int)xCoordList.size() != numAtoms) {
792  if (verbose) {
793  std::cout << "inconsistent xCoordList size" << std::endl;
794  }
795  return false;
796  }
797  if ((int)yCoordList.size() != numAtoms) {
798  if (verbose) {
799  std::cout << "inconsistent yCoordList size" << std::endl;
800  }
801  return false;
802  }
803  if ((int)zCoordList.size() != numAtoms) {
804  if (verbose) {
805  std::cout << "inconsistent zCoordList size" << std::endl;
806  }
807  return false;
808  }
809  if (!hasRightSizeOptional(bFactorList, numAtoms)) {
810  if (verbose) {
811  std::cout << "inconsistent bFactorList size" << std::endl;
812  }
813  return false;
814  }
815  if (!hasRightSizeOptional(atomIdList, numAtoms)) {
816  if (verbose) {
817  std::cout << "inconsistent atomIdList size" << std::endl;
818  }
819  return false;
820  }
821  if (!hasRightSizeOptional(altLocList, numAtoms)) {
822  if (verbose) {
823  std::cout << "inconsistent altLocList size" << std::endl;
824  }
825  return false;
826  }
827  if (!hasRightSizeOptional(occupancyList, numAtoms)) {
828  if (verbose) {
829  std::cout << "inconsistent occupancyList size" << std::endl;
830  }
831  return false;
832  }
833  if ((int)groupIdList.size() != numGroups) {
834  if (verbose) {
835  std::cout << "inconsistent groupIdList size" << std::endl;
836  }
837  return false;
838  }
839  if ((int)groupTypeList.size() != numGroups) {
840  if (verbose) {
841  std::cout << "inconsistent groupTypeList size" << std::endl;
842  }
843  return false;
844  }
845  if (!hasRightSizeOptional(secStructList, numGroups)) {
846  if (verbose) {
847  std::cout << "inconsistent secStructList size" << std::endl;
848  }
849  return false;
850  }
851  if (!hasRightSizeOptional(insCodeList, numGroups)) {
852  if (verbose) {
853  std::cout << "inconsistent insCodeList size" << std::endl;
854  }
855  return false;
856  }
857  if (!hasRightSizeOptional(sequenceIndexList, numGroups)) {
858  if (verbose) {
859  std::cout << "inconsistent sequenceIndexList size" << std::endl;
860  }
861  return false;
862  }
863  if ((int)chainIdList.size() != numChains) {
864  if (verbose) {
865  std::cout << "inconsistent chainIdList size" << std::endl;
866  }
867  return false;
868  }
869  if (!hasRightSizeOptional(chainNameList, numChains)) {
870  if (verbose) {
871  std::cout << "inconsistent chainNameList size" << std::endl;
872  }
873  return false;
874  }
875  if ((int)groupsPerChain.size() != numChains) {
876  if (verbose) {
877  std::cout << "inconsistent groupsPerChain size" << std::endl;
878  }
879  return false;
880  }
881  if ((int)chainsPerModel.size() != numModels) {
882  if (verbose) {
883  std::cout << "inconsistent chainsPerModel size" << std::endl;
884  }
885  return false;
886  }
887  // check indices
888  if (!hasValidIndices(groupTypeList, groupList.size())) {
889  if (verbose) {
890  std::cout << "inconsistent groupTypeList indices (not all in [0, "
891  << groupList.size() - 1 << "])" << std::endl;
892  }
893  return false;
894  }
895  // collect sequence lengths from entities and use to check
896  std::vector<int32_t> sequenceIndexSize(numChains);
897  for (size_t i = 0; i < entityList.size(); ++i) {
898  const Entity& ent = entityList[i];
899  for (size_t j = 0; j < ent.chainIndexList.size(); ++j) {
900  sequenceIndexSize[ent.chainIndexList[j]] = ent.sequence.length();
901  }
902  }
903  // traverse structure for more checks
904  int bond_count_from_atom = 0;
905  int bond_count_from_order = 0;
906  int bond_count_from_resonance = 0;
907  bool all_bond_orderLists_are_default = true;
908  bool all_bond_resonanceLists_are_default = true;
909  bool all_bond_atomLists_are_default = true;
911  all_bond_orderLists_are_default = false;
912  bond_count_from_order = bondOrderList.size();
913  }
915  all_bond_resonanceLists_are_default = false;
916  bond_count_from_resonance = bondOrderList.size();
917  }
919  all_bond_atomLists_are_default = false;
920  bond_count_from_atom = bondAtomList.size()/2;
921  }
922  int chain_idx = 0; // will be count at end of loop
923  int group_idx = 0; // will be count at end of loop
924  int atom_idx = 0; // will be count at end of loop
925  // traverse models
926  for (int model_idx = 0; model_idx < numModels; ++model_idx) {
927  // traverse chains
928  for (int j = 0; j < chainsPerModel[model_idx]; ++j, ++chain_idx) {
929  // check chain names (fixed length)
930  if (chainIdList[chain_idx].size() > chain_name_max_length) {
931  if (verbose) {
932  std::cout << "inconsistent chainIdList size at chain_idx: "
933  << chain_idx << " size: "
934  << chainIdList[chain_idx].size() << std::endl;
935  }
936  return false;
937  }
939  && chainNameList[chain_idx].size() > chain_name_max_length) {
940  if (verbose) {
941  std::cout << "inconsistent chainNameList size at chain_idx:"
942  << chain_idx << " size: "
943  << chainNameList[chain_idx].size() << std::endl;
944  }
945  return false;
946  }
947  // traverse groups
948  for (int k = 0; k < groupsPerChain[chain_idx]; ++k, ++group_idx) {
949  // check seq. idx
951  const int32_t idx = sequenceIndexList[group_idx];
952  // -1 is ok here
953  if (idx < -1 || idx >= sequenceIndexSize[chain_idx]) {
954  if (verbose) {
955  std::cout << "inconsistent sequenceIndexSize at"
956  " chain_idx: " << chain_idx << std::endl;
957  }
958  return false;
959  }
960  }
961  // count atoms
962  const GroupType& group = groupList[groupTypeList[group_idx]];
963  atom_idx += group.atomNameList.size();
964  // count bonds
965  if (!isDefaultValue(group.bondOrderList)) {
966  all_bond_orderLists_are_default = false;
967  bond_count_from_order += group.bondOrderList.size();
968  }
969  if (!isDefaultValue(group.bondResonanceList)) {
970  all_bond_resonanceLists_are_default = false;
971  bond_count_from_resonance += group.bondResonanceList.size();
972  }
973  if (!isDefaultValue(group.bondAtomList)) {
974  all_bond_atomLists_are_default = false;
975  bond_count_from_atom += group.bondAtomList.size()/2;
976  }
977 
978  }
979  }
980  }
981  // check sizes
982  if (!all_bond_orderLists_are_default) {
983  if (bond_count_from_order != numBonds) {
984  if (verbose) {
985  std::cout << "inconsistent numBonds vs bond order count" << std::endl;
986  }
987  return false;
988  }
989  }
990  if (!all_bond_resonanceLists_are_default) {
991  if (bond_count_from_resonance != numBonds) {
992  if (verbose) {
993  std::cout << "inconsistent numBonds vs bond resonance count" << std::endl;
994  }
995  return false;
996  }
997  }
998  if (!all_bond_atomLists_are_default) {
999  if (bond_count_from_atom != numBonds) {
1000  if (verbose) {
1001  std::cout << "inconsistent numBonds vs bond atom list count" << std::endl;
1002  }
1003  return false;
1004  }
1005  }
1006  if (chain_idx != numChains) {
1007  if (verbose) {
1008  std::cout << "inconsistent numChains" << std::endl;
1009  }
1010  return false;
1011  }
1012  if (group_idx != numGroups) {
1013  if (verbose) {
1014  std::cout << "inconsistent numGroups size" << std::endl;
1015  }
1016  return false;
1017  }
1018  if (atom_idx != numAtoms) {
1019  if (verbose) {
1020  std::cout << "inconsistent numAtoms size" << std::endl;
1021  }
1022  return false;
1023  }
1024  // All looks good :)
1025  return true;
1026 }
1027 
1028 inline std::string StructureData::print(std::string delim) const {
1029  std::ostringstream out;
1030  int modelIndex = 0;
1031  int chainIndex = 0;
1032  int groupIndex = 0;
1033  int atomIndex = 0;
1034 
1035  // traverse models
1036  for (int i = 0; i < numModels; i++, modelIndex++) {
1037  // traverse chains
1038  for (int j = 0; j < chainsPerModel[modelIndex]; j++, chainIndex++) {
1039  // traverse groups
1040  for (int k = 0; k < groupsPerChain[chainIndex]; k++, groupIndex++) {
1041  const mmtf::GroupType& group =
1042  groupList[groupTypeList[groupIndex]];
1043  int groupAtomCount = group.atomNameList.size();
1044 
1045  for (int l = 0; l < groupAtomCount; l++, atomIndex++) {
1046  // ATOM or HETATM
1047  if (is_hetatm(chainIndex, entityList, group))
1048  out << "HETATM" << delim;
1049  else
1050  out << "ATOM" << delim;
1051  // Atom serial
1052  if ( !mmtf::isDefaultValue(atomIdList) ) {
1053  out << std::setfill('0') << std::internal << std::setw(6) <<
1054  std::right << atomIdList[atomIndex] << delim;
1055  } else out << "." << delim;
1056  // Atom name
1057  out << group.atomNameList[l] << delim;
1058  // Alternate location
1059  if ( !mmtf::isDefaultValue(altLocList) ) {
1060  if ( altLocList[atomIndex] == ' ' ||
1061  altLocList[atomIndex] == 0x00 )
1062  out << "." << delim;
1063  else out << altLocList[atomIndex] << delim;
1064  } else out << "." << delim;
1065  // Group name
1066  out << group.groupName << delim;
1067  // Chain
1068  out << chainIdList[chainIndex] << delim;
1070  out << chainNameList[chainIndex];
1071  out << delim;
1072  } else out << "." << delim;
1073  // Group serial
1074  out << groupIdList[groupIndex] << delim;
1075  // Insertion code
1077  if ( insCodeList[groupIndex] == ' ' ||
1078  insCodeList[groupIndex] == 0x00 )
1079  out << "." << delim;
1080  else out << int(insCodeList[groupIndex]) << delim;
1081  } else out << ". ";
1082  // x, y, z
1083  out << std::fixed << std::setprecision(3);
1084  out << xCoordList[atomIndex] << delim;
1085  out << yCoordList[atomIndex] << delim;
1086  out << zCoordList[atomIndex] << delim;
1087 
1088  // B-factor
1090  out << bFactorList[atomIndex] << delim;
1091  } else out << "." << delim;
1092  // Occupancy
1094  out << occupancyList[atomIndex] << delim;
1095  } else out << "." << delim;
1096  // Element
1097  out << group.elementList[l] << delim;
1098  // Charge
1099  out << group.formalChargeList[l] << "\n";
1100  }
1101  }
1102  }
1103  }
1104  return out.str();
1105 }
1106 
1107 inline void StructureData::copyMapData_(
1108  std::map<std::string, msgpack::object>& target,
1109  const std::map<std::string, msgpack::object>& source) {
1110  target.clear();
1111  std::map<std::string, msgpack::object>::const_iterator it;
1112  for (it = source.begin(); it != source.end(); ++it) {
1113  msgpack::object tmp_object(it->second, msgpack_zone);
1114  target[it->first] = tmp_object;
1115  }
1116 }
1117 
1118 inline void StructureData::copyAllData_(const StructureData& obj) {
1119  mmtfVersion = obj.mmtfVersion;
1120  mmtfProducer = obj.mmtfProducer;
1121  unitCell = obj.unitCell;
1122  spaceGroup = obj.spaceGroup;
1123  structureId = obj.structureId;
1124  title = obj.title;
1125  depositionDate = obj.depositionDate;
1126  releaseDate = obj.releaseDate;
1127  ncsOperatorList = obj.ncsOperatorList;
1128  bioAssemblyList = obj.bioAssemblyList;
1129  entityList = obj.entityList;
1130  experimentalMethods = obj.experimentalMethods;
1131  resolution = obj.resolution;
1132  rFree = obj.rFree;
1133  rWork = obj.rWork;
1134  numBonds = obj.numBonds;
1135  numAtoms = obj.numAtoms;
1136  numGroups = obj.numGroups;
1137  numChains = obj.numChains;
1138  numModels = obj.numModels;
1139  groupList = obj.groupList;
1140  bondAtomList = obj.bondAtomList;
1141  bondOrderList = obj.bondOrderList;
1142  xCoordList = obj.xCoordList;
1143  yCoordList = obj.yCoordList;
1144  zCoordList = obj.zCoordList;
1145  bFactorList = obj.bFactorList;
1146  atomIdList = obj.atomIdList;
1147  altLocList = obj.altLocList;
1148  occupancyList = obj.occupancyList;
1149  groupIdList = obj.groupIdList;
1150  groupTypeList = obj.groupTypeList;
1151  secStructList = obj.secStructList;
1152  insCodeList = obj.insCodeList;
1153  sequenceIndexList = obj.sequenceIndexList;
1154  chainIdList = obj.chainIdList;
1155  chainNameList = obj.chainNameList;
1156  groupsPerChain = obj.groupsPerChain;
1157  chainsPerModel = obj.chainsPerModel;
1158  copyMapData_(bondProperties, obj.bondProperties);
1159  copyMapData_(atomProperties, obj.atomProperties);
1160  copyMapData_(groupProperties, obj.groupProperties);
1161  copyMapData_(chainProperties, obj.chainProperties);
1162  copyMapData_(modelProperties, obj.modelProperties);
1163  copyMapData_(extraProperties, obj.extraProperties);
1164 }
1165 
1166 } // mmtf namespace
1167 
1168 #endif
1169 
Transformation definition for a set of chains.
Definition: structure_data.hpp:109
std::vector< int8_t > bondOrderList
Definition: structure_data.hpp:180
std::vector< std::string > elementList
Definition: structure_data.hpp:56
std::vector< int8_t > secStructList
Definition: structure_data.hpp:191
std::vector< std::string > atomNameList
Definition: structure_data.hpp:55
std::map< std::string, msgpack::object > bondProperties
Definition: structure_data.hpp:199
bool isVersionSupported(const std::string &version_string)
Check if version is supported (minor revisions ok, major ones not)
Definition: structure_data.hpp:397
msgpack::zone msgpack_zone
Definition: structure_data.hpp:198
std::vector< char > altLocList
Definition: structure_data.hpp:187
bool operator==(Transform const &c) const
Definition: structure_data.hpp:113
Exception thrown when failing during decoding.
Definition: errors.hpp:23
bool operator!=(const StructureData &c) const
Compare two StructureData classes for inequality.
Definition: structure_data.hpp:249
std::string mmtfVersion
Definition: structure_data.hpp:158
std::map< std::string, msgpack::object > chainProperties
Definition: structure_data.hpp:202
std::vector< std::string > experimentalMethods
Definition: structure_data.hpp:169
int32_t numGroups
Definition: structure_data.hpp:175
std::vector< float > yCoordList
Definition: structure_data.hpp:183
std::vector< int32_t > groupsPerChain
Definition: structure_data.hpp:196
std::vector< char > insCodeList
Definition: structure_data.hpp:192
std::string structureId
Definition: structure_data.hpp:162
std::vector< GroupType > groupList
Definition: structure_data.hpp:178
std::map< std::string, msgpack::object > modelProperties
Definition: structure_data.hpp:203
Top level MMTF data container.
Definition: structure_data.hpp:157
std::vector< int32_t > groupTypeList
Definition: structure_data.hpp:190
std::string type
Definition: structure_data.hpp:86
std::vector< int32_t > groupIdList
Definition: structure_data.hpp:189
bool hasConsistentData(bool verbose=false, uint32_t chain_name_max_length=4) const
Check consistency of structural data.
Definition: structure_data.hpp:572
std::map< std::string, msgpack::object > groupProperties
Definition: structure_data.hpp:201
#define MMTF_SPEC_VERSION_MINOR
Definition: structure_data.hpp:35
std::vector< Transform > transformList
Definition: structure_data.hpp:133
std::vector< int32_t > chainsPerModel
Definition: structure_data.hpp:197
std::vector< Entity > entityList
Definition: structure_data.hpp:168
std::string description
Definition: structure_data.hpp:85
std::string groupName
Definition: structure_data.hpp:60
std::vector< int32_t > chainIndexList
Definition: structure_data.hpp:110
std::vector< int32_t > bondAtomList
Definition: structure_data.hpp:57
bool operator==(const StructureData &c) const
Compare two StructureData classes for equality.
Definition: structure_data.hpp:522
char singleLetterCode
Definition: structure_data.hpp:61
int32_t numChains
Definition: structure_data.hpp:176
std::vector< int32_t > formalChargeList
Definition: structure_data.hpp:54
void setDefaultValue(T &value)
Set default value to given type.
Definition: structure_data.hpp:426
Data store for the biological assembly annotation.
Definition: structure_data.hpp:132
std::vector< int32_t > atomIdList
Definition: structure_data.hpp:186
std::vector< std::string > chainNameList
Definition: structure_data.hpp:195
std::vector< std::vector< float > > ncsOperatorList
Definition: structure_data.hpp:166
float resolution
Definition: structure_data.hpp:170
Group (residue) level data store.
Definition: structure_data.hpp:53
float rFree
Definition: structure_data.hpp:171
int32_t numBonds
Definition: structure_data.hpp:173
std::string print(std::string delim="\) const
Read out the contents of mmtf::StructureData in a PDB-like fashion Columns are in order: ATOM/HETATM ...
Definition: structure_data.hpp:1028
std::vector< float > xCoordList
Definition: structure_data.hpp:182
Definition: binary_decoder.hpp:25
std::string chemCompType
Definition: structure_data.hpp:62
MSGPACK_DEFINE_MAP(chainIndexList, description, type, sequence)
std::string sequence
Definition: structure_data.hpp:87
std::vector< float > zCoordList
Definition: structure_data.hpp:184
std::vector< float > unitCell
Definition: structure_data.hpp:160
std::vector< int8_t > bondResonanceList
Definition: structure_data.hpp:181
std::string releaseDate
Definition: structure_data.hpp:165
float matrix[16]
Definition: structure_data.hpp:111
std::string name
Definition: structure_data.hpp:134
int32_t numModels
Definition: structure_data.hpp:177
std::string depositionDate
Definition: structure_data.hpp:164
std::vector< std::string > chainIdList
Definition: structure_data.hpp:194
#define MMTF_SPEC_VERSION_MAJOR
MMTF spec version which this library implements.
Definition: structure_data.hpp:34
StructureData & operator=(const StructureData &f)
Overload for assignment operator.
Definition: structure_data.hpp:517
bool operator==(BioAssembly const &c) const
Definition: structure_data.hpp:136
T getDefaultValue()
Get default value for given type.
Definition: structure_data.hpp:406
bool is_polymer(const unsigned int chain_index, const std::vector< Entity > &entity_list)
Check if chain is a polymer chain.
Definition: structure_data.hpp:432
int32_t numAtoms
Definition: structure_data.hpp:174
std::vector< BioAssembly > bioAssemblyList
Definition: structure_data.hpp:167
MSGPACK_DEFINE_MAP(chainIndexList, matrix)
std::vector< int32_t > chainIndexList
Definition: structure_data.hpp:84
std::vector< int8_t > bondResonanceList
Definition: structure_data.hpp:59
float rWork
Definition: structure_data.hpp:172
std::vector< int8_t > bondOrderList
Definition: structure_data.hpp:58
bool isDefaultValue(const T &value)
Definition: structure_data.hpp:409
std::vector< int32_t > sequenceIndexList
Definition: structure_data.hpp:193
bool operator==(GroupType const &c) const
Definition: structure_data.hpp:64
std::string title
Definition: structure_data.hpp:163
std::map< std::string, msgpack::object > atomProperties
Definition: structure_data.hpp:200
std::string getVersionString()
Get string representation of MMTF spec version implemented here.
Definition: structure_data.hpp:391
StructureData()
Construct object with default values set.
Definition: structure_data.hpp:497
Entity type.
Definition: structure_data.hpp:83
bool operator==(Entity const &c) const
Definition: structure_data.hpp:89
MSGPACK_DEFINE_MAP(transformList, name)
std::string spaceGroup
Definition: structure_data.hpp:161
std::string mmtfProducer
Definition: structure_data.hpp:159
std::vector< float > bFactorList
Definition: structure_data.hpp:185
std::vector< int32_t > bondAtomList
Definition: structure_data.hpp:179
std::vector< float > occupancyList
Definition: structure_data.hpp:188
bool is_hetatm(const char *type)
Check if group type consists of HETATM atoms.
Definition: structure_data.hpp:449
std::map< std::string, msgpack::object > extraProperties
Definition: structure_data.hpp:204