DCCL v4
codec.h
1 // Copyright 2009-2023:
2 // GobySoft, LLC (2013-)
3 // Massachusetts Institute of Technology (2007-2014)
4 // Community contributors (see AUTHORS file)
5 // File authors:
6 // Toby Schneider <toby@gobysoft.org>
7 // Nathan Knotts <nknotts@gmail.com>
8 // philboske <philboske@gmail.com>
9 // Chris Murphy <cmurphy@aphysci.com>
10 //
11 //
12 // This file is part of the Dynamic Compact Control Language Library
13 // ("DCCL").
14 //
15 // DCCL is free software: you can redistribute it and/or modify
16 // it under the terms of the GNU Lesser General Public License as published by
17 // the Free Software Foundation, either version 2.1 of the License, or
18 // (at your option) any later version.
19 //
20 // DCCL is distributed in the hope that it will be useful,
21 // but WITHOUT ANY WARRANTY; without even the implied warranty of
22 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 // GNU Lesser General Public License for more details.
24 //
25 // You should have received a copy of the GNU Lesser General Public License
26 // along with DCCL. If not, see <http://www.gnu.org/licenses/>.
27 #ifndef DCCL20091211H
28 #define DCCL20091211H
29 
30 #include <map>
31 #include <ostream>
32 #include <set>
33 #include <stdexcept>
34 #include <string>
35 #include <type_traits>
36 #include <vector>
37 
38 #include <google/protobuf/descriptor.h>
39 
40 #include <memory>
41 
42 #include "binary.h"
43 #include "dynamic_protobuf_manager.h"
44 #include "exception.h"
45 #include "field_codec.h"
46 #include "field_codec_fixed.h"
47 #include "logger.h"
48 
49 #include "codecs2/field_codec_default_message.h"
50 #include "codecs3/field_codec_default_message.h"
51 #include "dccl/def.h"
52 #include "dccl/version.h"
53 #include "field_codec_manager.h"
54 
56 namespace dccl
57 {
58 class FieldCodec;
59 
62 class Codec
63 {
64  public:
70  Codec(std::string dccl_id_codec_name = default_id_codec_name(),
71  const std::string& library_path = "");
72 
79  template <class IDFieldCodec,
80  typename std::enable_if<std::is_base_of<FieldCodecBase, IDFieldCodec>::value,
81  int>::type = 0>
82  Codec(const std::string& dccl_id_codec_name, const IDFieldCodec& dccl_id_codec) // NOLINT
83  : id_codec_(dccl_id_codec_name)
84  {
85  set_default_codecs();
86  manager_.add<IDFieldCodec>(dccl_id_codec_name);
87  }
88 
90  virtual ~Codec();
91 
92  Codec(const Codec&) = delete;
93  Codec& operator=(const Codec&) = delete;
94 
100  void load_library(void* dl_handle);
101 
108  void unload_library(void* dl_handle);
109 
114  void load_library(const std::string& library_path);
115 
121  template <typename ProtobufMessage> std::size_t load()
122  {
123  return load(ProtobufMessage::descriptor());
124  }
125 
129  template <typename ProtobufMessage> void unload() { unload(ProtobufMessage::descriptor()); }
130 
131  void unload_all() { id2desc_.clear(); }
132 
140  std::size_t load(const google::protobuf::Descriptor* desc, int user_id = -1);
141 
146  void unload(const google::protobuf::Descriptor* desc);
147 
152  void unload(size_t dccl_id);
153 
155  void set_id_codec(const std::string& id_codec_name);
156  std::string get_id_codec() { return id_codec_; }
157 
163  void set_crypto_passphrase(const std::string& passphrase,
164  const std::set<int32>& do_not_encrypt_ids = std::set<int32>());
165 
166  void set_crypto_passphrase(const std::string& passphrase,
167  const std::set<unsigned>& do_not_encrypt_ids)
168  {
169  std::set<int32> s_ids{do_not_encrypt_ids.begin(), do_not_encrypt_ids.end()};
170  set_crypto_passphrase(passphrase, s_ids);
171  }
172 
176  void set_strict(bool mode) { strict_ = mode; }
177 
181  void set_console_width(unsigned num_chars) { console_width_ = num_chars; }
182 
184 
188 
189 
196  template <typename ProtobufMessage>
197  void info(std::ostream* os = nullptr, int user_id = -1) const
198  {
199  info(ProtobufMessage::descriptor(), os, user_id);
200  }
201 
207  void info(const google::protobuf::Descriptor* desc, std::ostream* os = nullptr,
208  int user_id = -1) const;
209 
213  void info_all(std::ostream* os = nullptr) const;
214 
218  template <typename ProtobufMessage> int32 id() const
219  {
220  return id(ProtobufMessage::descriptor());
221  }
222 
242  int32 id(const std::string& bytes) const;
243 
245  template <typename CharIterator> int32 id(CharIterator begin, CharIterator end) const;
246 
248  int32 id(const google::protobuf::Descriptor* desc) const
249  {
250  if (desc->options().GetExtension(dccl::msg).omit_id())
251  throw(Exception("Cannot call id(...) on message with omit_id == true"));
252  Bitset id_bits;
253  dccl::uint32 hardcoded_id = desc->options().GetExtension(dccl::msg).id();
254  // pass the hard coded id, that is, (dccl.msg).id,
255  // through encode/decode to allow a custom ID codec (if in use)
256  // to always take effect.
257  id_codec()->field_encode(&id_bits, hardcoded_id, nullptr);
258  std::string id_bytes(id_bits.to_byte_string());
259  return id(id_bytes);
260  }
261 
263  const std::map<int32, const google::protobuf::Descriptor*>& loaded() const { return id2desc_; }
264 
266 
270 
271 
280  void encode(std::string* bytes, const google::protobuf::Message& msg, bool header_only = false,
281  int user_id = -1);
282 
293  size_t encode(char* bytes, size_t max_len, const google::protobuf::Message& msg,
294  bool header_only = false, int user_id = -1);
295 
304  template <typename CharIterator, typename ProtobufMessage>
305  CharIterator decode(CharIterator begin, CharIterator end, ProtobufMessage* msg,
306  bool header_only = false);
307 
314  template <typename ProtobufMessage>
315  void decode(const std::string& bytes, ProtobufMessage* msg, bool header_only = false)
316  {
317  decode(bytes.begin(), bytes.end(), msg, header_only);
318  }
319 
325  template <typename ProtobufMessage> void decode(std::string* bytes, ProtobufMessage* msg)
326  {
327  decode(*bytes, msg);
328  unsigned last_size = size(*msg);
329  bytes->erase(0, last_size);
330  }
331 
339  template <typename GoogleProtobufMessagePointer>
340  GoogleProtobufMessagePointer decode(const std::string& bytes, bool header_only = false);
341 
348  template <typename GoogleProtobufMessagePointer>
349  GoogleProtobufMessagePointer decode(std::string* bytes);
350 
357  unsigned size(const google::protobuf::Message& msg, int user_id = -1);
358 
363  template <typename ProtobufMessage> unsigned max_size()
364  {
365  return max_size(ProtobufMessage::descriptor());
366  }
367 
369  unsigned max_size(const google::protobuf::Descriptor* desc) const;
370 
375  template <typename ProtobufMessage> unsigned min_size()
376  {
377  return min_size(ProtobufMessage::descriptor());
378  }
379 
381  unsigned min_size(const google::protobuf::Descriptor* desc) const;
382 
384 
385  static std::string default_id_codec_name() { return "dccl.default.id"; }
386 
387  static std::string default_codec_name(int version = 2)
388  {
389  return "dccl.default" + std::to_string(version);
390  }
391 
392  FieldCodecManagerLocal& manager() { return manager_; }
393 
394  private:
395  void encode_internal(const google::protobuf::Message& msg, bool header_only,
396  Bitset& header_bits, Bitset& body_bits, int user_id);
397  std::string get_all_error_fields_in_message(const google::protobuf::Message& msg,
398  uint8_t depth = 1);
399 
400  void encrypt(std::string* s, const std::string& nonce);
401  void decrypt(std::string* s, const std::string& nonce);
402 
403  void set_default_codecs();
404 
405  std::shared_ptr<FieldCodecBase> id_codec() const
406  {
407  return manager_.find(google::protobuf::FieldDescriptor::TYPE_UINT32, DCCL_VERSION_MAJOR,
408  id_codec_);
409  }
410 
411  int32 id_internal(const google::protobuf::Descriptor* desc, int user_id)
412  {
413  // if we have omit_id, check for or assign an autogenerate negative internal placeholder ID
414  if (desc->options().GetExtension(dccl::msg).omit_id() && !desc2placeholder_id_.count(desc))
415  desc2placeholder_id_.insert(std::make_pair(desc, omit_id_placeholder_id_--));
416 
417  return id_internal_const(desc, user_id);
418  }
419 
420  int32 id_internal_const(const google::protobuf::Descriptor* desc, int user_id) const
421  {
422  if (desc->options().GetExtension(dccl::msg).omit_id())
423  {
424  if (desc2placeholder_id_.count(desc))
425  return desc2placeholder_id_.find(desc)->second;
426  else
427  throw(Exception("Message " + desc->full_name() +
428  " has omit_id == true but has not been loaded, so id_internal() "
429  "const cannot be called"));
430  }
431  else
432  {
433  return (user_id < 0) ? id(desc) : user_id;
434  }
435  }
436 
437  private:
438  // SHA256 hash of the crypto passphrase
439  std::string crypto_key_;
440 
441  // strict mode setting
442  bool strict_{false};
443 
444  // console outputting format width
445  unsigned console_width_{60};
446 
447  // set of DCCL IDs *not* to encrypt
448  std::set<int32> skip_crypto_ids_;
449 
450  // maps `dccl.id`s onto Message Descriptors
451  std::map<int32, const google::protobuf::Descriptor*> id2desc_;
452  std::string id_codec_;
453 
454  std::vector<void*> dl_handles_;
455 
456  std::string build_guard_for_console_output(std::string& base, char guard_char) const;
457 
458  FieldCodecManagerLocal manager_;
459 
460  // current omit_id placeholder DCCL Id (starts at -1 and decrements)
461  int32 omit_id_placeholder_id_{-1};
462  // maps message descriptor onto placeholder ID for omit_id messages
463  std::map<const google::protobuf::Descriptor*, int32> desc2placeholder_id_;
464 };
465 
466 inline std::ostream& operator<<(std::ostream& os, const Codec& codec)
467 {
468  codec.info_all(&os);
469  return os;
470 }
471 } // namespace dccl
472 
473 template <typename GoogleProtobufMessagePointer>
474 GoogleProtobufMessagePointer dccl::Codec::decode(const std::string& bytes,
475  bool header_only /* = false */)
476 {
477  int32 this_id = id(bytes);
478 
479  if (!id2desc_.count(this_id))
480  throw(Exception("Message id " + std::to_string(this_id) +
481  " has not been loaded. Call load() before decoding this type."));
482 
483  // ownership of this object goes to the caller of decode()
484  auto msg = dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(
485  id2desc_.find(this_id)->second);
486  decode(bytes, &(*msg), header_only);
487  return msg;
488 }
489 
490 template <typename GoogleProtobufMessagePointer>
491 GoogleProtobufMessagePointer dccl::Codec::decode(std::string* bytes)
492 {
493  int32 this_id = id(*bytes);
494 
495  if (!id2desc_.count(this_id))
496  throw(Exception("Message id " + std::to_string(this_id) +
497  " has not been loaded. Call load() before decoding this type."));
498 
499  GoogleProtobufMessagePointer msg =
500  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(
501  id2desc_.find(this_id)->second);
502  std::string::iterator new_begin = decode(bytes->begin(), bytes->end(), &(*msg));
503  bytes->erase(bytes->begin(), new_begin);
504  return msg;
505 }
506 
507 template <typename CharIterator>
508 dccl::int32 dccl::Codec::id(CharIterator begin, CharIterator end) const
509 {
510  try
511  {
512  unsigned id_min_size = 0, id_max_size = 0;
513  id_codec()->field_min_size(&id_min_size, nullptr);
514  id_codec()->field_max_size(&id_max_size, nullptr);
515  Bitset fixed_header_bits;
516 
517  // ensure we don't go past-the-end if fewer bytes are passed in than id_max_size
518  int incr = std::min<size_t>(
519  static_cast<size_t>(std::distance(begin, end)),
520  static_cast<size_t>(std::ceil(static_cast<double>(id_max_size) / BITS_IN_BYTE)));
521  fixed_header_bits.from_byte_stream(begin, begin + incr);
522 
523  Bitset these_bits(&fixed_header_bits);
524  these_bits.get_more_bits(id_min_size);
525 
526  dccl::any return_value;
527  id_codec()->field_decode(&these_bits, &return_value, nullptr);
528  return dccl::any_cast<uint32>(return_value);
529  }
530  catch (const dccl::Exception& e)
531  {
532  throw(Exception("Failed to decoded id from bytes passed (hex: " + hex_encode(begin, end) +
533  ")"));
534  }
535 }
536 
537 template <typename CharIterator, typename ProtobufMessage>
538 CharIterator dccl::Codec::decode(CharIterator begin, CharIterator end, ProtobufMessage* msg,
539  bool header_only /*= false*/)
540 {
541  try
542  {
543  const google::protobuf::Descriptor* desc = msg->GetDescriptor();
544  int32 expected_id = id_internal(desc, -1);
545  int32 received_id =
546  expected_id; // if omit_id, we have to assume we have the correct type. Otherwise, overwrite if not omit_id and check
547  if (!desc->options().GetExtension(dccl::msg).omit_id())
548  {
549  received_id = id(begin, end);
550 
551  if (!id2desc_.count(received_id))
552  throw(Exception("Message id " + std::to_string(received_id) +
553  " has not been loaded. Call load() before decoding this type."));
554 
555  if (expected_id != received_id)
556  throw(Exception("Received message with id " + std::to_string(received_id) + " (" +
557  id2desc_.at(received_id)->full_name() +
558  ") but decode was called with message of id " +
559  std::to_string(expected_id) + " (" + desc->full_name() +
560  "). Ensure dccl::Codec::decode is called with the correct Protobuf "
561  "message or use the dynamic overloads of decode."));
562  }
563 
564  dlog.is(logger::DEBUG1, logger::DECODE) &&
565  dlog << "Began decoding message of id: " << received_id << std::endl;
566 
567  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Type name: " << desc->full_name()
568  << std::endl;
569 
570  std::shared_ptr<FieldCodecBase> codec = manager_.find(desc);
571  std::shared_ptr<internal::FromProtoCppTypeBase> helper = manager_.type_helper().find(desc);
572 
573  CharIterator actual_end = end;
574  if (codec)
575  {
576  unsigned head_size_bits;
577  unsigned body_size_bits;
578  codec->base_max_size(&head_size_bits, desc, HEAD);
579  codec->base_max_size(&body_size_bits, desc, BODY);
580  unsigned id_size = 0;
581  if (!desc->options().GetExtension(dccl::msg).omit_id())
582  id_codec()->field_size(&id_size, static_cast<uint32>(received_id), nullptr);
583  head_size_bits += id_size;
584 
585  unsigned head_size_bytes = ceil_bits2bytes(head_size_bits);
586  unsigned body_size_bytes = ceil_bits2bytes(body_size_bits);
587 
588  dlog.is(logger::DEBUG2, logger::DECODE) &&
589  dlog << "Head bytes (bits): " << head_size_bytes << "(" << head_size_bits
590  << "), max body bytes (bits): " << body_size_bytes << "(" << body_size_bits
591  << ")" << std::endl;
592 
593  CharIterator head_bytes_end = begin + head_size_bytes;
594  dlog.is(logger::DEBUG3, logger::DECODE) &&
595  dlog << "Unencrypted Head (hex): " << hex_encode(begin, head_bytes_end)
596  << std::endl;
597 
598  Bitset head_bits;
599  head_bits.from_byte_stream(begin, head_bytes_end);
600  dlog.is(logger::DEBUG3, logger::DECODE) &&
601  dlog << "Unencrypted Head (bin): " << head_bits << std::endl;
602 
603  // shift off ID bits
604  head_bits >>= id_size;
605 
606  dlog.is(logger::DEBUG3, logger::DECODE) &&
607  dlog << "Unencrypted Head after ID bits removal (bin): " << head_bits << std::endl;
608 
609  internal::MessageStack msg_stack(manager_.codec_data().root_message_,
610  manager_.codec_data().message_data_);
611  msg_stack.push(msg->GetDescriptor());
612 
613  codec->base_decode(&head_bits, msg, HEAD);
614  dlog.is(logger::DEBUG2, logger::DECODE) &&
615  dlog << "after header decode, message is: " << *msg << std::endl;
616 
617  if (header_only)
618  {
619  dlog.is(logger::DEBUG2, logger::DECODE) &&
620  dlog << "as requested, skipping decrypting and decoding body." << std::endl;
621  actual_end = head_bytes_end;
622  }
623  else
624  {
625  dlog.is(logger::DEBUG3, logger::DECODE) &&
626  dlog << "Encrypted Body (hex): " << hex_encode(head_bytes_end, end)
627  << std::endl;
628 
629  Bitset body_bits;
630  if (!crypto_key_.empty() && !skip_crypto_ids_.count(received_id))
631  {
632  std::string head_bytes(begin, head_bytes_end);
633  std::string body_bytes(head_bytes_end, end);
634  decrypt(&body_bytes, head_bytes);
635  dlog.is(logger::DEBUG3, logger::DECODE) &&
636  dlog << "Unencrypted Body (hex): " << hex_encode(body_bytes) << std::endl;
637  body_bits.from_byte_stream(body_bytes.begin(), body_bytes.end());
638  }
639  else
640  {
641  dlog.is(logger::DEBUG3, logger::DECODE) &&
642  dlog << "Unencrypted Body (hex): " << hex_encode(head_bytes_end, end)
643  << std::endl;
644  body_bits.from_byte_stream(head_bytes_end, end);
645  }
646 
647  dlog.is(logger::DEBUG3, logger::DECODE) &&
648  dlog << "Unencrypted Body (bin): " << body_bits << std::endl;
649 
650  codec->base_decode(&body_bits, msg, BODY);
651  dlog.is(logger::DEBUG2, logger::DECODE) &&
652  dlog << "after header & body decode, message is: " << *msg << std::endl;
653 
654  actual_end = end - body_bits.size() / BITS_IN_BYTE;
655  }
656  }
657  else
658  {
659  throw(Exception("Failed to find (dccl.msg).codec `" +
660  desc->options().GetExtension(dccl::msg).codec() + "`"),
661  desc);
662  }
663 
664  dlog.is(logger::DEBUG1, logger::DECODE) &&
665  dlog << "Successfully decoded message of type: " << desc->full_name() << std::endl;
666  return actual_end;
667  }
668  catch (std::exception& e)
669  {
670  std::stringstream ss;
671 
672  ss << "Message " << hex_encode(begin, end) << " failed to decode. Reason: " << e.what()
673  << std::endl;
674 
675  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << ss.str() << std::endl;
676  throw(Exception(ss.str()));
677  }
678 }
679 
680 #endif
A variable size container of bits (subclassed from std::deque<bool>) with an optional hierarchy....
Definition: bitset.h:43
std::string to_byte_string()
Returns the value of the Bitset to a byte string, where each character represents 8 bits of the Bitse...
Definition: bitset.h:297
void from_byte_stream(CharIterator begin, CharIterator end)
Sets the value of the Bitset to the contents of a byte string, where each character represents 8 bits...
Definition: bitset.h:341
The Dynamic CCL enCODer/DECoder. This is the main class you will use to load, encode and decode DCCL ...
Definition: codec.h:63
int32 id(const google::protobuf::Descriptor *desc) const
Provides the DCCL ID given a DCCL type.
Definition: codec.h:248
void info(std::ostream *os=nullptr, int user_id=-1) const
Writes a human readable summary (including field sizes) of the provided DCCL type to the stream provi...
Definition: codec.h:197
void set_strict(bool mode)
Set "strict" mode where a dccl::OutOfRangeException will be thrown for encode if the value(s) provide...
Definition: codec.h:176
void info_all(std::ostream *os=nullptr) const
Writes a human readable summary (including field sizes) of all the loaded (validated) DCCL types.
Definition: codec.cpp:707
void set_id_codec(const std::string &id_codec_name)
Set a different ID codec name (note that this calls unload_all() so all messages must be reloaded)
Definition: codec.cpp:732
unsigned min_size()
Provides the encoded minimum size (in bytes) of msg.
Definition: codec.h:375
void decode(std::string *bytes, ProtobufMessage *msg)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:325
Codec(std::string dccl_id_codec_name=default_id_codec_name(), const std::string &library_path="")
Instantiate a Codec, optionally with a non-default identifier field codec (loaded via a shared librar...
Definition: codec.cpp:73
int32 id(CharIterator begin, CharIterator end) const
Get the DCCL ID of an unknown encoded DCCL message (Iterator overload).
int32 id() const
Gives the DCCL id (defined by the custom message option extension "(dccl.msg).id" in the ....
Definition: codec.h:218
void unload_library(void *dl_handle)
Remove codecs and/or unload messages present in the given shared library handle.
Definition: codec.cpp:671
unsigned max_size()
Provides the encoded maximum size (in bytes) of msg.
Definition: codec.h:363
void encode(std::string *bytes, const google::protobuf::Message &msg, bool header_only=false, int user_id=-1)
Encodes a DCCL message.
Definition: codec.cpp:272
void set_console_width(unsigned num_chars)
Set the number of characters used in programmatic generation of console outputs.
Definition: codec.h:181
void unload()
Unload a given message.
Definition: codec.h:129
const std::map< int32, const google::protobuf::Descriptor * > & loaded() const
Provides a map of all loaded DCCL IDs to the equivalent Protobuf descriptor.
Definition: codec.h:263
void set_crypto_passphrase(const std::string &passphrase, const std::set< int32 > &do_not_encrypt_ids=std::set< int32 >())
Set a passphrase to be used when encoding messages to encrypt them and to decrypt messages after decod...
Definition: codec.cpp:683
virtual ~Codec()
Destructor.
Definition: codec.cpp:85
CharIterator decode(CharIterator begin, CharIterator end, ProtobufMessage *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:538
Codec(const std::string &dccl_id_codec_name, const IDFieldCodec &dccl_id_codec)
Instantiate a Codec with a non-default identifier field codec (loaded directly).
Definition: codec.h:82
void load_library(void *dl_handle)
Add codecs and/or load messages present in the given shared library handle.
Definition: codec.cpp:659
unsigned size(const google::protobuf::Message &msg, int user_id=-1)
Provides the encoded size (in bytes) of msg. This is useful if you need to know the size of a message...
Definition: codec.cpp:447
std::size_t load()
All messages must be explicitly loaded and validated (size checks, option extensions checks,...
Definition: codec.h:121
void decode(const std::string &bytes, ProtobufMessage *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:315
Exception class for DCCL.
Definition: exception.h:47
std::shared_ptr< FieldCodecBase > find(const google::protobuf::FieldDescriptor *field, int codec_version, bool has_codec_group, const std::string &codec_group) const
Find the codec for a given field. For embedded messages, prefers (dccl.field).codec (inside field) ov...
std::enable_if< std::is_base_of< google::protobuf::Message, typename Codec::wire_type >::value &&!std::is_same< google::protobuf::Message, typename Codec::wire_type >::value, void >::type add(const std::string &name)
Add a new field codec (used for codecs operating on statically generated Protobuf messages,...
bool is(logger::Verbosity verbosity, logger::Group group=logger::GENERAL)
Indicates the verbosity of the Logger until the next std::flush or std::endl. The boolean return is u...
Definition: logger.h:192
Dynamic Compact Control Language namespace.
Definition: any.h:47
google::protobuf::int32 int32
a signed 32 bit integer
Definition: common.h:58
google::protobuf::uint32 uint32
an unsigned 32 bit integer
Definition: common.h:56
void hex_encode(CharIterator begin, CharIterator end, std::string *out, bool upper_case=false)
Encodes a (little-endian) hexadecimal string from a byte string. Index 0 of begin is written to index...
Definition: binary.h:100