DCCL v4
codec.h
1 // Copyright 2009-2023:
2 // GobySoft, LLC (2013-)
3 // Massachusetts Institute of Technology (2007-2014)
4 // Community contributors (see AUTHORS file)
5 // File authors:
6 // Toby Schneider <toby@gobysoft.org>
7 // Nathan Knotts <nknotts@gmail.com>
8 // philboske <philboske@gmail.com>
9 // Chris Murphy <cmurphy@aphysci.com>
10 //
11 //
12 // This file is part of the Dynamic Compact Control Language Library
13 // ("DCCL").
14 //
15 // DCCL is free software: you can redistribute it and/or modify
16 // it under the terms of the GNU Lesser General Public License as published by
17 // the Free Software Foundation, either version 2.1 of the License, or
18 // (at your option) any later version.
19 //
20 // DCCL is distributed in the hope that it will be useful,
21 // but WITHOUT ANY WARRANTY; without even the implied warranty of
22 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 // GNU Lesser General Public License for more details.
24 //
25 // You should have received a copy of the GNU Lesser General Public License
26 // along with DCCL. If not, see <http://www.gnu.org/licenses/>.
27 #ifndef DCCL20091211H
28 #define DCCL20091211H
29 
30 #include <map>
31 #include <ostream>
32 #include <set>
33 #include <stdexcept>
34 #include <string>
35 #include <type_traits>
36 #include <vector>
37 
38 #include <google/protobuf/descriptor.h>
39 
40 #include <memory>
41 
42 #include "binary.h"
43 #include "dynamic_protobuf_manager.h"
44 #include "exception.h"
45 #include "field_codec.h"
46 #include "field_codec_fixed.h"
47 #include "logger.h"
48 
49 #include "codecs2/field_codec_default_message.h"
50 #include "codecs3/field_codec_default_message.h"
51 #include "dccl/def.h"
52 #include "dccl/version.h"
53 #include "field_codec_manager.h"
54 
56 namespace dccl
57 {
58 class FieldCodec;
59 
62 class Codec
63 {
64  public:
70  Codec(std::string dccl_id_codec_name = default_id_codec_name(),
71  const std::string& library_path = "");
72 
79  template <class IDFieldCodec,
80  typename std::enable_if<std::is_base_of<FieldCodecBase, IDFieldCodec>::value,
81  int>::type = 0>
82  Codec(const std::string& dccl_id_codec_name, const IDFieldCodec& dccl_id_codec) // NOLINT
83  : id_codec_(dccl_id_codec_name)
84  {
85  set_default_codecs();
86  manager_.add<IDFieldCodec>(dccl_id_codec_name);
87  }
88 
90  virtual ~Codec();
91 
92  Codec(const Codec&) = delete;
93  Codec& operator=(const Codec&) = delete;
94 
100  void load_library(void* dl_handle);
101 
108  void unload_library(void* dl_handle);
109 
114  void load_library(const std::string& library_path);
115 
121  template <typename ProtobufMessage> std::size_t load()
122  {
123  return load(ProtobufMessage::descriptor());
124  }
125 
129  template <typename ProtobufMessage> void unload() { unload(ProtobufMessage::descriptor()); }
130 
131  void unload_all() { id2desc_.clear(); }
132 
140  std::size_t load(const google::protobuf::Descriptor* desc, int user_id = -1);
141 
146  void unload(const google::protobuf::Descriptor* desc);
147 
152  void unload(size_t dccl_id);
153 
155  void set_id_codec(const std::string& id_codec_name);
156  std::string get_id_codec() { return id_codec_; }
157 
163  void set_crypto_passphrase(const std::string& passphrase,
164  const std::set<int32>& do_not_encrypt_ids = std::set<int32>());
165 
166  void set_crypto_passphrase(const std::string& passphrase,
167  const std::set<unsigned>& do_not_encrypt_ids)
168  {
169  std::set<int32> s_ids{do_not_encrypt_ids.begin(), do_not_encrypt_ids.end()};
170  set_crypto_passphrase(passphrase, s_ids);
171  }
172 
176  void set_strict(bool mode) { strict_ = mode; }
177 
181  void set_console_width(unsigned num_chars) { console_width_ = num_chars; }
182 
184 
188 
189 
196  template <typename ProtobufMessage>
197  void info(std::ostream* os = nullptr, int user_id = -1) const
198  {
199  info(ProtobufMessage::descriptor(), os, user_id);
200  }
201 
207  void info(const google::protobuf::Descriptor* desc, std::ostream* os = nullptr,
208  int user_id = -1) const;
209 
213  void info_all(std::ostream* os = nullptr) const;
214 
218  template <typename ProtobufMessage> int32 id() const
219  {
220  return id(ProtobufMessage::descriptor());
221  }
222 
242  int32 id(const std::string& bytes) const;
243 
245  template <typename CharIterator> int32 id(CharIterator begin, CharIterator end) const;
246 
248  int32 id(const google::protobuf::Descriptor* desc) const
249  {
250  if (desc->options().GetExtension(dccl::msg).omit_id())
251  throw(Exception("Cannot call id(...) on message with omit_id == true"));
252  Bitset id_bits;
253  dccl::uint32 hardcoded_id = desc->options().GetExtension(dccl::msg).id();
254  // pass the hard coded id, that is, (dccl.msg).id,
255  // through encode/decode to allow a custom ID codec (if in use)
256  // to always take effect.
257  id_codec()->field_encode(&id_bits, hardcoded_id, nullptr);
258  std::string id_bytes(id_bits.to_byte_string());
259  return id(id_bytes);
260  }
261 
263  const std::map<int32, const google::protobuf::Descriptor*>& loaded() const { return id2desc_; }
264 
266 
270 
271 
280  void encode(std::string* bytes, const google::protobuf::Message& msg, bool header_only = false,
281  int user_id = -1);
282 
293  size_t encode(char* bytes, size_t max_len, const google::protobuf::Message& msg,
294  bool header_only = false, int user_id = -1);
295 
304  template <typename CharIterator, typename ProtobufMessage>
305  CharIterator decode(CharIterator begin, CharIterator end, ProtobufMessage* msg,
306  bool header_only = false);
307 
314  template <typename ProtobufMessage>
315  void decode(const std::string& bytes, ProtobufMessage* msg, bool header_only = false)
316  {
317  decode(bytes.begin(), bytes.end(), msg, header_only);
318  }
319 
325  template <typename ProtobufMessage> void decode(std::string* bytes, ProtobufMessage* msg)
326  {
327  decode(*bytes, msg);
328  unsigned last_size = size(*msg);
329  bytes->erase(0, last_size);
330  }
331 
339  template <typename GoogleProtobufMessagePointer>
340  GoogleProtobufMessagePointer decode(const std::string& bytes, bool header_only = false);
341 
348  template <typename GoogleProtobufMessagePointer>
349  GoogleProtobufMessagePointer decode(std::string* bytes);
350 
357  unsigned size(const google::protobuf::Message& msg, int user_id = -1);
358 
363  template <typename ProtobufMessage> unsigned max_size()
364  {
365  return max_size(ProtobufMessage::descriptor());
366  }
367 
369  unsigned max_size(const google::protobuf::Descriptor* desc) const;
370 
375  template <typename ProtobufMessage> unsigned min_size()
376  {
377  return min_size(ProtobufMessage::descriptor());
378  }
379 
381  unsigned min_size(const google::protobuf::Descriptor* desc) const;
382 
384 
385  static std::string default_id_codec_name() { return "dccl.default.id"; }
386 
387  static std::string default_codec_name(int version = 2)
388  {
389  return "dccl.default" + std::to_string(version);
390  }
391 
392  FieldCodecManagerLocal& manager() { return manager_; }
393 
394  private:
395  void encode_internal(const google::protobuf::Message& msg, bool header_only,
396  Bitset& header_bits, Bitset& body_bits, int user_id);
397  std::string get_all_error_fields_in_message(const google::protobuf::Message& msg,
398  uint8_t depth = 1);
399 
400  void encrypt(std::string* s, const std::string& nonce);
401  void decrypt(std::string* s, const std::string& nonce);
402 
403  void set_default_codecs();
404 
405  std::shared_ptr<FieldCodecBase> id_codec() const
406  {
407  return manager_.find(google::protobuf::FieldDescriptor::TYPE_UINT32, DCCL_VERSION_MAJOR,
408  id_codec_);
409  }
410 
411  int32 id_internal(const google::protobuf::Descriptor* desc, int user_id)
412  {
413  // if we have omit_id, check for or assign an autogenerate negative internal placeholder ID
414  if (desc->options().GetExtension(dccl::msg).omit_id() && !desc2placeholder_id_.count(desc))
415  desc2placeholder_id_.insert(std::make_pair(desc, omit_id_placeholder_id_--));
416 
417  return id_internal_const(desc, user_id);
418  }
419 
420  int32 id_internal_const(const google::protobuf::Descriptor* desc, int user_id) const
421  {
422  if (desc->options().GetExtension(dccl::msg).omit_id())
423  {
424  if (desc2placeholder_id_.count(desc))
425  return desc2placeholder_id_.find(desc)->second;
426  else
427  throw(Exception("Message " + desc->full_name() +
428  " has omit_id == true but has not been loaded, so id_internal() "
429  "const cannot be called"));
430  }
431  else
432  {
433  return (user_id < 0) ? id(desc) : user_id;
434  }
435  }
436 
437  private:
438  // SHA256 hash of the crypto passphrase
439  std::string crypto_key_;
440 
441  // strict mode setting
442  bool strict_{false};
443 
444  // console outputting format width
445  unsigned console_width_{60};
446 
447  // set of DCCL IDs *not* to encrypt
448  std::set<int32> skip_crypto_ids_;
449 
450  // maps `dccl.id`s onto Message Descriptors
451  std::map<int32, const google::protobuf::Descriptor*> id2desc_;
452  std::string id_codec_;
453 
454  std::vector<void*> dl_handles_;
455 
456  std::string build_guard_for_console_output(std::string& base, char guard_char) const;
457 
458  FieldCodecManagerLocal manager_;
459 
460  // current omit_id placeholder DCCL Id (starts at -1 and decrements)
461  int32 omit_id_placeholder_id_{-1};
462  // maps message descriptor onto placeholder ID for omit_id messages
463  std::map<const google::protobuf::Descriptor*, int32> desc2placeholder_id_;
464 };
465 
466 inline std::ostream& operator<<(std::ostream& os, const Codec& codec)
467 {
468  codec.info_all(&os);
469  return os;
470 }
471 } // namespace dccl
472 
473 template <typename GoogleProtobufMessagePointer>
474 GoogleProtobufMessagePointer dccl::Codec::decode(const std::string& bytes,
475  bool header_only /* = false */)
476 {
477  int32 this_id = id(bytes);
478 
479  if (!id2desc_.count(this_id))
480  throw(Exception("Message id " + std::to_string(this_id) +
481  " has not been loaded. Call load() before decoding this type."));
482 
483  // ownership of this object goes to the caller of decode()
484  auto msg = dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(
485  id2desc_.find(this_id)->second);
486  decode(bytes, &(*msg), header_only);
487  return msg;
488 }
489 
490 template <typename GoogleProtobufMessagePointer>
491 GoogleProtobufMessagePointer dccl::Codec::decode(std::string* bytes)
492 {
493  int32 this_id = id(*bytes);
494 
495  if (!id2desc_.count(this_id))
496  throw(Exception("Message id " + std::to_string(this_id) +
497  " has not been loaded. Call load() before decoding this type."));
498 
499  GoogleProtobufMessagePointer msg =
500  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(
501  id2desc_.find(this_id)->second);
502  std::string::iterator new_begin = decode(bytes->begin(), bytes->end(), &(*msg));
503  bytes->erase(bytes->begin(), new_begin);
504  return msg;
505 }
506 
507 template <typename CharIterator>
508 dccl::int32 dccl::Codec::id(CharIterator begin, CharIterator end) const
509 {
510  unsigned id_min_size = 0, id_max_size = 0;
511  id_codec()->field_min_size(&id_min_size, nullptr);
512  id_codec()->field_max_size(&id_max_size, nullptr);
513 
514  if (std::distance(begin, end) < (id_min_size / BITS_IN_BYTE))
515  throw(Exception("Bytes passed (hex: " + hex_encode(begin, end) +
516  ") is too small to be a valid DCCL message"));
517 
518  Bitset fixed_header_bits;
519  fixed_header_bits.from_byte_stream(
520  begin, begin + (size_t)std::ceil(double(id_max_size) / BITS_IN_BYTE));
521 
522  Bitset these_bits(&fixed_header_bits);
523  these_bits.get_more_bits(id_min_size);
524 
525  dccl::any return_value;
526  id_codec()->field_decode(&these_bits, &return_value, nullptr);
527 
528  return dccl::any_cast<uint32>(return_value);
529 }
530 
531 template <typename CharIterator, typename ProtobufMessage>
532 CharIterator dccl::Codec::decode(CharIterator begin, CharIterator end, ProtobufMessage* msg,
533  bool header_only /*= false*/)
534 {
535  try
536  {
537  const google::protobuf::Descriptor* desc = msg->GetDescriptor();
538  int32 expected_id = id_internal(desc, -1);
539  int32 received_id =
540  expected_id; // if omit_id, we have to assume we have the correct type. Otherwise, overwrite if not omit_id and check
541  if (!desc->options().GetExtension(dccl::msg).omit_id())
542  {
543  received_id = id(begin, end);
544 
545  if (!id2desc_.count(received_id))
546  throw(Exception("Message id " + std::to_string(received_id) +
547  " has not been loaded. Call load() before decoding this type."));
548 
549  if (expected_id != received_id)
550  throw(Exception("Received message with id " + std::to_string(received_id) + " (" +
551  id2desc_.at(received_id)->full_name() +
552  ") but decode was called with message of id " +
553  std::to_string(expected_id) + " (" + desc->full_name() +
554  "). Ensure dccl::Codec::decode is called with the correct Protobuf "
555  "message or use the dynamic overloads of decode."));
556  }
557 
558  dlog.is(logger::DEBUG1, logger::DECODE) &&
559  dlog << "Began decoding message of id: " << received_id << std::endl;
560 
561  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Type name: " << desc->full_name()
562  << std::endl;
563 
564  std::shared_ptr<FieldCodecBase> codec = manager_.find(desc);
565  std::shared_ptr<internal::FromProtoCppTypeBase> helper = manager_.type_helper().find(desc);
566 
567  CharIterator actual_end = end;
568  if (codec)
569  {
570  unsigned head_size_bits;
571  unsigned body_size_bits;
572  codec->base_max_size(&head_size_bits, desc, HEAD);
573  codec->base_max_size(&body_size_bits, desc, BODY);
574  unsigned id_size = 0;
575  if (!desc->options().GetExtension(dccl::msg).omit_id())
576  id_codec()->field_size(&id_size, static_cast<uint32>(received_id), nullptr);
577  head_size_bits += id_size;
578 
579  unsigned head_size_bytes = ceil_bits2bytes(head_size_bits);
580  unsigned body_size_bytes = ceil_bits2bytes(body_size_bits);
581 
582  dlog.is(logger::DEBUG2, logger::DECODE) &&
583  dlog << "Head bytes (bits): " << head_size_bytes << "(" << head_size_bits
584  << "), max body bytes (bits): " << body_size_bytes << "(" << body_size_bits
585  << ")" << std::endl;
586 
587  CharIterator head_bytes_end = begin + head_size_bytes;
588  dlog.is(logger::DEBUG3, logger::DECODE) &&
589  dlog << "Unencrypted Head (hex): " << hex_encode(begin, head_bytes_end)
590  << std::endl;
591 
592  Bitset head_bits;
593  head_bits.from_byte_stream(begin, head_bytes_end);
594  dlog.is(logger::DEBUG3, logger::DECODE) &&
595  dlog << "Unencrypted Head (bin): " << head_bits << std::endl;
596 
597  // shift off ID bits
598  head_bits >>= id_size;
599 
600  dlog.is(logger::DEBUG3, logger::DECODE) &&
601  dlog << "Unencrypted Head after ID bits removal (bin): " << head_bits << std::endl;
602 
603  internal::MessageStack msg_stack(manager_.codec_data().root_message_,
604  manager_.codec_data().message_data_);
605  msg_stack.push(msg->GetDescriptor());
606 
607  codec->base_decode(&head_bits, msg, HEAD);
608  dlog.is(logger::DEBUG2, logger::DECODE) &&
609  dlog << "after header decode, message is: " << *msg << std::endl;
610 
611  if (header_only)
612  {
613  dlog.is(logger::DEBUG2, logger::DECODE) &&
614  dlog << "as requested, skipping decrypting and decoding body." << std::endl;
615  actual_end = head_bytes_end;
616  }
617  else
618  {
619  dlog.is(logger::DEBUG3, logger::DECODE) &&
620  dlog << "Encrypted Body (hex): " << hex_encode(head_bytes_end, end)
621  << std::endl;
622 
623  Bitset body_bits;
624  if (!crypto_key_.empty() && !skip_crypto_ids_.count(received_id))
625  {
626  std::string head_bytes(begin, head_bytes_end);
627  std::string body_bytes(head_bytes_end, end);
628  decrypt(&body_bytes, head_bytes);
629  dlog.is(logger::DEBUG3, logger::DECODE) &&
630  dlog << "Unencrypted Body (hex): " << hex_encode(body_bytes) << std::endl;
631  body_bits.from_byte_stream(body_bytes.begin(), body_bytes.end());
632  }
633  else
634  {
635  dlog.is(logger::DEBUG3, logger::DECODE) &&
636  dlog << "Unencrypted Body (hex): " << hex_encode(head_bytes_end, end)
637  << std::endl;
638  body_bits.from_byte_stream(head_bytes_end, end);
639  }
640 
641  dlog.is(logger::DEBUG3, logger::DECODE) &&
642  dlog << "Unencrypted Body (bin): " << body_bits << std::endl;
643 
644  codec->base_decode(&body_bits, msg, BODY);
645  dlog.is(logger::DEBUG2, logger::DECODE) &&
646  dlog << "after header & body decode, message is: " << *msg << std::endl;
647 
648  actual_end = end - body_bits.size() / BITS_IN_BYTE;
649  }
650  }
651  else
652  {
653  throw(Exception("Failed to find (dccl.msg).codec `" +
654  desc->options().GetExtension(dccl::msg).codec() + "`"),
655  desc);
656  }
657 
658  dlog.is(logger::DEBUG1, logger::DECODE) &&
659  dlog << "Successfully decoded message of type: " << desc->full_name() << std::endl;
660  return actual_end;
661  }
662  catch (std::exception& e)
663  {
664  std::stringstream ss;
665 
666  ss << "Message " << hex_encode(begin, end) << " failed to decode. Reason: " << e.what()
667  << std::endl;
668 
669  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << ss.str() << std::endl;
670  throw(Exception(ss.str()));
671  }
672 }
673 
674 #endif
A variable size container of bits (subclassed from std::deque<bool>) with an optional hierarchy....
Definition: bitset.h:42
std::string to_byte_string()
Returns the value of the Bitset to a byte string, where each character represents 8 bits of the Bitse...
Definition: bitset.h:296
void from_byte_stream(CharIterator begin, CharIterator end)
Sets the value of the Bitset to the contents of a byte string, where each character represents 8 bits...
Definition: bitset.h:340
The Dynamic CCL enCODer/DECoder. This is the main class you will use to load, encode and decode DCCL ...
Definition: codec.h:63
int32 id(const google::protobuf::Descriptor *desc) const
Provides the DCCL ID given a DCCL type.
Definition: codec.h:248
void info(std::ostream *os=nullptr, int user_id=-1) const
Writes a human readable summary (including field sizes) of the provided DCCL type to the stream provi...
Definition: codec.h:197
void set_strict(bool mode)
Set "strict" mode where a dccl::OutOfRangeException will be thrown for encode if the value(s) provide...
Definition: codec.h:176
void info_all(std::ostream *os=nullptr) const
Writes a human readable summary (including field sizes) of all the loaded (validated) DCCL types.
Definition: codec.cpp:709
void set_id_codec(const std::string &id_codec_name)
Set a different ID codec name (note that this calls unload_all(), so all messages must be reloaded)
Definition: codec.cpp:734
unsigned min_size()
Provides the encoded minimum size (in bytes) of msg.
Definition: codec.h:375
void decode(std::string *bytes, ProtobufMessage *msg)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:325
Codec(std::string dccl_id_codec_name=default_id_codec_name(), const std::string &library_path="")
Instantiate a Codec, optionally with a non-default identifier field codec (loaded via a shared librar...
Definition: codec.cpp:75
int32 id(CharIterator begin, CharIterator end) const
Get the DCCL ID of an unknown encoded DCCL message (Iterator overload).
int32 id() const
Gives the DCCL id (defined by the custom message option extension "(dccl.msg).id" in the ....
Definition: codec.h:218
void unload_library(void *dl_handle)
Remove codecs and/or unload messages present in the given shared library handle.
Definition: codec.cpp:673
unsigned max_size()
Provides the encoded maximum size (in bytes) of msg.
Definition: codec.h:363
void encode(std::string *bytes, const google::protobuf::Message &msg, bool header_only=false, int user_id=-1)
Encodes a DCCL message.
Definition: codec.cpp:274
void set_console_width(unsigned num_chars)
Set the number of characters used in programmatic generation of console outputs.
Definition: codec.h:181
void unload()
Unload a given message.
Definition: codec.h:129
const std::map< int32, const google::protobuf::Descriptor * > & loaded() const
Provides a map of all loaded DCCL IDs to the equivalent Protobuf descriptor.
Definition: codec.h:263
void set_crypto_passphrase(const std::string &passphrase, const std::set< int32 > &do_not_encrypt_ids=std::set< int32 >())
Set a passphrase to be used when encoding messages to encrypt them and to decrypt messages after decod...
Definition: codec.cpp:685
virtual ~Codec()
Destructor.
Definition: codec.cpp:87
CharIterator decode(CharIterator begin, CharIterator end, ProtobufMessage *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:532
Codec(const std::string &dccl_id_codec_name, const IDFieldCodec &dccl_id_codec)
Instantiate a Codec with a non-default identifier field codec (loaded directly).
Definition: codec.h:82
void load_library(void *dl_handle)
Add codecs and/or load messages present in the given shared library handle.
Definition: codec.cpp:661
unsigned size(const google::protobuf::Message &msg, int user_id=-1)
Provides the encoded size (in bytes) of msg. This is useful if you need to know the size of a message...
Definition: codec.cpp:449
std::size_t load()
All messages must be explicitly loaded and validated (size checks, option extensions checks,...
Definition: codec.h:121
void decode(const std::string &bytes, ProtobufMessage *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:315
Exception class for DCCL.
Definition: exception.h:48
std::shared_ptr< FieldCodecBase > find(const google::protobuf::FieldDescriptor *field, int codec_version, bool has_codec_group, const std::string &codec_group) const
Find the codec for a given field. For embedded messages, prefers (dccl.field).codec (inside field) ov...
std::enable_if< std::is_base_of< google::protobuf::Message, typename Codec::wire_type >::value &&!std::is_same< google::protobuf::Message, typename Codec::wire_type >::value, void >::type add(const std::string &name)
Add a new field codec (used for codecs operating on statically generated Protobuf messages,...
bool is(logger::Verbosity verbosity, logger::Group group=logger::GENERAL)
Indicates the verbosity of the Logger until the next std::flush or std::endl. The boolean return is u...
Definition: logger.h:192
Dynamic Compact Control Language namespace.
Definition: any.h:50
google::protobuf::int32 int32
a signed 32 bit integer
Definition: common.h:58
google::protobuf::uint32 uint32
an unsigned 32 bit integer
Definition: common.h:56
void hex_encode(CharIterator begin, CharIterator end, std::string *out, bool upper_case=false)
Encodes a (little-endian) hexadecimal string from a byte string. Index 0 of begin is written to index...
Definition: binary.h:100