// File: text_format.h
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: jschorr@google.com (Joseph Schorr)
// Based on original Protocol Buffers design by
// Sanjay Ghemawat, Jeff Dean, and others.
//
// Utilities for printing and parsing protocol messages in a human-readable,
// text-based format.
#ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
#define GOOGLE_PROTOBUF_TEXT_FORMAT_H__

#include <map>
#include <memory>
#include <string>
#include <vector>

#include <google/protobuf/stubs/common.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/message.h>
  45. namespace google {
  46. namespace protobuf {
  // Forward declaration only, so that this header does not have to include
  // the tokenizer header just to name the type in a pointer parameter.
  namespace io {
  class ErrorCollector;  // tokenizer.h
  }  // namespace io
// This class implements the protocol buffer text format. Printing and
// parsing protocol messages in text format is useful for debugging and for
// human editing of messages.
//
// This class is really a namespace that contains only static methods.
  55. class LIBPROTOBUF_EXPORT TextFormat {
  56. public:
  57. // Outputs a textual representation of the given message to the given
  58. // output stream.
  59. static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
  60. // Print the fields in an UnknownFieldSet. They are printed by tag number
  61. // only. Embedded messages are heuristically identified by attempting to
  62. // parse them.
  63. static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
  64. io::ZeroCopyOutputStream* output);
  65. // Like Print(), but outputs directly to a string.
  66. static bool PrintToString(const Message& message, string* output);
  67. // Like PrintUnknownFields(), but outputs directly to a string.
  68. static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
  69. string* output);
  70. // Outputs a textual representation of the value of the field supplied on
  71. // the message supplied. For non-repeated fields, an index of -1 must
  72. // be supplied. Note that this method will print the default value for a
  73. // field if it is not set.
  74. static void PrintFieldValueToString(const Message& message,
  75. const FieldDescriptor* field,
  76. int index,
  77. string* output);
  78. // The default printer that converts scalar values from fields into
  79. // their string representation.
  80. // You can derive from this FieldValuePrinter if you want to have
  81. // fields to be printed in a different way and register it at the
  82. // Printer.
  83. class LIBPROTOBUF_EXPORT FieldValuePrinter {
  84. public:
  85. FieldValuePrinter();
  86. virtual ~FieldValuePrinter();
  87. virtual string PrintBool(bool val) const;
  88. virtual string PrintInt32(int32 val) const;
  89. virtual string PrintUInt32(uint32 val) const;
  90. virtual string PrintInt64(int64 val) const;
  91. virtual string PrintUInt64(uint64 val) const;
  92. virtual string PrintFloat(float val) const;
  93. virtual string PrintDouble(double val) const;
  94. virtual string PrintString(const string& val) const;
  95. virtual string PrintBytes(const string& val) const;
  96. virtual string PrintEnum(int32 val, const string& name) const;
  97. virtual string PrintFieldName(const Message& message,
  98. const Reflection* reflection,
  99. const FieldDescriptor* field) const;
  100. virtual string PrintMessageStart(const Message& message,
  101. int field_index,
  102. int field_count,
  103. bool single_line_mode) const;
  104. virtual string PrintMessageEnd(const Message& message,
  105. int field_index,
  106. int field_count,
  107. bool single_line_mode) const;
  108. private:
  109. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
  110. };
  111. // Class for those users which require more fine-grained control over how
  112. // a protobuffer message is printed out.
  113. class LIBPROTOBUF_EXPORT Printer {
  114. public:
  115. Printer();
  116. ~Printer();
  117. // Like TextFormat::Print
  118. bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
  119. // Like TextFormat::PrintUnknownFields
  120. bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
  121. io::ZeroCopyOutputStream* output) const;
  122. // Like TextFormat::PrintToString
  123. bool PrintToString(const Message& message, string* output) const;
  124. // Like TextFormat::PrintUnknownFieldsToString
  125. bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
  126. string* output) const;
  127. // Like TextFormat::PrintFieldValueToString
  128. void PrintFieldValueToString(const Message& message,
  129. const FieldDescriptor* field,
  130. int index,
  131. string* output) const;
  132. // Adjust the initial indent level of all output. Each indent level is
  133. // equal to two spaces.
  134. void SetInitialIndentLevel(int indent_level) {
  135. initial_indent_level_ = indent_level;
  136. }
  137. // If printing in single line mode, then the entire message will be output
  138. // on a single line with no line breaks.
  139. void SetSingleLineMode(bool single_line_mode) {
  140. single_line_mode_ = single_line_mode;
  141. }
  142. bool IsInSingleLineMode() {
  143. return single_line_mode_;
  144. }
  145. // If use_field_number is true, uses field number instead of field name.
  146. void SetUseFieldNumber(bool use_field_number) {
  147. use_field_number_ = use_field_number;
  148. }
  149. // Set true to print repeated primitives in a format like:
  150. // field_name: [1, 2, 3, 4]
  151. // instead of printing each value on its own line. Short format applies
  152. // only to primitive values -- i.e. everything except strings and
  153. // sub-messages/groups.
  154. void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
  155. use_short_repeated_primitives_ = use_short_repeated_primitives;
  156. }
  157. // Set true to output UTF-8 instead of ASCII. The only difference
  158. // is that bytes >= 0x80 in string fields will not be escaped,
  159. // because they are assumed to be part of UTF-8 multi-byte
  160. // sequences. This will change the default FieldValuePrinter.
  161. void SetUseUtf8StringEscaping(bool as_utf8);
  162. // Set the default FieldValuePrinter that is used for all fields that
  163. // don't have a field-specific printer registered.
  164. // Takes ownership of the printer.
  165. void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
  166. // Sets whether we want to hide unknown fields or not.
  167. // Usually unknown fields are printed in a generic way that includes the
  168. // tag number of the field instead of field name. However, sometimes it
  169. // is useful to be able to print the message without unknown fields (e.g.
  170. // for the python protobuf version to maintain consistency between its pure
  171. // python and c++ implementations).
  172. void SetHideUnknownFields(bool hide) {
  173. hide_unknown_fields_ = hide;
  174. }
  175. // If print_message_fields_in_index_order is true, print fields of a proto
  176. // message using the order defined in source code instead of the field
  177. // number. By default, use the field number order.
  178. void SetPrintMessageFieldsInIndexOrder(
  179. bool print_message_fields_in_index_order) {
  180. print_message_fields_in_index_order_ =
  181. print_message_fields_in_index_order;
  182. }
  183. // Register a custom field-specific FieldValuePrinter for fields
  184. // with a particular FieldDescriptor.
  185. // Returns "true" if the registration succeeded, or "false", if there is
  186. // already a printer for that FieldDescriptor.
  187. // Takes ownership of the printer on successful registration.
  188. bool RegisterFieldValuePrinter(const FieldDescriptor* field,
  189. const FieldValuePrinter* printer);
  190. private:
  191. // Forward declaration of an internal class used to print the text
  192. // output to the OutputStream (see text_format.cc for implementation).
  193. class TextGenerator;
  194. // Internal Print method, used for writing to the OutputStream via
  195. // the TextGenerator class.
  196. void Print(const Message& message,
  197. TextGenerator& generator) const;
  198. // Print a single field.
  199. void PrintField(const Message& message,
  200. const Reflection* reflection,
  201. const FieldDescriptor* field,
  202. TextGenerator& generator) const;
  203. // Print a repeated primitive field in short form.
  204. void PrintShortRepeatedField(const Message& message,
  205. const Reflection* reflection,
  206. const FieldDescriptor* field,
  207. TextGenerator& generator) const;
  208. // Print the name of a field -- i.e. everything that comes before the
  209. // ':' for a single name/value pair.
  210. void PrintFieldName(const Message& message,
  211. const Reflection* reflection,
  212. const FieldDescriptor* field,
  213. TextGenerator& generator) const;
  214. // Outputs a textual representation of the value of the field supplied on
  215. // the message supplied or the default value if not set.
  216. void PrintFieldValue(const Message& message,
  217. const Reflection* reflection,
  218. const FieldDescriptor* field,
  219. int index,
  220. TextGenerator& generator) const;
  221. // Print the fields in an UnknownFieldSet. They are printed by tag number
  222. // only. Embedded messages are heuristically identified by attempting to
  223. // parse them.
  224. void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
  225. TextGenerator& generator) const;
  226. int initial_indent_level_;
  227. bool single_line_mode_;
  228. bool use_field_number_;
  229. bool use_short_repeated_primitives_;
  230. bool hide_unknown_fields_;
  231. bool print_message_fields_in_index_order_;
  232. scoped_ptr<const FieldValuePrinter> default_field_value_printer_;
  233. typedef map<const FieldDescriptor*,
  234. const FieldValuePrinter*> CustomPrinterMap;
  235. CustomPrinterMap custom_printers_;
  236. };
  237. // Parses a text-format protocol message from the given input stream to
  238. // the given message object. This function parses the format written
  239. // by Print().
  240. static bool Parse(io::ZeroCopyInputStream* input, Message* output);
  241. // Like Parse(), but reads directly from a string.
  242. static bool ParseFromString(const string& input, Message* output);
  243. // Like Parse(), but the data is merged into the given message, as if
  244. // using Message::MergeFrom().
  245. static bool Merge(io::ZeroCopyInputStream* input, Message* output);
  246. // Like Merge(), but reads directly from a string.
  247. static bool MergeFromString(const string& input, Message* output);
  248. // Parse the given text as a single field value and store it into the
  249. // given field of the given message. If the field is a repeated field,
  250. // the new value will be added to the end
  251. static bool ParseFieldValueFromString(const string& input,
  252. const FieldDescriptor* field,
  253. Message* message);
  254. // Interface that TextFormat::Parser can use to find extensions.
  255. // This class may be extended in the future to find more information
  256. // like fields, etc.
  257. class LIBPROTOBUF_EXPORT Finder {
  258. public:
  259. virtual ~Finder();
  260. // Try to find an extension of *message by fully-qualified field
  261. // name. Returns NULL if no extension is known for this name or number.
  262. virtual const FieldDescriptor* FindExtension(
  263. Message* message,
  264. const string& name) const = 0;
  265. };
  // A location in the parsed text.
  struct ParseLocation {
    int line;
    int column;

    // A default-constructed location has line == -1 and column == -1.
    ParseLocation() : line(-1), column(-1) {}

    // Builds a location from an explicit line and column.
    ParseLocation(int line_param, int column_param)
        : line(line_param), column(column_param) {}
  };
  274. // Data structure which is populated with the locations of each field
  275. // value parsed from the text.
  276. class LIBPROTOBUF_EXPORT ParseInfoTree {
  277. public:
  278. ParseInfoTree();
  279. ~ParseInfoTree();
  280. // Returns the parse location for index-th value of the field in the parsed
  281. // text. If none exists, returns a location with line = -1. Index should be
  282. // -1 for not-repeated fields.
  283. ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
  284. // Returns the parse info tree for the given field, which must be a message
  285. // type. The nested information tree is owned by the root tree and will be
  286. // deleted when it is deleted.
  287. ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
  288. int index) const;
  289. private:
  290. // Allow the text format parser to record information into the tree.
  291. friend class TextFormat;
  292. // Records the starting location of a single value for a field.
  293. void RecordLocation(const FieldDescriptor* field, ParseLocation location);
  294. // Create and records a nested tree for a nested message field.
  295. ParseInfoTree* CreateNested(const FieldDescriptor* field);
  296. // Defines the map from the index-th field descriptor to its parse location.
  297. typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;
  298. // Defines the map from the index-th field descriptor to the nested parse
  299. // info tree.
  300. typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;
  301. LocationMap locations_;
  302. NestedMap nested_;
  303. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
  304. };
  305. // For more control over parsing, use this class.
  306. class LIBPROTOBUF_EXPORT Parser {
  307. public:
  308. Parser();
  309. ~Parser();
  310. // Like TextFormat::Parse().
  311. bool Parse(io::ZeroCopyInputStream* input, Message* output);
  312. // Like TextFormat::ParseFromString().
  313. bool ParseFromString(const string& input, Message* output);
  314. // Like TextFormat::Merge().
  315. bool Merge(io::ZeroCopyInputStream* input, Message* output);
  316. // Like TextFormat::MergeFromString().
  317. bool MergeFromString(const string& input, Message* output);
  318. // Set where to report parse errors. If NULL (the default), errors will
  319. // be printed to stderr.
  320. void RecordErrorsTo(io::ErrorCollector* error_collector) {
  321. error_collector_ = error_collector;
  322. }
  323. // Set how parser finds extensions. If NULL (the default), the
  324. // parser will use the standard Reflection object associated with
  325. // the message being parsed.
  326. void SetFinder(Finder* finder) {
  327. finder_ = finder;
  328. }
  329. // Sets where location information about the parse will be written. If NULL
  330. // (the default), then no location will be written.
  331. void WriteLocationsTo(ParseInfoTree* tree) {
  332. parse_info_tree_ = tree;
  333. }
  334. // Normally parsing fails if, after parsing, output->IsInitialized()
  335. // returns false. Call AllowPartialMessage(true) to skip this check.
  336. void AllowPartialMessage(bool allow) {
  337. allow_partial_ = allow;
  338. }
  339. // Allow field names to be matched case-insensitively.
  340. // This is not advisable if there are fields that only differ in case, or
  341. // if you want to enforce writing in the canonical form.
  342. // This is 'false' by default.
  343. void AllowCaseInsensitiveField(bool allow) {
  344. allow_case_insensitive_field_ = allow;
  345. }
  346. // Like TextFormat::ParseFieldValueFromString
  347. bool ParseFieldValueFromString(const string& input,
  348. const FieldDescriptor* field,
  349. Message* output);
  350. void AllowFieldNumber(bool allow) {
  351. allow_field_number_ = allow;
  352. }
  353. private:
  354. // Forward declaration of an internal class used to parse text
  355. // representations (see text_format.cc for implementation).
  356. class ParserImpl;
  357. // Like TextFormat::Merge(). The provided implementation is used
  358. // to do the parsing.
  359. bool MergeUsingImpl(io::ZeroCopyInputStream* input,
  360. Message* output,
  361. ParserImpl* parser_impl);
  362. io::ErrorCollector* error_collector_;
  363. Finder* finder_;
  364. ParseInfoTree* parse_info_tree_;
  365. bool allow_partial_;
  366. bool allow_case_insensitive_field_;
  367. bool allow_unknown_field_;
  368. bool allow_unknown_enum_;
  369. bool allow_field_number_;
  370. bool allow_relaxed_whitespace_;
  371. bool allow_singular_overwrites_;
  372. };
  373. private:
  374. // Hack: ParseInfoTree declares TextFormat as a friend which should extend
  375. // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
  376. // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
  377. // helpers for ParserImpl to call methods of ParseInfoTree.
  378. static inline void RecordLocation(ParseInfoTree* info_tree,
  379. const FieldDescriptor* field,
  380. ParseLocation location);
  381. static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
  382. const FieldDescriptor* field);
  383. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
  384. };
  385. inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
  386. const FieldDescriptor* field,
  387. ParseLocation location) {
  388. info_tree->RecordLocation(field, location);
  389. }
  390. inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
  391. ParseInfoTree* info_tree, const FieldDescriptor* field) {
  392. return info_tree->CreateNested(field);
  393. }
  394. } // namespace protobuf
  395. } // namespace google
  396. #endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__