cmXMLSafe.cxx 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
  2. file Copyright.txt or https://cmake.org/licensing for details. */
  3. #include "cmXMLSafe.h"
  4. #include "cm_utf8.h"
  5. #include <sstream>
  6. #include <stdio.h>
  7. #include <string.h>
  8. cmXMLSafe::cmXMLSafe(const char* s)
  9. : Data(s)
  10. , Size(static_cast<unsigned long>(strlen(s)))
  11. , DoQuotes(true)
  12. {
  13. }
  14. cmXMLSafe::cmXMLSafe(std::string const& s)
  15. : Data(s.c_str())
  16. , Size(static_cast<unsigned long>(s.length()))
  17. , DoQuotes(true)
  18. {
  19. }
  20. cmXMLSafe& cmXMLSafe::Quotes(bool b)
  21. {
  22. this->DoQuotes = b;
  23. return *this;
  24. }
  25. std::string cmXMLSafe::str()
  26. {
  27. std::ostringstream ss;
  28. ss << *this;
  29. return ss.str();
  30. }
  31. std::ostream& operator<<(std::ostream& os, cmXMLSafe const& self)
  32. {
  33. char const* first = self.Data;
  34. char const* last = self.Data + self.Size;
  35. while (first != last) {
  36. unsigned int ch;
  37. if (const char* next = cm_utf8_decode_character(first, last, &ch)) {
  38. // http://www.w3.org/TR/REC-xml/#NT-Char
  39. if ((ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD) ||
  40. (ch >= 0x10000 && ch <= 0x10FFFF) || ch == 0x9 || ch == 0xA ||
  41. ch == 0xD) {
  42. switch (ch) {
  43. // Escape XML control characters.
  44. case '&':
  45. os << "&amp;";
  46. break;
  47. case '<':
  48. os << "&lt;";
  49. break;
  50. case '>':
  51. os << "&gt;";
  52. break;
  53. case '"':
  54. os << (self.DoQuotes ? "&quot;" : "\"");
  55. break;
  56. case '\'':
  57. os << (self.DoQuotes ? "&apos;" : "'");
  58. break;
  59. case '\r':
  60. break; // Ignore CR
  61. // Print the UTF-8 character.
  62. default:
  63. os.write(first, next - first);
  64. break;
  65. }
  66. } else {
  67. // Use a human-readable hex value for this invalid character.
  68. char buf[16];
  69. sprintf(buf, "%X", ch);
  70. os << "[NON-XML-CHAR-0x" << buf << "]";
  71. }
  72. first = next;
  73. } else {
  74. ch = static_cast<unsigned char>(*first++);
  75. // Use a human-readable hex value for this invalid byte.
  76. char buf[16];
  77. sprintf(buf, "%X", ch);
  78. os << "[NON-UTF-8-BYTE-0x" << buf << "]";
  79. }
  80. }
  81. return os;
  82. }