/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
   file Copyright.txt or https://cmake.org/licensing for details.  */
- #include "cmXMLSafe.h"
- #include "cm_utf8.h"
- #include <sstream>
- #include <stdio.h>
- #include <string.h>
- cmXMLSafe::cmXMLSafe(const char* s)
- : Data(s)
- , Size(static_cast<unsigned long>(strlen(s)))
- , DoQuotes(true)
- {
- }
- cmXMLSafe::cmXMLSafe(std::string const& s)
- : Data(s.c_str())
- , Size(static_cast<unsigned long>(s.length()))
- , DoQuotes(true)
- {
- }
- cmXMLSafe& cmXMLSafe::Quotes(bool b)
- {
- this->DoQuotes = b;
- return *this;
- }
- std::string cmXMLSafe::str()
- {
- std::ostringstream ss;
- ss << *this;
- return ss.str();
- }
- std::ostream& operator<<(std::ostream& os, cmXMLSafe const& self)
- {
- char const* first = self.Data;
- char const* last = self.Data + self.Size;
- while (first != last) {
- unsigned int ch;
- if (const char* next = cm_utf8_decode_character(first, last, &ch)) {
- // http://www.w3.org/TR/REC-xml/#NT-Char
- if ((ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD) ||
- (ch >= 0x10000 && ch <= 0x10FFFF) || ch == 0x9 || ch == 0xA ||
- ch == 0xD) {
- switch (ch) {
- // Escape XML control characters.
- case '&':
- os << "&";
- break;
- case '<':
- os << "<";
- break;
- case '>':
- os << ">";
- break;
- case '"':
- os << (self.DoQuotes ? """ : "\"");
- break;
- case '\'':
- os << (self.DoQuotes ? "'" : "'");
- break;
- case '\r':
- break; // Ignore CR
- // Print the UTF-8 character.
- default:
- os.write(first, next - first);
- break;
- }
- } else {
- // Use a human-readable hex value for this invalid character.
- char buf[16];
- sprintf(buf, "%X", ch);
- os << "[NON-XML-CHAR-0x" << buf << "]";
- }
- first = next;
- } else {
- ch = static_cast<unsigned char>(*first++);
- // Use a human-readable hex value for this invalid byte.
- char buf[16];
- sprintf(buf, "%X", ch);
- os << "[NON-UTF-8-BYTE-0x" << buf << "]";
- }
- }
- return os;
- }
|