testEncoding.cxx 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
  2. file Copyright.txt or https://cmake.org/licensing#kwsys for details. */
  3. #include "kwsysPrivate.h"
  4. #if defined(_MSC_VER)
  5. #pragma warning(disable : 4786)
  6. #endif
  7. #include KWSYS_HEADER(Encoding.hxx)
  8. #include KWSYS_HEADER(Encoding.h)
  9. #include <algorithm>
  10. #include <iostream>
  11. #include <locale.h>
  12. #include <stdlib.h>
  13. #include <string.h>
  14. // Work-around CMake dependency scanning limitation. This must
  15. // duplicate the above list of headers.
  16. #if 0
  17. #include "Encoding.h.in"
  18. #include "Encoding.hxx.in"
  19. #endif
  // Sample greetings used as conversion inputs.  Each row is a
  // NUL-terminated byte string held in a fixed 32-byte slot; the non-ASCII
  // rows are spelled as raw bytes (grouped one encoded character per line)
  // so the source file itself stays plain ASCII.  The table ends with a row
  // whose first byte is 0, which is what the iterating code looks for.
  static const unsigned char helloWorldStrings[][32] = {
    // English
    { 'H', 'e', 'l', 'l', 'o', //
      ' ',                     //
      'W', 'o', 'r', 'l', 'd', //
      0 },
    // Japanese
    { 0xE3, 0x81, 0x93, //
      0xE3, 0x82, 0x93, //
      0xE3, 0x81, 0xAB, //
      0xE3, 0x81, 0xA1, //
      0xE3, 0x81, 0xAF, //
      0xE4, 0xB8, 0x96, //
      0xE7, 0x95, 0x8C, //
      0 },
    // Arabic
    { 0xD9, 0x85, 0xD8, 0xB1, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xA7, //
      0x20,                                                       //
      0xD8, 0xA7, 0xD9, 0x84, 0xD8, 0xB9, 0xD8, 0xA7, 0xD9, 0x84, //
      0xD9, 0x85,                                                 //
      0 },
    // Yiddish
    { 0xD7, 0x94, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x90, //
      0x20,                                           //
      0xD7, 0x95, 0xD7, 0x95, 0xD7, 0xA2, 0xD7, 0x9C, //
      0xD7, 0x98,                                     //
      0 },
    // Russian
    { 0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, //
      0xD0, 0xB5, 0xD1, 0x82,                         //
      0x20,                                           //
      0xD0, 0xBC, 0xD0, 0xB8, 0xD1, 0x80,             //
      0 },
    // Latin
    { 'M', 'u', 'n', 'd', 'u', 's', //
      ' ',                          //
      's', 'a', 'l', 'v', 'e',      //
      0 },
    // Swahili
    { 'h', 'u', 'j', 'a', 'm', 'b', 'o', //
      ' ',                               //
      'D', 'u', 'n', 'i', 'a',           //
      0 },
    // Icelandic
    { 'H', 'a', 'l', 'l', 0xC3, 0xB3, //
      ' ',                            //
      'h', 'e', 'i', 'm', 'u', 'r',   //
      0 },
    // Terminator: first byte is 0.
    { 0 }
  };
  46. static int testHelloWorldEncoding()
  47. {
  48. int ret = 0;
  49. for (int i = 0; helloWorldStrings[i][0] != 0; i++) {
  50. std::string str = reinterpret_cast<const char*>(helloWorldStrings[i]);
  51. std::cout << str << std::endl;
  52. std::wstring wstr = kwsys::Encoding::ToWide(str);
  53. std::string str2 = kwsys::Encoding::ToNarrow(wstr);
  54. wchar_t* c_wstr = kwsysEncoding_DupToWide(str.c_str());
  55. char* c_str2 = kwsysEncoding_DupToNarrow(c_wstr);
  56. if (!wstr.empty() && (str != str2 || strcmp(c_str2, str.c_str()))) {
  57. std::cout << "converted string was different: " << str2 << std::endl;
  58. std::cout << "converted string was different: " << c_str2 << std::endl;
  59. ret++;
  60. }
  61. free(c_wstr);
  62. free(c_str2);
  63. }
  64. return ret;
  65. }
  66. static int testRobustEncoding()
  67. {
  68. // test that the conversion functions handle invalid
  69. // unicode correctly/gracefully
  70. // we manipulate the format flags of stdout, remember
  71. // the original state here to restore before return
  72. std::ios::fmtflags const& flags = std::cout.flags();
  73. int ret = 0;
  74. char cstr[] = { (char)-1, 0 };
  75. // this conversion could fail
  76. std::wstring wstr = kwsys::Encoding::ToWide(cstr);
  77. wstr = kwsys::Encoding::ToWide(KWSYS_NULLPTR);
  78. if (wstr != L"") {
  79. const wchar_t* wcstr = wstr.c_str();
  80. std::cout << "ToWide(NULL) returned";
  81. for (size_t i = 0; i < wstr.size(); i++) {
  82. std::cout << " " << std::hex << (int)wcstr[i];
  83. }
  84. std::cout << std::endl;
  85. ret++;
  86. }
  87. wstr = kwsys::Encoding::ToWide("");
  88. if (wstr != L"") {
  89. const wchar_t* wcstr = wstr.c_str();
  90. std::cout << "ToWide(\"\") returned";
  91. for (size_t i = 0; i < wstr.size(); i++) {
  92. std::cout << " " << std::hex << (int)wcstr[i];
  93. }
  94. std::cout << std::endl;
  95. ret++;
  96. }
  97. #ifdef _WIN32
  98. // 16 bit wchar_t - we make an invalid surrogate pair
  99. wchar_t cwstr[] = { 0xD801, 0xDA00, 0 };
  100. // this conversion could fail
  101. std::string win_str = kwsys::Encoding::ToNarrow(cwstr);
  102. #endif
  103. std::string str = kwsys::Encoding::ToNarrow(KWSYS_NULLPTR);
  104. if (str != "") {
  105. std::cout << "ToNarrow(NULL) returned " << str << std::endl;
  106. ret++;
  107. }
  108. str = kwsys::Encoding::ToNarrow(L"");
  109. if (wstr != L"") {
  110. std::cout << "ToNarrow(\"\") returned " << str << std::endl;
  111. ret++;
  112. }
  113. std::cout.flags(flags);
  114. return ret;
  115. }
  116. static int testWithNulls()
  117. {
  118. int ret = 0;
  119. std::vector<std::string> strings;
  120. strings.push_back(std::string("ab") + '\0' + 'c');
  121. strings.push_back(std::string("d") + '\0' + '\0' + 'e');
  122. strings.push_back(std::string() + '\0' + 'f');
  123. strings.push_back(std::string() + '\0' + '\0' + "gh");
  124. strings.push_back(std::string("ij") + '\0');
  125. strings.push_back(std::string("k") + '\0' + '\0');
  126. strings.push_back(std::string("\0\0\0\0", 4) + "lmn" +
  127. std::string("\0\0\0\0", 4));
  128. for (std::vector<std::string>::iterator it = strings.begin();
  129. it != strings.end(); ++it) {
  130. std::wstring wstr = kwsys::Encoding::ToWide(*it);
  131. std::string str = kwsys::Encoding::ToNarrow(wstr);
  132. std::string s(*it);
  133. std::replace(s.begin(), s.end(), '\0', ' ');
  134. std::cout << "'" << s << "' (" << it->size() << ")" << std::endl;
  135. if (str != *it) {
  136. std::replace(str.begin(), str.end(), '\0', ' ');
  137. std::cout << "string with null was different: '" << str << "' ("
  138. << str.size() << ")" << std::endl;
  139. ret++;
  140. }
  141. }
  142. return ret;
  143. }
  144. static int testCommandLineArguments()
  145. {
  146. int status = 0;
  147. char const* argv[2] = { "./app.exe", (char const*)helloWorldStrings[1] };
  148. kwsys::Encoding::CommandLineArguments args(2, argv);
  149. kwsys::Encoding::CommandLineArguments arg2 =
  150. kwsys::Encoding::CommandLineArguments(args);
  151. char const* const* u8_argv = args.argv();
  152. for (int i = 0; i < args.argc(); i++) {
  153. char const* u8_arg = u8_argv[i];
  154. if (strcmp(argv[i], u8_arg) != 0) {
  155. std::cout << "argv[" << i << "] " << argv[i] << " != " << u8_arg
  156. << std::endl;
  157. status++;
  158. }
  159. }
  160. kwsys::Encoding::CommandLineArguments args3 =
  161. kwsys::Encoding::CommandLineArguments::Main(2, argv);
  162. return status;
  163. }
  // Checks kwsys::Encoding::ToWindowsExtendedPath against a fixed table of
  // input paths and the wide strings they are expected to produce.  For each
  // mismatch the offending input is printed in double quotes.
  //
  // Returns the number of mismatching entries; when _WIN32 is not defined
  // nothing is checked and 0 is returned.
  static int testToWindowsExtendedPath()
  {
  #ifdef _WIN32
    struct PathCase
    {
      const char* input;
      const wchar_t* expected;
    };
    static const PathCase cases[] = {
      // Drive-letter paths, with backslashes and with forward slashes.
      { "L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo",
        L"\\\\?\\L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
      { "L:/Local Mojo/Hex Power Pack/Iffy Voodoo",
        L"\\\\?\\L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
      // Network-share paths, with backslashes and with forward slashes.
      { "\\\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo",
        L"\\\\?\\UNC\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
      { "//Foo/Local Mojo/Hex Power Pack/Iffy Voodoo",
        L"\\\\?\\UNC\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
      // Inputs expected to come back unchanged.
      { "//", L"//" },
      { "\\\\.\\", L"\\\\.\\" },
      { "\\\\.\\X", L"\\\\.\\X" },
      // "\\.\" followed by a drive specification.
      { "\\\\.\\X:", L"\\\\?\\X:" },
      { "\\\\.\\X:\\", L"\\\\?\\X:\\" },
      // Reserved device name.
      { "NUL", L"\\\\.\\NUL" }
    };

    int failures = 0;
    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); ++i) {
      if (kwsys::Encoding::ToWindowsExtendedPath(cases[i].input) !=
          cases[i].expected) {
        std::cout << "Problem with ToWindowsExtendedPath "
                  << "\"" << cases[i].input << "\"" << std::endl;
        ++failures;
      }
    }
    return failures;
  #else
    return 0;
  #endif
  }
  235. int testEncoding(int, char* [])
  236. {
  237. const char* loc = setlocale(LC_ALL, "");
  238. if (loc) {
  239. std::cout << "Locale: " << loc << std::endl;
  240. } else {
  241. std::cout << "Locale: None" << std::endl;
  242. }
  243. int ret = 0;
  244. ret |= testHelloWorldEncoding();
  245. ret |= testRobustEncoding();
  246. ret |= testCommandLineArguments();
  247. ret |= testWithNulls();
  248. ret |= testToWindowsExtendedPath();
  249. return ret;
  250. }