iso2022kr_encoding.phpt 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. --TEST--
  2. Test of ASCII and KS X 1001-1992 support in ISO-2022-KR encoding
  3. --EXTENSIONS--
  4. mbstring
  5. --SKIPIF--
  6. <?php
  // Emit a "skip ..." message and stop when SKIP_SLOW_TESTS is set in the environment.
  if (getenv("SKIP_SLOW_TESTS")) {
      die("skip slow test");
  }
  8. ?>
  9. --FILE--
  10. <?php
  // Helper functions used below are expected to come from this include.
  include 'encoding_tests.inc';

  // Use '%' (0x25) as the substitution character for unconvertible input.
  mb_substitute_character(0x25);

  // Load the KS X 1001 table; $ksxChars is iterated later as
  // "KS X 1001 bytes => UTF-16BE bytes". The last argument is not used in this file.
  $ksxTablePath = __DIR__ . '/data/KSX1001.txt';
  readConversionTable($ksxTablePath, $ksxChars, $unused);
  /**
   * Checks that $from is accepted as ISO-2022-KR and converts to the UTF-16BE
   * string $to; optionally also checks the reverse conversion from $to back to
   * a normalised form of $from.
   *
   * The actual checks are delegated to identifyValidString() and
   * convertValidString(), which are defined outside this block.
   *
   * @param string $from     ISO-2022-KR byte string
   * @param string $to       expected UTF-16BE byte string
   * @param bool   $bothWays when true, also test UTF-16BE -> ISO-2022-KR
   */
  function testValid($from, $to, $bothWays = true) {
      identifyValidString($from, 'ISO-2022-KR');
      convertValidString($from, $to, 'ISO-2022-KR', 'UTF-16BE', false);

      if (!$bothWays) {
          return;
      }

      /* 0xF at the beginning of an ISO-2022 string is redundant; it switches
       * to ASCII mode, but ASCII mode is the default */
      if (str_starts_with($from, "\x0F")) {
          $from = substr($from, 1);
      }

      /* If the string switches to a different charset, it should switch back to
       * ASCII at the end. The designator used in this test is ESC $ ) C, so that
       * is the sequence to look for; a string which already ends with Shift In
       * (0xF) must not get a second one. */
      if (str_contains($from, "\x1B$)C") && !str_ends_with($from, "\x0F")) {
          $from .= "\x0F";
      }

      convertValidString($to, $from, 'UTF-16BE', 'ISO-2022-KR', false);
  }
  /**
   * Shorthand for testInvalidString() with the encoding arguments fixed to
   * 'ISO-2022-KR' and 'UTF-16BE' (presumably source and target encoding,
   * mirroring the argument order used by testValid()).
   *
   * @param string $from ISO-2022-KR byte string expected to be rejected
   * @param string $to   expected UTF-16BE output
   */
  function testInvalid($from, $to) {
      $fromEncoding = 'ISO-2022-KR';
      $toEncoding = 'UTF-16BE';
      testInvalidString($from, $to, $fromEncoding, $toEncoding);
  }
  /* The empty string is valid in both directions */
  testValid("", "");
  echo "Empty string OK\n";

  /* Every 7-bit byte stands for itself, apart from the three bytes which this
   * test treats as control codes: 0xE (Shift Out), 0xF (Shift In) and 0x1B (ESC) */
  $controlCodes = [0xE, 0xF, 0x1B];
  foreach (range(0, 0x7F) as $code) {
      if (in_array($code, $controlCodes, true)) {
          continue;
      }
      $char = chr($code);
      $expected = "\x00" . $char;
      testValid($char, $expected);
      testValid("\x0F" . $char, $expected); /* 0xF is 'Shift In' code */
  }

  /* Bytes with the high bit set are expected to be rejected, with or without
   * a preceding Shift In */
  foreach (range(0x80, 0xFF) as $code) {
      $char = chr($code);
      testInvalid($char, "\x00%");
      testInvalid("\x0F" . $char, "\x00%");
  }
  echo "ASCII support OK\n";
  /* ESC $ ) C followed by Shift Out (0xE) */
  $ksxPrefix = "\x1B$)C\x0E";

  /* Each character from the table must decode after a bare Shift Out and after
   * the full prefix; the form terminated by Shift In is also tested both ways */
  foreach ($ksxChars as $ksx => $utf16BE) {
      testValid("\x0E" . $ksx, $utf16BE, false);
      testValid($ksxPrefix . $ksx, $utf16BE, false);
      testValid($ksxPrefix . $ksx . "\x0F", $utf16BE);
  }

  findInvalidChars($ksxChars, $invalidKsx, $truncatedKsx);

  /* Invalid sequences; those starting with 0xE, 0xF or ESC are left out,
   * since those bytes are not treated as character data */
  foreach (array_keys($invalidKsx) as $badChar) {
      if (!in_array($badChar[0], ["\x0E", "\x0F", "\x1B"], true)) {
          testInvalid($ksxPrefix . $badChar, "\x00%");
      }
  }

  /* Truncated sequences */
  foreach (array_keys($truncatedKsx) as $badChar) {
      testInvalid($ksxPrefix . $badChar, "\x00%");
  }
  echo "KS X 1001 support OK\n";
  /* A valid ESC sequence on its own leaves us in ASCII mode; 'Shift Out' is
   * still required before KS X 1001 characters */
  testValid("\x1B$)Cabc", "\x00a\x00b\x00c", false);

  /* Truncated ESC sequences */
  foreach (["\x1B", "\x1B$", "\x1B$)"] as $truncatedEsc) {
      testInvalid($truncatedEsc, "\x00%");
  }

  /* Invalid ESC sequences: for each prefix of ESC $ ) C, every following byte
   * other than the one that continues the sequence */
  $nextByteFor = ["\x1B" => '$', "\x1B$" => ')', "\x1B$)" => 'C'];
  for ($code = 0; $code < 256; $code++) {
      $byte = chr($code);
      foreach ($nextByteFor as $escPrefix => $required) {
          if ($byte !== $required) {
              testInvalid($escPrefix . $byte, "\x00%");
          }
      }
  }

  /* Switching back and forth between ASCII and KS X 1001 is allowed */
  testValid("\x0E\x0E\x0F\x0E\x0Fabc", "\x00a\x00b\x00c", false);
  echo "Escapes behave as expected\n";
  // Run a few malformed inputs again with the "long" illegal character markers enabled
  mb_substitute_character("long");
  $malformedInputs = ["\x1B", "\x1B$", "\x1B$)", "\x1B$)C\x0E\x7C\x84"];
  foreach ($malformedInputs as $input) {
      convertInvalidString($input, "%", "ISO-2022-KR", "UTF-8");
  }
  echo "Done!\n";
  86. ?>
  87. --EXPECT--
  88. Empty string OK
  89. ASCII support OK
  90. KS X 1001 support OK
  91. Escapes behave as expected
  92. Done!