eucjp_ms_encoding.phpt 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. --TEST--
  2. Exhaustive test of EUC-JP-MS (AKA EUC-JP-WIN) text encoding
  3. --EXTENSIONS--
  4. mbstring
  5. --SKIPIF--
  6. <?php
  7. if (getenv("SKIP_SLOW_TESTS")) die("skip slow test");
  8. ?>
  9. --FILE--
  10. <?php
  11. srand(555); /* Make results consistent */
  12. include('encoding_tests.inc');
  13. mb_substitute_character(0x25); // '%'
  14. readConversionTable(__DIR__ . '/data/EUC-JP-MS.txt', $toUnicode, $fromUnicode);
  15. readConversionTable(__DIR__ . '/data/EUC-JP-MS.IRREVERSIBLE.txt', $toUnicodeIrreversible, $_);
  16. foreach ($toUnicodeIrreversible as $char => $codepoint) {
  17. if (!isset($fromUnicode[$codepoint]))
  18. $fromUnicode[$codepoint] = $char;
  19. }
  20. // The conversion table has several cases where more than one EUC-JP-WIN code maps to the
  21. // same Unicode codepoint. Make sure we use the intended reverse mapping for tests:
  22. $fromUnicode["\x21\x21"] = "\xAD\xE4";
  23. $fromUnicode["\x21\x61"] = "\xAD\xB6";
  24. $fromUnicode["\x21\x62"] = "\xAD\xB7";
  25. $fromUnicode["\x21\x63"] = "\xAD\xB8";
  26. $fromUnicode["\x21\x65"] = "\xAD\xBA";
  27. $fromUnicode["\x21\x68"] = "\xAD\xBD";
  28. $fromUnicode["\x21\x69"] = "\xAD\xBE";
  29. $fromUnicode["\x22\x1A"] = "\xA2\xE5";
  30. $fromUnicode["\x22\x20"] = "\xA2\xDC";
  31. $fromUnicode["\x22\x29"] = "\xA2\xC1";
  32. $fromUnicode["\x22\x2A"] = "\xA2\xC0";
  33. $fromUnicode["\x22\x2B"] = "\xA2\xE9";
  34. $fromUnicode["\x22\x35"] = "\xA2\xE8";
  35. $fromUnicode["\x22\x52"] = "\xA2\xE2";
  36. $fromUnicode["\x22\x61"] = "\xA2\xE1";
  37. $fromUnicode["\x22\xA5"] = "\xA2\xDD";
  38. $fromUnicode["\x32\x31"] = "\xAD\xEA";
  39. $fromUnicode["\xFF\x5E"] = "\xA1\xC1";
  40. findInvalidChars($toUnicode, $invalid, $truncated, array_fill_keys(range(0xA1,0xFE), 2) + [0x8F => 3]);
  41. testAllValidChars($toUnicode, 'eucJP-win', 'UTF-16BE', false);
  42. testAllInvalidChars($invalid, $toUnicode, 'eucJP-win', 'UTF-16BE', "\x00%");
  43. testTruncatedChars($truncated, 'eucJP-win', 'UTF-16BE', "\x00%");
  44. echo "Tested eucJP-win -> UTF-16BE\n";
  45. findInvalidChars($fromUnicode, $invalid, $unused, array_fill_keys(range(0,0xFF), 2));
  46. convertAllInvalidChars($invalid, $fromUnicode, 'UTF-16BE', 'eucJP-win', '%');
  47. echo "Tested UTF-16BE -> eucJP-win\n";
  48. // Test "long" illegal character markers
  49. mb_substitute_character("long");
  50. convertInvalidString("\x80", "%", "eucJP-win", "UTF-8");
  51. convertInvalidString("\xFE\xFF", "%", "eucJP-win", "UTF-8");
  52. ?>
  53. --EXPECT--
  54. Tested eucJP-win -> UTF-16BE
  55. Tested UTF-16BE -> eucJP-win