htmlentities-utf-3.phpt 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
--TEST--
Test get_next_char(), used by htmlentities()/htmlspecialchars(): validity of UTF-8 sequences
--FILE--
<?php
  /*
   * Well-formed UTF-8 byte sequences: conformance to Unicode 5.2,
   * section 3.9, D92.
   *
   * Each row describes one class of sequence as a list of inclusive
   * array(min, max) byte ranges, one range per byte position. The number of
   * ranges in a row is therefore the exact length of the sequence, and the
   * first range is the set of lead bytes that selects the row.
   */
  $val_ranges = array(
      /* 1 byte: U+0000..U+007F */
      array(array(0x00, 0x7F)),
      /* 2 bytes: U+0080..U+07FF (lead bytes 0xC0/0xC1 are absent: overlong only) */
      array(array(0xC2, 0xDF), array(0x80, 0xBF)),
      /* 3 bytes: U+0800..U+0FFF (second byte starts at 0xA0 to exclude overlong forms) */
      array(array(0xE0, 0xE0), array(0xA0, 0xBF), array(0x80, 0xBF)),
      /* 3 bytes: U+1000..U+CFFF */
      array(array(0xE1, 0xEC), array(0x80, 0xBF), array(0x80, 0xBF)),
      /* 3 bytes: U+D000..U+D7FF (second byte stops at 0x9F to exclude surrogates) */
      array(array(0xED, 0xED), array(0x80, 0x9F), array(0x80, 0xBF)),
      /* 3 bytes: U+E000..U+FFFF */
      array(array(0xEE, 0xEF), array(0x80, 0xBF), array(0x80, 0xBF)),
      /* 4 bytes: U+10000..U+3FFFF (second byte starts at 0x90 to exclude overlong forms) */
      array(array(0xF0, 0xF0), array(0x90, 0xBF), array(0x80, 0xBF), array(0x80, 0xBF)),
      /* 4 bytes: U+40000..U+FFFFF */
      array(array(0xF1, 0xF3), array(0x80, 0xBF), array(0x80, 0xBF), array(0x80, 0xBF)),
      /* 4 bytes: U+100000..U+10FFFF (second byte stops at 0x8F: nothing above U+10FFFF) */
      array(array(0xF4, 0xF4), array(0x80, 0x8F), array(0x80, 0xBF), array(0x80, 0xBF)),
  );
  /**
   * Tell whether $seq is exactly one well-formed UTF-8 byte sequence.
   *
   * The lead byte selects a row of the global $val_ranges table. The sequence
   * is valid when its length equals the number of ranges in that row and every
   * following byte lies inside the range listed for its position.
   *
   * @param string $seq Raw bytes of a single candidate sequence.
   * @return bool True if $seq is well formed according to $val_ranges; false
   *              otherwise, including for the empty string.
   */
  function is_valid($seq) {
      global $val_ranges;

      $len = strlen($seq);
      if ($len === 0) {
          // No lead byte to classify; bail out before reading offset 0 of "".
          return false;
      }

      $lead = ord($seq[0]);
      foreach ($val_ranges as $ranges) {
          if ($lead < $ranges[0][0] || $lead > $ranges[0][1]) {
              continue;
          }

          // The first row whose lead-byte range matches decides the outcome.
          if (count($ranges) !== $len) {
              return false;
          }
          for ($n = 1; $n < $len; $n++) {
              $byte = ord($seq[$n]);
              if ($byte < $ranges[$n][0] || $byte > $ranges[$n][1]) {
                  return false;
              }
          }
          return true;
      }

      // The lead byte is not covered by any row of the table.
      return false;
  }
  /**
   * Check that htmlspecialchars() and is_valid() agree about $s.
   *
   * htmlspecialchars() is called with ENT_QUOTES only (neither ENT_IGNORE nor
   * ENT_SUBSTITUTE), so it returns an empty string when $s is not valid in the
   * given encoding; a non-empty result therefore means "accepted as UTF-8".
   *
   * @param string $s Raw bytes of one candidate UTF-8 sequence.
   * @return bool True when both checks give the same verdict (both accept or
   *              both reject), false when they disagree.
   */
  function concordance($s) {
      $accepted = strlen(htmlspecialchars($s, ENT_QUOTES, "UTF-8")) > 0;
      $expected = is_valid($s);

      return $accepted === $expected;
  }
  /*
   * Drive concordance() over candidate sequences. All upper bounds are
   * inclusive so that the boundary bytes (second byte 0xBF, lead bytes 0xEF
   * and 0xFF) are exercised as well. Any disagreement is printed; a clean run
   * prints only "Done.".
   */

  /* Two-byte candidates: every lead byte 0xC0..0xDF with every trail byte 0x80..0xBF. */
  for ($b1 = 0xC0; $b1 <= 0xDF; $b1++) {
      for ($b2 = 0x80; $b2 <= 0xBF; $b2++) {
          $s = chr($b1).chr($b2);
          if (!concordance($s)) {
              echo "Discordance for ".bin2hex($s),"\n";
          }
      }
  }

  /* Three-byte candidates: lead 0xE0..0xEF, second byte 0x80..0xBF, third byte at both extremes. */
  for ($b1 = 0xE0; $b1 <= 0xEF; $b1++) {
      for ($b2 = 0x80; $b2 <= 0xBF; $b2++) {
          foreach (array("\x80", "\xBF") as $tail) {
              $s = chr($b1).chr($b2).$tail;
              if (!concordance($s)) {
                  echo "Discordance for ".bin2hex($s),"\n";
              }
          }
      }
  }

  /*
   * Four-byte candidates: lead 0xF0..0xFF (0xF5 and above are never valid lead
   * bytes), second byte 0x80..0xBF, last two bytes at each combination of
   * their extremes.
   */
  for ($b1 = 0xF0; $b1 <= 0xFF; $b1++) {
      for ($b2 = 0x80; $b2 <= 0xBF; $b2++) {
          foreach (array("\x80\x80", "\xBF\x80", "\x80\xBF", "\xBF\xBF") as $tail) {
              $s = chr($b1).chr($b2).$tail;
              if (!concordance($s)) {
                  echo "Discordance for ".bin2hex($s),"\n";
              }
          }
      }
  }

  echo "Done.\n";
--EXPECT--
Done.