normalizer_normalize.phpt 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. --TEST--
  2. normalize()
  3. --SKIPIF--
  4. <?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
  5. --FILE--
  6. <?php
  7. /*
  8. * Try normalization and test normalization
  9. * with Procedural and Object methods.
  10. */
  /**
   * Runs every sample string through every normalization form and builds a
   * textual report.
   *
   * For each (form, string) pair two lines are appended: the URL-encoded input
   * with its URL-encoded normalized result, and whether the input is already in
   * that form. Each line ends with the current intl error message and code.
   *
   * The work is delegated to ut_norm_normalize() and ut_norm_is_normalized(),
   * which are not defined in this file (presumably provided by ut_common.inc).
   *
   * @return string The accumulated report text.
   */
  function ut_main()
  {
      $report = '';

      // Forms are exercised in exactly this order.
      $forms = [
          Normalizer::FORM_C,
          Normalizer::FORM_D,
          Normalizer::FORM_KC,
          Normalizer::FORM_KD,
          Normalizer::NONE,
      ];

      // Printable name for each form constant.
      $form_labels = [
          Normalizer::FORM_C  => 'UNORM_FORM_C',
          Normalizer::FORM_D  => 'UNORM_FORM_D',
          Normalizer::FORM_KC => 'UNORM_FORM_KC',
          Normalizer::FORM_KD => 'UNORM_FORM_KD',
          Normalizer::NONE    => 'UNORM_NONE',
      ];

      // Sanity check against the API spec: each FORM_* constant must equal its
      // NF* alias, and NONE must not collide with FORM_C.
      $declarations_ok = Normalizer::FORM_C == Normalizer::NFC
          && Normalizer::FORM_D == Normalizer::NFD
          && Normalizer::FORM_KC == Normalizer::NFKC
          && Normalizer::FORM_KD == Normalizer::NFKD
          && Normalizer::NONE != Normalizer::FORM_C;
      if (!$declarations_ok) {
          $report .= "Invalid normalization form declarations!\n";
      }

      // UTF-8 byte sequences of the code points used to build the samples.
      $a_diaeresis   = "\xC3\xA4";     // 'LATIN SMALL LETTER A WITH DIAERESIS' (U+00E4)
      $a_ring        = "\xC3\xA5";     // 'LATIN SMALL LETTER A WITH RING ABOVE' (U+00E5)
      $o_diaeresis   = "\xC3\xB6";     // 'LATIN SMALL LETTER O WITH DIAERESIS' (U+00F6)
      $angstrom_sign = "\xE2\x84\xAB"; // 'ANGSTROM SIGN' (U+212B)
      $capital_a_ring = "\xC3\x85";    // 'LATIN CAPITAL LETTER A WITH RING ABOVE' (U+00C5)
      $ohm_sign      = "\xE2\x84\xA6"; // 'OHM SIGN' (U+2126)
      $omega         = "\xCE\xA9";     // 'GREEK CAPITAL LETTER OMEGA' (U+03A9)
      $ring_above    = "\xCC\x8A";     // 'COMBINING RING ABOVE' (U+030A)
      $fi_ligature   = "\xEF\xAC\x81"; // 'LATIN SMALL LIGATURE FI' (U+FB01)
      $long_s_dot    = "\xE1\xBA\x9B"; // 'LATIN SMALL LETTER LONG S WITH DOT ABOVE' (U+1E9B)

      $samples = [
          'ABC',
          implode('||', [$a_diaeresis, $a_ring, $o_diaeresis]),
          implode('||', [$angstrom_sign, $capital_a_ring, 'A' . $ring_above]),
          implode('||', [$ohm_sign, $omega]),
          $fi_ligature,
          $long_s_dot,
      ];

      foreach ($forms as $form) {
          $label = $form_labels[$form];

          foreach ($samples as $sample) {
              // Only the Normalizer::NONE call is silenced, to hide its
              // deprecation warning.
              $normalized = ($form === Normalizer::NONE)
                  ? @ut_norm_normalize($sample, $form)
                  : ut_norm_normalize($sample, $form);
              $code = intl_get_error_code();
              $message = intl_get_error_message();

              $report .= sprintf(
                  "'%s' normalized to form '%s' is '%s'\terror info: '%s' (%s)\n",
                  urlencode($sample),
                  $label,
                  urlencode($normalized),
                  $message,
                  $code
              );

              // The error state is read again so this line reflects the
              // is-normalized call rather than the normalize call above.
              $already_normalized = ut_norm_is_normalized($sample, $form);
              $code = intl_get_error_code();
              $message = intl_get_error_message();

              $report .= sprintf(
                  " is in form '%s'? = %s\terror info: '%s' (%s)\n",
                  $label,
                  $already_normalized ? "yes" : "no",
                  $message,
                  $code
              );
          }
      }

      return $report;
  }
  81. include_once( 'ut_common.inc' );
  82. ut_run();
  83. ?>
  84. --EXPECT--
  85. 'ABC' normalized to form 'UNORM_FORM_C' is 'ABC' error info: 'U_ZERO_ERROR' (0)
  86. is in form 'UNORM_FORM_C'? = yes error info: 'U_ZERO_ERROR' (0)
  87. '%C3%A4%7C%7C%C3%A5%7C%7C%C3%B6' normalized to form 'UNORM_FORM_C' is '%C3%A4%7C%7C%C3%A5%7C%7C%C3%B6' error info: 'U_ZERO_ERROR' (0)
  88. is in form 'UNORM_FORM_C'? = yes error info: 'U_ZERO_ERROR' (0)
  89. '%E2%84%AB%7C%7C%C3%85%7C%7CA%CC%8A' normalized to form 'UNORM_FORM_C' is '%C3%85%7C%7C%C3%85%7C%7C%C3%85' error info: 'U_ZERO_ERROR' (0)
  90. is in form 'UNORM_FORM_C'? = no error info: 'U_ZERO_ERROR' (0)
  91. '%E2%84%A6%7C%7C%CE%A9' normalized to form 'UNORM_FORM_C' is '%CE%A9%7C%7C%CE%A9' error info: 'U_ZERO_ERROR' (0)
  92. is in form 'UNORM_FORM_C'? = no error info: 'U_ZERO_ERROR' (0)
  93. '%EF%AC%81' normalized to form 'UNORM_FORM_C' is '%EF%AC%81' error info: 'U_ZERO_ERROR' (0)
  94. is in form 'UNORM_FORM_C'? = yes error info: 'U_ZERO_ERROR' (0)
  95. '%E1%BA%9B' normalized to form 'UNORM_FORM_C' is '%E1%BA%9B' error info: 'U_ZERO_ERROR' (0)
  96. is in form 'UNORM_FORM_C'? = yes error info: 'U_ZERO_ERROR' (0)
  97. 'ABC' normalized to form 'UNORM_FORM_D' is 'ABC' error info: 'U_ZERO_ERROR' (0)
  98. is in form 'UNORM_FORM_D'? = yes error info: 'U_ZERO_ERROR' (0)
  99. '%C3%A4%7C%7C%C3%A5%7C%7C%C3%B6' normalized to form 'UNORM_FORM_D' is 'a%CC%88%7C%7Ca%CC%8A%7C%7Co%CC%88' error info: 'U_ZERO_ERROR' (0)
  100. is in form 'UNORM_FORM_D'? = no error info: 'U_ZERO_ERROR' (0)
  101. '%E2%84%AB%7C%7C%C3%85%7C%7CA%CC%8A' normalized to form 'UNORM_FORM_D' is 'A%CC%8A%7C%7CA%CC%8A%7C%7CA%CC%8A' error info: 'U_ZERO_ERROR' (0)
  102. is in form 'UNORM_FORM_D'? = no error info: 'U_ZERO_ERROR' (0)
  103. '%E2%84%A6%7C%7C%CE%A9' normalized to form 'UNORM_FORM_D' is '%CE%A9%7C%7C%CE%A9' error info: 'U_ZERO_ERROR' (0)
  104. is in form 'UNORM_FORM_D'? = no error info: 'U_ZERO_ERROR' (0)
  105. '%EF%AC%81' normalized to form 'UNORM_FORM_D' is '%EF%AC%81' error info: 'U_ZERO_ERROR' (0)
  106. is in form 'UNORM_FORM_D'? = yes error info: 'U_ZERO_ERROR' (0)
  107. '%E1%BA%9B' normalized to form 'UNORM_FORM_D' is '%C5%BF%CC%87' error info: 'U_ZERO_ERROR' (0)
  108. is in form 'UNORM_FORM_D'? = no error info: 'U_ZERO_ERROR' (0)
  109. 'ABC' normalized to form 'UNORM_FORM_KC' is 'ABC' error info: 'U_ZERO_ERROR' (0)
  110. is in form 'UNORM_FORM_KC'? = yes error info: 'U_ZERO_ERROR' (0)
  111. '%C3%A4%7C%7C%C3%A5%7C%7C%C3%B6' normalized to form 'UNORM_FORM_KC' is '%C3%A4%7C%7C%C3%A5%7C%7C%C3%B6' error info: 'U_ZERO_ERROR' (0)
  112. is in form 'UNORM_FORM_KC'? = yes error info: 'U_ZERO_ERROR' (0)
  113. '%E2%84%AB%7C%7C%C3%85%7C%7CA%CC%8A' normalized to form 'UNORM_FORM_KC' is '%C3%85%7C%7C%C3%85%7C%7C%C3%85' error info: 'U_ZERO_ERROR' (0)
  114. is in form 'UNORM_FORM_KC'? = no error info: 'U_ZERO_ERROR' (0)
  115. '%E2%84%A6%7C%7C%CE%A9' normalized to form 'UNORM_FORM_KC' is '%CE%A9%7C%7C%CE%A9' error info: 'U_ZERO_ERROR' (0)
  116. is in form 'UNORM_FORM_KC'? = no error info: 'U_ZERO_ERROR' (0)
  117. '%EF%AC%81' normalized to form 'UNORM_FORM_KC' is 'fi' error info: 'U_ZERO_ERROR' (0)
  118. is in form 'UNORM_FORM_KC'? = no error info: 'U_ZERO_ERROR' (0)
  119. '%E1%BA%9B' normalized to form 'UNORM_FORM_KC' is '%E1%B9%A1' error info: 'U_ZERO_ERROR' (0)
  120. is in form 'UNORM_FORM_KC'? = no error info: 'U_ZERO_ERROR' (0)
  121. 'ABC' normalized to form 'UNORM_FORM_KD' is 'ABC' error info: 'U_ZERO_ERROR' (0)
  122. is in form 'UNORM_FORM_KD'? = yes error info: 'U_ZERO_ERROR' (0)
  123. '%C3%A4%7C%7C%C3%A5%7C%7C%C3%B6' normalized to form 'UNORM_FORM_KD' is 'a%CC%88%7C%7Ca%CC%8A%7C%7Co%CC%88' error info: 'U_ZERO_ERROR' (0)
  124. is in form 'UNORM_FORM_KD'? = no error info: 'U_ZERO_ERROR' (0)
  125. '%E2%84%AB%7C%7C%C3%85%7C%7CA%CC%8A' normalized to form 'UNORM_FORM_KD' is 'A%CC%8A%7C%7CA%CC%8A%7C%7CA%CC%8A' error info: 'U_ZERO_ERROR' (0)
  126. is in form 'UNORM_FORM_KD'? = no error info: 'U_ZERO_ERROR' (0)
  127. '%E2%84%A6%7C%7C%CE%A9' normalized to form 'UNORM_FORM_KD' is '%CE%A9%7C%7C%CE%A9' error info: 'U_ZERO_ERROR' (0)
  128. is in form 'UNORM_FORM_KD'? = no error info: 'U_ZERO_ERROR' (0)
  129. '%EF%AC%81' normalized to form 'UNORM_FORM_KD' is 'fi' error info: 'U_ZERO_ERROR' (0)
  130. is in form 'UNORM_FORM_KD'? = no error info: 'U_ZERO_ERROR' (0)
  131. '%E1%BA%9B' normalized to form 'UNORM_FORM_KD' is 's%CC%87' error info: 'U_ZERO_ERROR' (0)
  132. is in form 'UNORM_FORM_KD'? = no error info: 'U_ZERO_ERROR' (0)
  133. 'ABC' normalized to form 'UNORM_NONE' is 'ABC' error info: 'U_ZERO_ERROR' (0)
  134. is in form 'UNORM_NONE'? = no error info: 'normalizer_normalize: illegal normalization form: U_ILLEGAL_ARGUMENT_ERROR' (1)
  135. '%C3%A4%7C%7C%C3%A5%7C%7C%C3%B6' normalized to form 'UNORM_NONE' is '%C3%A4%7C%7C%C3%A5%7C%7C%C3%B6' error info: 'U_ZERO_ERROR' (0)
  136. is in form 'UNORM_NONE'? = no error info: 'normalizer_normalize: illegal normalization form: U_ILLEGAL_ARGUMENT_ERROR' (1)
  137. '%E2%84%AB%7C%7C%C3%85%7C%7CA%CC%8A' normalized to form 'UNORM_NONE' is '%E2%84%AB%7C%7C%C3%85%7C%7CA%CC%8A' error info: 'U_ZERO_ERROR' (0)
  138. is in form 'UNORM_NONE'? = no error info: 'normalizer_normalize: illegal normalization form: U_ILLEGAL_ARGUMENT_ERROR' (1)
  139. '%E2%84%A6%7C%7C%CE%A9' normalized to form 'UNORM_NONE' is '%E2%84%A6%7C%7C%CE%A9' error info: 'U_ZERO_ERROR' (0)
  140. is in form 'UNORM_NONE'? = no error info: 'normalizer_normalize: illegal normalization form: U_ILLEGAL_ARGUMENT_ERROR' (1)
  141. '%EF%AC%81' normalized to form 'UNORM_NONE' is '%EF%AC%81' error info: 'U_ZERO_ERROR' (0)
  142. is in form 'UNORM_NONE'? = no error info: 'normalizer_normalize: illegal normalization form: U_ILLEGAL_ARGUMENT_ERROR' (1)
  143. '%E1%BA%9B' normalized to form 'UNORM_NONE' is '%E1%BA%9B' error info: 'U_ZERO_ERROR' (0)
  144. is in form 'UNORM_NONE'? = no error info: 'normalizer_normalize: illegal normalization form: U_ILLEGAL_ARGUMENT_ERROR' (1)