htmlentities_html4.phpt 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. --TEST--
  2. htmlentities() conformance check (HTML 4)
  3. --FILE--
  4. <?php
  5. function utf32_utf8($k) {
  6. if ($k < 0x80) {
  7. $retval = pack('C', $k);
  8. } else if ($k < 0x800) {
  9. $retval = pack('C2',
  10. 0xc0 | ($k >> 6),
  11. 0x80 | ($k & 0x3f));
  12. } else if ($k < 0x10000) {
  13. $retval = pack('C3',
  14. 0xe0 | ($k >> 12),
  15. 0x80 | (($k >> 6) & 0x3f),
  16. 0x80 | ($k & 0x3f));
  17. } else if ($k < 0x200000) {
  18. $retval = pack('C4',
  19. 0xf0 | ($k >> 18),
  20. 0x80 | (($k >> 12) & 0x3f),
  21. 0x80 | (($k >> 6) & 0x3f),
  22. 0x80 | ($k & 0x3f));
  23. } else if ($k < 0x4000000) {
  24. $retval = pack('C5',
  25. 0xf8 | ($k >> 24),
  26. 0x80 | (($k >> 18) & 0x3f),
  27. 0x80 | (($k >> 12) & 0x3f),
  28. 0x80 | (($k >> 6) & 0x3f),
  29. 0x80 | ($k & 0x3f));
  30. } else {
  31. $retval = pack('C6',
  32. 0xfc | ($k >> 30),
  33. 0x80 | (($k >> 24) & 0x3f),
  34. 0x80 | (($k >> 18) & 0x3f),
  35. 0x80 | (($k >> 12) & 0x3f),
  36. 0x80 | (($k >> 6) & 0x3f),
  37. 0x80 | ($k & 0x3f));
  38. }
  39. return $retval;
  40. }
  41. $table = get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8');
  42. for ($i = 0; $i < 0x2710; $i++) {
  43. if ($i >= 0xd800 && $i < 0xe000)
  44. continue;
  45. $str = utf32_utf8($i);
  46. if (isset($table[$str])) {
  47. printf("%s\tU+%05X\n", $table[$str], $i);
  48. unset($table[$str]);
  49. }
  50. }
  51. if (!empty($table)) {
  52. echo "Not matched entities: ";
  53. var_dump($table);
  54. }
  55. ?>
  56. --EXPECT--
  57. &quot; U+00022
  58. &amp; U+00026
  59. &#039; U+00027
  60. &lt; U+0003C
  61. &gt; U+0003E
  62. &nbsp; U+000A0
  63. &iexcl; U+000A1
  64. &cent; U+000A2
  65. &pound; U+000A3
  66. &curren; U+000A4
  67. &yen; U+000A5
  68. &brvbar; U+000A6
  69. &sect; U+000A7
  70. &uml; U+000A8
  71. &copy; U+000A9
  72. &ordf; U+000AA
  73. &laquo; U+000AB
  74. &not; U+000AC
  75. &shy; U+000AD
  76. &reg; U+000AE
  77. &macr; U+000AF
  78. &deg; U+000B0
  79. &plusmn; U+000B1
  80. &sup2; U+000B2
  81. &sup3; U+000B3
  82. &acute; U+000B4
  83. &micro; U+000B5
  84. &para; U+000B6
  85. &middot; U+000B7
  86. &cedil; U+000B8
  87. &sup1; U+000B9
  88. &ordm; U+000BA
  89. &raquo; U+000BB
  90. &frac14; U+000BC
  91. &frac12; U+000BD
  92. &frac34; U+000BE
  93. &iquest; U+000BF
  94. &Agrave; U+000C0
  95. &Aacute; U+000C1
  96. &Acirc; U+000C2
  97. &Atilde; U+000C3
  98. &Auml; U+000C4
  99. &Aring; U+000C5
  100. &AElig; U+000C6
  101. &Ccedil; U+000C7
  102. &Egrave; U+000C8
  103. &Eacute; U+000C9
  104. &Ecirc; U+000CA
  105. &Euml; U+000CB
  106. &Igrave; U+000CC
  107. &Iacute; U+000CD
  108. &Icirc; U+000CE
  109. &Iuml; U+000CF
  110. &ETH; U+000D0
  111. &Ntilde; U+000D1
  112. &Ograve; U+000D2
  113. &Oacute; U+000D3
  114. &Ocirc; U+000D4
  115. &Otilde; U+000D5
  116. &Ouml; U+000D6
  117. &times; U+000D7
  118. &Oslash; U+000D8
  119. &Ugrave; U+000D9
  120. &Uacute; U+000DA
  121. &Ucirc; U+000DB
  122. &Uuml; U+000DC
  123. &Yacute; U+000DD
  124. &THORN; U+000DE
  125. &szlig; U+000DF
  126. &agrave; U+000E0
  127. &aacute; U+000E1
  128. &acirc; U+000E2
  129. &atilde; U+000E3
  130. &auml; U+000E4
  131. &aring; U+000E5
  132. &aelig; U+000E6
  133. &ccedil; U+000E7
  134. &egrave; U+000E8
  135. &eacute; U+000E9
  136. &ecirc; U+000EA
  137. &euml; U+000EB
  138. &igrave; U+000EC
  139. &iacute; U+000ED
  140. &icirc; U+000EE
  141. &iuml; U+000EF
  142. &eth; U+000F0
  143. &ntilde; U+000F1
  144. &ograve; U+000F2
  145. &oacute; U+000F3
  146. &ocirc; U+000F4
  147. &otilde; U+000F5
  148. &ouml; U+000F6
  149. &divide; U+000F7
  150. &oslash; U+000F8
  151. &ugrave; U+000F9
  152. &uacute; U+000FA
  153. &ucirc; U+000FB
  154. &uuml; U+000FC
  155. &yacute; U+000FD
  156. &thorn; U+000FE
  157. &yuml; U+000FF
  158. &OElig; U+00152
  159. &oelig; U+00153
  160. &Scaron; U+00160
  161. &scaron; U+00161
  162. &Yuml; U+00178
  163. &fnof; U+00192
  164. &circ; U+002C6
  165. &tilde; U+002DC
  166. &Alpha; U+00391
  167. &Beta; U+00392
  168. &Gamma; U+00393
  169. &Delta; U+00394
  170. &Epsilon; U+00395
  171. &Zeta; U+00396
  172. &Eta; U+00397
  173. &Theta; U+00398
  174. &Iota; U+00399
  175. &Kappa; U+0039A
  176. &Lambda; U+0039B
  177. &Mu; U+0039C
  178. &Nu; U+0039D
  179. &Xi; U+0039E
  180. &Omicron; U+0039F
  181. &Pi; U+003A0
  182. &Rho; U+003A1
  183. &Sigma; U+003A3
  184. &Tau; U+003A4
  185. &Upsilon; U+003A5
  186. &Phi; U+003A6
  187. &Chi; U+003A7
  188. &Psi; U+003A8
  189. &Omega; U+003A9
  190. &alpha; U+003B1
  191. &beta; U+003B2
  192. &gamma; U+003B3
  193. &delta; U+003B4
  194. &epsilon; U+003B5
  195. &zeta; U+003B6
  196. &eta; U+003B7
  197. &theta; U+003B8
  198. &iota; U+003B9
  199. &kappa; U+003BA
  200. &lambda; U+003BB
  201. &mu; U+003BC
  202. &nu; U+003BD
  203. &xi; U+003BE
  204. &omicron; U+003BF
  205. &pi; U+003C0
  206. &rho; U+003C1
  207. &sigmaf; U+003C2
  208. &sigma; U+003C3
  209. &tau; U+003C4
  210. &upsilon; U+003C5
  211. &phi; U+003C6
  212. &chi; U+003C7
  213. &psi; U+003C8
  214. &omega; U+003C9
  215. &thetasym; U+003D1
  216. &upsih; U+003D2
  217. &piv; U+003D6
  218. &ensp; U+02002
  219. &emsp; U+02003
  220. &thinsp; U+02009
  221. &zwnj; U+0200C
  222. &zwj; U+0200D
  223. &lrm; U+0200E
  224. &rlm; U+0200F
  225. &ndash; U+02013
  226. &mdash; U+02014
  227. &lsquo; U+02018
  228. &rsquo; U+02019
  229. &sbquo; U+0201A
  230. &ldquo; U+0201C
  231. &rdquo; U+0201D
  232. &bdquo; U+0201E
  233. &dagger; U+02020
  234. &Dagger; U+02021
  235. &bull; U+02022
  236. &hellip; U+02026
  237. &permil; U+02030
  238. &prime; U+02032
  239. &Prime; U+02033
  240. &lsaquo; U+02039
  241. &rsaquo; U+0203A
  242. &oline; U+0203E
  243. &frasl; U+02044
  244. &euro; U+020AC
  245. &image; U+02111
  246. &weierp; U+02118
  247. &real; U+0211C
  248. &trade; U+02122
  249. &alefsym; U+02135
  250. &larr; U+02190
  251. &uarr; U+02191
  252. &rarr; U+02192
  253. &darr; U+02193
  254. &harr; U+02194
  255. &crarr; U+021B5
  256. &lArr; U+021D0
  257. &uArr; U+021D1
  258. &rArr; U+021D2
  259. &dArr; U+021D3
  260. &hArr; U+021D4
  261. &forall; U+02200
  262. &part; U+02202
  263. &exist; U+02203
  264. &empty; U+02205
  265. &nabla; U+02207
  266. &isin; U+02208
  267. &notin; U+02209
  268. &ni; U+0220B
  269. &prod; U+0220F
  270. &sum; U+02211
  271. &minus; U+02212
  272. &lowast; U+02217
  273. &radic; U+0221A
  274. &prop; U+0221D
  275. &infin; U+0221E
  276. &ang; U+02220
  277. &and; U+02227
  278. &or; U+02228
  279. &cap; U+02229
  280. &cup; U+0222A
  281. &int; U+0222B
  282. &there4; U+02234
  283. &sim; U+0223C
  284. &cong; U+02245
  285. &asymp; U+02248
  286. &ne; U+02260
  287. &equiv; U+02261
  288. &le; U+02264
  289. &ge; U+02265
  290. &sub; U+02282
  291. &sup; U+02283
  292. &nsub; U+02284
  293. &sube; U+02286
  294. &supe; U+02287
  295. &oplus; U+02295
  296. &otimes; U+02297
  297. &perp; U+022A5
  298. &sdot; U+022C5
  299. &lceil; U+02308
  300. &rceil; U+02309
  301. &lfloor; U+0230A
  302. &rfloor; U+0230B
  303. &lang; U+02329
  304. &rang; U+0232A
  305. &loz; U+025CA
  306. &spades; U+02660
  307. &clubs; U+02663
  308. &hearts; U+02665
  309. &diams; U+02666