utf_encodings.phpt 25 KB


  1. --TEST--
  2. Torture test for UTF-{7,8,16,32}
  3. --EXTENSIONS--
  4. mbstring
  5. --SKIPIF--
  6. <?php
  // Let the harness opt out of this long-running test through the environment.
  if (getenv('SKIP_SLOW_TESTS')) {
      die('skip slow test');
  }
  8. ?>
  9. --FILE--
  10. <?php
  // Fixed seed: shuffle()/rand() below then give the same sequence on every run.
  srand(232);
  // Input which cannot be converted is replaced by '%' (0x25).
  mb_substitute_character(ord('%'));
  // NOTE(review): presumably defines testValidString()/testInvalidString() — confirm.
  include 'encoding_tests.inc';
  // All ranges of valid codepoints in UnicodeData.txt, as inclusive [first, last] pairs
  15. $validRanges = [
  16. [0x0, 0x377],
  17. [0x37a, 0x37f],
  18. [0x384, 0x38a],
  19. [0x38c, 0x38c],
  20. [0x38e, 0x3a1],
  21. [0x3a3, 0x52f],
  22. [0x531, 0x556],
  23. [0x559, 0x58a],
  24. [0x58d, 0x58f],
  25. [0x591, 0x5c7],
  26. [0x5d0, 0x5ea],
  27. [0x5ef, 0x5f4],
  28. [0x600, 0x61c],
  29. [0x61e, 0x70d],
  30. [0x70f, 0x74a],
  31. [0x74d, 0x7b1],
  32. [0x7c0, 0x7fa],
  33. [0x7fd, 0x82d],
  34. [0x830, 0x83e],
  35. [0x840, 0x85b],
  36. [0x85e, 0x85e],
  37. [0x860, 0x86a],
  38. [0x8a0, 0x8b4],
  39. [0x8b6, 0x8c7],
  40. [0x8d3, 0x983],
  41. [0x985, 0x98c],
  42. [0x98f, 0x990],
  43. [0x993, 0x9a8],
  44. [0x9aa, 0x9b0],
  45. [0x9b2, 0x9b2],
  46. [0x9b6, 0x9b9],
  47. [0x9bc, 0x9c4],
  48. [0x9c7, 0x9c8],
  49. [0x9cb, 0x9ce],
  50. [0x9d7, 0x9d7],
  51. [0x9dc, 0x9dd],
  52. [0x9df, 0x9e3],
  53. [0x9e6, 0x9fe],
  54. [0xa01, 0xa03],
  55. [0xa05, 0xa0a],
  56. [0xa0f, 0xa10],
  57. [0xa13, 0xa28],
  58. [0xa2a, 0xa30],
  59. [0xa32, 0xa33],
  60. [0xa35, 0xa36],
  61. [0xa38, 0xa39],
  62. [0xa3c, 0xa3c],
  63. [0xa3e, 0xa42],
  64. [0xa47, 0xa48],
  65. [0xa4b, 0xa4d],
  66. [0xa51, 0xa51],
  67. [0xa59, 0xa5c],
  68. [0xa5e, 0xa5e],
  69. [0xa66, 0xa76],
  70. [0xa81, 0xa83],
  71. [0xa85, 0xa8d],
  72. [0xa8f, 0xa91],
  73. [0xa93, 0xaa8],
  74. [0xaaa, 0xab0],
  75. [0xab2, 0xab3],
  76. [0xab5, 0xab9],
  77. [0xabc, 0xac5],
  78. [0xac7, 0xac9],
  79. [0xacb, 0xacd],
  80. [0xad0, 0xad0],
  81. [0xae0, 0xae3],
  82. [0xae6, 0xaf1],
  83. [0xaf9, 0xaff],
  84. [0xb01, 0xb03],
  85. [0xb05, 0xb0c],
  86. [0xb0f, 0xb10],
  87. [0xb13, 0xb28],
  88. [0xb2a, 0xb30],
  89. [0xb32, 0xb33],
  90. [0xb35, 0xb39],
  91. [0xb3c, 0xb44],
  92. [0xb47, 0xb48],
  93. [0xb4b, 0xb4d],
  94. [0xb55, 0xb57],
  95. [0xb5c, 0xb5d],
  96. [0xb5f, 0xb63],
  97. [0xb66, 0xb77],
  98. [0xb82, 0xb83],
  99. [0xb85, 0xb8a],
  100. [0xb8e, 0xb90],
  101. [0xb92, 0xb95],
  102. [0xb99, 0xb9a],
  103. [0xb9c, 0xb9c],
  104. [0xb9e, 0xb9f],
  105. [0xba3, 0xba4],
  106. [0xba8, 0xbaa],
  107. [0xbae, 0xbb9],
  108. [0xbbe, 0xbc2],
  109. [0xbc6, 0xbc8],
  110. [0xbca, 0xbcd],
  111. [0xbd0, 0xbd0],
  112. [0xbd7, 0xbd7],
  113. [0xbe6, 0xbfa],
  114. [0xc00, 0xc0c],
  115. [0xc0e, 0xc10],
  116. [0xc12, 0xc28],
  117. [0xc2a, 0xc39],
  118. [0xc3d, 0xc44],
  119. [0xc46, 0xc48],
  120. [0xc4a, 0xc4d],
  121. [0xc55, 0xc56],
  122. [0xc58, 0xc5a],
  123. [0xc60, 0xc63],
  124. [0xc66, 0xc6f],
  125. [0xc77, 0xc8c],
  126. [0xc8e, 0xc90],
  127. [0xc92, 0xca8],
  128. [0xcaa, 0xcb3],
  129. [0xcb5, 0xcb9],
  130. [0xcbc, 0xcc4],
  131. [0xcc6, 0xcc8],
  132. [0xcca, 0xccd],
  133. [0xcd5, 0xcd6],
  134. [0xcde, 0xcde],
  135. [0xce0, 0xce3],
  136. [0xce6, 0xcef],
  137. [0xcf1, 0xcf2],
  138. [0xd00, 0xd0c],
  139. [0xd0e, 0xd10],
  140. [0xd12, 0xd44],
  141. [0xd46, 0xd48],
  142. [0xd4a, 0xd4f],
  143. [0xd54, 0xd63],
  144. [0xd66, 0xd7f],
  145. [0xd81, 0xd83],
  146. [0xd85, 0xd96],
  147. [0xd9a, 0xdb1],
  148. [0xdb3, 0xdbb],
  149. [0xdbd, 0xdbd],
  150. [0xdc0, 0xdc6],
  151. [0xdca, 0xdca],
  152. [0xdcf, 0xdd4],
  153. [0xdd6, 0xdd6],
  154. [0xdd8, 0xddf],
  155. [0xde6, 0xdef],
  156. [0xdf2, 0xdf4],
  157. [0xe01, 0xe3a],
  158. [0xe3f, 0xe5b],
  159. [0xe81, 0xe82],
  160. [0xe84, 0xe84],
  161. [0xe86, 0xe8a],
  162. [0xe8c, 0xea3],
  163. [0xea5, 0xea5],
  164. [0xea7, 0xebd],
  165. [0xec0, 0xec4],
  166. [0xec6, 0xec6],
  167. [0xec8, 0xecd],
  168. [0xed0, 0xed9],
  169. [0xedc, 0xedf],
  170. [0xf00, 0xf47],
  171. [0xf49, 0xf6c],
  172. [0xf71, 0xf97],
  173. [0xf99, 0xfbc],
  174. [0xfbe, 0xfcc],
  175. [0xfce, 0xfda],
  176. [0x1000, 0x10c5],
  177. [0x10c7, 0x10c7],
  178. [0x10cd, 0x10cd],
  179. [0x10d0, 0x1248],
  180. [0x124a, 0x124d],
  181. [0x1250, 0x1256],
  182. [0x1258, 0x1258],
  183. [0x125a, 0x125d],
  184. [0x1260, 0x1288],
  185. [0x128a, 0x128d],
  186. [0x1290, 0x12b0],
  187. [0x12b2, 0x12b5],
  188. [0x12b8, 0x12be],
  189. [0x12c0, 0x12c0],
  190. [0x12c2, 0x12c5],
  191. [0x12c8, 0x12d6],
  192. [0x12d8, 0x1310],
  193. [0x1312, 0x1315],
  194. [0x1318, 0x135a],
  195. [0x135d, 0x137c],
  196. [0x1380, 0x1399],
  197. [0x13a0, 0x13f5],
  198. [0x13f8, 0x13fd],
  199. [0x1400, 0x169c],
  200. [0x16a0, 0x16f8],
  201. [0x1700, 0x170c],
  202. [0x170e, 0x1714],
  203. [0x1720, 0x1736],
  204. [0x1740, 0x1753],
  205. [0x1760, 0x176c],
  206. [0x176e, 0x1770],
  207. [0x1772, 0x1773],
  208. [0x1780, 0x17dd],
  209. [0x17e0, 0x17e9],
  210. [0x17f0, 0x17f9],
  211. [0x1800, 0x180e],
  212. [0x1810, 0x1819],
  213. [0x1820, 0x1878],
  214. [0x1880, 0x18aa],
  215. [0x18b0, 0x18f5],
  216. [0x1900, 0x191e],
  217. [0x1920, 0x192b],
  218. [0x1930, 0x193b],
  219. [0x1940, 0x1940],
  220. [0x1944, 0x196d],
  221. [0x1970, 0x1974],
  222. [0x1980, 0x19ab],
  223. [0x19b0, 0x19c9],
  224. [0x19d0, 0x19da],
  225. [0x19de, 0x1a1b],
  226. [0x1a1e, 0x1a5e],
  227. [0x1a60, 0x1a7c],
  228. [0x1a7f, 0x1a89],
  229. [0x1a90, 0x1a99],
  230. [0x1aa0, 0x1aad],
  231. [0x1ab0, 0x1ac0],
  232. [0x1b00, 0x1b4b],
  233. [0x1b50, 0x1b7c],
  234. [0x1b80, 0x1bf3],
  235. [0x1bfc, 0x1c37],
  236. [0x1c3b, 0x1c49],
  237. [0x1c4d, 0x1c88],
  238. [0x1c90, 0x1cba],
  239. [0x1cbd, 0x1cc7],
  240. [0x1cd0, 0x1cfa],
  241. [0x1d00, 0x1df9],
  242. [0x1dfb, 0x1f15],
  243. [0x1f18, 0x1f1d],
  244. [0x1f20, 0x1f45],
  245. [0x1f48, 0x1f4d],
  246. [0x1f50, 0x1f57],
  247. [0x1f59, 0x1f59],
  248. [0x1f5b, 0x1f5b],
  249. [0x1f5d, 0x1f5d],
  250. [0x1f5f, 0x1f7d],
  251. [0x1f80, 0x1fb4],
  252. [0x1fb6, 0x1fc4],
  253. [0x1fc6, 0x1fd3],
  254. [0x1fd6, 0x1fdb],
  255. [0x1fdd, 0x1fef],
  256. [0x1ff2, 0x1ff4],
  257. [0x1ff6, 0x1ffe],
  258. [0x2000, 0x2064],
  259. [0x2066, 0x2071],
  260. [0x2074, 0x208e],
  261. [0x2090, 0x209c],
  262. [0x20a0, 0x20bf],
  263. [0x20d0, 0x20f0],
  264. [0x2100, 0x218b],
  265. [0x2190, 0x2426],
  266. [0x2440, 0x244a],
  267. [0x2460, 0x2b73],
  268. [0x2b76, 0x2b95],
  269. [0x2b97, 0x2c2e],
  270. [0x2c30, 0x2c5e],
  271. [0x2c60, 0x2cf3],
  272. [0x2cf9, 0x2d25],
  273. [0x2d27, 0x2d27],
  274. [0x2d2d, 0x2d2d],
  275. [0x2d30, 0x2d67],
  276. [0x2d6f, 0x2d70],
  277. [0x2d7f, 0x2d96],
  278. [0x2da0, 0x2da6],
  279. [0x2da8, 0x2dae],
  280. [0x2db0, 0x2db6],
  281. [0x2db8, 0x2dbe],
  282. [0x2dc0, 0x2dc6],
  283. [0x2dc8, 0x2dce],
  284. [0x2dd0, 0x2dd6],
  285. [0x2dd8, 0x2dde],
  286. [0x2de0, 0x2e52],
  287. [0x2e80, 0x2e99],
  288. [0x2e9b, 0x2ef3],
  289. [0x2f00, 0x2fd5],
  290. [0x2ff0, 0x2ffb],
  291. [0x3000, 0x303f],
  292. [0x3041, 0x3096],
  293. [0x3099, 0x30ff],
  294. [0x3105, 0x312f],
  295. [0x3131, 0x318e],
  296. [0x3190, 0x31e3],
  297. [0x31f0, 0x321e],
  298. [0x3220, 0x3400],
  299. [0x4dbf, 0x4e00],
  300. [0x9ffc, 0x9ffc],
  301. [0xa000, 0xa48c],
  302. [0xa490, 0xa4c6],
  303. [0xa4d0, 0xa62b],
  304. [0xa640, 0xa6f7],
  305. [0xa700, 0xa7bf],
  306. [0xa7c2, 0xa7ca],
  307. [0xa7f5, 0xa82c],
  308. [0xa830, 0xa839],
  309. [0xa840, 0xa877],
  310. [0xa880, 0xa8c5],
  311. [0xa8ce, 0xa8d9],
  312. [0xa8e0, 0xa953],
  313. [0xa95f, 0xa97c],
  314. [0xa980, 0xa9cd],
  315. [0xa9cf, 0xa9d9],
  316. [0xa9de, 0xa9fe],
  317. [0xaa00, 0xaa36],
  318. [0xaa40, 0xaa4d],
  319. [0xaa50, 0xaa59],
  320. [0xaa5c, 0xaac2],
  321. [0xaadb, 0xaaf6],
  322. [0xab01, 0xab06],
  323. [0xab09, 0xab0e],
  324. [0xab11, 0xab16],
  325. [0xab20, 0xab26],
  326. [0xab28, 0xab2e],
  327. [0xab30, 0xab6b],
  328. [0xab70, 0xabed],
  329. [0xabf0, 0xabf9],
  330. [0xac00, 0xac00],
  331. [0xd7a3, 0xd7a3],
  332. [0xd7b0, 0xd7c6],
  333. [0xd7cb, 0xd7fb],
  334. [0xd800, 0xd800],
  335. [0xdb7f, 0xdb80],
  336. [0xdbff, 0xdc00],
  337. [0xdfff, 0xe000],
  338. [0xf8ff, 0xfa6d],
  339. [0xfa70, 0xfad9],
  340. [0xfb00, 0xfb06],
  341. [0xfb13, 0xfb17],
  342. [0xfb1d, 0xfb36],
  343. [0xfb38, 0xfb3c],
  344. [0xfb3e, 0xfb3e],
  345. [0xfb40, 0xfb41],
  346. [0xfb43, 0xfb44],
  347. [0xfb46, 0xfbc1],
  348. [0xfbd3, 0xfd3f],
  349. [0xfd50, 0xfd8f],
  350. [0xfd92, 0xfdc7],
  351. [0xfdf0, 0xfdfd],
  352. [0xfe00, 0xfe19],
  353. [0xfe20, 0xfe52],
  354. [0xfe54, 0xfe66],
  355. [0xfe68, 0xfe6b],
  356. [0xfe70, 0xfe74],
  357. [0xfe76, 0xfefc],
  358. [0xfeff, 0xfeff],
  359. [0xff01, 0xffbe],
  360. [0xffc2, 0xffc7],
  361. [0xffca, 0xffcf],
  362. [0xffd2, 0xffd7],
  363. [0xffda, 0xffdc],
  364. [0xffe0, 0xffe6],
  365. [0xffe8, 0xffee],
  366. [0xfff9, 0xfffd],
  367. [0x10000, 0x1000b],
  368. [0x1000d, 0x10026],
  369. [0x10028, 0x1003a],
  370. [0x1003c, 0x1003d],
  371. [0x1003f, 0x1004d],
  372. [0x10050, 0x1005d],
  373. [0x10080, 0x100fa],
  374. [0x10100, 0x10102],
  375. [0x10107, 0x10133],
  376. [0x10137, 0x1018e],
  377. [0x10190, 0x1019c],
  378. [0x101a0, 0x101a0],
  379. [0x101d0, 0x101fd],
  380. [0x10280, 0x1029c],
  381. [0x102a0, 0x102d0],
  382. [0x102e0, 0x102fb],
  383. [0x10300, 0x10323],
  384. [0x1032d, 0x1034a],
  385. [0x10350, 0x1037a],
  386. [0x10380, 0x1039d],
  387. [0x1039f, 0x103c3],
  388. [0x103c8, 0x103d5],
  389. [0x10400, 0x1049d],
  390. [0x104a0, 0x104a9],
  391. [0x104b0, 0x104d3],
  392. [0x104d8, 0x104fb],
  393. [0x10500, 0x10527],
  394. [0x10530, 0x10563],
  395. [0x1056f, 0x1056f],
  396. [0x10600, 0x10736],
  397. [0x10740, 0x10755],
  398. [0x10760, 0x10767],
  399. [0x10800, 0x10805],
  400. [0x10808, 0x10808],
  401. [0x1080a, 0x10835],
  402. [0x10837, 0x10838],
  403. [0x1083c, 0x1083c],
  404. [0x1083f, 0x10855],
  405. [0x10857, 0x1089e],
  406. [0x108a7, 0x108af],
  407. [0x108e0, 0x108f2],
  408. [0x108f4, 0x108f5],
  409. [0x108fb, 0x1091b],
  410. [0x1091f, 0x10939],
  411. [0x1093f, 0x1093f],
  412. [0x10980, 0x109b7],
  413. [0x109bc, 0x109cf],
  414. [0x109d2, 0x10a03],
  415. [0x10a05, 0x10a06],
  416. [0x10a0c, 0x10a13],
  417. [0x10a15, 0x10a17],
  418. [0x10a19, 0x10a35],
  419. [0x10a38, 0x10a3a],
  420. [0x10a3f, 0x10a48],
  421. [0x10a50, 0x10a58],
  422. [0x10a60, 0x10a9f],
  423. [0x10ac0, 0x10ae6],
  424. [0x10aeb, 0x10af6],
  425. [0x10b00, 0x10b35],
  426. [0x10b39, 0x10b55],
  427. [0x10b58, 0x10b72],
  428. [0x10b78, 0x10b91],
  429. [0x10b99, 0x10b9c],
  430. [0x10ba9, 0x10baf],
  431. [0x10c00, 0x10c48],
  432. [0x10c80, 0x10cb2],
  433. [0x10cc0, 0x10cf2],
  434. [0x10cfa, 0x10d27],
  435. [0x10d30, 0x10d39],
  436. [0x10e60, 0x10e7e],
  437. [0x10e80, 0x10ea9],
  438. [0x10eab, 0x10ead],
  439. [0x10eb0, 0x10eb1],
  440. [0x10f00, 0x10f27],
  441. [0x10f30, 0x10f59],
  442. [0x10fb0, 0x10fcb],
  443. [0x10fe0, 0x10ff6],
  444. [0x11000, 0x1104d],
  445. [0x11052, 0x1106f],
  446. [0x1107f, 0x110c1],
  447. [0x110cd, 0x110cd],
  448. [0x110d0, 0x110e8],
  449. [0x110f0, 0x110f9],
  450. [0x11100, 0x11134],
  451. [0x11136, 0x11147],
  452. [0x11150, 0x11176],
  453. [0x11180, 0x111df],
  454. [0x111e1, 0x111f4],
  455. [0x11200, 0x11211],
  456. [0x11213, 0x1123e],
  457. [0x11280, 0x11286],
  458. [0x11288, 0x11288],
  459. [0x1128a, 0x1128d],
  460. [0x1128f, 0x1129d],
  461. [0x1129f, 0x112a9],
  462. [0x112b0, 0x112ea],
  463. [0x112f0, 0x112f9],
  464. [0x11300, 0x11303],
  465. [0x11305, 0x1130c],
  466. [0x1130f, 0x11310],
  467. [0x11313, 0x11328],
  468. [0x1132a, 0x11330],
  469. [0x11332, 0x11333],
  470. [0x11335, 0x11339],
  471. [0x1133b, 0x11344],
  472. [0x11347, 0x11348],
  473. [0x1134b, 0x1134d],
  474. [0x11350, 0x11350],
  475. [0x11357, 0x11357],
  476. [0x1135d, 0x11363],
  477. [0x11366, 0x1136c],
  478. [0x11370, 0x11374],
  479. [0x11400, 0x1145b],
  480. [0x1145d, 0x11461],
  481. [0x11480, 0x114c7],
  482. [0x114d0, 0x114d9],
  483. [0x11580, 0x115b5],
  484. [0x115b8, 0x115dd],
  485. [0x11600, 0x11644],
  486. [0x11650, 0x11659],
  487. [0x11660, 0x1166c],
  488. [0x11680, 0x116b8],
  489. [0x116c0, 0x116c9],
  490. [0x11700, 0x1171a],
  491. [0x1171d, 0x1172b],
  492. [0x11730, 0x1173f],
  493. [0x11800, 0x1183b],
  494. [0x118a0, 0x118f2],
  495. [0x118ff, 0x11906],
  496. [0x11909, 0x11909],
  497. [0x1190c, 0x11913],
  498. [0x11915, 0x11916],
  499. [0x11918, 0x11935],
  500. [0x11937, 0x11938],
  501. [0x1193b, 0x11946],
  502. [0x11950, 0x11959],
  503. [0x119a0, 0x119a7],
  504. [0x119aa, 0x119d7],
  505. [0x119da, 0x119e4],
  506. [0x11a00, 0x11a47],
  507. [0x11a50, 0x11aa2],
  508. [0x11ac0, 0x11af8],
  509. [0x11c00, 0x11c08],
  510. [0x11c0a, 0x11c36],
  511. [0x11c38, 0x11c45],
  512. [0x11c50, 0x11c6c],
  513. [0x11c70, 0x11c8f],
  514. [0x11c92, 0x11ca7],
  515. [0x11ca9, 0x11cb6],
  516. [0x11d00, 0x11d06],
  517. [0x11d08, 0x11d09],
  518. [0x11d0b, 0x11d36],
  519. [0x11d3a, 0x11d3a],
  520. [0x11d3c, 0x11d3d],
  521. [0x11d3f, 0x11d47],
  522. [0x11d50, 0x11d59],
  523. [0x11d60, 0x11d65],
  524. [0x11d67, 0x11d68],
  525. [0x11d6a, 0x11d8e],
  526. [0x11d90, 0x11d91],
  527. [0x11d93, 0x11d98],
  528. [0x11da0, 0x11da9],
  529. [0x11ee0, 0x11ef8],
  530. [0x11fb0, 0x11fb0],
  531. [0x11fc0, 0x11ff1],
  532. [0x11fff, 0x12399],
  533. [0x12400, 0x1246e],
  534. [0x12470, 0x12474],
  535. [0x12480, 0x12543],
  536. [0x13000, 0x1342e],
  537. [0x13430, 0x13438],
  538. [0x14400, 0x14646],
  539. [0x16800, 0x16a38],
  540. [0x16a40, 0x16a5e],
  541. [0x16a60, 0x16a69],
  542. [0x16a6e, 0x16a6f],
  543. [0x16ad0, 0x16aed],
  544. [0x16af0, 0x16af5],
  545. [0x16b00, 0x16b45],
  546. [0x16b50, 0x16b59],
  547. [0x16b5b, 0x16b61],
  548. [0x16b63, 0x16b77],
  549. [0x16b7d, 0x16b8f],
  550. [0x16e40, 0x16e9a],
  551. [0x16f00, 0x16f4a],
  552. [0x16f4f, 0x16f87],
  553. [0x16f8f, 0x16f9f],
  554. [0x16fe0, 0x16fe4],
  555. [0x16ff0, 0x16ff1],
  556. [0x17000, 0x17000],
  557. [0x187f7, 0x187f7],
  558. [0x18800, 0x18cd5],
  559. [0x18d00, 0x18d00],
  560. [0x18d08, 0x18d08],
  561. [0x1b000, 0x1b11e],
  562. [0x1b150, 0x1b152],
  563. [0x1b164, 0x1b167],
  564. [0x1b170, 0x1b2fb],
  565. [0x1bc00, 0x1bc6a],
  566. [0x1bc70, 0x1bc7c],
  567. [0x1bc80, 0x1bc88],
  568. [0x1bc90, 0x1bc99],
  569. [0x1bc9c, 0x1bca3],
  570. [0x1d000, 0x1d0f5],
  571. [0x1d100, 0x1d126],
  572. [0x1d129, 0x1d1e8],
  573. [0x1d200, 0x1d245],
  574. [0x1d2e0, 0x1d2f3],
  575. [0x1d300, 0x1d356],
  576. [0x1d360, 0x1d378],
  577. [0x1d400, 0x1d454],
  578. [0x1d456, 0x1d49c],
  579. [0x1d49e, 0x1d49f],
  580. [0x1d4a2, 0x1d4a2],
  581. [0x1d4a5, 0x1d4a6],
  582. [0x1d4a9, 0x1d4ac],
  583. [0x1d4ae, 0x1d4b9],
  584. [0x1d4bb, 0x1d4bb],
  585. [0x1d4bd, 0x1d4c3],
  586. [0x1d4c5, 0x1d505],
  587. [0x1d507, 0x1d50a],
  588. [0x1d50d, 0x1d514],
  589. [0x1d516, 0x1d51c],
  590. [0x1d51e, 0x1d539],
  591. [0x1d53b, 0x1d53e],
  592. [0x1d540, 0x1d544],
  593. [0x1d546, 0x1d546],
  594. [0x1d54a, 0x1d550],
  595. [0x1d552, 0x1d6a5],
  596. [0x1d6a8, 0x1d7cb],
  597. [0x1d7ce, 0x1da8b],
  598. [0x1da9b, 0x1da9f],
  599. [0x1daa1, 0x1daaf],
  600. [0x1e000, 0x1e006],
  601. [0x1e008, 0x1e018],
  602. [0x1e01b, 0x1e021],
  603. [0x1e023, 0x1e024],
  604. [0x1e026, 0x1e02a],
  605. [0x1e100, 0x1e12c],
  606. [0x1e130, 0x1e13d],
  607. [0x1e140, 0x1e149],
  608. [0x1e14e, 0x1e14f],
  609. [0x1e2c0, 0x1e2f9],
  610. [0x1e2ff, 0x1e2ff],
  611. [0x1e800, 0x1e8c4],
  612. [0x1e8c7, 0x1e8d6],
  613. [0x1e900, 0x1e94b],
  614. [0x1e950, 0x1e959],
  615. [0x1e95e, 0x1e95f],
  616. [0x1ec71, 0x1ecb4],
  617. [0x1ed01, 0x1ed3d],
  618. [0x1ee00, 0x1ee03],
  619. [0x1ee05, 0x1ee1f],
  620. [0x1ee21, 0x1ee22],
  621. [0x1ee24, 0x1ee24],
  622. [0x1ee27, 0x1ee27],
  623. [0x1ee29, 0x1ee32],
  624. [0x1ee34, 0x1ee37],
  625. [0x1ee39, 0x1ee39],
  626. [0x1ee3b, 0x1ee3b],
  627. [0x1ee42, 0x1ee42],
  628. [0x1ee47, 0x1ee47],
  629. [0x1ee49, 0x1ee49],
  630. [0x1ee4b, 0x1ee4b],
  631. [0x1ee4d, 0x1ee4f],
  632. [0x1ee51, 0x1ee52],
  633. [0x1ee54, 0x1ee54],
  634. [0x1ee57, 0x1ee57],
  635. [0x1ee59, 0x1ee59],
  636. [0x1ee5b, 0x1ee5b],
  637. [0x1ee5d, 0x1ee5d],
  638. [0x1ee5f, 0x1ee5f],
  639. [0x1ee61, 0x1ee62],
  640. [0x1ee64, 0x1ee64],
  641. [0x1ee67, 0x1ee6a],
  642. [0x1ee6c, 0x1ee72],
  643. [0x1ee74, 0x1ee77],
  644. [0x1ee79, 0x1ee7c],
  645. [0x1ee7e, 0x1ee7e],
  646. [0x1ee80, 0x1ee89],
  647. [0x1ee8b, 0x1ee9b],
  648. [0x1eea1, 0x1eea3],
  649. [0x1eea5, 0x1eea9],
  650. [0x1eeab, 0x1eebb],
  651. [0x1eef0, 0x1eef1],
  652. [0x1f000, 0x1f02b],
  653. [0x1f030, 0x1f093],
  654. [0x1f0a0, 0x1f0ae],
  655. [0x1f0b1, 0x1f0bf],
  656. [0x1f0c1, 0x1f0cf],
  657. [0x1f0d1, 0x1f0f5],
  658. [0x1f100, 0x1f1ad],
  659. [0x1f1e6, 0x1f202],
  660. [0x1f210, 0x1f23b],
  661. [0x1f240, 0x1f248],
  662. [0x1f250, 0x1f251],
  663. [0x1f260, 0x1f265],
  664. [0x1f300, 0x1f6d7],
  665. [0x1f6e0, 0x1f6ec],
  666. [0x1f6f0, 0x1f6fc],
  667. [0x1f700, 0x1f773],
  668. [0x1f780, 0x1f7d8],
  669. [0x1f7e0, 0x1f7eb],
  670. [0x1f800, 0x1f80b],
  671. [0x1f810, 0x1f847],
  672. [0x1f850, 0x1f859],
  673. [0x1f860, 0x1f887],
  674. [0x1f890, 0x1f8ad],
  675. [0x1f8b0, 0x1f8b1],
  676. [0x1f900, 0x1f978],
  677. [0x1f97a, 0x1f9cb],
  678. [0x1f9cd, 0x1fa53],
  679. [0x1fa60, 0x1fa6d],
  680. [0x1fa70, 0x1fa74],
  681. [0x1fa78, 0x1fa7a],
  682. [0x1fa80, 0x1fa86],
  683. [0x1fa90, 0x1faa8],
  684. [0x1fab0, 0x1fab6],
  685. [0x1fac0, 0x1fac2],
  686. [0x1fad0, 0x1fad6],
  687. [0x1fb00, 0x1fb92],
  688. [0x1fb94, 0x1fbca],
  689. [0x1fbf0, 0x1fbf9],
  690. [0x20000, 0x20000],
  691. [0x2a6dd, 0x2a6dd],
  692. [0x2a700, 0x2a700],
  693. [0x2b734, 0x2b734],
  694. [0x2b740, 0x2b740],
  695. [0x2b81d, 0x2b81d],
  696. [0x2b820, 0x2b820],
  697. [0x2cea1, 0x2cea1],
  698. [0x2ceb0, 0x2ceb0],
  699. [0x2ebe0, 0x2ebe0],
  700. [0x2f800, 0x2fa1d],
  701. [0x30000, 0x30000],
  702. [0x3134a, 0x3134a],
  703. [0xe0001, 0xe0001],
  704. [0xe0020, 0xe007f],
  705. [0xe0100, 0xe01ef],
  706. [0xf0000, 0xf0000],
  707. [0xffffd, 0xffffd],
  708. [0x100000, 0x100000],
  709. [0x10fffd, 0x10fffd]];
  // Set of all valid codepoints; each key is the codepoint packed as UTF-32BE
  $validCodepoints = [];
  foreach ($validRanges as [$first, $last]) {
      for ($codepoint = $first; $codepoint <= $last; ++$codepoint) {
          // Surrogates are included in UnicodeData.txt, but are left out here
          if ($codepoint >= 0xD800 && $codepoint <= 0xDFFF) {
              continue;
          }
          $validCodepoints[pack('N', $codepoint)] = true;
      }
  }
  /**
   * Convert every valid codepoint from UTF-32BE to $encoding, in random batches.
   *
   * The UTF-32BE forms of all codepoints in $validCodepoints are shuffled and
   * consumed from the end in batches of 20-30 (fewer once the list runs short).
   * Each batch is converted to $encoding and both forms are passed to
   * testValidString(). The script dies if mb_convert_encoding() returns false.
   *
   * @param string $encoding Name of the encoding under test
   */
  function testValidCodepoints($encoding) {
      global $validCodepoints;

      $remaining = array_keys($validCodepoints);
      shuffle($remaining);

      while (count($remaining) > 0) {
          $batchSize = min(rand(20, 30), count($remaining));

          // Cut the batch off the tail; reversed so the very last entry comes first
          $batch = array_reverse(array_splice($remaining, -$batchSize));
          $utf32 = implode('', $batch);

          $converted = mb_convert_encoding($utf32, $encoding, 'UTF-32BE');
          if ($converted === false) {
              die("mb_convert_encoding failed to convert UTF-32BE to $encoding." .
                  "\nString: " . bin2hex($utf32));
          }

          testValidString($converted, $utf32, $encoding, 'UTF-32BE');
      }
  }
  /**
   * Check each invalid byte sequence when it is followed by a valid character.
   *
   * For every entry, one randomly chosen valid codepoint is converted to
   * $encoding and appended to the invalid bytes; the expected UTF-32BE output
   * is the entry's value followed by that same codepoint in UTF-32BE.
   *
   * @param array  $invalid  Invalid input (in $encoding) => expected UTF-32BE output
   * @param string $encoding Name of the encoding under test
   */
  function testInvalidCodepoints($invalid, $encoding) {
      global $validCodepoints;

      $pool = array_keys($validCodepoints);
      shuffle($pool);

      foreach ($invalid as $badBytes => $expectedPrefix) {
          $trailer  = array_pop($pool); // one valid codepoint, as UTF-32BE
          $input    = $badBytes . mb_convert_encoding($trailer, $encoding, 'UTF-32BE');
          $expected = $expectedPrefix . $trailer;
          testInvalidString($input, $expected, $encoding, 'UTF-32BE');
      }
  }
  echo "== UTF-8 ==\n";

  testValidCodepoints('UTF-8');
  testValidString('', '', 'UTF-8', 'UTF-32BE');

  // A single '%' in UTF-32BE; used below wherever input bytes get substituted
  $sub = "\x00\x00\x00%";

  // Invalid UTF-8 input => expected UTF-32BE output
  $invalid = [
      // Codepoints outside of valid 0-0x10FFFF range for Unicode
      "\xF4\x90\x80\x80" => str_repeat($sub, 4), // CP 0x110000
      "\xF7\x80\x80\x80" => str_repeat($sub, 4), // CP 0x1C0000
      "\xF7\xBF\xBF\xBF" => str_repeat($sub, 4), // CP 0x1FFFFF

      // Reserved range for UTF-16 surrogate pairs
      "\xED\xA0\x80" => str_repeat($sub, 3), // CP 0xD800
      "\xED\xAF\xBF" => str_repeat($sub, 3), // CP 0xDBFF
      "\xED\xBF\xBF" => str_repeat($sub, 3), // CP 0xDFFF

      // Truncated characters
      "\xDF"         => $sub, // should have been 2-byte
      "\xEF\xBF"     => $sub, // should have been 3-byte
      "\xF0\xBF\xBF" => $sub, // should have been 4-byte

      // Multi-byte characters which end too soon and go to ASCII
      "\xDFA"         => $sub . "\x00\x00\x00A",
      "\xEF\xBFA"     => $sub . "\x00\x00\x00A",
      "\xF0\xBFA"     => $sub . "\x00\x00\x00A",
      "\xF0\xBF\xBFA" => $sub . "\x00\x00\x00A",

      // Multi-byte characters which end too soon and go to another MB char
      "\xDF\xDF\xBF"         => $sub . "\x00\x00\x07\xFF",
      "\xEF\xBF\xDF\xBF"     => $sub . "\x00\x00\x07\xFF",
      "\xF0\xBF\xBF\xDF\xBF" => $sub . "\x00\x00\x07\xFF",

      // Multi-byte characters which end too soon and go to a junk byte
      // (Which isn't even valid to start a new character)
      "\xF0\xBF\xBF\xFF" => str_repeat($sub, 2),
      "\xF0\xBF\xFF"     => str_repeat($sub, 2),

      // Continuation bytes which appear outside of a MB char
      "\x80"         => $sub,
      "A\x80"        => "\x00\x00\x00A" . $sub,
      "\xDF\xBF\x80" => "\x00\x00\x07\xFF" . $sub,

      // Overlong code units
      // (Using more bytes than needed to encode a character)
      "\xC1\xBF"         => str_repeat($sub, 2), // didn't need 2 bytes
      "\xE0\x9F\xBF"     => str_repeat($sub, 3), // didn't need 3 bytes
      "\xF0\x8F\xBF\xBF" => str_repeat($sub, 4), // didn't need 4 bytes
  ];

  testInvalidCodepoints($invalid, 'UTF-8');
  echo "== UTF-16 ==\n";

  $utf16Variants = ['UTF-16', 'UTF-16LE', 'UTF-16BE'];

  foreach ($utf16Variants as $variant) {
      testValidCodepoints($variant);
  }
  // Empty string
  foreach ($utf16Variants as $variant) {
      testValidString('', '', $variant, 'UTF-32BE');
  }

  // Big-endian input (also used for plain 'UTF-16') => expected UTF-32BE output
  $invalid = [
      // UTF-16 _cannot_ represent codepoints bigger than 0x10FFFF, so we're not
      // worried about that. But there are plenty of other ways to mess up...

      // Second half of surrogate pair comes first
      "\xDC\x01\xD8\x02" => "\x00\x00\x00%\x00\x00\x00%",

      // First half of surrogate pair not followed by second part
      "\xD8\x01\x00A" => "\x00\x00\x00%\x00\x00\x00A",

      // First half of surrogate pair at end of string
      "\xD8\x01" => "\x00\x00\x00%",
  ];

  foreach (['UTF-16', 'UTF-16BE'] as $variant) {
      testInvalidCodepoints($invalid, $variant);
  }

  // Truncated strings
  foreach (['UTF-16', 'UTF-16BE'] as $variant) {
      testInvalidString("\x00", "\x00\x00\x00%", $variant, 'UTF-32BE');
      testInvalidString("\x00A\x01", "\x00\x00\x00A\x00\x00\x00%", $variant, 'UTF-32BE');
  }

  // Little-endian input => expected UTF-32BE output
  $invalid = [
      // Second half of surrogate pair comes first
      "\x01\xDC\x02\xD8" => "\x00\x00\x00%\x00\x00\x00%",

      // First half of surrogate pair not followed by second part
      "\x01\xD8A\x00" => "\x00\x00\x00%\x00\x00\x00A",

      // First half of surrogate pair at end of string
      "\x01\xD8" => "\x00\x00\x00%",

      // Two successive codepoints which are both the 1st part of a surrogate pair
      "\x01\xD8\x02\xD8" => "\x00\x00\x00%\x00\x00\x00%",
  ];

  testInvalidCodepoints($invalid, 'UTF-16LE');

  // Truncated
  testInvalidString("\x00", "\x00\x00\x00%", 'UTF-16LE', 'UTF-32BE');
  testInvalidString("A\x00\x01", "\x00\x00\x00A\x00\x00\x00%", 'UTF-16LE', 'UTF-32BE');

  // TODO: test handling of UTF-16 BOM
  echo "== UTF-32 ==\n";

  testValidCodepoints("UTF-32LE");
  testValidCodepoints("UTF-32BE");

  // Empty string
  testValidString('', '', 'UTF-32', 'UTF-32BE');
  testValidString('', '', 'UTF-32BE', 'UTF-32');
  testValidString('', '', 'UTF-32LE', 'UTF-32BE');

  // Big-endian input (also used for plain 'UTF-32') => expected UTF-32BE output
  $invalid = array(
      // Codepoints which are too big
      "\x00\x11\x00\x00" => "\x00\x00\x00%",
      "\x80\x00\x00\x00" => "\x00\x00\x00%",
      "\xff\xff\xfe\xff" => "\x00\x00\x00%",

      // Surrogates
      "\x00\x00\xd8\x00" => "\x00\x00\x00%",
      "\x00\x00\xdb\xff" => "\x00\x00\x00%",
      "\x00\x00\xdc\x00" => "\x00\x00\x00%",
      "\x00\x00\xdf\xff" => "\x00\x00\x00%",
  );

  testInvalidCodepoints($invalid, 'UTF-32');
  testInvalidCodepoints($invalid, 'UTF-32BE');

  // Truncated code units: 3, 2 and 1 byte(s) of a 4-byte unit
  testInvalidString("\x00\x01\x01", "\x00\x00\x00%", 'UTF-32', 'UTF-32BE');
  testInvalidString("\x00\x01", "\x00\x00\x00%", 'UTF-32', 'UTF-32BE');
  testInvalidString("\x00", "\x00\x00\x00%", 'UTF-32', 'UTF-32BE');
  // Same three lengths for UTF-32BE. These used to be three identical 1-byte
  // cases, so the 3-byte and 2-byte truncations were never exercised.
  testInvalidString("\x00\x01\x01", "\x00\x00\x00%", 'UTF-32BE', 'UTF-32');
  testInvalidString("\x00\x01", "\x00\x00\x00%", 'UTF-32BE', 'UTF-32');
  testInvalidString("\x00", "\x00\x00\x00%", 'UTF-32BE', 'UTF-32');

  // Little-endian input => expected UTF-32BE output
  $invalid = array(
      // Codepoints which are too big
      "\x00\x00\x11\x00" => "\x00\x00\x00%",
      "\x00\x00\x00\x80" => "\x00\x00\x00%",
      "\xff\xfe\xff\xff" => "\x00\x00\x00%",

      // Surrogates
      "\x00\xd8\x00\x00" => "\x00\x00\x00%",
      "\xff\xdb\x00\x00" => "\x00\x00\x00%",
      "\x00\xdc\x00\x00" => "\x00\x00\x00%",
      "\xff\xdf\x00\x00" => "\x00\x00\x00%",
  );

  testInvalidCodepoints($invalid, 'UTF-32LE');

  // Truncated code units
  testInvalidString("\x00\x01\x01", "\x00\x00\x00%", 'UTF-32LE', 'UTF-32BE');
  testInvalidString("\x00\x01", "\x00\x00\x00%", 'UTF-32LE', 'UTF-32BE');
  testInvalidString("\x00", "\x00\x00\x00%", 'UTF-32LE', 'UTF-32BE');

  // TODO: test handling of UTF-32 BOM
  echo "== UTF-7 ==\n";

  testValidString('', '', 'UTF-7', 'UTF-32BE');

  // Characters expected to appear as themselves in UTF-7, in the order tested:
  //   - 'direct' characters: A-Z, a-z, 0-9 and some punctuation
  //     ('Optional direct' characters are Base64-encoded in mbstring's
  //     implementation, so they are not in this list)
  //   - whitespace
  $asThemselves = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                . 'abcdefghijklmnopqrstuvwxyz'
                . '0123456789'
                . "'(),-./:?"
                . " \t\r\n\x00";

  foreach (str_split($asThemselves) as $char) {
      testValidString($char, "\x00\x00\x00" . $char, 'UTF-7', 'UTF-32BE');
  }

  // Encoding + as +-
  testValidString('+-', "\x00\x00\x00+", 'UTF-7', 'UTF-32BE', false);
  // UTF-16 + Base64 encoding
  /**
   * Build the body of a UTF-7 '+' section for a string.
   *
   * @param string $str      Text to encode
   * @param string $encoding Encoding which $str is currently in
   * @return string $str converted to UTF-16BE, then Base64-encoded without padding
   */
  function encode($str, $encoding) {
      $utf16 = mb_convert_encoding($str, 'UTF-16BE', $encoding);
      // Base64 encoding for UTF-7 doesn't use '=' for padding
      return rtrim(base64_encode($utf16), '=');
  }
  // Each byte value 0-255 as a one-character Base64 section.
  // The last argument is false for the characters which were tested above as
  // appearing literally in UTF-7.
  // NOTE(review): presumably that flag tells testValidString() whether to also
  // check the reverse conversion — confirm in encoding_tests.inc.
  for ($i = 0; $i < 256; $i++) {
      $c = chr($i);

      $isLetter  = ($i >= ord('A') && $i <= ord('Z')) || ($i >= ord('a') && $i <= ord('z'));
      $isDigit   = $i >= ord('0') && $i <= ord('9');
      $isLiteral = strpos("'(),-./:?\x00 \t\r\n", $c) !== false;
      $reversible = !($isLetter || $isDigit || $isLiteral);

      testValidString('+' . encode("\x00" . $c, 'UTF-16BE') . '-', "\x00\x00\x00" . $c, 'UTF-7', 'UTF-32BE', $reversible);
  }

  // Base64 sections holding one, two and three UTF-16 code units
  // (UTF-16BE content of the section => expected UTF-32BE)
  $sections = [
      "\x12\x34"                 => "\x00\x00\x12\x34",
      "\x12\x34\x56\x78"         => "\x00\x00\x12\x34\x00\x00\x56\x78",
      "\x12\x34\x56\x78\x00\x40" => "\x00\x00\x12\x34\x00\x00\x56\x78\x00\x00\x00\x40",
  ];
  foreach ($sections as $utf16 => $utf32) {
      testValidString('+' . encode($utf16, 'UTF-16BE') . '-', $utf32, 'UTF-7', 'UTF-32BE');
  }

  // Surrogate pair
  $astral = "\x00\x01\x04\x00"; // U+10400 as UTF-32BE
  testValidString('+' . encode($astral, 'UTF-32BE') . '-', $astral, 'UTF-7', 'UTF-32BE');
  $mixed = "\x00\x00\x00A" . $astral . "\x00\x00\x00B";
  testValidString('+' . encode($mixed, 'UTF-32BE') . '-', $mixed, 'UTF-7', 'UTF-32BE', false);
  $doubled = $astral . $astral;
  testValidString('+' . encode($doubled, 'UTF-32BE') . '-', $doubled, 'UTF-7', 'UTF-32BE');

  // Unterminated + section
  // (This is not considered illegal)
  testValidString('+' . encode('ABC', 'ASCII'), "\x00A\x00B\x00C", 'UTF-7', 'UTF-16BE', false);

  // + sections immediately after each other
  // (This isn't illegal either)
  $sectionAB = '+' . encode('AB', 'ASCII') . '-';
  $sectionCD = '+' . encode('CD', 'ASCII') . '-';
  testValidString($sectionAB . $sectionCD, "\x00A\x00B\x00C\x00D", 'UTF-7', 'UTF-16BE', false);

  // + sections not immediately after each other
  // (Just trying to be exhaustive here)
  testValidString($sectionAB . '!' . $sectionCD, "\x00A\x00B\x00!\x00C\x00D", 'UTF-7', 'UTF-16BE', false);

  // + section terminated by a non-Base64 ASCII character which is NOT -
  for ($i = 0; $i < 128; $i++) {
      $isLetter = ($i >= ord('A') && $i <= ord('Z')) || ($i >= ord('a') && $i <= ord('z'));
      $isDigit  = $i >= ord('0') && $i <= ord('9');
      // Besides letters and digits, skip '+', '/', '-', '\' and '~'
      if ($isLetter || $isDigit || strpos('+/-\\~', chr($i)) !== false) {
          continue;
      }

      $char = chr($i);
      testValidString('+' . encode("\x12\x34", 'UTF-16BE') . $char, "\x00\x00\x12\x34\x00\x00\x00" . $char, 'UTF-7', 'UTF-32BE', false);
  }
  // Now let's see how UTF-7 can go BAD...
  /**
   * Base64-encode raw bytes without any charset conversion.
   *
   * @param string $str Bytes to encode exactly as given
   * @return string Base64 text with the '=' padding removed
   */
  function rawEncode($str) {
      return rtrim(base64_encode($str), '=');
  }
  // A single '%' in UTF-32BE, expected wherever bad input gets substituted
  $mark = "\x00\x00\x00%";

  // First, messed up UTF16 in + section

  // Second half of surrogate pair coming first
  testInvalidString('+' . rawEncode("\xDC\x01\xD8\x02") . '-', $mark . $mark, 'UTF-7', 'UTF-32BE');

  // First half of surrogate pair not followed by second half
  testInvalidString('+' . rawEncode("\xD8\x01\x00A") . '-', $mark . "\x00\x00\x00A", 'UTF-7', 'UTF-32BE');
  testInvalidString('+' . rawEncode("\xD8\x01\xD9\x02") . '-', $mark . $mark, 'UTF-7', 'UTF-32BE');

  // First half of surrogate pair appearing at end of string
  testInvalidString('+' . rawEncode("\xD8\x01") . '-', $mark, 'UTF-7', 'UTF-32BE');

  // Truncated string
  testInvalidString('+' . rawEncode("\x01") . '-', $mark, 'UTF-7', 'UTF-32BE');

  // And then, messed up Base64 encoding

  // Bad padding on + section (not zeroes)
  $encoded = encode("\x12\x34", 'UTF-16BE'); // 3 Base64 bytes, 2 bits of padding...
  $lastChar = chr(ord($encoded[2]) + 1); // bump the third Base64 character by one
  $corrupted = substr($encoded, 0, 2) . $lastChar;
  testInvalidString('+' . $corrupted . '-', "\x00\x00\x12\x34" . $mark, 'UTF-7', 'UTF-32BE');

  // Characters which are not Base64 (and not even ASCII) appearing in Base64 section
  testInvalidString("+\x80", $mark, 'UTF-7', 'UTF-32BE');

  echo "Done!\n";
  953. ?>
  954. --EXPECT--
  955. == UTF-8 ==
  956. == UTF-16 ==
  957. == UTF-32 ==
  958. == UTF-7 ==
  959. Done!