bug32001.phpt 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. --TEST--
  2. Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using UTF-*
  3. --EXTENSIONS--
  4. iconv
  5. xml
  6. --SKIPIF--
  7. <?php
  8. if (ICONV_IMPL == 'glibc' && version_compare(ICONV_VERSION, '2.12', '<='))
  9. die("skip iconv of glibc <= 2.12 is buggy");
  10. ?>
  11. --FILE--
  12. <?php
  13. class testcase {
  14. private $encoding;
  15. private $bom;
  16. private $prologue;
  17. private $tags;
  18. private $chunk_size;
  19. function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
  20. $this->encoding = $enc;
  21. $this->chunk_size = $chunk_size;
  22. $this->bom = $bom;
  23. $this->prologue = !$omit_prologue;
  24. $this->tags = array();
  25. }
  26. function start_element($parser, $name, $attrs) {
  27. $attrs = array_map('bin2hex', $attrs);
  28. $this->tags[] = bin2hex($name).": ".implode(', ', $attrs);
  29. }
  30. function end_element($parser, $name) {
  31. }
  32. function run() {
  33. $data = '';
  34. if ($this->prologue) {
  35. $canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
  36. $data .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
  37. }
  38. $data .= <<<HERE
  39. <テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  40. <テスト:テスト2 テスト="テスト">
  41. <テスト:テスト3>
  42. test!
  43. </テスト:テスト3>
  44. </テスト:テスト2>
  45. </テスト:テスト1>
  46. HERE;
  47. $data = iconv("UTF-8", $this->encoding, $data);
  48. if ($this->bom) {
  49. switch (strtoupper($this->encoding)) {
  50. case 'UTF-8':
  51. case 'UTF8':
  52. $data = "\xef\xbb\xbf".$data;
  53. break;
  54. case 'UTF-16':
  55. case 'UTF16':
  56. case 'UTF-16BE':
  57. case 'UTF16BE':
  58. case 'UCS-2':
  59. case 'UCS2':
  60. case 'UCS-2BE':
  61. case 'UCS2BE':
  62. $data = "\xfe\xff".$data;
  63. break;
  64. case 'UTF-16LE':
  65. case 'UTF16LE':
  66. case 'UCS-2LE':
  67. case 'UCS2LE':
  68. $data = "\xff\xfe".$data;
  69. break;
  70. case 'UTF-32':
  71. case 'UTF32':
  72. case 'UTF-32BE':
  73. case 'UTF32BE':
  74. case 'UCS-4':
  75. case 'UCS4':
  76. case 'UCS-4BE':
  77. case 'UCS4BE':
  78. $data = "\x00\x00\xfe\xff".$data;
  79. break;
  80. case 'UTF-32LE':
  81. case 'UTF32LE':
  82. case 'UCS-4LE':
  83. case 'UCS4LE':
  84. $data = "\xff\xfe\x00\x00".$data;
  85. break;
  86. }
  87. }
  88. $parser = xml_parser_create(NULL);
  89. xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
  90. xml_set_element_handler($parser, "start_element", "end_element");
  91. xml_set_object($parser, $this);
  92. if ($this->chunk_size == 0) {
  93. $success = @xml_parse($parser, $data, true);
  94. } else {
  95. for ($offset = 0; $offset < strlen($data);
  96. $offset += $this->chunk_size) {
  97. $success = @xml_parse($parser, substr($data, $offset, $this->chunk_size), false);
  98. if (!$success) {
  99. break;
  100. }
  101. }
  102. if ($success) {
  103. $success = @xml_parse($parser, "", true);
  104. }
  105. }
  106. echo "Encoding: $this->encoding\n";
  107. echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
  108. echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
  109. echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";
  110. if ($success) {
  111. var_dump($this->tags);
  112. } else {
  113. echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
  114. }
  115. }
  116. }
  117. $suite = array(
  118. new testcase("UTF-8", 0, 0, 0),
  119. new testcase("UTF-8", 0, 0, 1),
  120. new testcase("UTF-8", 0, 1, 0),
  121. new testcase("UTF-8", 0, 1, 1),
  122. new testcase("UTF-16BE", 0, 0, 0),
  123. new testcase("UTF-16BE", 0, 1, 0),
  124. new testcase("UTF-16BE", 0, 1, 1),
  125. new testcase("UTF-16LE", 0, 0, 0),
  126. new testcase("UTF-16LE", 0, 1, 0),
  127. new testcase("UTF-16LE", 0, 1, 1),
  128. new testcase("UTF-8", 1, 0, 0),
  129. new testcase("UTF-8", 1, 0, 1),
  130. new testcase("UTF-8", 1, 1, 0),
  131. new testcase("UTF-8", 1, 1, 1),
  132. new testcase("UTF-16BE", 1, 0, 0),
  133. new testcase("UTF-16BE", 1, 1, 0),
  134. new testcase("UTF-16BE", 1, 1, 1),
  135. new testcase("UTF-16LE", 1, 0, 0),
  136. new testcase("UTF-16LE", 1, 1, 0),
  137. new testcase("UTF-16LE", 1, 1, 1),
  138. );
  139. if (XML_SAX_IMPL == 'libxml') {
  140. echo "libxml2 Version => " . LIBXML_DOTTED_VERSION. "\n";
  141. } else {
  142. echo "libxml2 Version => NONE\n";
  143. }
  144. foreach ($suite as $testcase) {
  145. $testcase->run();
  146. }
  147. ?>
  148. --EXPECTF--
  149. libxml2 Version => %s
  150. Encoding: UTF-8
  151. XML Prologue: present
  152. Chunk size: all data at once
  153. BOM: not prepended
  154. array(3) {
  155. [0]=>
  156. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  157. [1]=>
  158. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  159. [2]=>
  160. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  161. }
  162. Encoding: UTF-8
  163. XML Prologue: not present
  164. Chunk size: all data at once
  165. BOM: not prepended
  166. array(3) {
  167. [0]=>
  168. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  169. [1]=>
  170. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  171. [2]=>
  172. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  173. }
  174. Encoding: UTF-8
  175. XML Prologue: present
  176. Chunk size: all data at once
  177. BOM: prepended
  178. array(3) {
  179. [0]=>
  180. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  181. [1]=>
  182. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  183. [2]=>
  184. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  185. }
  186. Encoding: UTF-8
  187. XML Prologue: not present
  188. Chunk size: all data at once
  189. BOM: prepended
  190. array(3) {
  191. [0]=>
  192. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  193. [1]=>
  194. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  195. [2]=>
  196. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  197. }
  198. Encoding: UTF-16BE
  199. XML Prologue: present
  200. Chunk size: all data at once
  201. BOM: not prepended
  202. array(3) {
  203. [0]=>
  204. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  205. [1]=>
  206. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  207. [2]=>
  208. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  209. }
  210. Encoding: UTF-16BE
  211. XML Prologue: present
  212. Chunk size: all data at once
  213. BOM: prepended
  214. array(3) {
  215. [0]=>
  216. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  217. [1]=>
  218. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  219. [2]=>
  220. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  221. }
  222. Encoding: UTF-16BE
  223. XML Prologue: not present
  224. Chunk size: all data at once
  225. BOM: prepended
  226. array(3) {
  227. [0]=>
  228. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  229. [1]=>
  230. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  231. [2]=>
  232. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  233. }
  234. Encoding: UTF-16LE
  235. XML Prologue: present
  236. Chunk size: all data at once
  237. BOM: not prepended
  238. array(3) {
  239. [0]=>
  240. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  241. [1]=>
  242. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  243. [2]=>
  244. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  245. }
  246. Encoding: UTF-16LE
  247. XML Prologue: present
  248. Chunk size: all data at once
  249. BOM: prepended
  250. array(3) {
  251. [0]=>
  252. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  253. [1]=>
  254. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  255. [2]=>
  256. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  257. }
  258. Encoding: UTF-16LE
  259. XML Prologue: not present
  260. Chunk size: all data at once
  261. BOM: prepended
  262. array(3) {
  263. [0]=>
  264. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  265. [1]=>
  266. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  267. [2]=>
  268. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  269. }
  270. Encoding: UTF-8
  271. XML Prologue: present
  272. Chunk size: 1 byte(s)
  273. BOM: not prepended
  274. array(3) {
  275. [0]=>
  276. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  277. [1]=>
  278. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  279. [2]=>
  280. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  281. }
  282. Encoding: UTF-8
  283. XML Prologue: not present
  284. Chunk size: 1 byte(s)
  285. BOM: not prepended
  286. array(3) {
  287. [0]=>
  288. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  289. [1]=>
  290. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  291. [2]=>
  292. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  293. }
  294. Encoding: UTF-8
  295. XML Prologue: present
  296. Chunk size: 1 byte(s)
  297. BOM: prepended
  298. array(3) {
  299. [0]=>
  300. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  301. [1]=>
  302. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  303. [2]=>
  304. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  305. }
  306. Encoding: UTF-8
  307. XML Prologue: not present
  308. Chunk size: 1 byte(s)
  309. BOM: prepended
  310. array(3) {
  311. [0]=>
  312. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  313. [1]=>
  314. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  315. [2]=>
  316. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  317. }
  318. Encoding: UTF-16BE
  319. XML Prologue: present
  320. Chunk size: 1 byte(s)
  321. BOM: not prepended
  322. array(3) {
  323. [0]=>
  324. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  325. [1]=>
  326. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  327. [2]=>
  328. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  329. }
  330. Encoding: UTF-16BE
  331. XML Prologue: present
  332. Chunk size: 1 byte(s)
  333. BOM: prepended
  334. array(3) {
  335. [0]=>
  336. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  337. [1]=>
  338. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  339. [2]=>
  340. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  341. }
  342. Encoding: UTF-16BE
  343. XML Prologue: not present
  344. Chunk size: 1 byte(s)
  345. BOM: prepended
  346. array(3) {
  347. [0]=>
  348. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  349. [1]=>
  350. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  351. [2]=>
  352. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  353. }
  354. Encoding: UTF-16LE
  355. XML Prologue: present
  356. Chunk size: 1 byte(s)
  357. BOM: not prepended
  358. array(3) {
  359. [0]=>
  360. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  361. [1]=>
  362. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  363. [2]=>
  364. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  365. }
  366. Encoding: UTF-16LE
  367. XML Prologue: present
  368. Chunk size: 1 byte(s)
  369. BOM: prepended
  370. array(3) {
  371. [0]=>
  372. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  373. [1]=>
  374. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  375. [2]=>
  376. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  377. }
  378. Encoding: UTF-16LE
  379. XML Prologue: not present
  380. Chunk size: 1 byte(s)
  381. BOM: prepended
  382. array(3) {
  383. [0]=>
  384. string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  385. [1]=>
  386. string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  387. [2]=>
  388. string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
  389. }