pcre2_printint.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836
  1. /*************************************************
  2. * Perl-Compatible Regular Expressions *
  3. *************************************************/
  4. /* PCRE is a library of functions to support regular expressions whose syntax
  5. and semantics are as close as possible to those of the Perl 5 language.
  6. Written by Philip Hazel
  7. Original API code Copyright (c) 1997-2012 University of Cambridge
  8. New API code Copyright (c) 2016-2019 University of Cambridge
  9. -----------------------------------------------------------------------------
  10. Redistribution and use in source and binary forms, with or without
  11. modification, are permitted provided that the following conditions are met:
  12. * Redistributions of source code must retain the above copyright notice,
  13. this list of conditions and the following disclaimer.
  14. * Redistributions in binary form must reproduce the above copyright
  15. notice, this list of conditions and the following disclaimer in the
  16. documentation and/or other materials provided with the distribution.
  17. * Neither the name of the University of Cambridge nor the names of its
  18. contributors may be used to endorse or promote products derived from
  19. this software without specific prior written permission.
  20. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  21. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  24. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30. POSSIBILITY OF SUCH DAMAGE.
  31. -----------------------------------------------------------------------------
  32. */
  33. /* This module contains a PCRE private debugging function for printing out the
  34. internal form of a compiled regular expression, along with some supporting
  35. local functions. This source file is #included in pcre2test.c at each supported
  36. code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
  37. that comprise the library. It can also optionally be included in
  38. pcre2_compile.c for detailed debugging in error situations. */
  39. /* Tables of operator names. The same 8-bit table is used for all code unit
  40. widths, so it must be defined only once. The list itself is defined in
  41. pcre2_internal.h, which is #included by pcre2test before this file. */
  42. #ifndef OP_LISTS_DEFINED
  43. static const char *OP_names[] = { OP_NAME_LIST };
  44. #define OP_LISTS_DEFINED
  45. #endif
  /* Every function and table defined below must have a mode-dependent name.
  Each identifier is therefore mapped through PCRE2_SUFFIX. */

  #define OP_lengths            PCRE2_SUFFIX(OP_lengths_)
  #define get_ucpname           PCRE2_SUFFIX(get_ucpname_)
  #define pcre2_printint        PCRE2_SUFFIX(pcre2_printint_)
  #define print_char            PCRE2_SUFFIX(print_char_)
  #define print_custring        PCRE2_SUFFIX(print_custring_)
  #define print_custring_bylen  PCRE2_SUFFIX(print_custring_bylen_)
  #define print_prop            PCRE2_SUFFIX(print_prop_)
  54. /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
  55. the definition is next to the definition of the opcodes in pcre2_internal.h.
  56. The contents of the table are, however, mode-dependent. */
  57. static const uint8_t OP_lengths[] = { OP_LENGTHS };
  58. /*************************************************
  59. * Print one character from a string *
  60. *************************************************/
  61. /* In UTF mode the character may occupy more than one code unit.
  62. Arguments:
  63. f file to write to
  64. ptr pointer to first code unit of the character
  65. utf TRUE if string is UTF (will be FALSE if UTF is not supported)
  66. Returns: number of additional code units used
  67. */
  68. static unsigned int
  69. print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
  70. {
  71. uint32_t c = *ptr;
  72. BOOL one_code_unit = !utf;
  73. /* If UTF is supported and requested, check for a valid single code unit. */
  74. #ifdef SUPPORT_UNICODE
  75. if (utf)
  76. {
  77. #if PCRE2_CODE_UNIT_WIDTH == 8
  78. one_code_unit = c < 0x80;
  79. #elif PCRE2_CODE_UNIT_WIDTH == 16
  80. one_code_unit = (c & 0xfc00) != 0xd800;
  81. #else
  82. one_code_unit = (c & 0xfffff800u) != 0xd800u;
  83. #endif /* CODE_UNIT_WIDTH */
  84. }
  85. #endif /* SUPPORT_UNICODE */
  86. /* Handle a valid one-code-unit character at any width. */
  87. if (one_code_unit)
  88. {
  89. if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
  90. else if (c < 0x80) fprintf(f, "\\x%02x", c);
  91. else fprintf(f, "\\x{%02x}", c);
  92. return 0;
  93. }
  94. /* Code for invalid UTF code units and multi-unit UTF characters is different
  95. for each width. If UTF is not supported, control should never get here, but we
  96. need a return statement to keep the compiler happy. */
  97. #ifndef SUPPORT_UNICODE
  98. return 0;
  99. #else
  100. /* Malformed UTF-8 should occur only if the sanity check has been turned off.
  101. Rather than swallow random bytes, just stop if we hit a bad one. Print it with
  102. \X instead of \x as an indication. */
  103. #if PCRE2_CODE_UNIT_WIDTH == 8
  104. if ((c & 0xc0) != 0xc0)
  105. {
  106. fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
  107. return 0;
  108. }
  109. else
  110. {
  111. int i;
  112. int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
  113. int s = 6*a;
  114. c = (c & PRIV(utf8_table3)[a]) << s;
  115. for (i = 1; i <= a; i++)
  116. {
  117. if ((ptr[i] & 0xc0) != 0x80)
  118. {
  119. fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
  120. return i - 1;
  121. }
  122. s -= 6;
  123. c |= (ptr[i] & 0x3f) << s;
  124. }
  125. fprintf(f, "\\x{%x}", c);
  126. return a;
  127. }
  128. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  129. /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
  130. Print it with \X instead of \x as an indication. */
  131. #if PCRE2_CODE_UNIT_WIDTH == 16
  132. if ((ptr[1] & 0xfc00) != 0xdc00)
  133. {
  134. fprintf(f, "\\X{%x}", c);
  135. return 0;
  136. }
  137. c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
  138. fprintf(f, "\\x{%x}", c);
  139. return 1;
  140. #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
  141. /* For UTF-32 we get here only for a malformed code unit, which should only
  142. occur if the sanity check has been turned off. Print it with \X instead of \x
  143. as an indication. */
  144. #if PCRE2_CODE_UNIT_WIDTH == 32
  145. fprintf(f, "\\X{%x}", c);
  146. return 0;
  147. #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
  148. #endif /* SUPPORT_UNICODE */
  149. }
  150. /*************************************************
  151. * Print string as a list of code units *
  152. *************************************************/
  153. /* These take no account of UTF as they always print each individual code unit.
  154. The string is zero-terminated for print_custring(); the length is given for
  155. print_custring_bylen().
  156. Arguments:
  157. f file to write to
  158. ptr point to the string
  159. len length for print_custring_bylen()
  160. Returns: nothing
  161. */
  162. static void
  163. print_custring(FILE *f, PCRE2_SPTR ptr)
  164. {
  165. while (*ptr != '\0')
  166. {
  167. uint32_t c = *ptr++;
  168. if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
  169. }
  170. }
  171. static void
  172. print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
  173. {
  174. for (; len > 0; len--)
  175. {
  176. uint32_t c = *ptr++;
  177. if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
  178. }
  179. }
  /*************************************************
  *          Find Unicode property name            *
  *************************************************/

  /* Looks up the name of a Unicode property from its type and value, returning
  "??" when no table entry matches.

  When there is no UTF/UCP support the table of names does not exist. This
  function should not be called in such configurations, because a pattern that
  tries to use Unicode properties won't compile. Rather than scatter #ifdefs
  through the main code, the single #ifdef is kept here and "??" is always
  returned in that case.

  Arguments:
    ptype    property type
    pvalue   property value

  Returns:   pointer to the property name, or "??"
  */

  static const char *
  get_ucpname(unsigned int ptype, unsigned int pvalue)
  {
  #ifdef SUPPORT_UNICODE
    int idx = PRIV(utt_size) - 1;

    /* Scan the table from its last entry down to its first. */
    while (idx >= 0) {
      if (PRIV(utt)[idx].type == ptype && PRIV(utt)[idx].value == pvalue)
        return PRIV(utt_names) + PRIV(utt)[idx].name_offset;
      idx--;
    }
  #else  /* No UTF support */
    (void)ptype;
    (void)pvalue;
  #endif  /* SUPPORT_UNICODE */

    return "??";
  }
  203. /*************************************************
  204. * Print Unicode property value *
  205. *************************************************/
  206. /* "Normal" properties can be printed from tables. The PT_CLIST property is a
  207. pseudo-property that contains a pointer to a list of case-equivalent
  208. characters.
  209. Arguments:
  210. f file to write to
  211. code pointer in the compiled code
  212. before text to print before
  213. after text to print after
  214. Returns: nothing
  215. */
  216. static void
  217. print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
  218. {
  219. if (code[1] != PT_CLIST)
  220. {
  221. fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
  222. code[2]), after);
  223. }
  224. else
  225. {
  226. const char *not = (*code == OP_PROP)? "" : "not ";
  227. const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
  228. fprintf (f, "%s%sclist", before, not);
  229. while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
  230. fprintf(f, "%s", after);
  231. }
  232. }
  233. /*************************************************
  234. * Print compiled pattern *
  235. *************************************************/
  236. /* The print_lengths flag controls whether offsets and lengths of items are
  237. printed. Lenths can be turned off from pcre2test so that automatic tests on
  238. bytecode can be written that do not depend on the value of LINK_SIZE.
  239. Arguments:
  240. re a compiled pattern
  241. f the file to write to
  242. print_lengths show various lengths
  243. Returns: nothing
  244. */
  245. static void
  246. pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
  247. {
  248. PCRE2_SPTR codestart, nametable, code;
  249. uint32_t nesize = re->name_entry_size;
  250. BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
  251. nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
  252. code = codestart = nametable + re->name_count * re->name_entry_size;
  253. for(;;)
  254. {
  255. PCRE2_SPTR ccode;
  256. uint32_t c;
  257. int i;
  258. const char *flag = " ";
  259. unsigned int extra = 0;
  260. if (print_lengths)
  261. fprintf(f, "%3d ", (int)(code - codestart));
  262. else
  263. fprintf(f, " ");
  264. switch(*code)
  265. {
  266. /* ========================================================================== */
  267. /* These cases are never obeyed. This is a fudge that causes a compile-
  268. time error if the vectors OP_names or OP_lengths, which are indexed
  269. by opcode, are not the correct length. It seems to be the only way to do
  270. such a check at compile time, as the sizeof() operator does not work in
  271. the C preprocessor. */
  272. case OP_TABLE_LENGTH:
  273. case OP_TABLE_LENGTH +
  274. ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
  275. (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
  276. return;
  277. /* ========================================================================== */
  278. case OP_END:
  279. fprintf(f, " %s\n", OP_names[*code]);
  280. fprintf(f, "------------------------------------------------------------------\n");
  281. return;
  282. case OP_CHAR:
  283. fprintf(f, " ");
  284. do
  285. {
  286. code++;
  287. code += 1 + print_char(f, code, utf);
  288. }
  289. while (*code == OP_CHAR);
  290. fprintf(f, "\n");
  291. continue;
  292. case OP_CHARI:
  293. fprintf(f, " /i ");
  294. do
  295. {
  296. code++;
  297. code += 1 + print_char(f, code, utf);
  298. }
  299. while (*code == OP_CHARI);
  300. fprintf(f, "\n");
  301. continue;
  302. case OP_CBRA:
  303. case OP_CBRAPOS:
  304. case OP_SCBRA:
  305. case OP_SCBRAPOS:
  306. if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
  307. else fprintf(f, " ");
  308. fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
  309. break;
  310. case OP_BRA:
  311. case OP_BRAPOS:
  312. case OP_SBRA:
  313. case OP_SBRAPOS:
  314. case OP_KETRMAX:
  315. case OP_KETRMIN:
  316. case OP_KETRPOS:
  317. case OP_ALT:
  318. case OP_KET:
  319. case OP_ASSERT:
  320. case OP_ASSERT_NOT:
  321. case OP_ASSERTBACK:
  322. case OP_ASSERTBACK_NOT:
  323. case OP_ASSERT_NA:
  324. case OP_ASSERTBACK_NA:
  325. case OP_ONCE:
  326. case OP_SCRIPT_RUN:
  327. case OP_COND:
  328. case OP_SCOND:
  329. case OP_REVERSE:
  330. if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
  331. else fprintf(f, " ");
  332. fprintf(f, "%s", OP_names[*code]);
  333. break;
  334. case OP_CLOSE:
  335. fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
  336. break;
  337. case OP_CREF:
  338. fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
  339. break;
  340. case OP_DNCREF:
  341. {
  342. PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
  343. fprintf(f, " %s Cond ref <", flag);
  344. print_custring(f, entry);
  345. fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
  346. }
  347. break;
  348. case OP_RREF:
  349. c = GET2(code, 1);
  350. if (c == RREF_ANY)
  351. fprintf(f, " Cond recurse any");
  352. else
  353. fprintf(f, " Cond recurse %d", c);
  354. break;
  355. case OP_DNRREF:
  356. {
  357. PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
  358. fprintf(f, " %s Cond recurse <", flag);
  359. print_custring(f, entry);
  360. fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
  361. }
  362. break;
  363. case OP_FALSE:
  364. fprintf(f, " Cond false");
  365. break;
  366. case OP_TRUE:
  367. fprintf(f, " Cond true");
  368. break;
  369. case OP_STARI:
  370. case OP_MINSTARI:
  371. case OP_POSSTARI:
  372. case OP_PLUSI:
  373. case OP_MINPLUSI:
  374. case OP_POSPLUSI:
  375. case OP_QUERYI:
  376. case OP_MINQUERYI:
  377. case OP_POSQUERYI:
  378. flag = "/i";
  379. /* Fall through */
  380. case OP_STAR:
  381. case OP_MINSTAR:
  382. case OP_POSSTAR:
  383. case OP_PLUS:
  384. case OP_MINPLUS:
  385. case OP_POSPLUS:
  386. case OP_QUERY:
  387. case OP_MINQUERY:
  388. case OP_POSQUERY:
  389. case OP_TYPESTAR:
  390. case OP_TYPEMINSTAR:
  391. case OP_TYPEPOSSTAR:
  392. case OP_TYPEPLUS:
  393. case OP_TYPEMINPLUS:
  394. case OP_TYPEPOSPLUS:
  395. case OP_TYPEQUERY:
  396. case OP_TYPEMINQUERY:
  397. case OP_TYPEPOSQUERY:
  398. fprintf(f, " %s ", flag);
  399. if (*code >= OP_TYPESTAR)
  400. {
  401. if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
  402. {
  403. print_prop(f, code + 1, "", " ");
  404. extra = 2;
  405. }
  406. else fprintf(f, "%s", OP_names[code[1]]);
  407. }
  408. else extra = print_char(f, code+1, utf);
  409. fprintf(f, "%s", OP_names[*code]);
  410. break;
  411. case OP_EXACTI:
  412. case OP_UPTOI:
  413. case OP_MINUPTOI:
  414. case OP_POSUPTOI:
  415. flag = "/i";
  416. /* Fall through */
  417. case OP_EXACT:
  418. case OP_UPTO:
  419. case OP_MINUPTO:
  420. case OP_POSUPTO:
  421. fprintf(f, " %s ", flag);
  422. extra = print_char(f, code + 1 + IMM2_SIZE, utf);
  423. fprintf(f, "{");
  424. if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
  425. fprintf(f, "%d}", GET2(code,1));
  426. if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
  427. else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
  428. break;
  429. case OP_TYPEEXACT:
  430. case OP_TYPEUPTO:
  431. case OP_TYPEMINUPTO:
  432. case OP_TYPEPOSUPTO:
  433. if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
  434. {
  435. print_prop(f, code + IMM2_SIZE + 1, " ", " ");
  436. extra = 2;
  437. }
  438. else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
  439. fprintf(f, "{");
  440. if (*code != OP_TYPEEXACT) fprintf(f, "0,");
  441. fprintf(f, "%d}", GET2(code,1));
  442. if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
  443. else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
  444. break;
  445. case OP_NOTI:
  446. flag = "/i";
  447. /* Fall through */
  448. case OP_NOT:
  449. fprintf(f, " %s [^", flag);
  450. extra = print_char(f, code + 1, utf);
  451. fprintf(f, "]");
  452. break;
  453. case OP_NOTSTARI:
  454. case OP_NOTMINSTARI:
  455. case OP_NOTPOSSTARI:
  456. case OP_NOTPLUSI:
  457. case OP_NOTMINPLUSI:
  458. case OP_NOTPOSPLUSI:
  459. case OP_NOTQUERYI:
  460. case OP_NOTMINQUERYI:
  461. case OP_NOTPOSQUERYI:
  462. flag = "/i";
  463. /* Fall through */
  464. case OP_NOTSTAR:
  465. case OP_NOTMINSTAR:
  466. case OP_NOTPOSSTAR:
  467. case OP_NOTPLUS:
  468. case OP_NOTMINPLUS:
  469. case OP_NOTPOSPLUS:
  470. case OP_NOTQUERY:
  471. case OP_NOTMINQUERY:
  472. case OP_NOTPOSQUERY:
  473. fprintf(f, " %s [^", flag);
  474. extra = print_char(f, code + 1, utf);
  475. fprintf(f, "]%s", OP_names[*code]);
  476. break;
  477. case OP_NOTEXACTI:
  478. case OP_NOTUPTOI:
  479. case OP_NOTMINUPTOI:
  480. case OP_NOTPOSUPTOI:
  481. flag = "/i";
  482. /* Fall through */
  483. case OP_NOTEXACT:
  484. case OP_NOTUPTO:
  485. case OP_NOTMINUPTO:
  486. case OP_NOTPOSUPTO:
  487. fprintf(f, " %s [^", flag);
  488. extra = print_char(f, code + 1 + IMM2_SIZE, utf);
  489. fprintf(f, "]{");
  490. if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
  491. fprintf(f, "%d}", GET2(code,1));
  492. if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
  493. else
  494. if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
  495. break;
  496. case OP_RECURSE:
  497. if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
  498. else fprintf(f, " ");
  499. fprintf(f, "%s", OP_names[*code]);
  500. break;
  501. case OP_REFI:
  502. flag = "/i";
  503. /* Fall through */
  504. case OP_REF:
  505. fprintf(f, " %s \\%d", flag, GET2(code,1));
  506. ccode = code + OP_lengths[*code];
  507. goto CLASS_REF_REPEAT;
  508. case OP_DNREFI:
  509. flag = "/i";
  510. /* Fall through */
  511. case OP_DNREF:
  512. {
  513. PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
  514. fprintf(f, " %s \\k<", flag);
  515. print_custring(f, entry);
  516. fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
  517. }
  518. ccode = code + OP_lengths[*code];
  519. goto CLASS_REF_REPEAT;
  520. case OP_CALLOUT:
  521. fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
  522. GET(code, 1), GET(code, 1 + LINK_SIZE));
  523. break;
  524. case OP_CALLOUT_STR:
  525. c = code[1 + 4*LINK_SIZE];
  526. fprintf(f, " %s %c", OP_names[*code], c);
  527. extra = GET(code, 1 + 2*LINK_SIZE);
  528. print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
  529. for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
  530. if (c == PRIV(callout_start_delims)[i])
  531. {
  532. c = PRIV(callout_end_delims)[i];
  533. break;
  534. }
  535. fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
  536. GET(code, 1 + LINK_SIZE));
  537. break;
  538. case OP_PROP:
  539. case OP_NOTPROP:
  540. print_prop(f, code, " ", "");
  541. break;
  542. /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
  543. in having this code always here, and it makes it less messy without all
  544. those #ifdefs. */
  545. case OP_CLASS:
  546. case OP_NCLASS:
  547. case OP_XCLASS:
  548. {
  549. unsigned int min, max;
  550. BOOL printmap;
  551. BOOL invertmap = FALSE;
  552. uint8_t *map;
  553. uint8_t inverted_map[32];
  554. fprintf(f, " [");
  555. if (*code == OP_XCLASS)
  556. {
  557. extra = GET(code, 1);
  558. ccode = code + LINK_SIZE + 1;
  559. printmap = (*ccode & XCL_MAP) != 0;
  560. if ((*ccode & XCL_NOT) != 0)
  561. {
  562. invertmap = (*ccode & XCL_HASPROP) == 0;
  563. fprintf(f, "^");
  564. }
  565. ccode++;
  566. }
  567. else
  568. {
  569. printmap = TRUE;
  570. ccode = code + 1;
  571. }
  572. /* Print a bit map */
  573. if (printmap)
  574. {
  575. map = (uint8_t *)ccode;
  576. if (invertmap)
  577. {
  578. /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
  579. for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
  580. map = inverted_map;
  581. }
  582. for (i = 0; i < 256; i++)
  583. {
  584. if ((map[i/8] & (1u << (i&7))) != 0)
  585. {
  586. int j;
  587. for (j = i+1; j < 256; j++)
  588. if ((map[j/8] & (1u << (j&7))) == 0) break;
  589. if (i == '-' || i == ']') fprintf(f, "\\");
  590. if (PRINTABLE(i)) fprintf(f, "%c", i);
  591. else fprintf(f, "\\x%02x", i);
  592. if (--j > i)
  593. {
  594. if (j != i + 1) fprintf(f, "-");
  595. if (j == '-' || j == ']') fprintf(f, "\\");
  596. if (PRINTABLE(j)) fprintf(f, "%c", j);
  597. else fprintf(f, "\\x%02x", j);
  598. }
  599. i = j;
  600. }
  601. }
  602. ccode += 32 / sizeof(PCRE2_UCHAR);
  603. }
  604. /* For an XCLASS there is always some additional data */
  605. if (*code == OP_XCLASS)
  606. {
  607. PCRE2_UCHAR ch;
  608. while ((ch = *ccode++) != XCL_END)
  609. {
  610. BOOL not = FALSE;
  611. const char *notch = "";
  612. switch(ch)
  613. {
  614. case XCL_NOTPROP:
  615. not = TRUE;
  616. notch = "^";
  617. /* Fall through */
  618. case XCL_PROP:
  619. {
  620. unsigned int ptype = *ccode++;
  621. unsigned int pvalue = *ccode++;
  622. switch(ptype)
  623. {
  624. case PT_PXGRAPH:
  625. fprintf(f, "[:%sgraph:]", notch);
  626. break;
  627. case PT_PXPRINT:
  628. fprintf(f, "[:%sprint:]", notch);
  629. break;
  630. case PT_PXPUNCT:
  631. fprintf(f, "[:%spunct:]", notch);
  632. break;
  633. default:
  634. fprintf(f, "\\%c{%s}", (not? 'P':'p'),
  635. get_ucpname(ptype, pvalue));
  636. break;
  637. }
  638. }
  639. break;
  640. default:
  641. ccode += 1 + print_char(f, ccode, utf);
  642. if (ch == XCL_RANGE)
  643. {
  644. fprintf(f, "-");
  645. ccode += 1 + print_char(f, ccode, utf);
  646. }
  647. break;
  648. }
  649. }
  650. }
  651. /* Indicate a non-UTF class which was created by negation */
  652. fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
  653. /* Handle repeats after a class or a back reference */
  654. CLASS_REF_REPEAT:
  655. switch(*ccode)
  656. {
  657. case OP_CRSTAR:
  658. case OP_CRMINSTAR:
  659. case OP_CRPLUS:
  660. case OP_CRMINPLUS:
  661. case OP_CRQUERY:
  662. case OP_CRMINQUERY:
  663. case OP_CRPOSSTAR:
  664. case OP_CRPOSPLUS:
  665. case OP_CRPOSQUERY:
  666. fprintf(f, "%s", OP_names[*ccode]);
  667. extra += OP_lengths[*ccode];
  668. break;
  669. case OP_CRRANGE:
  670. case OP_CRMINRANGE:
  671. case OP_CRPOSRANGE:
  672. min = GET2(ccode,1);
  673. max = GET2(ccode,1 + IMM2_SIZE);
  674. if (max == 0) fprintf(f, "{%u,}", min);
  675. else fprintf(f, "{%u,%u}", min, max);
  676. if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
  677. else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
  678. extra += OP_lengths[*ccode];
  679. break;
  680. /* Do nothing if it's not a repeat; this code stops picky compilers
  681. warning about the lack of a default code path. */
  682. default:
  683. break;
  684. }
  685. }
  686. break;
  687. case OP_MARK:
  688. case OP_COMMIT_ARG:
  689. case OP_PRUNE_ARG:
  690. case OP_SKIP_ARG:
  691. case OP_THEN_ARG:
  692. fprintf(f, " %s ", OP_names[*code]);
  693. print_custring_bylen(f, code + 2, code[1]);
  694. extra += code[1];
  695. break;
  696. case OP_THEN:
  697. fprintf(f, " %s", OP_names[*code]);
  698. break;
  699. case OP_CIRCM:
  700. case OP_DOLLM:
  701. flag = "/m";
  702. /* Fall through */
  703. /* Anything else is just an item with no data, but possibly a flag. */
  704. default:
  705. fprintf(f, " %s %s", flag, OP_names[*code]);
  706. break;
  707. }
  708. code += OP_lengths[*code] + extra;
  709. fprintf(f, "\n");
  710. }
  711. }
  712. /* End of pcre2_printint.c */