pcre_printint.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834
  1. /*************************************************
  2. * Perl-Compatible Regular Expressions *
  3. *************************************************/
  4. /* PCRE is a library of functions to support regular expressions whose syntax
  5. and semantics are as close as possible to those of the Perl 5 language.
  6. Written by Philip Hazel
  7. Copyright (c) 1997-2012 University of Cambridge
  8. -----------------------------------------------------------------------------
  9. Redistribution and use in source and binary forms, with or without
  10. modification, are permitted provided that the following conditions are met:
  11. * Redistributions of source code must retain the above copyright notice,
  12. this list of conditions and the following disclaimer.
  13. * Redistributions in binary form must reproduce the above copyright
  14. notice, this list of conditions and the following disclaimer in the
  15. documentation and/or other materials provided with the distribution.
  16. * Neither the name of the University of Cambridge nor the names of its
  17. contributors may be used to endorse or promote products derived from
  18. this software without specific prior written permission.
  19. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29. POSSIBILITY OF SUCH DAMAGE.
  30. -----------------------------------------------------------------------------
  31. */
  32. /* This module contains a PCRE private debugging function for printing out the
  33. internal form of a compiled regular expression, along with some supporting
  34. local functions. This source file is used in two places:
  35. (1) It is #included by pcre_compile.c when it is compiled in debugging mode
  36. (PCRE_DEBUG defined in pcre_internal.h). It is not included in production
  37. compiles. In this case PCRE_INCLUDED is defined.
  38. (2) It is also compiled separately and linked with pcretest.c, which can be
  39. asked to print out a compiled regex for debugging purposes. */
  40. #ifndef PCRE_INCLUDED
  41. #ifdef HAVE_CONFIG_H
  42. #include "config.h"
  43. #endif
  44. /* For pcretest program. */
  45. #define PRIV(name) name
  46. /* We have to include pcre_internal.h because we need the internal info for
  47. displaying the results of pcre_study() and we also need to know about the
  48. internal macros, structures, and other internal data values; pcretest has
  49. "inside information" compared to a program that strictly follows the PCRE API.
  50. Although pcre_internal.h does itself include pcre.h, we explicitly include it
  51. here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
  52. appropriately for an application, not for building PCRE. */
  53. #include "pcre.h"
  54. #include "pcre_internal.h"
  55. /* These are the functions that are contained within. It doesn't seem worth
  56. having a separate .h file just for this. */
  57. #endif /* PCRE_INCLUDED */
  /* Prototype for the printing entry point that is defined later in this file.
  When PCRE_INCLUDED is defined the declaration is prefixed with "static", so the
  function is private to the including translation unit. Exactly one of the
  three names is declared, chosen by which of COMPILE_PCRE8, COMPILE_PCRE16 or
  COMPILE_PCRE32 is defined; if none is defined no prototype is emitted. */
  58. #ifdef PCRE_INCLUDED
  59. static /* Keep the following function as private. */
  60. #endif
  61. #if defined COMPILE_PCRE8
  62. void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
  63. #elif defined COMPILE_PCRE16
  64. void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
  65. #elif defined COMPILE_PCRE32
  66. void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
  67. #endif
  68. /* Macro that decides whether a character should be output as a literal or in
  69. hexadecimal. We don't use isprint() because that can vary from system to system
  70. (even without the use of locales) and we want the output always to be the same,
  71. for testing purposes. */
  /* PRINTABLE(c) is true for values 64..254 when EBCDIC is defined and for
  values 32..126 otherwise. The argument is expanded twice, so it must not have
  side effects. */
  72. #ifdef EBCDIC
  73. #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
  74. #else
  75. #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
  76. #endif
  77. /* The table of operator names. */
  /* Indexed by opcode value to obtain the text printed for each item. */
  78. static const char *priv_OP_names[] = { OP_NAME_LIST };
  79. /* This table of operator lengths is indexed by opcode. The printing function
  80. in this file adds its entries to the code pointer to step from one item to the
  81. next, and its size is also needed for a check that ensures it is the correct
      size for the number of opcodes (thus catching update omissions). */
  82. static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS };
  83. /*************************************************
  84. * Print single- or multi-byte character *
  85. *************************************************/
  86. static unsigned int
  87. print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
  88. {
  89. pcre_uint32 c = *ptr;
  90. #ifndef SUPPORT_UTF
  91. (void)utf; /* Avoid compiler warning */
  92. if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
  93. else if (c <= 0x80) fprintf(f, "\\x%02x", c);
  94. else fprintf(f, "\\x{%x}", c);
  95. return 0;
  96. #else
  97. #if defined COMPILE_PCRE8
  98. if (!utf || (c & 0xc0) != 0xc0)
  99. {
  100. if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
  101. else if (c < 0x80) fprintf(f, "\\x%02x", c);
  102. else fprintf(f, "\\x{%02x}", c);
  103. return 0;
  104. }
  105. else
  106. {
  107. int i;
  108. int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
  109. int s = 6*a;
  110. c = (c & PRIV(utf8_table3)[a]) << s;
  111. for (i = 1; i <= a; i++)
  112. {
  113. /* This is a check for malformed UTF-8; it should only occur if the sanity
  114. check has been turned off. Rather than swallow random bytes, just stop if
  115. we hit a bad one. Print it with \X instead of \x as an indication. */
  116. if ((ptr[i] & 0xc0) != 0x80)
  117. {
  118. fprintf(f, "\\X{%x}", c);
  119. return i - 1;
  120. }
  121. /* The byte is OK */
  122. s -= 6;
  123. c |= (ptr[i] & 0x3f) << s;
  124. }
  125. fprintf(f, "\\x{%x}", c);
  126. return a;
  127. }
  128. #elif defined COMPILE_PCRE16
  129. if (!utf || (c & 0xfc00) != 0xd800)
  130. {
  131. if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
  132. else if (c <= 0x80) fprintf(f, "\\x%02x", c);
  133. else fprintf(f, "\\x{%02x}", c);
  134. return 0;
  135. }
  136. else
  137. {
  138. /* This is a check for malformed UTF-16; it should only occur if the sanity
  139. check has been turned off. Rather than swallow a low surrogate, just stop if
  140. we hit a bad one. Print it with \X instead of \x as an indication. */
  141. if ((ptr[1] & 0xfc00) != 0xdc00)
  142. {
  143. fprintf(f, "\\X{%x}", c);
  144. return 0;
  145. }
  146. c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
  147. fprintf(f, "\\x{%x}", c);
  148. return 1;
  149. }
  150. #elif defined COMPILE_PCRE32
  151. if (!utf || (c & 0xfffff800u) != 0xd800u)
  152. {
  153. if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
  154. else if (c <= 0x80) fprintf(f, "\\x%02x", c);
  155. else fprintf(f, "\\x{%x}", c);
  156. return 0;
  157. }
  158. else
  159. {
  160. /* This is a check for malformed UTF-32; it should only occur if the sanity
  161. check has been turned off. Rather than swallow a surrogate, just stop if
  162. we hit one. Print it with \X instead of \x as an indication. */
  163. fprintf(f, "\\X{%x}", c);
  164. return 0;
  165. }
  166. #endif /* COMPILE_PCRE[8|16|32] */
  167. #endif /* SUPPORT_UTF */
  168. }
  169. /*************************************************
  170. * Print uchar string (regardless of utf) *
  171. *************************************************/
  172. static void
  173. print_puchar(FILE *f, PCRE_PUCHAR ptr)
  174. {
  175. while (*ptr != '\0')
  176. {
  177. register pcre_uint32 c = *ptr++;
  178. if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
  179. }
  180. }
  181. /*************************************************
  182. * Find Unicode property name *
  183. *************************************************/
  /* Look up the name of a Unicode property.

  With SUPPORT_UCP the property table is searched from its last entry towards
  its first, and the name of the first entry found whose type and value both
  match is returned. Without SUPPORT_UCP there is no table and the result is
  always the placeholder.

  Arguments:
    ptype    property type
    pvalue   property value

  Returns: pointer to the property name, or "??" if there is no match */
  static const char *
  get_ucpname(unsigned int ptype, unsigned int pvalue)
  {
  #ifdef SUPPORT_UCP
    int i;
    for (i = PRIV(utt_size) - 1; i >= 0; i--)
      {
      if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value)
        return PRIV(utt_names) + PRIV(utt)[i].name_offset;
      }
    return "??";
  #else
    /* The arguments are unused in this configuration; the casts stop compilers
    warning about that. */
    (void)ptype;
    (void)pvalue;
    return "??";
  #endif
  }
  200. /*************************************************
  201. * Print Unicode property value *
  202. *************************************************/
  203. /* "Normal" properties can be printed from tables. The PT_CLIST property is a
  204. pseudo-property that contains a pointer to a list of case-equivalent
  205. characters. This is used only when UCP support is available and UTF mode is
  206. selected. It should never occur otherwise, but just in case it does, have
  207. something ready to print. */
  208. static void
  209. print_prop(FILE *f, pcre_uchar *code, const char *before, const char *after)
  210. {
  211. if (code[1] != PT_CLIST)
  212. {
  213. fprintf(f, "%s%s %s%s", before, priv_OP_names[*code], get_ucpname(code[1],
  214. code[2]), after);
  215. }
  216. else
  217. {
  218. const char *not = (*code == OP_PROP)? "" : "not ";
  219. #ifndef SUPPORT_UCP
  220. fprintf(f, "%s%sclist %d%s", before, not, code[2], after);
  221. #else
  222. const pcre_uint32 *p = PRIV(ucd_caseless_sets) + code[2];
  223. fprintf (f, "%s%sclist", before, not);
  224. while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
  225. fprintf(f, "%s", after);
  226. #endif
  227. }
  228. }
  229. /*************************************************
  230. * Print compiled regex *
  231. *************************************************/
  232. /* Make this function work for a regex with integers in either byte order.
  233. However, we assume that what we are passed is a compiled regex. The
  234. print_lengths flag controls whether offsets and lengths of items are printed.
  235. They can be turned off from pcretest so that automatic tests on bytecode can be
  236. written that do not depend on the value of LINK_SIZE. */
  /* Print the compiled form of a regular expression, one item per line.

  Arguments:
    external_re     the compiled regex; it is cast to REAL_PCRE * to read the
                    header fields (options, name table offset/count/entry size)
    f               stream that receives all output
    print_lengths   when true, each line starts with the item's offset from the
                    start of the code, and items that carry a link value have it
                    printed too; when false, spaces are printed instead

  Returns: nothing; the function returns when it reaches an OP_END item.

  The function is static when PCRE_INCLUDED is defined, and its name depends on
  which of COMPILE_PCRE8, COMPILE_PCRE16 or COMPILE_PCRE32 is defined. */
  237. #ifdef PCRE_INCLUDED
  238. static /* Keep the following function as private. */
  239. #endif
  240. #if defined COMPILE_PCRE8
  241. void
  242. pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
  243. #elif defined COMPILE_PCRE16
  244. void
  245. pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths)
  246. #elif defined COMPILE_PCRE32
  247. void
  248. pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths)
  249. #endif
  250. {
  251. REAL_PCRE *re = (REAL_PCRE *)external_re;
  252. pcre_uchar *codestart, *code;
  253. BOOL utf;
  254. unsigned int options = re->options;
  255. int offset = re->name_table_offset;
  256. int count = re->name_count;
  257. int size = re->name_entry_size;
  /* A magic number that does not match is treated as meaning that the header
  was written with the opposite byte order, so the local copies of the header
  fields are byte-swapped: the three name-table fields as 16-bit values and
  options as a 32-bit value. No other validation of the input is done. */
  258. if (re->magic_number != MAGIC_NUMBER)
  259. {
  260. offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
  261. count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
  262. size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
  263. options = ((options << 24) & 0xff000000) |
  264. ((options << 8) & 0x00ff0000) |
  265. ((options >> 8) & 0x0000ff00) |
  266. ((options >> 24) & 0x000000ff);
  267. }
  /* The code is taken to start immediately after the name table, that is
  offset + count * size code units (pcre_uchar) from the start of the
  structure. */
  268. code = codestart = (pcre_uchar *)re + offset + count * size;
  269. /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
  270. utf = (options & PCRE_UTF8) != 0;
  /* Each pass of this loop prints one line for the item at "code". The only
  way out is the return in the OP_END case. "extra" collects the number of
  code units to skip in addition to the item's entry in priv_OP_lengths. */
  271. for(;;)
  272. {
  273. pcre_uchar *ccode;
  274. const char *flag = " ";
  275. pcre_uint32 c;
  276. unsigned int extra = 0;
  277. if (print_lengths)
  278. fprintf(f, "%3d ", (int)(code - codestart));
  279. else
  280. fprintf(f, " ");
  281. switch(*code)
  282. {
  283. /* ========================================================================== */
  284. /* These cases are never obeyed. This is a fudge that causes a compile-
  285. time error if the vectors OP_names or OP_lengths, which are indexed
  286. by opcode, are not the correct length. It seems to be the only way to do
  287. such a check at compile time, as the sizeof() operator does not work in
  288. the C preprocessor. */
  289. case OP_TABLE_LENGTH:
  290. case OP_TABLE_LENGTH +
  291. ((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
  292. (sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)):
  293. break;
  294. /* ========================================================================== */
  295. case OP_END:
  296. fprintf(f, " %s\n", priv_OP_names[*code]);
  297. fprintf(f, "------------------------------------------------------------------\n");
  298. return;
  /* A run of consecutive OP_CHAR items is printed on a single line. The loop
  moves "code" itself (opcode plus the units print_char reports), so "continue"
  is used to bypass the generic advance at the bottom of the outer loop. The
  OP_CHARI case below works the same way. */
  299. case OP_CHAR:
  300. fprintf(f, " ");
  301. do
  302. {
  303. code++;
  304. code += 1 + print_char(f, code, utf);
  305. }
  306. while (*code == OP_CHAR);
  307. fprintf(f, "\n");
  308. continue;
  309. case OP_CHARI:
  310. fprintf(f, " /i ");
  311. do
  312. {
  313. code++;
  314. code += 1 + print_char(f, code, utf);
  315. }
  316. while (*code == OP_CHARI);
  317. fprintf(f, "\n");
  318. continue;
  319. case OP_CBRA:
  320. case OP_CBRAPOS:
  321. case OP_SCBRA:
  322. case OP_SCBRAPOS:
  323. if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
  324. else fprintf(f, " ");
  325. fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE));
  326. break;
  327. case OP_BRA:
  328. case OP_BRAPOS:
  329. case OP_SBRA:
  330. case OP_SBRAPOS:
  331. case OP_KETRMAX:
  332. case OP_KETRMIN:
  333. case OP_KETRPOS:
  334. case OP_ALT:
  335. case OP_KET:
  336. case OP_ASSERT:
  337. case OP_ASSERT_NOT:
  338. case OP_ASSERTBACK:
  339. case OP_ASSERTBACK_NOT:
  340. case OP_ONCE:
  341. case OP_ONCE_NC:
  342. case OP_COND:
  343. case OP_SCOND:
  344. case OP_REVERSE:
  345. if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
  346. else fprintf(f, " ");
  347. fprintf(f, "%s", priv_OP_names[*code]);
  348. break;
  349. case OP_CLOSE:
  350. fprintf(f, " %s %d", priv_OP_names[*code], GET2(code, 1));
  351. break;
  352. case OP_CREF:
  353. fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]);
  354. break;
  /* The first 2-unit value in the item indexes the name table; the name text
  is read starting IMM2_SIZE units into the selected entry. The second value
  is printed after the name. OP_DNRREF, OP_DNREF and OP_DNREFI below locate
  their names the same way. */
  355. case OP_DNCREF:
  356. {
  357. pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
  358. IMM2_SIZE;
  359. fprintf(f, " %s Cond ref <", flag);
  360. print_puchar(f, entry);
  361. fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
  362. }
  363. break;
  364. case OP_RREF:
  365. c = GET2(code, 1);
  366. if (c == RREF_ANY)
  367. fprintf(f, " Cond recurse any");
  368. else
  369. fprintf(f, " Cond recurse %d", c);
  370. break;
  371. case OP_DNRREF:
  372. {
  373. pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
  374. IMM2_SIZE;
  375. fprintf(f, " %s Cond recurse <", flag);
  376. print_puchar(f, entry);
  377. fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
  378. }
  379. break;
  380. case OP_DEF:
  381. fprintf(f, " Cond def");
  382. break;
  383. case OP_STARI:
  384. case OP_MINSTARI:
  385. case OP_POSSTARI:
  386. case OP_PLUSI:
  387. case OP_MINPLUSI:
  388. case OP_POSPLUSI:
  389. case OP_QUERYI:
  390. case OP_MINQUERYI:
  391. case OP_POSQUERYI:
  392. flag = "/i";
  393. /* Fall through */
  394. case OP_STAR:
  395. case OP_MINSTAR:
  396. case OP_POSSTAR:
  397. case OP_PLUS:
  398. case OP_MINPLUS:
  399. case OP_POSPLUS:
  400. case OP_QUERY:
  401. case OP_MINQUERY:
  402. case OP_POSQUERY:
  403. case OP_TYPESTAR:
  404. case OP_TYPEMINSTAR:
  405. case OP_TYPEPOSSTAR:
  406. case OP_TYPEPLUS:
  407. case OP_TYPEMINPLUS:
  408. case OP_TYPEPOSPLUS:
  409. case OP_TYPEQUERY:
  410. case OP_TYPEMINQUERY:
  411. case OP_TYPEPOSQUERY:
  412. fprintf(f, " %s ", flag);
  /* For opcodes numbered from OP_TYPESTAR upwards the operand at code[1] is
  itself an opcode: a property item (which brings two more units, hence
  extra = 2) or an item printed by name. Otherwise the operand is a literal
  character and print_char reports how many additional units it used. */
  413. if (*code >= OP_TYPESTAR)
  414. {
  415. if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
  416. {
  417. print_prop(f, code + 1, "", " ");
  418. extra = 2;
  419. }
  420. else fprintf(f, "%s", priv_OP_names[code[1]]);
  421. }
  422. else extra = print_char(f, code+1, utf);
  423. fprintf(f, "%s", priv_OP_names[*code]);
  424. break;
  425. case OP_EXACTI:
  426. case OP_UPTOI:
  427. case OP_MINUPTOI:
  428. case OP_POSUPTOI:
  429. flag = "/i";
  430. /* Fall through */
  431. case OP_EXACT:
  432. case OP_UPTO:
  433. case OP_MINUPTO:
  434. case OP_POSUPTO:
  435. fprintf(f, " %s ", flag);
  436. extra = print_char(f, code + 1 + IMM2_SIZE, utf);
  437. fprintf(f, "{");
  438. if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
  439. fprintf(f, "%d}", GET2(code,1));
  440. if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
  441. else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
  442. break;
  443. case OP_TYPEEXACT:
  444. case OP_TYPEUPTO:
  445. case OP_TYPEMINUPTO:
  446. case OP_TYPEPOSUPTO:
  447. if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
  448. {
  449. print_prop(f, code + IMM2_SIZE + 1, " ", " ");
  450. extra = 2;
  451. }
  452. else fprintf(f, " %s", priv_OP_names[code[1 + IMM2_SIZE]]);
  453. fprintf(f, "{");
  454. if (*code != OP_TYPEEXACT) fprintf(f, "0,");
  455. fprintf(f, "%d}", GET2(code,1));
  456. if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
  457. else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
  458. break;
  459. case OP_NOTI:
  460. flag = "/i";
  461. /* Fall through */
  462. case OP_NOT:
  463. fprintf(f, " %s [^", flag);
  464. extra = print_char(f, code + 1, utf);
  465. fprintf(f, "]");
  466. break;
  467. case OP_NOTSTARI:
  468. case OP_NOTMINSTARI:
  469. case OP_NOTPOSSTARI:
  470. case OP_NOTPLUSI:
  471. case OP_NOTMINPLUSI:
  472. case OP_NOTPOSPLUSI:
  473. case OP_NOTQUERYI:
  474. case OP_NOTMINQUERYI:
  475. case OP_NOTPOSQUERYI:
  476. flag = "/i";
  477. /* Fall through */
  478. case OP_NOTSTAR:
  479. case OP_NOTMINSTAR:
  480. case OP_NOTPOSSTAR:
  481. case OP_NOTPLUS:
  482. case OP_NOTMINPLUS:
  483. case OP_NOTPOSPLUS:
  484. case OP_NOTQUERY:
  485. case OP_NOTMINQUERY:
  486. case OP_NOTPOSQUERY:
  487. fprintf(f, " %s [^", flag);
  488. extra = print_char(f, code + 1, utf);
  489. fprintf(f, "]%s", priv_OP_names[*code]);
  490. break;
  491. case OP_NOTEXACTI:
  492. case OP_NOTUPTOI:
  493. case OP_NOTMINUPTOI:
  494. case OP_NOTPOSUPTOI:
  495. flag = "/i";
  496. /* Fall through */
  497. case OP_NOTEXACT:
  498. case OP_NOTUPTO:
  499. case OP_NOTMINUPTO:
  500. case OP_NOTPOSUPTO:
  501. fprintf(f, " %s [^", flag);
  502. extra = print_char(f, code + 1 + IMM2_SIZE, utf);
  503. fprintf(f, "]{");
  504. if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
  505. fprintf(f, "%d}", GET2(code,1));
  506. if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
  507. else
  508. if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
  509. break;
  510. case OP_RECURSE:
  511. if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
  512. else fprintf(f, " ");
  513. fprintf(f, "%s", priv_OP_names[*code]);
  514. break;
  515. case OP_REFI:
  516. flag = "/i";
  517. /* Fall through */
  518. case OP_REF:
  519. fprintf(f, " %s \\%d", flag, GET2(code,1));
  /* Point ccode at the unit following this item, then jump into the class
  block below so that a repeat suffix after the reference is printed by the
  same code that handles repeats after a class. */
  520. ccode = code + priv_OP_lengths[*code];
  521. goto CLASS_REF_REPEAT;
  522. case OP_DNREFI:
  523. flag = "/i";
  524. /* Fall through */
  525. case OP_DNREF:
  526. {
  527. pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
  528. IMM2_SIZE;
  529. fprintf(f, " %s \\k<", flag);
  530. print_puchar(f, entry);
  531. fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
  532. }
  533. ccode = code + priv_OP_lengths[*code];
  534. goto CLASS_REF_REPEAT;
  535. case OP_CALLOUT:
  536. fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2),
  537. GET(code, 2 + LINK_SIZE));
  538. break;
  539. case OP_PROP:
  540. case OP_NOTPROP:
  541. print_prop(f, code, " ", "");
  542. break;
  543. /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
  544. in having this code always here, and it makes it less messy without all
  545. those #ifdefs. */
  546. case OP_CLASS:
  547. case OP_NCLASS:
  548. case OP_XCLASS:
  549. {
  550. int i;
  551. unsigned int min, max;
  552. BOOL printmap;
  553. BOOL invertmap = FALSE;
  554. pcre_uint8 *map;
  555. pcre_uint8 inverted_map[32];
  556. fprintf(f, " [");
  557. if (*code == OP_XCLASS)
  558. {
  /* For OP_XCLASS the advance is taken from the link value that follows the
  opcode (presumably the length of the whole item - confirm against
  OP_LENGTHS). The unit after the link is a flags word: XCL_MAP says a bit
  map is present, and XCL_NOT without XCL_HASPROP causes the map to be
  printed inverted. */
  559. extra = GET(code, 1);
  560. ccode = code + LINK_SIZE + 1;
  561. printmap = (*ccode & XCL_MAP) != 0;
  562. if ((*ccode & XCL_NOT) != 0)
  563. {
  564. invertmap = (*ccode & XCL_HASPROP) == 0;
  565. fprintf(f, "^");
  566. }
  567. ccode++;
  568. }
  569. else
  570. {
  571. printmap = TRUE;
  572. ccode = code + 1;
  573. }
  574. /* Print a bit map */
  575. if (printmap)
  576. {
  577. map = (pcre_uint8 *)ccode;
  578. if (invertmap)
  579. {
  580. for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
  581. map = inverted_map;
  582. }
  /* Scan the 256 bits; for each set bit find the end of the run of set bits
  (j) and print the run as a single value, two adjacent values, or a
  "first-last" range. '-' and ']' are preceded by a backslash. */
  583. for (i = 0; i < 256; i++)
  584. {
  585. if ((map[i/8] & (1 << (i&7))) != 0)
  586. {
  587. int j;
  588. for (j = i+1; j < 256; j++)
  589. if ((map[j/8] & (1 << (j&7))) == 0) break;
  590. if (i == '-' || i == ']') fprintf(f, "\\");
  591. if (PRINTABLE(i)) fprintf(f, "%c", i);
  592. else fprintf(f, "\\x%02x", i);
  593. if (--j > i)
  594. {
  595. if (j != i + 1) fprintf(f, "-");
  596. if (j == '-' || j == ']') fprintf(f, "\\");
  597. if (PRINTABLE(j)) fprintf(f, "%c", j);
  598. else fprintf(f, "\\x%02x", j);
  599. }
  600. i = j;
  601. }
  602. }
  /* Step over the 32-byte map, expressed in code units. */
  603. ccode += 32 / sizeof(pcre_uchar);
  604. }
  605. /* For an XCLASS there is always some additional data */
  606. if (*code == OP_XCLASS)
  607. {
  608. pcre_uchar ch;
  609. while ((ch = *ccode++) != XCL_END)
  610. {
  611. BOOL not = FALSE;
  612. const char *notch = "";
  613. switch(ch)
  614. {
  615. case XCL_NOTPROP:
  616. not = TRUE;
  617. notch = "^";
  618. /* Fall through */
  619. case XCL_PROP:
  620. {
  621. unsigned int ptype = *ccode++;
  622. unsigned int pvalue = *ccode++;
  623. switch(ptype)
  624. {
  625. case PT_PXGRAPH:
  626. fprintf(f, "[:%sgraph:]", notch);
  627. break;
  628. case PT_PXPRINT:
  629. fprintf(f, "[:%sprint:]", notch);
  630. break;
  631. case PT_PXPUNCT:
  632. fprintf(f, "[:%spunct:]", notch);
  633. break;
  634. default:
  635. fprintf(f, "\\%c{%s}", (not? 'P':'p'),
  636. get_ucpname(ptype, pvalue));
  637. break;
  638. }
  639. }
  640. break;
  /* Any other marker is followed by one character, or by two characters
  (printed as "first-last") when the marker is XCL_RANGE. */
  641. default:
  642. ccode += 1 + print_char(f, ccode, utf);
  643. if (ch == XCL_RANGE)
  644. {
  645. fprintf(f, "-");
  646. ccode += 1 + print_char(f, ccode, utf);
  647. }
  648. break;
  649. }
  650. }
  651. }
  652. /* Indicate a non-UTF class which was created by negation */
  653. fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
  654. /* Handle repeats after a class or a back reference */
  /* This label is reached by falling through from the class code above and by
  goto from the OP_REF/OP_REFI/OP_DNREF/OP_DNREFI cases; in both situations
  ccode points at the unit that follows the class or reference. If that unit
  is a repeat opcode it is printed on the same line and its table length is
  added to "extra" so that the advance at the bottom of the loop skips it. */
  655. CLASS_REF_REPEAT:
  656. switch(*ccode)
  657. {
  658. case OP_CRSTAR:
  659. case OP_CRMINSTAR:
  660. case OP_CRPLUS:
  661. case OP_CRMINPLUS:
  662. case OP_CRQUERY:
  663. case OP_CRMINQUERY:
  664. case OP_CRPOSSTAR:
  665. case OP_CRPOSPLUS:
  666. case OP_CRPOSQUERY:
  667. fprintf(f, "%s", priv_OP_names[*ccode]);
  668. extra += priv_OP_lengths[*ccode];
  669. break;
  670. case OP_CRRANGE:
  671. case OP_CRMINRANGE:
  672. case OP_CRPOSRANGE:
  673. min = GET2(ccode,1);
  674. max = GET2(ccode,1 + IMM2_SIZE);
  675. if (max == 0) fprintf(f, "{%u,}", min);
  676. else fprintf(f, "{%u,%u}", min, max);
  677. if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
  678. else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
  679. extra += priv_OP_lengths[*ccode];
  680. break;
  681. /* Do nothing if it's not a repeat; this code stops picky compilers
  682. warning about the lack of a default code path. */
  683. default:
  684. break;
  685. }
  686. }
  687. break;
  688. case OP_MARK:
  689. case OP_PRUNE_ARG:
  690. case OP_SKIP_ARG:
  691. case OP_THEN_ARG:
  692. fprintf(f, " %s ", priv_OP_names[*code]);
  /* The argument string starts at code + 2 and is printed up to its zero
  terminator; code[1] is added to the advance (presumably the length of that
  string - confirm against the compiler). */
  693. print_puchar(f, code + 2);
  694. extra += code[1];
  695. break;
  696. case OP_THEN:
  697. fprintf(f, " %s", priv_OP_names[*code]);
  698. break;
  699. case OP_CIRCM:
  700. case OP_DOLLM:
  701. flag = "/m";
  702. /* Fall through */
  703. /* Anything else is just an item with no data, but possibly a flag. */
  704. default:
  705. fprintf(f, " %s %s", flag, priv_OP_names[*code]);
  706. break;
  707. }
  /* Generic advance for every case that left the switch with "break": the
  item's table length plus whatever additional units the case recorded. */
  708. code += priv_OP_lengths[*code] + extra;
  709. fprintf(f, "\n");
  710. }
  711. }
  712. /* End of pcre_printint.src */