/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2018 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include "pcre2_internal.h"
- /*************************************************
- * Return info about compiled pattern *
- *************************************************/
- /*
- Arguments:
- code points to compiled code
- what what information is required
- where where to put the information; if NULL, return length
- Returns: 0 when data returned
- > 0 when length requested
- < 0 on error or unset value
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
- {
- const pcre2_real_code *re = (pcre2_real_code *)code;
- if (where == NULL) /* Requests field length */
- {
- switch(what)
- {
- case PCRE2_INFO_ALLOPTIONS:
- case PCRE2_INFO_ARGOPTIONS:
- case PCRE2_INFO_BACKREFMAX:
- case PCRE2_INFO_BSR:
- case PCRE2_INFO_CAPTURECOUNT:
- case PCRE2_INFO_DEPTHLIMIT:
- case PCRE2_INFO_EXTRAOPTIONS:
- case PCRE2_INFO_FIRSTCODETYPE:
- case PCRE2_INFO_FIRSTCODEUNIT:
- case PCRE2_INFO_HASBACKSLASHC:
- case PCRE2_INFO_HASCRORLF:
- case PCRE2_INFO_HEAPLIMIT:
- case PCRE2_INFO_JCHANGED:
- case PCRE2_INFO_LASTCODETYPE:
- case PCRE2_INFO_LASTCODEUNIT:
- case PCRE2_INFO_MATCHEMPTY:
- case PCRE2_INFO_MATCHLIMIT:
- case PCRE2_INFO_MAXLOOKBEHIND:
- case PCRE2_INFO_MINLENGTH:
- case PCRE2_INFO_NAMEENTRYSIZE:
- case PCRE2_INFO_NAMECOUNT:
- case PCRE2_INFO_NEWLINE:
- return sizeof(uint32_t);
- case PCRE2_INFO_FIRSTBITMAP:
- return sizeof(const uint8_t *);
- case PCRE2_INFO_JITSIZE:
- case PCRE2_INFO_SIZE:
- case PCRE2_INFO_FRAMESIZE:
- return sizeof(size_t);
- case PCRE2_INFO_NAMETABLE:
- return sizeof(PCRE2_SPTR);
- }
- }
- if (re == NULL) return PCRE2_ERROR_NULL;
- /* Check that the first field in the block is the magic number. If it is not,
- return with PCRE2_ERROR_BADMAGIC. */
- if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
- /* Check that this pattern was compiled in the correct bit mode */
- if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
- switch(what)
- {
- case PCRE2_INFO_ALLOPTIONS:
- *((uint32_t *)where) = re->overall_options;
- break;
- case PCRE2_INFO_ARGOPTIONS:
- *((uint32_t *)where) = re->compile_options;
- break;
- case PCRE2_INFO_BACKREFMAX:
- *((uint32_t *)where) = re->top_backref;
- break;
- case PCRE2_INFO_BSR:
- *((uint32_t *)where) = re->bsr_convention;
- break;
- case PCRE2_INFO_CAPTURECOUNT:
- *((uint32_t *)where) = re->top_bracket;
- break;
- case PCRE2_INFO_DEPTHLIMIT:
- *((uint32_t *)where) = re->limit_depth;
- if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
- break;
- case PCRE2_INFO_EXTRAOPTIONS:
- *((uint32_t *)where) = re->extra_options;
- break;
- case PCRE2_INFO_FIRSTCODETYPE:
- *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
- ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
- break;
- case PCRE2_INFO_FIRSTCODEUNIT:
- *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
- re->first_codeunit : 0;
- break;
- case PCRE2_INFO_FIRSTBITMAP:
- *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
- &(re->start_bitmap[0]) : NULL;
- break;
- case PCRE2_INFO_FRAMESIZE:
- *((size_t *)where) = offsetof(heapframe, ovector) +
- re->top_bracket * 2 * sizeof(PCRE2_SIZE);
- break;
- case PCRE2_INFO_HASBACKSLASHC:
- *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
- break;
- case PCRE2_INFO_HASCRORLF:
- *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
- break;
- case PCRE2_INFO_HEAPLIMIT:
- *((uint32_t *)where) = re->limit_heap;
- if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
- break;
- case PCRE2_INFO_JCHANGED:
- *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
- break;
- case PCRE2_INFO_JITSIZE:
- #ifdef SUPPORT_JIT
- *((size_t *)where) = (re->executable_jit != NULL)?
- PRIV(jit_get_size)(re->executable_jit) : 0;
- #else
- *((size_t *)where) = 0;
- #endif
- break;
- case PCRE2_INFO_LASTCODETYPE:
- *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
- break;
- case PCRE2_INFO_LASTCODEUNIT:
- *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
- re->last_codeunit : 0;
- break;
- case PCRE2_INFO_MATCHEMPTY:
- *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
- break;
- case PCRE2_INFO_MATCHLIMIT:
- *((uint32_t *)where) = re->limit_match;
- if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
- break;
- case PCRE2_INFO_MAXLOOKBEHIND:
- *((uint32_t *)where) = re->max_lookbehind;
- break;
- case PCRE2_INFO_MINLENGTH:
- *((uint32_t *)where) = re->minlength;
- break;
- case PCRE2_INFO_NAMEENTRYSIZE:
- *((uint32_t *)where) = re->name_entry_size;
- break;
- case PCRE2_INFO_NAMECOUNT:
- *((uint32_t *)where) = re->name_count;
- break;
- case PCRE2_INFO_NAMETABLE:
- *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
- break;
- case PCRE2_INFO_NEWLINE:
- *((uint32_t *)where) = re->newline_convention;
- break;
- case PCRE2_INFO_SIZE:
- *((size_t *)where) = re->blocksize;
- break;
- default: return PCRE2_ERROR_BADOPTION;
- }
- return 0;
- }
- /*************************************************
- * Callout enumerator *
- *************************************************/
- /*
- Arguments:
- code points to compiled code
- callback function called for each callout block
- callout_data user data passed to the callback
- Returns: 0 when successfully completed
- < 0 on local error
- != 0 for callback error
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_callout_enumerate(const pcre2_code *code,
- int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
- {
- pcre2_real_code *re = (pcre2_real_code *)code;
- pcre2_callout_enumerate_block cb;
- PCRE2_SPTR cc;
- #ifdef SUPPORT_UNICODE
- BOOL utf;
- #endif
- if (re == NULL) return PCRE2_ERROR_NULL;
- #ifdef SUPPORT_UNICODE
- utf = (re->overall_options & PCRE2_UTF) != 0;
- #endif
- /* Check that the first field in the block is the magic number. If it is not,
- return with PCRE2_ERROR_BADMAGIC. */
- if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
- /* Check that this pattern was compiled in the correct bit mode */
- if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
- cb.version = 0;
- cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
- + re->name_count * re->name_entry_size;
- while (TRUE)
- {
- int rc;
- switch (*cc)
- {
- case OP_END:
- return 0;
- case OP_CHAR:
- case OP_CHARI:
- case OP_NOT:
- case OP_NOTI:
- case OP_STAR:
- case OP_MINSTAR:
- case OP_PLUS:
- case OP_MINPLUS:
- case OP_QUERY:
- case OP_MINQUERY:
- case OP_UPTO:
- case OP_MINUPTO:
- case OP_EXACT:
- case OP_POSSTAR:
- case OP_POSPLUS:
- case OP_POSQUERY:
- case OP_POSUPTO:
- case OP_STARI:
- case OP_MINSTARI:
- case OP_PLUSI:
- case OP_MINPLUSI:
- case OP_QUERYI:
- case OP_MINQUERYI:
- case OP_UPTOI:
- case OP_MINUPTOI:
- case OP_EXACTI:
- case OP_POSSTARI:
- case OP_POSPLUSI:
- case OP_POSQUERYI:
- case OP_POSUPTOI:
- case OP_NOTSTAR:
- case OP_NOTMINSTAR:
- case OP_NOTPLUS:
- case OP_NOTMINPLUS:
- case OP_NOTQUERY:
- case OP_NOTMINQUERY:
- case OP_NOTUPTO:
- case OP_NOTMINUPTO:
- case OP_NOTEXACT:
- case OP_NOTPOSSTAR:
- case OP_NOTPOSPLUS:
- case OP_NOTPOSQUERY:
- case OP_NOTPOSUPTO:
- case OP_NOTSTARI:
- case OP_NOTMINSTARI:
- case OP_NOTPLUSI:
- case OP_NOTMINPLUSI:
- case OP_NOTQUERYI:
- case OP_NOTMINQUERYI:
- case OP_NOTUPTOI:
- case OP_NOTMINUPTOI:
- case OP_NOTEXACTI:
- case OP_NOTPOSSTARI:
- case OP_NOTPOSPLUSI:
- case OP_NOTPOSQUERYI:
- case OP_NOTPOSUPTOI:
- cc += PRIV(OP_lengths)[*cc];
- #ifdef SUPPORT_UNICODE
- if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
- #endif
- break;
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEEXACT:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
- case OP_TYPEPOSUPTO:
- cc += PRIV(OP_lengths)[*cc];
- #ifdef SUPPORT_UNICODE
- if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
- #endif
- break;
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- case OP_XCLASS:
- cc += GET(cc, 1);
- break;
- #endif
- case OP_MARK:
- case OP_COMMIT_ARG:
- case OP_PRUNE_ARG:
- case OP_SKIP_ARG:
- case OP_THEN_ARG:
- cc += PRIV(OP_lengths)[*cc] + cc[1];
- break;
- case OP_CALLOUT:
- cb.pattern_position = GET(cc, 1);
- cb.next_item_length = GET(cc, 1 + LINK_SIZE);
- cb.callout_number = cc[1 + 2*LINK_SIZE];
- cb.callout_string_offset = 0;
- cb.callout_string_length = 0;
- cb.callout_string = NULL;
- rc = callback(&cb, callout_data);
- if (rc != 0) return rc;
- cc += PRIV(OP_lengths)[*cc];
- break;
- case OP_CALLOUT_STR:
- cb.pattern_position = GET(cc, 1);
- cb.next_item_length = GET(cc, 1 + LINK_SIZE);
- cb.callout_number = 0;
- cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
- cb.callout_string_length =
- GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
- cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
- rc = callback(&cb, callout_data);
- if (rc != 0) return rc;
- cc += GET(cc, 1 + 2*LINK_SIZE);
- break;
- default:
- cc += PRIV(OP_lengths)[*cc];
- break;
- }
- }
- }
/* End of pcre2_pattern_info.c */