123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547 |
- /*************************************************
- * Perl-Compatible Regular Expressions *
- *************************************************/
- /* PCRE is a library of functions to support regular expressions whose syntax
- and semantics are as close as possible to those of the Perl 5 language.
- Written by Philip Hazel
- Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
- -----------------------------------------------------------------------------
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
- -----------------------------------------------------------------------------
- */
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include "pcre2_internal.h"
- /*************************************************
- * Copy named captured string to given buffer *
- *************************************************/
- /* This function copies a single captured substring into a given buffer,
- identifying it by name. If the regex permits duplicate names, the first
- substring that is set is chosen.
- Arguments:
- match_data points to the match data
- stringname the name of the required substring
- buffer where to put the substring
- sizeptr the size of the buffer, updated to the size of the substring
- Returns: if successful: zero
- if not successful, a negative error code:
- (1) an error from nametable_scan()
- (2) an error from copy_bynumber()
- (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
- (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
- PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
- {
- PCRE2_SPTR first, last, entry;
- int failrc, entrysize;
- if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
- return PCRE2_ERROR_DFA_UFUNC;
- entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
- &first, &last);
- if (entrysize < 0) return entrysize;
- failrc = PCRE2_ERROR_UNAVAILABLE;
- for (entry = first; entry <= last; entry += entrysize)
- {
- uint32_t n = GET2(entry, 0);
- if (n < match_data->oveccount)
- {
- if (match_data->ovector[n*2] != PCRE2_UNSET)
- return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
- failrc = PCRE2_ERROR_UNSET;
- }
- }
- return failrc;
- }
- /*************************************************
- * Copy numbered captured string to given buffer *
- *************************************************/
- /* This function copies a single captured substring into a given buffer,
- identifying it by number.
- Arguments:
- match_data points to the match data
- stringnumber the number of the required substring
- buffer where to put the substring
- sizeptr the size of the buffer, updated to the size of the substring
- Returns: if successful: 0
- if not successful, a negative error code:
- PCRE2_ERROR_NOMEMORY: buffer too small
- PCRE2_ERROR_NOSUBSTRING: no such substring
- PCRE2_ERROR_UNAVAILABLE: ovector too small
- PCRE2_ERROR_UNSET: substring is not set
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
- uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
- {
- int rc;
- PCRE2_SIZE size;
- rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
- if (rc < 0) return rc;
- if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
- memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
- CU2BYTES(size));
- buffer[size] = 0;
- *sizeptr = size;
- return 0;
- }
- /*************************************************
- * Extract named captured string *
- *************************************************/
- /* This function copies a single captured substring, identified by name, into
- new memory. If the regex permits duplicate names, the first substring that is
- set is chosen.
- Arguments:
- match_data pointer to match_data
- stringname the name of the required substring
- stringptr where to put the pointer to the new memory
- sizeptr where to put the length of the substring
- Returns: if successful: zero
- if not successful, a negative value:
- (1) an error from nametable_scan()
- (2) an error from get_bynumber()
- (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
- (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_substring_get_byname(pcre2_match_data *match_data,
- PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
- {
- PCRE2_SPTR first, last, entry;
- int failrc, entrysize;
- if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
- return PCRE2_ERROR_DFA_UFUNC;
- entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
- &first, &last);
- if (entrysize < 0) return entrysize;
- failrc = PCRE2_ERROR_UNAVAILABLE;
- for (entry = first; entry <= last; entry += entrysize)
- {
- uint32_t n = GET2(entry, 0);
- if (n < match_data->oveccount)
- {
- if (match_data->ovector[n*2] != PCRE2_UNSET)
- return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
- failrc = PCRE2_ERROR_UNSET;
- }
- }
- return failrc;
- }
- /*************************************************
- * Extract captured string to new memory *
- *************************************************/
- /* This function copies a single captured substring into a piece of new
- memory.
- Arguments:
- match_data points to match data
- stringnumber the number of the required substring
- stringptr where to put a pointer to the new memory
- sizeptr where to put the size of the substring
- Returns: if successful: 0
- if not successful, a negative error code:
- PCRE2_ERROR_NOMEMORY: failed to get memory
- PCRE2_ERROR_NOSUBSTRING: no such substring
- PCRE2_ERROR_UNAVAILABLE: ovector too small
- PCRE2_ERROR_UNSET: substring is not set
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_substring_get_bynumber(pcre2_match_data *match_data,
- uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
- {
- int rc;
- PCRE2_SIZE size;
- PCRE2_UCHAR *yield;
- rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
- if (rc < 0) return rc;
- yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
- (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
- if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
- yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
- memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
- CU2BYTES(size));
- yield[size] = 0;
- *stringptr = yield;
- *sizeptr = size;
- return 0;
- }
- /*************************************************
- * Free memory obtained by get_substring *
- *************************************************/
- /*
- Argument: the result of a previous pcre2_substring_get_byxxx()
- Returns: nothing
- */
- PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
- pcre2_substring_free(PCRE2_UCHAR *string)
- {
- if (string != NULL)
- {
- pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
- memctl->free(memctl, memctl->memory_data);
- }
- }
- /*************************************************
- * Get length of a named substring *
- *************************************************/
- /* This function returns the length of a named captured substring. If the regex
- permits duplicate names, the first substring that is set is chosen.
- Arguments:
- match_data pointer to match data
- stringname the name of the required substring
- sizeptr where to put the length
- Returns: 0 if successful, else a negative error number
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_substring_length_byname(pcre2_match_data *match_data,
- PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
- {
- PCRE2_SPTR first, last, entry;
- int failrc, entrysize;
- if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
- return PCRE2_ERROR_DFA_UFUNC;
- entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
- &first, &last);
- if (entrysize < 0) return entrysize;
- failrc = PCRE2_ERROR_UNAVAILABLE;
- for (entry = first; entry <= last; entry += entrysize)
- {
- uint32_t n = GET2(entry, 0);
- if (n < match_data->oveccount)
- {
- if (match_data->ovector[n*2] != PCRE2_UNSET)
- return pcre2_substring_length_bynumber(match_data, n, sizeptr);
- failrc = PCRE2_ERROR_UNSET;
- }
- }
- return failrc;
- }
- /*************************************************
- * Get length of a numbered substring *
- *************************************************/
- /* This function returns the length of a captured substring. If the start is
- beyond the end (which can happen when \K is used in an assertion), it sets the
- length to zero.
- Arguments:
- match_data pointer to match data
- stringnumber the number of the required substring
- sizeptr where to put the length, if not NULL
- Returns: if successful: 0
- if not successful, a negative error code:
- PCRE2_ERROR_NOSUBSTRING: no such substring
- PCRE2_ERROR_UNAVAILABLE: ovector is too small
- PCRE2_ERROR_UNSET: substring is not set
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_substring_length_bynumber(pcre2_match_data *match_data,
- uint32_t stringnumber, PCRE2_SIZE *sizeptr)
- {
- PCRE2_SIZE left, right;
- int count = match_data->rc;
- if (count == PCRE2_ERROR_PARTIAL)
- {
- if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
- count = 0;
- }
- else if (count < 0) return count; /* Match failed */
- if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
- {
- if (stringnumber > match_data->code->top_bracket)
- return PCRE2_ERROR_NOSUBSTRING;
- if (stringnumber >= match_data->oveccount)
- return PCRE2_ERROR_UNAVAILABLE;
- if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
- return PCRE2_ERROR_UNSET;
- }
- else /* Matched using pcre2_dfa_match() */
- {
- if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
- if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
- }
- left = match_data->ovector[stringnumber*2];
- right = match_data->ovector[stringnumber*2+1];
- if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
- return 0;
- }
- /*************************************************
- * Extract all captured strings to new memory *
- *************************************************/
- /* This function gets one chunk of memory and builds a list of pointers and all
- the captured substrings in it. A NULL pointer is put on the end of the list.
- The substrings are zero-terminated, but also, if the final argument is
- non-NULL, a list of lengths is also returned. This allows binary data to be
- handled.
- Arguments:
- match_data points to the match data
- listptr set to point to the list of pointers
- lengthsptr set to point to the list of lengths (may be NULL)
- Returns: if successful: 0
- if not successful, a negative error code:
- PCRE2_ERROR_NOMEMORY: failed to get memory,
- or a match failure code
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
- PCRE2_SIZE **lengthsptr)
- {
- int i, count, count2;
- PCRE2_SIZE size;
- PCRE2_SIZE *lensp;
- pcre2_memctl *memp;
- PCRE2_UCHAR **listp;
- PCRE2_UCHAR *sp;
- PCRE2_SIZE *ovector;
- if ((count = match_data->rc) < 0) return count; /* Match failed */
- if (count == 0) count = match_data->oveccount; /* Ovector too small */
- count2 = 2*count;
- ovector = match_data->ovector;
- size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
- if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
- for (i = 0; i < count2; i += 2)
- {
- size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
- if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
- }
- memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
- if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
- *listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
- lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
- if (lengthsptr == NULL)
- {
- sp = (PCRE2_UCHAR *)lensp;
- lensp = NULL;
- }
- else
- {
- *lengthsptr = lensp;
- sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
- }
- for (i = 0; i < count2; i += 2)
- {
- size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
- /* Size == 0 includes the case when the capture is unset. Avoid adding
- PCRE2_UNSET to match_data->subject because it overflows, even though with
- zero size calling memcpy() is harmless. */
- if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
- *listp++ = sp;
- if (lensp != NULL) *lensp++ = size;
- sp += size;
- *sp++ = 0;
- }
- *listp = NULL;
- return 0;
- }
- /*************************************************
- * Free memory obtained by substring_list_get *
- *************************************************/
- /*
- Argument: the result of a previous pcre2_substring_list_get()
- Returns: nothing
- */
- PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
- pcre2_substring_list_free(PCRE2_SPTR *list)
- {
- if (list != NULL)
- {
- pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
- memctl->free(memctl, memctl->memory_data);
- }
- }
- /*************************************************
- * Find (multiple) entries for named string *
- *************************************************/
- /* This function scans the nametable for a given name, using binary chop. It
- returns either two pointers to the entries in the table, or, if no pointers are
- given, the number of a unique group with the given name. If duplicate names are
- permitted, and the name is not unique, an error is generated.
- Arguments:
- code the compiled regex
- stringname the name whose entries required
- firstptr where to put the pointer to the first entry
- lastptr where to put the pointer to the last entry
- Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
- otherwise, if firstptr and lastptr are NULL:
- a group number for a unique substring
- else PCRE2_ERROR_NOUNIQUESUBSTRING
- otherwise:
- the length of each entry, having set firstptr and lastptr
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
- PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
- {
- uint16_t bot = 0;
- uint16_t top = code->name_count;
- uint16_t entrysize = code->name_entry_size;
- PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
- while (top > bot)
- {
- uint16_t mid = (top + bot) / 2;
- PCRE2_SPTR entry = nametable + entrysize*mid;
- int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
- if (c == 0)
- {
- PCRE2_SPTR first;
- PCRE2_SPTR last;
- PCRE2_SPTR lastentry;
- lastentry = nametable + entrysize * (code->name_count - 1);
- first = last = entry;
- while (first > nametable)
- {
- if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
- first -= entrysize;
- }
- while (last < lastentry)
- {
- if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
- last += entrysize;
- }
- if (firstptr == NULL) return (first == last)?
- (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
- *firstptr = first;
- *lastptr = last;
- return entrysize;
- }
- if (c > 0) bot = mid + 1; else top = mid;
- }
- return PCRE2_ERROR_NOSUBSTRING;
- }
- /*************************************************
- * Find number for named string *
- *************************************************/
- /* This function is a convenience wrapper for pcre2_substring_nametable_scan()
- when it is known that names are unique. If there are duplicate names, it is not
- defined which number is returned.
- Arguments:
- code the compiled regex
- stringname the name whose number is required
- Returns: the number of the named parenthesis, or a negative number
- PCRE2_ERROR_NOSUBSTRING if not found
- PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
- */
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_substring_number_from_name(const pcre2_code *code,
- PCRE2_SPTR stringname)
- {
- return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
- }
- /* End of pcre2_substring.c */
|