123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
- /*************************************************
- * Perl-Compatible Regular Expressions *
- *************************************************/
- /* PCRE is a library of functions to support regular expressions whose syntax
- and semantics are as close as possible to those of the Perl 5 language.
- Written by Philip Hazel
- Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
- -----------------------------------------------------------------------------
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
- -----------------------------------------------------------------------------
- */
- /* This module contains an internal function that is used to match a Unicode
- extended grapheme sequence. It is used by both pcre2_match() and
- pcre2_dfa_match(). However, it is called only when Unicode support is being
- compiled. Nevertheless, we provide a dummy function when there is no Unicode
- support, because some compilers do not like functionless source files. */
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include "pcre2_internal.h"
- /* Dummy function */
- #ifndef SUPPORT_UNICODE
- PCRE2_SPTR
- PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
- PCRE2_SPTR end_subject, BOOL utf, int *xcount)
- {
- (void)c;
- (void)eptr;
- (void)start_subject;
- (void)end_subject;
- (void)utf;
- (void)xcount;
- return NULL;
- }
- #else
- /*************************************************
- * Match an extended grapheme sequence *
- *************************************************/
- /*
- Arguments:
- c the first character
- eptr pointer to next character
- start_subject pointer to start of subject
- end_subject pointer to end of subject
- utf TRUE if in UTF mode
- xcount pointer to count of additional characters,
- or NULL if count not needed
- Returns: pointer after the end of the sequence
- */
- PCRE2_SPTR
- PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
- PCRE2_SPTR end_subject, BOOL utf, int *xcount)
- {
- int lgb = UCD_GRAPHBREAK(c);
- while (eptr < end_subject)
- {
- int rgb;
- int len = 1;
- if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
- rgb = UCD_GRAPHBREAK(c);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
- /* Not breaking between Regional Indicators is allowed only if there
- are an even number of preceding RIs. */
- if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = eptr - 1;
- if (utf) BACKCHAR(bptr);
- /* bptr is pointing to the left-hand character */
- while (bptr > start_subject)
- {
- bptr--;
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(c, bptr);
- }
- else
- c = *bptr;
- if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
- /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
- allows any number of them before a following Extended_Pictographic. */
- if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
- lgb != ucp_gbExtended_Pictographic)
- lgb = rgb;
- eptr += len;
- if (xcount != NULL) *xcount += 1;
- }
- return eptr;
- }
- #endif /* SUPPORT_UNICODE */
- /* End of pcre2_extuni.c */
|