123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 |
- /*************************************************
- * Perl-Compatible Regular Expressions *
- *************************************************/
- /* PCRE is a library of functions to support regular expressions whose syntax
- and semantics are as close as possible to those of the Perl 5 language.
- Written by Philip Hazel
- Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
- -----------------------------------------------------------------------------
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
- -----------------------------------------------------------------------------
- */
- /* This module contains internal functions for testing newlines when more than
- one kind of newline is to be recognized. When a newline is found, its length is
- returned. In principle, we could implement several newline "types", each
- referring to a different set of newline characters. At present, PCRE2 supports
- only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
- and NLTYPE_ANY. The full list of Unicode newline characters is taken from
- http://unicode.org/unicode/reports/tr18/. */
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include "pcre2_internal.h"
- /*************************************************
- * Check for newline at given position *
- *************************************************/
- /* This function is called only via the IS_NEWLINE macro, which does so only
- when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
- newline (NLTYPE_FIXED) is handled inline. It is guaranteed that ptr points to
- a code unit that lies before the end of the string (that is, ptr < endptr).
- Arguments:
- ptr pointer to possible newline
- type the newline type
- endptr pointer to the end of the string
- lenptr where to return the length
- utf TRUE if in utf mode
- Returns: TRUE or FALSE
- */
- BOOL
- PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
- uint32_t *lenptr, BOOL utf)
- {
- uint32_t c;
- #ifdef SUPPORT_UNICODE
- if (utf) { GETCHAR(c, ptr); } else c = *ptr;
- #else
- (void)utf;
- c = *ptr;
- #endif /* SUPPORT_UNICODE */
- if (type == NLTYPE_ANYCRLF) switch(c)
- {
- case CHAR_LF:
- *lenptr = 1;
- return TRUE;
- case CHAR_CR:
- *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
- return TRUE;
- default:
- return FALSE;
- }
- /* NLTYPE_ANY */
- else switch(c)
- {
- #ifdef EBCDIC
- case CHAR_NEL:
- #endif
- case CHAR_LF:
- case CHAR_VT:
- case CHAR_FF:
- *lenptr = 1;
- return TRUE;
- case CHAR_CR:
- *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
- return TRUE;
- #ifndef EBCDIC
- #if PCRE2_CODE_UNIT_WIDTH == 8
- case CHAR_NEL:
- *lenptr = utf? 2 : 1;
- return TRUE;
- case 0x2028: /* LS */
- case 0x2029: /* PS */
- *lenptr = 3;
- return TRUE;
- #else /* 16-bit or 32-bit code units */
- case CHAR_NEL:
- case 0x2028: /* LS */
- case 0x2029: /* PS */
- *lenptr = 1;
- return TRUE;
- #endif
- #endif /* Not EBCDIC */
- default:
- return FALSE;
- }
- }
- /*************************************************
- * Check for newline at previous position *
- *************************************************/
- /* This function is called only via the WAS_NEWLINE macro, which does so only
- when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
- newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
- value of ptr is greater than the start of the string that is being processed.
- Arguments:
- ptr pointer to possible newline
- type the newline type
- startptr pointer to the start of the string
- lenptr where to return the length
- utf TRUE if in utf mode
- Returns: TRUE or FALSE
- */
- BOOL
- PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
- uint32_t *lenptr, BOOL utf)
- {
- uint32_t c;
- ptr--;
- #ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(ptr);
- GETCHAR(c, ptr);
- }
- else c = *ptr;
- #else
- (void)utf;
- c = *ptr;
- #endif /* SUPPORT_UNICODE */
- if (type == NLTYPE_ANYCRLF) switch(c)
- {
- case CHAR_LF:
- *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
- return TRUE;
- case CHAR_CR:
- *lenptr = 1;
- return TRUE;
- default:
- return FALSE;
- }
- /* NLTYPE_ANY */
- else switch(c)
- {
- case CHAR_LF:
- *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
- return TRUE;
- #ifdef EBCDIC
- case CHAR_NEL:
- #endif
- case CHAR_VT:
- case CHAR_FF:
- case CHAR_CR:
- *lenptr = 1;
- return TRUE;
- #ifndef EBCDIC
- #if PCRE2_CODE_UNIT_WIDTH == 8
- case CHAR_NEL:
- *lenptr = utf? 2 : 1;
- return TRUE;
- case 0x2028: /* LS */
- case 0x2029: /* PS */
- *lenptr = 3;
- return TRUE;
- #else /* 16-bit or 32-bit code units */
- case CHAR_NEL:
- case 0x2028: /* LS */
- case 0x2029: /* PS */
- *lenptr = 1;
- return TRUE;
- #endif
- #endif /* Not EBCDIC */
- default:
- return FALSE;
- }
- }
- /* End of pcre2_newline.c */
|