123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548 |
- /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
- file Copyright.txt or https://cmake.org/licensing#kwsys for details. */
- // Original Copyright notice:
- // Copyright (C) 1991 Texas Instruments Incorporated.
- //
- // Permission is granted to any individual or institution to use, copy, modify,
- // and distribute this software, provided that this complete copyright and
- // permission notice is maintained, intact, in all copies and supporting
- // documentation.
- //
- // Texas Instruments Incorporated provides this software "as is" without
- // express or implied warranty.
- //
- // Created: MNF 06/13/89 Initial Design and Implementation
- // Updated: LGO 08/09/89 Inherit from Generic
- // Updated: MBN 09/07/89 Added conditional exception handling
- // Updated: MBN 12/15/89 Sprinkled "const" qualifiers all over the place!
- // Updated: DLS 03/22/91 New lite version
- //
- #ifndef cmsys_RegularExpression_hxx
- #define cmsys_RegularExpression_hxx
- #include <cmsys/Configure.h>
- #include <cmsys/Configure.hxx>
- #include <string>
- /* Disable useless Borland warnings. KWSys tries not to force things
- on its includers, but there is no choice here. */
- #if defined(__BORLANDC__)
- #pragma warn - 8027 /* function not inlined. */
- #endif
- namespace cmsys {
- // Forward declaration
- class RegularExpression;
- /** \class RegularExpressionMatch
- * \brief Stores the pattern matches of a RegularExpression
- */
- class cmsys_EXPORT RegularExpressionMatch
- {
- public:
- RegularExpressionMatch();
- bool isValid() const;
- void clear();
- std::string::size_type start() const;
- std::string::size_type end() const;
- std::string::size_type start(int n) const;
- std::string::size_type end(int n) const;
- std::string match(int n) const;
- enum
- {
- NSUBEXP = 10
- };
- private:
- friend class RegularExpression;
- const char* startp[NSUBEXP];
- const char* endp[NSUBEXP];
- const char* searchstring;
- };
- /**
- * \brief Creates an invalid match object
- */
- inline RegularExpressionMatch::RegularExpressionMatch()
- {
- startp[0] = 0;
- endp[0] = 0;
- searchstring = 0;
- }
- /**
- * \brief Returns true if the match pointers are valid
- */
- inline bool RegularExpressionMatch::isValid() const
- {
- return (this->startp[0] != 0);
- }
- /**
- * \brief Resets to the (invalid) construction state.
- */
- inline void RegularExpressionMatch::clear()
- {
- startp[0] = 0;
- endp[0] = 0;
- searchstring = 0;
- }
- /**
- * \brief Returns the start index of the full match.
- */
- inline std::string::size_type RegularExpressionMatch::start() const
- {
- return static_cast<std::string::size_type>(this->startp[0] - searchstring);
- }
- /**
- * \brief Returns the end index of the full match.
- */
- inline std::string::size_type RegularExpressionMatch::end() const
- {
- return static_cast<std::string::size_type>(this->endp[0] - searchstring);
- }
- /**
- * \brief Returns the start index of nth submatch.
- * start(0) is the start of the full match.
- */
- inline std::string::size_type RegularExpressionMatch::start(int n) const
- {
- return static_cast<std::string::size_type>(this->startp[n] -
- this->searchstring);
- }
- /**
- * \brief Returns the end index of nth submatch.
- * end(0) is the end of the full match.
- */
- inline std::string::size_type RegularExpressionMatch::end(int n) const
- {
- return static_cast<std::string::size_type>(this->endp[n] -
- this->searchstring);
- }
- /**
- * \brief Returns the nth submatch as a string.
- */
- inline std::string RegularExpressionMatch::match(int n) const
- {
- if (this->startp[n] == 0) {
- return std::string();
- } else {
- return std::string(this->startp[n], static_cast<std::string::size_type>(
- this->endp[n] - this->startp[n]));
- }
- }
- /** \class RegularExpression
- * \brief Implements pattern matching with regular expressions.
- *
- * This is the header file for the regular expression class. An object of
- * this class contains a regular expression, in a special "compiled" format.
- * This compiled format consists of several slots all kept as the objects
- * private data. The RegularExpression class provides a convenient way to
- * represent regular expressions. It makes it easy to search for the same
- * regular expression in many different strings without having to compile a
- * string to regular expression format more than necessary.
- *
- * This class implements pattern matching via regular expressions.
- * A regular expression allows a programmer to specify complex
- * patterns that can be searched for and matched against the
- * character string of a string object. In its simplest form, a
- * regular expression is a sequence of characters used to
- * search for exact character matches. However, many times the
- * exact sequence to be found is not known, or only a match at
- * the beginning or end of a string is desired. The RegularExpression
- * class implements regular expression pattern
- * matching as is found and implemented in many UNIX commands
- * and utilities.
- *
- * Example: The perl code
- *
- * $filename =~ m"([a-z]+)\.cc";
- * print $1;
- *
- * Is written as follows in C++
- *
- * RegularExpression re("([a-z]+)\\.cc");
- * re.find(filename);
- * cerr << re.match(1);
- *
- *
- * The regular expression class provides a convenient mechanism
- * for specifying and manipulating regular expressions. The
- * regular expression object allows specification of such
- * patterns by using the following regular expression
- * metacharacters:
- *
- * ^ Matches at beginning of a line
- *
- * $ Matches at end of a line
- *
- * . Matches any single character
- *
- * [ ] Matches any character(s) inside the brackets
- *
- * [^ ] Matches any character(s) not inside the brackets
- *
- * - Matches any character in range on either side of a dash
- *
- * * Matches preceding pattern zero or more times
- *
- * + Matches preceding pattern one or more times
- *
- * ? Matches preceding pattern zero or once only
- *
- * () Saves a matched expression and uses it in a later match
- *
- * Note that more than one of these metacharacters can be used
- * in a single regular expression in order to create complex
- * search patterns. For example, the pattern [^ab1-9] says to
- * match any single character that is not an "a", a "b", or a
- * number one through nine.
- *
- * There are four constructors for RegularExpression. One just creates an
- * empty RegularExpression object. Two others create a RegularExpression
- * object and initialize it with a regular expression that is given in the
- * form of a char* or of a std::string. The fourth takes a reference to a
- * RegularExpression object as an argument and creates an object initialized
- * with the information from the given RegularExpression object.
- *
- * The find member function finds the first occurrence of the regular
- * expression of that object in the string given to find as an argument. Find
- * returns a boolean, and if true, mutates the private data appropriately.
- * Find sets pointers to the beginning and end of the thing last found, they
- * are pointers into the actual string that was searched. The start and end
- * member functions return indices into the searched string that correspond
- * to the beginning and end pointers respectively. The compile member
- * function takes a char* and puts the compiled version of the char* argument
- * into the object's private data fields. The == and != operators only check
- * to see if the compiled regular expression is the same, and the
- * deep_equal function also checks to see if the start and end pointers are
- * the same. The is_valid function returns false if program is set to NULL,
- * (i.e. there is no valid compiled expression). The set_invalid function sets
- * the program to NULL (Warning: this deletes the compiled expression). The
- * following examples may help clarify regular expression usage:
- *
- * * The regular expression "^hello" matches a "hello" only at the
- * beginning of a line. It would match "hello there" but not "hi,
- * hello there".
- *
- * * The regular expression "long$" matches a "long" only at the end
- * of a line. It would match "so long\0", but not "long ago".
- *
- * * The regular expression "t..t..g" will match anything that has a
- * "t" then any two characters, another "t", any two characters and
- * then a "g". It will match "testing", or "test again" but would
- * not match "toasting"
- *
- * * The regular expression "[1-9ab]" matches any number one through
- * nine, and the characters "a" and "b". It would match "hello 1"
- * or "begin", but would not match "no-match".
- *
- * * The regular expression "[^1-9ab]" matches any character that is
- * not a number one through nine, or an "a" or "b". It would NOT
- * match "hello 1" or "begin", but would match "no-match".
- *
- * * The regular expression "br* " matches something that begins with
- * a "b", is followed by zero or more "r"s, and ends in a space. It
- * would match "brrrrr ", and "b ", but would not match "brrh ".
- *
- * * The regular expression "br+ " matches something that begins with
- * a "b", is followed by one or more "r"s, and ends in a space. It
- * would match "brrrrr ", and "br ", but would not match "b " or
- * "brrh ".
- *
- * * The regular expression "br? " matches something that begins with
- * a "b", is followed by zero or one "r"s, and ends in a space. It
- * would match "br ", and "b ", but would not match "brrrr " or
- * "brrh ".
- *
- * * The regular expression "(..p)b" matches something ending with pb
- * and beginning with whatever the two characters before the first p
- * encountered in the line were. It would find "repb" in "rep drepa
- * qrepb". The regular expression "(..p)a" would find "repa qrepb"
- * in "rep drepa qrepb"
- *
- * * The regular expression "d(..p)" matches something ending with p,
- * beginning with d, and having two characters in between that are
- * the same as the two characters before the first p encountered in
- * the line. It would match "drepa qrepb" in "rep drepa qrepb".
- *
- * All methods of RegularExpression can be called simultaneously from
- * different threads, but only if each invocation uses its own instance of
- * RegularExpression.
- */
- class cmsys_EXPORT RegularExpression
- {
- public:
- /**
- * Instantiate RegularExpression with program=NULL.
- */
- inline RegularExpression();
- /**
- * Instantiate RegularExpression with compiled char*.
- */
- inline RegularExpression(char const*);
- /**
- * Instantiate RegularExpression as a copy of another regular expression.
- */
- RegularExpression(RegularExpression const&);
- /**
- * Instantiate RegularExpression with compiled string.
- */
- inline RegularExpression(std::string const&);
- /**
- * Destructor.
- */
- inline ~RegularExpression();
- /**
- * Compile a regular expression into internal code
- * for later pattern matching.
- */
- bool compile(char const*);
- /**
- * Compile a regular expression into internal code
- * for later pattern matching.
- */
- inline bool compile(std::string const&);
- /**
- * Matches the regular expression to the given string.
- * Returns true if found, and sets start and end indexes
- * in the RegularExpressionMatch instance accordingly.
- *
- * This method is thread safe when called with different
- * RegularExpressionMatch instances.
- */
- bool find(char const*, RegularExpressionMatch&) const;
- /**
- * Matches the regular expression to the given string.
- * Returns true if found, and sets start and end indexes accordingly.
- */
- inline bool find(char const*);
- /**
- * Matches the regular expression to the given std string.
- * Returns true if found, and sets start and end indexes accordingly.
- */
- inline bool find(std::string const&);
- /**
- * Match indices
- */
- inline RegularExpressionMatch const& regMatch() const;
- inline std::string::size_type start() const;
- inline std::string::size_type end() const;
- inline std::string::size_type start(int n) const;
- inline std::string::size_type end(int n) const;
- /**
- * Match strings
- */
- inline std::string match(int n) const;
- /**
- * Copy the given regular expression.
- */
- RegularExpression& operator=(const RegularExpression& rxp);
- /**
- * Returns true if two regular expressions have the same
- * compiled program for pattern matching.
- */
- bool operator==(RegularExpression const&) const;
- /**
- * Returns true if two regular expressions have different
- * compiled program for pattern matching.
- */
- inline bool operator!=(RegularExpression const&) const;
- /**
- * Returns true if have the same compiled regular expressions
- * and the same start and end pointers.
- */
- bool deep_equal(RegularExpression const&) const;
- /**
- * True if the compiled regexp is valid.
- */
- inline bool is_valid() const;
- /**
- * Marks the regular expression as invalid.
- */
- inline void set_invalid();
- private:
- RegularExpressionMatch regmatch;
- char regstart; // Internal use only
- char reganch; // Internal use only
- const char* regmust; // Internal use only
- std::string::size_type regmlen; // Internal use only
- char* program;
- int progsize;
- };
- /**
- * Create an empty regular expression.
- */
- inline RegularExpression::RegularExpression()
- {
- this->program = 0;
- }
- /**
- * Creates a regular expression from string s, and
- * compiles s.
- */
- inline RegularExpression::RegularExpression(const char* s)
- {
- this->program = 0;
- if (s) {
- this->compile(s);
- }
- }
- /**
- * Creates a regular expression from string s, and
- * compiles s.
- */
- inline RegularExpression::RegularExpression(const std::string& s)
- {
- this->program = 0;
- this->compile(s);
- }
- /**
- * Destroys and frees space allocated for the regular expression.
- */
- inline RegularExpression::~RegularExpression()
- {
- //#ifndef _WIN32
- delete[] this->program;
- //#endif
- }
- /**
- * Compile a regular expression into internal code
- * for later pattern matching.
- */
- inline bool RegularExpression::compile(std::string const& s)
- {
- return this->compile(s.c_str());
- }
- /**
- * Matches the regular expression to the given std string.
- * Returns true if found, and sets start and end indexes accordingly.
- */
- inline bool RegularExpression::find(const char* s)
- {
- return this->find(s, this->regmatch);
- }
- /**
- * Matches the regular expression to the given std string.
- * Returns true if found, and sets start and end indexes accordingly.
- */
- inline bool RegularExpression::find(std::string const& s)
- {
- return this->find(s.c_str());
- }
- /**
- * Returns the internal match object
- */
- inline RegularExpressionMatch const& RegularExpression::regMatch() const
- {
- return this->regmatch;
- }
- /**
- * Returns the start index of the full match.
- */
- inline std::string::size_type RegularExpression::start() const
- {
- return regmatch.start();
- }
- /**
- * Returns the end index of the full match.
- */
- inline std::string::size_type RegularExpression::end() const
- {
- return regmatch.end();
- }
- /**
- * Return start index of nth submatch. start(0) is the start of the full match.
- */
- inline std::string::size_type RegularExpression::start(int n) const
- {
- return regmatch.start(n);
- }
- /**
- * Return end index of nth submatch. end(0) is the end of the full match.
- */
- inline std::string::size_type RegularExpression::end(int n) const
- {
- return regmatch.end(n);
- }
- /**
- * Return nth submatch as a string.
- */
- inline std::string RegularExpression::match(int n) const
- {
- return regmatch.match(n);
- }
- /**
- * Returns true if two regular expressions have different
- * compiled program for pattern matching.
- */
- inline bool RegularExpression::operator!=(const RegularExpression& r) const
- {
- return (!(*this == r));
- }
- /**
- * Returns true if a valid regular expression is compiled
- * and ready for pattern matching.
- */
- inline bool RegularExpression::is_valid() const
- {
- return (this->program != 0);
- }
- inline void RegularExpression::set_invalid()
- {
- //#ifndef _WIN32
- delete[] this->program;
- //#endif
- this->program = 0;
- }
- } // namespace cmsys
- #endif
|