gregex.h 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. /* GRegex -- regular expression API wrapper around PCRE.
  2. *
  3. * Copyright (C) 1999, 2000 Scott Wimer
  4. * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com>
  5. * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org>
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #ifndef __G_REGEX_H__
  22. #define __G_REGEX_H__
  23. #if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION)
  24. #error "Only <glib.h> can be included directly."
  25. #endif
  26. #include <glib/gerror.h>
  27. #include <glib/gstring.h>
  28. G_BEGIN_DECLS
  29. /**
  30. * GRegexError:
  31. * @G_REGEX_ERROR_COMPILE: Compilation of the regular expression failed.
  32. * @G_REGEX_ERROR_OPTIMIZE: Optimization of the regular expression failed.
  33. * @G_REGEX_ERROR_REPLACE: Replacement failed due to an ill-formed replacement
  34. * string.
  35. * @G_REGEX_ERROR_MATCH: The match process failed.
  36. * @G_REGEX_ERROR_INTERNAL: Internal error of the regular expression engine.
  37. * Since 2.16
  38. * @G_REGEX_ERROR_STRAY_BACKSLASH: "\\" at end of pattern. Since 2.16
  39. * @G_REGEX_ERROR_MISSING_CONTROL_CHAR: "\\c" at end of pattern. Since 2.16
  40. * @G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: Unrecognized character follows "\\".
  41. * Since 2.16
  42. * @G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: Numbers out of order in "{}"
  43. * quantifier. Since 2.16
  44. * @G_REGEX_ERROR_QUANTIFIER_TOO_BIG: Number too big in "{}" quantifier.
  45. * Since 2.16
  46. * @G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: Missing terminating "]" for
  47. * character class. Since 2.16
  48. * @G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: Invalid escape sequence
  49. * in character class. Since 2.16
  50. * @G_REGEX_ERROR_RANGE_OUT_OF_ORDER: Range out of order in character class.
  51. * Since 2.16
  52. * @G_REGEX_ERROR_NOTHING_TO_REPEAT: Nothing to repeat. Since 2.16
  53. * @G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: Unrecognized character after "(?",
  54. * "(?<" or "(?P". Since 2.16
  55. * @G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: POSIX named classes are
  56. * supported only within a class. Since 2.16
  57. * @G_REGEX_ERROR_UNMATCHED_PARENTHESIS: Missing terminating ")" or ")"
  58. * without opening "(". Since 2.16
  59. * @G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: Reference to non-existent
  60. * subpattern. Since 2.16
  61. * @G_REGEX_ERROR_UNTERMINATED_COMMENT: Missing terminating ")" after comment.
  62. * Since 2.16
  63. * @G_REGEX_ERROR_EXPRESSION_TOO_LARGE: Regular expression too large.
  64. * Since 2.16
  65. * @G_REGEX_ERROR_MEMORY_ERROR: Failed to get memory. Since 2.16
  66. * @G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: Lookbehind assertion is not
  67. * fixed length. Since 2.16
  68. * @G_REGEX_ERROR_MALFORMED_CONDITION: Malformed number or name after "(?(".
  69. * Since 2.16
  70. * @G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: Conditional group contains
  71. * more than two branches. Since 2.16
  72. * @G_REGEX_ERROR_ASSERTION_EXPECTED: Assertion expected after "(?(".
  73. * Since 2.16
  74. * @G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: Unknown POSIX class name.
  75. * Since 2.16
  76. * @G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: POSIX collating
  77. * elements are not supported. Since 2.16
  78. * @G_REGEX_ERROR_HEX_CODE_TOO_LARGE: Character value in "\\x{...}" sequence
  79. * is too large. Since 2.16
  80. * @G_REGEX_ERROR_INVALID_CONDITION: Invalid condition "(?(0)". Since 2.16
  81. * @G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: \\C not allowed in
  82. * lookbehind assertion. Since 2.16
  83. * @G_REGEX_ERROR_INFINITE_LOOP: Recursive call could loop indefinitely.
  84. * Since 2.16
  85. * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: Missing terminator
  86. * in subpattern name. Since 2.16
  87. * @G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: Two named subpatterns have
  88. * the same name. Since 2.16
  89. * @G_REGEX_ERROR_MALFORMED_PROPERTY: Malformed "\\P" or "\\p" sequence.
  90. * Since 2.16
  91. * @G_REGEX_ERROR_UNKNOWN_PROPERTY: Unknown property name after "\\P" or
  92. * "\\p". Since 2.16
  93. * @G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: Subpattern name is too long
  94. * (maximum 32 characters). Since 2.16
  95. * @G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: Too many named subpatterns (maximum
  96. * 10,000). Since 2.16
  97. * @G_REGEX_ERROR_INVALID_OCTAL_VALUE: Octal value is greater than "\\377".
  98. * Since 2.16
  99. * @G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: "DEFINE" group contains more
  100. * than one branch. Since 2.16
  101. * @G_REGEX_ERROR_DEFINE_REPETION: Repeating a "DEFINE" group is not allowed.
  102. * This error is never raised. Since: 2.16 Deprecated: 2.34
  103. * @G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: Inconsistent newline options.
  104. * Since 2.16
  105. * @G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced,
  106. * angle-bracketed, or quoted name or number, or by a plain number. Since: 2.16
  107. * @G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: relative reference must not be zero. Since: 2.34
  108. * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: the backtracking
  109. * control verb used does not allow an argument. Since: 2.34
  110. * @G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: unknown backtracking
  111. * control verb. Since: 2.34
  112. * @G_REGEX_ERROR_NUMBER_TOO_BIG: number is too big in escape sequence. Since: 2.34
  113. * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: Missing subpattern name. Since: 2.34
  114. * @G_REGEX_ERROR_MISSING_DIGIT: Missing digit. Since: 2.34
  115. * @G_REGEX_ERROR_INVALID_DATA_CHARACTER: In JavaScript compatibility mode,
  116. * "[" is an invalid data character. Since: 2.34
  117. * @G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: different names for subpatterns of the
  118. * same number are not allowed. Since: 2.34
  119. * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: the backtracking control
  120. * verb requires an argument. Since: 2.34
  121. * @G_REGEX_ERROR_INVALID_CONTROL_CHAR: "\\c" must be followed by an ASCII
  122. * character. Since: 2.34
  123. * @G_REGEX_ERROR_MISSING_NAME: "\\k" is not followed by a braced, angle-bracketed, or
  124. * quoted name. Since: 2.34
  125. * @G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: "\\N" is not supported in a class. Since: 2.34
  126. * @G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: too many forward references. Since: 2.34
  127. * @G_REGEX_ERROR_NAME_TOO_LONG: the name is too long in "(*MARK)", "(*PRUNE)",
  128. * "(*SKIP)", or "(*THEN)". Since: 2.34
  129. * @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: the character value in the \\u sequence is
  130. * too large. Since: 2.34
  131. *
  132. * Error codes returned by regular expressions functions.
  133. *
  134. * Since: 2.14
  135. */
  136. typedef enum /* Error codes reported in the G_REGEX_ERROR domain. */
  137. {
  138. G_REGEX_ERROR_COMPILE, /* no initializer: this and the next four take the values 0 through 4 */
  139. G_REGEX_ERROR_OPTIMIZE,
  140. G_REGEX_ERROR_REPLACE,
  141. G_REGEX_ERROR_MATCH,
  142. G_REGEX_ERROR_INTERNAL,
  143. /* These are the error codes from PCRE + 100. Every value below is assigned
        explicitly and the sequence has gaps (for example 110 and 111 are unused),
        so new entries must be given an explicit value as well. */
  144. G_REGEX_ERROR_STRAY_BACKSLASH = 101,
  145. G_REGEX_ERROR_MISSING_CONTROL_CHAR = 102,
  146. G_REGEX_ERROR_UNRECOGNIZED_ESCAPE = 103,
  147. G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER = 104,
  148. G_REGEX_ERROR_QUANTIFIER_TOO_BIG = 105,
  149. G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS = 106,
  150. G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS = 107,
  151. G_REGEX_ERROR_RANGE_OUT_OF_ORDER = 108,
  152. G_REGEX_ERROR_NOTHING_TO_REPEAT = 109,
  153. G_REGEX_ERROR_UNRECOGNIZED_CHARACTER = 112,
  154. G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS = 113,
  155. G_REGEX_ERROR_UNMATCHED_PARENTHESIS = 114,
  156. G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE = 115,
  157. G_REGEX_ERROR_UNTERMINATED_COMMENT = 118,
  158. G_REGEX_ERROR_EXPRESSION_TOO_LARGE = 120,
  159. G_REGEX_ERROR_MEMORY_ERROR = 121,
  160. G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND = 125,
  161. G_REGEX_ERROR_MALFORMED_CONDITION = 126,
  162. G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES = 127,
  163. G_REGEX_ERROR_ASSERTION_EXPECTED = 128,
  164. G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME = 130,
  165. G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED = 131,
  166. G_REGEX_ERROR_HEX_CODE_TOO_LARGE = 134,
  167. G_REGEX_ERROR_INVALID_CONDITION = 135,
  168. G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND = 136,
  169. G_REGEX_ERROR_INFINITE_LOOP = 140,
  170. G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR = 142,
  171. G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME = 143,
  172. G_REGEX_ERROR_MALFORMED_PROPERTY = 146,
  173. G_REGEX_ERROR_UNKNOWN_PROPERTY = 147,
  174. G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG = 148,
  175. G_REGEX_ERROR_TOO_MANY_SUBPATTERNS = 149,
  176. G_REGEX_ERROR_INVALID_OCTAL_VALUE = 151,
  177. G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE = 154,
  178. G_REGEX_ERROR_DEFINE_REPETION = 155, /* described in the doc comment as never raised and deprecated in 2.34; NOTE(review): the spelling "REPETION" is the name callers see, so renaming it would change the public interface */
  179. G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS = 156,
  180. G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157,
  181. G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE = 158,
  182. G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN = 159,
  183. G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB = 160,
  184. G_REGEX_ERROR_NUMBER_TOO_BIG = 161,
  185. G_REGEX_ERROR_MISSING_SUBPATTERN_NAME = 162,
  186. G_REGEX_ERROR_MISSING_DIGIT = 163,
  187. G_REGEX_ERROR_INVALID_DATA_CHARACTER = 164,
  188. G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME = 165,
  189. G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166,
  190. G_REGEX_ERROR_INVALID_CONTROL_CHAR = 168,
  191. G_REGEX_ERROR_MISSING_NAME = 169,
  192. G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171,
  193. G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172,
  194. G_REGEX_ERROR_NAME_TOO_LONG = 175,
  195. G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176
  196. } GRegexError;
  197. /**
  198. * G_REGEX_ERROR:
  199. *
  200. * Error domain for regular expressions. Errors in this domain will be
  201. * from the #GRegexError enumeration. See #GError for information on
  202. * error domains.
  203. *
  204. * Since: 2.14
  205. */
  206. #define G_REGEX_ERROR g_regex_error_quark () /* expands to a function call, so the quark is fetched at each use of the macro */
  207. GLIB_AVAILABLE_IN_ALL
  208. GQuark g_regex_error_quark (void); /* the function behind the G_REGEX_ERROR macro; takes no arguments and returns a GQuark */
  209. /**
  210. * GRegexCompileFlags:
  211. * @G_REGEX_CASELESS: Letters in the pattern match both upper- and
  212. * lowercase letters. This option can be changed within a pattern
  213. * by a "(?i)" option setting.
  214. * @G_REGEX_MULTILINE: By default, GRegex treats the strings as consisting
  215. * of a single line of characters (even if it actually contains
  216. * newlines). The "start of line" metacharacter ("^") matches only
  217. * at the start of the string, while the "end of line" metacharacter
  218. * ("$") matches only at the end of the string, or before a terminating
  219. * newline (unless #G_REGEX_DOLLAR_ENDONLY is set). When
  220. * #G_REGEX_MULTILINE is set, the "start of line" and "end of line"
  221. * constructs match immediately following or immediately before any
  222. * newline in the string, respectively, as well as at the very start
  223. * and end. This can be changed within a pattern by a "(?m)" option
  224. * setting.
  225. * @G_REGEX_DOTALL: A dot metacharacter (".") in the pattern matches all
  226. * characters, including newlines. Without it, newlines are excluded.
  227. * This option can be changed within a pattern by a "(?s)" option setting.
  228. * @G_REGEX_EXTENDED: Whitespace data characters in the pattern are
  229. * totally ignored except when escaped or inside a character class.
  230. * Whitespace does not include the VT character (code 11). In addition,
  231. * characters between an unescaped "#" outside a character class and
  232. * the next newline character, inclusive, are also ignored. This can
  233. * be changed within a pattern by a "(?x)" option setting.
  234. * @G_REGEX_ANCHORED: The pattern is forced to be "anchored", that is,
  235. * it is constrained to match only at the first matching point in the
  236. * string that is being searched. This effect can also be achieved by
  237. * appropriate constructs in the pattern itself such as the "^"
  238. * metacharacter.
  239. * @G_REGEX_DOLLAR_ENDONLY: A dollar metacharacter ("$") in the pattern
  240. * matches only at the end of the string. Without this option, a
  241. * dollar also matches immediately before the final character if
  242. * it is a newline (but not before any other newlines). This option
  243. * is ignored if #G_REGEX_MULTILINE is set.
  244. * @G_REGEX_UNGREEDY: Inverts the "greediness" of the quantifiers so that
  245. * they are not greedy by default, but become greedy if followed by "?".
  246. * It can also be set by a "(?U)" option setting within the pattern.
  247. * @G_REGEX_RAW: Usually strings must be valid UTF-8 strings, using this
  248. * flag they are considered as a raw sequence of bytes.
  249. * @G_REGEX_NO_AUTO_CAPTURE: Disables the use of numbered capturing
  250. * parentheses in the pattern. Any opening parenthesis that is not
  251. * followed by "?" behaves as if it were followed by "?:" but named
  252. * parentheses can still be used for capturing (and they acquire numbers
  253. * in the usual way).
  254. * @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will
  255. * be used many times, then it may be worth the effort to optimize it
  256. * to improve the speed of matches.
  257. * @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the
  258. * first newline. Since: 2.34
  259. * @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not
  260. * be unique. This can be helpful for certain types of pattern when it
  261. * is known that only one instance of the named subpattern can ever be
  262. * matched.
  263. * @G_REGEX_NEWLINE_CR: Usually any newline character or character sequence is
  264. * recognized. If this option is set, the only recognized newline character
  265. * is '\r'.
  266. * @G_REGEX_NEWLINE_LF: Usually any newline character or character sequence is
  267. * recognized. If this option is set, the only recognized newline character
  268. * is '\n'.
  269. * @G_REGEX_NEWLINE_CRLF: Usually any newline character or character sequence is
  270. * recognized. If this option is set, the only recognized newline character
  271. * sequence is '\r\n'.
  272. * @G_REGEX_NEWLINE_ANYCRLF: Usually any newline character or character sequence
  273. * is recognized. If this option is set, the only recognized newline character
  274. * sequences are '\r', '\n', and '\r\n'. Since: 2.34
  275. * @G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence
  276. * is recognised. If this option is set, then "\R" only recognizes the newline
  277. * characters '\r', '\n' and '\r\n'. Since: 2.34
  278. * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
  279. * JavaScript rather than PCRE. Since: 2.34
  280. *
  281. * Flags specifying compile-time options.
  282. *
  283. * Since: 2.14
  284. */
  285. /* Remember to update G_REGEX_COMPILE_MASK in gregex.c after
  286. * adding a new flag.
  287. */
  288. typedef enum /* Compile-time option bits, combinable with bitwise OR. */
  289. {
  290. G_REGEX_CASELESS = 1 << 0, /* single-bit flags; the bit positions used are not contiguous (6-8, 10, 14-17 and 24 are unused here) */
  291. G_REGEX_MULTILINE = 1 << 1,
  292. G_REGEX_DOTALL = 1 << 2,
  293. G_REGEX_EXTENDED = 1 << 3,
  294. G_REGEX_ANCHORED = 1 << 4,
  295. G_REGEX_DOLLAR_ENDONLY = 1 << 5,
  296. G_REGEX_UNGREEDY = 1 << 9,
  297. G_REGEX_RAW = 1 << 11,
  298. G_REGEX_NO_AUTO_CAPTURE = 1 << 12,
  299. G_REGEX_OPTIMIZE = 1 << 13,
  300. G_REGEX_FIRSTLINE = 1 << 18,
  301. G_REGEX_DUPNAMES = 1 << 19,
  302. G_REGEX_NEWLINE_CR = 1 << 20,
  303. G_REGEX_NEWLINE_LF = 1 << 21,
  304. G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF, /* not a new bit: both newline bits (20 and 21) set together */
  305. G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22, /* parses as CR | (1 << 22) because << binds tighter than |; i.e. bits 20 and 22 */
  306. G_REGEX_BSR_ANYCRLF = 1 << 23,
  307. G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
  308. } GRegexCompileFlags;
  309. /**
  310. * GRegexMatchFlags:
  311. * @G_REGEX_MATCH_ANCHORED: The pattern is forced to be "anchored", that is,
  312. * it is constrained to match only at the first matching point in the
  313. * string that is being searched. This effect can also be achieved by
  314. * appropriate constructs in the pattern itself such as the "^"
  315. * metacharacter.
  316. * @G_REGEX_MATCH_NOTBOL: Specifies that first character of the string is
  317. * not the beginning of a line, so the circumflex metacharacter should
  318. * not match before it. Setting this without #G_REGEX_MULTILINE (at
  319. * compile time) causes circumflex never to match. This option affects
  320. * only the behaviour of the circumflex metacharacter, it does not
  321. * affect "\A".
  322. * @G_REGEX_MATCH_NOTEOL: Specifies that the end of the subject string is
  323. * not the end of a line, so the dollar metacharacter should not match
  324. * it nor (except in multiline mode) a newline immediately before it.
  325. * Setting this without #G_REGEX_MULTILINE (at compile time) causes
  326. * dollar never to match. This option affects only the behaviour of
  327. * the dollar metacharacter, it does not affect "\Z" or "\z".
  328. * @G_REGEX_MATCH_NOTEMPTY: An empty string is not considered to be a valid
  329. * match if this option is set. If there are alternatives in the pattern,
  330. * they are tried. If all the alternatives match the empty string, the
  331. * entire match fails. For example, if the pattern "a?b?" is applied to
  332. * a string not beginning with "a" or "b", it matches the empty string
  333. * at the start of the string. With this flag set, this match is not
  334. * valid, so GRegex searches further into the string for occurrences
  335. * of "a" or "b".
  336. * @G_REGEX_MATCH_PARTIAL: Turns on the partial matching feature, for more
  337. * documentation on partial matching see g_match_info_is_partial_match().
  338. * @G_REGEX_MATCH_NEWLINE_CR: Overrides the newline definition set when
  339. * creating a new #GRegex, setting the '\r' character as line terminator.
  340. * @G_REGEX_MATCH_NEWLINE_LF: Overrides the newline definition set when
  341. * creating a new #GRegex, setting the '\n' character as line terminator.
  342. * @G_REGEX_MATCH_NEWLINE_CRLF: Overrides the newline definition set when
  343. * creating a new #GRegex, setting the '\r\n' characters sequence as line terminator.
  344. * @G_REGEX_MATCH_NEWLINE_ANY: Overrides the newline definition set when
  345. * creating a new #GRegex, any Unicode newline sequence
  346. * is recognised as a newline. These are '\r', '\n' and '\r\n', and the
  347. * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
  348. * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
  349. * U+2029 PARAGRAPH SEPARATOR.
  350. * @G_REGEX_MATCH_NEWLINE_ANYCRLF: Overrides the newline definition set when
  351. * creating a new #GRegex; any '\r', '\n', or '\r\n' character sequence
  352. * is recognized as a newline. Since: 2.34
  353. * @G_REGEX_MATCH_BSR_ANYCRLF: Overrides the newline definition for "\R" set when
  354. * creating a new #GRegex; only '\r', '\n', or '\r\n' character sequences
  355. * are recognized as a newline by "\R". Since: 2.34
  356. * @G_REGEX_MATCH_BSR_ANY: Overrides the newline definition for "\R" set when
  357. * creating a new #GRegex; any Unicode newline character or character sequence
  358. * is recognized as a newline by "\R". These are '\r', '\n' and '\r\n', and the
  359. * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
  360. * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
  361. * U+2029 PARAGRAPH SEPARATOR. Since: 2.34
  362. * @G_REGEX_MATCH_PARTIAL_SOFT: An alias for #G_REGEX_MATCH_PARTIAL. Since: 2.34
  363. * @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In contrast
  364. * to #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
  365. * is found, without continuing to search for a possible complete match. See
  366. * g_match_info_is_partial_match() for more information. Since: 2.34
  367. * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to
  368. * the start of the matched string. For anchored
  369. * patterns this can only happen for patterns containing "\K". Since: 2.34
  370. *
  371. * Flags specifying match-time options.
  372. *
  373. * Since: 2.14
  374. */
  375. /* Remember to update G_REGEX_MATCH_MASK in gregex.c after
  376. * adding a new flag. */
  377. typedef enum /* Match-time option bits, combinable with bitwise OR. */
  378. {
  379. G_REGEX_MATCH_ANCHORED = 1 << 4, /* same bit position as G_REGEX_ANCHORED in GRegexCompileFlags */
  380. G_REGEX_MATCH_NOTBOL = 1 << 7,
  381. G_REGEX_MATCH_NOTEOL = 1 << 8,
  382. G_REGEX_MATCH_NOTEMPTY = 1 << 10,
  383. G_REGEX_MATCH_PARTIAL = 1 << 15,
  384. G_REGEX_MATCH_NEWLINE_CR = 1 << 20, /* bits 20 and 21 match G_REGEX_NEWLINE_CR and G_REGEX_NEWLINE_LF */
  385. G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
  386. G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF, /* not a new bit: bits 20 and 21 together */
  387. G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
  388. G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY, /* bits 20 and 22, the same numeric value as G_REGEX_NEWLINE_ANYCRLF */
  389. G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23, /* same bit position as G_REGEX_BSR_ANYCRLF */
  390. G_REGEX_MATCH_BSR_ANY = 1 << 24,
  391. G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL, /* alias: identical value to G_REGEX_MATCH_PARTIAL (bit 15) */
  392. G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
  393. G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
  394. } GRegexMatchFlags;
  395. /**
  396. * GRegex:
  397. *
  398. * A GRegex is the "compiled" form of a regular expression pattern.
  399. * This structure is opaque and its fields cannot be accessed directly.
  400. *
  401. * Since: 2.14
  402. */
  403. typedef struct _GRegex GRegex; /* forward declaration only: struct _GRegex has no definition in this header, so users can hold only pointers to it */
  404. /**
  405. * GMatchInfo:
  406. *
  407. * A GMatchInfo is an opaque struct used to return information about
  408. * matches.
  409. */
  410. typedef struct _GMatchInfo GMatchInfo; /* forward declaration only: struct _GMatchInfo has no definition in this header, so users can hold only pointers to it */
  411. /**
  412. * GRegexEvalCallback:
  413. * @match_info: the #GMatchInfo generated by the match.
  414. * Use g_match_info_get_regex() and g_match_info_get_string() if you
  415. * need the #GRegex or the matched string.
  416. * @result: a #GString containing the new string
  417. * @user_data: user data passed to g_regex_replace_eval()
  418. *
  419. * Specifies the type of the function passed to g_regex_replace_eval().
  420. * It is called for each occurrence of the pattern in the string passed
  421. * to g_regex_replace_eval(), and it should append the replacement to
  422. * @result.
  423. *
  424. * Returns: %FALSE to continue the replacement process, %TRUE to stop it
  425. *
  426. * Since: 2.14
  427. */
  428. typedef gboolean (*GRegexEvalCallback) (const GMatchInfo *match_info, /* callback type for g_regex_replace_eval(); the match is passed as a const pointer */
  429. GString *result, /* non-const: per the doc comment, the callback appends its replacement text here */
  430. gpointer user_data); /* the user_data value given to g_regex_replace_eval() */
  431. GLIB_AVAILABLE_IN_ALL /* GRegex lifecycle (new/ref/unref), read-only accessors taking const GRegex *, and two string-escaping helpers. */
  432. GRegex *g_regex_new (const gchar *pattern, /* takes the pattern text plus one set of compile flags and one set of match flags */
  433. GRegexCompileFlags compile_options,
  434. GRegexMatchFlags match_options,
  435. GError **error); /* GError out-parameter; NOTE(review): presumably filled with a G_REGEX_ERROR code on failure - confirm in gregex.c */
  436. GLIB_AVAILABLE_IN_ALL
  437. GRegex *g_regex_ref (GRegex *regex);
  438. GLIB_AVAILABLE_IN_ALL
  439. void g_regex_unref (GRegex *regex);
  440. GLIB_AVAILABLE_IN_ALL
  441. const gchar *g_regex_get_pattern (const GRegex *regex); /* returns const gchar *, unlike the escape helpers below which return gchar * */
  442. GLIB_AVAILABLE_IN_ALL
  443. gint g_regex_get_max_backref (const GRegex *regex);
  444. GLIB_AVAILABLE_IN_ALL
  445. gint g_regex_get_capture_count (const GRegex *regex);
  446. GLIB_AVAILABLE_IN_ALL
  447. gboolean g_regex_get_has_cr_or_lf (const GRegex *regex);
  448. GLIB_AVAILABLE_IN_2_38 /* the only declaration in this header gated on 2.38 rather than GLIB_AVAILABLE_IN_ALL */
  449. gint g_regex_get_max_lookbehind (const GRegex *regex);
  450. GLIB_AVAILABLE_IN_ALL
  451. gint g_regex_get_string_number (const GRegex *regex,
  452. const gchar *name);
  453. GLIB_AVAILABLE_IN_ALL
  454. gchar *g_regex_escape_string (const gchar *string, /* takes no GRegex; NOTE(review): non-const return is presumably a caller-owned string - confirm in gregex.c */
  455. gint length);
  456. GLIB_AVAILABLE_IN_ALL
  457. gchar *g_regex_escape_nul (const gchar *string, /* same parameter list and return type as g_regex_escape_string */
  458. gint length);
  459. GLIB_AVAILABLE_IN_ALL
  460. GRegexCompileFlags g_regex_get_compile_flags (const GRegex *regex);
  461. GLIB_AVAILABLE_IN_ALL
  462. GRegexMatchFlags g_regex_get_match_flags (const GRegex *regex);
  463. /* Matching. g_regex_match_simple() takes the pattern as text; the other
        four take a compiled const GRegex *. The *_full variants add string_len,
        start_position and a GError out-parameter to their shorter counterparts.
        All five return gboolean. */
  464. GLIB_AVAILABLE_IN_ALL
  465. gboolean g_regex_match_simple (const gchar *pattern,
  466. const gchar *string,
  467. GRegexCompileFlags compile_options,
  468. GRegexMatchFlags match_options); /* no GMatchInfo and no GError parameter: only the gboolean result is available */
  469. GLIB_AVAILABLE_IN_ALL
  470. gboolean g_regex_match (const GRegex *regex,
  471. const gchar *string,
  472. GRegexMatchFlags match_options,
  473. GMatchInfo **match_info); /* out-parameter of type GMatchInfo ** */
  474. GLIB_AVAILABLE_IN_ALL
  475. gboolean g_regex_match_full (const GRegex *regex,
  476. const gchar *string,
  477. gssize string_len, /* signed size type; NOTE(review): presumably a negative value means NUL-terminated - confirm in gregex.c */
  478. gint start_position,
  479. GRegexMatchFlags match_options,
  480. GMatchInfo **match_info,
  481. GError **error);
  482. GLIB_AVAILABLE_IN_ALL
  483. gboolean g_regex_match_all (const GRegex *regex, /* same parameter list as g_regex_match */
  484. const gchar *string,
  485. GRegexMatchFlags match_options,
  486. GMatchInfo **match_info);
  487. GLIB_AVAILABLE_IN_ALL
  488. gboolean g_regex_match_all_full (const GRegex *regex, /* same parameter list as g_regex_match_full */
  489. const gchar *string,
  490. gssize string_len,
  491. gint start_position,
  492. GRegexMatchFlags match_options,
  493. GMatchInfo **match_info,
  494. GError **error);
  495. /* String splitting. Three entry points, all returning gchar **:
        g_regex_split_simple() takes the pattern as text, g_regex_split() takes
        a compiled const GRegex *, and g_regex_split_full() adds string_len,
        start_position, max_tokens and a GError out-parameter. */
  496. GLIB_AVAILABLE_IN_ALL
  497. gchar **g_regex_split_simple (const gchar *pattern,
  498. const gchar *string,
  499. GRegexCompileFlags compile_options,
  500. GRegexMatchFlags match_options);
  501. GLIB_AVAILABLE_IN_ALL
  502. gchar **g_regex_split (const GRegex *regex,
  503. const gchar *string,
  504. GRegexMatchFlags match_options);
  505. GLIB_AVAILABLE_IN_ALL
  506. gchar **g_regex_split_full (const GRegex *regex,
  507. const gchar *string,
  508. gssize string_len,
  509. gint start_position,
  510. GRegexMatchFlags match_options,
  511. gint max_tokens, /* the only max_tokens parameter among the split functions; NOTE(review): meaning of zero or negative values is not visible here - confirm in gregex.c */
  512. GError **error);
  513. /* String replacement. g_regex_replace() and g_regex_replace_literal() have
        identical parameter lists; g_regex_replace_eval() takes a
        GRegexEvalCallback and user_data in place of the replacement string.
        All three return gchar *. g_regex_check_replacement() takes only a
        replacement string, with no GRegex. */
  514. GLIB_AVAILABLE_IN_ALL
  515. gchar *g_regex_replace (const GRegex *regex,
  516. const gchar *string,
  517. gssize string_len,
  518. gint start_position,
  519. const gchar *replacement,
  520. GRegexMatchFlags match_options,
  521. GError **error);
  522. GLIB_AVAILABLE_IN_ALL
  523. gchar *g_regex_replace_literal (const GRegex *regex,
  524. const gchar *string,
  525. gssize string_len,
  526. gint start_position,
  527. const gchar *replacement,
  528. GRegexMatchFlags match_options,
  529. GError **error);
  530. GLIB_AVAILABLE_IN_ALL
  531. gchar *g_regex_replace_eval (const GRegex *regex,
  532. const gchar *string,
  533. gssize string_len,
  534. gint start_position,
  535. GRegexMatchFlags match_options, /* note the order: match_options comes before the callback here, but after the replacement string in the two functions above */
  536. GRegexEvalCallback eval,
  537. gpointer user_data, /* passed through to eval as its user_data argument, per the GRegexEvalCallback doc comment */
  538. GError **error);
  539. GLIB_AVAILABLE_IN_ALL
  540. gboolean g_regex_check_replacement (const gchar *replacement,
  541. gboolean *has_references, /* gboolean out-parameter */
  542. GError **error);
  543. /* Match info. The query and fetch functions take const GMatchInfo *;
        g_match_info_ref(), g_match_info_unref(), g_match_info_free() and
        g_match_info_next() take a non-const pointer. */
  544. GLIB_AVAILABLE_IN_ALL
  545. GRegex *g_match_info_get_regex (const GMatchInfo *match_info); /* returns a non-const GRegex * from a const GMatchInfo * */
  546. GLIB_AVAILABLE_IN_ALL
  547. const gchar *g_match_info_get_string (const GMatchInfo *match_info);
  548. GLIB_AVAILABLE_IN_ALL
  549. GMatchInfo *g_match_info_ref (GMatchInfo *match_info);
  550. GLIB_AVAILABLE_IN_ALL
  551. void g_match_info_unref (GMatchInfo *match_info);
  552. GLIB_AVAILABLE_IN_ALL
  553. void g_match_info_free (GMatchInfo *match_info); /* same signature as g_match_info_unref; NOTE(review): how the two differ is not visible in this header - confirm in gregex.c */
  554. GLIB_AVAILABLE_IN_ALL
  555. gboolean g_match_info_next (GMatchInfo *match_info,
  556. GError **error);
  557. GLIB_AVAILABLE_IN_ALL
  558. gboolean g_match_info_matches (const GMatchInfo *match_info);
  559. GLIB_AVAILABLE_IN_ALL
  560. gint g_match_info_get_match_count (const GMatchInfo *match_info);
  561. GLIB_AVAILABLE_IN_ALL
  562. gboolean g_match_info_is_partial_match (const GMatchInfo *match_info);
  563. GLIB_AVAILABLE_IN_ALL
  564. gchar *g_match_info_expand_references(const GMatchInfo *match_info, /* NOTE(review): no space before "(" here, unlike every other prototype in this header */
  565. const gchar *string_to_expand,
  566. GError **error);
  567. GLIB_AVAILABLE_IN_ALL
  568. gchar *g_match_info_fetch (const GMatchInfo *match_info, /* fetch by number; g_match_info_fetch_named below takes a name instead */
  569. gint match_num);
  570. GLIB_AVAILABLE_IN_ALL
  571. gboolean g_match_info_fetch_pos (const GMatchInfo *match_info,
  572. gint match_num,
  573. gint *start_pos, /* start_pos and end_pos are gint out-parameters */
  574. gint *end_pos);
  575. GLIB_AVAILABLE_IN_ALL
  576. gchar *g_match_info_fetch_named (const GMatchInfo *match_info,
  577. const gchar *name);
  578. GLIB_AVAILABLE_IN_ALL
  579. gboolean g_match_info_fetch_named_pos (const GMatchInfo *match_info,
  580. const gchar *name,
  581. gint *start_pos,
  582. gint *end_pos);
  583. GLIB_AVAILABLE_IN_ALL
  584. gchar **g_match_info_fetch_all (const GMatchInfo *match_info);
  585. G_END_DECLS
  586. #endif /* __G_REGEX_H__ */