regsyntax.c 11 KB


  /**********************************************************************
    regsyntax.c - Oniguruma (regular expression library)
  **********************************************************************/
  /*-
   * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in the
   *    documentation and/or other materials provided with the distribution.
   *
   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   */
  29. #include "regint.h"
  30. OnigSyntaxType OnigSyntaxASIS = {
  31. 0
  32. , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
  33. , 0
  34. , ONIG_OPTION_NONE
  35. ,
  36. {
  37. (OnigCodePoint )'\\' /* esc */
  38. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
  39. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
  40. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
  41. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
  42. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  43. }
  44. };
  45. OnigSyntaxType OnigSyntaxPosixBasic = {
  46. ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
  47. ONIG_SYN_OP_ESC_BRACE_INTERVAL )
  48. , 0
  49. , 0
  50. , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
  51. ,
  52. {
  53. (OnigCodePoint )'\\' /* esc */
  54. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
  55. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
  56. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
  57. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
  58. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  59. }
  60. };
  61. OnigSyntaxType OnigSyntaxPosixExtended = {
  62. ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
  63. ONIG_SYN_OP_BRACE_INTERVAL |
  64. ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
  65. , 0
  66. , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
  67. ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
  68. ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
  69. ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
  70. , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
  71. ,
  72. {
  73. (OnigCodePoint )'\\' /* esc */
  74. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
  75. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
  76. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
  77. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
  78. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  79. }
  80. };
  81. OnigSyntaxType OnigSyntaxEmacs = {
  82. ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
  83. ONIG_SYN_OP_ESC_BRACE_INTERVAL |
  84. ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
  85. ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
  86. ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
  87. ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
  88. , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
  89. , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
  90. , ONIG_OPTION_NONE
  91. ,
  92. {
  93. (OnigCodePoint )'\\' /* esc */
  94. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
  95. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
  96. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
  97. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
  98. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  99. }
  100. };
  101. OnigSyntaxType OnigSyntaxGrep = {
  102. ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
  103. ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
  104. ONIG_SYN_OP_ESC_VBAR_ALT |
  105. ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
  106. ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
  107. ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
  108. ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
  109. , 0
  110. , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
  111. , ONIG_OPTION_NONE
  112. ,
  113. {
  114. (OnigCodePoint )'\\' /* esc */
  115. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
  116. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
  117. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
  118. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
  119. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  120. }
  121. };
  122. OnigSyntaxType OnigSyntaxGnuRegex = {
  123. SYN_GNU_REGEX_OP
  124. , 0
  125. , SYN_GNU_REGEX_BV
  126. , ONIG_OPTION_NONE
  127. ,
  128. {
  129. (OnigCodePoint )'\\' /* esc */
  130. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
  131. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
  132. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
  133. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
  134. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  135. }
  136. };
  137. OnigSyntaxType OnigSyntaxJava = {
  138. (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
  139. ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
  140. ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
  141. & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  142. , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
  143. ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
  144. ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
  145. ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
  146. ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
  147. , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
  148. , ONIG_OPTION_SINGLELINE
  149. ,
  150. {
  151. (OnigCodePoint )'\\' /* esc */
  152. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
  153. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
  154. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
  155. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
  156. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  157. }
  158. };
  159. OnigSyntaxType OnigSyntaxPerl = {
  160. (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
  161. ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
  162. ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
  163. ONIG_SYN_OP_ESC_C_CONTROL )
  164. & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  165. , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
  166. ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
  167. ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
  168. ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT )
  169. , SYN_GNU_REGEX_BV
  170. , ONIG_OPTION_SINGLELINE
  171. ,
  172. {
  173. (OnigCodePoint )'\\' /* esc */
  174. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
  175. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
  176. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
  177. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
  178. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  179. }
  180. };
  181. /* Perl + named group */
  182. OnigSyntaxType OnigSyntaxPerl_NG = {
  183. (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
  184. ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
  185. ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
  186. ONIG_SYN_OP_ESC_C_CONTROL )
  187. & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  188. , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
  189. ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
  190. ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
  191. ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
  192. ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
  193. ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
  194. ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
  195. , ( SYN_GNU_REGEX_BV |
  196. ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
  197. ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
  198. , ONIG_OPTION_SINGLELINE
  199. ,
  200. {
  201. (OnigCodePoint )'\\' /* esc */
  202. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
  203. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
  204. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
  205. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
  206. , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  207. }
  208. };
  209. extern int
  210. onig_set_default_syntax(OnigSyntaxType* syntax)
  211. {
  212. if (IS_NULL(syntax))
  213. syntax = ONIG_SYNTAX_RUBY;
  214. OnigDefaultSyntax = syntax;
  215. return 0;
  216. }
  217. extern void
  218. onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
  219. {
  220. *to = *from;
  221. }
  222. extern void
  223. onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
  224. {
  225. syntax->op = op;
  226. }
  227. extern void
  228. onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
  229. {
  230. syntax->op2 = op2;
  231. }
  232. extern void
  233. onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
  234. {
  235. syntax->behavior = behavior;
  236. }
  237. extern void
  238. onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
  239. {
  240. syntax->options = options;
  241. }
  242. extern unsigned int
  243. onig_get_syntax_op(OnigSyntaxType* syntax)
  244. {
  245. return syntax->op;
  246. }
  247. extern unsigned int
  248. onig_get_syntax_op2(OnigSyntaxType* syntax)
  249. {
  250. return syntax->op2;
  251. }
  252. extern unsigned int
  253. onig_get_syntax_behavior(OnigSyntaxType* syntax)
  254. {
  255. return syntax->behavior;
  256. }
  257. extern OnigOptionType
  258. onig_get_syntax_options(OnigSyntaxType* syntax)
  259. {
  260. return syntax->options;
  261. }
  #ifdef USE_VARIABLE_META_CHARS
  /*
   * Replace one entry of the meta character table of `enc`.
   *
   * `what` selects the entry (one of the ONIG_META_CHAR_* selectors handled
   * below) and `code` is the code point stored into it.
   *
   * Returns 0 on success, or ONIGERR_INVALID_ARGUMENT when `what` is not a
   * recognized selector (the table is left untouched in that case).
   * `enc` is dereferenced without a null check.
   */
  extern int
  onig_set_meta_char(OnigSyntaxType* enc, unsigned int what, OnigCodePoint code)
  {
    int status = 0;

    switch (what) {
    case ONIG_META_CHAR_ESCAPE:
      enc->meta_char_table.esc = code;
      break;

    case ONIG_META_CHAR_ANYCHAR:
      enc->meta_char_table.anychar = code;
      break;

    case ONIG_META_CHAR_ANYTIME:
      enc->meta_char_table.anytime = code;
      break;

    case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
      enc->meta_char_table.zero_or_one_time = code;
      break;

    case ONIG_META_CHAR_ONE_OR_MORE_TIME:
      enc->meta_char_table.one_or_more_time = code;
      break;

    case ONIG_META_CHAR_ANYCHAR_ANYTIME:
      enc->meta_char_table.anychar_anytime = code;
      break;

    default:
      /* Unknown selector: report it and modify nothing. */
      status = ONIGERR_INVALID_ARGUMENT;
      break;
    }

    return status;
  }
  #endif /* USE_VARIABLE_META_CHARS */