regext.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. /**********************************************************************
  2. regext.c - Oniguruma (regular expression library)
  3. **********************************************************************/
  4. /*-
  5. * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. */
#include "regint.h"
#include <stddef.h>  /* size_t, NULL */
  30. static void
  31. conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
  32. {
  33. while (s < end) {
  34. *conv++ = '\0';
  35. *conv++ = '\0';
  36. *conv++ = '\0';
  37. *conv++ = *s++;
  38. }
  39. }
  40. static void
  41. conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
  42. {
  43. while (s < end) {
  44. *conv++ = *s++;
  45. *conv++ = '\0';
  46. *conv++ = '\0';
  47. *conv++ = '\0';
  48. }
  49. }
  50. static void
  51. conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
  52. {
  53. while (s < end) {
  54. *conv++ = '\0';
  55. *conv++ = *s++;
  56. }
  57. }
  58. static void
  59. conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
  60. {
  61. while (s < end) {
  62. *conv++ = *s++;
  63. *conv++ = '\0';
  64. }
  65. }
  66. static void
  67. conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
  68. {
  69. while (s < end) {
  70. *conv++ = s[3];
  71. *conv++ = s[2];
  72. *conv++ = s[1];
  73. *conv++ = s[0];
  74. s += 4;
  75. }
  76. }
  77. static void
  78. conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
  79. {
  80. while (s < end) {
  81. *conv++ = s[1];
  82. *conv++ = s[0];
  83. s += 2;
  84. }
  85. }
  86. static int
  87. conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
  88. UChar** conv, UChar** conv_end)
  89. {
  90. int len = end - s;
  91. if (to == ONIG_ENCODING_UTF16_BE) {
  92. if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
  93. *conv = (UChar* )xmalloc(len * 2);
  94. CHECK_NULL_RETURN_MEMERR(*conv);
  95. *conv_end = *conv + (len * 2);
  96. conv_ext0be(s, end, *conv);
  97. return 0;
  98. }
  99. else if (from == ONIG_ENCODING_UTF16_LE) {
  100. swap16:
  101. *conv = (UChar* )xmalloc(len);
  102. CHECK_NULL_RETURN_MEMERR(*conv);
  103. *conv_end = *conv + len;
  104. conv_swap2bytes(s, end, *conv);
  105. return 0;
  106. }
  107. }
  108. else if (to == ONIG_ENCODING_UTF16_LE) {
  109. if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
  110. *conv = (UChar* )xmalloc(len * 2);
  111. CHECK_NULL_RETURN_MEMERR(*conv);
  112. *conv_end = *conv + (len * 2);
  113. conv_ext0le(s, end, *conv);
  114. return 0;
  115. }
  116. else if (from == ONIG_ENCODING_UTF16_BE) {
  117. goto swap16;
  118. }
  119. }
  120. if (to == ONIG_ENCODING_UTF32_BE) {
  121. if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
  122. *conv = (UChar* )xmalloc(len * 4);
  123. CHECK_NULL_RETURN_MEMERR(*conv);
  124. *conv_end = *conv + (len * 4);
  125. conv_ext0be32(s, end, *conv);
  126. return 0;
  127. }
  128. else if (from == ONIG_ENCODING_UTF32_LE) {
  129. swap32:
  130. *conv = (UChar* )xmalloc(len);
  131. CHECK_NULL_RETURN_MEMERR(*conv);
  132. *conv_end = *conv + len;
  133. conv_swap4bytes(s, end, *conv);
  134. return 0;
  135. }
  136. }
  137. else if (to == ONIG_ENCODING_UTF32_LE) {
  138. if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
  139. *conv = (UChar* )xmalloc(len * 4);
  140. CHECK_NULL_RETURN_MEMERR(*conv);
  141. *conv_end = *conv + (len * 4);
  142. conv_ext0le32(s, end, *conv);
  143. return 0;
  144. }
  145. else if (from == ONIG_ENCODING_UTF32_BE) {
  146. goto swap32;
  147. }
  148. }
  149. return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
  150. }
  151. extern int
  152. onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
  153. OnigCompileInfo* ci, OnigErrorInfo* einfo)
  154. {
  155. int r;
  156. UChar *cpat, *cpat_end;
  157. if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
  158. if (ci->pattern_enc != ci->target_enc) {
  159. r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
  160. &cpat, &cpat_end);
  161. if (r) return r;
  162. }
  163. else {
  164. cpat = (UChar* )pattern;
  165. cpat_end = (UChar* )pattern_end;
  166. }
  167. *reg = (regex_t* )xmalloc(sizeof(regex_t));
  168. if (IS_NULL(*reg)) {
  169. r = ONIGERR_MEMORY;
  170. goto err2;
  171. }
  172. r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc,
  173. ci->syntax);
  174. if (r) goto err;
  175. r = onig_compile(*reg, cpat, cpat_end, einfo);
  176. if (r) {
  177. err:
  178. onig_free(*reg);
  179. *reg = NULL;
  180. }
  181. err2:
  182. if (cpat != pattern) xfree(cpat);
  183. return r;
  184. }
  185. #ifdef USE_RECOMPILE_API
  186. extern int
  187. onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
  188. OnigCompileInfo* ci, OnigErrorInfo* einfo)
  189. {
  190. int r;
  191. regex_t *new_reg;
  192. r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo);
  193. if (r) return r;
  194. if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
  195. onig_transfer(reg, new_reg);
  196. }
  197. else {
  198. onig_chain_link_add(reg, new_reg);
  199. }
  200. return 0;
  201. }
  202. #endif