regenc.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902
  1. /**********************************************************************
  2. regenc.c - Oniguruma (regular expression library)
  3. **********************************************************************/
  4. /*-
  5. * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. */
  29. #include "regint.h"
  30. OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
  31. extern int
  32. onigenc_init(void)
  33. {
  34. return 0;
  35. }
  36. extern OnigEncoding
  37. onigenc_get_default_encoding(void)
  38. {
  39. return OnigEncDefaultCharEncoding;
  40. }
  41. extern int
  42. onigenc_set_default_encoding(OnigEncoding enc)
  43. {
  44. OnigEncDefaultCharEncoding = enc;
  45. return 0;
  46. }
  47. extern UChar*
  48. onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
  49. {
  50. UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
  51. if (p < s) {
  52. p += enclen(enc, p);
  53. }
  54. return p;
  55. }
  56. extern UChar*
  57. onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
  58. const UChar* start, const UChar* s, const UChar** prev)
  59. {
  60. UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
  61. if (p < s) {
  62. if (prev) *prev = (const UChar* )p;
  63. p += enclen(enc, p);
  64. }
  65. else {
  66. if (prev) *prev = (const UChar* )NULL; /* Sorry */
  67. }
  68. return p;
  69. }
  70. extern UChar*
  71. onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
  72. {
  73. if (s <= start)
  74. return (UChar* )NULL;
  75. return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
  76. }
  77. extern UChar*
  78. onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
  79. {
  80. while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
  81. if (s <= start)
  82. return (UChar* )NULL;
  83. s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
  84. }
  85. return (UChar* )s;
  86. }
  87. extern UChar*
  88. onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
  89. {
  90. UChar* q = (UChar* )p;
  91. while (n-- > 0) {
  92. q += ONIGENC_MBC_ENC_LEN(enc, q);
  93. }
  94. return (q <= end ? q : NULL);
  95. }
  96. extern int
  97. onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
  98. {
  99. int n = 0;
  100. UChar* q = (UChar* )p;
  101. while (q < end) {
  102. q += ONIGENC_MBC_ENC_LEN(enc, q);
  103. n++;
  104. }
  105. return n;
  106. }
  107. extern int
  108. onigenc_strlen_null(OnigEncoding enc, const UChar* s)
  109. {
  110. int n = 0;
  111. UChar* p = (UChar* )s;
  112. while (1) {
  113. if (*p == '\0') {
  114. UChar* q;
  115. int len = ONIGENC_MBC_MINLEN(enc);
  116. if (len == 1) return n;
  117. q = p + 1;
  118. while (len > 1) {
  119. if (*q != '\0') break;
  120. q++;
  121. len--;
  122. }
  123. if (len == 1) return n;
  124. }
  125. p += ONIGENC_MBC_ENC_LEN(enc, p);
  126. n++;
  127. }
  128. }
  129. extern int
  130. onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
  131. {
  132. UChar* start = (UChar* )s;
  133. UChar* p = (UChar* )s;
  134. while (1) {
  135. if (*p == '\0') {
  136. UChar* q;
  137. int len = ONIGENC_MBC_MINLEN(enc);
  138. if (len == 1) return (int )(p - start);
  139. q = p + 1;
  140. while (len > 1) {
  141. if (*q != '\0') break;
  142. q++;
  143. len--;
  144. }
  145. if (len == 1) return (int )(p - start);
  146. }
  147. p += ONIGENC_MBC_ENC_LEN(enc, p);
  148. }
  149. }
  150. const UChar OnigEncAsciiToLowerCaseTable[] = {
  151. '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  152. '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  153. '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  154. '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  155. '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  156. '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  157. '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  158. '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  159. '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  160. '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  161. '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  162. '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  163. '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  164. '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  165. '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  166. '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  167. '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  168. '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  169. '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  170. '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  171. '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  172. '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  173. '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  174. '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  175. '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  176. '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  177. '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  178. '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  179. '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  180. '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  181. '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  182. '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
  183. };
  184. #ifdef USE_UPPER_CASE_TABLE
  185. const UChar OnigEncAsciiToUpperCaseTable[256] = {
  186. '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  187. '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  188. '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  189. '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  190. '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  191. '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  192. '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  193. '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  194. '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  195. '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  196. '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  197. '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  198. '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  199. '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  200. '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  201. '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  202. '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  203. '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  204. '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  205. '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  206. '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  207. '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  208. '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  209. '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  210. '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  211. '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  212. '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  213. '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  214. '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  215. '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  216. '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  217. '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
  218. };
  219. #endif
  220. const unsigned short OnigEncAsciiCtypeTable[256] = {
  221. 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  222. 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  223. 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  224. 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  225. 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  226. 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  227. 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  228. 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  229. 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  230. 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  231. 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  232. 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  233. 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  234. 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  235. 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  236. 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  237. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  238. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  239. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  240. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  241. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  242. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  243. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  244. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  245. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  246. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  247. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  248. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  249. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  250. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  251. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  252. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
  253. };
  254. const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
  255. '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  256. '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  257. '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  258. '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  259. '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  260. '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  261. '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  262. '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  263. '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  264. '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  265. '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  266. '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  267. '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  268. '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  269. '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  270. '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  271. '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  272. '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  273. '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  274. '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  275. '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  276. '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  277. '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  278. '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  279. '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  280. '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  281. '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
  282. '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
  283. '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  284. '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  285. '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  286. '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
  287. };
  288. #ifdef USE_UPPER_CASE_TABLE
  289. const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  290. '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  291. '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  292. '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  293. '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  294. '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  295. '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  296. '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  297. '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  298. '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  299. '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  300. '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  301. '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  302. '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  303. '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  304. '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  305. '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  306. '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  307. '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  308. '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  309. '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  310. '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  311. '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  312. '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  313. '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  314. '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  315. '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  316. '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  317. '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  318. '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  319. '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  320. '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
  321. '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
  322. };
  323. #endif
  324. extern void
  325. onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
  326. {
  327. /* nothing */
  328. /* obsoleted. */
  329. }
  330. extern UChar*
  331. onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
  332. {
  333. return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
  334. }
  335. const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
  336. { 0x41, 0x61 },
  337. { 0x42, 0x62 },
  338. { 0x43, 0x63 },
  339. { 0x44, 0x64 },
  340. { 0x45, 0x65 },
  341. { 0x46, 0x66 },
  342. { 0x47, 0x67 },
  343. { 0x48, 0x68 },
  344. { 0x49, 0x69 },
  345. { 0x4a, 0x6a },
  346. { 0x4b, 0x6b },
  347. { 0x4c, 0x6c },
  348. { 0x4d, 0x6d },
  349. { 0x4e, 0x6e },
  350. { 0x4f, 0x6f },
  351. { 0x50, 0x70 },
  352. { 0x51, 0x71 },
  353. { 0x52, 0x72 },
  354. { 0x53, 0x73 },
  355. { 0x54, 0x74 },
  356. { 0x55, 0x75 },
  357. { 0x56, 0x76 },
  358. { 0x57, 0x77 },
  359. { 0x58, 0x78 },
  360. { 0x59, 0x79 },
  361. { 0x5a, 0x7a }
  362. };
  363. extern int
  364. onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
  365. OnigApplyAllCaseFoldFunc f, void* arg)
  366. {
  367. OnigCodePoint code;
  368. int i, r;
  369. for (i = 0;
  370. i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
  371. i++) {
  372. code = OnigAsciiLowerMap[i].to;
  373. r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
  374. if (r != 0) return r;
  375. code = OnigAsciiLowerMap[i].from;
  376. r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
  377. if (r != 0) return r;
  378. }
  379. return 0;
  380. }
  381. extern int
  382. onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
  383. const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
  384. OnigCaseFoldCodeItem items[])
  385. {
  386. if (0x41 <= *p && *p <= 0x5a) {
  387. items[0].byte_len = 1;
  388. items[0].code_len = 1;
  389. items[0].code[0] = (OnigCodePoint )(*p + 0x20);
  390. return 1;
  391. }
  392. else if (0x61 <= *p && *p <= 0x7a) {
  393. items[0].byte_len = 1;
  394. items[0].code_len = 1;
  395. items[0].code[0] = (OnigCodePoint )(*p - 0x20);
  396. return 1;
  397. }
  398. else
  399. return 0;
  400. }
  401. static int
  402. ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
  403. OnigApplyAllCaseFoldFunc f, void* arg)
  404. {
  405. static OnigCodePoint ss[] = { 0x73, 0x73 };
  406. return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
  407. }
  408. extern int
  409. onigenc_apply_all_case_fold_with_map(int map_size,
  410. const OnigPairCaseFoldCodes map[],
  411. int ess_tsett_flag, OnigCaseFoldType flag,
  412. OnigApplyAllCaseFoldFunc f, void* arg)
  413. {
  414. OnigCodePoint code;
  415. int i, r;
  416. r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
  417. if (r != 0) return r;
  418. for (i = 0; i < map_size; i++) {
  419. code = map[i].to;
  420. r = (*f)(map[i].from, &code, 1, arg);
  421. if (r != 0) return r;
  422. code = map[i].from;
  423. r = (*f)(map[i].to, &code, 1, arg);
  424. if (r != 0) return r;
  425. }
  426. if (ess_tsett_flag != 0)
  427. return ss_apply_all_case_fold(flag, f, arg);
  428. return 0;
  429. }
  430. extern int
  431. onigenc_get_case_fold_codes_by_str_with_map(int map_size,
  432. const OnigPairCaseFoldCodes map[],
  433. int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
  434. const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
  435. {
  436. if (0x41 <= *p && *p <= 0x5a) {
  437. items[0].byte_len = 1;
  438. items[0].code_len = 1;
  439. items[0].code[0] = (OnigCodePoint )(*p + 0x20);
  440. if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
  441. && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
  442. /* SS */
  443. items[1].byte_len = 2;
  444. items[1].code_len = 1;
  445. items[1].code[0] = (OnigCodePoint )0xdf;
  446. return 2;
  447. }
  448. else
  449. return 1;
  450. }
  451. else if (0x61 <= *p && *p <= 0x7a) {
  452. items[0].byte_len = 1;
  453. items[0].code_len = 1;
  454. items[0].code[0] = (OnigCodePoint )(*p - 0x20);
  455. if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
  456. && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
  457. /* ss */
  458. items[1].byte_len = 2;
  459. items[1].code_len = 1;
  460. items[1].code[0] = (OnigCodePoint )0xdf;
  461. return 2;
  462. }
  463. else
  464. return 1;
  465. }
  466. else if (*p == 0xdf && ess_tsett_flag != 0) {
  467. items[0].byte_len = 1;
  468. items[0].code_len = 2;
  469. items[0].code[0] = (OnigCodePoint )'s';
  470. items[0].code[1] = (OnigCodePoint )'s';
  471. items[1].byte_len = 1;
  472. items[1].code_len = 2;
  473. items[1].code[0] = (OnigCodePoint )'S';
  474. items[1].code[1] = (OnigCodePoint )'S';
  475. items[2].byte_len = 1;
  476. items[2].code_len = 2;
  477. items[2].code[0] = (OnigCodePoint )'s';
  478. items[2].code[1] = (OnigCodePoint )'S';
  479. items[3].byte_len = 1;
  480. items[3].code_len = 2;
  481. items[3].code[0] = (OnigCodePoint )'S';
  482. items[3].code[1] = (OnigCodePoint )'s';
  483. return 4;
  484. }
  485. else {
  486. int i;
  487. for (i = 0; i < map_size; i++) {
  488. if (*p == map[i].from) {
  489. items[0].byte_len = 1;
  490. items[0].code_len = 1;
  491. items[0].code[0] = map[i].to;
  492. return 1;
  493. }
  494. else if (*p == map[i].to) {
  495. items[0].byte_len = 1;
  496. items[0].code_len = 1;
  497. items[0].code[0] = map[i].from;
  498. return 1;
  499. }
  500. }
  501. }
  502. return 0;
  503. }
  504. extern int
  505. onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
  506. OnigCodePoint* sb_out ARG_UNUSED,
  507. const OnigCodePoint* ranges[] ARG_UNUSED)
  508. {
  509. return ONIG_NO_SUPPORT_CONFIG;
  510. }
  511. extern int
  512. onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
  513. {
  514. if (p < end) {
  515. if (*p == 0x0a) return 1;
  516. }
  517. return 0;
  518. }
  519. /* for single byte encodings */
  520. extern int
  521. onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
  522. const UChar*end ARG_UNUSED, UChar* lower)
  523. {
  524. *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
  525. (*p)++;
  526. return 1; /* return byte length of converted char to lower */
  527. }
  528. #if 0
  529. extern int
  530. onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
  531. const UChar** pp, const UChar* end)
  532. {
  533. const UChar* p = *pp;
  534. (*pp)++;
  535. return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
  536. }
  537. #endif
  538. extern int
  539. onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
  540. {
  541. return 1;
  542. }
  543. extern OnigCodePoint
  544. onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
  545. {
  546. return (OnigCodePoint )(*p);
  547. }
  548. extern int
  549. onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
  550. {
  551. return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
  552. }
  553. extern int
  554. onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
  555. {
  556. *buf = (UChar )(code & 0xff);
  557. return 1;
  558. }
  559. extern UChar*
  560. onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
  561. const UChar* s)
  562. {
  563. return (UChar* )s;
  564. }
  565. extern int
  566. onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
  567. const UChar* end ARG_UNUSED)
  568. {
  569. return TRUE;
  570. }
  571. extern int
  572. onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
  573. const UChar* end ARG_UNUSED)
  574. {
  575. return FALSE;
  576. }
  577. extern OnigCodePoint
  578. onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
  579. {
  580. int c, i, len;
  581. OnigCodePoint n;
  582. len = enclen(enc, p);
  583. n = (OnigCodePoint )(*p++);
  584. if (len == 1) return n;
  585. for (i = 1; i < len; i++) {
  586. if (p >= end) break;
  587. c = *p++;
  588. n <<= 8; n += c;
  589. }
  590. return n;
  591. }
  592. extern int
  593. onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
  594. const UChar** pp, const UChar* end ARG_UNUSED,
  595. UChar* lower)
  596. {
  597. int len;
  598. const UChar *p = *pp;
  599. if (ONIGENC_IS_MBC_ASCII(p)) {
  600. *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
  601. (*pp)++;
  602. return 1;
  603. }
  604. else {
  605. int i;
  606. len = enclen(enc, p);
  607. for (i = 0; i < len; i++) {
  608. *lower++ = *p++;
  609. }
  610. (*pp) += len;
  611. return len; /* return byte length of converted to lower char */
  612. }
  613. }
  614. #if 0
  615. extern int
  616. onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
  617. const UChar** pp, const UChar* end)
  618. {
  619. const UChar* p = *pp;
  620. if (ONIGENC_IS_MBC_ASCII(p)) {
  621. (*pp)++;
  622. return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
  623. }
  624. (*pp) += enclen(enc, p);
  625. return FALSE;
  626. }
  627. #endif
  628. extern int
  629. onigenc_mb2_code_to_mbclen(OnigCodePoint code)
  630. {
  631. if ((code & 0xff00) != 0) return 2;
  632. else return 1;
  633. }
  634. extern int
  635. onigenc_mb4_code_to_mbclen(OnigCodePoint code)
  636. {
  637. if ((code & 0xff000000) != 0) return 4;
  638. else if ((code & 0xff0000) != 0) return 3;
  639. else if ((code & 0xff00) != 0) return 2;
  640. else return 1;
  641. }
  642. extern int
  643. onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
  644. {
  645. UChar *p = buf;
  646. if ((code & 0xff00) != 0) {
  647. *p++ = (UChar )((code >> 8) & 0xff);
  648. }
  649. *p++ = (UChar )(code & 0xff);
  650. #if 1
  651. if (enclen(enc, buf) != (p - buf))
  652. return ONIGERR_INVALID_CODE_POINT_VALUE;
  653. #endif
  654. return p - buf;
  655. }
  656. extern int
  657. onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
  658. {
  659. UChar *p = buf;
  660. if ((code & 0xff000000) != 0) {
  661. *p++ = (UChar )((code >> 24) & 0xff);
  662. }
  663. if ((code & 0xff0000) != 0 || p != buf) {
  664. *p++ = (UChar )((code >> 16) & 0xff);
  665. }
  666. if ((code & 0xff00) != 0 || p != buf) {
  667. *p++ = (UChar )((code >> 8) & 0xff);
  668. }
  669. *p++ = (UChar )(code & 0xff);
  670. #if 1
  671. if (enclen(enc, buf) != (p - buf))
  672. return ONIGERR_INVALID_CODE_POINT_VALUE;
  673. #endif
  674. return p - buf;
  675. }
  676. extern int
  677. onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
  678. {
  679. static PosixBracketEntryType PBS[] = {
  680. { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
  681. { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
  682. { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
  683. { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
  684. { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
  685. { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
  686. { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
  687. { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
  688. { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
  689. { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
  690. { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
  691. { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
  692. { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
  693. { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },
  694. { (UChar* )NULL, -1, 0 }
  695. };
  696. PosixBracketEntryType *pb;
  697. int len;
  698. len = onigenc_strlen(enc, p, end);
  699. for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
  700. if (len == pb->len &&
  701. onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
  702. return pb->ctype;
  703. }
  704. return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
  705. }
  706. extern int
  707. onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
  708. unsigned int ctype)
  709. {
  710. if (code < 128)
  711. return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
  712. else {
  713. if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
  714. return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
  715. }
  716. }
  717. return FALSE;
  718. }
  719. extern int
  720. onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
  721. unsigned int ctype)
  722. {
  723. if (code < 128)
  724. return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
  725. else {
  726. if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
  727. return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
  728. }
  729. }
  730. return FALSE;
  731. }
  732. extern int
  733. onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
  734. const UChar* sascii /* ascii */, int n)
  735. {
  736. int x, c;
  737. while (n-- > 0) {
  738. if (p >= end) return (int )(*sascii);
  739. c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
  740. x = *sascii - c;
  741. if (x) return x;
  742. sascii++;
  743. p += enclen(enc, p);
  744. }
  745. return 0;
  746. }
  747. /* Property management */
  748. static int
  749. resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
  750. {
  751. int size;
  752. const OnigCodePoint **list = *plist;
  753. size = sizeof(OnigCodePoint*) * new_size;
  754. if (IS_NULL(list)) {
  755. list = (const OnigCodePoint** )xmalloc(size);
  756. }
  757. else {
  758. list = (const OnigCodePoint** )xrealloc((void* )list, size);
  759. }
  760. if (IS_NULL(list)) return ONIGERR_MEMORY;
  761. *plist = list;
  762. *psize = new_size;
  763. return 0;
  764. }
  765. extern int
  766. onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
  767. hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
  768. int *psize)
  769. {
  770. #define PROP_INIT_SIZE 16
  771. int r;
  772. if (*psize <= *pnum) {
  773. int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
  774. r = resize_property_list(new_size, plist, psize);
  775. if (r != 0) return r;
  776. }
  777. (*plist)[*pnum] = prop;
  778. if (ONIG_IS_NULL(*table)) {
  779. *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
  780. if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
  781. }
  782. *pnum = *pnum + 1;
  783. onig_st_insert_strend(*table, name, name + strlen((char* )name),
  784. (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
  785. return 0;
  786. }
  787. extern int
  788. onigenc_property_list_init(int (*f)(void))
  789. {
  790. int r;
  791. THREAD_ATOMIC_START;
  792. r = f();
  793. THREAD_ATOMIC_END;
  794. return r;
  795. }