mbfl_ident.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /*
  2. * "streamable kanji code filter and converter"
  3. * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
  4. *
  5. * LICENSE NOTICES
  6. *
  7. * This file is part of "streamable kanji code filter and converter",
  8. * which is distributed under the terms of GNU Lesser General Public
  9. * License (version 2) as published by the Free Software Foundation.
  10. *
  11. * This software is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with "streamable kanji code filter and converter";
  18. * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19. * Suite 330, Boston, MA 02111-1307 USA
  20. *
  21. * The author of this file:
  22. *
  23. */
  24. /*
  25. * The source code included in this file was separated from mbfilter.c
  26. * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
  27. * mbfilter.c is included in this package.
  28. *
  29. */
  30. #ifdef HAVE_CONFIG_H
  31. #include "config.h"
  32. #endif
  33. #ifdef HAVE_STDDEF_H
  34. #include <stddef.h>
  35. #endif
  36. #include "mbfl_ident.h"
  37. #include "mbfl_allocators.h"
  38. #include "mbfilter_pass.h"
  39. #include "mbfilter_8bit.h"
  40. #include "mbfilter_wchar.h"
  41. #include "filters/mbfilter_euc_cn.h"
  42. #include "filters/mbfilter_hz.h"
  43. #include "filters/mbfilter_euc_tw.h"
  44. #include "filters/mbfilter_big5.h"
  45. #include "filters/mbfilter_uhc.h"
  46. #include "filters/mbfilter_euc_kr.h"
  47. #include "filters/mbfilter_iso2022_kr.h"
  48. #include "filters/mbfilter_sjis.h"
  49. #include "filters/mbfilter_sjis_open.h"
  50. #include "filters/mbfilter_sjis_mobile.h"
  51. #include "filters/mbfilter_jis.h"
  52. #include "filters/mbfilter_iso2022_jp_ms.h"
  53. #include "filters/mbfilter_iso2022jp_2004.h"
  54. #include "filters/mbfilter_iso2022jp_mobile.h"
  55. #include "filters/mbfilter_euc_jp.h"
  56. #include "filters/mbfilter_euc_jp_win.h"
  57. #include "filters/mbfilter_euc_jp_2004.h"
  58. #include "filters/mbfilter_utf8_mobile.h"
  59. #include "filters/mbfilter_ascii.h"
  60. #include "filters/mbfilter_koi8r.h"
  61. #include "filters/mbfilter_koi8u.h"
  62. #include "filters/mbfilter_cp866.h"
  63. #include "filters/mbfilter_cp932.h"
  64. #include "filters/mbfilter_cp936.h"
  65. #include "filters/mbfilter_cp1251.h"
  66. #include "filters/mbfilter_cp1252.h"
  67. #include "filters/mbfilter_cp1254.h"
  68. #include "filters/mbfilter_cp51932.h"
  69. #include "filters/mbfilter_cp5022x.h"
  70. #include "filters/mbfilter_gb18030.h"
  71. #include "filters/mbfilter_iso8859_1.h"
  72. #include "filters/mbfilter_iso8859_2.h"
  73. #include "filters/mbfilter_iso8859_3.h"
  74. #include "filters/mbfilter_iso8859_4.h"
  75. #include "filters/mbfilter_iso8859_5.h"
  76. #include "filters/mbfilter_iso8859_6.h"
  77. #include "filters/mbfilter_iso8859_7.h"
  78. #include "filters/mbfilter_iso8859_8.h"
  79. #include "filters/mbfilter_iso8859_9.h"
  80. #include "filters/mbfilter_iso8859_10.h"
  81. #include "filters/mbfilter_iso8859_13.h"
  82. #include "filters/mbfilter_iso8859_14.h"
  83. #include "filters/mbfilter_iso8859_15.h"
  84. #include "filters/mbfilter_base64.h"
  85. #include "filters/mbfilter_qprint.h"
  86. #include "filters/mbfilter_uuencode.h"
  87. #include "filters/mbfilter_7bit.h"
  88. #include "filters/mbfilter_utf7.h"
  89. #include "filters/mbfilter_utf7imap.h"
  90. #include "filters/mbfilter_utf8.h"
  91. #include "filters/mbfilter_utf16.h"
  92. #include "filters/mbfilter_utf32.h"
  93. #include "filters/mbfilter_byte2.h"
  94. #include "filters/mbfilter_byte4.h"
  95. #include "filters/mbfilter_ucs4.h"
  96. #include "filters/mbfilter_ucs2.h"
  97. #include "filters/mbfilter_htmlent.h"
  98. #include "filters/mbfilter_armscii8.h"
  99. #include "filters/mbfilter_cp850.h"
  100. static const struct mbfl_identify_vtbl vtbl_identify_false = {
  101. mbfl_no_encoding_pass,
  102. mbfl_filt_ident_false_ctor,
  103. mbfl_filt_ident_common_dtor,
  104. mbfl_filt_ident_false };
  105. static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
  106. &vtbl_identify_utf8,
  107. &vtbl_identify_utf7,
  108. &vtbl_identify_ascii,
  109. &vtbl_identify_eucjp,
  110. &vtbl_identify_sjis,
  111. &vtbl_identify_sjis_open,
  112. &vtbl_identify_eucjpwin,
  113. &vtbl_identify_eucjp2004,
  114. &vtbl_identify_cp932,
  115. &vtbl_identify_jis,
  116. &vtbl_identify_2022jp,
  117. &vtbl_identify_2022jpms,
  118. &vtbl_identify_2022jp_2004,
  119. &vtbl_identify_2022jp_kddi,
  120. &vtbl_identify_cp51932,
  121. &vtbl_identify_sjis_docomo,
  122. &vtbl_identify_sjis_kddi,
  123. &vtbl_identify_sjis_sb,
  124. &vtbl_identify_utf8_docomo,
  125. &vtbl_identify_utf8_kddi_a,
  126. &vtbl_identify_utf8_kddi_b,
  127. &vtbl_identify_utf8_sb,
  128. &vtbl_identify_euccn,
  129. &vtbl_identify_cp936,
  130. &vtbl_identify_hz,
  131. &vtbl_identify_euctw,
  132. &vtbl_identify_big5,
  133. &vtbl_identify_cp950,
  134. &vtbl_identify_euckr,
  135. &vtbl_identify_uhc,
  136. &vtbl_identify_2022kr,
  137. &vtbl_identify_cp1251,
  138. &vtbl_identify_cp866,
  139. &vtbl_identify_koi8r,
  140. &vtbl_identify_koi8u,
  141. &vtbl_identify_cp1252,
  142. &vtbl_identify_cp1254,
  143. &vtbl_identify_8859_1,
  144. &vtbl_identify_8859_2,
  145. &vtbl_identify_8859_3,
  146. &vtbl_identify_8859_4,
  147. &vtbl_identify_8859_5,
  148. &vtbl_identify_8859_6,
  149. &vtbl_identify_8859_7,
  150. &vtbl_identify_8859_8,
  151. &vtbl_identify_8859_9,
  152. &vtbl_identify_8859_10,
  153. &vtbl_identify_8859_13,
  154. &vtbl_identify_8859_14,
  155. &vtbl_identify_8859_15,
  156. &vtbl_identify_armscii8,
  157. &vtbl_identify_cp850,
  158. &vtbl_identify_jis_ms,
  159. &vtbl_identify_cp50220,
  160. &vtbl_identify_cp50221,
  161. &vtbl_identify_cp50222,
  162. &vtbl_identify_gb18030,
  163. &vtbl_identify_false,
  164. NULL
  165. };
  166. /*
  167. * identify filter
  168. */
  169. const struct mbfl_identify_vtbl * mbfl_identify_filter_get_vtbl(enum mbfl_no_encoding encoding)
  170. {
  171. const struct mbfl_identify_vtbl * vtbl;
  172. int i;
  173. i = 0;
  174. while ((vtbl = mbfl_identify_filter_list[i++]) != NULL) {
  175. if (vtbl->encoding == encoding) {
  176. break;
  177. }
  178. }
  179. return vtbl;
  180. }
  181. mbfl_identify_filter *mbfl_identify_filter_new(enum mbfl_no_encoding encoding)
  182. {
  183. mbfl_identify_filter *filter;
  184. /* allocate */
  185. filter = (mbfl_identify_filter *)mbfl_malloc(sizeof(mbfl_identify_filter));
  186. if (filter == NULL) {
  187. return NULL;
  188. }
  189. if (mbfl_identify_filter_init(filter, encoding)) {
  190. mbfl_free(filter);
  191. return NULL;
  192. }
  193. return filter;
  194. }
  195. mbfl_identify_filter *mbfl_identify_filter_new2(const mbfl_encoding *encoding)
  196. {
  197. mbfl_identify_filter *filter;
  198. /* allocate */
  199. filter = (mbfl_identify_filter *)mbfl_malloc(sizeof(mbfl_identify_filter));
  200. if (filter == NULL) {
  201. return NULL;
  202. }
  203. if (mbfl_identify_filter_init2(filter, encoding)) {
  204. mbfl_free(filter);
  205. return NULL;
  206. }
  207. return filter;
  208. }
  209. int mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding)
  210. {
  211. const mbfl_encoding *enc = mbfl_no2encoding(encoding);
  212. return mbfl_identify_filter_init2(filter, enc ? enc: &mbfl_encoding_pass);
  213. }
  214. int mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding)
  215. {
  216. const struct mbfl_identify_vtbl *vtbl;
  217. /* encoding structure */
  218. filter->encoding = encoding;
  219. filter->status = 0;
  220. filter->flag = 0;
  221. filter->score = 0;
  222. /* setup the function table */
  223. vtbl = mbfl_identify_filter_get_vtbl(filter->encoding->no_encoding);
  224. if (vtbl == NULL) {
  225. vtbl = &vtbl_identify_false;
  226. }
  227. filter->filter_ctor = vtbl->filter_ctor;
  228. filter->filter_dtor = vtbl->filter_dtor;
  229. filter->filter_function = vtbl->filter_function;
  230. /* constructor */
  231. (*filter->filter_ctor)(filter);
  232. return 0;
  233. }
  234. void mbfl_identify_filter_delete(mbfl_identify_filter *filter)
  235. {
  236. if (filter == NULL) {
  237. return;
  238. }
  239. mbfl_identify_filter_cleanup(filter);
  240. mbfl_free((void*)filter);
  241. }
  242. void mbfl_identify_filter_cleanup(mbfl_identify_filter *filter)
  243. {
  244. (*filter->filter_dtor)(filter);
  245. }
  246. void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter)
  247. {
  248. filter->status = 0;
  249. filter->flag = 0;
  250. }
  251. void mbfl_filt_ident_common_dtor(mbfl_identify_filter *filter)
  252. {
  253. filter->status = 0;
  254. }
  255. int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter)
  256. {
  257. filter->flag = 1; /* bad */
  258. return c;
  259. }
  260. void mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter)
  261. {
  262. filter->status = 0;
  263. filter->flag = 1;
  264. }
  265. int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter)
  266. {
  267. return c;
  268. }