mbfl_convert.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. /*
  2. * "streamable kanji code filter and converter"
  3. * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
  4. *
  5. * LICENSE NOTICES
  6. *
  7. * This file is part of "streamable kanji code filter and converter",
  8. * which is distributed under the terms of GNU Lesser General Public
  9. * License (version 2) as published by the Free Software Foundation.
  10. *
  11. * This software is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with "streamable kanji code filter and converter";
  18. * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19. * Suite 330, Boston, MA 02111-1307 USA
  20. *
  21. * The author of this file:
  22. *
  23. */
/*
 * The source code included in this file was separated from mbfilter.c
 * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
 * mbfilter.c is included in this package.
 *
 */
  30. #include <stddef.h>
  31. #include "mbfl_encoding.h"
  32. #include "mbfl_filter_output.h"
  33. #include "mbfilter_pass.h"
  34. #include "mbfilter_8bit.h"
  35. #include "mbfilter_wchar.h"
  36. #include "filters/mbfilter_euc_cn.h"
  37. #include "filters/mbfilter_hz.h"
  38. #include "filters/mbfilter_euc_tw.h"
  39. #include "filters/mbfilter_big5.h"
  40. #include "filters/mbfilter_uhc.h"
  41. #include "filters/mbfilter_euc_kr.h"
  42. #include "filters/mbfilter_iso2022_kr.h"
  43. #include "filters/mbfilter_sjis.h"
  44. #include "filters/mbfilter_sjis_2004.h"
  45. #include "filters/mbfilter_sjis_mobile.h"
  46. #include "filters/mbfilter_sjis_mac.h"
  47. #include "filters/mbfilter_cp51932.h"
  48. #include "filters/mbfilter_jis.h"
  49. #include "filters/mbfilter_iso2022_jp_ms.h"
  50. #include "filters/mbfilter_iso2022jp_2004.h"
  51. #include "filters/mbfilter_iso2022jp_mobile.h"
  52. #include "filters/mbfilter_euc_jp.h"
  53. #include "filters/mbfilter_euc_jp_2004.h"
  54. #include "filters/mbfilter_euc_jp_win.h"
  55. #include "filters/mbfilter_gb18030.h"
  56. #include "filters/mbfilter_cp932.h"
  57. #include "filters/mbfilter_cp936.h"
  58. #include "filters/mbfilter_cp5022x.h"
  59. #include "filters/mbfilter_base64.h"
  60. #include "filters/mbfilter_qprint.h"
  61. #include "filters/mbfilter_uuencode.h"
  62. #include "filters/mbfilter_7bit.h"
  63. #include "filters/mbfilter_utf7.h"
  64. #include "filters/mbfilter_utf7imap.h"
  65. #include "filters/mbfilter_utf8.h"
  66. #include "filters/mbfilter_utf8_mobile.h"
  67. #include "filters/mbfilter_utf16.h"
  68. #include "filters/mbfilter_utf32.h"
  69. #include "filters/mbfilter_ucs4.h"
  70. #include "filters/mbfilter_ucs2.h"
  71. #include "filters/mbfilter_htmlent.h"
  72. #include "filters/mbfilter_singlebyte.h"
  73. /* hex character table "0123456789ABCDEF" */
  74. static char mbfl_hexchar_table[] = {
  75. 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46
  76. };
  77. static const struct mbfl_convert_vtbl *mbfl_special_filter_list[] = {
  78. &vtbl_8bit_b64,
  79. &vtbl_b64_8bit,
  80. &vtbl_uuencode_8bit,
  81. &vtbl_8bit_qprint,
  82. &vtbl_qprint_8bit,
  83. &vtbl_8bit_7bit,
  84. &vtbl_7bit_8bit,
  85. &vtbl_pass,
  86. NULL
  87. };
  88. static void mbfl_convert_filter_init(mbfl_convert_filter *filter, const mbfl_encoding *from, const mbfl_encoding *to,
  89. const struct mbfl_convert_vtbl *vtbl, output_function_t output_function, flush_function_t flush_function, void* data)
  90. {
  91. /* encoding structure */
  92. filter->from = from;
  93. filter->to = to;
  94. if (output_function != NULL) {
  95. filter->output_function = output_function;
  96. } else {
  97. filter->output_function = mbfl_filter_output_null;
  98. }
  99. filter->flush_function = flush_function;
  100. filter->data = data;
  101. filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  102. filter->illegal_substchar = '?';
  103. filter->num_illegalchar = 0;
  104. filter->filter_dtor = vtbl->filter_dtor;
  105. filter->filter_function = vtbl->filter_function;
  106. filter->filter_flush = (filter_flush_t)vtbl->filter_flush;
  107. filter->filter_copy = vtbl->filter_copy;
  108. (*vtbl->filter_ctor)(filter);
  109. }
  110. mbfl_convert_filter* mbfl_convert_filter_new(const mbfl_encoding *from, const mbfl_encoding *to, output_function_t output_function,
  111. flush_function_t flush_function, void* data)
  112. {
  113. const struct mbfl_convert_vtbl *vtbl = mbfl_convert_filter_get_vtbl(from, to);
  114. if (vtbl == NULL) {
  115. return NULL;
  116. }
  117. mbfl_convert_filter *filter = emalloc(sizeof(mbfl_convert_filter));
  118. mbfl_convert_filter_init(filter, from, to, vtbl, output_function, flush_function, data);
  119. return filter;
  120. }
  121. mbfl_convert_filter* mbfl_convert_filter_new2(const struct mbfl_convert_vtbl *vtbl, output_function_t output_function,
  122. flush_function_t flush_function, void* data)
  123. {
  124. const mbfl_encoding *from_encoding = mbfl_no2encoding(vtbl->from);
  125. const mbfl_encoding *to_encoding = mbfl_no2encoding(vtbl->to);
  126. mbfl_convert_filter *filter = emalloc(sizeof(mbfl_convert_filter));
  127. mbfl_convert_filter_init(filter, from_encoding, to_encoding, vtbl, output_function, flush_function, data);
  128. return filter;
  129. }
  130. void mbfl_convert_filter_delete(mbfl_convert_filter *filter)
  131. {
  132. if (filter->filter_dtor) {
  133. (*filter->filter_dtor)(filter);
  134. }
  135. efree(filter);
  136. }
  137. /* Feed a char, return 0 if ok - used by mailparse ext */
  138. int mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter)
  139. {
  140. return (*filter->filter_function)(c, filter);
  141. }
  142. /* Feed string into `filter` byte by byte; return pointer to first byte not processed */
  143. unsigned char* mbfl_convert_filter_feed_string(mbfl_convert_filter *filter, unsigned char *p, size_t len)
  144. {
  145. while (len--) {
  146. if ((*filter->filter_function)(*p++, filter) < 0) {
  147. break;
  148. }
  149. }
  150. return p;
  151. }
  152. int mbfl_convert_filter_flush(mbfl_convert_filter *filter)
  153. {
  154. (*filter->filter_flush)(filter);
  155. return 0;
  156. }
  157. void mbfl_convert_filter_reset(mbfl_convert_filter *filter, const mbfl_encoding *from, const mbfl_encoding *to)
  158. {
  159. if (filter->filter_dtor) {
  160. (*filter->filter_dtor)(filter);
  161. }
  162. const struct mbfl_convert_vtbl *vtbl = mbfl_convert_filter_get_vtbl(from, to);
  163. if (vtbl == NULL) {
  164. vtbl = &vtbl_pass;
  165. }
  166. mbfl_convert_filter_init(filter, from, to, vtbl, filter->output_function, filter->flush_function, filter->data);
  167. }
  168. void mbfl_convert_filter_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest)
  169. {
  170. if (src->filter_copy != NULL) {
  171. src->filter_copy(src, dest);
  172. return;
  173. }
  174. *dest = *src;
  175. }
  176. void mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src)
  177. {
  178. mbfl_convert_filter_feed_string(filter, src->buffer, src->pos);
  179. }
  180. int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char *p)
  181. {
  182. int c;
  183. while ((c = *p++)) {
  184. if ((*filter->filter_function)(c, filter) < 0) {
  185. return -1;
  186. }
  187. }
  188. return 0;
  189. }
  190. static int mbfl_filt_conv_output_hex(unsigned int w, mbfl_convert_filter *filter)
  191. {
  192. bool nonzero = false;
  193. int shift = 28, ret = 0;
  194. while (shift >= 0) {
  195. int n = (w >> shift) & 0xF;
  196. if (n || nonzero) {
  197. nonzero = true;
  198. ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter);
  199. if (ret < 0) {
  200. return ret;
  201. }
  202. }
  203. shift -= 4;
  204. }
  205. if (!nonzero) {
  206. /* No hex digits were output by above loop */
  207. ret = (*filter->filter_function)('0', filter);
  208. }
  209. return ret;
  210. }
  211. /* illegal character output function for conv-filter */
  212. int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
  213. {
  214. unsigned int w = c;
  215. int ret = 0;
  216. int mode_backup = filter->illegal_mode;
  217. int substchar_backup = filter->illegal_substchar;
  218. /* The used substitution character may not be supported by the target character encoding.
  219. * If that happens, first try to use "?" instead and if that also fails, silently drop the
  220. * character. */
  221. if (filter->illegal_mode == MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR
  222. && filter->illegal_substchar != '?') {
  223. filter->illegal_substchar = '?';
  224. } else {
  225. filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  226. }
  227. switch (mode_backup) {
  228. case MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR:
  229. ret = (*filter->filter_function)(substchar_backup, filter);
  230. break;
  231. case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
  232. if (w != MBFL_BAD_INPUT) {
  233. ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
  234. if (ret < 0)
  235. break;
  236. ret = mbfl_filt_conv_output_hex(w, filter);
  237. } else {
  238. ret = (*filter->filter_function)(substchar_backup, filter);
  239. }
  240. break;
  241. case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
  242. if (w != MBFL_BAD_INPUT) {
  243. ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x");
  244. if (ret < 0)
  245. break;
  246. ret = mbfl_filt_conv_output_hex(w, filter);
  247. if (ret < 0)
  248. break;
  249. ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
  250. } else {
  251. ret = (*filter->filter_function)(substchar_backup, filter);
  252. }
  253. break;
  254. case MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE:
  255. default:
  256. break;
  257. }
  258. filter->illegal_mode = mode_backup;
  259. filter->illegal_substchar = substchar_backup;
  260. filter->num_illegalchar++;
  261. return ret;
  262. }
  263. const struct mbfl_convert_vtbl* mbfl_convert_filter_get_vtbl(const mbfl_encoding *from, const mbfl_encoding *to)
  264. {
  265. if (to->no_encoding == mbfl_no_encoding_base64 ||
  266. to->no_encoding == mbfl_no_encoding_qprint ||
  267. to->no_encoding == mbfl_no_encoding_7bit) {
  268. from = &mbfl_encoding_8bit;
  269. } else if (from->no_encoding == mbfl_no_encoding_base64 ||
  270. from->no_encoding == mbfl_no_encoding_qprint ||
  271. from->no_encoding == mbfl_no_encoding_uuencode ||
  272. from->no_encoding == mbfl_no_encoding_7bit) {
  273. to = &mbfl_encoding_8bit;
  274. }
  275. if (to == from && (to == &mbfl_encoding_wchar || to == &mbfl_encoding_8bit)) {
  276. return &vtbl_pass;
  277. }
  278. if (to->no_encoding == mbfl_no_encoding_wchar) {
  279. return from->input_filter;
  280. } else if (from->no_encoding == mbfl_no_encoding_wchar) {
  281. return to->output_filter;
  282. } else {
  283. int i = 0;
  284. const struct mbfl_convert_vtbl *vtbl;
  285. while ((vtbl = mbfl_special_filter_list[i++])) {
  286. if (vtbl->from == from->no_encoding && vtbl->to == to->no_encoding) {
  287. return vtbl;
  288. }
  289. }
  290. return NULL;
  291. }
  292. }
  293. /*
  294. * commonly used constructor
  295. */
  296. void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
  297. {
  298. filter->status = filter->cache = 0;
  299. }
  300. int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter)
  301. {
  302. if (filter->flush_function) {
  303. (*filter->flush_function)(filter->data);
  304. }
  305. return 0;
  306. }