mktable.c 28 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162
  1. /**********************************************************************
  2. mktable.c
  3. **********************************************************************/
  4. /*-
  5. * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. */
  29. #include <stdlib.h>
  30. #include <stdio.h>
  31. #include <locale.h>
  32. #define __USE_ISOC99
  33. #include <ctype.h>
  34. #include "regenc.h"
  35. #define ASCII 0
  36. #define UNICODE_ISO_8859_1 1
  37. #define ISO_8859_1 2
  38. #define ISO_8859_2 3
  39. #define ISO_8859_3 4
  40. #define ISO_8859_4 5
  41. #define ISO_8859_5 6
  42. #define ISO_8859_6 7
  43. #define ISO_8859_7 8
  44. #define ISO_8859_8 9
  45. #define ISO_8859_9 10
  46. #define ISO_8859_10 11
  47. #define ISO_8859_11 12
  48. #define ISO_8859_13 13
  49. #define ISO_8859_14 14
  50. #define ISO_8859_15 15
  51. #define ISO_8859_16 16
  52. #define KOI8 17
  53. #define KOI8_R 18
  54. typedef struct {
  55. int num;
  56. char* name;
  57. } ENC_INFO;
  58. static ENC_INFO Info[] = {
  59. { ASCII, "ASCII" },
  60. { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" },
  61. { ISO_8859_1, "ISO_8859_1" },
  62. { ISO_8859_2, "ISO_8859_2" },
  63. { ISO_8859_3, "ISO_8859_3" },
  64. { ISO_8859_4, "ISO_8859_4" },
  65. { ISO_8859_5, "ISO_8859_5" },
  66. { ISO_8859_6, "ISO_8859_6" },
  67. { ISO_8859_7, "ISO_8859_7" },
  68. { ISO_8859_8, "ISO_8859_8" },
  69. { ISO_8859_9, "ISO_8859_9" },
  70. { ISO_8859_10, "ISO_8859_10" },
  71. { ISO_8859_11, "ISO_8859_11" },
  72. { ISO_8859_13, "ISO_8859_13" },
  73. { ISO_8859_14, "ISO_8859_14" },
  74. { ISO_8859_15, "ISO_8859_15" },
  75. { ISO_8859_16, "ISO_8859_16" },
  76. { KOI8, "KOI8" },
  77. { KOI8_R, "KOI8_R" }
  78. };
  79. static int IsAlpha(int enc, int c)
  80. {
  81. if (enc == ASCII)
  82. return isalpha(c);
  83. if (c >= 0x41 && c <= 0x5a) return 1;
  84. if (c >= 0x61 && c <= 0x7a) return 1;
  85. switch (enc) {
  86. case UNICODE_ISO_8859_1:
  87. case ISO_8859_1:
  88. case ISO_8859_9:
  89. if (c == 0xaa) return 1;
  90. if (c == 0xb5) return 1;
  91. if (c == 0xba) return 1;
  92. if (c >= 0xc0 && c <= 0xd6) return 1;
  93. if (c >= 0xd8 && c <= 0xf6) return 1;
  94. if (c >= 0xf8 && c <= 0xff) return 1;
  95. break;
  96. case ISO_8859_2:
  97. if (c == 0xa1 || c == 0xa3) return 1;
  98. if (c == 0xa5 || c == 0xa6) return 1;
  99. if (c >= 0xa9 && c <= 0xac) return 1;
  100. if (c >= 0xae && c <= 0xaf) return 1;
  101. if (c == 0xb1 || c == 0xb3) return 1;
  102. if (c == 0xb5 || c == 0xb6) return 1;
  103. if (c >= 0xb9 && c <= 0xbc) return 1;
  104. if (c >= 0xbe && c <= 0xbf) return 1;
  105. if (c >= 0xc0 && c <= 0xd6) return 1;
  106. if (c >= 0xd8 && c <= 0xf6) return 1;
  107. if (c >= 0xf8 && c <= 0xfe) return 1;
  108. break;
  109. case ISO_8859_3:
  110. if (c == 0xa1) return 1;
  111. if (c == 0xa6) return 1;
  112. if (c >= 0xa9 && c <= 0xac) return 1;
  113. if (c == 0xaf) return 1;
  114. if (c == 0xb1) return 1;
  115. if (c == 0xb5 || c == 0xb6) return 1;
  116. if (c >= 0xb9 && c <= 0xbc) return 1;
  117. if (c == 0xbf) return 1;
  118. if (c >= 0xc0 && c <= 0xc2) return 1;
  119. if (c >= 0xc4 && c <= 0xcf) return 1;
  120. if (c >= 0xd1 && c <= 0xd6) return 1;
  121. if (c >= 0xd8 && c <= 0xe2) return 1;
  122. if (c >= 0xe4 && c <= 0xef) return 1;
  123. if (c >= 0xf1 && c <= 0xf6) return 1;
  124. if (c >= 0xf8 && c <= 0xfe) return 1;
  125. break;
  126. case ISO_8859_4:
  127. if (c >= 0xa1 && c <= 0xa3) return 1;
  128. if (c == 0xa5 || c == 0xa6) return 1;
  129. if (c >= 0xa9 && c <= 0xac) return 1;
  130. if (c == 0xae) return 1;
  131. if (c == 0xb1 || c == 0xb3) return 1;
  132. if (c == 0xb5 || c == 0xb6) return 1;
  133. if (c >= 0xb9 && c <= 0xbf) return 1;
  134. if (c >= 0xc0 && c <= 0xd6) return 1;
  135. if (c >= 0xd8 && c <= 0xf6) return 1;
  136. if (c >= 0xf8 && c <= 0xfe) return 1;
  137. break;
  138. case ISO_8859_5:
  139. if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
  140. if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
  141. break;
  142. case ISO_8859_6:
  143. if (c >= 0xc1 && c <= 0xda) return 1;
  144. if (c >= 0xe0 && c <= 0xf2) return 1;
  145. break;
  146. case ISO_8859_7:
  147. if (c == 0xb6) return 1;
  148. if (c >= 0xb8 && c <= 0xba) return 1;
  149. if (c == 0xbc) return 1;
  150. if (c >= 0xbe && c <= 0xbf) return 1;
  151. if (c == 0xc0) return 1;
  152. if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
  153. if (c >= 0xdc && c <= 0xfe) return 1;
  154. break;
  155. case ISO_8859_8:
  156. if (c == 0xb5) return 1;
  157. if (c >= 0xe0 && c <= 0xfa) return 1;
  158. break;
  159. case ISO_8859_10:
  160. if (c >= 0xa1 && c <= 0xa6) return 1;
  161. if (c >= 0xa8 && c <= 0xac) return 1;
  162. if (c == 0xae || c == 0xaf) return 1;
  163. if (c >= 0xb1 && c <= 0xb6) return 1;
  164. if (c >= 0xb8 && c <= 0xbc) return 1;
  165. if (c >= 0xbe && c <= 0xff) return 1;
  166. break;
  167. case ISO_8859_11:
  168. if (c >= 0xa1 && c <= 0xda) return 1;
  169. if (c >= 0xdf && c <= 0xfb) return 1;
  170. break;
  171. case ISO_8859_13:
  172. if (c == 0xa8) return 1;
  173. if (c == 0xaa) return 1;
  174. if (c == 0xaf) return 1;
  175. if (c == 0xb5) return 1;
  176. if (c == 0xb8) return 1;
  177. if (c == 0xba) return 1;
  178. if (c >= 0xbf && c <= 0xd6) return 1;
  179. if (c >= 0xd8 && c <= 0xf6) return 1;
  180. if (c >= 0xf8 && c <= 0xfe) return 1;
  181. break;
  182. case ISO_8859_14:
  183. if (c == 0xa1 || c == 0xa2) return 1;
  184. if (c == 0xa4 || c == 0xa5) return 1;
  185. if (c == 0xa6 || c == 0xa8) return 1;
  186. if (c >= 0xaa && c <= 0xac) return 1;
  187. if (c >= 0xaf && c <= 0xb5) return 1;
  188. if (c >= 0xb7 && c <= 0xff) return 1;
  189. break;
  190. case ISO_8859_15:
  191. if (c == 0xaa) return 1;
  192. if (c == 0xb5) return 1;
  193. if (c == 0xba) return 1;
  194. if (c >= 0xc0 && c <= 0xd6) return 1;
  195. if (c >= 0xd8 && c <= 0xf6) return 1;
  196. if (c >= 0xf8 && c <= 0xff) return 1;
  197. if (c == 0xa6) return 1;
  198. if (c == 0xa8) return 1;
  199. if (c == 0xb4) return 1;
  200. if (c == 0xb8) return 1;
  201. if (c == 0xbc) return 1;
  202. if (c == 0xbd) return 1;
  203. if (c == 0xbe) return 1;
  204. break;
  205. case ISO_8859_16:
  206. if (c == 0xa1) return 1;
  207. if (c == 0xa2) return 1;
  208. if (c == 0xa3) return 1;
  209. if (c == 0xa6) return 1;
  210. if (c == 0xa8) return 1;
  211. if (c == 0xaa) return 1;
  212. if (c == 0xac) return 1;
  213. if (c == 0xae) return 1;
  214. if (c == 0xaf) return 1;
  215. if (c == 0xb2) return 1;
  216. if (c == 0xb3) return 1;
  217. if (c == 0xb4) return 1;
  218. if (c >= 0xb8 && c <= 0xba) return 1;
  219. if (c == 0xbc) return 1;
  220. if (c == 0xbd) return 1;
  221. if (c == 0xbe) return 1;
  222. if (c == 0xbf) return 1;
  223. if (c >= 0xc0 && c <= 0xde) return 1;
  224. if (c >= 0xdf && c <= 0xff) return 1;
  225. break;
  226. case KOI8_R:
  227. if (c == 0xa3 || c == 0xb3) return 1;
  228. /* fall */
  229. case KOI8:
  230. if (c >= 0xc0 && c <= 0xff) return 1;
  231. break;
  232. default:
  233. exit(-1);
  234. }
  235. return 0;
  236. }
  237. static int IsBlank(int enc, int c)
  238. {
  239. if (enc == ASCII)
  240. return isblank(c);
  241. if (c == 0x09 || c == 0x20) return 1;
  242. switch (enc) {
  243. case UNICODE_ISO_8859_1:
  244. case ISO_8859_1:
  245. case ISO_8859_2:
  246. case ISO_8859_3:
  247. case ISO_8859_4:
  248. case ISO_8859_5:
  249. case ISO_8859_6:
  250. case ISO_8859_7:
  251. case ISO_8859_8:
  252. case ISO_8859_9:
  253. case ISO_8859_10:
  254. case ISO_8859_11:
  255. case ISO_8859_13:
  256. case ISO_8859_14:
  257. case ISO_8859_15:
  258. case ISO_8859_16:
  259. case KOI8:
  260. if (c == 0xa0) return 1;
  261. break;
  262. case KOI8_R:
  263. if (c == 0x9a) return 1;
  264. break;
  265. default:
  266. exit(-1);
  267. }
  268. return 0;
  269. }
  270. static int IsCntrl(int enc, int c)
  271. {
  272. if (enc == ASCII)
  273. return iscntrl(c);
  274. if (c >= 0x00 && c <= 0x1F) return 1;
  275. switch (enc) {
  276. case UNICODE_ISO_8859_1:
  277. if (c == 0xad) return 1;
  278. /* fall */
  279. case ISO_8859_1:
  280. case ISO_8859_2:
  281. case ISO_8859_3:
  282. case ISO_8859_4:
  283. case ISO_8859_5:
  284. case ISO_8859_6:
  285. case ISO_8859_7:
  286. case ISO_8859_8:
  287. case ISO_8859_9:
  288. case ISO_8859_10:
  289. case ISO_8859_11:
  290. case ISO_8859_13:
  291. case ISO_8859_14:
  292. case ISO_8859_15:
  293. case ISO_8859_16:
  294. case KOI8:
  295. if (c >= 0x7f && c <= 0x9F) return 1;
  296. break;
  297. case KOI8_R:
  298. if (c == 0x7f) return 1;
  299. break;
  300. default:
  301. exit(-1);
  302. }
  303. return 0;
  304. }
  305. static int IsDigit(int enc ARG_UNUSED, int c)
  306. {
  307. if (c >= 0x30 && c <= 0x39) return 1;
  308. return 0;
  309. }
  310. static int IsGraph(int enc, int c)
  311. {
  312. if (enc == ASCII)
  313. return isgraph(c);
  314. if (c >= 0x21 && c <= 0x7e) return 1;
  315. switch (enc) {
  316. case UNICODE_ISO_8859_1:
  317. case ISO_8859_1:
  318. case ISO_8859_2:
  319. case ISO_8859_4:
  320. case ISO_8859_5:
  321. case ISO_8859_9:
  322. case ISO_8859_10:
  323. case ISO_8859_13:
  324. case ISO_8859_14:
  325. case ISO_8859_15:
  326. case ISO_8859_16:
  327. if (c >= 0xa1 && c <= 0xff) return 1;
  328. break;
  329. case ISO_8859_3:
  330. if (c >= 0xa1) {
  331. if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
  332. c == 0xe3 || c == 0xf0)
  333. return 0;
  334. else
  335. return 1;
  336. }
  337. break;
  338. case ISO_8859_6:
  339. if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
  340. return 1;
  341. if (c >= 0xc1 && c <= 0xda) return 1;
  342. if (c >= 0xe0 && c <= 0xf2) return 1;
  343. break;
  344. case ISO_8859_7:
  345. if (c >= 0xa1 && c <= 0xfe &&
  346. c != 0xa4 && c != 0xa5 && c != 0xaa &&
  347. c != 0xae && c != 0xd2) return 1;
  348. break;
  349. case ISO_8859_8:
  350. if (c >= 0xa2 && c <= 0xfa) {
  351. if (c >= 0xbf && c <= 0xde) return 0;
  352. return 1;
  353. }
  354. break;
  355. case ISO_8859_11:
  356. if (c >= 0xa1 && c <= 0xda) return 1;
  357. if (c >= 0xdf && c <= 0xfb) return 1;
  358. break;
  359. case KOI8:
  360. if (c >= 0xc0 && c <= 0xff) return 1;
  361. break;
  362. case KOI8_R:
  363. if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1;
  364. break;
  365. default:
  366. exit(-1);
  367. }
  368. return 0;
  369. }
  370. static int IsLower(int enc, int c)
  371. {
  372. if (enc == ASCII)
  373. return islower(c);
  374. if (c >= 0x61 && c <= 0x7a) return 1;
  375. switch (enc) {
  376. case UNICODE_ISO_8859_1:
  377. case ISO_8859_1:
  378. case ISO_8859_9:
  379. if (c == 0xaa) return 1;
  380. if (c == 0xb5) return 1;
  381. if (c == 0xba) return 1;
  382. if (c >= 0xdf && c <= 0xf6) return 1;
  383. if (c >= 0xf8 && c <= 0xff) return 1;
  384. break;
  385. case ISO_8859_2:
  386. if (c == 0xb1 || c == 0xb3) return 1;
  387. if (c == 0xb5 || c == 0xb6) return 1;
  388. if (c >= 0xb9 && c <= 0xbc) return 1;
  389. if (c >= 0xbe && c <= 0xbf) return 1;
  390. if (c >= 0xdf && c <= 0xf6) return 1;
  391. if (c >= 0xf8 && c <= 0xfe) return 1;
  392. break;
  393. case ISO_8859_3:
  394. if (c == 0xb1) return 1;
  395. if (c == 0xb5 || c == 0xb6) return 1;
  396. if (c >= 0xb9 && c <= 0xbc) return 1;
  397. if (c == 0xbf) return 1;
  398. if (c == 0xdf) return 1;
  399. if (c >= 0xe0 && c <= 0xe2) return 1;
  400. if (c >= 0xe4 && c <= 0xef) return 1;
  401. if (c >= 0xf1 && c <= 0xf6) return 1;
  402. if (c >= 0xf8 && c <= 0xfe) return 1;
  403. break;
  404. case ISO_8859_4:
  405. if (c == 0xa2) return 1;
  406. if (c == 0xb1 || c == 0xb3) return 1;
  407. if (c == 0xb5 || c == 0xb6) return 1;
  408. if (c >= 0xb9 && c <= 0xbc) return 1;
  409. if (c >= 0xbe && c <= 0xbf) return 1;
  410. if (c == 0xdf) return 1;
  411. if (c >= 0xe0 && c <= 0xf6) return 1;
  412. if (c >= 0xf8 && c <= 0xfe) return 1;
  413. break;
  414. case ISO_8859_5:
  415. if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
  416. break;
  417. case ISO_8859_6:
  418. break;
  419. case ISO_8859_7:
  420. if (c == 0xc0) return 1;
  421. if (c >= 0xdc && c <= 0xfe) return 1;
  422. break;
  423. case ISO_8859_8:
  424. if (c == 0xb5) return 1;
  425. break;
  426. case ISO_8859_10:
  427. if (c >= 0xb1 && c <= 0xb6) return 1;
  428. if (c >= 0xb8 && c <= 0xbc) return 1;
  429. if (c == 0xbe || c == 0xbf) return 1;
  430. if (c >= 0xdf && c <= 0xff) return 1;
  431. break;
  432. case ISO_8859_11:
  433. break;
  434. case ISO_8859_13:
  435. if (c == 0xb5) return 1;
  436. if (c == 0xb8) return 1;
  437. if (c == 0xba) return 1;
  438. if (c == 0xbf) return 1;
  439. if (c >= 0xdf && c <= 0xf6) return 1;
  440. if (c >= 0xf8 && c <= 0xfe) return 1;
  441. break;
  442. case ISO_8859_14:
  443. if (c == 0xa2) return 1;
  444. if (c == 0xa5) return 1;
  445. if (c == 0xab) return 1;
  446. if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1;
  447. if (c >= 0xb8 && c <= 0xba) return 1;
  448. if (c == 0xbc) return 1;
  449. if (c == 0xbe || c == 0xbf) return 1;
  450. if (c >= 0xdf && c <= 0xff) return 1;
  451. break;
  452. case ISO_8859_15:
  453. if (c == 0xaa) return 1;
  454. if (c == 0xb5) return 1;
  455. if (c == 0xba) return 1;
  456. if (c >= 0xdf && c <= 0xf6) return 1;
  457. if (c >= 0xf8 && c <= 0xff) return 1;
  458. if (c == 0xa8) return 1;
  459. if (c == 0xb8) return 1;
  460. if (c == 0xbd) return 1;
  461. break;
  462. case ISO_8859_16:
  463. if (c == 0xa2) return 1;
  464. if (c == 0xa8) return 1;
  465. if (c == 0xae) return 1;
  466. if (c == 0xb3) return 1;
  467. if (c >= 0xb8 && c <= 0xba) return 1;
  468. if (c == 0xbd) return 1;
  469. if (c == 0xbf) return 1;
  470. if (c >= 0xdf && c <= 0xff) return 1;
  471. break;
  472. case KOI8_R:
  473. if (c == 0xa3) return 1;
  474. /* fall */
  475. case KOI8:
  476. if (c >= 0xc0 && c <= 0xdf) return 1;
  477. break;
  478. default:
  479. exit(-1);
  480. }
  481. return 0;
  482. }
  483. static int IsPrint(int enc, int c)
  484. {
  485. if (enc == ASCII)
  486. return isprint(c);
  487. if (c >= 0x20 && c <= 0x7e) return 1;
  488. switch (enc) {
  489. case UNICODE_ISO_8859_1:
  490. if (c >= 0x09 && c <= 0x0d) return 1;
  491. if (c == 0x85) return 1;
  492. /* fall */
  493. case ISO_8859_1:
  494. case ISO_8859_2:
  495. case ISO_8859_4:
  496. case ISO_8859_5:
  497. case ISO_8859_9:
  498. case ISO_8859_10:
  499. case ISO_8859_13:
  500. case ISO_8859_14:
  501. case ISO_8859_15:
  502. case ISO_8859_16:
  503. if (c >= 0xa0 && c <= 0xff) return 1;
  504. break;
  505. case ISO_8859_3:
  506. if (c >= 0xa0) {
  507. if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
  508. c == 0xe3 || c == 0xf0)
  509. return 0;
  510. else
  511. return 1;
  512. }
  513. break;
  514. case ISO_8859_6:
  515. if (c == 0xa0) return 1;
  516. if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
  517. return 1;
  518. if (c >= 0xc1 && c <= 0xda) return 1;
  519. if (c >= 0xe0 && c <= 0xf2) return 1;
  520. break;
  521. case ISO_8859_7:
  522. if (c >= 0xa0 && c <= 0xfe &&
  523. c != 0xa4 && c != 0xa5 && c != 0xaa &&
  524. c != 0xae && c != 0xd2) return 1;
  525. break;
  526. case ISO_8859_8:
  527. if (c >= 0xa0 && c <= 0xfa) {
  528. if (c >= 0xbf && c <= 0xde) return 0;
  529. if (c == 0xa1) return 0;
  530. return 1;
  531. }
  532. break;
  533. case ISO_8859_11:
  534. if (c >= 0xa0 && c <= 0xda) return 1;
  535. if (c >= 0xdf && c <= 0xfb) return 1;
  536. break;
  537. case KOI8:
  538. if (c == 0xa0) return 1;
  539. if (c >= 0xc0 && c <= 0xff) return 1;
  540. break;
  541. case KOI8_R:
  542. if (c >= 0x80 && c <= 0xff) return 1;
  543. break;
  544. default:
  545. exit(-1);
  546. }
  547. return 0;
  548. }
  549. static int IsPunct(int enc, int c)
  550. {
  551. if (enc == ASCII)
  552. return ispunct(c);
  553. if (enc == UNICODE_ISO_8859_1) {
  554. if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
  555. c == 0x7c || c == 0x7e) return 1;
  556. if (c >= 0x3c && c <= 0x3e) return 1;
  557. }
  558. if (c >= 0x21 && c <= 0x2f) return 1;
  559. if (c >= 0x3a && c <= 0x40) return 1;
  560. if (c >= 0x5b && c <= 0x60) return 1;
  561. if (c >= 0x7b && c <= 0x7e) return 1;
  562. switch (enc) {
  563. case ISO_8859_1:
  564. case ISO_8859_9:
  565. case ISO_8859_15:
  566. if (c == 0xad) return 1;
  567. /* fall */
  568. case UNICODE_ISO_8859_1:
  569. if (c == 0xa1) return 1;
  570. if (c == 0xab) return 1;
  571. if (c == 0xb7) return 1;
  572. if (c == 0xbb) return 1;
  573. if (c == 0xbf) return 1;
  574. break;
  575. case ISO_8859_2:
  576. case ISO_8859_4:
  577. case ISO_8859_5:
  578. case ISO_8859_14:
  579. if (c == 0xad) return 1;
  580. break;
  581. case ISO_8859_3:
  582. case ISO_8859_10:
  583. if (c == 0xad) return 1;
  584. if (c == 0xb7) return 1;
  585. if (c == 0xbd) return 1;
  586. break;
  587. case ISO_8859_6:
  588. if (c == 0xac) return 1;
  589. if (c == 0xad) return 1;
  590. if (c == 0xbb) return 1;
  591. if (c == 0xbf) return 1;
  592. break;
  593. case ISO_8859_7:
  594. if (c == 0xa1 || c == 0xa2) return 1;
  595. if (c == 0xab) return 1;
  596. if (c == 0xaf) return 1;
  597. if (c == 0xad) return 1;
  598. if (c == 0xb7 || c == 0xbb) return 1;
  599. break;
  600. case ISO_8859_8:
  601. if (c == 0xab) return 1;
  602. if (c == 0xad) return 1;
  603. if (c == 0xb7) return 1;
  604. if (c == 0xbb) return 1;
  605. if (c == 0xdf) return 1;
  606. break;
  607. case ISO_8859_13:
  608. if (c == 0xa1 || c == 0xa5) return 1;
  609. if (c == 0xab || c == 0xad) return 1;
  610. if (c == 0xb4 || c == 0xb7) return 1;
  611. if (c == 0xbb) return 1;
  612. if (c == 0xff) return 1;
  613. break;
  614. case ISO_8859_16:
  615. if (c == 0xa5) return 1;
  616. if (c == 0xab) return 1;
  617. if (c == 0xad) return 1;
  618. if (c == 0xb5) return 1;
  619. if (c == 0xb7) return 1;
  620. if (c == 0xbb) return 1;
  621. break;
  622. case KOI8_R:
  623. if (c == 0x9e) return 1;
  624. break;
  625. case ISO_8859_11:
  626. case KOI8:
  627. break;
  628. default:
  629. exit(-1);
  630. }
  631. return 0;
  632. }
  633. static int IsSpace(int enc, int c)
  634. {
  635. if (enc == ASCII)
  636. return isspace(c);
  637. if (c >= 0x09 && c <= 0x0d) return 1;
  638. if (c == 0x20) return 1;
  639. switch (enc) {
  640. case UNICODE_ISO_8859_1:
  641. if (c == 0x85) return 1;
  642. /* fall */
  643. case ISO_8859_1:
  644. case ISO_8859_2:
  645. case ISO_8859_3:
  646. case ISO_8859_4:
  647. case ISO_8859_5:
  648. case ISO_8859_6:
  649. case ISO_8859_7:
  650. case ISO_8859_8:
  651. case ISO_8859_9:
  652. case ISO_8859_10:
  653. case ISO_8859_11:
  654. case ISO_8859_13:
  655. case ISO_8859_14:
  656. case ISO_8859_15:
  657. case ISO_8859_16:
  658. case KOI8:
  659. if (c == 0xa0) return 1;
  660. break;
  661. case KOI8_R:
  662. if (c == 0x9a) return 1;
  663. break;
  664. default:
  665. exit(-1);
  666. }
  667. return 0;
  668. }
  669. static int IsUpper(int enc, int c)
  670. {
  671. if (enc == ASCII)
  672. return isupper(c);
  673. if (c >= 0x41 && c <= 0x5a) return 1;
  674. switch (enc) {
  675. case UNICODE_ISO_8859_1:
  676. case ISO_8859_1:
  677. case ISO_8859_9:
  678. if (c >= 0xc0 && c <= 0xd6) return 1;
  679. if (c >= 0xd8 && c <= 0xde) return 1;
  680. break;
  681. case ISO_8859_2:
  682. if (c == 0xa1 || c == 0xa3) return 1;
  683. if (c == 0xa5 || c == 0xa6) return 1;
  684. if (c >= 0xa9 && c <= 0xac) return 1;
  685. if (c >= 0xae && c <= 0xaf) return 1;
  686. if (c >= 0xc0 && c <= 0xd6) return 1;
  687. if (c >= 0xd8 && c <= 0xde) return 1;
  688. break;
  689. case ISO_8859_3:
  690. if (c == 0xa1) return 1;
  691. if (c == 0xa6) return 1;
  692. if (c >= 0xa9 && c <= 0xac) return 1;
  693. if (c == 0xaf) return 1;
  694. if (c >= 0xc0 && c <= 0xc2) return 1;
  695. if (c >= 0xc4 && c <= 0xcf) return 1;
  696. if (c >= 0xd1 && c <= 0xd6) return 1;
  697. if (c >= 0xd8 && c <= 0xde) return 1;
  698. break;
  699. case ISO_8859_4:
  700. if (c == 0xa1 || c == 0xa3) return 1;
  701. if (c == 0xa5 || c == 0xa6) return 1;
  702. if (c >= 0xa9 && c <= 0xac) return 1;
  703. if (c == 0xae) return 1;
  704. if (c == 0xbd) return 1;
  705. if (c >= 0xc0 && c <= 0xd6) return 1;
  706. if (c >= 0xd8 && c <= 0xde) return 1;
  707. break;
  708. case ISO_8859_5:
  709. if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
  710. break;
  711. case ISO_8859_6:
  712. break;
  713. case ISO_8859_7:
  714. if (c == 0xb6) return 1;
  715. if (c >= 0xb8 && c <= 0xba) return 1;
  716. if (c == 0xbc) return 1;
  717. if (c >= 0xbe && c <= 0xbf) return 1;
  718. if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
  719. break;
  720. case ISO_8859_8:
  721. case ISO_8859_11:
  722. break;
  723. case ISO_8859_10:
  724. if (c >= 0xa1 && c <= 0xa6) return 1;
  725. if (c >= 0xa8 && c <= 0xac) return 1;
  726. if (c == 0xae || c == 0xaf) return 1;
  727. if (c >= 0xc0 && c <= 0xde) return 1;
  728. break;
  729. case ISO_8859_13:
  730. if (c == 0xa8) return 1;
  731. if (c == 0xaa) return 1;
  732. if (c == 0xaf) return 1;
  733. if (c >= 0xc0 && c <= 0xd6) return 1;
  734. if (c >= 0xd8 && c <= 0xde) return 1;
  735. break;
  736. case ISO_8859_14:
  737. if (c == 0xa1) return 1;
  738. if (c == 0xa4 || c == 0xa6) return 1;
  739. if (c == 0xa8) return 1;
  740. if (c == 0xaa || c == 0xac) return 1;
  741. if (c == 0xaf || c == 0xb0) return 1;
  742. if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1;
  743. if (c == 0xbb || c == 0xbd) return 1;
  744. if (c >= 0xc0 && c <= 0xde) return 1;
  745. break;
  746. case ISO_8859_15:
  747. if (c >= 0xc0 && c <= 0xd6) return 1;
  748. if (c >= 0xd8 && c <= 0xde) return 1;
  749. if (c == 0xa6) return 1;
  750. if (c == 0xb4) return 1;
  751. if (c == 0xbc) return 1;
  752. if (c == 0xbe) return 1;
  753. break;
  754. case ISO_8859_16:
  755. if (c == 0xa1) return 1;
  756. if (c == 0xa3) return 1;
  757. if (c == 0xa6) return 1;
  758. if (c == 0xaa) return 1;
  759. if (c == 0xac) return 1;
  760. if (c == 0xaf) return 1;
  761. if (c == 0xb2) return 1;
  762. if (c == 0xb4) return 1;
  763. if (c == 0xbc) return 1;
  764. if (c == 0xbe) return 1;
  765. if (c >= 0xc0 && c <= 0xde) return 1;
  766. break;
  767. case KOI8_R:
  768. if (c == 0xb3) return 1;
  769. /* fall */
  770. case KOI8:
  771. if (c >= 0xe0 && c <= 0xff) return 1;
  772. break;
  773. default:
  774. exit(-1);
  775. }
  776. return 0;
  777. }
  778. static int IsXDigit(int enc, int c)
  779. {
  780. if (enc == ASCII)
  781. return isxdigit(c);
  782. if (c >= 0x30 && c <= 0x39) return 1;
  783. if (c >= 0x41 && c <= 0x46) return 1;
  784. if (c >= 0x61 && c <= 0x66) return 1;
  785. return 0;
  786. }
  787. static int IsWord(int enc, int c)
  788. {
  789. if (enc == ASCII) {
  790. return (isalpha(c) || isdigit(c) || c == 0x5f);
  791. }
  792. if (c >= 0x30 && c <= 0x39) return 1;
  793. if (c >= 0x41 && c <= 0x5a) return 1;
  794. if (c == 0x5f) return 1;
  795. if (c >= 0x61 && c <= 0x7a) return 1;
  796. switch (enc) {
  797. case UNICODE_ISO_8859_1:
  798. case ISO_8859_1:
  799. case ISO_8859_9:
  800. if (c == 0xaa) return 1;
  801. if (c >= 0xb2 && c <= 0xb3) return 1;
  802. if (c == 0xb5) return 1;
  803. if (c >= 0xb9 && c <= 0xba) return 1;
  804. if (c >= 0xbc && c <= 0xbe) return 1;
  805. if (c >= 0xc0 && c <= 0xd6) return 1;
  806. if (c >= 0xd8 && c <= 0xf6) return 1;
  807. if (c >= 0xf8 && c <= 0xff) return 1;
  808. break;
  809. case ISO_8859_2:
  810. if (c == 0xa1 || c == 0xa3) return 1;
  811. if (c == 0xa5 || c == 0xa6) return 1;
  812. if (c >= 0xa9 && c <= 0xac) return 1;
  813. if (c >= 0xae && c <= 0xaf) return 1;
  814. if (c == 0xb1 || c == 0xb3) return 1;
  815. if (c == 0xb5 || c == 0xb6) return 1;
  816. if (c >= 0xb9 && c <= 0xbc) return 1;
  817. if (c >= 0xbe && c <= 0xbf) return 1;
  818. if (c >= 0xc0 && c <= 0xd6) return 1;
  819. if (c >= 0xd8 && c <= 0xf6) return 1;
  820. if (c >= 0xf8 && c <= 0xfe) return 1;
  821. break;
  822. case ISO_8859_3:
  823. if (c == 0xa1) return 1;
  824. if (c == 0xa6) return 1;
  825. if (c >= 0xa9 && c <= 0xac) return 1;
  826. if (c == 0xaf) return 1;
  827. if (c >= 0xb1 && c <= 0xb3) return 1;
  828. if (c == 0xb5 || c == 0xb6) return 1;
  829. if (c >= 0xb9 && c <= 0xbd) return 1;
  830. if (c == 0xbf) return 1;
  831. if (c >= 0xc0 && c <= 0xc2) return 1;
  832. if (c >= 0xc4 && c <= 0xcf) return 1;
  833. if (c >= 0xd1 && c <= 0xd6) return 1;
  834. if (c >= 0xd8 && c <= 0xe2) return 1;
  835. if (c >= 0xe4 && c <= 0xef) return 1;
  836. if (c >= 0xf1 && c <= 0xf6) return 1;
  837. if (c >= 0xf8 && c <= 0xfe) return 1;
  838. break;
  839. case ISO_8859_4:
  840. if (c >= 0xa1 && c <= 0xa3) return 1;
  841. if (c == 0xa5 || c == 0xa6) return 1;
  842. if (c >= 0xa9 && c <= 0xac) return 1;
  843. if (c == 0xae) return 1;
  844. if (c == 0xb1 || c == 0xb3) return 1;
  845. if (c == 0xb5 || c == 0xb6) return 1;
  846. if (c >= 0xb9 && c <= 0xbf) return 1;
  847. if (c >= 0xc0 && c <= 0xd6) return 1;
  848. if (c >= 0xd8 && c <= 0xf6) return 1;
  849. if (c >= 0xf8 && c <= 0xfe) return 1;
  850. break;
  851. case ISO_8859_5:
  852. if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
  853. if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
  854. break;
  855. case ISO_8859_6:
  856. if (c >= 0xc1 && c <= 0xda) return 1;
  857. if (c >= 0xe0 && c <= 0xea) return 1;
  858. if (c >= 0xeb && c <= 0xf2) return 1;
  859. break;
  860. case ISO_8859_7:
  861. if (c == 0xb2 || c == 0xb3) return 1;
  862. if (c == 0xb6) return 1;
  863. if (c >= 0xb8 && c <= 0xba) return 1;
  864. if (c >= 0xbc && c <= 0xbf) return 1;
  865. if (c == 0xc0) return 1;
  866. if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
  867. if (c >= 0xdc && c <= 0xfe) return 1;
  868. break;
  869. case ISO_8859_8:
  870. if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
  871. if (c >= 0xbc && c <= 0xbe) return 1;
  872. if (c >= 0xe0 && c <= 0xfa) return 1;
  873. break;
  874. case ISO_8859_10:
  875. if (c >= 0xa1 && c <= 0xff) {
  876. if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd)
  877. return 1;
  878. }
  879. break;
  880. case ISO_8859_11:
  881. if (c >= 0xa1 && c <= 0xda) return 1;
  882. if (c >= 0xdf && c <= 0xfb) return 1;
  883. break;
  884. case ISO_8859_13:
  885. if (c == 0xa8) return 1;
  886. if (c == 0xaa) return 1;
  887. if (c == 0xaf) return 1;
  888. if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
  889. if (c >= 0xbc && c <= 0xbe) return 1;
  890. if (c == 0xb8) return 1;
  891. if (c == 0xba) return 1;
  892. if (c >= 0xbf && c <= 0xd6) return 1;
  893. if (c >= 0xd8 && c <= 0xf6) return 1;
  894. if (c >= 0xf8 && c <= 0xfe) return 1;
  895. break;
  896. case ISO_8859_14:
  897. if (c >= 0xa1 && c <= 0xff) {
  898. if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae ||
  899. c == 0xb6) return 0;
  900. return 1;
  901. }
  902. break;
  903. case ISO_8859_15:
  904. if (c == 0xaa) return 1;
  905. if (c >= 0xb2 && c <= 0xb3) return 1;
  906. if (c == 0xb5) return 1;
  907. if (c >= 0xb9 && c <= 0xba) return 1;
  908. if (c >= 0xbc && c <= 0xbe) return 1;
  909. if (c >= 0xc0 && c <= 0xd6) return 1;
  910. if (c >= 0xd8 && c <= 0xf6) return 1;
  911. if (c >= 0xf8 && c <= 0xff) return 1;
  912. if (c == 0xa6) return 1;
  913. if (c == 0xa8) return 1;
  914. if (c == 0xb4) return 1;
  915. if (c == 0xb8) return 1;
  916. break;
  917. case ISO_8859_16:
  918. if (c == 0xa1) return 1;
  919. if (c == 0xa2) return 1;
  920. if (c == 0xa3) return 1;
  921. if (c == 0xa6) return 1;
  922. if (c == 0xa8) return 1;
  923. if (c == 0xaa) return 1;
  924. if (c == 0xac) return 1;
  925. if (c == 0xae) return 1;
  926. if (c == 0xaf) return 1;
  927. if (c == 0xb2) return 1;
  928. if (c == 0xb3) return 1;
  929. if (c == 0xb4) return 1;
  930. if (c >= 0xb8 && c <= 0xba) return 1;
  931. if (c == 0xbc) return 1;
  932. if (c == 0xbd) return 1;
  933. if (c == 0xbe) return 1;
  934. if (c == 0xbf) return 1;
  935. if (c >= 0xc0 && c <= 0xde) return 1;
  936. if (c >= 0xdf && c <= 0xff) return 1;
  937. break;
  938. case KOI8_R:
  939. if (c == 0x9d) return 1;
  940. if (c == 0xa3 || c == 0xb3) return 1;
  941. /* fall */
  942. case KOI8:
  943. if (c >= 0xc0 && c <= 0xff) return 1;
  944. break;
  945. default:
  946. exit(-1);
  947. }
  948. return 0;
  949. }
  950. static int IsAscii(int enc ARG_UNUSED, int c)
  951. {
  952. if (c >= 0x00 && c <= 0x7f) return 1;
  953. return 0;
  954. }
  955. static int IsNewline(int enc ARG_UNUSED, int c)
  956. {
  957. if (c == 0x0a) return 1;
  958. return 0;
  959. }
  960. static int exec(FILE* fp, ENC_INFO* einfo)
  961. {
  962. #define NCOL 8
  963. int c, val, enc;
  964. enc = einfo->num;
  965. fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
  966. einfo->name);
  967. for (c = 0; c < 256; c++) {
  968. val = 0;
  969. if (IsNewline(enc, c)) val |= BIT_CTYPE_NEWLINE;
  970. if (IsAlpha (enc, c)) val |= (BIT_CTYPE_ALPHA | BIT_CTYPE_ALNUM);
  971. if (IsBlank (enc, c)) val |= BIT_CTYPE_BLANK;
  972. if (IsCntrl (enc, c)) val |= BIT_CTYPE_CNTRL;
  973. if (IsDigit (enc, c)) val |= (BIT_CTYPE_DIGIT | BIT_CTYPE_ALNUM);
  974. if (IsGraph (enc, c)) val |= BIT_CTYPE_GRAPH;
  975. if (IsLower (enc, c)) val |= BIT_CTYPE_LOWER;
  976. if (IsPrint (enc, c)) val |= BIT_CTYPE_PRINT;
  977. if (IsPunct (enc, c)) val |= BIT_CTYPE_PUNCT;
  978. if (IsSpace (enc, c)) val |= BIT_CTYPE_SPACE;
  979. if (IsUpper (enc, c)) val |= BIT_CTYPE_UPPER;
  980. if (IsXDigit(enc, c)) val |= BIT_CTYPE_XDIGIT;
  981. if (IsWord (enc, c)) val |= BIT_CTYPE_WORD;
  982. if (IsAscii (enc, c)) val |= BIT_CTYPE_ASCII;
  983. if (c % NCOL == 0) fputs(" ", fp);
  984. fprintf(fp, "0x%04x", val);
  985. if (c != 255) fputs(",", fp);
  986. if (c != 0 && c % NCOL == (NCOL-1))
  987. fputs("\n", fp);
  988. else
  989. fputs(" ", fp);
  990. }
  991. fprintf(fp, "};\n");
  992. return 0;
  993. }
  994. extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
  995. {
  996. int i;
  997. FILE* fp = stdout;
  998. setlocale(LC_ALL, "C");
  999. /* setlocale(LC_ALL, "POSIX"); */
  1000. /* setlocale(LC_ALL, "en_GB.iso88591"); */
  1001. /* setlocale(LC_ALL, "de_BE.iso88591"); */
  1002. /* setlocale(LC_ALL, "fr_FR.iso88591"); */
  1003. for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) {
  1004. exec(fp, &Info[i]);
  1005. }
  1006. return 0;
  1007. }