123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620 |
- /* gdkanji.c (Kanji code converter) */
- /* written by Masahito Yamaga (ma@yama-ga.com) */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include "gd.h"
- #include "gdhelpers.h"
- #include <stdarg.h>
- #if defined(HAVE_ICONV_H) || defined(HAVE_ICONV)
- #include <iconv.h>
- #include <errno.h>
- #endif
- #if defined(HAVE_ICONV_H) && !defined(HAVE_ICONV)
- #define HAVE_ICONV 1
- #endif
- #define LIBNAME "any2eucjp()"
- #if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
- #ifndef SJISPRE
- #define SJISPRE 1
- #endif
- #endif
- #ifdef TRUE
- #undef TRUE
- #endif
- #ifdef FALSE
- #undef FALSE
- #endif
- #define TRUE 1
- #define FALSE 0
- #define NEW 1
- #define OLD 2
- #define ESCI 3
- #define NEC 4
- #define EUC 5
- #define SJIS 6
- #define EUCORSJIS 7
- #define ASCII 8
- #define NEWJISSTR "JIS7"
- #define OLDJISSTR "jis"
- #define EUCSTR "eucJP"
- #define SJISSTR "SJIS"
- #define ESC 27
- #define SS2 142
/*
 * Diagnostic trace helper.
 *
 * When this file is compiled with DEBUG defined, writes LIBNAME, ": ",
 * the printf-style message built from `format` and its arguments, and a
 * trailing newline to stdout.  Without DEBUG the function body is empty.
 */
static void
debug (const char *format, ...)
{
#ifdef DEBUG
  va_list ap;

  fputs (LIBNAME ": ", stdout);

  va_start (ap, format);
  vfprintf (stdout, format, ap);
  va_end (ap);

  fputc ('\n', stdout);
#else
  (void) format;                /* nothing to do in non-debug builds */
#endif
}
- static void
- error (const char *format,...)
- {
- va_list args;
- char *tmp;
- va_start(args, format);
- vspprintf(&tmp, 0, format, args);
- va_end(args);
- php_error_docref(NULL, E_WARNING, "%s: %s", LIBNAME, tmp);
- efree(tmp);
- }
- /* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
- static int
- DetectKanjiCode (unsigned char *str)
- {
- static int whatcode = ASCII;
- int oldcode = ASCII;
- int c, i;
- char *lang = NULL;
- c = '\1';
- i = 0;
- if (whatcode != EUCORSJIS && whatcode != ASCII)
- {
- oldcode = whatcode;
- whatcode = ASCII;
- }
- while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
- {
- if ((c = str[i++]) != '\0')
- {
- if (c == ESC)
- {
- c = str[i++];
- if (c == '$')
- {
- c = str[i++];
- if (c == 'B')
- whatcode = NEW;
- else if (c == '@')
- whatcode = OLD;
- }
- else if (c == '(')
- {
- c = str[i++];
- if (c == 'I')
- whatcode = ESCI;
- }
- else if (c == 'K')
- whatcode = NEC;
- }
- else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
- whatcode = SJIS;
- else if (c == SS2)
- {
- c = str[i++];
- if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) || (c >= 224 && c <= 252))
- whatcode = SJIS;
- else if (c >= 161 && c <= 223)
- whatcode = EUCORSJIS;
- }
- else if (c >= 161 && c <= 223)
- {
- c = str[i++];
- if (c >= 240 && c <= 254)
- whatcode = EUC;
- else if (c >= 161 && c <= 223)
- whatcode = EUCORSJIS;
- else if (c >= 224 && c <= 239)
- {
- whatcode = EUCORSJIS;
- while (c >= 64 && whatcode == EUCORSJIS)
- {
- if (c >= 129)
- {
- if (c <= 141 || (c >= 143 && c <= 159))
- whatcode = SJIS;
- else if (c >= 253 && c <= 254)
- whatcode = EUC;
- }
- c = str[i++];
- }
- }
- else if (c <= 159)
- whatcode = SJIS;
- }
- else if (c >= 240 && c <= 254)
- whatcode = EUC;
- else if (c >= 224 && c <= 239)
- {
- c = str[i++];
- if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
- whatcode = SJIS;
- else if (c >= 253 && c <= 254)
- whatcode = EUC;
- else if (c >= 161 && c <= 252)
- whatcode = EUCORSJIS;
- }
- }
- }
- #ifdef DEBUG
- if (whatcode == ASCII)
- debug ("Kanji code not included.");
- else if (whatcode == EUCORSJIS)
- debug ("Kanji code not detected.");
- else
- debug ("Kanji code detected at %d byte.", i);
- #endif
- if (whatcode == EUCORSJIS && oldcode != ASCII)
- whatcode = oldcode;
- if (whatcode == EUCORSJIS)
- {
- if (getenv ("LC_ALL"))
- lang = getenv ("LC_ALL");
- else if (getenv ("LC_CTYPE"))
- lang = getenv ("LC_CTYPE");
- else if (getenv ("LANG"))
- lang = getenv ("LANG");
- if (lang)
- {
- if (strcmp (lang, "ja_JP.SJIS") == 0 ||
- #ifdef hpux
- strcmp (lang, "japanese") == 0 ||
- #endif
- strcmp (lang, "ja_JP.mscode") == 0 ||
- strcmp (lang, "ja_JP.PCK") == 0)
- whatcode = SJIS;
- else if (strncmp (lang, "ja", 2) == 0)
- #ifdef SJISPRE
- whatcode = SJIS;
- #else
- whatcode = EUC;
- #endif
- }
- }
- if (whatcode == EUCORSJIS)
- #ifdef SJISPRE
- whatcode = SJIS;
- #else
- whatcode = EUC;
- #endif
- return whatcode;
- }
/* SJIStoJIS() is sjis2jis() by Ken Lunde. */
/*
 * Convert a Shift-JIS double-byte pair to the corresponding JIS pair,
 * in place.
 *
 *   p1  in: first (lead) byte,   out: first JIS byte
 *   p2  in: second (trail) byte, out: second JIS byte
 *
 * Only the low 8 bits of *p1 and *p2 are used to choose the offsets; the
 * cell offset is then subtracted from the full int stored in *p2.
 */
static void
SJIStoJIS (int *p1, int *p2)
{
  const unsigned char lead = (unsigned char) *p1;
  const unsigned char trail = (unsigned char) *p2;
  int first_half;               /* 1 when the trail byte is below 159 */
  int row_base;
  int cell_shift;

  first_half = (trail < 159) ? 1 : 0;
  row_base = (lead < 160) ? 112 : 176;

  if (first_half)
    cell_shift = (trail > 127) ? 32 : 31;
  else
    cell_shift = 126;

  *p1 = ((lead - row_base) * 2) - first_half;
  *p2 = *p2 - cell_shift;
}
/* han2zen() was derived from han2zen() written by Ken Lunde. */
/* Half-width kana codes that can take a voiced mark (daku-ten). */
#define IS_DAKU(c) (((c) >= 182 && (c) <= 196) || ((c) >= 202 && (c) <= 206) || ((c) == 179))
/* Half-width kana codes that can take a semi-voiced mark (han-daku-ten). */
#define IS_HANDAKU(c) ((c) >= 202 && (c) <= 206)

/*
 * Convert a half-width katakana code to its full-width Shift-JIS pair.
 *
 *   p1  in: half-width kana code (161..223)
 *       out: first byte of the full-width pair
 *   p2  in: 222 if a daku-ten follows, 223 if a han-daku-ten follows,
 *           anything else otherwise
 *       out: second byte of the full-width pair, with the mark folded in
 *            when the kana can carry it
 *
 * If *p1 is not in 161..223 there is no table entry for it; the function
 * then returns without touching *p1 or *p2 instead of indexing outside the
 * table.
 */
static void
han2zen (int *p1, int *p2)
{
  /* Full-width pair for each half-width code, indexed by (code - 161). */
  static const unsigned char mtable[][2] = {
    {129, 66}, {129, 117}, {129, 118}, {129, 65}, {129, 69},
    {131, 146}, {131, 64}, {131, 66}, {131, 68}, {131, 70},
    {131, 72}, {131, 131}, {131, 133}, {131, 135}, {131, 98},
    {129, 91}, {131, 65}, {131, 67}, {131, 69}, {131, 71},
    {131, 73}, {131, 74}, {131, 76}, {131, 78}, {131, 80},
    {131, 82}, {131, 84}, {131, 86}, {131, 88}, {131, 90},
    {131, 92}, {131, 94}, {131, 96}, {131, 99}, {131, 101},
    {131, 103}, {131, 105}, {131, 106}, {131, 107}, {131, 108},
    {131, 109}, {131, 110}, {131, 113}, {131, 116}, {131, 119},
    {131, 122}, {131, 125}, {131, 126}, {131, 128}, {131, 129},
    {131, 130}, {131, 132}, {131, 134}, {131, 136}, {131, 137},
    {131, 138}, {131, 139}, {131, 140}, {131, 141}, {131, 143},
    {131, 147}, {129, 74}, {129, 75}
  };
  const int entries = (int) (sizeof mtable / sizeof mtable[0]);
  const int c = *p1;
  const int idx = c - 161;
  int daku = 0;
  int handaku = 0;

  /* Reject codes that have no table entry (prevents out-of-bounds read). */
  if (idx < 0 || idx >= entries)
    return;

  if (*p2 == 222 && IS_DAKU (c))
    daku = 1;                   /* Daku-ten */
  else if (*p2 == 223 && IS_HANDAKU (c))
    handaku = 1;                /* Han-daku-ten */

  *p1 = mtable[idx][0];
  *p2 = mtable[idx][1];

  if (daku)
    {
      if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
        (*p2)++;                /* voiced form is the next code */
      else if (c == 179)
        *p2 = 148;              /* U + daku-ten has its own code */
    }
  else if (handaku && *p2 >= 110 && *p2 <= 122)
    {
      (*p2) += 2;               /* semi-voiced form is two codes on */
    }
}
- /* Recast strcpy to handle unsigned chars used below. */
- #define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
- static void
- do_convert (unsigned char *to, unsigned char *from, const char *code)
- {
- #ifdef HAVE_ICONV
- iconv_t cd;
- size_t from_len, to_len;
- if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1)
- {
- error ("iconv_open() error");
- if (errno == EINVAL)
- error ("invalid code specification: \"%s\" or \"%s\"",
- EUCSTR, code);
- strcpy ((char *) to, (const char *) from);
- return;
- }
- from_len = strlen ((const char *) from) + 1;
- to_len = BUFSIZ;
- if ((int) iconv(cd, (char **) &from, &from_len, (char **) &to, &to_len) == -1)
- {
- if (errno == EINVAL)
- error ("invalid end of input string");
- else if (errno == EILSEQ)
- error ("invalid code in input string");
- else if (errno == E2BIG)
- error ("output buffer overflow at do_convert()");
- else
- error ("something happen");
- strcpy ((char *) to, (const char *) from);
- return;
- }
- if (iconv_close (cd) != 0)
- {
- error ("iconv_close() error");
- }
- #else
- int p1, p2, i, j;
- int jisx0208 = FALSE;
- int hankaku = FALSE;
- j = 0;
- if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
- {
- for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
- {
- if (from[i] == ESC)
- {
- i++;
- if (from[i] == '$')
- {
- jisx0208 = TRUE;
- hankaku = FALSE;
- i++;
- }
- else if (from[i] == '(')
- {
- jisx0208 = FALSE;
- i++;
- if (from[i] == 'I') /* Hankaku Kana */
- hankaku = TRUE;
- else
- hankaku = FALSE;
- }
- }
- else
- {
- if (jisx0208)
- to[j++] = from[i] + 128;
- else if (hankaku)
- {
- to[j++] = SS2;
- to[j++] = from[i] + 128;
- }
- else
- to[j++] = from[i];
- }
- }
- }
- else if (strcmp (code, SJISSTR) == 0)
- {
- for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
- {
- p1 = from[i];
- if (p1 < 127)
- to[j++] = p1;
- else if ((p1 >= 161) && (p1 <= 223))
- { /* Hankaku Kana */
- to[j++] = SS2;
- to[j++] = p1;
- }
- else
- {
- p2 = from[++i];
- SJIStoJIS (&p1, &p2);
- to[j++] = p1 + 128;
- to[j++] = p2 + 128;
- }
- }
- }
- else
- {
- error ("invalid code specification: \"%s\"", code);
- return;
- }
- if (j >= BUFSIZ)
- {
- error ("output buffer overflow at do_convert()");
- ustrcpy (to, from);
- }
- else
- to[j] = '\0';
- #endif /* HAVE_ICONV */
- }
- static int
- do_check_and_conv (unsigned char *to, unsigned char *from)
- {
- static unsigned char tmp[BUFSIZ];
- int p1, p2, i, j;
- int kanji = TRUE;
- switch (DetectKanjiCode (from))
- {
- case NEW:
- debug ("Kanji code is New JIS.");
- do_convert (tmp, from, NEWJISSTR);
- break;
- case OLD:
- debug ("Kanji code is Old JIS.");
- do_convert (tmp, from, OLDJISSTR);
- break;
- case ESCI:
- debug ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
- do_convert (tmp, from, NEWJISSTR);
- break;
- case NEC:
- debug ("Kanji code is NEC Kanji.");
- error ("cannot convert NEC Kanji.");
- ustrcpy (tmp, from);
- kanji = FALSE;
- break;
- case EUC:
- debug ("Kanji code is EUC.");
- ustrcpy (tmp, from);
- break;
- case SJIS:
- debug ("Kanji code is SJIS.");
- do_convert (tmp, from, SJISSTR);
- break;
- case EUCORSJIS:
- debug ("Kanji code is EUC or SJIS.");
- ustrcpy (tmp, from);
- kanji = FALSE;
- break;
- case ASCII:
- debug ("This is ASCII string.");
- ustrcpy (tmp, from);
- kanji = FALSE;
- break;
- default:
- debug ("This string includes unknown code.");
- ustrcpy (tmp, from);
- kanji = FALSE;
- break;
- }
- /* Hankaku Kana ---> Zenkaku Kana */
- if (kanji)
- {
- j = 0;
- for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++)
- {
- if (tmp[i] == SS2)
- {
- p1 = tmp[++i];
- if (tmp[i + 1] == SS2)
- {
- p2 = tmp[i + 2];
- if (p2 == 222 || p2 == 223)
- i += 2;
- else
- p2 = 0;
- }
- else
- p2 = 0;
- han2zen (&p1, &p2);
- SJIStoJIS (&p1, &p2);
- to[j++] = p1 + 128;
- to[j++] = p2 + 128;
- }
- else
- to[j++] = tmp[i];
- }
- if (j >= BUFSIZ)
- {
- error ("output buffer overflow at Hankaku --> Zenkaku");
- ustrcpy (to, tmp);
- }
- else
- to[j] = '\0';
- }
- else
- ustrcpy (to, tmp);
- return kanji;
- }
/*
 * Convert `src` (any encoding DetectKanjiCode() recognises) to EUC-JP and
 * store the NUL-terminated result in `dest`.
 *
 *   dest      destination buffer
 *   src       NUL-terminated input; must be shorter than BUFSIZ
 *   dest_max  size of `dest` in bytes; must not exceed BUFSIZ
 *
 * Returns the value of do_check_and_conv() on success, or -1 on error.
 * When the input is too long or dest_max is too large, `dest` is left
 * untouched.  When the converted text does not fit in dest_max bytes,
 * `dest` receives the original `src`, truncated if necessary so that no
 * more than dest_max bytes (terminator included) are ever written.
 */
int
any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
{
  static unsigned char tmp_dest[BUFSIZ];
  int ret;

  if (strlen ((const char *) src) >= BUFSIZ)
    {
      error ("input string too large");
      return -1;
    }
  if (dest_max > BUFSIZ)
    {
      error ("invalid maximum size of destination\nit should be less than %d.", BUFSIZ);
      return -1;
    }

  ret = do_check_and_conv (tmp_dest, src);

  if (strlen ((const char *) tmp_dest) >= dest_max)
    {
      error ("output buffer overflow");
      /* Fall back to the raw input, but never write beyond dest_max:
         the input itself may be longer than the destination. */
      if (dest_max > 0)
        {
          size_t n = strlen ((const char *) src);

          if (n >= dest_max)
            n = dest_max - 1;
          memcpy (dest, src, n);
          dest[n] = '\0';
        }
      return -1;
    }

  ustrcpy (dest, tmp_dest);
  return ret;
}
- #if 0
/*
 * Return the length in bytes of `s` after conversion with any2eucjp().
 * Returns 0 if the scratch buffer cannot be allocated.
 */
unsigned int
strwidth (unsigned char *s)
{
  unsigned char *t;
  unsigned int i;

  t = (unsigned char *) gdMalloc (BUFSIZ);
  if (t == NULL)
    return 0;

  /* any2eucjp() can fail before writing anything; start from "". */
  t[0] = '\0';
  any2eucjp (t, s, BUFSIZ);
  i = (unsigned int) strlen ((const char *) t);
  gdFree (t);
  return i;
}
- #ifdef DEBUG
/*
 * Debug driver: read one line from stdin, print its length and the length
 * of its converted form, then print the converted text itself.
 */
int
main (void)
{
  unsigned char input[BUFSIZ];
  unsigned char *output;
  unsigned char *str;
  int c = 0;
  int i = 0;

  /* Stop at newline, at end of input, or one byte short of the buffer so
     the terminator always fits. */
  while (i < BUFSIZ - 1 && (c = fgetc (stdin)) != EOF && c != '\n')
    input[i++] = (unsigned char) c;
  input[i] = '\0';

  printf ("input : %lu bytes\n", (unsigned long) strlen ((const char *) input));
  printf ("output: %u bytes\n", strwidth (input));

  output = (unsigned char *) gdMalloc (BUFSIZ);
  if (output == NULL)
    return 1;

  output[0] = '\0';             /* defined content even if conversion fails */
  any2eucjp (output, input, BUFSIZ);
  str = output;
  while (*str != '\0')
    putchar (*(str++));
  putchar ('\n');
  gdFree (output);

  return 0;
}
- #endif
- #endif
|