12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357 |
- /* Copyright (C) 1996-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@redhat.com>, 1996.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, see <http://www.gnu.org/licenses/>. */
- #ifdef HAVE_CONFIG_H
- # include "config.h"
- #endif
- #include <argp.h>
- #include <assert.h>
- #include <ctype.h>
- #include <endian.h>
- #include <errno.h>
- #include <error.h>
- #include <fcntl.h>
- #include <iconv.h>
- #include <langinfo.h>
- #include <locale.h>
- #include <libintl.h>
- #include <limits.h>
- #include <nl_types.h>
- #include <obstack.h>
- #include <stdint.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <unistd.h>
- #include <wchar.h>
- #include "version.h"
- #include "catgetsinfo.h"
/* Reverse the byte order of a 32-bit quantity.

   The argument is narrowed to uint32_t before any shifting so that an
   argument of a wider integer type cannot leak bits above bit 31 into the
   result; the value of the expression always has type uint32_t.  W is
   evaluated several times and therefore must not have side effects.  */
#define SWAPU32(w) \
  ((uint32_t) ((((uint32_t) (w)) << 24)                                  \
               | ((((uint32_t) (w)) & 0xff00u) << 8)                     \
               | ((((uint32_t) (w)) >> 8) & 0xff00u)                     \
               | (((uint32_t) (w)) >> 24)))
/* A single message belonging to a message set.  The messages of one set
   form a singly linked list that is kept sorted by NUMBER.  */
struct message_list
{
  /* Message number inside its set.  */
  int number;
  /* Text of the message after escape processing.  */
  const char *message;
  /* Name of the input file containing the definition.  */
  const char *fname;
  /* Line of FNAME on which the definition starts.  */
  size_t line;
  /* Symbolic name of the message; NULL when it was given by number.  */
  const char *symbol;
  /* Next message of the same set, NULL at the end of the list.  */
  struct message_list *next;
};
/* A message set.  All sets of a catalog are chained through NEXT.  */
struct set_list
{
  /* Internal set number.  find_set stores the requested number plus one
     so that zero can mark an unused table slot.  */
  int number;
  /* Non-zero once the set was named in a `$delset' directive.  */
  int deleted;
  /* Head of the list of messages in this set.  */
  struct message_list *messages;
  /* Highest message number used in this set so far.  */
  int last_message;
  /* Input file and line of the `$set' directive selecting the set.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the set; NULL when it was selected by number.  */
  const char *symbol;
  /* Next set of the catalog, NULL at the end of the list.  */
  struct set_list *next;
};
/* Everything collected from the input files for one output catalog.  */
struct catalog
{
  /* List of every set created so far.  */
  struct set_list *all_sets;
  /* Set that newly read messages are added to.  */
  struct set_list *current_set;
  /* Counter of message lines processed; used to size the hash table.  */
  size_t total_messages;
  /* Current quote character; zero means messages are not quoted.  */
  wint_t quote_char;
  /* Highest set number handed out or seen so far.  */
  int last_set;
  /* Storage for the text of the lines that are kept.  */
  struct obstack mem_pool;
};
- /* If non-zero force creation of new file, not using existing one. */
- static int force_new;
- /* Name of output file. */
- static const char *output_name;
- /* Name of generated C header file. */
- static const char *header_name;
- /* Name and version of program. */
- static void print_version (FILE *stream, struct argp_state *state);
- void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
- #define OPT_NEW 1
- /* Definitions of arguments for argp functions. */
- static const struct argp_option options[] =
- {
- { "header", 'H', N_("NAME"), 0,
- N_("Create C header file NAME containing symbol definitions") },
- { "new", OPT_NEW, NULL, 0,
- N_("Do not use existing catalog, force new output file") },
- { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
- { NULL, 0, NULL, 0, NULL }
- };
- /* Short description of program. */
- static const char doc[] = N_("Generate message catalog.\
- \vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
- is -, output is written to standard output.\n");
- /* Strings for arguments in help texts. */
- static const char args_doc[] = N_("\
- -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
- /* Prototype for option handler. */
- static error_t parse_opt (int key, char *arg, struct argp_state *state);
- /* Function to print some extra text in the help message. */
- static char *more_help (int key, const char *text, void *input);
- /* Data structure to communicate with argp functions. */
- static struct argp argp =
- {
- options, parse_opt, args_doc, doc, NULL, more_help
- };
- /* Wrapper functions with error checking for standard functions. */
- #include <programs/xmalloc.h>
/* Forward declarations of the file-local functions.  */

/* Replacement for the program-name printer of the error functions.  */
static void error_print (void);

/* Reading: parse one input file into the catalog, look up or create a
   message set, and post-process the text of one message.  */
static struct catalog *read_input_file (struct catalog *current,
                                        const char *fname);
static struct set_list *find_set (struct catalog *current, int number);
static void normalize_line (const char *fname, size_t line, iconv_t cd,
                            wchar_t *string, wchar_t quote_char,
                            wchar_t escape_char);
static int open_conversion (const char *codesetp, iconv_t *cd_towcp,
                            iconv_t *cd_tombp, wchar_t *escape_charp);

/* Writing: load an already existing catalog and emit the result.  */
static void read_old (struct catalog *catalog, const char *file_name);
static void write_out (struct catalog *result, const char *output_name,
                       const char *header_name);
- int
- main (int argc, char *argv[])
- {
- struct catalog *result;
- int remaining;
- /* Set program name for messages. */
- error_print_progname = error_print;
- /* Set locale via LC_ALL. */
- setlocale (LC_ALL, "");
- /* Set the text message domain. */
- textdomain (PACKAGE);
- /* Initialize local variables. */
- result = NULL;
- /* Parse and process arguments. */
- argp_parse (&argp, argc, argv, 0, &remaining, NULL);
- /* Determine output file. */
- if (output_name == NULL)
- output_name = remaining < argc ? argv[remaining++] : "-";
- /* Process all input files. */
- setlocale (LC_CTYPE, "C");
- if (remaining < argc)
- do
- result = read_input_file (result, argv[remaining]);
- while (++remaining < argc);
- else
- result = read_input_file (NULL, "-");
- /* Write out the result. */
- if (result != NULL)
- write_out (result, output_name, header_name);
- return error_message_count != 0;
- }
- /* Handle program arguments. */
- static error_t
- parse_opt (int key, char *arg, struct argp_state *state)
- {
- switch (key)
- {
- case 'H':
- header_name = arg;
- break;
- case OPT_NEW:
- force_new = 1;
- break;
- case 'o':
- output_name = arg;
- break;
- default:
- return ARGP_ERR_UNKNOWN;
- }
- return 0;
- }
- static char *
- more_help (int key, const char *text, void *input)
- {
- char *tp = NULL;
- switch (key)
- {
- case ARGP_KEY_HELP_EXTRA:
- /* We print some extra information. */
- if (asprintf (&tp, gettext ("\
- For bug reporting instructions, please see:\n\
- %s.\n"), REPORT_BUGS_TO) < 0)
- return NULL;
- return tp;
- default:
- break;
- }
- return (char *) text;
- }
- /* Print the version information. */
- static void
- print_version (FILE *stream, struct argp_state *state)
- {
- fprintf (stream, "gencat %s%s\n", PKGVERSION, VERSION);
- fprintf (stream, gettext ("\
- Copyright (C) %s Free Software Foundation, Inc.\n\
- This is free software; see the source for copying conditions. There is NO\n\
- warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
- "), "2019");
- fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
- }
/* Installed as error_print_progname by main.  The body is intentionally
   empty: with this hook in place the error functions print nothing in
   front of a diagnostic, which keeps the `file:line: message' form that
   Emacs' compile.el expects.  */
static void
error_print (void)
{
}
- static struct catalog *
- read_input_file (struct catalog *current, const char *fname)
- {
- FILE *fp;
- char *buf;
- size_t len;
- size_t line_number;
- wchar_t *wbuf;
- size_t wbufsize;
- iconv_t cd_towc = (iconv_t) -1;
- iconv_t cd_tomb = (iconv_t) -1;
- wchar_t escape_char = L'\\';
- char *codeset = NULL;
- if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0)
- {
- fp = stdin;
- fname = gettext ("*standard input*");
- }
- else
- fp = fopen (fname, "r");
- if (fp == NULL)
- {
- error (0, errno, gettext ("cannot open input file `%s'"), fname);
- return current;
- }
- /* If we haven't seen anything yet, allocate result structure. */
- if (current == NULL)
- {
- current = (struct catalog *) xcalloc (1, sizeof (*current));
- #define obstack_chunk_alloc malloc
- #define obstack_chunk_free free
- obstack_init (¤t->mem_pool);
- current->current_set = find_set (current, NL_SETD);
- }
- buf = NULL;
- len = 0;
- line_number = 0;
- wbufsize = 1024;
- wbuf = (wchar_t *) xmalloc (wbufsize);
- while (!feof (fp))
- {
- int continued;
- int used;
- size_t start_line = line_number + 1;
- char *this_line;
- do
- {
- int act_len;
- act_len = getline (&buf, &len, fp);
- if (act_len <= 0)
- break;
- ++line_number;
- /* It the line continued? */
- continued = 0;
- if (buf[act_len - 1] == '\n')
- {
- --act_len;
- /* There might be more than one backslash at the end of
- the line. Only if there is an odd number of them is
- the line continued. */
- if (act_len > 0 && buf[act_len - 1] == '\\')
- {
- int temp_act_len = act_len;
- do
- {
- --temp_act_len;
- continued = !continued;
- }
- while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\');
- if (continued)
- --act_len;
- }
- }
- /* Append to currently selected line. */
- obstack_grow (¤t->mem_pool, buf, act_len);
- }
- while (continued);
- obstack_1grow (¤t->mem_pool, '\0');
- this_line = (char *) obstack_finish (¤t->mem_pool);
- used = 0;
- if (this_line[0] == '$')
- {
- if (isblank (this_line[1]))
- {
- int cnt = 1;
- while (isblank (this_line[cnt]))
- ++cnt;
- if (strncmp (&this_line[cnt], "codeset=", 8) != 0)
- /* This is a comment line. Do nothing. */;
- else if (codeset != NULL)
- /* Ignore multiple codeset. */;
- else
- {
- int start = cnt + 8;
- cnt = start;
- while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
- ++cnt;
- if (cnt != start)
- {
- int len = cnt - start;
- codeset = xmalloc (len + 1);
- *((char *) mempcpy (codeset, &this_line[start], len))
- = '\0';
- }
- }
- }
- else if (strncmp (&this_line[1], "set", 3) == 0)
- {
- int cnt = sizeof ("set");
- int set_number;
- const char *symbol = NULL;
- while (isspace (this_line[cnt]))
- ++cnt;
- if (isdigit (this_line[cnt]))
- {
- set_number = atol (&this_line[cnt]);
- /* If the given number for the character set is
- higher than any we used for symbolic set names
- avoid clashing by using only higher numbers for
- the following symbolic definitions. */
- if (set_number > current->last_set)
- current->last_set = set_number;
- }
- else
- {
- /* See whether it is a reasonable identifier. */
- int start = cnt;
- while (isalnum (this_line[cnt]) || this_line[cnt] == '_')
- ++cnt;
- if (cnt == start)
- {
- /* No correct character found. */
- error_at_line (0, 0, fname, start_line,
- gettext ("illegal set number"));
- set_number = 0;
- }
- else
- {
- /* We have found seomthing that looks like a
- correct identifier. */
- struct set_list *runp;
- this_line[cnt] = '\0';
- used = 1;
- symbol = &this_line[start];
- /* Test whether the identifier was already used. */
- runp = current->all_sets;
- while (runp != 0)
- if (runp->symbol != NULL
- && strcmp (runp->symbol, symbol) == 0)
- break;
- else
- runp = runp->next;
- if (runp != NULL)
- {
- /* We cannot allow duplicate identifiers for
- message sets. */
- error_at_line (0, 0, fname, start_line,
- gettext ("duplicate set definition"));
- error_at_line (0, 0, runp->fname, runp->line,
- gettext ("\
- this is the first definition"));
- set_number = 0;
- }
- else
- /* Allocate next free message set for identifier. */
- set_number = ++current->last_set;
- }
- }
- if (set_number != 0)
- {
- /* We found a legal set number. */
- current->current_set = find_set (current, set_number);
- if (symbol != NULL)
- used = 1;
- current->current_set->symbol = symbol;
- current->current_set->fname = fname;
- current->current_set->line = start_line;
- }
- }
- else if (strncmp (&this_line[1], "delset", 6) == 0)
- {
- int cnt = sizeof ("delset");
- while (isspace (this_line[cnt]))
- ++cnt;
- if (isdigit (this_line[cnt]))
- {
- size_t set_number = atol (&this_line[cnt]);
- struct set_list *set;
- /* Mark the message set with the given number as
- deleted. */
- set = find_set (current, set_number);
- set->deleted = 1;
- }
- else
- {
- /* See whether it is a reasonable identifier. */
- int start = cnt;
- while (isalnum (this_line[cnt]) || this_line[cnt] == '_')
- ++cnt;
- if (cnt == start)
- error_at_line (0, 0, fname, start_line,
- gettext ("illegal set number"));
- else
- {
- const char *symbol;
- struct set_list *runp;
- this_line[cnt] = '\0';
- used = 1;
- symbol = &this_line[start];
- /* We have a symbolic set name. This name must
- appear somewhere else in the catalogs read so
- far. */
- for (runp = current->all_sets; runp != NULL;
- runp = runp->next)
- {
- if (strcmp (runp->symbol, symbol) == 0)
- {
- runp->deleted = 1;
- break;
- }
- }
- if (runp == NULL)
- /* Name does not exist before. */
- error_at_line (0, 0, fname, start_line,
- gettext ("unknown set `%s'"), symbol);
- }
- }
- }
- else if (strncmp (&this_line[1], "quote", 5) == 0)
- {
- char buf[2];
- char *bufptr;
- size_t buflen;
- char *wbufptr;
- size_t wbuflen;
- int cnt;
- cnt = sizeof ("quote");
- while (isspace (this_line[cnt]))
- ++cnt;
- /* We need the conversion. */
- if (cd_towc == (iconv_t) -1
- && open_conversion (codeset, &cd_towc, &cd_tomb,
- &escape_char) != 0)
- /* Something is wrong. */
- goto out;
- /* Yes, the quote char can be '\0'; this means no quote
- char. The function using the information works on
- wide characters so we have to convert it here. */
- buf[0] = this_line[cnt];
- buf[1] = '\0';
- bufptr = buf;
- buflen = 2;
- wbufptr = (char *) wbuf;
- wbuflen = wbufsize;
- /* Flush the state. */
- iconv (cd_towc, NULL, NULL, NULL, NULL);
- iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen);
- if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2])
- error_at_line (0, 0, fname, start_line,
- gettext ("invalid quote character"));
- else
- /* Use the converted wide character. */
- current->quote_char = wbuf[0];
- }
- else
- {
- int cnt;
- cnt = 2;
- while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
- ++cnt;
- this_line[cnt] = '\0';
- error_at_line (0, 0, fname, start_line,
- gettext ("unknown directive `%s': line ignored"),
- &this_line[1]);
- }
- }
- else if (isalnum (this_line[0]) || this_line[0] == '_')
- {
- const char *ident = this_line;
- char *line = this_line;
- int message_number;
- do
- ++line;
- while (line[0] != '\0' && !isspace (line[0]));
- if (line[0] != '\0')
- *line++ = '\0'; /* Terminate the identifier. */
- /* Now we found the beginning of the message itself. */
- if (isdigit (ident[0]))
- {
- struct message_list *runp;
- struct message_list *lastp;
- message_number = atoi (ident);
- /* Find location to insert the new message. */
- runp = current->current_set->messages;
- lastp = NULL;
- while (runp != NULL)
- if (runp->number == message_number)
- break;
- else
- {
- lastp = runp;
- runp = runp->next;
- }
- if (runp != NULL)
- {
- /* Oh, oh. There is already a message with this
- number in the message set. */
- if (runp->symbol == NULL)
- {
- /* The existing message had its number specified
- by the user. Fatal collision type uh, oh. */
- error_at_line (0, 0, fname, start_line,
- gettext ("duplicated message number"));
- error_at_line (0, 0, runp->fname, runp->line,
- gettext ("this is the first definition"));
- message_number = 0;
- }
- else
- {
- /* Collision was with number auto-assigned to a
- symbolic. Change existing symbolic number
- and move to end the list (if not already there). */
- runp->number = ++current->current_set->last_message;
- if (runp->next != NULL)
- {
- struct message_list *endp;
- if (lastp == NULL)
- current->current_set->messages=runp->next;
- else
- lastp->next=runp->next;
- endp = runp->next;
- while (endp->next != NULL)
- endp = endp->next;
- endp->next = runp;
- runp->next = NULL;
- }
- }
- }
- ident = NULL; /* We don't have a symbol. */
- if (message_number != 0
- && message_number > current->current_set->last_message)
- current->current_set->last_message = message_number;
- }
- else if (ident[0] != '\0')
- {
- struct message_list *runp;
- /* Test whether the symbolic name was not used for
- another message in this message set. */
- runp = current->current_set->messages;
- while (runp != NULL)
- if (runp->symbol != NULL && strcmp (ident, runp->symbol) == 0)
- break;
- else
- runp = runp->next;
- if (runp != NULL)
- {
- /* The name is already used. */
- error_at_line (0, 0, fname, start_line, gettext ("\
- duplicated message identifier"));
- error_at_line (0, 0, runp->fname, runp->line,
- gettext ("this is the first definition"));
- message_number = 0;
- }
- else
- /* Give the message the next unused number. */
- message_number = ++current->current_set->last_message;
- }
- else
- message_number = 0;
- if (message_number != 0)
- {
- char *inbuf;
- size_t inlen;
- char *outbuf;
- size_t outlen;
- struct message_list *newp;
- size_t line_len = strlen (line) + 1;
- size_t ident_len = 0;
- /* We need the conversion. */
- if (cd_towc == (iconv_t) -1
- && open_conversion (codeset, &cd_towc, &cd_tomb,
- &escape_char) != 0)
- /* Something is wrong. */
- goto out;
- /* Convert to a wide character string. We have to
- interpret escape sequences which will be impossible
- without doing the conversion if the codeset of the
- message is stateful. */
- while (1)
- {
- inbuf = line;
- inlen = line_len;
- outbuf = (char *) wbuf;
- outlen = wbufsize;
- /* Flush the state. */
- iconv (cd_towc, NULL, NULL, NULL, NULL);
- iconv (cd_towc, &inbuf, &inlen, &outbuf, &outlen);
- if (inlen == 0)
- {
- /* The string is converted. */
- assert (outlen < wbufsize);
- assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1]
- == L'\0');
- break;
- }
- if (outlen != 0)
- {
- /* Something is wrong with this string, we ignore it. */
- error_at_line (0, 0, fname, start_line, gettext ("\
- invalid character: message ignored"));
- goto ignore;
- }
- /* The output buffer is too small. */
- wbufsize *= 2;
- wbuf = (wchar_t *) xrealloc (wbuf, wbufsize);
- }
- /* Strip quote characters, change escape sequences into
- correct characters etc. */
- normalize_line (fname, start_line, cd_towc, wbuf,
- current->quote_char, escape_char);
- if (ident)
- ident_len = line - this_line;
- /* Now the string is free of escape sequences. Convert it
- back into a multibyte character string. First free the
- memory allocated for the original string. */
- obstack_free (¤t->mem_pool, this_line);
- used = 1; /* Yes, we use the line. */
- /* Now fill in the new string. It should never happen that
- the replaced string is longer than the original. */
- inbuf = (char *) wbuf;
- inlen = (wcslen (wbuf) + 1) * sizeof (wchar_t);
- outlen = obstack_room (¤t->mem_pool);
- obstack_blank (¤t->mem_pool, outlen);
- this_line = (char *) obstack_base (¤t->mem_pool);
- outbuf = this_line + ident_len;
- outlen -= ident_len;
- /* Flush the state. */
- iconv (cd_tomb, NULL, NULL, NULL, NULL);
- iconv (cd_tomb, &inbuf, &inlen, &outbuf, &outlen);
- if (inlen != 0)
- {
- error_at_line (0, 0, fname, start_line,
- gettext ("invalid line"));
- goto ignore;
- }
- assert (outbuf[-1] == '\0');
- /* Free the memory in the obstack we don't use. */
- obstack_blank (¤t->mem_pool, -(int) outlen);
- line = obstack_finish (¤t->mem_pool);
- newp = (struct message_list *) xmalloc (sizeof (*newp));
- newp->number = message_number;
- newp->message = line + ident_len;
- /* Remember symbolic name; is NULL if no is given. */
- newp->symbol = ident ? line : NULL;
- /* Remember where we found the character. */
- newp->fname = fname;
- newp->line = start_line;
- /* Find place to insert to message. We keep them in a
- sorted single linked list. */
- if (current->current_set->messages == NULL
- || current->current_set->messages->number > message_number)
- {
- newp->next = current->current_set->messages;
- current->current_set->messages = newp;
- }
- else
- {
- struct message_list *runp;
- runp = current->current_set->messages;
- while (runp->next != NULL)
- if (runp->next->number > message_number)
- break;
- else
- runp = runp->next;
- newp->next = runp->next;
- runp->next = newp;
- }
- }
- ++current->total_messages;
- }
- else
- {
- size_t cnt;
- cnt = 0;
- /* See whether we have any non-white space character in this
- line. */
- while (this_line[cnt] != '\0' && isspace (this_line[cnt]))
- ++cnt;
- if (this_line[cnt] != '\0')
- /* Yes, some unknown characters found. */
- error_at_line (0, 0, fname, start_line,
- gettext ("malformed line ignored"));
- }
- ignore:
- /* We can save the memory for the line if it was not used. */
- if (!used)
- obstack_free (¤t->mem_pool, this_line);
- }
- /* Close the conversion modules. */
- iconv_close (cd_towc);
- iconv_close (cd_tomb);
- free (codeset);
- out:
- free (wbuf);
- if (fp != stdin)
- fclose (fp);
- return current;
- }
- static void
- write_out (struct catalog *catalog, const char *output_name,
- const char *header_name)
- {
- /* Computing the "optimal" size. */
- struct set_list *set_run;
- size_t best_total, best_size, best_depth;
- size_t act_size, act_depth;
- struct catalog_obj obj;
- struct obstack string_pool;
- const char *strings;
- size_t strings_size;
- uint32_t *array1, *array2;
- size_t cnt;
- int fd;
- /* If not otherwise told try to read file with existing
- translations. */
- if (!force_new)
- read_old (catalog, output_name);
- /* Initialize best_size with a very high value. */
- best_total = best_size = best_depth = UINT_MAX;
- /* We need some start size for testing. Let's start with
- TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
- 5. */
- act_size = 1 + catalog->total_messages / 5;
- /* We determine the size of a hash table here. Because the message
- numbers can be chosen arbitrary by the programmer we cannot use
- the simple method of accessing the array using the message
- number. The algorithm is based on the trivial hash function
- NUMBER % TABLE_SIZE, where collisions are stored in a second
- dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
- the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */
- while (act_size <= best_total)
- {
- size_t deep[act_size];
- act_depth = 1;
- memset (deep, '\0', act_size * sizeof (size_t));
- set_run = catalog->all_sets;
- while (set_run != NULL)
- {
- struct message_list *message_run;
- message_run = set_run->messages;
- while (message_run != NULL)
- {
- size_t idx = (message_run->number * set_run->number) % act_size;
- ++deep[idx];
- if (deep[idx] > act_depth)
- {
- act_depth = deep[idx];
- if (act_depth * act_size > best_total)
- break;
- }
- message_run = message_run->next;
- }
- set_run = set_run->next;
- }
- if (act_depth * act_size <= best_total)
- {
- /* We have found a better solution. */
- best_total = act_depth * act_size;
- best_size = act_size;
- best_depth = act_depth;
- }
- ++act_size;
- }
- /* let's be prepared for an empty message file. */
- if (best_size == UINT_MAX)
- {
- best_size = 1;
- best_depth = 1;
- }
- /* OK, now we have the size we will use. Fill in the header, build
- the table and the second one with swapped byte order. */
- obj.magic = CATGETS_MAGIC;
- obj.plane_size = best_size;
- obj.plane_depth = best_depth;
- /* Allocate room for all needed arrays. */
- array1 =
- (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3);
- memset (array1, '\0', best_size * best_depth * sizeof (uint32_t) * 3);
- array2
- = (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3);
- obstack_init (&string_pool);
- set_run = catalog->all_sets;
- while (set_run != NULL)
- {
- struct message_list *message_run;
- message_run = set_run->messages;
- while (message_run != NULL)
- {
- size_t idx = (((message_run->number * set_run->number) % best_size)
- * 3);
- /* Determine collision depth. */
- while (array1[idx] != 0)
- idx += best_size * 3;
- /* Store set number, message number and pointer into string
- space, relative to the first string. */
- array1[idx + 0] = set_run->number;
- array1[idx + 1] = message_run->number;
- array1[idx + 2] = obstack_object_size (&string_pool);
- /* Add current string to the continuous space containing all
- strings. */
- obstack_grow0 (&string_pool, message_run->message,
- strlen (message_run->message));
- message_run = message_run->next;
- }
- set_run = set_run->next;
- }
- strings_size = obstack_object_size (&string_pool);
- strings = obstack_finish (&string_pool);
- /* Compute ARRAY2 by changing the byte order. */
- for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt)
- array2[cnt] = SWAPU32 (array1[cnt]);
- /* Now we can write out the whole data. */
- if (strcmp (output_name, "-") == 0
- || strcmp (output_name, "/dev/stdout") == 0)
- fd = STDOUT_FILENO;
- else
- {
- fd = creat (output_name, 0666);
- if (fd < 0)
- error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"),
- output_name);
- }
- /* Write out header. */
- write (fd, &obj, sizeof (obj));
- /* We always write out the little endian version of the index
- arrays. */
- #if __BYTE_ORDER == __LITTLE_ENDIAN
- write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3);
- write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3);
- #elif __BYTE_ORDER == __BIG_ENDIAN
- write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3);
- write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3);
- #else
- # error Cannot handle __BYTE_ORDER byte order
- #endif
- /* Finally write the strings. */
- write (fd, strings, strings_size);
- if (fd != STDOUT_FILENO)
- close (fd);
- /* If requested now write out the header file. */
- if (header_name != NULL)
- {
- int first = 1;
- FILE *fp;
- /* Open output file. "-" or "/dev/stdout" means write to
- standard output. */
- if (strcmp (header_name, "-") == 0
- || strcmp (header_name, "/dev/stdout") == 0)
- fp = stdout;
- else
- {
- fp = fopen (header_name, "w");
- if (fp == NULL)
- error (EXIT_FAILURE, errno,
- gettext ("cannot open output file `%s'"), header_name);
- }
- /* Iterate over all sets and all messages. */
- set_run = catalog->all_sets;
- while (set_run != NULL)
- {
- struct message_list *message_run;
- /* If the current message set has a symbolic name write this
- out first. */
- if (set_run->symbol != NULL)
- fprintf (fp, "%s#define %sSet %#x\t/* %s:%Zu */\n",
- first ? "" : "\n", set_run->symbol, set_run->number - 1,
- set_run->fname, set_run->line);
- first = 0;
- message_run = set_run->messages;
- while (message_run != NULL)
- {
- /* If the current message has a symbolic name write
- #define out. But we have to take care for the set
- not having a symbolic name. */
- if (message_run->symbol != NULL)
- {
- if (set_run->symbol == NULL)
- fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
- set_run->number, message_run->symbol,
- message_run->number, message_run->fname,
- message_run->line);
- else
- fprintf (fp, "#define %s%s %#x\t/* %s:%Zu */\n",
- set_run->symbol, message_run->symbol,
- message_run->number, message_run->fname,
- message_run->line);
- }
- message_run = message_run->next;
- }
- set_run = set_run->next;
- }
- if (fp != stdout)
- fclose (fp);
- }
- }
- static struct set_list *
- find_set (struct catalog *current, int number)
- {
- struct set_list *result = current->all_sets;
- /* We must avoid set number 0 because a set of this number signals
- in the tables that the entry is not occupied. */
- ++number;
- while (result != NULL)
- if (result->number == number)
- return result;
- else
- result = result->next;
- /* Prepare new message set. */
- result = (struct set_list *) xcalloc (1, sizeof (*result));
- result->number = number;
- result->next = current->all_sets;
- current->all_sets = result;
- return result;
- }
/* Process the quote characters and escape sequences of the message
   STRING.  The result overwrites STRING and is never longer than it.

   FNAME and LINE identify the input position for diagnostics.  CD is
   the conversion descriptor used to turn the byte given by an octal
   escape into a wide character.  QUOTE_CHAR is the quote character,
   L'\0' if there is none.  ESCAPE_CHAR introduces an escape sequence.

   The message ends at the first quote character that is not escaped.  A
   message starting with the quote character must also end with one,
   otherwise "unterminated message" is reported.  */
static void
normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
                wchar_t quote_char, wchar_t escape_char)
{
  /* The letters of the one-character escape sequences and, at the same
     position, the characters they stand for.  */
  static const wchar_t escape_letters[] = L"ntvbrf";
  static const wchar_t escape_values[] = L"\n\t\v\b\r\f";

  const int have_quote = quote_char != L'\0';
  const int is_quoted = have_quote && string[0] == quote_char;
  wchar_t *src = is_quoted ? string + 1 : string;
  wchar_t *dst = string;

  for (;;)
    {
      const wchar_t *simple;

      /* The end of the string or the first quote character which is not
         escaped finishes the message.  */
      if (*src == L'\0' || *src == quote_char)
        break;

      if (*src != escape_char)
        {
          /* Ordinary character.  */
          *dst++ = *src++;
          continue;
        }

      /* Skip the escape character and look at what it introduces.  */
      ++src;

      if (have_quote && *src == quote_char)
        {
          /* An escaped quote character; this is an extension to XPG.  */
          *dst++ = *src++;
          continue;
        }

      /* wcschr would also find the terminator of the table, hence the
         explicit test for the end of the string.  */
      simple = *src != L'\0' ? wcschr (escape_letters, *src) : NULL;

      if (simple != NULL)
        {
          *dst++ = escape_values[simple - escape_letters];
          ++src;
        }
      else if (*src >= L'0' && *src <= L'7')
        {
          /* Octal escape: the value of a byte in the character set of
             the input.  Digits are taken as long as the value still
             fits into one byte.  */
          int value;
          char bytes[2];
          char *inptr;
          size_t inleft;
          wchar_t wide[2];
          char *outptr;
          size_t outleft;

          value = *src++ - L'0';
          while (value <= (255 / 8) && *src >= L'0' && *src <= L'7')
            {
              value *= 8;
              value += *src++ - L'0';
            }

          bytes[0] = (char) value;
          bytes[1] = '\0';
          inptr = bytes;
          inleft = 2;

          outptr = (char *) wide;
          outleft = sizeof (wide);

          /* Flush the state.  */
          iconv (cd, NULL, NULL, NULL, NULL);

          iconv (cd, &inptr, &inleft, &outptr, &outleft);
          /* The byte and its terminator must have become exactly two
             wide characters.  */
          if (inptr != &bytes[2] || (wchar_t *) outptr != &wide[2])
            error_at_line (0, 0, fname, line,
                           gettext ("invalid escape sequence"));
          else
            *dst++ = wide[0];
        }
      else if (*src == escape_char)
        {
          /* A doubled escape character stands for itself.  */
          *dst++ = escape_char;
          ++src;
        }
      /* Otherwise the escape character is simply dropped and the
         character following it is handled in the next round.  */
    }

  /* If we saw a quote character at the beginning we expect another
     one at the end.  */
  if (is_quoted && *src != quote_char)
    error_at_line (0, 0, fname, line, gettext ("unterminated message"));

  /* Terminate string.  */
  *dst = L'\0';
}
- static void
- read_old (struct catalog *catalog, const char *file_name)
- {
- struct catalog_info old_cat_obj;
- struct set_list *set = NULL;
- int last_set = -1;
- size_t cnt;
- /* Try to open catalog, but don't look through the NLSPATH. */
- if (__open_catalog (file_name, NULL, NULL, &old_cat_obj) != 0)
- {
- if (errno == ENOENT)
- /* No problem, the catalog simply does not exist. */
- return;
- else
- error (EXIT_FAILURE, errno,
- gettext ("while opening old catalog file"));
- }
- /* OK, we have the catalog loaded. Now read all messages and merge
- them. When set and message number clash for any message the new
- one is used. If the new one is empty it indicates that the
- message should be deleted. */
- for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt)
- {
- struct message_list *message, *last;
- if (old_cat_obj.name_ptr[cnt * 3 + 0] == 0)
- /* No message in this slot. */
- continue;
- if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (uint32_t) last_set)
- {
- last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1;
- set = find_set (catalog, old_cat_obj.name_ptr[cnt * 3 + 0] - 1);
- }
- last = NULL;
- message = set->messages;
- while (message != NULL)
- {
- if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1])
- break;
- last = message;
- message = message->next;
- }
- if (message == NULL
- || (uint32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1])
- {
- /* We have found a message which is not yet in the catalog.
- Insert it at the right position. */
- struct message_list *newp;
- newp = (struct message_list *) xmalloc (sizeof(*newp));
- newp->number = old_cat_obj.name_ptr[cnt * 3 + 1];
- newp->message =
- &old_cat_obj.strings[old_cat_obj.name_ptr[cnt * 3 + 2]];
- newp->fname = NULL;
- newp->line = 0;
- newp->symbol = NULL;
- newp->next = message;
- if (last == NULL)
- set->messages = newp;
- else
- last->next = newp;
- ++catalog->total_messages;
- }
- else if (*message->message == '\0')
- {
- /* The new empty message has overridden the old one thus
- "deleting" it as required. Now remove the empty remains. */
- if (last == NULL)
- set->messages = message->next;
- else
- last->next = message->next;
- }
- }
- }
/* Open the conversion descriptors needed to process input written in
   CODESET and determine the wide character used as escape character.

   CODESET names the character set of the input; if it is NULL the
   codeset of the locale selected by the environment is used.

   On success 0 is returned, *CD_TOWCP converts from CODESET to
   wchar_t, *CD_TOMBP converts from wchar_t to CODESET, and
   *ESCAPE_CHARP is the wide character the byte 0x5c maps to (or
   L'\\' if that could not be determined).

   If either descriptor cannot be opened a diagnostic is printed and 1
   is returned.  In that case no descriptor is left open and both
   *CD_TOWCP and *CD_TOMBP are set to (iconv_t) -1, so the caller can
   neither leak nor close a descriptor twice; *ESCAPE_CHARP is not
   modified.  */
static int
open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
                 wchar_t *escape_charp)
{
  char buf[2];
  char *bufptr;
  size_t bufsize;
  wchar_t wbuf[2];
  char *wbufptr;
  size_t wbufsize;

  /* If the input file does not specify the codeset use the locale's.
     The locale is switched only for the query and set back to "C"
     right afterwards.  */
  if (codeset == NULL)
    {
      setlocale (LC_ALL, "");
      codeset = nl_langinfo (CODESET);
      setlocale (LC_ALL, "C");
    }

  /* Get the conversion modules.  */
  *cd_towcp = iconv_open ("WCHAR_T", codeset);
  *cd_tombp = iconv_open (codeset, "WCHAR_T");
  if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
    {
      error (0, 0, gettext ("conversion modules not available"));

      /* Release whichever direction could be opened.  Either one may
         be the one that succeeded, and the handles must not be left
         dangling for the caller.  */
      if (*cd_towcp != (iconv_t) -1)
        {
          iconv_close (*cd_towcp);
          *cd_towcp = (iconv_t) -1;
        }
      if (*cd_tombp != (iconv_t) -1)
        {
          iconv_close (*cd_tombp);
          *cd_tombp = (iconv_t) -1;
        }

      return 1;
    }

  /* One special case for historical reasons is the backslash
     character.  In some codesets the byte value 0x5c is not mapped to
     U005c in Unicode.  These charsets then don't have a backslash
     character at all.  Therefore we have to live with whatever the
     codeset provides and recognize, instead of the U005c, the character
     the byte value 0x5c is mapped to.  */
  buf[0] = '\\';
  buf[1] = '\0';
  bufptr = buf;
  bufsize = 2;

  wbufptr = (char *) wbuf;
  wbufsize = sizeof (wbuf);

  /* Convert the byte together with its terminating NUL.  Success means
     both input bytes were consumed and exactly two wide characters
     were produced.  */
  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
  if (bufsize != 0 || wbufsize != 0)
    {
      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
         on with using U005c.  */
      error (0, 0, gettext ("cannot determine escape character"));
      *escape_charp = L'\\';
    }
  else
    *escape_charp = wbuf[0];

  return 0;
}
|