41 #include <yaz/yaz-util.h>
43 #define CHR_MAXSTR 1024
44 #define CHR_MAXEQUIV 32
93 const char *from,
int len,
char *to,
100 root = (
chr_t_entry *) nmem_malloc(nmem,
sizeof(*root));
107 strcmp((
const char *) root->
target[0], to))
113 yaz_log(YLOG_WARN,
"duplicate entry for charmap from '%s'",
116 root->
target = (
unsigned char **)
117 nmem_malloc(nmem,
sizeof(*root->
target)*2);
118 root->
target[0] = (
unsigned char *) nmem_strdup(nmem, to);
130 for (i = 0; i < 256; i++)
133 if (!(root->
children[(
unsigned char) *from] =
135 from + 1, len - 1, to, from_0)))
154 const char *old_from = *from;
168 if (!res && t->
children[(
unsigned char) **from])
191 return (
const char **) (res->
target);
204 return (
const char **) (res->
target);
208 const char **from,
int len,
int first)
218 return (
const char **) (res->
target);
223 unsigned char c = ** (
unsigned char **) from;
224 const char *out = (
const char*) maptab->
output[c];
245 if (**s ==
'\\' && 1[*s])
251 case '\\': c =
'\\'; (*s)++;
break;
252 case 'r': c =
'\r'; (*s)++;
break;
253 case 'n': c =
'\n'; (*s)++;
break;
254 case 't': c =
'\t'; (*s)++;
break;
255 case 's': c =
' '; (*s)++;
break;
262 sscanf(fmtstr,
"%x", &i);
283 sscanf(fmtstr,
"%o", &i);
296 sscanf(fmtstr,
"%x", &i);
310 yaz_log(YLOG_DEBUG,
"out %d", c);
323 tmp[0] = num; tmp[1] =
'\0';
326 (
unsigned char *) nmem_strdup(tab->
nmem, s);
357 const char **res, *p = s;
361 yaz_log(YLOG_WARN,
"Map: '%s' has no mapping", p);
386 yaz_log(YLOG_DEBUG,
"set map %.*s", (
int) strlen(s), s);
389 for (s = arg->
string; *s; s++)
390 yaz_log(YLOG_DEBUG,
" %3d", (
unsigned char) *s);
394 char *outbuf,
size_t outbytesleft)
396 size_t inbytesleft = inlen *
sizeof(
ucs4_t);
397 char *inbuf = (
char*) from;
404 ret = yaz_iconv(t, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
405 if (ret != (
size_t) (-1))
406 ret = yaz_iconv(t, 0, 0, &outbuf, &outbytesleft);
409 if (ret == (
size_t) (-1))
411 yaz_log(YLOG_LOG,
"from: %2X %2X %2X %2X",
412 from[0], from[1], from[2], from[3]);
413 yaz_log(YLOG_WARN|YLOG_ERRNO,
"bad unicode sequence");
422 yaz_iconv_t t_unicode, yaz_iconv_t t_utf8,
423 void (*fun)(
const char *c,
void *data,
int num),
424 void *data,
int *num)
436 char *outbuf = (
char *) arg;
437 char *inbuf = s_native;
438 size_t outbytesleft =
sizeof(arg)-4;
439 size_t inbytesleft = strlen(s_native);
441 ret = yaz_iconv(t_unicode, &inbuf, &inbytesleft,
442 &outbuf, &outbytesleft);
443 if (ret != (
size_t)(-1))
444 ret = yaz_iconv(t_unicode, 0, 0, &outbuf, &outbytesleft);
446 if (ret == (
size_t)(-1))
448 i = (outbuf - (
char*) arg)/
sizeof(
ucs4_t);
452 for (i = 0; s_native[i]; i++)
453 arg[i] = s_native[i] & 255;
456 if (s[0] == 0xfeff || s[0] == 0xfeff)
467 yaz_log(YLOG_FATAL,
"Bad range in char-map");
474 yaz_log(YLOG_FATAL,
"Bad range in char-map");
482 (*fun)(str, data, num ? (*num)++ : 0);
488 while (*s !=
')' || s[-1] ==
'\\')
492 yaz_log(YLOG_FATAL,
"Missing ) in charmap");
500 (*fun)(str, data, num ? (*num)++ : 0);
507 (*fun)(str, data, num ? (*num)++ : 0);
517 char line[512], *argv[50];
520 int no_directives = 0;
524 yaz_iconv_t t_unicode = 0;
525 yaz_iconv_t t_utf8 = 0;
526 unsigned endian = 31;
527 const char *ucs4_native =
"UCS-4";
529 yaz_log(YLOG_DEBUG,
"maptab %s open", name);
530 if (!(f = yaz_fopen(tabpath, name,
"r", tabroot)))
532 yaz_log(YLOG_WARN|YLOG_ERRNO,
"%s", name);
536 if (*(
char*) &endian == 31)
537 ucs4_native =
"UCS-4LE";
539 t_utf8 = yaz_iconv_open(
"UTF-8", ucs4_native);
541 nmem = nmem_create();
542 res = (
chrmaptab) nmem_malloc(nmem,
sizeof(*res));
550 nmem_malloc(res->
nmem,
sizeof(res->
input) * 256);
551 for (i = 0; i < 256; i++)
554 nmem_malloc(res->
nmem,
sizeof(*res->
input));
557 nmem_malloc(res->
nmem, 2 *
sizeof(
unsigned char *));
572 while (!errors && (argc = readconf_line(f, &lineno, line, 512, argv, 50)))
575 if (!yaz_matchstr(argv[0],
"lowercase"))
579 yaz_log(YLOG_FATAL,
"Syntax error in charmap");
585 yaz_log(YLOG_FATAL,
"Bad value-set specification");
593 else if (!yaz_matchstr(argv[0],
"uppercase"))
597 yaz_log(YLOG_FATAL,
"Uppercase directive with no lowercase set");
602 yaz_log(YLOG_FATAL,
"Missing arg for uppercase directive");
608 yaz_log(YLOG_FATAL,
"Bad value-set specification");
612 else if (!yaz_matchstr(argv[0],
"space"))
616 yaz_log(YLOG_FATAL,
"Syntax error in charmap for space");
622 yaz_log(YLOG_FATAL,
"Bad space specification");
626 else if (!yaz_matchstr(argv[0],
"cut"))
630 yaz_log(YLOG_FATAL,
"Syntax error in charmap for cut");
636 yaz_log(YLOG_FATAL,
"Bad cut specification");
640 else if (!yaz_matchstr(argv[0],
"map"))
646 yaz_log(YLOG_FATAL,
"charmap directive map requires 2 args");
654 yaz_log(YLOG_FATAL,
"Bad map target");
660 yaz_log(YLOG_FATAL,
"Bad map source");
664 else if (!yaz_matchstr(argv[0],
"equivalent"))
670 yaz_log(YLOG_FATAL,
"equivalent requires 1 argument");
678 yaz_log(YLOG_FATAL,
"equivalent: invalid string");
681 else if (w.
no_eq == 0)
683 yaz_log(YLOG_FATAL,
"equivalent: no strings");
692 for (i = 0; i<w.
no_eq; i++)
693 slen += strlen(w.
eq[i]) + 1;
694 result_str = nmem_malloc(res->
nmem, slen + 5);
699 for (i = 0; i<w.
no_eq; i++)
701 result_str[slen++] = i ?
'|' :
'(';
702 strcpy(result_str + slen, w.
eq[i]);
703 slen += strlen(w.
eq[i]);
705 result_str[slen++] =
')';
706 result_str[slen] =
'\0';
709 for (i = 0; i<w.
no_eq; i++)
712 w.
eq[i], strlen(w.
eq[i]),
717 else if (!yaz_matchstr(argv[0],
"encoding"))
720 yaz_iconv_close(t_unicode);
721 t_unicode = yaz_iconv_open(ucs4_native, argv[1]);
725 yaz_log(YLOG_WARN,
"Syntax error at '%s' in %s", line, name);
730 if (no_directives == 0)
732 yaz_log(YLOG_WARN,
"No directives in '%s'", name);
740 yaz_log(YLOG_DEBUG,
"maptab %s num=%d close %d errors", name, num, errors);
742 yaz_iconv_close(t_utf8);
744 yaz_iconv_close(t_unicode);
751 nmem_destroy(tab->
nmem);
static void fun_addentry(const char *s, void *data, int num)
ucs4_t zebra_prim_w(ucs4_t **s)
void chrmaptab_destroy(chrmaptab tab)
const char ** chr_map_q_input(chrmaptab maptab, const char **from, int len, int first)
static void fun_addcut(const char *s, void *data, int num)
static chr_t_entry * find_entry_x(chr_t_entry *t, const char **from, int *len, int first)
static int zebra_ucs4_strlen(ucs4_t *s)
const unsigned char CHR_FIELD_BEGIN
static void fun_mkstring(const char *s, void *data, int num)
const char * chr_map_output(chrmaptab maptab, const char **from, int len)
static void fun_add_map(const char *s, void *data, int num)
static void fun_add_equivalent_string(const char *s, void *data, int num)
static void fun_addspace(const char *s, void *data, int num)
static int scan_string(char *s_native, yaz_iconv_t t_unicode, yaz_iconv_t t_utf8, void(*fun)(const char *c, void *data, int num), void *data, int *num)
chrmaptab chrmaptab_create(const char *tabpath, const char *name, const char *tabroot)
const char ** chr_map_input(chrmaptab maptab, const char **from, int len, int first)
static chr_t_entry * set_map_string(chr_t_entry *root, NMEM nmem, const char *from, int len, char *to, const char *from_0)
static int scan_to_utf8(yaz_iconv_t t, ucs4_t *from, size_t inlen, char *outbuf, size_t outbytesleft)
const char ** chr_map_input_x(chrmaptab maptab, const char **from, int *len, int first)
struct chrmaptab_info * chrmaptab
static void * begin(struct zebra_register *reg, void *class_handle, RSET rset, NMEM nmem, TERMID *terms, int numterms)
static void end(struct zebra_register *reg, void *set_handle)
unsigned char * output[256]
char string[CHR_MAXSTR+1]