IDZEBRA 2.2.8
charmap.c
Go to the documentation of this file.
1/* This file is part of the Zebra server.
2 Copyright (C) Index Data
3
4Zebra is free software; you can redistribute it and/or modify it under
5the terms of the GNU General Public License as published by the Free
6Software Foundation; either version 2, or (at your option) any later
7version.
8
9Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or
11FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18*/
19
20
29#if HAVE_CONFIG_H
30#include <config.h>
31#endif
32#include <ctype.h>
33#include <string.h>
34#include <stdlib.h>
35#include <assert.h>
36
37typedef unsigned ucs4_t;
38
39#include <charmap.h>
40
41#include <yaz/yaz-util.h>
42
43#define CHR_MAXSTR 1024
44#define CHR_MAXEQUIV 32
45
46const unsigned char CHR_FIELD_BEGIN = '^';
47
48const char *CHR_UNKNOWN = "\001";
49const char *CHR_SPACE = "\002";
50const char *CHR_CUT = "\003";
51const char *CHR_BASE = "\005"; /* CHECK CHR_BASE_CHAR as well */
52
54{
55 chr_t_entry *input; /* mapping table for input data */
56 chr_t_entry *q_input; /* mapping table for queries */
57 unsigned char *output[256]; /* return mapping - for display of registers */
58 int base_uppercase; /* Start of upper-case ordinals */
59 NMEM nmem;
60};
61
62/*
63 * Character map trie node.
64 */
66{
67 chr_t_entry **children; /* array of children */
68 unsigned char **target; /* target for this node, if any */
69};
70
71/*
72 * General argument structure for callback functions (internal use only)
73 */
74typedef struct chrwork
75{
77 char string[CHR_MAXSTR+1];
79
80/*
81 * Callback for equivalent stuff
82 */
83typedef struct
84{
85 NMEM nmem;
86 int no_eq;
87 char *eq[CHR_MAXEQUIV];
89/*
90 * Add an entry to the character map.
91 */
92static chr_t_entry *set_map_string(chr_t_entry *root, NMEM nmem,
93 const char *from, int len, char *to,
94 const char *from_0)
95{
96 if (!from_0)
97 from_0 = from;
98 if (!root)
99 {
100 root = (chr_t_entry *) nmem_malloc(nmem, sizeof(*root));
101 root->children = 0;
102 root->target = 0;
103 }
104 if (!len)
105 {
106 if (!root->target || !root->target[0] ||
107 strcmp((const char *) root->target[0], to))
108 {
109 if (from_0 &&
110 root->target && root->target[0] && root->target[0][0] &&
111 strcmp((const char *) root->target[0], CHR_UNKNOWN))
112 {
113 yaz_log(YLOG_WARN, "duplicate entry for charmap from '%s'",
114 from_0);
115 }
116 root->target = (unsigned char **)
117 nmem_malloc(nmem, sizeof(*root->target)*2);
118 root->target[0] = (unsigned char *) nmem_strdup(nmem, to);
119 root->target[1] = 0;
120 }
121 }
122 else
123 {
124 if (!root->children)
125 {
126 int i;
127
128 root->children = (chr_t_entry **)
129 nmem_malloc(nmem, sizeof(chr_t_entry*) * 256);
130 for (i = 0; i < 256; i++)
131 root->children[i] = 0;
132 }
133 if (!(root->children[(unsigned char) *from] =
134 set_map_string(root->children[(unsigned char) *from], nmem,
135 from + 1, len - 1, to, from_0)))
136 return 0;
137 }
138 return root;
139}
140
141static chr_t_entry *find_entry_x(chr_t_entry *t, const char **from, int *len, int first)
142{
143 chr_t_entry *res;
144
145 while (*len <= 0)
146 { /* switch to next buffer */
147 if (*len < 0)
148 break;
149 from++;
150 len++;
151 }
152 if (*len > 0 && t->children)
153 {
154 const char *old_from = *from;
155 int old_len = *len;
156
157 res = 0;
158
159 if (first && t->children[CHR_FIELD_BEGIN])
160 {
161 if ((res = find_entry_x(t->children[CHR_FIELD_BEGIN], from, len, 0)) && res != t->children[CHR_FIELD_BEGIN])
162 return res;
163 else
164 res = 0;
165 /* otherwhise there was no match on beginning of field, move on */
166 }
167
168 if (!res && t->children[(unsigned char) **from])
169 {
170 (*len)--;
171 (*from)++;
172 if ((res = find_entry_x(t->children[(unsigned char) *old_from],
173 from, len, 0)))
174 return res;
175 /* no match */
176 *len = old_len;
177 *from = old_from;
178 }
179 }
180 /* no children match. use ourselves, if we have a target */
181 return t->target ? t : 0;
182}
183
184const char **chr_map_input_x(chrmaptab maptab, const char **from, int *len, int first)
185{
186 chr_t_entry *t = maptab->input;
187 chr_t_entry *res;
188
189 if (!(res = find_entry_x(t, from, len, first)))
190 abort();
191 return (const char **) (res->target);
192}
193
194const char **chr_map_input(chrmaptab maptab, const char **from, int len, int first)
195{
196 chr_t_entry *t = maptab->input;
197 chr_t_entry *res;
198 int len_tmp[2];
199
200 len_tmp[0] = len;
201 len_tmp[1] = -1;
202 if (!(res = find_entry_x(t, from, len_tmp, first)))
203 abort();
204 return (const char **) (res->target);
205}
206
207const char **chr_map_q_input(chrmaptab maptab,
208 const char **from, int len, int first)
209{
210 chr_t_entry *t = maptab->q_input;
211 chr_t_entry *res;
212 int len_tmp[2];
213
214 len_tmp[0] = len;
215 len_tmp[1] = -1;
216 if (!(res = find_entry_x(t, from, len_tmp, first)))
217 return 0;
218 return (const char **) (res->target);
219}
220
221const char *chr_map_output(chrmaptab maptab, const char **from, int len)
222{
223 unsigned char c = ** (unsigned char **) from;
224 const char *out = (const char*) maptab->output[c];
225
226 if (out)
227 (*from)++;
228 return out;
229}
230
232{
233 int i = 0;
234 while (*s++)
235 i++;
236 return i;
237}
238
240{
241 ucs4_t c;
242 ucs4_t i = 0;
243 char fmtstr[8];
244
245 if (**s == '\\' && 1[*s])
246 {
247 (*s)++;
248 c = **s;
249 switch (c)
250 {
251 case '\\': c = '\\'; (*s)++; break;
252 case 'r': c = '\r'; (*s)++; break;
253 case 'n': c = '\n'; (*s)++; break;
254 case 't': c = '\t'; (*s)++; break;
255 case 's': c = ' '; (*s)++; break;
256 case 'x':
257 if (zebra_ucs4_strlen(*s) >= 3)
258 {
259 fmtstr[0] = (*s)[1];
260 fmtstr[1] = (*s)[2];
261 fmtstr[2] = 0;
262 sscanf(fmtstr, "%x", &i);
263 c = i;
264 *s += 3;
265 }
266 break;
267 case '0':
268 case '1':
269 case '2':
270 case '3':
271 case '4':
272 case '5':
273 case '6':
274 case '7':
275 case '8':
276 case '9':
277 if (zebra_ucs4_strlen(*s) >= 3)
278 {
279 fmtstr[0] = (*s)[0];
280 fmtstr[1] = (*s)[1];
281 fmtstr[2] = (*s)[2];
282 fmtstr[3] = 0;
283 sscanf(fmtstr, "%o", &i);
284 c = i;
285 *s += 3;
286 }
287 break;
288 case 'L':
289 if (zebra_ucs4_strlen(*s) >= 5)
290 {
291 fmtstr[0] = (*s)[1];
292 fmtstr[1] = (*s)[2];
293 fmtstr[2] = (*s)[3];
294 fmtstr[3] = (*s)[4];
295 fmtstr[4] = 0;
296 sscanf(fmtstr, "%x", &i);
297 c = i;
298 *s += 5;
299 }
300 break;
301 default:
302 (*s)++;
303 }
304 }
305 else
306 {
307 c = **s;
308 ++(*s);
309 }
310 yaz_log(YLOG_DEBUG, "out %d", c);
311 return c;
312}
313
314/*
315 * Callback function.
316 * Add an entry to the value space.
317 */
318static void fun_addentry(const char *s, void *data, int num)
319{
320 chrmaptab tab = (chrmaptab) data;
321 char tmp[2];
322
323 tmp[0] = num; tmp[1] = '\0';
324 tab->input = set_map_string(tab->input, tab->nmem, s, strlen(s), tmp, 0);
325 tab->output[num + tab->base_uppercase] =
326 (unsigned char *) nmem_strdup(tab->nmem, s);
327}
328
329/*
330 * Callback function.
331 * Add a space-entry to the value space.
332 */
333static void fun_addspace(const char *s, void *data, int num)
334{
335 chrmaptab tab = (chrmaptab) data;
336 tab->input = set_map_string(tab->input, tab->nmem, s, strlen(s),
337 (char*) CHR_SPACE, 0);
338}
339
340/*
341 * Callback function.
342 * Add a space-entry to the value space.
343 */
344static void fun_addcut(const char *s, void *data, int num)
345{
346 chrmaptab tab = (chrmaptab) data;
347 tab->input = set_map_string(tab->input, tab->nmem, s, strlen(s),
348 (char*) CHR_CUT, 0);
349}
350
351/*
352 * Create a string containing the mapped characters provided.
353 */
354static void fun_mkstring(const char *s, void *data, int num)
355{
356 chrwork *arg = (chrwork *) data;
357 const char **res, *p = s;
358
359 res = chr_map_input(arg->map, &s, strlen(s), 0);
360 if (*res == (char*) CHR_UNKNOWN)
361 yaz_log(YLOG_WARN, "Map: '%s' has no mapping", p);
362 strncat(arg->string, *res, CHR_MAXSTR - strlen(arg->string));
363 arg->string[CHR_MAXSTR] = '\0';
364}
365
366/*
367 * Create an unmodified string (scan_string handler).
368 */
369static void fun_add_equivalent_string(const char *s, void *data, int num)
370{
371 chr_equiv_work *arg = (chr_equiv_work *) data;
372
373 if (arg->no_eq == CHR_MAXEQUIV)
374 return;
375 arg->eq[arg->no_eq++] = nmem_strdup(arg->nmem, s);
376}
377
378/*
379 * Add a map to the string contained in the argument.
380 */
381static void fun_add_map(const char *s, void *data, int num)
382{
383 chrwork *arg = (chrwork *) data;
384
385 assert(arg->map->input);
386 yaz_log(YLOG_DEBUG, "set map %.*s", (int) strlen(s), s);
387 set_map_string(arg->map->input, arg->map->nmem, s, strlen(s), arg->string,
388 0);
389 for (s = arg->string; *s; s++)
390 yaz_log(YLOG_DEBUG, " %3d", (unsigned char) *s);
391}
392
393static int scan_to_utf8(yaz_iconv_t t, ucs4_t *from, size_t inlen,
394 char *outbuf, size_t outbytesleft)
395{
396 size_t inbytesleft = inlen * sizeof(ucs4_t);
397 char *inbuf = (char*) from;
398 size_t ret;
399
400 if (t == 0)
401 *outbuf++ = *from; /* ISO-8859-1 is OK here */
402 else
403 {
404 ret = yaz_iconv(t, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
405 if (ret != (size_t) (-1))
406 ret = yaz_iconv(t, 0, 0, &outbuf, &outbytesleft);
407
408
409 if (ret == (size_t) (-1))
410 {
411 yaz_log(YLOG_LOG, "from: %2X %2X %2X %2X",
412 from[0], from[1], from[2], from[3]);
413 yaz_log(YLOG_WARN|YLOG_ERRNO, "bad unicode sequence");
414 return -1;
415 }
416 }
417 *outbuf = '\0';
418 return 0;
419}
420
421static int scan_string(char *s_native,
422 yaz_iconv_t t_unicode, yaz_iconv_t t_utf8,
423 void (*fun)(const char *c, void *data, int num),
424 void *data, int *num)
425{
426 char str[1024];
427
428 ucs4_t arg[512];
429 ucs4_t arg_prim[512];
430 ucs4_t *s = arg;
431 ucs4_t c, begin, end;
432 size_t i;
433
434 if (t_unicode != 0)
435 {
436 char *outbuf = (char *) arg;
437 char *inbuf = s_native;
438 size_t outbytesleft = sizeof(arg)-4;
439 size_t inbytesleft = strlen(s_native);
440 size_t ret;
441 ret = yaz_iconv(t_unicode, &inbuf, &inbytesleft,
442 &outbuf, &outbytesleft);
443 if (ret != (size_t)(-1))
444 ret = yaz_iconv(t_unicode, 0, 0, &outbuf, &outbytesleft);
445
446 if (ret == (size_t)(-1))
447 return -1;
448 i = (outbuf - (char*) arg)/sizeof(ucs4_t);
449 }
450 else
451 {
452 for (i = 0; s_native[i]; i++)
453 arg[i] = s_native[i] & 255; /* ISO-8859-1 conversion */
454 }
455 arg[i] = 0; /* terminate */
456 if (s[0] == 0xfeff || s[0] == 0xfeff) /* skip byte Order Mark */
457 s++;
458 while (*s)
459 {
460 switch (*s)
461 {
462 case '{':
463 s++;
464 begin = zebra_prim_w(&s);
465 if (*s != '-')
466 {
467 yaz_log(YLOG_FATAL, "Bad range in char-map");
468 return -1;
469 }
470 s++;
471 end = zebra_prim_w(&s);
472 if (end <= begin)
473 {
474 yaz_log(YLOG_FATAL, "Bad range in char-map");
475 return -1;
476 }
477 s++;
478 for (c = begin; c <= end; c++)
479 {
480 if (scan_to_utf8(t_utf8, &c, 1, str, sizeof(str)-1))
481 return -1;
482 (*fun)(str, data, num ? (*num)++ : 0);
483 }
484 break;
485 case '(':
486 ++s;
487 i = 0;
488 while (*s != ')' || s[-1] == '\\')
489 {
490 if (*s == '\0')
491 {
492 yaz_log(YLOG_FATAL, "Missing ) in charmap");
493 return -1;
494 }
495 arg_prim[i++] = zebra_prim_w(&s);
496 }
497 arg_prim[i] = 0;
498 if (scan_to_utf8(t_utf8, arg_prim, zebra_ucs4_strlen(arg_prim), str, sizeof(str)-1))
499 return -1;
500 (*fun)(str, data, num ? (*num)++ : 0);
501 s++;
502 break;
503 default:
504 c = zebra_prim_w(&s);
505 if (scan_to_utf8(t_utf8, &c, 1, str, sizeof(str)-1))
506 return -1;
507 (*fun)(str, data, num ? (*num)++ : 0);
508 }
509 }
510 return 0;
511}
512
513chrmaptab chrmaptab_create(const char *tabpath, const char *name,
514 const char *tabroot)
515{
516 FILE *f;
517 char line[512], *argv[50];
518 chrmaptab res;
519 int lineno = 0;
520 int no_directives = 0;
521 int errors = 0;
522 int argc, num = (int) *CHR_BASE, i;
523 NMEM nmem;
524 yaz_iconv_t t_unicode = 0;
525 yaz_iconv_t t_utf8 = 0;
526 unsigned endian = 31;
527 const char *ucs4_native = "UCS-4";
528
529 yaz_log(YLOG_DEBUG, "maptab %s open", name);
530 if (!(f = yaz_fopen(tabpath, name, "r", tabroot)))
531 {
532 yaz_log(YLOG_WARN|YLOG_ERRNO, "%s", name);
533 return 0;
534 }
535
536 if (*(char*) &endian == 31) /* little endian? */
537 ucs4_native = "UCS-4LE";
538
539 t_utf8 = yaz_iconv_open("UTF-8", ucs4_native);
540
541 nmem = nmem_create();
542 res = (chrmaptab) nmem_malloc(nmem, sizeof(*res));
543 res->nmem = nmem;
544 res->input = (chr_t_entry *) nmem_malloc(res->nmem, sizeof(*res->input));
545 res->input->target = (unsigned char **)
546 nmem_malloc(res->nmem, sizeof(*res->input->target) * 2);
547 res->input->target[0] = (unsigned char*) CHR_UNKNOWN;
548 res->input->target[1] = 0;
549 res->input->children = (chr_t_entry **)
550 nmem_malloc(res->nmem, sizeof(res->input) * 256);
551 for (i = 0; i < 256; i++)
552 {
553 res->input->children[i] = (chr_t_entry *)
554 nmem_malloc(res->nmem, sizeof(*res->input));
555 res->input->children[i]->children = 0;
556 res->input->children[i]->target = (unsigned char **)
557 nmem_malloc(res->nmem, 2 * sizeof(unsigned char *));
558 res->input->children[i]->target[1] = 0;
559 res->input->children[i]->target[0] = (unsigned char*) CHR_UNKNOWN;
560 }
561 res->q_input = (chr_t_entry *)
562 nmem_malloc(res->nmem, sizeof(*res->q_input));
563 res->q_input->target = 0;
564 res->q_input->children = 0;
565
566 for (i = *CHR_BASE; i < 256; i++)
567 res->output[i] = 0;
568 res->output[(int) *CHR_SPACE] = (unsigned char *) " ";
569 res->output[(int) *CHR_UNKNOWN] = (unsigned char*) "@";
570 res->base_uppercase = 0;
571
572 while (!errors && (argc = readconf_line(f, &lineno, line, 512, argv, 50)))
573 {
574 no_directives++;
575 if (!yaz_matchstr(argv[0], "lowercase"))
576 {
577 if (argc != 2)
578 {
579 yaz_log(YLOG_FATAL, "Syntax error in charmap");
580 ++errors;
581 }
582 if (scan_string(argv[1], t_unicode, t_utf8, fun_addentry,
583 res, &num) < 0)
584 {
585 yaz_log(YLOG_FATAL, "Bad value-set specification");
586 ++errors;
587 }
588 res->base_uppercase = num;
589 res->output[(int) *CHR_SPACE + num] = (unsigned char *) " ";
590 res->output[(int) *CHR_UNKNOWN + num] = (unsigned char*) "@";
591 num = (int) *CHR_BASE;
592 }
593 else if (!yaz_matchstr(argv[0], "uppercase"))
594 {
595 if (!res->base_uppercase)
596 {
597 yaz_log(YLOG_FATAL, "Uppercase directive with no lowercase set");
598 ++errors;
599 }
600 if (argc != 2)
601 {
602 yaz_log(YLOG_FATAL, "Missing arg for uppercase directive");
603 ++errors;
604 }
605 if (scan_string(argv[1], t_unicode, t_utf8, fun_addentry,
606 res, &num) < 0)
607 {
608 yaz_log(YLOG_FATAL, "Bad value-set specification");
609 ++errors;
610 }
611 }
612 else if (!yaz_matchstr(argv[0], "space"))
613 {
614 if (argc != 2)
615 {
616 yaz_log(YLOG_FATAL, "Syntax error in charmap for space");
617 ++errors;
618 }
619 if (scan_string(argv[1], t_unicode, t_utf8,
620 fun_addspace, res, 0) < 0)
621 {
622 yaz_log(YLOG_FATAL, "Bad space specification");
623 ++errors;
624 }
625 }
626 else if (!yaz_matchstr(argv[0], "cut"))
627 {
628 if (argc != 2)
629 {
630 yaz_log(YLOG_FATAL, "Syntax error in charmap for cut");
631 ++errors;
632 }
633 if (scan_string(argv[1], t_unicode, t_utf8,
634 fun_addcut, res, 0) < 0)
635 {
636 yaz_log(YLOG_FATAL, "Bad cut specification");
637 ++errors;
638 }
639 }
640 else if (!yaz_matchstr(argv[0], "map"))
641 {
642 chrwork buf;
643
644 if (argc != 3)
645 {
646 yaz_log(YLOG_FATAL, "charmap directive map requires 2 args");
647 ++errors;
648 }
649 buf.map = res;
650 buf.string[0] = '\0';
651 if (scan_string(argv[2], t_unicode, t_utf8,
652 fun_mkstring, &buf, 0) < 0)
653 {
654 yaz_log(YLOG_FATAL, "Bad map target");
655 ++errors;
656 }
657 if (scan_string(argv[1], t_unicode, t_utf8,
658 fun_add_map, &buf, 0) < 0)
659 {
660 yaz_log(YLOG_FATAL, "Bad map source");
661 ++errors;
662 }
663 }
664 else if (!yaz_matchstr(argv[0], "equivalent"))
665 {
667
668 if (argc != 2)
669 {
670 yaz_log(YLOG_FATAL, "equivalent requires 1 argument");
671 ++errors;
672 }
673 w.nmem = res->nmem;
674 w.no_eq = 0;
675 if (scan_string(argv[1], t_unicode, t_utf8,
676 fun_add_equivalent_string, &w, 0) < 0)
677 {
678 yaz_log(YLOG_FATAL, "equivalent: invalid string");
679 ++errors;
680 }
681 else if (w.no_eq == 0)
682 {
683 yaz_log(YLOG_FATAL, "equivalent: no strings");
684 ++errors;
685 }
686 else
687 {
688 char *result_str;
689 int i, slen = 5;
690
691 /* determine length of regular expression */
692 for (i = 0; i<w.no_eq; i++)
693 slen += strlen(w.eq[i]) + 1;
694 result_str = nmem_malloc(res->nmem, slen + 5);
695
696 /* build the regular expression */
697 *result_str = '\0';
698 slen = 0;
699 for (i = 0; i<w.no_eq; i++)
700 {
701 result_str[slen++] = i ? '|' : '(';
702 strcpy(result_str + slen, w.eq[i]);
703 slen += strlen(w.eq[i]);
704 }
705 result_str[slen++] = ')';
706 result_str[slen] = '\0';
707
708 /* each eq will map to this regular expression */
709 for (i = 0; i<w.no_eq; i++)
710 {
711 set_map_string(res->q_input, res->nmem,
712 w.eq[i], strlen(w.eq[i]),
713 result_str, 0);
714 }
715 }
716 }
717 else if (!yaz_matchstr(argv[0], "encoding"))
718 {
719 if (t_unicode != 0)
720 yaz_iconv_close(t_unicode);
721 t_unicode = yaz_iconv_open(ucs4_native, argv[1]);
722 }
723 else
724 {
725 yaz_log(YLOG_WARN, "Syntax error at '%s' in %s", line, name);
726 errors++;
727 }
728 }
729 yaz_fclose(f);
730 if (no_directives == 0)
731 {
732 yaz_log(YLOG_WARN, "No directives in '%s'", name);
733 errors++;
734 }
735 if (errors)
736 {
738 res = 0;
739 }
740 yaz_log(YLOG_DEBUG, "maptab %s num=%d close %d errors", name, num, errors);
741 if (t_utf8 != 0)
742 yaz_iconv_close(t_utf8);
743 if (t_unicode != 0)
744 yaz_iconv_close(t_unicode);
745 return res;
746}
747
749{
750 if (tab)
751 nmem_destroy(tab->nmem);
752}
753
754
755/*
756 * Local variables:
757 * c-basic-offset: 4
758 * c-file-style: "Stroustrup"
759 * indent-tabs-mode: nil
760 * End:
761 * vim: shiftwidth=4 tabstop=8 expandtab
762 */
763
static void fun_addentry(const char *s, void *data, int num)
Definition charmap.c:318
ucs4_t zebra_prim_w(ucs4_t **s)
Definition charmap.c:239
void chrmaptab_destroy(chrmaptab tab)
Definition charmap.c:748
static void fun_addcut(const char *s, void *data, int num)
Definition charmap.c:344
#define CHR_MAXSTR
Definition charmap.c:43
static int zebra_ucs4_strlen(ucs4_t *s)
Definition charmap.c:231
static chr_t_entry * find_entry_x(chr_t_entry *t, const char **from, int *len, int first)
Definition charmap.c:141
const unsigned char CHR_FIELD_BEGIN
Definition charmap.c:46
static void fun_mkstring(const char *s, void *data, int num)
Definition charmap.c:354
#define CHR_MAXEQUIV
Definition charmap.c:44
static chr_t_entry * set_map_string(chr_t_entry *root, NMEM nmem, const char *from, int len, char *to, const char *from_0)
Definition charmap.c:92
const char ** chr_map_input(chrmaptab maptab, const char **from, int len, int first)
Definition charmap.c:194
static void fun_add_map(const char *s, void *data, int num)
Definition charmap.c:381
const char * CHR_CUT
Definition charmap.c:50
const char * CHR_SPACE
Definition charmap.c:49
static void fun_add_equivalent_string(const char *s, void *data, int num)
Definition charmap.c:369
const char ** chr_map_input_x(chrmaptab maptab, const char **from, int *len, int first)
Definition charmap.c:184
static void fun_addspace(const char *s, void *data, int num)
Definition charmap.c:333
unsigned ucs4_t
Definition charmap.c:37
static int scan_string(char *s_native, yaz_iconv_t t_unicode, yaz_iconv_t t_utf8, void(*fun)(const char *c, void *data, int num), void *data, int *num)
Definition charmap.c:421
chrmaptab chrmaptab_create(const char *tabpath, const char *name, const char *tabroot)
Definition charmap.c:513
const char * chr_map_output(chrmaptab maptab, const char **from, int len)
Definition charmap.c:221
const char ** chr_map_q_input(chrmaptab maptab, const char **from, int len, int first)
Definition charmap.c:207
const char * CHR_UNKNOWN
Definition charmap.c:48
static int scan_to_utf8(yaz_iconv_t t, ucs4_t *from, size_t inlen, char *outbuf, size_t outbytesleft)
Definition charmap.c:393
const char * CHR_BASE
Definition charmap.c:51
struct chrmaptab_info * chrmaptab
Definition charmap.h:38
static void end(struct zebra_register *reg, void *set_handle)
Definition rank1.c:156
static void * begin(struct zebra_register *reg, void *class_handle, RSET rset, NMEM nmem, TERMID *terms, int numterms)
Definition rank1.c:107
char * eq[CHR_MAXEQUIV]
Definition charmap.c:87
Definition charmap.c:66
chr_t_entry ** children
Definition charmap.c:67
unsigned char ** target
Definition charmap.c:68
chr_t_entry * q_input
Definition charmap.c:56
chr_t_entry * input
Definition charmap.c:55
unsigned char * output[256]
Definition charmap.c:57
int base_uppercase
Definition charmap.c:58
chrmaptab map
Definition charmap.c:76
char string[CHR_MAXSTR+1]
Definition charmap.c:77