IDZEBRA 2.2.8
d1_read.c
Go to the documentation of this file.
/* This file is part of the Zebra server.
   Copyright (C) Index Data

Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/
19
20
/*
 * This module reads "loose" SGML and converts it to a data1 tree.
 */
24
25#if HAVE_CONFIG_H
26#include <config.h>
27#endif
28#include <assert.h>
29#include <stdio.h>
30#include <stdlib.h>
31
32#include <yaz/snprintf.h>
33#include <yaz/yaz-util.h>
34#include <d1_absyn.h>
35
37{
38 if (!n)
39 return 0;
40 if (data1_is_xmlmode(dh))
41 {
42 n = n->child;
43 while (n && n->which != DATA1N_tag)
44 n = n->next;
45 }
46 return n;
47}
48
49/*
50 * get the tag which is the immediate parent of this node (this may mean
51 * traversing intermediate things like variants and stuff.
52 */
54{
55 if (data1_is_xmlmode(dh))
56 {
57 for (; n && n->which != DATA1N_root; n = n->parent)
58 if (n->which == DATA1N_tag && n->parent &&
60 return n;
61 }
62 else
63 {
64 for (; n && n->which != DATA1N_root; n = n->parent)
65 if (n->which == DATA1N_tag)
66 return n;
67 }
68 return 0;
69}
70
72{
73 return data1_mk_node2(dh, m, DATA1N_root, 0);
74}
75
77{
78 return data1_mk_node2(dh, m, type, 0);
79}
80
81static void data1_init_node(data1_handle dh, data1_node *r, int type)
82{
83 r->which = type;
84 switch(type)
85 {
86 case DATA1N_tag:
87 r->u.tag.tag = 0;
88 r->u.tag.element = 0;
89 r->u.tag.no_data_requested = 0;
90 r->u.tag.node_selected = 0;
91 r->u.tag.make_variantlist = 0;
92 r->u.tag.get_bytes = -1;
93 r->u.tag.attributes = 0;
94 break;
95 case DATA1N_root:
96 r->u.root.type = 0;
97 r->u.root.absyn = 0;
98 break;
99 case DATA1N_data:
100 r->u.data.data = 0;
101 r->u.data.len = 0;
102 r->u.data.what = 0;
103 r->u.data.formatted_text = 0;
104 break;
105 case DATA1N_comment:
106 r->u.data.data = 0;
107 r->u.data.len = 0;
108 r->u.data.what = 0;
109 r->u.data.formatted_text = 1;
110 break;
111 case DATA1N_variant:
112 r->u.variant.type = 0;
113 r->u.variant.value = 0;
114 break;
116 r->u.preprocess.target = 0;
117 r->u.preprocess.attributes = 0;
118 break;
119 default:
120 yaz_log(YLOG_WARN, "data_mk_node_type. bad type = %d\n", type);
121 }
122}
123
125 data1_node *parent)
126{
127 data1_node *r = (data1_node *)nmem_malloc(m, sizeof(*r));
128 r->next = r->child = r->last_child = 0;
129
130 r->parent = parent;
131 if (!parent)
132 r->root = r;
133 else
134 {
135 r->root = parent->root;
136 if (!parent->child)
137 parent->child = parent->last_child = r;
138 else
139 parent->last_child->next = r;
140 parent->last_child = r;
141 }
142 data1_init_node(dh, r, type);
143 return r;
144}
145
147 data1_node *parent)
148{
149 return data1_append_node(dh, m, type, parent);
150}
151
153 data1_node *parent)
154{
155 data1_node *r = (data1_node *)nmem_malloc(m, sizeof(*r));
156 r->next = r->child = r->last_child = 0;
157
158 if (!parent)
159 r->root = r;
160 else
161 {
162 r->root = parent->root;
163 r->parent = parent;
164 if (!parent->child)
165 parent->last_child = r;
166 else
167 r->next = parent->child;
168 parent->child = r;
169 }
170 data1_init_node(dh, r, type);
171 return r;
172}
173
174data1_node *data1_mk_root(data1_handle dh, NMEM nmem, const char *name)
175{
176 data1_absyn *absyn = data1_get_absyn(dh, name, 1);
177 data1_node *res;
178
179 if (!absyn)
180 {
181 yaz_log(YLOG_WARN, "Unable to acquire abstract syntax " "for '%s'",
182 name);
183 /* It's now OK for a record not to have an absyn */
184 }
185 res = data1_mk_node2(dh, nmem, DATA1N_root, 0);
186 res->u.root.type = data1_insert_string(dh, res, nmem, name);
187 res->u.root.absyn = absyn;
188 return res;
189}
190
192 NMEM nmem, const char *name)
193{
196
197 res->u.root.type = data1_insert_string(dh, res, nmem, name);
198 res->u.root.absyn = absyn;
199}
200
201void data1_add_attrs(data1_handle dh, NMEM nmem, const char **attr,
202 data1_xattr **p)
203{
204 while (*p)
205 p = &(*p)->next;
206
207 while (attr && *attr)
208 {
209 *p = (data1_xattr*) nmem_malloc(nmem, sizeof(**p));
210 (*p)->name = nmem_strdup(nmem, *attr++);
211 (*p)->value = nmem_strdup(nmem, *attr++);
212 (*p)->what = DATA1I_text;
213
214 p = &(*p)->next;
215 }
216 *p = 0;
217}
218
220 const char *target,
221 const char **attr, data1_node *at)
222{
223 return data1_mk_preprocess_n(dh, nmem, target, strlen(target),
224 attr, at);
225}
226
228 const char *target, size_t len,
229 const char **attr, data1_node *at)
230{
231 data1_node *res = data1_mk_node2(dh, nmem, DATA1N_preprocess, at);
232 res->u.preprocess.target = data1_insert_string_n(dh, res, nmem,
233 target, len);
234
235 data1_add_attrs(dh, nmem, attr, &res->u.preprocess.attributes);
236 return res;
237}
238
240 const char *target,
241 const char **attr, data1_node *at)
242{
243 return data1_insert_preprocess_n(dh, nmem, target, strlen(target),
244 attr, at);
245}
246
248 const char *target, size_t len,
249 const char **attr, data1_node *at)
250{
251 data1_node *res = data1_insert_node(dh, nmem, DATA1N_preprocess, at);
252 res->u.preprocess.target = data1_insert_string_n(dh, res, nmem,
253 target, len);
254
255 data1_add_attrs(dh, nmem, attr, &res->u.preprocess.attributes);
256 return res;
257}
258
260 const char *tag, size_t len, const char **attr,
261 data1_node *at)
262{
263 data1_node *partag = get_parent_tag(dh, at);
264 data1_node *res = data1_mk_node2(dh, nmem, DATA1N_tag, at);
265 data1_element *e = 0;
266
267 res->u.tag.tag = data1_insert_string_n(dh, res, nmem, tag, len);
268
269 if (!partag) /* top tag? */
271 0 /* index as local */,
272 res->u.tag.tag);
273 else
274 {
275 /* only set element for known tags */
276 e = partag->u.tag.element;
277 if (e)
279 e, res->u.tag.tag);
280 }
281 res->u.tag.element = e;
282 data1_add_attrs(dh, nmem, attr, &res->u.tag.attributes);
283 return res;
284}
285
287 data1_node *res, const char **attr)
288{
289 if (res->which != DATA1N_tag)
290 return;
291
292 data1_add_attrs(dh, nmem, attr, &res->u.tag.attributes);
293}
294
296 const char *tag, const char **attr, data1_node *at)
297{
298 return data1_mk_tag_n(dh, nmem, tag, strlen(tag), attr, at);
299}
300
302 const char *tag)
303{
304 if (*tag == '/')
305 {
306 n = data1_get_root_tag(dh, n);
307 if (n)
308 n = n->child;
309 tag++;
310 }
311 for (; n; n = n->next)
312 if (n->which == DATA1N_tag && n->u.tag.tag &&
313 !yaz_matchstr(n->u.tag.tag, tag))
314 {
315 return n;
316 }
317 return 0;
318}
319
321 const char *tag, data1_node *at)
322{
323 data1_node *node = data1_search_tag(dh, at->child, tag);
324 if (!node)
325 node = data1_mk_tag(dh, nmem, tag, 0 /* attr */, at);
326 else
327 node->child = node->last_child = 0;
328 return node;
329}
330
332 const char *buf, size_t len, data1_node *parent)
333{
334 data1_node *res = data1_mk_node2(dh, mem, DATA1N_data, parent);
335 data1_set_data_string_n(dh, res, mem, buf, len);
336 return res;
337}
338
340 const char *buf, size_t len, data1_node *parent)
341{
342 data1_node *res = data1_mk_text_n(dh, mem, buf, len, parent);
343 res->u.data.formatted_text = 1;
344 return res;
345}
346
348 const char *buf, data1_node *parent)
349{
350 return data1_mk_text_n(dh, mem, buf, strlen(buf), parent);
351}
352
354 const char *buf, size_t len,
355 data1_node *parent)
356{
357 data1_node *res = data1_mk_node2(dh, mem, DATA1N_comment, parent);
358 data1_set_data_string_n(dh, res, mem, buf, len);
359 return res;
360}
361
363 const char *buf, data1_node *parent)
364{
365 return data1_mk_comment_n(dh, mem, buf, strlen(buf), parent);
366}
367
369 const char *str, size_t len)
370{
371 res->u.data.what = DATA1I_text;
372 res->u.data.data = data1_insert_string_n(dh, res, m, str, len);
373 res->u.data.len = len;
374}
375
377 const char *str)
378{
379 data1_set_data_string_n(dh, res, m, str, strlen(str));
380}
381
383 NMEM m, const char *str, size_t len)
384{
385 char *b;
386 if (len >= DATA1_LOCALDATA)
387 b = (char *) nmem_malloc(m, len+1);
388 else
389 b = res->lbuf;
390 memcpy(b, str, len);
391 b[len] = 0;
392 return b;
393}
394
395char *data1_insert_zint(data1_handle dh, data1_node *res, NMEM m, zint num)
396{
397 char str[64];
398
399 yaz_snprintf(str, sizeof(str), ZINT_FORMAT, num);
400 return data1_insert_string(dh, res, m, str);
401}
402
404{
405 res->u.data.what = DATA1I_num;
406 res->u.data.data = data1_insert_zint(dh, res, m, num);
407 res->u.data.len = strlen(res->u.data.data);
408}
409
411 NMEM m, const char *str)
412{
413 return data1_insert_string_n(dh, res, m, str, strlen(str));
414}
415
417 data1_node *at,
418 const char *tagname, NMEM m,
419 int local_allowed,
420 int insert_mode)
421{
422 data1_node *root = at->root;
423 data1_node *partag = get_parent_tag(dh, at);
424 data1_element *e = NULL;
425 data1_node *datn = 0;
426 data1_node *tagn = 0;
427
428 if (!partag)
429 e = data1_getelementbytagname(dh, root->u.root.absyn, 0, tagname);
430 else
431 {
432 e = partag->u.tag.element;
433 if (e)
434 e = data1_getelementbytagname(dh, root->u.root.absyn, e, tagname);
435 }
436 if (local_allowed || e)
437 {
438 if (insert_mode)
439 tagn = data1_insert_node(dh, m, DATA1N_tag, at);
440 else
441 tagn = data1_append_node(dh, m, DATA1N_tag, at);
442 tagn->u.tag.tag = data1_insert_string(dh, tagn, m, tagname);
443 tagn->u.tag.element = e;
444 datn = data1_mk_node2(dh, m, DATA1N_data, tagn);
445 }
446 return datn;
447}
448
450 const char *tagname, NMEM m)
451{
452 return data1_add_insert_taggeddata(dh, at, tagname, m, 1, 0);
453}
454
455
456/*
457 * Insert a tagged node into the record root as first child of the node at
458 * which should be root or tag itself). Returns pointer to the data node,
459 * which can then be modified.
460 */
462 const char *tagname, NMEM m)
463{
464 return data1_add_insert_taggeddata(dh, at, tagname, m, 0, 1);
465}
466
468 data1_node *at, const char *tagname,
469 NMEM m)
470{
471 return data1_add_insert_taggeddata(dh, at, tagname, m, 0, 1);
472}
473
475 data1_node *at, const char *tagname,
476 NMEM m)
477{
478 return data1_add_insert_taggeddata(dh, at, tagname, m, 1, 0);
479}
480
482 const char *tag, zint num,
483 NMEM nmem)
484{
485 data1_node *node_data;
486
487 node_data = data1_mk_tag_data(dh, at, tag, nmem);
488 if (!node_data)
489 return 0;
490 data1_set_data_zint(dh, node_data, nmem, num);
491 return node_data;
492}
493
495 const char *tag, int num,
496 NMEM nmem)
497{
498 return data1_mk_tag_data_zint(dh, at, tag, num, nmem);
499}
500
502 const char *tag, Odr_oid *oid,
503 NMEM nmem)
504{
505 data1_node *node_data;
506 char str[128], *p = str;
507 size_t i;
508
509 node_data = data1_mk_tag_data(dh, at, tag, nmem);
510 if (!node_data)
511 return 0;
512
513 for (i = 0; i < 14 && oid[i] >= 0; i++)
514 {
515 if (i > 0)
516 *p++ = '.';
517 yaz_snprintf(p, 7, "%d", oid[i]);
518 p += strlen(p);
519 }
520 data1_set_data_string(dh, node_data, nmem, str);
521 node_data->u.data.what = DATA1I_oid;
522 return node_data;
523}
524
525
527 const char *tag, const char *str,
528 NMEM nmem)
529{
530 data1_node *node_data = data1_mk_tag_data(dh, at, tag, nmem);
531 if (!node_data)
532 return 0;
533 data1_set_data_string(dh, node_data, nmem, str);
534 return node_data;
535}
536
537
539 const char *tag, const char *str,
540 NMEM nmem)
541{
542 data1_node *node = data1_search_tag(dh, at->child, tag);
543 if (!node)
544 return data1_mk_tag_data_text(dh, at, tag, str, nmem);
545 node = node->child;
546 data1_set_data_string(dh, node, nmem, str);
547 node->child = node->last_child = 0;
548 return node;
549}
550
/*
 * Fetch the next byte from the input via get_byte(fh).
 *
 * *amp is always set to 0: no entity decoding is performed here, so a
 * byte is never reported as coming from an escaped character.
 */
static int ampr(int (*get_byte)(void *fh), void *fh, int *amp)
{
    int c = (*get_byte)(fh);
    *amp = 0;
    return c;
}
557
559 int (*get_byte)(void *fh), void *fh,
560 WRBUF wrbuf, int *ch, int *amp)
561{
562 data1_xattr *p_first = 0;
563 data1_xattr **pp = &p_first;
564 int c = *ch;
565 for (;;)
566 {
567 data1_xattr *p;
568 while (*amp || (c && d1_isspace(c)))
569 c = ampr(get_byte, fh, amp);
570 if (*amp == 0 && (c == 0 || c == '>' || c == '/'))
571 break;
572 *pp = p = (data1_xattr *) nmem_malloc(m, sizeof(*p));
573 p->next = 0;
574 pp = &p->next;
575 p->value = 0;
576 p->what = DATA1I_xmltext;
577
578 wrbuf_rewind(wrbuf);
579 while (c && c != '=' && c != '>' && c != '/' && !d1_isspace(c))
580 {
581 wrbuf_putc(wrbuf, c);
582 c = ampr(get_byte, fh, amp);
583 }
584 p->name = nmem_strdup(m, wrbuf_cstr(wrbuf));
585 if (c == '=')
586 {
587 c = ampr(get_byte, fh, amp);
588 if (*amp == 0 && c == '"')
589 {
590 c = ampr(get_byte, fh, amp);
591 wrbuf_rewind(wrbuf);
592 while (*amp || (c && c != '"'))
593 {
594 wrbuf_putc(wrbuf, c);
595 c = ampr(get_byte, fh, amp);
596 }
597 if (c)
598 c = ampr(get_byte, fh, amp);
599 }
600 else if (*amp == 0 && c == '\'')
601 {
602 c = ampr(get_byte, fh, amp);
603 wrbuf_rewind(wrbuf);
604 while (*amp || (c && c != '\''))
605 {
606 wrbuf_putc(wrbuf, c);
607 c = ampr(get_byte, fh, amp);
608 }
609 if (c)
610 c = ampr(get_byte, fh, amp);
611 }
612 else
613 {
614 wrbuf_rewind(wrbuf);
615 while (*amp || (c && c != '>' && c != '/'))
616 {
617 wrbuf_putc(wrbuf, c);
618 c = ampr(get_byte, fh, amp);
619 }
620 }
621 p->value = nmem_strdup(m, wrbuf_cstr(wrbuf));
622 }
623 }
624 *ch = c;
625 return p_first;
626}
627
628/*
629 * Ugh. Sometimes functions just grow and grow on you. This one reads a
630 * 'node' and its children.
631 */
633 int (*get_byte)(void *fh), void *fh, WRBUF wrbuf)
634{
635 data1_node *d1_stack[256];
636 data1_node *res;
637 int c, amp;
638 int level = 0;
639 int line = 1;
640
641 d1_stack[level] = 0;
642 c = ampr(get_byte, fh, &amp);
643 while (c != '\0')
644 {
645 data1_node *parent = level ? d1_stack[level-1] : 0;
646
647 if (amp == 0 && c == '<') /* beginning of tag */
648 {
649 data1_xattr *xattr;
650
651 char tag[256];
652 int null_tag = 0;
653 int end_tag = 0;
654 size_t i = 0;
655
656 c = ampr(get_byte, fh, &amp);
657 if (amp == 0 && c == '/')
658 {
659 end_tag = 1;
660 c = ampr(get_byte, fh, &amp);
661 }
662 else if (amp == 0 && c == '?')
663 {
664 int quote_mode = 0;
665 while ((c = ampr(get_byte, fh, &amp)))
666 {
667 if (amp)
668 continue;
669 if (quote_mode == 0)
670 {
671 if (c == '"')
672 quote_mode = c;
673 else if (c == '\'')
674 quote_mode = c;
675 else if (c == '>')
676 {
677 c = ampr(get_byte, fh, &amp);
678 break;
679 }
680 }
681 else
682 {
683 if (amp == 0 && c == quote_mode)
684 quote_mode = 0;
685 }
686 }
687 continue;
688 }
689 else if (amp == 0 && c == '!')
690 {
691 int c0, amp0;
692
693 wrbuf_rewind(wrbuf);
694
695 c0 = ampr(get_byte, fh, &amp0);
696 if (amp0 == 0 && c0 == '\0')
697 break;
698 c = ampr(get_byte, fh, &amp);
699
700 if (amp0 == 0 && c0 == '-' && amp == 0 && c == '-')
701 {
702 /* COMMENT: <!-- ... --> */
703 int no_dash = 0;
704
705 c = ampr(get_byte, fh, &amp);
706 while (amp || c)
707 {
708 if (amp == 0 && c == '-')
709 no_dash++;
710 else if (amp == 0 && c == '>' && no_dash >= 2)
711 {
712 if (level > 0)
713 d1_stack[level] =
715 dh, m,
716 wrbuf_buf(wrbuf), wrbuf_len(wrbuf)-2,
717 d1_stack[level-1]);
718 c = ampr(get_byte, fh, &amp); /* skip > */
719 break;
720 }
721 else
722 no_dash = 0;
723 wrbuf_putc(wrbuf, c);
724 c = ampr(get_byte, fh, &amp);
725 }
726 continue;
727 }
728 else
729 { /* DIRECTIVE: <! .. > */
730
731 int blevel = 0;
732 while (amp || c)
733 {
734 if (amp == 0 && c == '>' && blevel == 0)
735 {
736 c = ampr(get_byte, fh, &amp);
737 break;
738 }
739 if (amp == 0 && c == '[')
740 blevel++;
741 if (amp == 0 && c == ']' && blevel > 0)
742 blevel--;
743 c = ampr(get_byte, fh, &amp);
744 }
745 continue;
746 }
747 }
748 while (amp || (c && c != '>' && c != '/' && !d1_isspace(c)))
749 {
750 if (i < (sizeof(tag)-1))
751 tag[i++] = c;
752 c = ampr(get_byte, fh, &amp);
753 }
754 tag[i] = '\0';
755 xattr = data1_read_xattr(dh, m, get_byte, fh, wrbuf, &c, &amp);
756 if (amp == 0 && c == '/')
757 { /* <tag attrs/> or <tag/> */
758 null_tag = 1;
759 c = ampr(get_byte, fh, &amp);
760 }
761 if (amp || c != '>')
762 {
763 yaz_log(YLOG_WARN, "d1: %d: Malformed tag", line);
764 return 0;
765 }
766 else
767 c = ampr(get_byte, fh, &amp);
768
769 /* End tag? */
770 if (end_tag)
771 {
772 if (*tag == '\0')
773 --level; /* </> */
774 else
775 { /* </tag> */
776 int i = level;
777 while (i > 0)
778 {
779 parent = d1_stack[--i];
780 if ((parent->which == DATA1N_root &&
781 !strcmp(tag, parent->u.root.type)) ||
782 (parent->which == DATA1N_tag &&
783 !strcmp(tag, parent->u.tag.tag)))
784 {
785 level = i;
786 break;
787 }
788 }
789 if (i != level)
790 {
791 yaz_log(YLOG_WARN, "%d: no begin tag for %s",
792 line, tag);
793 break;
794 }
795 }
796 if (data1_is_xmlmode(dh))
797 {
798 if (level <= 1)
799 return d1_stack[0];
800 }
801 else
802 {
803 if (level <= 0)
804 return d1_stack[0];
805 }
806 continue;
807 }
808 else if (!strcmp(tag, "var")
809 && xattr && xattr->next && xattr->next->next
810 && xattr->value == 0
811 && xattr->next->value == 0
812 && xattr->next->next->value == 0)
813 {
814 /* <var class type value> */
815 const char *tclass = xattr->name;
816 const char *type = xattr->next->name;
817 const char *value = xattr->next->name;
818 data1_vartype *tp;
819
820 yaz_log(YLOG_LOG, "Variant class=%s type=%s value=%s",
821 tclass, type, value);
822 if (!(tp =
824 parent->root->u.root.absyn->varset,
825 tclass, type)))
826 continue;
827 /*
828 * If we're the first variant in this group, create a parent
829 * variant, and insert it before the current variant.
830 */
831 if (parent->which != DATA1N_variant)
832 {
833 res = data1_mk_node2(dh, m, DATA1N_variant, parent);
834 }
835 else
836 {
837 /*
838 * now determine if one of our ancestor triples is of
839 * same type. If so, we break here.
840 */
841 int i;
842 for (i = level-1; d1_stack[i]->which==DATA1N_variant; --i)
843 if (d1_stack[i]->u.variant.type == tp)
844 {
845 level = i;
846 break;
847 }
848 res = data1_mk_node2(dh, m, DATA1N_variant, parent);
849 res->u.variant.type = tp;
850 res->u.variant.value =
851 data1_insert_string(dh, res, m, value);
852 }
853 }
854 else
855 {
856
857 /* tag .. acquire our element in the abstract syntax */
858 if (level == 0)
859 {
860 parent = data1_mk_root(dh, m, tag);
861 res = d1_stack[level] = parent;
862
863 if (data1_is_xmlmode(dh))
864 {
865 level++;
866 res = data1_mk_tag(dh, m, tag, 0 /* attr */, parent);
867 res->u.tag.attributes = xattr;
868 }
869 }
870 else
871 {
872 res = data1_mk_tag(dh, m, tag, 0 /* attr */, parent);
873 res->u.tag.attributes = xattr;
874 }
875 }
876 d1_stack[level] = res;
877 d1_stack[level+1] = 0;
878 if (level < 250 && !null_tag)
879 ++level;
880 }
881 else /* != '<'... this is a body of text */
882 {
883 int len;
884
885 if (level == 0)
886 {
887 c = ampr(get_byte, fh, &amp);
888 continue;
889 }
890 res = data1_mk_node2(dh, m, DATA1N_data, parent);
891 res->u.data.what = DATA1I_xmltext;
892 res->u.data.formatted_text = 0;
893 d1_stack[level] = res;
894
895 wrbuf_rewind(wrbuf);
896
897 while (amp || (c && c != '<'))
898 {
899 wrbuf_putc(wrbuf, c);
900 c = ampr(get_byte, fh, &amp);
901 }
902 len = wrbuf_len(wrbuf);
903
904 /* use local buffer of nmem if too large */
905 if (len >= DATA1_LOCALDATA)
906 res->u.data.data = (char*) nmem_malloc(m, len);
907 else
908 res->u.data.data = res->lbuf;
909
910 if (len)
911 memcpy(res->u.data.data, wrbuf_buf(wrbuf), len);
912 else
913 res->u.data.data = 0;
914 res->u.data.len = len;
915 }
916 }
917 return 0;
918}
919
/*
 * Byte source over an in-memory string. fh points to a 'const char *'
 * cursor; each call returns the character at the cursor and advances
 * it. At the terminating NUL it returns 0 and leaves the cursor where
 * it is.
 */
int getc_mem(void *fh)
{
    const char **p = (const char **) fh;
    if (**p)
        return *(*p)++;
    return 0;
}
927
928data1_node *data1_read_node(data1_handle dh, const char **buf, NMEM m)
929{
930 WRBUF wrbuf = wrbuf_alloc();
931 data1_node *node;
932
933 node = data1_read_nodex(dh, m, getc_mem, (void *) (buf), wrbuf);
934 wrbuf_destroy(wrbuf);
935 return node;
936}
937
938/*
939 * Read a record in the native syntax.
940 */
942 int (*rf)(void *, char *, size_t), void *fh,
943 NMEM m)
944{
945 int *size;
946 char **buf = data1_get_read_buf(dh, &size);
947 const char *bp;
948 int rd = 0, res;
949
950 if (!*buf)
951 *buf = (char *)xmalloc(*size = 4096);
952
953 for (;;)
954 {
955 if (rd + 2048 >= *size && !(*buf =(char *)xrealloc(*buf, *size *= 2)))
956 abort();
957 if ((res = (*rf)(fh, *buf + rd, 2048)) <= 0)
958 {
959 if (!res)
960 {
961 bp = *buf;
962 (*buf)[rd] = '\0';
963 return data1_read_node(dh, &bp, m);
964 }
965 else
966 return 0;
967 }
968 rd += res;
969 }
970}
971
972data1_node *data1_read_sgml(data1_handle dh, NMEM m, const char *buf)
973{
974 const char *bp = buf;
975 return data1_read_node(dh, &bp, m);
976}
977
978
979static int conv_item(NMEM m, yaz_iconv_t t,
980 WRBUF wrbuf, char *inbuf, size_t inlen)
981{
982 wrbuf_rewind(wrbuf);
983 wrbuf_iconv_write(wrbuf, t, inbuf, inlen);
984 wrbuf_iconv_reset(wrbuf, t);
985 return 0;
986}
987
988static void data1_iconv_s(data1_handle dh, NMEM m, data1_node *n,
989 yaz_iconv_t t, WRBUF wrbuf, const char *tocode)
990{
991 for (; n; n = n->next)
992 {
993 switch (n->which)
994 {
995 case DATA1N_data:
996 case DATA1N_comment:
997 if (conv_item(m, t, wrbuf, n->u.data.data, n->u.data.len) == 0)
998 {
999 n->u.data.data =
1000 data1_insert_string_n(dh, n, m, wrbuf->buf, wrbuf->pos);
1001 n->u.data.len = wrbuf->pos;
1002 }
1003 break;
1004 case DATA1N_tag:
1005 if (conv_item(m, t, wrbuf, n->u.tag.tag, strlen(n->u.tag.tag))
1006 == 0)
1007 {
1008 n->u.tag.tag =
1009 data1_insert_string_n(dh, n, m, wrbuf->buf, wrbuf->pos);
1010 }
1011 if (n->u.tag.attributes)
1012 {
1013 data1_xattr *p;
1014 for (p = n->u.tag.attributes; p; p = p->next)
1015 {
1016 if (p->value &&
1017 conv_item(m, t, wrbuf, p->value, strlen(p->value))
1018 == 0)
1019 {
1020 p->value = nmem_strdup(m, wrbuf_cstr(wrbuf));
1021 }
1022 }
1023 }
1024 break;
1025 case DATA1N_preprocess:
1026 if (strcmp(n->u.preprocess.target, "xml") == 0)
1027 {
1029 for (; p; p = p->next)
1030 if (strcmp(p->name, "encoding") == 0)
1031 p->value = nmem_strdup(m, tocode);
1032 }
1033 break;
1034 }
1035 data1_iconv_s(dh, m, n->child, t, wrbuf, tocode);
1036 }
1037}
1038
1040{
1041 /* see if we have an xml header that specifies encoding */
1042 if (n && n->child && n->child->which == DATA1N_preprocess &&
1043 strcmp(n->child->u.preprocess.target, "xml") == 0)
1044 {
1046 for (; xp; xp = xp->next)
1047 if (strcmp(xp->name, "encoding") == 0)
1048 return xp->value;
1049 }
1050 /* no encoding in header, so see if "encoding" was specified for abs */
1051 if (n && n->which == DATA1N_root &&
1052 n->u.root.absyn && n->u.root.absyn->encoding)
1053 return n->u.root.absyn->encoding;
1054 /* none of above, return a hard coded default */
1055 return "ISO-8859-1";
1056}
1057
1059 const char *tocode,
1060 const char *fromcode)
1061{
1062 if (yaz_matchstr(tocode, fromcode))
1063 {
1064 WRBUF wrbuf = wrbuf_alloc();
1065 yaz_iconv_t t = yaz_iconv_open(tocode, fromcode);
1066 if (!t)
1067 {
1068 wrbuf_destroy(wrbuf);
1069 return -1;
1070 }
1071 data1_iconv_s(dh, m, n, t, wrbuf, tocode);
1072 yaz_iconv_close(t);
1073 wrbuf_destroy(wrbuf);
1074 }
1075 return 0;
1076}
1077
1079{
1080 for (; n; n = n->next)
1081 {
1082 if (n->which == DATA1N_data)
1083 {
1084
1085 int sz = n->u.data.len;
1086 const char *ndata = n->u.data.data;
1087 int off = 0;
1088
1089 for (off = 0; off < sz; off++)
1090 if (!d1_isspace(ndata[off]))
1091 break;
1092 sz = sz - off;
1093 ndata += off;
1094
1095 while (sz && d1_isspace(ndata[sz - 1]))
1096 sz--;
1097
1098 n->u.data.data = nmem_malloc(m, sz);
1099 n->u.data.len = sz;
1100 memcpy(n->u.data.data, ndata, sz);
1101
1102 }
1103 data1_chop_text(dh, m, n->child);
1104 }
1105}
1106
1108{
1109 for (; n; n = n->next)
1110 {
1111 if (n->which == DATA1N_data && n->next &&
1112 n->next->which == DATA1N_data)
1113 {
1114 int sz = 0;
1115 int off = 0;
1116 char *ndata;
1117 data1_node *np;
1118 for (np = n; np && np->which == DATA1N_data; np=np->next)
1119 sz += np->u.data.len;
1120 ndata = nmem_malloc(m, sz);
1121 for (np = n; np && np->which == DATA1N_data; np=np->next)
1122 {
1123 memcpy(ndata+off, np->u.data.data, np->u.data.len);
1124 off += np->u.data.len;
1125 }
1126 n->u.data.data = ndata;
1127 n->u.data.len = sz;
1128 n->next = np;
1129 if (!np && n->parent)
1130 n->parent->last_child = n;
1131
1132 }
1133 data1_concat_text(dh, m, n->child);
1134 }
1135}
1136
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */
1145
data1_node * data1_read_nodex(data1_handle dh, NMEM m, int(*get_byte)(void *fh), void *fh, WRBUF wrbuf)
Definition d1_read.c:632
data1_node * data1_mk_comment_n(data1_handle dh, NMEM mem, const char *buf, size_t len, data1_node *parent)
Definition d1_read.c:353
data1_node * data1_insert_preprocess(data1_handle dh, NMEM nmem, const char *target, const char **attr, data1_node *at)
Definition d1_read.c:239
data1_node * data1_read_node(data1_handle dh, const char **buf, NMEM m)
Definition d1_read.c:928
int data1_iconv(data1_handle dh, NMEM m, data1_node *n, const char *tocode, const char *fromcode)
Definition d1_read.c:1058
data1_xattr * data1_read_xattr(data1_handle dh, NMEM m, int(*get_byte)(void *fh), void *fh, WRBUF wrbuf, int *ch, int *amp)
Definition d1_read.c:558
data1_node * data1_mk_tag_data_oid(data1_handle dh, data1_node *at, const char *tag, Odr_oid *oid, NMEM nmem)
Definition d1_read.c:501
void data1_add_attrs(data1_handle dh, NMEM nmem, const char **attr, data1_xattr **p)
Definition d1_read.c:201
data1_node * data1_mk_comment(data1_handle dh, NMEM mem, const char *buf, data1_node *parent)
Definition d1_read.c:362
data1_node * data1_mk_preprocess(data1_handle dh, NMEM nmem, const char *target, const char **attr, data1_node *at)
Definition d1_read.c:219
data1_node * data1_mk_node(data1_handle dh, NMEM m)
Definition d1_read.c:71
data1_node * data1_get_root_tag(data1_handle dh, data1_node *n)
Definition d1_read.c:36
data1_node * data1_mk_text_nf(data1_handle dh, NMEM mem, const char *buf, size_t len, data1_node *parent)
Definition d1_read.c:339
static void data1_init_node(data1_handle dh, data1_node *r, int type)
Definition d1_read.c:81
void data1_tag_add_attr(data1_handle dh, NMEM nmem, data1_node *res, const char **attr)
Definition d1_read.c:286
data1_node * data1_insert_node(data1_handle dh, NMEM m, int type, data1_node *parent)
Definition d1_read.c:152
void data1_concat_text(data1_handle dh, NMEM m, data1_node *n)
Definition d1_read.c:1107
data1_node * data1_search_tag(data1_handle dh, data1_node *n, const char *tag)
Definition d1_read.c:301
data1_node * get_parent_tag(data1_handle dh, data1_node *n)
Definition d1_read.c:53
data1_node * data1_mk_node_type(data1_handle dh, NMEM m, int type)
Definition d1_read.c:76
data1_node * data1_read_sgml(data1_handle dh, NMEM m, const char *buf)
Definition d1_read.c:972
void data1_set_data_string(data1_handle dh, data1_node *res, NMEM m, const char *str)
Definition d1_read.c:376
void data1_set_data_zint(data1_handle dh, data1_node *res, NMEM m, zint num)
Definition d1_read.c:403
data1_node * data1_mk_root(data1_handle dh, NMEM nmem, const char *name)
Definition d1_read.c:174
static void data1_iconv_s(data1_handle dh, NMEM m, data1_node *n, yaz_iconv_t t, WRBUF wrbuf, const char *tocode)
Definition d1_read.c:988
const char * data1_get_encoding(data1_handle dh, data1_node *n)
Definition d1_read.c:1039
data1_node * data1_insert_taggeddata(data1_handle dh, data1_node *root, data1_node *at, const char *tagname, NMEM m)
Definition d1_read.c:467
data1_node * data1_mk_tag_data_text(data1_handle dh, data1_node *at, const char *tag, const char *str, NMEM nmem)
Definition d1_read.c:526
data1_node * data1_mk_tag_data_wd(data1_handle dh, data1_node *at, const char *tagname, NMEM m)
Definition d1_read.c:461
char * data1_insert_string_n(data1_handle dh, data1_node *res, NMEM m, const char *str, size_t len)
Definition d1_read.c:382
data1_node * data1_mk_tag_n(data1_handle dh, NMEM nmem, const char *tag, size_t len, const char **attr, data1_node *at)
Definition d1_read.c:259
data1_node * data1_mk_tag_data(data1_handle dh, data1_node *at, const char *tagname, NMEM m)
Definition d1_read.c:449
data1_node * data1_mk_text_n(data1_handle dh, NMEM mem, const char *buf, size_t len, data1_node *parent)
Definition d1_read.c:331
char * data1_insert_string(data1_handle dh, data1_node *res, NMEM m, const char *str)
Definition d1_read.c:410
data1_node * data1_add_taggeddata(data1_handle dh, data1_node *root, data1_node *at, const char *tagname, NMEM m)
Definition d1_read.c:474
data1_node * data1_insert_preprocess_n(data1_handle dh, NMEM nmem, const char *target, size_t len, const char **attr, data1_node *at)
Definition d1_read.c:247
data1_node * data1_mk_node2(data1_handle dh, NMEM m, int type, data1_node *parent)
Definition d1_read.c:146
static data1_node * data1_add_insert_taggeddata(data1_handle dh, data1_node *at, const char *tagname, NMEM m, int local_allowed, int insert_mode)
Definition d1_read.c:416
char * data1_insert_zint(data1_handle dh, data1_node *res, NMEM m, zint num)
Definition d1_read.c:395
void data1_chop_text(data1_handle dh, NMEM m, data1_node *n)
Definition d1_read.c:1078
data1_node * data1_mk_tag(data1_handle dh, NMEM nmem, const char *tag, const char **attr, data1_node *at)
Definition d1_read.c:295
void data1_set_data_string_n(data1_handle dh, data1_node *res, NMEM m, const char *str, size_t len)
Definition d1_read.c:368
data1_node * data1_mk_text(data1_handle dh, NMEM mem, const char *buf, data1_node *parent)
Definition d1_read.c:347
data1_node * data1_mk_tag_data_int(data1_handle dh, data1_node *at, const char *tag, int num, NMEM nmem)
Definition d1_read.c:494
data1_node * data1_read_record(data1_handle dh, int(*rf)(void *, char *, size_t), void *fh, NMEM m)
Definition d1_read.c:941
int getc_mem(void *fh)
Definition d1_read.c:920
data1_node * data1_mk_preprocess_n(data1_handle dh, NMEM nmem, const char *target, size_t len, const char **attr, data1_node *at)
Definition d1_read.c:227
data1_node * data1_mk_tag_uni(data1_handle dh, NMEM nmem, const char *tag, data1_node *at)
Definition d1_read.c:320
data1_node * data1_mk_tag_data_text_uni(data1_handle dh, data1_node *at, const char *tag, const char *str, NMEM nmem)
Definition d1_read.c:538
data1_node * data1_mk_tag_data_zint(data1_handle dh, data1_node *at, const char *tag, zint num, NMEM nmem)
Definition d1_read.c:481
void data1_set_root(data1_handle dh, data1_node *res, NMEM nmem, const char *name)
Definition d1_read.c:191
data1_node * data1_append_node(data1_handle dh, NMEM m, int type, data1_node *parent)
Definition d1_read.c:124
static int ampr(int(*get_byte)(void *fh), void *fh, int *amp)
Definition d1_read.c:551
static int conv_item(NMEM m, yaz_iconv_t t, WRBUF wrbuf, char *inbuf, size_t inlen)
Definition d1_read.c:979
char ** data1_get_read_buf(data1_handle dp, int **lenp)
Definition d1_handle.c:107
#define DATA1N_variant
Definition data1.h:280
#define DATA1N_comment
Definition data1.h:282
data1_vartype * data1_getvartypebyct(data1_handle dh, data1_varset *set, const char *zclass, const char *type)
Definition d1_varset.c:30
#define DATA1I_oid
Definition data1.h:318
data1_element * data1_getelementbytagname(data1_handle dh, data1_absyn *abs, data1_element *parent, const char *tagname)
Definition d1_absyn.c:312
#define DATA1N_tag
Definition data1.h:276
#define DATA1N_data
Definition data1.h:278
#define d1_isspace(c)
Definition data1.h:31
#define DATA1N_root
Definition data1.h:274
data1_absyn * data1_get_absyn(data1_handle dh, const char *name, enum DATA1_XPATH_INDEXING en)
Definition d1_absyn.c:231
#define DATA1N_preprocess
Definition data1.h:284
#define DATA1_LOCALDATA
Definition data1.h:338
#define DATA1I_num
Definition data1.h:316
#define DATA1I_text
Definition data1.h:314
@ DATA1_XPATH_INDEXING_ENABLE
Definition data1.h:349
int data1_is_xmlmode(data1_handle dh)
Definition d1_handle.c:170
#define DATA1I_xmltext
Definition data1.h:320
data1_varset * varset
Definition d1_absyn.h:52
char * encoding
Definition d1_absyn.h:60
char lbuf[DATA1_LOCALDATA]
Definition data1.h:339
struct data1_node::@2::@7 preprocess
data1_xattr * attributes
Definition data1.h:302
struct data1_node::@2::@3 root
char * type
Definition data1.h:290
struct data1_node * parent
Definition data1.h:343
struct data1_node * child
Definition data1.h:341
char * tag
Definition data1.h:296
char * data
Definition data1.h:307
struct data1_node * next
Definition data1.h:340
char * target
Definition data1.h:333
struct data1_node * last_child
Definition data1.h:342
struct data1_absyn * absyn
Definition data1.h:291
union data1_node::@2 u
int which
Definition data1.h:285
struct data1_node::@2::@6 variant
char * value
Definition data1.h:328
char * value
Definition data1.h:261
char * name
Definition data1.h:260
struct data1_xattr * next
Definition data1.h:262
unsigned short what
Definition data1.h:263
long zint
Zebra integer.
Definition util.h:66
#define ZINT_FORMAT
Definition util.h:72