IDZEBRA 2.2.8
d1_absyn.c
Go to the documentation of this file.
1/* This file is part of the Zebra server.
2 Copyright (C) Index Data
3
4Zebra is free software; you can redistribute it and/or modify it under
5the terms of the GNU General Public License as published by the Free
6Software Foundation; either version 2, or (at your option) any later
7version.
8
9Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or
11FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18*/
19
20#if HAVE_CONFIG_H
21#include <config.h>
22#endif
23#include <stdio.h>
24#include <assert.h>
25#include <stdlib.h>
26#include <string.h>
27
28#include <yaz/log.h>
29#include <yaz/oid_db.h>
30#include <yaz/snprintf.h>
31#include <idzebra/data1.h>
32#include <idzebra/recctrl.h>
33#include <zebra_xpath.h>
34#include <d1_absyn.h>
35
36#define D1_MAX_NESTING 128
37
39 NMEM nmem;
40 int size;
42};
43
46 char *str;
48};
49
50unsigned data1_hash_calc(struct data1_hash_table *ht, const char *str)
51{
52 unsigned v = 0;
53 assert(str);
54 while (*str)
55 {
56 if (*str >= 'a' && *str <= 'z')
57 v = v*65509 + *str -'a'+10;
58 else if (*str >= 'A' && *str <= 'Z')
59 v = v*65509 + *str -'A'+10;
60 else if (*str >= '0' && *str <= '9')
61 v = v*65509 + *str -'0';
62 str++;
63 }
64 return v % ht->size;
65}
66
68{
69 int i;
70 struct data1_hash_table *ht = nmem_malloc(nmem, sizeof(*ht));
71 ht->nmem = nmem;
72 ht->size = size;
73 if (ht->size <= 0)
74 ht->size = 29;
75 ht->ar = nmem_malloc(nmem, sizeof(*ht->ar) * ht->size);
76 for (i = 0; i<ht->size; i++)
77 ht->ar[i] = 0;
78 return ht;
79}
80
81void data1_hash_insert(struct data1_hash_table *ht, const char *str,
82 void *clientData, int copy)
83{
84 char *dstr = copy ? nmem_strdup(ht->nmem, str) : (char*) str;
85 if (strchr(str, '?') || strchr(str, '.'))
86 {
87 int i;
88 for (i = 0; i<ht->size; i++)
89 {
90 struct data1_hash_entry **he = &ht->ar[i];
91 for (; *he && strcmp(str, (*he)->str); he = &(*he)->next)
92 ;
93 if (!*he)
94 {
95 *he = nmem_malloc(ht->nmem, sizeof(**he));
96 (*he)->str = dstr;
97 (*he)->next = 0;
98 }
99 (*he)->clientData = clientData;
100 }
101 }
102 else
103 {
104 struct data1_hash_entry **he = &ht->ar[data1_hash_calc(ht, str)];
105 for (; *he && strcmp(str, (*he)->str); he = &(*he)->next)
106 ;
107 if (!*he)
108 {
109 *he = nmem_malloc(ht->nmem, sizeof(**he));
110 (*he)->str = dstr;
111 (*he)->next = 0;
112 }
113 (*he)->clientData = clientData;
114 }
115}
116
117void *data1_hash_lookup(struct data1_hash_table *ht, const char *str)
118{
119 struct data1_hash_entry **he = &ht->ar[data1_hash_calc(ht, str)];
120
121 for (; *he && yaz_matchstr(str, (*he)->str); he = &(*he)->next)
122 ;
123 if (*he)
124 return (*he)->clientData;
125 return 0;
126}
127
129 char *name;
130 char *value;
132};
133
140
147
149{
150 data1_element *e = nmem_malloc(data1_nmem_get(dh), sizeof(*e));
151 e->name = 0;
152 e->tag = 0;
153 e->termlists = 0;
154 e->next = e->children = 0;
155 e->sub_name = 0;
156 e->hash = 0;
157 return e;
158}
159
161{
163
164 while (p)
165 {
166 if (!yaz_matchstr(name, p->name))
167 return p->absyn;
168 p = p->next;
169 }
170 return 0;
171}
172/* *ostrich*
173 We need to destroy DFAs, in xp_element (xelm) definitions
174 pop, 2002-12-13
175*/
176
178{
180
181 while (p)
182 {
183 data1_absyn *abs = p->absyn;
184 if (abs)
185 {
186 data1_xpelement *xpe = abs->xp_elements;
187 while (xpe) {
188 yaz_log (YLOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr);
189 if (xpe->dfa)
190 dfa_delete (&xpe->dfa);
191 xpe = xpe->next;
192 }
193 }
194 p = p->next;
195 }
196}
197
198
199void data1_absyn_trav(data1_handle dh, void *handle,
200 void (*fh)(data1_handle dh, void *h, data1_absyn *a))
201{
203
204 while (p)
205 {
206 (*fh)(dh, handle, p->absyn);
207 p = p->next;
208 }
209}
210
211static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
212 enum DATA1_XPATH_INDEXING en);
213
214static data1_absyn *data1_absyn_add(data1_handle dh, const char *name,
215 enum DATA1_XPATH_INDEXING en)
216{
217 char fname[512];
218 NMEM mem = data1_nmem_get(dh);
219
220 data1_absyn_cache p = (data1_absyn_cache)nmem_malloc(mem, sizeof(*p));
222
223 yaz_snprintf(fname, sizeof(fname), "%s.abs", name);
224 p->absyn = data1_read_absyn(dh, fname, en);
225 p->name = nmem_strdup(mem, name);
226 p->next = *pp;
227 *pp = p;
228 return p->absyn;
229}
230
232 enum DATA1_XPATH_INDEXING en)
233{
234 data1_absyn *absyn;
235
236 if (!(absyn = data1_absyn_search(dh, name)))
237 absyn = data1_absyn_add(dh, name, en);
238 return absyn;
239}
240
242{
244
245 while (p)
246 {
247 if (!yaz_matchstr(name, p->name))
248 return p->attset;
249 p = p->next;
250 }
251 return 0;
252}
253
255{
257
258 while (p)
259 {
260 if (p->attset->oid && !oid_oidcmp(oid, p->attset->oid))
261 return p->attset;
262 p = p->next;
263 }
264 return 0;
265}
266
268{
269 NMEM mem = data1_nmem_get(dh);
270 data1_attset *attset;
271
272 attset = data1_read_attset(dh, name);
273 if (!attset)
274 yaz_log(YLOG_WARN|YLOG_ERRNO, "Couldn't load attribute set %s", name);
275 else
276 {
278 nmem_malloc (mem, sizeof(*p));
280
281 attset->name = p->name = nmem_strdup(mem, name);
282 p->attset = attset;
283 p->next = *pp;
284 *pp = p;
285 }
286 return attset;
287}
288
290{
291 data1_attset *attset;
292
293 if (!(attset = data1_attset_search_name(dh, name)))
294 attset = data1_attset_add(dh, name);
295 return attset;
296}
297
299 const char *name)
300{
302
303 for (r = a->esetnames; r; r = r->next)
304 if (!data1_matchstr(r->name, name))
305 return r;
306 return 0;
307}
308
309/* we have multiple versions of data1_getelementbyname */
310#define DATA1_GETELEMENTBYTAGNAME_VERSION 1
311
313 data1_element *parent,
314 const char *tagname)
315{
316 data1_element *r;
317 struct data1_hash_table *ht;
318
319 /* It's now possible to have a data1 tree with no abstract syntax */
320 if (abs == 0)
321 return 0;
322
323 if (!parent)
324 r = abs->main_elements;
325 else
326 r = parent->children;
327
328#if DATA1_GETELEMENTBYTAGNAME_VERSION==1
329 /* using hash search */
330 if (!r)
331 return 0;
332
333 ht = r->hash;
334 if (!ht)
335 {
336 /* build hash table (the first time) */
337 ht = r->hash = data1_hash_open(29, data1_nmem_get(dh));
338 for (; r; r = r->next)
339 {
340 data1_name *n;
341
342 for (n = r->tag->names; n; n = n->next)
343 data1_hash_insert(ht, n->name, r, 0);
344 }
345 }
346 return data1_hash_lookup(ht, tagname);
347#else
348 /* using linear search */
349 for (; r; r = r->next)
350 {
351 data1_name *n;
352
353 for (n = r->tag->names; n; n = n->next)
354 if (!data1_matchstr(tagname, n->name))
355 return r;
356 }
357 return 0;
358#endif
359}
360
362 const char *name)
363{
364 data1_element *r;
365
366 /* It's now possible to have a data1 tree with no abstract syntax */
367 if (absyn == 0)
368 return 0;
369 for (r = absyn->main_elements; r; r = r->next)
370 if (!data1_matchstr(r->name, name))
371 return r;
372 return 0;
373}
374
375
377{
378 /* It's now possible to have a data1 tree with no abstract syntax */
379 if (absyn == 0)
380 return;
381
382 for (; e; e = e->next)
383 {
384 if (!e->sub_name)
385 {
386 if (e->children)
387 fix_element_ref(dh, absyn, e->children);
388 }
389 else
390 {
391 data1_sub_elements *sub_e = absyn->sub_elements;
392 while (sub_e && strcmp(e->sub_name, sub_e->name))
393 sub_e = sub_e->next;
394 if (sub_e)
395 e->children = sub_e->elements;
396 else
397 yaz_log(YLOG_WARN, "Unresolved reference to sub-elements %s",
398 e->sub_name);
399 }
400 }
401}
402/* *ostrich*
403
404 New function, a bit dummy now... I've seen it in zrpn.c... We should build
405 more clever regexps...
406
407
408 //a -> ^a/.*$
409 //a/b -> ^b/a/.*$
410 /a -> ^a/$
411 /a/b -> ^b/a/$
412
413 / -> none
414
415 pop, 2002-12-13
416
417 Now [] predicates are supported
418
419 pop, 2003-01-17
420
421 */
422
423static const char * mk_xpath_regexp(data1_handle dh, const char *expr)
424{
425 const char *p = expr;
426 int abs = 1;
427 int e = 0;
428 char *stack[32];
429 char *res_p, *res = 0;
430 size_t res_size = 1;
431
432 if (*p != '/')
433 return ("");
434 p++;
435 if (*p == '/')
436 {
437 abs = 0;
438 p++;
439 }
440 while (*p && e < 30)
441 {
442 int is_predicate = 0;
443 char *s;
444 int i, j;
445 for (i = 0; *p && !strchr("/",*p); i++, p++)
446 ;
447 res_size += (i+3); /* we'll add / between later .. */
448 s = stack[e] = (char *) nmem_malloc(data1_nmem_get(dh), i + 1);
449 for (j = 0; j < i; j++)
450 {
451 const char *pp = p-i+j;
452 if (*pp == '[')
453 is_predicate = 1;
454 else if (*pp == ']')
455 is_predicate = 0;
456 else
457 {
458 if (!is_predicate) {
459 if (*pp == '*')
460 *s++ = '.';
461 *s++ = *pp;
462 }
463 }
464 }
465 *s = 0;
466 e++;
467 if (*p)
468 p++;
469 }
470 res_p = res = nmem_malloc(data1_nmem_get(dh), res_size + 10);
471
472 if (stack[e-1][0] == '@') /* path/@attr spec (leaf is attribute) */
473 strcpy(res_p, "/");
474 else
475 strcpy(res_p, "[^@]*/"); /* path .. (index all cdata below it) */
476 res_p = res_p + strlen(res_p);
477 while (--e >= 0) {
478 strcpy(res_p, stack[e]);
479 strcat(res_p, "/");
480 res_p += strlen(stack[e]) + 1;
481 }
482 if (!abs)
483 {
484 strcpy(res_p, ".*");
485 res_p += 2;
486 }
487 strcpy(res_p, "$");
488 res_p++;
489 yaz_log(YLOG_DEBUG, "Got regexp: %s", res);
490 return res;
491}
492
494 char *cp, const char *file, int lineno,
495 const char *element_name, data1_absyn *res,
496 int xpelement,
497 data1_attset *attset)
498{
499 data1_termlist **tp = *tpp;
500 while(1)
501 {
502 char attname[512], structure[512];
503 char *source;
504 int r, i;
505 int level = 0;
506 structure[0] = '\0';
507 for (i = 0; cp[i] && i<sizeof(attname)-1; i++)
508 if (strchr(":,", cp[i]))
509 break;
510 else
511 attname[i] = cp[i];
512 if (i == 0)
513 {
514 if (*cp)
515 yaz_log(YLOG_WARN,
516 "%s:%d: Syntax error in termlistspec '%s'",
517 file, lineno, cp);
518 break;
519 }
520 attname[i] = '\0';
521 r = 1;
522 cp += i;
523 if (*cp == ':')
524 cp++;
525
526 for (i = 0; cp[i] && i<sizeof(structure)-1; i++)
527 if (level == 0 && strchr(",", cp[i]))
528 break;
529 else
530 {
531 structure[i] = cp[i];
532 if (cp[i] == '(')
533 level++;
534 else if (cp[i] == ')')
535 level--;
536 }
537 structure[i] = '\0';
538 if (i)
539 r = 2;
540 cp += i;
541 if (*cp)
542 cp++; /* skip , */
543
544 *tp = (data1_termlist *)
545 nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
546 (*tp)->next = 0;
547
548 if (*attname == '!')
549 {
550 if (!xpelement && element_name)
551 strcpy(attname, element_name);
552 else if (xpelement)
553 strcpy(attname, ZEBRA_XPATH_CDATA);
554 }
555 if (attset)
556 {
557 if (!data1_getattbyname(dh, attset, attname))
558 {
559 yaz_log(YLOG_WARN, "Index '%s' not found in attset(s)",
560 attname);
561 }
562 }
563
564 (*tp)->index_name = nmem_strdup(data1_nmem_get(dh), attname);
565 assert (*(*tp)->index_name != '!');
566 if (r == 2 && (source = strchr(structure, ':')))
567 *source++ = '\0'; /* cut off structure .. */
568 else
569 source = "data"; /* ok: default is leaf data */
570 (*tp)->source = nmem_strdup(data1_nmem_get (dh), source);
571
572 if (r < 2) /* is the structure qualified? */
573 (*tp)->structure = "w";
574 else
575 (*tp)->structure = nmem_strdup(data1_nmem_get (dh), structure);
576 tp = &(*tp)->next;
577 }
578
579 *tpp = tp;
580 return 0;
581}
582
583/* quinn
584 * Converts a 'melm' field[$subfield] pattern to a simple xpath
585 */
586static int melm2xpath(char *melm, char *buf)
587{
588 char *dollar;
589 char *field = melm;
590 char *subfield;
591 char *fieldtype;
592 if ((dollar = strchr(melm, '$'))) {
593 *dollar = '\0';
594 subfield = ++dollar;
595 } else
596 subfield = "";
597 if (field[0] == '0' && field[1] == '0')
598 fieldtype = "controlfield";
599 else
600 fieldtype = "datafield";
601 yaz_snprintf(buf, 60, "/*/%s[@tag=\"%s\"]", fieldtype, field);
602 if (*subfield)
603 yaz_snprintf(buf + strlen(buf), 60, "/subfield[@code=\"%s\"]", subfield);
604 else if (field[0] != '0' || field[1] != '0')
605 strcat(buf, "/subfield");
606 yaz_log(YLOG_DEBUG, "Created xpath: '%s'", buf);
607 return 0;
608}
609
610const char *data1_systag_lookup(data1_absyn *absyn, const char *tag,
611 const char *default_value)
612{
613 struct data1_systag *p = absyn->systags;
614 for (; p; p = p->next)
615 if (!strcmp(p->name, tag))
616 return p->value;
617 return default_value;
618}
619
#define l_isspace(c) ((c) == '\t' || (c) == ' ' || (c) == '\n' || (c) == '\r')

/*
 * Read the next significant line from f and split it into words.
 *
 * Lines that are empty, contain only white space, or whose first
 * non-blank character is '#' are skipped.  *lineno is incremented for
 * every line read, including skipped ones.
 *
 * The line is split in place: words are separated by white space, except
 * that white space between double quotes or between '[' and ']' does not
 * split.  The quote and bracket characters stay part of the word.  A word
 * starting with '#' ends the line (trailing comment).
 *
 * f       stream to read from
 * lineno  line counter to update
 * line    buffer of len bytes receiving the line; argv points into it
 * argv    receives pointers to the words
 * num     capacity of argv; words beyond the first num are ignored
 *
 * Returns the number of words stored in argv, or 0 at end of file.
 */
int read_absyn_line(FILE *f, int *lineno, char *line, int len,
                    char *argv[], int num)
{
    char *p;
    int argc;
    int quoted = 0;

    while ((p = fgets(line, len, f)))
    {
        (*lineno)++;
        while (*p && l_isspace(*p))
            p++;
        if (*p && *p != '#')
            break;
    }
    if (!p)
        return 0;

    /* stop at num words: argv has no room for more */
    for (argc = 0; *p && argc < num; argc++)
    {
        if (*p == '#')  /* trailing comment */
            break;
        argv[argc] = p;
        while (*p && !(l_isspace(*p) && !quoted)) {
            if (*p =='"') quoted = 1 - quoted;
            if (*p =='[') quoted = 1;
            if (*p ==']') quoted = 0;
            p++;
        }
        if (*p)
        {
            *(p++) = '\0';
            while (*p && l_isspace(*p))
                p++;
        }
    }
    return argc;
}
660
662{
663 if (root->u.root.absyn)
664 return root->u.root.absyn->marc;
665 return 0;
666}
667
669 data1_node *root)
670{
671 if (root->u.root.absyn)
672 return root->u.root.absyn->main_elements;
673 return 0;
674}
675
676static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
677 enum DATA1_XPATH_INDEXING default_xpath)
678{
679 data1_sub_elements *cur_elements = NULL;
680 data1_xpelement **cur_xpelement = NULL;
681 data1_attset *attset_list = data1_empty_attset(dh);
682 data1_attset_child **attset_childp = &attset_list->children;
683
684 data1_absyn *res = 0;
685 FILE *f;
687 data1_esetname **esetpp;
688 data1_maptab **maptabp;
689 data1_marctab **marcp;
690 data1_termlist *all = 0;
691 data1_tagset **tagset_childp;
692 struct data1_systag **systagsp;
693 int level = 0;
694 int lineno = 0;
695 int argc;
696 char *argv[50], line[512];
697
698 f = data1_path_fopen(dh, file, "r");
699
700 res = (data1_absyn *) nmem_malloc(data1_nmem_get(dh), sizeof(*res));
701 res->name = 0;
702 res->oid = 0;
703 res->tagset = 0;
704 res->encoding = 0;
705 res->xpath_indexing =
706 (f ? DATA1_XPATH_INDEXING_DISABLE : default_xpath);
707 res->systags = 0;
708 systagsp = &res->systags;
709 tagset_childp = &res->tagset;
710
711 res->varset = 0;
712 res->esetnames = 0;
713 esetpp = &res->esetnames;
714 res->maptabs = 0;
715 maptabp = &res->maptabs;
716 res->marc = 0;
717 marcp = &res->marc;
718 res->sub_elements = NULL;
719 res->main_elements = NULL;
720 res->xp_elements = NULL;
721 cur_xpelement = &res->xp_elements;
722
723 while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50)))
724 {
725 char *cmd = *argv;
726 if (!strcmp(cmd, "elm") || !strcmp(cmd, "element"))
727 {
728 data1_element *new_element;
729 int i;
730 char *p, *sub_p, *path, *name, *termlists;
731 int type, value;
732 data1_termlist **tp;
733
734 if (argc < 4)
735 {
736 yaz_log(YLOG_WARN, "%s:%d: Bad # of args to elm", file, lineno);
737 continue;
738 }
739 path = argv[1];
740 name = argv[2];
741 termlists = argv[3];
742
743 if (!cur_elements)
744 {
745 cur_elements = (data1_sub_elements *)
746 nmem_malloc(data1_nmem_get(dh), sizeof(*cur_elements));
747 cur_elements->next = res->sub_elements;
748 cur_elements->elements = NULL;
749 cur_elements->name = "main";
750 res->sub_elements = cur_elements;
751
752 level = 0;
753 ppl[level] = &cur_elements->elements;
754 }
755 p = path;
756 for (i = 1;; i++)
757 {
758 char *e;
759
760 if ((e = strchr(p, '/')))
761 p = e+1;
762 else
763 break;
764 }
765 if (i > level+1)
766 {
767 yaz_log(YLOG_WARN, "%s:%d: Bad level increase", file, lineno);
768 fclose(f);
769 return 0;
770 }
771 level = i;
772 new_element = *ppl[level-1] = data1_mk_element(dh);
773
774 tp = &new_element->termlists;
775 ppl[level-1] = &new_element->next;
776 ppl[level] = &new_element->children;
777
778 /* consider subtree (if any) ... */
779 if ((sub_p = strchr (p, ':')) && sub_p[1])
780 {
781 *sub_p++ = '\0';
782 new_element->sub_name =
783 nmem_strdup (data1_nmem_get(dh), sub_p);
784 }
785 /* well-defined tag */
786 if (sscanf(p, "(%d,%d)", &type, &value) == 2)
787 {
788 if (!res->tagset)
789 {
790 yaz_log(YLOG_WARN, "%s:%d: No tagset loaded", file, lineno);
791 fclose(f);
792 return 0;
793 }
794 if (!(new_element->tag = data1_gettagbynum(dh, res->tagset,
795 type, value)))
796 {
797 yaz_log(YLOG_WARN, "%s:%d: Couldn't find tag %s in tagset",
798 file, lineno, p);
799 fclose(f);
800 return 0;
801 }
802 }
803 /* private tag */
804 else if (*p)
805 {
806 data1_tag *nt =
807 new_element->tag = (data1_tag *)
808 nmem_malloc(data1_nmem_get (dh),
809 sizeof(*new_element->tag));
810 nt->which = DATA1T_string;
811 nt->value.string = nmem_strdup(data1_nmem_get(dh), p);
812 nt->names = (data1_name *)
813 nmem_malloc(data1_nmem_get(dh),
814 sizeof(*new_element->tag->names));
815 nt->names->name = nt->value.string;
816 nt->names->next = 0;
817 nt->kind = DATA1K_string;
818 nt->next = 0;
819 nt->tagset = 0;
820 }
821 else
822 {
823 yaz_log(YLOG_WARN, "%s:%d: Bad element", file, lineno);
824 fclose(f);
825 return 0;
826 }
827 /* parse termList definitions */
828 p = termlists;
829 if (*p != '-')
830 {
831 if (parse_termlists(dh, &tp, p, file, lineno, name, res, 0,
832 attset_list))
833 {
834 fclose (f);
835 return 0;
836 }
837 *tp = all; /* append any ALL entries to the list */
838 }
839 new_element->name = nmem_strdup(data1_nmem_get(dh), name);
840 }
841 /* *ostrich*
842 New code to support xelm directive
843 for each xelm a dfa is built. xelms are stored in res->xp_elements
844
845 maybe we should use a simple sscanf instead of dfa?
846
847 pop, 2002-12-13
848
849 Now [] predicates are supported. regexps and xpath structure is
850 a bit redundant, however it's comfortable later...
851
852 pop, 2003-01-17
853 */
854
855 else if (!strcmp(cmd, "xelm") || !strcmp(cmd, "melm")) {
856
857 int i;
858 char *p, *xpath_expr, *termlists;
859 const char *regexp;
860 struct DFA *dfa = 0;
861 data1_termlist **tp;
862 char melm_xpath[128];
863 data1_xpelement *xp_ele = 0;
864 data1_xpelement *last_match = 0;
865
866 if (argc != 3)
867 {
868 yaz_log(YLOG_WARN, "%s:%d: Bad # of args to %s",
869 file, lineno, cmd);
870 continue;
871 }
872
873 if (!strcmp(cmd, "melm")) {
874 if (melm2xpath(argv[1], melm_xpath) < 0)
875 continue;
876 xpath_expr = melm_xpath;
877 } else {
878 xpath_expr = argv[1];
879 }
880 termlists = argv[2];
881 regexp = mk_xpath_regexp(dh, xpath_expr);
882
883#if OPTIMIZE_MELM
884 /* get last of existing regulars with same regexp */
885 for (xp_ele = res->xp_elements; xp_ele; xp_ele = xp_ele->next)
886 if (!strcmp(xp_ele->regexp, regexp))
887 last_match = xp_ele;
888#endif
889 if (!last_match)
890 {
891 /* new regular expression . Parse + generate */
892 const char *regexp_ptr = regexp;
893
894 dfa = dfa_init();
895 i = dfa_parse (dfa, &regexp_ptr);
896 if (i || *regexp_ptr) {
897 yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno);
898 dfa_delete (&dfa);
899 continue;
900 }
901 }
902 *cur_xpelement = (data1_xpelement *)
903 nmem_malloc(data1_nmem_get(dh), sizeof(**cur_xpelement));
904 (*cur_xpelement)->next = 0;
905 (*cur_xpelement)->match_next = 0;
906 if (last_match)
907 last_match->match_next = *cur_xpelement;
908#if OPTIMIZE_MELM
909 (*cur_xpelement)->regexp = regexp;
910#endif
911 (*cur_xpelement)->xpath_expr = nmem_strdup(data1_nmem_get (dh),
912 xpath_expr);
913
914 if (dfa)
915 dfa_mkstate (dfa);
916 (*cur_xpelement)->dfa = dfa;
917
918#ifdef ENHANCED_XELM
919 (*cur_xpelement)->xpath_len =
921 xpath_expr,
922 (*cur_xpelement)->xpath, XPATH_STEP_COUNT,
923 data1_nmem_get(dh));
924#endif
925 (*cur_xpelement)->termlists = 0;
926 tp = &(*cur_xpelement)->termlists;
927
928 /* parse termList definitions */
929 p = termlists;
930 if (*p != '-')
931 {
932 if (parse_termlists(dh, &tp, p, file, lineno,
933 xpath_expr, res, 1, attset_list))
934 {
935 fclose (f);
936 return 0;
937 }
938 *tp = all; /* append any ALL entries to the list */
939 }
940 cur_xpelement = &(*cur_xpelement)->next;
941 }
942 else if (!strcmp(cmd, "section"))
943 {
944 char *name;
945
946 if (argc < 2)
947 {
948 yaz_log(YLOG_WARN, "%s:%d: Bad # of args to section",
949 file, lineno);
950 continue;
951 }
952 name = argv[1];
953
954 cur_elements = (data1_sub_elements *)
955 nmem_malloc(data1_nmem_get(dh), sizeof(*cur_elements));
956 cur_elements->next = res->sub_elements;
957 cur_elements->elements = NULL;
958 cur_elements->name = nmem_strdup(data1_nmem_get(dh), name);
959 res->sub_elements = cur_elements;
960
961 level = 0;
962 ppl[level] = &cur_elements->elements;
963 }
964 else if (!strcmp(cmd, "xpath"))
965 {
966 if (argc != 2)
967 {
968 yaz_log(YLOG_WARN, "%s:%d: Bad # of args to 'xpath' directive",
969 file, lineno);
970 continue;
971 }
972 if (!strcmp(argv[1], "enable"))
974 else if (!strcmp (argv[1], "disable"))
976 else
977 {
978 yaz_log(YLOG_WARN, "%s:%d: Expecting disable/enable "
979 "after 'xpath' directive", file, lineno);
980 }
981 }
982 else if (!strcmp(cmd, "all"))
983 {
984 data1_termlist **tp = &all;
985 if (all)
986 {
987 yaz_log(YLOG_WARN, "%s:%d: Too many 'all' directives - ignored",
988 file, lineno);
989 continue;
990 }
991 if (argc != 2)
992 {
993 yaz_log(YLOG_WARN, "%s:%d: Bad # of args to 'all' directive",
994 file, lineno);
995 continue;
996 }
997 if (parse_termlists(dh, &tp, argv[1], file, lineno, 0, res, 0,
998 attset_list))
999 {
1000 fclose (f);
1001 return 0;
1002 }
1003 }
1004 else if (!strcmp(cmd, "name"))
1005 {
1006 if (argc != 2)
1007 {
1008 yaz_log(YLOG_WARN, "%s:%d: Bad # of args to name directive",
1009 file, lineno);
1010 continue;
1011 }
1012 res->name = nmem_strdup(data1_nmem_get(dh), argv[1]);
1013 }
1014 else if (!strcmp(cmd, "reference"))
1015 {
1016 char *name;
1017
1018 if (argc != 2)
1019 {
1020 yaz_log(YLOG_WARN, "%s:%d: Bad # of args to reference",
1021 file, lineno);
1022 continue;
1023 }
1024 name = argv[1];
1025 res->oid = yaz_string_to_oid_nmem(yaz_oid_std(),
1026 CLASS_SCHEMA, name,
1027 data1_nmem_get(dh));
1028 if (!res->oid)
1029 {
1030 yaz_log(YLOG_WARN, "%s:%d: Unknown tagset ref '%s'",
1031 file, lineno, name);
1032 continue;
1033 }
1034 }
1035 else if (!strcmp(cmd, "attset"))
1036 {
1037 char *name;
1038 data1_attset *attset;
1039
1040 if (argc != 2)
1041 {
1042 yaz_log(YLOG_WARN, "%s:%d: Bad # of args to attset",
1043 file, lineno);
1044 continue;
1045 }
1046 name = argv[1];
1047 if (!(attset = data1_get_attset(dh, name)))
1048 {
1049 yaz_log(YLOG_WARN, "%s:%d: Couldn't find attset %s",
1050 file, lineno, name);
1051 continue;
1052 }
1053 *attset_childp = (data1_attset_child *)
1054 nmem_malloc(data1_nmem_get(dh), sizeof(**attset_childp));
1055 (*attset_childp)->child = attset;
1056 (*attset_childp)->next = 0;
1057 attset_childp = &(*attset_childp)->next;
1058 }
1059 else if (!strcmp(cmd, "tagset"))
1060 {
1061 char *name;
1062 int type = 0;
1063 if (argc < 2)
1064 {
1065 yaz_log(YLOG_WARN, "%s:%d: Bad # of args to tagset",
1066 file, lineno);
1067 continue;
1068 }
1069 name = argv[1];
1070 if (argc == 3)
1071 type = atoi(argv[2]);
1072 *tagset_childp = data1_read_tagset(dh, name, type);
1073 if (!(*tagset_childp))
1074 {
1075 yaz_log(YLOG_WARN, "%s:%d: Couldn't load tagset %s",
1076 file, lineno, name);
1077 continue;
1078 }
1079 tagset_childp = &(*tagset_childp)->next;
1080 }
1081 else if (!strcmp(cmd, "varset"))
1082 {
1083 char *name;
1084
1085 if (argc != 2)
1086 {
1087 yaz_log(YLOG_WARN, "%s:%d: Bad # of args in varset",
1088 file, lineno);
1089 continue;
1090 }
1091 name = argv[1];
1092 if (!(res->varset = data1_read_varset(dh, name)))
1093 {
1094 yaz_log(YLOG_WARN, "%s:%d: Couldn't load Varset %s",
1095 file, lineno, name);
1096 continue;
1097 }
1098 }
1099 else if (!strcmp(cmd, "esetname"))
1100 {
1101 char *name, *fname;
1102
1103 if (argc != 3)
1104 {
1105 yaz_log(YLOG_WARN, "%s:%d: Bad # of args in esetname",
1106 file, lineno);
1107 continue;
1108 }
1109 name = argv[1];
1110 fname = argv[2];
1111
1112 *esetpp = (data1_esetname *)
1113 nmem_malloc(data1_nmem_get(dh), sizeof(**esetpp));
1114 (*esetpp)->name = nmem_strdup(data1_nmem_get(dh), name);
1115 (*esetpp)->next = 0;
1116 if (*fname == '@')
1117 (*esetpp)->spec = 0;
1118 else if (!((*esetpp)->spec = data1_read_espec1(dh, fname)))
1119 {
1120 yaz_log(YLOG_WARN, "%s:%d: Espec-1 read failed for %s",
1121 file, lineno, fname);
1122 continue;
1123 }
1124 esetpp = &(*esetpp)->next;
1125 }
1126 else if (!strcmp(cmd, "maptab"))
1127 {
1128 char *name;
1129
1130 if (argc != 2)
1131 {
1132 yaz_log(YLOG_WARN, "%s:%d: Bad # of args for maptab",
1133 file, lineno);
1134 continue;
1135 }
1136 name = argv[1];
1137 if (!(*maptabp = data1_read_maptab(dh, name)))
1138 {
1139 yaz_log(YLOG_WARN, "%s:%d: Couldn't load maptab %s",
1140 file, lineno, name);
1141 continue;
1142 }
1143 maptabp = &(*maptabp)->next;
1144 }
1145 else if (!strcmp(cmd, "marc"))
1146 {
1147 char *name;
1148
1149 if (argc != 2)
1150 {
1151 yaz_log(YLOG_WARN, "%s:%d: Bad # or args for marc",
1152 file, lineno);
1153 continue;
1154 }
1155 name = argv[1];
1156 if (!(*marcp = data1_read_marctab(dh, name)))
1157 {
1158 yaz_log(YLOG_WARN, "%s:%d: Couldn't read marctab %s",
1159 file, lineno, name);
1160 continue;
1161 }
1162 marcp = &(*marcp)->next;
1163 }
1164 else if (!strcmp(cmd, "encoding"))
1165 {
1166 if (argc != 2)
1167 {
1168 yaz_log(YLOG_WARN, "%s:%d: Bad # or args for encoding",
1169 file, lineno);
1170 continue;
1171 }
1172 res->encoding = nmem_strdup(data1_nmem_get(dh), argv[1]);
1173 }
1174 else if (!strcmp(cmd, "systag"))
1175 {
1176 if (argc != 3)
1177 {
1178 yaz_log(YLOG_WARN, "%s:%d: Bad # or args for systag",
1179 file, lineno);
1180 continue;
1181 }
1182 *systagsp = nmem_malloc(data1_nmem_get(dh), sizeof(**systagsp));
1183
1184 (*systagsp)->name = nmem_strdup(data1_nmem_get(dh), argv[1]);
1185 (*systagsp)->value = nmem_strdup(data1_nmem_get(dh), argv[2]);
1186 systagsp = &(*systagsp)->next;
1187 }
1188 else
1189 {
1190 yaz_log(YLOG_WARN, "%s:%d: Unknown directive '%s'", file,
1191 lineno, cmd);
1192 continue;
1193 }
1194 }
1195 if (f)
1196 fclose(f);
1197
1198 for (cur_elements = res->sub_elements; cur_elements;
1199 cur_elements = cur_elements->next)
1200 {
1201 if (!strcmp(cur_elements->name, "main"))
1202 res->main_elements = cur_elements->elements;
1203 fix_element_ref(dh, res, cur_elements->elements);
1204 }
1205 *systagsp = 0;
1206 return res;
1207}
1208
1209/*
1210 * Local variables:
1211 * c-basic-offset: 4
1212 * c-file-style: "Stroustrup"
1213 * indent-tabs-mode: nil
1214 * End:
1215 * vim: shiftwidth=4 tabstop=8 expandtab
1216 */
1217
void data1_absyn_destroy(data1_handle dh)
Definition d1_absyn.c:177
static data1_absyn * data1_absyn_add(data1_handle dh, const char *name, enum DATA1_XPATH_INDEXING en)
Definition d1_absyn.c:214
struct data1_hash_table * data1_hash_open(int size, NMEM nmem)
Definition d1_absyn.c:67
static data1_absyn * data1_read_absyn(data1_handle dh, const char *file, enum DATA1_XPATH_INDEXING en)
Definition d1_absyn.c:676
const char * data1_systag_lookup(data1_absyn *absyn, const char *tag, const char *default_value)
Definition d1_absyn.c:610
static const char * mk_xpath_regexp(data1_handle dh, const char *expr)
Definition d1_absyn.c:423
void data1_absyn_trav(data1_handle dh, void *handle, void(*fh)(data1_handle dh, void *h, data1_absyn *a))
Definition d1_absyn.c:199
data1_absyn * data1_absyn_search(data1_handle dh, const char *name)
Definition d1_absyn.c:160
data1_attset * data1_attset_search_id(data1_handle dh, const Odr_oid *oid)
Definition d1_absyn.c:254
data1_esetname * data1_getesetbyname(data1_handle dh, data1_absyn *a, const char *name)
Definition d1_absyn.c:298
void data1_hash_insert(struct data1_hash_table *ht, const char *str, void *clientData, int copy)
Definition d1_absyn.c:81
#define l_isspace(c)
Definition d1_absyn.c:620
data1_element * data1_absyn_getelements(data1_handle dh, data1_node *root)
Definition d1_absyn.c:668
data1_element * data1_getelementbytagname(data1_handle dh, data1_absyn *abs, data1_element *parent, const char *tagname)
Definition d1_absyn.c:312
static int melm2xpath(char *melm, char *buf)
Definition d1_absyn.c:586
data1_element * data1_mk_element(data1_handle dh)
Definition d1_absyn.c:148
void fix_element_ref(data1_handle dh, data1_absyn *absyn, data1_element *e)
Definition d1_absyn.c:376
data1_attset * data1_attset_search_name(data1_handle dh, const char *name)
Definition d1_absyn.c:241
data1_element * data1_getelementbyname(data1_handle dh, data1_absyn *absyn, const char *name)
Definition d1_absyn.c:361
int read_absyn_line(FILE *f, int *lineno, char *line, int len, char *argv[], int num)
Definition d1_absyn.c:622
static int parse_termlists(data1_handle dh, data1_termlist ***tpp, char *cp, const char *file, int lineno, const char *element_name, data1_absyn *res, int xpelement, data1_attset *attset)
Definition d1_absyn.c:493
data1_absyn * data1_get_absyn(data1_handle dh, const char *name, enum DATA1_XPATH_INDEXING en)
Definition d1_absyn.c:231
unsigned data1_hash_calc(struct data1_hash_table *ht, const char *str)
Definition d1_absyn.c:50
data1_attset * data1_attset_add(data1_handle dh, const char *name)
Definition d1_absyn.c:267
data1_marctab * data1_absyn_getmarctab(data1_handle dh, data1_node *root)
Definition d1_absyn.c:661
data1_attset * data1_get_attset(data1_handle dh, const char *name)
Definition d1_absyn.c:289
void * data1_hash_lookup(struct data1_hash_table *ht, const char *str)
Definition d1_absyn.c:117
#define D1_MAX_NESTING
Definition d1_absyn.c:36
struct data1_absyn_cache_info * data1_absyn_cache
Definition data1.h:118
data1_tag * data1_gettagbynum(data1_handle dh, data1_tagset *s, int type, int value)
Definition d1_tagset.c:64
@ DATA1K_string
Definition data1.h:126
data1_attset_cache * data1_attset_cache_get(data1_handle dh)
Definition d1_handle.c:76
data1_att * data1_getattbyname(data1_handle dh, data1_attset *s, const char *name)
Definition d1_attset.c:31
data1_absyn_cache * data1_absyn_cache_get(data1_handle dh)
Definition d1_handle.c:71
data1_attset * data1_read_attset(data1_handle dh, const char *file)
Definition d1_attset.c:63
data1_attset * data1_empty_attset(data1_handle dh)
Definition d1_attset.c:50
Z_Espec1 * data1_read_espec1(data1_handle dh, const char *file)
Definition d1_espec.c:213
#define DATA1T_string
Definition data1.h:205
NMEM data1_nmem_get(data1_handle dh)
Definition d1_handle.c:66
#define data1_matchstr(s1, s2)
Definition data1.h:36
struct data1_attset_cache_info * data1_attset_cache
Definition data1.h:119
data1_varset * data1_read_varset(data1_handle dh, const char *file)
Definition d1_varset.c:56
data1_maptab * data1_read_maptab(data1_handle dh, const char *file)
Definition d1_map.c:43
data1_marctab * data1_read_marctab(data1_handle dh, const char *file)
Definition d1_marc.c:38
FILE * data1_path_fopen(data1_handle dh, const char *file, const char *mode)
Definition d1_handle.c:147
DATA1_XPATH_INDEXING
Definition data1.h:347
@ DATA1_XPATH_INDEXING_ENABLE
Definition data1.h:349
@ DATA1_XPATH_INDEXING_DISABLE
Definition data1.h:348
data1_tagset * data1_read_tagset(data1_handle dh, const char *file, int type)
Definition d1_tagset.c:120
int dfa_parse(struct DFA *, const char **)
Definition dfa.c:1121
void dfa_mkstate(struct DFA *)
Definition dfa.c:1148
void dfa_delete(struct DFA **)
Definition dfa.c:1158
struct DFA * dfa_init(void)
Definition dfa.c:1092
#define ZEBRA_XPATH_CDATA
Definition recctrl.h:39
Definition dfa.h:53
data1_absyn * absyn
Definition d1_absyn.c:137
data1_absyn_cache next
Definition d1_absyn.c:138
data1_esetname * esetnames
Definition d1_absyn.h:53
data1_varset * varset
Definition d1_absyn.h:52
data1_sub_elements * sub_elements
Definition d1_absyn.h:56
enum DATA1_XPATH_INDEXING xpath_indexing
Definition d1_absyn.h:61
data1_marctab * marc
Definition d1_absyn.h:55
data1_maptab * maptabs
Definition d1_absyn.h:54
data1_tagset * tagset
Definition d1_absyn.h:51
char * name
Definition d1_absyn.h:49
char * encoding
Definition d1_absyn.h:60
struct data1_xpelement * xp_elements
Definition d1_absyn.h:58
data1_element * main_elements
Definition d1_absyn.h:57
Odr_oid * oid
Definition d1_absyn.h:50
struct data1_systag * systags
Definition d1_absyn.h:59
data1_attset * attset
Definition d1_absyn.c:144
data1_attset_cache next
Definition d1_absyn.c:145
data1_attset * child
Definition data1.h:64
data1_attset_child * next
Definition data1.h:65
char * name
Definition data1.h:70
data1_attset_child * children
Definition data1.h:73
data1_attset * next
Definition data1.h:74
Odr_oid * oid
Definition data1.h:71
data1_tag * tag
Definition data1.h:245
data1_termlist * termlists
Definition data1.h:246
struct data1_element * next
Definition data1.h:249
struct data1_element * children
Definition data1.h:248
struct data1_hash_table * hash
Definition data1.h:250
char * sub_name
Definition data1.h:247
char * name
Definition data1.h:244
struct data1_esetname * next
Definition data1.h:163
char * name
Definition data1.h:161
Definition d1_absyn.c:44
void * clientData
Definition d1_absyn.c:45
char * str
Definition d1_absyn.c:46
struct data1_hash_entry * next
Definition d1_absyn.c:47
struct data1_hash_entry ** ar
Definition d1_absyn.c:41
struct data1_maptab * next
Definition data1.h:108
struct data1_marctab * next
Definition data1.h:156
struct data1_name * next
Definition data1.h:115
char * name
Definition data1.h:114
struct data1_node::@2::@3 root
struct data1_absyn * absyn
Definition data1.h:291
union data1_node::@2 u
data1_element * elements
Definition data1.h:256
struct data1_sub_elements * next
Definition data1.h:255
char * name
Definition d1_absyn.c:129
struct data1_systag * next
Definition d1_absyn.c:131
char * value
Definition d1_absyn.c:130
struct data1_tag * next
Definition data1.h:215
int which
Definition data1.h:206
data1_datatype kind
Definition data1.h:212
struct data1_tagset * tagset
Definition data1.h:214
data1_name * names
Definition data1.h:203
union data1_tag::@1 value
char * string
Definition data1.h:210
data1_tagset * next
Definition data1.h:227
struct data1_termlist * next
Definition data1.h:235
char * xpath_expr
Definition d1_absyn.h:32
struct data1_xpelement * match_next
Definition d1_absyn.h:44
struct DFA * dfa
Definition d1_absyn.h:37
struct data1_xpelement * next
Definition d1_absyn.h:39
int zebra_parse_xpath_str(const char *xpath_string, struct xpath_location_step *xpath, int max, NMEM mem)
Definition xpath.c:162
#define XPATH_STEP_COUNT
Definition zebra_xpath.h:25