IDZEBRA  2.2.7
d1_absyn.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #include <yaz/log.h>
29 #include <yaz/oid_db.h>
30 #include <yaz/snprintf.h>
31 #include <idzebra/data1.h>
32 #include <idzebra/recctrl.h>
33 #include <zebra_xpath.h>
34 #include <d1_absyn.h>
35 
36 #define D1_MAX_NESTING 128
37 
39  NMEM nmem;
40  int size;
41  struct data1_hash_entry **ar;
42 };
43 
45  void *clientData;
46  char *str;
48 };
49 
50 unsigned data1_hash_calc(struct data1_hash_table *ht, const char *str)
51 {
52  unsigned v = 0;
53  assert(str);
54  while (*str)
55  {
56  if (*str >= 'a' && *str <= 'z')
57  v = v*65509 + *str -'a'+10;
58  else if (*str >= 'A' && *str <= 'Z')
59  v = v*65509 + *str -'A'+10;
60  else if (*str >= '0' && *str <= '9')
61  v = v*65509 + *str -'0';
62  str++;
63  }
64  return v % ht->size;
65 }
66 
68 {
69  int i;
70  struct data1_hash_table *ht = nmem_malloc(nmem, sizeof(*ht));
71  ht->nmem = nmem;
72  ht->size = size;
73  if (ht->size <= 0)
74  ht->size = 29;
75  ht->ar = nmem_malloc(nmem, sizeof(*ht->ar) * ht->size);
76  for (i = 0; i<ht->size; i++)
77  ht->ar[i] = 0;
78  return ht;
79 }
80 
81 void data1_hash_insert(struct data1_hash_table *ht, const char *str,
82  void *clientData, int copy)
83 {
84  char *dstr = copy ? nmem_strdup(ht->nmem, str) : (char*) str;
85  if (strchr(str, '?') || strchr(str, '.'))
86  {
87  int i;
88  for (i = 0; i<ht->size; i++)
89  {
90  struct data1_hash_entry **he = &ht->ar[i];
91  for (; *he && strcmp(str, (*he)->str); he = &(*he)->next)
92  ;
93  if (!*he)
94  {
95  *he = nmem_malloc(ht->nmem, sizeof(**he));
96  (*he)->str = dstr;
97  (*he)->next = 0;
98  }
99  (*he)->clientData = clientData;
100  }
101  }
102  else
103  {
104  struct data1_hash_entry **he = &ht->ar[data1_hash_calc(ht, str)];
105  for (; *he && strcmp(str, (*he)->str); he = &(*he)->next)
106  ;
107  if (!*he)
108  {
109  *he = nmem_malloc(ht->nmem, sizeof(**he));
110  (*he)->str = dstr;
111  (*he)->next = 0;
112  }
113  (*he)->clientData = clientData;
114  }
115 }
116 
117 void *data1_hash_lookup(struct data1_hash_table *ht, const char *str)
118 {
119  struct data1_hash_entry **he = &ht->ar[data1_hash_calc(ht, str)];
120 
121  for (; *he && yaz_matchstr(str, (*he)->str); he = &(*he)->next)
122  ;
123  if (*he)
124  return (*he)->clientData;
125  return 0;
126 }
127 
128 struct data1_systag {
129  char *name;
130  char *value;
132 };
133 
135 {
136  char *name;
139 };
140 
142 {
143  char *name;
146 };
147 
149 {
150  data1_element *e = nmem_malloc(data1_nmem_get(dh), sizeof(*e));
151  e->name = 0;
152  e->tag = 0;
153  e->termlists = 0;
154  e->next = e->children = 0;
155  e->sub_name = 0;
156  e->hash = 0;
157  return e;
158 }
159 
161 {
163 
164  while (p)
165  {
166  if (!yaz_matchstr(name, p->name))
167  return p->absyn;
168  p = p->next;
169  }
170  return 0;
171 }
172 /* *ostrich*
173  We need to destroy DFAs, in xp_element (xelm) definitions
174  pop, 2002-12-13
175 */
176 
178 {
180 
181  while (p)
182  {
183  data1_absyn *abs = p->absyn;
184  if (abs)
185  {
186  data1_xpelement *xpe = abs->xp_elements;
187  while (xpe) {
188  yaz_log (YLOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr);
189  if (xpe->dfa)
190  dfa_delete (&xpe->dfa);
191  xpe = xpe->next;
192  }
193  }
194  p = p->next;
195  }
196 }
197 
198 
199 void data1_absyn_trav(data1_handle dh, void *handle,
200  void (*fh)(data1_handle dh, void *h, data1_absyn *a))
201 {
203 
204  while (p)
205  {
206  (*fh)(dh, handle, p->absyn);
207  p = p->next;
208  }
209 }
210 
211 static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
212  enum DATA1_XPATH_INDEXING en);
213 
214 static data1_absyn *data1_absyn_add(data1_handle dh, const char *name,
215  enum DATA1_XPATH_INDEXING en)
216 {
217  char fname[512];
218  NMEM mem = data1_nmem_get(dh);
219 
220  data1_absyn_cache p = (data1_absyn_cache)nmem_malloc(mem, sizeof(*p));
222 
223  yaz_snprintf(fname, sizeof(fname), "%s.abs", name);
224  p->absyn = data1_read_absyn(dh, fname, en);
225  p->name = nmem_strdup(mem, name);
226  p->next = *pp;
227  *pp = p;
228  return p->absyn;
229 }
230 
232  enum DATA1_XPATH_INDEXING en)
233 {
234  data1_absyn *absyn;
235 
236  if (!(absyn = data1_absyn_search(dh, name)))
237  absyn = data1_absyn_add(dh, name, en);
238  return absyn;
239 }
240 
242 {
244 
245  while (p)
246  {
247  if (!yaz_matchstr(name, p->name))
248  return p->attset;
249  p = p->next;
250  }
251  return 0;
252 }
253 
255 {
257 
258  while (p)
259  {
260  if (p->attset->oid && !oid_oidcmp(oid, p->attset->oid))
261  return p->attset;
262  p = p->next;
263  }
264  return 0;
265 }
266 
268 {
269  NMEM mem = data1_nmem_get(dh);
270  data1_attset *attset;
271 
272  attset = data1_read_attset(dh, name);
273  if (!attset)
274  yaz_log(YLOG_WARN|YLOG_ERRNO, "Couldn't load attribute set %s", name);
275  else
276  {
278  nmem_malloc (mem, sizeof(*p));
280 
281  attset->name = p->name = nmem_strdup(mem, name);
282  p->attset = attset;
283  p->next = *pp;
284  *pp = p;
285  }
286  return attset;
287 }
288 
290 {
291  data1_attset *attset;
292 
293  if (!(attset = data1_attset_search_name(dh, name)))
294  attset = data1_attset_add(dh, name);
295  return attset;
296 }
297 
299  const char *name)
300 {
301  data1_esetname *r;
302 
303  for (r = a->esetnames; r; r = r->next)
304  if (!data1_matchstr(r->name, name))
305  return r;
306  return 0;
307 }
308 
309 /* we have multiple versions of data1_getelementbyname */
310 #define DATA1_GETELEMENTBYTAGNAME_VERSION 1
311 
313  data1_element *parent,
314  const char *tagname)
315 {
316  data1_element *r;
317  struct data1_hash_table *ht;
318 
319  /* It's now possible to have a data1 tree with no abstract syntax */
320  if (abs == 0)
321  return 0;
322 
323  if (!parent)
324  r = abs->main_elements;
325  else
326  r = parent->children;
327 
328 #if DATA1_GETELEMENTBYTAGNAME_VERSION==1
329  /* using hash search */
330  if (!r)
331  return 0;
332 
333  ht = r->hash;
334  if (!ht)
335  {
336  /* build hash table (the first time) */
337  ht = r->hash = data1_hash_open(29, data1_nmem_get(dh));
338  for (; r; r = r->next)
339  {
340  data1_name *n;
341 
342  for (n = r->tag->names; n; n = n->next)
343  data1_hash_insert(ht, n->name, r, 0);
344  }
345  }
346  return data1_hash_lookup(ht, tagname);
347 #else
348  /* using linear search */
349  for (; r; r = r->next)
350  {
351  data1_name *n;
352 
353  for (n = r->tag->names; n; n = n->next)
354  if (!data1_matchstr(tagname, n->name))
355  return r;
356  }
357  return 0;
358 #endif
359 }
360 
362  const char *name)
363 {
364  data1_element *r;
365 
366  /* It's now possible to have a data1 tree with no abstract syntax */
367  if (absyn == 0)
368  return 0;
369  for (r = absyn->main_elements; r; r = r->next)
370  if (!data1_matchstr(r->name, name))
371  return r;
372  return 0;
373 }
374 
375 
377 {
378  /* It's now possible to have a data1 tree with no abstract syntax */
379  if (absyn == 0)
380  return;
381 
382  for (; e; e = e->next)
383  {
384  if (!e->sub_name)
385  {
386  if (e->children)
387  fix_element_ref(dh, absyn, e->children);
388  }
389  else
390  {
391  data1_sub_elements *sub_e = absyn->sub_elements;
392  while (sub_e && strcmp(e->sub_name, sub_e->name))
393  sub_e = sub_e->next;
394  if (sub_e)
395  e->children = sub_e->elements;
396  else
397  yaz_log(YLOG_WARN, "Unresolved reference to sub-elements %s",
398  e->sub_name);
399  }
400  }
401 }
402 /* *ostrich*
403 
404  New function, a bit dummy now... I've seen it in zrpn.c... We should build
405  more clever regexps...
406 
407 
408  //a -> ^a/.*$
409  //a/b -> ^b/a/.*$
410  /a -> ^a/$
411  /a/b -> ^b/a/$
412 
413  / -> none
414 
415  pop, 2002-12-13
416 
417  Now [] predicates are supported
418 
419  pop, 2003-01-17
420 
421  */
422 
423 static const char * mk_xpath_regexp(data1_handle dh, const char *expr)
424 {
425  const char *p = expr;
426  int abs = 1;
427  int e = 0;
428  char *stack[32];
429  char *res_p, *res = 0;
430  size_t res_size = 1;
431 
432  if (*p != '/')
433  return ("");
434  p++;
435  if (*p == '/')
436  {
437  abs = 0;
438  p++;
439  }
440  while (*p && e < 30)
441  {
442  int is_predicate = 0;
443  char *s;
444  int i, j;
445  for (i = 0; *p && !strchr("/",*p); i++, p++)
446  ;
447  res_size += (i+3); /* we'll add / between later .. */
448  s = stack[e] = (char *) nmem_malloc(data1_nmem_get(dh), i + 1);
449  for (j = 0; j < i; j++)
450  {
451  const char *pp = p-i+j;
452  if (*pp == '[')
453  is_predicate = 1;
454  else if (*pp == ']')
455  is_predicate = 0;
456  else
457  {
458  if (!is_predicate) {
459  if (*pp == '*')
460  *s++ = '.';
461  *s++ = *pp;
462  }
463  }
464  }
465  *s = 0;
466  e++;
467  if (*p)
468  p++;
469  }
470  res_p = res = nmem_malloc(data1_nmem_get(dh), res_size + 10);
471 
472  if (stack[e-1][0] == '@') /* path/@attr spec (leaf is attribute) */
473  strcpy(res_p, "/");
474  else
475  strcpy(res_p, "[^@]*/"); /* path .. (index all cdata below it) */
476  res_p = res_p + strlen(res_p);
477  while (--e >= 0) {
478  strcpy(res_p, stack[e]);
479  strcat(res_p, "/");
480  res_p += strlen(stack[e]) + 1;
481  }
482  if (!abs)
483  {
484  strcpy(res_p, ".*");
485  res_p += 2;
486  }
487  strcpy(res_p, "$");
488  res_p++;
489  yaz_log(YLOG_DEBUG, "Got regexp: %s", res);
490  return res;
491 }
492 
494  char *cp, const char *file, int lineno,
495  const char *element_name, data1_absyn *res,
496  int xpelement,
497  data1_attset *attset)
498 {
499  data1_termlist **tp = *tpp;
500  while(1)
501  {
502  char attname[512], structure[512];
503  char *source;
504  int r, i;
505  int level = 0;
506  structure[0] = '\0';
507  for (i = 0; cp[i] && i<sizeof(attname)-1; i++)
508  if (strchr(":,", cp[i]))
509  break;
510  else
511  attname[i] = cp[i];
512  if (i == 0)
513  {
514  if (*cp)
515  yaz_log(YLOG_WARN,
516  "%s:%d: Syntax error in termlistspec '%s'",
517  file, lineno, cp);
518  break;
519  }
520  attname[i] = '\0';
521  r = 1;
522  cp += i;
523  if (*cp == ':')
524  cp++;
525 
526  for (i = 0; cp[i] && i<sizeof(structure)-1; i++)
527  if (level == 0 && strchr(",", cp[i]))
528  break;
529  else
530  {
531  structure[i] = cp[i];
532  if (cp[i] == '(')
533  level++;
534  else if (cp[i] == ')')
535  level--;
536  }
537  structure[i] = '\0';
538  if (i)
539  r = 2;
540  cp += i;
541  if (*cp)
542  cp++; /* skip , */
543 
544  *tp = (data1_termlist *)
545  nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
546  (*tp)->next = 0;
547 
548  if (*attname == '!')
549  {
550  if (!xpelement && element_name)
551  strcpy(attname, element_name);
552  else if (xpelement)
553  strcpy(attname, ZEBRA_XPATH_CDATA);
554  }
555  if (attset)
556  {
557  if (!data1_getattbyname(dh, attset, attname))
558  {
559  yaz_log(YLOG_WARN, "Index '%s' not found in attset(s)",
560  attname);
561  }
562  }
563 
564  (*tp)->index_name = nmem_strdup(data1_nmem_get(dh), attname);
565  assert (*(*tp)->index_name != '!');
566  if (r == 2 && (source = strchr(structure, ':')))
567  *source++ = '\0'; /* cut off structure .. */
568  else
569  source = "data"; /* ok: default is leaf data */
570  (*tp)->source = nmem_strdup(data1_nmem_get (dh), source);
571 
572  if (r < 2) /* is the structure qualified? */
573  (*tp)->structure = "w";
574  else
575  (*tp)->structure = nmem_strdup(data1_nmem_get (dh), structure);
576  tp = &(*tp)->next;
577  }
578 
579  *tpp = tp;
580  return 0;
581 }
582 
583 /* quinn
584  * Converts a 'melm' field[$subfield] pattern to a simple xpath
585  */
586 static int melm2xpath(char *melm, char *buf)
587 {
588  char *dollar;
589  char *field = melm;
590  char *subfield;
591  char *fieldtype;
592  if ((dollar = strchr(melm, '$'))) {
593  *dollar = '\0';
594  subfield = ++dollar;
595  } else
596  subfield = "";
597  if (field[0] == '0' && field[1] == '0')
598  fieldtype = "controlfield";
599  else
600  fieldtype = "datafield";
601  yaz_snprintf(buf, 60, "/*/%s[@tag=\"%s\"]", fieldtype, field);
602  if (*subfield)
603  yaz_snprintf(buf + strlen(buf), 60, "/subfield[@code=\"%s\"]", subfield);
604  else if (field[0] != '0' || field[1] != '0')
605  strcat(buf, "/subfield");
606  yaz_log(YLOG_DEBUG, "Created xpath: '%s'", buf);
607  return 0;
608 }
609 
610 const char *data1_systag_lookup(data1_absyn *absyn, const char *tag,
611  const char *default_value)
612 {
613  struct data1_systag *p = absyn->systags;
614  for (; p; p = p->next)
615  if (!strcmp(p->name, tag))
616  return p->value;
617  return default_value;
618 }
619 
#define l_isspace(c) ((c) == '\t' || (c) == ' ' || (c) == '\n' || (c) == '\r')

/*
 * Read the next significant line from f and split it into tokens.
 *
 * Lines that are empty, contain only white space, or whose first
 * non-blank character is '#' are skipped.  *lineno is incremented for
 * every line read with fgets (a line longer than len-1 characters is
 * delivered by fgets in several pieces, each counted as a line).
 *
 * Tokens are separated by white space.  White space inside double quotes
 * or between '[' and ']' does not separate tokens; the quote and bracket
 * characters stay part of the token.  A token starting with '#' begins a
 * trailing comment and ends the line.
 *
 * line   buffer of len bytes; it is modified (NULs are written after each
 *        token) and the argv entries point into it.
 * argv   receives the token pointers; num is the number of entries it
 *        can hold.  Tokens beyond num are not stored.
 *
 * Returns the number of tokens stored, or 0 at end of file / read error
 * (and also when num is not positive).
 */
int read_absyn_line(FILE *f, int *lineno, char *line, int len,
                    char *argv[], int num)
{
    char *p;
    int argc;
    int quoted = 0;

    while ((p = fgets(line, len, f)))
    {
        (*lineno)++;
        while (*p && l_isspace(*p))
            p++;
        if (*p && *p != '#')
            break;
    }
    if (!p)
        return 0;

    /* never store more than num tokens: argv has only num entries */
    for (argc = 0; *p && argc < num; argc++)
    {
        if (*p == '#')  /* trailing comment */
            break;
        argv[argc] = p;
        while (*p && !(l_isspace(*p) && !quoted)) {
            if (*p =='"') quoted = 1 - quoted;
            if (*p =='[') quoted = 1;
            if (*p ==']') quoted = 0;
            p++;
        }
        if (*p)
        {
            *(p++) = '\0';
            while (*p && l_isspace(*p))
                p++;
        }
    }
    return argc;
}
660 
662 {
663  if (root->u.root.absyn)
664  return root->u.root.absyn->marc;
665  return 0;
666 }
667 
669  data1_node *root)
670 {
671  if (root->u.root.absyn)
672  return root->u.root.absyn->main_elements;
673  return 0;
674 }
675 
676 static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
677  enum DATA1_XPATH_INDEXING default_xpath)
678 {
679  data1_sub_elements *cur_elements = NULL;
680  data1_xpelement **cur_xpelement = NULL;
681  data1_attset *attset_list = data1_empty_attset(dh);
682  data1_attset_child **attset_childp = &attset_list->children;
683 
684  data1_absyn *res = 0;
685  FILE *f;
687  data1_esetname **esetpp;
688  data1_maptab **maptabp;
689  data1_marctab **marcp;
690  data1_termlist *all = 0;
691  data1_tagset **tagset_childp;
692  struct data1_systag **systagsp;
693  int level = 0;
694  int lineno = 0;
695  int argc;
696  char *argv[50], line[512];
697 
698  f = data1_path_fopen(dh, file, "r");
699 
700  res = (data1_absyn *) nmem_malloc(data1_nmem_get(dh), sizeof(*res));
701  res->name = 0;
702  res->oid = 0;
703  res->tagset = 0;
704  res->encoding = 0;
705  res->xpath_indexing =
706  (f ? DATA1_XPATH_INDEXING_DISABLE : default_xpath);
707  res->systags = 0;
708  systagsp = &res->systags;
709  tagset_childp = &res->tagset;
710 
711  res->varset = 0;
712  res->esetnames = 0;
713  esetpp = &res->esetnames;
714  res->maptabs = 0;
715  maptabp = &res->maptabs;
716  res->marc = 0;
717  marcp = &res->marc;
718  res->sub_elements = NULL;
719  res->main_elements = NULL;
720  res->xp_elements = NULL;
721  cur_xpelement = &res->xp_elements;
722 
723  while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50)))
724  {
725  char *cmd = *argv;
726  if (!strcmp(cmd, "elm") || !strcmp(cmd, "element"))
727  {
728  data1_element *new_element;
729  int i;
730  char *p, *sub_p, *path, *name, *termlists;
731  int type, value;
732  data1_termlist **tp;
733 
734  if (argc < 4)
735  {
736  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to elm", file, lineno);
737  continue;
738  }
739  path = argv[1];
740  name = argv[2];
741  termlists = argv[3];
742 
743  if (!cur_elements)
744  {
745  cur_elements = (data1_sub_elements *)
746  nmem_malloc(data1_nmem_get(dh), sizeof(*cur_elements));
747  cur_elements->next = res->sub_elements;
748  cur_elements->elements = NULL;
749  cur_elements->name = "main";
750  res->sub_elements = cur_elements;
751 
752  level = 0;
753  ppl[level] = &cur_elements->elements;
754  }
755  p = path;
756  for (i = 1;; i++)
757  {
758  char *e;
759 
760  if ((e = strchr(p, '/')))
761  p = e+1;
762  else
763  break;
764  }
765  if (i > level+1)
766  {
767  yaz_log(YLOG_WARN, "%s:%d: Bad level increase", file, lineno);
768  fclose(f);
769  return 0;
770  }
771  level = i;
772  new_element = *ppl[level-1] = data1_mk_element(dh);
773 
774  tp = &new_element->termlists;
775  ppl[level-1] = &new_element->next;
776  ppl[level] = &new_element->children;
777 
778  /* consider subtree (if any) ... */
779  if ((sub_p = strchr (p, ':')) && sub_p[1])
780  {
781  *sub_p++ = '\0';
782  new_element->sub_name =
783  nmem_strdup (data1_nmem_get(dh), sub_p);
784  }
785  /* well-defined tag */
786  if (sscanf(p, "(%d,%d)", &type, &value) == 2)
787  {
788  if (!res->tagset)
789  {
790  yaz_log(YLOG_WARN, "%s:%d: No tagset loaded", file, lineno);
791  fclose(f);
792  return 0;
793  }
794  if (!(new_element->tag = data1_gettagbynum(dh, res->tagset,
795  type, value)))
796  {
797  yaz_log(YLOG_WARN, "%s:%d: Couldn't find tag %s in tagset",
798  file, lineno, p);
799  fclose(f);
800  return 0;
801  }
802  }
803  /* private tag */
804  else if (*p)
805  {
806  data1_tag *nt =
807  new_element->tag = (data1_tag *)
808  nmem_malloc(data1_nmem_get (dh),
809  sizeof(*new_element->tag));
810  nt->which = DATA1T_string;
811  nt->value.string = nmem_strdup(data1_nmem_get(dh), p);
812  nt->names = (data1_name *)
813  nmem_malloc(data1_nmem_get(dh),
814  sizeof(*new_element->tag->names));
815  nt->names->name = nt->value.string;
816  nt->names->next = 0;
817  nt->kind = DATA1K_string;
818  nt->next = 0;
819  nt->tagset = 0;
820  }
821  else
822  {
823  yaz_log(YLOG_WARN, "%s:%d: Bad element", file, lineno);
824  fclose(f);
825  return 0;
826  }
827  /* parse termList definitions */
828  p = termlists;
829  if (*p != '-')
830  {
831  if (parse_termlists(dh, &tp, p, file, lineno, name, res, 0,
832  attset_list))
833  {
834  fclose (f);
835  return 0;
836  }
837  *tp = all; /* append any ALL entries to the list */
838  }
839  new_element->name = nmem_strdup(data1_nmem_get(dh), name);
840  }
841  /* *ostrich*
842  New code to support xelm directive
843  for each xelm a dfa is built. xelms are stored in res->xp_elements
844 
845  maybe we should use a simple sscanf instead of dfa?
846 
847  pop, 2002-12-13
848 
849  Now [] predicates are supported. regexps and xpath structure is
850  a bit redundant, however it's comfortable later...
851 
852  pop, 2003-01-17
853  */
854 
855  else if (!strcmp(cmd, "xelm") || !strcmp(cmd, "melm")) {
856 
857  int i;
858  char *p, *xpath_expr, *termlists;
859  const char *regexp;
860  struct DFA *dfa = 0;
861  data1_termlist **tp;
862  char melm_xpath[128];
863  data1_xpelement *xp_ele = 0;
864  data1_xpelement *last_match = 0;
865 
866  if (argc != 3)
867  {
868  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to %s",
869  file, lineno, cmd);
870  continue;
871  }
872 
873  if (!strcmp(cmd, "melm")) {
874  if (melm2xpath(argv[1], melm_xpath) < 0)
875  continue;
876  xpath_expr = melm_xpath;
877  } else {
878  xpath_expr = argv[1];
879  }
880  termlists = argv[2];
881  regexp = mk_xpath_regexp(dh, xpath_expr);
882 
883 #if OPTIMIZE_MELM
884  /* get last of existing regulars with same regexp */
885  for (xp_ele = res->xp_elements; xp_ele; xp_ele = xp_ele->next)
886  if (!strcmp(xp_ele->regexp, regexp))
887  last_match = xp_ele;
888 #endif
889  if (!last_match)
890  {
891  /* new regular expression . Parse + generate */
892  const char *regexp_ptr = regexp;
893 
894  dfa = dfa_init();
895  i = dfa_parse (dfa, &regexp_ptr);
896  if (i || *regexp_ptr) {
897  yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno);
898  dfa_delete (&dfa);
899  continue;
900  }
901  }
902  *cur_xpelement = (data1_xpelement *)
903  nmem_malloc(data1_nmem_get(dh), sizeof(**cur_xpelement));
904  (*cur_xpelement)->next = 0;
905  (*cur_xpelement)->match_next = 0;
906  if (last_match)
907  last_match->match_next = *cur_xpelement;
908 #if OPTIMIZE_MELM
909  (*cur_xpelement)->regexp = regexp;
910 #endif
911  (*cur_xpelement)->xpath_expr = nmem_strdup(data1_nmem_get (dh),
912  xpath_expr);
913 
914  if (dfa)
915  dfa_mkstate (dfa);
916  (*cur_xpelement)->dfa = dfa;
917 
918 #ifdef ENHANCED_XELM
919  (*cur_xpelement)->xpath_len =
921  xpath_expr,
922  (*cur_xpelement)->xpath, XPATH_STEP_COUNT,
923  data1_nmem_get(dh));
924 #endif
925  (*cur_xpelement)->termlists = 0;
926  tp = &(*cur_xpelement)->termlists;
927 
928  /* parse termList definitions */
929  p = termlists;
930  if (*p != '-')
931  {
932  if (parse_termlists(dh, &tp, p, file, lineno,
933  xpath_expr, res, 1, attset_list))
934  {
935  fclose (f);
936  return 0;
937  }
938  *tp = all; /* append any ALL entries to the list */
939  }
940  cur_xpelement = &(*cur_xpelement)->next;
941  }
942  else if (!strcmp(cmd, "section"))
943  {
944  char *name;
945 
946  if (argc < 2)
947  {
948  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to section",
949  file, lineno);
950  continue;
951  }
952  name = argv[1];
953 
954  cur_elements = (data1_sub_elements *)
955  nmem_malloc(data1_nmem_get(dh), sizeof(*cur_elements));
956  cur_elements->next = res->sub_elements;
957  cur_elements->elements = NULL;
958  cur_elements->name = nmem_strdup(data1_nmem_get(dh), name);
959  res->sub_elements = cur_elements;
960 
961  level = 0;
962  ppl[level] = &cur_elements->elements;
963  }
964  else if (!strcmp(cmd, "xpath"))
965  {
966  if (argc != 2)
967  {
968  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to 'xpath' directive",
969  file, lineno);
970  continue;
971  }
972  if (!strcmp(argv[1], "enable"))
974  else if (!strcmp (argv[1], "disable"))
976  else
977  {
978  yaz_log(YLOG_WARN, "%s:%d: Expecting disable/enable "
979  "after 'xpath' directive", file, lineno);
980  }
981  }
982  else if (!strcmp(cmd, "all"))
983  {
984  data1_termlist **tp = &all;
985  if (all)
986  {
987  yaz_log(YLOG_WARN, "%s:%d: Too many 'all' directives - ignored",
988  file, lineno);
989  continue;
990  }
991  if (argc != 2)
992  {
993  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to 'all' directive",
994  file, lineno);
995  continue;
996  }
997  if (parse_termlists(dh, &tp, argv[1], file, lineno, 0, res, 0,
998  attset_list))
999  {
1000  fclose (f);
1001  return 0;
1002  }
1003  }
1004  else if (!strcmp(cmd, "name"))
1005  {
1006  if (argc != 2)
1007  {
1008  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to name directive",
1009  file, lineno);
1010  continue;
1011  }
1012  res->name = nmem_strdup(data1_nmem_get(dh), argv[1]);
1013  }
1014  else if (!strcmp(cmd, "reference"))
1015  {
1016  char *name;
1017 
1018  if (argc != 2)
1019  {
1020  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to reference",
1021  file, lineno);
1022  continue;
1023  }
1024  name = argv[1];
1025  res->oid = yaz_string_to_oid_nmem(yaz_oid_std(),
1026  CLASS_SCHEMA, name,
1027  data1_nmem_get(dh));
1028  if (!res->oid)
1029  {
1030  yaz_log(YLOG_WARN, "%s:%d: Unknown tagset ref '%s'",
1031  file, lineno, name);
1032  continue;
1033  }
1034  }
1035  else if (!strcmp(cmd, "attset"))
1036  {
1037  char *name;
1038  data1_attset *attset;
1039 
1040  if (argc != 2)
1041  {
1042  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to attset",
1043  file, lineno);
1044  continue;
1045  }
1046  name = argv[1];
1047  if (!(attset = data1_get_attset(dh, name)))
1048  {
1049  yaz_log(YLOG_WARN, "%s:%d: Couldn't find attset %s",
1050  file, lineno, name);
1051  continue;
1052  }
1053  *attset_childp = (data1_attset_child *)
1054  nmem_malloc(data1_nmem_get(dh), sizeof(**attset_childp));
1055  (*attset_childp)->child = attset;
1056  (*attset_childp)->next = 0;
1057  attset_childp = &(*attset_childp)->next;
1058  }
1059  else if (!strcmp(cmd, "tagset"))
1060  {
1061  char *name;
1062  int type = 0;
1063  if (argc < 2)
1064  {
1065  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to tagset",
1066  file, lineno);
1067  continue;
1068  }
1069  name = argv[1];
1070  if (argc == 3)
1071  type = atoi(argv[2]);
1072  *tagset_childp = data1_read_tagset(dh, name, type);
1073  if (!(*tagset_childp))
1074  {
1075  yaz_log(YLOG_WARN, "%s:%d: Couldn't load tagset %s",
1076  file, lineno, name);
1077  continue;
1078  }
1079  tagset_childp = &(*tagset_childp)->next;
1080  }
1081  else if (!strcmp(cmd, "varset"))
1082  {
1083  char *name;
1084 
1085  if (argc != 2)
1086  {
1087  yaz_log(YLOG_WARN, "%s:%d: Bad # of args in varset",
1088  file, lineno);
1089  continue;
1090  }
1091  name = argv[1];
1092  if (!(res->varset = data1_read_varset(dh, name)))
1093  {
1094  yaz_log(YLOG_WARN, "%s:%d: Couldn't load Varset %s",
1095  file, lineno, name);
1096  continue;
1097  }
1098  }
1099  else if (!strcmp(cmd, "esetname"))
1100  {
1101  char *name, *fname;
1102 
1103  if (argc != 3)
1104  {
1105  yaz_log(YLOG_WARN, "%s:%d: Bad # of args in esetname",
1106  file, lineno);
1107  continue;
1108  }
1109  name = argv[1];
1110  fname = argv[2];
1111 
1112  *esetpp = (data1_esetname *)
1113  nmem_malloc(data1_nmem_get(dh), sizeof(**esetpp));
1114  (*esetpp)->name = nmem_strdup(data1_nmem_get(dh), name);
1115  (*esetpp)->next = 0;
1116  if (*fname == '@')
1117  (*esetpp)->spec = 0;
1118  else if (!((*esetpp)->spec = data1_read_espec1(dh, fname)))
1119  {
1120  yaz_log(YLOG_WARN, "%s:%d: Espec-1 read failed for %s",
1121  file, lineno, fname);
1122  continue;
1123  }
1124  esetpp = &(*esetpp)->next;
1125  }
1126  else if (!strcmp(cmd, "maptab"))
1127  {
1128  char *name;
1129 
1130  if (argc != 2)
1131  {
1132  yaz_log(YLOG_WARN, "%s:%d: Bad # of args for maptab",
1133  file, lineno);
1134  continue;
1135  }
1136  name = argv[1];
1137  if (!(*maptabp = data1_read_maptab(dh, name)))
1138  {
1139  yaz_log(YLOG_WARN, "%s:%d: Couldn't load maptab %s",
1140  file, lineno, name);
1141  continue;
1142  }
1143  maptabp = &(*maptabp)->next;
1144  }
1145  else if (!strcmp(cmd, "marc"))
1146  {
1147  char *name;
1148 
1149  if (argc != 2)
1150  {
1151  yaz_log(YLOG_WARN, "%s:%d: Bad # or args for marc",
1152  file, lineno);
1153  continue;
1154  }
1155  name = argv[1];
1156  if (!(*marcp = data1_read_marctab(dh, name)))
1157  {
1158  yaz_log(YLOG_WARN, "%s:%d: Couldn't read marctab %s",
1159  file, lineno, name);
1160  continue;
1161  }
1162  marcp = &(*marcp)->next;
1163  }
1164  else if (!strcmp(cmd, "encoding"))
1165  {
1166  if (argc != 2)
1167  {
1168  yaz_log(YLOG_WARN, "%s:%d: Bad # or args for encoding",
1169  file, lineno);
1170  continue;
1171  }
1172  res->encoding = nmem_strdup(data1_nmem_get(dh), argv[1]);
1173  }
1174  else if (!strcmp(cmd, "systag"))
1175  {
1176  if (argc != 3)
1177  {
1178  yaz_log(YLOG_WARN, "%s:%d: Bad # or args for systag",
1179  file, lineno);
1180  continue;
1181  }
1182  *systagsp = nmem_malloc(data1_nmem_get(dh), sizeof(**systagsp));
1183 
1184  (*systagsp)->name = nmem_strdup(data1_nmem_get(dh), argv[1]);
1185  (*systagsp)->value = nmem_strdup(data1_nmem_get(dh), argv[2]);
1186  systagsp = &(*systagsp)->next;
1187  }
1188  else
1189  {
1190  yaz_log(YLOG_WARN, "%s:%d: Unknown directive '%s'", file,
1191  lineno, cmd);
1192  continue;
1193  }
1194  }
1195  if (f)
1196  fclose(f);
1197 
1198  for (cur_elements = res->sub_elements; cur_elements;
1199  cur_elements = cur_elements->next)
1200  {
1201  if (!strcmp(cur_elements->name, "main"))
1202  res->main_elements = cur_elements->elements;
1203  fix_element_ref(dh, res, cur_elements->elements);
1204  }
1205  *systagsp = 0;
1206  return res;
1207 }
1208 
1209 /*
1210  * Local variables:
1211  * c-basic-offset: 4
1212  * c-file-style: "Stroustrup"
1213  * indent-tabs-mode: nil
1214  * End:
1215  * vim: shiftwidth=4 tabstop=8 expandtab
1216  */
1217 
static const char * mk_xpath_regexp(data1_handle dh, const char *expr)
Definition: d1_absyn.c:423
data1_element * data1_getelementbytagname(data1_handle dh, data1_absyn *abs, data1_element *parent, const char *tagname)
Definition: d1_absyn.c:312
void data1_absyn_destroy(data1_handle dh)
Definition: d1_absyn.c:177
data1_attset * data1_attset_add(data1_handle dh, const char *name)
Definition: d1_absyn.c:267
data1_absyn * data1_get_absyn(data1_handle dh, const char *name, enum DATA1_XPATH_INDEXING en)
Definition: d1_absyn.c:231
static data1_absyn * data1_read_absyn(data1_handle dh, const char *file, enum DATA1_XPATH_INDEXING en)
Definition: d1_absyn.c:676
struct data1_hash_table * data1_hash_open(int size, NMEM nmem)
Definition: d1_absyn.c:67
data1_attset * data1_attset_search_id(data1_handle dh, const Odr_oid *oid)
Definition: d1_absyn.c:254
void data1_absyn_trav(data1_handle dh, void *handle, void(*fh)(data1_handle dh, void *h, data1_absyn *a))
Definition: d1_absyn.c:199
data1_attset * data1_attset_search_name(data1_handle dh, const char *name)
Definition: d1_absyn.c:241
data1_attset * data1_get_attset(data1_handle dh, const char *name)
Definition: d1_absyn.c:289
void data1_hash_insert(struct data1_hash_table *ht, const char *str, void *clientData, int copy)
Definition: d1_absyn.c:81
#define l_isspace(c)
Definition: d1_absyn.c:620
data1_element * data1_absyn_getelements(data1_handle dh, data1_node *root)
Definition: d1_absyn.c:668
data1_element * data1_getelementbyname(data1_handle dh, data1_absyn *absyn, const char *name)
Definition: d1_absyn.c:361
static int melm2xpath(char *melm, char *buf)
Definition: d1_absyn.c:586
void fix_element_ref(data1_handle dh, data1_absyn *absyn, data1_element *e)
Definition: d1_absyn.c:376
int read_absyn_line(FILE *f, int *lineno, char *line, int len, char *argv[], int num)
Definition: d1_absyn.c:622
static int parse_termlists(data1_handle dh, data1_termlist ***tpp, char *cp, const char *file, int lineno, const char *element_name, data1_absyn *res, int xpelement, data1_attset *attset)
Definition: d1_absyn.c:493
data1_esetname * data1_getesetbyname(data1_handle dh, data1_absyn *a, const char *name)
Definition: d1_absyn.c:298
unsigned data1_hash_calc(struct data1_hash_table *ht, const char *str)
Definition: d1_absyn.c:50
data1_element * data1_mk_element(data1_handle dh)
Definition: d1_absyn.c:148
static data1_absyn * data1_absyn_add(data1_handle dh, const char *name, enum DATA1_XPATH_INDEXING en)
Definition: d1_absyn.c:214
data1_marctab * data1_absyn_getmarctab(data1_handle dh, data1_node *root)
Definition: d1_absyn.c:661
void * data1_hash_lookup(struct data1_hash_table *ht, const char *str)
Definition: d1_absyn.c:117
const char * data1_systag_lookup(data1_absyn *absyn, const char *tag, const char *default_value)
Definition: d1_absyn.c:610
data1_absyn * data1_absyn_search(data1_handle dh, const char *name)
Definition: d1_absyn.c:160
#define D1_MAX_NESTING
Definition: d1_absyn.c:36
struct data1_absyn_cache_info * data1_absyn_cache
Definition: data1.h:118
data1_marctab * data1_read_marctab(data1_handle dh, const char *file)
Definition: d1_marc.c:38
data1_attset * data1_empty_attset(data1_handle dh)
Definition: d1_attset.c:50
@ DATA1K_string
Definition: data1.h:126
data1_attset_cache * data1_attset_cache_get(data1_handle dh)
Definition: d1_handle.c:76
FILE * data1_path_fopen(data1_handle dh, const char *file, const char *mode)
Definition: d1_handle.c:147
data1_attset * data1_read_attset(data1_handle dh, const char *file)
Definition: d1_attset.c:63
Z_Espec1 * data1_read_espec1(data1_handle dh, const char *file)
Definition: d1_espec.c:213
data1_tag * data1_gettagbynum(data1_handle dh, data1_tagset *s, int type, int value)
Definition: d1_tagset.c:64
data1_maptab * data1_read_maptab(data1_handle dh, const char *file)
Definition: d1_map.c:43
#define DATA1T_string
Definition: data1.h:205
NMEM data1_nmem_get(data1_handle dh)
Definition: d1_handle.c:66
#define data1_matchstr(s1, s2)
Definition: data1.h:36
struct data1_attset_cache_info * data1_attset_cache
Definition: data1.h:119
data1_varset * data1_read_varset(data1_handle dh, const char *file)
Definition: d1_varset.c:56
data1_att * data1_getattbyname(data1_handle dh, data1_attset *s, const char *name)
Definition: d1_attset.c:31
data1_tagset * data1_read_tagset(data1_handle dh, const char *file, int type)
Definition: d1_tagset.c:120
DATA1_XPATH_INDEXING
Definition: data1.h:347
@ DATA1_XPATH_INDEXING_ENABLE
Definition: data1.h:349
@ DATA1_XPATH_INDEXING_DISABLE
Definition: data1.h:348
data1_absyn_cache * data1_absyn_cache_get(data1_handle dh)
Definition: d1_handle.c:71
int dfa_parse(struct DFA *, const char **)
Definition: dfa.c:1121
void dfa_mkstate(struct DFA *)
Definition: dfa.c:1148
void dfa_delete(struct DFA **)
Definition: dfa.c:1158
struct DFA * dfa_init(void)
Definition: dfa.c:1092
#define ZEBRA_XPATH_CDATA
Definition: recctrl.h:39
Definition: dfa.h:53
data1_absyn * absyn
Definition: d1_absyn.c:137
data1_absyn_cache next
Definition: d1_absyn.c:138
data1_esetname * esetnames
Definition: d1_absyn.h:53
data1_varset * varset
Definition: d1_absyn.h:52
data1_sub_elements * sub_elements
Definition: d1_absyn.h:56
enum DATA1_XPATH_INDEXING xpath_indexing
Definition: d1_absyn.h:61
data1_marctab * marc
Definition: d1_absyn.h:55
data1_maptab * maptabs
Definition: d1_absyn.h:54
data1_tagset * tagset
Definition: d1_absyn.h:51
char * name
Definition: d1_absyn.h:49
char * encoding
Definition: d1_absyn.h:60
struct data1_xpelement * xp_elements
Definition: d1_absyn.h:58
data1_element * main_elements
Definition: d1_absyn.h:57
Odr_oid * oid
Definition: d1_absyn.h:50
struct data1_systag * systags
Definition: d1_absyn.h:59
data1_attset * attset
Definition: d1_absyn.c:144
data1_attset_cache next
Definition: d1_absyn.c:145
data1_attset * child
Definition: data1.h:64
data1_attset_child * next
Definition: data1.h:65
char * name
Definition: data1.h:70
data1_attset_child * children
Definition: data1.h:73
data1_attset * next
Definition: data1.h:74
Odr_oid * oid
Definition: data1.h:71
data1_tag * tag
Definition: data1.h:245
data1_termlist * termlists
Definition: data1.h:246
struct data1_element * next
Definition: data1.h:249
struct data1_element * children
Definition: data1.h:248
struct data1_hash_table * hash
Definition: data1.h:250
char * sub_name
Definition: data1.h:247
char * name
Definition: data1.h:244
struct data1_esetname * next
Definition: data1.h:163
char * name
Definition: data1.h:161
Definition: d1_absyn.c:44
void * clientData
Definition: d1_absyn.c:45
char * str
Definition: d1_absyn.c:46
struct data1_hash_entry * next
Definition: d1_absyn.c:47
struct data1_hash_entry ** ar
Definition: d1_absyn.c:41
struct data1_maptab * next
Definition: data1.h:108
struct data1_marctab * next
Definition: data1.h:156
struct data1_name * next
Definition: data1.h:115
char * name
Definition: data1.h:114
struct data1_node::@2::@3 root
union data1_node::@2 u
data1_element * elements
Definition: data1.h:256
struct data1_sub_elements * next
Definition: data1.h:255
char * name
Definition: d1_absyn.c:129
struct data1_systag * next
Definition: d1_absyn.c:131
char * value
Definition: d1_absyn.c:130
struct data1_tag * next
Definition: data1.h:215
int which
Definition: data1.h:206
data1_datatype kind
Definition: data1.h:212
struct data1_tagset * tagset
Definition: data1.h:214
data1_name * names
Definition: data1.h:203
union data1_tag::@1 value
char * string
Definition: data1.h:210
data1_tagset * next
Definition: data1.h:227
struct data1_termlist * next
Definition: data1.h:235
char * xpath_expr
Definition: d1_absyn.h:32
struct data1_xpelement * match_next
Definition: d1_absyn.h:44
const char * regexp
Definition: d1_absyn.h:41
struct DFA * dfa
Definition: d1_absyn.h:37
struct data1_xpelement * next
Definition: d1_absyn.h:39
int zebra_parse_xpath_str(const char *xpath_string, struct xpath_location_step *xpath, int max, NMEM mem)
Definition: xpath.c:162
#define XPATH_STEP_COUNT
Definition: zebra_xpath.h:25