YAZ  5.34.0
cclfind.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
14 #if HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17 
18 #include <stdlib.h>
19 #include <string.h>
20 #include <assert.h>
21 
22 #include "cclp.h"
23 
/* Kind of the current lookahead token.  Expands to an expression that
 * reads `cclp`, so a parser named `cclp` must be in scope at the use site. */
#define KIND (cclp->look_token->kind)

/* Move the lookahead one token forward.  Also relies on a `cclp` in scope.
 * The assignment is parenthesized so the expansion stays a single
 * expression regardless of the operators surrounding the macro. */
#define ADVANCE (cclp->look_token = cclp->look_token->next)
29 
37 static int qual_val_type(ccl_qualifier_t *qa, int type, int value,
38  char **attset)
39 {
40  int i;
41  if (!qa)
42  return 0;
43  for (i = 0; qa[i]; i++)
44  {
45  int got_type = 0;
46  struct ccl_rpn_attr *q = ccl_qual_get_attr(qa[i]);
47  for (; q; q = q->next)
48  {
49  if (q->type == type && q->kind == CCL_RPN_ATTR_NUMERIC)
50  {
51  got_type = 1;
52  if (q->value.numeric == value)
53  {
54  if (attset)
55  *attset = q->set;
56  return 1;
57  }
58  }
59  }
60  if (got_type)
61  return 0;
62  }
63  return 0;
64 }
65 
/*
 * Append exactly `len` bytes from `src` to the NUL-terminated string `n`
 * and terminate the result.  A `len` of zero or less appends nothing.
 * The caller must ensure `n` has room for the extra bytes and terminator.
 */
static void strxcat(char *n, const char *src, int len)
{
    char *tail = n + strlen(n);

    for (; len > 0; len--)
        *tail++ = *src++;
    *tail = '\0';
}
80 
86 static char *copy_token_name(struct ccl_token *tp)
87 {
88  char *str = (char *)xmalloc(tp->len + 1);
89  ccl_assert(str);
90  memcpy(str, tp->name, tp->len);
91  str[tp->len] = '\0';
92  return str;
93 }
94 
101 {
102  struct ccl_rpn_node *p;
103  p = (struct ccl_rpn_node *)xmalloc(sizeof(*p));
104  ccl_assert(p);
105  p->kind = kind;
106 
107  switch (kind)
108  {
109  case CCL_RPN_TERM:
110  p->u.t.attr_list = 0;
111  p->u.t.term = 0;
112  p->u.t.qual = 0;
113  break;
114  default:
115  break;
116  }
117  return p;
118 }
119 
121  struct ccl_rpn_node *r,
122  enum ccl_rpn_kind op)
123 {
124  if (l && r)
125  {
126  struct ccl_rpn_node *tmp = ccl_rpn_node_create(op);
127  tmp->u.p[0] = l;
128  tmp->u.p[1] = r;
129  tmp->u.p[2] = 0;
130  return tmp;
131  }
132  else if (r)
133  return r;
134  return l;
135 }
136 
141 void ccl_rpn_delete(struct ccl_rpn_node *rpn)
142 {
143  struct ccl_rpn_attr *attr, *attr1;
144  if (!rpn)
145  return;
146  switch (rpn->kind)
147  {
148  case CCL_RPN_AND:
149  case CCL_RPN_OR:
150  case CCL_RPN_NOT:
151  ccl_rpn_delete(rpn->u.p[0]);
152  ccl_rpn_delete(rpn->u.p[1]);
153  break;
154  case CCL_RPN_TERM:
155  xfree(rpn->u.t.term);
156  xfree(rpn->u.t.qual);
157  for (attr = rpn->u.t.attr_list; attr; attr = attr1)
158  {
159  attr1 = attr->next;
160  if (attr->kind == CCL_RPN_ATTR_STRING)
161  xfree(attr->value.str);
162  if (attr->set)
163  xfree(attr->set);
164  xfree(attr);
165  }
166  break;
167  case CCL_RPN_SET:
168  xfree(rpn->u.setname);
169  break;
170  case CCL_RPN_PROX:
171  ccl_rpn_delete(rpn->u.p[0]);
172  ccl_rpn_delete(rpn->u.p[1]);
173  ccl_rpn_delete(rpn->u.p[2]);
174  break;
175  }
176  xfree(rpn);
177 }
178 
179 static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa);
180 
/*
 * Tell whether token kind `look` occurs in `list`.
 * `list` is terminated by the first negative entry, which is never
 * matched.  Returns 1 if found, 0 otherwise.
 */
static int is_term_ok(int look, int *list)
{
    int *candidate = list;

    while (*candidate >= 0)
    {
        if (*candidate == look)
            return 1;
        candidate++;
    }
    return 0;
}
188 
189 static struct ccl_rpn_node *search_terms(CCL_parser cclp, ccl_qualifier_t *qa);
190 
191 static struct ccl_rpn_attr *add_attr_node(struct ccl_rpn_node *p,
192  const char *set, int type)
193 {
194  struct ccl_rpn_attr *n = (struct ccl_rpn_attr *) xmalloc(sizeof(*n));
195  ccl_assert(n);
196  if (set)
197  n->set = xstrdup(set);
198  else
199  n->set = 0;
200  n->type = type;
201  n->next = p->u.t.attr_list;
202  p->u.t.attr_list = n;
203  return n;
204 }
205 
213 void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set,
214  int type, int value)
215 {
216  struct ccl_rpn_attr *n = add_attr_node(p, set, type);
218  n->value.numeric = value;
219 }
220 
221 void ccl_set_attr_numeric(struct ccl_rpn_node *p, const char *set,
222  int type, int value)
223 {
224  struct ccl_rpn_attr *n;
225  for (n = p->u.t.attr_list; n; n = n->next)
226  if (n->type == type)
227  {
228  xfree(n->set);
229  n->set = set ? xstrdup(set) : 0;
230  if (n->kind == CCL_RPN_ATTR_STRING)
231  xfree(n->value.str);
233  n->value.numeric = value;
234  return;
235  }
237 }
238 
239 void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set,
240  int type, char *value)
241 {
242  struct ccl_rpn_attr *n = add_attr_node(p, set, type);
244  n->value.str = xstrdup(value);
245 }
246 
/*
 * Match the start of `input` against a list of operator aliases.
 *
 * aliases  NULL-terminated array of alias strings.
 * input    text to examine; read as a NUL-terminated string.
 *
 * Returns the length of the first alias that is a prefix of `input`, or
 * 0 when none matches.  Empty alias strings are skipped: they can only
 * "match" with length 0, which callers read as "no match", and returning
 * on them would hide every alias listed after them.
 */
static size_t cmp_operator(const char **aliases, const char *input)
{
    for (; *aliases; aliases++)
    {
        size_t alias_len = strlen(*aliases);

        if (alias_len == 0)
            continue;
        if (strncmp(input, *aliases, alias_len) == 0)
            return alias_len;
    }
    return 0;
}
260 
261 
262 #define REGEX_CHARS "^[]{}()|.*+?!$"
263 #define CCL_CHARS "#?\\"
264 
/*
 * Detect whether a term contains a truncation or mask operator.
 *
 * src_str             start of the term text.
 * src_len             number of bytes of the term to scan.
 * truncation_aliases  NULL-terminated list of truncation operator spellings.
 * mask_aliases        NULL-terminated list of mask operator spellings.
 *
 * A position is ignored when the byte before it is a backslash or when
 * it lies inside a double-quoted section.  Returns 1 as soon as an
 * operator alias starts at an eligible position, 0 if none does.
 */
static int has_ccl_masking(const char *src_str,
                           size_t src_len,
                           const char **truncation_aliases,
                           const char **mask_aliases)
{
    int in_quotes = 0;
    size_t pos;

    for (pos = 0; pos < src_len; pos++)
    {
        const char *here = src_str + pos;

        /* byte following a backslash is taken literally */
        if (pos > 0 && here[-1] == '\\')
            continue;
        if (*here == '"')
        {
            in_quotes = !in_quotes;
            continue;
        }
        if (in_quotes)
            continue;
        if (cmp_operator(truncation_aliases, here) ||
            cmp_operator(mask_aliases, here))
            return 1;
    }
    return 0;
}
291 
292 static int append_term(CCL_parser cclp, const char *src_str, size_t src_len,
293  char *dst_term, int regex_trunc, int z3958_trunc,
294  const char **truncation_aliases,
295  const char **mask_aliases,
296  int is_first, int is_last,
297  int *left_trunc, int *right_trunc)
298 {
299  size_t j;
300  int quote_mode = 0;
301 
302  for (j = 0; j < src_len; j++)
303  {
304  size_t op_size;
305  if (j > 0 && src_str[j-1] == '\\')
306  {
307  if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j]))
308  strcat(dst_term, "\\");
309  else if (z3958_trunc && strchr(CCL_CHARS "\\", src_str[j]))
310  strcat(dst_term, "\\");
311  strxcat(dst_term, src_str + j, 1);
312  }
313  else if (src_str[j] == '"')
314  quote_mode = !quote_mode;
315  else if (!quote_mode &&
316  (op_size = cmp_operator(truncation_aliases,
317  src_str + j))
318  )
319  {
320  j += (op_size - 1); /* j++ in for loop */
321  if (regex_trunc)
322  strcat(dst_term, ".*");
323  else if (z3958_trunc)
324  strcat(dst_term, "?");
325  else if (is_first && j == 0)
326  *left_trunc = 1;
327  else if (is_last && j == src_len - 1)
328  *right_trunc = 1;
329  else
330  {
332  return -1;
333  }
334  }
335  else if (!quote_mode &&
336  (op_size = cmp_operator(mask_aliases, src_str + j)))
337  {
338  j += (op_size - 1); /* j++ in for loop */
339  if (regex_trunc)
340  strcat(dst_term, ".");
341  else if (z3958_trunc)
342  strcat(dst_term, "#");
343  else
344  {
346  return -1;
347  }
348  }
349  else if (src_str[j] != '\\')
350  {
351  if (regex_trunc && strchr(REGEX_CHARS, src_str[j]))
352  strcat(dst_term, "\\");
353  else if (z3958_trunc && strchr(CCL_CHARS, src_str[j]))
354  strcat(dst_term, "\\");
355  strxcat(dst_term, src_str + j, 1);
356  }
357  }
358  return 0;
359 }
360 
361 
363  struct ccl_token *lookahead0,
364  struct ccl_rpn_attr *attr_use,
365  ccl_qualifier_t *qa,
366  size_t no,
367  int is_phrase,
368  int auto_group)
369 {
370  struct ccl_rpn_node *p;
371  size_t i;
372  int structure_value = -1;
373 
374  int left_trunc = 0;
375  int right_trunc = 0;
376  int regex_trunc = 0;
377  int z3958_trunc = 0;
378  int is_ccl_masked = 0;
379  char *attset;
380  struct ccl_token *lookahead = lookahead0;
381  const char **truncation_aliases;
382  const char *t_default[2];
383  const char **mask_aliases;
384  const char *m_default[2];
385  int term_len = 0;
386 
387  truncation_aliases =
388  ccl_qual_search_special(cclp->bibset, "truncation");
389  if (!truncation_aliases)
390  {
391  truncation_aliases = t_default;
392  t_default[0] = "?";
393  t_default[1] = 0;
394  }
395  mask_aliases =
396  ccl_qual_search_special(cclp->bibset, "mask");
397  if (!mask_aliases)
398  {
399  mask_aliases = m_default;
400  m_default[0] = "#";
401  m_default[1] = 0;
402  }
403  for (i = 0; i < no; i++)
404  {
405  if (has_ccl_masking(lookahead->name, lookahead->len,
406  truncation_aliases,
407  mask_aliases))
408  is_ccl_masked = 1;
409 
410  term_len += 1 + lookahead->len + lookahead->ws_prefix_len;
411  lookahead = lookahead->next;
412  }
413  lookahead = lookahead0;
414 
416  p->u.t.attr_list = NULL;
417  p->u.t.term = NULL;
418  if (qa && qa[0])
419  {
420  const char *n = ccl_qual_get_name(qa[0]);
421  if (n)
422  p->u.t.qual = xstrdup(n);
423  }
424  /* go through all attributes and add them to the attribute list */
425  for (i = 0; qa && qa[i]; i++)
426  {
427  struct ccl_rpn_attr *attr;
428  for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next)
429  if (attr->type == 1 && attr_use && attr != attr_use)
430  continue;
431  else
432  {
433  switch (attr->kind)
434  {
435  case CCL_RPN_ATTR_STRING:
436  ccl_add_attr_string(p, attr->set, attr->type,
437  attr->value.str);
438  break;
440  if (attr->value.numeric > 0)
441  { /* deal only with REAL attributes (positive) */
442  switch (attr->type)
443  {
444  case CCL_BIB1_STR:
445  if (structure_value != -1)
446  continue;
447  structure_value = attr->value.numeric;
448  break;
449  }
450  ccl_add_attr_numeric(p, attr->set, attr->type,
451  attr->value.numeric);
452  }
453  }
454  }
455  }
456  attset = 0;
457  if (structure_value == -1 && (
458  auto_group ||
460  )
461  {
462  if (!is_phrase)
463  ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2);
464  else
465  ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
466  }
468  &attset))
469  {
470  if (is_ccl_masked)
471  regex_trunc = 1; /* regex trunc (102) allowed */
472  }
474  &attset))
475  {
476  if (is_ccl_masked)
477  z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */
478  }
479  /* make the RPN token */
480  p->u.t.term = (char *)xmalloc(term_len * 2 + 2);
481  ccl_assert(p->u.t.term);
482  p->u.t.term[0] = '\0';
483 
484  for (i = 0; i < no; i++)
485  {
486  const char *src_str = lookahead->name;
487  size_t src_len = lookahead->len;
488 
489  if (p->u.t.term[0] && lookahead->ws_prefix_len)
490  {
491  strxcat(p->u.t.term, lookahead->ws_prefix_buf,
492  lookahead->ws_prefix_len);
493  }
494  if (append_term(cclp, src_str, src_len, p->u.t.term, regex_trunc,
495  z3958_trunc, truncation_aliases, mask_aliases,
496  i == 0, i == no - 1,
497  &left_trunc, &right_trunc))
498  {
499  ccl_rpn_delete(p);
500  return NULL;
501  }
502  lookahead = lookahead->next;
503  }
504  if (left_trunc && right_trunc)
505  {
507  &attset))
508  {
510  ccl_rpn_delete(p);
511  return NULL;
512  }
513  ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3);
514  }
515  else if (right_trunc)
516  {
518  &attset))
519  {
521  ccl_rpn_delete(p);
522  return NULL;
523  }
524  ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1);
525  }
526  else if (left_trunc)
527  {
529  &attset))
530  {
532  ccl_rpn_delete(p);
533  return NULL;
534  }
535  ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
536  }
537  else if (regex_trunc)
538  {
539  ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
540  }
541  else if (z3958_trunc)
542  {
543  ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
544  }
545  else
546  {
548  &attset))
549  ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100);
550  }
551  return p;
552 }
553 
555  struct ccl_token *lookahead0,
556  ccl_qualifier_t *qa,
557  size_t no,
558  int is_phrase,
559  int auto_group)
560 {
561  struct ccl_rpn_node *p = 0;
562  int i;
563  for (i = 0; qa && qa[i]; i++)
564  {
565  struct ccl_rpn_attr *attr;
566  for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next)
567  if (attr->type == 1 && i == 0)
568  {
569  struct ccl_rpn_node *tmp2;
570  tmp2 = ccl_term_one_use(cclp, lookahead0,
571  attr, qa, no,
572  is_phrase, auto_group);
573  if (!tmp2)
574  {
575  ccl_rpn_delete(p);
576  return 0;
577  }
578  p = ccl_rpn_node_mkbool(p, tmp2, CCL_RPN_OR);
579  }
580  }
581  if (!p)
582  p = ccl_term_one_use(cclp, lookahead0,
583  0 /* attr: no use */, qa, no,
584  is_phrase, auto_group);
585  return p;
586 }
587 
589  struct ccl_token **ar, size_t sz,
590  size_t sub_len)
591 {
592  size_t l;
593  struct ccl_rpn_node *p_top = 0;
594  assert(sz > 0);
595  for (l = 1; l <= sz && l <= sub_len; l++)
596  {
597  struct ccl_rpn_node *p2 = ccl_term_multi_use(cclp, ar[0],
598  qa, l,
599  l > 1,
600  /* auto_group */0);
601  if (!p2)
602  {
603  ccl_rpn_delete(p_top);
604  return 0;
605  }
606  if (sz > l)
607  {
608  struct ccl_rpn_node *p1 = split_recur(cclp, qa, ar + l, sz - l,
609  sub_len);
610  if (!p1)
611  {
612  ccl_rpn_delete(p2);
613  return 0;
614  }
615  p2 = ccl_rpn_node_mkbool(p2, p1, CCL_RPN_AND);
616  }
617  p_top = ccl_rpn_node_mkbool(p_top, p2, CCL_RPN_OR);
618  }
619  assert(p_top);
620  return p_top;
621 }
622 
624  ccl_qualifier_t *qa,
625  int *term_list, int multi)
626 {
627  struct ccl_rpn_node *p;
628  struct ccl_token **ar;
629  struct ccl_token *lookahead = cclp->look_token;
630  size_t i, sz, sub_len;
631  for (sz = 0; is_term_ok(lookahead->kind, term_list); sz++)
632  lookahead = lookahead->next;
633  if (sz == 0)
634  {
636  return 0;
637  }
638  ar = (struct ccl_token **) xmalloc(sizeof(*lookahead) * sz);
639  lookahead = cclp->look_token;
640  for (i = 0; is_term_ok(lookahead->kind, term_list); i++)
641  {
642  ar[i] = lookahead;
643  lookahead = lookahead->next;
644  }
645  /* choose sub phrase carefully to avoid huge expansions */
646  if (sz >= 7)
647  sub_len = 1;
648  else if (sz >= 5)
649  sub_len = 2;
650  else
651  sub_len = 3;
652  p = split_recur(cclp, qa, ar, sz, sub_len);
653  xfree(ar);
654  for (i = 0; i < sz; i++)
655  ADVANCE;
656  return p;
657 }
658 
668  ccl_qualifier_t *qa,
669  int *term_list, int multi)
670 {
671  struct ccl_rpn_node *p_top = 0;
672  struct ccl_token *lookahead = cclp->look_token;
673  int and_list = 0;
674  int auto_group = 0;
675  int or_list = 0;
676 
678  and_list = 1;
680  auto_group = 1;
682  or_list = 1;
684  {
685  return search_term_split_list(cclp, qa, term_list, multi);
686  }
687  while (1)
688  {
689  struct ccl_rpn_node *p = 0;
690  size_t no, i;
691  int is_phrase = 0;
692  size_t max = 200;
693  if (and_list || or_list || !multi)
694  max = 1;
695 
696  /* ignore commas when dealing with and-lists .. */
697  if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA)
698  {
699  lookahead = lookahead->next;
700  ADVANCE;
701  continue;
702  }
703  for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)
704  {
705  int this_is_phrase = 0;
706  for (i = 0; i<lookahead->len; i++)
707  if (lookahead->name[i] == ' ')
708  this_is_phrase = 1;
709  if (auto_group)
710  {
711  if (no > 0 && (is_phrase || is_phrase != this_is_phrase))
712  break;
713  is_phrase = this_is_phrase;
714  }
715  else if (this_is_phrase || no > 0)
716  is_phrase = 1;
717  lookahead = lookahead->next;
718  }
719 
720  if (no == 0)
721  break; /* no more terms . stop . */
722  p = ccl_term_multi_use(cclp, cclp->look_token, qa, no,
723  is_phrase, auto_group);
724  for (i = 0; i < no; i++)
725  ADVANCE;
726  if (!p)
727  return 0;
728  p_top = ccl_rpn_node_mkbool(p_top, p, or_list ? CCL_RPN_OR : CCL_RPN_AND);
729  if (!multi)
730  break;
731  }
732  if (!p_top)
734  return p_top;
735 }
736 
738 {
739  static int list[] = {CCL_TOK_TERM, CCL_TOK_COMMA, -1};
740  return search_term_x(cclp, qa, list, 0);
741 }
742 
743 
745  ccl_qualifier_t *qa)
746 {
747  if (KIND == CCL_TOK_LP)
748  {
749  struct ccl_rpn_node *p;
750  ADVANCE;
751  if (!(p = find_spec(cclp, qa)))
752  return NULL;
753  if (KIND != CCL_TOK_RP)
754  {
756  ccl_rpn_delete(p);
757  return NULL;
758  }
759  ADVANCE;
760  return p;
761  }
762  else
763  {
764  static int list[] = {
766  CCL_TOK_REL, CCL_TOK_SET, -1};
767 
768  return search_term_x(cclp, qa, list, 1);
769  }
770 }
771 
772 
773 static
775  ccl_qualifier_t *ap, char *attset)
776 {
777  int rel = 0;
778  struct ccl_rpn_node *p;
779 
780  if (cclp->look_token->len == 1)
781  {
782  if (cclp->look_token->name[0] == '<')
783  rel = 1;
784  else if (cclp->look_token->name[0] == '=')
785  rel = 3;
786  else if (cclp->look_token->name[0] == '>')
787  rel = 5;
788  }
789  else if (cclp->look_token->len == 2)
790  {
791  if (!memcmp(cclp->look_token->name, "<=", 2))
792  rel = 2;
793  else if (!memcmp(cclp->look_token->name, ">=", 2))
794  rel = 4;
795  else if (!memcmp(cclp->look_token->name, "<>", 2))
796  rel = 6;
797  }
798  if (!rel)
799  {
801  return NULL;
802  }
803  ADVANCE; /* skip relation */
804  if (rel == 3 &&
806  {
807  /* allow - inside term and treat it as range _always_ */
808  /* relation is =. Extract "embedded" - to separate terms */
809  if (KIND == CCL_TOK_TERM)
810  {
811  size_t i;
812  int quote_mode = 0;
813  for (i = 0; i<cclp->look_token->len; i++)
814  {
815  if (i > 0 && cclp->look_token->name[i] == '\\')
816  ;
817  else if (cclp->look_token->name[i] == '"')
818  quote_mode = !quote_mode;
819  else if (cclp->look_token->name[i] == '-' && !quote_mode)
820  break;
821  }
822 
823  if (cclp->look_token->len > 1 && i == 0)
824  { /* -xx*/
825  struct ccl_token *ntoken = ccl_token_add(cclp->look_token);
826 
827  ntoken->kind = CCL_TOK_TERM;
828  ntoken->name = cclp->look_token->name + 1;
829  ntoken->len = cclp->look_token->len - 1;
830 
831  cclp->look_token->len = 1;
832  cclp->look_token->name = "-";
833  }
834  else if (cclp->look_token->len > 1 && i == cclp->look_token->len-1)
835  { /* xx- */
836  struct ccl_token *ntoken = ccl_token_add(cclp->look_token);
837 
838  ntoken->kind = CCL_TOK_TERM;
839  ntoken->name = "-";
840  ntoken->len = 1;
841 
842  (cclp->look_token->len)--;
843  }
844  else if (cclp->look_token->len > 2 && i < cclp->look_token->len)
845  { /* xx-yy */
846  struct ccl_token *ntoken1 = ccl_token_add(cclp->look_token);
847  struct ccl_token *ntoken2 = ccl_token_add(ntoken1);
848 
849  ntoken1->kind = CCL_TOK_TERM; /* generate - */
850  ntoken1->name = "-";
851  ntoken1->len = 1;
852 
853  ntoken2->kind = CCL_TOK_TERM; /* generate yy */
854  ntoken2->name = cclp->look_token->name + (i+1);
855  ntoken2->len = cclp->look_token->len - (i+1);
856 
857  cclp->look_token->len = i; /* adjust xx */
858  }
859  else if (i == cclp->look_token->len &&
860  cclp->look_token->next &&
861  cclp->look_token->next->kind == CCL_TOK_TERM &&
862  cclp->look_token->next->len > 1 &&
863  cclp->look_token->next->name[0] == '-')
864 
865  { /* xx -yy */
866  /* we _know_ that xx does not have - in it */
867  struct ccl_token *ntoken = ccl_token_add(cclp->look_token);
868 
869  ntoken->kind = CCL_TOK_TERM; /* generate - */
870  ntoken->name = "-";
871  ntoken->len = 1;
872 
873  (ntoken->next->name)++; /* adjust yy */
874  (ntoken->next->len)--;
875  }
876  }
877  }
878 
879  if (rel == 3 &&
880  KIND == CCL_TOK_TERM &&
881  cclp->look_token->next && cclp->look_token->next->len == 1 &&
882  cclp->look_token->next->name[0] == '-')
883  {
884  struct ccl_rpn_node *p1;
885  if (!(p1 = search_term(cclp, ap)))
886  return NULL;
887  ADVANCE; /* skip '-' */
888  if (KIND == CCL_TOK_TERM) /* = term - term ? */
889  {
890  struct ccl_rpn_node *p2;
891 
892  if (!(p2 = search_term(cclp, ap)))
893  {
894  ccl_rpn_delete(p1);
895  return NULL;
896  }
898  p->u.p[0] = p1;
899  ccl_set_attr_numeric(p1, attset, CCL_BIB1_REL, 4);
900  p->u.p[1] = p2;
901  ccl_set_attr_numeric(p2, attset, CCL_BIB1_REL, 2);
902  return p;
903  }
904  else /* = term - */
905  {
906  ccl_set_attr_numeric(p1, attset, CCL_BIB1_REL, 4);
907  return p1;
908  }
909  }
910  else if (rel == 3 &&
911  cclp->look_token->len == 1 &&
912  cclp->look_token->name[0] == '-') /* = - term ? */
913  {
914  ADVANCE;
915  if (!(p = search_term(cclp, ap)))
916  return NULL;
917  ccl_set_attr_numeric(p, attset, CCL_BIB1_REL, 2);
918  return p;
919  }
920  else
921  {
922  if (!(p = search_terms(cclp, ap)))
923  return NULL;
924  if (rel != 3 ||
926  ccl_set_attr_numeric(p, attset, CCL_BIB1_REL, rel);
927  return p;
928  }
929  return NULL;
930 }
931 
932 static
934 {
935  char *attset;
936 
939  return qualifiers_order(cclp, ap, attset);
940 
941  /* unordered relation */
942  if (KIND != CCL_TOK_EQ)
943  {
945  return NULL;
946  }
947  ADVANCE;
948  return search_terms(cclp, ap);
949 }
950 
959  struct ccl_token *la,
960  ccl_qualifier_t *qa)
961 {
962  struct ccl_token *lookahead = cclp->look_token;
963  struct ccl_token *look_start = cclp->look_token;
964  ccl_qualifier_t *ap;
965  struct ccl_rpn_node *node = 0;
966  const char **field_str;
967  int no = 0;
968  int seq = 0;
969  int i;
970  int mode_merge = 1;
971 #if 0
972  if (qa)
973  {
975  return NULL;
976  }
977 #endif
978  for (lookahead = cclp->look_token; lookahead != la;
979  lookahead=lookahead->next)
980  no++;
981  if (qa)
982  for (i=0; qa[i]; i++)
983  no++;
984  ap = (ccl_qualifier_t *)xmalloc((no ? (no+1) : 2) * sizeof(*ap));
985  ccl_assert(ap);
986 
987  field_str = ccl_qual_search_special(cclp->bibset, "field");
988  if (field_str)
989  {
990  if (!strcmp(field_str[0], "or"))
991  mode_merge = 0;
992  else if (!strcmp(field_str[0], "merge"))
993  mode_merge = 1;
994  }
995  if (!mode_merge)
996  {
997  /* consider each field separately and OR */
998  lookahead = look_start;
999  while (lookahead != la)
1000  {
1001  ap[1] = 0;
1002  seq = 0;
1003  while ((ap[0] = ccl_qual_search(cclp, lookahead->name,
1004  lookahead->len, seq)) != 0)
1005  {
1006  struct ccl_rpn_node *node_sub;
1007  cclp->look_token = la;
1008 
1009  node_sub = qualifier_relation(cclp, ap);
1010  if (!node_sub)
1011  {
1013  xfree(ap);
1014  return 0;
1015  }
1016  node = ccl_rpn_node_mkbool(node, node_sub, CCL_RPN_OR);
1017  seq++;
1018  }
1019  if (seq == 0)
1020  {
1021  cclp->look_token = lookahead;
1023  xfree(ap);
1024  return NULL;
1025  }
1026  lookahead = lookahead->next;
1027  if (lookahead->kind == CCL_TOK_COMMA)
1028  lookahead = lookahead->next;
1029  }
1030  }
1031  else
1032  {
1033  /* merge attributes from ALL fields - including inherited ones */
1034  while (1)
1035  {
1036  struct ccl_rpn_node *node_sub;
1037  int found = 0;
1038  lookahead = look_start;
1039  for (i = 0; lookahead != la; i++)
1040  {
1041  ap[i] = ccl_qual_search(cclp, lookahead->name,
1042  lookahead->len, seq);
1043  if (ap[i])
1044  found++;
1045  if (!ap[i] && seq > 0)
1046  ap[i] = ccl_qual_search(cclp, lookahead->name,
1047  lookahead->len, 0);
1048  if (!ap[i])
1049  {
1050  cclp->look_token = lookahead;
1052  xfree(ap);
1053  return NULL;
1054  }
1055  lookahead = lookahead->next;
1056  if (lookahead->kind == CCL_TOK_COMMA)
1057  lookahead = lookahead->next;
1058  }
1059  if (qa)
1060  {
1061  ccl_qualifier_t *qa0 = qa;
1062 
1063  while (*qa0)
1064  ap[i++] = *qa0++;
1065  }
1066  ap[i] = NULL;
1067 
1068  if (!found)
1069  break;
1070 
1071  cclp->look_token = lookahead;
1072 
1073  node_sub = qualifier_relation(cclp, ap);
1074  if (!node_sub)
1075  {
1077  break;
1078  }
1079  node = ccl_rpn_node_mkbool(node, node_sub, CCL_RPN_OR);
1080  seq++;
1081  }
1082  }
1083  xfree(ap);
1084  return node;
1085 }
1086 
1087 
1095 {
1096  static int list[] = {
1098  CCL_TOK_REL, CCL_TOK_SET, -1};
1099  struct ccl_rpn_node *p1, *p2, *pn;
1100  p1 = search_terms2(cclp, qa);
1101  if (!p1)
1102  return NULL;
1103  while (1)
1104  {
1105  if (KIND == CCL_TOK_PROX)
1106  {
1107  struct ccl_rpn_node *p_prox = 0;
1108  /* ! word order specified */
1109  /* % word order not specified */
1111  p_prox->u.t.term = (char *) xmalloc(1 + cclp->look_token->len);
1112  memcpy(p_prox->u.t.term, cclp->look_token->name,
1113  cclp->look_token->len);
1114  p_prox->u.t.term[cclp->look_token->len] = 0;
1115  p_prox->u.t.attr_list = 0;
1116 
1117  ADVANCE;
1118  p2 = search_terms2(cclp, qa);
1119  if (!p2)
1120  {
1121  ccl_rpn_delete(p1);
1122  return NULL;
1123  }
1125  pn->u.p[0] = p1;
1126  pn->u.p[1] = p2;
1127  pn->u.p[2] = p_prox;
1128  p1 = pn;
1129  }
1130  else if (is_term_ok(KIND, list))
1131  {
1132  p2 = search_terms2(cclp, qa);
1133  if (!p2)
1134  {
1135  ccl_rpn_delete(p1);
1136  return NULL;
1137  }
1139  pn->u.p[0] = p1;
1140  pn->u.p[1] = p2;
1141  pn->u.p[2] = 0;
1142  p1 = pn;
1143  }
1144  else
1145  break;
1146  }
1147  return p1;
1148 }
1149 
1157  ccl_qualifier_t *qa)
1158 {
1159  struct ccl_rpn_node *p1;
1160  struct ccl_token *lookahead;
1161  if (KIND == CCL_TOK_SET)
1162  {
1163  ADVANCE;
1164  if (KIND == CCL_TOK_EQ)
1165  ADVANCE;
1166  if (KIND != CCL_TOK_TERM)
1167  {
1169  return NULL;
1170  }
1172  p1->u.setname = copy_token_name(cclp->look_token);
1173  ADVANCE;
1174  return p1;
1175  }
1176  lookahead = cclp->look_token;
1177 
1178  while (lookahead->kind==CCL_TOK_TERM)
1179  {
1180  lookahead = lookahead->next;
1181  if (lookahead->kind == CCL_TOK_REL || lookahead->kind == CCL_TOK_EQ)
1182  return qualifier_list(cclp, lookahead, qa);
1183  if (lookahead->kind != CCL_TOK_COMMA)
1184  break;
1185  lookahead = lookahead->next;
1186  }
1187  if (qa || lookahead->kind == CCL_TOK_LP)
1188  return search_terms(cclp, qa);
1189  else
1190  {
1191  ccl_qualifier_t qa[2];
1192  struct ccl_rpn_node *node = 0;
1193  int seq;
1194  lookahead = cclp->look_token;
1195 
1196  qa[1] = 0;
1197  for(seq = 0; ;seq++)
1198  {
1199  struct ccl_rpn_node *node_sub;
1200  qa[0] = ccl_qual_search(cclp, "term", 4, seq);
1201  if (!qa[0])
1202  break;
1203 
1204  cclp->look_token = lookahead;
1205 
1206  node_sub = search_terms(cclp, qa);
1207  if (!node_sub)
1208  {
1210  return 0;
1211  }
1212  node = ccl_rpn_node_mkbool(node, node_sub, CCL_RPN_OR);
1213  }
1214  if (!node)
1215  node = search_terms(cclp, 0);
1216  return node;
1217  }
1218 }
1219 
1227 {
1228  struct ccl_rpn_node *p1, *p2;
1229  if (!(p1 = search_elements(cclp, qa)))
1230  return NULL;
1231  while (1)
1232  {
1233  switch (KIND)
1234  {
1235  case CCL_TOK_AND:
1236  ADVANCE;
1237  p2 = search_elements(cclp, qa);
1238  if (!p2)
1239  {
1240  ccl_rpn_delete(p1);
1241  return NULL;
1242  }
1243  p1 = ccl_rpn_node_mkbool(p1, p2, CCL_RPN_AND);
1244  continue;
1245  case CCL_TOK_OR:
1246  ADVANCE;
1247  p2 = search_elements(cclp, qa);
1248  if (!p2)
1249  {
1250  ccl_rpn_delete(p1);
1251  return NULL;
1252  }
1253  p1 = ccl_rpn_node_mkbool(p1, p2, CCL_RPN_OR);
1254  continue;
1255  case CCL_TOK_NOT:
1256  ADVANCE;
1257  p2 = search_elements(cclp, qa);
1258  if (!p2)
1259  {
1260  ccl_rpn_delete(p1);
1261  return NULL;
1262  }
1263  p1 = ccl_rpn_node_mkbool(p1, p2, CCL_RPN_NOT);
1264  continue;
1265  }
1266  break;
1267  }
1268  return p1;
1269 }
1270 
1271 struct ccl_rpn_node *ccl_parser_find_str(CCL_parser cclp, const char *str)
1272 {
1273  struct ccl_rpn_node *p;
1274  struct ccl_token *list = ccl_parser_tokenize(cclp, str);
1275  p = ccl_parser_find_token(cclp, list);
1276  ccl_token_del(list);
1277  return p;
1278 }
1279 
1281  struct ccl_token *list)
1282 {
1283  struct ccl_rpn_node *p;
1284 
1285  cclp->look_token = list;
1286  p = find_spec(cclp, NULL);
1287  if (p && KIND != CCL_TOK_EOL)
1288  {
1289  if (KIND == CCL_TOK_RP)
1290  cclp->error_code = CCL_ERR_BAD_RP;
1291  else
1293  ccl_rpn_delete(p);
1294  p = NULL;
1295  }
1296  cclp->error_pos = cclp->look_token->name;
1297  if (p)
1298  cclp->error_code = CCL_ERR_OK;
1299  return p;
1300 }
1301 
1310 struct ccl_rpn_node *ccl_find_str(CCL_bibset bibset, const char *str,
1311  int *error, int *pos)
1312 {
1313  CCL_parser cclp = ccl_parser_create(bibset);
1314  struct ccl_token *list;
1315  struct ccl_rpn_node *p;
1316 
1317  list = ccl_parser_tokenize(cclp, str);
1318  p = ccl_parser_find_token(cclp, list);
1319 
1320  *error = cclp->error_code;
1321  if (*error)
1322  *pos = cclp->error_pos - str;
1323  ccl_parser_destroy(cclp);
1324  ccl_token_del(list);
1325  return p;
1326 }
1327 
1328 /*
1329  * Local variables:
1330  * c-basic-offset: 4
1331  * c-file-style: "Stroustrup"
1332  * indent-tabs-mode: nil
1333  * End:
1334  * vim: shiftwidth=4 tabstop=8 expandtab
1335  */
1336 
#define CCL_ERR_OK
Definition: ccl.h:81
#define CCL_ERR_UNKNOWN_QUAL
Definition: ccl.h:87
#define CCL_RPN_ATTR_STRING
Definition: ccl.h:108
#define CCL_ERR_BAD_RP
Definition: ccl.h:86
#define CCL_BIB1_TRU_CAN_NONE
Definition: ccl.h:367
#define CCL_ERR_TRUNC_NOT_RIGHT
Definition: ccl.h:93
#define CCL_BIB1_REL_ORDER
Definition: ccl.h:360
#define CCL_ERR_SETNAME_EXPECTED
Definition: ccl.h:84
#define CCL_BIB1_REL
Definition: ccl.h:349
#define CCL_BIB1_TRU
Definition: ccl.h:352
#define CCL_ERR_TRUNC_NOT_BOTH
Definition: ccl.h:92
#define CCL_BIB1_TRU_CAN_LEFT
Definition: ccl.h:364
#define CCL_BIB1_STR
Definition: ccl.h:351
#define CCL_ERR_DOUBLE_QUAL
Definition: ccl.h:88
#define CCL_ERR_RP_EXPECTED
Definition: ccl.h:83
#define CCL_ERR_TRUNC_NOT_EMBED
Definition: ccl.h:94
#define CCL_ERR_TRUNC_NOT_SINGLE
Definition: ccl.h:95
#define CCL_ERR_BAD_RELATION
Definition: ccl.h:90
#define CCL_ERR_EQ_EXPECTED
Definition: ccl.h:89
#define CCL_BIB1_TRU_CAN_Z3958
Definition: ccl.h:369
#define CCL_RPN_ATTR_NUMERIC
Definition: ccl.h:107
#define CCL_BIB1_REL_PORDER
Definition: ccl.h:361
#define CCL_BIB1_REL_OMIT_EQUALS
Definition: ccl.h:362
#define CCL_ERR_TRUNC_NOT_LEFT
Definition: ccl.h:91
#define CCL_ERR_TERM_EXPECTED
Definition: ccl.h:82
#define CCL_BIB1_STR_AUTO_GROUP
Definition: ccl.h:358
#define CCL_BIB1_TRU_CAN_REGEX
Definition: ccl.h:368
#define CCL_BIB1_TRU_CAN_BOTH
Definition: ccl.h:366
#define CCL_ERR_OP_EXPECTED
Definition: ccl.h:85
#define CCL_BIB1_TRU_CAN_RIGHT
Definition: ccl.h:365
ccl_rpn_kind
node type or RPN tree generated by the CCL parser
Definition: ccl.h:118
@ CCL_RPN_AND
Definition: ccl.h:119
@ CCL_RPN_TERM
Definition: ccl.h:122
@ CCL_RPN_PROX
Definition: ccl.h:124
@ CCL_RPN_NOT
Definition: ccl.h:121
@ CCL_RPN_SET
Definition: ccl.h:123
@ CCL_RPN_OR
Definition: ccl.h:120
#define ccl_assert(x)
Definition: ccl.h:314
#define CCL_BIB1_STR_OR_LIST
Definition: ccl.h:357
#define CCL_BIB1_STR_AND_LIST
Definition: ccl.h:356
#define CCL_BIB1_STR_SPLIT_LIST
Definition: ccl.h:359
#define CCL_BIB1_STR_WP
Definition: ccl.h:355
static struct ccl_rpn_node * ccl_term_multi_use(CCL_parser cclp, struct ccl_token *lookahead0, ccl_qualifier_t *qa, size_t no, int is_phrase, int auto_group)
Definition: cclfind.c:554
static struct ccl_rpn_node * qualifier_relation(CCL_parser cclp, ccl_qualifier_t *ap)
Definition: cclfind.c:933
static struct ccl_rpn_node * split_recur(CCL_parser cclp, ccl_qualifier_t *qa, struct ccl_token **ar, size_t sz, size_t sub_len)
Definition: cclfind.c:588
static struct ccl_rpn_node * search_term_split_list(CCL_parser cclp, ccl_qualifier_t *qa, int *term_list, int multi)
Definition: cclfind.c:623
#define KIND
Definition: cclfind.c:25
static int has_ccl_masking(const char *src_str, size_t src_len, const char **truncation_aliases, const char **mask_aliases)
Definition: cclfind.c:265
static void strxcat(char *n, const char *src, int len)
Definition: cclfind.c:72
#define CCL_CHARS
Definition: cclfind.c:263
struct ccl_rpn_node * ccl_find_str(CCL_bibset bibset, const char *str, int *error, int *pos)
parse CCL find string using CCL profile return RPN tree
Definition: cclfind.c:1310
struct ccl_rpn_node * ccl_parser_find_str(CCL_parser cclp, const char *str)
parse CCL find string with parser and return RPN tree
Definition: cclfind.c:1271
static struct ccl_rpn_node * qualifiers_order(CCL_parser cclp, ccl_qualifier_t *ap, char *attset)
Definition: cclfind.c:774
static struct ccl_rpn_node * ccl_term_one_use(CCL_parser cclp, struct ccl_token *lookahead0, struct ccl_rpn_attr *attr_use, ccl_qualifier_t *qa, size_t no, int is_phrase, int auto_group)
Definition: cclfind.c:362
static struct ccl_rpn_node * search_term(CCL_parser cclp, ccl_qualifier_t *qa)
Definition: cclfind.c:737
static struct ccl_rpn_node * search_terms2(CCL_parser cclp, ccl_qualifier_t *qa)
Definition: cclfind.c:744
void ccl_rpn_delete(struct ccl_rpn_node *rpn)
Definition: cclfind.c:141
void ccl_set_attr_numeric(struct ccl_rpn_node *p, const char *set, int type, int value)
Definition: cclfind.c:221
static char * copy_token_name(struct ccl_token *tp)
Definition: cclfind.c:86
struct ccl_rpn_node * ccl_parser_find_token(CCL_parser cclp, struct ccl_token *list)
Definition: cclfind.c:1280
#define REGEX_CHARS
Definition: cclfind.c:262
static struct ccl_rpn_node * ccl_rpn_node_mkbool(struct ccl_rpn_node *l, struct ccl_rpn_node *r, enum ccl_rpn_kind op)
Definition: cclfind.c:120
static struct ccl_rpn_node * search_term_x(CCL_parser cclp, ccl_qualifier_t *qa, int *term_list, int multi)
Definition: cclfind.c:667
static struct ccl_rpn_node * qualifier_list(CCL_parser cclp, struct ccl_token *la, ccl_qualifier_t *qa)
Definition: cclfind.c:958
static struct ccl_rpn_node * search_terms(CCL_parser cclp, ccl_qualifier_t *qa)
Definition: cclfind.c:1094
static int is_term_ok(int look, int *list)
Definition: cclfind.c:181
void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set, int type, int value)
Definition: cclfind.c:213
void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set, int type, char *value)
Definition: cclfind.c:239
static int append_term(CCL_parser cclp, const char *src_str, size_t src_len, char *dst_term, int regex_trunc, int z3958_trunc, const char **truncation_aliases, const char **mask_aliases, int is_first, int is_last, int *left_trunc, int *right_trunc)
Definition: cclfind.c:292
static struct ccl_rpn_node * search_elements(CCL_parser cclp, ccl_qualifier_t *qa)
Definition: cclfind.c:1156
static struct ccl_rpn_attr * add_attr_node(struct ccl_rpn_node *p, const char *set, int type)
Definition: cclfind.c:191
static size_t cmp_operator(const char **aliases, const char *input)
Definition: cclfind.c:247
struct ccl_rpn_node * ccl_rpn_node_create(enum ccl_rpn_kind kind)
Definition: cclfind.c:100
static int qual_val_type(ccl_qualifier_t *qa, int type, int value, char **attset)
Definition: cclfind.c:37
#define ADVANCE
Definition: cclfind.c:28
static struct ccl_rpn_node * find_spec(CCL_parser cclp, ccl_qualifier_t *qa)
Definition: cclfind.c:1226
CCL header with private definitions.
ccl_qualifier_t ccl_qual_search(CCL_parser cclp, const char *name, size_t name_len, int seq)
Definition: cclqual.c:338
#define CCL_TOK_SET
Definition: cclp.h:45
#define CCL_TOK_NOT
Definition: cclp.h:44
#define CCL_TOK_OR
Definition: cclp.h:43
#define CCL_TOK_TERM
Definition: cclp.h:35
#define CCL_TOK_AND
Definition: cclp.h:42
const char * ccl_qual_get_name(ccl_qualifier_t q)
Definition: cclqual.c:396
struct ccl_rpn_attr * ccl_qual_get_attr(ccl_qualifier_t q)
Definition: cclqual.c:382
struct ccl_token * ccl_parser_tokenize(CCL_parser cclp, const char *command)
Definition: ccltoken.c:58
#define CCL_TOK_RP
Definition: cclp.h:40
#define CCL_TOK_EOL
Definition: cclp.h:34
void ccl_token_del(struct ccl_token *list)
Definition: ccltoken.c:209
struct ccl_token * ccl_token_add(struct ccl_token *at)
Definition: ccltoken.c:188
#define CCL_TOK_COMMA
Definition: cclp.h:41
#define CCL_TOK_LP
Definition: cclp.h:39
#define CCL_TOK_EQ
Definition: cclp.h:37
#define CCL_TOK_PROX
Definition: cclp.h:38
#define CCL_TOK_REL
Definition: cclp.h:36
const char ** ccl_qual_search_special(CCL_bibset b, const char *name)
Definition: cclqual.c:401
CCL_parser ccl_parser_create(CCL_bibset bibset)
Definition: ccltoken.c:246
void ccl_parser_destroy(CCL_parser p)
Definition: ccltoken.c:265
static int node(struct cql_node *cn, void(*pr)(const char *buf, void *client_data), void *client_data)
Definition: cql2ccl.c:86
enum l_file_type type
Definition: log.c:47
CCL_bibset bibset
Definition: cclp.h:73
int error_code
Definition: cclp.h:66
const char * error_pos
Definition: cclp.h:70
struct ccl_token * look_token
Definition: cclp.h:63
attribute node (type, value) pair as used in RPN
Definition: ccl.h:98
char * str
string attribute value
Definition: ccl.h:113
int kind
attribute value type (numeric or string)
Definition: ccl.h:106
int numeric
numeric attribute value
Definition: ccl.h:111
int type
attribute type, Bib-1: 1=use, 2=relation, 3=position, etc
Definition: ccl.h:104
union ccl_rpn_attr::@7 value
char * set
attribute set
Definition: ccl.h:102
struct ccl_rpn_attr * next
next attribute
Definition: ccl.h:100
RPN tree structure node.
Definition: ccl.h:128
struct ccl_rpn_node * p[3]
Boolean including proximity: 0=left, 1=right, 2=proximity parameters.
Definition: ccl.h:133
char * setname
Definition: ccl.h:141
struct ccl_rpn_node::@8::@9 t
Attributes + Term.
enum ccl_rpn_kind kind
node type, one of CCL_RPN_AND, CCL_RPN_OR, etc
Definition: ccl.h:130
union ccl_rpn_node::@8 u
Definition: cclp.h:50
size_t len
Definition: cclp.h:52
char kind
Definition: cclp.h:51
const char * name
Definition: cclp.h:53
struct ccl_token * next
Definition: cclp.h:54
size_t ws_prefix_len
Definition: cclp.h:57
const char * ws_prefix_buf
Definition: cclp.h:56
#define xstrdup(s)
utility macro which calls xstrdup_f
Definition: xmalloc.h:55
#define xfree(x)
utility macro which calls xfree_f
Definition: xmalloc.h:53
#define xmalloc(x)
utility macro which calls xmalloc_f
Definition: xmalloc.h:49