IDZEBRA  2.2.7
rpnsearch.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32 
33 #include <yaz/diagbib1.h>
34 #include <yaz/snprintf.h>
35 #include "index.h"
36 #include <zebra_xpath.h>
37 #include <attrfind.h>
38 #include <charmap.h>
39 #include <rset.h>
40 
41 static int log_level_set = 0;
42 static int log_level_rpn = 0;
43 
44 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
45 {
46  struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
47  const char **out = zebra_maps_input(p->zm, from, len, 0);
48 #if 0
49  if (out && *out)
50  {
51  const char *outp = *out;
52  yaz_log(YLOG_LOG, "---");
53  while (*outp)
54  {
55  yaz_log(YLOG_LOG, "%02X", *outp);
56  outp++;
57  }
58  }
59 #endif
60  return out;
61 }
62 
64  struct rpn_char_map_info *map_info)
65 {
66  map_info->zm = zm;
67  if (zebra_maps_is_icu(zm))
68  dict_grep_cmap(reg->dict, 0, 0);
69  else
70  dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
71 }
72 
73 #define TERM_COUNT
74 
75 struct grep_info {
76 #ifdef TERM_COUNT
77  int *term_no;
78 #endif
82  int trunc_max;
84  const char *index_type;
86 };
87 
88 static int add_isam_p(const char *name, const char *info,
89  struct grep_info *p)
90 {
91  if (!log_level_set)
92  {
93  log_level_rpn = yaz_log_module_level("rpn");
94  log_level_set = 1;
95  }
96  /* we may have to stop this madness.. NOTE: -1 so that if
97  truncmax == trunxlimit we do *not* generate result sets */
98  if (p->isam_p_indx >= p->trunc_max - 1)
99  return 1;
100 
101  if (p->isam_p_indx == p->isam_p_size)
102  {
103  ISAM_P *new_isam_p_buf;
104 #ifdef TERM_COUNT
105  int *new_term_no;
106 #endif
107  p->isam_p_size = 2*p->isam_p_size + 100;
108  new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
109  p->isam_p_size);
110  if (p->isam_p_buf)
111  {
112  memcpy(new_isam_p_buf, p->isam_p_buf,
113  p->isam_p_indx * sizeof(*p->isam_p_buf));
114  xfree(p->isam_p_buf);
115  }
116  p->isam_p_buf = new_isam_p_buf;
117 
118 #ifdef TERM_COUNT
119  new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
120  if (p->term_no)
121  {
122  memcpy(new_term_no, p->isam_p_buf,
123  p->isam_p_indx * sizeof(*p->term_no));
124  xfree(p->term_no);
125  }
126  p->term_no = new_term_no;
127 #endif
128  }
129  assert(*info == sizeof(*p->isam_p_buf));
130  memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
131 
132  if (p->termset)
133  {
134  const char *db;
135  char term_tmp[IT_MAX_WORD];
136  int ord = 0;
137  const char *index_name;
138  int len = key_SU_decode(&ord, (const unsigned char *) name);
139 
140  zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
141  yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143  ord, 0 /* index_type */, &db, &index_name);
144  yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
145 
146  resultSetAddTerm(p->zh, p->termset, name[len], db,
147  index_name, term_tmp);
148  }
149  (p->isam_p_indx)++;
150  return 0;
151 }
152 
/* Callback given to dict_lookup_grep(): p is the struct grep_info that
   was passed along with it; each hit is forwarded to add_isam_p(). */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    return add_isam_p(name, info, gi);
}
157 
158 static int term_pre(zebra_map_t zm, const char **src,
159  const char *ct1, int first)
160 {
161  const char *s1, *s0 = *src;
162  const char **map;
163 
164  /* skip white space */
165  while (*s0)
166  {
167  if (ct1 && strchr(ct1, *s0))
168  break;
169  s1 = s0;
170  map = zebra_maps_input(zm, &s1, strlen(s1), first);
171  if (**map != *CHR_SPACE)
172  break;
173  s0 = s1;
174  }
175  *src = s0;
176  return *s0;
177 }
178 
179 
/* esc_str: write a printable dump of in_buf[0..in_size-1] to out_buf.
 * Each byte becomes "HH:c " (two hex digits, then the character itself
 * or '?' when it is outside 32..126). Once the output is within 20
 * bytes of out_size the dump is cut short and ".." is appended.
 * out_size must be greater than 20.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    int pos;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (pos = 0; pos < in_size; pos++)
    {
        const int byte = in_buf[pos] & 0xff;
        const int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        yaz_snprintf(out_buf + strlen(out_buf), 20, "%02X:%c ", byte, shown);
        if (strlen(out_buf) > out_size-20)
        {
            strcat(out_buf, "..");
            return;
        }
    }
}
205 
206 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
207 
208 static void add_non_space(const char *start, const char *end,
209  WRBUF term_dict,
210  WRBUF display_term,
211  const char **map, int q_map_match)
212 {
213  size_t sz = end - start;
214 
215  wrbuf_write(display_term, start, sz);
216  if (!q_map_match)
217  {
218  while (start < end)
219  {
220  if (strchr(REGEX_CHARS, *start))
221  wrbuf_putc(term_dict, '\\');
222  wrbuf_putc(term_dict, *start);
223  start++;
224  }
225  }
226  else
227  {
228  char tmpbuf[80];
229  esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
230 
231  wrbuf_puts(term_dict, map[0]);
232  }
233 }
234 
235 
/* ICU sort keys seem to be of the form
     basechars \x01 accents \x01 length
   For now we just right-truncate to the basechars part. This
   may give false hits because the accents are not used.

   Scans backwards from offset i for two \x01 separators and returns
   the offset of the second one found (0 when the start of the buffer
   is reached first).
*/
static size_t icu_basechars(const char *buf, size_t i)
{
    int section;

    /* pass 0 skips the length section, pass 1 the accents section */
    for (section = 0; section < 2; section++)
    {
        while (i > 0)
        {
            i--;
            if (buf[i] == '\x01')
                break;
        }
    }
    return i; /* only basechars left */
}
249 
250 static int term_102_icu(zebra_map_t zm,
251  const char **src, WRBUF term_dict, int space_split,
252  WRBUF display_term)
253 {
254  int no_terms = 0;
255  const char *s0 = *src, *s1;
256  while (*s0 == ' ')
257  s0++;
258  s1 = s0;
259  for (;;)
260  {
261  if (*s1 == ' ' && space_split)
262  break;
263  else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
264  s1++;
265  else
266  {
267  /* EOF or regex reserved char */
268  if (s0 != s1)
269  {
270  const char *res_buf = 0;
271  size_t res_len = 0;
272  const char *display_buf;
273  size_t display_len;
274 
275  zebra_map_tokenize_start(zm, s0, s1 - s0);
276 
277  if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
278  &display_buf, &display_len))
279  {
280  size_t i;
281  res_len = icu_basechars(res_buf, res_len);
282  for (i = 0; i < res_len; i++)
283  {
284  if (strchr(REGEX_CHARS "\\", res_buf[i]))
285  wrbuf_putc(term_dict, '\\');
286  if (res_buf[i] < 32)
287  wrbuf_putc(term_dict, '\x01');
288 
289  wrbuf_putc(term_dict, res_buf[i]);
290  }
291  wrbuf_write(display_term, display_buf, display_len);
292 
293  no_terms++;
294  }
295  }
296  if (*s1 == '\0')
297  break;
298 
299  wrbuf_putc(term_dict, *s1);
300  wrbuf_putc(display_term, *s1);
301 
302  s1++;
303  s0 = s1;
304  }
305  }
306  if (no_terms)
307  wrbuf_puts(term_dict, "\x01\x01.*");
308  *src = s1;
309  return no_terms;
310 }
311 
312 static int term_100_icu(zebra_map_t zm,
313  const char **src, WRBUF term_dict,
314  WRBUF display_term,
315  int mode,
316  size_t token_number)
317 {
318  size_t i;
319  const char *res_buf = 0;
320  size_t res_len = 0;
321  const char *display_buf;
322  size_t display_len;
323 
324  zebra_map_tokenize_start(zm, *src, strlen(*src));
325  for (i = 0; i <= token_number; i++)
326  {
327  if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
328  &display_buf, &display_len))
329  return 0;
330  }
331  wrbuf_write(display_term, display_buf, display_len);
332  if (mode)
333  {
334  res_len = icu_basechars(res_buf, res_len);
335  }
336  if (mode & 2)
337  wrbuf_puts(term_dict, ".*");
338  for (i = 0; i < res_len; i++)
339  {
340  if (strchr(REGEX_CHARS "\\", res_buf[i]))
341  wrbuf_putc(term_dict, '\\');
342  if (res_buf[i] < 32)
343  wrbuf_putc(term_dict, '\x01');
344 
345  wrbuf_putc(term_dict, res_buf[i]);
346  }
347  if (mode & 1)
348  wrbuf_puts(term_dict, ".*");
349  else if (mode)
350  wrbuf_puts(term_dict, "\x01\x01.*");
351  return 1;
352 }
353 
354 /* term_100: handle term, where trunc = none(no operators at all) */
355 static int term_100(zebra_map_t zm,
356  const char **src, WRBUF term_dict, int space_split,
357  WRBUF display_term)
358 {
359  const char *s0;
360  const char **map;
361  int i = 0;
362 
363  const char *space_start = 0;
364  const char *space_end = 0;
365 
366  if (!term_pre(zm, src, 0, !space_split))
367  return 0;
368  s0 = *src;
369  while (*s0)
370  {
371  const char *s1 = s0;
372  int q_map_match = 0;
373  map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
374  if (space_split)
375  {
376  if (**map == *CHR_SPACE)
377  break;
378  }
379  else /* complete subfield only. */
380  {
381  if (**map == *CHR_SPACE)
382  { /* save space mapping for later .. */
383  space_start = s1;
384  space_end = s0;
385  continue;
386  }
387  else if (space_start)
388  { /* reload last space */
389  while (space_start < space_end)
390  {
391  if (strchr(REGEX_CHARS, *space_start))
392  wrbuf_putc(term_dict, '\\');
393  wrbuf_putc(display_term, *space_start);
394  wrbuf_putc(term_dict, *space_start);
395  space_start++;
396 
397  }
398  /* and reset */
399  space_start = space_end = 0;
400  }
401  }
402  i++;
403 
404  add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
405  }
406  *src = s0;
407  return i;
408 }
409 
410 /* term_101: handle term, where trunc = Process # */
411 static int term_101(zebra_map_t zm,
412  const char **src, WRBUF term_dict, int space_split,
413  WRBUF display_term)
414 {
415  const char *s0;
416  const char **map;
417  int i = 0;
418 
419  if (!term_pre(zm, src, "#", !space_split))
420  return 0;
421  s0 = *src;
422  while (*s0)
423  {
424  if (*s0 == '#')
425  {
426  i++;
427  wrbuf_puts(term_dict, ".*");
428  wrbuf_putc(display_term, *s0);
429  s0++;
430  }
431  else
432  {
433  const char *s1 = s0;
434  int q_map_match = 0;
435  map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
436  if (space_split && **map == *CHR_SPACE)
437  break;
438 
439  i++;
440  add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
441  }
442  }
443  *src = s0;
444  return i;
445 }
446 
447 /* term_103: handle term, where trunc = re-2 (regular expressions) */
448 static int term_103(zebra_map_t zm, const char **src,
449  WRBUF term_dict, int *errors, int space_split,
450  WRBUF display_term)
451 {
452  int i = 0;
453  const char *s0;
454  const char **map;
455 
456  if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
457  return 0;
458  s0 = *src;
459  if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
460  isdigit(((const unsigned char *)s0)[1]))
461  {
462  *errors = s0[1] - '0';
463  s0 += 3;
464  if (*errors > 3)
465  *errors = 3;
466  }
467  while (*s0)
468  {
469  if (strchr("^\\()[].*+?|-", *s0))
470  {
471  wrbuf_putc(display_term, *s0);
472  wrbuf_putc(term_dict, *s0);
473  s0++;
474  i++;
475  }
476  else
477  {
478  const char *s1 = s0;
479  int q_map_match = 0;
480  map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
481  if (space_split && **map == *CHR_SPACE)
482  break;
483 
484  i++;
485  add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
486  }
487  }
488  *src = s0;
489 
490  return i;
491 }
492 
493 /* term_103: handle term, where trunc = re-1 (regular expressions) */
494 static int term_102(zebra_map_t zm, const char **src,
495  WRBUF term_dict, int space_split, WRBUF display_term)
496 {
497  return term_103(zm, src, term_dict, NULL, space_split, display_term);
498 }
499 
500 
501 /* term_104: handle term, process ?n * # */
502 static int term_104(zebra_map_t zm, const char **src,
503  WRBUF term_dict, int space_split, WRBUF display_term)
504 {
505  const char *s0;
506  const char **map;
507  int i = 0;
508 
509  if (!term_pre(zm, src, "?*#", !space_split))
510  return 0;
511  s0 = *src;
512  while (*s0)
513  {
514  if (*s0 == '?')
515  {
516  i++;
517  wrbuf_putc(display_term, *s0);
518  s0++;
519  if (*s0 >= '0' && *s0 <= '9')
520  {
521  int limit = 0;
522  while (*s0 >= '0' && *s0 <= '9')
523  {
524  limit = limit * 10 + (*s0 - '0');
525  wrbuf_putc(display_term, *s0);
526  s0++;
527  }
528  if (limit > 20)
529  limit = 20;
530  while (--limit >= 0)
531  {
532  wrbuf_puts(term_dict, ".?");
533  }
534  }
535  else
536  {
537  wrbuf_puts(term_dict, ".*");
538  }
539  }
540  else if (*s0 == '*')
541  {
542  i++;
543  wrbuf_puts(term_dict, ".*");
544  wrbuf_putc(display_term, *s0);
545  s0++;
546  }
547  else if (*s0 == '#')
548  {
549  i++;
550  wrbuf_puts(term_dict, ".");
551  wrbuf_putc(display_term, *s0);
552  s0++;
553  }
554  else
555  {
556  const char *s1 = s0;
557  int q_map_match = 0;
558  map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
559  if (space_split && **map == *CHR_SPACE)
560  break;
561 
562  i++;
563  add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
564  }
565  }
566  *src = s0;
567  return i;
568 }
569 
570 /* term_105/106: handle term, process * ! and possibly right_truncate */
571 static int term_105(zebra_map_t zm, const char **src,
572  WRBUF term_dict, int space_split,
573  WRBUF display_term, int right_truncate)
574 {
575  const char *s0;
576  const char **map;
577  int i = 0;
578 
579  if (!term_pre(zm, src, "\\*!", !space_split))
580  return 0;
581  s0 = *src;
582  while (*s0)
583  {
584  if (*s0 == '*')
585  {
586  i++;
587  wrbuf_puts(term_dict, ".*");
588  wrbuf_putc(display_term, *s0);
589  s0++;
590  }
591  else if (*s0 == '!')
592  {
593  i++;
594  wrbuf_putc(term_dict, '.');
595  wrbuf_putc(display_term, *s0);
596  s0++;
597  }
598  else if (*s0 == '\\')
599  {
600  i++;
601  wrbuf_puts(term_dict, "\\\\");
602  wrbuf_putc(display_term, *s0);
603  s0++;
604  }
605  else
606  {
607  const char *s1 = s0;
608  int q_map_match = 0;
609  map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
610  if (space_split && **map == *CHR_SPACE)
611  break;
612 
613  i++;
614  add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
615  }
616  }
617  if (right_truncate)
618  wrbuf_puts(term_dict, ".*");
619  *src = s0;
620  return i;
621 }
622 
623 
624 /* gen_regular_rel - generate regular expression from relation
625  * val: border value (inclusive)
626  * islt: 1 if <=; 0 if >=.
627  */
628 static void gen_regular_rel(WRBUF dst, int val, int islt)
629 {
630  int w, d, i;
631  int pos = 0;
632  char numstr[20];
633 
634  yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
635  if (val >= 0)
636  {
637  if (islt)
638  wrbuf_puts(dst, "(-[0-9]+|(");
639  else
640  wrbuf_puts(dst, "((");
641  }
642  else
643  {
644  if (!islt)
645  {
646  wrbuf_puts(dst, "([0-9]+|-(");
647  islt = 1;
648  }
649  else
650  {
651  wrbuf_puts(dst, "(-(");
652  islt = 0;
653  }
654  val = -val;
655  }
656  yaz_snprintf(numstr, sizeof(numstr), "%d", val);
657  for (w = strlen(numstr); --w >= 0; pos++)
658  {
659  d = numstr[w];
660  if (pos > 0)
661  {
662  if (islt)
663  {
664  if (d == '0')
665  continue;
666  d--;
667  }
668  else
669  {
670  if (d == '9')
671  continue;
672  d++;
673  }
674  }
675  wrbuf_write(dst, numstr, w);
676  if (islt)
677  {
678  if (d != '0')
679  {
680  wrbuf_putc(dst, '[');
681  wrbuf_putc(dst, '0');
682  wrbuf_putc(dst, '-');
683  wrbuf_putc(dst, d);
684  wrbuf_putc(dst, ']');
685  }
686  else
687  wrbuf_putc(dst, d);
688  }
689  else
690  {
691  if (d != '9')
692  {
693  wrbuf_putc(dst, '[');
694  wrbuf_putc(dst, d);
695  wrbuf_putc(dst, '-');
696  wrbuf_putc(dst, '9');
697  wrbuf_putc(dst, ']');
698  }
699  else
700  wrbuf_putc(dst, d);
701  }
702  for (i = 0; i < pos; i++)
703  wrbuf_puts(dst, "[0-9]");
704  wrbuf_putc(dst, '|');
705  }
706  if (islt)
707  {
708  /* match everything less than 10^(pos-1) */
709  wrbuf_puts(dst, "0*");
710  for (i = 1; i < pos; i++)
711  wrbuf_puts(dst, "[0-9]?");
712  }
713  else
714  {
715  /* match everything greater than 10^pos */
716  for (i = 0; i <= pos; i++)
717  wrbuf_puts(dst, "[0-9]");
718  wrbuf_puts(dst, "[0-9]*");
719  }
720  wrbuf_puts(dst, "))");
721 }
722 
723 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
724 {
725  const char *src = wrbuf_cstr(wsrc);
726  if (src[*indx] == '\\')
727  {
728  wrbuf_putc(term_p, src[*indx]);
729  (*indx)++;
730  }
731  wrbuf_putc(term_p, src[*indx]);
732  (*indx)++;
733 }
734 
735 /*
736  * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
737  * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
738  * >= abc ([b-].*|a[c-].*|ab[c-].*)
739  * ([^-a].*|a[^-b].*|ab[c-].*)
740  * < abc ([-0].*|a[-a].*|ab[-b].*)
741  * ([^a-].*|a[^b-].*|ab[^c-].*)
742  * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
743  * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
744  */
745 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
746  const char **term_sub, WRBUF term_dict,
747  const Odr_oid *attributeSet,
748  zebra_map_t zm, int space_split,
749  WRBUF display_term,
750  int *error_code)
751 {
752  AttrType relation;
753  int relation_value;
754  int i;
755  WRBUF term_component = wrbuf_alloc();
756 
757  attr_init_APT(&relation, zapt, 2);
758  relation_value = attr_find(&relation, NULL);
759 
760  *error_code = 0;
761  yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
762  switch (relation_value)
763  {
764  case 1:
765  if (!term_100(zm, term_sub, term_component, space_split, display_term))
766  {
767  wrbuf_destroy(term_component);
768  return 0;
769  }
770  yaz_log(log_level_rpn, "Relation <");
771 
772  wrbuf_putc(term_dict, '(');
773  for (i = 0; i < wrbuf_len(term_component); )
774  {
775  int j = 0;
776 
777  if (i)
778  wrbuf_putc(term_dict, '|');
779  while (j < i)
780  string_rel_add_char(term_dict, term_component, &j);
781 
782  wrbuf_putc(term_dict, '[');
783 
784  wrbuf_putc(term_dict, '^');
785 
786  wrbuf_putc(term_dict, 1);
787  wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
788 
789  string_rel_add_char(term_dict, term_component, &i);
790  wrbuf_putc(term_dict, '-');
791 
792  wrbuf_putc(term_dict, ']');
793  wrbuf_putc(term_dict, '.');
794  wrbuf_putc(term_dict, '*');
795  }
796  wrbuf_putc(term_dict, ')');
797  break;
798  case 2:
799  if (!term_100(zm, term_sub, term_component, space_split, display_term))
800  {
801  wrbuf_destroy(term_component);
802  return 0;
803  }
804  yaz_log(log_level_rpn, "Relation <=");
805 
806  wrbuf_putc(term_dict, '(');
807  for (i = 0; i < wrbuf_len(term_component); )
808  {
809  int j = 0;
810 
811  while (j < i)
812  string_rel_add_char(term_dict, term_component, &j);
813  wrbuf_putc(term_dict, '[');
814 
815  wrbuf_putc(term_dict, '^');
816 
817  wrbuf_putc(term_dict, 1);
818  wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
819 
820  string_rel_add_char(term_dict, term_component, &i);
821  wrbuf_putc(term_dict, '-');
822 
823  wrbuf_putc(term_dict, ']');
824  wrbuf_putc(term_dict, '.');
825  wrbuf_putc(term_dict, '*');
826 
827  wrbuf_putc(term_dict, '|');
828  }
829  for (i = 0; i < wrbuf_len(term_component); )
830  string_rel_add_char(term_dict, term_component, &i);
831  wrbuf_putc(term_dict, ')');
832  break;
833  case 5:
834  if (!term_100(zm, term_sub, term_component, space_split, display_term))
835  {
836  wrbuf_destroy(term_component);
837  return 0;
838  }
839  yaz_log(log_level_rpn, "Relation >");
840 
841  wrbuf_putc(term_dict, '(');
842  for (i = 0; i < wrbuf_len(term_component); )
843  {
844  int j = 0;
845 
846  while (j < i)
847  string_rel_add_char(term_dict, term_component, &j);
848  wrbuf_putc(term_dict, '[');
849 
850  wrbuf_putc(term_dict, '^');
851  wrbuf_putc(term_dict, '-');
852  string_rel_add_char(term_dict, term_component, &i);
853 
854  wrbuf_putc(term_dict, ']');
855  wrbuf_putc(term_dict, '.');
856  wrbuf_putc(term_dict, '*');
857 
858  wrbuf_putc(term_dict, '|');
859  }
860  for (i = 0; i < wrbuf_len(term_component); )
861  string_rel_add_char(term_dict, term_component, &i);
862  wrbuf_putc(term_dict, '.');
863  wrbuf_putc(term_dict, '+');
864  wrbuf_putc(term_dict, ')');
865  break;
866  case 4:
867  if (!term_100(zm, term_sub, term_component, space_split, display_term))
868  {
869  wrbuf_destroy(term_component);
870  return 0;
871  }
872  yaz_log(log_level_rpn, "Relation >=");
873 
874  wrbuf_putc(term_dict, '(');
875  for (i = 0; i < wrbuf_len(term_component); )
876  {
877  int j = 0;
878 
879  if (i)
880  wrbuf_putc(term_dict, '|');
881  while (j < i)
882  string_rel_add_char(term_dict, term_component, &j);
883  wrbuf_putc(term_dict, '[');
884 
885  if (i < wrbuf_len(term_component)-1)
886  {
887  wrbuf_putc(term_dict, '^');
888  wrbuf_putc(term_dict, '-');
889  string_rel_add_char(term_dict, term_component, &i);
890  }
891  else
892  {
893  string_rel_add_char(term_dict, term_component, &i);
894  wrbuf_putc(term_dict, '-');
895  }
896  wrbuf_putc(term_dict, ']');
897  wrbuf_putc(term_dict, '.');
898  wrbuf_putc(term_dict, '*');
899  }
900  wrbuf_putc(term_dict, ')');
901  break;
902  case 3:
903  case 102:
904  case -1:
905  if (!**term_sub)
906  return 1;
907  yaz_log(log_level_rpn, "Relation =");
908  if (!term_100(zm, term_sub, term_component, space_split, display_term))
909  {
910  wrbuf_destroy(term_component);
911  return 0;
912  }
913  wrbuf_puts(term_dict, "(");
914  wrbuf_puts(term_dict, wrbuf_cstr(term_component));
915  wrbuf_puts(term_dict, ")");
916  break;
917  case 103:
918  yaz_log(log_level_rpn, "Relation always matches");
919  /* skip to end of term (we don't care what it is) */
920  while (**term_sub != '\0')
921  (*term_sub)++;
922  break;
923  default:
924  *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
925  wrbuf_destroy(term_component);
926  return 0;
927  }
928  wrbuf_destroy(term_component);
929  return 1;
930 }
931 
932 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
933  const char **term_sub,
934  WRBUF term_dict,
935  const Odr_oid *attributeSet, NMEM stream,
936  struct grep_info *grep_info,
937  const char *index_type, int complete_flag,
938  WRBUF display_term,
939  const char *xpath_use,
940  struct ord_list **ol,
941  zebra_map_t zm, size_t token_number);
942 
944  Z_AttributesPlusTerm *zapt,
945  zint *hits_limit_value,
946  const char **term_ref_id_str,
947  NMEM nmem)
948 {
949  AttrType term_ref_id_attr;
950  AttrType hits_limit_attr;
951  int term_ref_id_int;
952  zint hits_limit_from_attr;
953 
954  attr_init_APT(&hits_limit_attr, zapt, 11);
955  hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
956 
957  attr_init_APT(&term_ref_id_attr, zapt, 10);
958  term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
959  if (term_ref_id_int >= 0)
960  {
961  char *res = nmem_malloc(nmem, 20);
962  yaz_snprintf(res, 20, "%d", term_ref_id_int);
963  *term_ref_id_str = res;
964  }
965  if (hits_limit_from_attr != -1)
966  *hits_limit_value = hits_limit_from_attr;
967 
968  yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
969  *term_ref_id_str ? *term_ref_id_str : "none",
970  *hits_limit_value);
971  return ZEBRA_OK;
972 }
973 
977  Z_AttributesPlusTerm *zapt,
978  const char **term_sub,
979  const Odr_oid *attributeSet,
980  zint hits_limit, NMEM stream,
981  struct grep_info *grep_info,
982  const char *index_type, int complete_flag,
983  const char *rank_type,
984  const char *xpath_use,
985  NMEM rset_nmem,
986  RSET *rset,
987  struct rset_key_control *kc,
988  zebra_map_t zm,
989  size_t token_number)
990 {
991  ZEBRA_RES res;
992  struct ord_list *ol;
993  zint hits_limit_value = hits_limit;
994  const char *term_ref_id_str = 0;
995  WRBUF term_dict = wrbuf_alloc();
996  WRBUF display_term = wrbuf_alloc();
997  *rset = 0;
998  zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
999  stream);
1000  grep_info->isam_p_indx = 0;
1001  res = string_term(zh, zapt, term_sub, term_dict,
1002  attributeSet, stream, grep_info,
1003  index_type, complete_flag,
1004  display_term, xpath_use, &ol, zm, token_number);
1005  wrbuf_destroy(term_dict);
1006  if (res == ZEBRA_OK && *term_sub)
1007  {
1008  yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1010  grep_info->isam_p_indx, wrbuf_buf(display_term),
1011  wrbuf_len(display_term), rank_type,
1012  1 /* preserve pos */,
1013  zapt->term->which, rset_nmem,
1014  kc, kc->scope, ol, index_type, hits_limit_value,
1015  term_ref_id_str);
1016  if (!*rset)
1017  res = ZEBRA_FAIL;
1018  }
1019  wrbuf_destroy(display_term);
1020  return res;
1021 }
1022 
1023 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1024  const char **term_sub,
1025  WRBUF term_dict,
1026  const Odr_oid *attributeSet, NMEM stream,
1027  struct grep_info *grep_info,
1028  const char *index_type, int complete_flag,
1029  WRBUF display_term,
1030  const char *xpath_use,
1031  struct ord_list **ol,
1032  zebra_map_t zm, size_t token_number)
1033 {
1034  int r;
1035  AttrType truncation;
1036  int truncation_value;
1037  const char *termp;
1038  struct rpn_char_map_info rcmi;
1039 
1040  int space_split = complete_flag ? 0 : 1;
1041  int ord = -1;
1042  int regex_range = 0;
1043  int max_pos, prefix_len = 0;
1044  int relation_error;
1045  char ord_buf[32];
1046  int ord_len, i;
1047 
1048  *ol = ord_list_create(stream);
1049 
1050  rpn_char_map_prepare(zh->reg, zm, &rcmi);
1051  attr_init_APT(&truncation, zapt, 5);
1052  truncation_value = attr_find(&truncation, NULL);
1053  yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1054 
1055  termp = *term_sub; /* start of term for each database */
1056 
1057  if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1058  attributeSet, &ord) != ZEBRA_OK)
1059  {
1060  *term_sub = 0;
1061  return ZEBRA_FAIL;
1062  }
1063 
1064  wrbuf_rewind(term_dict); /* new dictionary regexp term */
1065 
1066  *ol = ord_list_append(stream, *ol, ord);
1067  ord_len = key_SU_encode(ord, ord_buf);
1068 
1069  wrbuf_putc(term_dict, '(');
1070 
1071  for (i = 0; i < ord_len; i++)
1072  {
1073  wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1074  wrbuf_putc(term_dict, ord_buf[i]);
1075  }
1076  wrbuf_putc(term_dict, ')');
1077 
1078  prefix_len = wrbuf_len(term_dict);
1079 
1080  if (zebra_maps_is_icu(zm))
1081  {
1082  int relation_value;
1083  AttrType relation;
1084 
1085  attr_init_APT(&relation, zapt, 2);
1086  relation_value = attr_find(&relation, NULL);
1087  if (relation_value == 103) /* always matches */
1088  termp += strlen(termp); /* move to end of term */
1089  else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1090  {
1091  /* ICU case */
1092  switch (truncation_value)
1093  {
1094  case -1: /* not specified */
1095  case 100: /* do not truncate */
1096  if (!term_100_icu(zm, &termp, term_dict, display_term, 0, token_number))
1097  {
1098  *term_sub = 0;
1099  return ZEBRA_OK;
1100  }
1101  break;
1102  case 102:
1103  if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1104  {
1105  *term_sub = 0;
1106  return ZEBRA_OK;
1107  }
1108  break;
1109  case 1: /* right truncation */
1110  if (!term_100_icu(zm, &termp, term_dict, display_term, 1, token_number))
1111  {
1112  *term_sub = 0;
1113  return ZEBRA_OK;
1114  }
1115  break;
1116  case 2:
1117  if (!term_100_icu(zm, &termp, term_dict, display_term, 2, token_number))
1118  {
1119  *term_sub = 0;
1120  return ZEBRA_OK;
1121  }
1122  break;
1123  case 3:
1124  if (!term_100_icu(zm, &termp, term_dict, display_term, 3, token_number))
1125  {
1126  *term_sub = 0;
1127  return ZEBRA_OK;
1128  }
1129  break;
1130  default:
1132  YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1133  truncation_value);
1134  return ZEBRA_FAIL;
1135  }
1136  }
1137  else
1138  {
1140  YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1141  relation_value);
1142  return ZEBRA_FAIL;
1143  }
1144  }
1145  else
1146  {
1147  /* non-ICU case. using string.chr and friends */
1148  switch (truncation_value)
1149  {
1150  case -1: /* not specified */
1151  case 100: /* do not truncate */
1152  if (!string_relation(zh, zapt, &termp, term_dict,
1153  attributeSet,
1154  zm, space_split, display_term,
1155  &relation_error))
1156  {
1157  if (relation_error)
1158  {
1159  zebra_setError(zh, relation_error, 0);
1160  return ZEBRA_FAIL;
1161  }
1162  *term_sub = 0;
1163  return ZEBRA_OK;
1164  }
1165  break;
1166  case 1: /* right truncation */
1167  wrbuf_putc(term_dict, '(');
1168  if (!term_100(zm, &termp, term_dict, space_split, display_term))
1169  {
1170  *term_sub = 0;
1171  return ZEBRA_OK;
1172  }
1173  wrbuf_puts(term_dict, ".*)");
1174  break;
1175  case 2: /* left truncation */
1176  wrbuf_puts(term_dict, "(.*");
1177  if (!term_100(zm, &termp, term_dict, space_split, display_term))
1178  {
1179  *term_sub = 0;
1180  return ZEBRA_OK;
1181  }
1182  wrbuf_putc(term_dict, ')');
1183  break;
1184  case 3: /* left&right truncation */
1185  wrbuf_puts(term_dict, "(.*");
1186  if (!term_100(zm, &termp, term_dict, space_split, display_term))
1187  {
1188  *term_sub = 0;
1189  return ZEBRA_OK;
1190  }
1191  wrbuf_puts(term_dict, ".*)");
1192  break;
1193  case 101: /* process # in term */
1194  wrbuf_putc(term_dict, '(');
1195  if (!term_101(zm, &termp, term_dict, space_split, display_term))
1196  {
1197  *term_sub = 0;
1198  return ZEBRA_OK;
1199  }
1200  wrbuf_puts(term_dict, ")");
1201  break;
1202  case 102: /* Regexp-1 */
1203  wrbuf_putc(term_dict, '(');
1204  if (!term_102(zm, &termp, term_dict, space_split, display_term))
1205  {
1206  *term_sub = 0;
1207  return ZEBRA_OK;
1208  }
1209  wrbuf_putc(term_dict, ')');
1210  break;
1211  case 103: /* Regexp-2 */
1212  regex_range = 1;
1213  wrbuf_putc(term_dict, '(');
1214  if (!term_103(zm, &termp, term_dict, &regex_range,
1215  space_split, display_term))
1216  {
1217  *term_sub = 0;
1218  return ZEBRA_OK;
1219  }
1220  wrbuf_putc(term_dict, ')');
1221  break;
1222  case 104: /* process ?n * # term */
1223  wrbuf_putc(term_dict, '(');
1224  if (!term_104(zm, &termp, term_dict, space_split, display_term))
1225  {
1226  *term_sub = 0;
1227  return ZEBRA_OK;
1228  }
1229  wrbuf_putc(term_dict, ')');
1230  break;
1231  case 105: /* process * ! in term and right truncate */
1232  wrbuf_putc(term_dict, '(');
1233  if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1234  {
1235  *term_sub = 0;
1236  return ZEBRA_OK;
1237  }
1238  wrbuf_putc(term_dict, ')');
1239  break;
1240  case 106: /* process * ! in term */
1241  wrbuf_putc(term_dict, '(');
1242  if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1243  {
1244  *term_sub = 0;
1245  return ZEBRA_OK;
1246  }
1247  wrbuf_putc(term_dict, ')');
1248  break;
1249  default:
1251  YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1252  truncation_value);
1253  return ZEBRA_FAIL;
1254  }
1255  }
1256  if (1)
1257  {
1258  char buf[1000];
1259  const char *input = wrbuf_cstr(term_dict) + prefix_len;
1260  esc_str(buf, sizeof(buf), input, strlen(input));
1261  }
1262  {
1263  WRBUF pr_wr = wrbuf_alloc();
1264 
1265  wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1266  yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1267  wrbuf_destroy(pr_wr);
1268  }
1269  r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1270  grep_info, &max_pos,
1271  ord_len /* number of "exact" chars */,
1272  grep_handle);
1273  if (r == 1)
1275  else if (r)
1276  yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1277  *term_sub = termp;
1278  yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1279  return ZEBRA_OK;
1280 }
1281 
1282 
1283 
1285 {
1286 #ifdef TERM_COUNT
1287  xfree(grep_info->term_no);
1288 #endif
1289  xfree(grep_info->isam_p_buf);
1290 }
1291 
1293  Z_AttributesPlusTerm *zapt,
1294  struct grep_info *grep_info,
1295  const char *index_type)
1296 {
1297 #ifdef TERM_COUNT
1298  grep_info->term_no = 0;
1299 #endif
1300  grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1301  grep_info->isam_p_size = 0;
1302  grep_info->isam_p_buf = NULL;
1303  grep_info->zh = zh;
1304  grep_info->index_type = index_type;
1305  grep_info->termset = 0;
1306  if (zapt)
1307  {
1308  AttrType truncmax;
1309  int truncmax_value;
1310 
1311  attr_init_APT(&truncmax, zapt, 13);
1312  truncmax_value = attr_find(&truncmax, NULL);
1313  if (truncmax_value != -1)
1314  grep_info->trunc_max = truncmax_value;
1315  }
1316  if (zapt)
1317  {
1318  AttrType termset;
1319  int termset_value_numeric;
1320  const char *termset_value_string;
1321 
1322  attr_init_APT(&termset, zapt, 8);
1323  termset_value_numeric =
1324  attr_find_ex(&termset, NULL, &termset_value_string);
1325  if (termset_value_numeric != -1)
1326  {
1327  zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1328  return ZEBRA_FAIL;
1329  }
1330  }
1331  return ZEBRA_OK;
1332 }
1333 
1335  Z_AttributesPlusTerm *zapt,
1336  const char *termz,
1337  const Odr_oid *attributeSet,
1338  zint hits_limit,
1339  NMEM stream,
1340  const char *index_type, int complete_flag,
1341  const char *rank_type,
1342  const char *xpath_use,
1343  NMEM rset_nmem,
1344  RSET **result_sets, int *num_result_sets,
1345  struct rset_key_control *kc,
1346  zebra_map_t zm)
1347 {
1348  struct grep_info grep_info;
1349  const char *termp = termz;
1350  int alloc_sets = 0;
1351 
1352  *num_result_sets = 0;
1354  return ZEBRA_FAIL;
1355  while (1)
1356  {
1357  ZEBRA_RES res;
1358 
1359  if (alloc_sets == *num_result_sets)
1360  {
1361  int add = 10;
1362  RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1363  sizeof(*rnew));
1364  if (alloc_sets)
1365  memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1366  alloc_sets = alloc_sets + add;
1367  *result_sets = rnew;
1368  }
1369  res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1370  stream, &grep_info,
1371  index_type, complete_flag,
1372  rank_type,
1373  xpath_use, rset_nmem,
1374  &(*result_sets)[*num_result_sets],
1375  kc, zm,
1376  *num_result_sets);
1377  if (res != ZEBRA_OK)
1378  {
1379  int i;
1380  for (i = 0; i < *num_result_sets; i++)
1381  rset_delete((*result_sets)[i]);
1383  return res;
1384  }
1385  if ((*result_sets)[*num_result_sets] == 0)
1386  break;
1387  (*num_result_sets)++;
1388 
1389  if (!*termp)
1390  break;
1391  }
1393  return ZEBRA_OK;
1394 }
1395 
1413  Z_AttributesPlusTerm *zapt,
1414  const char *termz,
1415  const Odr_oid *attributeSet,
1416  zint hits_limit,
1417  NMEM stream,
1418  const char *index_type, int complete_flag,
1419  const char *rank_type,
1420  const char *xpath_use,
1421  NMEM rset_nmem,
1422  RSET **result_sets, int *num_result_sets,
1423  struct rset_key_control *kc)
1424 {
1426  return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1427  stream, index_type, complete_flag,
1428  rank_type, xpath_use,
1429  rset_nmem, result_sets, num_result_sets,
1430  kc, zm);
1431 }
1432 
1434  const char *unit,
1435  const char *term,
1436  NMEM rset_nmem,
1437  struct rset_key_control *kc)
1438 {
1440  WRBUF w = wrbuf_alloc();
1441  wrbuf_puts(w, ZEBRA_GROUP_INDEX_NAME);
1442  wrbuf_puts(w, unit);
1443  int ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "0",
1444  wrbuf_cstr(w));
1445  wrbuf_destroy(w);
1446  if (ord == -1)
1447  return 0;
1448  char ord_buf[32];
1449  int ord_len = key_SU_encode(ord, ord_buf);
1450  char term_dict[100];
1451  memcpy(term_dict, ord_buf, ord_len);
1452  strcpy(term_dict + ord_len, term);
1453  char *val = dict_lookup(zh->reg->dict, term_dict);
1454  if (!val)
1455  return 0;
1456  ISAM_P isam_p;
1457  assert(*val == sizeof(ISAM_P));
1458  memcpy(&isam_p, val+1, sizeof(isam_p));
1459  return zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1460  isam_p, 0);
1461 }
1462 
1466  Z_AttributesPlusTerm *zapt,
1467  const Odr_oid *attributeSet,
1468  const char *index_type,
1469  NMEM rset_nmem,
1470  RSET *rset,
1471  struct rset_key_control *kc)
1472 {
1473  int position_value;
1474  AttrType position;
1475  int ord = -1;
1476  char ord_buf[32];
1477  char term_dict[100];
1478  int ord_len;
1479  char *val;
1480  ISAM_P isam_p;
1482 
1483  attr_init_APT(&position, zapt, 3);
1484  position_value = attr_find(&position, NULL);
1485  switch(position_value)
1486  {
1487  case 3:
1488  case -1:
1489  return ZEBRA_OK;
1490  case 1:
1491  case 2:
1492  break;
1493  default:
1494  zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1495  position_value);
1496  return ZEBRA_FAIL;
1497  }
1498 
1499 
1501  {
1502  zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1503  position_value);
1504  return ZEBRA_FAIL;
1505  }
1506 
1507  if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1508  attributeSet, &ord) != ZEBRA_OK)
1509  {
1510  return ZEBRA_FAIL;
1511  }
1512  ord_len = key_SU_encode(ord, ord_buf);
1513  memcpy(term_dict, ord_buf, ord_len);
1514  strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1515  val = dict_lookup(zh->reg->dict, term_dict);
1516  if (val)
1517  {
1518  assert(*val == sizeof(ISAM_P));
1519  memcpy(&isam_p, val+1, sizeof(isam_p));
1520 
1521  *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1522  isam_p, 0);
1523  }
1524  return ZEBRA_OK;
1525 }
1526 
1530  Z_AttributesPlusTerm *zapt,
1531  const char *termz_org,
1532  const Odr_oid *attributeSet,
1533  zint hits_limit,
1534  NMEM stream,
1535  const char *index_type,
1536  int complete_flag,
1537  const char *rank_type,
1538  const char *xpath_use,
1539  NMEM rset_nmem,
1540  RSET *rset,
1541  struct rset_key_control *kc)
1542 {
1543  RSET *result_sets = 0;
1544  int num_result_sets = 0;
1545  ZEBRA_RES res =
1546  search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1547  stream, index_type, complete_flag,
1548  rank_type, xpath_use,
1549  rset_nmem,
1550  &result_sets, &num_result_sets, kc);
1551 
1552  if (res != ZEBRA_OK)
1553  return res;
1554 
1555  if (num_result_sets > 0)
1556  {
1557  RSET first_set = 0;
1558  res = search_position(zh, zapt, attributeSet,
1559  index_type,
1560  rset_nmem, &first_set,
1561  kc);
1562  if (res != ZEBRA_OK)
1563  {
1564  int i;
1565  for (i = 0; i < num_result_sets; i++)
1566  rset_delete(result_sets[i]);
1567  return res;
1568  }
1569  if (first_set)
1570  {
1571  RSET *nsets = nmem_malloc(stream,
1572  sizeof(RSET) * (num_result_sets+1));
1573  nsets[0] = first_set;
1574  memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1575  result_sets = nsets;
1576  num_result_sets++;
1577  }
1578  }
1579  if (num_result_sets == 0)
1580  *rset = rset_create_null(rset_nmem, kc, 0);
1581  else if (num_result_sets == 1)
1582  *rset = result_sets[0];
1583  else
1584  *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1585  num_result_sets, result_sets,
1586  1 /* ordered */, 0 /* exclusion */,
1587  3 /* relation */, 1 /* distance */);
1588  if (!*rset)
1589  return ZEBRA_FAIL;
1590  return ZEBRA_OK;
1591 }
1592 
1596  Z_AttributesPlusTerm *zapt,
1597  const char *termz_org,
1598  const Odr_oid *attributeSet,
1599  zint hits_limit,
1600  NMEM stream,
1601  const char *index_type,
1602  int complete_flag,
1603  const char *rank_type,
1604  const char *xpath_use,
1605  NMEM rset_nmem,
1606  RSET *rset,
1607  struct rset_key_control *kc)
1608 {
1609  RSET *result_sets = 0;
1610  int num_result_sets = 0;
1611  int i;
1612  ZEBRA_RES res =
1613  search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1614  stream, index_type, complete_flag,
1615  rank_type, xpath_use,
1616  rset_nmem,
1617  &result_sets, &num_result_sets, kc);
1618  if (res != ZEBRA_OK)
1619  return res;
1620 
1621  for (i = 0; i < num_result_sets; i++)
1622  {
1623  RSET first_set = 0;
1624  res = search_position(zh, zapt, attributeSet,
1625  index_type,
1626  rset_nmem, &first_set,
1627  kc);
1628  if (res != ZEBRA_OK)
1629  {
1630  for (i = 0; i < num_result_sets; i++)
1631  rset_delete(result_sets[i]);
1632  return res;
1633  }
1634 
1635  if (first_set)
1636  {
1637  RSET tmp_set[2];
1638 
1639  tmp_set[0] = first_set;
1640  tmp_set[1] = result_sets[i];
1641 
1642  result_sets[i] = rset_create_prox(
1643  rset_nmem, kc, kc->scope,
1644  2, tmp_set,
1645  1 /* ordered */, 0 /* exclusion */,
1646  3 /* relation */, 1 /* distance */);
1647  }
1648  }
1649  if (num_result_sets == 0)
1650  *rset = rset_create_null(rset_nmem, kc, 0);
1651  else if (num_result_sets == 1)
1652  *rset = result_sets[0];
1653  else
1654  *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1655  num_result_sets, result_sets);
1656  if (!*rset)
1657  return ZEBRA_FAIL;
1658  return ZEBRA_OK;
1659 }
1660 
1664  Z_AttributesPlusTerm *zapt,
1665  const char *termz_org,
1666  const Odr_oid *attributeSet,
1667  zint hits_limit,
1668  NMEM stream,
1669  const char *index_type,
1670  int complete_flag,
1671  const char *rank_type,
1672  const char *xpath_use,
1673  NMEM rset_nmem,
1674  RSET *rset,
1675  struct rset_key_control *kc)
1676 {
1677  RSET *result_sets = 0;
1678  int num_result_sets = 0;
1679  int i;
1680  ZEBRA_RES res =
1681  search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1682  stream, index_type, complete_flag,
1683  rank_type, xpath_use,
1684  rset_nmem,
1685  &result_sets, &num_result_sets,
1686  kc);
1687  if (res != ZEBRA_OK)
1688  return res;
1689  for (i = 0; i < num_result_sets; i++)
1690  {
1691  RSET first_set = 0;
1692  res = search_position(zh, zapt, attributeSet,
1693  index_type,
1694  rset_nmem, &first_set,
1695  kc);
1696  if (res != ZEBRA_OK)
1697  {
1698  for (i = 0; i < num_result_sets; i++)
1699  rset_delete(result_sets[i]);
1700  return res;
1701  }
1702 
1703  if (first_set)
1704  {
1705  RSET tmp_set[2];
1706 
1707  tmp_set[0] = first_set;
1708  tmp_set[1] = result_sets[i];
1709 
1710  result_sets[i] = rset_create_prox(
1711  rset_nmem, kc, kc->scope,
1712  2, tmp_set,
1713  1 /* ordered */, 0 /* exclusion */,
1714  3 /* relation */, 1 /* distance */);
1715  }
1716  }
1717 
1718 
1719  if (num_result_sets == 0)
1720  *rset = rset_create_null(rset_nmem, kc, 0);
1721  else if (num_result_sets == 1)
1722  *rset = result_sets[0];
1723  else
1724  *rset = rset_create_and(rset_nmem, kc, kc->scope,
1725  num_result_sets, result_sets);
1726  if (!*rset)
1727  return ZEBRA_FAIL;
1728  return ZEBRA_OK;
1729 }
1730 
/*
 * Turn the relation attribute (attribute type 2) of zapt into a pattern
 * appended to term_dict and run dict_lookup_grep() with that pattern.
 *
 * term_sub    in/out cursor into the query term; it is handed to term_100()
 *             (presumably advanced there - confirm against term_100), or
 *             moved to the terminating NUL for relation 103.
 * term_dict   pattern buffer; the caller has already written its prefix.
 * grep_info   receives the lookup results through grep_handle.
 * max_pos     passed through to dict_lookup_grep().
 * error_code  reset to 0 on entry; set to
 *             YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for an unknown relation.
 *
 * Returns 1 after the lookup has been issued.  Returns 0 when term_100()
 * yields nothing or when the relation is unsupported; the caller tells the
 * two apart via *error_code.
 */
1731 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1732  const char **term_sub,
1733  WRBUF term_dict,
1734  const Odr_oid *attributeSet,
1735  struct grep_info *grep_info,
1736  int *max_pos,
1737  zebra_map_t zm,
1738  WRBUF display_term,
1739  int *error_code)
1740 {
1741  AttrType relation;
1742  int relation_value;
1743  int term_value;
1744  int r;
 /* scratch buffer for the extracted term; destroyed on every exit path */
1745  WRBUF term_num = wrbuf_alloc();
1746 
1747  *error_code = 0;
1748  attr_init_APT(&relation, zapt, 2);
1749  relation_value = attr_find(&relation, NULL);
1750 
1751  yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1752 
 /* Cases 1, 2, 4 and 5 differ only in the value and flag given to
    gen_regular_rel(); the term is read with atoi(), which gives no
    error indication for non-numeric input. */
1753  switch (relation_value)
1754  {
1755  case 1:
1756  yaz_log(log_level_rpn, "Relation <");
1757  if (!term_100(zm, term_sub, term_num, 1, display_term))
1758  {
1759  wrbuf_destroy(term_num);
1760  return 0;
1761  }
1762  term_value = atoi(wrbuf_cstr(term_num));
 /* "<" is expressed as "<= value-1" */
1763  gen_regular_rel(term_dict, term_value-1, 1);
1764  break;
1765  case 2:
1766  yaz_log(log_level_rpn, "Relation <=");
1767  if (!term_100(zm, term_sub, term_num, 1, display_term))
1768  {
1769  wrbuf_destroy(term_num);
1770  return 0;
1771  }
1772  term_value = atoi(wrbuf_cstr(term_num));
1773  gen_regular_rel(term_dict, term_value, 1);
1774  break;
1775  case 4:
1776  yaz_log(log_level_rpn, "Relation >=");
1777  if (!term_100(zm, term_sub, term_num, 1, display_term))
1778  {
1779  wrbuf_destroy(term_num);
1780  return 0;
1781  }
1782  term_value = atoi(wrbuf_cstr(term_num));
1783  gen_regular_rel(term_dict, term_value, 0);
1784  break;
1785  case 5:
1786  yaz_log(log_level_rpn, "Relation >");
1787  if (!term_100(zm, term_sub, term_num, 1, display_term))
1788  {
1789  wrbuf_destroy(term_num);
1790  return 0;
1791  }
1792  term_value = atoi(wrbuf_cstr(term_num));
 /* ">" is expressed as ">= value+1" */
1793  gen_regular_rel(term_dict, term_value+1, 0);
1794  break;
 /* no relation attribute (-1), 102 and 3 are all handled as equality */
1795  case -1:
1796  case 102:
1797  case 3:
1798  yaz_log(log_level_rpn, "Relation =");
1799  if (!term_100(zm, term_sub, term_num, 1, display_term))
1800  {
1801  wrbuf_destroy(term_num);
1802  return 0;
1803  }
1804  term_value = atoi(wrbuf_cstr(term_num));
 /* pattern: the decimal value with any number of leading zeros */
1805  wrbuf_printf(term_dict, "(0*%d)", term_value);
1806  break;
1807  case 103:
1808  /* term_tmp untouched.. */
 /* consume the rest of the term; nothing is appended to term_dict */
1809  while (**term_sub != '\0')
1810  (*term_sub)++;
1811  break;
1812  default:
1813  *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1814  wrbuf_destroy(term_num);
1815  return 0;
1816  }
1817  r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1818  0, grep_info, max_pos, 0, grep_handle);
1819 
 /* NOTE(review): in this listing no statement follows the `if` below
    (embedded line 1821 is absent), so the text does not parse as shown;
    restore the missing statement from the upstream source. */
1820  if (r == 1)
 /* the warning text says "rel = gt" but this path is shared by all
    relations */
1822  else if (r)
1823  yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1824  yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1825  wrbuf_destroy(term_num);
1826  return 1;
1827 }
1828 
1829 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1830  const char **term_sub,
1831  WRBUF term_dict,
1832  const Odr_oid *attributeSet, NMEM stream,
1833  struct grep_info *grep_info,
1834  const char *index_type, int complete_flag,
1835  WRBUF display_term,
1836  const char *xpath_use,
1837  struct ord_list **ol)
1838 {
1839  const char *termp;
1840  struct rpn_char_map_info rcmi;
1841  int max_pos;
1842  int relation_error = 0;
1843  int ord, ord_len, i;
1844  char ord_buf[32];
1845  zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1846 
1847  *ol = ord_list_create(stream);
1848 
1849  rpn_char_map_prepare(zh->reg, zm, &rcmi);
1850 
1851  termp = *term_sub;
1852 
1853  if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1854  attributeSet, &ord) != ZEBRA_OK)
1855  {
1856  return ZEBRA_FAIL;
1857  }
1858 
1859  wrbuf_rewind(term_dict);
1860 
1861  *ol = ord_list_append(stream, *ol, ord);
1862 
1863  ord_len = key_SU_encode(ord, ord_buf);
1864 
1865  wrbuf_putc(term_dict, '(');
1866  for (i = 0; i < ord_len; i++)
1867  {
1868  wrbuf_putc(term_dict, 1);
1869  wrbuf_putc(term_dict, ord_buf[i]);
1870  }
1871  wrbuf_putc(term_dict, ')');
1872 
1873  if (!numeric_relation(zh, zapt, &termp, term_dict,
1874  attributeSet, grep_info, &max_pos, zm,
1875  display_term, &relation_error))
1876  {
1877  if (relation_error)
1878  {
1879  zebra_setError(zh, relation_error, 0);
1880  return ZEBRA_FAIL;
1881  }
1882  *term_sub = 0;
1883  return ZEBRA_OK;
1884  }
1885  *term_sub = termp;
1886  yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1887  return ZEBRA_OK;
1888 }
1889 
1890 
1892  Z_AttributesPlusTerm *zapt,
1893  const char *termz,
1894  const Odr_oid *attributeSet,
1895  zint hits_limit,
1896  NMEM stream,
1897  const char *index_type,
1898  int complete_flag,
1899  const char *rank_type,
1900  const char *xpath_use,
1901  NMEM rset_nmem,
1902  RSET *rset,
1903  struct rset_key_control *kc)
1904 {
1905  const char *termp = termz;
1906  RSET *result_sets = 0;
1907  int num_result_sets = 0;
1908  ZEBRA_RES res;
1909  struct grep_info grep_info;
1910  int alloc_sets = 0;
1911  zint hits_limit_value = hits_limit;
1912  const char *term_ref_id_str = 0;
1913 
1914  zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1915  stream);
1916 
1917  yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1919  return ZEBRA_FAIL;
1920  while (1)
1921  {
1922  struct ord_list *ol;
1923  WRBUF term_dict = wrbuf_alloc();
1924  WRBUF display_term = wrbuf_alloc();
1925  if (alloc_sets == num_result_sets)
1926  {
1927  int add = 10;
1928  RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1929  sizeof(*rnew));
1930  if (alloc_sets)
1931  memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1932  alloc_sets = alloc_sets + add;
1933  result_sets = rnew;
1934  }
1935  yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1936  grep_info.isam_p_indx = 0;
1937  res = numeric_term(zh, zapt, &termp, term_dict,
1938  attributeSet, stream, &grep_info,
1939  index_type, complete_flag,
1940  display_term, xpath_use, &ol);
1941  wrbuf_destroy(term_dict);
1942  if (res == ZEBRA_FAIL || termp == 0)
1943  {
1944  wrbuf_destroy(display_term);
1945  break;
1946  }
1947  yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1948  result_sets[num_result_sets] =
1950  grep_info.isam_p_indx, wrbuf_buf(display_term),
1951  wrbuf_len(display_term), rank_type,
1952  0 /* preserve position */,
1953  zapt->term->which, rset_nmem,
1954  kc, kc->scope, ol, index_type,
1955  hits_limit_value,
1956  term_ref_id_str);
1957  wrbuf_destroy(display_term);
1958  if (!result_sets[num_result_sets])
1959  break;
1960  num_result_sets++;
1961  if (!*termp)
1962  break;
1963  }
1965 
1966  if (res != ZEBRA_OK)
1967  return res;
1968  if (num_result_sets == 0)
1969  *rset = rset_create_null(rset_nmem, kc, 0);
1970  else if (num_result_sets == 1)
1971  *rset = result_sets[0];
1972  else
1973  *rset = rset_create_and(rset_nmem, kc, kc->scope,
1974  num_result_sets, result_sets);
1975  if (!*rset)
1976  return ZEBRA_FAIL;
1977  return ZEBRA_OK;
1978 }
1979 
1981  Z_AttributesPlusTerm *zapt,
1982  const char *termz,
1983  const Odr_oid *attributeSet,
1984  NMEM stream,
1985  const char *rank_type, NMEM rset_nmem,
1986  RSET *rset,
1987  struct rset_key_control *kc)
1988 {
1989  Record rec;
1990  zint sysno = atozint(termz);
1991 
1992  if (sysno <= 0)
1993  sysno = 0;
1994  rec = rec_get(zh->reg->records, sysno);
1995  if (!rec)
1996  sysno = 0;
1997 
1998  rec_free(&rec);
1999 
2000  if (sysno <= 0)
2001  {
2002  *rset = rset_create_null(rset_nmem, kc, 0);
2003  }
2004  else
2005  {
2006  RSFD rsfd;
2007  struct it_key key;
2008  *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2009  res_get(zh->res, "setTmpDir"), 0);
2011 
2012  key.mem[0] = sysno;
2013  key.mem[1] = 1;
2014  key.len = 2;
2015  rset_write(rsfd, &key);
2016  rset_close(rsfd);
2017  }
2018  return ZEBRA_OK;
2019 }
2020 
2021 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2022  const Odr_oid *attributeSet, NMEM stream,
2023  Z_SortKeySpecList *sort_sequence,
2024  const char *rank_type,
2025  NMEM rset_nmem,
2026  RSET *rset,
2027  struct rset_key_control *kc)
2028 {
2029  int i;
2030  int sort_relation_value;
2031  AttrType sort_relation_type;
2032  Z_SortKeySpec *sks;
2033  Z_SortKey *sk;
2034  char termz[20];
2035 
2036  attr_init_APT(&sort_relation_type, zapt, 7);
2037  sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2038 
2039  if (!sort_sequence->specs)
2040  {
2041  sort_sequence->num_specs = 10;
2042  sort_sequence->specs = (Z_SortKeySpec **)
2043  nmem_malloc(stream, sort_sequence->num_specs *
2044  sizeof(*sort_sequence->specs));
2045  for (i = 0; i < sort_sequence->num_specs; i++)
2046  sort_sequence->specs[i] = 0;
2047  }
2048  if (zapt->term->which != Z_Term_general)
2049  i = 0;
2050  else
2051  i = atoi_n((char *) zapt->term->u.general->buf,
2052  zapt->term->u.general->len);
2053  if (i >= sort_sequence->num_specs)
2054  i = 0;
2055  yaz_snprintf(termz, sizeof(termz), "%d", i);
2056 
2057  sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2058  sks->sortElement = (Z_SortElement *)
2059  nmem_malloc(stream, sizeof(*sks->sortElement));
2060  sks->sortElement->which = Z_SortElement_generic;
2061  sk = sks->sortElement->u.generic = (Z_SortKey *)
2062  nmem_malloc(stream, sizeof(*sk));
2063  sk->which = Z_SortKey_sortAttributes;
2064  sk->u.sortAttributes = (Z_SortAttributes *)
2065  nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2066 
2067  sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2068  sk->u.sortAttributes->list = zapt->attributes;
2069 
2070  sks->sortRelation = (Odr_int *)
2071  nmem_malloc(stream, sizeof(*sks->sortRelation));
2072  if (sort_relation_value == 1)
2073  *sks->sortRelation = Z_SortKeySpec_ascending;
2074  else if (sort_relation_value == 2)
2075  *sks->sortRelation = Z_SortKeySpec_descending;
2076  else
2077  *sks->sortRelation = Z_SortKeySpec_ascending;
2078 
2079  sks->caseSensitivity = (Odr_int *)
2080  nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2081  *sks->caseSensitivity = 0;
2082 
2083  sks->which = Z_SortKeySpec_null;
2084  sks->u.null = odr_nullval ();
2085  sort_sequence->specs[i] = sks;
2086  *rset = rset_create_null(rset_nmem, kc, 0);
2087  return ZEBRA_OK;
2088 }
2089 
2090 
2091 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2092  const Odr_oid *attributeSet,
2093  struct xpath_location_step *xpath, int max,
2094  NMEM mem)
2095 {
2096  const Odr_oid *curAttributeSet = attributeSet;
2097  AttrType use;
2098  const char *use_string = 0;
2099 
2100  attr_init_APT(&use, zapt, 1);
2101  attr_find_ex(&use, &curAttributeSet, &use_string);
2102 
2103  if (!use_string || *use_string != '/')
2104  return -1;
2105 
2106  return zebra_parse_xpath_str(use_string, xpath, max, mem);
2107 }
2108 
2109 
2110 
/*
 * Look up `term` as a pattern in the index identified by
 * (index_type, xpath_use) and return a result set for it.
 *
 * A null result set is returned when grep_info_prepare() fails or when
 * the attribute lookup yields a negative ordinal.
 *
 * NOTE(review): this listing skips embedded line numbers 2119, 2149, 2150
 * and 2155.  As a result the zebraExplain_lookup_attr_str() call and the
 * debug log call are incomplete here, and the local `rset` is never
 * assigned in the visible lines.  Restore those lines from the upstream
 * source before building.
 */
2111 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2112  const char *index_type, const char *term,
2113  const char *xpath_use,
2114  NMEM rset_nmem,
2115  struct rset_key_control *kc)
2116 {
2117  struct grep_info grep_info;
2118  int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2120  index_type, xpath_use);
 /* no APT is available here, so only the resource defaults apply */
2121  if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2122  return rset_create_null(rset_nmem, kc, 0);
2123 
2124  if (ord < 0)
2125  return rset_create_null(rset_nmem, kc, 0);
2126  else
2127  {
2128  int i, max_pos;
2129  char ord_buf[32];
2130  RSET rset;
2131  WRBUF term_dict = wrbuf_alloc();
2132  int ord_len = key_SU_encode(ord, ord_buf);
2133  int term_type = Z_Term_characterString;
2134  const char *flags = "void";
2135 
 /* prefix group: every encoded ordinal byte is preceded by a byte 1;
    the caller-supplied term follows unmodified */
2136  wrbuf_putc(term_dict, '(');
2137  for (i = 0; i < ord_len; i++)
2138  {
2139  wrbuf_putc(term_dict, 1);
2140  wrbuf_putc(term_dict, ord_buf[i]);
2141  }
2142  wrbuf_putc(term_dict, ')');
2143  wrbuf_puts(term_dict, term);
2144 
2145  grep_info.isam_p_indx = 0;
 /* the return value of dict_lookup_grep is not examined here */
2146  dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2147  &grep_info, &max_pos, 0, grep_handle);
2148  yaz_log(YLOG_DEBUG, "%s %d positions", term,
2151  grep_info.isam_p_indx, term, strlen(term),
2152  flags, 1, term_type, rset_nmem,
2153  kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2154  0 /* term_ref_id_str */);
2156  wrbuf_destroy(term_dict);
2157  return rset;
2158  }
2159 }
2160 
2161 static
2163  NMEM stream, const char *rank_type, RSET rset,
2164  int xpath_len, struct xpath_location_step *xpath,
2165  NMEM rset_nmem,
2166  RSET *rset_out,
2167  struct rset_key_control *kc)
2168 {
2169  int i;
2170  int always_matches = rset ? 0 : 1;
2171 
2172  if (xpath_len < 0)
2173  {
2174  *rset_out = rset;
2175  return ZEBRA_OK;
2176  }
2177 
2178  yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2179  for (i = 0; i < xpath_len; i++)
2180  {
2181  yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2182 
2183  }
2184 
2185  /*
2186  //a -> a/.*
2187  //a/b -> b/a/.*
2188  /a -> a/
2189  /a/b -> b/a/
2190 
2191  / -> none
2192 
2193  a[@attr = value]/b[@other = othervalue]
2194 
2195  /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2196  /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2197  /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2198  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2199  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2200  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2201 
2202  */
2203 
2204  dict_grep_cmap(zh->reg->dict, 0, 0);
2205 
2206  {
2207  int level = xpath_len;
2208  int first_path = 1;
2209 
2210  while (--level >= 0)
2211  {
2212  WRBUF xpath_rev = wrbuf_alloc();
2213  int i;
2214  RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2215 
2216  for (i = level; i >= 1; --i)
2217  {
2218  const char *cp = xpath[i].part;
2219  if (*cp)
2220  {
2221  for (; *cp; cp++)
2222  {
2223  if (*cp == '*')
2224  wrbuf_puts(xpath_rev, "[^/]*");
2225  else if (*cp == ' ')
2226  wrbuf_puts(xpath_rev, "\001 ");
2227  else
2228  wrbuf_putc(xpath_rev, *cp);
2229 
2230  /* wrbuf_putc does not null-terminate , but
2231  wrbuf_puts below ensures it does.. so xpath_rev
2232  is OK iff length is > 0 */
2233  }
2234  wrbuf_puts(xpath_rev, "/");
2235  }
2236  else if (i == 1) /* // case */
2237  wrbuf_puts(xpath_rev, ".*");
2238  }
2239  if (xpath[level].predicate &&
2240  xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2241  xpath[level].predicate->u.relation.name[0])
2242  {
2243  WRBUF wbuf = wrbuf_alloc();
2244  wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2245  if (xpath[level].predicate->u.relation.value)
2246  {
2247  const char *cp = xpath[level].predicate->u.relation.value;
2248  wrbuf_putc(wbuf, '=');
2249 
2250  while (*cp)
2251  {
2252  if (strchr(REGEX_CHARS, *cp))
2253  wrbuf_putc(wbuf, '\\');
2254  wrbuf_putc(wbuf, *cp);
2255  cp++;
2256  }
2257  }
2258  rset_attr = xpath_trunc(
2259  zh, stream, "0", wrbuf_cstr(wbuf),
2261  rset_nmem, kc);
2262  wrbuf_destroy(wbuf);
2263  }
2264  else
2265  {
2266  if (!first_path)
2267  {
2268  wrbuf_destroy(xpath_rev);
2269  continue;
2270  }
2271  }
2272  yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2273  wrbuf_cstr(xpath_rev));
2274  if (wrbuf_len(xpath_rev))
2275  {
2276  rset_start_tag = xpath_trunc(zh, stream, "0",
2277  wrbuf_cstr(xpath_rev),
2279  rset_nmem, kc);
2280  if (always_matches)
2281  rset = rset_start_tag;
2282  else
2283  {
2284  rset_end_tag = xpath_trunc(zh, stream, "0",
2285  wrbuf_cstr(xpath_rev),
2287  rset_nmem, kc);
2288 
2289  rset = rset_create_between(rset_nmem, kc, kc->scope,
2290  rset_start_tag, rset, NULL,
2291  rset_end_tag, rset_attr);
2292  }
2293  }
2294  wrbuf_destroy(xpath_rev);
2295  first_path = 0;
2296  }
2297  }
2298  *rset_out = rset;
2299  return ZEBRA_OK;
2300 }
2301 
2302 #define MAX_XPATH_STEPS 10
2303 
2305  Z_AttributesPlusTerm *zapt,
2306  const Odr_oid *attributeSet,
2307  zint hits_limit, NMEM stream,
2308  Z_SortKeySpecList *sort_sequence,
2309  NMEM rset_nmem,
2310  RSET *rset,
2311  struct rset_key_control *kc);
2312 
2313 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2314  const Odr_oid *attributeSet,
2315  zint hits_limit, NMEM stream,
2316  Z_SortKeySpecList *sort_sequence,
2317  int num_bases, const char **basenames,
2318  NMEM rset_nmem,
2319  RSET *rset,
2320  struct rset_key_control *kc)
2321 {
2322  RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2323  ZEBRA_RES res = ZEBRA_OK;
2324  int i;
2325  for (i = 0; i < num_bases; i++)
2326  {
2327 
2328  if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2329  {
2330  zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2331  basenames[i]);
2332  res = ZEBRA_FAIL;
2333  break;
2334  }
2335  res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2336  sort_sequence,
2337  rset_nmem, rsets+i, kc);
2338  if (res != ZEBRA_OK)
2339  break;
2340  }
2341  if (res != ZEBRA_OK)
2342  { /* must clean up the already created sets */
2343  while (--i >= 0)
2344  rset_delete(rsets[i]);
2345  *rset = 0;
2346  }
2347  else
2348  {
2349  if (num_bases == 1)
2350  *rset = rsets[0];
2351  else if (num_bases == 0)
2352  *rset = rset_create_null(rset_nmem, kc, 0);
2353  else
2354  *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2355  num_bases, rsets);
2356  }
2357  return res;
2358 }
2359 
2361  Z_AttributesPlusTerm *zapt,
2362  const Odr_oid *attributeSet,
2363  zint hits_limit, NMEM stream,
2364  Z_SortKeySpecList *sort_sequence,
2365  NMEM rset_nmem,
2366  RSET *rset,
2367  struct rset_key_control *kc)
2368 {
2369  ZEBRA_RES res = ZEBRA_OK;
2370  const char *index_type;
2371  char *search_type = NULL;
2372  char rank_type[128];
2373  int complete_flag;
2374  int sort_flag;
2375  char termz[IT_MAX_WORD+1];
2376  int xpath_len;
2377  const char *xpath_use = 0;
2378  struct xpath_location_step xpath[MAX_XPATH_STEPS];
2379 
2380  if (!log_level_set)
2381  {
2382  log_level_rpn = yaz_log_module_level("rpn");
2383  log_level_set = 1;
2384  }
2385  zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2386  rank_type, &complete_flag, &sort_flag);
2387 
2388  yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2389  yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2390  yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2391  yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2392 
2393  if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2394  return ZEBRA_FAIL;
2395 
2396  if (sort_flag)
2397  return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2398  rank_type, rset_nmem, rset, kc);
2399  /* consider if an X-Path query is used */
2400  xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2401  xpath, MAX_XPATH_STEPS, stream);
2402  if (xpath_len >= 0)
2403  {
2404  if (xpath[xpath_len-1].part[0] == '@')
2405  xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2406  else
2407  xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2408 
2409  if (1)
2410  {
2411  AttrType relation;
2412  int relation_value;
2413 
2414  attr_init_APT(&relation, zapt, 2);
2415  relation_value = attr_find(&relation, NULL);
2416 
2417  if (relation_value == 103) /* alwaysmatches */
2418  {
2419  *rset = 0; /* signal no "term" set */
2420  return rpn_search_xpath(zh, stream, rank_type, *rset,
2421  xpath_len, xpath, rset_nmem, rset, kc);
2422  }
2423  }
2424  }
2425 
2426  /* search using one of the various search type strategies
2427  termz is our UTF-8 search term
2428  attributeSet is top-level default attribute set
2429  stream is ODR for search
2430  reg_id is the register type
2431  complete_flag is 1 for complete subfield, 0 for incomplete
2432  xpath_use is use-attribute to be used for X-Path search, 0 for none
2433  */
2434  if (!strcmp(search_type, "phrase"))
2435  {
2436  res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2437  stream,
2438  index_type, complete_flag, rank_type,
2439  xpath_use,
2440  rset_nmem,
2441  rset, kc);
2442  }
2443  else if (!strcmp(search_type, "and-list"))
2444  {
2445  res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2446  stream,
2447  index_type, complete_flag, rank_type,
2448  xpath_use,
2449  rset_nmem,
2450  rset, kc);
2451  }
2452  else if (!strcmp(search_type, "or-list"))
2453  {
2454  res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2455  stream,
2456  index_type, complete_flag, rank_type,
2457  xpath_use,
2458  rset_nmem,
2459  rset, kc);
2460  }
2461  else if (!strcmp(search_type, "local"))
2462  {
2463  res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2464  rank_type, rset_nmem, rset, kc);
2465  }
2466  else if (!strcmp(search_type, "numeric"))
2467  {
2468  res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2469  stream,
2470  index_type, complete_flag, rank_type,
2471  xpath_use,
2472  rset_nmem,
2473  rset, kc);
2474  }
2475  else
2476  {
2477  zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2478  res = ZEBRA_FAIL;
2479  }
2480  if (res != ZEBRA_OK)
2481  return res;
2482  if (!*rset)
2483  return ZEBRA_FAIL;
2484  return rpn_search_xpath(zh, stream, rank_type, *rset,
2485  xpath_len, xpath, rset_nmem, rset, kc);
2486 }
2487 
2488 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2489  const Odr_oid *attributeSet,
2490  zint hits_limit,
2491  NMEM stream, NMEM rset_nmem,
2492  Z_SortKeySpecList *sort_sequence,
2493  int num_bases, const char **basenames,
2494  RSET **result_sets, int *num_result_sets,
2495  Z_Operator *parent_op,
2496  struct rset_key_control *kc);
2497 
2499  zint *approx_limit)
2500 {
2501  ZEBRA_RES res = ZEBRA_OK;
2502  if (zs->which == Z_RPNStructure_complex)
2503  {
2504  if (res == ZEBRA_OK)
2505  res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2506  approx_limit);
2507  if (res == ZEBRA_OK)
2508  res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2509  approx_limit);
2510  }
2511  else if (zs->which == Z_RPNStructure_simple)
2512  {
2513  if (zs->u.simple->which == Z_Operand_APT)
2514  {
2515  Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2516  AttrType global_hits_limit_attr;
2517  int l;
2518 
2519  attr_init_APT(&global_hits_limit_attr, zapt, 12);
2520 
2521  l = attr_find(&global_hits_limit_attr, NULL);
2522  if (l != -1)
2523  *approx_limit = l;
2524  }
2525  }
2526  return res;
2527 }
2528 
2529 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2530  const Odr_oid *attributeSet,
2531  zint hits_limit,
2532  NMEM stream, NMEM rset_nmem,
2533  Z_SortKeySpecList *sort_sequence,
2534  int num_bases, const char **basenames,
2535  RSET *result_set)
2536 {
2537  RSET *result_sets = 0;
2538  int num_result_sets = 0;
2539  ZEBRA_RES res;
2540  struct rset_key_control *kc = zebra_key_control_create(zh);
2541 
2542  res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2543  stream, rset_nmem,
2544  sort_sequence,
2545  num_bases, basenames,
2546  &result_sets, &num_result_sets,
2547  0 /* no parent op */,
2548  kc);
2549  if (res != ZEBRA_OK)
2550  {
2551  int i;
2552  for (i = 0; i < num_result_sets; i++)
2553  rset_delete(result_sets[i]);
2554  *result_set = 0;
2555  }
2556  else
2557  {
2558  assert(num_result_sets == 1);
2559  assert(result_sets);
2560  assert(*result_sets);
2561  *result_set = *result_sets;
2562  }
2563  (*kc->dec)(kc);
2564  return res;
2565 }
2566 
2568  const Odr_oid *attributeSet, zint hits_limit,
2569  NMEM stream, NMEM rset_nmem,
2570  Z_SortKeySpecList *sort_sequence,
2571  int num_bases, const char **basenames,
2572  RSET **result_sets, int *num_result_sets,
2573  Z_Operator *parent_op,
2574  struct rset_key_control *kc)
2575 {
2576  *num_result_sets = 0;
2577  if (zs->which == Z_RPNStructure_complex)
2578  {
2579  ZEBRA_RES res;
2580  Z_Operator *zop = zs->u.complex->roperator;
2581  RSET *result_sets_l = 0;
2582  int num_result_sets_l = 0;
2583  RSET *result_sets_r = 0;
2584  int num_result_sets_r = 0;
2585 
2586  res = rpn_search_structure(zh, zs->u.complex->s1,
2587  attributeSet, hits_limit, stream, rset_nmem,
2588  sort_sequence,
2589  num_bases, basenames,
2590  &result_sets_l, &num_result_sets_l,
2591  zop, kc);
2592  if (res != ZEBRA_OK)
2593  {
2594  int i;
2595  for (i = 0; i < num_result_sets_l; i++)
2596  rset_delete(result_sets_l[i]);
2597  return res;
2598  }
2599  res = rpn_search_structure(zh, zs->u.complex->s2,
2600  attributeSet, hits_limit, stream, rset_nmem,
2601  sort_sequence,
2602  num_bases, basenames,
2603  &result_sets_r, &num_result_sets_r,
2604  zop, kc);
2605  if (res != ZEBRA_OK)
2606  {
2607  int i;
2608  for (i = 0; i < num_result_sets_l; i++)
2609  rset_delete(result_sets_l[i]);
2610  for (i = 0; i < num_result_sets_r; i++)
2611  rset_delete(result_sets_r[i]);
2612  return res;
2613  }
2614 
2615  /* make a new list of result for all children */
2616  *num_result_sets = num_result_sets_l + num_result_sets_r;
2617  *result_sets = nmem_malloc(stream, *num_result_sets *
2618  sizeof(**result_sets));
2619  memcpy(*result_sets, result_sets_l,
2620  num_result_sets_l * sizeof(**result_sets));
2621  memcpy(*result_sets + num_result_sets_l, result_sets_r,
2622  num_result_sets_r * sizeof(**result_sets));
2623 
2624  if (!parent_op || parent_op->which != zop->which
2625  || (zop->which != Z_Operator_and &&
2626  zop->which != Z_Operator_or))
2627  {
2628  /* parent node different from this one (or non-present) */
2629  /* we must combine result sets now */
2630  RSET rset;
2631  switch (zop->which)
2632  {
2633  case Z_Operator_and:
2634  rset = rset_create_and(rset_nmem, kc,
2635  kc->scope,
2636  *num_result_sets, *result_sets);
2637  break;
2638  case Z_Operator_or:
2639  rset = rset_create_or(rset_nmem, kc,
2640  kc->scope, 0, /* termid */
2641  *num_result_sets, *result_sets);
2642  break;
2643  case Z_Operator_and_not:
2644  rset = rset_create_not(rset_nmem, kc,
2645  kc->scope,
2646  (*result_sets)[0],
2647  (*result_sets)[1]);
2648  break;
2649  case Z_Operator_prox:
2650  if (zop->u.prox->which != Z_ProximityOperator_known)
2651  {
2652  zebra_setError(zh,
2653  YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2654  0);
2655  return ZEBRA_FAIL;
2656  }
2657  if (*zop->u.prox->u.known == Z_ProxUnit_word)
2658  {
2659  rset = rset_create_prox(rset_nmem, kc,
2660  kc->scope,
2661  *num_result_sets, *result_sets,
2662  *zop->u.prox->ordered,
2663  (!zop->u.prox->exclusion ?
2664  0 : *zop->u.prox->exclusion),
2665  *zop->u.prox->relationType,
2666  *zop->u.prox->distance );
2667  }
2668  else if (*zop->u.prox->u.known >= 3 &&
2669  *zop->u.prox->u.known <= 10 &&
2670  *num_result_sets == 2)
2671  {
2672  /* Z39.50 known proximity units */
2673  static const char *units[] = {
2674  "sentence", /* (3) */
2675  "paragraph", /* (4) */
2676  "section", /* (5) */
2677  "chapter", /* (6) */
2678  "document", /* (7) */
2679  "element", /* (8) */
2680  "subelement", /* (9) */
2681  "elementType" /* (10) */
2682  };
2683  const char *unit = units[*zop->u.prox->u.known - 3];
2684  RSET begin_set = search_group(zh, unit, "begin",
2685  rset_nmem, kc);
2686  RSET end_set = search_group(zh, unit, "end",
2687  rset_nmem, kc);
2688  if (begin_set && end_set)
2689  {
2691  rset_nmem, kc, kc->scope,
2692  begin_set,
2693  (*result_sets[0]), (*result_sets)[1], end_set,
2694  0 /* rset_attr */);
2695  }
2696  else
2697  {
2698  if (begin_set)
2699  rset_delete(begin_set);
2700  if (end_set)
2701  rset_delete(end_set);
2703  YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2704  *zop->u.prox->u.known);
2705  return ZEBRA_FAIL;
2706 
2707  }
2708  }
2709  else
2710  {
2712  YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2713  *zop->u.prox->u.known);
2714  return ZEBRA_FAIL;
2715  }
2716  break;
2717  default:
2718  zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2719  return ZEBRA_FAIL;
2720  }
2721  *num_result_sets = 1;
2722  *result_sets = nmem_malloc(stream, *num_result_sets *
2723  sizeof(**result_sets));
2724  (*result_sets)[0] = rset;
2725  }
2726  }
2727  else if (zs->which == Z_RPNStructure_simple)
2728  {
2729  RSET rset;
2730  ZEBRA_RES res;
2731 
2732  if (zs->u.simple->which == Z_Operand_APT)
2733  {
2734  yaz_log(YLOG_DEBUG, "rpn_search_APT");
2735  res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2736  attributeSet, hits_limit,
2737  stream, sort_sequence,
2738  num_bases, basenames, rset_nmem, &rset,
2739  kc);
2740  if (res != ZEBRA_OK)
2741  return res;
2742  }
2743  else if (zs->u.simple->which == Z_Operand_resultSetId)
2744  {
2745  yaz_log(YLOG_DEBUG, "rpn_search_ref");
2746  rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2747  if (!rset)
2748  {
2749  zebra_setError(zh,
2750  YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2751  zs->u.simple->u.resultSetId);
2752  return ZEBRA_FAIL;
2753  }
2754  rset_dup(rset);
2755  }
2756  else
2757  {
2758  zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2759  return ZEBRA_FAIL;
2760  }
2761  *num_result_sets = 1;
2762  *result_sets = nmem_malloc(stream, *num_result_sets *
2763  sizeof(**result_sets));
2764  (*result_sets)[0] = rset;
2765  }
2766  else
2767  {
2768  zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2769  return ZEBRA_FAIL;
2770  }
2771  return ZEBRA_OK;
2772 }
2773 
2774 
2775 
2776 /*
2777  * Local variables:
2778  * c-basic-offset: 4
2779  * c-file-style: "Stroustrup"
2780  * indent-tabs-mode: nil
2781  * End:
2782  * vim: shiftwidth=4 tabstop=8 expandtab
2783  */
2784 
int attr_find(AttrType *src, const Odr_oid **attribute_set_oid)
Definition: attrfind.c:99
void attr_init_APT(AttrType *src, Z_AttributesPlusTerm *zapt, int type)
Definition: attrfind.c:27
int attr_find_ex(AttrType *src, const Odr_oid **attribute_set_oid, const char **string_value)
Definition: attrfind.c:45
ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *index_type, const char *xpath_use, const Odr_oid *curAttributeSet, int *ord)
Definition: attribute.c:135
const char * CHR_SPACE
Definition: charmap.c:49
char * dict_lookup(Dict dict, const char *p)
lookup item in dictionary
Definition: lookup.c:100
void dict_grep_cmap(Dict dict, void *vp, const char **(*cmap)(void *vp, const char **from, int len))
install character mapping handler for dict_lookup_grep
Definition: lookgrep.c:445
int dict_lookup_grep(Dict dict, const char *p, int range, void *client, int *max_pos, int init_pos, int(*f)(char *name, const char *info, void *client))
regular expression search with error correction
Definition: lookgrep.c:374
RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
Definition: zsets.c:1075
void zebra_set_partial_result(ZebraHandle zh)
Definition: zebraapi.c:1064
int zebra_term_untrans(ZebraHandle zh, const char *index_type, char *dst, const char *src)
Definition: untrans.c:31
#define FIRST_IN_FIELD_STR
Definition: index.h:419
ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *termz)
Definition: zaptterm.c:32
void zebra_setError(ZebraHandle zh, int code, const char *addinfo)
Definition: zebraapi.c:2755
void zebra_setError_zint(ZebraHandle zh, int code, zint i)
Definition: zebraapi.c:2764
#define FIRST_IN_FIELD_CHAR
Definition: index.h:420
RSET rset_trunc(ZebraHandle zh, ISAM_P *isam_p, int no, const char *term, int length_term, const char *flags, int preserve_position, int term_type, NMEM rset_nmem, struct rset_key_control *kctrl, int scope, struct ord_list *ol, const char *index_type, zint hits_limit, const char *term_ref_id)
Definition: trunc.c:403
RSET zebra_create_rset_isam(ZebraHandle zh, NMEM rset_nmem, struct rset_key_control *kctl, int scope, ISAM_P pos, TERMID termid)
Definition: rset_isam.c:32
void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type, const char *db, const char *index_name, const char *term)
Definition: zsets.c:188
struct rset_key_control * zebra_key_control_create(ZebraHandle zh)
Definition: kcontrol.c:57
zint ISAM_P
Definition: isamc.h:28
int key_SU_decode(int *ch, const unsigned char *out)
Definition: su_codec.c:64
#define IT_MAX_WORD
Definition: it_key.h:27
int key_SU_encode(int ch, char *out)
Definition: su_codec.c:31
static void end(struct zebra_register *reg, void *set_handle)
Definition: rank1.c:156
static void add(void *set_handle, int seqno, TERMID term)
Definition: rank1.c:168
#define ZEBRA_XPATH_ELM_END
Definition: recctrl.h:36
#define ZEBRA_XPATH_ATTR_CDATA
Definition: recctrl.h:45
#define ZEBRA_XPATH_CDATA
Definition: recctrl.h:39
#define ZEBRA_XPATH_ELM_BEGIN
Definition: recctrl.h:33
#define ZEBRA_GROUP_INDEX_NAME
Definition: recctrl.h:47
#define ZEBRA_XPATH_ATTR_NAME
Definition: recctrl.h:42
Record rec_get(Records p, zint sysno)
gets record - with given system number
Definition: records.c:928
void rec_free(Record *recpp)
frees record (from memory)
Definition: records.c:1044
const char * res_get(Res r, const char *name)
Definition: res.c:294
const char * res_get_def(Res r, const char *name, const char *def)
Definition: res.c:313
static void grep_info_delete(struct grep_info *grep_info)
Definition: rpnsearch.c:1284
static void esc_str(char *out_buf, size_t out_size, const char *in_buf, int in_size)
Definition: rpnsearch.c:180
static ZEBRA_RES grep_info_prepare(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info *grep_info, const char *index_type)
Definition: rpnsearch.c:1292
static int term_104(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term)
Definition: rpnsearch.c:502
static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
returns result set for or-list search
Definition: rpnsearch.c:1595
static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, struct grep_info *grep_info, int *max_pos, zebra_map_t zm, WRBUF display_term, int *error_code)
Definition: rpnsearch.c:1731
static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, zebra_map_t zm, int space_split, WRBUF display_term, int *error_code)
Definition: rpnsearch.c:745
static void add_non_space(const char *start, const char *end, WRBUF term_dict, WRBUF display_term, const char **map, int q_map_match)
Definition: rpnsearch.c:208
static RSET xpath_trunc(ZebraHandle zh, NMEM stream, const char *index_type, const char *term, const char *xpath_use, NMEM rset_nmem, struct rset_key_control *kc)
Definition: rpnsearch.c:2111
static int term_pre(zebra_map_t zm, const char **src, const char *ct1, int first)
Definition: rpnsearch.c:158
static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, const char *rank_type, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
Definition: rpnsearch.c:2021
static ZEBRA_RES search_terms_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET **result_sets, int *num_result_sets, struct rset_key_control *kc)
Create result set(s) for list of terms.
Definition: rpnsearch.c:1412
static int term_101(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term)
Definition: rpnsearch.c:411
void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm, struct rpn_char_map_info *map_info)
Definition: rpnsearch.c:63
static int term_102_icu(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term)
Definition: rpnsearch.c:250
static size_t icu_basechars(const char *buf, size_t i)
Definition: rpnsearch.c:241
static ZEBRA_RES search_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc, zebra_map_t zm, size_t token_number)
search for term (which may be truncated)
Definition: rpnsearch.c:976
static int term_103(zebra_map_t zm, const char **src, WRBUF term_dict, int *errors, int space_split, WRBUF display_term)
Definition: rpnsearch.c:448
static int term_102(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term)
Definition: rpnsearch.c:494
static ZEBRA_RES rpn_search_database(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
Definition: rpnsearch.c:2360
static const char ** rpn_char_map_handler(void *vp, const char **from, int len)
Definition: rpnsearch.c:44
static int grep_handle(char *name, const char *info, void *p)
Definition: rpnsearch.c:153
static int term_105(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term, int right_truncate)
Definition: rpnsearch.c:571
static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, WRBUF display_term, const char *xpath_use, struct ord_list **ol)
Definition: rpnsearch.c:1829
static void gen_regular_rel(WRBUF dst, int val, int islt)
Definition: rpnsearch.c:628
#define REGEX_CHARS
Definition: rpnsearch.c:206
static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, WRBUF display_term, const char *xpath_use, struct ord_list **ol, zebra_map_t zm, size_t token_number)
Definition: rpnsearch.c:1023
static int add_isam_p(const char *name, const char *info, struct grep_info *p)
Definition: rpnsearch.c:88
static int term_100_icu(zebra_map_t zm, const char **src, WRBUF term_dict, WRBUF display_term, int mode, size_t token_number)
Definition: rpnsearch.c:312
static ZEBRA_RES rpn_search_xpath(ZebraHandle zh, NMEM stream, const char *rank_type, RSET rset, int xpath_len, struct xpath_location_step *xpath, NMEM rset_nmem, RSET *rset_out, struct rset_key_control *kc)
Definition: rpnsearch.c:2162
void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
Definition: rpnsearch.c:723
static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, NMEM stream, const char *rank_type, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
Definition: rpnsearch.c:1980
static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, RSET **result_sets, int *num_result_sets, Z_Operator *parent_op, struct rset_key_control *kc)
Definition: rpnsearch.c:2567
static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
Definition: rpnsearch.c:1891
ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, zint *hits_limit_value, const char **term_ref_id_str, NMEM nmem)
Definition: rpnsearch.c:943
ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, zint *approx_limit)
Definition: rpnsearch.c:2498
static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
returns result set for and-list search
Definition: rpnsearch.c:1663
static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, struct xpath_location_step *xpath, int max, NMEM mem)
Definition: rpnsearch.c:2091
static int term_100(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term)
Definition: rpnsearch.c:355
ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, RSET *result_set)
Definition: rpnsearch.c:2529
static int log_level_rpn
Definition: rpnsearch.c:42
static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
returns result set for phrase search
Definition: rpnsearch.c:1529
static ZEBRA_RES search_position(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, const char *index_type, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
limit a search by position - returns result set
Definition: rpnsearch.c:1465
#define MAX_XPATH_STEPS
Definition: rpnsearch.c:2302
static ZEBRA_RES search_terms_chrmap(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET **result_sets, int *num_result_sets, struct rset_key_control *kc, zebra_map_t zm)
Definition: rpnsearch.c:1334
static int log_level_set
Definition: rpnsearch.c:41
static RSET search_group(ZebraHandle zh, const char *unit, const char *term, NMEM rset_nmem, struct rset_key_control *kc)
Definition: rpnsearch.c:1433
static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
Definition: rpnsearch.c:2313
void rset_delete(RSET rs)
Destructor RSETs.
Definition: rset.c:218
RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol, int scope, TERMID termid, int no_rsets, RSET *rsets)
Definition: rsmultiandor.c:273
RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol, int scope, int no_rsets, RSET *rsets)
Definition: rsmultiandor.c:280
struct rset rset
RSET rset_dup(RSET rs)
Duplicate an RSET.
Definition: rset.c:255
RSET rset_create_not(NMEM nmem, struct rset_key_control *kcontrol, int scope, RSET rset_l, RSET rset_r)
Definition: rsbool.c:92
#define RSETF_WRITE
Definition: rset.h:200
struct ord_list * ord_list_append(NMEM nmem, struct ord_list *list, int ord)
Definition: rset.c:306
struct ord_list * ord_list_create(NMEM nmem)
Definition: rset.c:301
RSET rset_create_between(NMEM nmem, struct rset_key_control *kcontrol, int scope, RSET rset_l, RSET rset_m1, RSET rset_m2, RSET rset_r, RSET rset_attr)
Definition: rsbetween.c:101
RSET rset_create_null(NMEM nmem, struct rset_key_control *kcontrol, TERMID term)
Definition: rsnull.c:47
#define rset_write(rfd, buf)
Definition: rset.h:220
RSET rset_create_temp(NMEM nmem, struct rset_key_control *kcontrol, int scope, const char *temp_path, TERMID term)
Definition: rstemp.c:86
#define rset_open(rs, wflag)
Definition: rset.h:202
void rset_close(RSFD rfd)
Closes a result set RFD handle.
Definition: rset.c:98
RSET rset_create_prox(NMEM nmem, struct rset_key_control *kcontrol, int scope, int rset_no, RSET *rset, int ordered, int exclusion, int relation, int distance)
Definition: rsprox.c:72
int * term_no
Definition: rpnsearch.c:77
ISAM_P * isam_p_buf
Definition: rpnsearch.c:79
int isam_p_size
Definition: rpnsearch.c:80
ZebraSet termset
Definition: rpnsearch.c:85
const char * index_type
Definition: rpnsearch.c:84
int isam_p_indx
Definition: rpnsearch.c:81
ZebraHandle zh
Definition: rpnsearch.c:83
int trunc_max
Definition: rpnsearch.c:82
Definition: it_key.h:30
int len
Definition: it_key.h:31
zint mem[IT_KEY_LEVEL_MAX]
Definition: it_key.h:32
Definition: rset.h:35
zebra_map_t zm
Definition: index.h:404
void(* dec)(struct rset_key_control *kc)
Definition: rset.h:138
Definition: rset.h:151
Definition: rset.h:73
struct xpath_predicate * predicate
Definition: zebra_xpath.h:46
union xpath_predicate::@8 u
struct xpath_predicate::@8::@9 relation
ZebraExplainInfo zei
Definition: index.h:139
zebra_maps_t zebra_maps
Definition: index.h:143
Records records
Definition: index.h:138
Dict dict
Definition: index.h:132
struct zebra_register * reg
Definition: index.h:174
long zint
Zebra integer.
Definition: util.h:66
#define ZEBRA_FAIL
Definition: util.h:81
#define ZINT_FORMAT
Definition: util.h:72
zint atozint(const char *src)
Definition: zint.c:55
#define ZEBRA_OK
Definition: util.h:82
short ZEBRA_RES
Common return type for Zebra API.
Definition: util.h:80
int zebra_parse_xpath_str(const char *xpath_string, struct xpath_location_step *xpath, int max, NMEM mem)
Definition: xpath.c:162
#define XPATH_PREDICATE_RELATION
Definition: zebra_xpath.h:29
int zebra_maps_is_first_in_field(zebra_map_t zm)
Definition: zebramap.c:492
int zebra_map_tokenize_next(zebra_map_t zm, const char **result_buf, size_t *result_len, const char **display_buf, size_t *display_len)
Definition: zebramap.c:658
int zebra_maps_attr(zebra_maps_t zms, Z_AttributesPlusTerm *zapt, const char **reg_id, char **search_type, char *rank_type, int *complete_flag, int *sort_flag)
Definition: zebramap.c:515
int zebra_maps_is_icu(zebra_map_t zm)
Definition: zebramap.c:741
int zebra_map_tokenize_start(zebra_map_t zm, const char *buf, size_t len)
Definition: zebramap.c:702
const char ** zebra_maps_search(zebra_map_t zm, const char **from, int len, int *q_map_match)
Definition: zebramap.c:412
zebra_map_t zebra_map_get_or_add(zebra_maps_t zms, const char *id)
Definition: zebramap.c:364
const char ** zebra_maps_input(zebra_map_t zm, const char **from, int len, int first)
Definition: zebramap.c:399
int zebraExplain_curDatabase(ZebraExplainInfo zei, const char *database)
Definition: zinfo.c:790
int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, zinfo_index_category_t cat, const char *index_type, const char *str)
lookup ordinal from string index + index type
Definition: zinfo.c:1353
int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord, const char **index_type, const char **db, const char **string_index)
Definition: zinfo.c:1478
zinfo_index_category_t
Definition: zinfo.h:37
@ zinfo_index_category_index
Definition: zinfo.h:38