IDZEBRA  2.2.7
zsets.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 
21 #if HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 
32 #include "index.h"
33 #include "rank.h"
34 #include <yaz/diagbib1.h>
35 #include <rset.h>
36 
37 #define ZSET_SORT_MAX_LEVEL 10
38 
40  int reg_type;
41  char *db;
42  char *index_name;
43  char *term;
44 };
45 
46 struct zebra_set {
47  char *name;
49  NMEM nmem;
50  NMEM rset_nmem; /* for creating the rsets in */
52  int num_bases;
53  const char **basenames;
54  Z_RPNQuery *rpn;
55  Z_SortKeySpecList *sortSpec;
59  struct zebra_set *next;
60  int locked;
62 
63  zint cache_position; /* last position */
64  RSFD cache_rfd; /* rfd (NULL if not existing) */
65  zint cache_psysno; /* sysno for last position */
66  zint approx_limit; /* limit before we do approx */
67 };
68 
71  int score;
72 };
73 
79 };
80 
81 static int log_level_set=0;
82 static int log_level_sort=0;
83 static int log_level_searchhits=0;
84 static int log_level_searchterms=0;
85 static int log_level_resultsets=0;
86 
87 static void loglevels(void)
88 {
89  if (log_level_set)
90  return;
91  log_level_sort = yaz_log_module_level("sorting");
92  log_level_searchhits = yaz_log_module_level("searchhits");
93  log_level_searchterms = yaz_log_module_level("searchterms");
94  log_level_resultsets = yaz_log_module_level("resultsets");
95  log_level_set = 1;
96 }
97 
98 
/*
 * Run the RPN query `rpn` for result set `sset` and then either rank or
 * sort the hits.
 *
 *  zh         Zebra handle, passed through to the search/rank/sort calls.
 *  nmem       arena for the temporary sort specification list (and passed
 *             to rpn_search_top and resultSetSortSingle).
 *  rset_nmem  arena passed to rpn_search_top and resultSetRank.
 *  rpn        the query; its RPNStructure and attributeSetId are used.
 *  sset       result set; approx_limit, num_bases and basenames are read,
 *             and sset->rset is written (0 on search failure).
 *
 * Returns the ZEBRA_RES of the failing search, or of the rank/sort step.
 */
99 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
100  Z_RPNQuery *rpn, ZebraSet sset)
101 {
102  RSET rset = 0;
103  Z_SortKeySpecList *sort_sequence;
104  int sort_status, i;
105  ZEBRA_RES res = ZEBRA_OK;
106 
/* Build an empty sort specification list with room for 10 entries, all
   slots cleared to 0. */
107  sort_sequence = (Z_SortKeySpecList *)
108  nmem_malloc(nmem, sizeof(*sort_sequence));
109  sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
110  sort_sequence->specs = (Z_SortKeySpec **)
111  nmem_malloc(nmem, sort_sequence->num_specs *
112  sizeof(*sort_sequence->specs));
113  for (i = 0; i<sort_sequence->num_specs; i++)
114  sort_sequence->specs[i] = 0;
115 
116  rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
117 
/* Presumably rpn_search_top stores any sort specs found in the query into
   sort_sequence->specs; the code below counts the non-null slots. */
118  res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
119  sset->approx_limit,
120  nmem, rset_nmem,
121  sort_sequence,
122  sset->num_bases, sset->basenames,
123  &rset);
124  if (res != ZEBRA_OK)
125  {
126  sset->rset = 0;
127  return res;
128  }
/* Count the leading non-null specs.
   NOTE(review): the scan has no upper bound; if all 10 slots are non-null
   it reads specs[10], one past the allocation.  Consider bounding by the
   allocated count. */
129  for (i = 0; sort_sequence->specs[i]; i++)
130  ;
131  sort_sequence->num_specs = i;
/* NOTE(review): source line 132 is absent from this listing. */
133 
/* No sort specs: rank the hits; otherwise sort by the collected specs. */
134  if (!i)
135  {
136  res = resultSetRank(zh, sset, rset, rset_nmem);
137  }
138  else
139  {
140  res = resultSetSortSingle(zh, nmem, sset, rset,
141  sort_sequence, &sort_status);
142  }
143  sset->rset = rset;
144  return res;
145 }
146 
147 
148 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
149  int num_bases, char **basenames,
150  const char *setname,
151  zint *hits, int *estimated_hit_count)
152 {
153  ZebraSet zebraSet;
154  int i;
155  ZEBRA_RES res;
156 
157  *hits = 0;
158  *estimated_hit_count = 0;
159 
160  zebraSet = resultSetAdd(zh, setname, 1);
161  if (!zebraSet)
162  return ZEBRA_FAIL;
163  zebraSet->locked = 1;
164  zebraSet->rpn = 0;
165  zebraSet->nmem = m;
166  zebraSet->rset_nmem = nmem_create();
167 
168  zebraSet->num_bases = num_bases;
169  zebraSet->basenames =
170  nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
171  for (i = 0; i<num_bases; i++)
172  zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
173 
174  res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
175  rpn, zebraSet);
176  *hits = zebraSet->hits;
177  if (zebraSet->estimated_hit_count)
178  *estimated_hit_count = 1;
179 
180  if (zebraSet->rset)
181  zebraSet->rpn = rpn;
182  zebraSet->locked = 0;
183  if (!zebraSet->rset)
184  return ZEBRA_FAIL;
185  return res;
186 }
187 
188 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
189  const char *db, const char *index_name,
190  const char *term)
191 {
192  assert(zh); /* compiler shut up */
193  if (!s->nmem)
194  s->nmem = nmem_create();
195  if (!s->term_entries)
196  {
197  int i;
198  s->term_entries_max = 1000;
199  s->term_entries =
200  nmem_malloc(s->nmem, s->term_entries_max *
201  sizeof(*s->term_entries));
202  for (i = 0; i < s->term_entries_max; i++)
203  s->term_entries[i].term = 0;
204  }
205  if (s->hits < s->term_entries_max)
206  {
207  s->term_entries[s->hits].reg_type = reg_type;
208  s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
209  s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
210  s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
211  }
212  (s->hits)++;
213 }
214 
215 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
216 {
217  ZebraSet s;
218  int i;
219 
220  for (s = zh->sets; s; s = s->next)
221  if (!strcmp(s->name, name))
222  break;
223 
224  if (!log_level_set)
225  loglevels();
226  if (s)
227  {
228  yaz_log(log_level_resultsets, "updating result set %s", name);
229  if (!ov || s->locked)
230  return NULL;
231  if (s->rset)
232  {
233  if (s->cache_rfd)
234  rset_close(s->cache_rfd);
235  rset_delete(s->rset);
236  }
237  if (s->rset_nmem)
238  nmem_destroy(s->rset_nmem);
239  if (s->nmem)
240  nmem_destroy(s->nmem);
241  }
242  else
243  {
244  const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
245 
246  yaz_log(log_level_resultsets, "adding result set %s", name);
247  s = (ZebraSet) xmalloc(sizeof(*s));
248  s->next = zh->sets;
249  zh->sets = s;
250  s->name = xstrdup(name);
251 
252  s->sort_info = (struct zset_sort_info *)
253  xmalloc(sizeof(*s->sort_info));
254  s->sort_info->max_entries = atoi(sort_max_str);
255  if (s->sort_info->max_entries < 2)
256  s->sort_info->max_entries = 2;
257 
258  s->sort_info->entries = (struct zset_sort_entry **)
259  xmalloc(sizeof(*s->sort_info->entries) *
260  s->sort_info->max_entries);
261  s->sort_info->all_entries = (struct zset_sort_entry *)
262  xmalloc(sizeof(*s->sort_info->all_entries) *
263  s->sort_info->max_entries);
264  for (i = 0; i < s->sort_info->max_entries; i++)
265  s->sort_info->entries[i] = s->sort_info->all_entries + i;
266  }
267  s->locked = 0;
268  s->term_entries = 0;
269  s->hits = 0;
270  s->rset = 0;
271  s->rset_nmem = 0;
272  s->nmem = 0;
273  s->rpn = 0;
274  s->sortSpec = 0;
275  s->cache_position = 0;
276  s->cache_rfd = 0;
277  s->approx_limit = zh->approx_limit;
278  s->estimated_hit_count = 0;
279  return s;
280 }
281 
282 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
283 {
284  ZebraSet s;
285 
286  for (s = zh->sets; s; s = s->next)
287  if (!strcmp(s->name, name))
288  {
289  if (!s->term_entries && !s->rset && s->rpn)
290  {
291  NMEM nmem = nmem_create();
292  yaz_log(log_level_resultsets, "research %s", name);
293  if (!s->rset_nmem)
294  s->rset_nmem = nmem_create();
295  resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
296  if (s->rset && s->sortSpec)
297  {
298  int sort_status;
299  yaz_log(log_level_resultsets, "resort %s", name);
300  resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
301  &sort_status);
302  }
303  nmem_destroy(nmem);
304  }
305  return s;
306  }
307  return NULL;
308 }
309 
311  const char ***basenames, int *num_bases)
312 {
313  ZebraSet sset = resultSetGet(zh, setname);
314  if (!sset)
315  return ZEBRA_FAIL;
316  *basenames = sset->basenames;
317  *num_bases = sset->num_bases;
318  return ZEBRA_OK;
319 
320 }
321 
323 {
324  ZebraSet s = zh->sets;
325 
326  yaz_log(log_level_resultsets, "invalidating result sets");
327  for (; s; s = s->next)
328  {
329  if (s->rset)
330  {
331  if (s->cache_rfd)
332  rset_close(s->cache_rfd);
333  rset_delete(s->rset);
334  }
335  s->rset = 0;
336  s->cache_rfd = 0;
337  s->cache_position = 0;
338  if (s->rset_nmem)
339  nmem_destroy(s->rset_nmem);
340  s->rset_nmem=0;
341  }
342 }
343 
344 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
345 {
346  ZebraSet * ss = &zh->sets;
347  int i;
348 
349  if (statuses)
350  for (i = 0; i<num; i++)
351  statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
352  while (*ss)
353  {
354  int i = -1;
355  ZebraSet s = *ss;
356  if (num >= 0)
357  {
358  for (i = 0; i<num; i++)
359  if (!strcmp(s->name, names[i]))
360  {
361  if (statuses)
362  statuses[i] = Z_DeleteStatus_success;
363  i = -1;
364  break;
365  }
366  }
367  if (i < 0)
368  {
369  *ss = s->next;
370 
371  xfree(s->sort_info->all_entries);
372  xfree(s->sort_info->entries);
373  xfree(s->sort_info);
374 
375  if (s->nmem)
376  nmem_destroy(s->nmem);
377  if (s->rset)
378  {
379  if (s->cache_rfd)
380  rset_close(s->cache_rfd);
381  rset_delete(s->rset);
382  }
383  if (s->rset_nmem)
384  nmem_destroy(s->rset_nmem);
385  xfree(s->name);
386  xfree(s);
387  }
388  else
389  ss = &s->next;
390  }
391 }
392 
394  const char *name,
395  zint start, int num)
396 {
397  zint pos_small[10];
398  zint *pos = pos_small;
399  ZebraMetaRecord *mr;
400  int i;
401 
402  if (num > 10000 || num <= 0)
403  return 0;
404 
405  if (num > 10)
406  pos = xmalloc(sizeof(*pos) * num);
407 
408  for (i = 0; i<num; i++)
409  pos[i] = start+i;
410 
411  mr = zebra_meta_records_create(zh, name, num, pos);
412 
413  if (num > 10)
414  xfree(pos);
415  return mr;
416 }
417 
419  int num, zint *positions)
420 {
421  ZebraSet sset;
422  ZebraMetaRecord *sr = 0;
423  RSET rset;
424  int i;
425  struct zset_sort_info *sort_info;
426  size_t sysno_mem_index = 0;
427 
428  if (zh->m_staticrank)
429  sysno_mem_index = 1;
430 
431  if (!log_level_set)
432  loglevels();
433  if (!(sset = resultSetGet(zh, name)))
434  return NULL;
435  if (!(rset = sset->rset))
436  {
437  if (!sset->term_entries)
438  return 0;
439  sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
440  for (i = 0; i<num; i++)
441  {
442  sr[i].sysno = 0;
443  sr[i].score = -1;
444  sr[i].term = 0;
445  sr[i].db = 0;
446 
447  if (positions[i] <= sset->term_entries_max)
448  {
449  sr[i].term = sset->term_entries[positions[i]-1].term;
450  sr[i].db = sset->term_entries[positions[i]-1].db;
451  }
452  }
453  }
454  else
455  {
456  sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
457  for (i = 0; i<num; i++)
458  {
459  sr[i].sysno = 0;
460  sr[i].score = -1;
461  sr[i].term = 0;
462  sr[i].db = 0;
463  }
464  sort_info = sset->sort_info;
465  if (sort_info)
466  {
467  zint position;
468 
469  for (i = 0; i<num; i++)
470  {
471  position = positions[i];
472  if (position > 0 && position <= sort_info->num_entries)
473  {
474  yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
475  " (sorted)", position);
476  sr[i].sysno = sort_info->entries[position-1]->sysno;
477  sr[i].score = sort_info->entries[position-1]->score;
478  }
479  }
480  }
481  /* did we really get all entries using sort ? */
482  for (i = 0; i<num; i++)
483  {
484  if (!sr[i].sysno)
485  break;
486  }
487  if (i < num) /* nope, get the rest, unsorted - sorry */
488  {
489  zint position = 0;
490  int num_i = 0;
491  zint psysno = 0;
492  RSFD rfd;
493  struct it_key key;
494 
495  if (sort_info)
496  position = sort_info->num_entries;
497  while (num_i < num && positions[num_i] <= position)
498  num_i++;
499 
500  if (sset->cache_rfd &&
501  num_i < num && positions[num_i] > sset->cache_position)
502  {
503  position = sset->cache_position;
504  rfd = sset->cache_rfd;
505  psysno = sset->cache_psysno;
506  }
507  else
508  {
509  if (sset->cache_rfd)
510  rset_close(sset->cache_rfd);
511  rfd = rset_open(rset, RSETF_READ);
512  }
513  while (num_i < num && rset_read(rfd, &key, 0))
514  {
515  zint this_sys = key.mem[sysno_mem_index];
516  if (this_sys != psysno)
517  {
518  psysno = this_sys;
519  if (sort_info)
520  {
521  /* determine we alreay have this in our set */
522  for (i = sort_info->num_entries; --i >= 0; )
523  if (psysno == sort_info->entries[i]->sysno)
524  break;
525  if (i >= 0)
526  continue;
527  }
528  position++;
529  assert(num_i < num);
530  if (position == positions[num_i])
531  {
532  sr[num_i].sysno = psysno;
533  yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
534  sr[num_i].score = -1;
535  num_i++;
536  }
537  }
538  }
539  sset->cache_position = position;
540  sset->cache_psysno = psysno;
541  sset->cache_rfd = rfd;
542  }
543  }
544  return sr;
545 }
546 
548  int num)
549 {
550  assert(zh); /* compiler shut up about unused arg */
551  xfree(records);
552 }
553 
/* One sort criterion, resolved for every database being searched. */
554 struct sortKeyInfo {
555  int relation; /* 'A' for ascending, 'D' for descending */
556  int *ord; /* array of sort index ord for each database searched; -1 if none */
557  int *numerical; /* array of flags for each database searched: non-zero selects numeric comparison */
558  const char *index_type; /* index type looked up from ord[0]; used to untranslate terms for numeric comparison */
559 };
560 
562  int database_no,
563  struct sortKeyInfo *criteria, int num_criteria,
564  zint sysno,
565  char *cmp_buf[], char *tmp_cmp_buf[])
566 {
567  struct zset_sort_entry *new_entry = NULL;
568  struct zset_sort_info *sort_info = sset->sort_info;
569  int i, j;
570  WRBUF w = wrbuf_alloc();
571 
572  zebra_sort_sysno(zh->reg->sort_index, sysno);
573  for (i = 0; i<num_criteria; i++)
574  {
575  char *this_entry_buf = tmp_cmp_buf[i];
576  memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
577 
578  if (criteria[i].ord[database_no] != -1)
579  {
580  yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
581  criteria[i].ord[database_no]);
582  zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
583  wrbuf_rewind(w);
584  if (zebra_sort_read(zh->reg->sort_index, 0, w))
585  {
586  /* consider each sort entry and take lowest/highest one
587  of the one as sorting key depending on whether sort is
588  ascending/descending */
589  int off = 0;
590  while (off != wrbuf_len(w))
591  {
592  size_t l = strlen(wrbuf_buf(w)+off);
593  assert(off < wrbuf_len(w));
594 
595  if (l >= SORT_IDX_ENTRYSIZE)
596  l = SORT_IDX_ENTRYSIZE-1;
597  if ( (off == 0)
598  || (criteria[i].relation == 'A'
599  && strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
600  || (criteria[i].relation == 'D'
601  && strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
602  )
603  {
604  memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
605  this_entry_buf[l] = '\0';
606  }
607  off += 1 + strlen(wrbuf_buf(w)+off);
608  }
609  }
610  }
611  else
612  {
613  yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
614  }
615  }
616  wrbuf_destroy(w);
617  i = sort_info->num_entries;
618  while (--i >= 0)
619  {
620  int rel = 0;
621  for (j = 0; j<num_criteria; j++)
622  {
623  char *this_entry_buf = tmp_cmp_buf[j];
624  char *other_entry_buf =
625  cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
626  if (criteria[j].numerical[database_no])
627  {
628  char this_entry_org[1024];
629  char other_entry_org[1024];
630  double diff;
631  const char *index_type = criteria[j].index_type;
632  zebra_term_untrans(zh, index_type, this_entry_org,
633  this_entry_buf);
634  zebra_term_untrans(zh, index_type, other_entry_org,
635  other_entry_buf);
636  diff = atof(this_entry_org) - atof(other_entry_org);
637 
638  if (diff > 0.0)
639  rel = 1;
640  else if (diff < 0.0)
641  rel = -1;
642  else
643  rel = 0;
644  }
645  else
646  {
647  rel = memcmp(this_entry_buf, other_entry_buf,
649  }
650  /* when the compare is equal, continue to next criteria,
651  else break out */
652  if (rel)
653  break;
654  }
655  if (!rel)
656  break;
657  if (criteria[j].relation == 'A')
658  {
659  if (rel > 0)
660  break;
661  }
662  else if (criteria[j].relation == 'D')
663  {
664  if (rel < 0)
665  break;
666  }
667  }
668  ++i;
669  yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
670  j = sort_info->max_entries;
671  if (i == j){
672  yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
673  return;
674  }
675 
676  if (sort_info->num_entries == j)
677  --j;
678  else
679  j = (sort_info->num_entries)++;
680  new_entry = sort_info->entries[j];
681  /* move up all higher entries (to make room) */
682  while (j != i)
683  {
684  int k;
685  for (k = 0; k<num_criteria; k++)
686  {
687  char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
688  char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
689  memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
690  }
691  sort_info->entries[j] = sort_info->entries[j-1];
692  --j;
693  }
694  /* and insert the new entry at the correct place */
695  sort_info->entries[i] = new_entry;
696  assert(new_entry);
697  /* and add this to the compare buffer */
698  for (i = 0; i<num_criteria; i++)
699  {
700  char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
701  char *this_entry_buf = tmp_cmp_buf[i];
702  memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
703  }
704  new_entry->sysno = sysno;
705  new_entry->score = -1;
706 }
707 
708 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
709  zint sysno, int score, int relation)
710 {
711  struct zset_sort_entry *new_entry = NULL;
712  int i, j;
713  assert(zh); /* compiler shut up about unused arg */
714 
715  i = sort_info->num_entries;
716  while (--i >= 0)
717  {
718  int rel = 0;
719 
720  rel = score - sort_info->entries[i]->score;
721 
722  if (relation == 'D')
723  {
724  if (rel >= 0)
725  break;
726  }
727  else if (relation == 'A')
728  {
729  if (rel <= 0)
730  break;
731  }
732  }
733  ++i;
734  j = sort_info->max_entries;
735  if (i == j)
736  return;
737 
738  if (sort_info->num_entries == j)
739  --j;
740  else
741  j = (sort_info->num_entries)++;
742 
743  new_entry = sort_info->entries[j];
744  while (j != i)
745  {
746  sort_info->entries[j] = sort_info->entries[j-1];
747  --j;
748  }
749  sort_info->entries[i] = new_entry;
750  assert(new_entry);
751  new_entry->sysno = sysno;
752  new_entry->score = score;
753 }
754 
755 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
756 {
757  Z_RPNQuery *dst = 0;
758  ODR encode = odr_createmem(ODR_ENCODE);
759  ODR decode = odr_createmem(ODR_DECODE);
760 
761  if (z_RPNQuery(encode, &src, 0, 0))
762  {
763  int len;
764  char *buf = odr_getbuf(encode, &len, 0);
765 
766  if (buf)
767  {
768  odr_setbuf(decode, buf, len, 0);
769  z_RPNQuery(decode, &dst, 0, 0);
770  }
771  }
772  nmem_transfer(nmem, decode->mem);
773  odr_destroy(encode);
774  odr_destroy(decode);
775  return dst;
776 }
777 
778 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
779 {
780  Z_SortKeySpecList *dst = 0;
781  ODR encode = odr_createmem(ODR_ENCODE);
782  ODR decode = odr_createmem(ODR_DECODE);
783 
784  if (z_SortKeySpecList(encode, &src, 0, 0))
785  {
786  int len;
787  char *buf = odr_getbuf(encode, &len, 0);
788 
789  if (buf)
790  {
791  odr_setbuf(decode, buf, len, 0);
792  z_SortKeySpecList(decode, &dst, 0, 0);
793  }
794  }
795  nmem_transfer(nmem, decode->mem);
796  odr_destroy(encode);
797  odr_destroy(decode);
798  return dst;
799 }
800 
801 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
802  ZebraSet rset)
803 {
804  ZebraSet nset;
805  int i;
806 
807  nset = resultSetAdd(zh, setname, 1);
808  if (!nset)
809  return 0;
810 
811  nset->nmem = nmem_create();
812 
813  nset->num_bases = rset->num_bases;
814  nset->basenames =
815  nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
816  for (i = 0; i<rset->num_bases; i++)
817  nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
818 
819  if (rset->rset)
820  nset->rset = rset_dup(rset->rset);
821  if (rset->rpn)
822  nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
823  return nset;
824 }
825 
827  int num_input_setnames, const char **input_setnames,
828  const char *output_setname,
829  Z_SortKeySpecList *sort_sequence, int *sort_status)
830 {
831  ZebraSet sset;
832  RSET rset;
833 
834  if (num_input_setnames == 0)
835  {
836  zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
837  return ZEBRA_FAIL;
838  }
839  if (num_input_setnames > 1)
840  {
841  zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
842  return ZEBRA_FAIL;
843  }
844  if (!log_level_set)
845  loglevels();
846  yaz_log(log_level_sort, "result set sort input=%s output=%s",
847  *input_setnames, output_setname);
848  sset = resultSetGet(zh, input_setnames[0]);
849  if (!sset)
850  {
851  zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
852  input_setnames[0]);
853  return ZEBRA_FAIL;
854  }
855  if (!(rset = sset->rset))
856  {
857  zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
858  input_setnames[0]);
859  return ZEBRA_FAIL;
860  }
861  if (strcmp(output_setname, input_setnames[0]))
862  sset = resultSetClone(zh, output_setname, sset);
863  sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
864  return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
865  sort_status);
866 }
867 
869  ZebraSet sset, RSET rset,
870  Z_SortKeySpecList *sort_sequence,
871  int *sort_status)
872 {
873  int i;
874  int ib;
875  int n = 0;
876  zint kno = 0;
877  zint psysno = 0;
878  struct it_key key;
879  struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
880  char *cmp_buf[ZSET_SORT_MAX_LEVEL];
881  char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
882  int num_criteria;
883  RSFD rfd;
884  TERMID termid;
885  TERMID *terms;
886  int numTerms = 0;
887  size_t sysno_mem_index = 0;
888 
889  int numbases = zh->num_basenames;
890  yaz_log(log_level_sort, "searching %d databases",numbases);
891 
892  if (zh->m_staticrank)
893  sysno_mem_index = 1;
894 
895  assert(nmem); /* compiler shut up about unused param */
896  sset->sort_info->num_entries = 0;
897 
898  rset_getterms(rset, 0, 0, &n);
899  terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
900  rset_getterms(rset, terms, n, &numTerms);
901 
902  sset->hits = 0;
903  num_criteria = sort_sequence->num_specs;
904  if (num_criteria > ZSET_SORT_MAX_LEVEL)
905  num_criteria = ZSET_SORT_MAX_LEVEL;
906  /* set up the search criteria */
907  for (i = 0; i < num_criteria; i++)
908  {
909  Z_SortKeySpec *sks = sort_sequence->specs[i];
910  Z_SortKey *sk;
911 
912  sort_criteria[i].ord = (int *)
913  nmem_malloc(nmem, sizeof(int)*numbases);
914  sort_criteria[i].numerical = (int *)
915  nmem_malloc(nmem, sizeof(int)*numbases);
916 
917  /* initialize ord and numerical for each database */
918  for (ib = 0; ib < numbases; ib++)
919  {
920  sort_criteria[i].ord[ib] = -1;
921  sort_criteria[i].numerical[ib] = 0;
922  }
923 
924  if (sks->which == Z_SortKeySpec_missingValueData)
925  {
926  zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
927  return ZEBRA_FAIL;
928  }
929  if (*sks->sortRelation == Z_SortKeySpec_ascending)
930  sort_criteria[i].relation = 'A';
931  else if (*sks->sortRelation == Z_SortKeySpec_descending)
932  sort_criteria[i].relation = 'D';
933  else
934  {
935  zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
936  return ZEBRA_FAIL;
937  }
938  if (sks->sortElement->which == Z_SortElement_databaseSpecific)
939  {
940  zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
941  return ZEBRA_FAIL;
942  }
943  else if (sks->sortElement->which != Z_SortElement_generic)
944  {
945  zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
946  return ZEBRA_FAIL;
947  }
948  sk = sks->sortElement->u.generic;
949  switch (sk->which)
950  {
951  case Z_SortKey_sortField:
952  yaz_log(log_level_sort, "key %d is of type sortField", i+1);
953  for (ib = 0; ib < numbases; ib++)
954  {
955  zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
956  sort_criteria[i].numerical[ib] = 0;
957  sort_criteria[i].ord[ib] =
960  0, sk->u.sortField);
961  if (sks->which != Z_SortKeySpec_null
962  && sort_criteria[i].ord[ib] == -1)
963  {
964  zebra_setError(zh,
965  YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
966  return ZEBRA_FAIL;
967  }
968  }
969  break;
970  case Z_SortKey_elementSpec:
971  yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
972  zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
973  return ZEBRA_FAIL;
974  case Z_SortKey_sortAttributes:
975  yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
976  /* for every database we searched, get the sort index file
977  id (ord) and its numerical indication and store them in
978  the sort_criteria */
979  for (ib = 0; ib < numbases; ib++)
980  {
981  zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
982  if (zebra_sort_get_ord(zh, sk->u.sortAttributes,
983  &sort_criteria[i].ord[ib],
984  &sort_criteria[i].numerical[ib]) !=
985  ZEBRA_OK && sks->which != Z_SortKeySpec_null)
986  return ZEBRA_FAIL;
987  }
988  break;
989  }
990  /* right now we look up the index type based on the first database
991  if the index_type's can differ between the indexes of different
992  databases (which i guess they can?) then we have to store the
993  index types for each database, just like the ord and numerical */
994  if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
995  &sort_criteria[i].index_type,
996  0, 0))
997  {
998  zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
999  return ZEBRA_FAIL;
1000  }
1001  }
1002  /* allocate space for each cmpare buf + one extra for tmp comparison */
1003  /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1004  all other result entries to compare against. This is slowly filled when records are processed.
1005  tmp_cmp_buf is an array with a value of the current record for each criteria
1006  */
1007  for (i = 0; i<num_criteria; i++)
1008  {
1009  cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1010  * SORT_IDX_ENTRYSIZE);
1011  tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1012  }
1013  rfd = rset_open(rset, RSETF_READ);
1014  while (rset_read(rfd, &key, &termid))
1015  {
1016  zint this_sys = key.mem[sysno_mem_index];
1018  key_logdump_txt(log_level_searchhits, &key, termid->name);
1019  kno++;
1020  if (this_sys != psysno)
1021  {
1022  int database_no = 0;
1023  if ((sset->hits & 255) == 0 && zh->break_handler_func)
1024  {
1026  {
1028  break;
1029  }
1030  }
1031  (sset->hits)++;
1032  psysno = this_sys;
1033 
1034  /* determine database from the term, but only bother if more than
1035  one database is in use*/
1036  if (numbases > 1 && termid->ol)
1037  {
1038  const char *this_db = 0;
1039  if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord, 0, &this_db, 0)
1040  == 0 && this_db)
1041  {
1042  for (ib = 0; ib < numbases; ib++)
1043  if (!strcmp(this_db, zh->basenames[ib]))
1044  database_no = ib;
1045  }
1046  }
1047 #if 0
1048  yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1049  database_no);
1050  ord_list_print(termid->ol);
1051 #endif
1052  resultSetInsertSort(zh, sset, database_no,
1053  sort_criteria, num_criteria, psysno, cmp_buf,
1054  tmp_cmp_buf);
1055  }
1056  }
1057  rset_close(rfd);
1058 
1059  /* free the compare buffers */
1060  for (i = 0; i<num_criteria; i++)
1061  {
1062  xfree(cmp_buf[i]);
1063  xfree(tmp_cmp_buf[i]);
1064  }
1065 
1066  yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1067  kno, sset->hits);
1068  for (i = 0; i < numTerms; i++)
1069  yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1070  terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1071  *sort_status = Z_SortResponse_success;
1072  return ZEBRA_OK;
1073 }
1074 
1075 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1076 {
1077  ZebraSet s;
1078 
1079  if ((s = resultSetGet(zh, resultSetId)))
1080  return s->rset;
1081  return NULL;
1082 }
1083 
1085  RSET rset, NMEM nmem)
1086 {
1087  struct it_key key;
1088  TERMID termid;
1089  TERMID *terms;
1090  zint kno = 0;
1091  int numTerms = 0;
1092  int n = 0;
1093  int i;
1094  ZebraRankClass rank_class;
1095  struct zset_sort_info *sort_info;
1096  const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1097  size_t sysno_mem_index = 0;
1098 
1099  if (zh->m_staticrank)
1100  sysno_mem_index = 1;
1101 
1102  if (!log_level_set)
1103  loglevels();
1104  sort_info = zebraSet->sort_info;
1105  sort_info->num_entries = 0;
1106  zebraSet->hits = 0;
1107  zebraSet->estimated_hit_count = 0;
1108  rset_getterms(rset, 0, 0, &n);
1109  terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1110  rset_getterms(rset, terms, n, &numTerms);
1111 
1112  rank_class = zebraRankLookup(zh, rank_handler_name);
1113  if (!rank_class)
1114  {
1115  yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1116  zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1117  return ZEBRA_FAIL;
1118  }
1119  else
1120  {
1121  RSFD rfd = rset_open(rset, RSETF_READ);
1122  struct rank_control *rc = rank_class->control;
1123  int score;
1124  zint count = 0;
1125  void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1126  nmem, terms, numTerms);
1127  zint psysno = 0; /* previous doc id / sys no */
1128  zint pstaticrank = 0; /* previous static rank */
1129  int stop_flag = 0;
1130  while (rset_read(rfd, &key, &termid))
1131  {
1132  zint this_sys = key.mem[sysno_mem_index];
1133 
1134  zint seqno = key.mem[key.len-1];
1135  kno++;
1137  key_logdump_txt(log_level_searchhits, &key, termid->name);
1138  if (this_sys != psysno)
1139  { /* new record .. */
1140  if (!(rfd->counted_items & 255) && zh->break_handler_func)
1141  {
1143  {
1144  yaz_log(YLOG_LOG, "Aborted search");
1145  stop_flag = 1;
1146  }
1147  }
1148  if (rfd->counted_items > rset->hits_limit)
1149  stop_flag = 1;
1150  if (stop_flag)
1151  {
1152  zebraSet->estimated_hit_count = 1;
1153  break;
1154  }
1155  if (psysno)
1156  { /* only if we did have a previous record */
1157  score = (*rc->calc)(handle, psysno, pstaticrank,
1158  &stop_flag);
1159  /* insert the hit. A=Ascending */
1160  resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1161  count++;
1162  }
1163  psysno = this_sys;
1164  if (zh->m_staticrank)
1165  pstaticrank = key.mem[0];
1166  }
1167  (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1168  }
1169  /* no more items */
1170  if (psysno)
1171  { /* we had - at least - one record */
1172  score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1173  /* insert the hit. A=Ascending */
1174  resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1175  count++;
1176  }
1177  (*rc->end)(zh->reg, handle);
1178  rset_close(rfd);
1179  }
1180  zebraSet->hits = rset->hits_count;
1181 
1182  yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1183  ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
1184  for (i = 0; i < numTerms; i++)
1185  {
1186  yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1187  ZINT_FORMAT,
1188  terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1189  }
1190  return ZEBRA_OK;
1191 }
1192 
1194 {
1195  ZebraRankClass p = zh->reg->rank_classes;
1196  while (p && strcmp(p->control->name, name))
1197  p = p->next;
1198  if (p && !p->init_flag)
1199  {
1200  if (p->control->create)
1201  p->class_handle = (*p->control->create)(zh);
1202  p->init_flag = 1;
1203  }
1204  return p;
1205 }
1206 
1207 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1208 {
1209  ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1210  p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1211  memcpy(p->control, ctrl, sizeof(*p->control));
1212  p->control->name = xstrdup(ctrl->name);
1213  p->init_flag = 0;
1214  p->next = reg->rank_classes;
1215  reg->rank_classes = p;
1216 }
1217 
1219 {
1220  ZebraRankClass p = reg->rank_classes;
1221  while (p)
1222  {
1223  ZebraRankClass p_next = p->next;
1224  if (p->init_flag && p->control->destroy)
1225  (*p->control->destroy)(reg, p->class_handle);
1226  xfree(p->control->name);
1227  xfree(p->control);
1228  xfree(p);
1229  p = p_next;
1230  }
1231  reg->rank_classes = NULL;
1232 }
1233 
1234 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1235  zint *hits_array, int *approx_array)
1236 {
1237  int no = 0;
1238  int i;
1239  for (i = 0; i<rset->no_children; i++)
1241  (termid_array ? termid_array + no : 0),
1242  (hits_array ? hits_array + no : 0),
1243  (approx_array ? approx_array + no : 0));
1244  if (rset->term && rset->term->name[0])
1245  {
1246  if (termid_array)
1247  termid_array[no] = rset->term;
1248  if (hits_array)
1249  hits_array[no] = rset->hits_count;
1250  if (approx_array)
1251  approx_array[no] = rset->hits_approx;
1252 #if 0
1253  yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1254  " count=" ZINT_FORMAT,
1256 #endif
1257  no++;
1258  }
1259  return no;
1260 }
1261 
1263  int *num_terms)
1264 {
1265  ZebraSet sset = resultSetGet(zh, setname);
1266  *num_terms = 0;
1267  if (sset)
1268  {
1269  *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1270  return ZEBRA_OK;
1271  }
1272  return ZEBRA_FAIL;
1273 }
1274 
1276  int no, zint *count, int *approx,
1277  char *termbuf, size_t *termlen,
1278  const char **term_ref_id)
1279 {
1280  ZebraSet sset = resultSetGet(zh, setname);
1281  if (sset)
1282  {
1283  int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1284  if (no >= 0 && no < num_terms)
1285  {
1286  TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1287  zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1288  int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1289 
1290  trav_rset_for_termids(sset->rset, term_array,
1291  hits_array, approx_array);
1292 
1293  if (count)
1294  *count = hits_array[no];
1295  if (approx)
1296  *approx = approx_array[no];
1297  if (termbuf)
1298  {
1299  char *inbuf = term_array[no]->name;
1300  size_t inleft = strlen(inbuf);
1301  size_t outleft = *termlen - 1;
1302 
1303  if (zh->iconv_from_utf8 != 0)
1304  {
1305  char *outbuf = termbuf;
1306  size_t ret;
1307 
1308  ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1309  &outbuf, &outleft);
1310  if (ret == (size_t)(-1))
1311  *termlen = 0;
1312  else
1313  {
1314  yaz_iconv(zh->iconv_from_utf8, 0, 0,
1315  &outbuf, &outleft);
1316  *termlen = outbuf - termbuf;
1317  }
1318  }
1319  else
1320  {
1321  if (inleft > outleft)
1322  inleft = outleft;
1323  *termlen = inleft;
1324  memcpy(termbuf, inbuf, *termlen);
1325  }
1326  termbuf[*termlen] = '\0';
1327  }
1328  if (term_ref_id)
1329  *term_ref_id = term_array[no]->ref_id;
1330 
1331  xfree(term_array);
1332  xfree(hits_array);
1333  xfree(approx_array);
1334  return ZEBRA_OK;
1335  }
1336  }
1337  return ZEBRA_FAIL;
1338 }
1339 
1341  zint sysno, zebra_snippets *snippets)
1342 {
1343  ZebraSet sset = resultSetGet(zh, setname);
1344  yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1345  setname, sysno);
1346  if (!sset)
1347  return ZEBRA_FAIL;
1348  else
1349  {
1350  struct rset_key_control *kc = zebra_key_control_create(zh);
1351  NMEM nmem = nmem_create();
1352  struct it_key key;
1353  RSET rsets[2], rset_comb;
1354  RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1355  res_get(zh->res, "setTmpDir"),0 );
1356 
1357  TERMID termid;
1358  RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1359 
1360  key.mem[0] = sysno;
1361  key.mem[1] = 0;
1362  key.mem[2] = 0;
1363  key.mem[3] = 0;
1364  key.len = 2;
1365  rset_write(rsfd, &key);
1366  rset_close(rsfd);
1367 
1368  rsets[0] = rset_temp;
1369  rsets[1] = rset_dup(sset->rset);
1370 
1371  rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1372 
1373  rsfd = rset_open(rset_comb, RSETF_READ);
1374 
1375  while (rset_read(rsfd, &key, &termid))
1376  {
1377  if (termid)
1378  {
1379  struct ord_list *ol;
1380  for (ol = termid->ol; ol; ol = ol->next)
1381  {
1382  zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1383  ol->ord, termid->name);
1384  }
1385  }
1386  }
1387  rset_close(rsfd);
1388 
1389  rset_delete(rset_comb);
1390  nmem_destroy(nmem);
1391  kc->dec(kc);
1392  }
1393  return ZEBRA_OK;
1394 }
1395 
1397  const char **basenames, int num_bases,
1398  zint recid,
1399  zint *sysnos, int *no_sysnos)
1400 {
1401  ZEBRA_RES res = ZEBRA_OK;
1402  int sysnos_offset = 0;
1403  int i;
1404 
1405  if (!zh->reg->isamb || !zh->m_segment_indexing)
1406  {
1407  if (sysnos_offset < *no_sysnos)
1408  *sysnos = recid;
1409  sysnos_offset++;
1410  }
1411  else
1412  {
1413  for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1414  {
1415  const char *database = basenames[i];
1416  if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1417  {
1418  const char *index_type = "w";
1419  const char *use_string = "_ALLRECORDS";
1420  int ord;
1423  index_type, use_string);
1424  if (ord != -1)
1425  {
1426  char ord_buf[32];
1427  int ord_len = key_SU_encode(ord, ord_buf);
1428  char *info;
1429 
1430  ord_buf[ord_len] = '\0';
1431 
1432  info = dict_lookup(zh->reg->dict, ord_buf);
1433  if (info)
1434  {
1435  if (*info != sizeof(ISAM_P))
1436  {
1437  res = ZEBRA_FAIL;
1438  }
1439  else
1440  {
1441  ISAM_P isam_p;
1442  ISAMB_PP pt;
1443  struct it_key key_until, key_found;
1444  int i = 0;
1445  int r;
1446 
1447  memcpy(&isam_p, info+1, sizeof(ISAM_P));
1448 
1449  pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1450  if (!pt)
1451  res = ZEBRA_FAIL;
1452  else
1453  {
1454  key_until.mem[i++] = recid;
1455  key_until.mem[i++] = 0; /* section_id */
1456  if (zh->m_segment_indexing)
1457  key_until.mem[i++] = 0; /* segment */
1458  key_until.mem[i++] = 0;
1459  key_until.len = i;
1460 
1461  r = isamb_pp_forward(pt, &key_found, &key_until);
1462  while (r && key_found.mem[0] == recid)
1463  {
1464  if (sysnos_offset < *no_sysnos)
1465  sysnos[sysnos_offset++] =
1466  key_found.mem[key_found.len-1];
1467  r = isamb_pp_read(pt, &key_found);
1468  }
1469  isamb_pp_close(pt);
1470  }
1471  }
1472  }
1473  }
1474  }
1475  }
1476  }
1477  *no_sysnos = sysnos_offset;
1478  return res;
1479 }
1480 
1482  const char *setname,
1483  zint recid,
1484  zint *sysnos, int *no_sysnos)
1485 {
1486  const char **basenames;
1487  int num_bases;
1488  ZEBRA_RES res;
1489 
1490  res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1491  if (res != ZEBRA_OK)
1492  return ZEBRA_FAIL;
1493 
1494  return zebra_recid_to_sysno(zh, basenames, num_bases,
1495  recid, sysnos, no_sysnos);
1496 }
1497 
1499  zint approx_limit)
1500 {
1501  zint psysno = 0;
1502  struct it_key key;
1503  RSFD rfd;
1504 
1505  yaz_log(YLOG_DEBUG, "count_set");
1506 
1507  rset->hits_limit = approx_limit;
1508 
1509  *count = 0;
1510  rfd = rset_open(rset, RSETF_READ);
1511  while (rset_read(rfd, &key,0 /* never mind terms */))
1512  {
1513  if (key.mem[0] != psysno)
1514  {
1515  psysno = key.mem[0];
1516  if (rfd->counted_items >= rset->hits_limit)
1517  break;
1518  }
1519  }
1520  rset_close(rfd);
1521  *count = rset->hits_count;
1522 }
1523 
1524 
1525 /*
1526  * Local variables:
1527  * c-basic-offset: 4
1528  * c-file-style: "Stroustrup"
1529  * indent-tabs-mode: nil
1530  * End:
1531  * vim: shiftwidth=4 tabstop=8 expandtab
1532  */
1533 
const char * zebra_get_resource(ZebraHandle zh, const char *name, const char *defaultvalue)
Definition: zebraapi.c:2565
ZEBRA_RES zebra_sort_get_ord(ZebraHandle zh, Z_SortAttributes *sortAttributes, int *ord, int *numerical)
Definition: attribute.c:190
char * dict_lookup(Dict dict, const char *p)
lookup item in dictionary
Definition: lookup.c:100
struct zebra_rank_class * ZebraRankClass
int zebra_term_untrans(ZebraHandle zh, const char *index_type, char *dst, const char *src)
Definition: untrans.c:31
void zebra_setError(ZebraHandle zh, int code, const char *addinfo)
Definition: zebraapi.c:2755
ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, zint *approx_limit)
Definition: rpnsearch.c:2498
struct zebra_set * ZebraSet
Definition: index.h:114
ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, RSET *result_set)
Definition: rpnsearch.c:2529
struct rset_key_control * zebra_key_control_create(ZebraHandle zh)
Definition: kcontrol.c:57
ISAMB_PP isamb_pp_open(ISAMB isamb, ISAM_P pos, int scope)
Definition: isamb.c:1387
int isamb_pp_forward(ISAMB_PP pp, void *buf, const void *untilbuf)
Definition: isamb.c:1525
void isamb_pp_close(ISAMB_PP pp)
Definition: isamb.c:1429
int isamb_pp_read(ISAMB_PP pp, void *buf)
Definition: isamb.c:1503
zint ISAM_P
Definition: isamc.h:28
void key_logdump_txt(int logmask, const void *p, const char *txt)
Definition: it_key.c:38
int key_SU_encode(int ch, char *out)
Definition: su_codec.c:31
const char * res_get(Res r, const char *name)
Definition: res.c:294
const char * res_get_def(Res r, const char *name, const char *def)
Definition: res.c:313
void rset_delete(RSET rs)
Destructor RSETs.
Definition: rset.c:218
void rset_set_hits_limit(RSET rs, zint l)
Definition: rset.c:88
void ord_list_print(struct ord_list *list)
Definition: rset.c:323
RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol, int scope, int no_rsets, RSET *rsets)
Definition: rsmultiandor.c:280
#define rset_read(rfd, buf, term)
Definition: rset.h:217
struct rset rset
RSET rset_dup(RSET rs)
Duplicate an RSET.
Definition: rset.c:255
#define RSETF_WRITE
Definition: rset.h:200
#define RSETF_READ
Definition: rset.h:199
#define rset_write(rfd, buf)
Definition: rset.h:220
#define rset_getterms(ct, terms, maxterms, curterm)
Definition: rset.h:209
RSET rset_create_temp(NMEM nmem, struct rset_key_control *kcontrol, int scope, const char *temp_path, TERMID term)
Definition: rstemp.c:86
#define rset_open(rs, wflag)
Definition: rset.h:202
void rset_close(RSFD rfd)
Closes a result set RFD handle.
Definition: rset.c:98
void zebra_snippets_append(zebra_snippets *l, zint seqno, int ws, int ord, const char *term)
Definition: snippet.c:51
int zebra_sort_read(zebra_sort_index_t si, zint *section_id, WRBUF w)
reads sort entry
Definition: sortidx.c:470
int zebra_sort_type(zebra_sort_index_t si, int type)
sets type for sort usage
Definition: sortidx.c:235
#define SORT_IDX_ENTRYSIZE
Definition: sortidx.h:29
void zebra_sort_sysno(zebra_sort_index_t si, zint sysno)
sets sort system number for read / add / delete
Definition: sortidx.c:340
char * term
Definition: api.h:481
zint sysno
Definition: api.h:483
int score
Definition: api.h:484
char * db
Definition: api.h:482
Definition: it_key.h:30
int len
Definition: it_key.h:31
zint mem[IT_KEY_LEVEL_MAX]
Definition: it_key.h:32
Definition: rset.h:35
int ord
Definition: rset.h:36
struct ord_list * next
Definition: rset.h:37
void(* end)(struct zebra_register *reg, void *set_handle)
Definition: rank.h:39
void *(* create)(ZebraHandle zh)
Definition: rank.h:29
int(* calc)(void *set_handle, zint sysno, zint staticrank, int *stop_flag)
Definition: rank.h:40
void(* destroy)(struct zebra_register *reg, void *class_handle)
Definition: rank.h:30
void *(* begin)(struct zebra_register *reg, void *class_handle, RSET rset, NMEM nmem, TERMID *terms, int numterms)
Definition: rank.h:31
void(* add)(void *set_handle, int seqno, TERMID term)
Definition: rank.h:42
char * name
Definition: rank.h:28
void(* dec)(struct rset_key_control *kc)
Definition: rset.h:138
Definition: rset.h:50
struct ord_list * ol
Definition: rset.h:64
char * ref_id
Definition: rset.h:63
char * name
Definition: rset.h:51
Definition: rset.h:151
int hits_approx
Definition: rset.h:166
zint hits_limit
Definition: rset.h:163
TERMID term
Definition: rset.h:160
RSET * children
Definition: rset.h:162
zint hits_count
Definition: rset.h:164
int no_children
Definition: rset.h:161
Definition: rset.h:73
zint counted_items
Definition: rset.h:77
int * ord
Definition: zsets.c:556
int relation
Definition: zsets.c:555
const char * index_type
Definition: zsets.c:558
int * numerical
Definition: zsets.c:557
struct rank_control * control
Definition: index.h:117
void * class_handle
Definition: index.h:119
struct zebra_rank_class * next
Definition: index.h:120
ZebraExplainInfo zei
Definition: index.h:139
ISAMB isamb
Definition: index.h:131
ZebraRankClass rank_classes
Definition: index.h:144
zebra_sort_index_t sort_index
Definition: index.h:134
Dict dict
Definition: index.h:132
struct zebra_register * reg
Definition: index.h:174
char ** basenames
Definition: index.h:178
zint approx_limit
Definition: index.h:180
int m_staticrank
Definition: index.h:205
int num_basenames
Definition: index.h:179
int m_segment_indexing
Definition: index.h:206
void * break_handler_data
Definition: index.h:235
ZebraSet sets
Definition: index.h:191
int(* break_handler_func)(void *client_data)
Definition: index.h:234
yaz_iconv_t iconv_from_utf8
Definition: index.h:216
Definition: zsets.c:39
char * index_name
Definition: zsets.c:42
char * term
Definition: zsets.c:43
int reg_type
Definition: zsets.c:40
char * db
Definition: zsets.c:41
NMEM rset_nmem
Definition: zsets.c:50
Z_RPNQuery * rpn
Definition: zsets.c:54
zint cache_psysno
Definition: zsets.c:65
int num_bases
Definition: zsets.c:52
const char ** basenames
Definition: zsets.c:53
NMEM nmem
Definition: zsets.c:49
RSFD cache_rfd
Definition: zsets.c:64
struct zset_sort_info * sort_info
Definition: zsets.c:56
struct zebra_set_term_entry * term_entries
Definition: zsets.c:57
zint approx_limit
Definition: zsets.c:66
int estimated_hit_count
Definition: zsets.c:61
Z_SortKeySpecList * sortSpec
Definition: zsets.c:55
int locked
Definition: zsets.c:60
zint hits
Definition: zsets.c:51
int term_entries_max
Definition: zsets.c:58
struct zebra_set * next
Definition: zsets.c:59
char * name
Definition: zsets.c:47
zint cache_position
Definition: zsets.c:63
RSET rset
Definition: zsets.c:48
Definition: zsets.c:69
int score
Definition: zsets.c:71
zint sysno
Definition: zsets.c:70
struct zset_sort_entry ** entries
Definition: zsets.c:78
struct zset_sort_entry * all_entries
Definition: zsets.c:77
int max_entries
Definition: zsets.c:75
int num_entries
Definition: zsets.c:76
long zint
Zebra integer.
Definition: util.h:66
#define ZEBRA_FAIL
Definition: util.h:81
#define ZINT_FORMAT
Definition: util.h:72
#define CAST_ZINT_TO_INT(x)
Definition: util.h:96
#define ZEBRA_OK
Definition: util.h:82
short ZEBRA_RES
Common return type for Zebra API.
Definition: util.h:80
int zebraExplain_curDatabase(ZebraExplainInfo zei, const char *database)
Definition: zinfo.c:790
int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, zinfo_index_category_t cat, const char *index_type, const char *str)
lookup ordinal from string index + index type
Definition: zinfo.c:1353
int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord, const char **index_type, const char **db, const char **string_index)
Definition: zinfo.c:1478
zinfo_index_category_t
Definition: zinfo.h:37
@ zinfo_index_category_alwaysmatches
Definition: zinfo.h:40
@ zinfo_index_category_sort
Definition: zinfo.h:39
ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh, const char *setname, zint recid, zint *sysnos, int *no_sysnos)
Definition: zsets.c:1481
ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
Definition: zsets.c:215
void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
Definition: zsets.c:1207
static int log_level_resultsets
Definition: zsets.c:85
void zebraRankDestroy(struct zebra_register *reg)
Definition: zsets.c:1218
void resultSetDestroy(ZebraHandle zh, int num, char **names, int *statuses)
Definition: zsets.c:344
static void loglevels(void)
Definition: zsets.c:87
ZebraMetaRecord * zebra_meta_records_create_range(ZebraHandle zh, const char *name, zint start, int num)
Definition: zsets.c:393
RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
Definition: zsets.c:1075
ZebraMetaRecord * zebra_meta_records_create(ZebraHandle zh, const char *name, int num, zint *positions)
Definition: zsets.c:418
void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records, int num)
Definition: zsets.c:547
#define ZSET_SORT_MAX_LEVEL
Definition: zsets.c:37
static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem, Z_RPNQuery *rpn, ZebraSet sset)
Definition: zsets.c:99
ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname, const char ***basenames, int *num_bases)
Definition: zsets.c:310
ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, RSET rset, NMEM nmem)
Definition: zsets.c:1084
void zebra_count_set(ZebraHandle zh, RSET rset, zint *count, zint approx_limit)
Definition: zsets.c:1498
ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, ZebraSet sset, RSET rset, Z_SortKeySpecList *sort_sequence, int *sort_status)
Definition: zsets.c:868
ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem, int num_input_setnames, const char **input_setnames, const char *output_setname, Z_SortKeySpecList *sort_sequence, int *sort_status)
Definition: zsets.c:826
ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
Definition: zsets.c:1193
void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info, zint sysno, int score, int relation)
Definition: zsets.c:708
void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type, const char *db, const char *index_name, const char *term)
Definition: zsets.c:188
static Z_RPNQuery * copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
Definition: zsets.c:755
static Z_SortKeySpecList * copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
Definition: zsets.c:778
void resultSetInsertSort(ZebraHandle zh, ZebraSet sset, int database_no, struct sortKeyInfo *criteria, int num_criteria, zint sysno, char *cmp_buf[], char *tmp_cmp_buf[])
Definition: zsets.c:561
static int log_level_sort
Definition: zsets.c:82
ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname, int no, zint *count, int *approx, char *termbuf, size_t *termlen, const char **term_ref_id)
returns information about a term associated with a result set
Definition: zsets.c:1275
static int log_level_searchterms
Definition: zsets.c:84
ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn, int num_bases, char **basenames, const char *setname, zint *hits, int *estimated_hit_count)
Definition: zsets.c:148
ZebraSet resultSetClone(ZebraHandle zh, const char *setname, ZebraSet rset)
Definition: zsets.c:801
static int log_level_searchhits
Definition: zsets.c:83
ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname, int *num_terms)
returns number of term info terms associated with result set
Definition: zsets.c:1262
void resultSetInvalidate(ZebraHandle zh)
Definition: zsets.c:322
static int trav_rset_for_termids(RSET rset, TERMID *termid_array, zint *hits_array, int *approx_array)
Definition: zsets.c:1234
ZebraSet resultSetGet(ZebraHandle zh, const char *name)
Definition: zsets.c:282
static int log_level_set
Definition: zsets.c:81
static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, const char **basenames, int num_bases, zint recid, zint *sysnos, int *no_sysnos)
Definition: zsets.c:1396
ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname, zint sysno, zebra_snippets *snippets)
Definition: zsets.c:1340