IDZEBRA 2.2.8
rsmultiandor.c
Go to the documentation of this file.
1/* This file is part of the Zebra server.
2 Copyright (C) Index Data
3
4Zebra is free software; you can redistribute it and/or modify it under
5the terms of the GNU General Public License as published by the Free
6Software Foundation; either version 2, or (at your option) any later
7version.
8
9Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or
11FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18*/
19
20
35#if HAVE_CONFIG_H
36#include <config.h>
37#endif
38#include <assert.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42
43#include <idzebra/util.h>
44#include <idzebra/isamc.h>
45#include <rset.h>
46
47static RSFD r_open_and(RSET ct, int flag);
48static RSFD r_open_or(RSET ct, int flag);
49static void r_close(RSFD rfd);
50static void r_delete(RSET ct);
51static int r_read_and(RSFD rfd, void *buf, TERMID *term);
52static int r_read_or(RSFD rfd, void *buf, TERMID *term);
53static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
54 const void *untilbuf);
55static int r_forward_or(RSFD rfd, void *buf, TERMID *term,
56 const void *untilbuf);
57static void r_pos_and(RSFD rfd, double *current, double *total);
58static void r_pos_or(RSFD rfd, double *current, double *total);
59static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
60
61static const struct rset_control control_or =
62{
63 "multi-or",
67 r_close,
72};
73
74static const struct rset_control control_and =
75{
76 "multi-and",
80 r_close,
85};
86
87/* The heap structure:
88 * The rset contains a list or rsets we are ORing together
89 * The rfd contains a heap of heap-items, which contain
90 * a rfd opened to those rsets, and a buffer for one key.
91 * They also contain a ptr to the rset list in the rset
92 * itself, for practical reasons.
93 */
94
101
102struct heap {
105 const struct rset_key_control *kctrl;
106 struct heap_item **heap; /* ptrs to the rfd */
107};
108typedef struct heap *HEAP;
109
110
111struct rset_private {
112 int dummy;
113};
114
115struct rfd_private {
116 int flag;
117 struct heap_item *items; /* we alloc and free them here */
118 HEAP h; /* and move around here */
119 zint hits; /* returned so far */
120 int eof; /* seen the end of it */
121 int tailcount; /* how many items are tailing */
123 int skip;
124 char *tailbits;
125};
126
127static int log_level = 0;
129
130/* Heap functions ***********************/
131
132static void heap_swap(HEAP h, int x, int y)
133{
134 struct heap_item *swap;
135 swap = h->heap[x];
136 h->heap[x] = h->heap[y];
137 h->heap[y] = swap;
138}
139
140static int heap_cmp(HEAP h, int x, int y)
141{
142 return (*h->kctrl->cmp)(h->heap[x]->buf, h->heap[y]->buf);
143}
144
145static int heap_empty(HEAP h)
146{
147 return 0 == h->heapnum;
148}
149
152static void heap_delete(HEAP h)
153{
154 int cur = 1, child = 2;
155 h->heap[1] = 0; /* been deleted */
156 heap_swap(h, 1, h->heapnum--);
157 while (child <= h->heapnum)
158 {
159 if (child < h->heapnum && heap_cmp(h, child, 1 + child) > 0)
160 child++;
161 if (heap_cmp(h,cur,child) > 0)
162 {
163 heap_swap(h, cur, child);
164 cur = child;
165 child = 2*cur;
166 }
167 else
168 break;
169 }
170}
171
176static void heap_balance(HEAP h)
177{
178 int cur = 1, child = 2;
179 while (child <= h->heapnum)
180 {
181 if (child < h->heapnum && heap_cmp(h, child, 1 + child) > 0)
182 child++;
183 if (heap_cmp(h,cur,child) > 0)
184 {
185 heap_swap(h, cur, child);
186 cur = child;
187 child = 2*cur;
188 }
189 else
190 break;
191 }
192}
193
194static void heap_insert(HEAP h, struct heap_item *hi)
195{
196 int cur, parent;
197
198 cur = ++(h->heapnum);
199 assert(cur <= h->heapmax);
200 h->heap[cur] = hi;
201 parent = cur/2;
202 while (parent && (heap_cmp(h,parent,cur) > 0))
203 {
204 assert(parent>0);
205 heap_swap(h, cur, parent);
206 cur = parent;
207 parent = cur/2;
208 }
209}
210
211static
212HEAP heap_create(NMEM nmem, int size, const struct rset_key_control *kctrl)
213{
214 HEAP h = (HEAP) nmem_malloc(nmem, sizeof(*h));
215
216 ++size; /* heap array starts at 1 */
217 h->heapnum = 0;
218 h->heapmax = size;
219 h->kctrl = kctrl;
220 h->heap = (struct heap_item**) nmem_malloc(nmem, size * sizeof(*h->heap));
221 h->heap[0] = 0; /* not used */
222 return h;
223}
224
225static void heap_clear( HEAP h)
226{
227 assert(h);
228 h->heapnum = 0;
229}
230
231static void heap_destroy(HEAP h)
232{
233 /* nothing to delete, all is nmem'd, and will go away in due time */
234}
235
240int compare_ands(const void *x, const void *y)
241{ const struct heap_item *hx = x;
242 const struct heap_item *hy = y;
243 double cur, totx, toty;
244 rset_pos(hx->fd, &cur, &totx);
245 rset_pos(hy->fd, &cur, &toty);
246 if (totx > toty + 0.5)
247 return 1;
248 if (totx < toty - 0.5)
249 return -1;
250 return 0; /* return totx - toty, except for overflows and rounding */
251}
252
253static RSET rsmulti_andor_create(NMEM nmem,
254 struct rset_key_control *kcontrol,
255 int scope, TERMID termid,
256 int no_rsets, RSET* rsets,
257 const struct rset_control *ctrl)
258{
259 RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, termid,
260 no_rsets, rsets);
261 struct rset_private *info;
263 {
264 log_level = yaz_log_module_level("rsmultiandor");
266 }
267 yaz_log(log_level, "rsmultiand_andor_create scope=%d", scope);
268 info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info));
269 rnew->priv = info;
270 return rnew;
271}
272
279
281 int scope, int no_rsets, RSET* rsets)
282{
283 return rsmulti_andor_create(nmem, kcontrol, scope, 0,
285}
286
287static void r_delete(RSET ct)
288{
289}
290
291static RSFD r_open_andor(RSET ct, int flag, int is_and)
292{
293 RSFD rfd;
294 struct rfd_private *p;
295 const struct rset_key_control *kctrl = ct->keycontrol;
296 int i;
297
298 if (flag & RSETF_WRITE)
299 {
300 yaz_log(YLOG_FATAL, "multiandor set type is read-only");
301 return NULL;
302 }
303 rfd = rfd_create_base(ct);
304 if (rfd->priv)
305 {
306 p = (struct rfd_private *)rfd->priv;
307 if (!is_and)
308 heap_clear(p->h);
309 assert(p->items);
310 /* all other pointers shouls already be allocated, in right sizes! */
311 }
312 else
313 {
314 p = (struct rfd_private *) nmem_malloc(ct->nmem,sizeof(*p));
315 rfd->priv = p;
316 p->h = 0;
317 p->tailbits = 0;
318 if (is_and)
319 p->tailbits = nmem_malloc(ct->nmem, ct->no_children*sizeof(char) );
320 else
321 p->h = heap_create(ct->nmem, ct->no_children, kctrl);
322 p->items = (struct heap_item *)
323 nmem_malloc(ct->nmem, ct->no_children*sizeof(*p->items));
324 for (i = 0; i < ct->no_children; i++)
325 {
326 p->items[i].rset = ct->children[i];
327 p->items[i].buf = nmem_malloc(ct->nmem, kctrl->key_size);
328 }
329 }
330 p->flag = flag;
331 p->hits = 0;
332 p->eof = 0;
333 p->tailcount = 0;
334 if (is_and)
335 { /* read the array and sort it */
336 for (i = 0; i < ct->no_children; i++)
337 {
338 p->items[i].fd = rset_open(ct->children[i], RSETF_READ);
339 if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
340 p->eof = 1;
341 p->tailbits[i] = 0;
342 }
343 qsort(p->items, ct->no_children, sizeof(p->items[0]), compare_ands);
344 }
345 else
346 { /* fill the heap for ORing */
347 for (i = 0; i < ct->no_children; i++)
348 {
349 p->items[i].fd = rset_open(ct->children[i],RSETF_READ);
350 if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
351 heap_insert(p->h, &(p->items[i]));
352 }
353 }
354 return rfd;
355}
356
357static RSFD r_open_or(RSET ct, int flag)
358{
359 return r_open_andor(ct, flag, 0);
360}
361
362static RSFD r_open_and(RSET ct, int flag)
363{
364 return r_open_andor(ct, flag, 1);
365}
366
367static void r_close(RSFD rfd)
368{
369 struct rfd_private *p=(struct rfd_private *)(rfd->priv);
370 int i;
371
372 if (p->h)
373 heap_destroy(p->h);
374 for (i = 0; i < rfd->rset->no_children; i++)
375 if (p->items[i].fd)
376 rset_close(p->items[i].fd);
377}
378
379static int r_forward_or(RSFD rfd, void *buf,
380 TERMID *term, const void *untilbuf)
381{ /* while heap head behind untilbuf, forward it and rebalance heap */
382 struct rfd_private *p = rfd->priv;
383 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
384 if (heap_empty(p->h))
385 return 0;
386 while ((*kctrl->cmp)(p->h->heap[1]->buf,untilbuf) < -rfd->rset->scope )
387 {
388 if (rset_forward(p->h->heap[1]->fd, p->h->heap[1]->buf,
389 &p->h->heap[1]->term, untilbuf))
390 heap_balance(p->h);
391 else
392 {
393 heap_delete(p->h);
394 if (heap_empty(p->h))
395 return 0;
396 }
397
398 }
399 return r_read_or(rfd, buf, term);
400}
401
409static int r_read_or(RSFD rfd, void *buf, TERMID *term)
410{
411 RSET rset = rfd->rset;
412 struct rfd_private *mrfd = rfd->priv;
413 const struct rset_key_control *kctrl = rset->keycontrol;
414 struct heap_item *it;
415 int rdres;
416 if (heap_empty(mrfd->h))
417 return 0;
418 it = mrfd->h->heap[1];
419 memcpy(buf, it->buf, kctrl->key_size);
420 if (term)
421 {
422 if (rset->term)
423 *term = rset->term;
424 else
425 *term = it->term;
426 }
427 (mrfd->hits)++;
428 rdres = rset_read(it->fd, it->buf, &it->term);
429 if (rdres)
430 heap_balance(mrfd->h);
431 else
432 heap_delete(mrfd->h);
433 return 1;
434}
435
452static int r_read_and(RSFD rfd, void *buf, TERMID *term)
453{
454 struct rfd_private *p = rfd->priv;
455 RSET ct = rfd->rset;
456 const struct rset_key_control *kctrl = ct->keycontrol;
457 int i;
458
459 while (1)
460 {
461 if (p->tailcount)
462 { /* we are tailing, find lowest tail and return it */
463 int mintail = -1;
464 int cmp;
465
466 for (i = 0; i < ct->no_children; i++)
467 {
468 if (p->tailbits[i])
469 {
470 if (mintail >= 0)
471 cmp = (*kctrl->cmp)
472 (p->items[i].buf, p->items[mintail].buf);
473 else
474 cmp = -1;
475 if (cmp < 0)
476 mintail = i;
477
478 if (kctrl->get_segment)
479 { /* segments enabled */
480 zint segment = kctrl->get_segment(p->items[i].buf);
481 /* store segment if not stored already */
482 if (!p->segment && segment)
483 p->segment = segment;
484
485 /* skip rest entirely if segments don't match */
486 if (p->segment && segment && p->segment != segment)
487 p->skip = 1;
488 }
489 }
490 }
491 /* return the lowest tail */
492 memcpy(buf, p->items[mintail].buf, kctrl->key_size);
493 if (term)
494 *term = p->items[mintail].term;
495 if (!rset_read(p->items[mintail].fd, p->items[mintail].buf,
496 &p->items[mintail].term))
497 {
498 p->eof = 1; /* game over, once tails have been returned */
499 p->tailbits[mintail] = 0;
500 (p->tailcount)--;
501 }
502 else
503 {
504 /* still a tail? */
505 cmp = (*kctrl->cmp)(p->items[mintail].buf,buf);
506 if (cmp >= rfd->rset->scope)
507 {
508 p->tailbits[mintail] = 0;
509 (p->tailcount)--;
510 }
511 }
512 if (p->skip)
513 continue; /* skip again.. eventually tailcount will be 0 */
514 if (p->tailcount == 0)
515 (p->hits)++;
516 return 1;
517 }
518 /* not tailing, forward until all records match, and set up */
519 /* as tails. the earlier 'if' will then return the hits */
520 if (p->eof)
521 return 0; /* nothing more to see */
522 i = 1; /* assume items[0] is highest up */
523 while (i < ct->no_children)
524 {
525 int cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf);
526 if (cmp <= -rfd->rset->scope) { /* [0] was behind, forward it */
527 if (!rset_forward(p->items[0].fd, p->items[0].buf,
528 &p->items[0].term, p->items[i].buf))
529 {
530 p->eof = 1; /* game over */
531 return 0;
532 }
533 i = 0; /* start forwarding from scratch */
534 }
535 else if (cmp >= rfd->rset->scope)
536 { /* [0] was ahead, forward i */
537 if (!rset_forward(p->items[i].fd, p->items[i].buf,
538 &p->items[i].term, p->items[0].buf))
539 {
540 p->eof = 1; /* game over */
541 return 0;
542 }
543 }
544 else
545 i++;
546 } /* while i */
547 /* if we get this far, all rsets are now within +- scope of [0] */
548 /* ergo, we have a hit. Mark them all as tailing, and let the */
549 /* upper 'if' return the hits in right order */
550 for (i = 0; i < ct->no_children; i++)
551 p->tailbits[i] = 1;
552 p->tailcount = ct->no_children;
553 p->segment = 0;
554 p->skip = 0;
555 } /* while 1 */
556}
557
558
559static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
560 const void *untilbuf)
561{
562 struct rfd_private *p = rfd->priv;
563 RSET ct = rfd->rset;
564 const struct rset_key_control *kctrl = ct->keycontrol;
565 int i;
566 int cmp;
567 int killtail = 0;
568
569 for (i = 0; i < ct->no_children; i++)
570 {
571 cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
572 if (cmp <= -rfd->rset->scope)
573 {
574 killtail = 1; /* we are moving to a different hit */
575 if (!rset_forward(p->items[i].fd, p->items[i].buf,
576 &p->items[i].term, untilbuf))
577 {
578 p->eof = 1; /* game over */
579 p->tailcount = 0;
580 return 0;
581 }
582 }
583 }
584 if (killtail)
585 {
586 for (i = 0; i < ct->no_children; i++)
587 p->tailbits[i] = 0;
588 p->tailcount = 0;
589 }
590 return r_read_and(rfd,buf,term);
591}
592
593static void r_pos_x(RSFD rfd, double *current, double *total, int and_op)
594{
595 RSET ct = rfd->rset;
596 struct rfd_private *mrfd = (struct rfd_private *)(rfd->priv);
597 double ratio = and_op ? 0.0 : 1.0;
598 int i;
599 double sum_cur = 0.0;
600 double sum_tot = 0.0;
601 for (i = 0; i < ct->no_children; i++)
602 {
603 double cur, tot;
604 rset_pos(mrfd->items[i].fd, &cur, &tot);
605 if (i < 100)
606 yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
607 if (and_op)
608 {
609 if (tot > 0.0)
610 {
611 double nratio = cur / tot;
612 if (nratio > ratio)
613 ratio = nratio;
614 }
615 }
616 else
617 {
618 if (cur > 0)
619 sum_cur += (cur - 1);
620 sum_tot += tot;
621 }
622 }
623 if (!and_op && sum_tot > 0.0)
624 {
625 yaz_log(YLOG_LOG, "or op sum_cur=%0.1f sum_tot=%0.1f hits=%f", sum_cur, sum_tot, (double) mrfd->hits);
626 ratio = sum_cur / sum_tot;
627 }
628 if (ratio == 0.0 || ratio == 1.0)
629 { /* nothing there */
630 *current = 0;
631 *total = 0;
632 yaz_log(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total);
633 }
634 else
635 {
636 *current = (double) (mrfd->hits);
637 *total = *current / ratio;
638 yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
639 }
640}
641
642static void r_pos_and(RSFD rfd, double *current, double *total)
643{
644 r_pos_x(rfd, current, total, 1);
645}
646
647static void r_pos_or(RSFD rfd, double *current, double *total)
648{
649 r_pos_x(rfd, current, total, 0);
650}
651
652static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
653{
654 if (ct->term)
655 rset_get_one_term(ct, terms, maxterms, curterm);
656 else
657 {
658 /* Special case: Some multi-ors have all terms pointing to the same
659 term. We do not want to duplicate those. Other multiors (and ands)
660 have different terms under them. Those we want.
661 */
662 int firstterm = *curterm;
663 int i;
664 for (i = 0; i < ct->no_children; i++)
665 {
666 rset_getterms(ct->children[i], terms, maxterms, curterm);
667 if (*curterm > firstterm + 1 && *curterm <= maxterms &&
668 terms[(*curterm) - 1] == terms[firstterm])
669 (*curterm)--; /* forget the term, seen that before */
670 }
671 }
672}
673
674
675/*
676 * Local variables:
677 * c-basic-offset: 4
678 * c-file-style: "Stroustrup"
679 * indent-tabs-mode: nil
680 * End:
681 * vim: shiftwidth=4 tabstop=8 expandtab
682 */
683
RSET rset_create_base(const struct rset_control *sel, NMEM nmem, struct rset_key_control *kcontrol, int scope, TERMID term, int no_children, RSET *children)
Common constuctor for RSETs.
Definition rset.c:164
#define rset_read(rfd, buf, term)
Definition rset.h:217
#define RSETF_WRITE
Definition rset.h:200
int rset_no_write(RSFD rfd, const void *buf)
Definition rset.c:431
RSFD rfd_create_base(RSET rs)
Common constuctor for RFDs.
Definition rset.c:43
#define RSETF_READ
Definition rset.h:199
void rset_get_one_term(RSET ct, TERMID *terms, int maxterms, int *curterm)
is a getterms function for those that don't have any
Definition rset.c:291
#define rset_getterms(ct, terms, maxterms, curterm)
Definition rset.h:209
#define rset_pos(rfd, cur, tot)
Definition rset.h:213
#define rset_open(rs, wflag)
Definition rset.h:202
void rset_close(RSFD rfd)
Closes a result set RFD handle.
Definition rset.c:98
#define rset_forward(rfd, buf, term, untilbuf)
Definition rset.h:205
static void r_pos_and(RSFD rfd, double *current, double *total)
static RSET rsmulti_andor_create(NMEM nmem, struct rset_key_control *kcontrol, int scope, TERMID termid, int no_rsets, RSET *rsets, const struct rset_control *ctrl)
int compare_ands(const void *x, const void *y)
compare and items for quicksort used in qsort to get the multi-and args in optimal order that is,...
RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol, int scope, TERMID termid, int no_rsets, RSET *rsets)
static int r_forward_and(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol, int scope, int no_rsets, RSET *rsets)
static RSFD r_open_andor(RSET ct, int flag, int is_and)
static RSFD r_open_or(RSET ct, int flag)
static int heap_cmp(HEAP h, int x, int y)
static void heap_clear(HEAP h)
static int r_forward_or(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
static const struct rset_control control_or
static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
static RSFD r_open_and(RSET ct, int flag)
static void r_delete(RSET ct)
static void heap_swap(HEAP h, int x, int y)
static int heap_empty(HEAP h)
static void r_pos_x(RSFD rfd, double *current, double *total, int and_op)
static int r_read_and(RSFD rfd, void *buf, TERMID *term)
reads one item key from an 'and' set
static const struct rset_control control_and
static int r_read_or(RSFD rfd, void *buf, TERMID *term)
reads one item key from an 'or' set
static int log_level
static void heap_destroy(HEAP h)
static void r_pos_or(RSFD rfd, double *current, double *total)
static void heap_balance(HEAP h)
puts item into heap. The heap root element has changed value (to bigger) Swap downwards until the hea...
static void heap_insert(HEAP h, struct heap_item *hi)
static HEAP heap_create(NMEM nmem, int size, const struct rset_key_control *kctrl)
struct heap * HEAP
static int log_level_initialized
static void heap_delete(HEAP h)
deletes the first item in the heap, and balances the rest
static void r_close(RSFD rfd)
TERMID term
void * buf
struct heap_item ** heap
int heapmax
int heapnum
const struct rset_key_control * kctrl
zint hits
Definition rsbool.c:62
zint cur
Definition rstemp.c:80
void * buf
Definition rsisamb.c:65
char * tailbits
struct heap_item * items
int(* cmp)(const void *p1, const void *p2)
Definition rset.h:131
zint(* get_segment)(const void *p)
Definition rset.h:134
ISAM_P pos
Definition rsisamb.c:70
Definition rset.h:151
TERMID term
Definition rset.h:160
RSET * children
Definition rset.h:162
NMEM nmem
Definition rset.h:156
struct rset_key_control * keycontrol
Definition rset.h:153
int scope
Definition rset.h:159
int no_children
Definition rset.h:161
Definition rset.h:73
void * priv
Definition rset.h:75
RSET rset
Definition rset.h:74
const char * scope
long zint
Zebra integer.
Definition util.h:66