IDZEBRA  2.2.7
rsbetween.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 
21 /* rsbetween is (mostly) used for xml searches. It returns the hits of the
22  * "middle" rset, that are in between the "left" and "right" rsets. For
23  * example "Shakespeare" in between "<author>" and "</author>". The thing is
24  * complicated by the inclusion of attributes (from their own rset). If attrs are
25  * specified, they must match the "left" rset (start tag), e.g. "Hamlet" between
26  * "<title lang = eng>" and "</title>". (This assumes that the attributes are
27  * indexed to the same seqno as the tags).
28  *
29 */
30 
31 #if HAVE_CONFIG_H
32 #include <config.h>
33 #endif
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 
39 #include <idzebra/util.h>
40 #include <rset.h>
41 
42 static RSFD r_open(RSET ct, int flag);
43 static void r_close(RSFD rfd);
44 static void r_delete(RSET ct);
45 static int r_forward(RSFD rfd, void *buf,
46  TERMID *term, const void *untilbuf);
47 static int r_read(RSFD rfd, void *buf, TERMID *term );
48 static void r_pos(RSFD rfd, double *current, double *total);
49 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
50 
51 static const struct rset_control control =
52 {
53  "between",
54  r_delete,
56  r_open,
57  r_close,
58  r_forward,
59  r_pos,
60  r_read,
62 };
63 
65  TERMID startterm; /* pseudo terms for detecting which one we read from */
69 };
70 
73  void *recbuf; /* a key that tells which record we are in */
74  void *startbuf; /* the start tag */
75  int startbufok; /* we have seen the first start tag */
76  void *attrbuf; /* the attr tag. If these two match, we have attr match */
77  int attrbufok; /* we have seen the first attr tag, can compare */
78  int depth; /* number of start-tags without end-tags */
79  int attrdepth; /* on what depth the attr matched */
80  int match_1;
81  int match_2;
83 };
84 
/* Log level used by every yaz_log() call in this file. */
static int log_level = 0;
/* Flag meant to record that log_level has been looked up once. */
static int log_level_initialized = 0;
87 
88 
89 /* make sure that the rset has a term attached. If not, create one */
90 /* we need these terms for the tags, to distinguish what we read */
91 static void checkterm(RSET rs, char *tag, NMEM nmem)
92 {
93  if (!rs->term)
94  {
95  rs->term = rset_term_create(tag, -1, "", 0, nmem, 0, 0, 0, 0);
96  rs->term->rset = rs;
97  }
98 }
99 
100 
101 RSET rset_create_between(NMEM nmem, struct rset_key_control *kcontrol,
102  int scope, RSET rset_l, RSET rset_m1, RSET rset_m2,
103  RSET rset_r, RSET rset_attr)
104 {
105  RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, 0, 0, 0);
106  struct rset_between_info *info =
107  (struct rset_between_info *) nmem_malloc(rnew->nmem,sizeof(*info));
108  RSET rsetarray[5];
109  int n = 0;
110 
112  {
113  log_level = yaz_log_module_level("rsbetween");
115  }
116  rsetarray[n++] = rset_l;
117  checkterm(rset_l, "", nmem);
118  info->startterm = rset_l->term;
119 
120  rsetarray[n++] = rset_r;
121  checkterm(rset_r, "", nmem);
122  info->stopterm = rset_r->term;
123 
124  rsetarray[n++] = rset_m1;
125  if (rset_m2)
126  {
127  rsetarray[n++] = rset_m2;
128  /* hard to work do determine whether we get results from
129  rset_m2 or rset_m1 */
130  info->hit2_terms = (TERMID*)
131  nmem_malloc(nmem, (2 + rset_m2->no_children) * sizeof(TERMID));
132  int i;
133  for (i = 0; i < rset_m2->no_children; i++) /* sub terms */
134  info->hit2_terms[i] = rset_m2->children[i]->term;
135  if (rset_m2->term) /* immediate term */
136  info->hit2_terms[i++] = rset_m2->term;
137  info->hit2_terms[i] = 0;
138  }
139  else
140  info->hit2_terms = NULL;
141 
142  if (rset_attr)
143  {
144  rsetarray[n++] = rset_attr;
145  checkterm(rset_attr, "(attr)", nmem);
146  info->attrterm = rset_attr->term;
147  }
148  else
149  info->attrterm = NULL;
150  rnew->no_children = 1;
151  rnew->children = nmem_malloc(rnew->nmem, sizeof(RSET *));
152  rnew->children[0] = rset_create_and(nmem, kcontrol,
153  scope, n, rsetarray);
154  rnew->priv = info;
155  yaz_log(log_level, "create rset at %p", rnew);
156  return rnew;
157 }
158 
159 static void r_delete(RSET ct)
160 {
161 }
162 
163 
164 static RSFD r_open(RSET ct, int flag)
165 {
166  RSFD rfd;
167  struct rset_between_rfd *p;
168 
169  if (flag & RSETF_WRITE)
170  {
171  yaz_log(YLOG_FATAL, "between set type is read-only");
172  return NULL;
173  }
174  rfd = rfd_create_base(ct);
175  if (rfd->priv)
176  p = (struct rset_between_rfd *) rfd->priv;
177  else {
178  p = (struct rset_between_rfd *) nmem_malloc(ct->nmem, sizeof(*p));
179  rfd->priv = p;
180  p->recbuf = nmem_malloc(ct->nmem, ct->keycontrol->key_size);
181  p->startbuf = nmem_malloc(ct->nmem, ct->keycontrol->key_size);
182  p->attrbuf = nmem_malloc(ct->nmem, ct->keycontrol->key_size);
183  }
184  p->andrfd = rset_open(ct->children[0], RSETF_READ);
185  p->hits = -1;
186  p->depth = 0;
187  p->attrdepth = 0;
188  p->attrbufok = 0;
189  p->startbufok = 0;
190  yaz_log(log_level, "open rset=%p rfd=%p", ct, rfd);
191  return rfd;
192 }
193 
194 static void r_close(RSFD rfd)
195 {
196  struct rset_between_rfd *p = (struct rset_between_rfd *) rfd->priv;
197  yaz_log(log_level, "close rfd=%p", rfd);
198  rset_close(p->andrfd);
199 }
200 
201 static int r_forward(RSFD rfd, void *buf,
202  TERMID *term, const void *untilbuf)
203 {
204  struct rset_between_rfd *p = (struct rset_between_rfd *) rfd->priv;
205  int rc;
206  yaz_log(log_level, "forwarding ");
207  rc = rset_forward(p->andrfd,buf,term,untilbuf);
208  return rc;
209 }
210 
211 static void checkattr(RSFD rfd)
212 {
213  struct rset_between_info *info = (struct rset_between_info *)
214  rfd->rset->priv;
215  struct rset_between_rfd *p = (struct rset_between_rfd *)rfd->priv;
216  const struct rset_key_control *kctrl = rfd->rset->keycontrol;
217  int cmp;
218  if (p->attrdepth)
219  return; /* already found one */
220  if (!info->attrterm)
221  {
222  p->attrdepth = -1; /* matches always */
223  return;
224  }
225  if ( p->startbufok && p->attrbufok )
226  { /* have buffers to compare */
227  cmp = (kctrl->cmp)(p->startbuf, p->attrbuf);
228  if (0 == cmp) /* and the keys match */
229  {
230  p->attrdepth = p->depth;
231  yaz_log(log_level, "found attribute match at depth %d",
232  p->attrdepth);
233  }
234  }
235 }
236 
237 static int r_read(RSFD rfd, void *buf, TERMID *term)
238 {
239  struct rset_between_info *info =
240  (struct rset_between_info *)rfd->rset->priv;
241  struct rset_between_rfd *p = (struct rset_between_rfd *)rfd->priv;
242  const struct rset_key_control *kctrl = rfd->rset->keycontrol;
243  int cmp;
244  TERMID dummyterm = 0;
245  yaz_log(log_level, "== read: term=%p",term);
246  if (!term)
247  term = &dummyterm;
248  while (rset_read(p->andrfd, buf, term))
249  {
250  yaz_log(log_level, "read loop term=%p d=%d ad=%d",
251  *term, p->depth, p->attrdepth);
252  if (p->hits < 0)
253  {/* first time? */
254  memcpy(p->recbuf, buf, kctrl->key_size);
255  p->hits = 0;
256  cmp = rfd->rset->scope; /* force newrecord */
257  }
258  else {
259  cmp = (kctrl->cmp)(buf, p->recbuf);
260  yaz_log(log_level, "cmp=%d", cmp);
261  }
262 
263  if (cmp >= rfd->rset->scope)
264  {
265  yaz_log(log_level, "new record");
266  p->depth = 0;
267  p->attrdepth = 0;
268  p->match_1 = p->match_2 = 0;
269  memcpy(p->recbuf, buf, kctrl->key_size);
270  }
271 
272  if (*term)
273  yaz_log(log_level, " term: '%s'", (*term)->name);
274  if (*term == info->startterm)
275  {
276  p->depth++;
277  yaz_log(log_level, "read start tag. d=%d", p->depth);
278  memcpy(p->startbuf, buf, kctrl->key_size);
279  p->startbufok = 1;
280  checkattr(rfd); /* in case we already saw the attr here */
281  }
282  else if (*term == info->stopterm)
283  {
284  if (p->depth == p->attrdepth)
285  p->attrdepth = 0; /* ending the tag with attr match */
286  p->depth--;
287  if (p->depth == 0)
288  p->match_1 = p->match_2 = 0;
289  yaz_log(log_level, "read end tag. d=%d ad=%d", p->depth,
290  p->attrdepth);
291  }
292  else if (*term == info->attrterm)
293  {
294  yaz_log(log_level, "read attr");
295  memcpy(p->attrbuf, buf, kctrl->key_size);
296  p->attrbufok = 1;
297  checkattr(rfd); /* in case the start tag came first */
298  }
299  else
300  { /* mut be a real hit */
301  if (p->depth && p->attrdepth)
302  {
303  if (!info->hit2_terms)
304  p->match_1 = p->match_2 = 1;
305  else
306  {
307  int i;
308  for (i = 0; info->hit2_terms[i]; i++)
309  if (info->hit2_terms[i] == *term)
310  break;
311  if (info->hit2_terms[i])
312  p->match_2 = 1;
313  else
314  p->match_1 = 1;
315  }
316  if (p->match_1 && p->match_2)
317  {
318  p->hits++;
319  yaz_log(log_level, "got a hit h="ZINT_FORMAT" d=%d ad=%d",
320  p->hits, p->depth, p->attrdepth);
321  return 1; /* we have everything in place already! */
322  }
323  } else
324  yaz_log(log_level, "Ignoring hit. h="ZINT_FORMAT" d=%d ad=%d",
325  p->hits, p->depth, p->attrdepth);
326  }
327  } /* while read */
328  return 0;
329 } /* r_read */
330 
331 static void r_pos(RSFD rfd, double *current, double *total)
332 {
333  struct rset_between_rfd *p = (struct rset_between_rfd *) rfd->priv;
334  rset_pos(p->andrfd, current, total);
335  yaz_log(log_level, "pos: %0.1f/%0.1f ", *current, *total);
336 }
337 
338 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
339 {
340  rset_getterms(ct->children[0], terms, maxterms, curterm);
341 }
342 
343 
344 /*
345  * Local variables:
346  * c-basic-offset: 4
347  * c-file-style: "Stroustrup"
348  * indent-tabs-mode: nil
349  * End:
350  * vim: shiftwidth=4 tabstop=8 expandtab
351  */
352 
static const struct rset_control control
Definition: rsbetween.c:51
static int r_read(RSFD rfd, void *buf, TERMID *term)
Definition: rsbetween.c:237
static void checkattr(RSFD rfd)
Definition: rsbetween.c:211
static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
Definition: rsbetween.c:338
static void r_delete(RSET ct)
Definition: rsbetween.c:159
RSET rset_create_between(NMEM nmem, struct rset_key_control *kcontrol, int scope, RSET rset_l, RSET rset_m1, RSET rset_m2, RSET rset_r, RSET rset_attr)
Definition: rsbetween.c:101
static int log_level
Definition: rsbetween.c:85
static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
Definition: rsbetween.c:201
static void r_pos(RSFD rfd, double *current, double *total)
Definition: rsbetween.c:331
static int log_level_initialized
Definition: rsbetween.c:86
static void checkterm(RSET rs, char *tag, NMEM nmem)
Definition: rsbetween.c:91
static RSFD r_open(RSET ct, int flag)
Definition: rsbetween.c:164
static void r_close(RSFD rfd)
Definition: rsbetween.c:194
RSET rset_create_base(const struct rset_control *sel, NMEM nmem, struct rset_key_control *kcontrol, int scope, TERMID term, int no_children, RSET *children)
Common constuctor for RSETs.
Definition: rset.c:164
RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol, int scope, int no_rsets, RSET *rsets)
Definition: rsmultiandor.c:280
#define rset_read(rfd, buf, term)
Definition: rset.h:217
struct rset_term * TERMID
Definition: rset.h:67
#define RSETF_WRITE
Definition: rset.h:200
int rset_no_write(RSFD rfd, const void *buf)
Definition: rset.c:431
RSFD rfd_create_base(RSET rs)
Common constuctor for RFDs.
Definition: rset.c:43
#define RSETF_READ
Definition: rset.h:199
TERMID rset_term_create(const char *name, int length, const char *flags, int type, NMEM nmem, struct ord_list *ol, int reg_type, zint hits_limit, const char *ref_id)
Creates a TERMID entry.
Definition: rset.c:340
#define rset_getterms(ct, terms, maxterms, curterm)
Definition: rset.h:209
#define rset_pos(rfd, cur, tot)
Definition: rset.h:213
#define rset_open(rs, wflag)
Definition: rset.h:202
void rset_close(RSFD rfd)
Closes a result set RFD handle.
Definition: rset.c:98
#define rset_forward(rfd, buf, term, untilbuf)
Definition: rset.h:205
TERMID startterm
Definition: rsbetween.c:65
TERMID * hit2_terms
Definition: rsbetween.c:68
void * startbuf
Definition: rsbetween.c:74
void * attrbuf
Definition: rsbetween.c:76
int(* cmp)(const void *p1, const void *p2)
Definition: rset.h:131
int key_size
Definition: rset.h:128
Definition: rset.h:50
RSET rset
Definition: rset.h:60
Definition: rset.h:151
TERMID term
Definition: rset.h:160
RSET * children
Definition: rset.h:162
NMEM nmem
Definition: rset.h:156
struct rset_key_control * keycontrol
Definition: rset.h:153
int scope
Definition: rset.h:159
int no_children
Definition: rset.h:161
void * priv
Definition: rset.h:155
Definition: rset.h:73
void * priv
Definition: rset.h:75
RSET rset
Definition: rset.h:74
const char * scope
Definition: tstlockscope.c:40
long zint
Zebra integer.
Definition: util.h:66
#define ZINT_FORMAT
Definition: util.h:72