IDZEBRA 2.2.8
rsbetween.c
Go to the documentation of this file.
1/* This file is part of the Zebra server.
2 Copyright (C) Index Data
3
4Zebra is free software; you can redistribute it and/or modify it under
5the terms of the GNU General Public License as published by the Free
6Software Foundation; either version 2, or (at your option) any later
7version.
8
9Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or
11FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18*/
19
20
/* rsbetween is (mostly) used for XML searches. It returns the hits of the
 * "middle" rset that lie between hits of the "left" and "right" rsets, for
 * example "Shakespeare" between "<author>" and "</author>". Things are
 * complicated by the inclusion of attributes (which come from their own
 * rset). If attributes are specified, they must match the "left" rset (the
 * start tag), e.g. "Hamlet" between "<title lang = eng>" and "</title>".
 * (This assumes that the attributes are indexed to the same seqno as the
 * tags.)
 */
30
31#if HAVE_CONFIG_H
32#include <config.h>
33#endif
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <assert.h>
38
39#include <idzebra/util.h>
40#include <rset.h>
41
42static RSFD r_open(RSET ct, int flag);
43static void r_close(RSFD rfd);
44static void r_delete(RSET ct);
45static int r_forward(RSFD rfd, void *buf,
46 TERMID *term, const void *untilbuf);
47static int r_read(RSFD rfd, void *buf, TERMID *term );
48static void r_pos(RSFD rfd, double *current, double *total);
49static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
50
51static const struct rset_control control =
52{
53 "between",
56 r_open,
57 r_close,
59 r_pos,
60 r_read,
62};
63
65 TERMID startterm; /* pseudo terms for detecting which one we read from */
69};
70
73 void *recbuf; /* a key that tells which record we are in */
74 void *startbuf; /* the start tag */
75 int startbufok; /* we have seen the first start tag */
76 void *attrbuf; /* the attr tag. If these two match, we have attr match */
77 int attrbufok; /* we have seen the first attr tag, can compare */
78 int depth; /* number of start-tags without end-tags */
79 int attrdepth; /* on what depth the attr matched */
83};
84
/* Level passed to every yaz_log() trace call in this file; starts at 0. */
static int log_level = 0;

/* Companion flag for log_level; starts at 0.
 * NOTE(review): presumably records that log_level has been looked up --
 * confirm against the code that sets log_level. */
static int log_level_initialized = 0;
87
88
89/* make sure that the rset has a term attached. If not, create one */
90/* we need these terms for the tags, to distinguish what we read */
91static void checkterm(RSET rs, char *tag, NMEM nmem)
92{
93 if (!rs->term)
94 {
95 rs->term = rset_term_create(tag, -1, "", 0, nmem, 0, 0, 0, 0);
96 rs->term->rset = rs;
97 }
98}
99
100
101RSET rset_create_between(NMEM nmem, struct rset_key_control *kcontrol,
102 int scope, RSET rset_l, RSET rset_m1, RSET rset_m2,
103 RSET rset_r, RSET rset_attr)
104{
105 RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, 0, 0, 0);
106 struct rset_between_info *info =
107 (struct rset_between_info *) nmem_malloc(rnew->nmem,sizeof(*info));
108 RSET rsetarray[5];
109 int n = 0;
110
112 {
113 log_level = yaz_log_module_level("rsbetween");
115 }
116 rsetarray[n++] = rset_l;
117 checkterm(rset_l, "", nmem);
118 info->startterm = rset_l->term;
119
120 rsetarray[n++] = rset_r;
121 checkterm(rset_r, "", nmem);
122 info->stopterm = rset_r->term;
123
124 rsetarray[n++] = rset_m1;
125 if (rset_m2)
126 {
127 rsetarray[n++] = rset_m2;
128 /* hard to work do determine whether we get results from
129 rset_m2 or rset_m1 */
130 info->hit2_terms = (TERMID*)
131 nmem_malloc(nmem, (2 + rset_m2->no_children) * sizeof(TERMID));
132 int i;
133 for (i = 0; i < rset_m2->no_children; i++) /* sub terms */
134 info->hit2_terms[i] = rset_m2->children[i]->term;
135 if (rset_m2->term) /* immediate term */
136 info->hit2_terms[i++] = rset_m2->term;
137 info->hit2_terms[i] = 0;
138 }
139 else
140 info->hit2_terms = NULL;
141
142 if (rset_attr)
143 {
144 rsetarray[n++] = rset_attr;
145 checkterm(rset_attr, "(attr)", nmem);
146 info->attrterm = rset_attr->term;
147 }
148 else
149 info->attrterm = NULL;
150 rnew->no_children = 1;
151 rnew->children = nmem_malloc(rnew->nmem, sizeof(RSET *));
152 rnew->children[0] = rset_create_and(nmem, kcontrol,
153 scope, n, rsetarray);
154 rnew->priv = info;
155 yaz_log(log_level, "create rset at %p", rnew);
156 return rnew;
157}
158
159static void r_delete(RSET ct)
160{
161}
162
163
164static RSFD r_open(RSET ct, int flag)
165{
166 RSFD rfd;
167 struct rset_between_rfd *p;
168
169 if (flag & RSETF_WRITE)
170 {
171 yaz_log(YLOG_FATAL, "between set type is read-only");
172 return NULL;
173 }
174 rfd = rfd_create_base(ct);
175 if (rfd->priv)
176 p = (struct rset_between_rfd *) rfd->priv;
177 else {
178 p = (struct rset_between_rfd *) nmem_malloc(ct->nmem, sizeof(*p));
179 rfd->priv = p;
180 p->recbuf = nmem_malloc(ct->nmem, ct->keycontrol->key_size);
181 p->startbuf = nmem_malloc(ct->nmem, ct->keycontrol->key_size);
182 p->attrbuf = nmem_malloc(ct->nmem, ct->keycontrol->key_size);
183 }
184 p->andrfd = rset_open(ct->children[0], RSETF_READ);
185 p->hits = -1;
186 p->depth = 0;
187 p->attrdepth = 0;
188 p->attrbufok = 0;
189 p->startbufok = 0;
190 yaz_log(log_level, "open rset=%p rfd=%p", ct, rfd);
191 return rfd;
192}
193
194static void r_close(RSFD rfd)
195{
196 struct rset_between_rfd *p = (struct rset_between_rfd *) rfd->priv;
197 yaz_log(log_level, "close rfd=%p", rfd);
198 rset_close(p->andrfd);
199}
200
201static int r_forward(RSFD rfd, void *buf,
202 TERMID *term, const void *untilbuf)
203{
204 struct rset_between_rfd *p = (struct rset_between_rfd *) rfd->priv;
205 int rc;
206 yaz_log(log_level, "forwarding ");
207 rc = rset_forward(p->andrfd,buf,term,untilbuf);
208 return rc;
209}
210
211static void checkattr(RSFD rfd)
212{
213 struct rset_between_info *info = (struct rset_between_info *)
214 rfd->rset->priv;
215 struct rset_between_rfd *p = (struct rset_between_rfd *)rfd->priv;
216 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
217 int cmp;
218 if (p->attrdepth)
219 return; /* already found one */
220 if (!info->attrterm)
221 {
222 p->attrdepth = -1; /* matches always */
223 return;
224 }
225 if ( p->startbufok && p->attrbufok )
226 { /* have buffers to compare */
227 cmp = (kctrl->cmp)(p->startbuf, p->attrbuf);
228 if (0 == cmp) /* and the keys match */
229 {
230 p->attrdepth = p->depth;
231 yaz_log(log_level, "found attribute match at depth %d",
232 p->attrdepth);
233 }
234 }
235}
236
237static int r_read(RSFD rfd, void *buf, TERMID *term)
238{
239 struct rset_between_info *info =
240 (struct rset_between_info *)rfd->rset->priv;
241 struct rset_between_rfd *p = (struct rset_between_rfd *)rfd->priv;
242 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
243 int cmp;
244 TERMID dummyterm = 0;
245 yaz_log(log_level, "== read: term=%p",term);
246 if (!term)
247 term = &dummyterm;
248 while (rset_read(p->andrfd, buf, term))
249 {
250 yaz_log(log_level, "read loop term=%p d=%d ad=%d",
251 *term, p->depth, p->attrdepth);
252 if (p->hits < 0)
253 {/* first time? */
254 memcpy(p->recbuf, buf, kctrl->key_size);
255 p->hits = 0;
256 cmp = rfd->rset->scope; /* force newrecord */
257 }
258 else {
259 cmp = (kctrl->cmp)(buf, p->recbuf);
260 yaz_log(log_level, "cmp=%d", cmp);
261 }
262
263 if (cmp >= rfd->rset->scope)
264 {
265 yaz_log(log_level, "new record");
266 p->depth = 0;
267 p->attrdepth = 0;
268 p->match_1 = p->match_2 = 0;
269 memcpy(p->recbuf, buf, kctrl->key_size);
270 }
271
272 if (*term)
273 yaz_log(log_level, " term: '%s'", (*term)->name);
274 if (*term == info->startterm)
275 {
276 p->depth++;
277 yaz_log(log_level, "read start tag. d=%d", p->depth);
278 memcpy(p->startbuf, buf, kctrl->key_size);
279 p->startbufok = 1;
280 checkattr(rfd); /* in case we already saw the attr here */
281 }
282 else if (*term == info->stopterm)
283 {
284 if (p->depth == p->attrdepth)
285 p->attrdepth = 0; /* ending the tag with attr match */
286 p->depth--;
287 if (p->depth == 0)
288 p->match_1 = p->match_2 = 0;
289 yaz_log(log_level, "read end tag. d=%d ad=%d", p->depth,
290 p->attrdepth);
291 }
292 else if (*term == info->attrterm)
293 {
294 yaz_log(log_level, "read attr");
295 memcpy(p->attrbuf, buf, kctrl->key_size);
296 p->attrbufok = 1;
297 checkattr(rfd); /* in case the start tag came first */
298 }
299 else
300 { /* mut be a real hit */
301 if (p->depth && p->attrdepth)
302 {
303 if (!info->hit2_terms)
304 p->match_1 = p->match_2 = 1;
305 else
306 {
307 int i;
308 for (i = 0; info->hit2_terms[i]; i++)
309 if (info->hit2_terms[i] == *term)
310 break;
311 if (info->hit2_terms[i])
312 p->match_2 = 1;
313 else
314 p->match_1 = 1;
315 }
316 if (p->match_1 && p->match_2)
317 {
318 p->hits++;
319 yaz_log(log_level, "got a hit h="ZINT_FORMAT" d=%d ad=%d",
320 p->hits, p->depth, p->attrdepth);
321 return 1; /* we have everything in place already! */
322 }
323 } else
324 yaz_log(log_level, "Ignoring hit. h="ZINT_FORMAT" d=%d ad=%d",
325 p->hits, p->depth, p->attrdepth);
326 }
327 } /* while read */
328 return 0;
329} /* r_read */
330
331static void r_pos(RSFD rfd, double *current, double *total)
332{
333 struct rset_between_rfd *p = (struct rset_between_rfd *) rfd->priv;
334 rset_pos(p->andrfd, current, total);
335 yaz_log(log_level, "pos: %0.1f/%0.1f ", *current, *total);
336}
337
338static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
339{
340 rset_getterms(ct->children[0], terms, maxterms, curterm);
341}
342
343
344/*
345 * Local variables:
346 * c-basic-offset: 4
347 * c-file-style: "Stroustrup"
348 * indent-tabs-mode: nil
349 * End:
350 * vim: shiftwidth=4 tabstop=8 expandtab
351 */
352
static const struct rset_control control
Definition rsbetween.c:51
static int r_read(RSFD rfd, void *buf, TERMID *term)
Definition rsbetween.c:237
static void checkattr(RSFD rfd)
Definition rsbetween.c:211
static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
Definition rsbetween.c:338
static void r_delete(RSET ct)
Definition rsbetween.c:159
RSET rset_create_between(NMEM nmem, struct rset_key_control *kcontrol, int scope, RSET rset_l, RSET rset_m1, RSET rset_m2, RSET rset_r, RSET rset_attr)
Definition rsbetween.c:101
static int log_level
Definition rsbetween.c:85
static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
Definition rsbetween.c:201
static void r_pos(RSFD rfd, double *current, double *total)
Definition rsbetween.c:331
static int log_level_initialized
Definition rsbetween.c:86
static void checkterm(RSET rs, char *tag, NMEM nmem)
Definition rsbetween.c:91
static RSFD r_open(RSET ct, int flag)
Definition rsbetween.c:164
static void r_close(RSFD rfd)
Definition rsbetween.c:194
RSET rset_create_base(const struct rset_control *sel, NMEM nmem, struct rset_key_control *kcontrol, int scope, TERMID term, int no_children, RSET *children)
Common constuctor for RSETs.
Definition rset.c:164
RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol, int scope, int no_rsets, RSET *rsets)
#define rset_read(rfd, buf, term)
Definition rset.h:217
struct rset_term * TERMID
Definition rset.h:67
#define RSETF_WRITE
Definition rset.h:200
int rset_no_write(RSFD rfd, const void *buf)
Definition rset.c:431
RSFD rfd_create_base(RSET rs)
Common constuctor for RFDs.
Definition rset.c:43
#define RSETF_READ
Definition rset.h:199
TERMID rset_term_create(const char *name, int length, const char *flags, int type, NMEM nmem, struct ord_list *ol, int reg_type, zint hits_limit, const char *ref_id)
Creates a TERMID entry.
Definition rset.c:340
#define rset_getterms(ct, terms, maxterms, curterm)
Definition rset.h:209
#define rset_pos(rfd, cur, tot)
Definition rset.h:213
#define rset_open(rs, wflag)
Definition rset.h:202
void rset_close(RSFD rfd)
Closes a result set RFD handle.
Definition rset.c:98
#define rset_forward(rfd, buf, term, untilbuf)
Definition rset.h:205
TERMID * hit2_terms
Definition rsbetween.c:68
int(* cmp)(const void *p1, const void *p2)
Definition rset.h:131
RSET rset
Definition rset.h:60
Definition rset.h:151
TERMID term
Definition rset.h:160
RSET * children
Definition rset.h:162
NMEM nmem
Definition rset.h:156
struct rset_key_control * keycontrol
Definition rset.h:153
int scope
Definition rset.h:159
int no_children
Definition rset.h:161
void * priv
Definition rset.h:155
Definition rset.h:73
void * priv
Definition rset.h:75
RSET rset
Definition rset.h:74
const char * scope
long zint
Zebra integer.
Definition util.h:66
#define ZINT_FORMAT
Definition util.h:72