IDZEBRA 2.2.8
trunc.c
Go to the documentation of this file.
/* This file is part of the Zebra server.
   Copyright (C) Index Data

Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/
19
20
21#if HAVE_CONFIG_H
22#include <config.h>
23#endif
24#include <stdio.h>
25#include <assert.h>
26
27#include "index.h"
28#include <rset.h>
29
/*
 * Bookkeeping for the min-heap that merges several sorted key streams.
 *
 * Heap positions are 1-based (position 1 is the root).  A position is
 * mapped to a storage slot through ptr[]; the heap operations reorder
 * entries by permuting ptr[] only, so the key bytes themselves never move.
 */
struct trunc_info {
    int *ptr;       /* heap position -> slot number */
    int *indx;      /* slot number -> source index given to heap_insert */
    char **heap;    /* slot number -> address of that slot's key in buf */
    int heapnum;    /* number of entries currently in the heap */
    int (*cmp)(const void *p1, const void *p2); /* key ordering function */
    int keysize;    /* size of one key in bytes */
    char *swapbuf;  /* keysize bytes; allocated by heap_init */
    char *tmpbuf;   /* keysize bytes; scratch for the key most recently read */
    char *buf;      /* backing storage for all slots, keysize bytes each */
};
41
42static void heap_swap(struct trunc_info *ti, int i1, int i2)
43{
44 int swap;
45
46 swap = ti->ptr[i1];
47 ti->ptr[i1] = ti->ptr[i2];
48 ti->ptr[i2] = swap;
49}
50
51static void heap_delete(struct trunc_info *ti)
52{
53 int cur = 1, child = 2;
54
55 heap_swap(ti, 1, ti->heapnum--);
56 while (child <= ti->heapnum) {
57 if (child < ti->heapnum &&
58 (*ti->cmp)(ti->heap[ti->ptr[child]],
59 ti->heap[ti->ptr[1+child]]) > 0)
60 child++;
61 if ((*ti->cmp)(ti->heap[ti->ptr[cur]],
62 ti->heap[ti->ptr[child]]) > 0)
63 {
64 heap_swap(ti, cur, child);
65 cur = child;
66 child = 2*cur;
67 }
68 else
69 break;
70 }
71}
72
73static void heap_insert(struct trunc_info *ti, const char *buf, int indx)
74{
75 int cur, parent;
76
77 cur = ++(ti->heapnum);
78 memcpy(ti->heap[ti->ptr[cur]], buf, ti->keysize);
79 ti->indx[ti->ptr[cur]] = indx;
80 parent = cur/2;
81 while (parent && (*ti->cmp)(ti->heap[ti->ptr[parent]],
82 ti->heap[ti->ptr[cur]]) > 0)
83 {
84 heap_swap(ti, cur, parent);
85 cur = parent;
86 parent = cur/2;
87 }
88}
89
90static struct trunc_info *heap_init(int size, int key_size,
91 int (*cmp)(const void *p1,
92 const void *p2))
93{
94 struct trunc_info *ti = (struct trunc_info *) xmalloc(sizeof(*ti));
95 int i;
96
97 ++size;
98 ti->heapnum = 0;
99 ti->keysize = key_size;
100 ti->cmp = cmp;
101 ti->indx = (int *) xmalloc(size * sizeof(*ti->indx));
102 ti->heap = (char **) xmalloc(size * sizeof(*ti->heap));
103 ti->ptr = (int *) xmalloc(size * sizeof(*ti->ptr));
104 ti->swapbuf = (char *) xmalloc(ti->keysize);
105 ti->tmpbuf = (char *) xmalloc(ti->keysize);
106 ti->buf = (char *) xmalloc(size * ti->keysize);
107 for (i = size; --i >= 0; )
108 {
109 ti->ptr[i] = i;
110 ti->heap[i] = ti->buf + ti->keysize * i;
111 }
112 return ti;
113}
114
115static void heap_close(struct trunc_info *ti)
116{
117 xfree(ti->ptr);
118 xfree(ti->indx);
119 xfree(ti->heap);
120 xfree(ti->swapbuf);
121 xfree(ti->tmpbuf);
122 xfree(ti->buf);
123 xfree(ti);
124}
125
126static RSET rset_trunc_r(ZebraHandle zi, const char *term, int length,
127 const char *flags, ISAM_P *isam_p, int from, int to,
128 int merge_chunk, int preserve_position,
129 int term_type, NMEM rset_nmem,
130 struct rset_key_control *kctrl, int scope,
131 TERMID termid)
132{
133 RSET result;
134 RSFD result_rsfd;
135 int nn = 0;
136
137 result = rset_create_temp(rset_nmem, kctrl, scope,
138 res_get(zi->res, "setTmpDir"), termid);
139 result_rsfd = rset_open(result, RSETF_WRITE);
140
141 if (to - from > merge_chunk)
142 {
143 RSFD *rsfd;
144 RSET *rset;
145 int i, i_add = (to-from)/merge_chunk + 1;
146 struct trunc_info *ti;
147 int rscur = 0;
148 int rsmax = (to-from)/i_add + 1;
149 int cmp_border = preserve_position ? 0 : 1;
150 NMEM rset_nmem_sub = nmem_create(); /* all sub rsets not needed
151 after this */
152
153 rset = (RSET *) xmalloc(sizeof(*rset) * rsmax);
154 rsfd = (RSFD *) xmalloc(sizeof(*rsfd) * rsmax);
155
156 for (i = from; i < to; i += i_add)
157 {
158 if (i_add <= to - i)
159 rset[rscur] = rset_trunc_r(zi, term, length, flags,
160 isam_p, i, i+i_add,
161 merge_chunk, preserve_position,
162 term_type, rset_nmem_sub,
163 kctrl, scope, 0);
164 else
165 rset[rscur] = rset_trunc_r(zi, term, length, flags,
166 isam_p, i, to,
167 merge_chunk, preserve_position,
168 term_type, rset_nmem_sub,
169 kctrl, scope, 0);
170 rscur++;
171 }
172 ti = heap_init (rscur, sizeof(struct it_key), key_compare);
173 for (i = rscur; --i >= 0; )
174 {
175 rsfd[i] = rset_open(rset[i], RSETF_READ);
176 if (rset_read(rsfd[i], ti->tmpbuf, 0))
177 heap_insert(ti, ti->tmpbuf, i);
178 else
179 {
180 rset_close(rsfd[i]);
181 rset_delete(rset[i]);
182 }
183 }
184 while (ti->heapnum)
185 {
186 int n = ti->indx[ti->ptr[1]];
187
188 rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
189 nn++;
190
191 while (1)
192 {
193 if(!rset_read (rsfd[n], ti->tmpbuf,0))
194 {
195 heap_delete(ti);
196 rset_close(rsfd[n]);
197 rset_delete(rset[n]);
198 break;
199 }
200 if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > cmp_border)
201 {
202 heap_delete(ti);
203 heap_insert(ti, ti->tmpbuf, n);
204 break;
205 }
206 }
207 }
208 xfree(rset);
209 xfree(rsfd);
210 heap_close(ti);
211 nmem_destroy(rset_nmem_sub);
212 }
213 else if (zi->reg->isamc)
214 {
215 ISAMC_PP *ispt;
216 int i;
217 struct trunc_info *ti;
218
219 ispt = (ISAMC_PP *) xmalloc(sizeof(*ispt) * (to-from));
220
221 ti = heap_init(to-from, sizeof(struct it_key),
223 for (i = to-from; --i >= 0; )
224 {
225 ispt[i] = isamc_pp_open(zi->reg->isamc, isam_p[from+i]);
226 if (isamc_pp_read(ispt[i], ti->tmpbuf))
227 heap_insert(ti, ti->tmpbuf, i);
228 else
229 isamc_pp_close(ispt[i]);
230 }
231 while (ti->heapnum)
232 {
233 int n = ti->indx[ti->ptr[1]];
234
235 rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
236 nn++;
237 if (preserve_position)
238 {
239 heap_delete(ti);
240 if (isamc_pp_read(ispt[n], ti->tmpbuf))
241 heap_insert(ti, ti->tmpbuf, n);
242 else
243 isamc_pp_close(ispt[n]);
244 }
245 else
246 {
247 while (1)
248 {
249 if (!isamc_pp_read(ispt[n], ti->tmpbuf))
250 {
251 heap_delete(ti);
252 isamc_pp_close(ispt[n]);
253 break;
254 }
255 if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
256 {
257 heap_delete(ti);
258 heap_insert(ti, ti->tmpbuf, n);
259 break;
260 }
261 }
262 }
263 }
264 heap_close(ti);
265 xfree(ispt);
266 }
267 else if (zi->reg->isams)
268 {
269 ISAMS_PP *ispt;
270 int i;
271 struct trunc_info *ti;
272 int nn = 0;
273
274 ispt = (ISAMS_PP *) xmalloc(sizeof(*ispt) * (to-from));
275
276 ti = heap_init(to-from, sizeof(struct it_key),
278 for (i = to-from; --i >= 0; )
279 {
280 ispt[i] = isams_pp_open(zi->reg->isams, isam_p[from+i]);
281 if (isams_pp_read(ispt[i], ti->tmpbuf))
282 heap_insert(ti, ti->tmpbuf, i);
283 else
284 isams_pp_close(ispt[i]);
285 }
286 while (ti->heapnum)
287 {
288 int n = ti->indx[ti->ptr[1]];
289
290 rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
291 nn++;
292 while (1)
293 {
294 if (!isams_pp_read(ispt[n], ti->tmpbuf))
295 {
296 heap_delete(ti);
297 isams_pp_close(ispt[n]);
298 break;
299 }
300 if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
301 {
302 heap_delete(ti);
303 heap_insert(ti, ti->tmpbuf, n);
304 break;
305 }
306 }
307 }
308 heap_close(ti);
309 xfree(ispt);
310 }
311 else if (zi->reg->isamb)
312 {
313 ISAMB_PP *ispt;
314 int i;
315 struct trunc_info *ti;
316
317 ispt = (ISAMB_PP *) xmalloc(sizeof(*ispt) * (to-from));
318
319 ti = heap_init(to-from, sizeof(struct it_key),
321 for (i = to-from; --i >= 0; )
322 {
323 if (isam_p[from+i]) {
324 ispt[i] = isamb_pp_open(zi->reg->isamb, isam_p[from+i], scope);
325 if (isamb_pp_read(ispt[i], ti->tmpbuf))
326 heap_insert(ti, ti->tmpbuf, i);
327 else
328 isamb_pp_close(ispt[i]);
329 }
330 }
331 while (ti->heapnum)
332 {
333 int n = ti->indx[ti->ptr[1]];
334
335 rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
336 nn++;
337
338 if (preserve_position)
339 {
340 heap_delete(ti);
341 if (isamb_pp_read(ispt[n], ti->tmpbuf))
342 heap_insert(ti, ti->tmpbuf, n);
343 else
344 isamb_pp_close(ispt[n]);
345 }
346 else
347 {
348 while (1)
349 {
350 if (!isamb_pp_read(ispt[n], ti->tmpbuf))
351 {
352 heap_delete(ti);
353 isamb_pp_close(ispt[n]);
354 break;
355 }
356 if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
357 {
358 heap_delete(ti);
359 heap_insert(ti, ti->tmpbuf, n);
360 break;
361 }
362 }
363 }
364 }
365 heap_close(ti);
366 xfree(ispt);
367 }
368 else
369 yaz_log(YLOG_WARN, "Unknown isam set in rset_trunc_r");
370
371 rset_close(result_rsfd);
372 return result;
373}
374
375static int isams_trunc_cmp(const void *p1, const void *p2)
376{
377 ISAM_P i1 = *(ISAM_P*) p1;
378 ISAM_P i2 = *(ISAM_P*) p2;
379
380 if (i1 > i2)
381 return 1;
382 else if (i1 < i2)
383 return -1;
384 return 0;
385}
386
387static int isamc_trunc_cmp(const void *p1, const void *p2)
388{
389 ISAM_P i1 = *(ISAM_P*) p1;
390 ISAM_P i2 = *(ISAM_P*) p2;
391 zint d;
392
393 d = (isamc_type(i1) - isamc_type(i2));
394 if (d == 0)
395 d = isamc_block(i1) - isamc_block(i2);
396 if (d > 0)
397 return 1;
398 else if (d < 0)
399 return -1;
400 return 0;
401}
402
403RSET rset_trunc(ZebraHandle zh, ISAM_P *isam_p, int no,
404 const char *term, int length, const char *flags,
405 int preserve_position, int term_type, NMEM rset_nmem,
406 struct rset_key_control *kctrl, int scope,
407 struct ord_list *ol, const char *index_type,
408 zint hits_limit, const char *term_ref_id)
409{
410 TERMID termid;
411 RSET result;
412 int trunc_chunk;
413 int trunc_limit = atoi(res_get_def(zh->res, "trunclimit", "10000"));
414
415 termid = rset_term_create(term, length, flags, term_type, rset_nmem, ol,
416 *index_type, hits_limit, term_ref_id);
417
418 if (no < 1)
419 return rset_create_null(rset_nmem, kctrl, termid);
420 else if (no == 1)
421 return zebra_create_rset_isam(zh, rset_nmem, kctrl,
422 scope, *isam_p, termid);
423 else if (zh->reg->isamb && no > 1 && no < trunc_limit)
424 {
425 RSET r;
426 RSET *rsets = xmalloc(no*sizeof(RSET)); /* use nmem! */
427 int i;
428 for (i = 0; i<no; i++)
429 rsets[i] = rsisamb_create(rset_nmem, kctrl, scope,
430 zh->reg->isamb, isam_p[i],
431 0 /* termid */);
432 r = rset_create_or(rset_nmem, kctrl, scope,
433 termid, no, rsets);
434 xfree(rsets);
435 return r;
436 }
437 if (zh->reg->isamc)
438 qsort(isam_p, no, sizeof(*isam_p), isamc_trunc_cmp);
439 else
440 qsort(isam_p, no, sizeof(*isam_p), isams_trunc_cmp);
441 trunc_chunk = atoi(res_get_def(zh->res, "truncchunk", "20"));
442 result = rset_trunc_r(zh, term, length, flags, isam_p, 0, no, trunc_chunk,
443 preserve_position, term_type, rset_nmem, kctrl,
444 scope, termid);
445 return result;
446}
447
448/*
449 * Local variables:
450 * c-basic-offset: 4
451 * c-file-style: "Stroustrup"
452 * indent-tabs-mode: nil
453 * End:
454 * vim: shiftwidth=4 tabstop=8 expandtab
455 */
456
RSET zebra_create_rset_isam(ZebraHandle zh, NMEM rset_nmem, struct rset_key_control *kctl, int scope, ISAM_P pos, TERMID termid)
Definition rset_isam.c:32
ISAMB_PP isamb_pp_open(ISAMB isamb, ISAM_P pos, int scope)
Definition isamb.c:1387
void isamb_pp_close(ISAMB_PP pp)
Definition isamb.c:1429
int isamb_pp_read(ISAMB_PP pp, void *buf)
Definition isamb.c:1503
#define isamc_type(x)
Definition isamc.h:90
int isamc_pp_read(ISAMC_PP pp, void *buf)
Definition isamc.c:511
void isamc_pp_close(ISAMC_PP pp)
Definition isamc.c:458
ISAMC_PP isamc_pp_open(ISAMC is, ISAM_P pos)
Definition isamc.c:467
#define isamc_block(x)
Definition isamc.h:91
zint ISAM_P
Definition isamc.h:28
int isams_pp_read(ISAMS_PP pp, void *buf)
Definition isams.c:227
void isams_pp_close(ISAMS_PP pp)
Definition isams.c:215
ISAMS_PP isams_pp_open(ISAMS is, ISAM_P pos)
Definition isams.c:187
int key_compare(const void *p1, const void *p2)
Definition it_key.c:74
const char * res_get_def(Res r, const char *name, const char *def)
Definition res.c:313
const char * res_get(Res r, const char *name)
Definition res.c:294
void rset_delete(RSET rs)
Destructor RSETs.
Definition rset.c:218
RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol, int scope, TERMID termid, int no_rsets, RSET *rsets)
#define rset_read(rfd, buf, term)
Definition rset.h:217
RSET rsisamb_create(NMEM nmem, struct rset_key_control *kcontrol, int scope, ISAMB is, ISAM_P pos, TERMID term)
Definition rsisamb.c:76
#define RSETF_WRITE
Definition rset.h:200
#define RSETF_READ
Definition rset.h:199
TERMID rset_term_create(const char *name, int length, const char *flags, int type, NMEM nmem, struct ord_list *ol, int reg_type, zint hits_limit, const char *ref_id)
Creates a TERMID entry.
Definition rset.c:340
RSET rset_create_null(NMEM nmem, struct rset_key_control *kcontrol, TERMID term)
Definition rsnull.c:47
#define rset_write(rfd, buf)
Definition rset.h:220
RSET rset_create_temp(NMEM nmem, struct rset_key_control *kcontrol, int scope, const char *temp_path, TERMID term)
Definition rstemp.c:86
#define rset_open(rs, wflag)
Definition rset.h:202
void rset_close(RSFD rfd)
Closes a result set RFD handle.
Definition rset.c:98
Definition rset.h:151
Definition rset.h:73
int * ptr
Definition trunc.c:31
char ** heap
Definition trunc.c:33
char * swapbuf
Definition trunc.c:37
int keysize
Definition trunc.c:36
char * tmpbuf
Definition trunc.c:38
int * indx
Definition trunc.c:32
char * buf
Definition trunc.c:39
int(* cmp)(const void *p1, const void *p2)
Definition trunc.c:35
int heapnum
Definition trunc.c:34
ISAMS isams
Definition index.h:129
ISAMB isamb
Definition index.h:131
ISAMC isamc
Definition index.h:130
struct zebra_register * reg
Definition index.h:174
static void heap_insert(struct trunc_info *ti, const char *buf, int indx)
Definition trunc.c:73
RSET rset_trunc(ZebraHandle zh, ISAM_P *isam_p, int no, const char *term, int length, const char *flags, int preserve_position, int term_type, NMEM rset_nmem, struct rset_key_control *kctrl, int scope, struct ord_list *ol, const char *index_type, zint hits_limit, const char *term_ref_id)
Definition trunc.c:403
static struct trunc_info * heap_init(int size, int key_size, int(*cmp)(const void *p1, const void *p2))
Definition trunc.c:90
static int isams_trunc_cmp(const void *p1, const void *p2)
Definition trunc.c:375
static int isamc_trunc_cmp(const void *p1, const void *p2)
Definition trunc.c:387
static void heap_swap(struct trunc_info *ti, int i1, int i2)
Definition trunc.c:42
static RSET rset_trunc_r(ZebraHandle zi, const char *term, int length, const char *flags, ISAM_P *isam_p, int from, int to, int merge_chunk, int preserve_position, int term_type, NMEM rset_nmem, struct rset_key_control *kctrl, int scope, TERMID termid)
Definition trunc.c:126
static void heap_delete(struct trunc_info *ti)
Definition trunc.c:51
static void heap_close(struct trunc_info *ti)
Definition trunc.c:115
const char * scope
long zint
Zebra integer.
Definition util.h:66