IDZEBRA  2.2.7
trunc.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 
21 #if HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 #include <stdio.h>
25 #include <assert.h>
26 
27 #include "index.h"
28 #include <rset.h>
29 
/*
 * State of an indirect binary heap of fixed-size keys, used by
 * rset_trunc_r to merge several key streams.
 *
 * Heap positions start at 1.  ptr[] translates a heap position into a
 * slot number; heap[] and indx[] are addressed by slot number, so
 * re-ordering the heap only shuffles the integers in ptr[].
 */
struct trunc_info {
    /* ordering */
    int (*cmp)(const void *p1, const void *p2); /* compares two keys */
    int keysize;    /* size of one key in bytes */

    /* heap shape */
    int heapnum;    /* number of keys currently held; top is position 1 */
    int *ptr;       /* heap position -> slot number */

    /* per-slot data */
    char **heap;    /* slot -> address of that slot's key inside buf */
    int *indx;      /* slot -> source index given to heap_insert */

    /* storage */
    char *buf;      /* backing store holding every slot's key */
    char *tmpbuf;   /* room for one key; callers read the next key into it */
    char *swapbuf;  /* room for one key; allocated by heap_init */
};
41 
42 static void heap_swap(struct trunc_info *ti, int i1, int i2)
43 {
44  int swap;
45 
46  swap = ti->ptr[i1];
47  ti->ptr[i1] = ti->ptr[i2];
48  ti->ptr[i2] = swap;
49 }
50 
51 static void heap_delete(struct trunc_info *ti)
52 {
53  int cur = 1, child = 2;
54 
55  heap_swap(ti, 1, ti->heapnum--);
56  while (child <= ti->heapnum) {
57  if (child < ti->heapnum &&
58  (*ti->cmp)(ti->heap[ti->ptr[child]],
59  ti->heap[ti->ptr[1+child]]) > 0)
60  child++;
61  if ((*ti->cmp)(ti->heap[ti->ptr[cur]],
62  ti->heap[ti->ptr[child]]) > 0)
63  {
64  heap_swap(ti, cur, child);
65  cur = child;
66  child = 2*cur;
67  }
68  else
69  break;
70  }
71 }
72 
73 static void heap_insert(struct trunc_info *ti, const char *buf, int indx)
74 {
75  int cur, parent;
76 
77  cur = ++(ti->heapnum);
78  memcpy(ti->heap[ti->ptr[cur]], buf, ti->keysize);
79  ti->indx[ti->ptr[cur]] = indx;
80  parent = cur/2;
81  while (parent && (*ti->cmp)(ti->heap[ti->ptr[parent]],
82  ti->heap[ti->ptr[cur]]) > 0)
83  {
84  heap_swap(ti, cur, parent);
85  cur = parent;
86  parent = cur/2;
87  }
88 }
89 
90 static struct trunc_info *heap_init(int size, int key_size,
91  int (*cmp)(const void *p1,
92  const void *p2))
93 {
94  struct trunc_info *ti = (struct trunc_info *) xmalloc(sizeof(*ti));
95  int i;
96 
97  ++size;
98  ti->heapnum = 0;
99  ti->keysize = key_size;
100  ti->cmp = cmp;
101  ti->indx = (int *) xmalloc(size * sizeof(*ti->indx));
102  ti->heap = (char **) xmalloc(size * sizeof(*ti->heap));
103  ti->ptr = (int *) xmalloc(size * sizeof(*ti->ptr));
104  ti->swapbuf = (char *) xmalloc(ti->keysize);
105  ti->tmpbuf = (char *) xmalloc(ti->keysize);
106  ti->buf = (char *) xmalloc(size * ti->keysize);
107  for (i = size; --i >= 0; )
108  {
109  ti->ptr[i] = i;
110  ti->heap[i] = ti->buf + ti->keysize * i;
111  }
112  return ti;
113 }
114 
115 static void heap_close(struct trunc_info *ti)
116 {
117  xfree(ti->ptr);
118  xfree(ti->indx);
119  xfree(ti->heap);
120  xfree(ti->swapbuf);
121  xfree(ti->tmpbuf);
122  xfree(ti->buf);
123  xfree(ti);
124 }
125 
126 static RSET rset_trunc_r(ZebraHandle zi, const char *term, int length,
127  const char *flags, ISAM_P *isam_p, int from, int to,
128  int merge_chunk, int preserve_position,
129  int term_type, NMEM rset_nmem,
130  struct rset_key_control *kctrl, int scope,
131  TERMID termid)
132 {
133  RSET result;
134  RSFD result_rsfd;
135  int nn = 0;
136 
137  result = rset_create_temp(rset_nmem, kctrl, scope,
138  res_get(zi->res, "setTmpDir"), termid);
139  result_rsfd = rset_open(result, RSETF_WRITE);
140 
141  if (to - from > merge_chunk)
142  {
143  RSFD *rsfd;
144  RSET *rset;
145  int i, i_add = (to-from)/merge_chunk + 1;
146  struct trunc_info *ti;
147  int rscur = 0;
148  int rsmax = (to-from)/i_add + 1;
149  int cmp_border = preserve_position ? 0 : 1;
150  NMEM rset_nmem_sub = nmem_create(); /* all sub rsets not needed
151  after this */
152 
153  rset = (RSET *) xmalloc(sizeof(*rset) * rsmax);
154  rsfd = (RSFD *) xmalloc(sizeof(*rsfd) * rsmax);
155 
156  for (i = from; i < to; i += i_add)
157  {
158  if (i_add <= to - i)
159  rset[rscur] = rset_trunc_r(zi, term, length, flags,
160  isam_p, i, i+i_add,
161  merge_chunk, preserve_position,
162  term_type, rset_nmem_sub,
163  kctrl, scope, 0);
164  else
165  rset[rscur] = rset_trunc_r(zi, term, length, flags,
166  isam_p, i, to,
167  merge_chunk, preserve_position,
168  term_type, rset_nmem_sub,
169  kctrl, scope, 0);
170  rscur++;
171  }
172  ti = heap_init (rscur, sizeof(struct it_key), key_compare);
173  for (i = rscur; --i >= 0; )
174  {
175  rsfd[i] = rset_open(rset[i], RSETF_READ);
176  if (rset_read(rsfd[i], ti->tmpbuf, 0))
177  heap_insert(ti, ti->tmpbuf, i);
178  else
179  {
180  rset_close(rsfd[i]);
181  rset_delete(rset[i]);
182  }
183  }
184  while (ti->heapnum)
185  {
186  int n = ti->indx[ti->ptr[1]];
187 
188  rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
189  nn++;
190 
191  while (1)
192  {
193  if(!rset_read (rsfd[n], ti->tmpbuf,0))
194  {
195  heap_delete(ti);
196  rset_close(rsfd[n]);
197  rset_delete(rset[n]);
198  break;
199  }
200  if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > cmp_border)
201  {
202  heap_delete(ti);
203  heap_insert(ti, ti->tmpbuf, n);
204  break;
205  }
206  }
207  }
208  xfree(rset);
209  xfree(rsfd);
210  heap_close(ti);
211  nmem_destroy(rset_nmem_sub);
212  }
213  else if (zi->reg->isamc)
214  {
215  ISAMC_PP *ispt;
216  int i;
217  struct trunc_info *ti;
218 
219  ispt = (ISAMC_PP *) xmalloc(sizeof(*ispt) * (to-from));
220 
221  ti = heap_init(to-from, sizeof(struct it_key),
222  key_compare);
223  for (i = to-from; --i >= 0; )
224  {
225  ispt[i] = isamc_pp_open(zi->reg->isamc, isam_p[from+i]);
226  if (isamc_pp_read(ispt[i], ti->tmpbuf))
227  heap_insert(ti, ti->tmpbuf, i);
228  else
229  isamc_pp_close(ispt[i]);
230  }
231  while (ti->heapnum)
232  {
233  int n = ti->indx[ti->ptr[1]];
234 
235  rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
236  nn++;
237  if (preserve_position)
238  {
239  heap_delete(ti);
240  if (isamc_pp_read(ispt[n], ti->tmpbuf))
241  heap_insert(ti, ti->tmpbuf, n);
242  else
243  isamc_pp_close(ispt[n]);
244  }
245  else
246  {
247  while (1)
248  {
249  if (!isamc_pp_read(ispt[n], ti->tmpbuf))
250  {
251  heap_delete(ti);
252  isamc_pp_close(ispt[n]);
253  break;
254  }
255  if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
256  {
257  heap_delete(ti);
258  heap_insert(ti, ti->tmpbuf, n);
259  break;
260  }
261  }
262  }
263  }
264  heap_close(ti);
265  xfree(ispt);
266  }
267  else if (zi->reg->isams)
268  {
269  ISAMS_PP *ispt;
270  int i;
271  struct trunc_info *ti;
272  int nn = 0;
273 
274  ispt = (ISAMS_PP *) xmalloc(sizeof(*ispt) * (to-from));
275 
276  ti = heap_init(to-from, sizeof(struct it_key),
277  key_compare);
278  for (i = to-from; --i >= 0; )
279  {
280  ispt[i] = isams_pp_open(zi->reg->isams, isam_p[from+i]);
281  if (isams_pp_read(ispt[i], ti->tmpbuf))
282  heap_insert(ti, ti->tmpbuf, i);
283  else
284  isams_pp_close(ispt[i]);
285  }
286  while (ti->heapnum)
287  {
288  int n = ti->indx[ti->ptr[1]];
289 
290  rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
291  nn++;
292  while (1)
293  {
294  if (!isams_pp_read(ispt[n], ti->tmpbuf))
295  {
296  heap_delete(ti);
297  isams_pp_close(ispt[n]);
298  break;
299  }
300  if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
301  {
302  heap_delete(ti);
303  heap_insert(ti, ti->tmpbuf, n);
304  break;
305  }
306  }
307  }
308  heap_close(ti);
309  xfree(ispt);
310  }
311  else if (zi->reg->isamb)
312  {
313  ISAMB_PP *ispt;
314  int i;
315  struct trunc_info *ti;
316 
317  ispt = (ISAMB_PP *) xmalloc(sizeof(*ispt) * (to-from));
318 
319  ti = heap_init(to-from, sizeof(struct it_key),
320  key_compare);
321  for (i = to-from; --i >= 0; )
322  {
323  if (isam_p[from+i]) {
324  ispt[i] = isamb_pp_open(zi->reg->isamb, isam_p[from+i], scope);
325  if (isamb_pp_read(ispt[i], ti->tmpbuf))
326  heap_insert(ti, ti->tmpbuf, i);
327  else
328  isamb_pp_close(ispt[i]);
329  }
330  }
331  while (ti->heapnum)
332  {
333  int n = ti->indx[ti->ptr[1]];
334 
335  rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
336  nn++;
337 
338  if (preserve_position)
339  {
340  heap_delete(ti);
341  if (isamb_pp_read(ispt[n], ti->tmpbuf))
342  heap_insert(ti, ti->tmpbuf, n);
343  else
344  isamb_pp_close(ispt[n]);
345  }
346  else
347  {
348  while (1)
349  {
350  if (!isamb_pp_read(ispt[n], ti->tmpbuf))
351  {
352  heap_delete(ti);
353  isamb_pp_close(ispt[n]);
354  break;
355  }
356  if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
357  {
358  heap_delete(ti);
359  heap_insert(ti, ti->tmpbuf, n);
360  break;
361  }
362  }
363  }
364  }
365  heap_close(ti);
366  xfree(ispt);
367  }
368  else
369  yaz_log(YLOG_WARN, "Unknown isam set in rset_trunc_r");
370 
371  rset_close(result_rsfd);
372  return result;
373 }
374 
375 static int isams_trunc_cmp(const void *p1, const void *p2)
376 {
377  ISAM_P i1 = *(ISAM_P*) p1;
378  ISAM_P i2 = *(ISAM_P*) p2;
379 
380  if (i1 > i2)
381  return 1;
382  else if (i1 < i2)
383  return -1;
384  return 0;
385 }
386 
387 static int isamc_trunc_cmp(const void *p1, const void *p2)
388 {
389  ISAM_P i1 = *(ISAM_P*) p1;
390  ISAM_P i2 = *(ISAM_P*) p2;
391  zint d;
392 
393  d = (isamc_type(i1) - isamc_type(i2));
394  if (d == 0)
395  d = isamc_block(i1) - isamc_block(i2);
396  if (d > 0)
397  return 1;
398  else if (d < 0)
399  return -1;
400  return 0;
401 }
402 
403 RSET rset_trunc(ZebraHandle zh, ISAM_P *isam_p, int no,
404  const char *term, int length, const char *flags,
405  int preserve_position, int term_type, NMEM rset_nmem,
406  struct rset_key_control *kctrl, int scope,
407  struct ord_list *ol, const char *index_type,
408  zint hits_limit, const char *term_ref_id)
409 {
410  TERMID termid;
411  RSET result;
412  int trunc_chunk;
413  int trunc_limit = atoi(res_get_def(zh->res, "trunclimit", "10000"));
414 
415  termid = rset_term_create(term, length, flags, term_type, rset_nmem, ol,
416  *index_type, hits_limit, term_ref_id);
417 
418  if (no < 1)
419  return rset_create_null(rset_nmem, kctrl, termid);
420  else if (no == 1)
421  return zebra_create_rset_isam(zh, rset_nmem, kctrl,
422  scope, *isam_p, termid);
423  else if (zh->reg->isamb && no > 1 && no < trunc_limit)
424  {
425  RSET r;
426  RSET *rsets = xmalloc(no*sizeof(RSET)); /* use nmem! */
427  int i;
428  for (i = 0; i<no; i++)
429  rsets[i] = rsisamb_create(rset_nmem, kctrl, scope,
430  zh->reg->isamb, isam_p[i],
431  0 /* termid */);
432  r = rset_create_or(rset_nmem, kctrl, scope,
433  termid, no, rsets);
434  xfree(rsets);
435  return r;
436  }
437  if (zh->reg->isamc)
438  qsort(isam_p, no, sizeof(*isam_p), isamc_trunc_cmp);
439  else
440  qsort(isam_p, no, sizeof(*isam_p), isams_trunc_cmp);
441  trunc_chunk = atoi(res_get_def(zh->res, "truncchunk", "20"));
442  result = rset_trunc_r(zh, term, length, flags, isam_p, 0, no, trunc_chunk,
443  preserve_position, term_type, rset_nmem, kctrl,
444  scope, termid);
445  return result;
446 }
447 
448 /*
449  * Local variables:
450  * c-basic-offset: 4
451  * c-file-style: "Stroustrup"
452  * indent-tabs-mode: nil
453  * End:
454  * vim: shiftwidth=4 tabstop=8 expandtab
455  */
456 
RSET zebra_create_rset_isam(ZebraHandle zh, NMEM rset_nmem, struct rset_key_control *kctl, int scope, ISAM_P pos, TERMID termid)
Definition: rset_isam.c:32
ISAMB_PP isamb_pp_open(ISAMB isamb, ISAM_P pos, int scope)
Definition: isamb.c:1387
void isamb_pp_close(ISAMB_PP pp)
Definition: isamb.c:1429
int isamb_pp_read(ISAMB_PP pp, void *buf)
Definition: isamb.c:1503
#define isamc_type(x)
Definition: isamc.h:90
int isamc_pp_read(ISAMC_PP pp, void *buf)
Definition: isamc.c:511
void isamc_pp_close(ISAMC_PP pp)
Definition: isamc.c:458
ISAMC_PP isamc_pp_open(ISAMC is, ISAM_P pos)
Definition: isamc.c:467
#define isamc_block(x)
Definition: isamc.h:91
zint ISAM_P
Definition: isamc.h:28
int isams_pp_read(ISAMS_PP pp, void *buf)
Definition: isams.c:227
void isams_pp_close(ISAMS_PP pp)
Definition: isams.c:215
ISAMS_PP isams_pp_open(ISAMS is, ISAM_P pos)
Definition: isams.c:187
int key_compare(const void *p1, const void *p2)
Definition: it_key.c:74
const char * res_get(Res r, const char *name)
Definition: res.c:294
const char * res_get_def(Res r, const char *name, const char *def)
Definition: res.c:313
void rset_delete(RSET rs)
Destructor RSETs.
Definition: rset.c:218
RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol, int scope, TERMID termid, int no_rsets, RSET *rsets)
Definition: rsmultiandor.c:273
#define rset_read(rfd, buf, term)
Definition: rset.h:217
RSET rsisamb_create(NMEM nmem, struct rset_key_control *kcontrol, int scope, ISAMB is, ISAM_P pos, TERMID term)
Definition: rsisamb.c:76
struct rset rset
#define RSETF_WRITE
Definition: rset.h:200
#define RSETF_READ
Definition: rset.h:199
TERMID rset_term_create(const char *name, int length, const char *flags, int type, NMEM nmem, struct ord_list *ol, int reg_type, zint hits_limit, const char *ref_id)
Creates a TERMID entry.
Definition: rset.c:340
RSET rset_create_null(NMEM nmem, struct rset_key_control *kcontrol, TERMID term)
Definition: rsnull.c:47
#define rset_write(rfd, buf)
Definition: rset.h:220
RSET rset_create_temp(NMEM nmem, struct rset_key_control *kcontrol, int scope, const char *temp_path, TERMID term)
Definition: rstemp.c:86
#define rset_open(rs, wflag)
Definition: rset.h:202
void rset_close(RSFD rfd)
Closes a result set RFD handle.
Definition: rset.c:98
Definition: it_key.h:30
Definition: rset.h:35
Definition: rset.h:50
Definition: rset.h:151
Definition: rset.h:73
int * ptr
Definition: trunc.c:31
char ** heap
Definition: trunc.c:33
char * swapbuf
Definition: trunc.c:37
int keysize
Definition: trunc.c:36
char * tmpbuf
Definition: trunc.c:38
int * indx
Definition: trunc.c:32
char * buf
Definition: trunc.c:39
int(* cmp)(const void *p1, const void *p2)
Definition: trunc.c:35
int heapnum
Definition: trunc.c:34
ISAMS isams
Definition: index.h:129
ISAMB isamb
Definition: index.h:131
ISAMC isamc
Definition: index.h:130
struct zebra_register * reg
Definition: index.h:174
static void heap_insert(struct trunc_info *ti, const char *buf, int indx)
Definition: trunc.c:73
static struct trunc_info * heap_init(int size, int key_size, int(*cmp)(const void *p1, const void *p2))
Definition: trunc.c:90
RSET rset_trunc(ZebraHandle zh, ISAM_P *isam_p, int no, const char *term, int length, const char *flags, int preserve_position, int term_type, NMEM rset_nmem, struct rset_key_control *kctrl, int scope, struct ord_list *ol, const char *index_type, zint hits_limit, const char *term_ref_id)
Definition: trunc.c:403
static int isams_trunc_cmp(const void *p1, const void *p2)
Definition: trunc.c:375
static int isamc_trunc_cmp(const void *p1, const void *p2)
Definition: trunc.c:387
static void heap_swap(struct trunc_info *ti, int i1, int i2)
Definition: trunc.c:42
static RSET rset_trunc_r(ZebraHandle zi, const char *term, int length, const char *flags, ISAM_P *isam_p, int from, int to, int merge_chunk, int preserve_position, int term_type, NMEM rset_nmem, struct rset_key_control *kctrl, int scope, TERMID termid)
Definition: trunc.c:126
static void heap_delete(struct trunc_info *ti)
Definition: trunc.c:51
static void heap_close(struct trunc_info *ti)
Definition: trunc.c:115
const char * scope
Definition: tstlockscope.c:40
long zint
Zebra integer.
Definition: util.h:66