IDZEBRA 2.2.8
sortidx.c
Go to the documentation of this file.
1/* This file is part of the Zebra server.
2 Copyright (C) Index Data
3
4Zebra is free software; you can redistribute it and/or modify it under
5the terms of the GNU General Public License as published by the Free
6Software Foundation; either version 2, or (at your option) any later
7version.
8
9Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or
11FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18*/
19
20
21#if HAVE_CONFIG_H
22#include <config.h>
23#endif
24#include <assert.h>
25#include <string.h>
26
27#include <yaz/log.h>
28#include <yaz/xmalloc.h>
29#include <yaz/snprintf.h>
30#include <idzebra/isamb.h>
31#include <idzebra/bfile.h>
32#include <sortidx.h>
33#include "recindex.h"
34
/* Truncation limit for the term in the zebra_sort_add() branch that stores
   entries with section_id 0. */
35#define SORT_MAX_TERM 110
/* Truncation limit for the term in the zebra_sort_add() branch that keeps the
   caller's section_id; also used as the size of the sort_term term array. */
36#define SORT_MAX_MULTI 4096
37
/* Block size handed to bf_open() when the "sort%d" block file is opened. */
38#define SORT_IDX_BLOCKSIZE 64
39
46
47
48static void sort_term_log_item(int level, const void *b, const char *txt)
49{
50 struct sort_term a1;
51
52 memcpy(&a1, b, sizeof(a1));
53
54 yaz_log(level, "%s " ZINT_FORMAT " " ZINT_FORMAT " %.*s", txt, a1.sysno,
55 a1.section_id, (int) a1.length-1, a1.term);
56}
57
58static int sort_term_compare(const void *a, const void *b)
59{
60 struct sort_term a1, b1;
61
62 memcpy(&a1, a, sizeof(a1));
63 memcpy(&b1, b, sizeof(b1));
64
65 if (a1.sysno > b1.sysno)
66 return 1;
67 else if (a1.sysno < b1.sysno)
68 return -1;
69 if (a1.section_id > b1.section_id)
70 return 1;
71 else if (a1.section_id < b1.section_id)
72 return -1;
73
74 return 0;
75}
76
/**
 * Codec "start" hook. The sort term codec keeps no state between calls,
 * so there is nothing to allocate.
 *
 * \return always NULL
 */
static void *sort_term_code_start(void)
{
    return NULL;
}
81
82static void sort_term_encode1(void *p, char **dst, const char **src)
83{
84 struct sort_term a1;
85
86 memcpy(&a1, *src, sizeof(a1));
87 *src += sizeof(a1);
88
89 zebra_zint_encode(dst, a1.sysno); /* encode record id */
90 strcpy(*dst, a1.term); /* then sort term, 0 terminated */
91 *dst += strlen(a1.term) + 1;
92}
93
/* Encodes one sort_term item in the length-prefixed format: the encoded
   sysno, the encoded length, then `length` raw bytes of the term.
   *src is advanced by sizeof(struct sort_term) and *dst past the bytes
   written. The parameter p is not used. */
94static void sort_term_encode2(void *p, char **dst, const char **src)
95{
96 struct sort_term a1;
97
98 memcpy(&a1, *src, sizeof(a1));
99 *src += sizeof(a1);
100
101 zebra_zint_encode(dst, a1.sysno);
/* NOTE(review): listing line 102 is absent here; presumably it encodes
   a1.section_id - confirm against the original file. */
103 zebra_zint_encode(dst, a1.length); /* encode length */
104 memcpy(*dst, a1.term, a1.length);
105 *dst += a1.length;
106}
107
108static void sort_term_decode1(void *p, char **dst, const char **src)
109{
110 struct sort_term a1;
111 size_t slen;
112
113 zebra_zint_decode(src, &a1.sysno);
114 a1.section_id = 0;
115
116 strcpy(a1.term, *src);
117 slen = 1 + strlen(a1.term);
118 *src += slen;
119 a1.length = slen;
120
121 memcpy(*dst, &a1, sizeof(a1));
122 *dst += sizeof(a1);
123}
124
/* Decodes one item in the length-prefixed format: reads the encoded sysno and
   length, copies `length` term bytes from *src, then stores the resulting
   struct sort_term at *dst. *src is advanced past the bytes consumed and
   *dst by sizeof(struct sort_term). The parameter p is not used. */
125static void sort_term_decode2(void *p, char **dst, const char **src)
126{
127 struct sort_term a1;
128
129 zebra_zint_decode(src, &a1.sysno);
/* NOTE(review): listing line 130 is absent here; presumably it decodes
   a1.section_id - confirm against the original file. */
131 zebra_zint_decode(src, &a1.length);
132
/* NOTE(review): a1.length comes from the encoded data and is not checked
   against sizeof(a1.term) before the copy - confirm the data is trusted. */
133 memcpy(a1.term, *src, a1.length);
134 *src += a1.length;
135
136 memcpy(*dst, &a1, sizeof(a1));
137 *dst += sizeof(a1);
138}
139
/**
 * Codec "reset" hook. Intentionally empty: sort_term_code_start() returns no
 * state, so there is nothing to reset.
 *
 * \param p codec handle (not used)
 */
static void sort_term_code_reset(void *p)
{
    (void) p;
}
143
/**
 * Codec "stop" hook. Intentionally empty: sort_term_code_start() returns no
 * state, so there is nothing to release.
 *
 * \param p codec handle (not used)
 */
static void sort_term_code_stop(void *p)
{
    (void) p;
}
147
/* NOTE(review): the opening line of this struct (listing line 148) and one
   member (listing line 150) are absent from this listing. Other code in the
   file refers to the type as `struct sort_term_stream` and reads a member
   named `insert_flag` - confirm against the original file. */
/* number of items sort_term_code_read() will still hand out */
149 int no;
/* the item that sort_term_code_read() copies to its caller */
151 struct sort_term st;
152};
153
154static int sort_term_code_read(void *vp, char **dst, int *insertMode)
155{
156 struct sort_term_stream *s = (struct sort_term_stream *) vp;
157
158 if (s->no == 0)
159 return 0;
160
161 (s->no)--;
162
163 *insertMode = s->insert_flag;
164 memcpy(*dst, &s->st, sizeof(s->st));
165 *dst += sizeof(s->st);
166 return 1;
167}
168
172
186
196
197zebra_sort_index_t zebra_sort_open(BFiles bfs, int write_flag, int type)
198{
199 zebra_sort_index_t si = (zebra_sort_index_t) xmalloc(sizeof(*si));
200 si->bfs = bfs;
201 si->write_flag = write_flag;
202 si->current_file = NULL;
203 si->files = NULL;
204 si->type = type;
205 si->entry_buf = (char *) xmalloc(SORT_IDX_ENTRYSIZE);
206 return si;
207}
208
210{
/* Body of zebra_sort_close(zebra_sort_index_t si): walks the list of sort
   files, closes each one according to si->type, frees the list nodes and
   finally frees the entry buffer and the handle itself.
   NOTE(review): the signature line, the case labels and listing lines
   223-224 are absent from this listing - confirm against the original. */
211 struct sortFile *sf = si->files;
212 while (sf)
213 {
/* remember the successor before the node is freed */
214 struct sortFile *sf_next = sf->next;
215 switch(si->type)
216 {
/* block file variant */
218 bf_close(sf->u.bf);
219 break;
/* isamb variant(s); NOTE(review): the statement guarded by this `if` is not
   visible here - presumably it closes the open isam_pp position. */
222 if (sf->isam_pp)
225 isamb_close(sf->u.isamb);
226 break;
227 }
228 xfree(sf);
229 sf = sf_next;
230 }
231 xfree(si->entry_buf);
232 xfree(si);
233}
234
236{
/* Body of zebra_sort_type(zebra_sort_index_t si, int id): makes the sort
   file with the given id the current file, opening it first if it is not in
   si->files yet. Returns 0 on success and -1 if the file cannot be opened.
   NOTE(review): the signature line, the case labels and the set-up of
   `method` (listing lines 243-247, 283-285, 304-305) are absent from this
   listing - confirm against the original file. */
237 int isam_block_size = 4096;
238
239 ISAMC_M method;
240 char fname[80];
241 struct sortFile *sf;
242
248
/* fast path: requested file is already current */
249 if (si->current_file && si->current_file->id == id)
250 return 0;
/* otherwise reuse a file that is already open */
251 for (sf = si->files; sf; sf = sf->next)
252 if (sf->id == id)
253 {
254 si->current_file = sf;
255 return 0;
256 }
/* not open yet: create a new list node and open the backing file */
257 sf = (struct sortFile *) xmalloc(sizeof(*sf));
258 sf->id = id;
259
260 switch(si->type)
261 {
/* block file variant, file name "sort<id>" */
263 sf->u.bf = NULL;
264 yaz_snprintf(fname, sizeof(fname), "sort%d", id);
265 yaz_log(YLOG_DEBUG, "sort idx %s wr=%d", fname, si->write_flag);
266 sf->u.bf = bf_open(si->bfs, fname, SORT_IDX_BLOCKSIZE, si->write_flag);
267 if (!sf->u.bf)
268 {
269 xfree(sf);
270 return -1;
271 }
/* block 0 holds the file head; if it cannot be read the head is reset, and
   when the handle is not in write mode the open is treated as a failure */
272 if (!bf_read(sf->u.bf, 0, 0, sizeof(sf->head), &sf->head))
273 {
274 sf->head.sysno_max = 0;
275 if (!si->write_flag)
276 {
277 bf_close(sf->u.bf);
278 xfree(sf);
279 return -1;
280 }
281 }
282 break;
286
/* isamb variant, file name "sortb<id>", default block size */
287 yaz_snprintf(fname, sizeof(fname), "sortb%d", id);
288 sf->u.isamb = isamb_open2(si->bfs, fname, si->write_flag, &method,
289 /* cache */ 0,
290 /* no_cat */ 1, &isam_block_size,
291 /* use_root_ptr */ 1);
292 if (!sf->u.isamb)
293 {
294 xfree(sf);
295 return -1;
296 }
297 else
298 {
299 sf->isam_p = isamb_get_root_ptr(sf->u.isamb);
300 }
301 break;
/* isamb variant, file name "sortm<id>", larger block size */
303 isam_block_size = 32768;
306
307 yaz_snprintf(fname, sizeof(fname), "sortm%d", id);
308 sf->u.isamb = isamb_open2(si->bfs, fname, si->write_flag, &method,
309 /* cache */ 0,
310 /* no_cat */ 1, &isam_block_size,
311 /* use_root_ptr */ 1);
312 if (!sf->u.isamb)
313 {
314 xfree(sf);
315 return -1;
316 }
317 else
318 {
319 sf->isam_p = isamb_get_root_ptr(sf->u.isamb);
320 }
321 break;
322 }
/* fresh file: no open read position, no pending updates; push it onto the
   front of the list and make it current */
323 sf->isam_pp = 0;
324 sf->no_inserted = 0;
325 sf->no_deleted = 0;
326 sf->next = si->files;
327 si->current_file = si->files = sf;
328 return 0;
329}
330
/* Resets the per-file state of a sort file: clears the isam_pp read position
   and zeroes the insert/delete counters. */
331static void zebra_sortf_rewind(struct sortFile *sf)
332{
333 if (sf->isam_pp)
/* NOTE(review): listing line 334, the statement guarded by the `if` above,
   is absent here; presumably it closes sf->isam_pp before the pointer is
   cleared - confirm against the original file. */
335 sf->isam_pp = 0;
336 sf->no_inserted = 0;
337 sf->no_deleted = 0;
338}
339
341{
/* Body of zebra_sort_sysno(zebra_sort_index_t si, zint sysno): converts
   sysno with rec_sysno_to_int() and stores it in si->sysno as the system
   number used by subsequent read / add / delete calls.
   NOTE(review): the signature line and the statements of both branches below
   (listing lines 348 and 350) are absent from this listing; presumably each
   rewinds the file via zebra_sortf_rewind() - confirm. */
342 zint new_sysno = rec_sysno_to_int(sysno);
343 struct sortFile *sf;
344
345 for (sf = si->files; sf; sf = sf->next)
346 {
/* the file has had inserts or deletes */
347 if (sf->no_inserted || sf->no_deleted)
/* an open read position exists and the new sysno does not move forward */
349 else if (sf->isam_pp && new_sysno <= si->sysno)
351 }
352 si->sysno = new_sysno;
353}
354
355
357{
/* Body of zebra_sort_delete(zebra_sort_index_t si, zint section_id): deletes
   the sort entry of the current system number from the current sort file.
   Does nothing if no file is current.
   NOTE(review): the signature line and the case labels are absent from this
   listing - confirm against the original file. */
358 struct sortFile *sf = si->current_file;
359
360 if (!sf || !sf->u.bf)
361 return;
362 switch(si->type)
363 {
/* block file variant: overwrite the entry's block with zeroes */
365 memset(si->entry_buf, 0, SORT_IDX_ENTRYSIZE);
366 bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf);
367 break;
/* isamb variant(s): merge a single delete item; skipped while no_deleted is
   non-zero (the counter is reset by zebra_sortf_rewind) */
370 assert(sf->u.isamb);
371 if (sf->no_deleted == 0)
372 {
373 struct sort_term_stream s;
374 ISAMC_I isamc_i;
375
/* key of the item to delete; the term is left empty */
376 s.st.sysno = si->sysno;
377 s.st.section_id = section_id;
378 s.st.length = 0;
379 s.st.term[0] = '\0';
380
/* one item, insert_flag 0 (the add path uses 1) */
381 s.no = 1;
382 s.insert_flag = 0;
383 isamc_i.clientData = &s;
384 isamc_i.read_item = sort_term_code_read;
385
386 isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
387 sf->no_deleted++;
388 }
389 break;
390 }
391}
392
393void zebra_sort_add(zebra_sort_index_t si, zint section_id, WRBUF wrbuf)
394{
395 struct sortFile *sf = si->current_file;
396 int len;
397
398 if (!sf || !sf->u.bf)
399 return;
400 switch(si->type)
401 {
403 /* take first entry from wrbuf - itself is 0-terminated */
404 len = strlen(wrbuf_buf(wrbuf));
405 if (len > SORT_IDX_ENTRYSIZE)
406 len = SORT_IDX_ENTRYSIZE;
407
408 memcpy(si->entry_buf, wrbuf_buf(wrbuf), len);
409 if (len < SORT_IDX_ENTRYSIZE-len)
410 memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len);
411 bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf);
412 break;
414 assert(sf->u.isamb);
415
416 if (sf->no_inserted == 0)
417 {
418 struct sort_term_stream s;
419 ISAMC_I isamc_i;
420 /* take first entry from wrbuf - itself is 0-terminated */
421
422 len = wrbuf_len(wrbuf);
423 if (len > SORT_MAX_TERM)
424 {
425 len = SORT_MAX_TERM;
426 wrbuf_buf(wrbuf)[len-1] = '\0';
427 }
428 memcpy(s.st.term, wrbuf_buf(wrbuf), len);
429 s.st.length = len;
430 s.st.sysno = si->sysno;
431 s.st.section_id = 0;
432 s.no = 1;
433 s.insert_flag = 1;
434 isamc_i.clientData = &s;
435 isamc_i.read_item = sort_term_code_read;
436
437 isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
438 sf->no_inserted++;
439 }
440 break;
442 assert(sf->u.isamb);
443 if (sf->no_inserted == 0)
444 {
445 struct sort_term_stream s;
446 ISAMC_I isamc_i;
447 len = wrbuf_len(wrbuf);
448 if (len > SORT_MAX_MULTI)
449 {
450 len = SORT_MAX_MULTI;
451 wrbuf_buf(wrbuf)[len-1] = '\0';
452 }
453 memcpy(s.st.term, wrbuf_buf(wrbuf), len);
454 s.st.length = len;
455 s.st.sysno = si->sysno;
456 s.st.section_id = section_id;
457 s.no = 1;
458 s.insert_flag = 1;
459 isamc_i.clientData = &s;
460 isamc_i.read_item = sort_term_code_read;
461
462 isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
463 sf->no_inserted++;
464 }
465 break;
466 }
467}
468
469
/* Reads the sort entry of the current system number (si->sysno) from the
   current sort file and appends it to w. Returns 1 if an entry was found and
   0 otherwise. A current file must have been selected before the call (both
   asserts below).
   NOTE(review): the case labels and listing line 510 are absent from this
   listing - confirm against the original file. */
470int zebra_sort_read(zebra_sort_index_t si, zint *section_id, WRBUF w)
471{
472 int r;
473 struct sortFile *sf = si->current_file;
474 char tbuf[SORT_IDX_ENTRYSIZE];
475
476 assert(sf);
477 assert(sf->u.bf);
478
479 switch(si->type)
480 {
/* block file variant: the entry lives in block sysno+1; an entry whose first
   byte is 0 counts as "no entry".
   NOTE(review): tbuf has no spare byte for a terminator; wrbuf_puts
   presumably expects a NUL-terminated string, so an entry that fills the
   whole block may be read past its end - confirm. */
482 r = bf_read(sf->u.bf, si->sysno+1, 0, 0, tbuf);
483 if (r && *tbuf)
484 {
485 wrbuf_puts(w, tbuf);
486 wrbuf_putc(w, '\0');
487 return 1;
488 }
489 break;
/* isamb variant(s): only attempted when the file has a root pointer */
492 if (sf->isam_p)
493 {
494
/* open the read position on first use and keep it in sf->isam_pp */
495 if (!sf->isam_pp)
496 sf->isam_pp = isamb_pp_open(sf->u.isamb, sf->isam_p, 1);
497 if (sf->isam_pp)
498 {
499 struct sort_term st, st_untilbuf;
500
/* forward to the first item whose key is at or after (sysno, section 0) */
501 st_untilbuf.sysno = si->sysno;
502 st_untilbuf.section_id = 0;
503 st_untilbuf.length = 0;
504 st_untilbuf.term[0] = '\0';
505 r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf);
/* only an item for exactly this sysno counts as a hit */
506 if (r && st.sysno == si->sysno)
507 {
508 wrbuf_write(w, st.term, st.length);
/* NOTE(review): listing line 510, the statement guarded by this `if`, is
   absent; presumably it stores st.section_id through section_id. As printed
   here the `if` would guard the return below - confirm. */
509 if (section_id)
511 return 1;
512 }
513 }
514 }
515 break;
516 }
517 return 0;
518}
519/*
520 * Local variables:
521 * c-basic-offset: 4
522 * c-file-style: "Stroustrup"
523 * indent-tabs-mode: nil
524 * End:
525 * vim: shiftwidth=4 tabstop=8 expandtab
526 */
527
Zebra Block File Layer.
int bf_read(BFile bf, zint no, int offset, int nbytes, void *buf)
read from block file (may call exit)
Definition bfile.c:205
void bf_close(BFile bf)
closes a Block file (may call exit)
Definition bfile.c:139
BFile bf_open(BFiles bfs, const char *name, int block_size, int wflag)
opens and returns a Block file handle
Definition bfile.c:150
int bf_write(BFile bf, zint no, int offset, int nbytes, const void *buf)
writes block of bytes to file (may call exit)
Definition bfile.c:232
ISAMB_PP isamb_pp_open(ISAMB isamb, ISAM_P pos, int scope)
Definition isamb.c:1387
zint isamb_get_root_ptr(ISAMB b)
Definition isamb.c:1669
void isamb_close(ISAMB isamb)
Definition isamb.c:455
void isamb_set_root_ptr(ISAMB b, zint root_ptr)
Definition isamb.c:1674
int isamb_pp_forward(ISAMB_PP pp, void *buf, const void *untilbuf)
Definition isamb.c:1525
ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, int cache, int no_cat, int *sizes, int use_root_ptr)
Definition isamb.c:205
void isamb_pp_close(ISAMB_PP pp)
Definition isamb.c:1429
void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *data)
Definition isamb.c:1266
zint ISAM_P
Definition isamc.h:28
zint rec_sysno_to_int(zint sysno)
Definition records.c:130
int zebra_sort_read(zebra_sort_index_t si, zint *section_id, WRBUF w)
reads sort entry
Definition sortidx.c:470
void zebra_sort_close(zebra_sort_index_t si)
frees sort handle
Definition sortidx.c:209
static void zebra_sortf_rewind(struct sortFile *sf)
Definition sortidx.c:331
static void sort_term_code_reset(void *p)
Definition sortidx.c:140
static int sort_term_compare(const void *a, const void *b)
Definition sortidx.c:58
zebra_sort_index_t zebra_sort_open(BFiles bfs, int write_flag, int type)
creates sort handle
Definition sortidx.c:197
int zebra_sort_type(zebra_sort_index_t si, int id)
sets type for sort usage
Definition sortidx.c:235
#define SORT_MAX_TERM
Definition sortidx.c:35
static void sort_term_code_stop(void *p)
Definition sortidx.c:144
static void sort_term_encode1(void *p, char **dst, const char **src)
Definition sortidx.c:82
#define SORT_MAX_MULTI
Definition sortidx.c:36
static void sort_term_log_item(int level, const void *b, const char *txt)
Definition sortidx.c:48
static int sort_term_code_read(void *vp, char **dst, int *insertMode)
Definition sortidx.c:154
#define SORT_IDX_BLOCKSIZE
Definition sortidx.c:38
static void sort_term_decode2(void *p, char **dst, const char **src)
Definition sortidx.c:125
static void * sort_term_code_start(void)
Definition sortidx.c:77
static void sort_term_decode1(void *p, char **dst, const char **src)
Definition sortidx.c:108
void zebra_sort_sysno(zebra_sort_index_t si, zint sysno)
sets sort system number for read / add / delete
Definition sortidx.c:340
void zebra_sort_delete(zebra_sort_index_t si, zint section_id)
delete sort entry
Definition sortidx.c:356
static void sort_term_encode2(void *p, char **dst, const char **src)
Definition sortidx.c:94
void zebra_sort_add(zebra_sort_index_t si, zint section_id, WRBUF wrbuf)
adds multi-map content to sort file
Definition sortidx.c:393
#define ZEBRA_SORT_TYPE_ISAMB
Definition sortidx.h:37
struct zebra_sort_index * zebra_sort_index_t
sort index handle
Definition sortidx.h:34
#define ZEBRA_SORT_TYPE_FLAT
Definition sortidx.h:36
#define ZEBRA_SORT_TYPE_MULTI
Definition sortidx.h:38
#define SORT_IDX_ENTRYSIZE
Definition sortidx.h:29
int(* compare_item)(const void *a, const void *b)
Definition isamc.h:43
ISAM_CODEC codec
Definition isamc.h:46
void(* log_item)(int logmask, const void *p, const char *txt)
Definition isamc.h:44
void(* decode)(void *p, char **dst, const char **src)
Definition isam-codec.h:26
void(* stop)(void *p)
Definition isam-codec.h:25
void *(* start)(void)
Definition isam-codec.h:24
void(* encode)(void *p, char **dst, const char **src)
Definition isam-codec.h:27
void(* reset)(void *p)
Definition isam-codec.h:28
zint sysno_max
Definition sortidx.c:170
ISAM_P isam_p
Definition sortidx.c:179
ISAMB_PP isam_pp
Definition sortidx.c:180
BFile bf
Definition sortidx.c:176
int no_inserted
Definition sortidx.c:183
struct sortFileHead head
Definition sortidx.c:182
int no_deleted
Definition sortidx.c:184
int id
Definition sortidx.c:174
union sortFile::@25 u
struct sortFile * next
Definition sortidx.c:181
ISAMB isamb
Definition sortidx.c:177
struct sort_term st
Definition sortidx.c:151
zint section_id
Definition sortidx.c:42
zint sysno
Definition sortidx.c:41
char term[SORT_MAX_MULTI]
Definition sortidx.c:44
zint length
Definition sortidx.c:43
char * entry_buf
Definition sortidx.c:192
struct sortFile * current_file
Definition sortidx.c:193
struct sortFile * files
Definition sortidx.c:194
long zint
Zebra integer.
Definition util.h:66
void zebra_zint_decode(const char **src, zint *pos)
Definition zint.c:39
#define ZINT_FORMAT
Definition util.h:72
void zebra_zint_encode(char **dst, zint pos)
Definition zint.c:26