IDZEBRA 2.2.8
insert.c
Go to the documentation of this file.
1/* This file is part of the Zebra server.
2 Copyright (C) Index Data
3
4Zebra is free software; you can redistribute it and/or modify it under
5the terms of the GNU General Public License as published by the Free
6Software Foundation; either version 2, or (at your option) any later
7version.
8
9Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or
11FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18*/
19
20
21
22#if HAVE_CONFIG_H
23#include <config.h>
24#endif
25#include <string.h>
26#include <stdlib.h>
27#include <stdio.h>
28#include <assert.h>
29
30#include "dict-p.h"
31
32static int dict_ins(Dict dict, const Dict_char *str,
33 Dict_ptr back_ptr, int userlen, void *userinfo);
34static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
35 Dict_ptr subptr, char *userinfo);
36static void checkPage(Dict dict, void *p);
37
38static Dict_ptr new_page(Dict dict, Dict_ptr back_ptr, void **pp)
39{
40 void *p;
41 Dict_ptr ptr = dict->head.last;
42 if (!dict->head.freelist)
43 {
45 (dict->head.last)++;
46 }
47 else
48 {
49 ptr = dict->head.freelist;
50 dict_bf_readp(dict->dbf, ptr, &p);
52 }
53 assert(p);
54 DICT_type(p) = 0;
55 DICT_backptr(p) = back_ptr;
56 DICT_nodir(p) = 0;
59 if (pp)
60 *pp = p;
61 return ptr;
62}
63
64static int split_page(Dict dict, Dict_ptr ptr, void *p)
65{
66 void *subp;
67 char *info_here;
68 Dict_ptr subptr;
69 int i, j;
70 short *indxp, *best_indxp = NULL;
71 Dict_char best_char = 0;
72 Dict_char prev_char = 0;
73 int best_no = 0, no_current = 0;
74 int current_size = 0;
75 int best_size = 0;
76
77 checkPage(dict, p);
78 dict->no_split++;
79 /* determine splitting char... */
80 indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
81 for (i = DICT_nodir(p); --i >= 0; --indxp)
82 {
83 if (*indxp > 0) /* tail string here! */
84 {
85 const Dict_char *term = (Dict_char*) p + *indxp;
86 Dict_char dc = *term;
87 if (dc != prev_char) {
88 current_size = 0;
89 no_current = 0;
90 prev_char = dc;
91 }
92 no_current++;
93 current_size += 2 + dict_strlen(term);
94 if (current_size > best_size) {
95 best_size = current_size;
96 best_char = dc;
97 best_indxp = indxp;
98 best_no = no_current;
99 }
100 }
101 }
102 assert(best_no > 0); /* we didn't find any tail string entry at all! */
103
104 j = best_indxp - (short*) p;
105 i = DICT_nodir(p);
106 subptr = new_page(dict, ptr, &subp);
107 assert(i == DICT_nodir(p));
108 /* scan entries to see if there is a string with */
109 /* length 1. info_here indicates if such entry exist */
110 info_here = NULL;
111 char info_char = 0;
112 for (i=0; i<best_no; i++, j++)
113 {
114 char *info, *info1;
115 int slen;
116 Dict_char dc;
117
118 info = (char*) p + ((short*) p)[j];
119 /* entry start */
120 memcpy(&dc, info, sizeof(dc));
121 assert(dc == best_char);
122 slen = 1+dict_strlen((Dict_char*) info);
123
124 assert(slen > 1);
125 if (slen == 2)
126 {
127 assert(!info_here);
128 info_here = info+slen*sizeof(Dict_char);
129 info_char = *info_here;
130 }
131 else
132 {
133 info1 = info+slen*sizeof(Dict_char); /* info start */
134 dict_ins(dict, (Dict_char*) (info+sizeof(Dict_char)),
135 subptr, *info1, info1+1);
136 dict_bf_readp(dict->dbf, ptr, &p);
137 assert(info_here == NULL);
138 }
139 }
140 assert(info_here == NULL || info_char == *info_here);
141 /* now clean the page ... */
142 checkPage(dict, p);
143 clean_page(dict, ptr, p, &best_char, subptr, info_here);
144 return 0;
145}
146
147static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
148 Dict_ptr subptr, char *userinfo)
149{
150 char *np = (char *) xmalloc(dict->head.page_size);
151 int i, slen, no = 0;
152 short *indxp1, *indxp2;
153 char *info1, *info2;
154
155 checkPage(dict, p);
157 indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
158 indxp2 = (short*) ((char*) np+DICT_bsize(np));
159 info2 = (char*) np + DICT_infoffset;
160 for (i = DICT_nodir(p); --i >= 0; --indxp1)
161 {
162 if (*indxp1 > 0) /* tail string here! */
163 {
164 /* string (Dict_char *) DICT_EOS terminated */
165 /* unsigned char length of information */
166 /* char * information */
167
168 info1 = (char*) p + *indxp1;
169 if (out && memcmp(out, info1, sizeof(Dict_char)) == 0)
170 {
171 if (subptr == 0)
172 continue;
173 *--indxp2 = -(info2 - np);
174 memcpy(info2, &subptr, sizeof(Dict_ptr));
175 info2 += sizeof(Dict_ptr);
176 memcpy(info2, out, sizeof(Dict_char));
177 info2 += sizeof(Dict_char);
178 if (userinfo)
179 {
180 memcpy(info2, userinfo, *userinfo+1);
181 info2 += *userinfo + 1;
182 }
183 else
184 *info2++ = 0;
185 subptr = 0;
186 ++no;
187 continue;
188 }
189 *--indxp2 = info2 - np;
190 slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char);
191 memcpy(info2, info1, slen);
192 info1 += slen;
193 info2 += slen;
194 }
195 else
196 {
197 /* Dict_ptr subptr */
198 /* Dict_char sub char */
199 /* unsigned char length of information */
200 /* char * information */
201
202 assert(*indxp1 < 0);
203 *--indxp2 = -(info2 - np);
204 info1 = (char*) p - *indxp1;
205 memcpy(info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
206 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
207 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
208 }
209 slen = *info1+1;
210 memcpy(info2, info1, slen);
211 info2 += slen;
212 ++no;
213 }
214#if 1
215 memcpy((char*)p+DICT_infoffset,
216 (char*)np+DICT_infoffset,
217 info2 - ((char*)np+DICT_infoffset));
218 memcpy((char*)p + ((char*)indxp2 - (char*)np),
219 indxp2,
220 ((char*) np+DICT_bsize(p)) - (char*)indxp2);
221#else
222 memcpy((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
223 DICT_pagesize(dict)-DICT_infoffset);
224#endif
225 DICT_size(p) = info2 - np;
226 DICT_type(p) = 0;
227 DICT_nodir(p) = no;
228 checkPage(dict, p);
229 xfree(np);
230 dict_bf_touch(dict->dbf, ptr);
231}
232
233static void checkPage(Dict dict, void *p)
234{
235 int check = DICT_size(p) <= (int)(DICT_bsize(p) - (DICT_nodir(p))*sizeof(short));
236 if (!check) {
237 yaz_log(YLOG_LOG, "checkPage failed");
238 yaz_log(YLOG_LOG, "DICT_size(p)=%ld", (long) DICT_size(p));
239 yaz_log(YLOG_LOG, "DICT_bsize(p)=%ld", (long) DICT_bsize(p));
240 yaz_log(YLOG_LOG, "DICT_nodir(p)=%ld", (long) DICT_nodir(p));
241 }
242 assert(check);
243}
244
245/* return 0 if new */
246/* return 1 if before but change of info */
247/* return 2 if same as before */
248
249static int dict_ins(Dict dict, const Dict_char *str,
250 Dict_ptr ptr, int userlen, void *userinfo)
251{
252 int hi, lo, mid, slen, cmp = 1;
253 short *indxp;
254 char *info;
255 void *p;
256
257 dict_bf_readp(dict->dbf, ptr, &p);
258
259 assert(p);
260 assert(ptr);
261
262 mid = lo = 0;
263 hi = DICT_nodir(p)-1;
264 indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
265 while (lo <= hi)
266 {
267 mid = (lo+hi)/2;
268 if (indxp[-mid] > 0)
269 {
270 /* string (Dict_char *) DICT_EOS terminated */
271 /* unsigned char length of information */
272 /* char * information */
273 info = (char*)p + indxp[-mid];
274 cmp = dict_strcmp((Dict_char*) info, str);
275 if (!cmp)
276 {
277 info += (dict_strlen((Dict_char*) info)+1)*sizeof(Dict_char);
278 /* consider change of userinfo length... */
279 if (*info == userlen)
280 {
281 /* change of userinfo ? */
282 if (memcmp(info+1, userinfo, userlen))
283 {
284 dict_bf_touch(dict->dbf, ptr);
285 memcpy(info+1, userinfo, userlen);
286 return 1;
287 }
288 /* same userinfo */
289 return 2;
290 }
291 else if (*info > userlen)
292 {
293 /* room for new userinfo */
294 DICT_type(p) = 1;
295 *info = userlen;
296 dict_bf_touch(dict->dbf, ptr);
297 memcpy(info+1, userinfo, userlen);
298 return 1;
299 }
300 break;
301 }
302 }
303 else
304 {
305 Dict_char dc;
306 Dict_ptr subptr;
307
308 /* Dict_ptr subptr */
309 /* Dict_char sub char */
310 /* unsigned char length of information */
311 /* char * information */
312 info = (char*)p - indxp[-mid];
313 memcpy(&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
314 cmp = dc- *str;
315 if (!cmp)
316 {
317 memcpy(&subptr, info, sizeof(Dict_ptr));
318 if (*++str == DICT_EOS)
319 {
320 /* finish of string. Store userinfo here... */
321
322 int xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
323 if (xlen == userlen)
324 {
325 if (memcmp(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
326 userinfo, userlen))
327 {
328 dict_bf_touch(dict->dbf, ptr);
329 memcpy(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
330 userinfo, userlen);
331 return 1;
332 }
333 return 2;
334 }
335 else if (xlen > userlen)
336 {
337 DICT_type(p) = 1;
338 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
339 memcpy(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
340 userinfo, userlen);
341 dict_bf_touch(dict->dbf, ptr);
342 return 1;
343 }
344 /* xlen < userlen, expanding needed ... */
345 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
346 userlen >=
347 DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short))
348 {
349 /* not enough room - split needed ... */
350 if (DICT_type(p) == 1)
351 {
352 clean_page(dict, ptr, p, NULL, 0, NULL);
353 return dict_ins(dict, str-1, ptr,
354 userlen, userinfo);
355 }
356 if (split_page(dict, ptr, p))
357 {
358 yaz_log(YLOG_FATAL, "Unable to split page %d\n", ptr);
359 assert(0);
360 }
361 return dict_ins(dict, str-1, ptr, userlen, userinfo);
362 }
363 else
364 { /* enough room - no split needed ... */
365 info = (char*)p + DICT_size(p);
366 memcpy(info, &subptr, sizeof(subptr));
367 memcpy(info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
368 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
369 memcpy(info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
370 userinfo, userlen);
371 indxp[-mid] = -DICT_size(p);
372 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
373 +1+userlen;
374 DICT_type(p) = 1;
375 dict_bf_touch(dict->dbf, ptr);
376 }
377 if (xlen)
378 return 1;
379 return 0;
380 }
381 else
382 {
383 if (subptr == 0)
384 {
385 subptr = new_page(dict, ptr, NULL);
386 memcpy(info, &subptr, sizeof(subptr));
387 dict_bf_touch(dict->dbf, ptr);
388 }
389 return dict_ins(dict, str, subptr, userlen, userinfo);
390 }
391 }
392 }
393 if (cmp < 0)
394 lo = mid+1;
395 else
396 hi = mid-1;
397 }
398 indxp = indxp-mid;
399 if (lo>hi && cmp < 0)
400 --indxp;
401 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
402 if (DICT_size(p)+slen+userlen >=
403 (int)(DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)))/* overflow? */
404 {
405 if (DICT_type(p))
406 {
407 clean_page(dict, ptr, p, NULL, 0, NULL);
408 return dict_ins(dict, str, ptr, userlen, userinfo);
409 }
410 split_page(dict, ptr, p);
411 return dict_ins(dict, str, ptr, userlen, userinfo);
412 }
413 if (cmp)
414 {
415 short *indxp1;
416 (DICT_nodir(p))++;
417 indxp1 = (short*)((char*) p + DICT_bsize(p)
418 - DICT_nodir(p)*sizeof(short));
419 for (; indxp1 != indxp; indxp1++)
420 indxp1[0] = indxp1[1];
421 }
422 else
423 DICT_type(p) = 1;
424 info = (char*)p + DICT_size(p);
425 memcpy(info, str, slen);
426 info += slen;
427 *info++ = userlen;
428 memcpy(info, userinfo, userlen);
429 info += userlen;
430
431 *indxp = DICT_size(p);
432 DICT_size(p) = info- (char*) p;
433 dict_bf_touch(dict->dbf, ptr);
434 if (cmp)
435 return 0;
436 return 1;
437}
438
439int dict_insert(Dict dict, const char *str, int userlen, void *userinfo)
440{
441 if (!dict->rw)
442 return -1;
443 dict->no_insert++;
444 if (!dict->head.root)
445 {
446 void *p;
447 dict->head.root = new_page(dict, 0, &p);
448 if (!dict->head.root)
449 return -1;
450 }
451 assert(strlen(str) > 0);
452 return dict_ins(dict, (const Dict_char *) str, dict->head.root,
453 userlen, userinfo);
454}
455
456/*
457 * Local variables:
458 * c-basic-offset: 4
459 * c-file-style: "Stroustrup"
460 * indent-tabs-mode: nil
461 * End:
462 * vim: shiftwidth=4 tabstop=8 expandtab
463 */
464
#define DICT_type(x)
Definition dict-p.h:103
unsigned Dict_ptr
Definition dict-p.h:34
#define DICT_EOS
Definition dict-p.h:102
int dict_bf_touch(Dict_BFile bf, int no)
Definition drdwr.c:244
#define DICT_backptr(x)
Definition dict-p.h:104
int dict_bf_readp(Dict_BFile bf, int no, void **bufp)
Definition drdwr.c:188
#define DICT_infoffset
Definition dict-p.h:108
int dict_strlen(const Dict_char *s)
Definition open.c:118
int dict_strcmp(const Dict_char *s1, const Dict_char *s2)
Definition open.c:108
#define DICT_bsize(x)
Definition dict-p.h:105
unsigned char Dict_char
Definition dict-p.h:33
#define DICT_size(x)
Definition dict-p.h:107
int dict_bf_newp(Dict_BFile bf, int no, void **bufp, int nbytes)
Definition drdwr.c:226
#define DICT_nodir(x)
Definition dict-p.h:106
static Dict dict
Definition dicttest.c:35
static Dict_ptr new_page(Dict dict, Dict_ptr back_ptr, void **pp)
Definition insert.c:38
static void checkPage(Dict dict, void *p)
Definition insert.c:233
static int dict_ins(Dict dict, const Dict_char *str, Dict_ptr back_ptr, int userlen, void *userinfo)
Definition insert.c:249
static int split_page(Dict dict, Dict_ptr ptr, void *p)
Definition insert.c:64
int dict_insert(Dict dict, const char *str, int userlen, void *userinfo)
insert item into dictionary
Definition insert.c:439
static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out, Dict_ptr subptr, char *userinfo)
Definition insert.c:147
int page_size
Definition dict-p.h:38
Dict_ptr root
Definition dict-p.h:40
Dict_ptr freelist
Definition dict-p.h:40
Dict_ptr last
Definition dict-p.h:40
zint no_split
Definition dict-p.h:78
struct Dict_head head
Definition dict-p.h:83
zint no_insert
Definition dict-p.h:80
Dict_BFile dbf
Definition dict-p.h:74