IDZEBRA  2.2.7
insert.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 
21 
22 #if HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 #include <string.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <assert.h>
29 
30 #include "dict-p.h"
31 
32 static int dict_ins(Dict dict, const Dict_char *str,
33  Dict_ptr back_ptr, int userlen, void *userinfo);
34 static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
35  Dict_ptr subptr, char *userinfo);
36 static void checkPage(Dict dict, void *p);
37 
38 static Dict_ptr new_page(Dict dict, Dict_ptr back_ptr, void **pp)
39 {
40  void *p;
41  Dict_ptr ptr = dict->head.last;
42  if (!dict->head.freelist)
43  {
45  (dict->head.last)++;
46  }
47  else
48  {
49  ptr = dict->head.freelist;
50  dict_bf_readp(dict->dbf, ptr, &p);
52  }
53  assert(p);
54  DICT_type(p) = 0;
55  DICT_backptr(p) = back_ptr;
56  DICT_nodir(p) = 0;
59  if (pp)
60  *pp = p;
61  return ptr;
62 }
63 
64 static int split_page(Dict dict, Dict_ptr ptr, void *p)
65 {
66  void *subp;
67  char *info_here;
68  Dict_ptr subptr;
69  int i, j;
70  short *indxp, *best_indxp = NULL;
71  Dict_char best_char = 0;
72  Dict_char prev_char = 0;
73  int best_no = 0, no_current = 0;
74  int current_size = 0;
75  int best_size = 0;
76 
77  checkPage(dict, p);
78  dict->no_split++;
79  /* determine splitting char... */
80  indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
81  for (i = DICT_nodir(p); --i >= 0; --indxp)
82  {
83  if (*indxp > 0) /* tail string here! */
84  {
85  const Dict_char *term = (Dict_char*) p + *indxp;
86  Dict_char dc = *term;
87  if (dc != prev_char) {
88  current_size = 0;
89  no_current = 0;
90  prev_char = dc;
91  }
92  no_current++;
93  current_size += 2 + dict_strlen(term);
94  if (current_size > best_size) {
95  best_size = current_size;
96  best_char = dc;
97  best_indxp = indxp;
98  best_no = no_current;
99  }
100  }
101  }
102  assert(best_no > 0); /* we didn't find any tail string entry at all! */
103 
104  j = best_indxp - (short*) p;
105  i = DICT_nodir(p);
106  subptr = new_page(dict, ptr, &subp);
107  assert(i == DICT_nodir(p));
108  /* scan entries to see if there is a string with */
109  /* length 1. info_here indicates if such entry exist */
110  info_here = NULL;
111  char info_char = 0;
112  for (i=0; i<best_no; i++, j++)
113  {
114  char *info, *info1;
115  int slen;
116  Dict_char dc;
117 
118  info = (char*) p + ((short*) p)[j];
119  /* entry start */
120  memcpy(&dc, info, sizeof(dc));
121  assert(dc == best_char);
122  slen = 1+dict_strlen((Dict_char*) info);
123 
124  assert(slen > 1);
125  if (slen == 2)
126  {
127  assert(!info_here);
128  info_here = info+slen*sizeof(Dict_char);
129  info_char = *info_here;
130  }
131  else
132  {
133  info1 = info+slen*sizeof(Dict_char); /* info start */
134  dict_ins(dict, (Dict_char*) (info+sizeof(Dict_char)),
135  subptr, *info1, info1+1);
136  dict_bf_readp(dict->dbf, ptr, &p);
137  assert(info_here == NULL);
138  }
139  }
140  assert(info_here == NULL || info_char == *info_here);
141  /* now clean the page ... */
142  checkPage(dict, p);
143  clean_page(dict, ptr, p, &best_char, subptr, info_here);
144  return 0;
145 }
146 
147 static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
148  Dict_ptr subptr, char *userinfo)
149 {
150  char *np = (char *) xmalloc(dict->head.page_size);
151  int i, slen, no = 0;
152  short *indxp1, *indxp2;
153  char *info1, *info2;
154 
155  checkPage(dict, p);
156  DICT_bsize(np) = dict->head.page_size;
157  indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
158  indxp2 = (short*) ((char*) np+DICT_bsize(np));
159  info2 = (char*) np + DICT_infoffset;
160  for (i = DICT_nodir(p); --i >= 0; --indxp1)
161  {
162  if (*indxp1 > 0) /* tail string here! */
163  {
164  /* string (Dict_char *) DICT_EOS terminated */
165  /* unsigned char length of information */
166  /* char * information */
167 
168  info1 = (char*) p + *indxp1;
169  if (out && memcmp(out, info1, sizeof(Dict_char)) == 0)
170  {
171  if (subptr == 0)
172  continue;
173  *--indxp2 = -(info2 - np);
174  memcpy(info2, &subptr, sizeof(Dict_ptr));
175  info2 += sizeof(Dict_ptr);
176  memcpy(info2, out, sizeof(Dict_char));
177  info2 += sizeof(Dict_char);
178  if (userinfo)
179  {
180  memcpy(info2, userinfo, *userinfo+1);
181  info2 += *userinfo + 1;
182  }
183  else
184  *info2++ = 0;
185  subptr = 0;
186  ++no;
187  continue;
188  }
189  *--indxp2 = info2 - np;
190  slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char);
191  memcpy(info2, info1, slen);
192  info1 += slen;
193  info2 += slen;
194  }
195  else
196  {
197  /* Dict_ptr subptr */
198  /* Dict_char sub char */
199  /* unsigned char length of information */
200  /* char * information */
201 
202  assert(*indxp1 < 0);
203  *--indxp2 = -(info2 - np);
204  info1 = (char*) p - *indxp1;
205  memcpy(info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
206  info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
207  info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
208  }
209  slen = *info1+1;
210  memcpy(info2, info1, slen);
211  info2 += slen;
212  ++no;
213  }
214 #if 1
215  memcpy((char*)p+DICT_infoffset,
216  (char*)np+DICT_infoffset,
217  info2 - ((char*)np+DICT_infoffset));
218  memcpy((char*)p + ((char*)indxp2 - (char*)np),
219  indxp2,
220  ((char*) np+DICT_bsize(p)) - (char*)indxp2);
221 #else
222  memcpy((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
223  DICT_pagesize(dict)-DICT_infoffset);
224 #endif
225  DICT_size(p) = info2 - np;
226  DICT_type(p) = 0;
227  DICT_nodir(p) = no;
228  checkPage(dict, p);
229  xfree(np);
230  dict_bf_touch(dict->dbf, ptr);
231 }
232 
233 static void checkPage(Dict dict, void *p)
234 {
235  int check = DICT_size(p) <= (int)(DICT_bsize(p) - (DICT_nodir(p))*sizeof(short));
236  if (!check) {
237  yaz_log(YLOG_LOG, "checkPage failed");
238  yaz_log(YLOG_LOG, "DICT_size(p)=%ld", (long) DICT_size(p));
239  yaz_log(YLOG_LOG, "DICT_bsize(p)=%ld", (long) DICT_bsize(p));
240  yaz_log(YLOG_LOG, "DICT_nodir(p)=%ld", (long) DICT_nodir(p));
241  }
242  assert(check);
243 }
244 
245 /* return 0 if new */
246 /* return 1 if before but change of info */
247 /* return 2 if same as before */
248 
249 static int dict_ins(Dict dict, const Dict_char *str,
250  Dict_ptr ptr, int userlen, void *userinfo)
251 {
252  int hi, lo, mid, slen, cmp = 1;
253  short *indxp;
254  char *info;
255  void *p;
256 
257  dict_bf_readp(dict->dbf, ptr, &p);
258 
259  assert(p);
260  assert(ptr);
261 
262  mid = lo = 0;
263  hi = DICT_nodir(p)-1;
264  indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
265  while (lo <= hi)
266  {
267  mid = (lo+hi)/2;
268  if (indxp[-mid] > 0)
269  {
270  /* string (Dict_char *) DICT_EOS terminated */
271  /* unsigned char length of information */
272  /* char * information */
273  info = (char*)p + indxp[-mid];
274  cmp = dict_strcmp((Dict_char*) info, str);
275  if (!cmp)
276  {
277  info += (dict_strlen((Dict_char*) info)+1)*sizeof(Dict_char);
278  /* consider change of userinfo length... */
279  if (*info == userlen)
280  {
281  /* change of userinfo ? */
282  if (memcmp(info+1, userinfo, userlen))
283  {
284  dict_bf_touch(dict->dbf, ptr);
285  memcpy(info+1, userinfo, userlen);
286  return 1;
287  }
288  /* same userinfo */
289  return 2;
290  }
291  else if (*info > userlen)
292  {
293  /* room for new userinfo */
294  DICT_type(p) = 1;
295  *info = userlen;
296  dict_bf_touch(dict->dbf, ptr);
297  memcpy(info+1, userinfo, userlen);
298  return 1;
299  }
300  break;
301  }
302  }
303  else
304  {
305  Dict_char dc;
306  Dict_ptr subptr;
307 
308  /* Dict_ptr subptr */
309  /* Dict_char sub char */
310  /* unsigned char length of information */
311  /* char * information */
312  info = (char*)p - indxp[-mid];
313  memcpy(&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
314  cmp = dc- *str;
315  if (!cmp)
316  {
317  memcpy(&subptr, info, sizeof(Dict_ptr));
318  if (*++str == DICT_EOS)
319  {
320  /* finish of string. Store userinfo here... */
321 
322  int xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
323  if (xlen == userlen)
324  {
325  if (memcmp(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
326  userinfo, userlen))
327  {
328  dict_bf_touch(dict->dbf, ptr);
329  memcpy(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
330  userinfo, userlen);
331  return 1;
332  }
333  return 2;
334  }
335  else if (xlen > userlen)
336  {
337  DICT_type(p) = 1;
338  info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
339  memcpy(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
340  userinfo, userlen);
341  dict_bf_touch(dict->dbf, ptr);
342  return 1;
343  }
344  /* xlen < userlen, expanding needed ... */
345  if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
346  userlen >=
347  DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short))
348  {
349  /* not enough room - split needed ... */
350  if (DICT_type(p) == 1)
351  {
352  clean_page(dict, ptr, p, NULL, 0, NULL);
353  return dict_ins(dict, str-1, ptr,
354  userlen, userinfo);
355  }
356  if (split_page(dict, ptr, p))
357  {
358  yaz_log(YLOG_FATAL, "Unable to split page %d\n", ptr);
359  assert(0);
360  }
361  return dict_ins(dict, str-1, ptr, userlen, userinfo);
362  }
363  else
364  { /* enough room - no split needed ... */
365  info = (char*)p + DICT_size(p);
366  memcpy(info, &subptr, sizeof(subptr));
367  memcpy(info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
368  info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
369  memcpy(info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
370  userinfo, userlen);
371  indxp[-mid] = -DICT_size(p);
372  DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
373  +1+userlen;
374  DICT_type(p) = 1;
375  dict_bf_touch(dict->dbf, ptr);
376  }
377  if (xlen)
378  return 1;
379  return 0;
380  }
381  else
382  {
383  if (subptr == 0)
384  {
385  subptr = new_page(dict, ptr, NULL);
386  memcpy(info, &subptr, sizeof(subptr));
387  dict_bf_touch(dict->dbf, ptr);
388  }
389  return dict_ins(dict, str, subptr, userlen, userinfo);
390  }
391  }
392  }
393  if (cmp < 0)
394  lo = mid+1;
395  else
396  hi = mid-1;
397  }
398  indxp = indxp-mid;
399  if (lo>hi && cmp < 0)
400  --indxp;
401  slen = (dict_strlen(str)+1)*sizeof(Dict_char);
402  if (DICT_size(p)+slen+userlen >=
403  (int)(DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)))/* overflow? */
404  {
405  if (DICT_type(p))
406  {
407  clean_page(dict, ptr, p, NULL, 0, NULL);
408  return dict_ins(dict, str, ptr, userlen, userinfo);
409  }
410  split_page(dict, ptr, p);
411  return dict_ins(dict, str, ptr, userlen, userinfo);
412  }
413  if (cmp)
414  {
415  short *indxp1;
416  (DICT_nodir(p))++;
417  indxp1 = (short*)((char*) p + DICT_bsize(p)
418  - DICT_nodir(p)*sizeof(short));
419  for (; indxp1 != indxp; indxp1++)
420  indxp1[0] = indxp1[1];
421  }
422  else
423  DICT_type(p) = 1;
424  info = (char*)p + DICT_size(p);
425  memcpy(info, str, slen);
426  info += slen;
427  *info++ = userlen;
428  memcpy(info, userinfo, userlen);
429  info += userlen;
430 
431  *indxp = DICT_size(p);
432  DICT_size(p) = info- (char*) p;
433  dict_bf_touch(dict->dbf, ptr);
434  if (cmp)
435  return 0;
436  return 1;
437 }
438 
439 int dict_insert(Dict dict, const char *str, int userlen, void *userinfo)
440 {
441  if (!dict->rw)
442  return -1;
443  dict->no_insert++;
444  if (!dict->head.root)
445  {
446  void *p;
447  dict->head.root = new_page(dict, 0, &p);
448  if (!dict->head.root)
449  return -1;
450  }
451  assert(strlen(str) > 0);
452  return dict_ins(dict, (const Dict_char *) str, dict->head.root,
453  userlen, userinfo);
454 }
455 
456 /*
457  * Local variables:
458  * c-basic-offset: 4
459  * c-file-style: "Stroustrup"
460  * indent-tabs-mode: nil
461  * End:
462  * vim: shiftwidth=4 tabstop=8 expandtab
463  */
464 
#define DICT_type(x)
Definition: dict-p.h:103
unsigned Dict_ptr
Definition: dict-p.h:34
#define DICT_EOS
Definition: dict-p.h:102
int dict_bf_touch(Dict_BFile bf, int no)
Definition: drdwr.c:244
#define DICT_backptr(x)
Definition: dict-p.h:104
int dict_bf_readp(Dict_BFile bf, int no, void **bufp)
Definition: drdwr.c:188
#define DICT_infoffset
Definition: dict-p.h:108
int dict_strlen(const Dict_char *s)
Definition: open.c:118
int dict_strcmp(const Dict_char *s1, const Dict_char *s2)
Definition: open.c:108
#define DICT_bsize(x)
Definition: dict-p.h:105
unsigned char Dict_char
Definition: dict-p.h:33
#define DICT_size(x)
Definition: dict-p.h:107
int dict_bf_newp(Dict_BFile bf, int no, void **bufp, int nbytes)
Definition: drdwr.c:226
#define DICT_nodir(x)
Definition: dict-p.h:106
static Dict dict
Definition: dicttest.c:35
static Dict_ptr new_page(Dict dict, Dict_ptr back_ptr, void **pp)
Definition: insert.c:38
static void checkPage(Dict dict, void *p)
Definition: insert.c:233
static int dict_ins(Dict dict, const Dict_char *str, Dict_ptr back_ptr, int userlen, void *userinfo)
Definition: insert.c:249
static int split_page(Dict dict, Dict_ptr ptr, void *p)
Definition: insert.c:64
int dict_insert(Dict dict, const char *str, int userlen, void *userinfo)
insert item into dictionary
Definition: insert.c:439
static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out, Dict_ptr subptr, char *userinfo)
Definition: insert.c:147
int page_size
Definition: dict-p.h:38
Dict_ptr root
Definition: dict-p.h:40
Dict_ptr freelist
Definition: dict-p.h:40
Dict_ptr last
Definition: dict-p.h:40
zint no_split
Definition: dict-p.h:78
int rw
Definition: dict-p.h:73
struct Dict_head head
Definition: dict-p.h:83
zint no_insert
Definition: dict-p.h:80
Dict_BFile dbf
Definition: dict-p.h:74