IDZEBRA  2.2.7
lookupec.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26 #include <assert.h>
27 
28 #include "dict-p.h"
29 
30 typedef unsigned MatchWord;
31 
32 typedef struct {
34  int m;
35 } MatchInfo;
36 
37 #define SH(x) (((x)<<1)+1)
38 
39 static int lookup_ec(Dict dict, Dict_ptr ptr,
40  MatchInfo *mi, MatchWord *ri_base,
41  int pos, int (*userfunc)(char *), int range,
42  Dict_char *prefix)
43 {
44  int lo, hi;
45  void *p;
46  short *indxp;
47  char *info;
48  MatchWord match_mask = 1<<(mi->m-1);
49 
50  dict_bf_readp(dict->dbf, ptr, &p);
51  lo = 0;
52  hi = DICT_nodir(p)-1;
53  indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
54  while (lo <= hi)
55  {
56  if (indxp[-lo] > 0)
57  {
58  /* string (Dict_char *) DICT_EOS terminated */
59  /* unsigned char length of information */
60  /* char * information */
61  MatchWord *ri = ri_base, sc;
62  int i, j;
63  info = (char*)p + indxp[-lo];
64  for (j=0; ; j++)
65  {
66  Dict_char ch;
67 
68  memcpy(&ch, info+j*sizeof(Dict_char), sizeof(Dict_char));
69  prefix[pos+j] = ch;
70  if (ch == DICT_EOS)
71  {
72  if (ri[range] & match_mask)
73  (*userfunc)((char*) prefix);
74  break;
75  }
76  if (j+pos >= mi->m+range)
77  break;
78  sc = mi->s[ch & 255];
79  ri[1+range] = SH(ri[0]) & sc;
80  for (i=1; i<=range; i++)
81  ri[i+1+range] = (SH(ri[i]) & sc) | SH(ri[i-1])
82  | SH(ri[i+range]) | ri[i-1];
83  ri += 1+range;
84  if (!(ri[range] & (1<<(pos+j))))
85  break;
86  }
87  }
88  else
89  {
90  Dict_char ch;
91  MatchWord *ri = ri_base, sc;
92  int i;
93 
94  /* Dict_ptr subptr */
95  /* Dict_char sub char */
96  /* unsigned char length of information */
97  /* char * information */
98  info = (char*)p - indxp[-lo];
99  memcpy(&ch, info+sizeof(Dict_ptr), sizeof(Dict_char));
100  prefix[pos] = ch;
101 
102  sc = mi->s[ch & 255];
103  ri[1+range] = SH(ri[0]) & sc;
104  for (i=1; i<=range; i++)
105  ri[i+1+range] = (SH(ri[i]) & sc) | SH(ri[i-1])
106  | SH(ri[i+range]) | ri[i-1];
107  ri += 1+range;
108  if (ri[range] & (1<<pos))
109  {
110  Dict_ptr subptr;
111  if (info[sizeof(Dict_ptr)+sizeof(Dict_char)] &&
112  (ri[range] & match_mask))
113  {
114  prefix[pos+1] = DICT_EOS;
115  (*userfunc)((char*) prefix);
116  }
117  memcpy(&subptr, info, sizeof(Dict_ptr));
118  if (subptr)
119  {
120  lookup_ec(dict, subptr, mi, ri, pos+1,
121  userfunc, range, prefix);
122  dict_bf_readp(dict->dbf, ptr, &p);
123  indxp = (short*) ((char*) p +
124  DICT_bsize(p)-sizeof(short));
125  }
126  }
127  }
128  lo++;
129  }
130  return 0;
131 }
132 
134 {
135  int i;
136  MatchWord *s;
137  MatchInfo *mi;
138 
139  mi = (MatchInfo *) xmalloc(sizeof(*mi));
140  mi->m = dict_strlen(pattern);
141  mi->s = s = (MatchWord *) xmalloc(sizeof(*s)*256); /* 256 !!! */
142  for (i = 0; i < 256; i++)
143  s[i] = 0;
144  for (i = 0; pattern[i]; i++)
145  s[pattern[i]&255] += 1<<i;
146  return mi;
147 }
148 
149 int dict_lookup_ec(Dict dict, char *pattern, int range,
150  int (*userfunc)(char *name))
151 {
152  MatchInfo *mi;
153  MatchWord *ri;
154  int ret, i;
155  Dict_char prefix[2048];
156 
157  if (!dict->head.root)
158  return 0;
159 
160  mi = prepare_match((Dict_char*) pattern);
161 
162  ri = (MatchWord *) xmalloc((dict_strlen((Dict_char*) pattern)+range+2)
163  * (range+1)*sizeof(*ri));
164  for (i = 0; i <= range; i++)
165  ri[i] = (2<<i)-1;
166 
167  ret = lookup_ec(dict, dict->head.root, mi, ri, 0, userfunc,
168  range, prefix);
169  xfree(ri);
170  return ret;
171 }
172 
173 /*
174  * Local variables:
175  * c-basic-offset: 4
176  * c-file-style: "Stroustrup"
177  * indent-tabs-mode: nil
178  * End:
179  * vim: shiftwidth=4 tabstop=8 expandtab
180  */
181 
unsigned Dict_ptr
Definition: dict-p.h:34
#define DICT_EOS
Definition: dict-p.h:102
int dict_bf_readp(Dict_BFile bf, int no, void **bufp)
Definition: drdwr.c:188
int dict_strlen(const Dict_char *s)
Definition: open.c:118
#define DICT_bsize(x)
Definition: dict-p.h:105
unsigned char Dict_char
Definition: dict-p.h:33
#define DICT_nodir(x)
Definition: dict-p.h:106
static Dict dict
Definition: dicttest.c:35
unsigned MatchWord
Definition: grepper.c:38
unsigned MatchWord
Definition: lookupec.c:30
int dict_lookup_ec(Dict dict, char *pattern, int range, int(*userfunc)(char *name))
lookup item(s) in dictionary with error correction
Definition: lookupec.c:149
static MatchInfo * prepare_match(Dict_char *pattern)
Definition: lookupec.c:133
static int lookup_ec(Dict dict, Dict_ptr ptr, MatchInfo *mi, MatchWord *ri_base, int pos, int(*userfunc)(char *), int range, Dict_char *prefix)
Definition: lookupec.c:39
#define SH(x)
Definition: lookupec.c:37
Dict_ptr root
Definition: dict-p.h:40
struct Dict_head head
Definition: dict-p.h:83
Dict_BFile dbf
Definition: dict-p.h:74
int m
Definition: lookupec.c:34
MatchWord * s
Definition: lookupec.c:33