IDZEBRA 2.2.8
dicttest.c
Go to the documentation of this file.
1/* This file is part of the Zebra server.
2 Copyright (C) Index Data
3
4Zebra is free software; you can redistribute it and/or modify it under
5the terms of the GNU General Public License as published by the Free
6Software Foundation; either version 2, or (at your option) any later
7version.
8
9Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or
11FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18*/
19
20#if HAVE_CONFIG_H
21#include <config.h>
22#endif
23#include <stdlib.h>
24#include <string.h>
25#include <stdio.h>
26#include <ctype.h>
27
28#include <idzebra/dict.h>
29#include <idzebra/util.h>
30#include <idzebra/res.h>
31#include <yaz/yaz-util.h>
32#include <yaz/snprintf.h>
33
/* Program name: set from argv[0] at the start of main() and used in the
 * usage text and when initialising logging. */
34char *prog;
/* Dictionary handle under test; assigned from dict_open() in main(). */
35static Dict dict;
36
/* Number of matches reported to grep_handler(); main() resets it to 0
 * before each per-word grep lookup and tests it afterwards. */
37static int look_hits;
38
39static int grep_handler (char *name, const char *info, void *client)
40{
41 look_hits++;
42 printf("%s\n", name);
43 return 0;
44}
45
/*
 * Term callback handed to dict_scan().
 *
 * Prints the scanned term on a line of its own.  The info, pos and
 * client arguments are not used.  Always returns 0.
 */
static int scan_handler (char *name, const char *info, int pos, void *client)
{
    (void) info;
    (void) pos;
    (void) client;

    printf("%s\n", name);
    return 0;
}
51
52int main (int argc, char **argv)
53{
54 Res my_resource = 0;
55 BFiles bfs;
56 const char *name = NULL;
57 const char *inputfile = NULL;
58 const char *config = NULL;
59 const char *delete_term = NULL;
60 int scan_the_thing = 0;
61 int do_delete = 0;
62 int range = -1;
63 int srange = 0;
64 int rw = 0;
65 int infosize = 4;
66 int cache = 10;
67 int ret;
68 int unique = 0;
69 char *grep_pattern = NULL;
70 char *arg;
71 int no_of_iterations = 0;
72 int no_of_new = 0, no_of_same = 0, no_of_change = 0;
73 int no_of_hits = 0, no_of_misses = 0, no_not_found = 0, no_of_deleted = 0;
74 int max_pos;
75
76 prog = argv[0];
77 if (argc < 2)
78 {
79 fprintf(stderr, "usage:\n "
80 " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] "
81 "[-v n] [-i f] [-w] [-c n] config file\n\n",
82 prog);
83 fprintf(stderr, " -d delete instead of insert\n");
84 fprintf(stderr, " -D t delete subtree instead of insert\n");
85 fprintf(stderr, " -r n set regular match range\n");
86 fprintf(stderr, " -p n set regular match start range\n");
87 fprintf(stderr, " -u report if keys change during insert\n");
88 fprintf(stderr, " -g p try pattern n (see -r)\n");
89 fprintf(stderr, " -s n set info size to n (instead of 4)\n");
90 fprintf(stderr, " -v n set logging level\n");
91 fprintf(stderr, " -i f read file with words\n");
92 fprintf(stderr, " -w insert/delete instead of lookup\n");
93 fprintf(stderr, " -c n cache size (number of pages)\n");
94 fprintf(stderr, " -S scan the dictionary\n");
95 exit(1);
96 }
97 while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2)
98 {
99 if (ret == 0)
100 {
101 if (!config)
102 config = arg;
103 else if (!name)
104 name = arg;
105 else
106 {
107 yaz_log (YLOG_FATAL, "too many files specified\n");
108 exit (1);
109 }
110 }
111 else if (ret == 'D')
112 {
113 delete_term = arg;
114 }
115 else if (ret == 'd')
116 do_delete = 1;
117 else if (ret == 'g')
118 {
119 grep_pattern = arg;
120 }
121 else if (ret == 'r')
122 {
123 range = atoi (arg);
124 }
125 else if (ret == 'p')
126 {
127 srange = atoi (arg);
128 }
129 else if (ret == 'u')
130 {
131 unique = 1;
132 }
133 else if (ret == 'c')
134 {
135 cache = atoi(arg);
136 if (cache<2)
137 cache = 2;
138 }
139 else if (ret == 'w')
140 rw = 1;
141 else if (ret == 'i')
142 inputfile = arg;
143 else if (ret == 'S')
144 scan_the_thing = 1;
145 else if (ret == 's')
146 {
147 infosize = atoi(arg);
148 }
149 else if (ret == 'v')
150 {
151 yaz_log_init (yaz_log_mask_str(arg), prog, NULL);
152 }
153 else
154 {
155 yaz_log (YLOG_FATAL, "Unknown option '-%s'", arg);
156 exit (1);
157 }
158 }
159 if (!config || !name)
160 {
161 yaz_log (YLOG_FATAL, "no config and/or dictionary specified");
162 exit (1);
163 }
164 my_resource = res_open(0, 0);
165 if (!my_resource)
166 {
167 yaz_log (YLOG_FATAL, "cannot open resource `%s'", config);
168 exit (1);
169 }
170 res_read_file(my_resource, config);
171
172 bfs = bfs_create (res_get(my_resource, "register"), 0);
173 if (!bfs)
174 {
175 yaz_log (YLOG_FATAL, "bfs_create fail");
176 exit (1);
177 }
178 dict = dict_open (bfs, name, cache, rw, 0, 4096);
179 if (!dict)
180 {
181 yaz_log (YLOG_FATAL, "dict_open fail of `%s'", name);
182 exit (1);
183 }
184 if (inputfile)
185 {
186 FILE *ipf;
187 char ipf_buf[1024];
188 int line = 1;
189 char infobytes[120];
190 memset(infobytes, 0, sizeof(infobytes));
191
192 if (!(ipf = fopen(inputfile, "r")))
193 {
194 yaz_log (YLOG_FATAL|YLOG_ERRNO, "cannot open %s", inputfile);
195 exit (1);
196 }
197
198 while (fgets (ipf_buf, 1023, ipf))
199 {
200 char *ipf_ptr = ipf_buf;
201 yaz_snprintf(infobytes, sizeof(infobytes), "%d", line);
202 for (;*ipf_ptr && *ipf_ptr != '\n';ipf_ptr++)
203 {
204 if (isalpha(*ipf_ptr) || *ipf_ptr == '_')
205 {
206 int i = 1;
207 while (ipf_ptr[i] && (isalnum(ipf_ptr[i]) ||
208 ipf_ptr[i] == '_'))
209 i++;
210 if (ipf_ptr[i])
211 ipf_ptr[i++] = '\0';
212 if (rw)
213 {
214 if (do_delete)
215 switch (dict_delete (dict, ipf_ptr))
216 {
217 case 0:
218 no_not_found++;
219 break;
220 case 1:
221 no_of_deleted++;
222 }
223 else
224 switch(dict_insert (dict, ipf_ptr,
225 infosize, infobytes))
226 {
227 case 0:
228 no_of_new++;
229 break;
230 case 1:
231 no_of_change++;
232 if (unique)
233 yaz_log (YLOG_LOG, "%s change\n", ipf_ptr);
234 break;
235 case 2:
236 if (unique)
237 yaz_log (YLOG_LOG, "%s duplicate\n", ipf_ptr);
238 no_of_same++;
239 break;
240 }
241 }
242 else if(range < 0)
243 {
244 char *cp;
245
246 cp = dict_lookup (dict, ipf_ptr);
247 if (cp && *cp)
248 no_of_hits++;
249 else
250 no_of_misses++;
251 }
252 else
253 {
254 look_hits = 0;
255 dict_lookup_grep (dict, ipf_ptr, range, NULL,
256 &max_pos, srange, grep_handler);
257 if (look_hits)
258 no_of_hits++;
259 else
260 no_of_misses++;
261 }
262 ++no_of_iterations;
263 if ((no_of_iterations % 10000) == 0)
264 {
265 printf ("."); fflush(stdout);
266 }
267 ipf_ptr += (i-1);
268 }
269 }
270 ++line;
271 }
272 fclose (ipf);
273 }
274 if (rw && delete_term)
275 {
276 yaz_log (YLOG_LOG, "dict_delete_subtree %s", delete_term);
277 dict_delete_subtree (dict, delete_term, 0, 0);
278 }
279 if (grep_pattern)
280 {
281 if (range < 0)
282 range = 0;
283 yaz_log (YLOG_LOG, "Grepping '%s'", grep_pattern);
284 dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos,
285 srange, grep_handler);
286 }
287 if (rw)
288 {
289 yaz_log (YLOG_LOG, "Iterations.... %d", no_of_iterations);
290 if (do_delete)
291 {
292 yaz_log (YLOG_LOG, "No of deleted. %d", no_of_deleted);
293 yaz_log (YLOG_LOG, "No not found.. %d", no_not_found);
294 }
295 else
296 {
297 yaz_log (YLOG_LOG, "No of new..... %d", no_of_new);
298 yaz_log (YLOG_LOG, "No of change.. %d", no_of_change);
299 }
300 }
301 else
302 {
303 yaz_log (YLOG_LOG, "Lookups....... %d", no_of_iterations);
304 yaz_log (YLOG_LOG, "No of hits.... %d", no_of_hits);
305 yaz_log (YLOG_LOG, "No of misses.. %d", no_of_misses);
306 }
307 if (scan_the_thing)
308 {
309 char term_dict[1024];
310
311 int before = 1000000;
312 int after = 1000000;
313 yaz_log (YLOG_LOG, "dict_scan");
314 term_dict[0] = 1;
315 term_dict[1] = 0;
316 dict_scan (dict, term_dict, &before, &after, 0, scan_handler);
317 }
319 bfs_destroy (bfs);
320 res_close (my_resource);
321 return 0;
322}
323/*
324 * Local variables:
325 * c-basic-offset: 4
326 * c-file-style: "Stroustrup"
327 * indent-tabs-mode: nil
328 * End:
329 * vim: shiftwidth=4 tabstop=8 expandtab
330 */
331
BFiles bfs_create(const char *spec, const char *base)
creates a Block files collection
Definition bfile.c:56
void bfs_destroy(BFiles bfiles)
destroys a block files handle
Definition bfile.c:73
Zebra dictionary.
int dict_delete(Dict dict, const char *p)
deletes item from dictionary
Definition delete.c:260
Dict dict_open(BFiles bfs, const char *name, int cache, int rw, int compact_flag, int page_size)
open dictionary
Definition open.c:50
int dict_lookup_grep(Dict dict, const char *p, int range, void *client, int *max_pos, int init_pos, int(*f)(char *name, const char *info, void *client))
regular expression search with error correction
Definition lookgrep.c:374
int dict_delete_subtree(Dict dict, const char *p, void *client, int(*f)(const char *info, void *client))
delete items with a given prefix from dictionary
Definition delete.c:266
int dict_insert(Dict dict, const char *p, int userlen, void *userinfo)
insert item into dictionary
Definition insert.c:439
char * dict_lookup(Dict dict, const char *p)
lookup item in dictionary
Definition lookup.c:100
int dict_scan(Dict dict, char *str, int *before, int *after, void *client, int(*f)(char *name, const char *info, int pos, void *client))
dictionary scan
Definition scan.c:242
int dict_close(Dict dict)
closes dictionary
Definition close.c:32
int main(int argc, char **argv)
Definition dicttest.c:52
static int look_hits
Definition dicttest.c:37
static int scan_handler(char *name, const char *info, int pos, void *client)
Definition dicttest.c:46
static int grep_handler(char *name, const char *info, void *client)
Definition dicttest.c:39
static Dict dict
Definition dicttest.c:35
char * prog
Definition dicttest.c:34
void res_close(Res r)
Definition res.c:261
ZEBRA_RES res_read_file(Res r, const char *fname)
Definition res.c:146
Res res_open(Res res_def, Res over_res)
Definition res.c:234
const char * res_get(Res r, const char *name)
Definition res.c:294