23 #include <yaz/options.h>
29 #include <yaz/xmalloc.h>
30 #include <yaz/marcdisp.h>
31 #include <yaz/timing.h>
61 b->
nmem = nmem_create();
70 nmem_destroy((*bp)->nmem);
76 static int cmp_ar(
const void *p1,
const void *p2)
96 int code_read(
void *vp,
char **dst,
int *insertMode)
114 memcpy(*dst, &key,
sizeof(key));
116 (*dst) +=
sizeof(key);
131 int no_words = 0, no_new_words = 0;
132 const char *dict_info = 0;
134 yaz_timing_t tim_dict = 0;
135 yaz_timing_t tim_isamb = 0;
149 tim_dict = yaz_timing_create();
160 assert(*dict_info ==
sizeof(word_id_seq));
161 memcpy(&word_id_seq, dict_info+1,
sizeof(word_id_seq));
167 assert(*dict_info ==
sizeof(isamc_p));
168 memcpy(&isamc_p, dict_info+1,
sizeof(isamc_p));
173 if (i > 0 && strcmp(b->
ar[i-1]->
term, b->
ar[i]->
term) == 0)
180 memcpy(&b->
ar[i]->
word_id, dict_info+1,
sizeof(
int));
194 yaz_timing_stop(tim_dict);
195 tim_isamb = yaz_timing_create();
212 yaz_timing_stop(tim_isamb);
220 printf(
"# run total dict-real user sys isam-real user sys "
221 " intsp leafsp docs postings words new d-spl\n");
224 printf(
"%5d %9.6f %9.6f %5.2f %5.2f %9.6f %5.2f %5.2f "
228 yaz_timing_get_real(tim_dict) + yaz_timing_get_real(tim_isamb),
229 yaz_timing_get_real(tim_dict),
230 yaz_timing_get_user(tim_dict),
231 yaz_timing_get_sys(tim_dict),
232 yaz_timing_get_real(tim_isamb),
233 yaz_timing_get_user(tim_isamb),
234 yaz_timing_get_sys(tim_isamb),
235 number_of_int_splits,
236 number_of_leaf_splits,
241 number_of_dict_splits
251 yaz_timing_destroy(&tim_isamb);
252 yaz_timing_destroy(&tim_dict);
258 int total = nmem_total(b->
nmem);
293 const char *cp = wrbuf_buf(wrbuf);
305 for (i = 0; i<6 && *cp; i++, cp++)
310 for (i = 0; i<4 && *cp; i++, cp++)
325 else if (*cp == subfield_char && cp[1])
334 else if (strchr(
"$*/-;,.:[]\"&(){} ", *cp))
345 unsigned ch = *(
const unsigned char *)cp;
346 if (sz <
sizeof(
term))
348 term[sz] = tolower(ch);
365 WRBUF wrbuf = wrbuf_alloc();
370 while(fgets(line,
sizeof(line)-1,
inf))
391 wrbuf_puts(wrbuf, line);
397 if (wrbuf_len(wrbuf))
402 if (line[0] !=
' ' && line[1] !=
' ' && line[2] !=
' ' &&
406 wrbuf_puts(wrbuf, line);
410 if (wrbuf_len(wrbuf))
426 int verbose,
int print_offset)
428 yaz_marc_t mt = yaz_marc_create();
429 WRBUF wrbuf = wrbuf_alloc();
439 r = fread (buf, 1, 5,
inf);
442 if (r && print_offset && verbose)
443 printf (
"<!-- Extra %ld bytes at end of file -->\n",
447 while (*buf < '0' || *buf >
'9')
450 long off = ftell(
inf) - 5;
451 if (verbose || print_offset)
452 printf(
"<!-- Skipping bad byte %d (0x%02X) at offset "
454 *buf & 0xff, *buf & 0xff,
456 for (i = 0; i<4; i++)
458 r = fread(buf+4, 1, 1,
inf);
464 if (verbose || print_offset)
465 printf (
"<!-- End of file with data -->\n");
468 len = atoi_n(buf, 5);
469 if (len < 25 || len > 100000)
471 long off = ftell(
inf) - 5;
472 printf(
"Bad Length %ld read at offset %ld (%lx)\n",
473 (
long)len, (
long) off, (
long) off);
477 r = fread (buf + 5, 1, rlen,
inf);
480 yaz_marc_read_iso2709(mt, buf, len);
482 if (yaz_marc_write_line(mt, wrbuf))
493 wrbuf_destroy(wrbuf);
494 yaz_marc_destroy(mt);
500 fprintf(stderr,
"benchindex1 [-t type] [-c d:i] [-m mem] [-i] [inputfile]\n");
504 int main(
int argc,
char **argv)
514 int isam_cache_size = 40;
515 int dict_cache_size = 50;
516 const char *fname = 0;
518 yaz_timing_t tim = 0;
520 const char *dict_info;
521 const char *type =
"iso2709";
522 int int_count_enable = 1;
524 while ((ret = options(
"im:t:c:N", argv, argc, &arg)) != -2)
535 if (!strcmp(arg,
"iso2709"))
537 else if (!strcmp(arg,
"line"))
541 fprintf(stderr,
"bad type: %s.\n", arg);
546 if (sscanf(arg,
"%d:%d", &dict_cache_size, &isam_cache_size)
549 fprintf(stderr,
"bad cache sizes for -c\n");
557 int_count_enable = 0;
560 fprintf(stderr,
"bad option.\n");
567 inf = fopen(fname,
"rb");
570 fprintf(stderr,
"Cannot open %s\n", fname);
574 printf(
"# benchindex1 %s %s\n", __DATE__, __TIME__);
575 printf(
"# isam_cache_size = %d\n", isam_cache_size);
576 printf(
"# dict_cache_size = %d\n", dict_cache_size);
577 printf(
"# int_count_enable = %d\n", int_count_enable);
578 printf(
"# memory = %d\n", memory);
590 method_postings.
debug = 0;
596 yaz_log(YLOG_WARN,
"bfs_create failed");
603 tim = yaz_timing_create();
605 isb_postings =
isamb_open (bfs,
"isamb", isam_cache_size ? 1 : 0,
606 &method_postings, 0);
609 yaz_log(YLOG_WARN,
"isamb_open failed");
619 assert(*dict_info ==
sizeof(docid_seq));
620 memcpy(&docid_seq, dict_info+1,
sizeof(docid_seq));
623 if (!strcmp(type,
"iso2709"))
626 else if (!strcmp(type,
"line"))
629 printf(
"# Total " ZINT_FORMAT " documents\n", docid_seq);
639 yaz_timing_stop(tim);
641 printf(
"# Total timings real=%8.6f user=%3.2f system=%3.2f\n",
642 yaz_timing_get_real(tim),
643 yaz_timing_get_user(tim),
644 yaz_timing_get_sys(tim));
646 yaz_timing_destroy(&tim);
void index_term(struct index_block *b, const char *term, zint docid, zint *seqno)
static int cmp_ar(const void *p1, const void *p2)
void index_block_add(struct index_block *b, const char *term, zint docid, zint seqno)
void index_block_flush(struct index_block *b, ISAMB isb, Dict dict, int no_docs)
int main(int argc, char **argv)
struct index_block * index_block_new(int memory)
void index_marc_from_file(ISAMB isb, Dict dict, zint *docid_seq, FILE *inf, int memory, int verbose, int print_offset)
void index_marc_line_records(ISAMB isb, Dict dict, zint *docid_seq, FILE *inf, int memory)
void index_block_destroy(struct index_block **bp)
void index_wrbuf(struct index_block *b, WRBUF wrbuf, zint docid, int subfield_char)
void index_block_check_flush(struct index_block *b, ISAMB isb, Dict dict, int no_docs)
int code_read(void *vp, char **dst, int *insertMode)
void bf_reset(BFiles bfs)
Removes register and shadow completely.
BFiles bfs_create(const char *spec, const char *base)
creates a Block files collection
void bfs_destroy(BFiles bfiles)
destroys a block files handle
char * dict_lookup(Dict dict, const char *p)
lookup item in dictionary
zint dict_get_no_split(Dict dict)
get number of page split operations, since dict_open
Dict dict_open(BFiles bfs, const char *name, int cache, int rw, int compact_flag, int page_size)
open dictionary
int dict_insert(Dict dict, const char *p, int userlen, void *userinfo)
insert item into dictionary
int dict_close(Dict dict)
closes dictionary
void isamb_set_int_count(ISAMB b, int v)
zint isamb_get_leaf_splits(ISAMB b)
ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, int cache)
zint isamb_get_int_splits(ISAMB b)
void isamb_close(ISAMB isamb)
void isamb_set_cache_size(ISAMB b, int sz)
void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *data)
void iscz1_decode(void *vp, char **dst, const char **src)
void iscz1_encode(void *vp, char **dst, const char **src)
int key_compare(const void *p1, const void *p2)
void key_logdump_txt(int logmask, const void *p, const char *txt)
void iscz1_reset(void *vp)
int(* read_item)(void *clientData, char **dst, int *insertMode)
int(* compare_item)(const void *a, const void *b)
void(* log_item)(int logmask, const void *p, const char *txt)
void(* decode)(void *p, char **dst, const char **src)
void(* encode)(void *p, char **dst, const char **src)
struct index_term * terms
zint mem[IT_KEY_LEVEL_MAX]