IDZEBRA 2.2.8
extract.c
Go to the documentation of this file.
1/* This file is part of the Zebra server.
2 Copyright (C) Index Data
3
4Zebra is free software; you can redistribute it and/or modify it under
5the terms of the GNU General Public License as published by the Free
6Software Foundation; either version 2, or (at your option) any later
7version.
8
9Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or
11FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18*/
19
24#if HAVE_CONFIG_H
25#include <config.h>
26#endif
27#include <stdio.h>
28#include <assert.h>
29#include <ctype.h>
30#ifdef WIN32
31#include <io.h>
32#endif
33#if HAVE_UNISTD_H
34#include <unistd.h>
35#endif
36#include <fcntl.h>
37
38
39#include "index.h"
40#include "orddict.h"
41#include <direntz.h>
42#include <charmap.h>
43#include <yaz/snprintf.h>
44
/* Log level of the "extract" log module; assigned in zebra_init_log_level(). */
45static int log_level_extract = 0;
/* Log level of the "indexdetails" log module; assigned in zebra_init_log_level(). */
46static int log_level_details = 0;
/* NOTE(review): presumably a run-once guard for zebra_init_log_level(); no
   read or write of it is visible in this listing -- confirm. */
47static int log_level_initialized = 0;
48
49static void extract_flush_record_keys2(ZebraHandle zh, zint sysno,
50 zebra_rec_keys_t ins_keys,
51 zint ins_rank,
52 zebra_rec_keys_t del_keys,
53 zint del_rank);
54
/*
 * Look up the log levels of the "extract" and "indexdetails" log modules
 * with yaz_log_module_level() and store them in the file-level variables
 * log_level_extract and log_level_details.
 *
 * NOTE(review): the embedded listing numbers jump 56 -> 58 -> 60, so the
 * statement that opens the inner block and the first statement inside it
 * are not visible here -- confirm against the complete source.
 */
55static void zebra_init_log_level(void)
56{
58 {
60
61 log_level_extract = yaz_log_module_level("extract");
62 log_level_details = yaz_log_module_level("indexdetails");
63 }
64}
65
66static WRBUF wrbuf_hex_str(const char *cstr)
67{
68 size_t i;
69 WRBUF w = wrbuf_alloc();
70 for (i = 0; cstr[i]; i++)
71 {
72 if (cstr[i] < ' ' || cstr[i] > 126)
73 wrbuf_printf(w, "\\%02X", cstr[i] & 0xff);
74 else
75 wrbuf_putc(w, cstr[i]);
76 }
77 return w;
78}
79
80
81static void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
82 int cmd, zebra_rec_keys_t skp);
83static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid);
84static void extract_token_add(RecWord *p);
85
87{
89 {
90 yaz_log(YLOG_LOG, "More than %d file log entries. Omitting rest (-fn to see more)",
92 }
93}
94
/*
 * Write a "Records: ..." progress line through yaz_log() each time
 * zh->records_processed is a multiple of 1000.
 *
 * NOTE(review): listing numbers 97-98 and 102-104 are absent, so whatever
 * precedes the check and the remaining arguments of the yaz_log() call are
 * not visible in this view.
 */
95static void logRecord(ZebraHandle zh)
96{
99 if (!(zh->records_processed % 1000))
100 {
101 yaz_log(YLOG_LOG, "Records: "ZINT_FORMAT" i/u/d "
105 }
106}
107
108static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
109{
110 ctrl->flagShowRecords = !zh->m_flag_rw;
111}
112
113
114static void extract_add_index_string(RecWord *p,
116 const char *str, int length);
117
119
120static void extract_init(struct recExtractCtrl *p, RecWord *w)
121{
122 w->seqno = 1;
123 w->index_name = "any";
124 w->index_type = "w";
125 w->extractCtrl = p;
126 w->record_id = 0;
127 w->section_id = 0;
128 w->segment = 0;
129}
130
135
137 char *buf)
138{
139 const char *b = p->term_buf;
140 const char **map = 0;
141 int i = 0, remain = p->term_len;
142
143 if (remain > 0)
144 map = zebra_maps_input(zm, &b, remain, 1);
145 while (remain > 0 && i < IT_MAX_WORD)
146 {
147 while (map && *map && **map == *CHR_SPACE)
148 {
149 remain = p->term_len - (b - p->term_buf);
150
151 if (remain > 0)
152 {
153 int first = i ? 0 : 1; /* first position */
154 map = zebra_maps_input(zm, &b, remain, first);
155 }
156 else
157 map = 0;
158 }
159 if (!map)
160 break;
161
162 if (i && i < IT_MAX_WORD)
163 buf[i++] = *CHR_SPACE;
164 while (map && *map && **map != *CHR_SPACE)
165 {
166 const char *cp = *map;
167
168 if (**map == *CHR_CUT)
169 {
170 i = 0;
171 }
172 else
173 {
174 if (i >= IT_MAX_WORD)
175 break;
176 while (i < IT_MAX_WORD && *cp)
177 buf[i++] = *(cp++);
178 }
179 remain = p->term_len - (b - p->term_buf);
180 if (remain > 0)
181 {
182 map = zebra_maps_input(zm, &b, remain, 0);
183 }
184 else
185 map = 0;
186 }
187 }
188 return i;
189}
190
/*
 * Snippet handling for a term that is indexed as one complete field.
 *
 * The term in p is first run through parse_complete_field(); when that
 * yields a length of zero the function returns without touching p.
 * Otherwise, if the term is non-empty and the map is an index map, a call
 * receives the raw term (p->term_buf, p->term_len), and p->seqno is
 * advanced by one.
 *
 * NOTE(review): listing line 202 (the start of the call whose arguments end
 * on line 203) is missing from this view, so the callee is not visible.
 */
191static void snippet_add_complete_field(RecWord *p, int ord,
192 zebra_map_t zm)
193{
/* per-extraction snippet state stored in the control block's handle */
194 struct snip_rec_info *h = p->extractCtrl->handle;
/* scratch buffer for the normalised term; only the returned length is
   used in the lines visible here */
195 char buf[IT_MAX_WORD+1];
196 int i = parse_complete_field(p, zm, buf);
197
/* nothing left after normalisation: skip the term entirely */
198 if (!i)
199 return;
200
201 if (p->term_len && p->term_buf && zebra_maps_is_index(zm))
203 p->term_buf, p->term_len);
204 p->seqno++;
205}
206
208{
209 struct snip_rec_info *h = p->extractCtrl->handle;
210 const char *b = p->term_buf;
211 int remain = p->term_len;
212 int first = 1;
213 const char **map = 0;
214 const char *start = b;
215 const char *last = b;
216
217 if (remain > 0)
218 map = zebra_maps_input(zm, &b, remain, 0);
219
220 while (map)
221 {
222 int remain;
223
224 /* Skip spaces */
225 while (map && *map && **map == *CHR_SPACE)
226 {
227 remain = p->term_len - (b - p->term_buf);
228 last = b;
229 if (remain > 0)
230 map = zebra_maps_input(zm, &b, remain, 0);
231 else
232 map = 0;
233 }
234 if (!map)
235 break;
236 if (start != last && zebra_maps_is_index(zm))
237 {
239 start, last - start);
240 }
241 start = last;
242 while (map && *map && **map != *CHR_SPACE)
243 {
244 remain = p->term_len - (b - p->term_buf);
245 last = b;
246 if (remain > 0)
247 map = zebra_maps_input(zm, &b, remain, 0);
248 else
249 map = 0;
250 }
251 if (start == last)
252 return ;
253
254 if (first)
255 {
256 first = 0;
258 {
259 /* first in field marker */
260 p->seqno++;
261 }
262 }
263 if (start != last && zebra_maps_is_index(zm))
265 start, last - start);
266 start = last;
267 p->seqno++;
268 }
269
270}
271
/*
 * Snippet handling for a term whose map is tokenised by
 * zebra_map_tokenize_next().
 *
 * For every token returned, p->seqno is advanced by one; when the map is an
 * index map a call is additionally made with the token's display form
 * (display_buf, display_len).
 *
 * NOTE(review): listing lines 282 (just before the loop) and 287 (the start
 * of the call whose arguments end on line 288) are missing from this view.
 */
272static void snippet_add_icu(RecWord *p, int ord, zebra_map_t zm)
273{
/* per-extraction snippet state stored in the control block's handle */
274 struct snip_rec_info *h = p->extractCtrl->handle;
275
/* first buffer/length pair filled in by zebra_map_tokenize_next() */
276 const char *res_buf = 0;
277 size_t res_len = 0;
278
/* second buffer/length pair filled in by zebra_map_tokenize_next() */
279 const char *display_buf = 0;
280 size_t display_len = 0;
281
283 while (zebra_map_tokenize_next(zm, &res_buf, &res_len,
284 &display_buf, &display_len))
285 {
286 if (zebra_maps_is_index(zm))
288 display_buf, display_len);
/* the sequence number advances for every token, indexed or not */
289 p->seqno++;
290 }
291}
292
294{
295 struct snip_rec_info *h = p->extractCtrl->handle;
296 ZebraHandle zh = h->zh;
298
299 if (zm)
300 {
301 ZebraExplainInfo zei = zh->reg->zei;
304
305 if (zebra_maps_is_icu(zm))
306 snippet_add_icu(p, ch, zm);
307 else
308 {
311 else
313 }
314 }
315}
316
318 struct recExtractCtrl *p, Odr_oid *oid)
319{
320
321}
322
324 struct ZebraRecStream *stream,
325 RecType rt, void *recTypeClientData)
326{
327 struct recExtractCtrl extractCtrl;
328 struct snip_rec_info info;
329
330 extractCtrl.stream = stream;
331 extractCtrl.first_record = 1;
332 extractCtrl.init = extract_init;
333 extractCtrl.tokenAdd = snippet_token_add;
334 extractCtrl.schemaAdd = snippet_schema_add;
335 assert(zh->reg);
336 assert(zh->reg->dh);
337
338 extractCtrl.dh = zh->reg->dh;
339
340 info.zh = zh;
341 info.snippets = sn;
342 extractCtrl.handle = &info;
343 extractCtrl.match_criteria[0] = '\0';
344 extractCtrl.staticrank = 0;
345 extractCtrl.action = action_insert;
346
347 init_extractCtrl(zh, &extractCtrl);
348
349 extractCtrl.setStoreData = 0;
350
351 (*rt->extract)(recTypeClientData, &extractCtrl);
352}
353
355 zebra_rec_keys_t reckeys,
356 const char *index_name,
357 const char **ws, int ws_length)
358{
359 int i;
360 int ch = -1;
362
363 for (i = 0; i<ws_length; i++)
364 ws[i] = NULL;
365
366 if (ch < 0)
367 ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "0", index_name);
368 if (ch < 0)
369 ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "p", index_name);
370 if (ch < 0)
371 ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "w", index_name);
372
373 if (ch < 0)
374 return ;
375
376 if (zebra_rec_keys_rewind(reckeys))
377 {
378 zint startSeq = -1;
379 const char *str;
380 size_t slen;
381 struct it_key key;
382 zint seqno;
383 while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
384 {
385 assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
386
387 seqno = key.mem[key.len-1];
388
389 if (key.mem[0] == ch)
390 {
391 zint woff;
392
393 if (startSeq == -1)
394 startSeq = seqno;
395 woff = seqno - startSeq;
396 if (woff >= 0 && woff < ws_length)
397 ws[woff] = str;
398 }
399 }
400 }
401}
402
/* Blank characters (tab and space) that the match-specification parser
   below tests for with strchr() when skipping between tokens. */
403#define FILE_MATCH_BLANK "\t "
404
406 zebra_rec_keys_t reckeys,
407 const char *fname, const char *spec)
408{
409 static char dstBuf[2048]; /* static here ??? */
410 char *dst = dstBuf;
411 const char *s = spec;
412
413 while (1)
414 {
415 for (; *s && strchr(FILE_MATCH_BLANK, *s); s++)
416 ;
417 if (!*s)
418 break;
419 if (*s == '(')
420 {
421 const char *ws[32];
422 char attset_str[64], attname_str[64];
423 int i;
424 int first = 1;
425
426 for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
427 ;
428 for (i = 0; *s && *s != ',' && *s != ')' &&
429 !strchr(FILE_MATCH_BLANK, *s); s++)
430 if (i+1 < sizeof(attset_str))
431 attset_str[i++] = *s;
432 attset_str[i] = '\0';
433
434 for (; strchr(FILE_MATCH_BLANK, *s); s++)
435 ;
436 if (*s != ',')
437 strcpy(attname_str, attset_str);
438 else
439 {
440 for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
441 ;
442 for (i = 0; *s && *s != ')' &&
443 !strchr(FILE_MATCH_BLANK, *s); s++)
444 if (i+1 < sizeof(attname_str))
445 attname_str[i++] = *s;
446 attname_str[i] = '\0';
447 }
448 if (*s != ')')
449 {
450 yaz_log(YLOG_WARN, "Missing ) in match criteria %s in group %s",
451 spec, zh->m_group ? zh->m_group : "none");
452 return NULL;
453 }
454 s++;
455
456 searchRecordKey(zh, reckeys, attname_str, ws, 32);
457 if (0) /* for debugging */
458 {
459 for (i = 0; i<32; i++)
460 {
461 if (ws[i])
462 {
463 WRBUF w = wrbuf_hex_str(ws[i]);
464 yaz_log(YLOG_LOG, "ws[%d] = %s", i, wrbuf_cstr(w));
465 wrbuf_destroy(w);
466 }
467 }
468 }
469
470 for (i = 0; i<32; i++)
471 if (ws[i])
472 {
473 if (first)
474 {
475 *dst++ = ' ';
476 first = 0;
477 }
478 strcpy(dst, ws[i]);
479 dst += strlen(ws[i]);
480 }
481 if (first)
482 {
483 yaz_log(YLOG_WARN, "Record didn't contain match"
484 " fields in (%s,%s)", attset_str, attname_str);
485 return NULL;
486 }
487 }
488 else if (*s == '$')
489 {
490 int spec_len;
491 char special[64];
492 const char *spec_src = NULL;
493 const char *s1 = ++s;
494 while (*s1 && !strchr(FILE_MATCH_BLANK, *s1))
495 s1++;
496
497 spec_len = s1 - s;
498 if (spec_len > sizeof(special)-1)
499 spec_len = sizeof(special)-1;
500 memcpy(special, s, spec_len);
501 special[spec_len] = '\0';
502 s = s1;
503
504 if (!strcmp(special, "group"))
505 spec_src = zh->m_group;
506 else if (!strcmp(special, "database"))
507 spec_src = zh->basenames[0];
508 else if (!strcmp(special, "filename")) {
509 spec_src = fname;
510 }
511 else if (!strcmp(special, "type"))
512 spec_src = zh->m_record_type;
513 else
514 spec_src = NULL;
515 if (spec_src)
516 {
517 strcpy(dst, spec_src);
518 dst += strlen(spec_src);
519 }
520 }
521 else if (*s == '\"' || *s == '\'')
522 {
523 int stopMarker = *s++;
524 char tmpString[64];
525 int i = 0;
526
527 while (*s && *s != stopMarker)
528 {
529 if (i+1 < sizeof(tmpString))
530 tmpString[i++] = *s++;
531 }
532 if (*s)
533 s++;
534 tmpString[i] = '\0';
535 strcpy(dst, tmpString);
536 dst += strlen(tmpString);
537 }
538 else
539 {
540 yaz_log(YLOG_WARN, "Syntax error in match criteria %s in group %s",
541 spec, zh->m_group ? zh->m_group : "none");
542 return NULL;
543 }
544 *dst++ = 1;
545 }
546 if (dst == dstBuf)
547 {
548 yaz_log(YLOG_WARN, "No match criteria for record %s in group %s",
549 fname, zh->m_group ? zh->m_group : "none");
550 return NULL;
551 }
552 *dst = '\0';
553
554 if (0) /* for debugging */
555 {
556 WRBUF w = wrbuf_hex_str(dstBuf);
557 yaz_log(YLOG_LOG, "get_match_from_spec %s", wrbuf_cstr(w));
558 wrbuf_destroy(w);
559 }
560
561 return dstBuf;
562}
563
565 const char *fname;
567 struct recordGroup *rGroup;
568};
569
/*
 * Prepare the "_ALLRECORDS" entry for a record.
 *
 * A RecWord is reset with extract_init() and then pointed at index
 * "_ALLRECORDS" of type "w", carrying record_id as its record id and sysno
 * as its sequence number.
 *
 * NOTE(review): listing line 593 (the start of the call whose arguments end
 * with "", 0 on line 594) is missing from this view; presumably it adds the
 * word with an empty term -- confirm.
 */
580static void all_matches_add(struct recExtractCtrl *ctrl, zint record_id,
581 zint sysno)
582{
583 RecWord word;
584 extract_init(ctrl, &word);
585 word.record_id = record_id;
586 /* we use the seqno as placeholder for a way to get back to
587 record database from _ALLRECORDS.. This is used if a custom
588 RECORD was defined */
589 word.seqno = sysno;
590 word.index_name = "_ALLRECORDS";
591 word.index_type = "w";
592
594 "", 0);
595}
596
597/* forward declaration */
599 struct ZebraRecStream *stream,
600 enum zebra_recctrl_action_t action,
601 const char *recordType,
602 zint *sysno,
603 const char *match_criteria,
604 const char *fname,
606 void *recTypeClientData);
607
608
/*
 * Process the records contained in one file.
 *
 * Visible steps:
 *  - build the resource prefix "<group>." (empty when zh->m_group is unset);
 *  - take the file extension from fname (text after the last '.' that
 *    follows the last '/');
 *  - when the handle has no record type, look up
 *    "<prefix>recordType.<ext>" in zh->res; if there is still none the file
 *    is logged with "? <fname>", counted in zh->records_skipped and 0 is
 *    returned;
 *  - when the handle has no record id, look up "<prefix>recordId.<ext>";
 *  - unless sysno is given together with a delete action, open the file
 *    (relative names are placed under zh->path_reg when that is set) and
 *    wrap the descriptor in a ZebraRecStream;
 *  - hand over to zebra_extract_records_stream() and restore
 *    zh->m_record_type to the value it had on entry.
 *
 * zh:     Zebra handle.
 * sysno:  optional record number; combined with action_delete or
 *         action_a_delete the file itself is not opened.
 * fname:  name of the file to process.
 * action: requested record action.
 * returns the result of zebra_extract_records_stream(), ZEBRA_FAIL when the
 *         record type is unknown or the file cannot be opened, or 0 when no
 *         record type could be determined.
 *
 * NOTE(review): listing lines 612, 618, 622, 651-652 and 665 are missing
 * from this view (among them the declarations of r and recType and the call
 * that resolves recType).
 */
609ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname,
610 enum zebra_recctrl_action_t action)
611{
613 int i, fd;
614 char gprefix[128];
615 char ext[128];
616 char ext_res[266];
617 const char *original_record_type = 0;
619 void *recTypeClientData;
620 struct ZebraRecStream stream, *streamp;
621
623
624 if (!zh->m_group || !*zh->m_group)
625 *gprefix = '\0';
626 else
627 yaz_snprintf(gprefix, sizeof(gprefix), "%s.", zh->m_group);
628
629 yaz_log(log_level_extract, "zebra_extract_file %s", fname);
630
631 /* determine file extension */
632 *ext = '\0';
/* scan backwards; stop at the first '/' (no extension) or '.' */
633 for (i = strlen(fname); --i >= 0; )
634 if (fname[i] == '/')
635 break;
636 else if (fname[i] == '.')
637 {
/* NOTE(review): unbounded strcpy into ext[128]; an extension of 128 or
   more characters would overflow the buffer */
638 strcpy(ext, fname+i+1);
639 break;
640 }
641 /* determine file type - depending on extension */
/* remember the caller's setting so it can be restored before returning */
642 original_record_type = zh->m_record_type;
643 if (!zh->m_record_type)
644 {
645 yaz_snprintf(ext_res, sizeof(ext_res), "%srecordType.%s", gprefix, ext);
646 zh->m_record_type = res_get(zh->res, ext_res);
647 }
648 if (!zh->m_record_type)
649 {
650 check_log_limit(zh);
653 yaz_log(YLOG_LOG, "? %s", fname);
654 zh->records_skipped++;
655 return 0;
656 }
657 /* determine match criteria */
658 if (!zh->m_record_id)
659 {
660 yaz_snprintf(ext_res, sizeof(ext_res), "%srecordId.%s", gprefix, ext);
661 zh->m_record_id = res_get(zh->res, ext_res);
662 }
663
664 if (!(recType =
666 &recTypeClientData)))
667 {
668 yaz_log(YLOG_WARN, "No such record type: %s", zh->m_record_type);
/* NOTE(review): unlike the other exits below, this return does not put
   original_record_type back into zh->m_record_type -- confirm whether
   that is intended */
669 return ZEBRA_FAIL;
670 }
671
/* any filter version other than 0 only produces a warning; processing
   continues */
672 switch(recType->version)
673 {
674 case 0:
675 break;
676 default:
677 yaz_log(YLOG_WARN, "Bad filter version: %s", zh->m_record_type);
678 }
679 if (sysno && (action == action_delete || action == action_a_delete))
680 {
/* deleting a record identified by sysno: no input stream is passed */
681 streamp = 0;
682 }
683 else
684 {
685 char full_rep[1024];
686
/* NOTE(review): the strcpy/strcat calls below are not bounded by
   sizeof(full_rep); a long path_reg/fname combination would overflow */
687 if (zh->path_reg && !yaz_is_abspath(fname))
688 {
689 strcpy(full_rep, zh->path_reg);
690 strcat(full_rep, "/");
691 strcat(full_rep, fname);
692 }
693 else
694 strcpy(full_rep, fname);
695
696 if ((fd = open(full_rep, O_BINARY|O_RDONLY)) == -1)
697 {
698 yaz_log(YLOG_WARN|YLOG_ERRNO, "open %s", full_rep);
699 zh->m_record_type = original_record_type;
700 return ZEBRA_FAIL;
701 }
702 streamp = &stream;
703 zebra_create_stream_fd(streamp, fd, 0);
704 }
705 r = zebra_extract_records_stream(zh, streamp,
706 action,
707 zh->m_record_type,
708 sysno,
709 0, /*match_criteria */
710 fname,
711 recType, recTypeClientData);
/* release the stream only when one was created above */
712 if (streamp)
713 stream.destroy(streamp);
714 zh->m_record_type = original_record_type;
715 return r;
716}
717
718/*
719 If sysno is provided, then it's used to identify the record.
720 If not, and match_criteria is provided, then sysno is guessed
721 If not, and a record is provided, then sysno is got from there
722
723 */
724
726 const char *buf, size_t buf_size,
727 enum zebra_recctrl_action_t action,
728 const char *recordType,
729 zint *sysno,
730 const char *match_criteria,
731 const char *fname)
732{
733 struct ZebraRecStream stream;
734 ZEBRA_RES res;
735 void *clientData;
736 RecType recType = 0;
737
738 if (recordType && *recordType)
739 {
740 yaz_log(log_level_extract,
741 "Record type explicitly specified: %s", recordType);
742 recType = recType_byName(zh->reg->recTypes, zh->res, recordType,
743 &clientData);
744 }
745 else
746 {
747 if (!(zh->m_record_type))
748 {
749 yaz_log(YLOG_WARN, "No such record type defined");
750 return ZEBRA_FAIL;
751 }
752 yaz_log(log_level_extract, "Get record type from rgroup: %s",
753 zh->m_record_type);
755 zh->m_record_type, &clientData);
756 recordType = zh->m_record_type;
757 }
758
759 if (!recType)
760 {
761 yaz_log(YLOG_WARN, "No such record type: %s", recordType);
762 return ZEBRA_FAIL;
763 }
764
765 zebra_create_stream_mem(&stream, buf, buf_size);
766
767 res = zebra_extract_records_stream(zh, &stream,
768 action,
769 recordType,
770 sysno,
771 match_criteria,
772 fname,
773 recType, clientData);
774 stream.destroy(&stream);
775 return res;
776}
777
779 struct ZebraRecStream *stream,
780 enum zebra_recctrl_action_t action,
781 const char *recordType,
782 zint *sysno,
783 const char *match_criteria,
784 const char *fname,
786 void *recTypeClientData,
787 int *more)
788
789{
790 zint sysno0 = 0;
791 RecordAttr *recordAttr;
792 struct recExtractCtrl extractCtrl;
793 int r;
794 const char *matchStr = 0;
795 Record rec;
796 off_t start_offset = 0, end_offset = 0;
797 const char *pr_fname = fname; /* filename to print .. */
798 int show_progress = zh->records_processed + zh->records_skipped
799 < zh->m_file_verbose_limit ? 1:0;
800
802
803 if (!pr_fname)
804 pr_fname = "<no file>"; /* make it printable if file is omitted */
805
808
809 if (zebraExplain_curDatabase(zh->reg->zei, zh->basenames[0]))
810 {
811 if (zebraExplain_newDatabase(zh->reg->zei, zh->basenames[0],
813 return ZEBRA_FAIL;
814 }
815
816 if (stream)
817 {
818 off_t null_offset = 0;
819 extractCtrl.stream = stream;
820
821 start_offset = stream->tellf(stream);
822
823 extractCtrl.first_record = start_offset ? 0 : 1;
824
825 stream->endf(stream, &null_offset);;
826
827 extractCtrl.init = extract_init;
828 extractCtrl.tokenAdd = extract_token_add;
829 extractCtrl.schemaAdd = extract_schema_add;
830 extractCtrl.dh = zh->reg->dh;
831 extractCtrl.handle = zh;
832 extractCtrl.match_criteria[0] = '\0';
833 extractCtrl.staticrank = 0;
834 extractCtrl.action = action;
835
836 init_extractCtrl(zh, &extractCtrl);
837
838 extract_set_store_data_prepare(&extractCtrl);
839
840 r = (*recType->extract)(recTypeClientData, &extractCtrl);
841
842 if (action == action_update)
843 {
844 action = extractCtrl.action;
845 }
846
847 switch (r)
848 {
850 return ZEBRA_FAIL;
852 /* error occured during extraction ... */
853 yaz_log(YLOG_WARN, "extract error: generic");
854 return ZEBRA_FAIL;
856 /* error occured during extraction ... */
857 yaz_log(YLOG_WARN, "extract error: no such filter");
858 return ZEBRA_FAIL;
860 if (show_progress)
861 yaz_log(YLOG_LOG, "skip %s %s " ZINT_FORMAT,
862 recordType, pr_fname, (zint) start_offset);
863 *more = 1;
864
865 end_offset = stream->endf(stream, 0);
866 if (end_offset)
867 stream->seekf(stream, end_offset);
868
869 return ZEBRA_OK;
871 break;
872 default:
873 yaz_log(YLOG_WARN, "extract error: unknown error: %d", r);
874 return ZEBRA_FAIL;
875 }
876 end_offset = stream->endf(stream, 0);
877 if (end_offset)
878 stream->seekf(stream, end_offset);
879 else
880 end_offset = stream->tellf(stream);
881
882 if (extractCtrl.match_criteria[0])
883 match_criteria = extractCtrl.match_criteria;
884 }
885
886 *more = 1;
887
888 if (zh->m_flag_rw == 0)
889 {
890 yaz_log(YLOG_LOG, "test %s %s " ZINT_FORMAT, recordType,
891 pr_fname, (zint) start_offset);
892 /* test mode .. Do not perform match */
893 return ZEBRA_OK;
894 }
895
896 if (!sysno)
897 {
898 sysno = &sysno0;
899
901 matchStr = match_criteria;
902 else
903 {
904 if (zh->m_record_id && *zh->m_record_id)
905 {
906 matchStr = get_match_from_spec(zh, zh->reg->keys, pr_fname,
907 zh->m_record_id);
908 if (!matchStr)
909 {
910 yaz_log(YLOG_LOG, "error %s %s " ZINT_FORMAT, recordType,
911 pr_fname, (zint) start_offset);
912 return ZEBRA_FAIL;
913 }
914 if (0 && matchStr)
915 {
916 WRBUF w = wrbuf_alloc();
917 size_t i;
918 for (i = 0; i < strlen(matchStr); i++)
919 {
920 wrbuf_printf(w, "%02X", matchStr[i] & 0xff);
921 }
922 yaz_log(YLOG_LOG, "Got match %s", wrbuf_cstr(w));
923 wrbuf_destroy(w);
924 }
925 }
926 }
927 if (matchStr)
928 {
929 int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
930 char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
931 matchStr);
932
933
935 {
936 WRBUF w = wrbuf_hex_str(matchStr);
937 yaz_log(log_level_extract, "matchStr: %s", wrbuf_cstr(w));
938 wrbuf_destroy(w);
939 }
940 if (rinfo)
941 {
942 assert(*rinfo == sizeof(*sysno));
943 memcpy(sysno, rinfo+1, sizeof(*sysno));
944 }
945 }
946 }
947
948 if (! *sysno)
949 {
950 /* new record AKA does not exist already */
951 if (action == action_delete)
952 {
953 yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
954 pr_fname, (zint) start_offset);
955 yaz_log(YLOG_WARN, "cannot delete record above (seems new)");
956 return ZEBRA_FAIL;
957 }
958 else if (action == action_a_delete)
959 {
960 if (show_progress)
961 yaz_log(YLOG_LOG, "adelete %s %s " ZINT_FORMAT, recordType,
962 pr_fname, (zint) start_offset);
963 return ZEBRA_OK;
964 }
965 else if (action == action_replace)
966 {
967 yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
968 pr_fname, (zint) start_offset);
969 yaz_log(YLOG_WARN, "cannot update record above (seems new)");
970 return ZEBRA_FAIL;
971 }
972 if (show_progress)
973 yaz_log(YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname,
974 (zint) start_offset);
975 rec = rec_new(zh->reg->records);
976
977 *sysno = rec->sysno;
978
979
980 if (stream)
981 {
982 all_matches_add(&extractCtrl,
984 *sysno);
985 }
986
987
988 recordAttr = rec_init_attr(zh->reg->zei, rec);
989 if (extractCtrl.staticrank < 0)
990 {
991 yaz_log(YLOG_WARN, "Negative staticrank for record. Set to 0");
992 extractCtrl.staticrank = 0;
993 }
994
995 if (matchStr)
996 {
997 int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
998 dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
999 sizeof(*sysno), sysno);
1000 }
1001
1002 extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
1003 extract_flush_record_keys2(zh, *sysno,
1004 zh->reg->keys, extractCtrl.staticrank,
1005 0, recordAttr->staticrank);
1006 recordAttr->staticrank = extractCtrl.staticrank;
1007 zh->records_inserted++;
1008 }
1009 else
1010 {
1011 /* record already exists */
1014 if (action == action_insert)
1015 {
1016 yaz_log(YLOG_LOG, "skipped %s %s " ZINT_FORMAT,
1017 recordType, pr_fname, (zint) start_offset);
1018 logRecord(zh);
1019 return ZEBRA_FAIL;
1020 }
1021
1022 rec = rec_get(zh->reg->records, *sysno);
1023 assert(rec);
1024
1025 if (stream)
1026 {
1027 all_matches_add(&extractCtrl,
1029 *sysno);
1030 }
1031
1032 recordAttr = rec_init_attr(zh->reg->zei, rec);
1033
1034 /* decrease total size */
1036 - recordAttr->recordSize);
1037
1038 zebra_rec_keys_set_buf(delkeys,
1039 rec->info[recInfo_delKeys],
1040 rec->size[recInfo_delKeys],
1041 0);
1042 zebra_rec_keys_set_buf(sortKeys,
1043 rec->info[recInfo_sortKeys],
1044 rec->size[recInfo_sortKeys],
1045 0);
1046
1047 extract_flush_sort_keys(zh, *sysno, 0, sortKeys);
1049 {
1050 /* record going to be deleted */
1051 extract_flush_record_keys2(zh, *sysno, 0, recordAttr->staticrank,
1052 delkeys, recordAttr->staticrank);
1053 if (zebra_rec_keys_empty(delkeys))
1054 {
1055 yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
1056 pr_fname, (zint) start_offset);
1057 yaz_log(YLOG_WARN, "cannot delete file above, "
1058 "storeKeys false (3)");
1059 }
1060 else
1061 {
1062 if (show_progress)
1063 yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
1064 pr_fname, (zint) start_offset);
1065 zh->records_deleted++;
1066 if (matchStr)
1067 {
1068 int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
1069 dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
1070 }
1071 rec_del(zh->reg->records, &rec);
1072 }
1073 zebra_rec_keys_close(delkeys);
1074 zebra_rec_keys_close(sortKeys);
1075 rec_free(&rec);
1076 logRecord(zh);
1077 return ZEBRA_OK;
1078 }
1079 else
1080 { /* update or special_update */
1081 if (show_progress)
1082 yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
1083 pr_fname, (zint) start_offset);
1084 extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
1085
1086 extract_flush_record_keys2(zh, *sysno,
1087 zh->reg->keys, extractCtrl.staticrank,
1088 delkeys, recordAttr->staticrank);
1089 recordAttr->staticrank = extractCtrl.staticrank;
1090 zh->records_updated++;
1091 }
1092 zebra_rec_keys_close(delkeys);
1093 zebra_rec_keys_close(sortKeys);
1094 }
1095 /* update file type */
1096 xfree(rec->info[recInfo_fileType]);
1097 rec->info[recInfo_fileType] =
1098 rec_strdup(recordType, &rec->size[recInfo_fileType]);
1099
1100 /* update filename */
1101 xfree(rec->info[recInfo_filename]);
1102 rec->info[recInfo_filename] =
1103 rec_strdup(fname, &rec->size[recInfo_filename]);
1104
1105 /* update delete keys */
1106 xfree(rec->info[recInfo_delKeys]);
1107 if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1)
1108 {
1110 &rec->info[recInfo_delKeys],
1111 &rec->size[recInfo_delKeys]);
1112 }
1113 else
1114 {
1115 rec->info[recInfo_delKeys] = NULL;
1116 rec->size[recInfo_delKeys] = 0;
1117 }
1118 /* update sort keys */
1119 xfree(rec->info[recInfo_sortKeys]);
1120
1122 &rec->info[recInfo_sortKeys],
1123 &rec->size[recInfo_sortKeys]);
1124
1125 if (stream)
1126 {
1127 recordAttr->recordSize = end_offset - start_offset;
1129 recordAttr->recordSize);
1130 }
1131
1132 /* set run-number for this record */
1133 recordAttr->runNumber =
1135
1136 /* update store data */
1137 xfree(rec->info[recInfo_storeData]);
1138
1139 /* update store data */
1140 if (zh->store_data_buf)
1141 {
1144 zh->store_data_buf = 0;
1145 recordAttr->recordSize = zh->store_data_size;
1146 }
1147 else if (zh->m_store_data)
1148 {
1149 off_t cur_offset = stream->tellf(stream);
1150
1151 rec->size[recInfo_storeData] = recordAttr->recordSize;
1152 rec->info[recInfo_storeData] = (char *)
1153 xmalloc(recordAttr->recordSize);
1154 stream->seekf(stream, start_offset);
1156 recordAttr->recordSize);
1157 stream->seekf(stream, cur_offset);
1158 }
1159 else
1160 {
1161 rec->info[recInfo_storeData] = NULL;
1162 rec->size[recInfo_storeData] = 0;
1163 }
1164 /* update database name */
1165 xfree(rec->info[recInfo_databaseName]);
1168
1169 /* update offset */
1170 recordAttr->recordOffset = start_offset;
1171
1172 /* commit this record */
1173 rec_put(zh->reg->records, &rec);
1174 logRecord(zh);
1175 return ZEBRA_OK;
1176}
1177
1191 struct ZebraRecStream *stream,
1193 const char *recordType,
1194 zint *sysno,
1195 const char *match_criteria,
1196 const char *fname,
1198 void *recTypeClientData)
1199{
1200 ZEBRA_RES res = ZEBRA_OK;
1201 while (1)
1202 {
1203 int more = 0;
1205 action,
1206 recordType,
1207 sysno,
1209 fname,
1210 recType, recTypeClientData, &more);
1211 if (!more)
1212 {
1213 res = ZEBRA_OK;
1214 break;
1215 }
1216 if (res != ZEBRA_OK)
1217 break;
1218 if (sysno)
1219 break;
1220 }
1221 return res;
1222}
1223
1225{
1227 struct recExtractCtrl extractCtrl;
1228
1231 {
1232 abort();
1234 rec->info[recInfo_databaseName], 0))
1235 abort();
1236 }
1237
1240
1241 extractCtrl.init = extract_init;
1242 extractCtrl.tokenAdd = extract_token_add;
1243 extractCtrl.schemaAdd = extract_schema_add;
1244 extractCtrl.dh = zh->reg->dh;
1245
1246 init_extractCtrl(zh, &extractCtrl);
1247
1248 extractCtrl.flagShowRecords = 0;
1249 extractCtrl.match_criteria[0] = '\0';
1250 extractCtrl.staticrank = 0;
1251 extractCtrl.action = action_update;
1252
1253 extractCtrl.handle = handle;
1254 extractCtrl.first_record = 1;
1255
1256 extract_set_store_data_prepare(&extractCtrl);
1257
1258 if (n)
1259 grs_extract_tree(&extractCtrl, n);
1260
1261 if (rec->size[recInfo_delKeys])
1262 {
1264
1266
1268 rec->size[recInfo_delKeys],
1269 0);
1271 zh->reg->keys, 0, delkeys, 0);
1272 zebra_rec_keys_close(delkeys);
1273
1275 rec->size[recInfo_sortKeys],
1276 0);
1277
1278 extract_flush_sort_keys(zh, rec->sysno, 0, sortkeys);
1279 zebra_rec_keys_close(sortkeys);
1280 }
1281 else
1282 {
1283 extract_flush_record_keys2(zh, rec->sysno, zh->reg->keys, 0, 0, 0);
1284 }
1285 extract_flush_sort_keys(zh, rec->sysno, 1, zh->reg->sortKeys);
1286
1287 xfree(rec->info[recInfo_delKeys]);
1289 &rec->info[recInfo_delKeys],
1290 &rec->size[recInfo_delKeys]);
1291
1292 xfree(rec->info[recInfo_sortKeys]);
1294 &rec->info[recInfo_sortKeys],
1295 &rec->size[recInfo_sortKeys]);
1296 return ZEBRA_OK;
1297}
1298
1300 const char *str, size_t slen, NMEM nmem, int level)
1301{
1302 WRBUF w = wrbuf_alloc();
1303 int ord = CAST_ZINT_TO_INT(key->mem[0]);
1304 const char *index_type;
1305 int i;
1306 const char *string_index;
1307
1308 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
1309 0/* db */, &string_index);
1310 assert(index_type);
1311 for (i = 0; i < key->len; i++)
1312 {
1313 wrbuf_printf(w, ZINT_FORMAT " ", key->mem[i]);
1314 }
1315
1316 if (*str < CHR_BASE_CHAR)
1317 {
1318 int i;
1319 WRBUF w1 = wrbuf_alloc();
1320
1321 if (!strcmp(str, ""))
1322 wrbuf_puts(w1, "alwaysmatches");
1323 if (!strcmp(str, FIRST_IN_FIELD_STR))
1324 wrbuf_puts(w1, "firstinfield");
1325 else if (!strcmp(str, CHR_UNKNOWN))
1326 wrbuf_puts(w1, "unknown");
1327 else if (!strcmp(str, CHR_SPACE))
1328 wrbuf_puts(w1, "space");
1329 else
1330 wrbuf_puts(w1, "?");
1331
1332 for (i = 0; i<slen; i++)
1333 {
1334 wrbuf_printf(w1, " %d", str[i] & 0xff);
1335 }
1336 yaz_log(level, "%s%s %s %s", wrbuf_cstr(w), index_type,
1337 string_index, wrbuf_cstr(w1));
1338 wrbuf_destroy(w1);
1339 }
1340 else
1341 {
1342 char *dst_term = 0;
1343 zebra_term_untrans_iconv(zh, nmem, index_type, &dst_term, str);
1344 if (dst_term)
1345 yaz_log(level, "%s%s %s \"%s\"", wrbuf_cstr(w), index_type,
1346 string_index, dst_term);
1347 else
1348 {
1349 WRBUF w1 = wrbuf_alloc();
1350 wrbuf_write_escaped(w1, str, strlen(str));
1351 yaz_log(level, "%s%s %s %s", wrbuf_cstr(w), index_type,
1352 string_index, wrbuf_cstr(w1));
1353 wrbuf_destroy(w1);
1354 }
1355 }
1356 wrbuf_destroy(w);
1357}
1358
1359void extract_rec_keys_log(ZebraHandle zh, int is_insert,
1360 zebra_rec_keys_t reckeys,
1361 int level)
1362{
1363 if (zebra_rec_keys_rewind(reckeys))
1364 {
1365 size_t slen;
1366 const char *str;
1367 struct it_key key;
1368 NMEM nmem = nmem_create();
1369
1370 while(zebra_rec_keys_read(reckeys, &str, &slen, &key))
1371 {
1372 zebra_it_key_str_dump(zh, &key, str, slen, nmem, level);
1373 nmem_reset(nmem);
1374 }
1375 nmem_destroy(nmem);
1376 }
1377}
1378
1380 zebra_rec_keys_t reckeys)
1381{
1382 ZebraExplainInfo zei = zh->reg->zei;
1383 struct ord_stat {
1384 int no;
1385 int ord;
1386 struct ord_stat *next;
1387 };
1388
1389 if (zebra_rec_keys_rewind(reckeys))
1390 {
1391 struct ord_stat *ord_list = 0;
1392 struct ord_stat *p;
1393 size_t slen;
1394 const char *str;
1395 struct it_key key_in;
1396 while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1397 {
1398 int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
1399
1400 for (p = ord_list; p ; p = p->next)
1401 if (p->ord == ord)
1402 {
1403 p->no++;
1404 break;
1405 }
1406 if (!p)
1407 {
1408 p = xmalloc(sizeof(*p));
1409 p->no = 1;
1410 p->ord = ord;
1411 p->next = ord_list;
1412 ord_list = p;
1413 }
1414 }
1415
1416 p = ord_list;
1417 while (p)
1418 {
1419 struct ord_stat *p1 = p;
1420
1421 if (is_insert)
1422 zebraExplain_ord_adjust_occurrences(zei, p->ord, p->no, 1);
1423 else
1424 zebraExplain_ord_adjust_occurrences(zei, p->ord, - p->no, -1);
1425 p = p->next;
1426 xfree(p1);
1427 }
1428 }
1429}
1430
/* extract_flush_record_keys2 -- remaining parameters and body.
 * NOTE(review): the first line of the signature (listing line 1431) is
 * not present in this extract.
 *
 * Writes the keys of one record (sysno) to zh->reg->key_block.  Keys read
 * from del_keys are written with cmd 0 and rank del_rank; keys read from
 * ins_keys are written with cmd 1 and rank ins_rank.  Either key set may
 * be null.  A delete/insert pair read in the same iteration that agrees
 * in rank, key and term bytes is not written at all and is only counted
 * as "optimized".
 */
1432 ZebraHandle zh, zint sysno,
1433 zebra_rec_keys_t ins_keys, zint ins_rank,
1434 zebra_rec_keys_t del_keys, zint del_rank)
1435{
/* zei is not referenced by any line visible in this extract
 * NOTE(review): presumably used on the missing lines 1452/1459 - confirm. */
1436 ZebraExplainInfo zei = zh->reg->zei;
1437 int normal = 0;
1438 int optimized = 0;
1439
/* Create the key block on first use.  Size is resource "memmax"
 * (default "8") times 1024*1024; temp directory is "keyTmpDir"
 * (default "."); thread flag is "threads" (default "1"). */
1440 if (!zh->reg->key_block)
1441 {
1442 size_t mem = 1024*1024 * atol(res_get_def(zh->res, "memmax", "8"));
1443 const char *key_tmp_dir = res_get_def(zh->res, "keyTmpDir", ".");
1444 int use_threads = atoi(res_get_def(zh->res, "threads", "1"));
1445 zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads);
1446 }
1447
/* extract_rec_keys_adjust reads through the key set, so each set is
 * rewound again before the merge loop below. */
1448 if (ins_keys)
1449 {
1450 extract_rec_keys_adjust(zh, 1, ins_keys);
1451 if (!del_keys)
/* NOTE(review): listing line 1452 is missing here.  As shown, the
 * `if (!del_keys)` above appears to guard the rewind below, which may not
 * match the real source - confirm against the original file. */
1453 zebra_rec_keys_rewind(ins_keys);
1454 }
1455 if (del_keys)
1456 {
1457 extract_rec_keys_adjust(zh, 0, del_keys);
1458 if (!ins_keys)
/* NOTE(review): listing line 1459 is missing here; same caveat as above. */
1460 zebra_rec_keys_rewind(del_keys);
1461 }
1462
/* Read one key from each set per iteration until both are exhausted. */
1463 while (1)
1464 {
1465 size_t del_slen;
1466 const char *del_str;
1467 struct it_key del_key_in;
1468 int del = 0;
1469
1470 size_t ins_slen;
1471 const char *ins_str;
1472 struct it_key ins_key_in;
1473 int ins = 0;
1474
1475 if (del_keys)
1476 del = zebra_rec_keys_read(del_keys, &del_str, &del_slen,
1477 &del_key_in);
1478 if (ins_keys)
1479 ins = zebra_rec_keys_read(ins_keys, &ins_str, &ins_slen,
1480 &ins_key_in);
1481
/* Identical delete and insert at the same position: write neither. */
1482 if (del && ins && ins_rank == del_rank
1483 && !key_compare(&del_key_in, &ins_key_in)
1484 && ins_slen == del_slen && !memcmp(del_str, ins_str, del_slen))
1485 {
1486 optimized++;
1487 continue;
1488 }
1489 if (!del && !ins)
1490 break;
1491
/* The delete (cmd 0) is written before the insert (cmd 1). */
1492 normal++;
1493 if (del)
1494 key_block_write(zh->reg->key_block, sysno,
1495 &del_key_in, 0, del_str, del_slen,
1496 del_rank, zh->m_staticrank);
1497 if (ins)
1498 key_block_write(zh->reg->key_block, sysno,
1499 &ins_key_in, 1, ins_str, ins_slen,
1500 ins_rank, zh->m_staticrank);
1501 }
1502 yaz_log(log_level_extract, "normal=%d optimized=%d", normal, optimized);
1503}
1504
/* zebra_rec_keys_to_snippets1 -- remaining parameters and body.
 * NOTE(review): the first line of the signature (listing line 1505) is
 * not present in this extract.
 *
 * Appends one snippet entry to `snippets` for every key in `reckeys`.
 * The snippet's seqno is the last element of the key, its ord is the
 * first element, and its term is `str` converted by
 * zebra_term_untrans_iconv().  Always returns ZEBRA_OK.
 */
1506 zebra_rec_keys_t reckeys,
1507 zebra_snippets *snippets)
1508{
/* Scratch NMEM handed to the term conversion; reset after every key. */
1509 NMEM nmem = nmem_create();
1510 if (zebra_rec_keys_rewind(reckeys))
1511 {
1512 const char *str;
1513 size_t slen;
1514 struct it_key key;
1515 while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
1516 {
1517 char *dst_term = 0;
1518 int ord;
1519 zint seqno;
/* Not initialised here; the assert below relies on the lookup setting it.
 * NOTE(review): confirm what zebraExplain_lookup_ord does on failure. */
1520 const char *index_type;
1521
1522 assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
1523 seqno = key.mem[key.len-1];
1524 ord = CAST_ZINT_TO_INT(key.mem[0]);
1525
1526 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
1527 0/* db */, 0 /* string_index */);
1528 assert(index_type);
1529 zebra_term_untrans_iconv(zh, nmem, index_type,
1530 &dst_term, str);
/* Third argument (ws) is always 0 here. */
1531 zebra_snippets_append(snippets, seqno, 0, ord, dst_term);
1532 nmem_reset(nmem);
1533 }
1534 }
1535 nmem_destroy(nmem);
1536 return ZEBRA_OK;
1537}
1538
/* print_rec_keys -- body.
 * NOTE(review): the signature line (listing line 1539) is not present in
 * this extract; the symbol index gives it as
 * void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys).
 *
 * Logs, at YLOG_LOG level, a header line followed by one line per key in
 * `reckeys` showing the key's ord, seqno and the term as converted by
 * zebra_term_untrans().
 */
1540{
1541 yaz_log(YLOG_LOG, "print_rec_keys");
1542 if (zebra_rec_keys_rewind(reckeys))
1543 {
1544 const char *str;
1545 size_t slen;
1546 struct it_key key;
1547 while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
1548 {
/* Destination for the converted term.
 * NOTE(review): nothing visible here bounds what zebra_term_untrans
 * writes into dst_buf - confirm IT_MAX_WORD is sufficient. */
1549 char dst_buf[IT_MAX_WORD];
1550 zint seqno;
1551 const char *index_type;
1552 int ord = CAST_ZINT_TO_INT(key.mem[0]);
/* db is filled by the lookup but not used afterwards. */
1553 const char *db = 0;
1554 assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
1555
1556 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, 0);
1557
/* The sequence number is the last element of the key. */
1558 seqno = key.mem[key.len-1];
1559
1560 zebra_term_untrans(zh, index_type, dst_buf, str);
1561
1562 yaz_log(YLOG_LOG, "ord=%d seqno=" ZINT_FORMAT
1563 " term=%s", ord, seqno, dst_buf);
1564 }
1565 }
1566}
1567
/* extract_add_index_string -- remaining parameters and body.
 * NOTE(review): the first line of the signature (listing line 1568) and
 * listing line 1572 are not present in this extract; `zh` is used below
 * but its declaration is not visible (presumably on line 1572 - confirm).
 *
 * Builds an index key for the term `str` (`length` bytes) from the fields
 * of `p` and writes it to zh->reg->keys.
 */
1569 const char *str, int length)
1570{
1571 struct it_key key;
1573 ZebraExplainInfo zei = zh->reg->zei;
1574 int ch, i;
1575
/* Find the ordinal for (cat, index_type, index_name); add it when the
 * lookup returns a negative value. */
1576 ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
1577 if (ch < 0)
1578 ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
1579
/* Key layout: ordinal, record_id, section_id, [segment], seqno.
 * The segment element is present only when m_segment_indexing is set,
 * so key.len ends up as 4 or 5. */
1580 i = 0;
1581 key.mem[i++] = ch;
1582 key.mem[i++] = p->record_id;
1583 key.mem[i++] = p->section_id;
1584
1585 if (zh->m_segment_indexing)
1586 key.mem[i++] = p->segment;
1587 key.mem[i++] = p->seqno;
1588 key.len = i;
1589
1590 zebra_rec_keys_write(zh->reg->keys, str, length, &key);
1591}
1592
/* Builds a three-element sort key (ordinal, record_id, section_id) for
 * the term `str` (`length` bytes) and writes it to zh->reg->sortKeys.
 *
 * NOTE(review): listing lines 1596 and 1599 are not present in this
 * extract; `zh` and `cat` are used below but their declarations are not
 * visible (presumably on those lines - confirm).
 */
1593static void extract_add_sort_string(RecWord *p, const char *str, int length)
1594{
1595 struct it_key key;
1597 ZebraExplainInfo zei = zh->reg->zei;
1598 int ch;
1600
/* Find the ordinal for (cat, index_type, index_name); add it when the
 * lookup returns a negative value. */
1601 ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
1602 if (ch < 0)
1603 ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
/* Unlike the index key, no segment or seqno element is stored. */
1604 key.len = 3;
1605 key.mem[0] = ch;
1606 key.mem[1] = p->record_id;
1607 key.mem[2] = p->section_id;
1608
1609 zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
1610}
1611
/* extract_add_staticrank_string -- remaining parameters and body.
 * NOTE(review): the first line of the signature (listing line 1612) is
 * not present in this extract.
 *
 * Parses the first `length` bytes of `str` (at most sizeof(valz)-1) as a
 * zint with atozint() and stores the result in the staticrank field of
 * the extract control referenced by p->extractCtrl.
 */
1613 const char *str, int length)
1614{
1615 char valz[40];
1616 struct recExtractCtrl *ctrl = p->extractCtrl;
1617
/* Clamp so the copy plus terminating NUL fits in valz.
 * NOTE(review): `length` is an int compared against a size_t, so a
 * negative length would convert to a large unsigned value and be clamped
 * to sizeof(valz)-1 rather than rejected - confirm callers never pass
 * a negative length. */
1618 if (length > sizeof(valz)-1)
1619 length = sizeof(valz)-1;
1620
/* str is not assumed to be NUL-terminated; make a terminated copy. */
1621 memcpy(valz, str, length);
1622 valz[length] = '\0';
1623 ctrl->staticrank = atozint(valz);
1624}
1625
/* extract_add_string -- remaining parameters and body.
 * NOTE(review): this extract is missing listing lines 1626 (first line of
 * the signature), 1633, 1644, 1646, 1652 and 1653, so several statements
 * and conditions below are incomplete as shown.
 *
 * Dispatches one term (`string`, `length` bytes, length must be > 0)
 * according to the kind of map `zm`: index maps, sort maps and
 * staticrank maps are each handled by a separate branch.  Does nothing
 * when p->index_name is null.
 */
1627 const char *string, int length)
1628{
1629 assert(length > 0);
1630
1631 if (!p->index_name)
1632 return;
/* NOTE(review): listing line 1633 is missing; the block below (which logs
 * the escaped term at log_level_details) presumably has a condition on
 * that line - confirm. */
1634 {
1635
1636 WRBUF w = wrbuf_alloc();
1637
1638 wrbuf_write_escaped(w, string, length);
1639 yaz_log(log_level_details, "extract_add_string: %s", wrbuf_cstr(w));
1640 wrbuf_destroy(w);
1641 }
1642 if (zebra_maps_is_index(zm))
1643 {
/* NOTE(review): listing line 1644 is missing; line 1645 is the tail of a
 * call whose name and leading arguments are not visible. */
1645 string, length);
/* NOTE(review): listing line 1646 is missing; the condition for the block
 * below is not visible. */
1647 {
/* Work on a copy of *p so the caller's RecWord is left unchanged. */
1648 RecWord word;
1649 memcpy(&word, p, sizeof(word));
1650
1651 word.seqno = 1;
/* NOTE(review): listing lines 1652-1653 are missing; what is done with
 * `word` is not visible. */
1654 }
1655 }
1656 else if (zebra_maps_is_sort(zm))
1657 {
1658 extract_add_sort_string(p, string, length);
1659 }
1660 else if (zebra_maps_is_staticrank(zm))
1661 {
1662 extract_add_staticrank_string(p, string, length);
1663 }
1664}
1665
/* extract_add_incomplete_field -- body.
 * NOTE(review): listing lines 1666 (signature; the symbol index gives
 * static void extract_add_incomplete_field(RecWord *p, zebra_map_t zm)),
 * 1711 and 1714 are not present in this extract.
 *
 * Splits p->term_buf (p->term_len bytes) into words separated by mapped
 * spaces, using zebra_maps_input() to map the input, and passes each
 * word to extract_add_string(), incrementing p->seqno after each one.
 */
1667{
1668 const char *b = p->term_buf;
1669 int remain = p->term_len;
1670 int first = 1;
1671 const char **map = 0;
1672
1673 if (remain > 0)
1674 map = zebra_maps_input(zm, &b, remain, 0);
1675
1676 while (map)
1677 {
1678 char buf[IT_MAX_WORD+1];
/* This `remain` shadows the function-level variable of the same name. */
1679 int i, remain;
1680
1681 /* Skip spaces */
1682 while (map && *map && **map == *CHR_SPACE)
1683 {
1684 remain = p->term_len - (b - p->term_buf);
1685 if (remain > 0)
1686 map = zebra_maps_input(zm, &b, remain, 0);
1687 else
1688 map = 0;
1689 }
1690 if (!map)
1691 break;
/* Collect mapped bytes until the next space or end of input.  Only the
 * first IT_MAX_WORD bytes are kept; further input of the same word is
 * still consumed but not stored. */
1692 i = 0;
1693 while (map && *map && **map != *CHR_SPACE)
1694 {
1695 const char *cp = *map;
1696
1697 while (i < IT_MAX_WORD && *cp)
1698 buf[i++] = *(cp++);
1699 remain = p->term_len - (b - p->term_buf);
1700 if (remain > 0)
1701 map = zebra_maps_input(zm, &b, remain, 0);
1702 else
1703 map = 0;
1704 }
/* An empty word ends processing of the whole field. */
1705 if (!i)
1706 return;
1707
1708 if (first)
1709 {
1710 first = 0;
/* NOTE(review): listing line 1711 is missing; the condition for the
 * block below is not visible. */
1712 {
1713 /* first in field marker */
/* NOTE(review): listing line 1714 is missing; the statement that emits
 * the marker is not visible. */
1715 p->seqno++;
1716 }
1717 }
1718 extract_add_string(p, zm, buf, i);
1719 p->seqno++;
1720 }
1721}
1722
/* extract_add_complete_field -- body.
 * NOTE(review): the signature line (listing line 1723) is not present in
 * this extract; the symbol index gives it as
 * static void extract_add_complete_field(RecWord *p, zebra_map_t zm).
 *
 * Passes the whole field as a single term: parse_complete_field() fills
 * `buf` and returns its length, and a non-zero result is handed to
 * extract_add_string(), after which p->seqno is incremented.
 */
1724{
1725 char buf[IT_MAX_WORD+1];
1726 int i = parse_complete_field(p, zm, buf);
/* A zero length means there is nothing to add; seqno is left unchanged. */
1727 if (!i)
1728 return;
1729 extract_add_string(p, zm, buf, i);
1730 p->seqno++;
1731}
1732
/* extract_add_icu -- body.
 * NOTE(review): listing lines 1733 (signature; the symbol index gives
 * static void extract_add_icu(RecWord *p, zebra_map_t zm)) and 1738 are
 * not present in this extract.
 *
 * Passes every token returned by zebra_map_tokenize_next() to
 * extract_add_string(), incrementing p->seqno after each one.
 */
1734{
1735 const char *res_buf = 0;
1736 size_t res_len = 0;
1737
/* NOTE(review): listing line 1738 is missing; the statement that starts
 * tokenization before this loop is not visible. */
1739 while (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0))
1740 {
/* Tokens longer than IT_MAX_WORD are cut to IT_MAX_WORD bytes, with a
 * log message reporting the original length. */
1741 if (res_len > IT_MAX_WORD)
1742 {
1743 yaz_log(YLOG_LOG, "Truncating long term %ld", (long) res_len);
1744 res_len = IT_MAX_WORD;
1745 }
1746 extract_add_string(p, zm, res_buf, res_len);
1747 p->seqno++;
1748 }
1749}
1750
1751
/* extract_token_add -- body.
 * NOTE(review): this extract is missing listing lines 1767 (signature;
 * the symbol index gives static void extract_token_add(RecWord *p)),
 * 1769, 1770, 1772, 1786 and 1788.  `zm` is used below but its
 * declaration is not visible.
 *
 * Logs the incoming term and then dispatches on the map type: ICU maps go
 * to extract_add_icu(); other maps are split on zebra_maps_is_complete().
 */
1768{
1771
/* NOTE(review): listing line 1772 is missing; the condition guarding the
 * logging block below is not visible. */
1773 {
1774 yaz_log(log_level_details, "extract_token_add "
1775 "type=%s index=%s seqno=" ZINT_FORMAT " s=%.*s",
1776 p->index_type, p->index_name,
1777 p->seqno, p->term_len, p->term_buf);
1778 }
1779 if (zebra_maps_is_icu(zm))
1780 {
1781 extract_add_icu(p, zm);
1782 }
1783 else
1784 {
/* NOTE(review): listing lines 1786 and 1788 are missing, so the two
 * branch bodies are not visible; presumably they call the complete- and
 * incomplete-field handlers respectively - confirm. */
1785 if (zebra_maps_is_complete(zm))
1787 else
1789 }
1790}
1791
/* extract_set_store_data_cb -- remaining parameters and body.
 * NOTE(review): the first line of the signature (listing line 1792) is
 * not present in this extract.
 *
 * Replaces the store-data buffer held by the ZebraHandle in p->handle
 * with a private copy of `buf` (`sz` bytes).  Any previous buffer is
 * freed first.  When `buf` is null or `sz` is zero the handle is left
 * with no buffer and a size of 0.
 */
1793 void *buf, size_t sz)
1794{
1795 ZebraHandle zh = (ZebraHandle) p->handle;
1796
/* Drop whatever was stored before. */
1797 xfree(zh->store_data_buf);
1798 zh->store_data_buf = 0;
1799 zh->store_data_size = 0;
1800 if (buf && sz)
1801 {
/* The data is copied, so the caller keeps ownership of buf. */
1802 zh->store_data_buf = xmalloc(sz);
1803 zh->store_data_size = sz;
1804 memcpy(zh->store_data_buf, buf, sz);
1805 }
1806}
1807
/* extract_set_store_data_prepare -- body.
 * NOTE(review): listing lines 1808 (signature; the symbol index gives
 * static void extract_set_store_data_prepare(struct recExtractCtrl *p))
 * and 1814 are not present in this extract.
 *
 * Frees the store-data buffer held by the ZebraHandle in p->handle and
 * resets its pointer and size to 0.
 */
1809{
1810 ZebraHandle zh = (ZebraHandle) p->handle;
1811 xfree(zh->store_data_buf);
1812 zh->store_data_buf = 0;
1813 zh->store_data_size = 0;
/* NOTE(review): listing line 1814 is missing; a further statement before
 * the closing brace is not visible. */
1815}
1816
1817static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid)
1818{
1819 ZebraHandle zh = (ZebraHandle) p->handle;
1820 zebraExplain_addSchema(zh->reg->zei, oid);
1821}
1822
/* extract_flush_sort_keys -- remaining parameters and body.
 * NOTE(review): listing lines 1823 (first line of the signature) and 1834
 * are not present in this extract; `si` is used below but its declaration
 * is not visible (presumably on line 1834 - confirm).
 *
 * Applies the sort keys in `reckeys` to the sort index `si`.  Terms are
 * first grouped by (ordinal, section_id); each group is then added to the
 * sort index when cmd == 1 and deleted from it otherwise.
 */
1824 int cmd, zebra_rec_keys_t reckeys)
1825{
1826#if 0
1827 yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT,
1828 cmd, sysno);
1829 extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG);
1830#endif
1831
1832 if (zebra_rec_keys_rewind(reckeys))
1833 {
1835 size_t slen;
1836 const char *str;
1837 struct it_key key_in;
1838
/* The list nodes are allocated from nmem and released together by
 * nmem_destroy() at the end; each node's wrbuf is destroyed separately. */
1839 NMEM nmem = nmem_create();
1840 struct sort_add_ent {
1841 int ord;
1842 int cmd;
1843 struct sort_add_ent *next;
1844 WRBUF wrbuf;
1845 zint sysno;
1846 zint section_id;
1847 };
1848 struct sort_add_ent *sort_ent_list = 0;
1849
/* Pass 1: collect the terms of every (ord, section_id) pair. */
1850 while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1851 {
1852 int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
1853 zint filter_sysno = key_in.mem[1];
1854 zint section_id = key_in.mem[2];
1855
/* Walk to the matching entry, or to the tail link if there is none, so
 * that a new entry is appended at the end of the list. */
1856 struct sort_add_ent **e = &sort_ent_list;
1857 for (; *e; e = &(*e)->next)
1858 if ((*e)->ord == ord && section_id == (*e)->section_id)
1859 break;
1860 if (!*e)
1861 {
1862 *e = nmem_malloc(nmem, sizeof(**e));
1863 (*e)->next = 0;
1864 (*e)->wrbuf = wrbuf_alloc();
1865 (*e)->ord = ord;
1866 (*e)->cmd = cmd;
/* A non-zero second key element takes precedence over the sysno
 * parameter. */
1867 (*e)->sysno = filter_sysno ? filter_sysno : sysno;
1868 (*e)->section_id = section_id;
1869 }
1870
/* Terms of one group are stored back to back, each followed by a NUL. */
1871 wrbuf_write((*e)->wrbuf, str, slen);
1872 wrbuf_putc((*e)->wrbuf, '\0');
1873 }
/* Pass 2: apply each group to the sort index. */
1874 if (sort_ent_list)
1875 {
1876 zint last_sysno = 0;
1877 struct sort_add_ent *e = sort_ent_list;
1878 for (; e; e = e->next)
1879 {
/* zebra_sort_sysno is called only when the sysno differs from the one
 * used for the previous entry (initially 0). */
1880 if (last_sysno != e->sysno)
1881 {
1882 zebra_sort_sysno(si, e->sysno);
1883 last_sysno = e->sysno;
1884 }
1885 zebra_sort_type(si, e->ord);
1886 if (e->cmd == 1)
1887 zebra_sort_add(si, e->section_id, e->wrbuf);
1888 else
1889 zebra_sort_delete(si, e->section_id);
1890 wrbuf_destroy(e->wrbuf);
1891 }
1892 }
1893 nmem_destroy(nmem);
1894 }
1895}
1896
1897/*
1898 * Local variables:
1899 * c-basic-offset: 4
1900 * c-file-style: "Stroustrup"
1901 * indent-tabs-mode: nil
1902 * End:
1903 * vim: shiftwidth=4 tabstop=8 expandtab
1904 */
1905
#define O_BINARY
Definition agrep.c:46
struct zebra_session * ZebraHandle
a Zebra Handle - (session)
Definition api.h:71
const char * CHR_CUT
Definition charmap.c:50
const char * CHR_SPACE
Definition charmap.c:49
#define CHR_BASE_CHAR
Definition charmap.h:33
const char * CHR_UNKNOWN
Definition charmap.c:48
static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
Definition extract.c:108
static void snippet_schema_add(struct recExtractCtrl *p, Odr_oid *oid)
Definition extract.c:317
static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm)
Definition extract.c:207
void zebra_it_key_str_dump(ZebraHandle zh, struct it_key *key, const char *str, size_t slen, NMEM nmem, int level)
Definition extract.c:1299
static ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, struct ZebraRecStream *stream, enum zebra_recctrl_action_t action, const char *recordType, zint *sysno, const char *match_criteria, const char *fname, RecType recType, void *recTypeClientData, int *more)
Definition extract.c:778
static void extract_set_store_data_cb(struct recExtractCtrl *p, void *buf, size_t sz)
Definition extract.c:1792
static void extract_add_sort_string(RecWord *p, const char *str, int length)
Definition extract.c:1593
static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid)
Definition extract.c:1817
static void extract_add_index_string(RecWord *p, zinfo_index_category_t cat, const char *str, int length)
Definition extract.c:1568
ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, struct ZebraRecStream *stream, enum zebra_recctrl_action_t action, const char *recordType, zint *sysno, const char *match_criteria, const char *fname, RecType recType, void *recTypeClientData)
extracts records from stream
Definition extract.c:1190
static void zebra_init_log_level(void)
Definition extract.c:55
#define FILE_MATCH_BLANK
Definition extract.c:403
static void all_matches_add(struct recExtractCtrl *ctrl, zint record_id, zint sysno)
add the always-matches index entry and map to real record ID
Definition extract.c:580
static void extract_token_add(RecWord *p)
top-level indexing handler for recctrl system
Definition extract.c:1767
static char * get_match_from_spec(ZebraHandle zh, zebra_rec_keys_t reckeys, const char *fname, const char *spec)
Definition extract.c:405
static void extract_add_complete_field(RecWord *p, zebra_map_t zm)
Definition extract.c:1723
static void snippet_add_complete_field(RecWord *p, int ord, zebra_map_t zm)
Definition extract.c:191
static void extract_add_staticrank_string(RecWord *p, const char *str, int length)
Definition extract.c:1612
static void extract_add_incomplete_field(RecWord *p, zebra_map_t zm)
Definition extract.c:1666
void extract_snippet(ZebraHandle zh, zebra_snippets *sn, struct ZebraRecStream *stream, RecType rt, void *recTypeClientData)
Definition extract.c:323
ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n)
Definition extract.c:1224
void extract_rec_keys_log(ZebraHandle zh, int is_insert, zebra_rec_keys_t reckeys, int level)
Definition extract.c:1359
static WRBUF wrbuf_hex_str(const char *cstr)
Definition extract.c:66
static void extract_add_icu(RecWord *p, zebra_map_t zm)
Definition extract.c:1733
static void extract_flush_record_keys2(ZebraHandle zh, zint sysno, zebra_rec_keys_t ins_keys, zint ins_rank, zebra_rec_keys_t del_keys, zint del_rank)
Definition extract.c:1431
static void extract_set_store_data_prepare(struct recExtractCtrl *p)
Definition extract.c:1808
void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys)
Definition extract.c:1539
static void snippet_token_add(RecWord *p)
Definition extract.c:293
static void snippet_add_icu(RecWord *p, int ord, zebra_map_t zm)
Definition extract.c:272
ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, const char *buf, size_t buf_size, enum zebra_recctrl_action_t action, const char *recordType, zint *sysno, const char *match_criteria, const char *fname)
Definition extract.c:725
ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, enum zebra_recctrl_action_t action)
Definition extract.c:609
static int log_level_extract
Definition extract.c:45
ZEBRA_RES zebra_rec_keys_to_snippets1(ZebraHandle zh, zebra_rec_keys_t reckeys, zebra_snippets *snippets)
Definition extract.c:1505
static void extract_flush_sort_keys(ZebraHandle zh, zint sysno, int cmd, zebra_rec_keys_t skp)
Definition extract.c:1823
void extract_rec_keys_adjust(ZebraHandle zh, int is_insert, zebra_rec_keys_t reckeys)
Definition extract.c:1379
static int log_level_initialized
Definition extract.c:47
static void extract_init(struct recExtractCtrl *p, RecWord *w)
Definition extract.c:120
static void extract_add_string(RecWord *p, zebra_map_t zm, const char *string, int length)
Definition extract.c:1626
static void searchRecordKey(ZebraHandle zh, zebra_rec_keys_t reckeys, const char *index_name, const char **ws, int ws_length)
Definition extract.c:354
static int parse_complete_field(RecWord *p, zebra_map_t zm, char *buf)
Definition extract.c:136
static void check_log_limit(ZebraHandle zh)
Definition extract.c:86
static int log_level_details
Definition extract.c:46
static void logRecord(ZebraHandle zh)
Definition extract.c:95
int zebra_term_untrans(ZebraHandle zh, const char *index_type, char *dst, const char *src)
Definition untrans.c:31
void zebra_create_stream_fd(struct ZebraRecStream *stream, int fd, off_t start_offset)
Definition stream.c:140
#define FIRST_IN_FIELD_STR
Definition index.h:419
void zebra_create_stream_mem(struct ZebraRecStream *stream, const char *buf, size_t sz)
Definition stream.c:123
#define FIRST_IN_FIELD_LEN
Definition index.h:421
int zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, const char *index_type, char **dst, const char *src)
Definition untrans.c:96
int key_compare(const void *p1, const void *p2)
Definition it_key.c:74
#define IT_KEY_LEVEL_MAX
Definition it_key.h:29
#define IT_MAX_WORD
Definition it_key.h:27
zebra_key_block_t key_block_create(size_t mem, const char *key_tmp_dir, int use_threads)
Definition key_block.c:191
void key_block_write(zebra_key_block_t p, zint sysno, struct it_key *key_in, int cmd, const char *str_buf, size_t str_len, zint staticrank, int static_rank_enable)
Definition key_block.c:259
int dict_insert_ord(Dict d, int ord, const char *p, int userlen, void *userinfo)
Definition orddict.c:50
char * dict_lookup_ord(Dict d, int ord, const char *str)
Definition orddict.c:42
int dict_delete_ord(Dict d, int ord, const char *p)
Definition orddict.c:59
#define RECCTRL_EXTRACT_EOF
Definition recctrl.h:164
zebra_recctrl_action_t
Definition recctrl.h:87
@ action_a_delete
Definition recctrl.h:97
@ action_delete
Definition recctrl.h:93
@ action_insert
Definition recctrl.h:89
@ action_update
Definition recctrl.h:95
@ action_replace
Definition recctrl.h:91
#define RECCTRL_EXTRACT_ERROR_GENERIC
Definition recctrl.h:165
#define RECCTRL_EXTRACT_OK
Definition recctrl.h:163
RecType recType_byName(RecTypes rts, Res res, const char *name, void **clientDataP)
Definition recctrl.c:264
#define RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER
Definition recctrl.h:166
#define RECCTRL_EXTRACT_SKIP
Definition recctrl.h:167
int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
Definition recgrs.c:885
Record rec_new(Records p)
creates new record (to be written to file storage)
Definition records.c:991
Record rec_get(Records p, zint sysno)
gets record - with given system number
Definition records.c:928
ZEBRA_RES rec_put(Records p, Record *recpp)
puts record (writes into file storage)
Definition records.c:1023
char * rec_strdup(const char *s, size_t *len)
Definition records.c:1080
@ recInfo_databaseName
Definition recindex.h:122
@ recInfo_filename
Definition recindex.h:120
@ recInfo_delKeys
Definition recindex.h:121
@ recInfo_fileType
Definition recindex.h:119
@ recInfo_sortKeys
Definition recindex.h:125
@ recInfo_storeData
Definition recindex.h:123
void rec_free(Record *recpp)
frees record (from memory)
Definition records.c:1044
ZEBRA_RES rec_del(Records p, Record *recpp)
marks record for deletion (on file storage)
Definition records.c:1001
int zebra_rec_keys_empty(zebra_rec_keys_t keys)
Definition reckeys.c:252
int zebra_rec_keys_read(zebra_rec_keys_t keys, const char **str, size_t *slen, struct it_key *key)
Definition reckeys.c:259
void zebra_rec_keys_reset(zebra_rec_keys_t keys)
Definition reckeys.c:230
void zebra_rec_keys_get_buf(zebra_rec_keys_t p, char **buf, size_t *sz)
Definition reckeys.c:133
int zebra_rec_keys_rewind(zebra_rec_keys_t keys)
Definition reckeys.c:240
zint zebra_rec_keys_get_custom_record_id(zebra_rec_keys_t keys)
Definition reckeys.c:286
zebra_rec_keys_t zebra_rec_keys_open(void)
Definition reckeys.c:88
void zebra_rec_keys_set_buf(zebra_rec_keys_t p, char *buf, size_t sz, int copy_buf)
Definition reckeys.c:109
void zebra_rec_keys_write(zebra_rec_keys_t keys, const char *str, size_t slen, const struct it_key *key)
Definition reckeys.c:188
void zebra_rec_keys_close(zebra_rec_keys_t p)
Definition reckeys.c:143
const char * res_get_def(Res r, const char *name, const char *def)
Definition res.c:313
const char * res_get(Res r, const char *name)
Definition res.c:294
void zebra_snippets_appendn(zebra_snippets *l, zint seqno, int ws, int ord, const char *term, size_t term_len)
Definition snippet.c:57
void zebra_snippets_append(zebra_snippets *l, zint seqno, int ws, int ord, const char *term)
Definition snippet.c:51
int zebra_sort_type(zebra_sort_index_t si, int type)
sets type for sort usage
Definition sortidx.c:235
void zebra_sort_add(zebra_sort_index_t si, zint section_id, WRBUF w)
adds multi-map content to sort file
Definition sortidx.c:393
void zebra_sort_sysno(zebra_sort_index_t si, zint sysno)
sets sort system number for read / add / delete
Definition sortidx.c:340
void zebra_sort_delete(zebra_sort_index_t si, zint section_id)
delete sort entry
Definition sortidx.c:356
const char * term_buf
Definition recctrl.h:56
zint record_id
Definition recctrl.h:64
const char * index_type
Definition recctrl.h:52
struct recExtractCtrl * extractCtrl
Definition recctrl.h:67
zint seqno
Definition recctrl.h:60
int term_len
Definition recctrl.h:58
zint section_id
Definition recctrl.h:66
zint segment
Definition recctrl.h:62
const char * index_name
Definition recctrl.h:54
off_t recordOffset
Definition zinfo.h:107
zint runNumber
Definition zinfo.h:108
int recordSize
Definition zinfo.h:106
zint staticrank
Definition zinfo.h:109
record reader stream
Definition recctrl.h:71
void(* destroy)(struct ZebraRecStream *s)
close and destroy stream
Definition recctrl.h:83
off_t(* seekf)(struct ZebraRecStream *s, off_t offset)
seek function
Definition recctrl.h:77
off_t(* endf)(struct ZebraRecStream *s, off_t *offset)
set and get of record position
Definition recctrl.h:81
int(* readf)(struct ZebraRecStream *s, char *buf, size_t count)
read function
Definition recctrl.h:75
off_t(* tellf)(struct ZebraRecStream *s)
tell function
Definition recctrl.h:79
int len
Definition it_key.h:31
zint mem[IT_KEY_LEVEL_MAX]
Definition it_key.h:32
record extract for indexing
Definition recctrl.h:101
int flagShowRecords
Definition recctrl.h:108
void(* init)(struct recExtractCtrl *p, RecWord *w)
Definition recctrl.h:103
enum zebra_recctrl_action_t action
Definition recctrl.h:114
char match_criteria[256]
Definition recctrl.h:109
void(* tokenAdd)(RecWord *w)
Definition recctrl.h:105
zint staticrank
Definition recctrl.h:110
void * handle
Definition recctrl.h:113
void(* setStoreData)(struct recExtractCtrl *p, void *buf, size_t size)
Definition recctrl.h:106
void(* schemaAdd)(struct recExtractCtrl *p, Odr_oid *oid)
Definition recctrl.h:111
data1_handle dh
Definition recctrl.h:112
struct ZebraRecStream * stream
Definition recctrl.h:102
int(* extract)(void *clientData, struct recExtractCtrl *ctrl)
Definition recctrl.h:157
int version
Definition recctrl.h:152
int recordOffset
Definition extract.c:566
const char * fname
Definition extract.c:565
struct recordGroup * rGroup
Definition extract.c:567
char * info[REC_NO_INFO]
Definition recindex.h:34
size_t size[REC_NO_INFO]
Definition recindex.h:35
zint sysno
Definition recindex.h:32
zebra_snippets * snippets
Definition extract.c:133
ZebraHandle zh
Definition extract.c:132
ZebraExplainInfo zei
Definition index.h:139
zebra_rec_keys_t sortKeys
Definition index.h:151
zebra_key_block_t key_block
Definition index.h:153
RecTypes recTypes
Definition index.h:145
data1_handle dh
Definition index.h:142
zebra_maps_t zebra_maps
Definition index.h:143
Records records
Definition index.h:138
zebra_sort_index_t sort_index
Definition index.h:134
Dict matchDict
Definition index.h:133
zebra_rec_keys_t keys
Definition index.h:150
zint records_skipped
Definition index.h:212
struct zebra_register * reg
Definition index.h:174
zint records_processed
Definition index.h:211
void * store_data_buf
Definition index.h:228
zint records_updated
Definition index.h:209
char ** basenames
Definition index.h:178
zint records_deleted
Definition index.h:210
int m_flag_rw
Definition index.h:225
char * path_reg
Definition index.h:182
size_t store_data_size
Definition index.h:229
int m_staticrank
Definition index.h:205
const char * m_record_type
Definition index.h:221
int m_store_data
Definition index.h:222
int m_explain_database
Definition index.h:224
int m_file_verbose_limit
Definition index.h:226
const char * m_group
Definition index.h:219
const char * m_record_id
Definition index.h:220
int m_store_keys
Definition index.h:223
zint records_inserted
Definition index.h:208
int fd
long zint
Zebra integer.
Definition util.h:66
#define ZEBRA_FAIL
Definition util.h:81
#define ZINT_FORMAT
Definition util.h:72
#define CAST_ZINT_TO_INT(x)
Definition util.h:96
zint atozint(const char *src)
Definition zint.c:55
#define ZEBRA_OK
Definition util.h:82
short ZEBRA_RES
Common return type for Zebra API.
Definition util.h:80
int zebra_maps_is_index(zebra_map_t zm)
Definition zebramap.c:464
const char ** zebra_maps_input(zebra_map_t zm, const char **from, int len, int first)
Definition zebramap.c:399
int zebra_maps_is_alwaysmatches(zebra_map_t zm)
Definition zebramap.c:485
int zebra_maps_is_first_in_field(zebra_map_t zm)
Definition zebramap.c:492
int zebra_map_tokenize_next(zebra_map_t zm, const char **result_buf, size_t *result_len, const char **display_buf, size_t *display_len)
Definition zebramap.c:658
int zebra_maps_is_sort(zebra_map_t zm)
Definition zebramap.c:478
int zebra_maps_is_staticrank(zebra_map_t zm)
Definition zebramap.c:471
int zebra_maps_is_icu(zebra_map_t zm)
Definition zebramap.c:741
int zebra_map_tokenize_start(zebra_map_t zm, const char *buf, size_t len)
Definition zebramap.c:702
int zebra_maps_is_complete(zebra_map_t zm)
Definition zebramap.c:450
zebra_map_t zebra_map_get_or_add(zebra_maps_t zms, const char *id)
Definition zebramap.c:364
RecordAttr * rec_init_attr(ZebraExplainInfo zei, Record rec)
Definition zinfo.c:1594
int zebraExplain_curDatabase(ZebraExplainInfo zei, const char *database)
Definition zinfo.c:790
void zebraExplain_recordBytesIncrement(ZebraExplainInfo zei, int adjust_num)
Definition zinfo.c:1563
int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, zinfo_index_category_t cat, const char *index_type, const char *str)
lookup ordinal from string index + index type
Definition zinfo.c:1353
void zebraExplain_addSchema(ZebraExplainInfo zei, Odr_oid *oid)
Definition zinfo.c:1556
void zebraExplain_recordCountIncrement(ZebraExplainInfo zei, int adjust_num)
Definition zinfo.c:1574
int zebraExplain_add_attr_str(ZebraExplainInfo zei, zinfo_index_category_t cat, const char *index_type, const char *index_name)
Definition zinfo.c:1545
zint zebraExplain_runNumberIncrement(ZebraExplainInfo zei, int adjust_num)
Definition zinfo.c:1585
int zebraExplain_get_database_ord(ZebraExplainInfo zei)
Definition zinfo.c:1620
int zebraExplain_ord_adjust_occurrences(ZebraExplainInfo zei, int ord, int term_delta, int doc_delta)
Definition zinfo.c:1435
int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord, const char **index_type, const char **db, const char **string_index)
Definition zinfo.c:1478
int zebraExplain_newDatabase(ZebraExplainInfo zei, const char *database, int explain_database)
Definition zinfo.c:882
zinfo_index_category_t
Definition zinfo.h:37
@ zinfo_index_category_index
Definition zinfo.h:38
@ zinfo_index_category_alwaysmatches
Definition zinfo.h:40
@ zinfo_index_category_sort
Definition zinfo.h:39