IDZEBRA 2.2.8
mod_dom.c
Go to the documentation of this file.
1/* This file is part of the Zebra server.
2 Copyright (C) Index Data
3
4Zebra is free software; you can redistribute it and/or modify it under
5the terms of the GNU General Public License as published by the Free
6Software Foundation; either version 2, or (at your option) any later
7version.
8
9Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or
11FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18*/
19
20#if HAVE_CONFIG_H
21#include <config.h>
22#endif
23#include <stdio.h>
24#include <stdlib.h>
25#include <assert.h>
26#include <ctype.h>
27#include <stdarg.h>
28
29#include <yaz/diagbib1.h>
30#include <yaz/tpath.h>
31#include <yaz/snprintf.h>
32
33#include <libxml/xmlversion.h>
34#include <libxml/parser.h>
35#include <libxml/tree.h>
36#include <libxml/xmlIO.h>
37#include <libxml/xmlreader.h>
38#include <libxslt/transform.h>
39#include <libxslt/xsltutils.h>
40
41#if YAZ_HAVE_EXSLT
42#include <libexslt/exslt.h>
43#endif
44
45#include <idzebra/util.h>
46#include <idzebra/recctrl.h>
47#include <yaz/oid_db.h>
48
49/* XML namespace URI that marks zebra elements (<record>, <index>, <group>, <meta>) */
50#define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
51static const char *zebra_dom_ns = ZEBRA_DOM_NS;
52
53/* target name of the zebra processing instructions (<?zebra-2.0 record ...?>, <?zebra-2.0 index ...?>) */
54#define ZEBRA_PI_NAME "zebra-2.0"
55static const char *zebra_pi_name = ZEBRA_PI_NAME;
56
61
63 const char *stylesheet;
64 xsltStylesheetPtr stylesheet_xsp;
65};
66
68 int dummy;
69};
70
71struct convert_s {
73 union {
76 } u;
77 struct convert_s *next;
78};
79
81 const char *name;
83};
84
87};
88
90 const char *name;
91 const char *identifier;
94};
95
96#define DOM_INPUT_XMLREADER 1
97#define DOM_INPUT_MARC 2
99 const char *syntax;
100 const char *name;
102 int type;
103 union {
104 struct {
105 xmlTextReaderPtr reader;
107 } xmlreader;
108 struct {
109 const char *input_charset;
110 yaz_marc_t handle;
111 yaz_iconv_t iconv;
112 } marc;
113 } u;
115};
116
117struct filter_info {
118 char *fname;
119 char *full_name;
120 const char *profile_path;
123 xmlDocPtr doc_config;
129};
130
131
132
/* Compare / measure libxml2 strings (xmlChar *) with the plain C string
   functions.  Arguments are fully parenthesized so that expressions such
   as "cond ? a : b" can be passed safely, and the cast keeps constness. */
#define XML_STRCMP(a,b) strcmp((const char *)(a), (b))
#define XML_STRLEN(a) strlen((const char *)(a))


/* Loop header: advances ptr along the ->next chain and runs the statement
   that follows only for element nodes.  ptr is modified by the loop.
   The expansion ends in a bare "if", so never follow it with an "else". */
#define FOR_EACH_ELEMENT(ptr) for (; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE)
138
139static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
140 const char *fmt, ...)
141#ifdef __GNUC__
142 __attribute__ ((format (printf, 4, 5)))
143#endif
144 ;
145
146static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
147 const char *fmt, ...)
148{
149 va_list ap;
150 char buf[4096];
151
152 va_start(ap, fmt);
153 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
154 if (ptr)
155 {
156 yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none",
157 xmlGetLineNo(ptr), buf);
158 }
159 else
160 {
161 yaz_log(level, "%s: %s", tinfo->fname ? tinfo->fname : "none", buf);
162 }
163 va_end(ap);
164}
165
166
167static void set_param_str(const char **params, const char *name,
168 const char *value, NMEM nmem)
169{
170 char *quoted = nmem_malloc(nmem, 3 + strlen(value));
171 yaz_snprintf(quoted, 3 + strlen(value), "'%s'", value);
172 while (*params)
173 params++;
174 params[0] = name;
175 params[1] = quoted;
176 params[2] = 0;
177}
178
179static void set_param_int(const char **params, const char *name,
180 zint value, NMEM nmem)
181{
182 char *quoted = nmem_malloc(nmem, 30); /* 25 digits enough for 2^64 */
183 while (*params)
184 params++;
185 yaz_snprintf(quoted, 30, "'" ZINT_FORMAT "'", value);
186 params[0] = name;
187 params[1] = quoted;
188 params[2] = 0;
189}
190
191static void *filter_init(Res res, RecType recType)
192{
193 struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
194 tinfo->fname = 0;
195 tinfo->full_name = 0;
196 tinfo->profile_path = 0;
197 tinfo->nmem_record = nmem_create();
198 tinfo->nmem_config = nmem_create();
199 tinfo->extract = 0;
200 tinfo->retrieve_list = 0;
201 tinfo->input_list = 0;
202 tinfo->store = 0;
203 tinfo->doc_config = 0;
204 tinfo->record_info_invoked = 0;
205
206#if YAZ_HAVE_EXSLT
207 exsltRegisterAll();
208#endif
209
210 return tinfo;
211}
212
213static int attr_content(struct _xmlAttr *attr, const char *name,
214 const char **dst_content)
215{
216 if (!XML_STRCMP(attr->name, name) && attr->children
217 && attr->children->type == XML_TEXT_NODE)
218 {
219 *dst_content = (const char *)(attr->children->content);
220 return 1;
221 }
222 return 0;
223}
224
225static void destroy_xsp(struct convert_s *c)
226{
227 while (c)
228 {
229 if (c->which == convert_xslt_type)
230 {
231 if (c->u.xslt.stylesheet_xsp)
232 xsltFreeStylesheet(c->u.xslt.stylesheet_xsp);
233 }
234 c = c->next;
235 }
236}
237
238static void destroy_dom(struct filter_info *tinfo)
239{
240 if (tinfo->extract)
241 {
242 destroy_xsp(tinfo->extract->convert);
243 tinfo->extract = 0;
244 }
245 if (tinfo->store)
246 {
247 destroy_xsp(tinfo->store->convert);
248 tinfo->store = 0;
249 }
250 if (tinfo->input_list)
251 {
252 struct filter_input *i_ptr;
253 for (i_ptr = tinfo->input_list; i_ptr; i_ptr = i_ptr->next)
254 {
255 switch(i_ptr->type)
256 {
258 if (i_ptr->u.xmlreader.reader)
259 xmlFreeTextReader(i_ptr->u.xmlreader.reader);
260 break;
261 case DOM_INPUT_MARC:
262 yaz_iconv_close(i_ptr->u.marc.iconv);
263 yaz_marc_destroy(i_ptr->u.marc.handle);
264 break;
265 }
266 destroy_xsp(i_ptr->convert);
267 }
268 tinfo->input_list = 0;
269 }
270 if (tinfo->retrieve_list)
271 {
272 struct filter_retrieve *r_ptr;
273 for (r_ptr = tinfo->retrieve_list; r_ptr; r_ptr = r_ptr->next)
274 destroy_xsp(r_ptr->convert);
275 tinfo->retrieve_list = 0;
276 }
277
278 if (tinfo->doc_config)
279 {
280 xmlFreeDoc(tinfo->doc_config);
281 tinfo->doc_config = 0;
282 }
283 nmem_reset(tinfo->nmem_config);
284}
285
286static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
287 struct convert_s **l)
288{
289 *l = 0;
290 FOR_EACH_ELEMENT(ptr) {
291 if (!XML_STRCMP(ptr->name, "xslt"))
292 {
293 struct _xmlAttr *attr;
294 struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
295
296 p->next = 0;
298 p->u.xslt.stylesheet = 0;
299 p->u.xslt.stylesheet_xsp = 0;
300
301 for (attr = ptr->properties; attr; attr = attr->next)
302 if (attr_content(attr, "stylesheet", &p->u.xslt.stylesheet))
303 ;
304 else
305 {
306 dom_log(YLOG_WARN, tinfo, ptr,
307 "bad attribute @%s", attr->name);
308 }
309 if (p->u.xslt.stylesheet)
310 {
311 char tmp_xslt_full_name[1024];
312 if (!yaz_filepath_resolve(p->u.xslt.stylesheet,
313 tinfo->profile_path,
314 NULL,
315 tmp_xslt_full_name))
316 {
317 dom_log(YLOG_WARN, tinfo, 0,
318 "stylesheet %s not found in "
319 "path %s",
320 p->u.xslt.stylesheet,
321 tinfo->profile_path);
322 return ZEBRA_FAIL;
323 }
324
326 = xsltParseStylesheetFile((const xmlChar*)
327 tmp_xslt_full_name);
328 if (!p->u.xslt.stylesheet_xsp)
329 {
330 dom_log(YLOG_WARN, tinfo, 0,
331 "could not parse xslt stylesheet %s",
332 tmp_xslt_full_name);
333 return ZEBRA_FAIL;
334 }
335 }
336 else
337 {
338 dom_log(YLOG_WARN, tinfo, ptr,
339 "missing attribute 'stylesheet'");
340 return ZEBRA_FAIL;
341 }
342 *l = p;
343 l = &p->next;
344 }
345 else if (!XML_STRCMP(ptr->name, "process-meta"))
346 {
347 struct _xmlAttr *attr;
348 struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
349
350 p->next = 0;
352
353 for (attr = ptr->properties; attr; attr = attr->next)
354 dom_log(YLOG_WARN, tinfo, ptr,
355 "bad attribute @%s", attr->name);
356 *l = p;
357 l = &p->next;
358 }
359 else
360 {
361 dom_log(YLOG_WARN, tinfo, ptr,
362 "bad element '%s', expected <xslt>", ptr->name);
363 return ZEBRA_FAIL;
364 }
365 }
366 return ZEBRA_OK;
367}
368
369static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node,
370 struct recRetrieveCtrl *retctr)
371{
372
373 if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href &&
374 0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
375 {
376 if (0 == XML_STRCMP(node->name, "meta"))
377 {
378 const char *element_set_name = 0;
379
380 struct _xmlAttr *attr;
381 for (attr = node->properties; attr; attr = attr->next)
382 {
383 if (attr_content(attr, "name", &element_set_name))
384 ;
385 else
386 {
387 dom_log(YLOG_WARN, tinfo, node,
388 "bad attribute @%s, expected @name", attr->name);
389 }
390 }
391 if (element_set_name)
392 {
393 WRBUF result = wrbuf_alloc();
394 WRBUF addinfo = wrbuf_alloc();
395 const Odr_oid *input_format = yaz_oid_recsyn_xml;
396 const Odr_oid *output_format = 0;
397 int ret;
398
399 ret = retctr->special_fetch(retctr->handle,
400 element_set_name,
401 input_format, &output_format,
402 result, addinfo);
403 if (ret == 0)
404 {
405 xmlDocPtr sub_doc =
406 xmlParseMemory(wrbuf_buf(result), wrbuf_len(result));
407 if (sub_doc)
408 {
409 xmlNodePtr t = xmlDocGetRootElement(sub_doc);
410 xmlReplaceNode(node, xmlCopyNode(t, 1));
411 xmlFreeDoc(sub_doc);
412 }
413 }
414 wrbuf_destroy(result);
415 wrbuf_destroy(addinfo);
416 }
417 }
418 }
419 for (node = node->children; node; node = node->next)
420 process_meta(tinfo, doc, node, retctr);
421 return 0;
422}
423
425 struct recExtractCtrl *extctr,
426 struct recRetrieveCtrl *retctr,
427 struct convert_s *convert,
428 const char **params,
429 xmlDocPtr *doc,
430 xsltStylesheetPtr *last_xsp)
431{
432 for (; convert; convert = convert->next)
433 {
434 if (convert->which == convert_xslt_type)
435 {
436 xmlChar *buf_out = 0;
437 int len_out = 0;
438 xmlDocPtr res_doc = xsltApplyStylesheet(convert->u.xslt.stylesheet_xsp,
439 *doc, params);
440 if (last_xsp)
441 *last_xsp = convert->u.xslt.stylesheet_xsp;
442
443 if (!res_doc)
444 break;
445
446 /* now saving into buffer and re-reading into DOM to avoid annoing
447 XSLT problem with thrown-out indentation text nodes */
448 xsltSaveResultToString(&buf_out, &len_out, res_doc,
449 convert->u.xslt.stylesheet_xsp);
450 xmlFreeDoc(res_doc);
451
452 xmlFreeDoc(*doc);
453
454 *doc = xmlParseMemory((const char *) buf_out, len_out);
455
456 /* writing debug info out */
457 if (extctr && extctr->flagShowRecords)
458 yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s",
459 tinfo->fname ? tinfo->fname : "(none)",
460 convert->u.xslt.stylesheet,
461 len_out, buf_out);
462
463 xmlFree(buf_out);
464 }
465 else if (convert->which == convert_meta_type)
466 {
467 if (retctr) /* only execute meta on retrieval */
468 {
469 process_meta(tinfo, *doc, xmlDocGetRootElement(*doc), retctr);
470
471 /* last stylesheet absent */
472 if (last_xsp)
473 *last_xsp = 0;
474 }
475 }
476 }
477 return ZEBRA_OK;
478}
479
480static struct filter_input *new_input(struct filter_info *tinfo, int type)
481{
482 struct filter_input *p;
483 struct filter_input **np = &tinfo->input_list;
484 for (;*np; np = &(*np)->next)
485 ;
486 p = *np = nmem_malloc(tinfo->nmem_config, sizeof(*p));
487 p->next = 0;
488 p->syntax = 0;
489 p->name = 0;
490 p->convert = 0;
491 p->type = type;
492 return p;
493}
494
495static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
496 const char *syntax, const char *name)
497{
498 FOR_EACH_ELEMENT(ptr) {
499 if (!XML_STRCMP(ptr->name, "marc"))
500 {
501 yaz_iconv_t iconv = 0;
502 const char *input_charset = "marc-8";
503 struct _xmlAttr *attr;
504
505 for (attr = ptr->properties; attr; attr = attr->next)
506 {
507 if (attr_content(attr, "inputcharset", &input_charset))
508 ;
509 else
510 {
511 dom_log(YLOG_WARN, tinfo, ptr,
512 "bad attribute @%s, expected @inputcharset",
513 attr->name);
514 }
515 }
516 iconv = yaz_iconv_open("utf-8", input_charset);
517 if (!iconv)
518 {
519 dom_log(YLOG_WARN, tinfo, ptr,
520 "unsupported @charset '%s'", input_charset);
521 return ZEBRA_FAIL;
522 }
523 else
524 {
525 struct filter_input *p
526 = new_input(tinfo, DOM_INPUT_MARC);
527 p->u.marc.handle = yaz_marc_create();
528 p->u.marc.iconv = iconv;
529
530 yaz_marc_iconv(p->u.marc.handle, p->u.marc.iconv);
531
532 ptr = ptr->next;
533
534 parse_convert(tinfo, ptr, &p->convert);
535 }
536 break;
537
538 }
539 else if (!XML_STRCMP(ptr->name, "xmlreader"))
540 {
541 struct filter_input *p
543 struct _xmlAttr *attr;
544 const char *level_str = 0;
545
546 p->u.xmlreader.split_level = 0;
547 p->u.xmlreader.reader = 0;
548
549 for (attr = ptr->properties; attr; attr = attr->next)
550 {
551 if (attr_content(attr, "level", &level_str))
552 ;
553 else
554 {
555 dom_log(YLOG_WARN, tinfo, ptr,
556 "bad attribute @%s, expected @level",
557 attr->name);
558 }
559 }
560 if (level_str)
561 p->u.xmlreader.split_level = atoi(level_str);
562
563 ptr = ptr->next;
564
565 parse_convert(tinfo, ptr, &p->convert);
566 break;
567 }
568 else
569 {
570 dom_log(YLOG_WARN, tinfo, ptr,
571 "bad element <%s>, expected <marc>|<xmlreader>",
572 ptr->name);
573 return ZEBRA_FAIL;
574 }
575 }
576 return ZEBRA_OK;
577}
578
579static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
580{
581 char tmp_full_name[1024];
582 xmlNodePtr ptr;
583 xmlDocPtr doc;
584
585 tinfo->fname = nmem_strdup(tinfo->nmem_config, fname);
586
587 if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path,
588 NULL, tmp_full_name))
589 tinfo->full_name = nmem_strdup(tinfo->nmem_config, tmp_full_name);
590 else
591 tinfo->full_name = nmem_strdup(tinfo->nmem_config, tinfo->fname);
592
593 yaz_log(YLOG_LOG, "%s dom filter: "
594 "loading config file %s", tinfo->fname, tinfo->full_name);
595
596 doc = xmlParseFile(tinfo->full_name);
597 if (!doc)
598 {
599 yaz_log(YLOG_WARN, "%s: dom filter: "
600 "failed to parse config file %s",
601 tinfo->fname, tinfo->full_name);
602 return ZEBRA_FAIL;
603 }
604 /* save because we store ptrs to the content */
605 tinfo->doc_config = doc;
606
607 ptr = xmlDocGetRootElement(doc);
608 if (!ptr || ptr->type != XML_ELEMENT_NODE
609 || XML_STRCMP(ptr->name, "dom"))
610 {
611 dom_log(YLOG_WARN, tinfo, ptr,
612 "bad root element <%s>, expected root element <dom>",
613 ptr->name);
614 return ZEBRA_FAIL;
615 }
616
617 ptr = ptr->children;
618 FOR_EACH_ELEMENT(ptr) {
619 if (!XML_STRCMP(ptr->name, "extract"))
620 {
621 /*
622 <extract name="index">
623 <xslt stylesheet="first.xsl"/>
624 <xslt stylesheet="second.xsl"/>
625 </extract>
626 */
627 struct _xmlAttr *attr;
628 struct filter_extract *f =
629 nmem_malloc(tinfo->nmem_config, sizeof(*f));
630
631 tinfo->extract = f;
632 f->name = 0;
633 f->convert = 0;
634 for (attr = ptr->properties; attr; attr = attr->next)
635 {
636 if (attr_content(attr, "name", &f->name))
637 ;
638 else
639 {
640 dom_log(YLOG_WARN, tinfo, ptr,
641 "bad attribute @%s, expected @name",
642 attr->name);
643 }
644 }
645 parse_convert(tinfo, ptr->children, &f->convert);
646 }
647 else if (!XML_STRCMP(ptr->name, "retrieve"))
648 {
649 /*
650 <retrieve name="F">
651 <xslt stylesheet="some.xsl"/>
652 <xslt stylesheet="some.xsl"/>
653 </retrieve>
654 */
655 struct _xmlAttr *attr;
656 struct filter_retrieve **fp = &tinfo->retrieve_list;
657 struct filter_retrieve *f =
658 nmem_malloc(tinfo->nmem_config, sizeof(*f));
659
660 while (*fp)
661 fp = &(*fp)->next;
662
663 *fp = f;
664 f->name = 0;
665 f->identifier = 0;
666 f->convert = 0;
667 f->next = 0;
668
669 for (attr = ptr->properties; attr; attr = attr->next)
670 {
671 if (attr_content(attr, "identifier",
672 &f->identifier))
673 ;
674 else if (attr_content(attr, "name", &f->name))
675 ;
676 else
677 {
678 dom_log(YLOG_WARN, tinfo, ptr,
679 "bad attribute @%s, expected @identifier|@name",
680 attr->name);
681 }
682 }
683 parse_convert(tinfo, ptr->children, &f->convert);
684 }
685 else if (!XML_STRCMP(ptr->name, "store"))
686 {
687 /*
688 <store name="F">
689 <xslt stylesheet="some.xsl"/>
690 <xslt stylesheet="some.xsl"/>
691 </retrieve>
692 */
693 struct filter_store *f =
694 nmem_malloc(tinfo->nmem_config, sizeof(*f));
695
696 tinfo->store = f;
697 f->convert = 0;
698 parse_convert(tinfo, ptr->children, &f->convert);
699 }
700 else if (!XML_STRCMP(ptr->name, "input"))
701 {
702 /*
703 <input syntax="xml">
704 <xmlreader level="1"/>
705 </input>
706 <input syntax="usmarc">
707 <marc inputcharset="marc-8"/>
708 </input>
709 */
710 struct _xmlAttr *attr;
711 const char *syntax = 0;
712 const char *name = 0;
713 for (attr = ptr->properties; attr; attr = attr->next)
714 {
715 if (attr_content(attr, "syntax", &syntax))
716 ;
717 else if (attr_content(attr, "name", &name))
718 ;
719 else
720 {
721 dom_log(YLOG_WARN, tinfo, ptr,
722 "bad attribute @%s, expected @syntax|@name",
723 attr->name);
724 }
725 }
726 parse_input(tinfo, ptr->children, syntax, name);
727 }
728 else
729 {
730 dom_log(YLOG_WARN, tinfo, ptr,
731 "bad element <%s>, "
732 "expected <extract>|<input>|<retrieve>|<store>",
733 ptr->name);
734 return ZEBRA_FAIL;
735 }
736 }
737 if (!tinfo->input_list)
738 {
739 struct filter_input *p
741 p->u.xmlreader.split_level = 0;
742 p->u.xmlreader.reader = 0;
743 }
744 return ZEBRA_OK;
745}
746
747static struct filter_retrieve *lookup_retrieve(struct filter_info *tinfo,
748 const char *est)
749{
750 struct filter_retrieve *f = tinfo->retrieve_list;
751
752 /* return first schema if no est is provided */
753 if (!est)
754 return f;
755 for (; f; f = f->next)
756 {
757 /* find requested schema */
758 if (est)
759 {
760 if (f->identifier && !strcmp(f->identifier, est))
761 return f;
762 if (f->name && !strcmp(f->name, est))
763 return f;
764 }
765 }
766 return 0;
767}
768
769static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
770{
771 struct filter_info *tinfo = clientData;
772 if (!args || !*args)
773 {
774 yaz_log(YLOG_WARN, "dom filter: need config file");
775 return ZEBRA_FAIL;
776 }
777
778 if (tinfo->fname && !strcmp(args, tinfo->fname))
779 return ZEBRA_OK;
780
781 tinfo->profile_path = res_get(res, "profilePath");
782
783 destroy_dom(tinfo);
784 return parse_dom(tinfo, args);
785}
786
787static void filter_destroy(void *clientData)
788{
789 struct filter_info *tinfo = clientData;
790 destroy_dom(tinfo);
791 nmem_destroy(tinfo->nmem_config);
792 nmem_destroy(tinfo->nmem_record);
793 xfree(tinfo);
794}
795
796static int ioread_ex(void *context, char *buffer, int len)
797{
798 struct recExtractCtrl *p = context;
799 return p->stream->readf(p->stream, buffer, len);
800}
801
/* Close callback handed to the libxml2 IO readers: nothing to close
   here, so it simply reports success (0). */
static int ioclose_ex(void *context)
{
    const int ok = 0;

    return ok;
}
806
807
808
809/* DOM filter style indexing */
810static void index_value_of(struct filter_info *tinfo,
811 struct recExtractCtrl *extctr,
812 RecWord* recword,
813 xmlNodePtr node,
814 const char *index_p)
815{
816 if (tinfo->record_info_invoked == 1)
817 {
818 xmlChar *text = xmlNodeGetContent(node);
819 size_t text_len = strlen((const char *)text);
820
821 /* if there is no text, we do not need to proceed */
822 if (text_len)
823 {
824 /* keep seqno base so that all text will have
825 identical seqno's for multiple fields , e.g
826 <z:index name="title:w any:w title:p">.. */
827
828 zint seqno_base = recword->seqno;
829 zint seqno_max = recword->seqno;
830
831
832 const char *look = index_p;
833 const char *bval;
834 const char *eval;
835
836 xmlChar index[256];
837 xmlChar type[256];
838
839 /* assingning text to be indexed */
840 recword->term_buf = (const char *)text;
841 recword->term_len = text_len;
842
843 /* parsing all index name/type pairs */
844 /* may not start with ' ' or ':' */
845 while (*look && ' ' != *look && ':' != *look)
846 {
847 /* setting name and type to zero */
848 *index = '\0';
849 *type = '\0';
850
851 /* parsing one index name */
852 bval = look;
853 while (*look && ':' != *look && ' ' != *look)
854 {
855 look++;
856 }
857 eval = look;
858 strncpy((char *)index, (const char *)bval, eval - bval);
859 index[eval - bval] = '\0';
860
861
862 /* parsing one index type, if existing */
863 if (':' == *look)
864 {
865 look++;
866
867 bval = look;
868 while (*look && ' ' != *look)
869 {
870 look++;
871 }
872 eval = look;
873 strncpy((char *)type, (const char *)bval, eval - bval);
874 type[eval - bval] = '\0';
875 }
876
877 /* actually indexing the text given */
878
879 recword->seqno = seqno_base;
880 recword->index_name = (const char *)index;
881 if (*type)
882 recword->index_type = (const char *) type;
883
884 /* writing debug out */
885 if (extctr->flagShowRecords)
886 dom_log(YLOG_LOG, tinfo, 0,
887 "INDEX '%s:%s' '%s'",
888 (const char *) index,
889 (const char *) type,
890 (const char *) text);
891
892 (extctr->tokenAdd)(recword);
893
894 if (seqno_max < recword->seqno)
895 seqno_max = recword->seqno;
896
897 /* eat whitespaces */
898 if (*look && ' ' == *look)
899 {
900 look++;
901 }
902 }
903 recword->seqno = seqno_max;
904 }
905 xmlFree(text);
906 }
907}
908
909
910/* DOM filter style indexing */
911static void set_record_info(struct filter_info *tinfo,
912 struct recExtractCtrl *extctr,
913 xmlNodePtr node,
914 const char * id_p,
915 const char * rank_p,
916 const char * type_p)
917{
918 /* writing debug info out */
919 if (extctr && extctr->flagShowRecords)
920 dom_log(YLOG_LOG, tinfo, node,
921 "RECORD id=%s rank=%s type=%s",
922 id_p ? (const char *) id_p : "(null)",
923 rank_p ? (const char *) rank_p : "(null)",
924 type_p ? (const char *) type_p : "(null)");
925
926
927 if (id_p && *id_p)
928 {
929 size_t l = strlen(id_p);
930 if (l >= sizeof(extctr->match_criteria))
931 l = sizeof(extctr->match_criteria)-1;
932 memcpy(extctr->match_criteria, id_p, l);
933 extctr->match_criteria[l] = '\0';
934 }
935
936 if (rank_p && *rank_p)
937 extctr->staticrank = atozint((const char *)rank_p);
938
939 if (type_p && *type_p)
940 {
942 if (!strcmp(type_p, "insert"))
944 else if (!strcmp(type_p, "delete"))
946 else if (!strcmp(type_p, "replace"))
948 else if (!strcmp(type_p, "update"))
950 else if (!strcmp(type_p, "adelete"))
952 else
953 dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p);
954 extctr->action = action;
955 }
956
957 if (tinfo->record_info_invoked == 1)
958 {
959 /* warn about multiple only once */
960 dom_log(YLOG_WARN, tinfo, node, "multiple record elements");
961 }
962 tinfo->record_info_invoked++;
963
964}
965
966
967static void process_xml_element_node(struct filter_info *tinfo,
968 struct recExtractCtrl *extctr,
969 RecWord* recword,
970 xmlNodePtr node);
971
972/* DOM filter style indexing */
974 struct recExtractCtrl *extctr,
975 RecWord* recword,
976 xmlNodePtr node)
977{
978 if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href
979 && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
980 {
981 if (0 == XML_STRCMP(node->name, "index"))
982 {
983 const char *index_p = 0;
984
985 struct _xmlAttr *attr;
986 for (attr = node->properties; attr; attr = attr->next)
987 {
988 if (attr_content(attr, "name", &index_p))
989 {
990 index_value_of(tinfo, extctr, recword, node, index_p);
991 }
992 else
993 {
994 dom_log(YLOG_WARN, tinfo, node,
995 "bad attribute @%s, expected @name",
996 attr->name);
997 }
998 }
999 }
1000 else if (0 == XML_STRCMP(node->name, "group"))
1001 {
1002 const char *unit_p = "element";
1003
1004 struct _xmlAttr *attr;
1005 for (attr = node->properties; attr; attr = attr->next)
1006 {
1007 if (attr_content(attr, "unit", &unit_p))
1008 ;
1009 else
1010 {
1011 dom_log(YLOG_WARN, tinfo, node,
1012 "bad attribute @%s, expected @unit",
1013 attr->name);
1014 }
1015 }
1016 if (node->children)
1017 {
1018 WRBUF w = wrbuf_alloc();
1019 wrbuf_puts(w, ZEBRA_GROUP_INDEX_NAME);
1020 wrbuf_puts(w, unit_p);
1021 recword->term_buf = "begin";
1022 recword->term_len = 5;
1023 recword->index_name = wrbuf_cstr(w);
1024 recword->index_type = "0";
1025
1026 if (extctr->flagShowRecords)
1027 dom_log(YLOG_LOG, tinfo, 0,
1028 "INDEX '%s:%s' '%s'",
1029 (const char *) recword->index_name,
1030 (const char *) recword->index_type,
1031 (const char *) recword->term_buf);
1032 (extctr->tokenAdd)(recword);
1033
1034 for (node = node->children; node; node = node->next)
1035 process_xml_element_node(tinfo, extctr, recword,
1036 node);
1037 recword->term_buf = "end";
1038 recword->term_len = 3;
1039 recword->index_name = wrbuf_cstr(w);
1040 recword->index_type = "0";
1041 if (extctr->flagShowRecords)
1042 dom_log(YLOG_LOG, tinfo, 0,
1043 "INDEX '%s:%s' '%s'",
1044 (const char *) recword->index_name,
1045 (const char *) recword->index_type,
1046 (const char *) recword->term_buf);
1047 (extctr->tokenAdd)(recword);
1048 wrbuf_destroy(w);
1049 }
1050 }
1051 else if (0 == XML_STRCMP(node->name, "record"))
1052 {
1053 const char *id_p = 0;
1054 const char *rank_p = 0;
1055 const char *type_p = 0;
1056
1057 struct _xmlAttr *attr;
1058 for (attr = node->properties; attr; attr = attr->next)
1059 {
1060 if (attr_content(attr, "id", &id_p))
1061 ;
1062 else if (attr_content(attr, "rank", &rank_p))
1063 ;
1064 else if (attr_content(attr, "type", &type_p))
1065 ;
1066 else
1067 {
1068 dom_log(YLOG_WARN, tinfo, node,
1069 "bad attribute @%s, expected @id|@rank|@type",
1070 attr->name);
1071 }
1072 }
1073 set_record_info(tinfo, extctr, node, id_p, rank_p, type_p);
1074 }
1075 else
1076 {
1077 dom_log(YLOG_WARN, tinfo, node,
1078 "bad element <%s>,"
1079 " expected <record>|<index> in namespace '%s'",
1080 node->name, zebra_dom_ns);
1081 }
1082 }
1083}
1084
/* Parse one "name=value" item at *c_ptr inside a processing instruction.
   c_ptr      in/out: current parse position
   name       attribute name to match (must be followed directly by '=')
   value      receives the value, NUL-terminated and truncated to fit
   value_max  size of value in bytes; 0 means "do not store anything"

   On a match the value runs up to the next blank or end of string,
   *c_ptr is advanced past it and 1 is returned.  Otherwise 0 is
   returned and neither *c_ptr nor value is changed. */
static int attr_content_pi(const char **c_ptr, const char *name,
                           char *value, size_t value_max)
{
    size_t name_len = strlen(name);
    const char *look = *c_ptr;
    size_t i = 0;

    if (strncmp(look, name, name_len) != 0 || look[name_len] != '=')
        return 0;

    look += name_len + 1;
    while (*look && ' ' != *look)
    {
        /* i + 1 rather than value_max - 1: no underflow for value_max 0 */
        if (i + 1 < value_max)
            value[i++] = *look;
        look++;
    }
    if (value_max)
        value[i] = '\0';

    *c_ptr = look;
    return 1;
}
1111
1112/* DOM filter style indexing */
1113static void process_xml_pi_node(struct filter_info *tinfo,
1114 struct recExtractCtrl *extctr,
1115 xmlNodePtr node,
1116 const char **index_pp)
1117{
1118 /* if right PI name, continue parsing PI */
1119 if (0 == strcmp(zebra_pi_name, (const char *)node->name))
1120 {
1121 xmlChar *pi_p = node->content;
1122 const char *look = (const char *) node->content;
1123
1124 /* parsing PI record instructions */
1125 if (0 == strncmp((const char *)look, "record", 6))
1126 {
1127 char id[256];
1128 char rank[256];
1129 char type[256];
1130
1131 *id = '\0';
1132 *rank = '\0';
1133 *type = '\0';
1134 look += 6;
1135 for (;;)
1136 {
1137 /* eat whitespace */
1138 while (' ' == *look)
1139 look++;
1140 if (*look == '\0')
1141 break;
1142 if (attr_content_pi(&look, "id", id, sizeof(id)))
1143 ;
1144 else if (attr_content_pi(&look, "rank", rank, sizeof(rank)))
1145 ;
1146 else if (attr_content_pi(&look, "type", type, sizeof(type)))
1147 ;
1148 else
1149 {
1150 dom_log(YLOG_WARN, tinfo, node,
1151 "content '%s', can not parse '%s'",
1152 pi_p, look);
1153 break;
1154 }
1155 }
1156 set_record_info(tinfo, extctr, node, id, rank, type);
1157 }
1158 /* parsing index instruction */
1159 else if (0 == strncmp((const char *)look, "index", 5))
1160 {
1161 look += 5;
1162
1163 /* eat whitespace */
1164 while (*look && ' ' == *look)
1165 look++;
1166
1167 /* export index instructions to outside */
1168 *index_pp = look;
1169 }
1170 else
1171 {
1172 dom_log(YLOG_WARN, tinfo, node,
1173 "content '%s', can not parse '%s'",
1174 pi_p, look);
1175 }
1176 }
1177}
1178
1179/* DOM filter style indexing */
1180static void process_xml_element_node(struct filter_info *tinfo,
1181 struct recExtractCtrl *extctr,
1182 RecWord* recword,
1183 xmlNodePtr node)
1184{
1185 /* remember indexing instruction from PI to next element node */
1186 const char *index_p = 0;
1187
1188 /* check if we are an element node in the special zebra namespace
1189 and either set record data or index value-of node content*/
1190 process_xml_element_zebra_node(tinfo, extctr, recword, node);
1191
1192 /* loop through kid nodes */
1193 for (node = node->children; node; node = node->next)
1194 {
1195 /* check and set PI record and index index instructions */
1196 if (node->type == XML_PI_NODE)
1197 {
1198 process_xml_pi_node(tinfo, extctr, node, &index_p);
1199 }
1200 else if (node->type == XML_ELEMENT_NODE)
1201 {
1202 /* if there was a PI index instruction before this element */
1203 if (index_p)
1204 {
1205 index_value_of(tinfo, extctr, recword, node, index_p);
1206 index_p = 0;
1207 }
1208 process_xml_element_node(tinfo, extctr, recword,node);
1209 }
1210 else
1211 continue;
1212 }
1213}
1214
1215
1216/* DOM filter style indexing */
1217static void extract_dom_doc_node(struct filter_info *tinfo,
1218 struct recExtractCtrl *extctr,
1219 xmlDocPtr doc)
1220{
1221 /* only need to do the initialization once, reuse recword for all terms */
1222 RecWord recword;
1223 (*extctr->init)(extctr, &recword);
1224
1225 process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
1226}
1227
1228
1229static int convert_extract_doc(struct filter_info *tinfo,
1230 struct filter_input *input,
1231 struct recExtractCtrl *p,
1232 xmlDocPtr doc)
1233{
1234 xmlChar *buf_out;
1235 int len_out;
1236 const char *params[10];
1237 xsltStylesheetPtr last_xsp = 0;
1238
1239 /* per default do not ingest record */
1240 tinfo->record_info_invoked = 0;
1241
1242 /* exit if empty document given */
1243 if (!doc)
1244 return RECCTRL_EXTRACT_SKIP;
1245
1246 /* we actuallu have a document which needs to be processed further */
1247 params[0] = 0;
1248 set_param_str(params, "schema", zebra_dom_ns, tinfo->nmem_record);
1249
1250 if (p && p->flagShowRecords)
1251 {
1252 xmlChar *buf_out;
1253 int len_out;
1254 xmlDocDumpMemory(doc, &buf_out, &len_out);
1255#if 0
1256 FILE *outf = fopen("extract.xml", "w");
1257 fwrite(buf_out, 1, len_out, outf);
1258 fclose(outf);
1259#endif
1260 yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out);
1261 }
1262
1263 if (p->setStoreData)
1264 {
1265 xmlDocPtr store_doc = 0;
1266
1267 /* input conversion */
1268 perform_convert(tinfo, p, 0, input->convert, params, &doc, 0);
1269
1270 if (tinfo->store)
1271 {
1272 /* store conversion */
1273 store_doc = xmlCopyDoc(doc, 1);
1274 perform_convert(tinfo, p, 0, tinfo->store->convert,
1275 params, &store_doc, &last_xsp);
1276 }
1277
1278 /* saving either store doc or original doc in case no store doc exists */
1279 if (last_xsp)
1280 xsltSaveResultToString(&buf_out, &len_out,
1281 store_doc ? store_doc : doc, last_xsp);
1282 else
1283 xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
1284
1285 if (p->setStoreData)
1286 (*p->setStoreData)(p, buf_out, len_out);
1287 xmlFree(buf_out);
1288 if (store_doc)
1289 xmlFreeDoc(store_doc);
1290 }
1291
1292
1293 /* extract conversion */
1294 perform_convert(tinfo, p, 0, tinfo->extract->convert, params, &doc, 0);
1295
1296
1297 /* finally, do the indexing */
1298 if (doc){
1299 extract_dom_doc_node(tinfo, p, doc);
1300 xmlFreeDoc(doc);
1301 }
1302
1303 /* there was nothing to index, so there is no inserted/updated record */
1304 if (tinfo->record_info_invoked == 0)
1305 return RECCTRL_EXTRACT_SKIP;
1306
1307 return RECCTRL_EXTRACT_OK;
1308}
1309
1310static int extract_xml_split(struct filter_info *tinfo,
1311 struct filter_input *input,
1312 struct recExtractCtrl *p)
1313{
1314 int ret;
1315
1316 if (p->first_record)
1317 {
1318 if (input->u.xmlreader.reader)
1319 xmlFreeTextReader(input->u.xmlreader.reader);
1320 input->u.xmlreader.reader = xmlReaderForIO(ioread_ex, ioclose_ex,
1321 p /* I/O handler */,
1322 0 /* URL */,
1323 0 /* encoding */,
1324 XML_PARSE_XINCLUDE
1325 | XML_PARSE_NOENT
1326 | XML_PARSE_NONET);
1327 }
1328 if (!input->u.xmlreader.reader)
1330
1331 ret = xmlTextReaderRead(input->u.xmlreader.reader);
1332 while (ret == 1)
1333 {
1334 int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
1335 int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
1336
1337 if (type == XML_READER_TYPE_ELEMENT &&
1338 input->u.xmlreader.split_level == depth)
1339 {
1340 xmlNodePtr ptr;
1341
1342 /* per default do not ingest record */
1343 tinfo->record_info_invoked = 0;
1344
1345 ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
1346 if (ptr)
1347 {
1348 /* we have a new document */
1349
1350 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
1351 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
1352
1353 xmlDocSetRootElement(doc, ptr2);
1354
1355 /* writing debug info out */
1356 if (p->flagShowRecords)
1357 {
1358 xmlChar *buf_out = 0;
1359 int len_out = 0;
1360 xmlDocDumpMemory(doc, &buf_out, &len_out);
1361 yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s",
1362 tinfo->fname ? tinfo->fname : "(none)",
1363 depth, len_out, buf_out);
1364 xmlFree(buf_out);
1365 }
1366
1367 return convert_extract_doc(tinfo, input, p, doc);
1368 }
1369 else
1370 {
1371 xmlFreeTextReader(input->u.xmlreader.reader);
1372 input->u.xmlreader.reader = 0;
1374 }
1375 }
1376 ret = xmlTextReaderRead(input->u.xmlreader.reader);
1377 }
1378 xmlFreeTextReader(input->u.xmlreader.reader);
1379 input->u.xmlreader.reader = 0;
1380 return RECCTRL_EXTRACT_EOF;
1381}
1382
1383static int extract_xml_full(struct filter_info *tinfo,
1384 struct filter_input *input,
1385 struct recExtractCtrl *p)
1386{
1387 if (p->first_record) /* only one record per stream */
1388 {
1389 xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex,
1390 p /* I/O handler */,
1391 0 /* URL */,
1392 0 /* encoding */,
1393 XML_PARSE_XINCLUDE
1394 | XML_PARSE_NOENT
1395 | XML_PARSE_NONET);
1396 if (!doc)
1397 {
1399 }
1400 return convert_extract_doc(tinfo, input, p, doc);
1401 }
1402 else
1403 return RECCTRL_EXTRACT_EOF;
1404}
1405
1406static int extract_iso2709(struct filter_info *tinfo,
1407 struct filter_input *input,
1408 struct recExtractCtrl *p)
1409{
1410 char buf[100000];
1411 int record_length;
1412 int read_bytes, r;
1413
1414 if (p->stream->readf(p->stream, buf, 5) != 5)
1415 return RECCTRL_EXTRACT_EOF;
1416 while (*buf < '0' || *buf > '9')
1417 {
1418 int i;
1419
1420 dom_log(YLOG_WARN, tinfo, 0,
1421 "MARC: Skipping bad byte %d (0x%02X)",
1422 *buf & 0xff, *buf & 0xff);
1423 for (i = 0; i < 4; i++)
1424 buf[i] = buf[i+1];
1425
1426 if (p->stream->readf(p->stream, buf+4, 1) != 1)
1427 return RECCTRL_EXTRACT_EOF;
1428 }
1429 record_length = atoi_n (buf, 5);
1430 if (record_length < 25)
1431 {
1432 dom_log(YLOG_WARN, tinfo, 0,
1433 "MARC record length < 25, is %d", record_length);
1435 }
1436 read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
1437 if (read_bytes < record_length-5)
1438 {
1439 dom_log(YLOG_WARN, tinfo, 0,
1440 "couldn't read whole MARC record");
1442 }
1443 r = yaz_marc_read_iso2709(input->u.marc.handle, buf, record_length);
1444 if (r < record_length)
1445 {
1446 dom_log (YLOG_WARN, tinfo, 0,
1447 "parsing of MARC record failed r=%d length=%d",
1448 r, record_length);
1450 }
1451 else
1452 {
1453 xmlDocPtr rdoc;
1454 xmlNode *root_ptr;
1455 yaz_marc_write_xml(input->u.marc.handle, &root_ptr,
1456 "http://www.loc.gov/MARC21/slim", 0, 0);
1457 rdoc = xmlNewDoc((const xmlChar*) "1.0");
1458 xmlDocSetRootElement(rdoc, root_ptr);
1459 return convert_extract_doc(tinfo, input, p, rdoc);
1460 }
1461 return RECCTRL_EXTRACT_OK;
1462}
1463
1464static int filter_extract(void *clientData, struct recExtractCtrl *p)
1465{
1466 struct filter_info *tinfo = clientData;
1467 struct filter_input *input = tinfo->input_list;
1468
1469 if (!input)
1471
1472 nmem_reset(tinfo->nmem_record);
1473
1474 if (p->setStoreData == 0)
1475 return extract_xml_full(tinfo, input, p);
1476 switch(input->type)
1477 {
1479 if (input->u.xmlreader.split_level == 0)
1480 return extract_xml_full(tinfo, input, p);
1481 else
1482 return extract_xml_split(tinfo, input, p);
1483 break;
1484 case DOM_INPUT_MARC:
1485 return extract_iso2709(tinfo, input, p);
1486 }
1488}
1489
1490static int ioread_ret(void *context, char *buffer, int len)
1491{
1492 struct recRetrieveCtrl *p = context;
1493 int r = p->stream->readf(p->stream, buffer, len);
1494 return r;
1495}
1496
/*
 * Close callback handed to xmlReadIO() during retrieval.  Nothing is
 * closed here; the callback only reports success (0).
 */
static int ioclose_ret(void *context)
{
    (void) context; /* unused */
    return 0;
}
1501
1502static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
1503{
1504 /* const char *esn = zebra_dom_ns; */
1505 const char *esn = 0;
1506 const char *params[32];
1507 struct filter_info *tinfo = clientData;
1508 xmlDocPtr doc;
1509 struct filter_retrieve *retrieve;
1510 xsltStylesheetPtr last_xsp = 0;
1511
1512 if (p->comp)
1513 {
1514 if (p->comp->which == Z_RecordComp_simple
1515 && p->comp->u.simple->which == Z_ElementSetNames_generic)
1516 {
1517 esn = p->comp->u.simple->u.generic;
1518 }
1519 else if (p->comp->which == Z_RecordComp_complex
1520 && p->comp->u.complex->generic->elementSpec
1521 && p->comp->u.complex->generic->elementSpec->which ==
1522 Z_ElementSpec_elementSetName)
1523 {
1524 esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
1525 }
1526 }
1527 retrieve = lookup_retrieve(tinfo, esn);
1528 if (!retrieve)
1529 {
1530 p->diagnostic =
1531 YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
1532 p->addinfo = odr_strdup_null(p->odr, esn);
1533 return 0;
1534 }
1535
1536 params[0] = 0;
1537 set_param_int(params, "id", p->localno, p->odr->mem);
1538 if (p->fname)
1539 set_param_str(params, "filename", p->fname, p->odr->mem);
1540 if (p->staticrank >= 0)
1541 set_param_int(params, "rank", p->staticrank, p->odr->mem);
1542
1543 if (esn)
1544 set_param_str(params, "schema", esn, p->odr->mem);
1545 else
1546 if (retrieve->name)
1547 set_param_str(params, "schema", retrieve->name, p->odr->mem);
1548 else if (retrieve->identifier)
1549 set_param_str(params, "schema", retrieve->identifier, p->odr->mem);
1550 else
1551 set_param_str(params, "schema", "", p->odr->mem);
1552
1553 if (p->score >= 0)
1554 set_param_int(params, "score", p->score, p->odr->mem);
1555 set_param_int(params, "size", p->recordSize, p->odr->mem);
1556
1557 doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
1558 0 /* URL */,
1559 0 /* encoding */,
1560 XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
1561 if (!doc)
1562 {
1563 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1564 return 0;
1565 }
1566
1567 /* retrieve conversion */
1568 perform_convert(tinfo, 0, p, retrieve->convert, params, &doc, &last_xsp);
1569 if (!doc)
1570 {
1571 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1572 }
1573 else if (!p->input_format
1574 || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
1575 {
1576 xmlChar *buf_out;
1577 int len_out;
1578
1579 if (last_xsp)
1580 xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1581 else
1582 xmlDocDumpMemory(doc, &buf_out, &len_out);
1583
1584 p->output_format = yaz_oid_recsyn_xml;
1585 p->rec_len = len_out;
1586 p->rec_buf = odr_malloc(p->odr, p->rec_len);
1587 memcpy(p->rec_buf, buf_out, p->rec_len);
1588 xmlFree(buf_out);
1589 }
1590 else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs))
1591 {
1592 xmlChar *buf_out;
1593 int len_out;
1594
1595 if (last_xsp)
1596 xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1597 else
1598 xmlDocDumpMemory(doc, &buf_out, &len_out);
1599
1600 p->output_format = yaz_oid_recsyn_sutrs;
1601 p->rec_len = len_out;
1602 p->rec_buf = odr_malloc(p->odr, p->rec_len);
1603 memcpy(p->rec_buf, buf_out, p->rec_len);
1604
1605 xmlFree(buf_out);
1606 }
1607 else
1608 {
1609 p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
1610 }
1611 xmlFreeDoc(doc);
1612 return 0;
1613}
1614
1615static struct recType filter_type = {
1616 0,
1617 "dom",
1623};
1624
1625RecType
1626#if IDZEBRA_STATIC_DOM
1627idzebra_filter_dom
1628#else
1630#endif
1631
1632[] = {
1633 &filter_type,
1634 0,
1635};
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */
static void filter_destroy(void *clientData)
Definition mod_dom.c:787
#define DOM_INPUT_MARC
Definition mod_dom.c:97
static void * filter_init(Res res, RecType recType)
Definition mod_dom.c:191
RecType idzebra_filter[]
Definition mod_dom.c:1632
#define XML_STRCMP(a, b)
Definition mod_dom.c:133
static void destroy_dom(struct filter_info *tinfo)
Definition mod_dom.c:238
#define ZEBRA_DOM_NS
Definition mod_dom.c:50
static int extract_xml_split(struct filter_info *tinfo, struct filter_input *input, struct recExtractCtrl *p)
Definition mod_dom.c:1310
#define DOM_INPUT_XMLREADER
Definition mod_dom.c:96
static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
Definition mod_dom.c:579
static ZEBRA_RES perform_convert(struct filter_info *tinfo, struct recExtractCtrl *extctr, struct recRetrieveCtrl *retctr, struct convert_s *convert, const char **params, xmlDocPtr *doc, xsltStylesheetPtr *last_xsp)
Definition mod_dom.c:424
static void process_xml_element_zebra_node(struct filter_info *tinfo, struct recExtractCtrl *extctr, RecWord *recword, xmlNodePtr node)
Definition mod_dom.c:973
static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr, const char *syntax, const char *name)
Definition mod_dom.c:495
static int extract_iso2709(struct filter_info *tinfo, struct filter_input *input, struct recExtractCtrl *p)
Definition mod_dom.c:1406
static int ioclose_ret(void *context)
Definition mod_dom.c:1497
#define FOR_EACH_ELEMENT(ptr)
Definition mod_dom.c:137
static void process_xml_pi_node(struct filter_info *tinfo, struct recExtractCtrl *extctr, xmlNodePtr node, const char **index_pp)
Definition mod_dom.c:1113
static const char * zebra_pi_name
Definition mod_dom.c:55
static void process_xml_element_node(struct filter_info *tinfo, struct recExtractCtrl *extctr, RecWord *recword, xmlNodePtr node)
Definition mod_dom.c:1180
static struct recType filter_type
Definition mod_dom.c:1615
static int ioclose_ex(void *context)
Definition mod_dom.c:802
static void extract_dom_doc_node(struct filter_info *tinfo, struct recExtractCtrl *extctr, xmlDocPtr doc)
Definition mod_dom.c:1217
static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, struct convert_s **l)
Definition mod_dom.c:286
static void index_value_of(struct filter_info *tinfo, struct recExtractCtrl *extctr, RecWord *recword, xmlNodePtr node, const char *index_p)
Definition mod_dom.c:810
static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node, struct recRetrieveCtrl *retctr)
Definition mod_dom.c:369
static void set_record_info(struct filter_info *tinfo, struct recExtractCtrl *extctr, xmlNodePtr node, const char *id_p, const char *rank_p, const char *type_p)
Definition mod_dom.c:911
static int convert_extract_doc(struct filter_info *tinfo, struct filter_input *input, struct recExtractCtrl *p, xmlDocPtr doc)
Definition mod_dom.c:1229
static const char * zebra_dom_ns
Definition mod_dom.c:51
static int ioread_ret(void *context, char *buffer, int len)
Definition mod_dom.c:1490
static int attr_content(struct _xmlAttr *attr, const char *name, const char **dst_content)
Definition mod_dom.c:213
#define ZEBRA_PI_NAME
Definition mod_dom.c:54
static int attr_content_pi(const char **c_ptr, const char *name, char *value, size_t value_max)
Definition mod_dom.c:1085
static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr, const char *fmt,...)
Definition mod_dom.c:146
static int ioread_ex(void *context, char *buffer, int len)
Definition mod_dom.c:796
static void set_param_str(const char **params, const char *name, const char *value, NMEM nmem)
Definition mod_dom.c:167
static void destroy_xsp(struct convert_s *c)
Definition mod_dom.c:225
static void set_param_int(const char **params, const char *name, zint value, NMEM nmem)
Definition mod_dom.c:179
static struct filter_input * new_input(struct filter_info *tinfo, int type)
Definition mod_dom.c:480
static struct filter_retrieve * lookup_retrieve(struct filter_info *tinfo, const char *est)
Definition mod_dom.c:747
static int extract_xml_full(struct filter_info *tinfo, struct filter_input *input, struct recExtractCtrl *p)
Definition mod_dom.c:1383
static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
Definition mod_dom.c:769
convert_type
Definition mod_dom.c:57
@ convert_xslt_type
Definition mod_dom.c:58
@ convert_meta_type
Definition mod_dom.c:59
static FILE * outf
Definition readfile.c:38
#define RECCTRL_EXTRACT_EOF
Definition recctrl.h:164
#define ZEBRA_GROUP_INDEX_NAME
Definition recctrl.h:47
zebra_recctrl_action_t
Definition recctrl.h:87
@ action_a_delete
Definition recctrl.h:97
@ action_delete
Definition recctrl.h:93
@ action_insert
Definition recctrl.h:89
@ action_update
Definition recctrl.h:95
@ action_replace
Definition recctrl.h:91
#define RECCTRL_EXTRACT_ERROR_GENERIC
Definition recctrl.h:165
#define RECCTRL_EXTRACT_OK
Definition recctrl.h:163
#define RECCTRL_EXTRACT_SKIP
Definition recctrl.h:167
const char * res_get(Res r, const char *name)
Definition res.c:294
const char * term_buf
Definition recctrl.h:56
const char * index_type
Definition recctrl.h:52
zint seqno
Definition recctrl.h:60
int term_len
Definition recctrl.h:58
const char * index_name
Definition recctrl.h:54
int(* readf)(struct ZebraRecStream *s, char *buf, size_t count)
read function
Definition recctrl.h:75
struct convert_s * next
Definition mod_dom.c:77
union convert_s::@18 u
struct convert_meta meta
Definition mod_dom.c:75
enum convert_type which
Definition mod_dom.c:72
struct convert_xslt xslt
Definition mod_dom.c:74
xsltStylesheetPtr stylesheet_xsp
Definition mod_dom.c:64
const char * stylesheet
Definition mod_dom.c:63
const char * name
Definition mod_dom.c:81
struct convert_s * convert
Definition mod_dom.c:82
struct filter_retrieve * retrieve_list
Definition mod_dom.c:125
NMEM nmem_config
Definition mod_dom.c:122
char * fname
Definition mod_alvis.c:60
struct filter_extract * extract
Definition mod_dom.c:124
NMEM nmem_record
Definition mod_dom.c:121
xmlDocPtr doc_config
Definition mod_dom.c:123
char * full_name
Definition mod_alvis.c:61
struct filter_store * store
Definition mod_dom.c:127
int record_info_invoked
Definition mod_dom.c:128
struct filter_input * input_list
Definition mod_dom.c:126
const char * profile_path
Definition mod_alvis.c:62
xmlDocPtr doc
Definition mod_alvis.c:59
xmlTextReaderPtr reader
Definition mod_dom.c:105
const char * input_charset
Definition mod_dom.c:109
const char * syntax
Definition mod_dom.c:99
struct filter_input::@19::@21 marc
struct filter_input::@19::@20 xmlreader
union filter_input::@19 u
const char * name
Definition mod_dom.c:100
struct convert_s * convert
Definition mod_dom.c:101
struct filter_input * next
Definition mod_dom.c:114
yaz_iconv_t iconv
Definition mod_dom.c:111
yaz_marc_t handle
Definition mod_dom.c:110
int split_level
Definition mod_dom.c:106
struct filter_retrieve * next
Definition mod_dom.c:93
struct convert_s * convert
Definition mod_dom.c:92
const char * identifier
Definition mod_dom.c:91
const char * name
Definition mod_dom.c:90
struct convert_s * convert
Definition mod_dom.c:86
record extract for indexing
Definition recctrl.h:101
int flagShowRecords
Definition recctrl.h:108
void(* init)(struct recExtractCtrl *p, RecWord *w)
Definition recctrl.h:103
enum zebra_recctrl_action_t action
Definition recctrl.h:114
char match_criteria[256]
Definition recctrl.h:109
void(* tokenAdd)(RecWord *w)
Definition recctrl.h:105
zint staticrank
Definition recctrl.h:110
void(* setStoreData)(struct recExtractCtrl *p, void *buf, size_t size)
Definition recctrl.h:106
struct ZebraRecStream * stream
Definition recctrl.h:102
const Odr_oid * input_format
Definition recctrl.h:123
int(* special_fetch)(void *handle, const char *esn, const Odr_oid *input_format, const Odr_oid **output_format, WRBUF result, WRBUF addinfo)
Definition recctrl.h:142
char * addinfo
Definition recctrl.h:138
Z_RecordComposition * comp
Definition recctrl.h:124
struct ZebraRecStream * stream
Definition recctrl.h:119
const Odr_oid * output_format
Definition recctrl.h:134
void * handle
Definition recctrl.h:141
void * rec_buf
Definition recctrl.h:135
long zint
Zebra integer.
Definition util.h:66
#define ZEBRA_FAIL
Definition util.h:81
#define ZINT_FORMAT
Definition util.h:72
zint atozint(const char *src)
Definition zint.c:55
#define ZEBRA_OK
Definition util.h:82
short ZEBRA_RES
Common return type for Zebra API.
Definition util.h:80