25 #include <sys/types.h>
29 #include <yaz/oid_db.h>
30 #include <yaz/diagbib1.h>
31 #include <yaz/wrbuf.h>
32 #include <yaz/snprintf.h>
37 #define GRS_MAX_WORD 512
49 while (*sp->
src ==
' ')
53 while (*sp->
src && !strchr(
"<>();,-: ", *sp->
src))
144 if (!
sp_expr(sp, n, &search_w))
154 if (min_pos == -1 || i < min_pos)
165 yaz_snprintf(num_str,
sizeof(num_str),
"%d", min_pos);
175 if (sp->
len == 4 && !memcmp(sp->
tok,
"data", sp->
len))
184 else if (sp->
len == 3 && !memcmp(sp->
tok,
"tag", sp->
len))
193 else if (sp->
len == 4 && !memcmp(sp->
tok,
"attr", sp->
len))
222 else if (sp->
len == 5 && !memcmp(sp->
tok,
"first", sp->
len))
226 else if (sp->
len == 5 && !memcmp(sp->
tok,
"range", sp->
len))
230 else if (sp->
len > 0 && isdigit(*(
unsigned char *)sp->
tok))
234 b = nmem_malloc(sp->
nmem, sp->
len);
235 memcpy(b, sp->
tok, sp->
len);
239 else if (sp->
len > 2 && sp->
tok[0] ==
'\'' && sp->
tok[sp->
len-1] ==
'\'')
261 sp->
nmem = nmem_create();
269 nmem_destroy(sp->
nmem);
280 nmem_reset(sp->
nmem);
299 " Only attributes (@) are supported in xelm xpath predicates");
300 yaz_log(YLOG_WARN,
"predicate %s ignored", p->
u.
relation.name);
306 for (attr = n->
u.
tag.attributes; attr; attr = attr->
next) {
307 if (!strcmp(attr->
name, attname)) {
311 "Only '=' relation is supported (%s)",p->
u.
relation.op);
312 yaz_log(YLOG_WARN,
"predicate %s ignored", p->
u.
relation.name);
331 if (!strcmp(p->
u.
boolean.op,
"and")) {
335 else if (!strcmp(p->
u.
boolean.op,
"or")) {
339 yaz_log(YLOG_WARN,
"Unknown boolean relation %s, ignored",p->
u.
boolean.op);
353 const char *p = text;
356 for (c = *p++, t = s->
trans, i = s->
tran_no; --i >= 0; t++)
358 if (c >= t->
ch[0] && c <= t->
ch[1])
371 if (c >= t->
ch[0] && c <= t->
ch[1])
405 WRBUF pexpr = wrbuf_alloc();
407 wrbuf_printf(pexpr,
"/%s\n", tagpath);
425 for (xpe1 = xpe; xpe1; xpe1 = xpe1->
match_next)
429 assert(ok == 0 || ok == 1);
458 wrbuf_destroy(pexpr);
516 for (nn = n; nn; nn = nn->
parent)
520 size_t tlen = strlen(nn->
u.
tag.tag);
521 if (tlen + flen > (max - 2))
523 memcpy(tag_path_full + flen, nn->
u.
tag.tag, tlen);
525 tag_path_full[flen++] =
'/';
531 tag_path_full[flen] = 0;
543 char tag_path_full[1024];
544 int termlist_only = 1;
547 if (!n->
root->u.root.absyn
564 if (n->
root->u.root.absyn &&
568 for (; tl; tl = tl->
next)
573 memcpy(&wrd_tl, wrd,
sizeof(*wrd));
582 printf(
"%*sIdx: [%s]", (level + 1) * 4,
"",
586 for (i = 0; i<wrd_tl.
term_len && i < 40; i++)
597 if (wrd_tl.
seqno > max_seqno)
598 max_seqno = wrd_tl.
seqno;
601 wrd->
seqno = max_seqno;
618 wrd->
term_len = strlen(tag_path_full);
622 printf(
"%*s tag=", (level + 1) * 4,
"");
623 for (i = 0; i<wrd->
term_len && i < 40; i++)
636 if (xpath_is_start == 1)
638 #define MAX_ATTR_COUNT 50
642 for (xp = n->
u.
tag.attributes; xp; xp = xp->
next) {
644 char attr_tag_path_full[1026];
647 yaz_snprintf(attr_tag_path_full,
sizeof(attr_tag_path_full),
648 "@%s/%s", xp->
name, tag_path_full);
665 strlen(xp->
name) + strlen(xp->
value) <
sizeof(comb)-2)
668 strcpy(comb, xp->
name);
670 strcat(comb, xp->
value);
685 for (xp = n->
u.
tag.attributes; xp; xp = xp->
next) {
687 char attr_tag_path_full[1026];
690 yaz_snprintf(attr_tag_path_full,
sizeof(attr_tag_path_full),
691 "@%s/%s", xp->
name, tag_path_full);
695 for (; tl; tl = tl->
next)
720 if (!xpdone && !termlist_only)
723 xp->
value,
"w", p, wrd);
743 while (!par->
u.
tag.element)
746 if (!par || !(tlist = par->
u.
tag.element->termlists))
748 for (; tlist; tlist = tlist->
next)
760 printf(
"%*sIdx: [%s]", (level + 1) * 4,
"",
764 for (i = 0; i<wrd->
term_len && i < 40; i++)
785 for (; n; n = n->
next)
791 printf(
"%*s", level * 4,
"");
792 printf(
"Record type: '%s'\n", n->
u.
root.type);
798 printf(
"%*s", level * 4,
"");
799 if (!(e = n->
u.
tag.element))
800 printf(
"Local tag: '%s'\n", n->
u.
tag.tag);
803 printf(
"Elm: '%s' ", e->
name);
828 if (n->
root->u.root.absyn)
844 printf(
"%*s", level * 4,
"");
846 if (n->
u.
data.len > 256)
847 printf(
"'%.170s ... %.70s'\n", n->
u.
data.data,
849 else if (n->
u.
data.len > 0)
850 printf(
"'%.*s'\n", n->
u.
data.len, n->
u.
data.data);
871 printf(
"%*s-------------\n\n", level * 4,
"");
911 n = (*grs_read)(&gri);
940 NMEM
mem = nmem_create();
950 char **addinfo, ODR o)
958 case Z_RecordComp_simple:
959 if (c->u.simple->which != Z_ElementSetNames_generic)
962 c->u.simple->u.generic)))
964 yaz_log(YLOG_LOG,
"Unknown esetname '%s'", c->u.simple->u.generic);
965 *addinfo = odr_strdup(o, c->u.simple->u.generic);
968 yaz_log(YLOG_DEBUG,
"Esetname '%s' in simple compspec",
969 c->u.simple->u.generic);
972 case Z_RecordComp_complex:
973 if (c->u.complex->generic)
976 if ((p = c->u.complex->generic->elementSpec))
980 case Z_ElementSpec_elementSetName:
983 p->u.elementSetName)))
985 yaz_log(YLOG_DEBUG,
"Unknown esetname '%s'",
986 p->u.elementSetName);
987 *addinfo = odr_strdup(o, p->u.elementSetName);
990 yaz_log(YLOG_DEBUG,
"Esetname '%s' in complex compspec",
991 p->u.elementSetName);
994 case Z_ElementSpec_externalSpec:
995 if (p->u.externalSpec->which == Z_External_espec1)
997 yaz_log(YLOG_DEBUG,
"Got Espec-1");
998 espec = p->u.externalSpec-> u.espec1;
1002 yaz_log(YLOG_LOG,
"Unknown external espec.");
1014 yaz_log(YLOG_DEBUG,
"Element: Espec-1 match");
1019 yaz_log(YLOG_DEBUG,
"Element: all match");
1038 const char *idzebra_ns[3];
1039 const char *i2 =
"\n ";
1040 const char *i4 =
"\n ";
1043 idzebra_ns[0] =
"xmlns";
1044 idzebra_ns[1] =
"http://www.indexdata.dk/zebra/";
1078 int res, selected = 0;
1081 const char *tagname;
1083 const Odr_oid *requested_schema = 0;
1087 mem = nmem_create();
1093 yaz_log(YLOG_DEBUG,
"grs_retrieve");
1094 node = (*grs_read)(&gri);
1097 p->
diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1110 yaz_log(YLOG_DEBUG,
"grs_retrieve: size");
1119 if (tagname && p->
score >= 0 &&
1122 yaz_log(YLOG_DEBUG,
"grs_retrieve: %s", tagname);
1127 "localControlNumber");
1128 if (tagname && p->
localno > 0 &&
1131 yaz_log(YLOG_DEBUG,
"grs_retrieve: %s", tagname);
1147 if (p->
comp && p->
comp->which == Z_RecordComp_complex &&
1148 p->
comp->u.complex->generic &&
1149 p->
comp->u.complex->generic->which == Z_Schema_oid &&
1150 p->
comp->u.complex->generic->schema.oid)
1152 requested_schema = p->
comp->u.complex->generic->schema.oid;
1157 if (requested_schema)
1159 yaz_log(YLOG_DEBUG,
"grs_retrieve: schema mapping");
1160 for (map = node->
u.
root.absyn->maptabs; map; map = map->
next)
1162 if (!oid_oidcmp(map->
oid, requested_schema))
1167 p->
diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1174 if (node->
u.
root.absyn
1175 && oid_oidcmp(requested_schema, node->
u.
root.absyn->oid))
1177 p->
diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1187 yaz_log(YLOG_DEBUG,
"grs_retrieve: syntax mapping");
1188 if (node->
u.
root.absyn)
1189 for (map = node->
u.
root.absyn->maptabs; map; map = map->
next)
1196 p->
diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1203 yaz_log(YLOG_DEBUG,
"grs_retrieve: schemaIdentifier");
1204 if (node->
u.
root.absyn && node->
u.
root.absyn->oid
1207 char oid_str[OID_STR_MAX];
1208 char *dot_str = oid_oid_to_dotstring(node->
u.
root.absyn->oid, oid_str);
1211 "schemaIdentifier",
mem)))
1214 dnew->
u.
data.data = (
char *) nmem_strdup(
mem, dot_str);
1215 dnew->
u.
data.len = strlen(dot_str);
1219 yaz_log(YLOG_DEBUG,
"grs_retrieve: element spec");
1227 else if (p->
comp && !res)
1233 yaz_log(YLOG_DEBUG,
"grs_retrieve: transfer syntax mapping");
1250 p->
diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1253 char *new_buf = (
char*) odr_malloc(p->
odr, p->
rec_len);
1258 else if (!oid_oidcmp(p->
input_format, yaz_oid_recsyn_grs_1))
1264 p->
diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1268 else if (!oid_oidcmp(p->
input_format, yaz_oid_recsyn_explain))
1275 p->
diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1279 else if (!oid_oidcmp(p->
input_format, yaz_oid_recsyn_summary))
1285 p->
diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1289 else if (!oid_oidcmp(p->
input_format, yaz_oid_recsyn_sutrs))
1296 p->
diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1299 char *new_buf = (
char*) odr_malloc(p->
odr, p->
rec_len);
1304 else if (!oid_oidcmp(p->
input_format, yaz_oid_recsyn_soif))
1311 p->
diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1314 char *new_buf = (
char*) odr_malloc(p->
odr, p->
rec_len);
1321 if (!node->
u.
root.absyn)
1322 p->
diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1325 for (marctab = node->
u.
root.absyn->marc; marctab;
1326 marctab = marctab->
next)
1330 p->
diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1338 p->
diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1341 char *new_buf = (
char*) odr_malloc(p->
odr, p->
rec_len);
data1_node * data1_mk_tag(data1_handle dh, NMEM nmem, const char *tag, const char **attr, data1_node *at)
int data1_iconv(data1_handle dh, NMEM m, data1_node *n, const char *tocode, const char *fromcode)
data1_node * data1_mk_tag_data_text(data1_handle dh, data1_node *at, const char *tag, const char *str, NMEM nmem)
void data1_concat_text(data1_handle dh, NMEM m, data1_node *n)
data1_node * data1_mk_tag_data_wd(data1_handle dh, data1_node *at, const char *tagname, NMEM m)
char * data1_nodetosoif(data1_handle dh, data1_node *n, int select, int *len)
char * data1_nodetoidsgml(data1_handle dh, data1_node *n, int select, int *len)
void data1_set_data_zint(data1_handle dh, data1_node *res, NMEM m, zint num)
data1_node * data1_get_root_tag(data1_handle dh, data1_node *n)
void data1_pr_tree(data1_handle dh, data1_node *n, FILE *out)
Z_ExplainRecord * data1_nodetoexplain(data1_handle dh, data1_node *n, int select, ODR o)
char * data1_nodetomarc(data1_handle dh, data1_marctab *p, data1_node *n, int selected, int *len)
char * data1_nodetobuf(data1_handle dh, data1_node *n, int select, int *len)
data1_esetname * data1_getesetbyname(data1_handle dh, data1_absyn *a, const char *name)
Z_BriefBib * data1_nodetosummary(data1_handle dh, data1_node *n, int select, ODR o)
data1_node * get_parent_tag(data1_handle dh, data1_node *n)
data1_node * data1_mk_tag_data_zint(data1_handle dh, data1_node *at, const char *tag, zint num, NMEM nmem)
data1_node * data1_map_record(data1_handle dh, data1_node *n, data1_maptab *map, NMEM m)
Z_GenericRecord * data1_nodetogr(data1_handle dh, data1_node *n, int select, ODR o, int *len)
int data1_doespec1(data1_handle dh, data1_node *n, Z_Espec1 *e)
@ DATA1_XPATH_INDEXING_ENABLE
void data1_remove_idzebra_subtree(data1_handle dh, data1_node *n)
const char * data1_get_encoding(data1_handle dh, data1_node *n)
const char * data1_systag_lookup(data1_absyn *absyn, const char *tag, const char *default_value)
data1_node * data1_mk_text(data1_handle dh, NMEM mem, const char *buf, data1_node *parent)
data1_node * data1_mk_tag_data_int(data1_handle dh, data1_node *at, const char *tag, int num, NMEM nmem)
#define ZEBRA_XPATH_ELM_END
#define ZEBRA_XPATH_ATTR_CDATA
#define RECCTRL_EXTRACT_EOF
#define ZEBRA_XPATH_CDATA
#define ZEBRA_XPATH_ELM_BEGIN
#define ZEBRA_XPATH_ATTR_NAME
#define RECCTRL_EXTRACT_ERROR_GENERIC
#define RECCTRL_EXTRACT_OK
static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
static void index_termlist(struct source_parser *sp, data1_node *par, data1_node *n, struct recExtractCtrl *p, int level, RecWord *wrd)
static int dumpkeys_r(struct source_parser *sp, data1_node *n, struct recExtractCtrl *p, int level, RecWord *wrd)
int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
static void source_parser_destroy(struct source_parser *sp)
static int dfa_match_first(struct DFA_state **dfaar, const char *text)
static struct source_parser * source_parser_create(void)
static int sp_lex(struct source_parser *sp)
static int sp_parse(struct source_parser *sp, data1_node *n, RecWord *wrd, const char *src)
data1_termlist * xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
static void index_xpath_attr(char *tag_path, char *name, char *value, char *structure, struct recExtractCtrl *p, RecWord *wrd)
static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c, char **addinfo, ODR o)
static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, data1_node *(*grs_read)(struct grs_read_info *))
static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
int zebra_grs_extract(void *clientData, struct recExtractCtrl *p, data1_node *(*grs_read)(struct grs_read_info *))
static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
static void zebra_xml_metadata(struct recRetrieveCtrl *p, data1_node *top, NMEM mem)
static int grs_extract_sub(void *clientData, struct recExtractCtrl *p, NMEM mem, data1_node *(*grs_read)(struct grs_read_info *))
static void index_xpath(struct source_parser *sp, data1_node *n, struct recExtractCtrl *p, int level, RecWord *wrd, char *xpath_index, int xpath_is_start)
int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
struct DFA_state ** states
struct data1_xpelement * xp_elements
struct data1_maptab * next
struct data1_marctab * next
struct data1_node::@2::@3 root
struct data1_node * parent
struct data1_node * child
struct data1_tagset * tagset
union data1_tag::@1 value
struct data1_termlist * next
struct data1_xattr * next
data1_termlist * termlists
struct xpath_location_step xpath[XPATH_STEP_COUNT]
struct data1_xpelement * match_next
struct data1_xpelement * next
struct ZebraRecStream * stream
const Odr_oid * input_format
Z_RecordComposition * comp
struct ZebraRecStream * stream
const Odr_oid * output_format
struct xpath_predicate * predicate
union xpath_predicate::@8 u
struct xpath_predicate::@8::@9 relation
struct xpath_predicate::@8::@10 boolean
#define XPATH_PREDICATE_BOOLEAN
#define XPATH_PREDICATE_RELATION