YAZ 5.35.1
marc_read_xml.c
Go to the documentation of this file.
1/* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
4 */
5
11#if HAVE_CONFIG_H
12#include <config.h>
13#endif
14
15#ifdef WIN32
16#include <windows.h>
17#endif
18
19#include <stdio.h>
20#include <string.h>
21#include <yaz/marcdisp.h>
22#include <yaz/wrbuf.h>
23#include <yaz/yaz-util.h>
24#include <yaz/nmem_xml.h>
25
26#if YAZ_HAVE_XML2
27#include <libxml/tree.h>
28#endif
29
30#if YAZ_HAVE_XML2
31static int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
32{
33 NMEM nmem = yaz_marc_get_nmem(mt);
34 for (; ptr; ptr = ptr->next)
35 {
36 if (ptr->type == XML_ELEMENT_NODE)
37 {
38 if (!strcmp((const char *) ptr->name, "subfield"))
39 {
40 size_t ctrl_data_len = 0;
41 char *ctrl_data_buf = 0;
42 const xmlNode *p = 0, *ptr_code = 0;
43 struct _xmlAttr *attr;
44 for (attr = ptr->properties; attr; attr = attr->next)
45 if (!strcmp((const char *)attr->name, "code"))
46 ptr_code = attr->children;
47 else
48 {
50 mt, "Bad attribute '%.80s' for 'subfield'",
51 attr->name);
52 return -1;
53 }
54 if (!ptr_code)
55 {
57 mt, "Missing attribute 'code' for 'subfield'" );
58 return -1;
59 }
60 if (ptr_code->type == XML_TEXT_NODE)
61 {
62 ctrl_data_len =
63 strlen((const char *)ptr_code->content);
64 }
65 else
66 {
68 mt, "Missing value for 'code' in 'subfield'" );
69 return -1;
70 }
71 for (p = ptr->children; p ; p = p->next)
72 if (p->type == XML_TEXT_NODE)
73 ctrl_data_len += strlen((const char *)p->content);
74 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
75 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
76 for (p = ptr->children; p ; p = p->next)
77 if (p->type == XML_TEXT_NODE)
78 strcat(ctrl_data_buf, (const char *)p->content);
79 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
80 }
81 else
82 {
84 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
85 return -1;
86 }
87 }
88 }
89 return 0;
90}
91
92static char *element_attribute_value_extract(const xmlNode *ptr,
93 const char *attribute_name,
94 NMEM nmem)
95{
96 const char *name = (const char *) ptr->name;
97 size_t length = strlen(name);
98 xmlAttr *attr;
99 if (length > 1 )
100 return nmem_strdup(nmem, name+1);
101 /* TODO Extract from attribute where matches attribute_name */
102 for (attr = ptr->properties; attr; attr = attr->next)
103 if (!strcmp((const char *)attr->name, attribute_name))
104 return nmem_text_node_cdata(attr->children, nmem);
105 return 0;
106}
107
108static void get_indicator_value(yaz_marc_t mt, const xmlNode *ptr,
109 char *res, int turbo, int indicator_length)
110{
111 int i;
112 res[0] = '\0';
113 for (i = 1; i <= indicator_length; i++)
114 {
115 struct _xmlAttr *attr;
116 char attrname[16];
117 sprintf(attrname, "%s%d", turbo ? "i" : "ind", i);
118 for (attr = ptr->properties; attr; attr = attr->next)
119 {
120 if (!strcmp((const char *)attr->name, attrname) &&
121 attr->children && attr->children->type == XML_TEXT_NODE &&
122 attr->children->content &&
123 strlen((const char *) attr->children->content) < 5)
124 {
125 strcat(res, (const char *)attr->children->content);
126 break;
127 }
128 }
129 if (!attr)
130 strcat(res, " ");
131 }
132}
133
134static int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
135{
136 for (; ptr; ptr = ptr->next)
137 {
138 if (ptr->type == XML_ELEMENT_NODE)
139 {
140 if (!strncmp((const char *) ptr->name, "s", 1))
141 {
142 NMEM nmem = yaz_marc_get_nmem(mt);
143 xmlNode *p;
144 size_t ctrl_data_len = 0;
145 char *ctrl_data_buf = 0;
146 const char *tag_value = element_attribute_value_extract(ptr, "code", nmem);
147 if (!tag_value)
148 {
150 mt, "Missing 'code' value for 'subfield'" );
151 return -1;
152 }
153
154 ctrl_data_len = strlen((const char *) tag_value);
155 /* Extract (length) from CDATA */
156 for (p = ptr->children; p ; p = p->next)
157 if (p->type == XML_TEXT_NODE)
158 ctrl_data_len += strlen((const char *)p->content);
159 /* Allocate memory for code value (1 character (can be multi-byte) and data */
160 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
161 /* Build a string with "<Code><data>" */
162 strcpy(ctrl_data_buf, (const char *) tag_value);
163 for (p = ptr->children; p ; p = p->next)
164 if (p->type == XML_TEXT_NODE)
165 strcat(ctrl_data_buf, (const char *)p->content);
166 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
167 }
168 else
169 {
171 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
172 return -1;
173 }
174 }
175 }
176 return 0;
177}
178
179
180static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p,
181 int *indicator_length)
182{
183 int identifier_length;
184 int base_address;
185 int length_data_entry;
186 int length_starting;
187 int length_implementation;
188 const char *leader = 0;
189 const xmlNode *ptr = *ptr_p;
190
191 for(; ptr; ptr = ptr->next)
192 if (ptr->type == XML_ELEMENT_NODE)
193 {
194 if ( !strcmp( (const char *) ptr->name, "leader") ||
195 (!strncmp((const char *) ptr->name, "l", 1) ))
196 {
197 xmlNode *p = ptr->children;
198 for(; p; p = p->next)
199 if (p->type == XML_TEXT_NODE)
200 leader = (const char *) p->content;
201 ptr = ptr->next;
202 }
203 break;
204 }
205 if (!leader)
206 {
207 yaz_marc_cprintf(mt, "Missing leader. Inserting fake leader");
208 leader = "00000nam a22000000a 4500";
209 }
210 if (strlen(leader) != 24)
211 {
212 yaz_marc_cprintf(mt, "Bad length %d of leader data."
213 " Must have length of 24 characters", strlen(leader));
214 return -1;
215 }
216 yaz_marc_set_leader(mt, leader,
217 indicator_length,
218 &identifier_length,
219 &base_address,
220 &length_data_entry,
221 &length_starting,
222 &length_implementation);
223 *ptr_p = ptr;
224 return 0;
225}
226
227static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr,
228 int indicator_length)
229{
230 for(; ptr; ptr = ptr->next)
231 if (ptr->type == XML_ELEMENT_NODE)
232 {
233 if (!strcmp( (const char *) ptr->name, "controlfield"))
234 {
235 const xmlNode *ptr_tag = 0;
236 struct _xmlAttr *attr;
237 for (attr = ptr->properties; attr; attr = attr->next)
238 if (!strcmp((const char *)attr->name, "tag"))
239 ptr_tag = attr->children;
240 else
241 {
243 mt, "Bad attribute '%.80s' for 'controlfield'",
244 attr->name);
245 return -1;
246 }
247 if (!ptr_tag)
248 {
250 mt, "Missing attribute 'tag' for 'controlfield'" );
251 return -1;
252 }
253 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
254 }
255 else if (!strcmp((const char *) ptr->name, "datafield"))
256 {
257 char indstr[48];
258 const xmlNode *ptr_tag = 0;
259 struct _xmlAttr *attr;
260
261 get_indicator_value(mt, ptr, indstr, 0, indicator_length);
262 for (attr = ptr->properties; attr; attr = attr->next)
263 if (!strcmp((const char *)attr->name, "tag"))
264 ptr_tag = attr->children;
265 else if (!strncmp((const char *)attr->name, "ind", 3))
266 ;
267 else
268 {
270 mt, "Bad attribute '%.80s' for 'datafield'",
271 attr->name);
272 }
273 if (!ptr_tag)
274 {
276 mt, "Missing attribute 'tag' for 'datafield'" );
277 return -1;
278 }
279 yaz_marc_add_datafield_xml(mt, ptr_tag,
280 indstr, indicator_length);
281 if (yaz_marc_read_xml_subfields(mt, ptr->children))
282 return -1;
283 }
284 else
285 {
287 "Expected element controlfield or datafield,"
288 " got %.80s", ptr->name);
289 return -1;
290 }
291 }
292 return 0;
293}
294
295
296static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr,
297 int indicator_length)
298{
299 for(; ptr; ptr = ptr->next)
300 if (ptr->type == XML_ELEMENT_NODE)
301 {
302 if (!strncmp( (const char *) ptr->name, "c", 1))
303 {
304 NMEM nmem = yaz_marc_get_nmem(mt);
305 char *tag_value = element_attribute_value_extract(ptr, "tag", nmem);
306 if (!tag_value)
307 {
309 mt, "Missing attribute 'tag' for 'controlfield'" );
310 return -1;
311 }
312 yaz_marc_add_controlfield_xml2(mt, tag_value, ptr->children);
313 }
314 else if (!strncmp((const char *) ptr->name, "d",1))
315 {
316 struct _xmlAttr *attr;
317 NMEM nmem = yaz_marc_get_nmem(mt);
318 char *tag_value;
319 char *indstr = nmem_malloc(nmem, indicator_length * 5);
320 tag_value = element_attribute_value_extract(ptr, "tag", nmem);
321 if (!tag_value)
322 {
324 mt, "Missing attribute 'tag' for 'datafield'" );
325 return -1;
326 }
327 get_indicator_value(mt, ptr, indstr, 1, indicator_length);
328 for (attr = ptr->properties; attr; attr = attr->next)
329 if (strlen((const char *)attr->name) == 2 &&
330 attr->name[0] == 'i')
331 ;
332 else
333 {
335 mt, "Bad attribute '%.80s' for 'd'", attr->name);
336 }
337 yaz_marc_add_datafield_xml2(mt, tag_value, indstr);
338 if (yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */))
339 return -1;
340 }
341 else
342 {
344 "Expected element controlfield or datafield,"
345 " got %.80s", ptr->name);
346 return -1;
347 }
348 }
349 return 0;
350}
351
352
353#endif
354
355#if YAZ_HAVE_XML2
356int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
357{
358 int indicator_length = 0;
359 int format = 0;
360 yaz_marc_reset(mt);
361
362 for(; ptr; ptr = ptr->next)
363 if (ptr->type == XML_ELEMENT_NODE)
364 {
365 if (!strcmp((const char *) ptr->name, "record"))
366 {
367 format = YAZ_MARC_MARCXML;
368 break;
369 }
370 else if (!strcmp((const char *) ptr->name, "r"))
371 {
372 format = YAZ_MARC_TURBOMARC;
373 break;
374 }
375 else
376 {
378 mt, "Unknown element '%.80s' in MARC XML reader",
379 ptr->name);
380 return -1;
381 }
382 }
383 if (!ptr)
384 {
385 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
386 return -1;
387 }
388 /* ptr points to record node now */
389 ptr = ptr->children;
390 if (yaz_marc_read_xml_leader(mt, &ptr, &indicator_length))
391 return -1;
392
393 switch (format)
394 {
395 case YAZ_MARC_MARCXML:
396 return yaz_marc_read_xml_fields(mt, ptr, indicator_length);
398 return yaz_marc_read_turbo_xml_fields(mt, ptr, indicator_length);
399 }
400 return -1;
401}
402#endif
403
404
405/*
406 * Local variables:
407 * c-basic-offset: 4
408 * c-file-style: "Stroustrup"
409 * indent-tabs-mode: nil
410 * End:
411 * vim: shiftwidth=4 tabstop=8 expandtab
412 */
413
char * name
Definition initopt.c:18
static int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
parses MARCXML/MarcXchange/TurboMARC record from xmlNode pointer
static char * element_attribute_value_extract(const xmlNode *ptr, const char *attribute_name, NMEM nmem)
static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr, int indicator_length)
static void get_indicator_value(yaz_marc_t mt, const xmlNode *ptr, char *res, int turbo, int indicator_length)
static int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p, int *indicator_length)
static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr, int indicator_length)
void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, const xmlNode *ptr_data)
adds controlfield to MARC structure using xml Nodes
Definition marcdisp.c:160
void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt,...)
adds MARC annotation - printf interface
Definition marcdisp.c:188
void yaz_marc_add_subfield(yaz_marc_t mt, const char *code_data, size_t code_data_len)
adds subfield to MARC structure
Definition marcdisp.c:316
void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, const char *indicator, size_t indicator_len)
adds datafield to MARC structure using xml Nodes
Definition marcdisp.c:284
NMEM yaz_marc_get_nmem(yaz_marc_t mt)
returns memory for MARC handle
Definition marcdisp.c:130
void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
adds datafield to MARC structure using xml Nodes
Definition marcdisp.c:297
void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, int *indicator_length, int *identifier_length, int *base_address, int *length_data_entry, int *length_starting, int *length_implementation)
sets leader, validates it, and returns important values
Definition marcdisp.c:356
void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag, const xmlNode *ptr_data)
adds controlfield to MARC structure using xml Nodes for data
Definition marcdisp.c:169
void yaz_marc_reset(yaz_marc_t mt)
clears memory and MARC record
Definition marcdisp.c:483
MARC conversion.
#define YAZ_MARC_MARCXML
Output format: MARCXML.
Definition marcdisp.h:70
#define YAZ_MARC_TURBOMARC
Output format: Turbo MARC Index Data format (XML based)
Definition marcdisp.h:78
void * nmem_malloc(NMEM n, size_t size)
allocates memory block on NMEM handle
Definition nmem.c:145
Header for Nibble Memory functions + Libxml2 specific stuff.
char * nmem_strdup(NMEM mem, const char *src)
allocates string on NMEM handle (similar strdup)
Definition nmemsdup.c:18
char * nmem_text_node_cdata(const xmlNode *ptr_cdata, NMEM nmem)
copies TEXT Libxml2 node data to NMEM
Definition nmemsdup.c:145
the internals of a yaz_marc_t handle
Definition marcdisp.c:86
Header for WRBUF (growing buffer)
Header for common YAZ utilities.