YAZ 5.37.0
marc_read_xml.c
Go to the documentation of this file.
/* This file is part of the YAZ toolkit.
 * Copyright (C) Index Data
 * See the file LICENSE for details.
 */
5
10
11#if HAVE_CONFIG_H
12#include <config.h>
13#endif
14
15#ifdef WIN32
16#include <windows.h>
17#endif
18
19#include <stdio.h>
20#include <string.h>
21#include <yaz/marcdisp.h>
22#include <yaz/wrbuf.h>
23#include <yaz/yaz-util.h>
24#include <yaz/nmem_xml.h>
25#include <yaz/snprintf.h>
26
27#if YAZ_HAVE_XML2
28#include <libxml/tree.h>
29#endif
30
31#if YAZ_HAVE_XML2
32static int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
33{
34 NMEM nmem = yaz_marc_get_nmem(mt);
35 for (; ptr; ptr = ptr->next)
36 {
37 if (ptr->type == XML_ELEMENT_NODE)
38 {
39 if (!strcmp((const char *) ptr->name, "subfield"))
40 {
41 size_t ctrl_data_len = 0;
42 char *ctrl_data_buf = 0;
43 const xmlNode *p = 0, *ptr_code = 0;
44 struct _xmlAttr *attr;
45 for (attr = ptr->properties; attr; attr = attr->next)
46 if (!strcmp((const char *)attr->name, "code"))
47 ptr_code = attr->children;
48 else
49 {
51 mt, "Bad attribute '%.80s' for 'subfield'",
52 attr->name);
53 return -1;
54 }
55 if (!ptr_code)
56 {
58 mt, "Missing attribute 'code' for 'subfield'" );
59 return -1;
60 }
61 if (ptr_code->type == XML_TEXT_NODE)
62 {
63 ctrl_data_len =
64 strlen((const char *)ptr_code->content);
65 }
66 else
67 {
69 mt, "Missing value for 'code' in 'subfield'" );
70 return -1;
71 }
72 for (p = ptr->children; p ; p = p->next)
73 if (p->type == XML_TEXT_NODE)
74 ctrl_data_len += strlen((const char *)p->content);
75 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
76 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
77 for (p = ptr->children; p ; p = p->next)
78 if (p->type == XML_TEXT_NODE)
79 strcat(ctrl_data_buf, (const char *)p->content);
80 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
81 }
82 else
83 {
85 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
86 return -1;
87 }
88 }
89 }
90 return 0;
91}
92
93static char *element_attribute_value_extract(const xmlNode *ptr,
94 const char *attribute_name,
95 NMEM nmem)
96{
97 const char *name = (const char *) ptr->name;
98 size_t length = strlen(name);
99 xmlAttr *attr;
100 if (length > 1 )
101 return nmem_strdup(nmem, name+1);
102 /* TODO Extract from attribute where matches attribute_name */
103 for (attr = ptr->properties; attr; attr = attr->next)
104 if (!strcmp((const char *)attr->name, attribute_name))
105 return nmem_text_node_cdata(attr->children, nmem);
106 return 0;
107}
108
109static void get_indicator_value(yaz_marc_t mt, const xmlNode *ptr,
110 char *res, int turbo, int indicator_length)
111{
112 int i;
113 res[0] = '\0';
114 for (i = 1; i <= indicator_length; i++)
115 {
116 struct _xmlAttr *attr;
117 char attrname[16];
118 yaz_snprintf(attrname, sizeof attrname, "%s%d", turbo ? "i" : "ind", i);
119 for (attr = ptr->properties; attr; attr = attr->next)
120 {
121 if (!strcmp((const char *)attr->name, attrname) &&
122 attr->children && attr->children->type == XML_TEXT_NODE &&
123 attr->children->content &&
124 strlen((const char *) attr->children->content) < 5)
125 {
126 strcat(res, (const char *)attr->children->content);
127 break;
128 }
129 }
130 if (!attr)
131 strcat(res, " ");
132 }
133}
134
135static int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
136{
137 for (; ptr; ptr = ptr->next)
138 {
139 if (ptr->type == XML_ELEMENT_NODE)
140 {
141 if (!strncmp((const char *) ptr->name, "s", 1))
142 {
143 NMEM nmem = yaz_marc_get_nmem(mt);
144 xmlNode *p;
145 size_t ctrl_data_len = 0;
146 char *ctrl_data_buf = 0;
147 const char *tag_value = element_attribute_value_extract(ptr, "code", nmem);
148 if (!tag_value)
149 {
151 mt, "Missing 'code' value for 'subfield'" );
152 return -1;
153 }
154
155 ctrl_data_len = strlen((const char *) tag_value);
156 /* Extract (length) from CDATA */
157 for (p = ptr->children; p ; p = p->next)
158 if (p->type == XML_TEXT_NODE)
159 ctrl_data_len += strlen((const char *)p->content);
160 /* Allocate memory for code value (1 character (can be multi-byte) and data */
161 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
162 /* Build a string with "<Code><data>" */
163 strcpy(ctrl_data_buf, (const char *) tag_value);
164 for (p = ptr->children; p ; p = p->next)
165 if (p->type == XML_TEXT_NODE)
166 strcat(ctrl_data_buf, (const char *)p->content);
167 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
168 }
169 else
170 {
172 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
173 return -1;
174 }
175 }
176 }
177 return 0;
178}
179
180
181static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p,
182 int *indicator_length)
183{
184 int identifier_length;
185 int base_address;
186 int length_data_entry;
187 int length_starting;
188 int length_implementation;
189 const char *leader = 0;
190 const xmlNode *ptr = *ptr_p;
191
192 for(; ptr; ptr = ptr->next)
193 if (ptr->type == XML_ELEMENT_NODE)
194 {
195 if ( !strcmp( (const char *) ptr->name, "leader") ||
196 (!strncmp((const char *) ptr->name, "l", 1) ))
197 {
198 xmlNode *p = ptr->children;
199 for(; p; p = p->next)
200 if (p->type == XML_TEXT_NODE)
201 leader = (const char *) p->content;
202 ptr = ptr->next;
203 }
204 break;
205 }
206 if (!leader)
207 {
208 yaz_marc_cprintf(mt, "Missing leader. Inserting fake leader");
209 leader = "00000nam a22000000a 4500";
210 }
211 if (strlen(leader) != 24)
212 {
213 yaz_marc_cprintf(mt, "Bad length %d of leader data."
214 " Must have length of 24 characters", strlen(leader));
215 return -1;
216 }
217 yaz_marc_set_leader(mt, leader,
218 indicator_length,
219 &identifier_length,
220 &base_address,
221 &length_data_entry,
222 &length_starting,
223 &length_implementation);
224 *ptr_p = ptr;
225 return 0;
226}
227
228static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr,
229 int indicator_length)
230{
231 for(; ptr; ptr = ptr->next)
232 if (ptr->type == XML_ELEMENT_NODE)
233 {
234 if (!strcmp( (const char *) ptr->name, "controlfield"))
235 {
236 const xmlNode *ptr_tag = 0;
237 struct _xmlAttr *attr;
238 for (attr = ptr->properties; attr; attr = attr->next)
239 if (!strcmp((const char *)attr->name, "tag"))
240 ptr_tag = attr->children;
241 else
242 {
244 mt, "Bad attribute '%.80s' for 'controlfield'",
245 attr->name);
246 return -1;
247 }
248 if (!ptr_tag)
249 {
251 mt, "Missing attribute 'tag' for 'controlfield'" );
252 return -1;
253 }
254 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
255 }
256 else if (!strcmp((const char *) ptr->name, "datafield"))
257 {
258 char indstr[48];
259 const xmlNode *ptr_tag = 0;
260 struct _xmlAttr *attr;
261
262 get_indicator_value(mt, ptr, indstr, 0, indicator_length);
263 for (attr = ptr->properties; attr; attr = attr->next)
264 if (!strcmp((const char *)attr->name, "tag"))
265 ptr_tag = attr->children;
266 else if (!strncmp((const char *)attr->name, "ind", 3))
267 ;
268 else
269 {
271 mt, "Bad attribute '%.80s' for 'datafield'",
272 attr->name);
273 }
274 if (!ptr_tag)
275 {
277 mt, "Missing attribute 'tag' for 'datafield'" );
278 return -1;
279 }
280 yaz_marc_add_datafield_xml(mt, ptr_tag,
281 indstr, indicator_length);
282 if (yaz_marc_read_xml_subfields(mt, ptr->children))
283 return -1;
284 }
285 else
286 {
288 "Expected element controlfield or datafield,"
289 " got %.80s", ptr->name);
290 return -1;
291 }
292 }
293 return 0;
294}
295
296
297static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr,
298 int indicator_length)
299{
300 for(; ptr; ptr = ptr->next)
301 if (ptr->type == XML_ELEMENT_NODE)
302 {
303 if (!strncmp( (const char *) ptr->name, "c", 1))
304 {
305 NMEM nmem = yaz_marc_get_nmem(mt);
306 char *tag_value = element_attribute_value_extract(ptr, "tag", nmem);
307 if (!tag_value)
308 {
310 mt, "Missing attribute 'tag' for 'controlfield'" );
311 return -1;
312 }
313 yaz_marc_add_controlfield_xml2(mt, tag_value, ptr->children);
314 }
315 else if (!strncmp((const char *) ptr->name, "d",1))
316 {
317 struct _xmlAttr *attr;
318 NMEM nmem = yaz_marc_get_nmem(mt);
319 char *tag_value;
320 char *indstr = nmem_malloc(nmem, indicator_length * 5);
321 tag_value = element_attribute_value_extract(ptr, "tag", nmem);
322 if (!tag_value)
323 {
325 mt, "Missing attribute 'tag' for 'datafield'" );
326 return -1;
327 }
328 get_indicator_value(mt, ptr, indstr, 1, indicator_length);
329 for (attr = ptr->properties; attr; attr = attr->next)
330 if (strlen((const char *)attr->name) == 2 &&
331 attr->name[0] == 'i')
332 ;
333 else
334 {
336 mt, "Bad attribute '%.80s' for 'd'", attr->name);
337 }
338 yaz_marc_add_datafield_xml2(mt, tag_value, indstr);
339 if (yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */))
340 return -1;
341 }
342 else
343 {
345 "Expected element controlfield or datafield,"
346 " got %.80s", ptr->name);
347 return -1;
348 }
349 }
350 return 0;
351}
352
353
354#endif
355
356#if YAZ_HAVE_XML2
357int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
358{
359 int indicator_length = 0;
360 int format = 0;
361 yaz_marc_reset(mt);
362
363 for(; ptr; ptr = ptr->next)
364 if (ptr->type == XML_ELEMENT_NODE)
365 {
366 if (!strcmp((const char *) ptr->name, "record"))
367 {
368 format = YAZ_MARC_MARCXML;
369 break;
370 }
371 else if (!strcmp((const char *) ptr->name, "r"))
372 {
373 format = YAZ_MARC_TURBOMARC;
374 break;
375 }
376 else
377 {
379 mt, "Unknown element '%.80s' in MARC XML reader",
380 ptr->name);
381 return -1;
382 }
383 }
384 if (!ptr)
385 {
386 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
387 return -1;
388 }
389 /* ptr points to record node now */
390 ptr = ptr->children;
391 if (yaz_marc_read_xml_leader(mt, &ptr, &indicator_length))
392 return -1;
393
394 switch (format)
395 {
396 case YAZ_MARC_MARCXML:
397 return yaz_marc_read_xml_fields(mt, ptr, indicator_length);
399 return yaz_marc_read_turbo_xml_fields(mt, ptr, indicator_length);
400 }
401 return -1;
402}
403#endif
404
405
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */
414
char * name
Definition initopt.c:18
static int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
parses MARCXML/MarcXchange/TurboMARC record from xmlNode pointer
static char * element_attribute_value_extract(const xmlNode *ptr, const char *attribute_name, NMEM nmem)
static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr, int indicator_length)
static void get_indicator_value(yaz_marc_t mt, const xmlNode *ptr, char *res, int turbo, int indicator_length)
static int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p, int *indicator_length)
static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr, int indicator_length)
void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, const xmlNode *ptr_data)
adds controlfield to MARC structure using xml Nodes
Definition marcdisp.c:160
void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt,...)
adds MARC annotation - printf interface
Definition marcdisp.c:188
void yaz_marc_add_subfield(yaz_marc_t mt, const char *code_data, size_t code_data_len)
adds subfield to MARC structure
Definition marcdisp.c:316
void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, const char *indicator, size_t indicator_len)
adds datafield to MARC structure using xml Nodes
Definition marcdisp.c:284
NMEM yaz_marc_get_nmem(yaz_marc_t mt)
returns memory for MARC handle
Definition marcdisp.c:130
void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
adds datafield to MARC structure using xml Nodes
Definition marcdisp.c:297
void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, int *indicator_length, int *identifier_length, int *base_address, int *length_data_entry, int *length_starting, int *length_implementation)
sets leader, validates it, and returns important values
Definition marcdisp.c:356
void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag, const xmlNode *ptr_data)
adds controlfield to MARC structure using xml Nodes for data
Definition marcdisp.c:169
void yaz_marc_reset(yaz_marc_t mt)
clears memory and MARC record
Definition marcdisp.c:483
MARC conversion.
#define YAZ_MARC_MARCXML
Output format: MARCXML.
Definition marcdisp.h:70
#define YAZ_MARC_TURBOMARC
Output format: Turbo MARC Index Data format (XML based).
Definition marcdisp.h:78
struct yaz_marc_t_ * yaz_marc_t
a yaz_marc_t handle (private content)
Definition marcdisp.h:47
void * nmem_malloc(NMEM n, size_t size)
allocates memory block on NMEM handle
Definition nmem.c:145
struct nmem_control * NMEM
NMEM handle (an opaque pointer to memory).
Definition nmem.h:44
char * nmem_text_node_cdata(const xmlNode *ptr_cdata, NMEM nmem)
copies TEXT Libxml2 node data to NMEM
Definition nmem_xml.c:19
Header for Nibble Memory functions + Libxml2 specific stuff.
char * nmem_strdup(NMEM mem, const char *src)
allocates string on NMEM handle (similar strdup)
Definition nmemsdup.c:19
void yaz_snprintf(char *buf, size_t size, const char *fmt,...)
Definition snprintf.c:31
Header for config file reading utilities.
Header for WRBUF (growing buffer).
Header for common YAZ utilities.