YAZ 5.35.1
marc_read_iso2709.c
Go to the documentation of this file.
1/* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
4 */
5
11#if HAVE_CONFIG_H
12#include <config.h>
13#endif
14
15#ifdef WIN32
16#include <windows.h>
17#endif
18
19#include <stdio.h>
20#include <string.h>
21#include <yaz/marcdisp.h>
22#include <yaz/wrbuf.h>
23#include <yaz/yaz-util.h>
24
25int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
26{
27 int entry_p;
28 int record_length;
29 int indicator_length;
30 int identifier_length;
31 int end_of_directory;
32 int base_address;
33 int length_data_entry;
34 int length_starting;
35 int length_implementation;
36
38
39 if (!atoi_n_check(buf, 5, &record_length))
40 {
41 yaz_marc_cprintf(mt, "Bad leader");
42 return -1;
43 }
44 if (record_length < 25)
45 {
46 yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
47 return -1;
48 }
49 /* ballout if bsize is known and record_length is less than that */
50 if (bsize != -1 && record_length > bsize)
51 {
52 yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
53 record_length, bsize);
54 return -1;
55 }
56 if (yaz_marc_get_debug(mt))
57 yaz_marc_cprintf(mt, "Record length %5d", record_length);
58
59 yaz_marc_set_leader(mt, buf,
60 &indicator_length,
61 &identifier_length,
62 &base_address,
63 &length_data_entry,
64 &length_starting,
65 &length_implementation);
66
67 /* First pass. determine length of directory & base of data */
68 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
69 {
70 /* length of directory entry */
71 int l = 3 + length_data_entry + length_starting;
72 if (entry_p + l >= record_length)
73 {
74 yaz_marc_cprintf(mt, "Directory offset %d: end of record."
75 " Missing FS char", entry_p);
76 return -1;
77 }
78 if (yaz_marc_get_debug(mt))
79 {
80 WRBUF hex = wrbuf_alloc();
81
82 wrbuf_puts(hex, "Tag ");
83 wrbuf_write_escaped(hex, buf + entry_p, 3);
84 wrbuf_puts(hex, ", length ");
85 wrbuf_write_escaped(hex, buf + entry_p + 3,
86 length_data_entry);
87 wrbuf_puts(hex, ", starting ");
88 wrbuf_write_escaped(hex, buf + entry_p + 3 + length_data_entry,
89 length_starting);
90 yaz_marc_cprintf(mt, "Directory offset %d: %s",
91 entry_p, wrbuf_cstr(hex));
92 wrbuf_destroy(hex);
93 }
94 /* Check for digits in length+starting info */
95 while (--l >= 3)
96 if (!yaz_isdigit(buf[entry_p + l]))
97 break;
98 if (l >= 3)
99 {
100 WRBUF hex = wrbuf_alloc();
101 /* Not all digits, so stop directory scan */
102 wrbuf_write_escaped(hex, buf + entry_p,
103 length_data_entry + length_starting + 3);
104 yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
105 " length and/or length starting (%s)", entry_p,
106 wrbuf_cstr(hex));
107 wrbuf_destroy(hex);
108 break;
109 }
110 entry_p += 3 + length_data_entry + length_starting;
111 }
112 end_of_directory = entry_p;
113 if (base_address != entry_p+1)
114 {
115 yaz_marc_cprintf(mt, "Base address not at end of directory,"
116 " base %d, end %d", base_address, entry_p+1);
117 }
118
119 /* Second pass. parse control - and datafields */
120 for (entry_p = 24; entry_p != end_of_directory; )
121 {
122 int data_length;
123 int data_offset;
124 int end_offset;
125 int i;
126 char tag[4];
127 int identifier_flag = 0;
128 int entry_p0 = entry_p;
129
130 memcpy (tag, buf+entry_p, 3);
131 entry_p += 3;
132 tag[3] = '\0';
133 data_length = atoi_n(buf+entry_p, length_data_entry);
134 entry_p += length_data_entry;
135 data_offset = atoi_n(buf+entry_p, length_starting);
136 entry_p += length_starting;
137 i = data_offset + base_address;
138 end_offset = i+data_length-1;
139
140 if (data_length <= 0 || data_offset < 0)
141 break;
142
143 if (yaz_marc_get_debug(mt))
144 {
145 yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
146 " data-offset %d",
147 tag, entry_p0, data_length, data_offset);
148 }
149 if (end_offset >= record_length)
150 {
151 yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
152 entry_p0, end_offset, record_length);
153 break;
154 }
155
156 if (memcmp (tag, "00", 2))
157 identifier_flag = 1; /* if not 00X assume subfields */
158 else if (indicator_length < 4 && indicator_length > 0)
159 {
160 /* Danmarc 00X have subfields */
161 if (buf[i + indicator_length] == ISO2709_IDFS)
162 identifier_flag = 1;
163 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
164 identifier_flag = 2;
165 }
166
167 if (identifier_flag)
168 {
169 /* datafield */
170 i += identifier_flag-1;
171 if (indicator_length)
172 {
173 int j, i_start = i;
174 for (j = 0; j < indicator_length; j++)
175 i += yaz_marc_sizeof_char(mt, buf + i);
176 yaz_marc_add_datafield(mt, tag, buf + i_start,
177 i - i_start);
178 }
179 int code_offset = i + 1;
180 while (i < end_offset && buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
181 {
182 if (buf[i] == ISO2709_IDFS && i < end_offset -1 && !(buf[i+1] >= 0 && buf[i+1] <= ' '))
183 {
184 if (i > code_offset)
185 yaz_marc_add_subfield(mt, buf + code_offset, i - code_offset);
186 code_offset = i + 1;
187 }
188 i++;
189 }
190 if (i > code_offset)
191 yaz_marc_add_subfield(mt, buf + code_offset, i - code_offset);
192 }
193 else
194 {
195 /* controlfield */
196 int i0 = i;
197 while (i < end_offset &&
198 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
199 i++;
200 yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
201 }
202 if (i < end_offset)
203 {
204 yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
205 data_length);
206 }
207 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
208 {
209 yaz_marc_cprintf(mt, "No separator at end of field length=%d",
210 data_length);
211 }
212 }
213 return record_length;
214}
215
216/*
217 * Local variables:
218 * c-basic-offset: 4
219 * c-file-style: "Stroustrup"
220 * indent-tabs-mode: nil
221 * End:
222 * vim: shiftwidth=4 tabstop=8 expandtab
223 */
224
int atoi_n_check(const char *buf, int size, int *val)
like atoi_n but checks for proper formatting
Definition atoin.c:32
int atoi_n(const char *buf, int len)
like atoi(3) except that it reads exactly len characters
Definition atoin.c:19
int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
read ISO2709/MARC record from buffer
void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt,...)
adds MARC annotation - printf interface
Definition marcdisp.c:188
size_t yaz_marc_sizeof_char(yaz_marc_t mt, const char *buf)
Definition marcdisp.c:478
void yaz_marc_add_subfield(yaz_marc_t mt, const char *code_data, size_t code_data_len)
adds subfield to MARC structure
Definition marcdisp.c:316
int yaz_marc_get_debug(yaz_marc_t mt)
gets debug level for MARC system
Definition marcdisp.c:199
void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, const char *indicator, size_t indicator_len)
adds datafield to MARC structure using strings
Definition marcdisp.c:233
void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, int *indicator_length, int *identifier_length, int *base_address, int *length_data_entry, int *length_starting, int *length_implementation)
sets leader, validates it, and returns important values
Definition marcdisp.c:356
void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, const char *data, size_t data_len)
adds controlfield to MARC structure
Definition marcdisp.c:212
void yaz_marc_reset(yaz_marc_t mt)
clears memory and MARC record
Definition marcdisp.c:483
MARC conversion.
#define ISO2709_FS
MARC control char: field separator (30 Dec, 1E Hex)
Definition marcdisp.h:148
#define ISO2709_IDFS
MARC control char: identifier-field separator (31 Dec, 1F Hex)
Definition marcdisp.h:150
#define ISO2709_RS
MARC control char: record separator (29 Dec, 1D Hex)
Definition marcdisp.h:146
string buffer
Definition wrbuf.h:43
the internals of a yaz_marc_t handle
Definition marcdisp.c:86
void wrbuf_destroy(WRBUF b)
destroy WRBUF and its buffer
Definition wrbuf.c:38
const char * wrbuf_cstr(WRBUF b)
returns WRBUF content as C-string
Definition wrbuf.c:299
WRBUF wrbuf_alloc(void)
construct WRBUF
Definition wrbuf.c:25
void wrbuf_write_escaped(WRBUF b, const char *str, size_t len)
writes buffer to WRBUF and escape non-ASCII characters
Definition wrbuf.c:327
void wrbuf_puts(WRBUF b, const char *buf)
appends C-string to WRBUF
Definition wrbuf.c:89
Header for WRBUF (growing buffer)
#define yaz_isdigit(x)
Definition yaz-iconv.h:86
Header for common YAZ utilities.