YAZ 5.35.1
iconv_decode_marc8.c
Go to the documentation of this file.
/* This file is part of the YAZ toolkit.
 * Copyright (C) Index Data
 * See the file LICENSE for details.
 */
13#if HAVE_CONFIG_H
14#include <config.h>
15#endif
16
17#include <assert.h>
18#include <errno.h>
19#include <string.h>
20
21#include <yaz/xmalloc.h>
22#include "iconv-p.h"
23
/**
 * Private state of one MARC-8 decoder instance (stored in the decoder's
 * `data` pointer).
 */
struct decoder_data {
    /* Final byte of the last escape sequence that designated G0;
       selects the conversion function for input bytes < 128. */
    int g0_mode;
    /* Same for G1; selects the conversion function for bytes >= 128. */
    int g1_mode;

    /* Queue of combining characters that were read ahead of the character
       they belong to.  Entries [comb_offset, comb_size) are still pending. */
    int comb_offset;
    int comb_size;
    unsigned long comb_x[8];   /* queued characters */
    size_t comb_no_read[8];    /* input bytes consumed by each queued entry */

    /* Non-zero: bytes below ' ' are returned unchanged instead of being
       looked up in the active character set. */
    int control_mode;
};
34
47
48
49static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
50 struct decoder_data *data,
51 unsigned char *inp,
52 size_t inbytesleft, size_t *no_read,
53 int *comb);
54
55static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
56 unsigned char *inp,
57 size_t inbytesleft, size_t *no_read)
58{
59 struct decoder_data *data = (struct decoder_data *) d->data;
60 unsigned long x;
61 if (data->comb_offset < data->comb_size)
62 {
63 *no_read = data->comb_no_read[data->comb_offset];
64 x = data->comb_x[data->comb_offset];
65
66 /* special case for double-diacritic combining characters,
67 INVERTED BREVE and DOUBLE TILDE.
68 We'll increment the no_read counter by 1, since we want to skip over
69 the processing of the closing ligature character
70 */
71 /* this code is no longer necessary.. our handlers code in
72 yaz_marc8_?_conv (generated by charconv.tcl) now returns
73 0 and no_read=1 when a sequence does not match the input.
74 The SECOND HALFs in codetables.xml produces a non-existant
75 entry in the conversion trie.. Hence when met, the input byte is
76 skipped as it should (in yaz_iconv)
77 */
78#if 0
79 if (x == 0x0361 || x == 0x0360)
80 *no_read += 1;
81#endif
82 data->comb_offset++;
83 return x;
84 }
85
86 data->comb_offset = 0;
87 for (data->comb_size = 0; data->comb_size < 8; data->comb_size++)
88 {
89 int comb = 0;
90
91 if (inbytesleft == 0 && data->comb_size)
92 {
94 x = 0;
95 *no_read = 0;
96 break;
97 }
98 x = yaz_read_marc8_comb(cd, data, inp, inbytesleft, no_read, &comb);
99 if (!comb || !x)
100 break;
101 data->comb_x[data->comb_size] = x;
102 data->comb_no_read[data->comb_size] = *no_read;
103 inp += *no_read;
104 inbytesleft = inbytesleft - *no_read;
105 }
106 return x;
107}
108
109static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
110 unsigned char *inp,
111 size_t inbytesleft, size_t *no_read)
112{
113 struct decoder_data *data = (struct decoder_data *) d->data;
114 unsigned long x = read_marc8(cd, d, inp, inbytesleft, no_read);
115 if (x && data->comb_size == 1)
116 {
117 if (yaz_iso_8859_1_lookup_x12(x, data->comb_x[0], &x))
118 {
119 *no_read += data->comb_no_read[0];
120 data->comb_size = 0;
121 }
122 }
123 return x;
124}
125
126static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
127 struct decoder_data *data,
128 unsigned char *inp,
129 size_t inbytesleft, size_t *no_read,
130 int *comb)
131{
132 *no_read = 0;
133 while (inbytesleft > 0 && *inp == 27)
134 {
135 int *modep = &data->g0_mode;
136 size_t inbytesleft0 = inbytesleft;
137
138 inbytesleft--;
139 inp++;
140 if (inbytesleft == 0)
141 goto incomplete;
142 if (*inp == '$') /* set with multiple bytes */
143 {
144 inbytesleft--;
145 inp++;
146 }
147 if (inbytesleft == 0)
148 goto incomplete;
149 if (*inp == '(' || *inp == ',') /* G0 */
150 {
151 inbytesleft--;
152 inp++;
153 }
154 else if (*inp == ')' || *inp == '-') /* G1 */
155 {
156 inbytesleft--;
157 inp++;
158 modep = &data->g1_mode;
159 }
160 if (inbytesleft == 0)
161 goto incomplete;
162 if (*inp == '!') /* ANSEL is a special case */
163 {
164 inbytesleft--;
165 inp++;
166 }
167 if (inbytesleft == 0)
168 goto incomplete;
169 *modep = *inp++; /* Final character */
170 inbytesleft--;
171
172 (*no_read) += inbytesleft0 - inbytesleft;
173 }
174 if (inbytesleft == 0)
175 return 0;
176 else if (*inp == ' ')
177 {
178 *no_read += 1;
179 return ' ';
180 }
181 else if (*inp < ' ' && data->control_mode)
182 {
183 *no_read += 1;
184 return *inp;
185 }
186 else
187 {
188 unsigned long x;
189 size_t no_read_sub = 0;
190 int mode = *inp < 128 ? data->g0_mode : data->g1_mode;
191 *comb = 0;
192
193 switch(mode)
194 {
195 case 'B': /* Basic ASCII */
196 case 's': /* ASCII */
197 x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
198 break;
199 case 'E': /* ANSEL */
200 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
201 break;
202 case 'g': /* Greek */
203 x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
204 break;
205 case 'b': /* Subscripts */
206 x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
207 break;
208 case 'p': /* Superscripts */
209 x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
210 break;
211 case '2': /* Basic Hebrew */
212 x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
213 break;
214 case 'N': /* Basic Cyrillic */
215 x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
216 break;
217 case 'Q': /* Extended Cyrillic */
218 x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
219 break;
220 case '3': /* Basic Arabic */
221 x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
222 break;
223 case '4': /* Extended Arabic */
224 x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
225 break;
226 case 'S': /* Greek */
227 x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
228 break;
229 case '1': /* Chinese, Japanese, Korean (EACC) */
230 x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
231 break;
232 default:
233 *no_read = 0;
235 return 0;
236 }
237 *no_read += no_read_sub;
238 return x;
239 }
240incomplete:
241 *no_read = 0;
243 return 0;
244}
245
246
248 unsigned char *inp,
249 size_t inbytesleft, size_t *no_read)
250{
251 struct decoder_data *data = (struct decoder_data *) d->data;
252 data->g0_mode = 'B';
253 data->g1_mode = 'E';
254 data->comb_offset = data->comb_size = 0;
255 data->control_mode = 0;
256 return 0;
257}
258
260 unsigned char *inp,
261 size_t inbytesleft, size_t *no_read)
262{
263 struct decoder_data *data = (struct decoder_data *) d->data;
264
265 init_marc8(cd, d, inp, inbytesleft, no_read);
266 data->control_mode = 1;
267 return 0;
268}
269
271{
272 struct decoder_data *data = (struct decoder_data *) d->data;
273 xfree(data);
274}
275
278{
279 if (!yaz_matchstr(fromcode, "MARC8") || !yaz_matchstr(fromcode, "ANSEL"))
280 {
283 }
284 else if (!yaz_matchstr(fromcode, "MARC8s"))
285 {
288 }
289 else if (!yaz_matchstr(fromcode, "MARC8c"))
290 {
293 }
294 else
295 return 0;
296 {
297 struct decoder_data *data = (struct decoder_data *)
298 xmalloc(sizeof(*data));
299 d->data = data;
301 }
302 return d;
303}
304
305
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */
314
Header for errno utilities.
Internal header for iconv.
int yaz_iso_8859_1_lookup_x12(unsigned long x1, unsigned long x2, unsigned long *y)
unsigned long yaz_conv_func_t(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining, unsigned mask, int boffset)
Definition iconv-p.h:70
void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
Definition siconv.c:298
yaz_conv_func_t yaz_marc8_62_conv
yaz_conv_func_t yaz_marc8_33_conv
yaz_conv_func_t yaz_marc8_51_conv
yaz_conv_func_t yaz_marc8_31_conv
static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
static size_t init_marc8c(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
yaz_conv_func_t yaz_marc8_34_conv
yaz_conv_func_t yaz_marc8_53_conv
yaz_conv_func_t yaz_marc8_70_conv
static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
void destroy_marc8(yaz_iconv_decoder_t d)
yaz_conv_func_t yaz_marc8_45_conv
yaz_conv_func_t yaz_marc8_42_conv
yaz_conv_func_t yaz_marc8_32_conv
yaz_iconv_decoder_t yaz_marc8_decoder(const char *fromcode, yaz_iconv_decoder_t d)
static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, struct decoder_data *data, unsigned char *inp, size_t inbytesleft, size_t *no_read, int *comb)
yaz_conv_func_t yaz_marc8_4E_conv
yaz_conv_func_t yaz_marc8_67_conv
int yaz_matchstr(const char *s1, const char *s2)
match strings - independent of case and '-'
Definition matchstr.c:42
size_t no_read[MAX_COMP]
unsigned long comb_x[8]
unsigned long(* read_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inbuf, size_t inbytesleft, size_t *no_read)
Definition iconv-p.h:86
size_t(* init_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inbuf, size_t inbytesleft, size_t *no_read)
Definition iconv-p.h:83
void(* destroy_handle)(yaz_iconv_decoder_t d)
Definition iconv-p.h:89
Header for memory handling functions.
#define xfree(x)
utility macro which calls xfree_f
Definition xmalloc.h:53
#define xmalloc(x)
utility macro which calls malloc_f
Definition xmalloc.h:49
#define YAZ_ICONV_EILSEQ
error code: Invalid sequence
Definition yaz-iconv.h:49
#define YAZ_ICONV_EINVAL
error code: An incomplete multibyte sequence is in input buffer
Definition yaz-iconv.h:51