YAZ 5.35.1
iconv_encode_marc8.c
Go to the documentation of this file.
1/* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
4 */
13#if HAVE_CONFIG_H
14#include <config.h>
15#endif
16
17#include <assert.h>
18#include <errno.h>
19#include <string.h>
20
21#include <yaz/xmalloc.h>
22#include <yaz/snprintf.h>
23#include "iconv-p.h"
24
37
38#define ESC "\033"
39
/**
 * State kept between calls of the MARC-8 writer.
 *
 * NOTE(review): this listing omits two member lines (listing lines 42 and
 * 44). The members write_marc8_second_half_char and write_marc8_ncr are
 * restored here because the functions in this file read and write them;
 * the type of write_marc8_ncr (int) and the position of both members are
 * inferred from how they are used - confirm against the upstream file.
 */
struct encoder_data
{
    /* byte that flush_combos() writes right after the pending character */
    unsigned write_marc8_second_half_char;
    /* pending character value, not yet written; 0 means nothing pending */
    unsigned long write_marc8_last;
    /* non-zero: the pending character is written in "&#x....;" form */
    int write_marc8_ncr;
    /* page escape string recorded together with the pending character */
    const char *write_marc8_lpage;
    /* page escape string last selected for G0 (starts out as ESC "(B") */
    const char *write_marc8_g0;
    /* page escape string last selected for G1; 0 when none */
    const char *write_marc8_g1;
};
49
/*
 * Body of the encoder initialiser: puts the encoder_data reached through
 * w->data into its start state - no pending character, NCR flag off, no
 * pending page, G0 set to ESC "(B" and G1 unset.
 *
 * NOTE(review): the signature line and listing line 53 are absent from
 * this extract. The symbol index of this listing gives the signature as
 * "static void init_marc8(yaz_iconv_encoder_t w)"; what line 53 did is not
 * visible here - confirm against the upstream file.
 */
51{
52 struct encoder_data *data = (struct encoder_data *) w->data;
54 data->write_marc8_last = 0;
55 data->write_marc8_ncr = 0;
56 data->write_marc8_lpage = 0;
57 data->write_marc8_g0 = ESC "(B";
58 data->write_marc8_g1 = 0;
59}
60
62 struct encoder_data *w,
63 char **outbuf, size_t *outbytesleft,
64 const char *page_chr);
65
/*
 * lookup_marc8: find the MARC-8 code for the character value x.
 *
 * x is first written as UTF-8 into a small scratch buffer with
 * yaz_write_UTF8_char(); if that call fails, 0 is returned. Otherwise the
 * UTF-8 bytes are offered to the yaz_marc8r_*_conv functions in a fixed
 * order. The first one that yields a non-zero value decides the result:
 * that value is returned and *page_chr is set to the escape string paired
 * with that converter below. comb is handed through to every converter.
 * When no converter matches, 0 is returned and *page_chr is left untouched.
 *
 * NOTE(review): listing lines 78 and 157 are absent from this extract, so
 * whatever happens on the two failure paths besides the return (for
 * instance recording an error on cd) is not visible - confirm upstream.
 */
66static unsigned long lookup_marc8(yaz_iconv_t cd,
67 unsigned long x, int *comb,
68 const char **page_chr)
69{
70 char utf8_buf[7];
71 char *utf8_outbuf = utf8_buf;
/* one byte of utf8_buf is held back for the terminating NUL written below */
72 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
73 int error_code;
74
75 r = yaz_write_UTF8_char(x, &utf8_outbuf, &utf8_outbytesleft, &error_code);
76 if (r == (size_t)(-1))
77 {
79 return 0;
80 }
81 else
82 {
83 unsigned char *inp;
84 size_t inbytesleft, no_read_sub = 0;
/* shadows the parameter x: from here on x holds a converter result */
85 unsigned long x;
86
/* terminate the UTF-8 bytes so that strlen() can measure them */
87 *utf8_outbuf = '\0';
88 inp = (unsigned char *) utf8_buf;
89 inbytesleft = strlen(utf8_buf);
90
/* the 42 and 45 converters both map to the ESC "(B" page */
91 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
92 if (x)
93 {
94 *page_chr = ESC "(B";
95 return x;
96 }
97 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
98 if (x)
99 {
100 *page_chr = ESC "(B";
101 return x;
102 }
103 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
104 if (x)
105 {
106 *page_chr = ESC "b";
107 return x;
108 }
109 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
110 if (x)
111 {
112 *page_chr = ESC "p";
113 return x;
114 }
115 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
116 if (x)
117 {
118 *page_chr = ESC "(2";
119 return x;
120 }
121 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
122 if (x)
123 {
124 *page_chr = ESC "(N";
125 return x;
126 }
127 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
128 if (x)
129 {
130 *page_chr = ESC "(Q";
131 return x;
132 }
133 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
134 if (x)
135 {
136 *page_chr = ESC "(3";
137 return x;
138 }
139 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
140 if (x)
141 {
142 *page_chr = ESC "(4";
143 return x;
144 }
145 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
146 if (x)
147 {
148 *page_chr = ESC "(S";
149 return x;
150 }
151 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
152 if (x)
153 {
154 *page_chr = ESC "$1";
155 return x;
156 }
/* no converter matched: x is 0 at this point */
158 return x;
159 }
160}
161
/*
 * flush_combos: write the pending character held in w->write_marc8_last.
 *
 * Returns 0 at once when nothing is pending. Otherwise a page switch is
 * issued through yaz_write_marc8_page_chr() (a non-zero result from it is
 * returned unchanged), the character is written, and the pending state is
 * cleared. Returns (size_t)(-1) when 9 or fewer output bytes remain.
 *
 * NOTE(review): listing lines 175, 182, 209 and 218 are absent from this
 * extract: the last argument of the page-switch call, a statement before
 * the failing return, the statement that governs the block at 210-213, and
 * one statement in the final reset. Confirm all four upstream.
 */
162static size_t flush_combos(yaz_iconv_t cd,
163 struct encoder_data *w,
164 char **outbuf, size_t *outbytesleft)
165{
166 unsigned long y = w->write_marc8_last;
167
168 if (!y)
169 return 0;
170
/* a pending character is expected to carry a page; the if below keeps
   that check in place when assert() is compiled out */
171 assert(w->write_marc8_lpage);
172 if (w->write_marc8_lpage)
173 {
174 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
176 if (r)
177 return r;
178 }
179
/* one space check covers both branches below: the NCR branch formats into
   a 9 byte window and this requires more than 9 bytes to be available */
180 if (9 >= *outbytesleft)
181 {
183 return (size_t) (-1);
184 }
185 if (w->write_marc8_ncr)
186 {
/* the output position always advances by 8, whatever was formatted.
   NOTE(review): y is an unsigned long passed for "%04x" - check that
   yaz_snprintf() accepts that and that values above 0xffff cannot occur */
187 yaz_snprintf(*outbuf, 9, "&#x%04x;", y);
188 (*outbytesleft) -= 8;
189 (*outbuf) += 8;
190 }
191 else
192 {
193 size_t out_no = 0;
194 unsigned char byte;
195
/* write up to three bytes of y, most significant first; zero bytes are
   skipped */
196 byte = (unsigned char )((y>>16) & 0xff);
197 if (byte)
198 (*outbuf)[out_no++] = byte;
199 byte = (unsigned char)((y>>8) & 0xff);
200 if (byte)
201 (*outbuf)[out_no++] = byte;
202 byte = (unsigned char )(y & 0xff);
203 if (byte)
204 (*outbuf)[out_no++] = byte;
205 *outbuf += out_no;
206 (*outbytesleft) -= out_no;
207 }
208
/* writes w->write_marc8_second_half_char as one byte after the character */
210 {
211 *(*outbuf)++ = w->write_marc8_second_half_char;
212 (*outbytesleft)--;
213 }
214
/* nothing is pending any more */
215 w->write_marc8_last = 0;
216 w->write_marc8_ncr = 0;
217 w->write_marc8_lpage = 0;
219 return 0;
220}
221
223 struct encoder_data *w,
224 char **outbuf, size_t *outbytesleft,
225 const char *page_chr)
226{
227 const char **old_page_chr = &w->write_marc8_g0;
228
229 /* are we going to a G1-set (such as such as ESC ")!E") */
230 if (page_chr && page_chr[1] == ')')
231 old_page_chr = &w->write_marc8_g1;
232
233 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
234 {
235 size_t plen = 0;
236 const char *page_out = page_chr;
237
238 if (*outbytesleft < 8)
239 {
241
242 return (size_t) (-1);
243 }
244
245 if (*old_page_chr)
246 {
247 if (!strcmp(*old_page_chr, ESC "p")
248 || !strcmp(*old_page_chr, ESC "g")
249 || !strcmp(*old_page_chr, ESC "b"))
250 {
251 page_out = ESC "s";
252 /* Technique 1 leave */
253 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
254 {
255 /* Must leave script + enter new page */
256 plen = strlen(page_out);
257 memcpy(*outbuf, page_out, plen);
258 (*outbuf) += plen;
259 (*outbytesleft) -= plen;
260 page_out = ESC "(B";
261 }
262 }
263 }
264 *old_page_chr = page_chr;
265 plen = strlen(page_out);
266 memcpy(*outbuf, page_out, plen);
267 (*outbuf) += plen;
268 (*outbytesleft) -= plen;
269 }
270 return 0;
271}
272
273
/*
 * yaz_write_marc8_2: encode one character value x.
 *
 * x is looked up with lookup_marc8(). When there is no mapping, loss_mode
 * decides what happens, always on the ESC "(B" page:
 *   0 - fail with (size_t)(-1)
 *   1 - write '|' in place of the character
 *   2 - keep x and mark it for "&#x....;" output
 *   3 - pass values below 32 other than 27 through unchanged, fail for
 *       everything else
 *
 * A character reported as combining is written straight away, after a page
 * switch. Any other character first flushes the pending one and then
 * becomes the pending character itself, to be written by a later flush.
 *
 * Returns 0 on success; a non-zero result of the page switch or of the
 * flush is returned unchanged.
 *
 * NOTE(review): listing lines 316, 318 and 322 are absent from this
 * extract: the statements governed by the two comparisons of x against
 * 0x0361 and 0x0360, and a statement before the failing return. Confirm
 * against the upstream file.
 */
274static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w,
275 unsigned long x,
276 char **outbuf, size_t *outbytesleft,
277 int loss_mode)
278{
279 int comb = 0;
280 int enable_ncr = 0;
281 const char *page_chr = 0;
282 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
283
284 if (!y)
285 {
286 page_chr = ESC "(B";
/* no default label: any other loss_mode leaves y at 0 */
287 switch (loss_mode)
288 {
289 case 0:
290 return (size_t) (-1);
291 case 1:
292 y = '|';
293 break;
294 case 2:
295 y = x;
296 enable_ncr = 1;
297 break;
298 case 3:
299 if (x < 32 && x != 27)
300 y = x;
301 else
302 return (size_t) (-1);
303 }
304 }
305
306 if (comb)
307 {
308 if (page_chr)
309 {
310 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
311 page_chr);
312 if (r)
313 return r;
314 }
315 if (x == 0x0361)
317 else if (x == 0x0360)
319
/* more than one byte must remain, although only one is written here */
320 if (*outbytesleft <= 1)
321 {
323 return (size_t) (-1);
324 }
325 *(*outbuf)++ = (char) y;
326 (*outbytesleft)--;
327 }
328 else
329 {
/* write the previously pending character before replacing it */
330 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
331 if (r)
332 return r;
333
334 w->write_marc8_last = y;
335 w->write_marc8_lpage = page_chr;
336 w->write_marc8_ncr = enable_ncr;
337 }
338 return 0;
339}
340
342 char **outbuf, size_t *outbytesleft)
343{
344 struct encoder_data *w = (struct encoder_data *) en->data;
345 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
346 if (r)
347 return r;
348 w->write_marc8_g1 = 0;
349 return yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, ESC "(B");
350}
351
353 unsigned long x,
354 char **outbuf, size_t *outbytesleft,
355 int loss_mode)
356{
357 unsigned long x1, x2;
358 if (yaz_iso_8859_1_lookup_y(x, &x1, &x2))
359 {
360 /* save the output pointers .. */
361 char *outbuf0 = *outbuf;
362 size_t outbytesleft0 = *outbytesleft;
363 int last_ch = w->write_marc8_last;
364 int ncr = w->write_marc8_ncr;
365 const char *lpage = w->write_marc8_lpage;
366 size_t r;
367
368 r = yaz_write_marc8_2(cd, w, x1,
369 outbuf, outbytesleft, loss_mode);
370 if (r)
371 return r;
372 r = yaz_write_marc8_2(cd, w, x2,
373 outbuf, outbytesleft, loss_mode);
374 if (r && yaz_iconv_error(cd) == YAZ_ICONV_E2BIG)
375 {
376 /* not enough room. reset output to original values */
377 *outbuf = outbuf0;
378 *outbytesleft = outbytesleft0;
379 w->write_marc8_last = last_ch;
380 w->write_marc8_ncr = ncr;
381 w->write_marc8_lpage = lpage;
382 }
383 return r;
384 }
385 return yaz_write_marc8_2(cd, w, x, outbuf, outbytesleft, loss_mode);
386}
387
389 unsigned long x,
390 char **outbuf, size_t *outbytesleft)
391{
392 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
393 x, outbuf, outbytesleft, 0);
394}
395
397 unsigned long x,
398 char **outbuf, size_t *outbytesleft)
399{
400 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
401 x, outbuf, outbytesleft, 1);
402}
403
405 unsigned long x,
406 char **outbuf, size_t *outbytesleft)
407{
408 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
409 x, outbuf, outbytesleft, 2);
410}
411
413 unsigned long x,
414 char **outbuf, size_t *outbytesleft)
415{
416 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
417 x, outbuf, outbytesleft, 3);
418}
419
421{
422 xfree(e->data);
423}
424
427
/*
 * Body of yaz_marc8_encoder(tocode, e): sets up e as a MARC-8 encoder when
 * tocode names one of the MARC-8 variants.
 *
 * tocode is compared with yaz_matchstr() against "MARC8", "MARC8s",
 * "MARC8lossy", "MARC8lossless" and "MARC8c". When none matches, 0 is
 * returned. Otherwise an encoder_data is allocated with xmalloc(), stored
 * in e->data, and e is returned.
 *
 * NOTE(review): the signature lines, the statement that follows each
 * comparison (listing lines 430-438, even numbers) and listing lines
 * 446-448 are absent from this extract. Presumably they install the write,
 * flush, init and destroy handlers on e - confirm against the upstream file.
 */
428{
429 if (!yaz_matchstr(tocode, "MARC8"))
431 else if (!yaz_matchstr(tocode, "MARC8s"))
433 else if (!yaz_matchstr(tocode, "MARC8lossy"))
435 else if (!yaz_matchstr(tocode, "MARC8lossless"))
437 else if (!yaz_matchstr(tocode, "MARC8c"))
439 else
440 return 0;
441
442 {
/* the state block is owned by the encoder from here on */
443 struct encoder_data *data = (struct encoder_data *)
444 xmalloc(sizeof(*data));
445 e->data = data;
449 }
450 return e;
451}
452
453
454/*
455 * Local variables:
456 * c-basic-offset: 4
457 * c-file-style: "Stroustrup"
458 * indent-tabs-mode: nil
459 * End:
460 * vim: shiftwidth=4 tabstop=8 expandtab
461 */
462
Header for errno utilities.
Internal header for iconv.
unsigned long yaz_conv_func_t(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining, unsigned mask, int boffset)
Definition iconv-p.h:70
void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
Definition siconv.c:298
int yaz_iso_8859_1_lookup_y(unsigned long v, unsigned long *x1, unsigned long *x2)
unsigned long x2
unsigned long x1
unsigned y
yaz_conv_func_t yaz_marc8r_45_conv
yaz_conv_func_t yaz_marc8r_4E_conv
static size_t write_marc8_lossless(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
static size_t flush_combos(yaz_iconv_t cd, struct encoder_data *w, char **outbuf, size_t *outbytesleft)
static size_t yaz_write_marc8_generic(yaz_iconv_t cd, struct encoder_data *w, unsigned long x, char **outbuf, size_t *outbytesleft, int loss_mode)
static size_t write_marc8_normal(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
static size_t write_marc8_control(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
#define ESC
static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, struct encoder_data *w, char **outbuf, size_t *outbytesleft, const char *page_chr)
yaz_conv_func_t yaz_marc8r_62_conv
yaz_conv_func_t yaz_marc8r_42_conv
static unsigned long lookup_marc8(yaz_iconv_t cd, unsigned long x, int *comb, const char **page_chr)
yaz_conv_func_t yaz_marc8r_34_conv
static size_t write_marc8_lossy(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
yaz_conv_func_t yaz_marc8r_70_conv
yaz_conv_func_t yaz_marc8r_32_conv
yaz_conv_func_t yaz_marc8r_67_conv
yaz_conv_func_t yaz_marc8r_31_conv
static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w, unsigned long x, char **outbuf, size_t *outbytesleft, int loss_mode)
static size_t flush_marc8(yaz_iconv_t cd, yaz_iconv_encoder_t en, char **outbuf, size_t *outbytesleft)
yaz_conv_func_t yaz_marc8r_53_conv
yaz_conv_func_t yaz_marc8r_33_conv
static void destroy_marc8(yaz_iconv_encoder_t e)
static void init_marc8(yaz_iconv_encoder_t w)
yaz_iconv_encoder_t yaz_marc8_encoder(const char *tocode, yaz_iconv_encoder_t e)
yaz_conv_func_t yaz_marc8r_51_conv
int yaz_matchstr(const char *s1, const char *s2)
match strings - independent of case and '-'
Definition matchstr.c:42
int yaz_iconv_error(yaz_iconv_t cd)
returns last error - like errno for iconv(3)
Definition siconv.c:279
void yaz_snprintf(char *buf, size_t size, const char *fmt,...)
Definition snprintf.c:31
Header for config file reading utilities.
const char * write_marc8_lpage
const char * write_marc8_g0
const char * write_marc8_g1
unsigned write_marc8_second_half_char
unsigned long write_marc8_last
void(* init_handle)(yaz_iconv_encoder_t e)
Definition iconv-p.h:50
size_t(* write_handle)(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
Definition iconv-p.h:45
void(* destroy_handle)(yaz_iconv_encoder_t e)
Definition iconv-p.h:51
size_t(* flush_handle)(yaz_iconv_t cd, yaz_iconv_encoder_t e, char **outbuf, size_t *outbytesleft)
Definition iconv-p.h:48
size_t yaz_write_UTF8_char(unsigned long x, char **outbuf, size_t *outbytesleft, int *error)
encodes UTF-8 sequence
Definition utf8.c:159
Header for memory handling functions.
#define xfree(x)
utility macro which calls xfree_f
Definition xmalloc.h:53
#define xmalloc(x)
utility macro which calls malloc_f
Definition xmalloc.h:49
#define YAZ_ICONV_EILSEQ
error code: Invalid sequence
Definition yaz-iconv.h:49
#define YAZ_ICONV_E2BIG
error code: Not sufficient room for output buffer
Definition yaz-iconv.h:47