YAZ  5.34.0
iconv_encode_marc8.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
13 #if HAVE_CONFIG_H
14 #include <config.h>
15 #endif
16 
17 #include <assert.h>
18 #include <errno.h>
19 #include <string.h>
20 
21 #include <yaz/xmalloc.h>
22 #include <yaz/snprintf.h>
23 #include "iconv-p.h"
24 
37 
38 #define ESC "\033"
39 
/* Per-handle state of the MARC-8 encoder. */
struct encoder_data
{
    /* byte to emit right after the pending base character is flushed;
       0 when there is none (set for U+0360 and U+0361) */
    unsigned write_marc8_second_half_char;
    /* base character held back until the next non-combining character
       (or a flush) arrives; 0 when nothing is pending */
    unsigned long write_marc8_last;
    /* non-zero: write_marc8_last is emitted as "&#x....;" rather than
       as raw bytes */
    int write_marc8_ncr;
    /* character set escape to select before write_marc8_last is emitted */
    const char *write_marc8_lpage;
    /* escape sequence of the currently selected G0 set */
    const char *write_marc8_g0;
    /* escape sequence of the currently selected G1 set; 0 if none */
    const char *write_marc8_g1;
};
49 
51 {
52  struct encoder_data *data = (struct encoder_data *) w->data;
54  data->write_marc8_last = 0;
55  data->write_marc8_ncr = 0;
56  data->write_marc8_lpage = 0;
57  data->write_marc8_g0 = ESC "(B";
58  data->write_marc8_g1 = 0;
59 }
60 
61 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
62  struct encoder_data *w,
63  char **outbuf, size_t *outbytesleft,
64  const char *page_chr);
65 
66 static unsigned long lookup_marc8(yaz_iconv_t cd,
67  unsigned long x, int *comb,
68  const char **page_chr)
69 {
70  char utf8_buf[7];
71  char *utf8_outbuf = utf8_buf;
72  size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
73  int error_code;
74 
75  r = yaz_write_UTF8_char(x, &utf8_outbuf, &utf8_outbytesleft, &error_code);
76  if (r == (size_t)(-1))
77  {
79  return 0;
80  }
81  else
82  {
83  unsigned char *inp;
84  size_t inbytesleft, no_read_sub = 0;
85  unsigned long x;
86 
87  *utf8_outbuf = '\0';
88  inp = (unsigned char *) utf8_buf;
89  inbytesleft = strlen(utf8_buf);
90 
91  x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
92  if (x)
93  {
94  *page_chr = ESC "(B";
95  return x;
96  }
97  x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
98  if (x)
99  {
100  *page_chr = ESC "(B";
101  return x;
102  }
103  x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
104  if (x)
105  {
106  *page_chr = ESC "b";
107  return x;
108  }
109  x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
110  if (x)
111  {
112  *page_chr = ESC "p";
113  return x;
114  }
115  x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
116  if (x)
117  {
118  *page_chr = ESC "(2";
119  return x;
120  }
121  x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
122  if (x)
123  {
124  *page_chr = ESC "(N";
125  return x;
126  }
127  x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
128  if (x)
129  {
130  *page_chr = ESC "(Q";
131  return x;
132  }
133  x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
134  if (x)
135  {
136  *page_chr = ESC "(3";
137  return x;
138  }
139  x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
140  if (x)
141  {
142  *page_chr = ESC "(4";
143  return x;
144  }
145  x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
146  if (x)
147  {
148  *page_chr = ESC "(S";
149  return x;
150  }
151  x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
152  if (x)
153  {
154  *page_chr = ESC "$1";
155  return x;
156  }
158  return x;
159  }
160 }
161 
162 static size_t flush_combos(yaz_iconv_t cd,
163  struct encoder_data *w,
164  char **outbuf, size_t *outbytesleft)
165 {
166  unsigned long y = w->write_marc8_last;
167 
168  if (!y)
169  return 0;
170 
171  assert(w->write_marc8_lpage);
172  if (w->write_marc8_lpage)
173  {
174  size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
175  w->write_marc8_lpage);
176  if (r)
177  return r;
178  }
179 
180  if (9 >= *outbytesleft)
181  {
183  return (size_t) (-1);
184  }
185  if (w->write_marc8_ncr)
186  {
187  yaz_snprintf(*outbuf, 9, "&#x%04x;", y);
188  (*outbytesleft) -= 8;
189  (*outbuf) += 8;
190  }
191  else
192  {
193  size_t out_no = 0;
194  unsigned char byte;
195 
196  byte = (unsigned char )((y>>16) & 0xff);
197  if (byte)
198  (*outbuf)[out_no++] = byte;
199  byte = (unsigned char)((y>>8) & 0xff);
200  if (byte)
201  (*outbuf)[out_no++] = byte;
202  byte = (unsigned char )(y & 0xff);
203  if (byte)
204  (*outbuf)[out_no++] = byte;
205  *outbuf += out_no;
206  (*outbytesleft) -= out_no;
207  }
208 
210  {
211  *(*outbuf)++ = w->write_marc8_second_half_char;
212  (*outbytesleft)--;
213  }
214 
215  w->write_marc8_last = 0;
216  w->write_marc8_ncr = 0;
217  w->write_marc8_lpage = 0;
219  return 0;
220 }
221 
223  struct encoder_data *w,
224  char **outbuf, size_t *outbytesleft,
225  const char *page_chr)
226 {
227  const char **old_page_chr = &w->write_marc8_g0;
228 
229  /* are we going to a G1-set (such as such as ESC ")!E") */
230  if (page_chr && page_chr[1] == ')')
231  old_page_chr = &w->write_marc8_g1;
232 
233  if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
234  {
235  size_t plen = 0;
236  const char *page_out = page_chr;
237 
238  if (*outbytesleft < 8)
239  {
241 
242  return (size_t) (-1);
243  }
244 
245  if (*old_page_chr)
246  {
247  if (!strcmp(*old_page_chr, ESC "p")
248  || !strcmp(*old_page_chr, ESC "g")
249  || !strcmp(*old_page_chr, ESC "b"))
250  {
251  page_out = ESC "s";
252  /* Technique 1 leave */
253  if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
254  {
255  /* Must leave script + enter new page */
256  plen = strlen(page_out);
257  memcpy(*outbuf, page_out, plen);
258  (*outbuf) += plen;
259  (*outbytesleft) -= plen;
260  page_out = ESC "(B";
261  }
262  }
263  }
264  *old_page_chr = page_chr;
265  plen = strlen(page_out);
266  memcpy(*outbuf, page_out, plen);
267  (*outbuf) += plen;
268  (*outbytesleft) -= plen;
269  }
270  return 0;
271 }
272 
273 
274 static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w,
275  unsigned long x,
276  char **outbuf, size_t *outbytesleft,
277  int loss_mode)
278 {
279  int comb = 0;
280  int enable_ncr = 0;
281  const char *page_chr = 0;
282  unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
283 
284  if (!y)
285  {
286  page_chr = ESC "(B";
287  switch (loss_mode)
288  {
289  case 0:
290  return (size_t) (-1);
291  case 1:
292  y = '|';
293  break;
294  case 2:
295  y = x;
296  enable_ncr = 1;
297  break;
298  case 3:
299  if (x < 32 && x != 27)
300  y = x;
301  else
302  return (size_t) (-1);
303  }
304  }
305 
306  if (comb)
307  {
308  if (page_chr)
309  {
310  size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
311  page_chr);
312  if (r)
313  return r;
314  }
315  if (x == 0x0361)
317  else if (x == 0x0360)
319 
320  if (*outbytesleft <= 1)
321  {
323  return (size_t) (-1);
324  }
325  *(*outbuf)++ = (char) y;
326  (*outbytesleft)--;
327  }
328  else
329  {
330  size_t r = flush_combos(cd, w, outbuf, outbytesleft);
331  if (r)
332  return r;
333 
334  w->write_marc8_last = y;
335  w->write_marc8_lpage = page_chr;
336  w->write_marc8_ncr = enable_ncr;
337  }
338  return 0;
339 }
340 
342  char **outbuf, size_t *outbytesleft)
343 {
344  struct encoder_data *w = (struct encoder_data *) en->data;
345  size_t r = flush_combos(cd, w, outbuf, outbytesleft);
346  if (r)
347  return r;
348  w->write_marc8_g1 = 0;
349  return yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, ESC "(B");
350 }
351 
353  unsigned long x,
354  char **outbuf, size_t *outbytesleft,
355  int loss_mode)
356 {
357  unsigned long x1, x2;
358  if (yaz_iso_8859_1_lookup_y(x, &x1, &x2))
359  {
360  /* save the output pointers .. */
361  char *outbuf0 = *outbuf;
362  size_t outbytesleft0 = *outbytesleft;
363  int last_ch = w->write_marc8_last;
364  int ncr = w->write_marc8_ncr;
365  const char *lpage = w->write_marc8_lpage;
366  size_t r;
367 
368  r = yaz_write_marc8_2(cd, w, x1,
369  outbuf, outbytesleft, loss_mode);
370  if (r)
371  return r;
372  r = yaz_write_marc8_2(cd, w, x2,
373  outbuf, outbytesleft, loss_mode);
374  if (r && yaz_iconv_error(cd) == YAZ_ICONV_E2BIG)
375  {
376  /* not enough room. reset output to original values */
377  *outbuf = outbuf0;
378  *outbytesleft = outbytesleft0;
379  w->write_marc8_last = last_ch;
380  w->write_marc8_ncr = ncr;
381  w->write_marc8_lpage = lpage;
382  }
383  return r;
384  }
385  return yaz_write_marc8_2(cd, w, x, outbuf, outbytesleft, loss_mode);
386 }
387 
389  unsigned long x,
390  char **outbuf, size_t *outbytesleft)
391 {
392  return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
393  x, outbuf, outbytesleft, 0);
394 }
395 
397  unsigned long x,
398  char **outbuf, size_t *outbytesleft)
399 {
400  return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
401  x, outbuf, outbytesleft, 1);
402 }
403 
405  unsigned long x,
406  char **outbuf, size_t *outbytesleft)
407 {
408  return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
409  x, outbuf, outbytesleft, 2);
410 }
411 
413  unsigned long x,
414  char **outbuf, size_t *outbytesleft)
415 {
416  return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
417  x, outbuf, outbytesleft, 3);
418 }
419 
421 {
422  xfree(e->data);
423 }
424 
427 
428 {
429  if (!yaz_matchstr(tocode, "MARC8"))
431  else if (!yaz_matchstr(tocode, "MARC8s"))
433  else if (!yaz_matchstr(tocode, "MARC8lossy"))
435  else if (!yaz_matchstr(tocode, "MARC8lossless"))
437  else if (!yaz_matchstr(tocode, "MARC8c"))
439  else
440  return 0;
441 
442  {
443  struct encoder_data *data = (struct encoder_data *)
444  xmalloc(sizeof(*data));
445  e->data = data;
448  e->init_handle = init_marc8;
449  }
450  return e;
451 }
452 
453 
454 /*
455  * Local variables:
456  * c-basic-offset: 4
457  * c-file-style: "Stroustrup"
458  * indent-tabs-mode: nil
459  * End:
460  * vim: shiftwidth=4 tabstop=8 expandtab
461  */
462 
Header for errno utilities.
Internal header for iconv.
unsigned long yaz_conv_func_t(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining, unsigned mask, int boffset)
Definition: iconv-p.h:70
void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
Definition: siconv.c:298
int yaz_iso_8859_1_lookup_y(unsigned long v, unsigned long *x1, unsigned long *x2)
unsigned long x2
unsigned long x1
unsigned y
yaz_conv_func_t yaz_marc8r_45_conv
yaz_conv_func_t yaz_marc8r_4E_conv
static size_t write_marc8_lossless(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
static size_t flush_combos(yaz_iconv_t cd, struct encoder_data *w, char **outbuf, size_t *outbytesleft)
static size_t yaz_write_marc8_generic(yaz_iconv_t cd, struct encoder_data *w, unsigned long x, char **outbuf, size_t *outbytesleft, int loss_mode)
static size_t write_marc8_normal(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
static size_t write_marc8_control(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
#define ESC
static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, struct encoder_data *w, char **outbuf, size_t *outbytesleft, const char *page_chr)
yaz_conv_func_t yaz_marc8r_62_conv
yaz_conv_func_t yaz_marc8r_42_conv
static unsigned long lookup_marc8(yaz_iconv_t cd, unsigned long x, int *comb, const char **page_chr)
yaz_conv_func_t yaz_marc8r_34_conv
static size_t write_marc8_lossy(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
yaz_conv_func_t yaz_marc8r_70_conv
yaz_conv_func_t yaz_marc8r_32_conv
yaz_conv_func_t yaz_marc8r_67_conv
yaz_conv_func_t yaz_marc8r_31_conv
static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w, unsigned long x, char **outbuf, size_t *outbytesleft, int loss_mode)
static size_t flush_marc8(yaz_iconv_t cd, yaz_iconv_encoder_t en, char **outbuf, size_t *outbytesleft)
yaz_conv_func_t yaz_marc8r_53_conv
yaz_conv_func_t yaz_marc8r_33_conv
static void destroy_marc8(yaz_iconv_encoder_t e)
static void init_marc8(yaz_iconv_encoder_t w)
yaz_iconv_encoder_t yaz_marc8_encoder(const char *tocode, yaz_iconv_encoder_t e)
yaz_conv_func_t yaz_marc8r_51_conv
int yaz_matchstr(const char *s1, const char *s2)
match strings - independent of case and '-'
Definition: matchstr.c:42
int yaz_iconv_error(yaz_iconv_t cd)
returns last error - like errno for iconv(3)
Definition: siconv.c:279
void yaz_snprintf(char *buf, size_t size, const char *fmt,...)
Definition: snprintf.c:31
Header for config file reading utilities.
const char * write_marc8_lpage
const char * write_marc8_g0
const char * write_marc8_g1
unsigned write_marc8_second_half_char
unsigned long write_marc8_last
void(* init_handle)(yaz_iconv_encoder_t e)
Definition: iconv-p.h:50
size_t(* write_handle)(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
Definition: iconv-p.h:45
void(* destroy_handle)(yaz_iconv_encoder_t e)
Definition: iconv-p.h:51
size_t(* flush_handle)(yaz_iconv_t cd, yaz_iconv_encoder_t e, char **outbuf, size_t *outbytesleft)
Definition: iconv-p.h:48
size_t yaz_write_UTF8_char(unsigned long x, char **outbuf, size_t *outbytesleft, int *error)
encodes UTF-8 sequence
Definition: utf8.c:159
Header for memory handling functions.
#define xfree(x)
utility macro which calls xfree_f
Definition: xmalloc.h:53
#define xmalloc(x)
utility macro which calls malloc_f
Definition: xmalloc.h:49
#define YAZ_ICONV_EILSEQ
error code: Invalid sequence
Definition: yaz-iconv.h:49
#define YAZ_ICONV_E2BIG
error code: Not sufficient room for output buffer
Definition: yaz-iconv.h:47