YAZ  5.34.0
iconv_decode_danmarc.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14 
15 #include <assert.h>
16 #include <errno.h>
17 #include <string.h>
18 #include <stdio.h>
19 #include <yaz/log.h>
20 #include <yaz/xmalloc.h>
21 #include "iconv-p.h"
22 
/** Maximum number of combining characters that can be held back at once. */
#define MAX_COMP 4

/**
 * Per-decoder state for the DanMARC reader.
 *
 * read_danmarc_comb() pushes combining characters it has read onto this
 * small stack and hands them out again, last pushed first, on later calls.
 */
struct decoder_data {
    unsigned long comp[MAX_COMP]; /* held-back combining code points */
    size_t no_read[MAX_COMP];     /* input bytes consumed by each entry of comp */
    size_t sz;                    /* number of entries currently held back */
};
30 
31 static unsigned long read_useq(yaz_iconv_t cd,
33  unsigned char *inp,
34  size_t inbytesleft, size_t *no_read)
35 {
36  static unsigned long u_seq[] = {
37  0xA7, 0x2b9,
38  0xAE, 0x2bc,
39  0xB0, 0x2bb,
40  0xB7, 0x2ba,
41  0xD8, 0x2014, /* Note 13 */
42  0xD9, 0x2191, /* Note 14 */
43  0xDA, 0x2193, /* Note 15 */
44  0xE0, 0x309,
45  0xEB, 0xfe20, /* Note 20 */
46  0xEC, 0xfe21, /* Note 21 */
47  0xF7, 0x326,
48  0xF8, 0x31c,
49  0xF9, 0x32e,
50  0xFA, 0xfe22, /* Note 26 */
51  /* 0xFB Note 27 */
52  0xFC, 0x308,
53  0xFD, 0xf0fd,
54  0xFE, 0xf0fe,
55  0xFF, 0xf0ff,
56  0};
57  int i;
58  unsigned long x;
59 
60  if (inbytesleft < 4)
61  {
63  *no_read = 0;
64  return 0;
65  }
66  sscanf((const char *) inp+2, "%2lx", &x);
67  *no_read = 4;
68  for (i = 0; u_seq[i]; i += 2)
69  if (x == u_seq[i])
70  return u_seq[i+1];
71  return x;
72 }
73 
74 static unsigned long read_danmarc(yaz_iconv_t cd,
76  unsigned char *inp,
77  size_t inbytesleft, size_t *no_read)
78 {
79  unsigned long x;
80 
81  if (inbytesleft < 1)
82  {
84  *no_read = 0;
85  return 0;
86  }
87  x = inp[0];
88  if (x != '@')
89  {
90  *no_read = 1;
91  return x;
92  }
93  if (inbytesleft < 2)
94  {
96  *no_read = 0;
97  return 0;
98  }
99  switch (inp[1])
100  {
101  case '@':
102  case '*':
103  case 0xa4: /* CURRENCY SIGN */
104  x = inp[1];
105  *no_read = 2;
106  break;
107  case 0xe5: /* LATIN SMALL LETTER A WITH RING ABOVE */
108  x = 0xa733;
109  *no_read = 2;
110  break;
111  case 0xc5: /* LATIN CAPITAL LETTER A WITH RING ABOVE */
112  x = 0xa732;
113  *no_read = 2;
114  break;
115  default:
116  if (inp[1] == 'U')
117  return read_useq(cd, d, inp, inbytesleft, no_read);
118  if (inbytesleft < 5)
119  {
121  *no_read = 0;
122  return 0;
123  }
124  sscanf((const char *) inp+1, "%4lx", &x);
125  *no_read = 5;
126  }
127  return x;
128 }
129 
/**
 * Pairs of (value on the DanMARC side, value on the UTF-8 side) used by
 * yaz_danmarc_swap_to_danmarc() and yaz_danmarc_swap_to_utf8().
 * A zero in the first position of a pair terminates the table.
 */
static const unsigned long swap_seq[] = {
    0x5e, 0x302,
    0x5f, 0x332,
    0x60, 0x300,
    0xa8, 0x308,
    0xaf, 0x304,
    0xb4, 0x301,
    0xb8, 0x327,
    0x02c7, 0x30c,
    0x02d8, 0x306,
    0x02da, 0x30a,
    0x02db, 0x328,
    0x02dd, 0x30b,
    0
};
145 
146 unsigned long yaz_danmarc_swap_to_danmarc(unsigned long x)
147 {
148  int i;
149  for (i = 0; swap_seq[i]; i += 2)
150  if (swap_seq[i+1] == x)
151  return swap_seq[i];
152  return x;
153 }
154 
155 static unsigned long yaz_danmarc_swap_to_utf8(unsigned long x)
156 {
157  int i;
158  for (i = 0; swap_seq[i]; i += 2)
159  if (swap_seq[i] == x)
160  return swap_seq[i+1];
161  return x;
162 }
163 
164 static unsigned long read_danmarc_comb(yaz_iconv_t cd,
166  unsigned char *inp,
167  size_t inbytesleft, size_t *no_read)
168 {
169  struct decoder_data *data = (struct decoder_data *) d->data;
170  unsigned long x;
171 
172  if (data->sz)
173  {
174  *no_read = data->no_read[--data->sz];
175  return data->comp[data->sz];
176  }
177  while (1)
178  {
179  x = read_danmarc(cd, d, inp, inbytesleft, no_read);
180  if (x)
182  if (data->sz >= MAX_COMP)
183  break;
184  if (!yaz_danmarc_is_combining(x))
185  break;
186  data->no_read[data->sz] = *no_read;
187  data->comp[data->sz++] = x;
188  inp += *no_read;
189  inbytesleft -= *no_read;
190  }
191  return x;
192 }
193 
195  unsigned char *inp,
196  size_t inbytesleft, size_t *no_read)
197 {
198  struct decoder_data *data = (struct decoder_data *) d->data;
199  data->sz = 0;
200  return 0;
201 }
202 
204 {
205  struct decoder_data *data = (struct decoder_data *) d->data;
206  xfree(data);
207 }
208 
211 
212 {
213  if (!yaz_matchstr(fromcode, "danmarc")
214  || !yaz_matchstr(fromcode, "danmarc2"))
215  {
216  struct decoder_data *data = (struct decoder_data *)
217  xmalloc(sizeof(*data));
218  d->data = data;
222  return d;
223  }
224  return 0;
225 }
226 
/**
 * Tell whether a code point lies in one of the combining-character ranges
 * listed below.
 *
 * \param x code point to test
 * \return 1 if x is inside one of the ranges, 0 otherwise
 */
int yaz_danmarc_is_combining(unsigned long x)
{
    /* https://en.wikipedia.org/wiki/Combining_character */
    /* inclusive (first, last) pairs in ascending order; 0 ends the table */
    static const unsigned long ranges[] = {
        0x300, 0x36F,
        0x1AB0, 0x1AFF,
        0x1DC0, 0x1DFF,
        0x20D0, 0x20FF,
        0xFE20, 0xFE2F,
        0};
    size_t i;

    /* the table is sorted, so stop as soon as x is below a range start */
    for (i = 0; ranges[i] != 0 && x >= ranges[i]; i += 2)
    {
        if (x <= ranges[i + 1])
            return 1;
    }
    return 0;
}
246 
247 /*
248  * Local variables:
249  * c-basic-offset: 4
250  * c-file-style: "Stroustrup"
251  * indent-tabs-mode: nil
252  * End:
253  * vim: shiftwidth=4 tabstop=8 expandtab
254  */
255 
Header for errno utilities.
Internal header for iconv.
void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
Definition: siconv.c:298
yaz_iconv_decoder_t yaz_danmarc_decoder(const char *fromcode, yaz_iconv_decoder_t d)
static size_t init_danmarc(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
int yaz_danmarc_is_combining(unsigned long x)
static unsigned long yaz_danmarc_swap_to_utf8(unsigned long x)
static unsigned long read_useq(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
void destroy_danmarc(yaz_iconv_decoder_t d)
static unsigned long read_danmarc(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
unsigned long yaz_danmarc_swap_to_danmarc(unsigned long x)
static unsigned long swap_seq[]
#define MAX_COMP
static unsigned long read_danmarc_comb(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
Logging utility.
int yaz_matchstr(const char *s1, const char *s2)
match strings - independent of case and '-'
Definition: matchstr.c:42
unsigned long comp[MAX_COMP]
size_t no_read[MAX_COMP]
unsigned long(* read_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inbuf, size_t inbytesleft, size_t *no_read)
Definition: iconv-p.h:86
size_t(* init_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inbuf, size_t inbytesleft, size_t *no_read)
Definition: iconv-p.h:83
void(* destroy_handle)(yaz_iconv_decoder_t d)
Definition: iconv-p.h:89
Header for memory handling functions.
#define xfree(x)
utility macro which calls xfree_f
Definition: xmalloc.h:53
#define xmalloc(x)
utility macro which calls malloc_f
Definition: xmalloc.h:49
#define YAZ_ICONV_EINVAL
error code: An incomplete multibyte sequence is in input buffer
Definition: yaz-iconv.h:51