YAZ 5.35.1
iconv_decode_danmarc.c
Go to the documentation of this file.
/* This file is part of the YAZ toolkit.
 * Copyright (C) Index Data
 * See the file LICENSE for details.
 */
11#if HAVE_CONFIG_H
12#include <config.h>
13#endif
14
15#include <assert.h>
16#include <errno.h>
17#include <string.h>
18#include <stdio.h>
19#include <yaz/log.h>
20#include <yaz/xmalloc.h>
21#include "iconv-p.h"
22
/* Capacity of the buffer of pending combining characters. */
#define MAX_COMP 4

/*
 * Per-decoder state: a small stack of characters that have been read
 * from the input but not yet handed to the caller.
 */
struct decoder_data {
    unsigned long comp[MAX_COMP]; /* buffered code points */
    size_t no_read[MAX_COMP];     /* input bytes consumed for each entry */
    size_t sz;                    /* number of entries currently buffered */
};
30
31static unsigned long read_useq(yaz_iconv_t cd,
33 unsigned char *inp,
34 size_t inbytesleft, size_t *no_read)
35{
36 static unsigned long u_seq[] = {
37 0xA7, 0x2b9,
38 0xAE, 0x2bc,
39 0xB0, 0x2bb,
40 0xB7, 0x2ba,
41 0xD8, 0x2014, /* Note 13 */
42 0xD9, 0x2191, /* Note 14 */
43 0xDA, 0x2193, /* Note 15 */
44 0xE0, 0x309,
45 0xEB, 0xfe20, /* Note 20 */
46 0xEC, 0xfe21, /* Note 21 */
47 0xF7, 0x326,
48 0xF8, 0x31c,
49 0xF9, 0x32e,
50 0xFA, 0xfe22, /* Note 26 */
51 /* 0xFB Note 27 */
52 0xFC, 0x308,
53 0xFD, 0xf0fd,
54 0xFE, 0xf0fe,
55 0xFF, 0xf0ff,
56 0};
57 int i;
58 unsigned long x;
59
60 if (inbytesleft < 4)
61 {
63 *no_read = 0;
64 return 0;
65 }
66 sscanf((const char *) inp+2, "%2lx", &x);
67 *no_read = 4;
68 for (i = 0; u_seq[i]; i += 2)
69 if (x == u_seq[i])
70 return u_seq[i+1];
71 return x;
72}
73
74static unsigned long read_danmarc(yaz_iconv_t cd,
76 unsigned char *inp,
77 size_t inbytesleft, size_t *no_read)
78{
79 unsigned long x;
80
81 if (inbytesleft < 1)
82 {
84 *no_read = 0;
85 return 0;
86 }
87 x = inp[0];
88 if (x != '@')
89 {
90 *no_read = 1;
91 return x;
92 }
93 if (inbytesleft < 2)
94 {
96 *no_read = 0;
97 return 0;
98 }
99 switch (inp[1])
100 {
101 case '@':
102 case '*':
103 case 0xa4: /* CURRENCY SIGN */
104 x = inp[1];
105 *no_read = 2;
106 break;
107 case 0xe5: /* LATIN SMALL LETTER A WITH RING ABOVE */
108 x = 0xa733;
109 *no_read = 2;
110 break;
111 case 0xc5: /* LATIN CAPITAL LETTER A WITH RING ABOVE */
112 x = 0xa732;
113 *no_read = 2;
114 break;
115 default:
116 if (inp[1] == 'U')
117 return read_useq(cd, d, inp, inbytesleft, no_read);
118 if (inbytesleft < 5)
119 {
121 *no_read = 0;
122 return 0;
123 }
124 sscanf((const char *) inp+1, "%4lx", &x);
125 *no_read = 5;
126 }
127 return x;
128}
129
/*
 * Pairs of (spacing character, combining character), terminated by 0.
 * The first value of each pair is what yaz_danmarc_swap_to_danmarc
 * returns, the second what yaz_danmarc_swap_to_utf8 returns.
 */
static const unsigned long swap_seq[] = {
    0x5e, 0x302,
    0x5f, 0x332,
    0x60, 0x300,
    0xa8, 0x308,
    0xaf, 0x304,
    0xb4, 0x301,
    0xb8, 0x327,
    0x02c7, 0x30c,
    0x02d8, 0x306,
    0x02da, 0x30a,
    0x02db, 0x328,
    0x02dd, 0x30b,
    0
};

/*
 * Map the second value of a swap_seq pair to the first.
 * Any other value is returned unchanged.
 */
unsigned long yaz_danmarc_swap_to_danmarc(unsigned long x)
{
    int i;
    for (i = 0; swap_seq[i]; i += 2)
        if (swap_seq[i+1] == x)
            return swap_seq[i];
    return x;
}

/*
 * Map the first value of a swap_seq pair to the second.
 * Any other value is returned unchanged.
 */
static unsigned long yaz_danmarc_swap_to_utf8(unsigned long x)
{
    int i;
    for (i = 0; swap_seq[i]; i += 2)
        if (swap_seq[i] == x)
            return swap_seq[i+1];
    return x;
}
163
164static unsigned long read_danmarc_comb(yaz_iconv_t cd,
166 unsigned char *inp,
167 size_t inbytesleft, size_t *no_read)
168{
169 struct decoder_data *data = (struct decoder_data *) d->data;
170 unsigned long x;
171
172 if (data->sz)
173 {
174 *no_read = data->no_read[--data->sz];
175 return data->comp[data->sz];
176 }
177 while (1)
178 {
179 x = read_danmarc(cd, d, inp, inbytesleft, no_read);
180 if (x)
182 if (data->sz >= MAX_COMP)
183 break;
185 break;
186 data->no_read[data->sz] = *no_read;
187 data->comp[data->sz++] = x;
188 inp += *no_read;
189 inbytesleft -= *no_read;
190 }
191 return x;
192}
193
195 unsigned char *inp,
196 size_t inbytesleft, size_t *no_read)
197{
198 struct decoder_data *data = (struct decoder_data *) d->data;
199 data->sz = 0;
200 return 0;
201}
202
204{
205 struct decoder_data *data = (struct decoder_data *) d->data;
206 xfree(data);
207}
208
211
212{
213 if (!yaz_matchstr(fromcode, "danmarc")
214 || !yaz_matchstr(fromcode, "danmarc2"))
215 {
216 struct decoder_data *data = (struct decoder_data *)
217 xmalloc(sizeof(*data));
218 d->data = data;
222 return d;
223 }
224 return 0;
225}
226
/*
 * Tell whether x lies in one of the combining character ranges
 * listed below.
 *
 * Returns 1 if it does, 0 otherwise.
 */
int yaz_danmarc_is_combining(unsigned long x)
{
    /* https://en.wikipedia.org/wiki/Combining_character */
    /* inclusive (first, last) ranges in ascending order */
    static const unsigned long ranges[][2] = {
        {0x300, 0x36F},
        {0x1AB0, 0x1AFF},
        {0x1DC0, 0x1DFF},
        {0x20D0, 0x20FF},
        {0xFE20, 0xFE2F},
    };
    size_t i;

    for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++)
    {
        if (x < ranges[i][0])
            break; /* below this and therefore all later ranges */
        if (x <= ranges[i][1])
            return 1;
    }
    return 0;
}
246
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */
255
Header for errno utilities.
Internal header for iconv.
void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
Definition siconv.c:298
yaz_iconv_decoder_t yaz_danmarc_decoder(const char *fromcode, yaz_iconv_decoder_t d)
static size_t init_danmarc(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
int yaz_danmarc_is_combining(unsigned long x)
static unsigned long yaz_danmarc_swap_to_utf8(unsigned long x)
static unsigned long read_useq(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
void destroy_danmarc(yaz_iconv_decoder_t d)
static unsigned long read_danmarc(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
unsigned long yaz_danmarc_swap_to_danmarc(unsigned long x)
static unsigned long swap_seq[]
#define MAX_COMP
static unsigned long read_danmarc_comb(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
Logging utility.
int yaz_matchstr(const char *s1, const char *s2)
match strings - independent of case and '-'
Definition matchstr.c:42
unsigned long comp[MAX_COMP]
size_t no_read[MAX_COMP]
unsigned long(* read_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inbuf, size_t inbytesleft, size_t *no_read)
Definition iconv-p.h:86
size_t(* init_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inbuf, size_t inbytesleft, size_t *no_read)
Definition iconv-p.h:83
void(* destroy_handle)(yaz_iconv_decoder_t d)
Definition iconv-p.h:89
Header for memory handling functions.
#define xfree(x)
utility macro which calls xfree_f
Definition xmalloc.h:53
#define xmalloc(x)
utility macro which calls malloc_f
Definition xmalloc.h:49
#define YAZ_ICONV_EINVAL
error code: An incomplete multibyte sequence is in input buffer
Definition yaz-iconv.h:51