YAZ 5.35.1
iconv_encode_iso_8859_1.c
Go to the documentation of this file.
1/* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
4 */
11#if HAVE_CONFIG_H
12#include <config.h>
13#endif
14
15#include <assert.h>
16#include <errno.h>
17#include <string.h>
18
19#include <yaz/xmalloc.h>
20#include "iconv-p.h"
21
/* Per-encoder state kept by the ISO-8859-1 writer between calls. */
struct encoder_data
{
    /* Code point the writer has held back instead of emitting, so that the
       next character can be checked for a combining pair.  Zero means that
       nothing is pending. */
    unsigned long compose_char;
};
26
27
28
/*
 * Table of two-code-point sequences that have a single-character
 * ISO-8859-1 equivalent.
 *
 *   x1  base character (an ASCII letter)
 *   x2  the mark that follows it (0x0300 grave, 0x0301 acute,
 *       0x0302 circumflex, 0x0303 tilde, 0x0308 diaeresis,
 *       0x030a ring above, 0x0327 cedilla)
 *   y   the resulting Latin-1 code, always within 0xc0..0xff
 *
 * Rows are ordered by ascending y.  The table ends with an all-zero row;
 * scanners stop at the first row whose x1 is 0.  The table is read-only
 * and is therefore declared const.
 */
static const struct {
    unsigned long x1, x2;
    unsigned y;
} latin1_comb[] = {
    { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
    { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
    { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
    { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
    { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
    { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
    /* omitted: 0xc6 LATIN CAPITAL LETTER AE */
    { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
    { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
    { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
    { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
    { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
    { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
    { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
    { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
    { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
    /* no row for 0xd0 (LATIN CAPITAL LETTER ETH) */
    { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
    { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
    { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
    { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
    { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
    { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
    /* omitted: 0xd7 MULTIPLICATION SIGN */
    /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
    { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
    { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
    { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
    { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
    { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
    /* omitted: 0xde LATIN CAPITAL LETTER THORN */
    /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
    { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
    { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
    { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
    { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
    { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
    { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
    /* omitted: 0xe6 LATIN SMALL LETTER AE */
    { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
    { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
    { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
    { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
    { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
    { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
    { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
    { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
    { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
    /* omitted: 0xf0 LATIN SMALL LETTER ETH */
    { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
    { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
    { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
    { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
    { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
    { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
    /* omitted: 0xf7 DIVISION SIGN */
    /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
    { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
    { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
    { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
    { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
    { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
    /* omitted: 0xfe LATIN SMALL LETTER THORN */
    { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */

    { 0, 0, 0}            /* end-of-table sentinel */
};
99
100int yaz_iso_8859_1_lookup_y(unsigned long v,
101 unsigned long *x1, unsigned long *x2)
102{
103 if (v >= 0xc0 && v <= 0xff) /* optimization. min and max .y values */
104 {
105 int i;
106 for (i = 0; latin1_comb[i].x1; i++)
107 {
108 if (v == latin1_comb[i].y)
109 {
110 *x1 = latin1_comb[i].x1;
111 *x2 = latin1_comb[i].x2;
112 return 1;
113 }
114 }
115 }
116 return 0;
117}
118
119int yaz_iso_8859_1_lookup_x12(unsigned long x1, unsigned long x2,
120 unsigned long *y)
121{
122 /* For MARC8s we try to get a Latin-1 page code out of it */
123 int i;
124 for (i = 0; latin1_comb[i].x1; i++)
125 if (x2 == latin1_comb[i].x2 && x1 == latin1_comb[i].x1)
126 {
127 *y = latin1_comb[i].y;
128 return 1;
129 }
130 return 0;
131}
132
/* Writer for ISO-8859-1 output: emits code point x as one byte at *outbuf,
 * advancing *outbuf and decrementing *outbytesleft for every byte written.
 * A printable ASCII character (33..126) is not written straight away; it is
 * parked in compose_char so that the next character can be merged with it
 * through latin1_comb.
 * Returns 0 on success (including when x was only parked) and (size_t)(-1)
 * when there is no room in the output buffer or x is 0 or above 255.
 * NOTE(review): the line holding the function name and its leading
 * parameters is missing from this listing; the symbol index at the end of
 * the page gives it as write_iso_8859_1(cd, e, x, outbuf, outbytesleft) -
 * confirm against the real source. */
134 unsigned long x,
135 char **outbuf, size_t *outbytesleft)
136{
137 struct encoder_data *w = (struct encoder_data *) e->data;
138 /* list of two char unicode sequence that, when combined, are
139 equivalent to single unicode chars that can be represented in
140 ISO-8859-1/Latin-1.
141 Regular iconv on Linux at least does not seem to convert these,
142 but since MARC-8 to UTF-8 generates these composed sequence
143 we get a better chance of a successful MARC-8 -> ISO-8859-1
144 conversion */
145 unsigned char *outp = (unsigned char *) *outbuf;
146
147 if (w->compose_char)
148 {
/* A character is pending: check whether (pending, x) is a table pair. */
149 int i;
150 for (i = 0; latin1_comb[i].x1; i++)
151 if (w->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
152 {
/* Pair found: x becomes the combined Latin-1 code (0xc0..0xff). */
153 x = latin1_comb[i].y;
154 break;
155 }
/* i now indexes the matching row, or the zero sentinel if none matched.
   Either outcome needs at least one output byte. */
156 if (*outbytesleft < 1)
157 { /* no room. Retain compose_char and bail out */
/* NOTE(review): listing line 158 is missing here; presumably it records
   the overflow on cd (yaz_iconv_set_errno with YAZ_ICONV_E2BIG) - confirm. */
159 return (size_t)(-1);
160 }
161 if (!latin1_comb[i].x1)
162 { /* not found. Just write compose_char */
163 *outp++ = (unsigned char) w->compose_char;
164 (*outbytesleft)--;
165 *outbuf = (char *) outp;
166 }
167 /* compose_char used so reset it. x now holds current char */
168 w->compose_char = 0;
169 }
170
/* Park printable ASCII as a possible first half of a pair.  compose_char is
   always 0 on reaching this test (it was 0 on entry or was reset just
   above), so the last operand cannot be false.  A combined code from the
   table is >= 0xc0 and therefore falls through to be written. */
171 if (x > 32 && x < 127 && w->compose_char == 0)
172 {
173 w->compose_char = x;
174 return 0;
175 }
/* Anything outside 1..255 has no single-byte representation. */
176 else if (x > 255 || x < 1)
177 {
/* NOTE(review): listing line 178 is missing here; presumably it sets
   YAZ_ICONV_EILSEQ on cd - confirm. */
179 return (size_t) -1;
180 }
181 else if (*outbytesleft < 1)
182 {
/* NOTE(review): listing line 183 is missing here; presumably it sets
   YAZ_ICONV_E2BIG on cd - confirm. */
184 return (size_t)(-1);
185 }
/* Emit x itself as a single byte. */
186 *outp++ = (unsigned char) x;
187 (*outbytesleft)--;
188 *outbuf = (char *) outp;
189 return 0;
190}
191
/* Flush handler: writes out a character still parked in compose_char, if
 * any, as a single byte and clears the pending state.
 * Returns 0 on success or when nothing was pending, and (size_t)(-1) when
 * the output buffer has no room (compose_char is then left pending).
 * NOTE(review): the line holding the function name and its leading
 * parameters is missing from this listing; the symbol index at the end of
 * the page gives it as flush_iso_8859_1(cd, e, outbuf, outbytesleft) -
 * confirm against the real source. */
193 char **outbuf, size_t *outbytesleft)
194{
195 struct encoder_data *w = (struct encoder_data *) e->data;
196 if (w->compose_char)
197 {
198 unsigned char *outp = (unsigned char *) *outbuf;
199 if (*outbytesleft < 1)
200 {
/* NOTE(review): listing line 201 is missing here; presumably it sets
   YAZ_ICONV_E2BIG on cd - confirm. */
202 return (size_t)(-1);
203 }
/* Write the pending character and advance the caller's buffer state. */
204 *outp++ = (unsigned char) w->compose_char;
205 (*outbytesleft)--;
206 *outbuf = (char *) outp;
207 w->compose_char = 0;
208 }
209 return 0;
210}
211
212
/* Resets the encoder state so that no character is pending.
 * NOTE(review): the signature line is missing from this listing; this is
 * presumably the body of init_iso_8859_1(yaz_iconv_encoder_t e) named in
 * the symbol index at the end of the page - confirm. */
214{
215 struct encoder_data *w = (struct encoder_data *) e->data;
216 w->compose_char = 0;
217}
218
223
226
/* Encoder set-up: when tocode matches "iso88591" under yaz_matchstr, a
 * fresh encoder_data is allocated with xmalloc, attached to e->data, and e
 * is returned; for any other tocode the function returns 0 and leaves e
 * alone.
 * NOTE(review): the signature line is missing from this listing; the
 * symbol index at the end of the page gives it as
 * yaz_iso_8859_1_encoder(const char *tocode, yaz_iconv_encoder_t e) -
 * confirm. */
227{
/* The negation implies yaz_matchstr yields 0 on a match - confirm. */
228 if (!yaz_matchstr(tocode, "iso88591"))
229 {
230 struct encoder_data *data = (struct encoder_data *)
231 xmalloc(sizeof(*data));
232 e->data = data;
/* NOTE(review): listing lines 233-236 are missing here; presumably they
   install the write/flush/init/destroy handlers on e - confirm.  The lines
   shown do not set data->compose_char. */
237 return e;
238 }
239 return 0;
240}
241
/* Reader for ISO-8859-1 input: each input byte is one character.
 * Returns the first byte at inp widened to unsigned long and sets *no_read
 * to 1.  cd and inbytesleft are not used in the body shown.
 * NOTE(review): inbytesleft is not checked, so the caller is presumably
 * expected to pass at least one readable byte - confirm.  Listing line 243
 * (a parameter line) is missing; the symbol index at the end of the page
 * shows a yaz_iconv_decoder_t d parameter there. */
242static unsigned long read_ISO8859_1(yaz_iconv_t cd,
244 unsigned char *inp,
245 size_t inbytesleft, size_t *no_read)
246{
247 unsigned long x = inp[0];
248 *no_read = 1;
249 return x;
250}
251
254
/* Decoder set-up: returns d when fromcode matches "iso88591" under
 * yaz_matchstr, and 0 for any other fromcode.
 * NOTE(review): the signature line is missing from this listing; the
 * symbol index at the end of the page gives it as
 * yaz_iso_8859_1_decoder(const char *fromcode, yaz_iconv_decoder_t d) -
 * confirm. */
255{
256 if (!yaz_matchstr(fromcode, "iso88591"))
257 {
/* NOTE(review): listing line 258 is missing here; presumably it installs
   read_ISO8859_1 as the read handler on d - confirm. */
259 return d;
260 }
261 return 0;
262}
263
264
265/*
266 * Local variables:
267 * c-basic-offset: 4
268 * c-file-style: "Stroustrup"
269 * indent-tabs-mode: nil
270 * End:
271 * vim: shiftwidth=4 tabstop=8 expandtab
272 */
273
Header for errno utilities.
Internal header for iconv.
void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
Definition siconv.c:298
void destroy_iso_8859_1(yaz_iconv_encoder_t e)
unsigned long x2
yaz_iconv_encoder_t yaz_iso_8859_1_encoder(const char *tocode, yaz_iconv_encoder_t e)
static size_t flush_iso_8859_1(yaz_iconv_t cd, yaz_iconv_encoder_t e, char **outbuf, size_t *outbytesleft)
unsigned long x1
yaz_iconv_decoder_t yaz_iso_8859_1_decoder(const char *fromcode, yaz_iconv_decoder_t d)
int yaz_iso_8859_1_lookup_x12(unsigned long x1, unsigned long x2, unsigned long *y)
static struct @1 latin1_comb[]
static size_t write_iso_8859_1(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
static unsigned long read_ISO8859_1(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
unsigned y
int yaz_iso_8859_1_lookup_y(unsigned long v, unsigned long *x1, unsigned long *x2)
void init_iso_8859_1(yaz_iconv_encoder_t e)
int yaz_matchstr(const char *s1, const char *s2)
match strings - independent of case and '-'
Definition matchstr.c:42
unsigned long compose_char
unsigned long(* read_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inbuf, size_t inbytesleft, size_t *no_read)
Definition iconv-p.h:86
void(* init_handle)(yaz_iconv_encoder_t e)
Definition iconv-p.h:50
size_t(* write_handle)(yaz_iconv_t cd, yaz_iconv_encoder_t e, unsigned long x, char **outbuf, size_t *outbytesleft)
Definition iconv-p.h:45
void(* destroy_handle)(yaz_iconv_encoder_t e)
Definition iconv-p.h:51
size_t(* flush_handle)(yaz_iconv_t cd, yaz_iconv_encoder_t e, char **outbuf, size_t *outbytesleft)
Definition iconv-p.h:48
Header for memory handling functions.
#define xfree(x)
utility macro which calls xfree_f
Definition xmalloc.h:53
#define xmalloc(x)
utility macro which calls malloc_f
Definition xmalloc.h:49
#define YAZ_ICONV_EILSEQ
error code: Invalid sequence
Definition yaz-iconv.h:49
#define YAZ_ICONV_E2BIG
error code: Not sufficient room for output buffer
Definition yaz-iconv.h:47