YAZ 5.35.1
icu.h
Go to the documentation of this file.
1/* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data.
3 * All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Index Data nor the names of its contributors
13 * may be used to endorse or promote products derived from this
14 * software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
33#ifndef YAZ_ICU_H
34#define YAZ_ICU_H
35
36#include <yaz/yconfig.h>
37
38#include <yaz/xmltypes.h>
39
40#include <unicode/utypes.h>
41
42YAZ_BEGIN_CDECL
43
45typedef struct icu_chain *yaz_icu_chain_t;
46
48YAZ_EXPORT void icu_chain_destroy(yaz_icu_chain_t chain);
49
56YAZ_EXPORT yaz_icu_chain_t icu_chain_xml_config(const xmlNode *xml_node,
57 int sort,
58 UErrorCode *status);
66YAZ_EXPORT int icu_chain_assign_cstr(yaz_icu_chain_t chain,
67 const char *src8cstr,
68 UErrorCode *status);
69
79YAZ_EXPORT int icu_chain_next_token(yaz_icu_chain_t chain,
80 UErrorCode *status);
81
86YAZ_EXPORT int icu_chain_token_number(yaz_icu_chain_t chain);
87
94YAZ_EXPORT const char * icu_chain_token_display(yaz_icu_chain_t chain);
95
102YAZ_EXPORT const char * icu_chain_token_norm(yaz_icu_chain_t chain);
103
110YAZ_EXPORT const char * icu_chain_token_sortkey(yaz_icu_chain_t chain);
111
117YAZ_EXPORT void icu_chain_get_org_info(yaz_icu_chain_t chain,
118 size_t *start, size_t *len);
119
126YAZ_EXPORT void icu_chain_get_org_info2(yaz_icu_chain_t chain,
127 size_t *start, size_t *len,
128 const char **cstr);
129
131typedef struct icu_iter *yaz_icu_iter_t;
132
137YAZ_EXPORT
138yaz_icu_iter_t icu_iter_create(struct icu_chain *chain);
139
146YAZ_EXPORT
147void icu_iter_first(yaz_icu_iter_t iter, const char *src8cstr);
148
154YAZ_EXPORT
155int icu_iter_next(yaz_icu_iter_t iter);
156
160YAZ_EXPORT
161void icu_iter_destroy(yaz_icu_iter_t iter);
162
167YAZ_EXPORT
168const char * icu_iter_get_norm(yaz_icu_iter_t iter);
169
174YAZ_EXPORT
175const char * icu_iter_get_sortkey(yaz_icu_iter_t iter);
176
181YAZ_EXPORT
182const char * icu_iter_get_display(yaz_icu_iter_t iter);
183
188YAZ_EXPORT
189int icu_iter_get_token_number(yaz_icu_iter_t iter);
190
196YAZ_EXPORT
197void icu_iter_get_org_info(yaz_icu_iter_t iter, size_t *start, size_t *len);
198
205YAZ_EXPORT
206void icu_iter_get_org_info2(yaz_icu_iter_t iter, size_t *start, size_t *len,
207 const char **cstr);
208
209YAZ_END_CDECL
210
211#endif /* YAZ_ICU_H */
212
213/*
214 * Local variables:
215 * c-basic-offset: 4
216 * c-file-style: "Stroustrup"
217 * indent-tabs-mode: nil
218 * End:
219 * vim: shiftwidth=4 tabstop=8 expandtab
220 */
221
const char * icu_iter_get_norm(yaz_icu_iter_t iter)
returns ICU normalized token
struct icu_iter * yaz_icu_iter_t
ICU tokenizer iterator type (opaque)
Definition icu.h:131
void icu_chain_destroy(yaz_icu_chain_t chain)
destroys ICU chain
struct icu_chain * yaz_icu_chain_t
opaque ICU chain
Definition icu.h:45
int icu_iter_get_token_number(yaz_icu_iter_t iter)
returns ICU token count for iterator
void icu_iter_destroy(yaz_icu_iter_t iter)
destroy ICU tokenizer iterator
void icu_chain_get_org_info(yaz_icu_chain_t chain, size_t *start, size_t *len)
returns token as it relates to original text (legacy)
void icu_chain_get_org_info2(yaz_icu_chain_t chain, size_t *start, size_t *len, const char **cstr)
returns token as it relates to original text (2nd version)
yaz_icu_chain_t icu_chain_xml_config(const xmlNode *xml_node, int sort, UErrorCode *status)
constructs ICU chain from XML specification
int icu_chain_assign_cstr(yaz_icu_chain_t chain, const char *src8cstr, UErrorCode *status)
pass string to ICU for parsing/tokenization/etc
const char * icu_iter_get_display(yaz_icu_iter_t iter)
returns ICU display string
void icu_iter_first(yaz_icu_iter_t iter, const char *src8cstr)
starts iteration over string
const char * icu_chain_token_sortkey(yaz_icu_chain_t chain)
returns sortkey token of last token processed
int icu_chain_token_number(yaz_icu_chain_t chain)
returns token number of last token processed
const char * icu_iter_get_sortkey(yaz_icu_iter_t iter)
returns ICU sortkey string
const char * icu_chain_token_norm(yaz_icu_chain_t chain)
returns normalized token of last token processed
yaz_icu_iter_t icu_iter_create(struct icu_chain *chain)
create ICU tokenizer iterator from chain
int icu_iter_next(yaz_icu_iter_t iter)
iterates over one token
void icu_iter_get_org_info(yaz_icu_iter_t iter, size_t *start, size_t *len)
returns ICU original token start (offset) and length (legacy)
const char * icu_chain_token_display(yaz_icu_chain_t chain)
returns display token of last token processed
int icu_chain_next_token(yaz_icu_chain_t chain, UErrorCode *status)
returns one token (if any)
void icu_iter_get_org_info2(yaz_icu_iter_t iter, size_t *start, size_t *len, const char **cstr)
returns ICU original token start (offset) and length
Define xmlNode and xmlDocPtr if Libxml2 is present.
Header with fundamental macros.
#define YAZ_BEGIN_CDECL
Definition yconfig.h:56
#define YAZ_END_CDECL
Definition yconfig.h:57