|
YAZ 5.35.1
|
ICU utilities. More...
Go to the source code of this file.
Typedefs | |
| typedef struct icu_chain * | yaz_icu_chain_t |
| opaque ICU chain | |
| typedef struct icu_iter * | yaz_icu_iter_t |
| ICU tokenizer iterator type (opaque) | |
Functions | |
| void | icu_chain_destroy (yaz_icu_chain_t chain) |
| destroys ICU chain | |
| yaz_icu_chain_t | icu_chain_xml_config (const xmlNode *xml_node, int sort, UErrorCode *status) |
| constructs ICU chain from XML specification | |
| int | icu_chain_assign_cstr (yaz_icu_chain_t chain, const char *src8cstr, UErrorCode *status) |
| pass string to ICU for parsing/tokenization/etc | |
| int | icu_chain_next_token (yaz_icu_chain_t chain, UErrorCode *status) |
| returns one token (if any) | |
| int | icu_chain_token_number (yaz_icu_chain_t chain) |
| returns token number of last token processed | |
| const char * | icu_chain_token_display (yaz_icu_chain_t chain) |
| returns display token of last token processed | |
| const char * | icu_chain_token_norm (yaz_icu_chain_t chain) |
| returns normalized token of last token processed | |
| const char * | icu_chain_token_sortkey (yaz_icu_chain_t chain) |
| returns sortkey token of last token processed | |
| void | icu_chain_get_org_info (yaz_icu_chain_t chain, size_t *start, size_t *len) |
| returns token as it relates to original text (legacy) | |
| void | icu_chain_get_org_info2 (yaz_icu_chain_t chain, size_t *start, size_t *len, const char **cstr) |
| returns token as it relates to original text (2nd version) | |
| yaz_icu_iter_t | icu_iter_create (struct icu_chain *chain) |
| create ICU tokenizer iterator from chain | |
| void | icu_iter_first (yaz_icu_iter_t iter, const char *src8cstr) |
| starts iteration over string | |
| int | icu_iter_next (yaz_icu_iter_t iter) |
| iterates over one token | |
| void | icu_iter_destroy (yaz_icu_iter_t iter) |
| destroy ICU tokenizer iterator | |
| const char * | icu_iter_get_norm (yaz_icu_iter_t iter) |
| returns ICU normalized token | |
| const char * | icu_iter_get_sortkey (yaz_icu_iter_t iter) |
| returns ICU sortkey string | |
| const char * | icu_iter_get_display (yaz_icu_iter_t iter) |
| returns ICU display string | |
| int | icu_iter_get_token_number (yaz_icu_iter_t iter) |
| returns ICU token count for iterator | |
| void | icu_iter_get_org_info (yaz_icu_iter_t iter, size_t *start, size_t *len) |
| returns ICU original token start (offset) and length (legacy) | |
| void | icu_iter_get_org_info2 (yaz_icu_iter_t iter, size_t *start, size_t *len, const char **cstr) |
| returns ICU original token start (offset) and length | |
ICU utilities.
Definition in file icu.h.
| typedef struct icu_chain* yaz_icu_chain_t |
| typedef struct icu_iter* yaz_icu_iter_t |
| int icu_chain_assign_cstr | ( | yaz_icu_chain_t | chain, |
| const char * | src8cstr, | ||
| UErrorCode * | status | ||
| ) |
pass string to ICU for parsing/tokenization/etc
| chain | ICU chain to be used for parsing |
| src8cstr | input C string (null-terminated) |
| status | may include ICU error on failure |
| 0 | failure |
| 1 | success |
| void icu_chain_destroy | ( | yaz_icu_chain_t | chain | ) |
destroys ICU chain
| void icu_chain_get_org_info | ( | yaz_icu_chain_t | chain, |
| size_t * | start, | ||
| size_t * | len | ||
| ) |
returns token as it relates to original text (legacy)
| chain | ICU chain |
| start | offset in original text |
| len | number of uchars in original text |
| void icu_chain_get_org_info2 | ( | yaz_icu_chain_t | chain, |
| size_t * | start, | ||
| size_t * | len, | ||
| const char ** | cstr | ||
| ) |
returns token as it relates to original text (2nd version)
| chain | ICU chain |
| start | offset in original text |
| len | number of uchars in original text |
| cstr | if non-null, holds the original string |
| int icu_chain_next_token | ( | yaz_icu_chain_t | chain, |
| UErrorCode * | status | ||
| ) |
returns one token (if any)
| chain | ICU chain |
| status | may include ICU error on failure |
| 0 | error or end-of-tokens (no more tokens) |
| >0 | token number (1, 2, 3, ..) |
This function tries to move to the "next" token in the assigned C string, or returns 0 if there are no more tokens.
| const char * icu_chain_token_display | ( | yaz_icu_chain_t | chain | ) |
returns display token of last token processed
| chain | ICU chain |
| const char * icu_chain_token_norm | ( | yaz_icu_chain_t | chain | ) |
returns normalized token of last token processed
| chain | ICU chain |
| int icu_chain_token_number | ( | yaz_icu_chain_t | chain | ) |
returns token number of last token processed
| chain | ICU chain |
| const char * icu_chain_token_sortkey | ( | yaz_icu_chain_t | chain | ) |
returns sortkey token of last token processed
| chain | ICU chain |
| yaz_icu_chain_t icu_chain_xml_config | ( | const xmlNode * | xml_node, |
| int | sort, | ||
| UErrorCode * | status | ||
| ) |
constructs ICU chain from XML specification
| xml_node | icu_chain XML node - with attribute locale in it |
| sort | 1 if ICU chain is to deal with sort keys; 0 otherwise |
| status | May include ICU error code on failure |
| yaz_icu_iter_t icu_iter_create | ( | struct icu_chain * | chain | ) |
create ICU tokenizer iterator from chain
| chain | ICU chain |
| void icu_iter_destroy | ( | yaz_icu_iter_t | iter | ) |
destroy ICU tokenizer iterator
| iter | ICU tokenizer iterator |
| void icu_iter_first | ( | yaz_icu_iter_t | iter, |
| const char * | src8cstr | ||
| ) |
starts iteration over string
| iter | ICU tokenizer iterator |
| src8cstr | input string (0-terminated) |
Call icu_iter_next to iterate over each token.
| const char * icu_iter_get_display | ( | yaz_icu_iter_t | iter | ) |
returns ICU display string
| iter | ICU tokenizer iterator |
| const char * icu_iter_get_norm | ( | yaz_icu_iter_t | iter | ) |
returns ICU normalized token
| iter | ICU tokenizer iterator |
| void icu_iter_get_org_info | ( | yaz_icu_iter_t | iter, |
| size_t * | start, | ||
| size_t * | len | ||
| ) |
returns ICU original token start (offset) and length (legacy)
| iter | ICU tokenizer iterator |
| start | offset of last token in original text |
| len | length of last token in original text |
| void icu_iter_get_org_info2 | ( | yaz_icu_iter_t | iter, |
| size_t * | start, | ||
| size_t * | len, | ||
| const char ** | cstr | ||
| ) |
returns ICU original token start (offset) and length
| iter | ICU tokenizer iterator |
| start | offset of last token in original text |
| len | length of last token in original text |
| cstr | if non-null: original string |
| const char * icu_iter_get_sortkey | ( | yaz_icu_iter_t | iter | ) |
returns ICU sortkey string
| iter | ICU tokenizer iterator |
| int icu_iter_get_token_number | ( | yaz_icu_iter_t | iter | ) |
returns ICU token count for iterator
| iter | ICU tokenizer iterator |
| int icu_iter_next | ( | yaz_icu_iter_t | iter | ) |
iterates over one token
| iter | ICU tokenizer iterator |
| 0 | no more tokens (EOF) |
| 1 | got one token (use icu_iter_get..-functions) |