YAZ 5.37.0
libstemmer_utf8.c
Go to the documentation of this file.
1
2#include <stdlib.h>
3#include <string.h>
5#include "../runtime/api.h"
6#include "modules_utf8.h"
7
/*
 * Stemmer handle.
 *
 * Bundles the three entry points of one stemming algorithm together with
 * the environment they operate on.  sb_stemmer_new() fills in the function
 * pointers from the selected module and then calls create() to obtain env.
 */
struct sb_stemmer {
    struct SN_env * (*create)(void);  /* builds a new environment; NULL result is treated as failure */
    void (*close)(struct SN_env *);   /* releases an environment; called by sb_stemmer_delete() */
    int (*stem)(struct SN_env *);     /* stems the environment's current word; < 0 is treated as failure */

    struct SN_env * env;              /* environment returned by create() */
};
15
16extern const char **
18{
19 return algorithm_names;
20}
21
23sb_getenc(const char * charenc)
24{
25 struct stemmer_encoding * encoding;
26 if (charenc == NULL) return ENC_UTF_8;
27 for (encoding = encodings; encoding->name != 0; encoding++) {
28 if (strcmp(encoding->name, charenc) == 0) break;
29 }
30 if (encoding->name == NULL) return ENC_UNKNOWN;
31 return encoding->enc;
32}
33
34extern struct sb_stemmer *
35sb_stemmer_new(const char * algorithm, const char * charenc)
36{
38 struct stemmer_modules * module;
39 struct sb_stemmer * stemmer;
40
41 enc = sb_getenc(charenc);
42 if (enc == ENC_UNKNOWN) return NULL;
43
44 for (module = modules; module->name != 0; module++) {
45 if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;
46 }
47 if (module->name == NULL) return NULL;
48
49 stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
50 if (stemmer == NULL) return NULL;
51
52 stemmer->create = module->create;
53 stemmer->close = module->close;
54 stemmer->stem = module->stem;
55
56 stemmer->env = stemmer->create();
57 if (stemmer->env == NULL)
58 {
59 sb_stemmer_delete(stemmer);
60 return NULL;
61 }
62
63 return stemmer;
64}
65
66void
68{
69 if (stemmer == 0) return;
70 if (stemmer->close == 0) return;
71 stemmer->close(stemmer->env);
72 stemmer->close = 0;
73 free(stemmer);
74}
75
76const sb_symbol *
77sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
78{
79 int ret;
80 if (SN_set_current(stemmer->env, size, (const symbol *)(word)))
81 {
82 stemmer->env->l = 0;
83 return NULL;
84 }
85 ret = stemmer->stem(stemmer->env);
86 if (ret < 0) return NULL;
87 stemmer->env->p[stemmer->env->l] = 0;
88 return (const sb_symbol *)(stemmer->env->p);
89}
90
91int
93{
94 return stemmer->env->l;
95}
int SN_set_current(struct SN_env *z, int size, const symbol *s)
Definition api.c:60
unsigned char symbol
Definition api.h:2
void * malloc(YYSIZE_T)
void free(void *)
unsigned char sb_symbol
Definition libstemmer.h:8
int sb_stemmer_length(struct sb_stemmer *stemmer)
static stemmer_encoding_t sb_getenc(const char *charenc)
struct sb_stemmer * sb_stemmer_new(const char *algorithm, const char *charenc)
const char ** sb_stemmer_list(void)
void sb_stemmer_delete(struct sb_stemmer *stemmer)
const sb_symbol * sb_stemmer_stem(struct sb_stemmer *stemmer, const sb_symbol *word, int size)
static struct stemmer_modules modules[]
Definition modules.h:83
stemmer_encoding_t
Definition modules.h:53
@ ENC_UTF_8
Definition modules.h:58
@ ENC_UNKNOWN
Definition modules.h:54
static struct stemmer_encoding encodings[]
Definition modules.h:65
static const char * algorithm_names[]
Definition modules.h:204
symbol * p
Definition api.h:15
int l
Definition api.h:16
void(* close)(struct SN_env *)
Definition libstemmer.c:10
struct SN_env * env
Definition libstemmer.c:13
int(* stem)(struct SN_env *)
Definition libstemmer.c:11
const char * name
Definition modules.h:62
stemmer_encoding_t enc
Definition modules.h:63