YAZ 5.35.1
libstemmer_utf8.c
Go to the documentation of this file.
1
2#include <stdlib.h>
3#include <string.h>
4#include "../include/libstemmer.h"
5#include "../runtime/api.h"
6#include "modules_utf8.h"
7
/* Stemmer handle: the three callbacks copied from the selected module entry,
 * plus the Snowball environment those callbacks operate on. */
struct sb_stemmer
{
    /* Produces the environment that sb_stemmer_new stores in `env`. */
    struct SN_env *(*create)(void);
    /* Invoked with `env` by sb_stemmer_delete. */
    void (*close)(struct SN_env *);
    /* Runs the stemming step on `env`; sb_stemmer_stem treats a negative
     * result as failure. */
    int (*stem)(struct SN_env *);

    /* Environment returned by create() and handed to stem() and close(). */
    struct SN_env *env;
};
16
17extern const char **
19{
20 return algorithm_names;
21}
22
24sb_getenc (const char *charenc)
25{
26 struct stemmer_encoding *encoding;
27 if (charenc == NULL)
28 return ENC_UTF_8;
29 for (encoding = encodings; encoding->name != 0; encoding++)
30 {
31 if (strcmp (encoding->name, charenc) == 0)
32 break;
33 }
34 if (encoding->name == NULL)
35 return ENC_UNKNOWN;
36 return encoding->enc;
37}
38
39extern struct sb_stemmer *
40sb_stemmer_new (const char *algorithm, const char *charenc)
41{
43 struct stemmer_modules *module;
44 struct sb_stemmer *stemmer;
45
46 enc = sb_getenc (charenc);
47 if (enc == ENC_UNKNOWN)
48 return NULL;
49
50 for (module = modules; module->name != 0; module++)
51 {
52 if (strcmp (module->name, algorithm) == 0 && module->enc == enc)
53 break;
54 }
55 if (module->name == NULL)
56 return NULL;
57
58 stemmer = (struct sb_stemmer *) malloc (sizeof (struct sb_stemmer));
59 if (stemmer == NULL)
60 return NULL;
61
62 stemmer->create = module->create;
63 stemmer->close = module->close;
64 stemmer->stem = module->stem;
65
66 stemmer->env = stemmer->create ();
67 if (stemmer->env == NULL)
68 {
69 sb_stemmer_delete (stemmer);
70 return NULL;
71 }
72
73 return stemmer;
74}
75
76void
78{
79 if (stemmer == 0)
80 return;
81 if (stemmer->close == 0)
82 return;
83 stemmer->close (stemmer->env);
84 stemmer->close = 0;
85 free (stemmer);
86}
87
88const sb_symbol *
89sb_stemmer_stem (struct sb_stemmer *stemmer, const sb_symbol *word, int size)
90{
91 int ret;
92 if (SN_set_current (stemmer->env, size, (const symbol *) (word)))
93 {
94 stemmer->env->l = 0;
95 return NULL;
96 }
97 ret = stemmer->stem (stemmer->env);
98 if (ret < 0)
99 return NULL;
100 stemmer->env->p[stemmer->env->l] = 0;
101 return (const sb_symbol *) (stemmer->env->p);
102}
103
104int
106{
107 return stemmer->env->l;
108}
int SN_set_current(struct SN_env *z, int size, const symbol *s)
Definition api.c:71
unsigned char symbol
Definition api.h:2
void * malloc(YYSIZE_T)
void free(void *)
unsigned char sb_symbol
Definition libstemmer.h:8
int sb_stemmer_length(struct sb_stemmer *stemmer)
static stemmer_encoding_t sb_getenc(const char *charenc)
struct sb_stemmer * sb_stemmer_new(const char *algorithm, const char *charenc)
const char ** sb_stemmer_list(void)
void sb_stemmer_delete(struct sb_stemmer *stemmer)
const sb_symbol * sb_stemmer_stem(struct sb_stemmer *stemmer, const sb_symbol *word, int size)
static struct stemmer_modules modules[]
Definition modules.h:83
stemmer_encoding_t
Definition modules.h:53
@ ENC_UTF_8
Definition modules.h:58
@ ENC_UNKNOWN
Definition modules.h:54
static struct stemmer_encoding encodings[]
Definition modules.h:65
static const char * algorithm_names[]
Definition modules.h:204
SN_env
Definition api.h:14
symbol * p
Definition api.h:15
int l
Definition api.h:16
void(* close)(struct SN_env *)
Definition libstemmer.c:11
struct SN_env * env
Definition libstemmer.c:14
int(* stem)(struct SN_env *)
Definition libstemmer.c:12
const char * name
Definition modules.h:62
stemmer_encoding_t enc
Definition modules.h:63