31 if (ch ==
'\n' || ch == EOF)
break;
36 if (newb == 0)
goto error;
41 if (ch < 0x80 || ch > 0xBF) inlen += 1;
43 if (isupper(ch)) ch = tolower(ch);
54 fprintf(stderr,
"Out of memory");
60 fwrite(b, i, 1, f_out);
63 fwrite(b, i, 1, f_out);
67 for (j = 30 - inlen; j > 0; j--)
71 for (j = 30; j > 0; j--)
77 fputs((
char *)stemmed, f_out);
94 printf(
"usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]\n"
96 "The input file consists of a list of words to be stemmed, one per\n"
97 "line. Words should be in lower case, but (for English) A-Z letters\n"
98 "are mapped to their a-z equivalents anyway. If omitted, stdin is\n"
101 "If -c is given, the argument is the character encoding of the input\n"
102 "and output files. If it is omitted, the UTF-8 encoding is used.\n"
104 "If -p is given the output file consists of each word of the input\n"
105 "file followed by \"->\" followed by its stemmed equivalent.\n"
106 "If -p2 is given the output file is a two column layout containing\n"
107 "the input words in the first column and the stemmed equivalents in\n"
108 "the second column.\n"
109 "Otherwise, the output file consists of the stemmed words, one per\n"
112 "-h displays this help\n",
126 char * language =
"english";
127 char * charenc = NULL;
138 if (strcmp(s,
"-o") == 0) {
140 fprintf(stderr,
"%s requires an argument\n", s);
144 }
else if (strcmp(s,
"-i") == 0) {
146 fprintf(stderr,
"%s requires an argument\n", s);
150 }
else if (strcmp(s,
"-l") == 0) {
152 fprintf(stderr,
"%s requires an argument\n", s);
155 language = argv[i++];
156 }
else if (strcmp(s,
"-c") == 0) {
158 fprintf(stderr,
"%s requires an argument\n", s);
162 }
else if (strcmp(s,
"-p2") == 0) {
164 }
else if (strcmp(s,
"-p") == 0) {
166 }
else if (strcmp(s,
"-h") == 0) {
169 fprintf(stderr,
"option %s unknown\n", s);
173 fprintf(stderr,
"unexpected parameter %s\n", s);
179 f_in = (in == 0) ? stdin : fopen(in,
"r");
181 fprintf(stderr,
"file %s not found\n", in);
184 f_out = (out == 0) ? stdout : fopen(out,
"w");
186 fprintf(stderr,
"file %s cannot be opened\n", out);
193 if (charenc == NULL) {
194 fprintf(stderr,
"language `%s' not available for stemming\n", language);
197 fprintf(stderr,
"language `%s' not available for stemming in encoding `%s'\n", language, charenc);
204 if (in != 0) (void) fclose(f_in);
205 if (out != 0) (void) fclose(f_out);
int sb_stemmer_length(struct sb_stemmer *stemmer)
struct sb_stemmer * sb_stemmer_new(const char *algorithm, const char *charenc)
const sb_symbol * sb_stemmer_stem(struct sb_stemmer *stemmer, const sb_symbol *word, int size)
void sb_stemmer_delete(struct sb_stemmer *stemmer)
int main(int argc, char *argv[])
static void stem_file(struct sb_stemmer *stemmer, FILE *f_in, FILE *f_out)