35 if (ch ==
'\n' || ch == EOF)
48 if (ch < 0x80 || ch > 0xBF)
63 fprintf (stderr,
"Out of memory");
70 fwrite (b, i, 1, f_out);
71 fputs (
" -> ", f_out);
75 fwrite (b, i, 1, f_out);
81 for (j = 30 - inlen; j > 0; j--)
87 for (j = 30; j > 0; j--)
93 fputs ((
char *) stemmed, f_out);
112 (
"usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]\n"
114 "The input file consists of a list of words to be stemmed, one per\n"
115 "line. Words should be in lower case, but (for English) A-Z letters\n"
116 "are mapped to their a-z equivalents anyway. If omitted, stdin is\n"
118 "If -c is given, the argument is the character encoding of the input\n"
119 "and output files. If it is omitted, the UTF-8 encoding is used.\n" "\n"
120 "If -p is given the output file consists of each word of the input\n"
121 "file followed by \"->\" followed by its stemmed equivalent.\n"
122 "If -p2 is given the output file is a two column layout containing\n"
123 "the input words in the first column and the stemmed equivalents in\n"
124 "the second column.\n"
125 "Otherwise, the output file consists of the stemmed words, one per\n"
126 "line.\n" "\n" "-h displays this help\n",
progname);
139 char *language =
"english";
140 char *charenc = NULL;
153 if (strcmp (s,
"-o") == 0)
157 fprintf (stderr,
"%s requires an argument\n", s);
162 else if (strcmp (s,
"-i") == 0)
166 fprintf (stderr,
"%s requires an argument\n", s);
171 else if (strcmp (s,
"-l") == 0)
175 fprintf (stderr,
"%s requires an argument\n", s);
178 language = argv[i++];
180 else if (strcmp (s,
"-c") == 0)
184 fprintf (stderr,
"%s requires an argument\n", s);
189 else if (strcmp (s,
"-p2") == 0)
193 else if (strcmp (s,
"-p") == 0)
197 else if (strcmp (s,
"-h") == 0)
203 fprintf (stderr,
"option %s unknown\n", s);
209 fprintf (stderr,
"unexpected parameter %s\n", s);
215 f_in = (in == 0) ? stdin : fopen (in,
"r");
218 fprintf (stderr,
"file %s not found\n", in);
221 f_out = (out == 0) ? stdout : fopen (out,
"w");
224 fprintf (stderr,
"file %s cannot be opened\n", out);
234 fprintf (stderr,
"language `%s' not available for stemming\n",
241 "language `%s' not available for stemming in encoding `%s'\n",
250 (void) fclose (f_in);
252 (void) fclose (f_out);
int sb_stemmer_length(struct sb_stemmer *stemmer)
struct sb_stemmer * sb_stemmer_new(const char *algorithm, const char *charenc)
void sb_stemmer_delete(struct sb_stemmer *stemmer)
const sb_symbol * sb_stemmer_stem(struct sb_stemmer *stemmer, const sb_symbol *word, int size)
int main(int argc, char *argv[])
static void stem_file(struct sb_stemmer *stemmer, FILE *f_in, FILE *f_out)