IDZEBRA 2.2.8
mod_text.c
Go to the documentation of this file.
/* This file is part of the Zebra server.
   Copyright (C) Index Data

Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/
19
20
21#if HAVE_CONFIG_H
22#include <config.h>
23#endif
24#include <stdio.h>
25#include <assert.h>
26#include <ctype.h>
27
28#include <yaz/oid_db.h>
29#include <yaz/snprintf.h>
30
31#include <idzebra/util.h>
32#include <idzebra/recctrl.h>
33
/* Configuration state of one "text" filter instance. */
struct filter_info {
    /* Record separator, or 0 when none is configured.  The string is
       owned by this struct: filter_config stores a private copy and
       filter_destroy frees it.  buf_getchar only compares its first
       character against the input. */
    char *sep;
};
37
38static void *filter_init(Res res, RecType recType)
39{
40 struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
41 tinfo->sep = 0;
42 return tinfo;
43}
44
45static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
46{
47 struct filter_info *tinfo = (struct filter_info*) clientData;
48 xfree(tinfo->sep);
49 tinfo->sep = 0;
50 if (args && *args)
51 tinfo->sep = xstrdup(args);
52 return ZEBRA_OK;
53}
54
55static void filter_destroy(void *clientData)
56{
57 struct filter_info *tinfo = clientData;
58 xfree(tinfo->sep);
59 xfree(tinfo);
60}
61
/* Buffered character reader over the record stream of an extract call. */
struct buf_info {
    /* Extract control whose stream is read; set by buf_open and used by
       buf_getchar as fi->p->stream. */
    struct recExtractCtrl *p;
    /* Read buffer; buf_open allocates 4096 bytes, buf_close frees them. */
    char *buf;
    /* Index in buf of the next character to hand out. */
    int offset;
    /* Result of the last read into buf (number of valid bytes); a value
       <= 0 means the stream gave no more data. */
    int max;
};
68
69static struct buf_info *buf_open(struct recExtractCtrl *p)
70{
71 struct buf_info *fi = (struct buf_info *) xmalloc(sizeof(*fi));
72
73 fi->p = p;
74 fi->buf = (char *) xmalloc(4096);
75 fi->offset = 1;
76 fi->max = 1;
77 return fi;
78}
79
80static int buf_getchar(struct filter_info *tinfo, struct buf_info *fi, char *dst)
81{
82 if (fi->offset >= fi->max)
83 {
84 if (fi->max <= 0)
85 return 0;
86 fi->max = fi->p->stream->readf(fi->p->stream, fi->buf, 4096);
87 fi->offset = 0;
88 if (fi->max <= 0)
89 return 0;
90 }
91 *dst = fi->buf[(fi->offset)++];
92 if (tinfo->sep && *dst == *tinfo->sep)
93 {
94 off_t off = fi->p->stream->tellf(fi->p->stream);
95 off_t end_offset = off - (fi->max - fi->offset);
96 fi->p->stream->endf(fi->p->stream, &end_offset);
97 return 0;
98 }
99 return 1;
100}
101
102static void buf_close(struct buf_info *fi)
103{
104 xfree(fi->buf);
105 xfree(fi);
106}
107
108static int filter_extract(void *clientData, struct recExtractCtrl *p)
109{
110 struct filter_info *tinfo = clientData;
111 char w[512];
112 RecWord recWord;
113 int r;
114 struct buf_info *fi = buf_open(p);
115 int no_read = 0;
116
117#if 0
118 yaz_log(YLOG_LOG, "filter_extract off=%ld",
119 (long) (*fi->p->tellf)(fi->p->fh));
120#endif
121 (*p->init)(p, &recWord);
122 do
123 {
124 int i = 0;
125
126 r = buf_getchar(tinfo, fi, w);
127 while (r > 0 && i < 511 && w[i] != '\n' && w[i] != '\r')
128 {
129 i++;
130 r = buf_getchar(tinfo, fi, w + i);
131 }
132 if (i)
133 {
134 no_read += i;
135 recWord.term_buf = w;
136 recWord.term_len = i;
137 (*p->tokenAdd)(&recWord);
138 }
139 } while (r > 0);
140 buf_close(fi);
141 if (no_read == 0)
142 return RECCTRL_EXTRACT_EOF;
143 return RECCTRL_EXTRACT_OK;
144}
145
146static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
147{
148 int r, filter_ptr = 0;
149 /* not reentrant and thread safe as static buffer is returned */
150 static char *filter_buf = NULL;
151 static int filter_size = 0;
152 int make_header = 1;
153 int make_body = 1;
154 const char *elementSetName = NULL;
155 int no_lines = 0;
156
157 if (p->comp && p->comp->which == Z_RecordComp_simple &&
158 p->comp->u.simple->which == Z_ElementSetNames_generic)
159 elementSetName = p->comp->u.simple->u.generic;
160
161 if (elementSetName)
162 {
163 /* don't make header for the R(aw) element set name */
164 if (!strcmp(elementSetName, "R"))
165 {
166 make_header = 0;
167 make_body = 1;
168 }
169 /* only make header for the H(eader) element set name */
170 else if (!strcmp(elementSetName, "H"))
171 {
172 make_header = 1;
173 make_body = 0;
174 }
175 }
176 while (1)
177 {
178 if (filter_ptr + 4096 >= filter_size)
179 {
180 char *nb;
181
182 filter_size = 2*filter_size + 8192;
183 nb = (char *) xmalloc(filter_size);
184 if (filter_buf)
185 {
186 memcpy(nb, filter_buf, filter_ptr);
187 xfree(filter_buf);
188 }
189 filter_buf = nb;
190 }
191 if (make_header && filter_ptr == 0)
192 {
193 if (p->score >= 0)
194 {
195 yaz_snprintf(filter_buf, 50, "Rank: %d\n", p->score);
196 filter_ptr = strlen(filter_buf);
197 }
198 yaz_snprintf(filter_buf + filter_ptr, 50, "Local Number: " ZINT_FORMAT "\n",
199 p->localno);
200 filter_ptr = strlen(filter_buf);
201 if (p->fname)
202 {
203 yaz_snprintf(filter_buf + filter_ptr, 200, "Filename: %s\n", p->fname);
204 filter_ptr = strlen(filter_buf);
205 }
206 strcpy(filter_buf+filter_ptr++, "\n");
207 }
208 if (!make_body)
209 break;
210 r = p->stream->readf(p->stream, filter_buf + filter_ptr, 4096);
211 if (r <= 0)
212 break;
213 filter_ptr += r;
214 }
215 filter_buf[filter_ptr] = '\0';
216 if (elementSetName)
217 {
218 if (!strcmp(elementSetName, "B"))
219 no_lines = 4;
220 if (!strcmp(elementSetName, "M"))
221 no_lines = 20;
222 }
223 if (no_lines)
224 {
225 char *p = filter_buf;
226 int i = 0;
227
228 while (++i <= no_lines && (p = strchr(p, '\n')))
229 p++;
230 if (p)
231 {
232 p[1] = '\0';
233 filter_ptr = p-filter_buf;
234 }
235 }
236 p->output_format = yaz_oid_recsyn_sutrs;
237 p->rec_buf = filter_buf;
238 p->rec_len = filter_ptr;
239 return 0;
240}
241
242static struct recType filter_type = {
243 0,
244 "text",
250};
251
253#if IDZEBRA_STATIC_TEXT
254idzebra_filter_text
255#else
257#endif
258
259[] = {
261 0,
262};
263/*
264 * Local variables:
265 * c-basic-offset: 4
266 * c-file-style: "Stroustrup"
267 * indent-tabs-mode: nil
268 * End:
269 * vim: shiftwidth=4 tabstop=8 expandtab
270 */
271
static void filter_destroy(void *clientData)
Definition mod_text.c:55
static void * filter_init(Res res, RecType recType)
Definition mod_text.c:38
RecType idzebra_filter[]
Definition mod_text.c:259
static struct buf_info * buf_open(struct recExtractCtrl *p)
Definition mod_text.c:69
static struct recType filter_type
Definition mod_text.c:242
static void buf_close(struct buf_info *fi)
Definition mod_text.c:102
static int buf_getchar(struct filter_info *tinfo, struct buf_info *fi, char *dst)
Definition mod_text.c:80
static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
Definition mod_text.c:45
#define RECCTRL_EXTRACT_EOF
Definition recctrl.h:164
#define RECCTRL_EXTRACT_OK
Definition recctrl.h:163
const char * term_buf
Definition recctrl.h:56
int term_len
Definition recctrl.h:58
off_t(* endf)(struct ZebraRecStream *s, off_t *offset)
set and get of record position
Definition recctrl.h:81
int(* readf)(struct ZebraRecStream *s, char *buf, size_t count)
read function
Definition recctrl.h:75
off_t(* tellf)(struct ZebraRecStream *s)
tell function
Definition recctrl.h:79
char * buf
Definition mod_text.c:64
int offset
Definition mod_text.c:65
int max
Definition mod_text.c:66
struct recExtractCtrl * p
Definition mod_text.c:63
char * sep
Definition mod_text.c:35
record extract for indexing
Definition recctrl.h:101
void(* init)(struct recExtractCtrl *p, RecWord *w)
Definition recctrl.h:103
void(* tokenAdd)(RecWord *w)
Definition recctrl.h:105
struct ZebraRecStream * stream
Definition recctrl.h:102
#define ZINT_FORMAT
Definition util.h:72
#define ZEBRA_OK
Definition util.h:82
short ZEBRA_RES
Common return type for Zebra API.
Definition util.h:80