IDZEBRA  2.2.7
mod_text.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 
21 #if HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 #include <stdio.h>
25 #include <assert.h>
26 #include <ctype.h>
27 
28 #include <yaz/oid_db.h>
29 #include <yaz/snprintf.h>
30 
31 #include <idzebra/util.h>
32 #include <idzebra/recctrl.h>
33 
/* Per-instance configuration of the "text" record filter. */
struct filter_info
{
    /* Record separator taken from the filter arguments, or 0 when no
       separator is configured.  Only its first character is compared
       against the input (see buf_getchar). */
    char *sep;
};
37 
38 static void *filter_init(Res res, RecType recType)
39 {
40  struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
41  tinfo->sep = 0;
42  return tinfo;
43 }
44 
45 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
46 {
47  struct filter_info *tinfo = (struct filter_info*) clientData;
48  xfree(tinfo->sep);
49  tinfo->sep = 0;
50  if (args && *args)
51  tinfo->sep = xstrdup(args);
52  return ZEBRA_OK;
53 }
54 
55 static void filter_destroy(void *clientData)
56 {
57  struct filter_info *tinfo = clientData;
58  xfree(tinfo->sep);
59  xfree(tinfo);
60 }
61 
/* Read-ahead buffer used while extracting a record from its stream. */
struct buf_info
{
    struct recExtractCtrl *p;   /* extraction control that owns the stream */
    char *buf;                  /* read buffer (4096 bytes, see buf_open) */
    int offset;                 /* index of the next unread byte in buf */
    int max;                    /* result of the last read; <= 0 means no more data */
};
68 
69 static struct buf_info *buf_open(struct recExtractCtrl *p)
70 {
71  struct buf_info *fi = (struct buf_info *) xmalloc(sizeof(*fi));
72 
73  fi->p = p;
74  fi->buf = (char *) xmalloc(4096);
75  fi->offset = 1;
76  fi->max = 1;
77  return fi;
78 }
79 
80 static int buf_getchar(struct filter_info *tinfo, struct buf_info *fi, char *dst)
81 {
82  if (fi->offset >= fi->max)
83  {
84  if (fi->max <= 0)
85  return 0;
86  fi->max = fi->p->stream->readf(fi->p->stream, fi->buf, 4096);
87  fi->offset = 0;
88  if (fi->max <= 0)
89  return 0;
90  }
91  *dst = fi->buf[(fi->offset)++];
92  if (tinfo->sep && *dst == *tinfo->sep)
93  {
94  off_t off = fi->p->stream->tellf(fi->p->stream);
95  off_t end_offset = off - (fi->max - fi->offset);
96  fi->p->stream->endf(fi->p->stream, &end_offset);
97  return 0;
98  }
99  return 1;
100 }
101 
102 static void buf_close(struct buf_info *fi)
103 {
104  xfree(fi->buf);
105  xfree(fi);
106 }
107 
108 static int filter_extract(void *clientData, struct recExtractCtrl *p)
109 {
110  struct filter_info *tinfo = clientData;
111  char w[512];
112  RecWord recWord;
113  int r;
114  struct buf_info *fi = buf_open(p);
115  int no_read = 0;
116 
117 #if 0
118  yaz_log(YLOG_LOG, "filter_extract off=%ld",
119  (long) (*fi->p->tellf)(fi->p->fh));
120 #endif
121  (*p->init)(p, &recWord);
122  do
123  {
124  int i = 0;
125 
126  r = buf_getchar(tinfo, fi, w);
127  while (r > 0 && i < 511 && w[i] != '\n' && w[i] != '\r')
128  {
129  i++;
130  r = buf_getchar(tinfo, fi, w + i);
131  }
132  if (i)
133  {
134  no_read += i;
135  recWord.term_buf = w;
136  recWord.term_len = i;
137  (*p->tokenAdd)(&recWord);
138  }
139  } while (r > 0);
140  buf_close(fi);
141  if (no_read == 0)
142  return RECCTRL_EXTRACT_EOF;
143  return RECCTRL_EXTRACT_OK;
144 }
145 
146 static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
147 {
148  int r, filter_ptr = 0;
149  /* not reentrant and thread safe as static buffer is returned */
150  static char *filter_buf = NULL;
151  static int filter_size = 0;
152  int make_header = 1;
153  int make_body = 1;
154  const char *elementSetName = NULL;
155  int no_lines = 0;
156 
157  if (p->comp && p->comp->which == Z_RecordComp_simple &&
158  p->comp->u.simple->which == Z_ElementSetNames_generic)
159  elementSetName = p->comp->u.simple->u.generic;
160 
161  if (elementSetName)
162  {
163  /* don't make header for the R(aw) element set name */
164  if (!strcmp(elementSetName, "R"))
165  {
166  make_header = 0;
167  make_body = 1;
168  }
169  /* only make header for the H(eader) element set name */
170  else if (!strcmp(elementSetName, "H"))
171  {
172  make_header = 1;
173  make_body = 0;
174  }
175  }
176  while (1)
177  {
178  if (filter_ptr + 4096 >= filter_size)
179  {
180  char *nb;
181 
182  filter_size = 2*filter_size + 8192;
183  nb = (char *) xmalloc(filter_size);
184  if (filter_buf)
185  {
186  memcpy(nb, filter_buf, filter_ptr);
187  xfree(filter_buf);
188  }
189  filter_buf = nb;
190  }
191  if (make_header && filter_ptr == 0)
192  {
193  if (p->score >= 0)
194  {
195  yaz_snprintf(filter_buf, 50, "Rank: %d\n", p->score);
196  filter_ptr = strlen(filter_buf);
197  }
198  yaz_snprintf(filter_buf + filter_ptr, 50, "Local Number: " ZINT_FORMAT "\n",
199  p->localno);
200  filter_ptr = strlen(filter_buf);
201  if (p->fname)
202  {
203  yaz_snprintf(filter_buf + filter_ptr, 200, "Filename: %s\n", p->fname);
204  filter_ptr = strlen(filter_buf);
205  }
206  strcpy(filter_buf+filter_ptr++, "\n");
207  }
208  if (!make_body)
209  break;
210  r = p->stream->readf(p->stream, filter_buf + filter_ptr, 4096);
211  if (r <= 0)
212  break;
213  filter_ptr += r;
214  }
215  filter_buf[filter_ptr] = '\0';
216  if (elementSetName)
217  {
218  if (!strcmp(elementSetName, "B"))
219  no_lines = 4;
220  if (!strcmp(elementSetName, "M"))
221  no_lines = 20;
222  }
223  if (no_lines)
224  {
225  char *p = filter_buf;
226  int i = 0;
227 
228  while (++i <= no_lines && (p = strchr(p, '\n')))
229  p++;
230  if (p)
231  {
232  p[1] = '\0';
233  filter_ptr = p-filter_buf;
234  }
235  }
236  p->output_format = yaz_oid_recsyn_sutrs;
237  p->rec_buf = filter_buf;
238  p->rec_len = filter_ptr;
239  return 0;
240 }
241 
242 static struct recType filter_type = {
243  0,
244  "text",
245  filter_init,
250 };
251 
252 RecType
253 #if IDZEBRA_STATIC_TEXT
254 idzebra_filter_text
255 #else
257 #endif
258 
259 [] = {
260  &filter_type,
261  0,
262 };
263 /*
264  * Local variables:
265  * c-basic-offset: 4
266  * c-file-style: "Stroustrup"
267  * indent-tabs-mode: nil
268  * End:
269  * vim: shiftwidth=4 tabstop=8 expandtab
270  */
271 
static void filter_destroy(void *clientData)
Definition: mod_text.c:55
RecType idzebra_filter[]
Definition: mod_text.c:259
static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
Definition: mod_text.c:146
static int filter_extract(void *clientData, struct recExtractCtrl *p)
Definition: mod_text.c:108
static struct recType filter_type
Definition: mod_text.c:242
static void buf_close(struct buf_info *fi)
Definition: mod_text.c:102
static void * filter_init(Res res, RecType recType)
Definition: mod_text.c:38
static struct buf_info * buf_open(struct recExtractCtrl *p)
Definition: mod_text.c:69
static int buf_getchar(struct filter_info *tinfo, struct buf_info *fi, char *dst)
Definition: mod_text.c:80
static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
Definition: mod_text.c:45
#define RECCTRL_EXTRACT_EOF
Definition: recctrl.h:164
#define RECCTRL_EXTRACT_OK
Definition: recctrl.h:163
const char * term_buf
Definition: recctrl.h:56
int term_len
Definition: recctrl.h:58
off_t(* endf)(struct ZebraRecStream *s, off_t *offset)
set and get of record position
Definition: recctrl.h:81
int(* readf)(struct ZebraRecStream *s, char *buf, size_t count)
read function
Definition: recctrl.h:75
off_t(* tellf)(struct ZebraRecStream *s)
tell function
Definition: recctrl.h:79
char * buf
Definition: mod_text.c:64
int offset
Definition: mod_text.c:65
int max
Definition: mod_text.c:66
struct recExtractCtrl * p
Definition: mod_text.c:63
char * sep
Definition: mod_text.c:35
record extract for indexing
Definition: recctrl.h:101
void(* init)(struct recExtractCtrl *p, RecWord *w)
Definition: recctrl.h:103
void(* tokenAdd)(RecWord *w)
Definition: recctrl.h:105
struct ZebraRecStream * stream
Definition: recctrl.h:102
Definition: res.c:46
#define ZINT_FORMAT
Definition: util.h:72
#define ZEBRA_OK
Definition: util.h:82
short ZEBRA_RES
Common return type for Zebra API.
Definition: util.h:80