IDZEBRA 2.2.8
snippet.c
Go to the documentation of this file.
/* This file is part of the Zebra server.
   Copyright (C) Index Data

Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/
19
20#if HAVE_CONFIG_H
21#include <config.h>
22#endif
23#include <stddef.h>
24#include <string.h>
25#include <yaz/nmem.h>
26#include <yaz/log.h>
27#include <yaz/wrbuf.h>
28#include <idzebra/snippet.h>
29
35
37{
38 NMEM nmem = nmem_create();
39 zebra_snippets *l = nmem_malloc(nmem, sizeof(*l));
40 l->nmem = nmem;
41 l->front = l->tail = 0;
42 return l;
43}
44
46{
47 if (l)
48 nmem_destroy(l->nmem);
49}
50
52 zint seqno, int ws, int ord, const char *term)
53{
54 zebra_snippets_append_match(l, seqno, ws, ord, term, strlen(term), 0);
55}
56
58 zint seqno, int ws, int ord, const char *term,
59 size_t term_len)
60{
61 zebra_snippets_append_match(l, seqno, ws, ord, term, term_len, 0);
62}
63
64
66 zint seqno, int ws, int ord,
67 const char *term, size_t term_len,
68 int match)
69{
70 struct zebra_snippet_word *w = nmem_malloc(l->nmem, sizeof(*w));
71
72 w->next = 0;
73 w->prev = l->tail;
74 if (l->tail)
75 {
76 l->tail->next = w;
77 }
78 else
79 {
80 l->front = w;
81 }
82 l->tail = w;
83
84 w->seqno = seqno;
85 w->ws = ws;
86 w->ord = ord;
87 w->term = nmem_malloc(l->nmem, term_len+1);
88 memcpy(w->term, term, term_len);
89 w->term[term_len] = '\0';
90 w->match = match;
91 w->mark = 0;
92}
93
98
100{
101 return l->front;
102}
103
104void zebra_snippets_log(const zebra_snippets *l, int log_level, int all)
105{
107 for (w = l->front; w; w = w->next)
108 {
109 WRBUF wr_term = wrbuf_alloc();
110 wrbuf_puts_escaped(wr_term, w->term);
111
112 if (all || w->mark)
113 yaz_log(log_level, "term='%s'%s mark=%d seqno=" ZINT_FORMAT " ord=%d",
114 wrbuf_cstr(wr_term),
115 (w->match && !w->ws ? "*" : ""), w->mark,
116 w->seqno, w->ord);
117 wrbuf_destroy(wr_term);
118 }
119}
120
122 const zebra_snippets *hit,
123 int window_size)
124{
125 int ord = -1;
127 if (window_size == 0)
128 window_size = 1000000;
129
130 while(1)
131 {
132 zint window_start;
133 zint first_seq_no_best_window = 0;
134 zint last_seq_no_best_window = 0;
135 int number_best_window = 0;
136 const zebra_snippet_word *hit_w, *doc_w;
137 int min_ord = 0; /* not set yet */
138
139 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
140 if (hit_w->ord > ord &&
141 (min_ord == 0 || hit_w->ord < min_ord))
142 {
143 min_ord = hit_w->ord;
144 }
145 if (min_ord == 0)
146 break;
147 ord = min_ord;
148
149 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
150 {
151 if (hit_w->ord == ord)
152 {
153 const zebra_snippet_word *look_w = hit_w;
154 int number_this = 0;
155 zint seq_no_last = 0;
156 while (look_w && look_w->seqno < hit_w->seqno + window_size)
157 {
158 if (look_w->ord == ord)
159 {
160 seq_no_last = look_w->seqno;
161 number_this++;
162 }
163 look_w = look_w->next;
164 }
165 if (number_this > number_best_window)
166 {
167 number_best_window = number_this;
168 first_seq_no_best_window = hit_w->seqno;
169 last_seq_no_best_window = seq_no_last;
170 }
171 }
172 }
173 yaz_log(YLOG_DEBUG, "ord=%d", ord);
174 yaz_log(YLOG_DEBUG, "first_seq_no_best_window=" ZINT_FORMAT,
175 first_seq_no_best_window);
176 yaz_log(YLOG_DEBUG, "last_seq_no_best_window=" ZINT_FORMAT,
177 last_seq_no_best_window);
178 yaz_log(YLOG_DEBUG, "number_best_window=%d", number_best_window);
179
180 window_start = (first_seq_no_best_window + last_seq_no_best_window -
181 window_size) / 2;
182 for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
183 if (doc_w->ord == ord
184 && doc_w->seqno >= window_start
185 && doc_w->seqno < window_start + window_size)
186 {
187 int match = 0;
188 for (hit_w = zebra_snippets_constlist(hit); hit_w;
189 hit_w = hit_w->next)
190 {
191 if (hit_w->ord == ord && hit_w->seqno == doc_w->seqno)
192
193 {
194 match = 1;
195 break;
196 }
197 }
198 zebra_snippets_append_match(result, doc_w->seqno,
199 doc_w->ws,
200 ord, doc_w->term,
201 strlen(doc_w->term), match);
202 }
203 }
204 return result;
205}
206
208{
210
211 for (w = zebra_snippets_list(sn); w; w = w->next)
212 {
213 w->mark = 0;
214 w->match = 0;
215 }
216}
217
219 const zebra_snippets *doc, const zebra_snippets *hit)
220{
221 const zebra_snippet_word *hit_w;
222 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
223 {
224 const zebra_snippet_word *doc_w;
225 for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
226 {
227 if (doc_w->ord == hit_w->ord && doc_w->seqno == hit_w->seqno
228 && !doc_w->ws)
229 {
230 return doc_w;
231 }
232 }
233 }
234 return 0;
235}
236
238 int before, int after)
239{
240 int ord = -1;
241
243 while (1)
244 {
245 const zebra_snippet_word *hit_w;
246 zebra_snippet_word *doc_w;
247 int min_ord = 0; /* not set yet */
248
249 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
250 if (hit_w->ord > ord &&
251 (min_ord == 0 || hit_w->ord < min_ord))
252 {
253 min_ord = hit_w->ord;
254 }
255 if (min_ord == 0)
256 break;
257 ord = min_ord;
258
259 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
260 {
261 if (hit_w->ord == ord)
262 {
263 for (doc_w = zebra_snippets_list(doc); doc_w; doc_w = doc_w->next)
264 {
265 if (doc_w->ord == ord && doc_w->seqno == hit_w->seqno
266 && !doc_w->ws)
267 {
268 doc_w->match = 1;
269 doc_w->mark = 1;
270 break;
271 }
272
273 }
274 /* mark following terms */
275 if (doc_w)
276 {
277 zebra_snippet_word *w = doc_w->next;
278 while (w)
279 if (w->ord == ord
280 && hit_w->seqno - before < w->seqno
281 && hit_w->seqno + after > w->seqno)
282 {
283 w->mark = 1;
284 w = w->next;
285 }
286 else
287 break;
288 }
289 /* mark preceding terms */
290 if (doc_w)
291 {
292 zebra_snippet_word *w = doc_w->prev;
293 while (w)
294 if (w->ord == ord
295 && hit_w->seqno - before < w->seqno
296 && hit_w->seqno + after > w->seqno)
297 {
298 w->mark = 1;
299 w = w->prev;
300 }
301 else
302 break;
303 }
304 }
305 }
306 }
307}
308
309
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */
318
static int log_level
Definition flock.c:82
const zebra_snippet_word * zebra_snippets_constlist(const zebra_snippets *l)
Definition snippet.c:99
zebra_snippets * zebra_snippets_window(const zebra_snippets *doc, const zebra_snippets *hit, int window_size)
Definition snippet.c:121
void zebra_snippets_append_match(zebra_snippets *l, zint seqno, int ws, int ord, const char *term, size_t term_len, int match)
Definition snippet.c:65
void zebra_snippets_appendn(zebra_snippets *l, zint seqno, int ws, int ord, const char *term, size_t term_len)
Definition snippet.c:57
zebra_snippet_word * zebra_snippets_list(zebra_snippets *l)
Definition snippet.c:94
static void zebra_snippets_clear(zebra_snippets *sn)
Definition snippet.c:207
void zebra_snippets_log(const zebra_snippets *l, int log_level, int all)
Definition snippet.c:104
void zebra_snippets_destroy(zebra_snippets *l)
Definition snippet.c:45
void zebra_snippets_append(zebra_snippets *l, zint seqno, int ws, int ord, const char *term)
Definition snippet.c:51
zebra_snippets * zebra_snippets_create(void)
Definition snippet.c:36
void zebra_snippets_ring(zebra_snippets *doc, const zebra_snippets *hit, int before, int after)
Definition snippet.c:237
const struct zebra_snippet_word * zebra_snippets_lookup(const zebra_snippets *doc, const zebra_snippets *hit)
Definition snippet.c:218
struct zebra_snippet_word * prev
Definition snippet.h:35
struct zebra_snippet_word * next
Definition snippet.h:34
zebra_snippet_word * tail
Definition snippet.c:33
zebra_snippet_word * front
Definition snippet.c:32
long zint
Zebra integer.
Definition util.h:66
#define ZINT_FORMAT
Definition util.h:72