IDZEBRA  2.2.7
snippet.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stddef.h>
24 #include <string.h>
25 #include <yaz/nmem.h>
26 #include <yaz/log.h>
27 #include <yaz/wrbuf.h>
28 #include <idzebra/snippet.h>
29 
31  NMEM nmem;
34 };
35 
37 {
38  NMEM nmem = nmem_create();
39  zebra_snippets *l = nmem_malloc(nmem, sizeof(*l));
40  l->nmem = nmem;
41  l->front = l->tail = 0;
42  return l;
43 }
44 
46 {
47  if (l)
48  nmem_destroy(l->nmem);
49 }
50 
52  zint seqno, int ws, int ord, const char *term)
53 {
54  zebra_snippets_append_match(l, seqno, ws, ord, term, strlen(term), 0);
55 }
56 
58  zint seqno, int ws, int ord, const char *term,
59  size_t term_len)
60 {
61  zebra_snippets_append_match(l, seqno, ws, ord, term, term_len, 0);
62 }
63 
64 
66  zint seqno, int ws, int ord,
67  const char *term, size_t term_len,
68  int match)
69 {
70  struct zebra_snippet_word *w = nmem_malloc(l->nmem, sizeof(*w));
71 
72  w->next = 0;
73  w->prev = l->tail;
74  if (l->tail)
75  {
76  l->tail->next = w;
77  }
78  else
79  {
80  l->front = w;
81  }
82  l->tail = w;
83 
84  w->seqno = seqno;
85  w->ws = ws;
86  w->ord = ord;
87  w->term = nmem_malloc(l->nmem, term_len+1);
88  memcpy(w->term, term, term_len);
89  w->term[term_len] = '\0';
90  w->match = match;
91  w->mark = 0;
92 }
93 
95 {
96  return l->front;
97 }
98 
100 {
101  return l->front;
102 }
103 
104 void zebra_snippets_log(const zebra_snippets *l, int log_level, int all)
105 {
107  for (w = l->front; w; w = w->next)
108  {
109  WRBUF wr_term = wrbuf_alloc();
110  wrbuf_puts_escaped(wr_term, w->term);
111 
112  if (all || w->mark)
113  yaz_log(log_level, "term='%s'%s mark=%d seqno=" ZINT_FORMAT " ord=%d",
114  wrbuf_cstr(wr_term),
115  (w->match && !w->ws ? "*" : ""), w->mark,
116  w->seqno, w->ord);
117  wrbuf_destroy(wr_term);
118  }
119 }
120 
122  const zebra_snippets *hit,
123  int window_size)
124 {
125  int ord = -1;
127  if (window_size == 0)
128  window_size = 1000000;
129 
130  while(1)
131  {
132  zint window_start;
133  zint first_seq_no_best_window = 0;
134  zint last_seq_no_best_window = 0;
135  int number_best_window = 0;
136  const zebra_snippet_word *hit_w, *doc_w;
137  int min_ord = 0; /* not set yet */
138 
139  for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
140  if (hit_w->ord > ord &&
141  (min_ord == 0 || hit_w->ord < min_ord))
142  {
143  min_ord = hit_w->ord;
144  }
145  if (min_ord == 0)
146  break;
147  ord = min_ord;
148 
149  for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
150  {
151  if (hit_w->ord == ord)
152  {
153  const zebra_snippet_word *look_w = hit_w;
154  int number_this = 0;
155  zint seq_no_last = 0;
156  while (look_w && look_w->seqno < hit_w->seqno + window_size)
157  {
158  if (look_w->ord == ord)
159  {
160  seq_no_last = look_w->seqno;
161  number_this++;
162  }
163  look_w = look_w->next;
164  }
165  if (number_this > number_best_window)
166  {
167  number_best_window = number_this;
168  first_seq_no_best_window = hit_w->seqno;
169  last_seq_no_best_window = seq_no_last;
170  }
171  }
172  }
173  yaz_log(YLOG_DEBUG, "ord=%d", ord);
174  yaz_log(YLOG_DEBUG, "first_seq_no_best_window=" ZINT_FORMAT,
175  first_seq_no_best_window);
176  yaz_log(YLOG_DEBUG, "last_seq_no_best_window=" ZINT_FORMAT,
177  last_seq_no_best_window);
178  yaz_log(YLOG_DEBUG, "number_best_window=%d", number_best_window);
179 
180  window_start = (first_seq_no_best_window + last_seq_no_best_window -
181  window_size) / 2;
182  for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
183  if (doc_w->ord == ord
184  && doc_w->seqno >= window_start
185  && doc_w->seqno < window_start + window_size)
186  {
187  int match = 0;
188  for (hit_w = zebra_snippets_constlist(hit); hit_w;
189  hit_w = hit_w->next)
190  {
191  if (hit_w->ord == ord && hit_w->seqno == doc_w->seqno)
192 
193  {
194  match = 1;
195  break;
196  }
197  }
198  zebra_snippets_append_match(result, doc_w->seqno,
199  doc_w->ws,
200  ord, doc_w->term,
201  strlen(doc_w->term), match);
202  }
203  }
204  return result;
205 }
206 
208 {
210 
211  for (w = zebra_snippets_list(sn); w; w = w->next)
212  {
213  w->mark = 0;
214  w->match = 0;
215  }
216 }
217 
219  const zebra_snippets *doc, const zebra_snippets *hit)
220 {
221  const zebra_snippet_word *hit_w;
222  for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
223  {
224  const zebra_snippet_word *doc_w;
225  for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
226  {
227  if (doc_w->ord == hit_w->ord && doc_w->seqno == hit_w->seqno
228  && !doc_w->ws)
229  {
230  return doc_w;
231  }
232  }
233  }
234  return 0;
235 }
236 
238  int before, int after)
239 {
240  int ord = -1;
241 
243  while (1)
244  {
245  const zebra_snippet_word *hit_w;
246  zebra_snippet_word *doc_w;
247  int min_ord = 0; /* not set yet */
248 
249  for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
250  if (hit_w->ord > ord &&
251  (min_ord == 0 || hit_w->ord < min_ord))
252  {
253  min_ord = hit_w->ord;
254  }
255  if (min_ord == 0)
256  break;
257  ord = min_ord;
258 
259  for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
260  {
261  if (hit_w->ord == ord)
262  {
263  for (doc_w = zebra_snippets_list(doc); doc_w; doc_w = doc_w->next)
264  {
265  if (doc_w->ord == ord && doc_w->seqno == hit_w->seqno
266  && !doc_w->ws)
267  {
268  doc_w->match = 1;
269  doc_w->mark = 1;
270  break;
271  }
272 
273  }
274  /* mark following terms */
275  if (doc_w)
276  {
277  zebra_snippet_word *w = doc_w->next;
278  while (w)
279  if (w->ord == ord
280  && hit_w->seqno - before < w->seqno
281  && hit_w->seqno + after > w->seqno)
282  {
283  w->mark = 1;
284  w = w->next;
285  }
286  else
287  break;
288  }
289  /* mark preceding terms */
290  if (doc_w)
291  {
292  zebra_snippet_word *w = doc_w->prev;
293  while (w)
294  if (w->ord == ord
295  && hit_w->seqno - before < w->seqno
296  && hit_w->seqno + after > w->seqno)
297  {
298  w->mark = 1;
299  w = w->prev;
300  }
301  else
302  break;
303  }
304  }
305  }
306  }
307 }
308 
309 
310 /*
311  * Local variables:
312  * c-basic-offset: 4
313  * c-file-style: "Stroustrup"
314  * indent-tabs-mode: nil
315  * End:
316  * vim: shiftwidth=4 tabstop=8 expandtab
317  */
318 
static int log_level
Definition: flock.c:82
zebra_snippet_word * zebra_snippets_list(zebra_snippets *l)
Definition: snippet.c:94
void zebra_snippets_append_match(zebra_snippets *l, zint seqno, int ws, int ord, const char *term, size_t term_len, int match)
Definition: snippet.c:65
void zebra_snippets_appendn(zebra_snippets *l, zint seqno, int ws, int ord, const char *term, size_t term_len)
Definition: snippet.c:57
static void zebra_snippets_clear(zebra_snippets *sn)
Definition: snippet.c:207
void zebra_snippets_log(const zebra_snippets *l, int log_level, int all)
Definition: snippet.c:104
void zebra_snippets_destroy(zebra_snippets *l)
Definition: snippet.c:45
const zebra_snippet_word * zebra_snippets_constlist(const zebra_snippets *l)
Definition: snippet.c:99
void zebra_snippets_append(zebra_snippets *l, zint seqno, int ws, int ord, const char *term)
Definition: snippet.c:51
const struct zebra_snippet_word * zebra_snippets_lookup(const zebra_snippets *doc, const zebra_snippets *hit)
Definition: snippet.c:218
void zebra_snippets_ring(zebra_snippets *doc, const zebra_snippets *hit, int before, int after)
Definition: snippet.c:237
zebra_snippets * zebra_snippets_window(const zebra_snippets *doc, const zebra_snippets *hit, int window_size)
Definition: snippet.c:121
zebra_snippets * zebra_snippets_create(void)
Definition: snippet.c:36
struct zebra_snippet_word * prev
Definition: snippet.h:35
struct zebra_snippet_word * next
Definition: snippet.h:34
zebra_snippet_word * tail
Definition: snippet.c:33
zebra_snippet_word * front
Definition: snippet.c:32
long zint
Zebra integer.
Definition: util.h:66
#define ZINT_FORMAT
Definition: util.h:72