YAZ 5.35.1
ccl_stop_words.c
Go to the documentation of this file.
/* This file is part of the YAZ toolkit.
 * Copyright (C) Index Data
 * See the file LICENSE for details.
 */
9#if HAVE_CONFIG_H
10#include <config.h>
11#endif
12
13#include <stdio.h>
14#include <string.h>
15#include <yaz/ccl.h>
16#include <yaz/nmem.h>
17
/* One stop word that was removed from a query term.  Entries form a
 * singly linked list kept in removal order. */
struct ccl_stop_info {
    char *qualname;             /* qualifier of the term, or 0 if it had none */
    char *term;                 /* NUL-terminated copy of the removed word */
    struct ccl_stop_info *next; /* next removed item, 0 at the end of the list */
};
23
26 NMEM nmem; /* memory for removed items */
28};
29
31 const char *qname,
32 const char *t, size_t len)
33{
34 struct ccl_stop_info *csi = (struct ccl_stop_info *)
35 nmem_malloc(csw->nmem, sizeof(*csi));
36 struct ccl_stop_info **csip = &csw->removed_items;
37 if (qname)
38 csi->qualname = nmem_strdup(csw->nmem, qname);
39 else
40 csi->qualname = 0;
41
42 csi->term = (char *) nmem_malloc(csw->nmem, len+1);
43 memcpy(csi->term, t, len);
44 csi->term[len] = '\0';
45 csi->next = 0;
46
47 while (*csip)
48 csip = &(*csip)->next;
49
50 *csip = csi;
51}
52
54{
55 NMEM nmem = nmem_create();
56 ccl_stop_words_t csw = (ccl_stop_words_t) xmalloc(sizeof(*csw));
57 csw->nmem = nmem;
58 csw->removed_items = 0;
59 csw->blank_chars = xstrdup(" \r\n\t");
60 return csw;
61}
62
64{
65 if (csw)
66 {
67 nmem_destroy(csw->nmem);
68 xfree(csw->blank_chars);
69 xfree(csw);
70 }
71}
72
74 CCL_bibset bibset,
75 struct ccl_rpn_node *p)
76{
77 struct ccl_rpn_node *left, *right;
78 switch (p->kind)
79 {
80 case CCL_RPN_AND:
81 case CCL_RPN_OR:
82 case CCL_RPN_NOT:
83 case CCL_RPN_PROX:
84 left = ccl_remove_stop_r(csw, bibset, p->u.p[0]);
85 right = ccl_remove_stop_r(csw, bibset, p->u.p[1]);
86 if (!left || !right)
87 {
88 /* we must delete our binary node and return child (if any) */
89 p->u.p[0] = 0;
90 p->u.p[1] = 0;
92 if (left)
93 return left;
94 else
95 return right;
96 }
97 break;
98 case CCL_RPN_SET:
99 break;
100 case CCL_RPN_TERM:
101 if (p->u.t.term)
102 {
103 int found = 1;
104 while (found)
105 {
106 char *cp = p->u.t.term;
107 found = 0;
108 while (1)
109 {
110 while (*cp && strchr(csw->blank_chars, *cp))
111 cp++;
112 if (!*cp)
113 break;
114 else
115 {
116 char *cp0 = cp;
117 while (*cp && !strchr(csw->blank_chars, *cp))
118 cp++;
119 if (cp != cp0)
120 {
121 size_t len = cp - cp0;
122 if (ccl_search_stop(bibset, p->u.t.qual,
123 cp0, len))
124 {
126 cp0, len);
127 while (*cp && strchr(csw->blank_chars, *cp))
128 cp++;
129 memmove(cp0, cp, strlen(cp)+1);
130 found = 1;
131 break;
132 }
133 }
134 }
135 }
136 }
137 }
138 /* chop right blanks .. and see if term it gets empty */
139 if (p->u.t.term && csw->removed_items)
140 {
141 char *cp = p->u.t.term + strlen(p->u.t.term);
142 while (1)
143 {
144 if (cp == p->u.t.term)
145 {
146 /* term is empty / blank */
148 return 0;
149 }
150 if (!strchr(csw->blank_chars, cp[-1]))
151 break;
152 /* chop right */
153 cp[-1] = 0;
154 --cp;
155 }
156 }
157 break;
158 }
159 return p;
160}
161
163 CCL_bibset bibset, struct ccl_rpn_node **t)
164{
165 struct ccl_rpn_node *r;
166
167 /* remove list items */
168 nmem_reset(csw->nmem);
169 csw->removed_items = 0;
170
171 r = ccl_remove_stop_r(csw, bibset, *t);
172 *t = r;
173 if (csw->removed_items)
174 return 1;
175 return 0;
176}
177
179 const char **qualname, const char **term)
180{
181 struct ccl_stop_info *csi = csw->removed_items;
182 int i = 0;
183 while (csi && i < idx)
184 {
185 csi = csi->next;
186 i++;
187 }
188 if (csi)
189 {
190 *qualname = csi->qualname;
191 *term = csi->term;
192 return 1;
193 }
194 return 0;
195}
196
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */
205
Header with public definitions for CCL.
struct ccl_stop_words * ccl_stop_words_t
stop words handle (pimpl)
Definition ccl.h:288
@ CCL_RPN_AND
Definition ccl.h:119
@ CCL_RPN_TERM
Definition ccl.h:122
@ CCL_RPN_PROX
Definition ccl.h:124
@ CCL_RPN_NOT
Definition ccl.h:121
@ CCL_RPN_SET
Definition ccl.h:123
@ CCL_RPN_OR
Definition ccl.h:120
int ccl_stop_words_tree(ccl_stop_words_t csw, CCL_bibset bibset, struct ccl_rpn_node **t)
removes stop words from RPN tree
struct ccl_rpn_node * ccl_remove_stop_r(ccl_stop_words_t csw, CCL_bibset bibset, struct ccl_rpn_node *p)
void ccl_stop_words_destroy(ccl_stop_words_t csw)
destroys stop words handle
ccl_stop_words_t ccl_stop_words_create(void)
creates stop words handle
static void append_removed_item(ccl_stop_words_t csw, const char *qname, const char *t, size_t len)
int ccl_stop_words_info(ccl_stop_words_t csw, int idx, const char **qualname, const char **term)
returns information about removed "stop" words
void ccl_rpn_delete(struct ccl_rpn_node *rpn)
Definition cclfind.c:141
int ccl_search_stop(CCL_bibset bibset, const char *qname, const char *src_str, size_t src_len)
Definition cclqual.c:413
void nmem_reset(NMEM n)
releases memory associated with an NMEM handle
Definition nmem.c:129
NMEM nmem_create(void)
returns new NMEM handle
Definition nmem.c:181
void * nmem_malloc(NMEM n, size_t size)
allocates memory block on NMEM handle
Definition nmem.c:145
void nmem_destroy(NMEM n)
destroys NMEM handle and memory associated with it
Definition nmem.c:204
Header for Nibble Memory functions.
char * nmem_strdup(NMEM mem, const char *src)
allocates string on NMEM handle (similar strdup)
Definition nmemsdup.c:18
RPN tree structure node.
Definition ccl.h:128
struct ccl_rpn_node * p[3]
Boolean including proximity 0=left, 1=right, 2=prox parms.
Definition ccl.h:133
char * qual
Definition ccl.h:137
struct ccl_rpn_node::@8::@9 t
Attributes + Term.
enum ccl_rpn_kind kind
node type, one of CCL_RPN_AND, CCL_RPN_OR, etc
Definition ccl.h:130
union ccl_rpn_node::@8 u
char * term
Definition ccl.h:136
struct ccl_stop_info * next
struct ccl_stop_info * removed_items
#define xstrdup(s)
utility macro which calls xstrdup_f
Definition xmalloc.h:55
#define xfree(x)
utility macro which calls xfree_f
Definition xmalloc.h:53
#define xmalloc(x)
utility macro which calls malloc_f
Definition xmalloc.h:49