pazpar2  1.14.1
marchash.c
Go to the documentation of this file.
1 /* This file is part of Pazpar2.
2  Copyright (C) Index Data
3 
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
24 #if HAVE_CONFIG_H
25 #include <config.h>
26 #else
27 /* disable inline if AC_C_INLINE is not in use (Windows) */
28 #define inline
29 #endif
30 
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <ctype.h>
35 
36 #include <libxml/tree.h>
37 #include <libxml/parser.h>
38 #include <yaz/nmem.h>
39 
40 #include "jenkins_hash.h"
41 #include "marchash.h"
42 
/*
 * Append src to the NUL-terminated string already in dest, dropping the
 * leading and trailing whitespace of src (interior whitespace is kept).
 *
 * dest must have room for strlen(dest) + strlen(src) + 1 bytes. The
 * terminator is the last byte written, so an empty or all-whitespace src
 * leaves dest unchanged and never touches memory beyond its terminator.
 */
static inline void strtrimcat(char *dest, const char *src)
{
    const char *in = src;
    // start writing at the end of dest
    char *out = dest + strlen(dest);
    // one past the last non-space character kept so far; the terminator
    // is stored here once copying is done
    char *end = out;

    // <ctype.h> classifiers require a value representable as unsigned char
    while (isspace((unsigned char) *in))
        in++;
    for (; *in; in++, out++)
    {
        *out = *in;
        if (!isspace((unsigned char) *in))
            end = out + 1;
    }
    // cuts off any trailing whitespace that was copied above
    *end = '\0';
}
68 
/*
 * Store a whitespace-trimmed copy of src in dest: dest is first made the
 * empty string and src is then appended through strtrimcat().
 */
static inline void strtrimcpy(char *dest, const char *src)
{
    dest[0] = '\0';
    strtrimcat(dest, src);
}
74 
76 {
77  struct marchash *new;
78  new = nmem_malloc(nmem, sizeof (struct marchash));
79  memset(new, 0, sizeof (struct marchash));
80  new->nmem = nmem;
81  return new;
82 }
83 
84 void marchash_ingest_marcxml(struct marchash *marchash, xmlNodePtr rec_node)
85 {
86  xmlNodePtr field_node;
87  xmlNodePtr sub_node;
88  struct marcfield *field;
89  field_node = rec_node->children;
90 
91  while (field_node)
92  {
93  if (field_node->type == XML_ELEMENT_NODE)
94  {
95  field = NULL;
96  if (!strcmp((const char *) field_node->name, "controlfield"))
97  {
98  xmlChar *content = xmlNodeGetContent(field_node);
99  xmlChar *tag = xmlGetProp(field_node, BAD_CAST "tag");
100  if (tag && content)
101  field = marchash_add_field(
102  marchash, (const char *) tag, (const char *) content);
103  xmlFree(content);
104  xmlFree(tag);
105  }
106  else if (!strcmp((const char *) field_node->name, "datafield"))
107  {
108  xmlChar *content = xmlNodeGetContent(field_node);
109  xmlChar *tag = xmlGetProp(field_node, BAD_CAST "tag");
110  if (tag && content)
111  field = marchash_add_field(
112  marchash, (const char *) tag, (const char *) content);
113  xmlFree(content);
114  xmlFree(tag);
115  }
116  if (field)
117  {
118  sub_node = field_node->children;
119  while (sub_node)
120  {
121  if ((sub_node->type == XML_ELEMENT_NODE) &&
122  !strcmp((const char *) sub_node->name, "subfield"))
123  {
124  xmlChar *content = xmlNodeGetContent(sub_node);
125  xmlChar *code = xmlGetProp(sub_node, BAD_CAST "code");
126  if (code && content)
128  marchash, field,
129  code[0], (const char *) content);
130  xmlFree(content);
131  xmlFree(code);
132  }
133  sub_node = sub_node->next;
134  }
135  }
136  }
137  field_node = field_node->next;
138  }
139 }
140 
142  const char *key, const char *val)
143 {
144  int slot;
145  struct marcfield *new;
146  struct marcfield *last;
147 
148  slot = jenkins_hash((const unsigned char *) key) & MARCHASH_MASK;
149  new = marchash->table[slot];
150  last = NULL;
151 
152  while (new)
153  {
154  last = new;
155  new = new->next;
156  }
157 
158  new = nmem_malloc(marchash->nmem, sizeof (struct marcfield));
159 
160  if (last)
161  last->next = new;
162  else
163  marchash->table[slot] = new;
164 
165  new->next = NULL;
166  new->subfields = NULL;
167  strncpy(new->key, key, 4);
168 
169  // only 3 char in a marc field name
170  if (new->key[3] != '\0')
171  return 0;
172 
173  new->val = nmem_malloc(marchash->nmem, sizeof (char) * strlen(val) + 1);
174  strtrimcpy(new->val, val);
175 
176  return new;
177 }
178 
180  struct marcfield *field,
181  const char key, const char *val)
182 {
183  struct marcsubfield *new;
184  struct marcsubfield *last;
185  last = NULL;
186  new = field->subfields;
187 
188  while (new)
189  {
190  last = new;
191  new = new->next;
192  }
193 
194  new = nmem_malloc(marchash->nmem, sizeof (struct marcsubfield));
195 
196  if (last)
197  last->next = new;
198  else
199  field->subfields = new;
200 
201  new->next = NULL;
202  new->key = key;
203  new->val = nmem_malloc(marchash->nmem, sizeof (char) * strlen(val) + 1);
204  strcpy(new->val, val);
205  return new;
206 }
207 
209  const char *key, struct marcfield *last)
210 {
211  struct marcfield *cur;
212  if (last)
213  cur = last->next;
214  else
215  cur = marchash->table[jenkins_hash((const unsigned char *)key) & MARCHASH_MASK];
216  while (cur)
217  {
218  if (!strcmp(cur->key, key))
219  return cur;
220  cur = cur->next;
221  }
222  return NULL;
223 }
224 
226  struct marcfield *field,
227  struct marcsubfield *last)
228 {
229  struct marcsubfield *cur;
230  if (last)
231  cur = last->next;
232  else
233  cur = field->subfields;
234  while (cur)
235  {
236  if (cur->key == key)
237  return cur;
238  cur = cur->next;
239  }
240  return NULL;
241 }
242 
244  const char *delim, NMEM nmem)
245 {
246  char *output;
247  struct marcsubfield *cur;
248  int delimsize = strlen(delim);
249  int outsize = 1-delimsize;
250  // maybe it would make sense to have an nmem strcpy/strcat?
251  cur = field -> subfields;
252  while (cur)
253  {
254  outsize += strlen(cur->val) + delimsize;
255  cur = cur->next;
256  }
257  if (outsize > 0)
258  output = nmem_malloc(nmem, outsize);
259  else
260  return NULL;
261  *output = '\0';
262  cur = field -> subfields;
263  while (cur)
264  {
265  strtrimcat(output, cur->val);
266  if (cur->next)
267  strcat(output, delim);
268  cur = cur->next;
269  }
270  return output;
271 }
272 /*
273  * Local variables:
274  * c-basic-offset: 4
275  * c-file-style: "Stroustrup"
276  * indent-tabs-mode: nil
277  * End:
278  * vim: shiftwidth=4 tabstop=8 expandtab
279  */
unsigned int jenkins_hash(const unsigned char *key)
Definition: jenkins_hash.c:31
struct marcfield * marchash_add_field(struct marchash *marchash, const char *key, const char *val)
Definition: marchash.c:141
char * marchash_catenate_subfields(struct marcfield *field, const char *delim, NMEM nmem)
Definition: marchash.c:243
struct marcsubfield * marchash_add_subfield(struct marchash *marchash, struct marcfield *field, const char key, const char *val)
Definition: marchash.c:179
static void strtrimcat(char *dest, const char *src)
Definition: marchash.c:43
struct marcsubfield * marchash_get_subfield(char key, struct marcfield *field, struct marcsubfield *last)
Definition: marchash.c:225
struct marcfield * marchash_get_field(struct marchash *marchash, const char *key, struct marcfield *last)
Definition: marchash.c:208
static void strtrimcpy(char *dest, const char *src)
Definition: marchash.c:69
struct marchash * marchash_create(NMEM nmem)
Definition: marchash.c:75
void marchash_ingest_marcxml(struct marchash *marchash, xmlNodePtr rec_node)
Definition: marchash.c:84
#define MARCHASH_MASK
Definition: marchash.h:23
struct marcfield * next
Definition: marchash.h:36
char * val
Definition: marchash.h:34
struct marcsubfield * subfields
Definition: marchash.h:35
char key[4]
Definition: marchash.h:33
struct marcfield * table[MARCHASH_MASK+1]
Definition: marchash.h:27
NMEM nmem
Definition: marchash.h:28
char key
Definition: marchash.h:41
struct marcsubfield * next
Definition: marchash.h:43
char * val
Definition: marchash.h:42