pazpar2  1.14.1
marcmap.c
Go to the documentation of this file.
1 /* This file is part of Pazpar2.
2  Copyright (C) Index Data
3 
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
24 #if HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 
32 #include <libxml/parser.h>
33 #include <libxml/tree.h>
34 
35 #include <yaz/nmem.h>
36 
37 #include "marcmap.h"
38 #include "marchash.h"
39 
40 struct marcmap *marcmap_load(const char *filename, NMEM nmem)
41 {
42  struct marcmap *mmhead;
43  struct marcmap *mm = 0, **mmp = &mmhead;
44  FILE *fp;
45  int c;
46  char buf[256];
47  int len = 0;
48  int field = 0;
49 
50  fp = fopen(filename, "r");
51  if (!fp)
52  return mm;
53 
54  while ((c = getc(fp) ) != EOF)
55  {
56  // allocate some space
57  if (!mm)
58  {
59  *mmp = mm = nmem_malloc(nmem, sizeof(struct marcmap));
60  mmp = &mm->next;
61  }
62  // whitespace saves and moves on
63  if (c == ' ' || c == '\n' || c == '\t')
64  {
65  buf[len] = '\0';
66  len++;
67  // first field, marc
68  if (field == 0)
69  {
70  // allow blank lines
71  if (!(len <3))
72  {
73  mm->field = nmem_malloc(nmem, len * sizeof(char));
74  strncpy(mm->field, buf, len);
75  }
76  }
77  // second, marc subfield, just a char
78  else if (field == 1)
79  {
80  mm->subfield = buf[len-2];
81  }
82  // third, pz fieldname
83  else if (field == 2)
84  {
85  mm->pz = nmem_malloc(nmem, len * sizeof(char));
86  strncpy(mm->pz, buf, len);
87  }
88 
89  // new line, new record
90  if (c == '\n')
91  {
92  field = 0;
93  mm = 0;
94  }
95  else
96  {
97  field++;
98  }
99  len = 0;
100  }
101  else
102  {
103  buf[len] = c;
104  len++;
105  }
106  }
107  *mmp = NULL;
108  fclose(fp);
109  return mmhead;
110 }
111 
112 xmlDoc *marcmap_apply(struct marcmap *marcmap, xmlDoc *xml_in)
113 {
114  char mergekey[1024];
115  char medium[32];
116  char *s;
117  NMEM nmem;
118  xmlNsPtr ns_pz;
119  xmlDocPtr xml_out;
120  xmlNodePtr xml_out_root;
121  xmlNodePtr rec_node;
122  xmlNodePtr meta_node;
123  struct marchash *marchash;
124  struct marcfield *field;
125  struct marcsubfield *subfield;
126  struct marcmap *mmcur;
127 
128  xml_out = xmlNewDoc(BAD_CAST "1.0");
129  xml_out->encoding = xmlCharStrdup("UTF-8");
130  xml_out_root = xmlNewNode(NULL, BAD_CAST "record");
131  xmlDocSetRootElement(xml_out, xml_out_root);
132  ns_pz = xmlNewNs(xml_out_root, BAD_CAST "http://www.indexdata.com/pazpar2/1.0", BAD_CAST "pz");
133  xmlSetNs(xml_out_root, ns_pz);
134  nmem = nmem_create();
135  rec_node = xmlDocGetRootElement(xml_in);
136  marchash = marchash_create(nmem);
138 
139  mmcur = marcmap;
140  while (mmcur != NULL)
141  {
142  field = 0;
143  while ((field = marchash_get_field(marchash, mmcur->field, field)) != 0)
144  {
145  // field value
146  if ((mmcur->subfield == '$') && (s = field->val))
147  {
148  meta_node = xmlNewChild(xml_out_root, ns_pz, BAD_CAST "metadata", BAD_CAST s);
149  xmlSetProp(meta_node, BAD_CAST "type", BAD_CAST mmcur->pz);
150  }
151  // catenate all subfields
152  else if ((mmcur->subfield == '*') && (s = marchash_catenate_subfields(field, " ", nmem)))
153  {
154  meta_node = xmlNewChild(xml_out_root, ns_pz, BAD_CAST "metadata", BAD_CAST s);
155  xmlSetProp(meta_node, BAD_CAST "type", BAD_CAST mmcur->pz);
156  }
157  // subfield value
158  else if (mmcur->subfield)
159  {
160  subfield = 0;
161  while ((subfield =
163  field, subfield)) != 0)
164  {
165  if ((s = subfield->val) != 0)
166  {
167  meta_node = xmlNewChild(xml_out_root, ns_pz, BAD_CAST "metadata", BAD_CAST s);
168  xmlSetProp(meta_node, BAD_CAST "type", BAD_CAST mmcur->pz);
169  }
170  }
171  }
172 
173  }
174  mmcur = mmcur->next;
175  }
176 
177  // hard coded mappings
178 
179  // medium
180  if ((field = marchash_get_field(marchash, "245", NULL)) && (subfield = marchash_get_subfield('h', field, NULL)))
181  {
182  strncpy(medium, subfield->val, 32);
183  }
184  else if ((field = marchash_get_field(marchash, "900", NULL)) && (subfield = marchash_get_subfield('a', field, NULL)))
185  strcpy(medium, "electronic resource");
186  else if ((field = marchash_get_field(marchash, "900", NULL)) && (subfield = marchash_get_subfield('b', field, NULL)))
187  strcpy(medium, "electronic resource");
188  else if ((field = marchash_get_field(marchash, "773", NULL)) && (subfield = marchash_get_subfield('t', field, NULL)))
189  strcpy(medium, "article");
190  else
191  strcpy(medium, "book");
192 
193  meta_node = xmlNewChild(xml_out_root, ns_pz, BAD_CAST "metadata", BAD_CAST medium);
194  xmlSetProp(meta_node, BAD_CAST "type", BAD_CAST "medium");
195 
196  // merge key
197  memset(mergekey, 0, 1024);
198  strcpy(mergekey, "title ");
199  if ((field = marchash_get_field(marchash, "245", NULL)) && (subfield = marchash_get_subfield('a', field, NULL)))
200  strncat(mergekey, subfield->val, 1023 - strlen(mergekey));
201  strncat(mergekey, " author ", 1023 - strlen(mergekey));
202  if ((field = marchash_get_field(marchash, "100", NULL)) && (subfield = marchash_get_subfield('a', field, NULL)))
203  strncat(mergekey, subfield->val, 1023 - strlen(mergekey));
204  strncat(mergekey, " medium ", 1023 - strlen(mergekey));
205  strncat(mergekey, medium, 1023 - strlen(mergekey));
206 
207 // xmlSetProp(xml_out_root, BAD_CAST "mergekey", BAD_CAST mergekey);
208 
209  nmem_destroy(nmem);
210  return xml_out;
211 }
212 
213 /*
214  * Local variables:
215  * c-basic-offset: 4
216  * c-file-style: "Stroustrup"
217  * indent-tabs-mode: nil
218  * End:
219  * vim: shiftwidth=4 tabstop=8 expandtab
220  */
char * marchash_catenate_subfields(struct marcfield *field, const char *delim, NMEM nmem)
Definition: marchash.c:243
struct marcsubfield * marchash_get_subfield(char key, struct marcfield *field, struct marcsubfield *last)
Definition: marchash.c:225
struct marcfield * marchash_get_field(struct marchash *marchash, const char *key, struct marcfield *last)
Definition: marchash.c:208
struct marchash * marchash_create(NMEM nmem)
Definition: marchash.c:75
void marchash_ingest_marcxml(struct marchash *marchash, xmlNodePtr rec_node)
Definition: marchash.c:84
struct marcmap * marcmap_load(const char *filename, NMEM nmem)
Definition: marcmap.c:40
xmlDoc * marcmap_apply(struct marcmap *marcmap, xmlDoc *xml_in)
Definition: marcmap.c:112
char * field
Definition: marcmap.h:25
struct marcmap * next
Definition: marcmap.h:28
char * pz
Definition: marcmap.h:27
char subfield
Definition: marcmap.h:26