metaproxy  1.21.0
filter_http_file.cpp
Go to the documentation of this file.
1 /* This file is part of Metaproxy.
2  Copyright (C) Index Data
3 
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 
19 #include "config.hpp"
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "filter_http_file.hpp"
24 #include <sys/stat.h>
25 
26 #include <yaz/zgdu.h>
27 #include <yaz/log.h>
28 
29 #include <boost/thread/mutex.hpp>
30 
31 #include <list>
32 #include <map>
33 #include <iostream>
34 
35 #if HAVE_SYS_TYPES_H
36 #include <sys/types.h>
37 #endif
38 
39 #if HAVE_SYS_STAT_H
40 #include <sys/stat.h>
41 #endif
42 
43 namespace mp = metaproxy_1;
44 namespace yf = mp::filter;
45 
46 namespace metaproxy_1 {
47  namespace filter {
48  struct HttpFile::Area {
49  std::string m_url_path_prefix;
50  std::string m_file_root;
51  bool m_raw;
52  bool m_passthru;
53  Area();
54  };
56  friend class Rep;
57  std::string m_type;
58  public:
59  Mime(std::string type);
60  Mime();
61  };
62  class HttpFile::Rep {
63  friend class HttpFile;
64 
65  typedef std::list<Area> AreaList;
66  typedef std::map<std::string,Mime> MimeMap;
67 
70  void fetch_uri(mp::Session &session,
71  Z_HTTP_Request *req, mp::Package &package);
72  void fetch_file(mp::Session &session,
73  Z_HTTP_Request *req,
74  std::string &fname, mp::Package &package,
75  bool raw, bool passthru);
76  std::string get_mime_type(std::string &fname);
77  };
78  }
79 }
80 
81 yf::HttpFile::Area::Area() : m_raw(false), m_passthru(false)
82 {
83 }
84 
85 yf::HttpFile::Mime::Mime() {}
86 
87 yf::HttpFile::Mime::Mime(std::string type) : m_type(type) {}
88 
89 yf::HttpFile::HttpFile() : m_p(new Rep)
90 {
91 #if 0
92  m_p->m_ext_to_map["html"] = Mime("text/html");
93  m_p->m_ext_to_map["htm"] = Mime("text/html");
94  m_p->m_ext_to_map["png"] = Mime("image/png");
95  m_p->m_ext_to_map["txt"] = Mime("text/plain");
96  m_p->m_ext_to_map["text"] = Mime("text/plain");
97  m_p->m_ext_to_map["asc"] = Mime("text/plain");
98  m_p->m_ext_to_map["xml"] = Mime("application/xml");
99  m_p->m_ext_to_map["xsl"] = Mime("application/xml");
100 #endif
101 #if 0
102  Area a;
103  a.m_url_path_prefix = "/etc";
104  a.m_file_root = ".";
105  m_p->m_area_list.push_back(a);
106 #endif
107 }
108 
109 yf::HttpFile::~HttpFile()
110 {
111 }
112 
113 std::string yf::HttpFile::Rep::get_mime_type(std::string &fname)
114 {
115  std::string file_part = fname;
116  std::string::size_type p = fname.find_last_of('/');
117 
118  if (p != std::string::npos)
119  file_part = fname.substr(p+1);
120 
121  p = file_part.find_last_of('.');
122  std::string content_type;
123  if (p != std::string::npos)
124  {
125  std::string ext = file_part.substr(p+1);
126  MimeMap::const_iterator it = m_ext_to_map.find(ext);
127 
128  if (it != m_ext_to_map.end())
129  content_type = it->second.m_type;
130  }
131  if (content_type.length() == 0)
132  content_type = "application/octet-stream";
133  return content_type;
134 }
135 
136 void yf::HttpFile::Rep::fetch_file(mp::Session &session,
137  Z_HTTP_Request *req,
138  std::string &fname, mp::Package &package,
139  bool raw, bool passthru)
140 {
141  mp::odr o(ODR_ENCODE);
142 
143  if (strcmp(req->method, "GET"))
144  {
145  if (passthru)
146  {
147  package.move();
148  }
149  else
150  {
151  Z_GDU *gdu = o.create_HTTP_Response(session, req, 405);
152  package.response() = gdu;
153  }
154  return;
155  }
156 
157  struct stat st;
158  if (stat(fname.c_str(), &st) == -1 || (st.st_mode & S_IFMT) != S_IFREG)
159  {
160  if (passthru)
161  {
162  package.move();
163  }
164  else
165  {
166  Z_GDU *gdu = o.create_HTTP_Response(session, req, 404);
167  package.response() = gdu;
168  }
169  return;
170  }
171 
172  FILE *f = fopen(fname.c_str(), "rb");
173  if (!f)
174  {
175  Z_GDU *gdu = o.create_HTTP_Response(session, req, 404);
176  package.response() = gdu;
177  return;
178  }
179  if (fseek(f, 0L, SEEK_END) == -1)
180  {
181  fclose(f);
182  Z_GDU *gdu = o.create_HTTP_Response(session, req, 404);
183  package.response() = gdu;
184  return;
185  }
186  long sz = ftell(f);
187  if (sz > 1000000L)
188  {
189  fclose(f);
190  Z_GDU *gdu = o.create_HTTP_Response(session, req, 404);
191  package.response() = gdu;
192  return;
193  }
194  rewind(f);
195  char *fbuf = (char*) odr_malloc(o, sz);
196  if (sz > 0)
197  {
198  if (fread(fbuf, sz, 1, f) != 1)
199  {
200  Z_GDU *gdu = o.create_HTTP_Response(session, req, 500);
201  package.response() = gdu;
202  fclose(f);
203  return;
204  }
205  }
206  fclose(f);
207 
208  Z_GDU *gdu = 0;
209  mp::odr decode(ODR_DECODE);
210  if (raw)
211  {
212  odr_setbuf(decode, (char *) fbuf, sz, 0);
213  int r = z_GDU(decode, &gdu, 0, 0);
214  if (!r)
215  {
216  gdu = o.create_HTTP_Response(session, req, 500);
217  }
218  }
219  else
220  {
221  gdu = o.create_HTTP_Response(session, req, 200);
222  Z_HTTP_Response *hres = gdu->u.HTTP_Response;
223  hres->content_len = sz;
224  hres->content_buf = fbuf;
225  std::string content_type = get_mime_type(fname);
226  z_HTTP_header_add(o, &hres->headers,
227  "Content-Type", content_type.c_str());
228  }
229  package.response() = gdu;
230 }
231 
232 void yf::HttpFile::Rep::fetch_uri(mp::Session &session,
233  Z_HTTP_Request *req, mp::Package &package)
234 {
235  bool sane = true;
236  std::string::size_type p;
237  std::string path = req->path;
238 
239  p = path.find("#");
240  if (p != std::string::npos)
241  path = path.erase(p);
242 
243  p = path.find("?");
244  if (p != std::string::npos)
245  path = path.erase(p);
246 
247  path = mp::util::uri_decode(path);
248 
249  // we don't allow ..
250  p = path.find("..");
251  if (p != std::string::npos)
252  sane = false;
253 
254  if (sane)
255  {
256  AreaList::const_iterator it;
257  for (it = m_area_list.begin(); it != m_area_list.end(); it++)
258  {
259  std::string::size_type l = it->m_url_path_prefix.length();
260 
261  if (path.compare(0, l, it->m_url_path_prefix) == 0)
262  {
263  std::string fname = it->m_file_root + path.substr(l);
264  package.log("http_file", YLOG_LOG, "%s", fname.c_str());
265  fetch_file(session, req, fname, package, it->m_raw,
266  it->m_passthru);
267  return;
268  }
269  }
270  }
271  package.move();
272 }
273 
274 void yf::HttpFile::process(mp::Package &package) const
275 {
276  Z_GDU *gdu = package.request().get();
277  if (gdu && gdu->which == Z_GDU_HTTP_Request)
278  m_p->fetch_uri(package.session(), gdu->u.HTTP_Request, package);
279  else
280  package.move();
281 }
282 
283 void mp::filter::HttpFile::configure(const xmlNode * ptr, bool test_only,
284  const char *path)
285 {
286  for (ptr = ptr->children; ptr; ptr = ptr->next)
287  {
288  if (ptr->type != XML_ELEMENT_NODE)
289  continue;
290  if (!strcmp((const char *) ptr->name, "mimetypes"))
291  {
292  std::string fname = mp::xml::get_text(ptr);
293 
294  mp::PlainFile f;
295 
296  if (!f.open(fname))
297  {
298  throw mp::filter::FilterException
299  ("Can not open mime types file " + fname);
300  }
301 
302  std::vector<std::string> args;
303  while (f.getline(args))
304  {
305  size_t i;
306  for (i = 1; i<args.size(); i++)
307  m_p->m_ext_to_map[args[i]] = args[0];
308  }
309  }
310  else if (!strcmp((const char *) ptr->name, "area"))
311  {
312  xmlNode *a_node = ptr->children;
313  Area a;
314 
315  for (; a_node; a_node = a_node->next)
316  {
317  if (a_node->type != XML_ELEMENT_NODE)
318  continue;
319 
320  if (mp::xml::is_element_mp(a_node, "documentroot"))
321  a.m_file_root = mp::xml::get_text(a_node);
322  else if (mp::xml::is_element_mp(a_node, "prefix"))
323  a.m_url_path_prefix = mp::xml::get_text(a_node);
324  else if (mp::xml::is_element_mp(a_node, "raw"))
325  a.m_raw = mp::xml::get_bool(a_node, false);
326  else if (mp::xml::is_element_mp(a_node, "passthru"))
327  a.m_passthru = mp::xml::get_bool(a_node, false);
328  else
329  throw mp::filter::FilterException
330  ("Bad element "
331  + std::string((const char *) a_node->name)
332  + " in area section"
333  );
334  }
335  if (a.m_file_root.length())
336  {
337  m_p->m_area_list.push_back(a);
338  }
339  }
340  else
341  {
342  throw mp::filter::FilterException
343  ("Bad element "
344  + std::string((const char *) ptr->name)
345  + " in virt_db filter");
346  }
347  }
348 }
349 
350 static mp::filter::Base* filter_creator()
351 {
352  return new mp::filter::HttpFile;
353 }
354 
355 extern "C" {
356  struct metaproxy_1_filter_struct metaproxy_1_filter_http_file = {
357  0,
358  "http_file",
360  };
361 }
362 
363 
364 /*
365  * Local variables:
366  * c-basic-offset: 4
367  * c-file-style: "Stroustrup"
368  * indent-tabs-mode: nil
369  * End:
370  * vim: shiftwidth=4 tabstop=8 expandtab
371  */
372 
void fetch_uri(mp::Session &session, Z_HTTP_Request *req, mp::Package &package)
std::map< std::string, Mime > MimeMap
std::string get_mime_type(std::string &fname)
void fetch_file(mp::Session &session, Z_HTTP_Request *req, std::string &fname, mp::Package &package, bool raw, bool passthru)
boost::scoped_ptr< Rep > m_p
static mp::filter::Base * filter_creator()
struct metaproxy_1_filter_struct metaproxy_1_filter_http_file