metaproxy 1.22.1
filter_http_rewrite.cpp
Go to the documentation of this file.
1/* This file is part of Metaproxy.
2 Copyright (C) Index Data
3
4Metaproxy is free software; you can redistribute it and/or modify it under
5the terms of the GNU General Public License as published by the Free
6Software Foundation; either version 2, or (at your option) any later
7version.
8
9Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or
11FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19#include "config.hpp"
20#include <metaproxy/filter.hpp>
21#include <metaproxy/package.hpp>
22#include <metaproxy/util.hpp>
24#include "html_parser.hpp"
25
26#include <yaz/zgdu.h>
27#include <yaz/log.h>
28
29#include <stack>
30#include <boost/regex.hpp>
31#include <boost/lexical_cast.hpp>
32#include <boost/algorithm/string.hpp>
33
34#include <map>
35#include <stdlib.h>
36
37namespace mp = metaproxy_1;
38namespace yf = mp::filter;
39
40namespace metaproxy_1 {
41 namespace filter {
43 public:
45 boost::regex re;
46 std::string recipe;
47 std::map<int, std::string> group_index;
48 std::string sub_vars(
49 const std::map<std::string, std::string> & vars) const;
50 void parse_groups(std::string pattern);
51 };
52
54 public:
55 std::list<Replace> replace_list;
56 bool test_patterns(
57 std::map<std::string, std::string> &vars,
58 std::string &txt, bool anchor,
59 std::list<boost::regex> &skip_list);
60 };
62 public:
63 boost::regex header;
64 boost::regex attr;
65 boost::regex tag;
66 std::string type;
67 bool reqline;
69 bool exec(std::map<std::string, std::string> &vars,
70 std::string &txt, bool anchor,
71 std::list<boost::regex> &skip_list) const;
72 };
73
75 public:
76 std::string type;
77 boost::regex content_re;
78 std::list<Within> within_list;
79 void configure(const xmlNode *ptr,
80 std::map<std::string, RulePtr > &rules);
81 void quoted_literal(std::string &content,
82 std::map<std::string, std::string> &vars,
83 std::list<boost::regex> & skip_list) const;
84 void parse(int verbose, std::string &content,
85 std::map<std::string, std::string> & vars,
86 std::list<boost::regex> & skip_list ) const;
87 };
89 public:
90 Phase();
92 std::list<Content> content_list;
93 void read_skip_headers(Z_HTTP_Request *hreq,
94 std::list<boost::regex> &skip_list, std::string bind_addr);
95 void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq,
96 std::map<std::string, std::string> & vars, std::string bind_addr) const;
97 void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
98 std::map<std::string, std::string> & vars) const;
99 void rewrite_body(mp::odr & o,
100 const char *content_type,
101 char **content_buf, int *content_len,
102 std::map<std::string, std::string> & vars,
103 std::list<boost::regex> & skip_list ) const;
104 };
106 void openTagStart(const char *tag, int tag_len);
107 void anyTagEnd(const char *tag, int tag_len, int close_it);
108 void attribute(const char *tag, int tag_len,
109 const char *attr, int attr_len,
110 const char *value, int val_len,
111 const char *sep);
112 void closeTag(const char *tag, int tag_len);
113 void text(const char *value, int len);
115 WRBUF m_w;
116 std::stack<std::list<Within>::const_iterator> s_within;
117 std::map<std::string, std::string> &m_vars;
118 std::list<boost::regex> & m_skips;
119 public:
120 Event(const Content *p,
121 std::map<std::string, std::string> &vars,
122 std::list<boost::regex> & skip_list );
123 ~Event();
124 const char *result();
125 };
126 }
127}
128
129yf::HttpRewrite::HttpRewrite() :
130 req_phase(new Phase), res_phase(new Phase)
131{
132}
133
134yf::HttpRewrite::~HttpRewrite()
135{
136}
137
138void yf::HttpRewrite::process(mp::Package & package) const
139{
140 yaz_log(YLOG_LOG, "HttpRewrite begins....");
141 Z_GDU *gdu = package.request().get();
142 //map of request/response vars
143 std::map<std::string, std::string> vars;
144 //we have an http req
145
146 std::list<boost::regex> skip_list;
147
148 if (gdu && gdu->which == Z_GDU_HTTP_Request)
149 {
150 Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
151 mp::odr o;
152 std::string bind_addr = package.origin(). get_bind_address();
153 req_phase->rewrite_reqline(o, hreq, vars, bind_addr);
154 res_phase->read_skip_headers(hreq, skip_list, bind_addr);
155 yaz_log(YLOG_LOG, ">> Request headers");
156 req_phase->rewrite_headers(o, hreq->headers, vars);
157 req_phase->rewrite_body(o,
158 z_HTTP_header_lookup(hreq->headers,
159 "Content-Type"),
160 &hreq->content_buf, &hreq->content_len,
161 vars, skip_list);
162 package.request() = gdu;
163 }
164 package.move();
165 gdu = package.response().get();
166 if (gdu && gdu->which == Z_GDU_HTTP_Response)
167 {
168 Z_HTTP_Response *hres = gdu->u.HTTP_Response;
169 yaz_log(YLOG_LOG, "Response code %d", hres->code);
170 mp::odr o;
171 yaz_log(YLOG_LOG, "<< Respose headers");
172 res_phase->rewrite_headers(o, hres->headers, vars);
173 res_phase->rewrite_body(o,
174 z_HTTP_header_lookup(hres->headers,
175 "Content-Type"),
176 &hres->content_buf, &hres->content_len,
177 vars, skip_list);
178 package.response() = gdu;
179 }
180}
181
182// Read (and remove) the X-Metaproxy-SkipLink headers
183void yf::HttpRewrite::Phase::read_skip_headers(Z_HTTP_Request *hreq,
184 std::list<boost::regex> &skip_list,
185 std::string bind_addr )
186{
187 std::string url(hreq->path);
188 if ( url.substr(0,7) != "http://" && url.substr(0,8) != "https://")
189 { // path was relative, as it often is
190 // make absolute, so we can match the page regex against it
191 const char *host = z_HTTP_header_lookup(hreq->headers, "Host");
192 std::string proto;
193 if (bind_addr.find("ssl:") == 0) {
194 proto = "https";
195 } else {
196 proto = "http";
197 }
198 if (host)
199 url = proto + "://" + std::string(host) + hreq->path ;
200 }
201
202 while ( const char *hv = z_HTTP_header_remove( &(hreq->headers),
203 "X-Metaproxy-SkipLink") )
204 {
205 yaz_log(YLOG_LOG,"Found SkipLink '%s'", hv );
206 const char *p = strchr(hv,' ');
207 if (!p)
208 continue; // should not happen
209 std::string page(hv,p);
210 std::string link(p+1);
211 boost::regex pagere(page);
212 if ( boost::regex_search(url, pagere) )
213 {
214 yaz_log(YLOG_LOG,"SkipLink '%s' matches URL %s",
215 page.c_str(), url.c_str() );
216 boost::regex linkre(link);
217 skip_list.push_back(linkre);
218 }
219 else
220 {
221 yaz_log(YLOG_LOG,"SkipLink ignored, '%s' does not match '%s'",
222 url.c_str(), page.c_str() );
223 }
224 }
225}
226
227
228void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
229 Z_HTTP_Request *hreq,
230 std::map<std::string, std::string> & vars,
231 std::string bind_addr) const
232{
233 std::string proto;
234 if (bind_addr.find("ssl:") == 0) {
235 proto = "https";
236 } else {
237 proto = "http";
238 }
239 yaz_log(YLOG_LOG,"rewrite_reqline: p='%s' ba='%s'",
240 hreq->path, proto.c_str() );
241 std::string path;
242 if ((strstr(hreq->path, "http://") == hreq->path) ||
243 (strstr(hreq->path, "https://") == hreq->path) )
244 {
245 yaz_log(YLOG_LOG, "Path in the method line is absolute, "
246 "possibly a proxy request"); // the usual case with cf_proxy
247 path = hreq->path;
248 }
249 else
250 {
251 const char *host = z_HTTP_header_lookup(hreq->headers, "Host");
252 if (!host)
253 return;
254
255 path = proto + "://";
256 path += host;
257 path += hreq->path;
258 }
259
260 std::list<Content>::const_iterator cit = content_list.begin();
261 for (; cit != content_list.end(); cit++)
262 if (cit->type == "headers")
263 break;
264
265 if (cit == content_list.end())
266 return;
267
268 std::list<Within>::const_iterator it = cit->within_list.begin();
269 for (; it != cit->within_list.end(); it++)
270 if (it->reqline)
271 {
272 yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
273 std::list<boost::regex> dummy_skip_list; // no skips here!
274 if (it->exec(vars, path, true, dummy_skip_list))
275 {
276 yaz_log(YLOG_LOG, "Rewritten request URL is %s", path.c_str());
277 hreq->path = odr_strdup(o, path.c_str());
278 }
279 }
280}
281
282void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
283 Z_HTTP_Header *headers,
284 std::map<std::string, std::string> & vars ) const
285{
286 std::list<Content>::const_iterator cit = content_list.begin();
287 for (; cit != content_list.end(); cit++)
288 if (cit->type == "headers")
289 break;
290
291 if (cit == content_list.end())
292 return;
293
294 for (Z_HTTP_Header *header = headers; header; header = header->next)
295 {
296 std::list<Within>::const_iterator it = cit->within_list.begin();
297 for (; it != cit->within_list.end(); it++)
298 {
299 if (!it->header.empty() &&
300 regex_match(header->name, it->header))
301 {
302 // Match and replace only the header value
303 std::string hval(header->value);
304 std::list<boost::regex> dummy_skip_list; // no skips here!
305 if (it->exec(vars, hval, true, dummy_skip_list))
306 {
307 header->value = odr_strdup(o, hval.c_str());
308 }
309 }
310 }
311 }
312}
313
314void yf::HttpRewrite::Phase::rewrite_body(
315 mp::odr &o,
316 const char *content_type,
317 char **content_buf,
318 int *content_len,
319 std::map<std::string, std::string> & vars,
320 std::list<boost::regex> & skip_list ) const
321{
322 if (*content_len == 0)
323 return;
324 if (!content_type) {
325 yaz_log(YLOG_LOG, "rewrite_body: null content_type, can not rewrite");
326 return;
327 }
328 std::list<Content>::const_iterator cit = content_list.begin();
329 for (; cit != content_list.end(); cit++)
330 {
331 yaz_log(YLOG_LOG, "rewrite_body: content_type=%s type=%s",
332 content_type, cit->type.c_str());
333 if (cit->type != "headers"
334 && regex_match(content_type, cit->content_re))
335 break;
336 }
337 if (cit == content_list.end()) {
338 yaz_log(YLOG_LOG,"rewrite_body: No content rule matched %s, not rewriting",
339 content_type );
340 return;
341 }
342
343 int i;
344 for (i = 0; i < *content_len; i++)
345 if ((*content_buf)[i] == 0) {
346 yaz_log(YLOG_LOG,"rewrite_body: Looks like binary stuff, not rewriting");
347 return; // binary content. skip
348 }
349
350 std::string content(*content_buf, *content_len);
351 cit->parse(m_verbose, content, vars, skip_list);
352 *content_buf = odr_strdup(o, content.c_str());
353 *content_len = strlen(*content_buf);
354}
355
356yf::HttpRewrite::Event::Event(const Content *p,
357 std::map<std::string, std::string> & vars,
358 std::list<boost::regex> & skip_list
359 ) : m_content(p), m_vars(vars), m_skips(skip_list)
360{
361 m_w = wrbuf_alloc();
362}
363
364yf::HttpRewrite::Event::~Event()
365{
366 wrbuf_destroy(m_w);
367}
368
369const char *yf::HttpRewrite::Event::result()
370{
371 return wrbuf_cstr(m_w);
372}
373
374void yf::HttpRewrite::Event::openTagStart(const char *tag, int tag_len)
375{
376 wrbuf_putc(m_w, '<');
377 wrbuf_write(m_w, tag, tag_len);
378
379 std::string t(tag, tag_len);
380 std::list<Within>::const_iterator it = m_content->within_list.begin();
381 for (; it != m_content->within_list.end(); it++)
382 {
383 if (!it->tag.empty() && regex_match(t, it->tag))
384 {
385 if (!it->attr.empty() && regex_match("#text", it->attr))
386 {
387 s_within.push(it);
388 return;
389 }
390 }
391 }
392}
393
394void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len,
395 int close_it)
396{
397 if (close_it)
398 {
399 if (!s_within.empty())
400 {
401 std::list<Within>::const_iterator it = s_within.top();
402 std::string t(tag, tag_len);
403 if (regex_match(t, it->tag))
404 s_within.pop();
405 }
406 }
407 if (close_it)
408 wrbuf_putc(m_w, '/');
409 wrbuf_putc(m_w, '>');
410}
411
412void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len,
413 const char *attr, int attr_len,
414 const char *value, int val_len,
415 const char *sep)
416{
417 std::list<Within>::const_iterator it = m_content->within_list.begin();
418 bool subst = false;
419
420 for (; it != m_content->within_list.end(); it++)
421 {
422 std::string t(tag, tag_len);
423 if (it->tag.empty() || regex_match(t, it->tag))
424 {
425 std::string a(attr, attr_len);
426 if (!it->attr.empty() && regex_match(a, it->attr))
427 subst = true;
428 }
429 if (subst)
430 break;
431 }
432
433 wrbuf_putc(m_w, ' ');
434 wrbuf_write(m_w, attr, attr_len);
435 if (value)
436 {
437 wrbuf_puts(m_w, "=");
438 wrbuf_puts(m_w, sep);
439
440 std::string output;
441 if (subst)
442 {
443 std::string s(value, val_len);
444 it->exec(m_vars, s, true, m_skips);
445 wrbuf_puts(m_w, s.c_str());
446 }
447 else
448 wrbuf_write(m_w, value, val_len);
449 wrbuf_puts(m_w, sep);
450 }
451}
452
453void yf::HttpRewrite::Event::closeTag(const char *tag, int tag_len)
454{
455 if (!s_within.empty())
456 {
457 std::list<Within>::const_iterator it = s_within.top();
458 std::string t(tag, tag_len);
459 if (regex_match(t, it->tag))
460 s_within.pop();
461 }
462 wrbuf_puts(m_w, "</");
463 wrbuf_write(m_w, tag, tag_len);
464}
465
466void yf::HttpRewrite::Event::text(const char *value, int len)
467{
468 std::list<Within>::const_iterator it = m_content->within_list.end();
469 if (!s_within.empty())
470 it = s_within.top();
471 if (it != m_content->within_list.end())
472 {
473 std::string s(value, len);
474 it->exec(m_vars, s, false, m_skips);
475 wrbuf_puts(m_w, s.c_str());
476 }
477 else
478 wrbuf_write(m_w, value, len);
479}
480
482 std::string &content,
483 std::map<std::string, std::string> &vars,
484 mp::filter::HttpRewrite::RulePtr ruleptr,
485 bool html_context,
486 std::list<boost::regex> &skip_list)
487{
488 bool replace = false;
489 std::string res;
490 const char *cp = content.c_str();
491 const char *cp0 = cp;
492 while (*cp)
493 {
494 if (html_context && !strncmp(cp, "&quot;", 6))
495 {
496 cp += 6;
497 res.append(cp0, cp - cp0);
498 cp0 = cp;
499 while (*cp)
500 {
501 if (!strncmp(cp, "&quot;", 6))
502 break;
503 if (*cp == '\n')
504 break;
505 cp++;
506 }
507 if (!*cp)
508 break;
509 std::string s(cp0, cp - cp0);
510 if (ruleptr->test_patterns(vars, s, true, skip_list))
511 replace = true;
512 cp0 = cp;
513 res.append(s);
514 }
515 else if (*cp == '"' || *cp == '\'')
516 {
517 int m = *cp;
518 cp++;
519 res.append(cp0, cp - cp0);
520 cp0 = cp;
521 while (*cp)
522 {
523 if (cp[-1] != '\\' && *cp == m)
524 break;
525 if (*cp == '\n')
526 break;
527 cp++;
528 }
529 if (!*cp)
530 break;
531 std::string s(cp0, cp - cp0);
532 if (ruleptr->test_patterns(vars, s, true, skip_list))
533 replace = true;
534 cp0 = cp;
535 res.append(s);
536 }
537 else if (*cp == '/' && cp[1] == '/')
538 {
539 while (cp[1] && cp[1] != '\n')
540 cp++;
541 }
542 cp++;
543 }
544 res.append(cp0, cp - cp0);
545 content = res;
546 return replace;
547}
548
549bool yf::HttpRewrite::Within::exec(
550 std::map<std::string, std::string> & vars,
551 std::string & txt, bool anchor,
552 std::list<boost::regex> & skip_list) const
553{
554 if (type == "quoted-literal")
555 {
556 return embed_quoted_literal(txt, vars, rule, true, skip_list);
557 }
558 else
559 {
560 return rule->test_patterns(vars, txt, anchor, skip_list);
561 }
562}
563
564bool yf::HttpRewrite::Rule::test_patterns(
565 std::map<std::string, std::string> & vars,
566 std::string & txt, bool anchor,
567 std::list<boost::regex> & skip_list )
568{
569 bool replaces = false;
570 bool first = anchor;
571 std::string out;
572 std::string::const_iterator start, end;
573 start = txt.begin();
574 end = txt.end();
575 while (1)
576 {
577 std::list<Replace>::iterator bit = replace_list.end();
578 boost::smatch bwhat;
579 bool match_one = false;
580 {
581 std::list<Replace>::iterator it = replace_list.begin();
582 for (; it != replace_list.end(); it++)
583 {
584 if (it->start_anchor && !first)
585 continue;
586 boost::smatch what;
587 if (regex_search(start, end, what, it->re))
588 {
589 if (!match_one || what[0].first < bwhat[0].first)
590 {
591 bwhat = what;
592 bit = it;
593 }
594 match_one = true;
595 }
596 }
597 if (!match_one)
598 break;
599 }
600 first = false;
601 replaces = true;
602 size_t i;
603 for (i = 1; i < bwhat.size(); ++i)
604 {
605 //check if the group is named
606 std::map<int, std::string>::const_iterator git
607 = bit->group_index.find(i);
608 if (git != bit->group_index.end())
609 { //it is
610 vars[git->second] = bwhat[i];
611 }
612
613 }
614 // Compare against skip_list
615 bool skipthis = false;
616 std::list<boost::regex>::iterator si = skip_list.begin();
617 for ( ; si != skip_list.end(); si++) {
618 if ( boost::regex_search(bwhat.str(0), *si) )
619 {
620 skipthis = true;
621 break;
622 }
623 }
624 //prepare replacement string
625 std::string rvalue = bit->sub_vars(vars);
626 out.append(start, bwhat[0].first);
627 if ( skipthis )
628 {
629 yaz_log(YLOG_LOG,"! Not rewriting '%s', skiplist match",
630 bwhat.str(0).c_str() );
631 out.append(bwhat.str(0).c_str());
632 }
633 else
634 {
635 yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
636 bwhat.str(0).c_str(), rvalue.c_str());
637 out.append(rvalue);
638 }
639 start = bwhat[0].second; //move search forward
640 }
641 out.append(start, end);
642 txt = out;
643 return replaces;
644}
645
646void yf::HttpRewrite::Replace::parse_groups(std::string pattern)
647{
648 int gnum = 0;
649 bool esc = false;
650 const std::string &str = pattern;
651 std::string res;
652 start_anchor = str[0] == '^';
653 yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
654 for (size_t i = 0; i < str.size(); ++i)
655 {
656 res += str[i];
657 if (!esc && str[i] == '\\')
658 {
659 esc = true;
660 continue;
661 }
662 if (!esc && str[i] == '(') //group starts
663 {
664 gnum++;
665 if (i+1 < str.size() && str[i+1] == '?') //group with attrs
666 {
667 i++;
668 if (i+1 < str.size() && str[i+1] == ':') //non-capturing
669 {
670 if (gnum > 0) gnum--;
671 res += str[i];
672 i++;
673 res += str[i];
674 continue;
675 }
676 if (i+1 < str.size() && str[i+1] == 'P') //optional, python
677 i++;
678 if (i+1 < str.size() && str[i+1] == '<') //named
679 {
680 i++;
681 std::string gname;
682 bool term = false;
683 while (++i < str.size())
684 {
685 if (str[i] == '>') { term = true; break; }
686 if (!isalnum(str[i]))
687 throw mp::filter::FilterException
688 ("Only alphanumeric chars allowed, found "
689 " in '"
690 + str
691 + "' at "
692 + boost::lexical_cast<std::string>(i));
693 gname += str[i];
694 }
695 if (!term)
696 throw mp::filter::FilterException
697 ("Unterminated group name '" + gname
698 + " in '" + str +"'");
699 group_index[gnum] = gname;
700 yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
701 gname.c_str(), gnum);
702 }
703 }
704 }
705 esc = false;
706 }
707 re = res;
708}
709
710std::string yf::HttpRewrite::Replace::sub_vars(
711 const std::map<std::string, std::string> & vars) const
712{
713 std::string out;
714 bool esc = false;
715 const std::string & in = recipe;
716 for (size_t i = 0; i < in.size(); ++i)
717 {
718 if (!esc && in[i] == '\\')
719 {
720 esc = true;
721 continue;
722 }
723 if (!esc && in[i] == '$') //var
724 {
725 if (i+1 < in.size() && in[i+1] == '{') //ref prefix
726 {
727 ++i;
728 std::string name;
729 bool term = false;
730 while (++i < in.size())
731 {
732 if (in[i] == '}') { term = true; break; }
733 name += in[i];
734 }
735 if (!term) throw mp::filter::FilterException
736 ("Unterminated var ref in '"+in+"' at "
737 + boost::lexical_cast<std::string>(i));
738 std::map<std::string, std::string>::const_iterator it
739 = vars.find(name);
740 if (it != vars.end())
741 {
742 out += it->second;
743 }
744 }
745 else
746 {
747 throw mp::filter::FilterException
748 ("Malformed or trimmed var ref in '"
749 +in+"' at "+boost::lexical_cast<std::string>(i));
750 }
751 continue;
752 }
753 //passthru
754 out += in[i];
755 esc = false;
756 }
757 return out;
758}
759
760yf::HttpRewrite::Phase::Phase() : m_verbose(0)
761{
762}
763
764void yf::HttpRewrite::Content::parse(
765 int verbose,
766 std::string &content,
767 std::map<std::string, std::string> &vars,
768 std::list<boost::regex> & skip_list ) const
769{
770 if (type == "html")
771 {
772 HTMLParser parser;
773 Event ev(this, vars, skip_list);
774
775 parser.set_verbose(verbose);
776
777 parser.parse(ev, content.c_str());
778 content = ev.result();
779 }
780 if (type == "quoted-literal")
781 {
782 quoted_literal(content, vars, skip_list);
783 }
784}
785
786void yf::HttpRewrite::Content::quoted_literal(
787 std::string &content,
788 std::map<std::string, std::string> &vars,
789 std::list<boost::regex> & skip_list ) const
790{
791 std::list<Within>::const_iterator it = within_list.begin();
792 if (it != within_list.end())
793 embed_quoted_literal(content, vars, it->rule, false, skip_list);
794}
795
796void yf::HttpRewrite::Content::configure(
797 const xmlNode *ptr, std::map<std::string, RulePtr > &rules)
798{
799 for (; ptr; ptr = ptr->next)
800 {
801 if (ptr->type != XML_ELEMENT_NODE)
802 continue;
803 if (!strcmp((const char *) ptr->name, "within"))
804 {
805 static const char *names[7] =
806 { "header", "attr", "tag", "rule", "reqline", "type", 0 };
807 std::string values[6];
808 mp::xml::parse_attr(ptr, names, values);
809 Within w;
810 if (values[0].length() > 0)
811 w.header.assign(values[0], boost::regex_constants::icase);
812 if (values[1].length() > 0)
813 w.attr.assign(values[1], boost::regex_constants::icase);
814 if (values[2].length() > 0)
815 w.tag.assign(values[2], boost::regex_constants::icase);
816
817 std::vector<std::string> rulenames;
818 boost::split(rulenames, values[3], boost::is_any_of(","));
819 if (rulenames.size() == 0)
820 {
821 throw mp::filter::FilterException
822 ("Empty rule in '" + values[3] +
823 "' in http_rewrite filter");
824 }
825 else if (rulenames.size() == 1)
826 {
827 std::map<std::string,RulePtr>::const_iterator it =
828 rules.find(rulenames[0]);
829 if (it == rules.end())
830 throw mp::filter::FilterException
831 ("Reference to non-existing rule '" + rulenames[0] +
832 "' in http_rewrite filter");
833 w.rule = it->second;
834
835 }
836 else
837 {
838 RulePtr rule(new Rule);
839 size_t i;
840 for (i = 0; i < rulenames.size(); i++)
841 {
842 std::map<std::string,RulePtr>::const_iterator it =
843 rules.find(rulenames[i]);
844 if (it == rules.end())
845 throw mp::filter::FilterException
846 ("Reference to non-existing rule '" + rulenames[i] +
847 "' in http_rewrite filter");
848 RulePtr subRule = it->second;
849 std::list<Replace>::iterator rit =
850 subRule->replace_list.begin();
851 for (; rit != subRule->replace_list.end(); rit++)
852 rule->replace_list.push_back(*rit);
853 }
854 w.rule = rule;
855 }
856 w.reqline = values[4] == "1";
857 w.type = values[5];
858 if (w.type.empty() || w.type == "quoted-literal")
859 ;
860 else
861 throw mp::filter::FilterException
862 ("within type must be quoted-literal or none in "
863 " in http_rewrite filter");
864 within_list.push_back(w);
865 }
866 }
867}
868
869void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase)
870{
871 static const char *names[2] = { "verbose", 0 };
872 std::string values[1];
873 values[0] = "0";
874 mp::xml::parse_attr(ptr, names, values);
875
876 phase.m_verbose = atoi(values[0].c_str());
877
878 std::map<std::string, RulePtr > rules;
879 for (ptr = ptr->children; ptr; ptr = ptr->next)
880 {
881 if (ptr->type != XML_ELEMENT_NODE)
882 continue;
883 else if (!strcmp((const char *) ptr->name, "rule"))
884 {
885 static const char *names[2] = { "name", 0 };
886 std::string values[1];
887 values[0] = "default";
888 mp::xml::parse_attr(ptr, names, values);
889
890 RulePtr rule(new Rule);
891 for (xmlNode *p = ptr->children; p; p = p->next)
892 {
893 if (p->type != XML_ELEMENT_NODE)
894 continue;
895 if (!strcmp((const char *) p->name, "rewrite"))
896 {
897 Replace replace;
898 std::string from;
899 const struct _xmlAttr *attr;
900 for (attr = p->properties; attr; attr = attr->next)
901 {
902 if (!strcmp((const char *) attr->name, "from"))
903 from = mp::xml::get_text(attr->children);
904 else if (!strcmp((const char *) attr->name, "to"))
905 replace.recipe = mp::xml::get_text(attr->children);
906 else
907 throw mp::filter::FilterException
908 ("Bad attribute "
909 + std::string((const char *) attr->name)
910 + " in rewrite section of http_rewrite");
911 }
912 yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
913 from.c_str(), replace.recipe.c_str());
914 if (!from.empty())
915 {
916 replace.parse_groups(from);
917 rule->replace_list.push_back(replace);
918 }
919 }
920 else
921 throw mp::filter::FilterException
922 ("Bad element "
923 + std::string((const char *) p->name)
924 + " in http_rewrite filter");
925 }
926 rules[values[0]] = rule;
927 }
928 else if (!strcmp((const char *) ptr->name, "content"))
929 {
930 static const char *names[3] =
931 { "type", "mime", 0 };
932 std::string values[2];
933 mp::xml::parse_attr(ptr, names, values);
934 if (values[0].empty())
935 {
936 throw mp::filter::FilterException
937 ("Missing attribute, type for for element "
938 + std::string((const char *) ptr->name)
939 + " in http_rewrite filter");
940 }
941 Content c;
942
943 c.type = values[0];
944 if (!values[1].empty())
945 c.content_re.assign(values[1], boost::regex::icase);
946 c.configure(ptr->children, rules);
947 phase.content_list.push_back(c);
948 }
949 else
950 {
951 throw mp::filter::FilterException
952 ("Bad element "
953 + std::string((const char *) ptr->name)
954 + " in http_rewrite filter");
955 }
956 }
957}
958
959void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
960 const char *path)
961{
962 for (ptr = ptr->children; ptr; ptr = ptr->next)
963 {
964 if (ptr->type != XML_ELEMENT_NODE)
965 continue;
966 else if (!strcmp((const char *) ptr->name, "request"))
967 {
969 }
970 else if (!strcmp((const char *) ptr->name, "response"))
971 {
973 }
974 else
975 {
976 throw mp::filter::FilterException
977 ("Bad element "
978 + std::string((const char *) ptr->name)
979 + " in http_rewrite filter");
980 }
981 }
982}
983
984static mp::filter::Base* filter_creator()
985{
986 return new mp::filter::HttpRewrite;
987}
988
989extern "C" {
990 struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
991 0,
992 "http_rewrite",
994 };
995}
996
997
998/*
999 * Local variables:
1000 * c-basic-offset: 4
1001 * c-file-style: "Stroustrup"
1002 * indent-tabs-mode: nil
1003 * End:
1004 * vim: shiftwidth=4 tabstop=8 expandtab
1005 */
1006
void parse(HTMLParserEvent &event, const char *str) const
void configure(const xmlNode *ptr, std::map< std::string, RulePtr > &rules)
void quoted_literal(std::string &content, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list) const
void parse(int verbose, std::string &content, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list) const
void anyTagEnd(const char *tag, int tag_len, int close_it)
std::stack< std::list< Within >::const_iterator > s_within
void closeTag(const char *tag, int tag_len)
void openTagStart(const char *tag, int tag_len)
void text(const char *value, int len)
std::map< std::string, std::string > & m_vars
void attribute(const char *tag, int tag_len, const char *attr, int attr_len, const char *value, int val_len, const char *sep)
void rewrite_reqline(mp::odr &o, Z_HTTP_Request *hreq, std::map< std::string, std::string > &vars, std::string bind_addr) const
void rewrite_body(mp::odr &o, const char *content_type, char **content_buf, int *content_len, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list) const
void read_skip_headers(Z_HTTP_Request *hreq, std::list< boost::regex > &skip_list, std::string bind_addr)
void rewrite_headers(mp::odr &o, Z_HTTP_Header *headers, std::map< std::string, std::string > &vars) const
std::string sub_vars(const std::map< std::string, std::string > &vars) const
bool test_patterns(std::map< std::string, std::string > &vars, std::string &txt, bool anchor, std::list< boost::regex > &skip_list)
bool exec(std::map< std::string, std::string > &vars, std::string &txt, bool anchor, std::list< boost::regex > &skip_list) const
void configure_phase(const xmlNode *ptr, Phase &phase)
boost::scoped_ptr< Phase > res_phase
boost::scoped_ptr< Phase > req_phase
static mp::filter::Base * filter_creator()
static bool embed_quoted_literal(std::string &content, std::map< std::string, std::string > &vars, mp::filter::HttpRewrite::RulePtr ruleptr, bool html_context, std::list< boost::regex > &skip_list)
struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite