20#include <metaproxy/filter.hpp>
21#include <metaproxy/package.hpp>
22#include <metaproxy/util.hpp>
30#include <boost/regex.hpp>
31#include <boost/lexical_cast.hpp>
32#include <boost/algorithm/string.hpp>
38namespace yf = mp::filter;
49 const std::map<std::string, std::string> & vars)
const;
57 std::map<std::string, std::string> &vars,
58 std::string &txt,
bool anchor,
59 std::list<boost::regex> &skip_list);
// Declaration matching the Within::exec definition later in this file:
// applies a within-section to `txt` (modified in place) and returns a bool.
// `vars` and `skip_list` are passed on to the rule's test_patterns call.
// NOTE(review): the enclosing class header is not visible in this view.
69 bool exec(std::map<std::string, std::string> &vars,
70 std::string &txt,
bool anchor,
71 std::list<boost::regex> &skip_list)
const;
80 std::map<std::string, RulePtr > &rules);
82 std::map<std::string, std::string> &vars,
83 std::list<boost::regex> & skip_list)
const;
// Declaration matching the Content::parse definition later in this file:
// rewrites `content` in place. rewrite_body passes the phase's m_verbose as
// `verbose`.
84 void parse(
int verbose, std::string &content,
85 std::map<std::string, std::string> & vars,
86 std::list<boost::regex> & skip_list )
const;
94 std::list<boost::regex> &skip_list, std::string bind_addr);
96 std::map<std::string, std::string> & vars, std::string bind_addr)
const;
98 std::map<std::string, std::string> & vars)
const;
100 const char *content_type,
101 char **content_buf,
int *content_len,
102 std::map<std::string, std::string> & vars,
103 std::list<boost::regex> & skip_list )
const;
// Event callback declaration: called for the end of an opening tag.
// `tag` is not assumed NUL-terminated; the definition builds a std::string
// from (tag, tag_len).
107 void anyTagEnd(
const char *tag,
int tag_len,
int close_it);
108 void attribute(
const char *tag,
int tag_len,
109 const char *attr,
int attr_len,
110 const char *value,
int val_len,
// Event callback declaration: called for a closing tag; the definition
// writes "</" followed by the tag name to the output buffer.
112 void closeTag(
const char *tag,
int tag_len);
// Event callback declaration: called for character data; `value` is a
// (pointer, length) pair and the definition copies it with std::string(value, len).
113 void text(
const char *value,
int len);
116 std::stack<std::list<Within>::const_iterator>
s_within;
117 std::map<std::string, std::string> &
m_vars;
121 std::map<std::string, std::string> &vars,
122 std::list<boost::regex> & skip_list );
129yf::HttpRewrite::HttpRewrite() :
134yf::HttpRewrite::~HttpRewrite()
// Filter entry point: rewrites the HTTP request and/or HTTP response carried
// by `package`.
//
// Request side (only when the request GDU is Z_GDU_HTTP_Request):
//   1. req_phase->rewrite_reqline() rewrites the request line/path,
//   2. res_phase->read_skip_headers() fills `skip_list` from the request
//      headers (it is the *response* phase object that is called here),
//   3. req_phase->rewrite_headers() and rewrite_body() rewrite the request,
//   4. the GDU is stored back into package.request().
// Response side (only when the response GDU is Z_GDU_HTTP_Response):
//   res_phase->rewrite_headers() and rewrite_body() are applied and the GDU
//   is stored back into package.response().
//
// `vars` and `skip_list` are locals of this call and are handed to both the
// request and the response rewriting, so values captured while rewriting the
// request are visible when the response is rewritten.
//
// NOTE(review): several lines of this function are not visible in this view
// (braces, the declaration of `o`, the trailing arguments of rewrite_body,
// and whatever happens between the two halves) - confirm against the full
// file. The log text "Respose headers" looks like a typo for "Response".
138void yf::HttpRewrite::process(mp::Package & package)
const
140 yaz_log(YLOG_LOG,
"HttpRewrite begins....");
141 Z_GDU *gdu = package.request().get();
143 std::map<std::string, std::string> vars;
146 std::list<boost::regex> skip_list;
148 if (gdu && gdu->which == Z_GDU_HTTP_Request)
150 Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
152 std::string bind_addr = package.origin(). get_bind_address();
153 req_phase->rewrite_reqline(o, hreq, vars, bind_addr);
154 res_phase->read_skip_headers(hreq, skip_list, bind_addr);
155 yaz_log(YLOG_LOG,
">> Request headers");
156 req_phase->rewrite_headers(o, hreq->headers, vars);
157 req_phase->rewrite_body(o,
158 z_HTTP_header_lookup(hreq->headers,
160 &hreq->content_buf, &hreq->content_len,
162 package.request() = gdu;
165 gdu = package.response().get();
166 if (gdu && gdu->which == Z_GDU_HTTP_Response)
168 Z_HTTP_Response *hres = gdu->u.HTTP_Response;
169 yaz_log(YLOG_LOG,
"Response code %d", hres->code);
171 yaz_log(YLOG_LOG,
"<< Respose headers");
172 res_phase->rewrite_headers(o, hres->headers, vars);
173 res_phase->rewrite_body(o,
174 z_HTTP_header_lookup(hres->headers,
176 &hres->content_buf, &hres->content_len,
178 package.response() = gdu;
// Collects "skip link" patterns from the request into `skip_list`.
//
// A URL for the request is derived first: hreq->path is used as-is when it
// already starts with "http://" or "https://"; otherwise it is assembled as
// proto + "://" + Host header + path, where the choice of `proto` depends on
// whether `bind_addr` starts with "ssl:".
//
// Every "X-Metaproxy-SkipLink" header is then *removed* from the request
// (z_HTTP_header_remove) and its value is split at the first space into
//   page - a regex that is searched in the request URL, and
//   link - a regex that is appended to `skip_list` when `page` matched.
// Non-matching entries are only logged.
//
// NOTE(review): the lines that handle a missing Host header and a header
// value without a space (strchr returning null) are not visible in this
// view - confirm both are guarded. boost::regex construction throws on an
// invalid pattern and the patterns here come from request headers.
183void yf::HttpRewrite::Phase::read_skip_headers(Z_HTTP_Request *hreq,
184 std::list<boost::regex> &skip_list,
185 std::string bind_addr )
187 std::string url(hreq->path);
188 if ( url.substr(0,7) !=
"http://" && url.substr(0,8) !=
"https://")
191 const char *host = z_HTTP_header_lookup(hreq->headers,
"Host");
193 if (bind_addr.find(
"ssl:") == 0) {
199 url = proto +
"://" + std::string(host) + hreq->path ;
202 while (
const char *hv = z_HTTP_header_remove( &(hreq->headers),
203 "X-Metaproxy-SkipLink") )
205 yaz_log(YLOG_LOG,
"Found SkipLink '%s'", hv );
206 const char *p = strchr(hv,
' ');
209 std::string page(hv,p);
210 std::string link(p+1);
211 boost::regex pagere(page);
212 if ( boost::regex_search(url, pagere) )
214 yaz_log(YLOG_LOG,
"SkipLink '%s' matches URL %s",
215 page.c_str(), url.c_str() );
216 boost::regex linkre(link);
217 skip_list.push_back(linkre);
221 yaz_log(YLOG_LOG,
"SkipLink ignored, '%s' does not match '%s'",
222 url.c_str(), page.c_str() );
// Rewrites the path of the HTTP request line.
//
// The protocol is chosen from `bind_addr` (prefix "ssl:" is tested). If
// hreq->path already begins with "http://" or "https://" it is treated as an
// absolute URL (logged as a possible proxy request); otherwise a URL is built
// starting with proto + "://" using the Host header.
//
// The rules applied come from the content entry whose type is "headers";
// if there is none the function has nothing to apply. Each within entry of
// that content is executed against the URL with anchor=true and an empty,
// local skip list, and on a successful rewrite hreq->path is replaced by an
// ODR-allocated copy of the new URL.
//
// NOTE(review): the condition selecting which within entries apply (inside
// the loop, before the log call) and the remainder of the URL assembly are
// not visible in this view.
228void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
229 Z_HTTP_Request *hreq,
230 std::map<std::string, std::string> & vars,
231 std::string bind_addr)
const
234 if (bind_addr.find(
"ssl:") == 0) {
239 yaz_log(YLOG_LOG,
"rewrite_reqline: p='%s' ba='%s'",
240 hreq->path, proto.c_str() );
242 if ((strstr(hreq->path,
"http://") == hreq->path) ||
243 (strstr(hreq->path,
"https://") == hreq->path) )
245 yaz_log(YLOG_LOG,
"Path in the method line is absolute, "
246 "possibly a proxy request");
251 const char *host = z_HTTP_header_lookup(hreq->headers,
"Host");
255 path = proto +
"://";
260 std::list<Content>::const_iterator cit = content_list.begin();
261 for (; cit != content_list.end(); cit++)
262 if (cit->type ==
"headers")
265 if (cit == content_list.end())
268 std::list<Within>::const_iterator it = cit->within_list.begin();
269 for (; it != cit->within_list.end(); it++)
272 yaz_log(YLOG_LOG,
"Proxy request URL is %s", path.c_str());
273 std::list<boost::regex> dummy_skip_list;
274 if (it->exec(vars, path,
true, dummy_skip_list))
276 yaz_log(YLOG_LOG,
"Rewritten request URL is %s", path.c_str());
277 hreq->path = odr_strdup(o, path.c_str());
// Rewrites HTTP header values in place.
//
// Uses the content entry whose type is "headers" (nothing is applied when
// no such entry exists). For every header in the linked list, each within
// entry that has a non-empty `header` regex matching the header *name*
// (regex_match, i.e. the whole name must match) is executed on the header
// value with anchor=true. Skip lists do not apply to headers: an empty local
// list is passed. When exec reports a change, header->value is replaced by
// an ODR-allocated copy, so the new string lives as long as `o`.
282void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
283 Z_HTTP_Header *headers,
284 std::map<std::string, std::string> & vars )
const
286 std::list<Content>::const_iterator cit = content_list.begin();
287 for (; cit != content_list.end(); cit++)
288 if (cit->type ==
"headers")
291 if (cit == content_list.end())
294 for (Z_HTTP_Header *header = headers; header; header = header->next)
296 std::list<Within>::const_iterator it = cit->within_list.begin();
297 for (; it != cit->within_list.end(); it++)
299 if (!it->header.empty() &&
300 regex_match(header->name, it->header))
303 std::string hval(header->value);
304 std::list<boost::regex> dummy_skip_list;
305 if (it->exec(vars, hval,
true, dummy_skip_list))
307 header->value = odr_strdup(o, hval.c_str());
// Rewrites a message body in place.
//
// *content_buf / *content_len describe the body; `content_type` is the value
// of the Content-Type header as looked up by the caller (it may be null, in
// which case a "can not rewrite" message is logged).
//
// The first content entry whose type is not "headers" and whose content_re
// matches `content_type` (whole-string regex_match) is selected; without a
// match the body is left alone. The body is also left alone when it contains
// a NUL byte ("binary stuff"). Otherwise the body is copied into a
// std::string, passed to Content::parse, and written back as an
// ODR-allocated C string with *content_len recomputed via strlen. The NUL
// scan is what makes the strlen-based length safe for the input.
//
// NOTE(review): the statements guarded by the zero-length and null
// content_type checks (presumably early returns) are not visible in this
// view - confirm.
314void yf::HttpRewrite::Phase::rewrite_body(
316 const char *content_type,
319 std::map<std::string, std::string> & vars,
320 std::list<boost::regex> & skip_list )
const
322 if (*content_len == 0)
325 yaz_log(YLOG_LOG,
"rewrite_body: null content_type, can not rewrite");
328 std::list<Content>::const_iterator cit = content_list.begin();
329 for (; cit != content_list.end(); cit++)
331 yaz_log(YLOG_LOG,
"rewrite_body: content_type=%s type=%s",
332 content_type, cit->type.c_str());
333 if (cit->type !=
"headers"
334 && regex_match(content_type, cit->content_re))
337 if (cit == content_list.end()) {
338 yaz_log(YLOG_LOG,
"rewrite_body: No content rule matched %s, not rewriting",
344 for (i = 0; i < *content_len; i++)
345 if ((*content_buf)[i] == 0) {
346 yaz_log(YLOG_LOG,
"rewrite_body: Looks like binary stuff, not rewriting");
350 std::string content(*content_buf, *content_len);
351 cit->parse(m_verbose, content, vars, skip_list);
352 *content_buf = odr_strdup(o, content.c_str());
353 *content_len = strlen(*content_buf);
// Constructs the parser event handler for one body.
// `p` is the Content whose within_list drives the rewriting; `vars` and
// `skip_list` are stored (m_vars / m_skips) and later passed to Within::exec.
// m_vars is declared as a reference member in this file, so `vars` must
// outlive the Event.
356yf::HttpRewrite::Event::Event(
const Content *p,
357 std::map<std::string, std::string> & vars,
358 std::list<boost::regex> & skip_list
359 ) : m_content(p), m_vars(vars), m_skips(skip_list)
364yf::HttpRewrite::Event::~Event()
// Returns the rewritten output accumulated in the m_w buffer as a C string.
// NOTE(review): the pointer refers to storage owned by m_w; presumably it is
// only valid while this Event is alive - confirm against the destructor.
369const char *yf::HttpRewrite::Event::result()
371 return wrbuf_cstr(m_w);
// Parser callback for the start of an opening tag.
// Always echoes '<' followed by the tag name to the output buffer, then
// scans the content's within_list for an entry whose `tag` regex matches the
// tag name and whose `attr` regex matches the pseudo-attribute "#text".
// NOTE(review): the action taken on such a match is not visible in this view
// (presumably the entry is pushed on s_within so that text() rewrites the
// element's character data) - confirm.
374void yf::HttpRewrite::Event::openTagStart(
const char *tag,
int tag_len)
376 wrbuf_putc(m_w,
'<');
377 wrbuf_write(m_w, tag, tag_len);
379 std::string t(tag, tag_len);
380 std::list<Within>::const_iterator it = m_content->within_list.begin();
381 for (; it != m_content->within_list.end(); it++)
383 if (!it->tag.empty() && regex_match(t, it->tag))
385 if (!it->attr.empty() && regex_match(
"#text", it->attr))
// Parser callback for the end of an opening tag ('>' or '/>').
// If a within entry is active (s_within non-empty) and the tag name matches
// the top entry's `tag` regex, some action is taken that is not visible in
// this view (presumably popping s_within for self-closing tags - confirm).
// Output: '/' and '>' are written; the '/' is presumably conditional on
// close_it, but that guard is also not visible here.
394void yf::HttpRewrite::Event::anyTagEnd(
const char *tag,
int tag_len,
399 if (!s_within.empty())
401 std::list<Within>::const_iterator it = s_within.top();
402 std::string t(tag, tag_len);
403 if (regex_match(t, it->tag))
408 wrbuf_putc(m_w,
'/');
409 wrbuf_putc(m_w,
'>');
// Parser callback for one attribute of the current tag.
//
// Looks for a within entry that applies: its `tag` regex is empty (any tag)
// or matches the tag name, and its non-empty `attr` regex matches the
// attribute name. The attribute is then echoed as
//     ' ' name '=' sep value sep
// where the value is passed through Within::exec (anchor=true, honouring the
// skip list) when an entry applied, and copied verbatim otherwise.
//
// NOTE(review): the trailing parameters (including `sep`), the handling of
// valueless attributes and the branch structure around the two value writes
// are not visible in this view.
412void yf::HttpRewrite::Event::attribute(
const char *tag,
int tag_len,
413 const char *attr,
int attr_len,
414 const char *value,
int val_len,
417 std::list<Within>::const_iterator it = m_content->within_list.begin();
420 for (; it != m_content->within_list.end(); it++)
422 std::string t(tag, tag_len);
423 if (it->tag.empty() || regex_match(t, it->tag))
425 std::string a(attr, attr_len);
426 if (!it->attr.empty() && regex_match(a, it->attr))
433 wrbuf_putc(m_w,
' ');
434 wrbuf_write(m_w, attr, attr_len);
437 wrbuf_puts(m_w,
"=");
438 wrbuf_puts(m_w, sep);
443 std::string s(value, val_len);
444 it->exec(m_vars, s,
true, m_skips);
445 wrbuf_puts(m_w, s.c_str());
448 wrbuf_write(m_w, value, val_len);
449 wrbuf_puts(m_w, sep);
// Parser callback for a closing tag.
// When a within entry is active and the tag name matches its `tag` regex an
// action not visible in this view is taken (presumably s_within.pop() -
// confirm). The closing tag is then echoed as "</" + name; the final '>' is
// not written by the lines visible here.
453void yf::HttpRewrite::Event::closeTag(
const char *tag,
int tag_len)
455 if (!s_within.empty())
457 std::list<Within>::const_iterator it = s_within.top();
458 std::string t(tag, tag_len);
459 if (regex_match(t, it->tag))
462 wrbuf_puts(m_w,
"</");
463 wrbuf_write(m_w, tag, tag_len);
// Parser callback for character data.
// `it` starts as within_list.end(), meaning "no rule". If a within entry is
// active (s_within non-empty) it is presumably taken from the top of the
// stack - that assignment is not visible in this view. With a rule, the text
// is rewritten through Within::exec using anchor=false (matches may occur
// anywhere in the text) and the result is written; without one the text is
// copied to the output unchanged.
466void yf::HttpRewrite::Event::text(
const char *value,
int len)
468 std::list<Within>::const_iterator it = m_content->within_list.end();
469 if (!s_within.empty())
471 if (it != m_content->within_list.end())
473 std::string s(value, len);
474 it->exec(m_vars, s,
false, m_skips);
475 wrbuf_puts(m_w, s.c_str());
478 wrbuf_write(m_w, value, len);
// Body of the file-local helper embed_quoted_literal (its signature line is
// not visible in this view): applies a rule to string literals embedded in
// `content`, e.g. script source.
//
// The input is scanned with `cp`; `cp0` marks the start of the pending span.
// Visible cases:
//   - in HTML context, a literal delimited by a 6-character marker,
//   - a literal delimited by '"' or '\'' where a delimiter preceded by a
//     backslash does not terminate it,
//   - a "//" comment, which is skipped up to the end of the line.
// Text outside literals is appended to `res` unchanged; the inside of each
// literal is passed to Rule::test_patterns with anchor=true.
//
// NOTE(review): the 6-byte marker compared with strncmp appears as three
// quote characters in this view; with a length of 6 it was presumably an
// HTML entity that got decoded by whatever produced this listing - verify
// against the real file before touching these lines. How `replace` and `res`
// become the function's result is also not visible here.
482 std::string &content,
483 std::map<std::string, std::string> &vars,
484 mp::filter::HttpRewrite::RulePtr ruleptr,
486 std::list<boost::regex> &skip_list)
488 bool replace =
false;
490 const char *cp = content.c_str();
491 const char *cp0 = cp;
494 if (html_context && !strncmp(cp,
""", 6))
497 res.append(cp0, cp - cp0);
501 if (!strncmp(cp,
""", 6))
509 std::string s(cp0, cp - cp0);
510 if (ruleptr->test_patterns(vars, s,
true, skip_list))
515 else if (*cp ==
'"' || *cp ==
'\'')
519 res.append(cp0, cp - cp0);
523 if (cp[-1] !=
'\\' && *cp == m)
531 std::string s(cp0, cp - cp0);
532 if (ruleptr->test_patterns(vars, s,
true, skip_list))
537 else if (*cp ==
'/' && cp[1] ==
'/')
539 while (cp[1] && cp[1] !=
'\n')
544 res.append(cp0, cp - cp0);
// Applies this within entry to `txt` (modified in place).
// Entries of type "quoted-literal" take a separate path that is not visible
// in this view (presumably embed_quoted_literal - confirm); all others apply
// the rule directly via Rule::test_patterns, whose bool result is returned.
549bool yf::HttpRewrite::Within::exec(
550 std::map<std::string, std::string> & vars,
551 std::string & txt,
bool anchor,
552 std::list<boost::regex> & skip_list)
const
554 if (type ==
"quoted-literal")
560 return rule->test_patterns(vars, txt, anchor, skip_list);
// Applies the rule's replace_list to `txt`, building the result in `out`.
//
// Working from `start` towards `end`, each round tries every Replace entry
// with regex_search and keeps the match that begins earliest (`bit` /
// `bwhat`); entries with start_anchor set are not tried once `first` is
// false. For the winning match:
//   - sub-matches that have a name in group_index are stored into `vars`,
//   - the matched text is tested against every regex in `skip_list`,
//   - the text before the match is appended to `out`,
//   - on a skip-list hit the matched text is appended unchanged, otherwise
//     the substitution produced by Replace::sub_vars is used,
//   - `start` moves to the end of the match.
// Whatever remains after the last match is appended at the end.
//
// NOTE(review): loop framing, the role of `anchor`, how `first` is updated
// and how `out` / `replaces` reach the caller are not visible in this view.
564bool yf::HttpRewrite::Rule::test_patterns(
565 std::map<std::string, std::string> & vars,
566 std::string & txt,
bool anchor,
567 std::list<boost::regex> & skip_list )
569 bool replaces =
false;
572 std::string::const_iterator start, end;
577 std::list<Replace>::iterator bit = replace_list.end();
579 bool match_one =
false;
581 std::list<Replace>::iterator it = replace_list.begin();
582 for (; it != replace_list.end(); it++)
584 if (it->start_anchor && !first)
587 if (regex_search(start, end, what, it->re))
589 if (!match_one || what[0].first < bwhat[0].first)
603 for (i = 1; i < bwhat.size(); ++i)
606 std::map<int, std::string>::const_iterator git
607 = bit->group_index.find(i);
608 if (git != bit->group_index.end())
610 vars[git->second] = bwhat[i];
615 bool skipthis =
false;
616 std::list<boost::regex>::iterator si = skip_list.begin();
617 for ( ; si != skip_list.end(); si++) {
618 if ( boost::regex_search(bwhat.str(0), *si) )
625 std::string rvalue = bit->sub_vars(vars);
626 out.append(start, bwhat[0].first);
629 yaz_log(YLOG_LOG,
"! Not rewriting '%s', skiplist match",
630 bwhat.str(0).c_str() );
631 out.append(bwhat.str(0).c_str());
635 yaz_log(YLOG_LOG,
"! Rewritten '%s' to '%s'",
636 bwhat.str(0).c_str(), rvalue.c_str());
639 start = bwhat[0].second;
641 out.append(start, end);
// Scans a regex `pattern` for named groups and records them in group_index
// (group number -> name). Also sets start_anchor when the pattern begins
// with '^'.
//
// The scan tracks backslash escapes so that an escaped '(' is not counted.
// For an unescaped '(' the following characters are inspected: "?" followed
// by ":" adjusts the group counter downwards (non-capturing group), and
// "?", "P", "<" introduces a named group whose name runs up to '>'.
//
// Throws mp::filter::FilterException when a group name contains a
// non-alphanumeric character or is not terminated by '>'.
//
// NOTE(review): the statements that advance `i` between the look-ahead
// tests and that increment `gnum` are not visible in this view.
646void yf::HttpRewrite::Replace::parse_groups(std::string pattern)
650 const std::string &str = pattern;
652 start_anchor = str[0] ==
'^';
653 yaz_log(YLOG_LOG,
"Parsing groups from '%s'", str.c_str());
654 for (
size_t i = 0; i < str.size(); ++i)
657 if (!esc && str[i] ==
'\\')
662 if (!esc && str[i] ==
'(')
665 if (i+1 < str.size() && str[i+1] ==
'?')
668 if (i+1 < str.size() && str[i+1] ==
':')
670 if (gnum > 0) gnum--;
676 if (i+1 < str.size() && str[i+1] ==
'P')
678 if (i+1 < str.size() && str[i+1] ==
'<')
683 while (++i < str.size())
685 if (str[i] ==
'>') { term =
true;
break; }
686 if (!isalnum(str[i]))
687 throw mp::filter::FilterException
688 (
"Only alphanumeric chars allowed, found "
692 + boost::lexical_cast<std::string>(i));
696 throw mp::filter::FilterException
697 (
"Unterminated group name '" + gname
698 +
" in '" + str +
"'");
699 group_index[gnum] = gname;
700 yaz_log(YLOG_LOG,
"Found named group '%s' at $%d",
701 gname.c_str(), gnum);
// Expands variable references in this Replace's `recipe` and returns the
// resulting string.
//
// The recipe is scanned character by character. A backslash escapes the next
// character. An unescaped '$' followed by '{' starts a reference that runs up
// to the matching '}'; the name is looked up in `vars`.
//
// Throws mp::filter::FilterException for a reference without closing '}'
// ("Unterminated var ref") and for a '$' not followed by '{'
// ("Malformed or trimmed var ref").
//
// NOTE(review): what is emitted for a name that is missing from `vars` is
// not visible in this view.
710std::string yf::HttpRewrite::Replace::sub_vars(
711 const std::map<std::string, std::string> & vars)
const
715 const std::string & in = recipe;
716 for (
size_t i = 0; i < in.size(); ++i)
718 if (!esc && in[i] ==
'\\')
723 if (!esc && in[i] ==
'$')
725 if (i+1 < in.size() && in[i+1] ==
'{')
730 while (++i < in.size())
732 if (in[i] ==
'}') { term =
true;
break; }
735 if (!term)
throw mp::filter::FilterException
736 (
"Unterminated var ref in '"+in+
"' at "
737 + boost::lexical_cast<std::string>(i));
738 std::map<std::string, std::string>::const_iterator it
740 if (it != vars.end())
747 throw mp::filter::FilterException
748 (
"Malformed or trimmed var ref in '"
749 +in+
"' at "+boost::lexical_cast<std::string>(i));
760yf::HttpRewrite::Phase::Phase() : m_verbose(0)
// Rewrites one body (`content`, modified in place) according to this
// content entry.
// Visible paths: an Event handler bound to this Content, `vars` and
// `skip_list` is driven by a tag parser over the content, and content of
// type "quoted-literal" is handed to quoted_literal().
// NOTE(review): the first parameter, the conditions selecting between the
// two paths and the copy of the Event result back into `content` are not
// visible in this view.
764void yf::HttpRewrite::Content::parse(
766 std::string &content,
767 std::map<std::string, std::string> &vars,
768 std::list<boost::regex> & skip_list )
const
773 Event ev(
this, vars, skip_list);
777 parser.
parse(ev, content.c_str());
780 if (type ==
"quoted-literal")
782 quoted_literal(content, vars, skip_list);
// Handles content of type "quoted-literal". Only the first entry of
// within_list is considered, and nothing happens when the list is empty.
// NOTE(review): the call made with that first entry is not visible in this
// view (presumably embed_quoted_literal with the entry's rule - confirm).
786void yf::HttpRewrite::Content::quoted_literal(
787 std::string &content,
788 std::map<std::string, std::string> &vars,
789 std::list<boost::regex> & skip_list )
const
791 std::list<Within>::const_iterator it = within_list.begin();
792 if (it != within_list.end())
// Builds within_list from the XML children of a content element.
//
// `ptr` is the first child node; non-element nodes are ignored. For each
// <within> element the attributes header, attr, tag, rule, reqline and type
// are read (values[] is indexed in that order). header/attr/tag become
// case-insensitive regexes when non-empty. `rule` is a comma-separated list
// of rule names resolved through `rules`:
//   - a single name uses the rule found under that name,
//   - several names are combined by appending each sub-rule's replace_list
//     entries, in the order given, to one rule.
// `type` must be empty or "quoted-literal".
//
// Throws mp::filter::FilterException for an empty rule list, a name missing
// from `rules`, or an unsupported within type.
//
// NOTE(review): boost::split yields one (empty) element for an empty input
// string, so the size()==0 branch looks unreachable and an absent rule
// attribute would surface as "non-existing rule ''" - confirm. The exception
// text for the type check contains "in" twice.
796void yf::HttpRewrite::Content::configure(
797 const xmlNode *ptr, std::map<std::string, RulePtr > &rules)
799 for (; ptr; ptr = ptr->next)
801 if (ptr->type != XML_ELEMENT_NODE)
803 if (!strcmp((
const char *) ptr->name,
"within"))
805 static const char *names[7] =
806 {
"header",
"attr",
"tag",
"rule",
"reqline",
"type", 0 };
807 std::string values[6];
808 mp::xml::parse_attr(ptr, names, values);
810 if (values[0].length() > 0)
811 w.
header.assign(values[0], boost::regex_constants::icase);
812 if (values[1].length() > 0)
813 w.
attr.assign(values[1], boost::regex_constants::icase);
814 if (values[2].length() > 0)
815 w.
tag.assign(values[2], boost::regex_constants::icase);
817 std::vector<std::string> rulenames;
818 boost::split(rulenames, values[3], boost::is_any_of(
","));
819 if (rulenames.size() == 0)
821 throw mp::filter::FilterException
822 (
"Empty rule in '" + values[3] +
823 "' in http_rewrite filter");
825 else if (rulenames.size() == 1)
827 std::map<std::string,RulePtr>::const_iterator it =
828 rules.find(rulenames[0]);
829 if (it == rules.end())
830 throw mp::filter::FilterException
831 (
"Reference to non-existing rule '" + rulenames[0] +
832 "' in http_rewrite filter");
840 for (i = 0; i < rulenames.size(); i++)
842 std::map<std::string,RulePtr>::const_iterator it =
843 rules.find(rulenames[i]);
844 if (it == rules.end())
845 throw mp::filter::FilterException
846 (
"Reference to non-existing rule '" + rulenames[i] +
847 "' in http_rewrite filter");
849 std::list<Replace>::iterator rit =
850 subRule->replace_list.begin();
851 for (; rit != subRule->replace_list.end(); rit++)
852 rule->replace_list.push_back(*rit);
858 if (w.
type.empty() || w.
type ==
"quoted-literal")
861 throw mp::filter::FilterException
862 (
"within type must be quoted-literal or none in "
863 " in http_rewrite filter");
864 within_list.push_back(w);
// Configures one Phase (request or response) from its XML element.
//
// The element's "verbose" attribute is parsed with atoi into
// phase.m_verbose. Its element children are then processed:
//   <rule name="...">   - name defaults to "default". Each <rewrite> child
//                         supplies `from` (the pattern) and `to` (stored as
//                         the Replace recipe); any other attribute or child
//                         element is rejected. The finished rule is stored in
//                         the local `rules` map under its name, so a later
//                         rule with the same name replaces an earlier one.
//   <content type="..." mime="...">
//                       - `type` is mandatory; `mime`, when present, becomes
//                         the case-insensitive content_re regex.
//   anything else       - rejected.
// All rejections throw mp::filter::FilterException.
//
// NOTE(review): compiling `from` into the Replace regex, calling
// parse_groups, and adding the Content to the phase are not visible in this
// view. The "Missing attribute" message contains "for" twice.
869void yf::HttpRewrite::configure_phase(
const xmlNode *ptr,
Phase &phase)
871 static const char *names[2] = {
"verbose", 0 };
872 std::string values[1];
874 mp::xml::parse_attr(ptr, names, values);
876 phase.
m_verbose = atoi(values[0].c_str());
878 std::map<std::string, RulePtr > rules;
879 for (ptr = ptr->children; ptr; ptr = ptr->next)
881 if (ptr->type != XML_ELEMENT_NODE)
883 else if (!strcmp((
const char *) ptr->name,
"rule"))
885 static const char *names[2] = {
"name", 0 };
886 std::string values[1];
887 values[0] =
"default";
888 mp::xml::parse_attr(ptr, names, values);
891 for (xmlNode *p = ptr->children; p; p = p->next)
893 if (p->type != XML_ELEMENT_NODE)
895 if (!strcmp((
const char *) p->name,
"rewrite"))
899 const struct _xmlAttr *attr;
900 for (attr = p->properties; attr; attr = attr->next)
902 if (!strcmp((
const char *) attr->name,
"from"))
903 from = mp::xml::get_text(attr->children);
904 else if (!strcmp((
const char *) attr->name,
"to"))
905 replace.
recipe = mp::xml::get_text(attr->children);
907 throw mp::filter::FilterException
909 + std::string((
const char *) attr->name)
910 +
" in rewrite section of http_rewrite");
912 yaz_log(YLOG_LOG,
"Found rewrite rule from '%s' to '%s'",
913 from.c_str(), replace.
recipe.c_str());
917 rule->replace_list.push_back(replace);
921 throw mp::filter::FilterException
923 + std::string((
const char *) p->name)
924 +
" in http_rewrite filter");
926 rules[values[0]] = rule;
928 else if (!strcmp((
const char *) ptr->name,
"content"))
930 static const char *names[3] =
931 {
"type",
"mime", 0 };
932 std::string values[2];
933 mp::xml::parse_attr(ptr, names, values);
934 if (values[0].empty())
936 throw mp::filter::FilterException
937 (
"Missing attribute, type for for element "
938 + std::string((
const char *) ptr->name)
939 +
" in http_rewrite filter");
944 if (!values[1].empty())
945 c.
content_re.assign(values[1], boost::regex::icase);
951 throw mp::filter::FilterException
953 + std::string((
const char *) ptr->name)
954 +
" in http_rewrite filter");
// Configures the filter from its XML element.
// Iterates over the element's children, ignoring non-element nodes.
// <request> and <response> children are recognised; any other element makes
// the function throw mp::filter::FilterException naming the element.
// NOTE(review): the handling of the two recognised elements (presumably
// configure_phase on req_phase / res_phase) and the remaining parameters
// after `test_only` are not visible in this view.
959void yf::HttpRewrite::configure(
const xmlNode * ptr,
bool test_only,
962 for (ptr = ptr->children; ptr; ptr = ptr->next)
964 if (ptr->type != XML_ELEMENT_NODE)
966 else if (!strcmp((
const char *) ptr->name,
"request"))
970 else if (!strcmp((
const char *) ptr->name,
"response"))
976 throw mp::filter::FilterException
978 + std::string((
const char *) ptr->name)
979 +
" in http_rewrite filter");
986 return new mp::filter::HttpRewrite;
void configure(const xmlNode *ptr, std::map< std::string, RulePtr > &rules)
void quoted_literal(std::string &content, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list) const
void parse(int verbose, std::string &content, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list) const
std::list< Within > within_list
static mp::filter::Base * filter_creator()
static bool embed_quoted_literal(std::string &content, std::map< std::string, std::string > &vars, mp::filter::HttpRewrite::RulePtr ruleptr, bool html_context, std::list< boost::regex > &skip_list)
struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite