YAZ  5.34.0
url.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12 
13 #include <yaz/url.h>
14 #include <yaz/comstack.h>
15 #include <yaz/log.h>
16 #include <yaz/wrbuf.h>
17 #include <yaz/cookie.h>
18 #include <yaz/poll.h>
19 
20 struct yaz_url {
23  char *proxy;
26  int verbose;
30 };
31 
33 {
34  yaz_url_t p = xmalloc(sizeof(*p));
37  p->proxy = 0;
38  p->max_redirects = 10;
39  p->w_error = wrbuf_alloc();
40  p->verbose = 0;
41  p->timeout_sec = 30;
42  p->timeout_ns = 0;
44  return p;
45 }
46 
48 {
49  if (p)
50  {
51  odr_destroy(p->odr_in);
52  odr_destroy(p->odr_out);
53  xfree(p->proxy);
56  xfree(p);
57  }
58 }
59 
60 void yaz_url_set_proxy(yaz_url_t p, const char *proxy)
61 {
62  xfree(p->proxy);
63  p->proxy = 0;
64  if (proxy && *proxy)
65  p->proxy = xstrdup(proxy);
66 }
67 
69 {
70  p->max_redirects = num;
71 }
72 
74 {
75  p->verbose = num;
76 }
77 
78 void yaz_url_set_timeout(yaz_url_t p, int sec, int ns)
79 {
80  p->timeout_sec = sec;
81  p->timeout_ns = ns;
82 }
83 
84 static void extract_user_pass(NMEM nmem,
85  const char *uri,
86  char **uri_lean, char **http_user,
87  char **http_pass)
88 {
89  const char *cp1 = strchr(uri, '/');
90  *uri_lean = 0;
91  *http_user = 0;
92  *http_pass = 0;
93  if (cp1 && cp1 > uri)
94  {
95  cp1--;
96 
97  if (!strncmp(cp1, "://", 3))
98  {
99  const char *cp3 = 0;
100  const char *cp2 = cp1 + 3;
101  while (*cp2 && *cp2 != '/' && *cp2 != '@')
102  {
103  if (*cp2 == ':')
104  cp3 = cp2;
105  cp2++;
106  }
107  if (*cp2 == '@' && cp3)
108  {
109  *uri_lean = nmem_malloc(nmem, strlen(uri) + 1);
110  memcpy(*uri_lean, uri, cp1 + 3 - uri);
111  strcpy(*uri_lean + (cp1 + 3 - uri), cp2 + 1);
112 
113  *http_user = nmem_strdupn(nmem, cp1 + 3, cp3 - (cp1 + 3));
114  *http_pass = nmem_strdupn(nmem, cp3 + 1, cp2 - (cp3 + 1));
115  }
116  }
117  }
118  if (*uri_lean == 0)
119  *uri_lean = nmem_strdup(nmem, uri);
120 }
121 
123 {
124  return wrbuf_cstr(p->w_error);
125 }
126 
127 static void log_warn(yaz_url_t p)
128 {
129  yaz_log(YLOG_WARN, "yaz_url: %s", wrbuf_cstr(p->w_error));
130 }
131 
133  const char *method,
134  Z_HTTP_Header *user_headers,
135  const char *buf, size_t len)
136 {
137  Z_HTTP_Response *res;
138  int number_of_redirects = 0;
139 
140  odr_reset(p->odr_out);
142  wrbuf_rewind(p->w_error);
143  while (1)
144  {
145  void *add;
146  COMSTACK conn = 0;
147  int code;
148  const char *location = 0;
149  char *http_user = 0;
150  char *http_pass = 0;
151  char *uri_lean = 0;
152  int proxy_mode = 0;
153  int ret;
154  Z_GDU *gdu;
155 
156  res = 0;
157  extract_user_pass(p->odr_out->mem, uri, &uri_lean,
158  &http_user, &http_pass);
159  conn = cs_create_host2(uri_lean, 0, &add, p->proxy, &proxy_mode);
160  if (!conn)
161  {
162  wrbuf_printf(p->w_error, "Can not resolve URL %s", uri);
163  log_warn(p);
164  return res;
165  }
166  gdu = z_get_HTTP_Request_uri(p->odr_out, uri_lean, 0, proxy_mode);
167  gdu->u.HTTP_Request->method = odr_strdup(p->odr_out, method);
169  for ( ; user_headers; user_headers = user_headers->next)
170  {
171  /* prefer new Host over user-supplied Host */
172  if (!strcmp(user_headers->name, "Host"))
173  ;
174  /* prefer user-supplied User-Agent over YAZ' own */
175  else if (!strcmp(user_headers->name, "User-Agent"))
177  user_headers->name, user_headers->value);
178  else
180  user_headers->name, user_headers->value);
181  }
182  if (http_user && http_pass)
184  &gdu->u.HTTP_Request->headers,
185  http_user, http_pass);
186  if (buf && len)
187  {
188  gdu->u.HTTP_Request->content_buf = (char *) buf;
189  gdu->u.HTTP_Request->content_len = len;
190  }
191  if (!z_GDU(p->odr_out, &gdu, 0, 0))
192  {
193  wrbuf_printf(p->w_error, "Can not encode HTTP request for URL %s",
194  uri);
195  log_warn(p);
196  }
197  else if ((ret = cs_connect(conn, add)) < 0)
198  {
199  wrbuf_printf(p->w_error, "Can not connect to URL %s", uri);
200  log_warn(p);
201  }
202  else
203  {
204  char *netbuffer = 0;
205  int netlen = 0;
206  int len_out;
207  char *buf_out = odr_getbuf(p->odr_out, &len_out, 0);
208  int state = 0; /* 0=connect phase, 1=send, 2=recv */
209  if (p->verbose)
210  fwrite(buf_out, 1, len_out, stdout);
211  if (!strcmp(gdu->u.HTTP_Request->method, "HEAD"))
212  cs_set_head_only(conn, 1);
213  if (ret == 0)
214  state = 1; /* connect complete, so send phase */
215  while (1)
216  {
217  if (ret == 1) /* incomplete , wait */
218  {
219  struct yaz_poll_fd yp;
222  if (conn->io_pending & CS_WANT_WRITE)
224  if (conn->io_pending & CS_WANT_READ)
226  yp.fd = cs_fileno(conn);
227  yp.input_mask = input_mask;
228  ret = yaz_poll(&yp, 1, p->timeout_sec, p->timeout_ns);
229  if (ret == 0)
230  {
231  wrbuf_printf(p->w_error, "timeout URL %s", uri);
232  break;
233  }
234  else if (ret < 0)
235  {
236  wrbuf_printf(p->w_error, "poll error URL %s", uri);
237  break;
238  }
239  }
240  if (state == 0) /* connect phase */
241  {
242  ret = cs_rcvconnect(conn);
243  if (ret < 0)
244  {
246  "cs_rcvconnect failed for URL %s", uri);
247  log_warn(p);
248  break;
249  }
250  else if (ret == 0)
251  state = 1;
252  }
253  else if (state == 1) /* write request phase */
254  {
255  ret = cs_put(conn, buf_out, len_out);
256  if (ret < 0)
257  {
258  wrbuf_printf(p->w_error, "cs_put fail for URL %s", uri);
259  log_warn(p);
260  break;
261  }
262  else if (ret == 0)
263  {
264  state = 2;
265  }
266  }
267  else if (state == 2) /* read response phase */
268  {
269  ret = cs_get(conn, &netbuffer, &netlen);
270  if (ret <= 0)
271  {
272  wrbuf_printf(p->w_error, "cs_get failed for URL %s",
273  uri);
274  log_warn(p);
275  break;
276  }
277  else if (ret > 1)
278  {
279  Z_GDU *gdu;
280  if (p->verbose)
281  fwrite(netbuffer, 1, ret, stdout);
282  odr_setbuf(p->odr_in, netbuffer, ret, 0);
283  if (!z_GDU(p->odr_in, &gdu, 0, 0)
284  || gdu->which != Z_GDU_HTTP_Response)
285  {
286  wrbuf_printf(p->w_error, "HTTP decoding fail for "
287  "URL %s", uri);
288  log_warn(p);
289  }
290  else
291  {
292  res = gdu->u.HTTP_Response;
293  break;
294  }
295  }
296  }
297  }
298  xfree(netbuffer);
299  }
300  cs_close(conn);
301  if (!res)
302  break;
303  code = res->code;
304  location = z_HTTP_header_lookup(res->headers, "Location");
305  if (++number_of_redirects <= p->max_redirects &&
306  location && (code == 301 || code == 302 || code == 307))
307  {
308  int host_change = 0;
309  const char *nlocation = yaz_check_location(p->odr_in, uri,
310  location, &host_change);
311 
312  odr_reset(p->odr_out);
313  uri = odr_strdup(p->odr_out, nlocation);
314  }
315  else
316  break;
317  yaz_cookies_response(p->cookies, res);
318  odr_reset(p->odr_in);
319  }
320  return res;
321 }
322 
323 /*
324  * Local variables:
325  * c-basic-offset: 4
326  * c-file-style: "Stroustrup"
327  * indent-tabs-mode: nil
328  * End:
329  * vim: shiftwidth=4 tabstop=8 expandtab
330  */
331 
COMSTACK cs_create_host2(const char *vhost, int blocking, void **vp, const char *proxy_host, int *proxy_mode)
Definition: comstack.c:179
Header for COMSTACK.
#define cs_close(handle)
Definition: comstack.h:99
#define cs_connect(handle, address)
Definition: comstack.h:93
#define cs_put(handle, buf, size)
Definition: comstack.h:90
#define CS_WANT_READ
Definition: comstack.h:114
#define cs_get(handle, buf, size)
Definition: comstack.h:91
#define cs_fileno(handle)
Definition: comstack.h:104
#define CS_WANT_WRITE
Definition: comstack.h:115
#define cs_rcvconnect(handle)
Definition: comstack.h:94
void yaz_cookies_reset(yaz_cookies_t yc)
Definition: cookie.c:41
void yaz_cookies_destroy(yaz_cookies_t yc)
Definition: cookie.c:35
yaz_cookies_t yaz_cookies_create(void)
Definition: cookie.c:28
void yaz_cookies_request(yaz_cookies_t yc, ODR odr, Z_HTTP_Request *req)
Definition: cookie.c:98
void yaz_cookies_response(yaz_cookies_t yc, Z_HTTP_Response *res)
Definition: cookie.c:60
HTTP cookie handling.
const char * z_HTTP_header_lookup(const Z_HTTP_Header *hp, const char *n)
Definition: http.c:233
Z_GDU * z_get_HTTP_Request_uri(ODR odr, const char *uri, const char *args, int use_full_uri)
Definition: http.c:291
void z_HTTP_header_add_basic_auth(ODR o, Z_HTTP_Header **hp, const char *username, const char *password)
Definition: http.c:168
void z_HTTP_header_set(ODR o, Z_HTTP_Header **hp, const char *n, const char *v)
Definition: http.c:200
const char * yaz_check_location(ODR odr, const char *uri, const char *location, int *host_change)
Definition: http.c:659
void z_HTTP_header_add(ODR o, Z_HTTP_Header **hp, const char *n, const char *v)
Definition: http.c:189
void yaz_log(int level, const char *fmt,...)
Writes log message.
Definition: log.c:487
Logging utility.
#define YLOG_WARN
log level: warning
Definition: log.h:46
void * nmem_malloc(NMEM n, size_t size)
allocates memory block on NMEM handle
Definition: nmem.c:145
char * nmem_strdupn(NMEM mem, const char *src, size_t n)
allocates string of certain size on NMEM handle
Definition: nmemsdup.c:33
char * nmem_strdup(NMEM mem, const char *src)
allocates string on NMEM handle (similar strdup)
Definition: nmemsdup.c:18
ODR odr_createmem(int direction)
Definition: odr.c:200
void odr_setbuf(ODR o, char *buf, int len, int can_grow)
Definition: odr.c:267
char * odr_getbuf(ODR o, int *len, int *size)
Definition: odr.c:277
void odr_destroy(ODR o)
Definition: odr.c:253
void odr_reset(ODR o)
Definition: odr.c:226
#define ODR_DECODE
Definition: odr.h:95
#define ODR_ENCODE
Definition: odr.h:96
char * odr_strdup(ODR o, const char *str)
Definition: odr_mem.c:36
int yaz_poll(struct yaz_poll_fd *fds, int num_fds, int sec, int nsec)
poll wrapper for poll or select
Definition: poll.c:161
Poll, select wrappers.
#define yaz_poll_add(var, value)
Definition: poll.h:76
yaz_poll_mask
select/poll masks .. timeout is "output" only
Definition: poll.h:41
@ yaz_poll_read
Definition: poll.h:43
@ yaz_poll_except
Definition: poll.h:45
@ yaz_poll_none
Definition: poll.h:42
@ yaz_poll_write
Definition: poll.h:44
Definition: zgdu.h:68
Z_HTTP_Request * HTTP_Request
Definition: zgdu.h:72
int which
Definition: zgdu.h:69
union Z_GDU::@132 u
Z_HTTP_Response * HTTP_Response
Definition: zgdu.h:73
char * name
Definition: zgdu.h:43
Z_HTTP_Header * next
Definition: zgdu.h:45
char * value
Definition: zgdu.h:44
Z_HTTP_Header * headers
Definition: zgdu.h:52
char * content_buf
Definition: zgdu.h:53
char * method
Definition: zgdu.h:49
int content_len
Definition: zgdu.h:54
int code
Definition: zgdu.h:58
Z_HTTP_Header * headers
Definition: zgdu.h:60
unsigned io_pending
Definition: comstack.h:63
Definition: odr.h:125
NMEM mem
Definition: odr.h:130
string buffer
Definition: wrbuf.h:43
select/poll fd info
Definition: poll.h:50
enum yaz_poll_mask input_mask
Definition: poll.h:52
int fd
Definition: poll.h:56
Definition: url.c:20
int verbose
Definition: url.c:26
yaz_cookies_t cookies
Definition: url.c:29
int timeout_ns
Definition: url.c:28
int max_redirects
Definition: url.c:24
ODR odr_out
Definition: url.c:22
int timeout_sec
Definition: url.c:27
WRBUF w_error
Definition: url.c:25
char * proxy
Definition: url.c:23
ODR odr_in
Definition: url.c:21
int cs_set_head_only(COMSTACK cs, int head_only)
Definition: tcpip.c:1602
const char * yaz_url_get_error(yaz_url_t p)
get last error from yaz_url_exec
Definition: url.c:122
void yaz_url_set_timeout(yaz_url_t p, int sec, int ns)
sets I/O timeout
Definition: url.c:78
yaz_url_t yaz_url_create(void)
creates a URL fetcher handle
Definition: url.c:32
static void extract_user_pass(NMEM nmem, const char *uri, char **uri_lean, char **http_user, char **http_pass)
Definition: url.c:84
static void log_warn(yaz_url_t p)
Definition: url.c:127
void yaz_url_set_max_redirects(yaz_url_t p, int num)
sets maximum number of redirects
Definition: url.c:68
void yaz_url_destroy(yaz_url_t p)
destroys a URL fetcher
Definition: url.c:47
void yaz_url_set_proxy(yaz_url_t p, const char *proxy)
sets proxy for URL fetcher
Definition: url.c:60
Z_HTTP_Response * yaz_url_exec(yaz_url_t p, const char *uri, const char *method, Z_HTTP_Header *user_headers, const char *buf, size_t len)
executes the actual HTTP request (including redirects, etc)
Definition: url.c:132
void yaz_url_set_verbose(yaz_url_t p, int num)
sets verbose level 0=none, >0 verbose
Definition: url.c:73
URL fetch utility.
void wrbuf_destroy(WRBUF b)
destroy WRBUF and its buffer
Definition: wrbuf.c:38
void wrbuf_rewind(WRBUF b)
empty WRBUF content (length of buffer set to 0)
Definition: wrbuf.c:47
WRBUF wrbuf_alloc(void)
construct WRBUF
Definition: wrbuf.c:25
void wrbuf_printf(WRBUF b, const char *fmt,...)
writes printf result to WRBUF
Definition: wrbuf.c:178
const char * wrbuf_cstr(WRBUF b)
returns WRBUF content as C-string
Definition: wrbuf.c:281
Header for WRBUF (growing buffer)
#define xstrdup(s)
utility macro which calls xstrdup_f
Definition: xmalloc.h:55
#define xfree(x)
utility macro which calls xfree_f
Definition: xmalloc.h:53
#define xmalloc(x)
utility macro which calls malloc_f
Definition: xmalloc.h:49
#define Z_GDU_HTTP_Response
Definition: zgdu.h:67
int z_GDU(ODR o, Z_GDU **p, int opt, const char *name)
Definition: zgdu.c:17