patch to support custom HTTP headers in download.file() and url()

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

patch to support custom HTTP headers in download.file() and url()

Gábor Csárdi
The patch below adds support for custom HTTP headers in
download.file() and url().

My main motivation for this is performing basic http authentication.
Some web sites do not support embedding the credentials into the URI
itself, they only work if the username and password are sent in the
HTTP headers. In fact specifying the username and password in the URI
has been deprecated (https://en.wikipedia.org/wiki/Basic_access_authentication#URL_encoding).

Unfortunately this means that download.file() and url() cannot access
these password protected URLs. This patch fixes that.

I am happy to update the patch as needed.

Details:

* This patch adds support for custom HTTP headers in download.file() and url().
* They both get a headers = NULL argument.
* This is implemented for the internal, wininet and libcurl methods.
* For other methods headers is silently ignored.
* For non-HTTP URLs headers is silently ignored.
* The headers argument must be a named character vector without NAs, or NULL.
* If headers is not named or it contains NAs, or the names contain
NAs, an error is thrown.
* For download.file() the method is chosen in R, and we pass a character
vector to C for libcurl and a collapsed string constant for internal
and wininet.
* For url() the method is only chosen in C, so we pass both a string
vector and the collapsed string vector to C. This is simpler than
collapsing in C.
* It is not possible to specify headers for file(), even though it handles URLs.
* The user agent (coming from the HTTPUserAgent option) will be the
first header, for the methods that need it together with the other
headers.
* We don't check for duplicate headers, just pass to the methods as
the user specified them.
* We test all methods.
* We have run the tests on macOS, Debian Linux and Windows 2016 Server.

You can also browse the changes here:
https://github.com/gaborcsardi/r-source/pull/3/files
You can also download the diff below from
https://github.com/gaborcsardi/r-source/pull/3.diff

Best,
Gabor

diff --git a/src/include/Rconnections.h b/src/include/Rconnections.h
index a2c53f058f..32bb35e31f 100644
--- a/src/include/Rconnections.h
+++ b/src/include/Rconnections.h
@@ -36,6 +36,7 @@ typedef enum {HTTPsh, FTPsh, HTTPSsh, FTPSsh} UrlScheme;
 typedef struct urlconn {
     void *ctxt;
     UrlScheme type;
+    char *headers;
 } *Rurlconn;

 /* used in internet module */
@@ -67,7 +68,7 @@ Rconnection getConnection_no_err(int n);
 Rboolean switch_stdout(int icon, int closeOnExit);
 void init_con(Rconnection new, const char *description, int enc,
       const char * const mode);
-Rconnection R_newurl(const char *description, const char * const
mode, int type);
+Rconnection R_newurl(const char *description, const char * const
mode, SEXP headers, int type);
 Rconnection R_newsock(const char *host, int port, int server, const
char * const mode, int timeout);
 Rconnection in_R_newsock(const char *host, int port, int server,
const char *const mode, int timeout);
 Rconnection R_newunz(const char *description, const char * const mode);
diff --git a/src/include/Rmodules/Rinternet.h b/src/include/Rmodules/Rinternet.h
index 619992eeda..5f02b78514 100644
--- a/src/include/Rmodules/Rinternet.h
+++ b/src/include/Rmodules/Rinternet.h
@@ -25,10 +25,10 @@


 typedef SEXP (*R_DownloadRoutine)(SEXP args);
-typedef Rconnection (*R_NewUrlRoutine)(const char *description, const
char * const mode, int method);
+typedef Rconnection (*R_NewUrlRoutine)(const char *description, const
char * const mode, SEXP headers, int method);
 typedef Rconnection (*R_NewSockRoutine)(const char *host, int port,
int server, const char *const mode, int timeout);

-typedef void * (*R_HTTPOpenRoutine)(const char *url, const char
*headers, const int cacheOK);
+typedef void * (*R_HTTPOpenRoutine)(const char *url, const char
*agent, const char *headers, const int cacheOK);
 typedef int    (*R_HTTPReadRoutine)(void *ctx, char *dest, int len);
 typedef void   (*R_HTTPCloseRoutine)(void *ctx);

diff --git a/src/library/base/R/connections.R b/src/library/base/R/connections.R
index 7445d2327b..50c0ea0a1c 100644
--- a/src/library/base/R/connections.R
+++ b/src/library/base/R/connections.R
@@ -91,10 +91,18 @@ fifo <- function(description, open = "", blocking = FALSE,

 url <- function(description, open = "", blocking = TRUE,
                 encoding = getOption("encoding"),
-                method = getOption("url.method", "default"))
+                method = getOption("url.method", "default"),
+                headers = NULL)
 {
     method <- match.arg(method, c("default", "internal", "libcurl", "wininet"))
-    .Internal(url(description, open, blocking, encoding, method))
+    if (!is.null(headers)) {
+      if (length(names(headers)) != length(headers) ||
+          any(names(headers) == "") || anyNA(headers) || anyNA(names(headers)))
+        stop("'headers' must must have names and must not be NA")
+      headers <- paste0(names(headers), ": ", headers)
+      headers <- list(headers, paste0(headers, "\r\n", collapse = ""))
+    }
+    .Internal(url(description, open, blocking, encoding, method, headers))
 }

 gzfile <- function(description, open = "",
diff --git a/src/library/base/man/connections.Rd
b/src/library/base/man/connections.Rd
index 04c77d6cac..bce232ba86 100644
--- a/src/library/base/man/connections.Rd
+++ b/src/library/base/man/connections.Rd
@@ -46,7 +46,8 @@ file(description = "", open = "", blocking = TRUE,

 url(description, open = "", blocking = TRUE,
     encoding = getOption("encoding"),
-    method = getOption("url.method", "default"))
+    method = getOption("url.method", "default"),
+    headers = NULL)

 gzfile(description, open = "", encoding = getOption("encoding"),
        compression = 6)
@@ -98,6 +99,10 @@ isIncomplete(con)
     \code{c("default", "internal", "wininet", "libcurl")}:
 %% FIXME:  Consider  "auto", as in download.file()
     see \sQuote{Details}.}
+  \item{headers}{named character vector of HTTP headers to use in HTTP
+    requests. It is ignored for non-HTTP URLs. The \code{User-Agent}
+    header, coming from the \code{HTTPUserAgent} option (see
+    \code{\link{options}}) is used as the first header, automatically.}
   \item{compression}{integer in 0--9.  The amount of compression to be
     applied when writing, from none to maximal available.  For
     \code{xzfile} can also be negative: see the \sQuote{Compression}
diff --git a/src/library/utils/R/unix/download.file.R
b/src/library/utils/R/unix/download.file.R
index 460c4f350b..0e67b267d8 100644
--- a/src/library/utils/R/unix/download.file.R
+++ b/src/library/utils/R/unix/download.file.R
@@ -18,7 +18,8 @@

 download.file <-
     function(url, destfile, method, quiet = FALSE, mode = "w",
-             cacheOK = TRUE, extra = getOption("download.file.extra"), ...)
+             cacheOK = TRUE, extra = getOption("download.file.extra"),
+             headers = NULL, ...)
 {
     destfile # check supplied
     method <- if (missing(method))
@@ -33,14 +34,26 @@ download.file <-
  method <- if(startsWith(url, "file:")) "internal" else "libcurl"
     }

+    if (length(names(headers)) != length(headers) ||
+        any(names(headers) == "") || anyNA(headers) || anyNA(names(headers)))
+        stop("'headers' must must have names and must not be NA")
+
     switch(method,
    "internal" = {
-       status <- .External(C_download, url, destfile, quiet, mode, cacheOK)
+               if (!is.null(headers)) {
+                   headers <- paste0(names(headers), ": ", headers,
"\r\n", collapse = "")
+               }
+               status <- .External(C_download, url, destfile, quiet, mode,
+                                   cacheOK, headers)
        ## needed for Mac GUI from download.packages etc
        if(!quiet) flush.console()
    },
    "libcurl" = {
-       status <- .Internal(curlDownload(url, destfile, quiet, mode, cacheOK))
+               if (!is.null(headers)) {
+                   headers <- paste0(names(headers), ": ", headers)
+               }
+               status <- .Internal(curlDownload(url, destfile, quiet, mode,
+                                                cacheOK, headers))
        if(!quiet) flush.console()
    },
    "wget" = {
diff --git a/src/library/utils/R/windows/download.file.R
b/src/library/utils/R/windows/download.file.R
index 4a84134470..450c22304d 100644
--- a/src/library/utils/R/windows/download.file.R
+++ b/src/library/utils/R/windows/download.file.R
@@ -18,7 +18,8 @@

 download.file <-
     function(url, destfile, method, quiet = FALSE, mode = "w",
-             cacheOK = TRUE, extra = getOption("download.file.extra"), ...)
+             cacheOK = TRUE, extra = getOption("download.file.extra"),
+             headers = NULL, ...)
 {
     destfile # check supplied
     method <- if (missing(method))
@@ -38,13 +39,24 @@ download.file <-
             else "wininet"
     }

+    if (length(names(headers)) != length(headers) ||
+        any(names(headers) == "") || anyNA(headers) || anyNA(names(headers)))
+        stop("'headers' must must have names and must not be NA")
+
     switch(method,
    "internal" =, "wininet" = {
-       status <- .External(C_download, url, destfile, quiet, mode, cacheOK,
-   method == "wininet")
+               if (!is.null(headers)) {
+                   headers <- paste0(names(headers), ": ", headers,
"\r\n", collapse = "")
+               }
+               status <- .External(C_download, url, destfile, quiet,
mode, cacheOK,
+   headers, method == "wininet")
    },
    "libcurl" = {
-       status <- .Internal(curlDownload(url, destfile, quiet, mode, cacheOK))
+               if (!is.null(headers)) {
+                   headers <- paste0(names(headers), ": ", headers)
+               }
+               status <- .Internal(curlDownload(url, destfile, quiet,
mode, cacheOK,
+                                                headers))
    },
    "wget" = {
        if(length(url) != 1L || typeof(url) != "character")
diff --git a/src/library/utils/man/download.file.Rd
b/src/library/utils/man/download.file.Rd
index 1ce34e6953..6aa90719b2 100644
--- a/src/library/utils/man/download.file.Rd
+++ b/src/library/utils/man/download.file.Rd
@@ -15,7 +15,8 @@
 \usage{
 download.file(url, destfile, method, quiet = FALSE, mode = "w",
               cacheOK = TRUE,
-              extra = getOption("download.file.extra"), \dots)
+              extra = getOption("download.file.extra"),
+              headers = NULL, \dots)
 }
 \arguments{
   \item{url}{a \code{\link{character}} string (or longer vector e.g.,
@@ -48,6 +49,11 @@ download.file(url, destfile, method, quiet = FALSE,
mode = "w",
   \item{extra}{character vector of additional command-line arguments for
     the \code{"wget"} and \code{"curl"} methods.}

+  \item{headers}{named character vector of HTTP headers to use in HTTP
+    requests. It is ignored for non-HTTP URLs. The \code{User-Agent}
+    header, coming from the \code{HTTPUserAgent} option (see
+    \code{\link{options}}) is used as the first header, automatically.}
+
   \item{\dots}{allow additional arguments to be passed, unused.}
 }
 \details{
diff --git a/src/library/utils/src/init.c b/src/library/utils/src/init.c
index ebbaf1054a..785347b772 100644
--- a/src/library/utils/src/init.c
+++ b/src/library/utils/src/init.c
@@ -74,9 +74,9 @@ static const R_CallMethodDef CallEntries[] = {

 static const R_ExternalMethodDef ExtEntries[] = {
 #ifdef Win32
-    EXTDEF(download, 6),
+    EXTDEF(download, 7),
 #else
-    EXTDEF(download, 5),
+    EXTDEF(download, 6),
 #endif
     EXTDEF(unzip, 7),
     EXTDEF(Rprof, 8),
diff --git a/src/library/utils/tests/download.file.R
b/src/library/utils/tests/download.file.R
new file mode 100644
index 0000000000..1cbe2e0db1
--- /dev/null
+++ b/src/library/utils/tests/download.file.R
@@ -0,0 +1,173 @@
+
+## Tests for HTTP headers -----------------------------------------------
+
+is_online <- function() {
+  tryCatch({
+    con <- suppressWarnings(socketConnection("8.8.8.8", port = 53))
+    close(con)
+    con <- url("http://eu.httpbin.org/headers")
+    lines <- readLines(con)
+    close(con)
+    stopifnot(any(grepl("Host.*eu.httpbin.org", lines)))
+    TRUE
+  }, error = function(e) FALSE)
+}
+
+get_headers <- function(path = "anything", quiet = TRUE, ...,
+                        protocol = "http") {
+  url <- get_path(path, protocol)
+  tmp <-  tempfile()
+  on.exit(try(unlink(tmp)), add = TRUE)
+  download.file(url, tmp, quiet = quiet, ...)
+  readLines(tmp)
+}
+
+get_headers_url <- function(path = "anything", ..., protocol = "http") {
+  con <- url(get_path(path, protocol), ...)
+  on.exit(try(close(con)), add = TRUE)
+  readLines(con)
+}
+
+get_path <- function(path = "anything", protocol = "http") {
+  paste0(protocol, "://", "eu.httpbin.org/", path)
+}
+
+with_options <- function(opts, expr) {
+  old <- do.call(options, as.list(opts))
+  on.exit(options(old), add = TRUE)
+  expr
+}
+
+tests <- function() {
+  cat("- User agent is still set\n")
+  with_options(list(HTTPUserAgent = "foobar"), {
+    h <- get_headers()
+    stopifnot(any(grepl("User-Agent.*foobar", h)))
+  })
+
+  with_options(list(HTTPUserAgent = "foobar"), {
+    h <- get_headers(headers = c(foo = "bar", zzzz = "bee"))
+    stopifnot(any(grepl("User-Agent.*foobar", h)))
+    stopifnot(any(grepl("Foo.*bar", h)))
+    stopifnot(any(grepl("Zzzz.*bee", h)))
+  })
+
+  cat("- Can supply headers\n")
+  h <- get_headers(headers = c(foo = "bar", zzzz = "bee"))
+  stopifnot(any(grepl("Foo.*bar", h)))
+  stopifnot(any(grepl("Zzzz.*bee", h)))
+
+  cat("- Basic auth\n")
+  ret <- tryCatch({
+    h <- suppressWarnings(get_headers(
+      "basic-auth/Aladdin/OpenSesame",
+      headers = c(Authorization = "Basic QWxhZGRpbjpPcGVuU2VzYW1l")))
+    TRUE
+  }, error = function(e) FALSE)
+  stopifnot(any(grepl("authenticated.*true", h)))
+
+  if (getOption("download.file.method") == "libcurl") {
+    cat("- Multiple urls (libcurl only)\n")
+    urls <- get_path(c("anything", "headers"))
+    tmp1 <- tempfile()
+    tmp2 <- tempfile()
+    on.exit(unlink(c(tmp1, tmp2)), add = TRUE)
+    download.file(urls, c(tmp1, tmp2), quiet = TRUE,
+                  headers = c(foo = "bar", zzzz = "bee"))
+    h1 <- readLines(tmp1)
+    h2 <- readLines(tmp2)
+    stopifnot(any(grepl("Foo.*bar", h1)))
+    stopifnot(any(grepl("Zzzz.*bee", h1)))
+    stopifnot(any(grepl("Foo.*bar", h2)))
+    stopifnot(any(grepl("Zzzz.*bee", h2)))
+  }
+
+  if (getOption("download.file.method", "") != "internal") {
+    cat("- HTTPS\n")
+    h <- get_headers(headers = c(foo = "bar", zzzz = "bee"),
+                     protocol = "https")
+    stopifnot(any(grepl("Foo.*bar", h)))
+    stopifnot(any(grepl("Zzzz.*bee", h)))
+  }
+
+  cat("- If headers not named, then error\n")
+  ret <- tryCatch(
+    download.file(get_path(), headers = c("foo", "xxx" = "bar")),
+    error = function(err) TRUE)
+  stopifnot(isTRUE(ret))
+  ret <- tryCatch(
+    download.file(get_path(), headers = "foobar"),
+    error = function(err) TRUE)
+  stopifnot(isTRUE(ret))
+
+  cat("- If headers are NA, then error\n")
+  ret <- tryCatch(
+    download.file(get_path(), headers = c("foo" = NA, "xxx" = "bar")),
+    error = function(err) TRUE)
+  stopifnot(isTRUE(ret))
+  ret <- tryCatch(
+    download.file(
+      get_path(), quiet = TRUE,
+      headers = structure(c("foo", "bar", names = c("foo", NA)))),
+    error = function(err) TRUE)
+  stopifnot(isTRUE(ret))
+
+  cat("- user agent is set in url()\n")
+  with_options(list(HTTPUserAgent = "foobar"), {
+    h <- get_headers_url()
+    stopifnot(any(grepl("User-Agent.*foobar", h)))
+  })
+
+  cat("- file() still works with URLs\n")
+  con <- file(get_path("anything", "http"))
+  on.exit(close(con), add = TRUE)
+  h <- readLines(con)
+  stopifnot(any(grepl("Host.*eu.httpbin.org", h)))
+
+  cat("- If headers not named, then url() errors\n")
+  ret <- tryCatch(
+    url(get_path(), headers = c("foo", "xxx" = "bar")),
+    error = function(err) TRUE)
+  stopifnot(isTRUE(ret))
+
+  cat("- If headers are NA, then url() errors\n")
+  ret <- tryCatch(
+    url(get_path(), headers = c("foo" = "bar", "xxx" = NA)),
+    error = function(err) TRUE)
+  stopifnot(isTRUE(ret))
+  ret <- tryCatch(
+    url(get_path(),
+        headers = structure(c("1", "2"), names = c("foo", NA))),
+    error = function(err) TRUE)
+  stopifnot(isTRUE(ret))
+
+  cat("- Can supply headers in url()\n")
+  h <- get_headers_url(headers = c(foo = "bar", zzzz = "bee"))
+  stopifnot(any(grepl("Foo.*bar", h)))
+  stopifnot(any(grepl("Zzzz.*bee", h)))
+
+  if (getOption("download.file.method", "") != "internal") {
+    cat("- HTTPS with url()\n")
+    h <- get_headers_url(headers = c(foo = "bar", zzzz = "bee"),
+                         protocol = "https")
+    stopifnot(any(grepl("Foo.*bar", h)))
+    stopifnot(any(grepl("Zzzz.*bee", h)))
+  }
+}
+
+main <- function() {
+  cat("internal method\n")
+  with_options(c(download.file.method = "internal"), tests())
+
+  if (.Platform$OS.type == "windows")  {
+    cat("\nwininet method\n")
+    with_options(c(download.file.method = "wininet"), tests())
+  }
+
+  if (isTRUE(capabilities()[["libcurl"]])) {
+    cat("\nlibcurl method\n")
+    with_options(c(download.file.method = "libcurl"), tests())
+  }
+}
+
+if (is_online()) main()
diff --git a/src/main/connections.c b/src/main/connections.c
index c4cac3c92a..e534cfbaaf 100644
--- a/src/main/connections.c
+++ b/src/main/connections.c
@@ -5236,15 +5236,16 @@ SEXP attribute_hidden do_sumconnection(SEXP
call, SEXP op, SEXP args, SEXP env)

 // in internet module: 'type' is unused
 extern Rconnection
-R_newCurlUrl(const char *description, const char * const mode, int type);
+R_newCurlUrl(const char *description, const char * const mode, SEXP
headers, int type);


-/* op = 0: .Internal( url(description, open, blocking, encoding, method))
+/* op = 0: .Internal( url(description, open, blocking, encoding,
method, headers))
    op = 1: .Internal(file(description, open, blocking, encoding, method, raw))
 */
 SEXP attribute_hidden do_url(SEXP call, SEXP op, SEXP args, SEXP env)
 {
-    SEXP scmd, sopen, ans, class, enc;
+    SEXP scmd, sopen, ans, class, enc, headers = R_NilValue,
+ headers_flat = R_NilValue;
     char *class2 = "url";
     const char *url, *open;
     int ncon, block, raw = 0, defmeth,
@@ -5333,6 +5334,15 @@ SEXP attribute_hidden do_url(SEXP call, SEXP
op, SEXP args, SEXP env)
     error(_("invalid '%s' argument"), "raw");
     }

+    // --------- headers, for url() only
+    if(PRIMVAL(op) == 0) {
+ SEXP lheaders = CAD4R(CDR(args));
+ if (!isNull(lheaders)) {
+    headers = VECTOR_ELT(lheaders, 0);
+    headers_flat = VECTOR_ELT(lheaders, 1);
+ }
+    }
+
     if(!meth) {
  if (strncmp(url, "ftps://", 7) == 0) {
 #ifdef HAVE_LIBCURL
@@ -5369,12 +5379,12 @@ SEXP attribute_hidden do_url(SEXP call, SEXP
op, SEXP args, SEXP env)
     } else if (inet) {
  if(meth) {
 # ifdef HAVE_LIBCURL
-    con = R_newCurlUrl(url, strlen(open) ? open : "r", 0);
+    con = R_newCurlUrl(url, strlen(open) ? open : "r", headers, 0);
 # else
     error("url(method = \"libcurl\") is not supported on this platform");
 # endif
  } else {
-    con = R_newurl(url, strlen(open) ? open : "r", winmeth);
+    con = R_newurl(url, strlen(open) ? open : "r", headers_flat, winmeth);
     ((Rurlconn)con->private)->type = type;
  }
     } else {
diff --git a/src/main/internet.c b/src/main/internet.c
index 10dfa2b30a..801d9ed1cd 100644
--- a/src/main/internet.c
+++ b/src/main/internet.c
@@ -90,11 +90,11 @@ SEXP Rdownload(SEXP args)
 }

 Rconnection attribute_hidden
-R_newurl(const char *description, const char * const mode, int type)
+R_newurl(const char *description, const char * const mode, SEXP
headers, int type)
 {
     if(!initialized) internet_Init();
     if(initialized > 0)
- return (*ptr->newurl)(description, mode, type);
+ return (*ptr->newurl)(description, mode, headers, type);
     else {
  error(_("internet routines cannot be loaded"));
  return (Rconnection)0;
@@ -118,7 +118,7 @@ void *R_HTTPOpen(const char *url)
 {
     if(!initialized) internet_Init();
     if(initialized > 0)
- return (*ptr->HTTPOpen)(url, NULL, 0);
+ return (*ptr->HTTPOpen)(url, NULL, NULL, 0);
     else {
  error(_("internet routines cannot be loaded"));
  return NULL;
@@ -340,11 +340,11 @@ SEXP attribute_hidden do_curlDownload(SEXP call,
SEXP op, SEXP args, SEXP rho)
 }

 Rconnection attribute_hidden
-R_newCurlUrl(const char *description, const char * const mode, int type)
+R_newCurlUrl(const char *description, const char * const mode, SEXP
headers, int type)
 {
     if(!initialized) internet_Init();
     if(initialized > 0)
- return (*ptr->newcurlurl)(description, mode, type);
+ return (*ptr->newcurlurl)(description, mode, headers, type);
     else {
  error(_("internet routines cannot be loaded"));
  return (Rconnection)0;
diff --git a/src/main/names.c b/src/main/names.c
index ed21798a85..e03fdd4588 100644
--- a/src/main/names.c
+++ b/src/main/names.c
@@ -861,7 +861,7 @@ FUNTAB R_FunTab[] =
 {"close", do_close, 0,      111,     2,      {PP_FUNCALL, PREC_FN, 0}},
 {"flush", do_flush, 0,      111,     1,      {PP_FUNCALL, PREC_FN, 0}},
 {"file", do_url, 1,      11,     6,      {PP_FUNCALL, PREC_FN, 0}},
-{"url", do_url, 0,      11,     5,      {PP_FUNCALL, PREC_FN, 0}},
+{"url", do_url, 0,      11,     6,      {PP_FUNCALL, PREC_FN, 0}},
 {"pipe", do_pipe, 0,      11,     3,      {PP_FUNCALL, PREC_FN, 0}},
 {"fifo", do_fifo, 0,      11,     4,      {PP_FUNCALL, PREC_FN, 0}},
 {"gzfile", do_gzfile, 0,      11,     4,      {PP_FUNCALL, PREC_FN, 0}},
@@ -983,7 +983,7 @@ FUNTAB R_FunTab[] =
 {"eSoftVersion",do_eSoftVersion, 0, 11, 0, {PP_FUNCALL, PREC_FN, 0}},
 {"curlVersion", do_curlVersion, 0, 11, 0, {PP_FUNCALL, PREC_FN, 0}},
 {"curlGetHeaders",do_curlGetHeaders,0, 11, 3, {PP_FUNCALL, PREC_FN, 0}},
-{"curlDownload",do_curlDownload, 0, 11, 5, {PP_FUNCALL, PREC_FN, 0}},
+{"curlDownload",do_curlDownload, 0, 11, 6, {PP_FUNCALL, PREC_FN, 0}},

 {NULL, NULL, 0, 0, 0, {PP_INVALID, PREC_FN, 0}},
 };
diff --git a/src/modules/internet/internet.c b/src/modules/internet/internet.c
index 0a4305348f..8f6c75931c 100644
--- a/src/modules/internet/internet.c
+++ b/src/modules/internet/internet.c
@@ -32,7 +32,7 @@
 #include <errno.h>
 #include <R_ext/Print.h>

-static void *in_R_HTTPOpen(const char *url, const char *headers,
const int cacheOK);
+static void *in_R_HTTPOpen(const char *url, const char *agent, const
char *headers, const int cacheOK);
 static int   in_R_HTTPRead(void *ctx, char *dest, int len);
 static void  in_R_HTTPClose(void *ctx);

@@ -44,17 +44,17 @@ SEXP in_do_curlVersion(SEXP call, SEXP op, SEXP
args, SEXP rho);
 SEXP in_do_curlGetHeaders(SEXP call, SEXP op, SEXP args, SEXP rho);
 SEXP in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho);
 Rconnection
-in_newCurlUrl(const char *description, const char * const mode, int type);
+in_newCurlUrl(const char *description, const char * const mode, SEXP
headers, int type);

 #ifdef Win32
-static void *in_R_HTTPOpen2(const char *url, const char *headers,
const int cacheOK);
+static void *in_R_HTTPOpen2(const char *url, const char *agent, const
char *headers, const int cacheOK);
 static int   in_R_HTTPRead2(void *ctx, char *dest, int len);
 static void  in_R_HTTPClose2(void *ctx);
 static void *in_R_FTPOpen2(const char *url);

-#define Ri_HTTPOpen(url, headers, cacheOK) \
-    (meth ? in_R_HTTPOpen2(url, headers, cacheOK) : \
- in_R_HTTPOpen(url, headers, cacheOK));
+#define Ri_HTTPOpen(url, agent, headers, cacheOK)   \
+    (meth ? in_R_HTTPOpen2(url, agent, headers, cacheOK) : \
+       in_R_HTTPOpen(url, agent, headers, cacheOK));

 #define Ri_HTTPRead(ctx, dest, len) \
     (meth ? in_R_HTTPRead2(ctx, dest, len) : in_R_HTTPRead(ctx, dest, len))
@@ -115,19 +115,20 @@ static Rboolean url_open(Rconnection con)
 #endif
     case HTTPsh:
     {
- SEXP sheaders, agentFun;
- const char *headers;
+ SEXP sagent, agentFun;
+ const char *agent;
  SEXP s_makeUserAgent = install("makeUserAgent");
  agentFun = PROTECT(lang1(s_makeUserAgent)); // defaults to ,TRUE
  SEXP utilsNS = PROTECT(R_FindNamespace(mkString("utils")));
- sheaders = eval(agentFun, utilsNS);
+ struct urlconn *uc = con->private;
+ sagent = eval(agentFun, utilsNS);
  UNPROTECT(1); /* utilsNS */
- PROTECT(sheaders);
- if(TYPEOF(sheaders) == NILSXP)
-    headers = NULL;
+ PROTECT(sagent);
+ if(TYPEOF(sagent) == NILSXP)
+    agent = NULL;
  else
-    headers = CHAR(STRING_ELT(sheaders, 0));
- ctxt = in_R_HTTPOpen(url, headers, 0);
+    agent = CHAR(STRING_ELT(sagent, 0));
+ ctxt = in_R_HTTPOpen(url, agent, uc->headers, 0);
  UNPROTECT(2);
  if(ctxt == NULL) {
   /* if we call error() we get a connection leak*/
@@ -167,13 +168,15 @@ static Rboolean url_open(Rconnection con)
 static void url_close(Rconnection con)
 {
     UrlScheme type = ((Rurlconn)(con->private))->type;
+    struct urlconn *uc = con->private;
     switch(type) {
     case HTTPsh:
     case HTTPSsh:
- in_R_HTTPClose(((Rurlconn)(con->private))->ctxt);
+ if (uc && uc->headers) free(uc->headers);
+ in_R_HTTPClose(uc->ctxt);
  break;
     case FTPsh:
- in_R_FTPClose(((Rurlconn)(con->private))->ctxt);
+ in_R_FTPClose(uc->ctxt);
  break;
     default:
  break;
@@ -239,16 +242,17 @@ static Rboolean url_open2(Rconnection con)
     case HTTPSsh:
     case HTTPsh:
     {
- SEXP sheaders, agentFun;
- const char *headers;
+ SEXP sagent, agentFun;
+ const char *agent;
  SEXP s_makeUserAgent = install("makeUserAgent");
+ struct urlconn * uc = con->private;
  agentFun = PROTECT(lang2(s_makeUserAgent, ScalarLogical(0)));
- sheaders = PROTECT(eval(agentFun, R_FindNamespace(mkString("utils"))));
- if(TYPEOF(sheaders) == NILSXP)
-    headers = NULL;
+ sagent = PROTECT(eval(agentFun, R_FindNamespace(mkString("utils"))));
+ if(TYPEOF(sagent) == NILSXP)
+    agent = NULL;
  else
-    headers = CHAR(STRING_ELT(sheaders, 0));
- ctxt = in_R_HTTPOpen2(url, headers, 0);
+    agent = CHAR(STRING_ELT(sagent, 0));
+ ctxt = in_R_HTTPOpen2(url, agent, uc->headers, 0);
  UNPROTECT(2);
  if(ctxt == NULL) {
   /* if we call error() we get a connection leak*/
@@ -340,10 +344,9 @@ static size_t url_read2(void *ptr, size_t size,
size_t nitems,
 #endif

 static Rconnection
-in_R_newurl(const char *description, const char * const mode, int type)
+in_R_newurl(const char *description, const char * const mode, SEXP
headers, int type)
 {
     Rconnection new;
-
     new = (Rconnection) malloc(sizeof(struct Rconn));
     if(!new) error(_("allocation of url connection failed"));
     new->class = (char *) malloc(strlen("url-wininet") + 1);
@@ -377,12 +380,21 @@ in_R_newurl(const char *description, const char
* const mode, int type)
  strcpy(new->class, "url");
     }
     new->fgetc = &dummy_fgetc;
-    new->private = (void *) malloc(sizeof(struct urlconn));
+    struct urlconn *uc = new->private = (void *) malloc(sizeof(struct
urlconn));
     if(!new->private) {
  free(new->description); free(new->class); free(new);
  error(_("allocation of url connection failed"));
  /* for Solaris 12.5 */ new = NULL;
     }
+    uc->headers = NULL;
+    if(!isNull(headers)) {
+ uc->headers = strdup(CHAR(STRING_ELT(headers, 0)));
+ if(!uc->headers) {
+    free(new->description); free(new->class); free(new->private); free(new);
+    error(_("allocation of url connection failed"));
+    /* for Solaris 12.5 */ new = NULL;
+ }
+    }

     IDquiet = TRUE;
     return new;
@@ -443,7 +455,7 @@ static void doneprogressbar(void *data)
 #define IBUFSIZE 4096
 static SEXP in_do_download(SEXP args)
 {
-    SEXP scmd, sfile, smode;
+    SEXP scmd, sfile, smode, sheaders;
     const char *url, *file, *mode;
     int quiet, status = 0, cacheOK;
 #ifdef Win32
@@ -470,10 +482,13 @@ static SEXP in_do_download(SEXP args)
     if(!isString(smode) || length(smode) != 1)
  error(_("invalid '%s' argument"), "mode");
     mode = CHAR(STRING_ELT(smode, 0));
-    cacheOK = asLogical(CAR(args));
+    cacheOK = asLogical(CAR(args)); args = CDR(args);
     if(cacheOK == NA_LOGICAL)
  error(_("invalid '%s' argument"), "cacheOK");
     Rboolean file_URL = (strncmp(url, "file://", 7) == 0);
+    sheaders = CAR(args);
+    if(TYPEOF(sheaders) != NILSXP && !isString(sheaders))
+        error(_("invalid '%s' argument"), "headers");
 #ifdef Win32
     int meth = asLogical(CADR(args));
     if(meth == NA_LOGICAL)
@@ -542,7 +557,7 @@ static SEXP in_do_download(SEXP args)

  R_Busy(1);
  if(!quiet) REprintf(_("trying URL '%s'\n"), url);
- SEXP agentFun, sheaders;
+ SEXP agentFun, sagent;
 #ifdef Win32
  R_FlushConsole();
  if(meth)
@@ -553,12 +568,15 @@ static SEXP in_do_download(SEXP args)
  agentFun = PROTECT(lang1(install("makeUserAgent")));
 #endif
  SEXP utilsNS = PROTECT(R_FindNamespace(mkString("utils")));
- sheaders = eval(agentFun, utilsNS);
+ sagent = eval(agentFun, utilsNS);
  UNPROTECT(1); /* utilsNS */
- PROTECT(sheaders);
- const char *headers = (TYPEOF(sheaders) == NILSXP) ?
+ PROTECT(sagent);
+ const char *cagent = (TYPEOF(sagent) == NILSXP) ?
+    NULL : CHAR(STRING_ELT(sagent, 0));
+ /* TODO: flatten headers */
+ const char *cheaders = (TYPEOF(sheaders) == NILSXP) ?
     NULL : CHAR(STRING_ELT(sheaders, 0));
- ctxt = Ri_HTTPOpen(url, headers, cacheOK);
+ ctxt = Ri_HTTPOpen(url, cagent, cheaders, cacheOK);
  UNPROTECT(2);
  if(ctxt == NULL) status = 1;
  else {
@@ -766,18 +784,31 @@ static SEXP in_do_download(SEXP args)
 }


-void *in_R_HTTPOpen(const char *url, const char *headers, const int cacheOK)
+void *in_R_HTTPOpen(const char *url, const char *agent, const char
*headers, const int cacheOK)
 {
     inetconn *con;
     void *ctxt;
     int timeout = asInteger(GetOption1(install("timeout")));
     DLsize_t len = -1;
     char *type = NULL;
+    char *fullheaders = NULL;

     if(timeout == NA_INTEGER || timeout <= 0) timeout = 60;

     RxmlNanoHTTPTimeout(timeout);
-    ctxt = RxmlNanoHTTPOpen(url, NULL, headers, cacheOK);
+
+    if (agent || headers) {
+ fullheaders = malloc((agent ? strlen(agent) : 0) +
+     (headers ? strlen(headers) : 0) + 1);
+ if(!fullheaders) error(_("could not allocate memory for http headers"));
+ fullheaders[0] = '\0';
+ if (agent) strcat(fullheaders, agent);
+ if (headers) strcat(fullheaders, headers);
+    }
+
+    ctxt = RxmlNanoHTTPOpen(url, NULL, fullheaders, cacheOK);
+    if (fullheaders) free(fullheaders);
+
     if(ctxt != NULL) {
  int rc = RxmlNanoHTTPReturnCode(ctxt);
  if(rc != 200) {
@@ -885,7 +916,7 @@ typedef struct wictxt {
     HINTERNET session;
 } wIctxt, *WIctxt;

-static void *in_R_HTTPOpen2(const char *url, const char *headers,
+static void *in_R_HTTPOpen2(const char *url, const char *agent, const
char *headers,
     const int cacheOK)
 {
     WIctxt  wictxt;
@@ -896,7 +927,7 @@ static void *in_R_HTTPOpen2(const char *url, const
char *headers,
     wictxt->length = -1;
     wictxt->type = NULL;
     wictxt->hand =
- InternetOpen(headers, INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
+ InternetOpen(agent, INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
     if(!wictxt->hand) {
  free(wictxt);
  /* error("cannot open Internet connection"); */
@@ -906,7 +937,7 @@ static void *in_R_HTTPOpen2(const char *url, const
char *headers,
     // use keep-alive semantics, do not use local WinINet cache.
     DWORD flags = INTERNET_FLAG_KEEP_CONNECTION | INTERNET_FLAG_NO_CACHE_WRITE;
     if(!cacheOK) flags |= INTERNET_FLAG_PRAGMA_NOCACHE;
-    wictxt->session = InternetOpenUrl(wictxt->hand, url, NULL, 0, flags, 0);
+    wictxt->session = InternetOpenUrl(wictxt->hand, url, headers,
headers ? -1 : 0, flags, 0);
     if(!wictxt->session) {
  DWORD err1 = GetLastError(), err2, blen = 101;
  InternetCloseHandle(wictxt->hand);
diff --git a/src/modules/internet/libcurl.c b/src/modules/internet/libcurl.c
index 669c7240ef..6bf01ef175 100644
--- a/src/modules/internet/libcurl.c
+++ b/src/modules/internet/libcurl.c
@@ -222,7 +222,6 @@ static int curlMultiCheckerrs(CURLM *mhnd)
     }
     return retval;
 }
-
 static void curlCommon(CURL *hnd, int redirect, int verify)
 {
     const char *capath = getenv("CURL_CA_BUNDLE");
@@ -469,10 +468,10 @@ in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
     error(_("download.file(method = \"libcurl\") is not supported on this platform"));
     return R_NilValue;
 #else
-    SEXP scmd, sfile, smode;
+    SEXP scmd, sfile, smode, sheaders;
     const char *url, *file, *mode;
     int quiet, cacheOK;
-    struct curl_slist *slist1 = NULL;
+    struct curl_slist *headers = NULL;

     scmd = CAR(args); args = CDR(args);
     if (!isString(scmd) || length(scmd) < 1)
@@ -490,9 +489,23 @@ in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
     if (!isString(smode) || length(smode) != 1)
  error(_("invalid '%s' argument"), "mode");
     mode = CHAR(STRING_ELT(smode, 0));
-    cacheOK = asLogical(CAR(args));
+    cacheOK = asLogical(CAR(args)); args = CDR(args);
     if (cacheOK == NA_LOGICAL)
  error(_("invalid '%s' argument"), "cacheOK");
+    sheaders = CAR(args);
+    if(TYPEOF(sheaders) != NILSXP && !isString(sheaders))
+        error(_("invalid '%s' argument"), "headers");
+    if(TYPEOF(sheaders) != NILSXP) {
+ for (int i = 0; i < LENGTH(sheaders); i++) {
+    struct curl_slist *tmp =
+ curl_slist_append(headers, CHAR(STRING_ELT(sheaders, i)));
+    if (!tmp) {
+ curl_slist_free_all(headers);
+ error(_("out of memory"));
+    }
+    headers = tmp;
+ }
+    }

     /* This comes mainly from curl --libcurl on the call used by
        download.file(method = "curl").
@@ -502,7 +515,13 @@ in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
     if (!cacheOK) {
  /* This _is_ the right way to do this: see §14.9 of
    http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html */
- slist1 = curl_slist_append(slist1, "Pragma: no-cache");
+ struct curl_slist *tmp =
+    curl_slist_append(headers, "Pragma: no-cache");
+ if  (!tmp) {
+    curl_slist_free_all(headers);
+    error(_("out of memory"));
+ }
+ headers = tmp;
     }

     CURLM *mhnd = curl_multi_init();
@@ -521,8 +540,7 @@ in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
 #if (LIBCURL_VERSION_MINOR >= 25)
  curl_easy_setopt(hnd[i], CURLOPT_TCP_KEEPALIVE, 1L);
 #endif
- if (!cacheOK)
-    curl_easy_setopt(hnd[i], CURLOPT_HTTPHEADER, slist1);
+ curl_easy_setopt(hnd[i], CURLOPT_HTTPHEADER, headers);

  /* check that destfile can be written */
  file = translateChar(STRING_ELT(sfile, i));
@@ -660,7 +678,7 @@ in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
     if(nurls == 1)
  curl_easy_getinfo(hnd[0], CURLINFO_RESPONSE_CODE, &status);
     curl_multi_cleanup(mhnd);
-    if (!cacheOK) curl_slist_free_all(slist1);
+    curl_slist_free_all(headers);

     if(nurls > 1) {
  if (n_err == nurls) error(_("cannot download any files"));
@@ -703,6 +721,7 @@ typedef struct Curlconn {
     Rboolean available; // to be read out
     int sr; // 'still running' count
     CURLM *mh; CURL *hnd;
+    struct curl_slist *headers;
 } *RCurlconn;

 static size_t rcvData(void *ptr, size_t size, size_t nitems, void *ctx)
@@ -771,6 +790,7 @@ static void Curl_close(Rconnection con)
 {
     RCurlconn ctxt = (RCurlconn)(con->private);

+    curl_slist_free_all(ctxt->headers);
     curl_multi_remove_handle(ctxt->mh, ctxt->hnd);
     curl_easy_cleanup(ctxt->hnd);
     curl_multi_cleanup(ctxt->mh);
@@ -830,6 +850,9 @@ static Rboolean Curl_open(Rconnection con)
     curl_easy_setopt(ctxt->hnd, CURLOPT_TCP_KEEPALIVE, 1L);
 #endif

+    if (ctxt->headers) {
+ curl_easy_setopt(ctxt->hnd, CURLOPT_HTTPHEADER, ctxt->headers);
+    }
     curl_easy_setopt(ctxt->hnd, CURLOPT_WRITEFUNCTION, rcvData);
     curl_easy_setopt(ctxt->hnd, CURLOPT_WRITEDATA, ctxt);
     ctxt->mh = curl_multi_init();
@@ -868,7 +891,8 @@ static int Curl_fgetc_internal(Rconnection con)

 // 'type' is unused.
 Rconnection
-in_newCurlUrl(const char *description, const char * const mode, int type)
+in_newCurlUrl(const char *description, const char * const mode,
+      SEXP headers, int type)
 {
 #ifdef HAVE_LIBCURL
     Rconnection new = (Rconnection) malloc(sizeof(struct Rconn));
@@ -909,6 +933,18 @@ in_do_curlUrl(const char * const mode, int type)
  error(_("allocation of url connection failed"));
  /* for Solaris 12.5 */ new = NULL;
     }
+    ctxt->headers = NULL;
+    for (int i = 0; i < LENGTH(headers); i++) {
+ struct curl_slist *tmp =
+    curl_slist_append(ctxt->headers, CHAR(STRING_ELT(headers, i)));
+ if (!tmp) {
+    free(new->description); free(new->class); free(new->private);
+    free(new); curl_slist_free_all(ctxt->headers);
+    error(_("allocation of url connection failed"));
+    /* for Solaris 12.5 */ new = NULL;
+ }
+ ctxt->headers = tmp;
+    }
     return new;
 #else
     error(_("url(method = \"libcurl\") is not supported on this platform"));

______________________________________________
[hidden email] mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

Reply | Threaded
Open this post in threaded view
|

Inconsistent returned values of normalizePath(NA_character_) on Windows and *nix

Yihui Xie-2
Hi,

I just noticed normalizePath(NA_character_) returns NA_character_ on
*nix but "%HOME%\\NA" on Windows (with a warning by default), where
%HOME% denotes the HOME folder like "C:\\Users\\John". I'm not sure if
this is a bug or by design.

Regards,
Yihui
--
https://yihui.name

______________________________________________
[hidden email] mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel