From 57109080e68e237f3a509eee11c45f07a9a878fa Mon Sep 17 00:00:00 2001 From: AlexCD Date: Fri, 23 Apr 2021 23:42:34 +0300 Subject: [PATCH 1/2] respect original request timeout --- .../htmlunit/HttpWebConnection.java | 31 +++++++++++ .../gargoylesoftware/htmlunit/WebClient.java | 41 +++++++++++++- .../htmlunit/WebClientOptions.java | 4 +- .../htmlunit/WebClient4Test.java | 55 +++++++++++++++++++ 4 files changed, 126 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java b/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java index 54be6f3ef50..5f9646d34e6 100644 --- a/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java +++ b/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java @@ -24,17 +24,23 @@ import java.io.InputStream; import java.io.OutputStream; import java.net.InetAddress; +import java.net.SocketException; +import java.net.SocketTimeoutException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.Charset; import java.nio.file.Files; +import java.time.Instant; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Timer; +import java.util.TimerTask; import java.util.WeakHashMap; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.SSLContext; @@ -154,6 +160,8 @@ public class HttpWebConnection implements WebConnection { /** Maintains a separate {@link HttpClientContext} object per HttpWebConnection and thread. */ private final Map httpClientContextByThread_ = new WeakHashMap<>(); + private final Timer timeoutGuard = new Timer(true); + /** * Creates a new HTTP web connection instance. * @param webClient the WebClient that is using this connection @@ -164,6 +172,22 @@ public HttpWebConnection(final WebClient webClient) { usedOptions_ = new WebClientOptions(); } + private AtomicBoolean startTimeoutGuard(final WebRequest webRequest, final HttpUriRequest httpMethod) { + AtomicBoolean aborted = new AtomicBoolean(false); + int timeoutMs = getTimeout(webRequest); + TimerTask task = new TimerTask() { + @Override + public void run() { + if (httpMethod != null && !httpMethod.isAborted()) { + httpMethod.abort(); + aborted.set(true); + } + } + }; + timeoutGuard.schedule(task, timeoutMs); + return aborted; + } + /** * {@inheritDoc} */ @@ -184,12 +208,19 @@ public WebResponse getResponse(final WebRequest webRequest) throws IOException { final URL url = webRequest.getUrl(); final HttpHost httpHost = new HttpHost(url.getHost(), url.getPort(), url.getProtocol()); final long startTime = System.currentTimeMillis(); + final AtomicBoolean timedOut = startTimeoutGuard(webRequest, httpMethod); final HttpContext httpContext = getHttpContext(); HttpResponse httpResponse = null; try { try (CloseableHttpClient closeableHttpClient = builder.build()) { httpResponse = closeableHttpClient.execute(httpHost, httpMethod, httpContext); + } catch (SocketException ex) { + if (timedOut.get()) { + //translate socket closed exception to timeout exception + throw new SocketTimeoutException(); + } + throw ex; } } catch (final SSLPeerUnverifiedException s) { diff --git a/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java b/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java index 707ba8d74cd..b4ba5ece503 100644 --- a/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java +++ b/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java @@ -33,11 +33,15 @@ import java.io.Serializable; import java.lang.ref.WeakReference; import java.net.MalformedURLException; +import java.net.SocketTimeoutException; import java.net.URL; import java.net.URLConnection; import java.net.URLDecoder; +import java.nio.channels.InterruptedByTimeoutException; import java.nio.charset.Charset; import java.nio.file.Files; +import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.Collections; import java.util.ConcurrentModificationException; @@ -1503,6 +1507,31 @@ public WebResponse loadWebResponse(final WebRequest webRequest) throws IOExcepti } } + /** + * Returns the hard timeout to use for the whole request. + * @param webRequest the request might have his own timeout + * @return the WebClient's timeout + */ + protected int getTimeout(final WebRequest webRequest) { + if (webRequest == null || webRequest.getTimeout() < 0) { + return getOptions().getTimeout(); + } + return webRequest.getTimeout(); + } + + private void setTimeout(WebRequest request, int remainingTimeoutMs, Instant start) throws IOException { + int timeout = (int) (remainingTimeoutMs - Duration.between(start, Instant.now()).toMillis()); + if (timeout <= 0) { + throw new SocketTimeoutException(); + } + request.setTimeout(timeout); + } + + private WebResponse loadWebResponseFromWebConnection(final WebRequest webRequest, + final int allowedRedirects) throws IOException { + return loadWebResponseFromWebConnection(webRequest, allowedRedirects, null, 0); + } + /** * Loads a {@link WebResponse} from the server through the WebConnection. * @param webRequest the request @@ -1511,8 +1540,13 @@ public WebResponse loadWebResponse(final WebRequest webRequest) throws IOExcepti * @return the resultant {@link WebResponse} */ private WebResponse loadWebResponseFromWebConnection(final WebRequest webRequest, - final int allowedRedirects) throws IOException { + final int allowedRedirects, Instant start, int timeoutMs) throws IOException { + if(start == null) { + //set timeout using first request (used in next redirect requests) + timeoutMs = getTimeout(webRequest); + start = Instant.now(); + } URL url = webRequest.getUrl(); final HttpMethod method = webRequest.getHttpMethod(); final List parameters = webRequest.getRequestParameters(); @@ -1577,6 +1611,7 @@ else if (!proxyConfig.shouldBypassProxy(webRequest.getUrl().getHost())) { final WebResponse fromCache = getCache().getCachedResponse(webRequest); final WebResponse webResponse; if (fromCache == null) { + setTimeout(webRequest, timeoutMs, start); webResponse = getWebConnection().getResponse(webRequest); } else { @@ -1638,7 +1673,7 @@ && getOptions().isRedirectEnabled()) { for (final Map.Entry entry : webRequest.getAdditionalHeaders().entrySet()) { wrs.setAdditionalHeader(entry.getKey(), entry.getValue()); } - return loadWebResponseFromWebConnection(wrs, allowedRedirects - 1); + return loadWebResponseFromWebConnection(wrs, allowedRedirects - 1, start, timeoutMs); } else if (status == HttpStatus.SC_TEMPORARY_REDIRECT || status == 308) { @@ -1663,7 +1698,7 @@ else if (status == HttpStatus.SC_TEMPORARY_REDIRECT wrs.setAdditionalHeader(entry.getKey(), entry.getValue()); } - return loadWebResponseFromWebConnection(wrs, allowedRedirects - 1); + return loadWebResponseFromWebConnection(wrs, allowedRedirects - 1, start, timeoutMs); } } diff --git a/src/main/java/com/gargoylesoftware/htmlunit/WebClientOptions.java b/src/main/java/com/gargoylesoftware/htmlunit/WebClientOptions.java index 85cf8e8f24a..a8c8b0b936f 100644 --- a/src/main/java/com/gargoylesoftware/htmlunit/WebClientOptions.java +++ b/src/main/java/com/gargoylesoftware/htmlunit/WebClientOptions.java @@ -453,8 +453,8 @@ public int getTimeout() { /** *

Sets the timeout of the {@link WebConnection}. Set to zero for an infinite wait.

* - *

Note: The timeout is used twice. The first is for making the socket connection, the second is - * for data retrieval. If the time is critical you must allow for twice the time specified here.

+ *

Note: This is a hard timeout. Total time spent for making the socket connection, data retrieval + * or following redirects (if enabled) is at most the time specified here.

* * @param timeout the value of the timeout in milliseconds */ diff --git a/src/test/java/com/gargoylesoftware/htmlunit/WebClient4Test.java b/src/test/java/com/gargoylesoftware/htmlunit/WebClient4Test.java index ef0c14a5d18..7b0a5bb2396 100644 --- a/src/test/java/com/gargoylesoftware/htmlunit/WebClient4Test.java +++ b/src/test/java/com/gargoylesoftware/htmlunit/WebClient4Test.java @@ -96,6 +96,7 @@ public void redirectInfinite303And307() throws Exception { try { client.getPage("http://localhost:" + PORT + RedirectServlet307.URL); + fail(); } catch (final Exception e) { assertTrue(e.getMessage(), e.getMessage().contains("Too much redirect")); @@ -345,6 +346,59 @@ public void timeout() throws Exception { client.getPage(URL_FIRST); } + public static class DelayRedirectServlet extends RedirectServlet{ + + public DelayRedirectServlet() { + super(301,"/test2"); + } + + @Override + protected void doGet(final HttpServletRequest req, final HttpServletResponse res) throws IOException { + try { + Thread.sleep(500); + } + catch (final InterruptedException e) { + throw new RuntimeException(e); + } + super.doGet(req, res); + } + } + + public static class AfterDelayRedirectServlet extends HttpServlet { + + @Override + protected void doGet(final HttpServletRequest req, final HttpServletResponse res) throws IOException { + try { + Thread.sleep(500); + } + catch (final InterruptedException e) { + throw new RuntimeException(e); + } + res.setContentType(MimeType.TEXT_HTML); + final Writer writer = res.getWriter(); + writer.write("foo"); + } + } + + @Test + public void hardTimeoutRedirects() throws Exception{ + final Map> servlets = new HashMap<>(); + servlets.put("/test1", DelayRedirectServlet.class); + servlets.put("/test2", AfterDelayRedirectServlet.class); + startWebServer("./", new String[0], servlets); + + final WebClient client = getWebClient(); + client.getOptions().setTimeout(750); + + try { + client.getPage(URL_FIRST + "test1"); + fail("Should timeout"); + } + catch (final SocketTimeoutException e) { + //expected + } + } + /** * Make sure cookies set for the request are overwriting the cookieManager. * @@ -433,6 +487,7 @@ public void redirectInfiniteMeta() throws Exception { try { client.getPage(URL_FIRST + "test1"); + fail(); } catch (final Exception e) { assertTrue(e.getMessage(), e.getMessage().contains("Too much redirect")); From f477b1b558c02747b8fb6b891a8a53c65d05011e Mon Sep 17 00:00:00 2001 From: AlexCD Date: Sat, 24 Apr 2021 01:01:49 +0300 Subject: [PATCH 2/2] fix race condition + cancel timeout task after request --- .../htmlunit/HttpWebConnection.java | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java b/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java index 5f9646d34e6..fa6811212a5 100644 --- a/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java +++ b/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java @@ -31,7 +31,6 @@ import java.net.URL; import java.nio.charset.Charset; import java.nio.file.Files; -import java.time.Instant; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -160,7 +159,7 @@ public class HttpWebConnection implements WebConnection { /** Maintains a separate {@link HttpClientContext} object per HttpWebConnection and thread. */ private final Map httpClientContextByThread_ = new WeakHashMap<>(); - private final Timer timeoutGuard = new Timer(true); + private final Timer timeoutTimer = new Timer(true); /** * Creates a new HTTP web connection instance. @@ -172,20 +171,27 @@ public HttpWebConnection(final WebClient webClient) { usedOptions_ = new WebClientOptions(); } - private AtomicBoolean startTimeoutGuard(final WebRequest webRequest, final HttpUriRequest httpMethod) { - AtomicBoolean aborted = new AtomicBoolean(false); - int timeoutMs = getTimeout(webRequest); - TimerTask task = new TimerTask() { - @Override - public void run() { - if (httpMethod != null && !httpMethod.isAborted()) { - httpMethod.abort(); - aborted.set(true); - } + private class TimeoutGuardTask extends TimerTask { + + private final AtomicBoolean aborted = new AtomicBoolean(false); + private final HttpUriRequest httpMethod; + + public TimeoutGuardTask(final HttpUriRequest httpMethod, final int timeoutMs) { + this.httpMethod = httpMethod; + timeoutTimer.schedule(this, timeoutMs); + } + + @Override + public void run() { + if (httpMethod != null && !httpMethod.isAborted()) { + aborted.set(true); + httpMethod.abort(); } - }; - timeoutGuard.schedule(task, timeoutMs); - return aborted; + } + + public boolean isTimedOut() { + return aborted.get(); + } } /** @@ -208,7 +214,7 @@ public WebResponse getResponse(final WebRequest webRequest) throws IOException { final URL url = webRequest.getUrl(); final HttpHost httpHost = new HttpHost(url.getHost(), url.getPort(), url.getProtocol()); final long startTime = System.currentTimeMillis(); - final AtomicBoolean timedOut = startTimeoutGuard(webRequest, httpMethod); + final TimeoutGuardTask timedOutGuard = new TimeoutGuardTask(httpMethod, getTimeout(webRequest)); final HttpContext httpContext = getHttpContext(); HttpResponse httpResponse = null; @@ -216,7 +222,7 @@ public WebResponse getResponse(final WebRequest webRequest) throws IOException { try (CloseableHttpClient closeableHttpClient = builder.build()) { httpResponse = closeableHttpClient.execute(httpHost, httpMethod, httpContext); } catch (SocketException ex) { - if (timedOut.get()) { + if (timedOutGuard.isTimedOut()) { //translate socket closed exception to timeout exception throw new SocketTimeoutException(); } @@ -242,6 +248,8 @@ public WebResponse getResponse(final WebRequest webRequest) throws IOException { // => best solution, discard the HttpClient instance. httpClientBuilder_.remove(Thread.currentThread()); throw e; + } finally { + timedOutGuard.cancel(); } final DownloadedContent downloadedBody = downloadResponseBody(httpResponse);