Skip to content

Commit 4da44e9

Browse files
committed
Added methods and util classes to clarify and test http range request parsing.
1 parent d428471 commit 4da44e9

File tree

3 files changed

+269
-49
lines changed

3 files changed

+269
-49
lines changed
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
package cwms.cda.api;
2+
3+
import org.jspecify.annotations.NonNull;
4+
5+
import java.util.*;
6+
import java.util.regex.*;
7+
8+
/**
9+
* Utility class for parsing HTTP Range headers.
10+
* These typically look like: bytes=100-1234
11+
* or: bytes=100- this is common to resume a download
12+
* or: bytes=0- equivalent to a regular request for the whole file
13+
* but by returning 206 we show that we support range requests
14+
* Note that multiple ranges can be requested at once such
15+
* as: bytes=500-600,700-999 Server responds identifies separator and then puts separator between chunks
16+
* bytes=0-0,-1 also legal its just the first and the last byte
17+
* or: bytes=500-600,601-999 legal but what is the point?
18+
* or: bytes=500-700,601-999 legal, notice they overlap.
19+
*
20+
*
21+
*/
22+
public class RangeParser {
23+
24+
private static final Pattern RANGE_PATTERN = Pattern.compile("(\\d*)-(\\d*)");
25+
26+
/**
27+
* Return a list of two element long[] containing byte ranges parsed from the HTTP Range header.
28+
* If the end of a range is not specified ( e.g. bytes=100- ) then a -1 is returned in the second position
29+
* If the range only includes a negative byte (e.g bytes=-50) then -1 is returned as the start of the range
30+
* and -1*end is returned as the end of the range. bytes=-50 will result in [-1,50]
31+
*
32+
* @param header the HTTP Range header
33+
* @return a list of byte ranges
34+
*/
35+
public static List<long[]> parse(String header) {
36+
if (header == null || header.isEmpty() ) {
37+
return Collections.emptyList();
38+
} else if ( !header.startsWith("bytes=")){
39+
throw new IllegalArgumentException("Invalid Range header: " + header);
40+
}
41+
42+
String rangePart = header.substring(6);
43+
List<long[]> retval = parseRanges(rangePart);
44+
if( retval.isEmpty() ){
45+
throw new IllegalArgumentException("Invalid Range header: " + header);
46+
}
47+
return retval;
48+
}
49+
50+
public static @NonNull List<long[]> parseRanges(String rangePart) {
51+
if( rangePart == null || rangePart.isEmpty() ){
52+
throw new IllegalArgumentException("Invalid range specified: " + rangePart);
53+
}
54+
String[] parts = rangePart.split(",");
55+
List<long[]> ranges = new ArrayList<>();
56+
57+
for (String part : parts) {
58+
Matcher m = RANGE_PATTERN.matcher(part.trim());
59+
if (m.matches()) {
60+
String start = m.group(1);
61+
String end = m.group(2);
62+
63+
long s = start.isEmpty() ? -1 : Long.parseLong(start);
64+
long e = end.isEmpty() ? -1 : Long.parseLong(end);
65+
66+
ranges.add(new long[]{s, e});
67+
}
68+
}
69+
return ranges;
70+
}
71+
72+
/**
73+
* The parse() method in this class can return -1 for unspecified values or when suffix ranges are supplied.
74+
* This method interprets the negative values in regard to the totalSize and returns inclusive indices of the
75+
* requested range.
76+
* @param inputs the array of start and end byte positions
77+
* @param totalBytes the total number of bytes in the file
78+
* @return a long array with the start and end byte positions, these are inclusive. [0,0] means return the first byte
79+
*/
80+
public static long[] interpret(long[] inputs, long totalBytes){
81+
if(inputs == null){
82+
throw new IllegalArgumentException("null range array provided");
83+
} else if( inputs.length != 2 ){
84+
throw new IllegalArgumentException("Invalid number of inputs: " + Arrays.toString(inputs));
85+
}
86+
87+
long start = inputs[0];
88+
long end = inputs[1];
89+
90+
if(start == -1L){
91+
// its a suffix request.
92+
start = totalBytes - end;
93+
end = totalBytes - 1;
94+
} else {
95+
if (start < 0 || end < start) {
96+
throw new IllegalArgumentException("Invalid range specified: " + Arrays.toString(inputs));
97+
}
98+
99+
start = Math.min(start, totalBytes - 1);
100+
end = Math.min(end, totalBytes - 1);
101+
}
102+
103+
return new long[]{start, end};
104+
}
105+
106+
107+
}
Lines changed: 52 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
package cwms.cda.api;
22

3+
import com.google.common.flogger.FluentLogger;
34
import io.javalin.core.util.Header;
45
import io.javalin.http.Context;
56
import java.io.IOException;
67
import java.io.InputStream;
78
import java.io.OutputStream;
8-
import java.util.Arrays;
99
import java.util.List;
10+
import org.apache.commons.io.IOUtils;
1011

1112
public class RangeRequestUtil {
13+
static FluentLogger logger = FluentLogger.forEnclosingClass();
1214

1315
private RangeRequestUtil() {
1416
// utility class
@@ -19,78 +21,79 @@ private RangeRequestUtil() {
1921
* take the InputStream, wrap it in a CompletedFuture and then process the request asynchronously. This
2022
* causes problems when the InputStream is tied to a database connection that gets closed before the
2123
* async processing happens. This method doesn't do the async thing but tries to support the rest.
22-
* @param ctx
23-
* @param is
24-
* @param mediaType
25-
* @param totalBytes
26-
* @throws IOException
24+
* @param ctx the Javalin context
25+
* @param is the input stream
26+
* @param mediaType the content type
27+
* @param totalBytes the total number of bytes in the input stream
28+
* @throws IOException if either of the streams throw an IOException
2729
*/
2830
public static void seekableStream(Context ctx, InputStream is, String mediaType, long totalBytes) throws IOException {
29-
long from = 0;
30-
long to = totalBytes - 1;
31+
3132
if (ctx.header(Header.RANGE) == null) {
33+
// Not a range request.
3234
ctx.res.setContentType(mediaType);
35+
3336
// Javalin's version of this method doesn't set the content-length
3437
// Not setting the content-length makes the servlet container use Transfer-Encoding=chunked.
3538
// Chunked is a worse experience overall, seems like we should just set the length if we know it.
36-
writeRange(ctx.res.getOutputStream(), is, from, Math.min(to, totalBytes - 1));
39+
ctx.header(Header.CONTENT_LENGTH, String.valueOf(totalBytes));
40+
41+
IOUtils.copyLarge(is, (OutputStream) ctx.res.getOutputStream(), 0, totalBytes);
3742
} else {
38-
int chunkSize = 128000;
3943
String rangeHeader = ctx.header(Header.RANGE);
40-
String[] eqSplit = rangeHeader.split("=", 2);
41-
String[] dashSplit = eqSplit[1].split("-", -1); // keep empty trailing part
42-
43-
List<String> requestedRange = Arrays.stream(dashSplit)
44-
.filter(s -> !s.isEmpty())
45-
.collect(java.util.stream.Collectors.toList());
46-
47-
from = Long.parseLong(requestedRange.get(0));
4844

49-
if (from + chunkSize > totalBytes) {
50-
// chunk bigger than file, write all
51-
to = totalBytes - 1;
52-
} else if (requestedRange.size() == 2) {
53-
// chunk smaller than file, to/from specified
54-
to = Long.parseLong(requestedRange.get(1));
45+
List<long[]> ranges = RangeParser.parse(rangeHeader);
46+
47+
long[] requestedRange = ranges.get(0);
48+
if( ranges.size() > 1 ){
49+
// we support range requests but we not currently supporting multiple ranges.
50+
// Range request are optional so we have choices what to do if multiple ranges are requested:
51+
// We could return 416 and hope the client figures out to only send one range
52+
// We could service the first range with 206 and ignore the other ranges
53+
// We could ignore the range request entirely and return the full body with 200
54+
// We could implement support for multiple ranges
55+
logger.atInfo().log("Multiple ranges requested, using first and ignoring additional ranges");
5556
} else {
56-
// chunk smaller than file, to/from not specified
57-
to = from + chunkSize - 1;
58-
}
57+
requestedRange = RangeParser.interpret(requestedRange, totalBytes);
5958

60-
ctx.status(206);
59+
long from = requestedRange[0];
60+
long to = requestedRange[1];
6161

62-
ctx.header(Header.ACCEPT_RANGES, "bytes");
63-
ctx.header(Header.CONTENT_RANGE, "bytes " + from + "-" + to + "/" + totalBytes);
62+
ctx.status(206);
6463

65-
ctx.res.setContentType(mediaType);
66-
ctx.header(Header.CONTENT_LENGTH, String.valueOf(Math.min(to - from + 1, totalBytes)));
67-
writeRange(ctx.res.getOutputStream(), is, from, Math.min(to, totalBytes - 1));
64+
ctx.header(Header.ACCEPT_RANGES, "bytes");
65+
ctx.header(Header.CONTENT_RANGE, "bytes " + from + "-" + to + "/" + totalBytes);
66+
67+
ctx.res.setContentType(mediaType);
68+
ctx.header(Header.CONTENT_LENGTH, String.valueOf(Math.min(to - from + 1, totalBytes)));
69+
writeRange(ctx.res.getOutputStream(), is, from, Math.min(to, totalBytes - 1));
70+
}
6871
}
6972
}
7073

71-
74+
/**
75+
* Writes a range of bytes from the input stream to the output stream.
76+
* @param out the output stream to write to.
77+
* @param in the input stream to read from. It is assumed that this stream is open and positioned at 0.
78+
* @param from the starting byte position to read from (inclusive)
79+
* @param to the ending byte position to read to (inclusive)
80+
* @throws IOException if either of the streams throw an IOException
81+
*/
7282
public static void writeRange(OutputStream out, InputStream in, long from, long to) throws IOException {
73-
writeRange(out, in, from, to, new byte[8192]);
83+
skip(in, from);
84+
long len = to - from + 1;
85+
86+
// If the inputOffset to IOUtils.copyLarge is not 0 then IOUtils will do its own skipping. For reasons
87+
// that IOUtils explains (quirks of certain streams) it does its skipping via read(). Using read() has performance
88+
// implications b/c all the skipped data gets copied to memory. We do our own skipping and then have IOUtils copy.
89+
IOUtils.copyLarge(in, out, 0, len);
7490
}
7591

76-
public static void writeRange(OutputStream out, InputStream is, long from, long to, byte[] buffer) throws IOException {
77-
long toSkip = from;
92+
private static void skip(InputStream is, long toSkip) throws IOException {
7893
while (toSkip > 0) {
7994
long skipped = is.skip(toSkip);
8095
toSkip -= skipped;
8196
}
82-
83-
long bytesLeft = to - from + 1;
84-
while (bytesLeft != 0L) {
85-
int maxRead = (int) Math.min(buffer.length, bytesLeft);
86-
int read = is.read(buffer, 0, maxRead);
87-
if (read == -1) {
88-
break;
89-
}
90-
out.write(buffer, 0, read);
91-
bytesLeft -= read;
92-
}
93-
9497
}
9598

9699
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package cwms.cda.api;
2+
3+
import org.junit.jupiter.api.Test;
4+
5+
import java.util.List;
6+
7+
import static org.junit.jupiter.api.Assertions.*;
8+
9+
class RangeParserTest {
10+
11+
@Test
12+
void testResume() {
13+
List<long[]> ranges = RangeParser.parse("bytes=100-");
14+
assertNotNull(ranges);
15+
assertEquals(1, ranges.size());
16+
assertArrayEquals(new long[]{100L, -1L}, ranges.get(0));
17+
}
18+
19+
@Test
20+
void testFirstK() {
21+
List<long[]> ranges = RangeParser.parse("bytes=0-1000");
22+
assertNotNull(ranges);
23+
assertEquals(1, ranges.size());
24+
assertArrayEquals(new long[]{0L, 1000L}, ranges.get(0));
25+
}
26+
27+
@Test
28+
void testFirstOpen() {
29+
List<long[]> ranges = RangeParser.parse("bytes=0-");
30+
assertNotNull(ranges);
31+
assertEquals(1, ranges.size());
32+
assertArrayEquals(new long[]{0L, -1L}, ranges.get(0));
33+
}
34+
35+
@Test
36+
void testSuffixOpen() {
37+
List<long[]> ranges = RangeParser.parse("bytes=-50");
38+
assertNotNull(ranges);
39+
assertEquals(1, ranges.size());
40+
assertArrayEquals(new long[]{-1L, 50L}, ranges.get(0));
41+
}
42+
43+
44+
@Test
45+
void testTwoPart() {
46+
List<long[]> ranges = RangeParser.parse("bytes=0-10,99-100");
47+
assertNotNull(ranges);
48+
assertEquals(2, ranges.size());
49+
assertArrayEquals(new long[]{0L, 10L}, ranges.get(0));
50+
assertArrayEquals(new long[]{99L, 100L}, ranges.get(1));
51+
}
52+
53+
54+
@Test
55+
void testMultiParse() {
56+
List<long[]> ranges = RangeParser.parse("bytes=0-99,200-299,-50");
57+
assertNotNull(ranges);
58+
assertEquals(3, ranges.size());
59+
assertArrayEquals(new long[]{0L, 99L}, ranges.get(0));
60+
assertArrayEquals(new long[]{200L, 299L}, ranges.get(1));
61+
assertArrayEquals(new long[]{-1L, 50L}, ranges.get(2));
62+
}
63+
64+
65+
@Test
66+
void testTwoWeird() {
67+
List<long[]> ranges = RangeParser.parse("bytes=0-0,-1");
68+
assertNotNull(ranges);
69+
assertEquals(2, ranges.size());
70+
assertArrayEquals(new long[]{0L, 0L}, ranges.get(0));
71+
assertArrayEquals(new long[]{-1L, 1L}, ranges.get(1));
72+
}
73+
74+
@Test
75+
void testNotBytes() {
76+
assertThrows(IllegalArgumentException.class, () -> RangeParser.parse("bits=0-10"));
77+
}
78+
79+
80+
@Test
81+
void testSuffixDoubleNeg() {
82+
assertThrows(IllegalArgumentException.class, () -> RangeParser.parse("bytes=--64"));
83+
}
84+
85+
86+
@Test
87+
void testSuffixClosed() {
88+
assertThrows(IllegalArgumentException.class, () ->
89+
RangeParser.parse("bytes=-50-100"));
90+
}
91+
92+
93+
@Test
94+
void testSuffixDoubleClosed() {
95+
assertThrows(IllegalArgumentException.class, () -> RangeParser.parse("bytes=-50--100"));
96+
}
97+
98+
@Test
99+
void testInterpret(){
100+
101+
assertArrayEquals(new long[]{0L, 10L}, RangeParser.interpret(new long[]{0L, 10L}, 100));
102+
assertArrayEquals(new long[]{0L, 0L}, RangeParser.interpret(new long[]{0L, 0L}, 100));
103+
assertArrayEquals(new long[]{8L, 12L}, RangeParser.interpret(new long[]{8L, 12L}, 100));
104+
assertArrayEquals(new long[]{8L, 99L}, RangeParser.interpret(new long[]{8L, 100L}, 100));
105+
assertArrayEquals(new long[]{8L, 99L}, RangeParser.interpret(new long[]{8L, 200L}, 100));
106+
107+
}
108+
109+
// probably invalid assertArrayEquals(new long[]{8L, 100L}, RangeParser.interpret(new long[]{100L, 200L}, 100));
110+
}

0 commit comments

Comments
 (0)