Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 250 encoded url param #290

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,10 @@ See the full list of available options at
### Remote URL ###

The URL of the original image to load is specified as the remainder of the
path, without any encoding. For example,
path and may be URL encoded. For example,
`http://localhost/200/https://willnorris.com/logo.jpg`.

In order to [optimize caching][], it is recommended that URLs not contain query
strings.
If remote image URLs contain query strings, it is recommended to URL-encode them when passing to imageproxy in order to [optimize caching][].

[optimize caching]: http://www.stevesouders.com/blog/2008/08/23/revving-filenames-dont-use-querystring/

Expand All @@ -75,6 +74,7 @@ x0.15 | 15% original height, proportional width | <a href="https://imageproxy
200x,q60 | 200px wide, proportional height, 60% quality | <a href="https://imageproxy.willnorris.com/200x,q60/https://willnorris.com/2013/12/small-things.jpg"><img src="https://imageproxy.willnorris.com/200x,q60/https://willnorris.com/2013/12/small-things.jpg" alt="200x,q60"></a>
200x,png | 200px wide, converted to PNG format | <a href="https://imageproxy.willnorris.com/200x,png/https://willnorris.com/2013/12/small-things.jpg"><img src="https://imageproxy.willnorris.com/200x,png/https://willnorris.com/2013/12/small-things.jpg" alt="200x,png"></a>
cx175,cw400,ch300,100x | crop to 400x300px starting at (175,0), scale to 100px wide | <a href="https://imageproxy.willnorris.com/cx175,cw400,ch300,100x/https://willnorris.com/2013/12/small-things.jpg"><img src="https://imageproxy.willnorris.com/cx175,cw400,ch300,100x/https://willnorris.com/2013/12/small-things.jpg" alt="cx175,cw400,ch300,100x"></a>
x | no options, don't transform, just proxy | <a href="https://imageproxy.willnorris.com/x/https://willnorris.com/2013/12/small-things.jpg"><img src="https://imageproxy.willnorris.com/x/https://willnorris.com/2013/12/small-things.jpg" alt="x"></a>

The [smart crop feature](https://godoc.org/willnorris.com/go/imageproxy#hdr-Smart_Crop)
can best be seen by comparing crops of [this source image][judah-sheets], with
Expand Down
95 changes: 70 additions & 25 deletions data.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,61 +308,106 @@ func (r Request) String() string {

// NewRequest parses an http.Request into an imageproxy Request. Options and
// the remote image URL are specified in the request path, formatted as:
// /{options}/{remote_url}. Options may be omitted, so a request path may
// simply contain /{remote_url}. The remote URL must be an absolute "http" or
// "https" URL, should not be URL encoded, and may contain a query string.
// /{options}/{remote_url}. Options may not be omitted, but `x` or `0x0` can
// be used as noop option (/x/{remote_url}).
// The remote URL must be an absolute "http(s)" URL unless BaseURL is set.
// The remote URL may be URL encoded.
//
// Assuming an imageproxy server running on localhost, the following are all
// valid imageproxy requests:
//
// http://localhost/100x200/http://example.com/image.jpg
// http://localhost/100x200,r90/http://example.com/image.jpg?foo=bar
// http://localhost//http://example.com/image.jpg
// http://localhost/http://example.com/image.jpg
// http://localhost/x/http://example.com/image.jpg
// http://localhost/x/http%3A%2F%2Fexample.com%2Fimage.jpg
func NewRequest(r *http.Request, baseURL *url.URL) (*Request, error) {
var err error
req := &Request{Original: r}

path := r.URL.EscapedPath()[1:] // strip leading slash
req.URL, err = parseURL(path)
if err != nil || !req.URL.IsAbs() {
// first segment should be options
parts := strings.SplitN(path, "/", 2)
if len(parts) != 2 {
return nil, URLError{"too few path segments", r.URL}
}

var err error
req.URL, err = parseURL(parts[1])
if err != nil {
return nil, URLError{fmt.Sprintf("unable to parse remote URL: %v", err), r.URL}
}
// first segment should be options
parts := strings.SplitN(path, "/", 2)
if len(parts) != 2 {
return nil, URLError{"too few path segments", r.URL}
}

req.Options = ParseOptions(parts[0])
req.URL, err = ParseURL(parts[1], (baseURL == nil)) // don't url decode if baseURL is set
if err != nil {
return nil, URLError{fmt.Sprintf("unable to parse remote URL: %v", err), r.URL}
}

req.Options = ParseOptions(parts[0])



if baseURL != nil {
req.URL = baseURL.ResolveReference(req.URL)
}

if !req.URL.IsAbs() {
return nil, URLError{"must provide absolute remote URL", r.URL}
return nil, URLError{"must provide absolute remote URL", req.URL}
}

if req.URL.Scheme != "http" && req.URL.Scheme != "https" {
return nil, URLError{"remote URL must have http or https scheme", r.URL}
}

// query string is always part of the remote URL
req.URL.RawQuery = r.URL.RawQuery
// only append original (unencoded) query string
// if we don't have one already. Example:
// http://localhost/x/https%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld?more=query
// should be https://example.com/foo/bar?hello=world
// not https://example.com/foo/bar?more=query
if len(r.URL.RawQuery) > 0 && len(req.URL.RawQuery) == 0 {
// query string is always part of the remote URL
req.URL.RawQuery = r.URL.RawQuery
}

return req, nil
}

var reCleanedURL = regexp.MustCompile(`^(https?):/+([^/])`)

// parseURL parses s as a URL, handling URLs that have been munged by
// ParseURL parses s as a URL, handling URLs that have been munged by
// path.Clean or a webserver that collapses multiple slashes.
func parseURL(s string) (*url.URL, error) {
func ParseURL(s string, urlDecode bool) (*url.URL, error) {
var err error

// convert http:/example.com to http://example.com
s = reCleanedURL.ReplaceAllString(s, "$1://$2")

// only decode remote url param if baseURL is not set
if urlDecode {
s, err = decodeURL(s)
if err != nil {
return nil, err
}
}

return url.Parse(s)
}

var reAbsURL = regexp.MustCompile(`^https?`)
var reDecodedURL = regexp.MustCompile(`^https?://`)

func decodeURL(s string) (string, error) {
var err error
// don't try to decode unless looks like abs url
if !reAbsURL.MatchString(s) {
return s, nil
}

// preserve original value in case we are not able to decode
decodedURL := s
maxDecodeAttempts := 3
for i := 0; !reDecodedURL.MatchString(decodedURL) && i < maxDecodeAttempts; i++ {
decodedURL, err = url.QueryUnescape(decodedURL)
if err != nil {
return "", URLError{fmt.Sprintf("remote URL could not be decoded: %v", err), nil}
}
}
if reDecodedURL.MatchString(decodedURL) {
return decodedURL, nil
} else {
// return original value. might be relative url (e.g. https/foo.jpg)
return s, nil
}
}
185 changes: 170 additions & 15 deletions data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ func TestNewRequest(t *testing.T) {
{"http://localhost//example.com/foo", "", emptyOptions, true},
{"http://localhost//ftp://example.com/foo", "", emptyOptions, true},

// invalid URL because options now required
{"http://localhost/http://example.com/foo", "", emptyOptions, true},

// invalid URL because remote url has invalid url encoding (the %u)
{"http://localhost/x/https%253A%252F%252Fexample.com%252F%253FbadParam%253D%25u0414", "", emptyOptions, true},

// invalid options. These won't return errors, but will not fully parse the options
{
"http://localhost/s/http://example.com/",
Expand All @@ -123,39 +129,80 @@ func TestNewRequest(t *testing.T) {
"http://example.com/", Options{Width: 1}, false,
},


// valid URLs

{
"http://localhost/http://example.com/foo",
"http://localhost//http://example.com/foo",
"http://example.com/foo", emptyOptions, false,
},
{
"http://localhost//http://example.com/foo",
"http://localhost/x/http://example.com/foo",
"http://example.com/foo", emptyOptions, false,
},
{
"http://localhost//https://example.com/foo",
"http://localhost/x/http://example.com/foo",
"http://example.com/foo", emptyOptions, false,
},
{
"http://localhost/0x0/https://example.com/foo",
"https://example.com/foo", emptyOptions, false,
},
{
"http://localhost/1x2/http://example.com/foo",
"http://example.com/foo", Options{Width: 1, Height: 2}, false,
},
{
"http://localhost//http://example.com/foo?bar",
"http://localhost/0x0/http://example.com/foo?bar",
"http://example.com/foo?bar", emptyOptions, false,
},
{
"http://localhost/http:/example.com/foo",
"http://localhost/x/http:/example.com/foo",
"http://example.com/foo", emptyOptions, false,
},
{
"http://localhost/http:///example.com/foo",
"http://localhost/x/http:///example.com/foo",
"http://example.com/foo", emptyOptions, false,
},
{ // escaped path
"http://localhost/http://example.com/%2C",
"http://localhost/x/http://example.com/%2C",
"http://example.com/%2C", emptyOptions, false,
},
// unescaped querystring
{
"http://localhost/x/http://example.com/foo/bar?hello=world",
"http://example.com/foo/bar?hello=world", emptyOptions, false,
},
// escaped remote including querystring
{
"http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld",
"http://example.com/foo/bar?hello=world", emptyOptions, false,
},
// escaped remote including encoded querystring and unencoded querystring (not retained)
{
"http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld?a=b",
"http://example.com/foo/bar?hello=world", emptyOptions, false,
},
// escaped remote WITHOUT encoded querystring and unencoded querystring (should be retained)
{
"http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar?a=b",
"http://example.com/foo/bar?a=b", emptyOptions, false,
},
{
"http://localhost/x/https%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld",
"https://example.com/foo/bar?hello=world", emptyOptions, false,
},
// multi-escaped remote
{
"http://localhost/x/https%25253A%25252F%25252Fexample.com%25252Ffoo%25252Fbar%25253Fhello%25253Dworld",
"https://example.com/foo/bar?hello=world", emptyOptions, false,
},
// escaped remote containing double escaped url as param
// test that we don't over-decode remote url breaking parameters
{
"http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld%26url%3Dhttps%253A%252F%252Fwww.example.com%252F%253Ffoo%253Dbar%2526hello%253Dworld",
"http://example.com/foo/bar?hello=world&url=https%3A%2F%2Fwww.example.com%2F%3Ffoo%3Dbar%26hello%3Dworld", emptyOptions, false,
},
}

for _, tt := range tests {
Expand Down Expand Up @@ -186,16 +233,124 @@ func TestNewRequest(t *testing.T) {
}

func TestNewRequest_BaseURL(t *testing.T) {
req, _ := http.NewRequest("GET", "/x/path", nil)
base, _ := url.Parse("https://example.com/")
tests := []struct {
BaseURL string // base url to use
URL string // input URL to parse as an imageproxy request
RemoteURL string // expected URL of remote image parsed from input
Options Options // expected options parsed from input
ExpectError bool // whether an error is expected from NewRequest
}{
{
"http://example.com/",
"http://localhost/x/foo",
"http://example.com/foo", emptyOptions, false,
},
{
"http://example.com/hello",
"http://localhost/x//foo/bar",
"http://example.com/foo/bar", emptyOptions, false,
},
// if BaseURL doesn't have trailing slash
// URL.ResolveReference will strip last directory
{
"http://example.com/hello/",
"http://localhost/x/foo/bar",
"http://example.com/hello/foo/bar", emptyOptions, false,
},
{
"http://example.com/hello/",
"http://localhost/x/../foo/bar",
"http://example.com/foo/bar", emptyOptions, false,
},
// relative remote urls (baseURL set) should not have URL Decoding even if
// they start with http
{
"http://example.com/hello/",
"http://localhost/x/https%3A%2F%2Fimg.example.com%2Fpic.jpg",
"http://example.com/hello/https%3A%2F%2Fimg.example.com%2Fpic.jpg", emptyOptions, false,
},
}

r, err := NewRequest(req, base)
if err != nil {
t.Errorf("NewRequest(%v, %v) returned unexpected error: %v", req, base, err)
for _, tt := range tests {
req, err := http.NewRequest("GET", tt.URL, nil)
if err != nil {
t.Errorf("http.NewRequest(%q) returned error: %v", tt.URL, err)
continue
}
base, err := url.Parse(tt.BaseURL)
if err != nil {
t.Errorf("url.Parse(%q) returned error: %v", tt.BaseURL, err)
continue
}

r, err := NewRequest(req, base)
if tt.ExpectError {
if err == nil {
t.Errorf("NewRequest(%v, %v) did not return expected error", req, base)
}
continue
} else if err != nil {
t.Errorf("NewRequest(%v, %v) returned unexpected error: %v", req, base, err)
continue
}

if got, want := r.URL.String(), tt.RemoteURL; got != want {
t.Errorf("NewRequest(%q, %q) request URL = %v, want %v", tt.URL, tt.BaseURL, got, want)
}
if got, want := r.Options, tt.Options; got != want {
t.Errorf("NewRequest(%q, %q) request options = %v, want %v", tt.URL, tt.BaseURL, got, want)
}
}
}



want := "https://example.com/path#0x0"
if got := r.String(); got != want {
t.Errorf("NewRequest(%v, %v) returned %q, want %q", req, base, got, want)
func TestParseURL(t *testing.T) {
tests := []struct {
URL string // input URL to parse as an imageproxy request
URLDecode bool // expected options parsed from input
URLExpected string // expected URL of remote image parsed from input
ExpectError bool // whether an error is expected from NewRequest
}{
// should fix missing slashes
{"http:/example.com/", true,
"http://example.com/", false},
{"https:/example.com/", true,
"https://example.com/", false},

// should decode when told
{"https%253A%252F%252Fexample.com%252Fimg.jpg%253Ffoo%253Dbar%2526bar%253Ddo%25252Fnot%25252Fdecode%25252Fme", true,
"https://example.com/img.jpg?foo=bar&bar=do%2Fnot%2Fdecode%2Fme", false},

// should NOT decode unless told (e.g. baseURL set)
{"https%3A%2F%2Fexample.com%2Fimg.jpg%3Ffoo%3Dbar%26bar%3Ddo%252Fnot%252Fdecode%252Fme", false,
"https%3A%2F%2Fexample.com%2Fimg.jpg%3Ffoo%3Dbar%26bar%3Ddo%252Fnot%252Fdecode%252Fme", false},

// should return original url if asked to decode but can't find anything that
// looks like absolute url (starts with https?://) before giving up
{"https%3Aexample.com%2Fimg.jpg", true,
"https%3Aexample.com%2Fimg.jpg", false},

// should error when asked to decode a url with invalid encoding
{"https%253A%252F%252Fexample.com%252F%253FbadParam%253D%25u0414", true,
"", true},
}

for _, tt := range tests {

URLOut, err := ParseURL(tt.URL, tt.URLDecode)
if tt.ExpectError {
if err == nil {
t.Errorf("ParseURL(%q, decode=%v) did not return expected error", tt.URL, tt.URLDecode)
}
continue
} else if err != nil {
t.Errorf("ParseURL(%v, decode=%v) return unexpected error: %v", tt.URL, tt.URLDecode, err)
continue
}

if got, want := URLOut.String(), tt.URLExpected; got != want {
t.Errorf("ParseURL(%q, decode=%v) returned = %v, wanted %v", tt.URL, tt.URLDecode, got, want)
}
}
}
Loading