From 8ae0d6a509fd1b871457cf742369af04346933a8 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Fri, 10 May 2019 21:07:02 -0600 Subject: [PATCH] caddyhttp: Implement better HTTP matchers including regexp; add tests --- modules/caddyhttp/matchers.go | 199 ++++++++++++---- modules/caddyhttp/matchers_test.go | 346 ++++++++++++++++++++++++++++ modules/caddyhttp/replacer.go | 9 +- modules/caddyhttp/responsewriter.go | 25 +- modules/caddyhttp/routes.go | 6 +- 5 files changed, 519 insertions(+), 66 deletions(-) create mode 100644 modules/caddyhttp/matchers_test.go diff --git a/modules/caddyhttp/matchers.go b/modules/caddyhttp/matchers.go index 7336a1b2..0179dd7b 100644 --- a/modules/caddyhttp/matchers.go +++ b/modules/caddyhttp/matchers.go @@ -1,8 +1,12 @@ package caddyhttp import ( + "fmt" "log" "net/http" + "net/textproto" + "net/url" + "regexp" "strings" "bitbucket.org/lightcodelabs/caddy2" @@ -10,15 +14,16 @@ import ( "go.starlark.net/starlark" ) -// TODO: Matchers should probably support regex of some sort... performance trade-offs? type ( matchHost []string matchPath []string + matchPathRE struct{ matchRegexp } matchMethod []string - matchQuery map[string][]string - matchHeader map[string][]string + matchQuery url.Values + matchHeader http.Header + matchHeaderRE map[string]*matchRegexp matchProtocol string - matchScript string + matchStarlark string ) func init() { @@ -30,6 +35,10 @@ func init() { Name: "http.matchers.path", New: func() (interface{}, error) { return matchPath{}, nil }, }) + caddy2.RegisterModule(caddy2.Module{ + Name: "http.matchers.path_regexp", + New: func() (interface{}, error) { return new(matchPathRE), nil }, + }) caddy2.RegisterModule(caddy2.Module{ Name: "http.matchers.method", New: func() (interface{}, error) { return matchMethod{}, nil }, @@ -42,45 +51,39 @@ func init() { Name: "http.matchers.header", New: func() (interface{}, error) { return matchHeader{}, nil }, }) + caddy2.RegisterModule(caddy2.Module{ + Name: "http.matchers.header_regexp", + New: func() (interface{}, error) { return matchHeaderRE{}, nil }, + }) caddy2.RegisterModule(caddy2.Module{ Name: "http.matchers.protocol", New: func() (interface{}, error) { return new(matchProtocol), nil }, }) caddy2.RegisterModule(caddy2.Module{ Name: "http.matchers.caddyscript", - New: func() (interface{}, error) { return new(matchScript), nil }, + New: func() (interface{}, error) { return new(matchStarlark), nil }, }) } -func (m matchScript) Match(r *http.Request) bool { - input := string(m) - thread := new(starlark.Thread) - env := caddyscript.MatcherEnv(r) - val, err := starlark.Eval(thread, "", input, env) - if err != nil { - log.Printf("caddyscript for matcher is invalid: attempting to evaluate expression `%v` error `%v`", input, err) - return false - } - - return val.String() == "True" -} - -func (m matchProtocol) Match(r *http.Request) bool { - switch string(m) { - case "grpc": - return r.Header.Get("content-type") == "application/grpc" - case "https": - return r.TLS != nil - case "http": - return r.TLS == nil - } - - return false -} - func (m matchHost) Match(r *http.Request) bool { +outer: for _, host := range m { - if r.Host == host { + if strings.Contains(host, "*") { + patternParts := strings.Split(host, ".") + incomingParts := strings.Split(r.Host, ".") + if len(patternParts) != len(incomingParts) { + continue + } + for i := range patternParts { + if patternParts[i] == "*" { + continue + } + if !strings.EqualFold(patternParts[i], incomingParts[i]) { + continue outer + } + } + return true + } else if strings.EqualFold(r.Host, host) { return true } } @@ -96,6 +99,11 @@ func (m matchPath) Match(r *http.Request) bool { return false } +func (m matchPathRE) Match(r *http.Request) bool { + repl := r.Context().Value(ReplacerCtxKey).(*Replacer) + return m.match(r.URL.Path, repl, "path_regexp") +} + func (m matchMethod) Match(r *http.Request) bool { for _, method := range m { if r.Method == method { @@ -118,26 +126,139 @@ func (m matchQuery) Match(r *http.Request) bool { } func (m matchHeader) Match(r *http.Request) bool { - for field, vals := range m { - fieldVals := r.Header[field] - for _, fieldVal := range fieldVals { - for _, v := range vals { - if fieldVal == v { - return true + for field, allowedFieldVals := range m { + var match bool + actualFieldVals := r.Header[textproto.CanonicalMIMEHeaderKey(field)] + fieldVals: + for _, actualFieldVal := range actualFieldVals { + for _, allowedFieldVal := range allowedFieldVals { + if actualFieldVal == allowedFieldVal { + match = true + break fieldVals } } } + if !match { + return false + } + } + return true +} + +func (m matchHeaderRE) Match(r *http.Request) bool { + for field, rm := range m { + repl := r.Context().Value(ReplacerCtxKey).(*Replacer) + match := rm.match(r.Header.Get(field), repl, "header_regexp") + if !match { + return false + } + } + return true +} + +func (m matchHeaderRE) Provision() error { + for _, rm := range m { + err := rm.Provision() + if err != nil { + return err + } + } + return nil +} + +func (m matchHeaderRE) Validate() error { + for _, rm := range m { + err := rm.Validate() + if err != nil { + return err + } + } + return nil +} + +func (m matchProtocol) Match(r *http.Request) bool { + switch string(m) { + case "grpc": + return r.Header.Get("content-type") == "application/grpc" + case "https": + return r.TLS != nil + case "http": + return r.TLS == nil } return false } +func (m matchStarlark) Match(r *http.Request) bool { + input := string(m) + thread := new(starlark.Thread) + env := caddyscript.MatcherEnv(r) + val, err := starlark.Eval(thread, "", input, env) + if err != nil { + // TODO: Can we detect this in Provision or Validate instead? + log.Printf("caddyscript for matcher is invalid: attempting to evaluate expression `%v` error `%v`", input, err) + return false + } + return val.String() == "True" +} + +// matchRegexp is just the fields common among +// matchers that can use regular expressions. +type matchRegexp struct { + Name string `json:"name"` + Pattern string `json:"pattern"` + compiled *regexp.Regexp +} + +func (mre *matchRegexp) Provision() error { + re, err := regexp.Compile(mre.Pattern) + if err != nil { + return fmt.Errorf("compiling matcher regexp %s: %v", mre.Pattern, err) + } + mre.compiled = re + return nil +} + +func (mre *matchRegexp) Validate() error { + if mre.Name != "" && !wordRE.MatchString(mre.Name) { + return fmt.Errorf("invalid regexp name (must contain only word characters): %s", mre.Name) + } + return nil +} + +func (mre *matchRegexp) match(input string, repl *Replacer, scope string) bool { + matches := mre.compiled.FindStringSubmatch(input) + if matches == nil { + return false + } + + // save all capture groups, first by index + for i, match := range matches { + key := fmt.Sprintf("http.matchers.%s.%s.%d", scope, mre.Name, i) + repl.Map(key, match) + } + + // then by name + for i, name := range mre.compiled.SubexpNames() { + if i != 0 && name != "" { + key := fmt.Sprintf("http.matchers.%s.%s.%s", scope, mre.Name, name) + repl.Map(key, matches[i]) + } + } + + return true +} + +var wordRE = regexp.MustCompile(`\w+`) + // Interface guards var ( _ RouteMatcher = (*matchHost)(nil) _ RouteMatcher = (*matchPath)(nil) + _ RouteMatcher = (*matchPathRE)(nil) _ RouteMatcher = (*matchMethod)(nil) _ RouteMatcher = (*matchQuery)(nil) _ RouteMatcher = (*matchHeader)(nil) + _ RouteMatcher = (*matchHeaderRE)(nil) _ RouteMatcher = (*matchProtocol)(nil) - _ RouteMatcher = (*matchScript)(nil) + _ RouteMatcher = (*matchStarlark)(nil) ) diff --git a/modules/caddyhttp/matchers_test.go b/modules/caddyhttp/matchers_test.go new file mode 100644 index 00000000..07c98e30 --- /dev/null +++ b/modules/caddyhttp/matchers_test.go @@ -0,0 +1,346 @@ +package caddyhttp + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "testing" +) + +func TestHostMatcher(t *testing.T) { + for i, tc := range []struct { + match matchHost + input string + expect bool + }{ + { + match: matchHost{}, + input: "example.com", + expect: false, + }, + { + match: matchHost{"example.com"}, + input: "example.com", + expect: true, + }, + { + match: matchHost{"example.com"}, + input: "foo.example.com", + expect: false, + }, + { + match: matchHost{"example.com"}, + input: "EXAMPLE.COM", + expect: true, + }, + { + match: matchHost{"foo.example.com"}, + input: "foo.example.com", + expect: true, + }, + { + match: matchHost{"foo.example.com"}, + input: "bar.example.com", + expect: false, + }, + { + match: matchHost{"*.example.com"}, + input: "example.com", + expect: false, + }, + { + match: matchHost{"*.example.com"}, + input: "foo.example.com", + expect: true, + }, + { + match: matchHost{"*.example.com"}, + input: "foo.bar.example.com", + expect: false, + }, + { + match: matchHost{"*.example.com", "example.net"}, + input: "example.net", + expect: true, + }, + { + match: matchHost{"example.net", "*.example.com"}, + input: "foo.example.com", + expect: true, + }, + { + match: matchHost{"*.example.net", "*.*.example.com"}, + input: "foo.bar.example.com", + expect: true, + }, + { + match: matchHost{"*.example.net", "sub.*.example.com"}, + input: "sub.foo.example.com", + expect: true, + }, + { + match: matchHost{"*.example.net", "sub.*.example.com"}, + input: "sub.foo.example.net", + expect: false, + }, + } { + req := &http.Request{Host: tc.input} + actual := tc.match.Match(req) + if actual != tc.expect { + t.Errorf("Test %d %v: Expected %t, got %t for '%s'", i, tc.match, tc.expect, actual, tc.input) + continue + } + } +} + +func TestPathMatcher(t *testing.T) { + for i, tc := range []struct { + match matchPath + input string + expect bool + }{ + { + match: matchPath{}, + input: "/", + expect: false, + }, + { + match: matchPath{"/"}, + input: "/", + expect: true, + }, + { + match: matchPath{"/foo/bar"}, + input: "/", + expect: false, + }, + { + match: matchPath{"/foo/bar"}, + input: "/foo/bar", + expect: true, + }, + { + match: matchPath{"/foo/bar/"}, + input: "/foo/bar", + expect: false, + }, + { + match: matchPath{"/foo/bar/", "/other"}, + input: "/other/", + expect: true, + }, + } { + req := &http.Request{URL: &url.URL{Path: tc.input}} + actual := tc.match.Match(req) + if actual != tc.expect { + t.Errorf("Test %d %v: Expected %t, got %t for '%s'", i, tc.match, tc.expect, actual, tc.input) + continue + } + } +} + +func TestPathREMatcher(t *testing.T) { + for i, tc := range []struct { + match matchPathRE + input string + expect bool + expectRepl map[string]string + }{ + { + match: matchPathRE{}, + input: "/", + expect: true, + }, + { + match: matchPathRE{matchRegexp{Pattern: "/"}}, + input: "/", + expect: true, + }, + { + match: matchPathRE{matchRegexp{Pattern: "/foo"}}, + input: "/foo", + expect: true, + }, + { + match: matchPathRE{matchRegexp{Pattern: "/foo"}}, + input: "/foo/", + expect: true, + }, + { + match: matchPathRE{matchRegexp{Pattern: "/bar"}}, + input: "/foo/", + expect: false, + }, + { + match: matchPathRE{matchRegexp{Pattern: "^/bar"}}, + input: "/foo/bar", + expect: false, + }, + { + match: matchPathRE{matchRegexp{Pattern: "^/foo/(.*)/baz$", Name: "name"}}, + input: "/foo/bar/baz", + expect: true, + expectRepl: map[string]string{"name.1": "bar"}, + }, + { + match: matchPathRE{matchRegexp{Pattern: "^/foo/(?P.*)/baz$", Name: "name"}}, + input: "/foo/bar/baz", + expect: true, + expectRepl: map[string]string{"name.myparam": "bar"}, + }, + } { + // compile the regexp and validate its name + err := tc.match.Provision() + if err != nil { + t.Errorf("Test %d %v: Provisioning: %v", i, tc.match, err) + continue + } + err = tc.match.Validate() + if err != nil { + t.Errorf("Test %d %v: Validating: %v", i, tc.match, err) + continue + } + + // set up the fake request and its Replacer + req := &http.Request{URL: &url.URL{Path: tc.input}} + repl := &Replacer{req: req, resp: httptest.NewRecorder(), custom: make(map[string]string)} + ctx := context.WithValue(req.Context(), ReplacerCtxKey, repl) + req = req.WithContext(ctx) + + actual := tc.match.Match(req) + if actual != tc.expect { + t.Errorf("Test %d [%v]: Expected %t, got %t for input '%s'", + i, tc.match.Pattern, tc.expect, actual, tc.input) + continue + } + + for key, expectVal := range tc.expectRepl { + placeholder := fmt.Sprintf("{http.matchers.path_regexp.%s}", key) + actualVal := repl.Replace(placeholder, "") + if actualVal != expectVal { + t.Errorf("Test %d [%v]: Expected placeholder {http.matchers.path_regexp.%s} to be '%s' but got '%s'", + i, tc.match.Pattern, key, expectVal, actualVal) + continue + } + } + } +} + +func TestHeaderMatcher(t *testing.T) { + for i, tc := range []struct { + match matchHeader + input http.Header // make sure these are canonical cased (std lib will do that in a real request) + expect bool + }{ + { + match: matchHeader{"Field": []string{"foo"}}, + input: http.Header{"Field": []string{"foo"}}, + expect: true, + }, + { + match: matchHeader{"Field": []string{"foo", "bar"}}, + input: http.Header{"Field": []string{"bar"}}, + expect: true, + }, + { + match: matchHeader{"Field": []string{"foo", "bar"}}, + input: http.Header{"Alakazam": []string{"kapow"}}, + expect: false, + }, + { + match: matchHeader{"Field": []string{"foo", "bar"}}, + input: http.Header{"Field": []string{"kapow"}}, + expect: false, + }, + { + match: matchHeader{"Field": []string{"foo", "bar"}}, + input: http.Header{"Field": []string{"kapow", "foo"}}, + expect: true, + }, + { + match: matchHeader{"Field1": []string{"foo"}, "Field2": []string{"bar"}}, + input: http.Header{"Field1": []string{"foo"}, "Field2": []string{"bar"}}, + expect: true, + }, + { + match: matchHeader{"field1": []string{"foo"}, "field2": []string{"bar"}}, + input: http.Header{"Field1": []string{"foo"}, "Field2": []string{"bar"}}, + expect: true, + }, + { + match: matchHeader{"field1": []string{"foo"}, "field2": []string{"bar"}}, + input: http.Header{"Field1": []string{"foo"}, "Field2": []string{"kapow"}}, + expect: false, + }, + } { + req := &http.Request{Header: tc.input} + actual := tc.match.Match(req) + if actual != tc.expect { + t.Errorf("Test %d %v: Expected %t, got %t for '%s'", i, tc.match, tc.expect, actual, tc.input) + continue + } + } +} + +func TestHeaderREMatcher(t *testing.T) { + for i, tc := range []struct { + match matchHeaderRE + input http.Header // make sure these are canonical cased (std lib will do that in a real request) + expect bool + expectRepl map[string]string + }{ + { + match: matchHeaderRE{"Field": &matchRegexp{Pattern: "foo"}}, + input: http.Header{"Field": []string{"foo"}}, + expect: true, + }, + { + match: matchHeaderRE{"Field": &matchRegexp{Pattern: "$foo^"}}, + input: http.Header{"Field": []string{"foobar"}}, + expect: false, + }, + { + match: matchHeaderRE{"Field": &matchRegexp{Pattern: "^foo(.*)$", Name: "name"}}, + input: http.Header{"Field": []string{"foobar"}}, + expect: true, + expectRepl: map[string]string{"name.1": "bar"}, + }, + } { + // compile the regexp and validate its name + err := tc.match.Provision() + if err != nil { + t.Errorf("Test %d %v: Provisioning: %v", i, tc.match, err) + continue + } + err = tc.match.Validate() + if err != nil { + t.Errorf("Test %d %v: Validating: %v", i, tc.match, err) + continue + } + + // set up the fake request and its Replacer + req := &http.Request{Header: tc.input, URL: new(url.URL)} + repl := &Replacer{req: req, resp: httptest.NewRecorder(), custom: make(map[string]string)} + ctx := context.WithValue(req.Context(), ReplacerCtxKey, repl) + req = req.WithContext(ctx) + + actual := tc.match.Match(req) + if actual != tc.expect { + t.Errorf("Test %d [%v]: Expected %t, got %t for input '%s'", + i, tc.match, tc.expect, actual, tc.input) + continue + } + + for key, expectVal := range tc.expectRepl { + placeholder := fmt.Sprintf("{http.matchers.header_regexp.%s}", key) + actualVal := repl.Replace(placeholder, "") + if actualVal != expectVal { + t.Errorf("Test %d [%v]: Expected placeholder {http.matchers.header_regexp.%s} to be '%s' but got '%s'", + i, tc.match, key, expectVal, actualVal) + continue + } + } + } +} diff --git a/modules/caddyhttp/replacer.go b/modules/caddyhttp/replacer.go index 947de092..644cfc19 100644 --- a/modules/caddyhttp/replacer.go +++ b/modules/caddyhttp/replacer.go @@ -8,7 +8,9 @@ import ( ) // Replacer can replace values in strings based -// on a request and/or response writer. +// on a request and/or response writer. The zero +// Replacer is not valid; it must be initialized +// within this package. type Replacer struct { req *http.Request resp http.ResponseWriter @@ -75,18 +77,17 @@ func (r *Replacer) defaults() map[string]string { }(), } + // TODO: why should header fields, cookies, and query params get special treatment like this? + // maybe they should be scoped by words like "request.header." just like everything else. for field, vals := range r.req.Header { m[">"+strings.ToLower(field)] = strings.Join(vals, ",") } - for field, vals := range r.resp.Header() { m["<"+strings.ToLower(field)] = strings.Join(vals, ",") } - for _, cookie := range r.req.Cookies() { m["~"+cookie.Name] = cookie.Value } - for param, vals := range r.req.URL.Query() { m["?"+param] = strings.Join(vals, ",") } diff --git a/modules/caddyhttp/responsewriter.go b/modules/caddyhttp/responsewriter.go index 8aefd3fb..4599fd2c 100644 --- a/modules/caddyhttp/responsewriter.go +++ b/modules/caddyhttp/responsewriter.go @@ -7,16 +7,13 @@ import ( "net/http" ) -// TODO: Is this type really required? Wouldn't embedding the -// default ResponseWriter always work too, when wrapping it? - // ResponseWriterWrapper wraps an underlying ResponseWriter and -// promotes its Pusher/Flusher/CloseNotifier/Hijacker methods -// as well. To use this type, embed a pointer to it within your -// own struct type that implements the http.ResponseWriter -// interface, then call methods on the embedded value. You can -// make sure your type wraps correctly by asserting that it -// implements the HTTPInterfaces interface. +// promotes its Pusher/Flusher/Hijacker methods as well. To use +// this type, embed a pointer to it within your own struct type +// that implements the http.ResponseWriter interface, then call +// methods on the embedded value. You can make sure your type +// wraps correctly by asserting that it implements the +// HTTPInterfaces interface. type ResponseWriterWrapper struct { http.ResponseWriter } @@ -40,15 +37,6 @@ func (rww *ResponseWriterWrapper) Flush() { } } -// CloseNotify implements http.CloseNotifier. It simply calls the underlying -// ResponseWriter's CloseNotify method if there is one, or panics. -func (rww *ResponseWriterWrapper) CloseNotify() <-chan bool { - if cn, ok := rww.ResponseWriter.(http.CloseNotifier); ok { - return cn.CloseNotify() - } - panic("not a close notifier") -} - // Push implements http.Pusher. It simply calls the underlying // ResponseWriter's Push method if there is one, or returns an error. func (rww *ResponseWriterWrapper) Push(target string, opts *http.PushOptions) error { @@ -63,7 +51,6 @@ type HTTPInterfaces interface { http.ResponseWriter http.Pusher http.Flusher - http.CloseNotifier http.Hijacker } diff --git a/modules/caddyhttp/routes.go b/modules/caddyhttp/routes.go index cc26436e..48a5000a 100644 --- a/modules/caddyhttp/routes.go +++ b/modules/caddyhttp/routes.go @@ -13,7 +13,7 @@ type serverRoute struct { Apply []json.RawMessage `json:"apply"` Respond json.RawMessage `json:"respond"` - Exclusive bool `json:"exclusive"` + Terminal bool `json:"terminal"` // decoded values matchers []RouteMatcher @@ -49,9 +49,7 @@ routeLoop: if responder == nil { responder = route.responder } - // TODO: Should exclusive apply to only middlewares, or responder too? - // i.e. what if they haven't set a responder yet, but the first middleware chain is exclusive... - if route.Exclusive { + if route.Terminal { break } }