// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"bytes"
	"errors"
	"io"
	"io/ioutil"
	"mime"
	"mime/multipart"
	"net/http"
	"net/url"
	"regexp"
	"strings"
)

// A Converter converts HTTP requests and responses to the Request and Response types
// of this package, while removing or redacting information.
//
// Each field is a list of anchored patterns. A header or query parameter is
// affected when its name matches any pattern in the corresponding list.
type Converter struct {
	// The header patterns apply to both headers and trailers. When a header
	// name matches both a clear and a remove pattern, it is cleared.
	ClearHeaders          []tRegexp // replace matching headers with "CLEARED"
	RemoveRequestHeaders  []tRegexp // remove matching headers in requests
	RemoveResponseHeaders []tRegexp // remove matching headers in responses

	// The param patterns are matched against the unescaped query parameter
	// name. When a name matches both a clear and a remove pattern, the
	// parameter is removed.
	ClearParams  []tRegexp // replace matching query params with "CLEARED"
	RemoveParams []tRegexp // remove matching query params
}

// tRegexp is a regexp that can be marshaled to and from text.
type tRegexp struct { *regexp.Regexp } func (r tRegexp) MarshalText() ([]byte, error) { return []byte(r.String()), nil } func (r *tRegexp) UnmarshalText(b []byte) error { var err error r.Regexp, err = regexp.Compile(string(b)) return err } func (c *Converter) registerRemoveRequestHeaders(pat string) { c.RemoveRequestHeaders = append(c.RemoveRequestHeaders, pattern(pat)) } func (c *Converter) registerClearHeaders(pat string) { c.ClearHeaders = append(c.ClearHeaders, pattern(pat)) } func (c *Converter) registerRemoveParams(pat string) { c.RemoveParams = append(c.RemoveParams, pattern(pat)) } func (c *Converter) registerClearParams(pat string) { c.ClearParams = append(c.ClearParams, pattern(pat)) } var ( defaultRemoveRequestHeaders = []string{ "Authorization", // not only is it secret, but it is probably missing on replay "Proxy-Authorization", "Connection", "Content-Type", // because it may contain a random multipart boundary "Date", "Host", "Transfer-Encoding", "Via", "X-Forwarded-*", // Google-specific "X-Cloud-Trace-Context", // OpenCensus traces have a random ID "X-Goog-Api-Client", // can differ for, e.g., different Go versions } defaultRemoveBothHeaders = []string{ // Google-specific // GFEs scrub X-Google- and X-GFE- headers from requests and responses. // Drop them from recordings made by users inside Google. // http://g3doc/gfe/g3doc/gfe3/design/http_filters/google_header_filter // (internal Google documentation). "X-Google-*", "X-Gfe-*", } defaultClearHeaders = []string{ // Google-specific // Used by Cloud Storage for customer-supplied encryption. 
"X-Goog-*Encryption-Key", } ) func defaultConverter() *Converter { c := &Converter{} for _, h := range defaultClearHeaders { c.registerClearHeaders(h) } for _, h := range defaultRemoveRequestHeaders { c.registerRemoveRequestHeaders(h) } for _, h := range defaultRemoveBothHeaders { c.registerRemoveRequestHeaders(h) c.RemoveResponseHeaders = append(c.RemoveResponseHeaders, pattern(h)) } return c } // Convert a pattern into a regexp. // A pattern is like a literal regexp anchored on both ends, with only one // non-literal character: "*", which matches zero or more characters. func pattern(p string) tRegexp { q := regexp.QuoteMeta(p) q = "^" + strings.Replace(q, `\*`, `.*`, -1) + "$" // q must be a legal regexp. return tRegexp{regexp.MustCompile(q)} } func (c *Converter) convertRequest(req *http.Request) (*Request, error) { body, err := snapshotBody(&req.Body) if err != nil { return nil, err } // If the body is empty, set it to nil to make sure the proxy sends a // Content-Length header. if len(body) == 0 { req.Body = nil } mediaType, parts, err := parseRequestBody(req.Header.Get("Content-Type"), body) if err != nil { return nil, err } url2 := *req.URL url2.RawQuery = scrubQuery(url2.RawQuery, c.ClearParams, c.RemoveParams) return &Request{ Method: req.Method, URL: url2.String(), Header: scrubHeaders(req.Header, c.ClearHeaders, c.RemoveRequestHeaders), MediaType: mediaType, BodyParts: parts, Trailer: scrubHeaders(req.Trailer, c.ClearHeaders, c.RemoveRequestHeaders), }, nil } // parseRequestBody parses the Content-Type header, reads the body, and splits it into // parts if necessary. It returns the media type and the body parts. func parseRequestBody(contentType string, body []byte) (string, [][]byte, error) { if contentType == "" { // No content-type header. There should not be a body. 
if len(body) != 0 { return "", nil, errors.New("no Content-Type, but body") } return "", nil, nil } mediaType, params, err := mime.ParseMediaType(contentType) if err != nil { return "", nil, err } var parts [][]byte if strings.HasPrefix(mediaType, "multipart/") { mr := multipart.NewReader(bytes.NewReader(body), params["boundary"]) for { p, err := mr.NextPart() if err == io.EOF { break } if err != nil { return "", nil, err } part, err := ioutil.ReadAll(p) if err != nil { return "", nil, err } // TODO(jba): care about part headers? parts = append(parts, part) } } else { parts = [][]byte{body} } return mediaType, parts, nil } func (c *Converter) convertResponse(res *http.Response) (*Response, error) { data, err := snapshotBody(&res.Body) if err != nil { return nil, err } return &Response{ StatusCode: res.StatusCode, Proto: res.Proto, ProtoMajor: res.ProtoMajor, ProtoMinor: res.ProtoMinor, Header: scrubHeaders(res.Header, c.ClearHeaders, c.RemoveResponseHeaders), Body: data, Trailer: scrubHeaders(res.Trailer, c.ClearHeaders, c.RemoveResponseHeaders), }, nil } func snapshotBody(body *io.ReadCloser) ([]byte, error) { data, err := ioutil.ReadAll(*body) if err != nil { return nil, err } (*body).Close() *body = ioutil.NopCloser(bytes.NewReader(data)) return data, nil } // Copy headers, clearing some and removing others. func scrubHeaders(hs http.Header, clear, remove []tRegexp) http.Header { rh := http.Header{} for k, v := range hs { switch { case match(k, clear): rh.Set(k, "CLEARED") case match(k, remove): // skip default: rh[k] = v } } return rh } // Copy the query string, clearing some query params and removing others. // Preserve the order of the string. func scrubQuery(query string, clear, remove []tRegexp) string { // We can't use url.ParseQuery because it doesn't preserve order. 
var buf bytes.Buffer for { if i := strings.IndexAny(query, "&;"); i >= 0 { scrubParam(&buf, query[:i], query[i], clear, remove) query = query[i+1:] } else { scrubParam(&buf, query, 0, clear, remove) break } } s := buf.String() if strings.HasSuffix(s, "&") { return s[:len(s)-1] } return s } func scrubParam(buf *bytes.Buffer, param string, sep byte, clear, remove []tRegexp) { if param == "" { return } key := param value := "" if i := strings.Index(param, "="); i >= 0 { key, value = key[:i], key[i+1:] } ukey, err := url.QueryUnescape(key) // If the key is bad, just pass it and the value through. if err != nil { buf.WriteString(param) if sep != 0 { buf.WriteByte(sep) } return } if match(ukey, remove) { return } if match(ukey, clear) && value != "" { value = "CLEARED" } buf.WriteString(key) buf.WriteByte('=') buf.WriteString(value) if sep != 0 { buf.WriteByte(sep) } } func match(s string, res []tRegexp) bool { for _, re := range res { if re.MatchString(s) { return true } } return false }