|
- // Copyright 2019 Google LLC
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
-
- package proxy
-
- import (
- "bytes"
- "errors"
- "io"
- "io/ioutil"
- "mime"
- "mime/multipart"
- "net/http"
- "net/url"
- "regexp"
- "strings"
- )
-
- // A Converter converts HTTP requests and responses to the Request and Response types
- // of this package, while removing or redacting information.
- type Converter struct {
- // These all apply to both headers and trailers.
- ClearHeaders []tRegexp // replace matching headers with "CLEARED"
- RemoveRequestHeaders []tRegexp // remove matching headers in requests
- RemoveResponseHeaders []tRegexp // remove matching headers in responses
- ClearParams []tRegexp // replace matching query params with "CLEARED"
- RemoveParams []tRegexp // remove matching query params
- }
-
// tRegexp is a regexp that can be marshaled to and from text.
// It embeds *regexp.Regexp, so all matching methods are available directly.
type tRegexp struct {
	*regexp.Regexp
}

// MarshalText returns the source text of the regular expression.
//
// A tRegexp whose embedded *regexp.Regexp is nil (for example the zero value)
// has no source text; an error is returned for it instead of dereferencing
// the nil pointer.
func (r tRegexp) MarshalText() ([]byte, error) {
	if r.Regexp == nil {
		return nil, errors.New("cannot marshal nil regexp")
	}
	return []byte(r.String()), nil
}

// UnmarshalText compiles b as a regular expression and stores it in r.
//
// If b does not compile, the compile error is returned and r keeps whatever
// expression it held before the call.
func (r *tRegexp) UnmarshalText(b []byte) error {
	re, err := regexp.Compile(string(b))
	if err != nil {
		return err
	}
	r.Regexp = re
	return nil
}
-
- func (c *Converter) registerRemoveRequestHeaders(pat string) {
- c.RemoveRequestHeaders = append(c.RemoveRequestHeaders, pattern(pat))
- }
-
- func (c *Converter) registerClearHeaders(pat string) {
- c.ClearHeaders = append(c.ClearHeaders, pattern(pat))
- }
-
- func (c *Converter) registerRemoveParams(pat string) {
- c.RemoveParams = append(c.RemoveParams, pattern(pat))
- }
-
- func (c *Converter) registerClearParams(pat string) {
- c.ClearParams = append(c.ClearParams, pattern(pat))
- }
-
// Default header patterns installed by defaultConverter. Each entry uses the
// syntax accepted by pattern: literal text in which "*" is a wildcard.
var (
	// defaultRemoveRequestHeaders lists headers dropped from requests only.
	defaultRemoveRequestHeaders = []string{
		// Secret, and probably missing on replay anyway.
		"Authorization",
		"Proxy-Authorization",
		"Connection",
		// May contain a random multipart boundary.
		"Content-Type",
		"Date",
		"Host",
		"Transfer-Encoding",
		"Via",
		"X-Forwarded-*",

		// Google-specific.

		// OpenCensus traces have a random ID.
		"X-Cloud-Trace-Context",
		// Can differ for, e.g., different Go versions.
		"X-Goog-Api-Client",
	}

	// defaultRemoveBothHeaders lists headers dropped from both requests and
	// responses.
	defaultRemoveBothHeaders = []string{
		// Google-specific.
		// GFEs scrub X-Google- and X-GFE- headers from requests and
		// responses, so drop them from recordings made by users inside
		// Google. See
		// http://g3doc/gfe/g3doc/gfe3/design/http_filters/google_header_filter
		// (internal Google documentation).
		"X-Google-*",
		"X-Gfe-*",
	}

	// defaultClearHeaders lists headers that are kept with their value
	// replaced.
	defaultClearHeaders = []string{
		// Google-specific.
		// Used by Cloud Storage for customer-supplied encryption.
		"X-Goog-*Encryption-Key",
	}
)
-
- func defaultConverter() *Converter {
- c := &Converter{}
- for _, h := range defaultClearHeaders {
- c.registerClearHeaders(h)
- }
- for _, h := range defaultRemoveRequestHeaders {
- c.registerRemoveRequestHeaders(h)
- }
- for _, h := range defaultRemoveBothHeaders {
- c.registerRemoveRequestHeaders(h)
- c.RemoveResponseHeaders = append(c.RemoveResponseHeaders, pattern(h))
- }
- return c
- }
-
- // Convert a pattern into a regexp.
- // A pattern is like a literal regexp anchored on both ends, with only one
- // non-literal character: "*", which matches zero or more characters.
- func pattern(p string) tRegexp {
- q := regexp.QuoteMeta(p)
- q = "^" + strings.Replace(q, `\*`, `.*`, -1) + "$"
- // q must be a legal regexp.
- return tRegexp{regexp.MustCompile(q)}
- }
-
- func (c *Converter) convertRequest(req *http.Request) (*Request, error) {
- body, err := snapshotBody(&req.Body)
- if err != nil {
- return nil, err
- }
- // If the body is empty, set it to nil to make sure the proxy sends a
- // Content-Length header.
- if len(body) == 0 {
- req.Body = nil
- }
- mediaType, parts, err := parseRequestBody(req.Header.Get("Content-Type"), body)
- if err != nil {
- return nil, err
- }
- url2 := *req.URL
- url2.RawQuery = scrubQuery(url2.RawQuery, c.ClearParams, c.RemoveParams)
- return &Request{
- Method: req.Method,
- URL: url2.String(),
- Header: scrubHeaders(req.Header, c.ClearHeaders, c.RemoveRequestHeaders),
- MediaType: mediaType,
- BodyParts: parts,
- Trailer: scrubHeaders(req.Trailer, c.ClearHeaders, c.RemoveRequestHeaders),
- }, nil
- }
-
// parseRequestBody interprets body according to the Content-Type header value
// contentType and returns the media type together with the body parts.
//
//   - With an empty contentType the body must be empty too; the result is then
//     ("", nil, nil). A non-empty body is an error.
//   - For a "multipart/..." media type the body is split on the boundary
//     parameter and each part's content becomes one element of the result.
//   - For any other media type the whole body is returned as a single part.
func parseRequestBody(contentType string, body []byte) (string, [][]byte, error) {
	if contentType == "" {
		// No content-type header. There should not be a body.
		if len(body) == 0 {
			return "", nil, nil
		}
		return "", nil, errors.New("no Content-Type, but body")
	}

	mediaType, params, err := mime.ParseMediaType(contentType)
	if err != nil {
		return "", nil, err
	}
	if !strings.HasPrefix(mediaType, "multipart/") {
		return mediaType, [][]byte{body}, nil
	}

	var parts [][]byte
	reader := multipart.NewReader(bytes.NewReader(body), params["boundary"])
	for {
		next, err := reader.NextPart()
		switch {
		case err == io.EOF:
			// All parts consumed.
			return mediaType, parts, nil
		case err != nil:
			return "", nil, err
		}
		content, err := ioutil.ReadAll(next)
		if err != nil {
			return "", nil, err
		}
		// TODO(jba): care about part headers?
		parts = append(parts, content)
	}
}
-
- func (c *Converter) convertResponse(res *http.Response) (*Response, error) {
- data, err := snapshotBody(&res.Body)
- if err != nil {
- return nil, err
- }
- return &Response{
- StatusCode: res.StatusCode,
- Proto: res.Proto,
- ProtoMajor: res.ProtoMajor,
- ProtoMinor: res.ProtoMinor,
- Header: scrubHeaders(res.Header, c.ClearHeaders, c.RemoveResponseHeaders),
- Body: data,
- Trailer: scrubHeaders(res.Trailer, c.ClearHeaders, c.RemoveResponseHeaders),
- }, nil
- }
-
// snapshotBody reads *body to the end and returns its contents. On success the
// original reader is closed and *body is replaced with a fresh in-memory
// reader over the same bytes, so the body can still be consumed afterwards.
//
// A nil body (body == nil or *body == nil) is treated as empty: nil data is
// returned and *body is left unchanged. If reading fails, the error is
// returned and *body is left as it was, neither closed nor replaced.
func snapshotBody(body *io.ReadCloser) ([]byte, error) {
	if body == nil || *body == nil {
		// Nothing to read; ioutil.ReadAll would panic on a nil reader.
		return nil, nil
	}
	data, err := ioutil.ReadAll(*body)
	if err != nil {
		return nil, err
	}
	// The contents are already fully buffered, so a failure to close the
	// original reader does not affect the result and is deliberately ignored.
	_ = (*body).Close()
	*body = ioutil.NopCloser(bytes.NewReader(data))
	return data, nil
}
-
- // Copy headers, clearing some and removing others.
- func scrubHeaders(hs http.Header, clear, remove []tRegexp) http.Header {
- rh := http.Header{}
- for k, v := range hs {
- switch {
- case match(k, clear):
- rh.Set(k, "CLEARED")
- case match(k, remove):
- // skip
- default:
- rh[k] = v
- }
- }
- return rh
- }
-
- // Copy the query string, clearing some query params and removing others.
- // Preserve the order of the string.
- func scrubQuery(query string, clear, remove []tRegexp) string {
- // We can't use url.ParseQuery because it doesn't preserve order.
- var buf bytes.Buffer
- for {
- if i := strings.IndexAny(query, "&;"); i >= 0 {
- scrubParam(&buf, query[:i], query[i], clear, remove)
- query = query[i+1:]
- } else {
- scrubParam(&buf, query, 0, clear, remove)
- break
- }
- }
- s := buf.String()
- if strings.HasSuffix(s, "&") {
- return s[:len(s)-1]
- }
- return s
- }
-
- func scrubParam(buf *bytes.Buffer, param string, sep byte, clear, remove []tRegexp) {
- if param == "" {
- return
- }
- key := param
- value := ""
- if i := strings.Index(param, "="); i >= 0 {
- key, value = key[:i], key[i+1:]
- }
- ukey, err := url.QueryUnescape(key)
- // If the key is bad, just pass it and the value through.
- if err != nil {
- buf.WriteString(param)
- if sep != 0 {
- buf.WriteByte(sep)
- }
- return
- }
- if match(ukey, remove) {
- return
- }
- if match(ukey, clear) && value != "" {
- value = "CLEARED"
- }
- buf.WriteString(key)
- buf.WriteByte('=')
- buf.WriteString(value)
- if sep != 0 {
- buf.WriteByte(sep)
- }
- }
-
- func match(s string, res []tRegexp) bool {
- for _, re := range res {
- if re.MatchString(s) {
- return true
- }
- }
- return false
- }
|