Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 

297 rader
7.8 KiB

  1. // Copyright 2019 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package proxy
  15. import (
  16. "bytes"
  17. "errors"
  18. "io"
  19. "io/ioutil"
  20. "mime"
  21. "mime/multipart"
  22. "net/http"
  23. "net/url"
  24. "regexp"
  25. "strings"
  26. )
// A Converter converts HTTP requests and responses to the Request and Response types
// of this package, while removing or redacting information.
//
// Every field is a list of compiled patterns; a name is affected when it
// matches any pattern in the corresponding list.
type Converter struct {
	// The header lists are applied to both headers and trailers.
	ClearHeaders          []tRegexp // replace matching headers with "CLEARED"
	RemoveRequestHeaders  []tRegexp // remove matching headers in requests
	RemoveResponseHeaders []tRegexp // remove matching headers in responses

	// The param lists are applied to the query string of the request URL.
	ClearParams  []tRegexp // replace matching query params with "CLEARED"
	RemoveParams []tRegexp // remove matching query params
}
  37. // A regexp that can be marshaled to and from text.
  38. type tRegexp struct {
  39. *regexp.Regexp
  40. }
  41. func (r tRegexp) MarshalText() ([]byte, error) {
  42. return []byte(r.String()), nil
  43. }
  44. func (r *tRegexp) UnmarshalText(b []byte) error {
  45. var err error
  46. r.Regexp, err = regexp.Compile(string(b))
  47. return err
  48. }
  49. func (c *Converter) registerRemoveRequestHeaders(pat string) {
  50. c.RemoveRequestHeaders = append(c.RemoveRequestHeaders, pattern(pat))
  51. }
  52. func (c *Converter) registerClearHeaders(pat string) {
  53. c.ClearHeaders = append(c.ClearHeaders, pattern(pat))
  54. }
  55. func (c *Converter) registerRemoveParams(pat string) {
  56. c.RemoveParams = append(c.RemoveParams, pattern(pat))
  57. }
  58. func (c *Converter) registerClearParams(pat string) {
  59. c.ClearParams = append(c.ClearParams, pattern(pat))
  60. }
// Default header patterns installed by defaultConverter. Each entry is a
// pattern as understood by the pattern function: literal text in which "*"
// matches zero or more characters.
var (
	// defaultRemoveRequestHeaders lists headers that are removed from
	// requests only.
	defaultRemoveRequestHeaders = []string{
		"Authorization", // not only is it secret, but it is probably missing on replay
		"Proxy-Authorization",
		"Connection",
		"Content-Type", // because it may contain a random multipart boundary
		"Date",
		"Host",
		"Transfer-Encoding",
		"Via",
		"X-Forwarded-*",
		// Google-specific
		"X-Cloud-Trace-Context", // OpenCensus traces have a random ID
		"X-Goog-Api-Client",     // can differ for, e.g., different Go versions
	}

	// defaultRemoveBothHeaders lists headers that are removed from both
	// requests and responses.
	defaultRemoveBothHeaders = []string{
		// Google-specific
		// GFEs scrub X-Google- and X-GFE- headers from requests and responses.
		// Drop them from recordings made by users inside Google.
		// http://g3doc/gfe/g3doc/gfe3/design/http_filters/google_header_filter
		// (internal Google documentation).
		"X-Google-*",
		"X-Gfe-*",
	}

	// defaultClearHeaders lists headers that are kept but whose values are
	// replaced with "CLEARED".
	defaultClearHeaders = []string{
		// Google-specific
		// Used by Cloud Storage for customer-supplied encryption.
		"X-Goog-*Encryption-Key",
	}
)
  91. func defaultConverter() *Converter {
  92. c := &Converter{}
  93. for _, h := range defaultClearHeaders {
  94. c.registerClearHeaders(h)
  95. }
  96. for _, h := range defaultRemoveRequestHeaders {
  97. c.registerRemoveRequestHeaders(h)
  98. }
  99. for _, h := range defaultRemoveBothHeaders {
  100. c.registerRemoveRequestHeaders(h)
  101. c.RemoveResponseHeaders = append(c.RemoveResponseHeaders, pattern(h))
  102. }
  103. return c
  104. }
  105. // Convert a pattern into a regexp.
  106. // A pattern is like a literal regexp anchored on both ends, with only one
  107. // non-literal character: "*", which matches zero or more characters.
  108. func pattern(p string) tRegexp {
  109. q := regexp.QuoteMeta(p)
  110. q = "^" + strings.Replace(q, `\*`, `.*`, -1) + "$"
  111. // q must be a legal regexp.
  112. return tRegexp{regexp.MustCompile(q)}
  113. }
  114. func (c *Converter) convertRequest(req *http.Request) (*Request, error) {
  115. body, err := snapshotBody(&req.Body)
  116. if err != nil {
  117. return nil, err
  118. }
  119. // If the body is empty, set it to nil to make sure the proxy sends a
  120. // Content-Length header.
  121. if len(body) == 0 {
  122. req.Body = nil
  123. }
  124. mediaType, parts, err := parseRequestBody(req.Header.Get("Content-Type"), body)
  125. if err != nil {
  126. return nil, err
  127. }
  128. url2 := *req.URL
  129. url2.RawQuery = scrubQuery(url2.RawQuery, c.ClearParams, c.RemoveParams)
  130. return &Request{
  131. Method: req.Method,
  132. URL: url2.String(),
  133. Header: scrubHeaders(req.Header, c.ClearHeaders, c.RemoveRequestHeaders),
  134. MediaType: mediaType,
  135. BodyParts: parts,
  136. Trailer: scrubHeaders(req.Trailer, c.ClearHeaders, c.RemoveRequestHeaders),
  137. }, nil
  138. }
  139. // parseRequestBody parses the Content-Type header, reads the body, and splits it into
  140. // parts if necessary. It returns the media type and the body parts.
  141. func parseRequestBody(contentType string, body []byte) (string, [][]byte, error) {
  142. if contentType == "" {
  143. // No content-type header. There should not be a body.
  144. if len(body) != 0 {
  145. return "", nil, errors.New("no Content-Type, but body")
  146. }
  147. return "", nil, nil
  148. }
  149. mediaType, params, err := mime.ParseMediaType(contentType)
  150. if err != nil {
  151. return "", nil, err
  152. }
  153. var parts [][]byte
  154. if strings.HasPrefix(mediaType, "multipart/") {
  155. mr := multipart.NewReader(bytes.NewReader(body), params["boundary"])
  156. for {
  157. p, err := mr.NextPart()
  158. if err == io.EOF {
  159. break
  160. }
  161. if err != nil {
  162. return "", nil, err
  163. }
  164. part, err := ioutil.ReadAll(p)
  165. if err != nil {
  166. return "", nil, err
  167. }
  168. // TODO(jba): care about part headers?
  169. parts = append(parts, part)
  170. }
  171. } else {
  172. parts = [][]byte{body}
  173. }
  174. return mediaType, parts, nil
  175. }
  176. func (c *Converter) convertResponse(res *http.Response) (*Response, error) {
  177. data, err := snapshotBody(&res.Body)
  178. if err != nil {
  179. return nil, err
  180. }
  181. return &Response{
  182. StatusCode: res.StatusCode,
  183. Proto: res.Proto,
  184. ProtoMajor: res.ProtoMajor,
  185. ProtoMinor: res.ProtoMinor,
  186. Header: scrubHeaders(res.Header, c.ClearHeaders, c.RemoveResponseHeaders),
  187. Body: data,
  188. Trailer: scrubHeaders(res.Trailer, c.ClearHeaders, c.RemoveResponseHeaders),
  189. }, nil
  190. }
  191. func snapshotBody(body *io.ReadCloser) ([]byte, error) {
  192. data, err := ioutil.ReadAll(*body)
  193. if err != nil {
  194. return nil, err
  195. }
  196. (*body).Close()
  197. *body = ioutil.NopCloser(bytes.NewReader(data))
  198. return data, nil
  199. }
  200. // Copy headers, clearing some and removing others.
  201. func scrubHeaders(hs http.Header, clear, remove []tRegexp) http.Header {
  202. rh := http.Header{}
  203. for k, v := range hs {
  204. switch {
  205. case match(k, clear):
  206. rh.Set(k, "CLEARED")
  207. case match(k, remove):
  208. // skip
  209. default:
  210. rh[k] = v
  211. }
  212. }
  213. return rh
  214. }
  215. // Copy the query string, clearing some query params and removing others.
  216. // Preserve the order of the string.
  217. func scrubQuery(query string, clear, remove []tRegexp) string {
  218. // We can't use url.ParseQuery because it doesn't preserve order.
  219. var buf bytes.Buffer
  220. for {
  221. if i := strings.IndexAny(query, "&;"); i >= 0 {
  222. scrubParam(&buf, query[:i], query[i], clear, remove)
  223. query = query[i+1:]
  224. } else {
  225. scrubParam(&buf, query, 0, clear, remove)
  226. break
  227. }
  228. }
  229. s := buf.String()
  230. if strings.HasSuffix(s, "&") {
  231. return s[:len(s)-1]
  232. }
  233. return s
  234. }
  235. func scrubParam(buf *bytes.Buffer, param string, sep byte, clear, remove []tRegexp) {
  236. if param == "" {
  237. return
  238. }
  239. key := param
  240. value := ""
  241. if i := strings.Index(param, "="); i >= 0 {
  242. key, value = key[:i], key[i+1:]
  243. }
  244. ukey, err := url.QueryUnescape(key)
  245. // If the key is bad, just pass it and the value through.
  246. if err != nil {
  247. buf.WriteString(param)
  248. if sep != 0 {
  249. buf.WriteByte(sep)
  250. }
  251. return
  252. }
  253. if match(ukey, remove) {
  254. return
  255. }
  256. if match(ukey, clear) && value != "" {
  257. value = "CLEARED"
  258. }
  259. buf.WriteString(key)
  260. buf.WriteByte('=')
  261. buf.WriteString(value)
  262. if sep != 0 {
  263. buf.WriteByte(sep)
  264. }
  265. }
  266. func match(s string, res []tRegexp) bool {
  267. for _, re := range res {
  268. if re.MatchString(s) {
  269. return true
  270. }
  271. }
  272. return false
  273. }