You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

764 lines
20 KiB

  1. // Copyright 2015 Google Inc. All rights reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Package har collects HTTP requests and responses and stores them in HAR format.
  15. //
  16. // For more information on HAR, see:
  17. // https://w3c.github.io/web-performance/specs/HAR/Overview.html
  18. package har
  19. import (
  20. "bytes"
  21. "encoding/json"
  22. "fmt"
  23. "io"
  24. "io/ioutil"
  25. "mime"
  26. "mime/multipart"
  27. "net/http"
  28. "net/url"
  29. "strings"
  30. "sync"
  31. "time"
  32. "unicode/utf8"
  33. "github.com/google/martian"
  34. "github.com/google/martian/log"
  35. "github.com/google/martian/messageview"
  36. "github.com/google/martian/proxyutil"
  37. )
  38. // Logger maintains request and response log entries.
  39. type Logger struct {
  40. bodyLogging func(*http.Response) bool
  41. postDataLogging func(*http.Request) bool
  42. creator *Creator
  43. mu sync.Mutex
  44. entries map[string]*Entry
  45. tail *Entry
  46. }
  47. // HAR is the top level object of a HAR log.
  48. type HAR struct {
  49. Log *Log `json:"log"`
  50. }
  51. // Log is the HAR HTTP request and response log.
  52. type Log struct {
  53. // Version number of the HAR format.
  54. Version string `json:"version"`
  55. // Creator holds information about the log creator application.
  56. Creator *Creator `json:"creator"`
  57. // Entries is a list containing requests and responses.
  58. Entries []*Entry `json:"entries"`
  59. }
  // Creator identifies the program that generated the log (Martian, here).
  type Creator struct {
  	// Name is the name of the log creator application.
  	Name string `json:"name"`
  	// Version is the version of the log creator application.
  	Version string `json:"version"`
  }
  67. // Entry is a individual log entry for a request or response.
  68. type Entry struct {
  69. // ID is the unique ID for the entry.
  70. ID string `json:"_id"`
  71. // StartedDateTime is the date and time stamp of the request start (ISO 8601).
  72. StartedDateTime time.Time `json:"startedDateTime"`
  73. // Time is the total elapsed time of the request in milliseconds.
  74. Time int64 `json:"time"`
  75. // Request contains the detailed information about the request.
  76. Request *Request `json:"request"`
  77. // Response contains the detailed information about the response.
  78. Response *Response `json:"response,omitempty"`
  79. // Cache contains information about a request coming from browser cache.
  80. Cache *Cache `json:"cache"`
  81. // Timings describes various phases within request-response round trip. All
  82. // times are specified in milliseconds.
  83. Timings *Timings `json:"timings"`
  84. next *Entry
  85. }
  86. // Request holds data about an individual HTTP request.
  87. type Request struct {
  88. // Method is the request method (GET, POST, ...).
  89. Method string `json:"method"`
  90. // URL is the absolute URL of the request (fragments are not included).
  91. URL string `json:"url"`
  92. // HTTPVersion is the Request HTTP version (HTTP/1.1).
  93. HTTPVersion string `json:"httpVersion"`
  94. // Cookies is a list of cookies.
  95. Cookies []Cookie `json:"cookies"`
  96. // Headers is a list of headers.
  97. Headers []Header `json:"headers"`
  98. // QueryString is a list of query parameters.
  99. QueryString []QueryString `json:"queryString"`
  100. // PostData is the posted data information.
  101. PostData *PostData `json:"postData,omitempty"`
  102. // HeaderSize is the Total number of bytes from the start of the HTTP request
  103. // message until (and including) the double CLRF before the body. Set to -1
  104. // if the info is not available.
  105. HeadersSize int64 `json:"headersSize"`
  106. // BodySize is the size of the request body (POST data payload) in bytes. Set
  107. // to -1 if the info is not available.
  108. BodySize int64 `json:"bodySize"`
  109. }
  110. // Response holds data about an individual HTTP response.
  111. type Response struct {
  112. // Status is the response status code.
  113. Status int `json:"status"`
  114. // StatusText is the response status description.
  115. StatusText string `json:"statusText"`
  116. // HTTPVersion is the Response HTTP version (HTTP/1.1).
  117. HTTPVersion string `json:"httpVersion"`
  118. // Cookies is a list of cookies.
  119. Cookies []Cookie `json:"cookies"`
  120. // Headers is a list of headers.
  121. Headers []Header `json:"headers"`
  122. // Content contains the details of the response body.
  123. Content *Content `json:"content"`
  124. // RedirectURL is the target URL from the Location response header.
  125. RedirectURL string `json:"redirectURL"`
  126. // HeadersSize is the total number of bytes from the start of the HTTP
  127. // request message until (and including) the double CLRF before the body.
  128. // Set to -1 if the info is not available.
  129. HeadersSize int64 `json:"headersSize"`
  130. // BodySize is the size of the request body (POST data payload) in bytes. Set
  131. // to -1 if the info is not available.
  132. BodySize int64 `json:"bodySize"`
  133. }
  // Cache describes a request served from the browser cache.
  type Cache struct {
  	// Intentionally empty: cache details are not supported, but HAR requires
  	// the "cache" object to be present.
  }
  // Timings breaks the request-response round trip into phases. Every value
  // is expressed in milliseconds.
  type Timings struct {
  	// Send is the time required to send the HTTP request to the server.
  	Send int64 `json:"send"`
  	// Wait is the time spent waiting for a response from the server.
  	Wait int64 `json:"wait"`
  	// Receive is the time required to read the entire response from the
  	// server or cache.
  	Receive int64 `json:"receive"`
  }
  // Cookie is the data recorded about one cookie on a request or response.
  type Cookie struct {
  	// Name is the cookie name.
  	Name string `json:"name"`
  	// Value is the cookie value.
  	Value string `json:"value"`
  	// Path is the path pertaining to the cookie.
  	Path string `json:"path,omitempty"`
  	// Domain is the host of the cookie.
  	Domain string `json:"domain,omitempty"`
  	// Expires is the cookie expiration time. It is excluded from the JSON;
  	// Expires8601 carries the serialized form.
  	Expires time.Time `json:"-"`
  	// Expires8601 is the cookie expiration time in ISO 8601 format.
  	Expires8601 string `json:"expires,omitempty"`
  	// HTTPOnly is true if the cookie is HTTP only, false otherwise.
  	HTTPOnly bool `json:"httpOnly,omitempty"`
  	// Secure is true if the cookie was transmitted over SSL, false otherwise.
  	Secure bool `json:"secure,omitempty"`
  }
  // Header is a single name/value pair from an HTTP request or response.
  type Header struct {
  	// Name is the header name.
  	Name string `json:"name"`
  	// Value is the header value.
  	Value string `json:"value"`
  }
  // QueryString is a single query string parameter of a request.
  type QueryString struct {
  	// Name is the query parameter name.
  	Name string `json:"name"`
  	// Value is the query parameter value.
  	Value string `json:"value"`
  }
  183. // PostData describes posted data on a request.
  184. type PostData struct {
  185. // MimeType is the MIME type of the posted data.
  186. MimeType string `json:"mimeType"`
  187. // Params is a list of posted parameters (in case of URL encoded parameters).
  188. Params []Param `json:"params"`
  189. // Text contains the posted data. Although its type is string, it may contain
  190. // binary data.
  191. Text string `json:"text"`
  192. }
  193. // pdBinary is the JSON representation of binary PostData.
  194. type pdBinary struct {
  195. MimeType string `json:"mimeType"`
  196. // Params is a list of posted parameters (in case of URL encoded parameters).
  197. Params []Param `json:"params"`
  198. Text []byte `json:"text"`
  199. Encoding string `json:"encoding"`
  200. }
  201. // MarshalJSON returns a JSON representation of binary PostData.
  202. func (p *PostData) MarshalJSON() ([]byte, error) {
  203. if utf8.ValidString(p.Text) {
  204. type noMethod PostData // avoid infinite recursion
  205. return json.Marshal((*noMethod)(p))
  206. }
  207. return json.Marshal(pdBinary{
  208. MimeType: p.MimeType,
  209. Params: p.Params,
  210. Text: []byte(p.Text),
  211. Encoding: "base64",
  212. })
  213. }
  214. // UnmarshalJSON populates PostData based on the []byte representation of
  215. // the binary PostData.
  216. func (p *PostData) UnmarshalJSON(data []byte) error {
  217. if bytes.Equal(data, []byte("null")) { // conform to json.Unmarshaler spec
  218. return nil
  219. }
  220. var enc struct {
  221. Encoding string `json:"encoding"`
  222. }
  223. if err := json.Unmarshal(data, &enc); err != nil {
  224. return err
  225. }
  226. if enc.Encoding != "base64" {
  227. type noMethod PostData // avoid infinite recursion
  228. return json.Unmarshal(data, (*noMethod)(p))
  229. }
  230. var pb pdBinary
  231. if err := json.Unmarshal(data, &pb); err != nil {
  232. return err
  233. }
  234. p.MimeType = pb.MimeType
  235. p.Params = pb.Params
  236. p.Text = string(pb.Text)
  237. return nil
  238. }
  // Param describes one posted parameter.
  type Param struct {
  	// Name is the name of the posted parameter.
  	Name string `json:"name"`
  	// Value is the value of the posted parameter.
  	Value string `json:"value,omitempty"`
  	// Filename is the file name of a posted file.
  	Filename string `json:"fileName,omitempty"`
  	// ContentType is the content type of a posted file.
  	ContentType string `json:"contentType,omitempty"`
  }
  // Content describes the body of a response.
  type Content struct {
  	// Size is the length of the returned content in bytes. It should equal
  	// response.bodySize when there is no compression and be bigger when the
  	// content has been compressed.
  	Size int64 `json:"size"`
  	// MimeType is the MIME type of the response text (the value of the
  	// Content-Type response header).
  	MimeType string `json:"mimeType"`
  	// Text is the response body sent from the server or loaded from the
  	// browser cache. It is either HTTP decoded text or an encoded (e.g.
  	// "base64") representation of the body, and is left out when the
  	// information is not available.
  	Text []byte `json:"text,omitempty"`
  	// Encoding is the encoding used for the text field, e.g. "base64". It is
  	// left out when the text field is HTTP decoded (decompressed and
  	// unchunked) and then trans-coded from its original character set into
  	// UTF-8.
  	Encoding string `json:"encoding,omitempty"`
  }
  270. // Option is a configurable setting for the logger.
  271. type Option func(l *Logger)
  272. // PostDataLogging returns an option that configures request post data logging.
  273. func PostDataLogging(enabled bool) Option {
  274. return func(l *Logger) {
  275. l.postDataLogging = func(*http.Request) bool {
  276. return enabled
  277. }
  278. }
  279. }
  280. // PostDataLoggingForContentTypes returns an option that logs request bodies based
  281. // on opting in to the Content-Type of the request.
  282. func PostDataLoggingForContentTypes(cts ...string) Option {
  283. return func(l *Logger) {
  284. l.postDataLogging = func(req *http.Request) bool {
  285. rct := req.Header.Get("Content-Type")
  286. for _, ct := range cts {
  287. if strings.HasPrefix(strings.ToLower(rct), strings.ToLower(ct)) {
  288. return true
  289. }
  290. }
  291. return false
  292. }
  293. }
  294. }
  295. // SkipPostDataLoggingForContentTypes returns an option that logs request bodies based
  296. // on opting out of the Content-Type of the request.
  297. func SkipPostDataLoggingForContentTypes(cts ...string) Option {
  298. return func(l *Logger) {
  299. l.postDataLogging = func(req *http.Request) bool {
  300. rct := req.Header.Get("Content-Type")
  301. for _, ct := range cts {
  302. if strings.HasPrefix(strings.ToLower(rct), strings.ToLower(ct)) {
  303. return false
  304. }
  305. }
  306. return true
  307. }
  308. }
  309. }
  310. // BodyLogging returns an option that configures response body logging.
  311. func BodyLogging(enabled bool) Option {
  312. return func(l *Logger) {
  313. l.bodyLogging = func(*http.Response) bool {
  314. return enabled
  315. }
  316. }
  317. }
  318. // BodyLoggingForContentTypes returns an option that logs response bodies based
  319. // on opting in to the Content-Type of the response.
  320. func BodyLoggingForContentTypes(cts ...string) Option {
  321. return func(l *Logger) {
  322. l.bodyLogging = func(res *http.Response) bool {
  323. rct := res.Header.Get("Content-Type")
  324. for _, ct := range cts {
  325. if strings.HasPrefix(strings.ToLower(rct), strings.ToLower(ct)) {
  326. return true
  327. }
  328. }
  329. return false
  330. }
  331. }
  332. }
  333. // SkipBodyLoggingForContentTypes returns an option that logs response bodies based
  334. // on opting out of the Content-Type of the response.
  335. func SkipBodyLoggingForContentTypes(cts ...string) Option {
  336. return func(l *Logger) {
  337. l.bodyLogging = func(res *http.Response) bool {
  338. rct := res.Header.Get("Content-Type")
  339. for _, ct := range cts {
  340. if strings.HasPrefix(strings.ToLower(rct), strings.ToLower(ct)) {
  341. return false
  342. }
  343. }
  344. return true
  345. }
  346. }
  347. }
  348. // NewLogger returns a HAR logger. The returned
  349. // logger logs all request post data and response bodies by default.
  350. func NewLogger() *Logger {
  351. l := &Logger{
  352. creator: &Creator{
  353. Name: "martian proxy",
  354. Version: "2.0.0",
  355. },
  356. entries: make(map[string]*Entry),
  357. }
  358. l.SetOption(BodyLogging(true))
  359. l.SetOption(PostDataLogging(true))
  360. return l
  361. }
  362. // SetOption sets configurable options on the logger.
  363. func (l *Logger) SetOption(opts ...Option) {
  364. for _, opt := range opts {
  365. opt(l)
  366. }
  367. }
  368. // ModifyRequest logs requests.
  369. func (l *Logger) ModifyRequest(req *http.Request) error {
  370. ctx := martian.NewContext(req)
  371. if ctx.SkippingLogging() {
  372. return nil
  373. }
  374. id := ctx.ID()
  375. return l.RecordRequest(id, req)
  376. }
  377. // RecordRequest logs the HTTP request with the given ID. The ID should be unique
  378. // per request/response pair.
  379. func (l *Logger) RecordRequest(id string, req *http.Request) error {
  380. hreq, err := NewRequest(req, l.postDataLogging(req))
  381. if err != nil {
  382. return err
  383. }
  384. entry := &Entry{
  385. ID: id,
  386. StartedDateTime: time.Now().UTC(),
  387. Request: hreq,
  388. Cache: &Cache{},
  389. Timings: &Timings{},
  390. }
  391. l.mu.Lock()
  392. defer l.mu.Unlock()
  393. if _, exists := l.entries[id]; exists {
  394. return fmt.Errorf("Duplicate request ID: %s", id)
  395. }
  396. l.entries[id] = entry
  397. if l.tail == nil {
  398. l.tail = entry
  399. }
  400. entry.next = l.tail.next
  401. l.tail.next = entry
  402. l.tail = entry
  403. return nil
  404. }
  405. // NewRequest constructs and returns a Request from req. If withBody is true,
  406. // req.Body is read to EOF and replaced with a copy in a bytes.Buffer. An error
  407. // is returned (and req.Body may be in an intermediate state) if an error is
  408. // returned from req.Body.Read.
  409. func NewRequest(req *http.Request, withBody bool) (*Request, error) {
  410. r := &Request{
  411. Method: req.Method,
  412. URL: req.URL.String(),
  413. HTTPVersion: req.Proto,
  414. HeadersSize: -1,
  415. BodySize: req.ContentLength,
  416. QueryString: []QueryString{},
  417. Headers: headers(proxyutil.RequestHeader(req).Map()),
  418. Cookies: cookies(req.Cookies()),
  419. }
  420. for n, vs := range req.URL.Query() {
  421. for _, v := range vs {
  422. r.QueryString = append(r.QueryString, QueryString{
  423. Name: n,
  424. Value: v,
  425. })
  426. }
  427. }
  428. pd, err := postData(req, withBody)
  429. if err != nil {
  430. return nil, err
  431. }
  432. r.PostData = pd
  433. return r, nil
  434. }
  435. // ModifyResponse logs responses.
  436. func (l *Logger) ModifyResponse(res *http.Response) error {
  437. ctx := martian.NewContext(res.Request)
  438. if ctx.SkippingLogging() {
  439. return nil
  440. }
  441. id := ctx.ID()
  442. return l.RecordResponse(id, res)
  443. }
  444. // RecordResponse logs an HTTP response, associating it with the previously-logged
  445. // HTTP request with the same ID.
  446. func (l *Logger) RecordResponse(id string, res *http.Response) error {
  447. hres, err := NewResponse(res, l.bodyLogging(res))
  448. if err != nil {
  449. return err
  450. }
  451. l.mu.Lock()
  452. defer l.mu.Unlock()
  453. if e, ok := l.entries[id]; ok {
  454. e.Response = hres
  455. e.Time = time.Since(e.StartedDateTime).Nanoseconds() / 1000000
  456. }
  457. return nil
  458. }
  459. // NewResponse constructs and returns a Response from resp. If withBody is true,
  460. // resp.Body is read to EOF and replaced with a copy in a bytes.Buffer. An error
  461. // is returned (and resp.Body may be in an intermediate state) if an error is
  462. // returned from resp.Body.Read.
  463. func NewResponse(res *http.Response, withBody bool) (*Response, error) {
  464. r := &Response{
  465. HTTPVersion: res.Proto,
  466. Status: res.StatusCode,
  467. StatusText: http.StatusText(res.StatusCode),
  468. HeadersSize: -1,
  469. BodySize: res.ContentLength,
  470. Headers: headers(proxyutil.ResponseHeader(res).Map()),
  471. Cookies: cookies(res.Cookies()),
  472. }
  473. if res.StatusCode >= 300 && res.StatusCode < 400 {
  474. r.RedirectURL = res.Header.Get("Location")
  475. }
  476. r.Content = &Content{
  477. Encoding: "base64",
  478. MimeType: res.Header.Get("Content-Type"),
  479. }
  480. if withBody {
  481. mv := messageview.New()
  482. if err := mv.SnapshotResponse(res); err != nil {
  483. return nil, err
  484. }
  485. br, err := mv.BodyReader(messageview.Decode())
  486. if err != nil {
  487. return nil, err
  488. }
  489. body, err := ioutil.ReadAll(br)
  490. if err != nil {
  491. return nil, err
  492. }
  493. r.Content.Text = body
  494. r.Content.Size = int64(len(body))
  495. }
  496. return r, nil
  497. }
  498. // Export returns the in-memory log.
  499. func (l *Logger) Export() *HAR {
  500. l.mu.Lock()
  501. defer l.mu.Unlock()
  502. es := make([]*Entry, 0, len(l.entries))
  503. curr := l.tail
  504. for curr != nil {
  505. curr = curr.next
  506. es = append(es, curr)
  507. if curr == l.tail {
  508. break
  509. }
  510. }
  511. return l.makeHAR(es)
  512. }
  513. // ExportAndReset returns the in-memory log for completed requests, clearing them.
  514. func (l *Logger) ExportAndReset() *HAR {
  515. l.mu.Lock()
  516. defer l.mu.Unlock()
  517. es := make([]*Entry, 0, len(l.entries))
  518. curr := l.tail
  519. prev := l.tail
  520. var first *Entry
  521. for curr != nil {
  522. curr = curr.next
  523. if curr.Response != nil {
  524. es = append(es, curr)
  525. delete(l.entries, curr.ID)
  526. } else {
  527. if first == nil {
  528. first = curr
  529. }
  530. prev.next = curr
  531. prev = curr
  532. }
  533. if curr == l.tail {
  534. break
  535. }
  536. }
  537. if len(l.entries) == 0 {
  538. l.tail = nil
  539. } else {
  540. l.tail = prev
  541. l.tail.next = first
  542. }
  543. return l.makeHAR(es)
  544. }
  545. func (l *Logger) makeHAR(es []*Entry) *HAR {
  546. return &HAR{
  547. Log: &Log{
  548. Version: "1.2",
  549. Creator: l.creator,
  550. Entries: es,
  551. },
  552. }
  553. }
  554. // Reset clears the in-memory log of entries.
  555. func (l *Logger) Reset() {
  556. l.mu.Lock()
  557. defer l.mu.Unlock()
  558. l.entries = make(map[string]*Entry)
  559. l.tail = nil
  560. }
  561. func cookies(cs []*http.Cookie) []Cookie {
  562. hcs := make([]Cookie, 0, len(cs))
  563. for _, c := range cs {
  564. var expires string
  565. if !c.Expires.IsZero() {
  566. expires = c.Expires.Format(time.RFC3339)
  567. }
  568. hcs = append(hcs, Cookie{
  569. Name: c.Name,
  570. Value: c.Value,
  571. Path: c.Path,
  572. Domain: c.Domain,
  573. HTTPOnly: c.HttpOnly,
  574. Secure: c.Secure,
  575. Expires: c.Expires,
  576. Expires8601: expires,
  577. })
  578. }
  579. return hcs
  580. }
  581. func headers(hs http.Header) []Header {
  582. hhs := make([]Header, 0, len(hs))
  583. for n, vs := range hs {
  584. for _, v := range vs {
  585. hhs = append(hhs, Header{
  586. Name: n,
  587. Value: v,
  588. })
  589. }
  590. }
  591. return hhs
  592. }
  593. func postData(req *http.Request, logBody bool) (*PostData, error) {
  594. // If the request has no body (no Content-Length and Transfer-Encoding isn't
  595. // chunked), skip the post data.
  596. if req.ContentLength <= 0 && len(req.TransferEncoding) == 0 {
  597. return nil, nil
  598. }
  599. ct := req.Header.Get("Content-Type")
  600. mt, ps, err := mime.ParseMediaType(ct)
  601. if err != nil {
  602. log.Errorf("har: cannot parse Content-Type header %q: %v", ct, err)
  603. mt = ct
  604. }
  605. pd := &PostData{
  606. MimeType: mt,
  607. Params: []Param{},
  608. }
  609. if !logBody {
  610. return pd, nil
  611. }
  612. mv := messageview.New()
  613. if err := mv.SnapshotRequest(req); err != nil {
  614. return nil, err
  615. }
  616. br, err := mv.BodyReader()
  617. if err != nil {
  618. return nil, err
  619. }
  620. switch mt {
  621. case "multipart/form-data":
  622. mpr := multipart.NewReader(br, ps["boundary"])
  623. for {
  624. p, err := mpr.NextPart()
  625. if err == io.EOF {
  626. break
  627. }
  628. if err != nil {
  629. return nil, err
  630. }
  631. defer p.Close()
  632. body, err := ioutil.ReadAll(p)
  633. if err != nil {
  634. return nil, err
  635. }
  636. pd.Params = append(pd.Params, Param{
  637. Name: p.FormName(),
  638. Filename: p.FileName(),
  639. ContentType: p.Header.Get("Content-Type"),
  640. Value: string(body),
  641. })
  642. }
  643. case "application/x-www-form-urlencoded":
  644. body, err := ioutil.ReadAll(br)
  645. if err != nil {
  646. return nil, err
  647. }
  648. vs, err := url.ParseQuery(string(body))
  649. if err != nil {
  650. return nil, err
  651. }
  652. for n, vs := range vs {
  653. for _, v := range vs {
  654. pd.Params = append(pd.Params, Param{
  655. Name: n,
  656. Value: v,
  657. })
  658. }
  659. }
  660. default:
  661. body, err := ioutil.ReadAll(br)
  662. if err != nil {
  663. return nil, err
  664. }
  665. pd.Text = string(body)
  666. }
  667. return pd, nil
  668. }