|
- // Copyright 2015 Google Inc. All rights reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
-
- // Package har collects HTTP requests and responses and stores them in HAR format.
- //
- // For more information on HAR, see:
- // https://w3c.github.io/web-performance/specs/HAR/Overview.html
- package har
-
- import (
- "bytes"
- "encoding/json"
- "fmt"
- "io"
- "io/ioutil"
- "mime"
- "mime/multipart"
- "net/http"
- "net/url"
- "strings"
- "sync"
- "time"
- "unicode/utf8"
-
- "github.com/google/martian"
- "github.com/google/martian/log"
- "github.com/google/martian/messageview"
- "github.com/google/martian/proxyutil"
- )
-
- // Logger maintains request and response log entries.
- type Logger struct {
- bodyLogging func(*http.Response) bool
- postDataLogging func(*http.Request) bool
-
- creator *Creator
-
- mu sync.Mutex
- entries map[string]*Entry
- tail *Entry
- }
-
- // HAR is the top level object of a HAR log.
- type HAR struct {
- Log *Log `json:"log"`
- }
-
- // Log is the HAR HTTP request and response log.
- type Log struct {
- // Version number of the HAR format.
- Version string `json:"version"`
- // Creator holds information about the log creator application.
- Creator *Creator `json:"creator"`
- // Entries is a list containing requests and responses.
- Entries []*Entry `json:"entries"`
- }
-
- // Creator is the program responsible for generating the log. Martian, in this case.
- type Creator struct {
- // Name of the log creator application.
- Name string `json:"name"`
- // Version of the log creator application.
- Version string `json:"version"`
- }
-
- // Entry is a individual log entry for a request or response.
- type Entry struct {
- // ID is the unique ID for the entry.
- ID string `json:"_id"`
- // StartedDateTime is the date and time stamp of the request start (ISO 8601).
- StartedDateTime time.Time `json:"startedDateTime"`
- // Time is the total elapsed time of the request in milliseconds.
- Time int64 `json:"time"`
- // Request contains the detailed information about the request.
- Request *Request `json:"request"`
- // Response contains the detailed information about the response.
- Response *Response `json:"response,omitempty"`
- // Cache contains information about a request coming from browser cache.
- Cache *Cache `json:"cache"`
- // Timings describes various phases within request-response round trip. All
- // times are specified in milliseconds.
- Timings *Timings `json:"timings"`
- next *Entry
- }
-
- // Request holds data about an individual HTTP request.
- type Request struct {
- // Method is the request method (GET, POST, ...).
- Method string `json:"method"`
- // URL is the absolute URL of the request (fragments are not included).
- URL string `json:"url"`
- // HTTPVersion is the Request HTTP version (HTTP/1.1).
- HTTPVersion string `json:"httpVersion"`
- // Cookies is a list of cookies.
- Cookies []Cookie `json:"cookies"`
- // Headers is a list of headers.
- Headers []Header `json:"headers"`
- // QueryString is a list of query parameters.
- QueryString []QueryString `json:"queryString"`
- // PostData is the posted data information.
- PostData *PostData `json:"postData,omitempty"`
- // HeaderSize is the Total number of bytes from the start of the HTTP request
- // message until (and including) the double CLRF before the body. Set to -1
- // if the info is not available.
- HeadersSize int64 `json:"headersSize"`
- // BodySize is the size of the request body (POST data payload) in bytes. Set
- // to -1 if the info is not available.
- BodySize int64 `json:"bodySize"`
- }
-
- // Response holds data about an individual HTTP response.
- type Response struct {
- // Status is the response status code.
- Status int `json:"status"`
- // StatusText is the response status description.
- StatusText string `json:"statusText"`
- // HTTPVersion is the Response HTTP version (HTTP/1.1).
- HTTPVersion string `json:"httpVersion"`
- // Cookies is a list of cookies.
- Cookies []Cookie `json:"cookies"`
- // Headers is a list of headers.
- Headers []Header `json:"headers"`
- // Content contains the details of the response body.
- Content *Content `json:"content"`
- // RedirectURL is the target URL from the Location response header.
- RedirectURL string `json:"redirectURL"`
- // HeadersSize is the total number of bytes from the start of the HTTP
- // request message until (and including) the double CLRF before the body.
- // Set to -1 if the info is not available.
- HeadersSize int64 `json:"headersSize"`
- // BodySize is the size of the request body (POST data payload) in bytes. Set
- // to -1 if the info is not available.
- BodySize int64 `json:"bodySize"`
- }
-
- // Cache contains information about a request coming from browser cache.
- type Cache struct {
- // Has no fields as they are not supported, but HAR requires the "cache"
- // object to exist.
- }
-
- // Timings describes various phases within request-response round trip. All
- // times are specified in milliseconds
- type Timings struct {
- // Send is the time required to send HTTP request to the server.
- Send int64 `json:"send"`
- // Wait is the time spent waiting for a response from the server.
- Wait int64 `json:"wait"`
- // Receive is the time required to read entire response from server or cache.
- Receive int64 `json:"receive"`
- }
-
- // Cookie is the data about a cookie on a request or response.
- type Cookie struct {
- // Name is the cookie name.
- Name string `json:"name"`
- // Value is the cookie value.
- Value string `json:"value"`
- // Path is the path pertaining to the cookie.
- Path string `json:"path,omitempty"`
- // Domain is the host of the cookie.
- Domain string `json:"domain,omitempty"`
- // Expires contains cookie expiration time.
- Expires time.Time `json:"-"`
- // Expires8601 contains cookie expiration time in ISO 8601 format.
- Expires8601 string `json:"expires,omitempty"`
- // HTTPOnly is set to true if the cookie is HTTP only, false otherwise.
- HTTPOnly bool `json:"httpOnly,omitempty"`
- // Secure is set to true if the cookie was transmitted over SSL, false
- // otherwise.
- Secure bool `json:"secure,omitempty"`
- }
-
- // Header is an HTTP request or response header.
- type Header struct {
- // Name is the header name.
- Name string `json:"name"`
- // Value is the header value.
- Value string `json:"value"`
- }
-
- // QueryString is a query string parameter on a request.
- type QueryString struct {
- // Name is the query parameter name.
- Name string `json:"name"`
- // Value is the query parameter value.
- Value string `json:"value"`
- }
-
- // PostData describes posted data on a request.
- type PostData struct {
- // MimeType is the MIME type of the posted data.
- MimeType string `json:"mimeType"`
- // Params is a list of posted parameters (in case of URL encoded parameters).
- Params []Param `json:"params"`
- // Text contains the posted data. Although its type is string, it may contain
- // binary data.
- Text string `json:"text"`
- }
-
- // pdBinary is the JSON representation of binary PostData.
- type pdBinary struct {
- MimeType string `json:"mimeType"`
- // Params is a list of posted parameters (in case of URL encoded parameters).
- Params []Param `json:"params"`
- Text []byte `json:"text"`
- Encoding string `json:"encoding"`
- }
-
- // MarshalJSON returns a JSON representation of binary PostData.
- func (p *PostData) MarshalJSON() ([]byte, error) {
- if utf8.ValidString(p.Text) {
- type noMethod PostData // avoid infinite recursion
- return json.Marshal((*noMethod)(p))
- }
- return json.Marshal(pdBinary{
- MimeType: p.MimeType,
- Params: p.Params,
- Text: []byte(p.Text),
- Encoding: "base64",
- })
- }
-
- // UnmarshalJSON populates PostData based on the []byte representation of
- // the binary PostData.
- func (p *PostData) UnmarshalJSON(data []byte) error {
- if bytes.Equal(data, []byte("null")) { // conform to json.Unmarshaler spec
- return nil
- }
- var enc struct {
- Encoding string `json:"encoding"`
- }
- if err := json.Unmarshal(data, &enc); err != nil {
- return err
- }
- if enc.Encoding != "base64" {
- type noMethod PostData // avoid infinite recursion
- return json.Unmarshal(data, (*noMethod)(p))
- }
- var pb pdBinary
- if err := json.Unmarshal(data, &pb); err != nil {
- return err
- }
- p.MimeType = pb.MimeType
- p.Params = pb.Params
- p.Text = string(pb.Text)
- return nil
- }
-
- // Param describes an individual posted parameter.
- type Param struct {
- // Name of the posted parameter.
- Name string `json:"name"`
- // Value of the posted parameter.
- Value string `json:"value,omitempty"`
- // Filename of a posted file.
- Filename string `json:"fileName,omitempty"`
- // ContentType is the content type of a posted file.
- ContentType string `json:"contentType,omitempty"`
- }
-
- // Content describes details about response content.
- type Content struct {
- // Size is the length of the returned content in bytes. Should be equal to
- // response.bodySize if there is no compression and bigger when the content
- // has been compressed.
- Size int64 `json:"size"`
- // MimeType is the MIME type of the response text (value of the Content-Type
- // response header).
- MimeType string `json:"mimeType"`
- // Text contains the response body sent from the server or loaded from the
- // browser cache. This field is populated with textual content only. The text
- // field is either HTTP decoded text or a encoded (e.g. "base64")
- // representation of the response body. Leave out this field if the
- // information is not available.
- Text []byte `json:"text,omitempty"`
- // Encoding used for response text field e.g "base64". Leave out this field
- // if the text field is HTTP decoded (decompressed & unchunked), than
- // trans-coded from its original character set into UTF-8.
- Encoding string `json:"encoding,omitempty"`
- }
-
- // Option is a configurable setting for the logger.
- type Option func(l *Logger)
-
- // PostDataLogging returns an option that configures request post data logging.
- func PostDataLogging(enabled bool) Option {
- return func(l *Logger) {
- l.postDataLogging = func(*http.Request) bool {
- return enabled
- }
- }
- }
-
- // PostDataLoggingForContentTypes returns an option that logs request bodies based
- // on opting in to the Content-Type of the request.
- func PostDataLoggingForContentTypes(cts ...string) Option {
- return func(l *Logger) {
- l.postDataLogging = func(req *http.Request) bool {
- rct := req.Header.Get("Content-Type")
-
- for _, ct := range cts {
- if strings.HasPrefix(strings.ToLower(rct), strings.ToLower(ct)) {
- return true
- }
- }
-
- return false
- }
- }
- }
-
- // SkipPostDataLoggingForContentTypes returns an option that logs request bodies based
- // on opting out of the Content-Type of the request.
- func SkipPostDataLoggingForContentTypes(cts ...string) Option {
- return func(l *Logger) {
- l.postDataLogging = func(req *http.Request) bool {
- rct := req.Header.Get("Content-Type")
-
- for _, ct := range cts {
- if strings.HasPrefix(strings.ToLower(rct), strings.ToLower(ct)) {
- return false
- }
- }
-
- return true
- }
- }
- }
-
- // BodyLogging returns an option that configures response body logging.
- func BodyLogging(enabled bool) Option {
- return func(l *Logger) {
- l.bodyLogging = func(*http.Response) bool {
- return enabled
- }
- }
- }
-
- // BodyLoggingForContentTypes returns an option that logs response bodies based
- // on opting in to the Content-Type of the response.
- func BodyLoggingForContentTypes(cts ...string) Option {
- return func(l *Logger) {
- l.bodyLogging = func(res *http.Response) bool {
- rct := res.Header.Get("Content-Type")
-
- for _, ct := range cts {
- if strings.HasPrefix(strings.ToLower(rct), strings.ToLower(ct)) {
- return true
- }
- }
-
- return false
- }
- }
- }
-
- // SkipBodyLoggingForContentTypes returns an option that logs response bodies based
- // on opting out of the Content-Type of the response.
- func SkipBodyLoggingForContentTypes(cts ...string) Option {
- return func(l *Logger) {
- l.bodyLogging = func(res *http.Response) bool {
- rct := res.Header.Get("Content-Type")
-
- for _, ct := range cts {
- if strings.HasPrefix(strings.ToLower(rct), strings.ToLower(ct)) {
- return false
- }
- }
-
- return true
- }
- }
- }
-
- // NewLogger returns a HAR logger. The returned
- // logger logs all request post data and response bodies by default.
- func NewLogger() *Logger {
- l := &Logger{
- creator: &Creator{
- Name: "martian proxy",
- Version: "2.0.0",
- },
- entries: make(map[string]*Entry),
- }
- l.SetOption(BodyLogging(true))
- l.SetOption(PostDataLogging(true))
- return l
- }
-
- // SetOption sets configurable options on the logger.
- func (l *Logger) SetOption(opts ...Option) {
- for _, opt := range opts {
- opt(l)
- }
- }
-
- // ModifyRequest logs requests.
- func (l *Logger) ModifyRequest(req *http.Request) error {
- ctx := martian.NewContext(req)
- if ctx.SkippingLogging() {
- return nil
- }
-
- id := ctx.ID()
-
- return l.RecordRequest(id, req)
- }
-
- // RecordRequest logs the HTTP request with the given ID. The ID should be unique
- // per request/response pair.
- func (l *Logger) RecordRequest(id string, req *http.Request) error {
- hreq, err := NewRequest(req, l.postDataLogging(req))
- if err != nil {
- return err
- }
-
- entry := &Entry{
- ID: id,
- StartedDateTime: time.Now().UTC(),
- Request: hreq,
- Cache: &Cache{},
- Timings: &Timings{},
- }
-
- l.mu.Lock()
- defer l.mu.Unlock()
-
- if _, exists := l.entries[id]; exists {
- return fmt.Errorf("Duplicate request ID: %s", id)
- }
- l.entries[id] = entry
- if l.tail == nil {
- l.tail = entry
- }
- entry.next = l.tail.next
- l.tail.next = entry
- l.tail = entry
-
- return nil
- }
-
- // NewRequest constructs and returns a Request from req. If withBody is true,
- // req.Body is read to EOF and replaced with a copy in a bytes.Buffer. An error
- // is returned (and req.Body may be in an intermediate state) if an error is
- // returned from req.Body.Read.
- func NewRequest(req *http.Request, withBody bool) (*Request, error) {
- r := &Request{
- Method: req.Method,
- URL: req.URL.String(),
- HTTPVersion: req.Proto,
- HeadersSize: -1,
- BodySize: req.ContentLength,
- QueryString: []QueryString{},
- Headers: headers(proxyutil.RequestHeader(req).Map()),
- Cookies: cookies(req.Cookies()),
- }
-
- for n, vs := range req.URL.Query() {
- for _, v := range vs {
- r.QueryString = append(r.QueryString, QueryString{
- Name: n,
- Value: v,
- })
- }
- }
-
- pd, err := postData(req, withBody)
- if err != nil {
- return nil, err
- }
- r.PostData = pd
-
- return r, nil
- }
-
- // ModifyResponse logs responses.
- func (l *Logger) ModifyResponse(res *http.Response) error {
- ctx := martian.NewContext(res.Request)
- if ctx.SkippingLogging() {
- return nil
- }
- id := ctx.ID()
-
- return l.RecordResponse(id, res)
- }
-
- // RecordResponse logs an HTTP response, associating it with the previously-logged
- // HTTP request with the same ID.
- func (l *Logger) RecordResponse(id string, res *http.Response) error {
- hres, err := NewResponse(res, l.bodyLogging(res))
- if err != nil {
- return err
- }
-
- l.mu.Lock()
- defer l.mu.Unlock()
-
- if e, ok := l.entries[id]; ok {
- e.Response = hres
- e.Time = time.Since(e.StartedDateTime).Nanoseconds() / 1000000
- }
-
- return nil
- }
-
- // NewResponse constructs and returns a Response from resp. If withBody is true,
- // resp.Body is read to EOF and replaced with a copy in a bytes.Buffer. An error
- // is returned (and resp.Body may be in an intermediate state) if an error is
- // returned from resp.Body.Read.
- func NewResponse(res *http.Response, withBody bool) (*Response, error) {
- r := &Response{
- HTTPVersion: res.Proto,
- Status: res.StatusCode,
- StatusText: http.StatusText(res.StatusCode),
- HeadersSize: -1,
- BodySize: res.ContentLength,
- Headers: headers(proxyutil.ResponseHeader(res).Map()),
- Cookies: cookies(res.Cookies()),
- }
-
- if res.StatusCode >= 300 && res.StatusCode < 400 {
- r.RedirectURL = res.Header.Get("Location")
- }
-
- r.Content = &Content{
- Encoding: "base64",
- MimeType: res.Header.Get("Content-Type"),
- }
-
- if withBody {
- mv := messageview.New()
- if err := mv.SnapshotResponse(res); err != nil {
- return nil, err
- }
-
- br, err := mv.BodyReader(messageview.Decode())
- if err != nil {
- return nil, err
- }
-
- body, err := ioutil.ReadAll(br)
- if err != nil {
- return nil, err
- }
-
- r.Content.Text = body
- r.Content.Size = int64(len(body))
- }
- return r, nil
- }
-
- // Export returns the in-memory log.
- func (l *Logger) Export() *HAR {
- l.mu.Lock()
- defer l.mu.Unlock()
-
- es := make([]*Entry, 0, len(l.entries))
- curr := l.tail
- for curr != nil {
- curr = curr.next
- es = append(es, curr)
- if curr == l.tail {
- break
- }
- }
-
- return l.makeHAR(es)
- }
-
- // ExportAndReset returns the in-memory log for completed requests, clearing them.
- func (l *Logger) ExportAndReset() *HAR {
- l.mu.Lock()
- defer l.mu.Unlock()
-
- es := make([]*Entry, 0, len(l.entries))
- curr := l.tail
- prev := l.tail
- var first *Entry
- for curr != nil {
- curr = curr.next
- if curr.Response != nil {
- es = append(es, curr)
- delete(l.entries, curr.ID)
- } else {
- if first == nil {
- first = curr
- }
- prev.next = curr
- prev = curr
- }
- if curr == l.tail {
- break
- }
- }
- if len(l.entries) == 0 {
- l.tail = nil
- } else {
- l.tail = prev
- l.tail.next = first
- }
-
- return l.makeHAR(es)
- }
-
- func (l *Logger) makeHAR(es []*Entry) *HAR {
- return &HAR{
- Log: &Log{
- Version: "1.2",
- Creator: l.creator,
- Entries: es,
- },
- }
- }
-
- // Reset clears the in-memory log of entries.
- func (l *Logger) Reset() {
- l.mu.Lock()
- defer l.mu.Unlock()
-
- l.entries = make(map[string]*Entry)
- l.tail = nil
- }
-
- func cookies(cs []*http.Cookie) []Cookie {
- hcs := make([]Cookie, 0, len(cs))
-
- for _, c := range cs {
- var expires string
- if !c.Expires.IsZero() {
- expires = c.Expires.Format(time.RFC3339)
- }
-
- hcs = append(hcs, Cookie{
- Name: c.Name,
- Value: c.Value,
- Path: c.Path,
- Domain: c.Domain,
- HTTPOnly: c.HttpOnly,
- Secure: c.Secure,
- Expires: c.Expires,
- Expires8601: expires,
- })
- }
-
- return hcs
- }
-
- func headers(hs http.Header) []Header {
- hhs := make([]Header, 0, len(hs))
-
- for n, vs := range hs {
- for _, v := range vs {
- hhs = append(hhs, Header{
- Name: n,
- Value: v,
- })
- }
- }
-
- return hhs
- }
-
- func postData(req *http.Request, logBody bool) (*PostData, error) {
- // If the request has no body (no Content-Length and Transfer-Encoding isn't
- // chunked), skip the post data.
- if req.ContentLength <= 0 && len(req.TransferEncoding) == 0 {
- return nil, nil
- }
-
- ct := req.Header.Get("Content-Type")
- mt, ps, err := mime.ParseMediaType(ct)
- if err != nil {
- log.Errorf("har: cannot parse Content-Type header %q: %v", ct, err)
- mt = ct
- }
-
- pd := &PostData{
- MimeType: mt,
- Params: []Param{},
- }
-
- if !logBody {
- return pd, nil
- }
-
- mv := messageview.New()
- if err := mv.SnapshotRequest(req); err != nil {
- return nil, err
- }
-
- br, err := mv.BodyReader()
- if err != nil {
- return nil, err
- }
-
- switch mt {
- case "multipart/form-data":
- mpr := multipart.NewReader(br, ps["boundary"])
-
- for {
- p, err := mpr.NextPart()
- if err == io.EOF {
- break
- }
- if err != nil {
- return nil, err
- }
- defer p.Close()
-
- body, err := ioutil.ReadAll(p)
- if err != nil {
- return nil, err
- }
-
- pd.Params = append(pd.Params, Param{
- Name: p.FormName(),
- Filename: p.FileName(),
- ContentType: p.Header.Get("Content-Type"),
- Value: string(body),
- })
- }
- case "application/x-www-form-urlencoded":
- body, err := ioutil.ReadAll(br)
- if err != nil {
- return nil, err
- }
-
- vs, err := url.ParseQuery(string(body))
- if err != nil {
- return nil, err
- }
-
- for n, vs := range vs {
- for _, v := range vs {
- pd.Params = append(pd.Params, Param{
- Name: n,
- Value: v,
- })
- }
- }
- default:
- body, err := ioutil.ReadAll(br)
- if err != nil {
- return nil, err
- }
-
- pd.Text = string(body)
- }
-
- return pd, nil
- }
|