// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package promhttp provides tooling around HTTP servers and clients.
//
// First, the package allows the creation of http.Handler instances to expose
// Prometheus metrics via HTTP. promhttp.Handler acts on the
// prometheus.DefaultGatherer. With HandlerFor, you can create a handler for a
// custom registry or anything that implements the Gatherer interface. It also
// allows the creation of handlers that act differently on errors or that log
// errors.
//
// Second, the package provides tooling to instrument instances of http.Handler
// via middleware. Middleware wrappers follow the naming scheme
// InstrumentHandlerX, where X describes the intended use of the middleware.
// See each function's doc comment for specific details.
//
// Finally, the package allows for an http.RoundTripper to be instrumented via
// middleware. Middleware wrappers follow the naming scheme
// InstrumentRoundTripperX, where X describes the intended use of the
// middleware. See each function's doc comment for specific details.
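//
// As a rough sketch of the latter two use cases (the metric names, the routes,
// and appHandler below are illustrative assumptions, not part of this
// package):
//
//	reqCount := prometheus.NewCounterVec(
//		prometheus.CounterOpts{Name: "app_http_requests_total", Help: "HTTP requests by code and method."},
//		[]string{"code", "method"},
//	)
//	inFlight := prometheus.NewGauge(prometheus.GaugeOpts{Name: "app_client_in_flight_requests", Help: "In-flight client requests."})
//	prometheus.MustRegister(reqCount, inFlight)
//
//	// Server side: wrap an application handler.
//	http.Handle("/", promhttp.InstrumentHandlerCounter(reqCount, appHandler))
//
//	// Client side: wrap an http.RoundTripper.
//	client := &http.Client{Transport: promhttp.InstrumentRoundTripperInFlight(inFlight, http.DefaultTransport)}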
package promhttp

import (
	"compress/gzip"
	"errors"
	"fmt"
	"io"
	"net/http"
	"strings"
	"sync"
	"time"

	"github.com/prometheus/common/expfmt"

	"github.com/prometheus/client_golang/prometheus"
)

const (
	contentTypeHeader     = "Content-Type"
	contentEncodingHeader = "Content-Encoding"
	acceptEncodingHeader  = "Accept-Encoding"
)

var gzipPool = sync.Pool{
	New: func() interface{} {
		return gzip.NewWriter(nil)
	},
}

// Handler returns an http.Handler for the prometheus.DefaultGatherer, using
// default HandlerOpts, i.e. it reports the first error as an HTTP error, it has
// no error logging, and it applies compression if requested by the client.
//
// The returned http.Handler is already instrumented using the
// InstrumentMetricHandler function and the prometheus.DefaultRegisterer. If you
// create multiple http.Handlers by separate calls of the Handler function, the
// metrics used for instrumentation will be shared between them, providing
// global scrape counts.
//
// This function is meant to cover the bulk of basic use cases. If you are doing
// anything that requires more customization (including using a non-default
// Gatherer, different instrumentation, and non-default HandlerOpts), use the
// HandlerFor function. See there for details.
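//
// A minimal usage sketch, assuming the conventional "/metrics" path and a
// plain net/http server (both are choices of the caller, not requirements of
// this package):
//
//	http.Handle("/metrics", promhttp.Handler())
//	log.Fatal(http.ListenAndServe(":8080", nil))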
func Handler() http.Handler {
	return InstrumentMetricHandler(
		prometheus.DefaultRegisterer, HandlerFor(prometheus.DefaultGatherer, HandlerOpts{}),
	)
}

// HandlerFor returns an uninstrumented http.Handler for the provided
// Gatherer. The behavior of the Handler is defined by the provided
// HandlerOpts. Thus, HandlerFor is useful to create http.Handlers for custom
// Gatherers, with non-default HandlerOpts, and/or with custom (or no)
// instrumentation. Use the InstrumentMetricHandler function to apply the same
// kind of instrumentation as is used by the Handler function.
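//
// A sketch of serving a custom registry with explicit options (the registry,
// the "/metrics" path, and the chosen options are illustrative):
//
//	reg := prometheus.NewRegistry()
//	// ... register the application's collectors with reg ...
//	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{
//		ErrorHandling: promhttp.ContinueOnError,
//	}))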
func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
	return HandlerForTransactional(prometheus.ToTransactionalGatherer(reg), opts)
}

// HandlerForTransactional is like HandlerFor, but it uses a transactional
// gatherer, which may safely modify the returned *dto.MetricFamily values in
// place before the call to `Gather` and after the call to the `done` function
// returned by that `Gather`.
func HandlerForTransactional(reg prometheus.TransactionalGatherer, opts HandlerOpts) http.Handler {
	var (
		inFlightSem chan struct{}
		errCnt      = prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name: "promhttp_metric_handler_errors_total",
				Help: "Total number of internal errors encountered by the promhttp metric handler.",
			},
			[]string{"cause"},
		)
	)

	if opts.MaxRequestsInFlight > 0 {
		inFlightSem = make(chan struct{}, opts.MaxRequestsInFlight)
	}
	if opts.Registry != nil {
		// Initialize all possibilities that can occur below.
		errCnt.WithLabelValues("gathering")
		errCnt.WithLabelValues("encoding")
		if err := opts.Registry.Register(errCnt); err != nil {
			are := &prometheus.AlreadyRegisteredError{}
			if errors.As(err, are) {
				errCnt = are.ExistingCollector.(*prometheus.CounterVec)
			} else {
				panic(err)
			}
		}
	}

	h := http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
		if inFlightSem != nil {
			select {
			case inFlightSem <- struct{}{}: // All good, carry on.
				defer func() { <-inFlightSem }()
			default:
				http.Error(rsp, fmt.Sprintf(
					"Limit of concurrent requests reached (%d), try again later.", opts.MaxRequestsInFlight,
				), http.StatusServiceUnavailable)
				return
			}
		}
		mfs, done, err := reg.Gather()
		defer done()
		if err != nil {
			if opts.ErrorLog != nil {
				opts.ErrorLog.Println("error gathering metrics:", err)
			}
			errCnt.WithLabelValues("gathering").Inc()
			switch opts.ErrorHandling {
			case PanicOnError:
				panic(err)
			case ContinueOnError:
				if len(mfs) == 0 {
					// Still report the error if no metrics have been gathered.
					httpError(rsp, err)
					return
				}
			case HTTPErrorOnError:
				httpError(rsp, err)
				return
			}
		}

		var contentType expfmt.Format
		if opts.EnableOpenMetrics {
			contentType = expfmt.NegotiateIncludingOpenMetrics(req.Header)
		} else {
			contentType = expfmt.Negotiate(req.Header)
		}
		header := rsp.Header()
		header.Set(contentTypeHeader, string(contentType))

		w := io.Writer(rsp)
		if !opts.DisableCompression && gzipAccepted(req.Header) {
			header.Set(contentEncodingHeader, "gzip")
			gz := gzipPool.Get().(*gzip.Writer)
			defer gzipPool.Put(gz)

			gz.Reset(w)
			defer gz.Close()

			w = gz
		}

		enc := expfmt.NewEncoder(w, contentType)

		// handleError handles the error according to opts.ErrorHandling
		// and returns true if we have to abort after the handling.
		handleError := func(err error) bool {
			if err == nil {
				return false
			}
			if opts.ErrorLog != nil {
				opts.ErrorLog.Println("error encoding and sending metric family:", err)
			}
			errCnt.WithLabelValues("encoding").Inc()
			switch opts.ErrorHandling {
			case PanicOnError:
				panic(err)
			case HTTPErrorOnError:
				// We cannot really send an HTTP error at this
				// point because we most likely have written
				// something to rsp already. But at least we can
				// stop sending.
				return true
			}
			// Do nothing in all other cases, including ContinueOnError.
			return false
		}

		for _, mf := range mfs {
			if handleError(enc.Encode(mf)) {
				return
			}
		}
		if closer, ok := enc.(expfmt.Closer); ok {
			// This in particular takes care of the final "# EOF\n" line for OpenMetrics.
			if handleError(closer.Close()) {
				return
			}
		}
	})

	if opts.Timeout <= 0 {
		return h
	}
	return http.TimeoutHandler(h, opts.Timeout, fmt.Sprintf(
		"Exceeded configured timeout of %v.\n",
		opts.Timeout,
	))
}

// InstrumentMetricHandler is usually used with an http.Handler returned by the
// HandlerFor function. It instruments the provided http.Handler with two
// metrics: A counter vector "promhttp_metric_handler_requests_total" to count
// scrapes partitioned by HTTP status code, and a gauge
// "promhttp_metric_handler_requests_in_flight" to track the number of
// simultaneous scrapes. This function idempotently registers collectors for
// both metrics with the provided Registerer. It panics if the registration
// fails.
//
// The provided metrics are useful to see how many scrapes hit the monitored
// target (which could be from different Prometheus servers or other scrapers),
// and how often they overlap (which would result in more than one scrape in
// flight at the same time). Note that the scrapes-in-flight gauge will contain
// the scrape by which it is exposed, while the scrape counter will only get
// incremented after the scrape is complete (as only then the status code is
// known). For tracking scrape durations, use the "scrape_duration_seconds"
// gauge created by the Prometheus server upon each scrape.
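//
// A sketch of the documented pairing with HandlerFor and a custom registry
// (the registry and the "/metrics" path are illustrative):
//
//	reg := prometheus.NewRegistry()
//	http.Handle("/metrics", promhttp.InstrumentMetricHandler(
//		reg, promhttp.HandlerFor(reg, promhttp.HandlerOpts{}),
//	))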
func InstrumentMetricHandler(reg prometheus.Registerer, handler http.Handler) http.Handler {
	cnt := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "promhttp_metric_handler_requests_total",
			Help: "Total number of scrapes by HTTP status code.",
		},
		[]string{"code"},
	)
	// Initialize the most likely HTTP status codes.
	cnt.WithLabelValues("200")
	cnt.WithLabelValues("500")
	cnt.WithLabelValues("503")
	if err := reg.Register(cnt); err != nil {
		are := &prometheus.AlreadyRegisteredError{}
		if errors.As(err, are) {
			cnt = are.ExistingCollector.(*prometheus.CounterVec)
		} else {
			panic(err)
		}
	}

	gge := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "promhttp_metric_handler_requests_in_flight",
		Help: "Current number of scrapes being served.",
	})
	if err := reg.Register(gge); err != nil {
		are := &prometheus.AlreadyRegisteredError{}
		if errors.As(err, are) {
			gge = are.ExistingCollector.(prometheus.Gauge)
		} else {
			panic(err)
		}
	}

	return InstrumentHandlerCounter(cnt, InstrumentHandlerInFlight(gge, handler))
}

// HandlerErrorHandling defines how a Handler serving metrics will handle
// errors.
type HandlerErrorHandling int

// These constants cause handlers serving metrics to behave as described if
// errors are encountered.
const (
	// Serve an HTTP status code 500 upon the first error
	// encountered. Report the error message in the body. Note that HTTP
	// errors cannot be served anymore once the beginning of a regular
	// payload has been sent. Thus, in the (unlikely) case that encoding the
	// payload into the negotiated wire format fails, serving the response
	// will simply be aborted. Set an ErrorLog in HandlerOpts to detect
	// those errors.
	HTTPErrorOnError HandlerErrorHandling = iota
	// Ignore errors and try to serve as many metrics as possible. However,
	// if no metrics can be served, serve an HTTP status code 500 and the
	// last error message in the body. Only use this in deliberate "best
	// effort" metrics collection scenarios. In this case, it is highly
	// recommended to provide other means of detecting errors: By setting an
	// ErrorLog in HandlerOpts, the errors are logged. By providing a
	// Registry in HandlerOpts, the exposed metrics include an error counter
	// "promhttp_metric_handler_errors_total", which can be used for
	// alerts. See the sketch after this const block for such a setup.
	ContinueOnError
	// Panic upon the first error encountered (useful for "crash only" apps).
	PanicOnError
)
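
// As recommended above for ContinueOnError, a sketch that pairs it with an
// ErrorLog and a Registry so that errors remain observable (the logger and the
// choice of the default gatherer/registerer are illustrative):
//
//	promhttp.HandlerFor(prometheus.DefaultGatherer, promhttp.HandlerOpts{
//		ErrorHandling: promhttp.ContinueOnError,
//		ErrorLog:      log.New(os.Stderr, "promhttp: ", log.LstdFlags),
//		Registry:      prometheus.DefaultRegisterer,
//	})
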
// Logger is the minimal interface HandlerOpts needs for logging. Note that
// log.Logger from the standard library implements this interface, and it is
// easy to implement by custom loggers, if they don't do so already anyway.
type Logger interface {
	Println(v ...interface{})
}

// HandlerOpts specifies options for how to serve metrics via an http.Handler.
// The zero value of HandlerOpts is a reasonable default.
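//
// A sketch of a handler with several of the options below set (the specific
// limit and timeout values are arbitrary examples):
//
//	promhttp.HandlerFor(prometheus.DefaultGatherer, promhttp.HandlerOpts{
//		MaxRequestsInFlight: 5,
//		Timeout:             10 * time.Second,
//		EnableOpenMetrics:   true,
//	})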
type HandlerOpts struct {
	// ErrorLog specifies an optional Logger for errors collecting and
	// serving metrics. If nil, errors are not logged at all. Note that the
	// type of a reported error is often prometheus.MultiError, which
	// formats into a multi-line error string. If you want to avoid the
	// latter, create a Logger implementation that detects a
	// prometheus.MultiError and formats the contained errors into one line.
	ErrorLog Logger

	// ErrorHandling defines how errors are handled. Note that errors are
	// logged regardless of the configured ErrorHandling, provided ErrorLog
	// is not nil.
	ErrorHandling HandlerErrorHandling

	// If Registry is not nil, it is used to register a metric
	// "promhttp_metric_handler_errors_total", partitioned by "cause". A
	// failed registration causes a panic. Note that this error counter is
	// different from the instrumentation you get from the various
	// InstrumentHandler... helpers. It counts errors that don't necessarily
	// result in a non-2xx HTTP status code. There are two typical cases:
	// (1) Encoding errors that only happen after streaming of the HTTP body
	// has already started (and the status code 200 has been sent). This
	// should only happen with custom collectors. (2) Collection errors with
	// no effect on the HTTP status code because ErrorHandling is set to
	// ContinueOnError.
	Registry prometheus.Registerer

	// If DisableCompression is true, the handler will never compress the
	// response, even if requested by the client.
	DisableCompression bool

	// The number of concurrent HTTP requests is limited to
	// MaxRequestsInFlight. Additional requests are responded to with 503
	// Service Unavailable and a suitable message in the body. If
	// MaxRequestsInFlight is 0 or negative, no limit is applied.
	MaxRequestsInFlight int

	// If handling a request takes longer than Timeout, it is responded to
	// with 503 Service Unavailable and a suitable message in the body. No
	// timeout is applied if Timeout is 0 or negative. Note that with the
	// current implementation, reaching the timeout simply ends the HTTP
	// request as described above (and even that only if sending of the body
	// hasn't started yet), while the bulk work of gathering all the metrics
	// keeps running in the background (with the eventual result to be
	// thrown away). Until the implementation is improved, it is recommended
	// to implement a separate timeout in potentially slow Collectors.
	Timeout time.Duration

	// If true, the experimental OpenMetrics encoding is added to the
	// possible options during content negotiation. Note that Prometheus
	// 2.5.0+ will negotiate OpenMetrics as first priority. OpenMetrics is
	// the only way to transmit exemplars. However, the move to OpenMetrics
	// is not completely transparent. Most notably, the values of "quantile"
	// labels of Summaries and "le" labels of Histograms are formatted with
	// a trailing ".0" if they would otherwise look like integer numbers
	// (which changes the identity of the resulting series on the Prometheus
	// server).
	EnableOpenMetrics bool
}

// gzipAccepted returns whether the client will accept gzip-encoded content.
func gzipAccepted(header http.Header) bool {
	a := header.Get(acceptEncodingHeader)
	parts := strings.Split(a, ",")
	for _, part := range parts {
		part = strings.TrimSpace(part)
		if part == "gzip" || strings.HasPrefix(part, "gzip;") {
			return true
		}
	}
	return false
}

// httpError removes any content-encoding header and then calls http.Error with
// the provided error and http.StatusInternalServerError. The error contents
// are supposed to be uncompressed plain text. Same as with a plain http.Error,
// this must not be called if the header or any payload has already been sent.
func httpError(rsp http.ResponseWriter, err error) {
	rsp.Header().Del(contentEncodingHeader)
	http.Error(
		rsp,
		"An error has occurred while serving metrics:\n\n"+err.Error(),
		http.StatusInternalServerError,
	)
}