Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

384 rader
14 KiB

  1. // Copyright 2016 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. // Package promhttp provides tooling around HTTP servers and clients.
  14. //
  15. // First, the package allows the creation of http.Handler instances to expose
  16. // Prometheus metrics via HTTP. promhttp.Handler acts on the
  17. // prometheus.DefaultGatherer. With HandlerFor, you can create a handler for a
  18. // custom registry or anything that implements the Gatherer interface. It also
  19. // allows the creation of handlers that act differently on errors or that log
  20. // errors.
  21. //
  22. // Second, the package provides tooling to instrument instances of http.Handler
  23. // via middleware. Middleware wrappers follow the naming scheme
  24. // InstrumentHandlerX, where X describes the intended use of the middleware.
  25. // See each function's doc comment for specific details.
  26. //
  27. // Finally, the package allows for an http.RoundTripper to be instrumented via
  28. // middleware. Middleware wrappers follow the naming scheme
  29. // InstrumentRoundTripperX, where X describes the intended use of the
  30. // middleware. See each function's doc comment for specific details.
  31. package promhttp
  32. import (
  33. "compress/gzip"
  34. "fmt"
  35. "io"
  36. "net/http"
  37. "strings"
  38. "sync"
  39. "time"
  40. "github.com/prometheus/common/expfmt"
  41. "github.com/prometheus/client_golang/prometheus"
  42. )
  // Names of the HTTP headers used by the metrics handler in this file.
  const (
  	// acceptEncodingHeader is the request header inspected by gzipAccepted.
  	acceptEncodingHeader = "Accept-Encoding"

  	// contentEncodingHeader is set to "gzip" on compressed responses and is
  	// removed again by httpError before an error body is written.
  	contentEncodingHeader = "Content-Encoding"

  	// contentTypeHeader is set on the response to the negotiated exposition
  	// format.
  	contentTypeHeader = "Content-Type"
  )
  // gzipPool hands out reusable *gzip.Writer values. A writer created by the
  // pool has no destination (nil), so it has to be pointed at a real writer with
  // Reset before it is used.
  var gzipPool = sync.Pool{
  	New: func() interface{} {
  		zw := gzip.NewWriter(nil)
  		return zw
  	},
  }
  53. // Handler returns an http.Handler for the prometheus.DefaultGatherer, using
  54. // default HandlerOpts, i.e. it reports the first error as an HTTP error, it has
  55. // no error logging, and it applies compression if requested by the client.
  56. //
  57. // The returned http.Handler is already instrumented using the
  58. // InstrumentMetricHandler function and the prometheus.DefaultRegisterer. If you
  59. // create multiple http.Handlers by separate calls of the Handler function, the
  60. // metrics used for instrumentation will be shared between them, providing
  61. // global scrape counts.
  62. //
  63. // This function is meant to cover the bulk of basic use cases. If you are doing
  64. // anything that requires more customization (including using a non-default
  65. // Gatherer, different instrumentation, and non-default HandlerOpts), use the
  66. // HandlerFor function. See there for details.
  67. func Handler() http.Handler {
  68. return InstrumentMetricHandler(
  69. prometheus.DefaultRegisterer, HandlerFor(prometheus.DefaultGatherer, HandlerOpts{}),
  70. )
  71. }
  72. // HandlerFor returns an uninstrumented http.Handler for the provided
  73. // Gatherer. The behavior of the Handler is defined by the provided
  74. // HandlerOpts. Thus, HandlerFor is useful to create http.Handlers for custom
  75. // Gatherers, with non-default HandlerOpts, and/or with custom (or no)
  76. // instrumentation. Use the InstrumentMetricHandler function to apply the same
  77. // kind of instrumentation as it is used by the Handler function.
  78. func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
  79. var (
  80. inFlightSem chan struct{}
  81. errCnt = prometheus.NewCounterVec(
  82. prometheus.CounterOpts{
  83. Name: "promhttp_metric_handler_errors_total",
  84. Help: "Total number of internal errors encountered by the promhttp metric handler.",
  85. },
  86. []string{"cause"},
  87. )
  88. )
  89. if opts.MaxRequestsInFlight > 0 {
  90. inFlightSem = make(chan struct{}, opts.MaxRequestsInFlight)
  91. }
  92. if opts.Registry != nil {
  93. // Initialize all possibilities that can occur below.
  94. errCnt.WithLabelValues("gathering")
  95. errCnt.WithLabelValues("encoding")
  96. if err := opts.Registry.Register(errCnt); err != nil {
  97. if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
  98. errCnt = are.ExistingCollector.(*prometheus.CounterVec)
  99. } else {
  100. panic(err)
  101. }
  102. }
  103. }
  104. h := http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
  105. if inFlightSem != nil {
  106. select {
  107. case inFlightSem <- struct{}{}: // All good, carry on.
  108. defer func() { <-inFlightSem }()
  109. default:
  110. http.Error(rsp, fmt.Sprintf(
  111. "Limit of concurrent requests reached (%d), try again later.", opts.MaxRequestsInFlight,
  112. ), http.StatusServiceUnavailable)
  113. return
  114. }
  115. }
  116. mfs, err := reg.Gather()
  117. if err != nil {
  118. if opts.ErrorLog != nil {
  119. opts.ErrorLog.Println("error gathering metrics:", err)
  120. }
  121. errCnt.WithLabelValues("gathering").Inc()
  122. switch opts.ErrorHandling {
  123. case PanicOnError:
  124. panic(err)
  125. case ContinueOnError:
  126. if len(mfs) == 0 {
  127. // Still report the error if no metrics have been gathered.
  128. httpError(rsp, err)
  129. return
  130. }
  131. case HTTPErrorOnError:
  132. httpError(rsp, err)
  133. return
  134. }
  135. }
  136. var contentType expfmt.Format
  137. if opts.EnableOpenMetrics {
  138. contentType = expfmt.NegotiateIncludingOpenMetrics(req.Header)
  139. } else {
  140. contentType = expfmt.Negotiate(req.Header)
  141. }
  142. header := rsp.Header()
  143. header.Set(contentTypeHeader, string(contentType))
  144. w := io.Writer(rsp)
  145. if !opts.DisableCompression && gzipAccepted(req.Header) {
  146. header.Set(contentEncodingHeader, "gzip")
  147. gz := gzipPool.Get().(*gzip.Writer)
  148. defer gzipPool.Put(gz)
  149. gz.Reset(w)
  150. defer gz.Close()
  151. w = gz
  152. }
  153. enc := expfmt.NewEncoder(w, contentType)
  154. // handleError handles the error according to opts.ErrorHandling
  155. // and returns true if we have to abort after the handling.
  156. handleError := func(err error) bool {
  157. if err == nil {
  158. return false
  159. }
  160. if opts.ErrorLog != nil {
  161. opts.ErrorLog.Println("error encoding and sending metric family:", err)
  162. }
  163. errCnt.WithLabelValues("encoding").Inc()
  164. switch opts.ErrorHandling {
  165. case PanicOnError:
  166. panic(err)
  167. case HTTPErrorOnError:
  168. // We cannot really send an HTTP error at this
  169. // point because we most likely have written
  170. // something to rsp already. But at least we can
  171. // stop sending.
  172. return true
  173. }
  174. // Do nothing in all other cases, including ContinueOnError.
  175. return false
  176. }
  177. for _, mf := range mfs {
  178. if handleError(enc.Encode(mf)) {
  179. return
  180. }
  181. }
  182. if closer, ok := enc.(expfmt.Closer); ok {
  183. // This in particular takes care of the final "# EOF\n" line for OpenMetrics.
  184. if handleError(closer.Close()) {
  185. return
  186. }
  187. }
  188. })
  189. if opts.Timeout <= 0 {
  190. return h
  191. }
  192. return http.TimeoutHandler(h, opts.Timeout, fmt.Sprintf(
  193. "Exceeded configured timeout of %v.\n",
  194. opts.Timeout,
  195. ))
  196. }
  197. // InstrumentMetricHandler is usually used with an http.Handler returned by the
  198. // HandlerFor function. It instruments the provided http.Handler with two
  199. // metrics: A counter vector "promhttp_metric_handler_requests_total" to count
  200. // scrapes partitioned by HTTP status code, and a gauge
  201. // "promhttp_metric_handler_requests_in_flight" to track the number of
  202. // simultaneous scrapes. This function idempotently registers collectors for
  203. // both metrics with the provided Registerer. It panics if the registration
  204. // fails. The provided metrics are useful to see how many scrapes hit the
  205. // monitored target (which could be from different Prometheus servers or other
  206. // scrapers), and how often they overlap (which would result in more than one
  207. // scrape in flight at the same time). Note that the scrapes-in-flight gauge
  208. // will contain the scrape by which it is exposed, while the scrape counter will
  209. // only get incremented after the scrape is complete (as only then the status
  210. // code is known). For tracking scrape durations, use the
  211. // "scrape_duration_seconds" gauge created by the Prometheus server upon each
  212. // scrape.
  213. func InstrumentMetricHandler(reg prometheus.Registerer, handler http.Handler) http.Handler {
  214. cnt := prometheus.NewCounterVec(
  215. prometheus.CounterOpts{
  216. Name: "promhttp_metric_handler_requests_total",
  217. Help: "Total number of scrapes by HTTP status code.",
  218. },
  219. []string{"code"},
  220. )
  221. // Initialize the most likely HTTP status codes.
  222. cnt.WithLabelValues("200")
  223. cnt.WithLabelValues("500")
  224. cnt.WithLabelValues("503")
  225. if err := reg.Register(cnt); err != nil {
  226. if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
  227. cnt = are.ExistingCollector.(*prometheus.CounterVec)
  228. } else {
  229. panic(err)
  230. }
  231. }
  232. gge := prometheus.NewGauge(prometheus.GaugeOpts{
  233. Name: "promhttp_metric_handler_requests_in_flight",
  234. Help: "Current number of scrapes being served.",
  235. })
  236. if err := reg.Register(gge); err != nil {
  237. if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
  238. gge = are.ExistingCollector.(prometheus.Gauge)
  239. } else {
  240. panic(err)
  241. }
  242. }
  243. return InstrumentHandlerCounter(cnt, InstrumentHandlerInFlight(gge, handler))
  244. }
  // HandlerErrorHandling selects what a Handler serving metrics does when it
  // runs into an error.
  type HandlerErrorHandling int

  // The following values of HandlerErrorHandling make handlers serving metrics
  // behave as described if errors are encountered.
  const (
  	// HTTPErrorOnError serves an HTTP status code 500 upon the first error
  	// encountered and reports the error message in the body. Note that
  	// HTTP errors cannot be served anymore once the beginning of a regular
  	// payload has been sent. Thus, in the (unlikely) case that encoding
  	// the payload into the negotiated wire format fails, serving the
  	// response is simply aborted. Set an ErrorLog in HandlerOpts to detect
  	// those errors.
  	HTTPErrorOnError HandlerErrorHandling = iota

  	// ContinueOnError ignores errors and tries to serve as many metrics as
  	// possible. However, if no metrics can be served, an HTTP status code
  	// 500 is served with the last error message in the body. Only use this
  	// in deliberate "best effort" metrics collection scenarios. In that
  	// case, it is highly recommended to provide other means of detecting
  	// errors: with an ErrorLog set in HandlerOpts, the errors are logged;
  	// with a Registry set in HandlerOpts, the exposed metrics include the
  	// error counter "promhttp_metric_handler_errors_total", which can be
  	// used for alerts.
  	ContinueOnError

  	// PanicOnError panics upon the first error encountered (useful for
  	// "crash only" apps).
  	PanicOnError
  )
  // Logger is the smallest logging interface HandlerOpts requires. log.Logger
  // from the standard library satisfies it, and custom loggers that do not
  // satisfy it yet only need to provide a single method.
  type Logger interface {
  	// Println logs the given values.
  	Println(v ...interface{})
  }
  278. // HandlerOpts specifies options how to serve metrics via an http.Handler. The
  279. // zero value of HandlerOpts is a reasonable default.
  280. type HandlerOpts struct {
  281. // ErrorLog specifies an optional Logger for errors collecting and
  282. // serving metrics. If nil, errors are not logged at all. Note that the
  283. // type of a reported error is often prometheus.MultiError, which
  284. // formats into a multi-line error string. If you want to avoid the
  285. // latter, create a Logger implementation that detects a
  286. // prometheus.MultiError and formats the contained errors into one line.
  287. ErrorLog Logger
  288. // ErrorHandling defines how errors are handled. Note that errors are
  289. // logged regardless of the configured ErrorHandling provided ErrorLog
  290. // is not nil.
  291. ErrorHandling HandlerErrorHandling
  292. // If Registry is not nil, it is used to register a metric
  293. // "promhttp_metric_handler_errors_total", partitioned by "cause". A
  294. // failed registration causes a panic. Note that this error counter is
  295. // different from the instrumentation you get from the various
  296. // InstrumentHandler... helpers. It counts errors that don't necessarily
  297. // result in a non-2xx HTTP status code. There are two typical cases:
  298. // (1) Encoding errors that only happen after streaming of the HTTP body
  299. // has already started (and the status code 200 has been sent). This
  300. // should only happen with custom collectors. (2) Collection errors with
  301. // no effect on the HTTP status code because ErrorHandling is set to
  302. // ContinueOnError.
  303. Registry prometheus.Registerer
  304. // If DisableCompression is true, the handler will never compress the
  305. // response, even if requested by the client.
  306. DisableCompression bool
  307. // The number of concurrent HTTP requests is limited to
  308. // MaxRequestsInFlight. Additional requests are responded to with 503
  309. // Service Unavailable and a suitable message in the body. If
  310. // MaxRequestsInFlight is 0 or negative, no limit is applied.
  311. MaxRequestsInFlight int
  312. // If handling a request takes longer than Timeout, it is responded to
  313. // with 503 ServiceUnavailable and a suitable Message. No timeout is
  314. // applied if Timeout is 0 or negative. Note that with the current
  315. // implementation, reaching the timeout simply ends the HTTP requests as
  316. // described above (and even that only if sending of the body hasn't
  317. // started yet), while the bulk work of gathering all the metrics keeps
  318. // running in the background (with the eventual result to be thrown
  319. // away). Until the implementation is improved, it is recommended to
  320. // implement a separate timeout in potentially slow Collectors.
  321. Timeout time.Duration
  322. // If true, the experimental OpenMetrics encoding is added to the
  323. // possible options during content negotiation. Note that Prometheus
  324. // 2.5.0+ will negotiate OpenMetrics as first priority. OpenMetrics is
  325. // the only way to transmit exemplars. However, the move to OpenMetrics
  326. // is not completely transparent. Most notably, the values of "quantile"
  327. // labels of Summaries and "le" labels of Histograms are formatted with
  328. // a trailing ".0" if they would otherwise look like integer numbers
  329. // (which changes the identity of the resulting series on the Prometheus
  330. // server).
  331. EnableOpenMetrics bool
  332. }
  333. // gzipAccepted returns whether the client will accept gzip-encoded content.
  334. func gzipAccepted(header http.Header) bool {
  335. a := header.Get(acceptEncodingHeader)
  336. parts := strings.Split(a, ",")
  337. for _, part := range parts {
  338. part = strings.TrimSpace(part)
  339. if part == "gzip" || strings.HasPrefix(part, "gzip;") {
  340. return true
  341. }
  342. }
  343. return false
  344. }
  345. // httpError removes any content-encoding header and then calls http.Error with
  346. // the provided error and http.StatusInternalServerError. Error contents is
  347. // supposed to be uncompressed plain text. Same as with a plain http.Error, this
  348. // must not be called if the header or any payload has already been sent.
  349. func httpError(rsp http.ResponseWriter, err error) {
  350. rsp.Header().Del(contentEncodingHeader)
  351. http.Error(
  352. rsp,
  353. "An error has occurred while serving metrics:\n\n"+err.Error(),
  354. http.StatusInternalServerError,
  355. )
  356. }