You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

951 rivejä
31 KiB

  1. // Copyright 2014 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package prometheus
  14. import (
  15. "bytes"
  16. "fmt"
  17. "io/ioutil"
  18. "os"
  19. "path/filepath"
  20. "runtime"
  21. "sort"
  22. "strings"
  23. "sync"
  24. "unicode/utf8"
  25. "github.com/cespare/xxhash/v2"
  26. //nolint:staticcheck // Ignore SA1019. Need to keep deprecated package for compatibility.
  27. "github.com/golang/protobuf/proto"
  28. "github.com/prometheus/common/expfmt"
  29. dto "github.com/prometheus/client_model/go"
  30. "github.com/prometheus/client_golang/prometheus/internal"
  31. )
  // Buffer sizes of the channels through which Collectors hand over their
  // metrics and descriptors.
  const (
  	capMetricChan = 1000 // Capacity of the metric channels.
  	capDescChan   = 10   // Capacity of the descriptor channels.
  )
  37. // DefaultRegisterer and DefaultGatherer are the implementations of the
  38. // Registerer and Gatherer interface a number of convenience functions in this
  39. // package act on. Initially, both variables point to the same Registry, which
  40. // has a process collector (currently on Linux only, see NewProcessCollector)
  41. // and a Go collector (see NewGoCollector, in particular the note about
  42. // stop-the-world implication with Go versions older than 1.9) already
  43. // registered. This approach to keep default instances as global state mirrors
  44. // the approach of other packages in the Go standard library. Note that there
  45. // are caveats. Change the variables with caution and only if you understand the
  46. // consequences. Users who want to avoid global state altogether should not use
  47. // the convenience functions and act on custom instances instead.
  48. var (
  49. defaultRegistry = NewRegistry()
  50. DefaultRegisterer Registerer = defaultRegistry
  51. DefaultGatherer Gatherer = defaultRegistry
  52. )
  53. func init() {
  54. MustRegister(NewProcessCollector(ProcessCollectorOpts{}))
  55. MustRegister(NewGoCollector())
  56. }
  57. // NewRegistry creates a new vanilla Registry without any Collectors
  58. // pre-registered.
  59. func NewRegistry() *Registry {
  60. return &Registry{
  61. collectorsByID: map[uint64]Collector{},
  62. descIDs: map[uint64]struct{}{},
  63. dimHashesByName: map[string]uint64{},
  64. }
  65. }
  66. // NewPedanticRegistry returns a registry that checks during collection if each
  67. // collected Metric is consistent with its reported Desc, and if the Desc has
  68. // actually been registered with the registry. Unchecked Collectors (those whose
  69. // Describe method does not yield any descriptors) are excluded from the check.
  70. //
  71. // Usually, a Registry will be happy as long as the union of all collected
  72. // Metrics is consistent and valid even if some metrics are not consistent with
  73. // their own Desc or a Desc provided by their registered Collector. Well-behaved
  74. // Collectors and Metrics will only provide consistent Descs. This Registry is
  75. // useful to test the implementation of Collectors and Metrics.
  76. func NewPedanticRegistry() *Registry {
  77. r := NewRegistry()
  78. r.pedanticChecksEnabled = true
  79. return r
  80. }
  81. // Registerer is the interface for the part of a registry in charge of
  82. // registering and unregistering. Users of custom registries should use
  83. // Registerer as type for registration purposes (rather than the Registry type
  84. // directly). In that way, they are free to use custom Registerer implementation
  85. // (e.g. for testing purposes).
  86. type Registerer interface {
  87. // Register registers a new Collector to be included in metrics
  88. // collection. It returns an error if the descriptors provided by the
  89. // Collector are invalid or if they — in combination with descriptors of
  90. // already registered Collectors — do not fulfill the consistency and
  91. // uniqueness criteria described in the documentation of metric.Desc.
  92. //
  93. // If the provided Collector is equal to a Collector already registered
  94. // (which includes the case of re-registering the same Collector), the
  95. // returned error is an instance of AlreadyRegisteredError, which
  96. // contains the previously registered Collector.
  97. //
  98. // A Collector whose Describe method does not yield any Desc is treated
  99. // as unchecked. Registration will always succeed. No check for
  100. // re-registering (see previous paragraph) is performed. Thus, the
  101. // caller is responsible for not double-registering the same unchecked
  102. // Collector, and for providing a Collector that will not cause
  103. // inconsistent metrics on collection. (This would lead to scrape
  104. // errors.)
  105. Register(Collector) error
  106. // MustRegister works like Register but registers any number of
  107. // Collectors and panics upon the first registration that causes an
  108. // error.
  109. MustRegister(...Collector)
  110. // Unregister unregisters the Collector that equals the Collector passed
  111. // in as an argument. (Two Collectors are considered equal if their
  112. // Describe method yields the same set of descriptors.) The function
  113. // returns whether a Collector was unregistered. Note that an unchecked
  114. // Collector cannot be unregistered (as its Describe method does not
  115. // yield any descriptor).
  116. //
  117. // Note that even after unregistering, it will not be possible to
  118. // register a new Collector that is inconsistent with the unregistered
  119. // Collector, e.g. a Collector collecting metrics with the same name but
  120. // a different help string. The rationale here is that the same registry
  121. // instance must only collect consistent metrics throughout its
  122. // lifetime.
  123. Unregister(Collector) bool
  124. }
  125. // Gatherer is the interface for the part of a registry in charge of gathering
  126. // the collected metrics into a number of MetricFamilies. The Gatherer interface
  127. // comes with the same general implication as described for the Registerer
  128. // interface.
  129. type Gatherer interface {
  130. // Gather calls the Collect method of the registered Collectors and then
  131. // gathers the collected metrics into a lexicographically sorted slice
  132. // of uniquely named MetricFamily protobufs. Gather ensures that the
  133. // returned slice is valid and self-consistent so that it can be used
  134. // for valid exposition. As an exception to the strict consistency
  135. // requirements described for metric.Desc, Gather will tolerate
  136. // different sets of label names for metrics of the same metric family.
  137. //
  138. // Even if an error occurs, Gather attempts to gather as many metrics as
  139. // possible. Hence, if a non-nil error is returned, the returned
  140. // MetricFamily slice could be nil (in case of a fatal error that
  141. // prevented any meaningful metric collection) or contain a number of
  142. // MetricFamily protobufs, some of which might be incomplete, and some
  143. // might be missing altogether. The returned error (which might be a
  144. // MultiError) explains the details. Note that this is mostly useful for
  145. // debugging purposes. If the gathered protobufs are to be used for
  146. // exposition in actual monitoring, it is almost always better to not
  147. // expose an incomplete result and instead disregard the returned
  148. // MetricFamily protobufs in case the returned error is non-nil.
  149. Gather() ([]*dto.MetricFamily, error)
  150. }
  151. // Register registers the provided Collector with the DefaultRegisterer.
  152. //
  153. // Register is a shortcut for DefaultRegisterer.Register(c). See there for more
  154. // details.
  155. func Register(c Collector) error {
  156. return DefaultRegisterer.Register(c)
  157. }
  158. // MustRegister registers the provided Collectors with the DefaultRegisterer and
  159. // panics if any error occurs.
  160. //
  161. // MustRegister is a shortcut for DefaultRegisterer.MustRegister(cs...). See
  162. // there for more details.
  163. func MustRegister(cs ...Collector) {
  164. DefaultRegisterer.MustRegister(cs...)
  165. }
  166. // Unregister removes the registration of the provided Collector from the
  167. // DefaultRegisterer.
  168. //
  169. // Unregister is a shortcut for DefaultRegisterer.Unregister(c). See there for
  170. // more details.
  171. func Unregister(c Collector) bool {
  172. return DefaultRegisterer.Unregister(c)
  173. }
  174. // GathererFunc turns a function into a Gatherer.
  175. type GathererFunc func() ([]*dto.MetricFamily, error)
  176. // Gather implements Gatherer.
  177. func (gf GathererFunc) Gather() ([]*dto.MetricFamily, error) {
  178. return gf()
  179. }
  180. // AlreadyRegisteredError is returned by the Register method if the Collector to
  181. // be registered has already been registered before, or a different Collector
  182. // that collects the same metrics has been registered before. Registration fails
  183. // in that case, but you can detect from the kind of error what has
  184. // happened. The error contains fields for the existing Collector and the
  185. // (rejected) new Collector that equals the existing one. This can be used to
  186. // find out if an equal Collector has been registered before and switch over to
  187. // using the old one, as demonstrated in the example.
  188. type AlreadyRegisteredError struct {
  189. ExistingCollector, NewCollector Collector
  190. }
  191. func (err AlreadyRegisteredError) Error() string {
  192. return "duplicate metrics collector registration attempted"
  193. }
  // MultiError is a slice of errors that itself implements the error
  // interface. A Gatherer uses it to report multiple errors that occurred
  // during MetricFamily gathering.
  type MultiError []error

  // Error renders the contained errors as a bullet point list, preceded by the
  // total number of errors. The result is a multi-line string; an empty
  // MultiError renders as the empty string.
  func (errs MultiError) Error() string {
  	if len(errs) == 0 {
  		return ""
  	}
  	var buf bytes.Buffer
  	fmt.Fprintf(&buf, "%d error(s) occurred:", len(errs))
  	for i := range errs {
  		fmt.Fprintf(&buf, "\n* %s", errs[i])
  	}
  	return buf.String()
  }

  // Append adds err to the MultiError unless err is nil.
  func (errs *MultiError) Append(err error) {
  	if err == nil {
  		return
  	}
  	*errs = append(*errs, err)
  }

  // MaybeUnwrap returns nil if len(errs) is 0 and the single contained error
  // if len(errs) is 1. In all other cases the MultiError itself is returned.
  // This helps to return a MultiError only when one is actually needed.
  func (errs MultiError) MaybeUnwrap() error {
  	if len(errs) == 0 {
  		return nil
  	}
  	if len(errs) == 1 {
  		return errs[0]
  	}
  	return errs
  }
  230. // Registry registers Prometheus collectors, collects their metrics, and gathers
  231. // them into MetricFamilies for exposition. It implements both Registerer and
  232. // Gatherer. The zero value is not usable. Create instances with NewRegistry or
  233. // NewPedanticRegistry.
  234. type Registry struct {
  235. mtx sync.RWMutex
  236. collectorsByID map[uint64]Collector // ID is a hash of the descIDs.
  237. descIDs map[uint64]struct{}
  238. dimHashesByName map[string]uint64
  239. uncheckedCollectors []Collector
  240. pedanticChecksEnabled bool
  241. }
  242. // Register implements Registerer.
  243. func (r *Registry) Register(c Collector) error {
  244. var (
  245. descChan = make(chan *Desc, capDescChan)
  246. newDescIDs = map[uint64]struct{}{}
  247. newDimHashesByName = map[string]uint64{}
  248. collectorID uint64 // All desc IDs XOR'd together.
  249. duplicateDescErr error
  250. )
  251. go func() {
  252. c.Describe(descChan)
  253. close(descChan)
  254. }()
  255. r.mtx.Lock()
  256. defer func() {
  257. // Drain channel in case of premature return to not leak a goroutine.
  258. for range descChan {
  259. }
  260. r.mtx.Unlock()
  261. }()
  262. // Conduct various tests...
  263. for desc := range descChan {
  264. // Is the descriptor valid at all?
  265. if desc.err != nil {
  266. return fmt.Errorf("descriptor %s is invalid: %s", desc, desc.err)
  267. }
  268. // Is the descID unique?
  269. // (In other words: Is the fqName + constLabel combination unique?)
  270. if _, exists := r.descIDs[desc.id]; exists {
  271. duplicateDescErr = fmt.Errorf("descriptor %s already exists with the same fully-qualified name and const label values", desc)
  272. }
  273. // If it is not a duplicate desc in this collector, XOR it to
  274. // the collectorID. (We allow duplicate descs within the same
  275. // collector, but their existence must be a no-op.)
  276. if _, exists := newDescIDs[desc.id]; !exists {
  277. newDescIDs[desc.id] = struct{}{}
  278. collectorID ^= desc.id
  279. }
  280. // Are all the label names and the help string consistent with
  281. // previous descriptors of the same name?
  282. // First check existing descriptors...
  283. if dimHash, exists := r.dimHashesByName[desc.fqName]; exists {
  284. if dimHash != desc.dimHash {
  285. return fmt.Errorf("a previously registered descriptor with the same fully-qualified name as %s has different label names or a different help string", desc)
  286. }
  287. } else {
  288. // ...then check the new descriptors already seen.
  289. if dimHash, exists := newDimHashesByName[desc.fqName]; exists {
  290. if dimHash != desc.dimHash {
  291. return fmt.Errorf("descriptors reported by collector have inconsistent label names or help strings for the same fully-qualified name, offender is %s", desc)
  292. }
  293. } else {
  294. newDimHashesByName[desc.fqName] = desc.dimHash
  295. }
  296. }
  297. }
  298. // A Collector yielding no Desc at all is considered unchecked.
  299. if len(newDescIDs) == 0 {
  300. r.uncheckedCollectors = append(r.uncheckedCollectors, c)
  301. return nil
  302. }
  303. if existing, exists := r.collectorsByID[collectorID]; exists {
  304. switch e := existing.(type) {
  305. case *wrappingCollector:
  306. return AlreadyRegisteredError{
  307. ExistingCollector: e.unwrapRecursively(),
  308. NewCollector: c,
  309. }
  310. default:
  311. return AlreadyRegisteredError{
  312. ExistingCollector: e,
  313. NewCollector: c,
  314. }
  315. }
  316. }
  317. // If the collectorID is new, but at least one of the descs existed
  318. // before, we are in trouble.
  319. if duplicateDescErr != nil {
  320. return duplicateDescErr
  321. }
  322. // Only after all tests have passed, actually register.
  323. r.collectorsByID[collectorID] = c
  324. for hash := range newDescIDs {
  325. r.descIDs[hash] = struct{}{}
  326. }
  327. for name, dimHash := range newDimHashesByName {
  328. r.dimHashesByName[name] = dimHash
  329. }
  330. return nil
  331. }
  332. // Unregister implements Registerer.
  333. func (r *Registry) Unregister(c Collector) bool {
  334. var (
  335. descChan = make(chan *Desc, capDescChan)
  336. descIDs = map[uint64]struct{}{}
  337. collectorID uint64 // All desc IDs XOR'd together.
  338. )
  339. go func() {
  340. c.Describe(descChan)
  341. close(descChan)
  342. }()
  343. for desc := range descChan {
  344. if _, exists := descIDs[desc.id]; !exists {
  345. collectorID ^= desc.id
  346. descIDs[desc.id] = struct{}{}
  347. }
  348. }
  349. r.mtx.RLock()
  350. if _, exists := r.collectorsByID[collectorID]; !exists {
  351. r.mtx.RUnlock()
  352. return false
  353. }
  354. r.mtx.RUnlock()
  355. r.mtx.Lock()
  356. defer r.mtx.Unlock()
  357. delete(r.collectorsByID, collectorID)
  358. for id := range descIDs {
  359. delete(r.descIDs, id)
  360. }
  361. // dimHashesByName is left untouched as those must be consistent
  362. // throughout the lifetime of a program.
  363. return true
  364. }
  365. // MustRegister implements Registerer.
  366. func (r *Registry) MustRegister(cs ...Collector) {
  367. for _, c := range cs {
  368. if err := r.Register(c); err != nil {
  369. panic(err)
  370. }
  371. }
  372. }
  373. // Gather implements Gatherer.
  374. func (r *Registry) Gather() ([]*dto.MetricFamily, error) {
  375. var (
  376. checkedMetricChan = make(chan Metric, capMetricChan)
  377. uncheckedMetricChan = make(chan Metric, capMetricChan)
  378. metricHashes = map[uint64]struct{}{}
  379. wg sync.WaitGroup
  380. errs MultiError // The collected errors to return in the end.
  381. registeredDescIDs map[uint64]struct{} // Only used for pedantic checks
  382. )
  383. r.mtx.RLock()
  384. goroutineBudget := len(r.collectorsByID) + len(r.uncheckedCollectors)
  385. metricFamiliesByName := make(map[string]*dto.MetricFamily, len(r.dimHashesByName))
  386. checkedCollectors := make(chan Collector, len(r.collectorsByID))
  387. uncheckedCollectors := make(chan Collector, len(r.uncheckedCollectors))
  388. for _, collector := range r.collectorsByID {
  389. checkedCollectors <- collector
  390. }
  391. for _, collector := range r.uncheckedCollectors {
  392. uncheckedCollectors <- collector
  393. }
  394. // In case pedantic checks are enabled, we have to copy the map before
  395. // giving up the RLock.
  396. if r.pedanticChecksEnabled {
  397. registeredDescIDs = make(map[uint64]struct{}, len(r.descIDs))
  398. for id := range r.descIDs {
  399. registeredDescIDs[id] = struct{}{}
  400. }
  401. }
  402. r.mtx.RUnlock()
  403. wg.Add(goroutineBudget)
  404. collectWorker := func() {
  405. for {
  406. select {
  407. case collector := <-checkedCollectors:
  408. collector.Collect(checkedMetricChan)
  409. case collector := <-uncheckedCollectors:
  410. collector.Collect(uncheckedMetricChan)
  411. default:
  412. return
  413. }
  414. wg.Done()
  415. }
  416. }
  417. // Start the first worker now to make sure at least one is running.
  418. go collectWorker()
  419. goroutineBudget--
  420. // Close checkedMetricChan and uncheckedMetricChan once all collectors
  421. // are collected.
  422. go func() {
  423. wg.Wait()
  424. close(checkedMetricChan)
  425. close(uncheckedMetricChan)
  426. }()
  427. // Drain checkedMetricChan and uncheckedMetricChan in case of premature return.
  428. defer func() {
  429. if checkedMetricChan != nil {
  430. for range checkedMetricChan {
  431. }
  432. }
  433. if uncheckedMetricChan != nil {
  434. for range uncheckedMetricChan {
  435. }
  436. }
  437. }()
  438. // Copy the channel references so we can nil them out later to remove
  439. // them from the select statements below.
  440. cmc := checkedMetricChan
  441. umc := uncheckedMetricChan
  442. for {
  443. select {
  444. case metric, ok := <-cmc:
  445. if !ok {
  446. cmc = nil
  447. break
  448. }
  449. errs.Append(processMetric(
  450. metric, metricFamiliesByName,
  451. metricHashes,
  452. registeredDescIDs,
  453. ))
  454. case metric, ok := <-umc:
  455. if !ok {
  456. umc = nil
  457. break
  458. }
  459. errs.Append(processMetric(
  460. metric, metricFamiliesByName,
  461. metricHashes,
  462. nil,
  463. ))
  464. default:
  465. if goroutineBudget <= 0 || len(checkedCollectors)+len(uncheckedCollectors) == 0 {
  466. // All collectors are already being worked on or
  467. // we have already as many goroutines started as
  468. // there are collectors. Do the same as above,
  469. // just without the default.
  470. select {
  471. case metric, ok := <-cmc:
  472. if !ok {
  473. cmc = nil
  474. break
  475. }
  476. errs.Append(processMetric(
  477. metric, metricFamiliesByName,
  478. metricHashes,
  479. registeredDescIDs,
  480. ))
  481. case metric, ok := <-umc:
  482. if !ok {
  483. umc = nil
  484. break
  485. }
  486. errs.Append(processMetric(
  487. metric, metricFamiliesByName,
  488. metricHashes,
  489. nil,
  490. ))
  491. }
  492. break
  493. }
  494. // Start more workers.
  495. go collectWorker()
  496. goroutineBudget--
  497. runtime.Gosched()
  498. }
  499. // Once both checkedMetricChan and uncheckdMetricChan are closed
  500. // and drained, the contraption above will nil out cmc and umc,
  501. // and then we can leave the collect loop here.
  502. if cmc == nil && umc == nil {
  503. break
  504. }
  505. }
  506. return internal.NormalizeMetricFamilies(metricFamiliesByName), errs.MaybeUnwrap()
  507. }
  508. // WriteToTextfile calls Gather on the provided Gatherer, encodes the result in the
  509. // Prometheus text format, and writes it to a temporary file. Upon success, the
  510. // temporary file is renamed to the provided filename.
  511. //
  512. // This is intended for use with the textfile collector of the node exporter.
  513. // Note that the node exporter expects the filename to be suffixed with ".prom".
  514. func WriteToTextfile(filename string, g Gatherer) error {
  515. tmp, err := ioutil.TempFile(filepath.Dir(filename), filepath.Base(filename))
  516. if err != nil {
  517. return err
  518. }
  519. defer os.Remove(tmp.Name())
  520. mfs, err := g.Gather()
  521. if err != nil {
  522. return err
  523. }
  524. for _, mf := range mfs {
  525. if _, err := expfmt.MetricFamilyToText(tmp, mf); err != nil {
  526. return err
  527. }
  528. }
  529. if err := tmp.Close(); err != nil {
  530. return err
  531. }
  532. if err := os.Chmod(tmp.Name(), 0644); err != nil {
  533. return err
  534. }
  535. return os.Rename(tmp.Name(), filename)
  536. }
  537. // processMetric is an internal helper method only used by the Gather method.
  538. func processMetric(
  539. metric Metric,
  540. metricFamiliesByName map[string]*dto.MetricFamily,
  541. metricHashes map[uint64]struct{},
  542. registeredDescIDs map[uint64]struct{},
  543. ) error {
  544. desc := metric.Desc()
  545. // Wrapped metrics collected by an unchecked Collector can have an
  546. // invalid Desc.
  547. if desc.err != nil {
  548. return desc.err
  549. }
  550. dtoMetric := &dto.Metric{}
  551. if err := metric.Write(dtoMetric); err != nil {
  552. return fmt.Errorf("error collecting metric %v: %s", desc, err)
  553. }
  554. metricFamily, ok := metricFamiliesByName[desc.fqName]
  555. if ok { // Existing name.
  556. if metricFamily.GetHelp() != desc.help {
  557. return fmt.Errorf(
  558. "collected metric %s %s has help %q but should have %q",
  559. desc.fqName, dtoMetric, desc.help, metricFamily.GetHelp(),
  560. )
  561. }
  562. // TODO(beorn7): Simplify switch once Desc has type.
  563. switch metricFamily.GetType() {
  564. case dto.MetricType_COUNTER:
  565. if dtoMetric.Counter == nil {
  566. return fmt.Errorf(
  567. "collected metric %s %s should be a Counter",
  568. desc.fqName, dtoMetric,
  569. )
  570. }
  571. case dto.MetricType_GAUGE:
  572. if dtoMetric.Gauge == nil {
  573. return fmt.Errorf(
  574. "collected metric %s %s should be a Gauge",
  575. desc.fqName, dtoMetric,
  576. )
  577. }
  578. case dto.MetricType_SUMMARY:
  579. if dtoMetric.Summary == nil {
  580. return fmt.Errorf(
  581. "collected metric %s %s should be a Summary",
  582. desc.fqName, dtoMetric,
  583. )
  584. }
  585. case dto.MetricType_UNTYPED:
  586. if dtoMetric.Untyped == nil {
  587. return fmt.Errorf(
  588. "collected metric %s %s should be Untyped",
  589. desc.fqName, dtoMetric,
  590. )
  591. }
  592. case dto.MetricType_HISTOGRAM:
  593. if dtoMetric.Histogram == nil {
  594. return fmt.Errorf(
  595. "collected metric %s %s should be a Histogram",
  596. desc.fqName, dtoMetric,
  597. )
  598. }
  599. default:
  600. panic("encountered MetricFamily with invalid type")
  601. }
  602. } else { // New name.
  603. metricFamily = &dto.MetricFamily{}
  604. metricFamily.Name = proto.String(desc.fqName)
  605. metricFamily.Help = proto.String(desc.help)
  606. // TODO(beorn7): Simplify switch once Desc has type.
  607. switch {
  608. case dtoMetric.Gauge != nil:
  609. metricFamily.Type = dto.MetricType_GAUGE.Enum()
  610. case dtoMetric.Counter != nil:
  611. metricFamily.Type = dto.MetricType_COUNTER.Enum()
  612. case dtoMetric.Summary != nil:
  613. metricFamily.Type = dto.MetricType_SUMMARY.Enum()
  614. case dtoMetric.Untyped != nil:
  615. metricFamily.Type = dto.MetricType_UNTYPED.Enum()
  616. case dtoMetric.Histogram != nil:
  617. metricFamily.Type = dto.MetricType_HISTOGRAM.Enum()
  618. default:
  619. return fmt.Errorf("empty metric collected: %s", dtoMetric)
  620. }
  621. if err := checkSuffixCollisions(metricFamily, metricFamiliesByName); err != nil {
  622. return err
  623. }
  624. metricFamiliesByName[desc.fqName] = metricFamily
  625. }
  626. if err := checkMetricConsistency(metricFamily, dtoMetric, metricHashes); err != nil {
  627. return err
  628. }
  629. if registeredDescIDs != nil {
  630. // Is the desc registered at all?
  631. if _, exist := registeredDescIDs[desc.id]; !exist {
  632. return fmt.Errorf(
  633. "collected metric %s %s with unregistered descriptor %s",
  634. metricFamily.GetName(), dtoMetric, desc,
  635. )
  636. }
  637. if err := checkDescConsistency(metricFamily, dtoMetric, desc); err != nil {
  638. return err
  639. }
  640. }
  641. metricFamily.Metric = append(metricFamily.Metric, dtoMetric)
  642. return nil
  643. }
  644. // Gatherers is a slice of Gatherer instances that implements the Gatherer
  645. // interface itself. Its Gather method calls Gather on all Gatherers in the
  646. // slice in order and returns the merged results. Errors returned from the
  647. // Gather calls are all returned in a flattened MultiError. Duplicate and
  648. // inconsistent Metrics are skipped (first occurrence in slice order wins) and
  649. // reported in the returned error.
  650. //
  651. // Gatherers can be used to merge the Gather results from multiple
  652. // Registries. It also provides a way to directly inject existing MetricFamily
  653. // protobufs into the gathering by creating a custom Gatherer with a Gather
  654. // method that simply returns the existing MetricFamily protobufs. Note that no
  655. // registration is involved (in contrast to Collector registration), so
  656. // obviously registration-time checks cannot happen. Any inconsistencies between
  657. // the gathered MetricFamilies are reported as errors by the Gather method, and
  658. // inconsistent Metrics are dropped. Invalid parts of the MetricFamilies
  659. // (e.g. syntactically invalid metric or label names) will go undetected.
  660. type Gatherers []Gatherer
  661. // Gather implements Gatherer.
  662. func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) {
  663. var (
  664. metricFamiliesByName = map[string]*dto.MetricFamily{}
  665. metricHashes = map[uint64]struct{}{}
  666. errs MultiError // The collected errors to return in the end.
  667. )
  668. for i, g := range gs {
  669. mfs, err := g.Gather()
  670. if err != nil {
  671. if multiErr, ok := err.(MultiError); ok {
  672. for _, err := range multiErr {
  673. errs = append(errs, fmt.Errorf("[from Gatherer #%d] %s", i+1, err))
  674. }
  675. } else {
  676. errs = append(errs, fmt.Errorf("[from Gatherer #%d] %s", i+1, err))
  677. }
  678. }
  679. for _, mf := range mfs {
  680. existingMF, exists := metricFamiliesByName[mf.GetName()]
  681. if exists {
  682. if existingMF.GetHelp() != mf.GetHelp() {
  683. errs = append(errs, fmt.Errorf(
  684. "gathered metric family %s has help %q but should have %q",
  685. mf.GetName(), mf.GetHelp(), existingMF.GetHelp(),
  686. ))
  687. continue
  688. }
  689. if existingMF.GetType() != mf.GetType() {
  690. errs = append(errs, fmt.Errorf(
  691. "gathered metric family %s has type %s but should have %s",
  692. mf.GetName(), mf.GetType(), existingMF.GetType(),
  693. ))
  694. continue
  695. }
  696. } else {
  697. existingMF = &dto.MetricFamily{}
  698. existingMF.Name = mf.Name
  699. existingMF.Help = mf.Help
  700. existingMF.Type = mf.Type
  701. if err := checkSuffixCollisions(existingMF, metricFamiliesByName); err != nil {
  702. errs = append(errs, err)
  703. continue
  704. }
  705. metricFamiliesByName[mf.GetName()] = existingMF
  706. }
  707. for _, m := range mf.Metric {
  708. if err := checkMetricConsistency(existingMF, m, metricHashes); err != nil {
  709. errs = append(errs, err)
  710. continue
  711. }
  712. existingMF.Metric = append(existingMF.Metric, m)
  713. }
  714. }
  715. }
  716. return internal.NormalizeMetricFamilies(metricFamiliesByName), errs.MaybeUnwrap()
  717. }
  718. // checkSuffixCollisions checks for collisions with the “magic” suffixes the
  719. // Prometheus text format and the internal metric representation of the
  720. // Prometheus server add while flattening Summaries and Histograms.
  721. func checkSuffixCollisions(mf *dto.MetricFamily, mfs map[string]*dto.MetricFamily) error {
  722. var (
  723. newName = mf.GetName()
  724. newType = mf.GetType()
  725. newNameWithoutSuffix = ""
  726. )
  727. switch {
  728. case strings.HasSuffix(newName, "_count"):
  729. newNameWithoutSuffix = newName[:len(newName)-6]
  730. case strings.HasSuffix(newName, "_sum"):
  731. newNameWithoutSuffix = newName[:len(newName)-4]
  732. case strings.HasSuffix(newName, "_bucket"):
  733. newNameWithoutSuffix = newName[:len(newName)-7]
  734. }
  735. if newNameWithoutSuffix != "" {
  736. if existingMF, ok := mfs[newNameWithoutSuffix]; ok {
  737. switch existingMF.GetType() {
  738. case dto.MetricType_SUMMARY:
  739. if !strings.HasSuffix(newName, "_bucket") {
  740. return fmt.Errorf(
  741. "collected metric named %q collides with previously collected summary named %q",
  742. newName, newNameWithoutSuffix,
  743. )
  744. }
  745. case dto.MetricType_HISTOGRAM:
  746. return fmt.Errorf(
  747. "collected metric named %q collides with previously collected histogram named %q",
  748. newName, newNameWithoutSuffix,
  749. )
  750. }
  751. }
  752. }
  753. if newType == dto.MetricType_SUMMARY || newType == dto.MetricType_HISTOGRAM {
  754. if _, ok := mfs[newName+"_count"]; ok {
  755. return fmt.Errorf(
  756. "collected histogram or summary named %q collides with previously collected metric named %q",
  757. newName, newName+"_count",
  758. )
  759. }
  760. if _, ok := mfs[newName+"_sum"]; ok {
  761. return fmt.Errorf(
  762. "collected histogram or summary named %q collides with previously collected metric named %q",
  763. newName, newName+"_sum",
  764. )
  765. }
  766. }
  767. if newType == dto.MetricType_HISTOGRAM {
  768. if _, ok := mfs[newName+"_bucket"]; ok {
  769. return fmt.Errorf(
  770. "collected histogram named %q collides with previously collected metric named %q",
  771. newName, newName+"_bucket",
  772. )
  773. }
  774. }
  775. return nil
  776. }
  777. // checkMetricConsistency checks if the provided Metric is consistent with the
  778. // provided MetricFamily. It also hashes the Metric labels and the MetricFamily
  779. // name. If the resulting hash is already in the provided metricHashes, an error
  780. // is returned. If not, it is added to metricHashes.
  781. func checkMetricConsistency(
  782. metricFamily *dto.MetricFamily,
  783. dtoMetric *dto.Metric,
  784. metricHashes map[uint64]struct{},
  785. ) error {
  786. name := metricFamily.GetName()
  787. // Type consistency with metric family.
  788. if metricFamily.GetType() == dto.MetricType_GAUGE && dtoMetric.Gauge == nil ||
  789. metricFamily.GetType() == dto.MetricType_COUNTER && dtoMetric.Counter == nil ||
  790. metricFamily.GetType() == dto.MetricType_SUMMARY && dtoMetric.Summary == nil ||
  791. metricFamily.GetType() == dto.MetricType_HISTOGRAM && dtoMetric.Histogram == nil ||
  792. metricFamily.GetType() == dto.MetricType_UNTYPED && dtoMetric.Untyped == nil {
  793. return fmt.Errorf(
  794. "collected metric %q { %s} is not a %s",
  795. name, dtoMetric, metricFamily.GetType(),
  796. )
  797. }
  798. previousLabelName := ""
  799. for _, labelPair := range dtoMetric.GetLabel() {
  800. labelName := labelPair.GetName()
  801. if labelName == previousLabelName {
  802. return fmt.Errorf(
  803. "collected metric %q { %s} has two or more labels with the same name: %s",
  804. name, dtoMetric, labelName,
  805. )
  806. }
  807. if !checkLabelName(labelName) {
  808. return fmt.Errorf(
  809. "collected metric %q { %s} has a label with an invalid name: %s",
  810. name, dtoMetric, labelName,
  811. )
  812. }
  813. if dtoMetric.Summary != nil && labelName == quantileLabel {
  814. return fmt.Errorf(
  815. "collected metric %q { %s} must not have an explicit %q label",
  816. name, dtoMetric, quantileLabel,
  817. )
  818. }
  819. if !utf8.ValidString(labelPair.GetValue()) {
  820. return fmt.Errorf(
  821. "collected metric %q { %s} has a label named %q whose value is not utf8: %#v",
  822. name, dtoMetric, labelName, labelPair.GetValue())
  823. }
  824. previousLabelName = labelName
  825. }
  826. // Is the metric unique (i.e. no other metric with the same name and the same labels)?
  827. h := xxhash.New()
  828. h.WriteString(name)
  829. h.Write(separatorByteSlice)
  830. // Make sure label pairs are sorted. We depend on it for the consistency
  831. // check.
  832. if !sort.IsSorted(labelPairSorter(dtoMetric.Label)) {
  833. // We cannot sort dtoMetric.Label in place as it is immutable by contract.
  834. copiedLabels := make([]*dto.LabelPair, len(dtoMetric.Label))
  835. copy(copiedLabels, dtoMetric.Label)
  836. sort.Sort(labelPairSorter(copiedLabels))
  837. dtoMetric.Label = copiedLabels
  838. }
  839. for _, lp := range dtoMetric.Label {
  840. h.WriteString(lp.GetName())
  841. h.Write(separatorByteSlice)
  842. h.WriteString(lp.GetValue())
  843. h.Write(separatorByteSlice)
  844. }
  845. hSum := h.Sum64()
  846. if _, exists := metricHashes[hSum]; exists {
  847. return fmt.Errorf(
  848. "collected metric %q { %s} was collected before with the same name and label values",
  849. name, dtoMetric,
  850. )
  851. }
  852. metricHashes[hSum] = struct{}{}
  853. return nil
  854. }
  855. func checkDescConsistency(
  856. metricFamily *dto.MetricFamily,
  857. dtoMetric *dto.Metric,
  858. desc *Desc,
  859. ) error {
  860. // Desc help consistency with metric family help.
  861. if metricFamily.GetHelp() != desc.help {
  862. return fmt.Errorf(
  863. "collected metric %s %s has help %q but should have %q",
  864. metricFamily.GetName(), dtoMetric, metricFamily.GetHelp(), desc.help,
  865. )
  866. }
  867. // Is the desc consistent with the content of the metric?
  868. lpsFromDesc := make([]*dto.LabelPair, len(desc.constLabelPairs), len(dtoMetric.Label))
  869. copy(lpsFromDesc, desc.constLabelPairs)
  870. for _, l := range desc.variableLabels {
  871. lpsFromDesc = append(lpsFromDesc, &dto.LabelPair{
  872. Name: proto.String(l),
  873. })
  874. }
  875. if len(lpsFromDesc) != len(dtoMetric.Label) {
  876. return fmt.Errorf(
  877. "labels in collected metric %s %s are inconsistent with descriptor %s",
  878. metricFamily.GetName(), dtoMetric, desc,
  879. )
  880. }
  881. sort.Sort(labelPairSorter(lpsFromDesc))
  882. for i, lpFromDesc := range lpsFromDesc {
  883. lpFromMetric := dtoMetric.Label[i]
  884. if lpFromDesc.GetName() != lpFromMetric.GetName() ||
  885. lpFromDesc.Value != nil && lpFromDesc.GetValue() != lpFromMetric.GetValue() {
  886. return fmt.Errorf(
  887. "labels in collected metric %s %s are inconsistent with descriptor %s",
  888. metricFamily.GetName(), dtoMetric, desc,
  889. )
  890. }
  891. }
  892. return nil
  893. }