You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

640 lines
19 KiB

  1. // Copyright 2018 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package procfs
  14. // While implementing parsing of /proc/[pid]/mountstats, this blog was used
  15. // heavily as a reference:
  16. // https://utcc.utoronto.ca/~cks/space/blog/linux/NFSMountstatsIndex
  17. //
  18. // Special thanks to Chris Siebenmann for all of his posts explaining the
  19. // various statistics available for NFS.
  20. import (
  21. "bufio"
  22. "fmt"
  23. "io"
  24. "strconv"
  25. "strings"
  26. "time"
  27. )
  28. // Constants shared between multiple functions.
  29. const (
  30. deviceEntryLen = 8
  31. fieldBytesLen = 8
  32. fieldEventsLen = 27
  33. statVersion10 = "1.0"
  34. statVersion11 = "1.1"
  35. fieldTransport10TCPLen = 10
  36. fieldTransport10UDPLen = 7
  37. fieldTransport11TCPLen = 13
  38. fieldTransport11UDPLen = 10
  39. )
  40. // A Mount is a device mount parsed from /proc/[pid]/mountstats.
  41. type Mount struct {
  42. // Name of the device.
  43. Device string
  44. // The mount point of the device.
  45. Mount string
  46. // The filesystem type used by the device.
  47. Type string
  48. // If available additional statistics related to this Mount.
  49. // Use a type assertion to determine if additional statistics are available.
  50. Stats MountStats
  51. }
  52. // A MountStats is a type which contains detailed statistics for a specific
  53. // type of Mount.
  54. type MountStats interface {
  55. mountStats()
  56. }
  57. // A MountStatsNFS is a MountStats implementation for NFSv3 and v4 mounts.
  58. type MountStatsNFS struct {
  59. // The version of statistics provided.
  60. StatVersion string
  61. // The mount options of the NFS mount.
  62. Opts map[string]string
  63. // The age of the NFS mount.
  64. Age time.Duration
  65. // Statistics related to byte counters for various operations.
  66. Bytes NFSBytesStats
  67. // Statistics related to various NFS event occurrences.
  68. Events NFSEventsStats
  69. // Statistics broken down by filesystem operation.
  70. Operations []NFSOperationStats
  71. // Statistics about the NFS RPC transport.
  72. Transport NFSTransportStats
  73. }
  74. // mountStats implements MountStats.
  75. func (m MountStatsNFS) mountStats() {}
  76. // A NFSBytesStats contains statistics about the number of bytes read and written
  77. // by an NFS client to and from an NFS server.
  78. type NFSBytesStats struct {
  79. // Number of bytes read using the read() syscall.
  80. Read uint64
  81. // Number of bytes written using the write() syscall.
  82. Write uint64
  83. // Number of bytes read using the read() syscall in O_DIRECT mode.
  84. DirectRead uint64
  85. // Number of bytes written using the write() syscall in O_DIRECT mode.
  86. DirectWrite uint64
  87. // Number of bytes read from the NFS server, in total.
  88. ReadTotal uint64
  89. // Number of bytes written to the NFS server, in total.
  90. WriteTotal uint64
  91. // Number of pages read directly via mmap()'d files.
  92. ReadPages uint64
  93. // Number of pages written directly via mmap()'d files.
  94. WritePages uint64
  95. }
  96. // A NFSEventsStats contains statistics about NFS event occurrences.
  97. type NFSEventsStats struct {
  98. // Number of times cached inode attributes are re-validated from the server.
  99. InodeRevalidate uint64
  100. // Number of times cached dentry nodes are re-validated from the server.
  101. DnodeRevalidate uint64
  102. // Number of times an inode cache is cleared.
  103. DataInvalidate uint64
  104. // Number of times cached inode attributes are invalidated.
  105. AttributeInvalidate uint64
  106. // Number of times files or directories have been open()'d.
  107. VFSOpen uint64
  108. // Number of times a directory lookup has occurred.
  109. VFSLookup uint64
  110. // Number of times permissions have been checked.
  111. VFSAccess uint64
  112. // Number of updates (and potential writes) to pages.
  113. VFSUpdatePage uint64
  114. // Number of pages read directly via mmap()'d files.
  115. VFSReadPage uint64
  116. // Number of times a group of pages have been read.
  117. VFSReadPages uint64
  118. // Number of pages written directly via mmap()'d files.
  119. VFSWritePage uint64
  120. // Number of times a group of pages have been written.
  121. VFSWritePages uint64
  122. // Number of times directory entries have been read with getdents().
  123. VFSGetdents uint64
  124. // Number of times attributes have been set on inodes.
  125. VFSSetattr uint64
  126. // Number of pending writes that have been forcefully flushed to the server.
  127. VFSFlush uint64
  128. // Number of times fsync() has been called on directories and files.
  129. VFSFsync uint64
  130. // Number of times locking has been attempted on a file.
  131. VFSLock uint64
  132. // Number of times files have been closed and released.
  133. VFSFileRelease uint64
  134. // Unknown. Possibly unused.
  135. CongestionWait uint64
  136. // Number of times files have been truncated.
  137. Truncation uint64
  138. // Number of times a file has been grown due to writes beyond its existing end.
  139. WriteExtension uint64
  140. // Number of times a file was removed while still open by another process.
  141. SillyRename uint64
  142. // Number of times the NFS server gave less data than expected while reading.
  143. ShortRead uint64
  144. // Number of times the NFS server wrote less data than expected while writing.
  145. ShortWrite uint64
  146. // Number of times the NFS server indicated EJUKEBOX; retrieving data from
  147. // offline storage.
  148. JukeboxDelay uint64
  149. // Number of NFS v4.1+ pNFS reads.
  150. PNFSRead uint64
  151. // Number of NFS v4.1+ pNFS writes.
  152. PNFSWrite uint64
  153. }
  154. // A NFSOperationStats contains statistics for a single operation.
  155. type NFSOperationStats struct {
  156. // The name of the operation.
  157. Operation string
  158. // Number of requests performed for this operation.
  159. Requests uint64
  160. // Number of times an actual RPC request has been transmitted for this operation.
  161. Transmissions uint64
  162. // Number of times a request has had a major timeout.
  163. MajorTimeouts uint64
  164. // Number of bytes sent for this operation, including RPC headers and payload.
  165. BytesSent uint64
  166. // Number of bytes received for this operation, including RPC headers and payload.
  167. BytesReceived uint64
  168. // Duration all requests spent queued for transmission before they were sent.
  169. CumulativeQueueMilliseconds uint64
  170. // Duration it took to get a reply back after the request was transmitted.
  171. CumulativeTotalResponseMilliseconds uint64
  172. // Duration from when a request was enqueued to when it was completely handled.
  173. CumulativeTotalRequestMilliseconds uint64
  174. // The count of operations that complete with tk_status < 0. These statuses usually indicate error conditions.
  175. Errors uint64
  176. }
  177. // A NFSTransportStats contains statistics for the NFS mount RPC requests and
  178. // responses.
  179. type NFSTransportStats struct {
  180. // The transport protocol used for the NFS mount.
  181. Protocol string
  182. // The local port used for the NFS mount.
  183. Port uint64
  184. // Number of times the client has had to establish a connection from scratch
  185. // to the NFS server.
  186. Bind uint64
  187. // Number of times the client has made a TCP connection to the NFS server.
  188. Connect uint64
  189. // Duration (in jiffies, a kernel internal unit of time) the NFS mount has
  190. // spent waiting for connections to the server to be established.
  191. ConnectIdleTime uint64
  192. // Duration since the NFS mount last saw any RPC traffic.
  193. IdleTimeSeconds uint64
  194. // Number of RPC requests for this mount sent to the NFS server.
  195. Sends uint64
  196. // Number of RPC responses for this mount received from the NFS server.
  197. Receives uint64
  198. // Number of times the NFS server sent a response with a transaction ID
  199. // unknown to this client.
  200. BadTransactionIDs uint64
  201. // A running counter, incremented on each request as the current difference
  202. // ebetween sends and receives.
  203. CumulativeActiveRequests uint64
  204. // A running counter, incremented on each request by the current backlog
  205. // queue size.
  206. CumulativeBacklog uint64
  207. // Stats below only available with stat version 1.1.
  208. // Maximum number of simultaneously active RPC requests ever used.
  209. MaximumRPCSlotsUsed uint64
  210. // A running counter, incremented on each request as the current size of the
  211. // sending queue.
  212. CumulativeSendingQueue uint64
  213. // A running counter, incremented on each request as the current size of the
  214. // pending queue.
  215. CumulativePendingQueue uint64
  216. }
  217. // parseMountStats parses a /proc/[pid]/mountstats file and returns a slice
  218. // of Mount structures containing detailed information about each mount.
  219. // If available, statistics for each mount are parsed as well.
  220. func parseMountStats(r io.Reader) ([]*Mount, error) {
  221. const (
  222. device = "device"
  223. statVersionPrefix = "statvers="
  224. nfs3Type = "nfs"
  225. nfs4Type = "nfs4"
  226. )
  227. var mounts []*Mount
  228. s := bufio.NewScanner(r)
  229. for s.Scan() {
  230. // Only look for device entries in this function
  231. ss := strings.Fields(string(s.Bytes()))
  232. if len(ss) == 0 || ss[0] != device {
  233. continue
  234. }
  235. m, err := parseMount(ss)
  236. if err != nil {
  237. return nil, err
  238. }
  239. // Does this mount also possess statistics information?
  240. if len(ss) > deviceEntryLen {
  241. // Only NFSv3 and v4 are supported for parsing statistics
  242. if m.Type != nfs3Type && m.Type != nfs4Type {
  243. return nil, fmt.Errorf("cannot parse MountStats for fstype %q", m.Type)
  244. }
  245. statVersion := strings.TrimPrefix(ss[8], statVersionPrefix)
  246. stats, err := parseMountStatsNFS(s, statVersion)
  247. if err != nil {
  248. return nil, err
  249. }
  250. m.Stats = stats
  251. }
  252. mounts = append(mounts, m)
  253. }
  254. return mounts, s.Err()
  255. }
  256. // parseMount parses an entry in /proc/[pid]/mountstats in the format:
  257. //
  258. // device [device] mounted on [mount] with fstype [type]
  259. func parseMount(ss []string) (*Mount, error) {
  260. if len(ss) < deviceEntryLen {
  261. return nil, fmt.Errorf("invalid device entry: %v", ss)
  262. }
  263. // Check for specific words appearing at specific indices to ensure
  264. // the format is consistent with what we expect
  265. format := []struct {
  266. i int
  267. s string
  268. }{
  269. {i: 0, s: "device"},
  270. {i: 2, s: "mounted"},
  271. {i: 3, s: "on"},
  272. {i: 5, s: "with"},
  273. {i: 6, s: "fstype"},
  274. }
  275. for _, f := range format {
  276. if ss[f.i] != f.s {
  277. return nil, fmt.Errorf("invalid device entry: %v", ss)
  278. }
  279. }
  280. return &Mount{
  281. Device: ss[1],
  282. Mount: ss[4],
  283. Type: ss[7],
  284. }, nil
  285. }
  286. // parseMountStatsNFS parses a MountStatsNFS by scanning additional information
  287. // related to NFS statistics.
  288. func parseMountStatsNFS(s *bufio.Scanner, statVersion string) (*MountStatsNFS, error) {
  289. // Field indicators for parsing specific types of data
  290. const (
  291. fieldOpts = "opts:"
  292. fieldAge = "age:"
  293. fieldBytes = "bytes:"
  294. fieldEvents = "events:"
  295. fieldPerOpStats = "per-op"
  296. fieldTransport = "xprt:"
  297. )
  298. stats := &MountStatsNFS{
  299. StatVersion: statVersion,
  300. }
  301. for s.Scan() {
  302. ss := strings.Fields(string(s.Bytes()))
  303. if len(ss) == 0 {
  304. break
  305. }
  306. switch ss[0] {
  307. case fieldOpts:
  308. if len(ss) < 2 {
  309. return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
  310. }
  311. if stats.Opts == nil {
  312. stats.Opts = map[string]string{}
  313. }
  314. for _, opt := range strings.Split(ss[1], ",") {
  315. split := strings.Split(opt, "=")
  316. if len(split) == 2 {
  317. stats.Opts[split[0]] = split[1]
  318. } else {
  319. stats.Opts[opt] = ""
  320. }
  321. }
  322. case fieldAge:
  323. if len(ss) < 2 {
  324. return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
  325. }
  326. // Age integer is in seconds
  327. d, err := time.ParseDuration(ss[1] + "s")
  328. if err != nil {
  329. return nil, err
  330. }
  331. stats.Age = d
  332. case fieldBytes:
  333. if len(ss) < 2 {
  334. return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
  335. }
  336. bstats, err := parseNFSBytesStats(ss[1:])
  337. if err != nil {
  338. return nil, err
  339. }
  340. stats.Bytes = *bstats
  341. case fieldEvents:
  342. if len(ss) < 2 {
  343. return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
  344. }
  345. estats, err := parseNFSEventsStats(ss[1:])
  346. if err != nil {
  347. return nil, err
  348. }
  349. stats.Events = *estats
  350. case fieldTransport:
  351. if len(ss) < 3 {
  352. return nil, fmt.Errorf("not enough information for NFS transport stats: %v", ss)
  353. }
  354. tstats, err := parseNFSTransportStats(ss[1:], statVersion)
  355. if err != nil {
  356. return nil, err
  357. }
  358. stats.Transport = *tstats
  359. }
  360. // When encountering "per-operation statistics", we must break this
  361. // loop and parse them separately to ensure we can terminate parsing
  362. // before reaching another device entry; hence why this 'if' statement
  363. // is not just another switch case
  364. if ss[0] == fieldPerOpStats {
  365. break
  366. }
  367. }
  368. if err := s.Err(); err != nil {
  369. return nil, err
  370. }
  371. // NFS per-operation stats appear last before the next device entry
  372. perOpStats, err := parseNFSOperationStats(s)
  373. if err != nil {
  374. return nil, err
  375. }
  376. stats.Operations = perOpStats
  377. return stats, nil
  378. }
  379. // parseNFSBytesStats parses a NFSBytesStats line using an input set of
  380. // integer fields.
  381. func parseNFSBytesStats(ss []string) (*NFSBytesStats, error) {
  382. if len(ss) != fieldBytesLen {
  383. return nil, fmt.Errorf("invalid NFS bytes stats: %v", ss)
  384. }
  385. ns := make([]uint64, 0, fieldBytesLen)
  386. for _, s := range ss {
  387. n, err := strconv.ParseUint(s, 10, 64)
  388. if err != nil {
  389. return nil, err
  390. }
  391. ns = append(ns, n)
  392. }
  393. return &NFSBytesStats{
  394. Read: ns[0],
  395. Write: ns[1],
  396. DirectRead: ns[2],
  397. DirectWrite: ns[3],
  398. ReadTotal: ns[4],
  399. WriteTotal: ns[5],
  400. ReadPages: ns[6],
  401. WritePages: ns[7],
  402. }, nil
  403. }
  404. // parseNFSEventsStats parses a NFSEventsStats line using an input set of
  405. // integer fields.
  406. func parseNFSEventsStats(ss []string) (*NFSEventsStats, error) {
  407. if len(ss) != fieldEventsLen {
  408. return nil, fmt.Errorf("invalid NFS events stats: %v", ss)
  409. }
  410. ns := make([]uint64, 0, fieldEventsLen)
  411. for _, s := range ss {
  412. n, err := strconv.ParseUint(s, 10, 64)
  413. if err != nil {
  414. return nil, err
  415. }
  416. ns = append(ns, n)
  417. }
  418. return &NFSEventsStats{
  419. InodeRevalidate: ns[0],
  420. DnodeRevalidate: ns[1],
  421. DataInvalidate: ns[2],
  422. AttributeInvalidate: ns[3],
  423. VFSOpen: ns[4],
  424. VFSLookup: ns[5],
  425. VFSAccess: ns[6],
  426. VFSUpdatePage: ns[7],
  427. VFSReadPage: ns[8],
  428. VFSReadPages: ns[9],
  429. VFSWritePage: ns[10],
  430. VFSWritePages: ns[11],
  431. VFSGetdents: ns[12],
  432. VFSSetattr: ns[13],
  433. VFSFlush: ns[14],
  434. VFSFsync: ns[15],
  435. VFSLock: ns[16],
  436. VFSFileRelease: ns[17],
  437. CongestionWait: ns[18],
  438. Truncation: ns[19],
  439. WriteExtension: ns[20],
  440. SillyRename: ns[21],
  441. ShortRead: ns[22],
  442. ShortWrite: ns[23],
  443. JukeboxDelay: ns[24],
  444. PNFSRead: ns[25],
  445. PNFSWrite: ns[26],
  446. }, nil
  447. }
  448. // parseNFSOperationStats parses a slice of NFSOperationStats by scanning
  449. // additional information about per-operation statistics until an empty
  450. // line is reached.
  451. func parseNFSOperationStats(s *bufio.Scanner) ([]NFSOperationStats, error) {
  452. const (
  453. // Minimum number of expected fields in each per-operation statistics set
  454. minFields = 9
  455. )
  456. var ops []NFSOperationStats
  457. for s.Scan() {
  458. ss := strings.Fields(string(s.Bytes()))
  459. if len(ss) == 0 {
  460. // Must break when reading a blank line after per-operation stats to
  461. // enable top-level function to parse the next device entry
  462. break
  463. }
  464. if len(ss) < minFields {
  465. return nil, fmt.Errorf("invalid NFS per-operations stats: %v", ss)
  466. }
  467. // Skip string operation name for integers
  468. ns := make([]uint64, 0, minFields-1)
  469. for _, st := range ss[1:] {
  470. n, err := strconv.ParseUint(st, 10, 64)
  471. if err != nil {
  472. return nil, err
  473. }
  474. ns = append(ns, n)
  475. }
  476. opStats := NFSOperationStats{
  477. Operation: strings.TrimSuffix(ss[0], ":"),
  478. Requests: ns[0],
  479. Transmissions: ns[1],
  480. MajorTimeouts: ns[2],
  481. BytesSent: ns[3],
  482. BytesReceived: ns[4],
  483. CumulativeQueueMilliseconds: ns[5],
  484. CumulativeTotalResponseMilliseconds: ns[6],
  485. CumulativeTotalRequestMilliseconds: ns[7],
  486. }
  487. if len(ns) > 8 {
  488. opStats.Errors = ns[8]
  489. }
  490. ops = append(ops, opStats)
  491. }
  492. return ops, s.Err()
  493. }
  494. // parseNFSTransportStats parses a NFSTransportStats line using an input set of
  495. // integer fields matched to a specific stats version.
  496. func parseNFSTransportStats(ss []string, statVersion string) (*NFSTransportStats, error) {
  497. // Extract the protocol field. It is the only string value in the line
  498. protocol := ss[0]
  499. ss = ss[1:]
  500. switch statVersion {
  501. case statVersion10:
  502. var expectedLength int
  503. if protocol == "tcp" {
  504. expectedLength = fieldTransport10TCPLen
  505. } else if protocol == "udp" {
  506. expectedLength = fieldTransport10UDPLen
  507. } else {
  508. return nil, fmt.Errorf("invalid NFS protocol \"%s\" in stats 1.0 statement: %v", protocol, ss)
  509. }
  510. if len(ss) != expectedLength {
  511. return nil, fmt.Errorf("invalid NFS transport stats 1.0 statement: %v", ss)
  512. }
  513. case statVersion11:
  514. var expectedLength int
  515. if protocol == "tcp" {
  516. expectedLength = fieldTransport11TCPLen
  517. } else if protocol == "udp" {
  518. expectedLength = fieldTransport11UDPLen
  519. } else {
  520. return nil, fmt.Errorf("invalid NFS protocol \"%s\" in stats 1.1 statement: %v", protocol, ss)
  521. }
  522. if len(ss) != expectedLength {
  523. return nil, fmt.Errorf("invalid NFS transport stats 1.1 statement: %v", ss)
  524. }
  525. default:
  526. return nil, fmt.Errorf("unrecognized NFS transport stats version: %q", statVersion)
  527. }
  528. // Allocate enough for v1.1 stats since zero value for v1.1 stats will be okay
  529. // in a v1.0 response. Since the stat length is bigger for TCP stats, we use
  530. // the TCP length here.
  531. //
  532. // Note: slice length must be set to length of v1.1 stats to avoid a panic when
  533. // only v1.0 stats are present.
  534. // See: https://github.com/prometheus/node_exporter/issues/571.
  535. ns := make([]uint64, fieldTransport11TCPLen)
  536. for i, s := range ss {
  537. n, err := strconv.ParseUint(s, 10, 64)
  538. if err != nil {
  539. return nil, err
  540. }
  541. ns[i] = n
  542. }
  543. // The fields differ depending on the transport protocol (TCP or UDP)
  544. // From https://utcc.utoronto.ca/%7Ecks/space/blog/linux/NFSMountstatsXprt
  545. //
  546. // For the udp RPC transport there is no connection count, connect idle time,
  547. // or idle time (fields #3, #4, and #5); all other fields are the same. So
  548. // we set them to 0 here.
  549. if protocol == "udp" {
  550. ns = append(ns[:2], append(make([]uint64, 3), ns[2:]...)...)
  551. }
  552. return &NFSTransportStats{
  553. Protocol: protocol,
  554. Port: ns[0],
  555. Bind: ns[1],
  556. Connect: ns[2],
  557. ConnectIdleTime: ns[3],
  558. IdleTimeSeconds: ns[4],
  559. Sends: ns[5],
  560. Receives: ns[6],
  561. BadTransactionIDs: ns[7],
  562. CumulativeActiveRequests: ns[8],
  563. CumulativeBacklog: ns[9],
  564. MaximumRPCSlotsUsed: ns[10],
  565. CumulativeSendingQueue: ns[11],
  566. CumulativePendingQueue: ns[12],
  567. }, nil
  568. }