You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

463 lines
13 KiB

  1. // Copyright 2014 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Package binutils provides access to the GNU binutils.
  15. package binutils
  16. import (
  17. "debug/elf"
  18. "debug/macho"
  19. "encoding/binary"
  20. "fmt"
  21. "io"
  22. "os"
  23. "os/exec"
  24. "path/filepath"
  25. "regexp"
  26. "runtime"
  27. "strings"
  28. "sync"
  29. "github.com/google/pprof/internal/elfexec"
  30. "github.com/google/pprof/internal/plugin"
  31. )
  32. // A Binutils implements plugin.ObjTool by invoking the GNU binutils.
  33. type Binutils struct {
  34. mu sync.Mutex
  35. rep *binrep
  36. }
  37. // binrep is an immutable representation for Binutils. It is atomically
  38. // replaced on every mutation to provide thread-safe access.
  39. type binrep struct {
  40. // Commands to invoke.
  41. llvmSymbolizer string
  42. llvmSymbolizerFound bool
  43. addr2line string
  44. addr2lineFound bool
  45. nm string
  46. nmFound bool
  47. objdump string
  48. objdumpFound bool
  49. // if fast, perform symbolization using nm (symbol names only),
  50. // instead of file-line detail from the slower addr2line.
  51. fast bool
  52. }
  53. // get returns the current representation for bu, initializing it if necessary.
  54. func (bu *Binutils) get() *binrep {
  55. bu.mu.Lock()
  56. r := bu.rep
  57. if r == nil {
  58. r = &binrep{}
  59. initTools(r, "")
  60. bu.rep = r
  61. }
  62. bu.mu.Unlock()
  63. return r
  64. }
  65. // update modifies the rep for bu via the supplied function.
  66. func (bu *Binutils) update(fn func(r *binrep)) {
  67. r := &binrep{}
  68. bu.mu.Lock()
  69. defer bu.mu.Unlock()
  70. if bu.rep == nil {
  71. initTools(r, "")
  72. } else {
  73. *r = *bu.rep
  74. }
  75. fn(r)
  76. bu.rep = r
  77. }
  78. // String returns string representation of the binutils state for debug logging.
  79. func (bu *Binutils) String() string {
  80. r := bu.get()
  81. var llvmSymbolizer, addr2line, nm, objdump string
  82. if r.llvmSymbolizerFound {
  83. llvmSymbolizer = r.llvmSymbolizer
  84. }
  85. if r.addr2lineFound {
  86. addr2line = r.addr2line
  87. }
  88. if r.nmFound {
  89. nm = r.nm
  90. }
  91. if r.objdumpFound {
  92. objdump = r.objdump
  93. }
  94. return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
  95. llvmSymbolizer, addr2line, nm, objdump, r.fast)
  96. }
  97. // SetFastSymbolization sets a toggle that makes binutils use fast
  98. // symbolization (using nm), which is much faster than addr2line but
  99. // provides only symbol name information (no file/line).
  100. func (bu *Binutils) SetFastSymbolization(fast bool) {
  101. bu.update(func(r *binrep) { r.fast = fast })
  102. }
  103. // SetTools processes the contents of the tools option. It
  104. // expects a set of entries separated by commas; each entry is a pair
  105. // of the form t:path, where cmd will be used to look only for the
  106. // tool named t. If t is not specified, the path is searched for all
  107. // tools.
  108. func (bu *Binutils) SetTools(config string) {
  109. bu.update(func(r *binrep) { initTools(r, config) })
  110. }
  111. func initTools(b *binrep, config string) {
  112. // paths collect paths per tool; Key "" contains the default.
  113. paths := make(map[string][]string)
  114. for _, t := range strings.Split(config, ",") {
  115. name, path := "", t
  116. if ct := strings.SplitN(t, ":", 2); len(ct) == 2 {
  117. name, path = ct[0], ct[1]
  118. }
  119. paths[name] = append(paths[name], path)
  120. }
  121. defaultPath := paths[""]
  122. b.llvmSymbolizer, b.llvmSymbolizerFound = findExe("llvm-symbolizer", append(paths["llvm-symbolizer"], defaultPath...))
  123. b.addr2line, b.addr2lineFound = findExe("addr2line", append(paths["addr2line"], defaultPath...))
  124. if !b.addr2lineFound {
  125. // On MacOS, brew installs addr2line under gaddr2line name, so search for
  126. // that if the tool is not found by its default name.
  127. b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
  128. }
  129. b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
  130. b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
  131. }
  132. // findExe looks for an executable command on a set of paths.
  133. // If it cannot find it, returns cmd.
  134. func findExe(cmd string, paths []string) (string, bool) {
  135. for _, p := range paths {
  136. cp := filepath.Join(p, cmd)
  137. if c, err := exec.LookPath(cp); err == nil {
  138. return c, true
  139. }
  140. }
  141. return cmd, false
  142. }
  143. // Disasm returns the assembly instructions for the specified address range
  144. // of a binary.
  145. func (bu *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) {
  146. b := bu.get()
  147. cmd := exec.Command(b.objdump, "-d", "-C", "--no-show-raw-insn", "-l",
  148. fmt.Sprintf("--start-address=%#x", start),
  149. fmt.Sprintf("--stop-address=%#x", end),
  150. file)
  151. out, err := cmd.Output()
  152. if err != nil {
  153. return nil, fmt.Errorf("%v: %v", cmd.Args, err)
  154. }
  155. return disassemble(out)
  156. }
  157. // Open satisfies the plugin.ObjTool interface.
  158. func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  159. b := bu.get()
  160. // Make sure file is a supported executable.
  161. // This uses magic numbers, mainly to provide better error messages but
  162. // it should also help speed.
  163. if _, err := os.Stat(name); err != nil {
  164. // For testing, do not require file name to exist.
  165. if strings.Contains(b.addr2line, "testdata/") {
  166. return &fileAddr2Line{file: file{b: b, name: name}}, nil
  167. }
  168. return nil, err
  169. }
  170. // Read the first 4 bytes of the file.
  171. f, err := os.Open(name)
  172. if err != nil {
  173. return nil, fmt.Errorf("error opening %s: %v", name, err)
  174. }
  175. defer f.Close()
  176. var header [4]byte
  177. if _, err = io.ReadFull(f, header[:]); err != nil {
  178. return nil, fmt.Errorf("error reading magic number from %s: %v", name, err)
  179. }
  180. elfMagic := string(header[:])
  181. // Match against supported file types.
  182. if elfMagic == elf.ELFMAG {
  183. f, err := b.openELF(name, start, limit, offset)
  184. if err != nil {
  185. return nil, fmt.Errorf("error reading ELF file %s: %v", name, err)
  186. }
  187. return f, nil
  188. }
  189. // Mach-O magic numbers can be big or little endian.
  190. machoMagicLittle := binary.LittleEndian.Uint32(header[:])
  191. machoMagicBig := binary.BigEndian.Uint32(header[:])
  192. if machoMagicLittle == macho.Magic32 || machoMagicLittle == macho.Magic64 ||
  193. machoMagicBig == macho.Magic32 || machoMagicBig == macho.Magic64 {
  194. f, err := b.openMachO(name, start, limit, offset)
  195. if err != nil {
  196. return nil, fmt.Errorf("error reading Mach-O file %s: %v", name, err)
  197. }
  198. return f, nil
  199. }
  200. if machoMagicLittle == macho.MagicFat || machoMagicBig == macho.MagicFat {
  201. f, err := b.openFatMachO(name, start, limit, offset)
  202. if err != nil {
  203. return nil, fmt.Errorf("error reading fat Mach-O file %s: %v", name, err)
  204. }
  205. return f, nil
  206. }
  207. return nil, fmt.Errorf("unrecognized binary format: %s", name)
  208. }
  209. func (b *binrep) openMachOCommon(name string, of *macho.File, start, limit, offset uint64) (plugin.ObjFile, error) {
  210. // Subtract the load address of the __TEXT section. Usually 0 for shared
  211. // libraries or 0x100000000 for executables. You can check this value by
  212. // running `objdump -private-headers <file>`.
  213. textSegment := of.Segment("__TEXT")
  214. if textSegment == nil {
  215. return nil, fmt.Errorf("could not identify base for %s: no __TEXT segment", name)
  216. }
  217. if textSegment.Addr > start {
  218. return nil, fmt.Errorf("could not identify base for %s: __TEXT segment address (0x%x) > mapping start address (0x%x)",
  219. name, textSegment.Addr, start)
  220. }
  221. base := start - textSegment.Addr
  222. if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
  223. return &fileNM{file: file{b: b, name: name, base: base}}, nil
  224. }
  225. return &fileAddr2Line{file: file{b: b, name: name, base: base}}, nil
  226. }
  227. func (b *binrep) openFatMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  228. of, err := macho.OpenFat(name)
  229. if err != nil {
  230. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  231. }
  232. defer of.Close()
  233. if len(of.Arches) == 0 {
  234. return nil, fmt.Errorf("empty fat Mach-O file: %s", name)
  235. }
  236. var arch macho.Cpu
  237. // Use the host architecture.
  238. // TODO: This is not ideal because the host architecture may not be the one
  239. // that was profiled. E.g. an amd64 host can profile a 386 program.
  240. switch runtime.GOARCH {
  241. case "386":
  242. arch = macho.Cpu386
  243. case "amd64", "amd64p32":
  244. arch = macho.CpuAmd64
  245. case "arm", "armbe", "arm64", "arm64be":
  246. arch = macho.CpuArm
  247. case "ppc":
  248. arch = macho.CpuPpc
  249. case "ppc64", "ppc64le":
  250. arch = macho.CpuPpc64
  251. default:
  252. return nil, fmt.Errorf("unsupported host architecture for %s: %s", name, runtime.GOARCH)
  253. }
  254. for i := range of.Arches {
  255. if of.Arches[i].Cpu == arch {
  256. return b.openMachOCommon(name, of.Arches[i].File, start, limit, offset)
  257. }
  258. }
  259. return nil, fmt.Errorf("architecture not found in %s: %s", name, runtime.GOARCH)
  260. }
  261. func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  262. of, err := macho.Open(name)
  263. if err != nil {
  264. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  265. }
  266. defer of.Close()
  267. return b.openMachOCommon(name, of, start, limit, offset)
  268. }
  269. func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  270. ef, err := elf.Open(name)
  271. if err != nil {
  272. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  273. }
  274. defer ef.Close()
  275. var stextOffset *uint64
  276. var pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
  277. if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
  278. // Reading all Symbols is expensive, and we only rarely need it so
  279. // we don't want to do it every time. But if _stext happens to be
  280. // page-aligned but isn't the same as Vaddr, we would symbolize
  281. // wrong. So if the name the addresses aren't page aligned, or if
  282. // the name is "vmlinux" we read _stext. We can be wrong if: (1)
  283. // someone passes a kernel path that doesn't contain "vmlinux" AND
  284. // (2) _stext is page-aligned AND (3) _stext is not at Vaddr
  285. symbols, err := ef.Symbols()
  286. if err != nil && err != elf.ErrNoSymbols {
  287. return nil, err
  288. }
  289. for _, s := range symbols {
  290. if s.Name == "_stext" {
  291. // The kernel may use _stext as the mapping start address.
  292. stextOffset = &s.Value
  293. break
  294. }
  295. }
  296. }
  297. base, err := elfexec.GetBase(&ef.FileHeader, elfexec.FindTextProgHeader(ef), stextOffset, start, limit, offset)
  298. if err != nil {
  299. return nil, fmt.Errorf("could not identify base for %s: %v", name, err)
  300. }
  301. buildID := ""
  302. if f, err := os.Open(name); err == nil {
  303. if id, err := elfexec.GetBuildID(f); err == nil {
  304. buildID = fmt.Sprintf("%x", id)
  305. }
  306. }
  307. if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
  308. return &fileNM{file: file{b, name, base, buildID}}, nil
  309. }
  310. return &fileAddr2Line{file: file{b, name, base, buildID}}, nil
  311. }
  312. // file implements the binutils.ObjFile interface.
  313. type file struct {
  314. b *binrep
  315. name string
  316. base uint64
  317. buildID string
  318. }
  319. func (f *file) Name() string {
  320. return f.name
  321. }
  322. func (f *file) Base() uint64 {
  323. return f.base
  324. }
  325. func (f *file) BuildID() string {
  326. return f.buildID
  327. }
  328. func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
  329. return []plugin.Frame{}, nil
  330. }
  331. func (f *file) Close() error {
  332. return nil
  333. }
  334. func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
  335. // Get from nm a list of symbols sorted by address.
  336. cmd := exec.Command(f.b.nm, "-n", f.name)
  337. out, err := cmd.Output()
  338. if err != nil {
  339. return nil, fmt.Errorf("%v: %v", cmd.Args, err)
  340. }
  341. return findSymbols(out, f.name, r, addr)
  342. }
  343. // fileNM implements the binutils.ObjFile interface, using 'nm' to map
  344. // addresses to symbols (without file/line number information). It is
  345. // faster than fileAddr2Line.
  346. type fileNM struct {
  347. file
  348. addr2linernm *addr2LinerNM
  349. }
  350. func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
  351. if f.addr2linernm == nil {
  352. addr2liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
  353. if err != nil {
  354. return nil, err
  355. }
  356. f.addr2linernm = addr2liner
  357. }
  358. return f.addr2linernm.addrInfo(addr)
  359. }
  360. // fileAddr2Line implements the binutils.ObjFile interface, using
  361. // llvm-symbolizer, if that's available, or addr2line to map addresses to
  362. // symbols (with file/line number information). It can be slow for large
  363. // binaries with debug information.
  364. type fileAddr2Line struct {
  365. once sync.Once
  366. file
  367. addr2liner *addr2Liner
  368. llvmSymbolizer *llvmSymbolizer
  369. }
  370. func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
  371. f.once.Do(f.init)
  372. if f.llvmSymbolizer != nil {
  373. return f.llvmSymbolizer.addrInfo(addr)
  374. }
  375. if f.addr2liner != nil {
  376. return f.addr2liner.addrInfo(addr)
  377. }
  378. return nil, fmt.Errorf("could not find local addr2liner")
  379. }
  380. func (f *fileAddr2Line) init() {
  381. if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil {
  382. f.llvmSymbolizer = llvmSymbolizer
  383. return
  384. }
  385. if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
  386. f.addr2liner = addr2liner
  387. // When addr2line encounters some gcc compiled binaries, it
  388. // drops interesting parts of names in anonymous namespaces.
  389. // Fallback to NM for better function names.
  390. if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
  391. f.addr2liner.nm = nm
  392. }
  393. }
  394. }
  395. func (f *fileAddr2Line) Close() error {
  396. if f.llvmSymbolizer != nil {
  397. f.llvmSymbolizer.rw.close()
  398. f.llvmSymbolizer = nil
  399. }
  400. if f.addr2liner != nil {
  401. f.addr2liner.rw.close()
  402. f.addr2liner = nil
  403. }
  404. return nil
  405. }