You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

965 lines
27 KiB

  1. // Copyright 2017 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package profiler
  15. import (
  16. "bytes"
  17. "compress/gzip"
  18. "context"
  19. "errors"
  20. "fmt"
  21. "io"
  22. "log"
  23. "math/rand"
  24. "os"
  25. "runtime"
  26. "strings"
  27. "sync"
  28. "testing"
  29. "time"
  30. gcemd "cloud.google.com/go/compute/metadata"
  31. "cloud.google.com/go/internal/testutil"
  32. "cloud.google.com/go/profiler/mocks"
  33. "cloud.google.com/go/profiler/testdata"
  34. "github.com/golang/mock/gomock"
  35. "github.com/golang/protobuf/proto"
  36. "github.com/golang/protobuf/ptypes"
  37. "github.com/google/pprof/profile"
  38. gax "github.com/googleapis/gax-go/v2"
  39. gtransport "google.golang.org/api/transport/grpc"
  40. pb "google.golang.org/genproto/googleapis/devtools/cloudprofiler/v2"
  41. edpb "google.golang.org/genproto/googleapis/rpc/errdetails"
  42. "google.golang.org/grpc/codes"
  43. grpcmd "google.golang.org/grpc/metadata"
  44. "google.golang.org/grpc/status"
  45. )
  46. const (
  47. testProjectID = "test-project-ID"
  48. testInstance = "test-instance"
  49. testZone = "test-zone"
  50. testService = "test-service"
  51. testSvcVersion = "test-service-version"
  52. testProfileDuration = time.Second * 10
  53. testServerTimeout = time.Second * 15
  54. )
  55. func createTestDeployment() *pb.Deployment {
  56. labels := map[string]string{
  57. zoneNameLabel: testZone,
  58. versionLabel: testSvcVersion,
  59. }
  60. return &pb.Deployment{
  61. ProjectId: testProjectID,
  62. Target: testService,
  63. Labels: labels,
  64. }
  65. }
  66. func createTestAgent(psc pb.ProfilerServiceClient) *agent {
  67. return &agent{
  68. client: psc,
  69. deployment: createTestDeployment(),
  70. profileLabels: map[string]string{instanceLabel: testInstance},
  71. profileTypes: []pb.ProfileType{pb.ProfileType_CPU, pb.ProfileType_HEAP, pb.ProfileType_THREADS},
  72. }
  73. }
  74. func createTrailers(dur time.Duration) map[string]string {
  75. b, _ := proto.Marshal(&edpb.RetryInfo{
  76. RetryDelay: ptypes.DurationProto(dur),
  77. })
  78. return map[string]string{
  79. retryInfoMetadata: string(b),
  80. }
  81. }
  82. func TestCreateProfile(t *testing.T) {
  83. ctx := context.Background()
  84. ctrl := gomock.NewController(t)
  85. defer ctrl.Finish()
  86. mpc := mocks.NewMockProfilerServiceClient(ctrl)
  87. a := createTestAgent(mpc)
  88. p := &pb.Profile{Name: "test_profile"}
  89. wantRequest := pb.CreateProfileRequest{
  90. Parent: "projects/" + a.deployment.ProjectId,
  91. Deployment: a.deployment,
  92. ProfileType: a.profileTypes,
  93. }
  94. mpc.EXPECT().CreateProfile(ctx, gomock.Eq(&wantRequest), gomock.Any()).Times(1).Return(p, nil)
  95. gotP := a.createProfile(ctx)
  96. if !testutil.Equal(gotP, p) {
  97. t.Errorf("CreateProfile() got wrong profile, got %v, want %v", gotP, p)
  98. }
  99. }
  100. func TestProfileAndUpload(t *testing.T) {
  101. oldStartCPUProfile, oldStopCPUProfile, oldWriteHeapProfile, oldSleep := startCPUProfile, stopCPUProfile, writeHeapProfile, sleep
  102. defer func() {
  103. startCPUProfile, stopCPUProfile, writeHeapProfile, sleep = oldStartCPUProfile, oldStopCPUProfile, oldWriteHeapProfile, oldSleep
  104. }()
  105. ctx := context.Background()
  106. ctrl := gomock.NewController(t)
  107. defer ctrl.Finish()
  108. var heapCollected1, heapCollected2, heapUploaded, allocUploaded bytes.Buffer
  109. testdata.HeapProfileCollected1.Write(&heapCollected1)
  110. testdata.HeapProfileCollected2.Write(&heapCollected2)
  111. testdata.HeapProfileUploaded.Write(&heapUploaded)
  112. testdata.AllocProfileUploaded.Write(&allocUploaded)
  113. callCount := 0
  114. writeTwoHeapFunc := func(w io.Writer) error {
  115. callCount++
  116. if callCount%2 == 1 {
  117. w.Write(heapCollected1.Bytes())
  118. return nil
  119. }
  120. w.Write(heapCollected2.Bytes())
  121. return nil
  122. }
  123. errFunc := func(io.Writer) error { return errors.New("") }
  124. testDuration := time.Second * 5
  125. tests := []struct {
  126. profileType pb.ProfileType
  127. duration *time.Duration
  128. startCPUProfileFunc func(io.Writer) error
  129. writeHeapProfileFunc func(io.Writer) error
  130. wantBytes []byte
  131. }{
  132. {
  133. profileType: pb.ProfileType_CPU,
  134. duration: &testDuration,
  135. startCPUProfileFunc: func(w io.Writer) error {
  136. w.Write([]byte{1})
  137. return nil
  138. },
  139. writeHeapProfileFunc: errFunc,
  140. wantBytes: []byte{1},
  141. },
  142. {
  143. profileType: pb.ProfileType_CPU,
  144. startCPUProfileFunc: errFunc,
  145. writeHeapProfileFunc: errFunc,
  146. },
  147. {
  148. profileType: pb.ProfileType_CPU,
  149. duration: &testDuration,
  150. startCPUProfileFunc: func(w io.Writer) error {
  151. w.Write([]byte{2})
  152. return nil
  153. },
  154. writeHeapProfileFunc: func(w io.Writer) error {
  155. w.Write([]byte{3})
  156. return nil
  157. },
  158. wantBytes: []byte{2},
  159. },
  160. {
  161. profileType: pb.ProfileType_HEAP,
  162. startCPUProfileFunc: errFunc,
  163. writeHeapProfileFunc: func(w io.Writer) error {
  164. w.Write(heapCollected1.Bytes())
  165. return nil
  166. },
  167. wantBytes: heapUploaded.Bytes(),
  168. },
  169. {
  170. profileType: pb.ProfileType_HEAP_ALLOC,
  171. startCPUProfileFunc: errFunc,
  172. writeHeapProfileFunc: writeTwoHeapFunc,
  173. duration: &testDuration,
  174. wantBytes: allocUploaded.Bytes(),
  175. },
  176. {
  177. profileType: pb.ProfileType_HEAP,
  178. startCPUProfileFunc: errFunc,
  179. writeHeapProfileFunc: errFunc,
  180. },
  181. {
  182. profileType: pb.ProfileType_HEAP,
  183. startCPUProfileFunc: func(w io.Writer) error {
  184. w.Write([]byte{5})
  185. return nil
  186. },
  187. writeHeapProfileFunc: func(w io.Writer) error {
  188. w.Write(heapCollected1.Bytes())
  189. return nil
  190. },
  191. wantBytes: heapUploaded.Bytes(),
  192. },
  193. {
  194. profileType: pb.ProfileType_PROFILE_TYPE_UNSPECIFIED,
  195. startCPUProfileFunc: func(w io.Writer) error {
  196. w.Write([]byte{7})
  197. return nil
  198. },
  199. writeHeapProfileFunc: func(w io.Writer) error {
  200. w.Write(heapCollected1.Bytes())
  201. return nil
  202. },
  203. },
  204. }
  205. for _, tt := range tests {
  206. mpc := mocks.NewMockProfilerServiceClient(ctrl)
  207. a := createTestAgent(mpc)
  208. startCPUProfile = tt.startCPUProfileFunc
  209. stopCPUProfile = func() {}
  210. writeHeapProfile = tt.writeHeapProfileFunc
  211. var gotSleep *time.Duration
  212. sleep = func(ctx context.Context, d time.Duration) error {
  213. gotSleep = &d
  214. return nil
  215. }
  216. p := &pb.Profile{ProfileType: tt.profileType}
  217. if tt.duration != nil {
  218. p.Duration = ptypes.DurationProto(*tt.duration)
  219. }
  220. if tt.wantBytes != nil {
  221. wantProfile := &pb.Profile{
  222. ProfileType: p.ProfileType,
  223. Duration: p.Duration,
  224. ProfileBytes: tt.wantBytes,
  225. Labels: a.profileLabels,
  226. }
  227. wantRequest := pb.UpdateProfileRequest{
  228. Profile: wantProfile,
  229. }
  230. mpc.EXPECT().UpdateProfile(ctx, gomock.Eq(&wantRequest)).Times(1)
  231. } else {
  232. mpc.EXPECT().UpdateProfile(gomock.Any(), gomock.Any()).MaxTimes(0)
  233. }
  234. a.profileAndUpload(ctx, p)
  235. if tt.duration == nil {
  236. if gotSleep != nil {
  237. t.Errorf("profileAndUpload(%v) slept for: %v, want no sleep", p, gotSleep)
  238. }
  239. } else {
  240. if gotSleep == nil {
  241. t.Errorf("profileAndUpload(%v) didn't sleep, want sleep for: %v", p, tt.duration)
  242. } else if *gotSleep != *tt.duration {
  243. t.Errorf("profileAndUpload(%v) slept for wrong duration, got: %v, want: %v", p, gotSleep, tt.duration)
  244. }
  245. }
  246. }
  247. }
  248. func TestRetry(t *testing.T) {
  249. normalDuration := time.Second * 3
  250. negativeDuration := time.Second * -3
  251. tests := []struct {
  252. trailers map[string]string
  253. wantPause *time.Duration
  254. }{
  255. {
  256. createTrailers(normalDuration),
  257. &normalDuration,
  258. },
  259. {
  260. createTrailers(negativeDuration),
  261. nil,
  262. },
  263. {
  264. map[string]string{retryInfoMetadata: "wrong format"},
  265. nil,
  266. },
  267. {
  268. map[string]string{},
  269. nil,
  270. },
  271. }
  272. for _, tt := range tests {
  273. md := grpcmd.New(tt.trailers)
  274. r := &retryer{
  275. backoff: gax.Backoff{
  276. Initial: initialBackoff,
  277. Max: maxBackoff,
  278. Multiplier: backoffMultiplier,
  279. },
  280. md: md,
  281. }
  282. pause, shouldRetry := r.Retry(status.Error(codes.Aborted, ""))
  283. if !shouldRetry {
  284. t.Error("retryer.Retry() returned shouldRetry false, want true")
  285. }
  286. if tt.wantPause != nil {
  287. if pause != *tt.wantPause {
  288. t.Errorf("retryer.Retry() returned wrong pause, got: %v, want: %v", pause, tt.wantPause)
  289. }
  290. } else {
  291. if pause > initialBackoff {
  292. t.Errorf("retryer.Retry() returned wrong pause, got: %v, want: < %v", pause, initialBackoff)
  293. }
  294. }
  295. }
  296. md := grpcmd.New(map[string]string{})
  297. r := &retryer{
  298. backoff: gax.Backoff{
  299. Initial: initialBackoff,
  300. Max: maxBackoff,
  301. Multiplier: backoffMultiplier,
  302. },
  303. md: md,
  304. }
  305. for i := 0; i < 100; i++ {
  306. pause, shouldRetry := r.Retry(errors.New(""))
  307. if !shouldRetry {
  308. t.Errorf("retryer.Retry() called %v times, returned shouldRetry false, want true", i)
  309. }
  310. if pause > maxBackoff {
  311. t.Errorf("retryer.Retry() called %v times, returned wrong pause, got: %v, want: < %v", i, pause, maxBackoff)
  312. }
  313. }
  314. }
  315. func TestWithXGoogHeader(t *testing.T) {
  316. ctx := withXGoogHeader(context.Background())
  317. md, _ := grpcmd.FromOutgoingContext(ctx)
  318. if xg := md[xGoogAPIMetadata]; len(xg) == 0 {
  319. t.Errorf("withXGoogHeader() sets empty xGoogHeader")
  320. } else {
  321. if !strings.Contains(xg[0], "gl-go/") {
  322. t.Errorf("withXGoogHeader() got: %v, want gl-go key", xg[0])
  323. }
  324. if !strings.Contains(xg[0], "gccl/") {
  325. t.Errorf("withXGoogHeader() got: %v, want gccl key", xg[0])
  326. }
  327. if !strings.Contains(xg[0], "gax/") {
  328. t.Errorf("withXGoogHeader() got: %v, want gax key", xg[0])
  329. }
  330. if !strings.Contains(xg[0], "grpc/") {
  331. t.Errorf("withXGoogHeader() got: %v, want grpc key", xg[0])
  332. }
  333. }
  334. }
  335. func TestInitializeAgent(t *testing.T) {
  336. oldConfig, oldMutexEnabled := config, mutexEnabled
  337. defer func() {
  338. config, mutexEnabled = oldConfig, oldMutexEnabled
  339. }()
  340. for _, tt := range []struct {
  341. config Config
  342. enableMutex bool
  343. wantProfileTypes []pb.ProfileType
  344. wantDeploymentLabels map[string]string
  345. wantProfileLabels map[string]string
  346. }{
  347. {
  348. config: Config{ServiceVersion: testSvcVersion, Zone: testZone},
  349. wantProfileTypes: []pb.ProfileType{pb.ProfileType_CPU, pb.ProfileType_HEAP, pb.ProfileType_THREADS, pb.ProfileType_HEAP_ALLOC},
  350. wantDeploymentLabels: map[string]string{zoneNameLabel: testZone, versionLabel: testSvcVersion, languageLabel: "go"},
  351. wantProfileLabels: map[string]string{},
  352. },
  353. {
  354. config: Config{Zone: testZone},
  355. wantProfileTypes: []pb.ProfileType{pb.ProfileType_CPU, pb.ProfileType_HEAP, pb.ProfileType_THREADS, pb.ProfileType_HEAP_ALLOC},
  356. wantDeploymentLabels: map[string]string{zoneNameLabel: testZone, languageLabel: "go"},
  357. wantProfileLabels: map[string]string{},
  358. },
  359. {
  360. config: Config{ServiceVersion: testSvcVersion},
  361. wantProfileTypes: []pb.ProfileType{pb.ProfileType_CPU, pb.ProfileType_HEAP, pb.ProfileType_THREADS, pb.ProfileType_HEAP_ALLOC},
  362. wantDeploymentLabels: map[string]string{versionLabel: testSvcVersion, languageLabel: "go"},
  363. wantProfileLabels: map[string]string{},
  364. },
  365. {
  366. config: Config{Instance: testInstance},
  367. wantProfileTypes: []pb.ProfileType{pb.ProfileType_CPU, pb.ProfileType_HEAP, pb.ProfileType_THREADS, pb.ProfileType_HEAP_ALLOC},
  368. wantDeploymentLabels: map[string]string{languageLabel: "go"},
  369. wantProfileLabels: map[string]string{instanceLabel: testInstance},
  370. },
  371. {
  372. config: Config{Instance: testInstance},
  373. enableMutex: true,
  374. wantProfileTypes: []pb.ProfileType{pb.ProfileType_CPU, pb.ProfileType_HEAP, pb.ProfileType_THREADS, pb.ProfileType_HEAP_ALLOC, pb.ProfileType_CONTENTION},
  375. wantDeploymentLabels: map[string]string{languageLabel: "go"},
  376. wantProfileLabels: map[string]string{instanceLabel: testInstance},
  377. },
  378. {
  379. config: Config{NoHeapProfiling: true},
  380. wantProfileTypes: []pb.ProfileType{pb.ProfileType_CPU, pb.ProfileType_THREADS, pb.ProfileType_HEAP_ALLOC},
  381. wantDeploymentLabels: map[string]string{languageLabel: "go"},
  382. wantProfileLabels: map[string]string{},
  383. },
  384. {
  385. config: Config{NoHeapProfiling: true, NoGoroutineProfiling: true, NoAllocProfiling: true},
  386. wantProfileTypes: []pb.ProfileType{pb.ProfileType_CPU},
  387. wantDeploymentLabels: map[string]string{languageLabel: "go"},
  388. wantProfileLabels: map[string]string{},
  389. },
  390. } {
  391. config = tt.config
  392. config.ProjectID = testProjectID
  393. config.Service = testService
  394. mutexEnabled = tt.enableMutex
  395. a := initializeAgent(nil)
  396. wantDeployment := &pb.Deployment{
  397. ProjectId: testProjectID,
  398. Target: testService,
  399. Labels: tt.wantDeploymentLabels,
  400. }
  401. if !testutil.Equal(a.deployment, wantDeployment) {
  402. t.Errorf("initializeAgent() got deployment: %v, want %v", a.deployment, wantDeployment)
  403. }
  404. if !testutil.Equal(a.profileLabels, tt.wantProfileLabels) {
  405. t.Errorf("initializeAgent() got profile labels: %v, want %v", a.profileLabels, tt.wantProfileLabels)
  406. }
  407. if !testutil.Equal(a.profileTypes, tt.wantProfileTypes) {
  408. t.Errorf("initializeAgent() got profile types: %v, want %v", a.profileTypes, tt.wantProfileTypes)
  409. }
  410. }
  411. }
  412. func TestInitializeConfig(t *testing.T) {
  413. oldConfig, oldGAEService, oldGAEVersion, oldKnativeService, oldKnativeVersion, oldEnvProjectID, oldGetProjectID, oldGetInstanceName, oldGetZone, oldOnGCE := config, os.Getenv("GAE_SERVICE"), os.Getenv("GAE_VERSION"), os.Getenv("K_SERVICE"), os.Getenv("K_REVISION"), os.Getenv("GOOGLE_CLOUD_PROJECT"), getProjectID, getInstanceName, getZone, onGCE
  414. defer func() {
  415. config, getProjectID, getInstanceName, getZone, onGCE = oldConfig, oldGetProjectID, oldGetInstanceName, oldGetZone, oldOnGCE
  416. if err := os.Setenv("GAE_SERVICE", oldGAEService); err != nil {
  417. t.Fatal(err)
  418. }
  419. if err := os.Setenv("GAE_VERSION", oldGAEVersion); err != nil {
  420. t.Fatal(err)
  421. }
  422. if err := os.Setenv("K_SERVICE", oldKnativeService); err != nil {
  423. t.Fatal(err)
  424. }
  425. if err := os.Setenv("K_REVISION", oldKnativeVersion); err != nil {
  426. t.Fatal(err)
  427. }
  428. if err := os.Setenv("GOOGLE_CLOUD_PROJECT", oldEnvProjectID); err != nil {
  429. t.Fatal(err)
  430. }
  431. }()
  432. const (
  433. testGAEService = "test-gae-service"
  434. testGAEVersion = "test-gae-version"
  435. testKnativeService = "test-knative-service"
  436. testKnativeVersion = "test-knative-version"
  437. testGCEProjectID = "test-gce-project-id"
  438. testEnvProjectID = "test-env-project-id"
  439. )
  440. for _, tt := range []struct {
  441. desc string
  442. config Config
  443. wantConfig Config
  444. wantErrorString string
  445. onGAE bool
  446. onKnative bool
  447. onGCE bool
  448. envProjectID bool
  449. }{
  450. {
  451. "accepts service name",
  452. Config{Service: testService},
  453. Config{Service: testService, ProjectID: testGCEProjectID, Zone: testZone, Instance: testInstance},
  454. "",
  455. false,
  456. false,
  457. true,
  458. false,
  459. },
  460. {
  461. "env project overrides GCE project",
  462. Config{Service: testService},
  463. Config{Service: testService, ProjectID: testEnvProjectID, Zone: testZone, Instance: testInstance},
  464. "",
  465. false,
  466. false,
  467. true,
  468. true,
  469. },
  470. {
  471. "requires service name",
  472. Config{},
  473. Config{},
  474. "service name must be configured",
  475. false,
  476. false,
  477. true,
  478. false,
  479. },
  480. {
  481. "requires valid service name",
  482. Config{Service: "Service"},
  483. Config{Service: "Service"},
  484. "service name \"Service\" does not match regular expression ^[a-z]([-a-z0-9_.]{0,253}[a-z0-9])?$",
  485. false,
  486. false,
  487. true,
  488. false,
  489. },
  490. {
  491. "accepts service name from config and service version from GAE",
  492. Config{Service: testService},
  493. Config{Service: testService, ServiceVersion: testGAEVersion, ProjectID: testGCEProjectID, Zone: testZone, Instance: testInstance},
  494. "",
  495. true,
  496. false,
  497. true,
  498. false,
  499. },
  500. {
  501. "reads both service name and version from GAE env vars",
  502. Config{},
  503. Config{Service: testGAEService, ServiceVersion: testGAEVersion, ProjectID: testGCEProjectID, Zone: testZone, Instance: testInstance},
  504. "",
  505. true,
  506. false,
  507. true,
  508. false,
  509. },
  510. {
  511. "reads both service name and version from Knative env vars",
  512. Config{},
  513. Config{Service: testKnativeService, ServiceVersion: testKnativeVersion, ProjectID: testGCEProjectID, Zone: testZone, Instance: testInstance},
  514. "",
  515. false,
  516. true,
  517. true,
  518. false,
  519. },
  520. {
  521. "accepts service version from config",
  522. Config{Service: testService, ServiceVersion: testSvcVersion},
  523. Config{Service: testService, ServiceVersion: testSvcVersion, ProjectID: testGCEProjectID, Zone: testZone, Instance: testInstance},
  524. "",
  525. false,
  526. false,
  527. true,
  528. false,
  529. },
  530. {
  531. "configured version has priority over GAE-provided version",
  532. Config{Service: testService, ServiceVersion: testSvcVersion},
  533. Config{Service: testService, ServiceVersion: testSvcVersion, ProjectID: testGCEProjectID, Zone: testZone, Instance: testInstance},
  534. "",
  535. true,
  536. false,
  537. true,
  538. false,
  539. },
  540. {
  541. "configured version has priority over Knative-provided version",
  542. Config{Service: testService, ServiceVersion: testSvcVersion},
  543. Config{Service: testService, ServiceVersion: testSvcVersion, ProjectID: testGCEProjectID, Zone: testZone, Instance: testInstance},
  544. "",
  545. false,
  546. true,
  547. true,
  548. false,
  549. },
  550. {
  551. "GAE version has priority over Knative-provided version",
  552. Config{},
  553. Config{Service: testGAEService, ServiceVersion: testGAEVersion, ProjectID: testGCEProjectID, Zone: testZone, Instance: testInstance},
  554. "",
  555. true,
  556. true,
  557. true,
  558. false,
  559. },
  560. {
  561. "configured project ID has priority over metadata-provided project ID",
  562. Config{Service: testService, ProjectID: testProjectID},
  563. Config{Service: testService, ProjectID: testProjectID, Zone: testZone, Instance: testInstance},
  564. "",
  565. false,
  566. false,
  567. true,
  568. false,
  569. },
  570. {
  571. "configured project ID has priority over environment project ID",
  572. Config{Service: testService, ProjectID: testProjectID},
  573. Config{Service: testService, ProjectID: testProjectID},
  574. "",
  575. false,
  576. false,
  577. false,
  578. true,
  579. },
  580. {
  581. "requires project ID if not on GCE",
  582. Config{Service: testService},
  583. Config{Service: testService},
  584. "project ID must be specified in the configuration if running outside of GCP",
  585. false,
  586. false,
  587. false,
  588. false,
  589. },
  590. {
  591. "configured zone has priority over metadata-provided zone",
  592. Config{Service: testService, ProjectID: testProjectID, Zone: testZone + "-override"},
  593. Config{Service: testService, ProjectID: testProjectID, Zone: testZone + "-override", Instance: testInstance},
  594. "",
  595. false,
  596. false,
  597. true,
  598. false,
  599. },
  600. {
  601. "configured instance has priority over metadata-provided instance",
  602. Config{Service: testService, ProjectID: testProjectID, Instance: testInstance + "-override"},
  603. Config{Service: testService, ProjectID: testProjectID, Zone: testZone, Instance: testInstance + "-override"},
  604. "",
  605. false,
  606. false,
  607. true,
  608. false,
  609. },
  610. } {
  611. t.Logf("Running test: %s", tt.desc)
  612. gaeEnvService, gaeEnvVersion := "", ""
  613. if tt.onGAE {
  614. gaeEnvService, gaeEnvVersion = testGAEService, testGAEVersion
  615. }
  616. if err := os.Setenv("GAE_SERVICE", gaeEnvService); err != nil {
  617. t.Fatal(err)
  618. }
  619. if err := os.Setenv("GAE_VERSION", gaeEnvVersion); err != nil {
  620. t.Fatal(err)
  621. }
  622. knEnvService, knEnvVersion := "", ""
  623. if tt.onKnative {
  624. knEnvService, knEnvVersion = testKnativeService, testKnativeVersion
  625. }
  626. if err := os.Setenv("K_SERVICE", knEnvService); err != nil {
  627. t.Fatal(err)
  628. }
  629. if err := os.Setenv("K_REVISION", knEnvVersion); err != nil {
  630. t.Fatal(err)
  631. }
  632. if tt.onGCE {
  633. onGCE = func() bool { return true }
  634. getProjectID = func() (string, error) { return testGCEProjectID, nil }
  635. getZone = func() (string, error) { return testZone, nil }
  636. getInstanceName = func() (string, error) { return testInstance, nil }
  637. } else {
  638. onGCE = func() bool { return false }
  639. getProjectID = func() (string, error) { return "", fmt.Errorf("test get project id error") }
  640. getZone = func() (string, error) { return "", fmt.Errorf("test get zone error") }
  641. getInstanceName = func() (string, error) { return "", fmt.Errorf("test get instance error") }
  642. }
  643. envProjectID := ""
  644. if tt.envProjectID {
  645. envProjectID = testEnvProjectID
  646. }
  647. if err := os.Setenv("GOOGLE_CLOUD_PROJECT", envProjectID); err != nil {
  648. t.Fatal(err)
  649. }
  650. errorString := ""
  651. if err := initializeConfig(tt.config); err != nil {
  652. errorString = err.Error()
  653. }
  654. if !strings.Contains(errorString, tt.wantErrorString) {
  655. t.Errorf("initializeConfig(%v) got error: %v, want contain %v", tt.config, errorString, tt.wantErrorString)
  656. }
  657. if tt.wantErrorString == "" {
  658. tt.wantConfig.APIAddr = apiAddress
  659. }
  660. if config != tt.wantConfig {
  661. t.Errorf("initializeConfig(%v) got: %v, want %v", tt.config, config, tt.wantConfig)
  662. }
  663. }
  664. for _, tt := range []struct {
  665. desc string
  666. wantErr bool
  667. getProjectIDError error
  668. getZoneError error
  669. getInstanceError error
  670. }{
  671. {
  672. desc: "metadata returns error for project ID",
  673. wantErr: true,
  674. getProjectIDError: errors.New("fake get project ID error"),
  675. },
  676. {
  677. desc: "metadata returns error for zone",
  678. wantErr: true,
  679. getZoneError: errors.New("fake get zone error"),
  680. },
  681. {
  682. desc: "metadata returns error for instance",
  683. wantErr: true,
  684. getInstanceError: errors.New("fake get instance error"),
  685. },
  686. {
  687. desc: "metadata returns NotDefinedError for instance",
  688. getInstanceError: gcemd.NotDefinedError("fake GCE metadata NotDefinedError error"),
  689. },
  690. } {
  691. onGCE = func() bool { return true }
  692. getProjectID = func() (string, error) { return testGCEProjectID, tt.getProjectIDError }
  693. getZone = func() (string, error) { return testZone, tt.getZoneError }
  694. getInstanceName = func() (string, error) { return testInstance, tt.getInstanceError }
  695. if err := initializeConfig(Config{Service: testService}); (err != nil) != tt.wantErr {
  696. t.Errorf("%s: initializeConfig() got error: %v, want error %t", tt.desc, err, tt.wantErr)
  697. }
  698. }
  699. }
  700. type fakeProfilerServer struct {
  701. count int
  702. gotProfiles map[string][]byte
  703. done chan bool
  704. }
  705. func (fs *fakeProfilerServer) CreateProfile(ctx context.Context, in *pb.CreateProfileRequest) (*pb.Profile, error) {
  706. fs.count++
  707. switch fs.count {
  708. case 1:
  709. return &pb.Profile{Name: "testCPU", ProfileType: pb.ProfileType_CPU, Duration: ptypes.DurationProto(testProfileDuration)}, nil
  710. case 2:
  711. return &pb.Profile{Name: "testHeap", ProfileType: pb.ProfileType_HEAP}, nil
  712. default:
  713. select {}
  714. }
  715. }
  716. func (fs *fakeProfilerServer) UpdateProfile(ctx context.Context, in *pb.UpdateProfileRequest) (*pb.Profile, error) {
  717. switch in.Profile.ProfileType {
  718. case pb.ProfileType_CPU:
  719. fs.gotProfiles["CPU"] = in.Profile.ProfileBytes
  720. case pb.ProfileType_HEAP:
  721. fs.gotProfiles["HEAP"] = in.Profile.ProfileBytes
  722. fs.done <- true
  723. }
  724. return in.Profile, nil
  725. }
  726. func (fs *fakeProfilerServer) CreateOfflineProfile(_ context.Context, _ *pb.CreateOfflineProfileRequest) (*pb.Profile, error) {
  727. return nil, status.Error(codes.Unimplemented, "")
  728. }
  729. func profileeLoop(quit chan bool) {
  730. for {
  731. select {
  732. case <-quit:
  733. return
  734. default:
  735. profileeWork()
  736. }
  737. }
  738. }
  739. func profileeWork() {
  740. data := make([]byte, 1024*1024)
  741. rand.Read(data)
  742. var b bytes.Buffer
  743. gz := gzip.NewWriter(&b)
  744. if _, err := gz.Write(data); err != nil {
  745. log.Println("failed to write to gzip stream", err)
  746. return
  747. }
  748. if err := gz.Flush(); err != nil {
  749. log.Println("failed to flush to gzip stream", err)
  750. return
  751. }
  752. if err := gz.Close(); err != nil {
  753. log.Println("failed to close gzip stream", err)
  754. }
  755. }
  756. func validateProfile(rawData []byte, wantFunctionName string) error {
  757. p, err := profile.ParseData(rawData)
  758. if err != nil {
  759. return fmt.Errorf("ParseData failed: %v", err)
  760. }
  761. if len(p.Sample) == 0 {
  762. return fmt.Errorf("profile contains zero samples: %v", p)
  763. }
  764. if len(p.Location) == 0 {
  765. return fmt.Errorf("profile contains zero locations: %v", p)
  766. }
  767. if len(p.Function) == 0 {
  768. return fmt.Errorf("profile contains zero functions: %v", p)
  769. }
  770. for _, l := range p.Location {
  771. if len(l.Line) > 0 && l.Line[0].Function != nil && strings.Contains(l.Line[0].Function.Name, wantFunctionName) {
  772. return nil
  773. }
  774. }
  775. return fmt.Errorf("wanted function name %s not found in the profile", wantFunctionName)
  776. }
  777. func TestDeltaMutexProfile(t *testing.T) {
  778. oldMutexEnabled, oldMaxProcs := mutexEnabled, runtime.GOMAXPROCS(10)
  779. defer func() {
  780. mutexEnabled = oldMutexEnabled
  781. runtime.GOMAXPROCS(oldMaxProcs)
  782. }()
  783. if mutexEnabled = enableMutexProfiling(); !mutexEnabled {
  784. t.Skip("Go too old - mutex profiling not supported.")
  785. }
  786. hog(time.Second, mutexHog)
  787. go func() {
  788. hog(2*time.Second, backgroundHog)
  789. }()
  790. var prof bytes.Buffer
  791. if err := deltaMutexProfile(context.Background(), time.Second, &prof); err != nil {
  792. t.Fatalf("deltaMutexProfile() got error: %v", err)
  793. }
  794. p, err := profile.Parse(&prof)
  795. if err != nil {
  796. t.Fatalf("profile.Parse() got error: %v", err)
  797. }
  798. if s := sum(p, "mutexHog"); s != 0 {
  799. t.Errorf("mutexHog found in the delta mutex profile (sum=%d):\n%s", s, p)
  800. }
  801. if s := sum(p, "backgroundHog"); s <= 0 {
  802. t.Errorf("backgroundHog not in the delta mutex profile (sum=%d):\n%s", s, p)
  803. }
  804. }
  805. // sum returns the sum of all mutex counts from the samples whose
  806. // stacks include the specified function name.
  807. func sum(p *profile.Profile, fname string) int64 {
  808. locIDs := map[*profile.Location]bool{}
  809. for _, loc := range p.Location {
  810. for _, l := range loc.Line {
  811. if strings.Contains(l.Function.Name, fname) {
  812. locIDs[loc] = true
  813. break
  814. }
  815. }
  816. }
  817. var s int64
  818. for _, sample := range p.Sample {
  819. for _, loc := range sample.Location {
  820. if locIDs[loc] {
  821. s += sample.Value[0]
  822. break
  823. }
  824. }
  825. }
  826. return s
  827. }
  828. func mutexHog(mu1, mu2 *sync.Mutex, start time.Time, dt time.Duration) {
  829. for time.Since(start) < dt {
  830. mu1.Lock()
  831. runtime.Gosched()
  832. mu2.Lock()
  833. mu1.Unlock()
  834. mu2.Unlock()
  835. }
  836. }
  837. // backgroundHog is identical to mutexHog. We keep them separate
  838. // in order to distinguish them with function names in the stack trace.
  839. func backgroundHog(mu1, mu2 *sync.Mutex, start time.Time, dt time.Duration) {
  840. for time.Since(start) < dt {
  841. mu1.Lock()
  842. runtime.Gosched()
  843. mu2.Lock()
  844. mu1.Unlock()
  845. mu2.Unlock()
  846. }
  847. }
  848. func hog(dt time.Duration, hogger func(mu1, mu2 *sync.Mutex, start time.Time, dt time.Duration)) {
  849. start := time.Now()
  850. mu1 := new(sync.Mutex)
  851. mu2 := new(sync.Mutex)
  852. var wg sync.WaitGroup
  853. wg.Add(10)
  854. for i := 0; i < 10; i++ {
  855. go func() {
  856. defer wg.Done()
  857. hogger(mu1, mu2, start, dt)
  858. }()
  859. }
  860. wg.Wait()
  861. }
  862. func TestAgentWithServer(t *testing.T) {
  863. oldDialGRPC, oldConfig := dialGRPC, config
  864. defer func() {
  865. dialGRPC, config = oldDialGRPC, oldConfig
  866. }()
  867. srv, err := testutil.NewServer()
  868. if err != nil {
  869. t.Fatalf("testutil.NewServer(): %v", err)
  870. }
  871. fakeServer := &fakeProfilerServer{gotProfiles: map[string][]byte{}, done: make(chan bool)}
  872. pb.RegisterProfilerServiceServer(srv.Gsrv, fakeServer)
  873. srv.Start()
  874. dialGRPC = gtransport.DialInsecure
  875. if err := Start(Config{
  876. Service: testService,
  877. ProjectID: testProjectID,
  878. APIAddr: srv.Addr,
  879. Instance: testInstance,
  880. Zone: testZone,
  881. }); err != nil {
  882. t.Fatalf("Start(): %v", err)
  883. }
  884. quitProfilee := make(chan bool)
  885. go profileeLoop(quitProfilee)
  886. select {
  887. case <-fakeServer.done:
  888. case <-time.After(testServerTimeout):
  889. t.Errorf("got timeout after %v, want fake server done", testServerTimeout)
  890. }
  891. quitProfilee <- true
  892. for _, pType := range []string{"CPU", "HEAP"} {
  893. if profile, ok := fakeServer.gotProfiles[pType]; !ok {
  894. t.Errorf("fakeServer.gotProfiles[%s] got no profile, want profile", pType)
  895. } else if err := validateProfile(profile, "profilee"); err != nil {
  896. t.Errorf("validateProfile(%s) got error: %v", pType, err)
  897. }
  898. }
  899. }