|
- /*
- *
- * Copyright 2019 gRPC authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-
- // Package xds implements a balancer that communicates with a remote balancer using the Envoy xDS
- // protocol.
- package xds
-
- import (
- "context"
- "encoding/json"
- "errors"
- "fmt"
- "reflect"
- "sync"
- "time"
-
- xdspb "github.com/envoyproxy/go-control-plane/envoy/api/v2"
- "github.com/gogo/protobuf/proto"
- "google.golang.org/grpc/balancer"
- "google.golang.org/grpc/balancer/xds/edsbalancer"
- "google.golang.org/grpc/connectivity"
- "google.golang.org/grpc/grpclog"
- "google.golang.org/grpc/resolver"
- )
-
const (
	// defaultTimeout bounds how long the first xds client may go without a
	// response from the traffic director before fallback is triggered.
	defaultTimeout = 10 * time.Second
	// xdsName is the name this balancer registers under and reports from Name().
	xdsName = "xds"
)
-
var (
	// This field is for testing purpose.
	// TODO: if later we make startupTimeout configurable through BuildOptions(maybe?), then we can remove
	// this field and configure through BuildOptions instead.
	startupTimeout = defaultTimeout
	// newEDSBalancer constructs the eds balancer. It is a package-level var
	// (rather than a direct call) so a fake implementation can be substituted;
	// see the edsBalancerInterface comment about the fake testing balancer.
	newEDSBalancer = func(cc balancer.ClientConn) edsBalancerInterface {
		return edsbalancer.NewXDSBalancer(cc)
	}
)
-
// init registers the xds balancer builder with gRPC's balancer registry
// under the name "xds".
func init() {
	balancer.Register(newXDSBalancerBuilder())
}
-
// xdsBalancerBuilder builds xdsBalancer instances; it is registered with the
// balancer registry in init.
type xdsBalancerBuilder struct{}

// newXDSBalancerBuilder returns a balancer.Builder for the xds balancer.
func newXDSBalancerBuilder() balancer.Builder {
	return &xdsBalancerBuilder{}
}
-
- func (b *xdsBalancerBuilder) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
- ctx, cancel := context.WithCancel(context.Background())
- x := &xdsBalancer{
- ctx: ctx,
- cancel: cancel,
- buildOpts: opts,
- startupTimeout: startupTimeout,
- connStateMgr: &connStateMgr{},
- startup: true,
- grpcUpdate: make(chan interface{}),
- xdsClientUpdate: make(chan interface{}),
- timer: createDrainedTimer(), // initialized a timer that won't fire without reset
- }
- x.cc = &xdsClientConn{
- updateState: x.connStateMgr.updateState,
- ClientConn: cc,
- }
- go x.run()
- return x
- }
-
// Name returns the registered name of this balancer ("xds").
func (b *xdsBalancerBuilder) Name() string {
	return xdsName
}
-
// edsBalancerInterface defines the interface that edsBalancer must implement to
// communicate with xdsBalancer.
//
// It's implemented by the real eds balancer and a fake testing eds balancer.
type edsBalancerInterface interface {
	// HandleEDSResponse passes the received EDS message from traffic director to eds balancer.
	HandleEDSResponse(edsResp *xdspb.ClusterLoadAssignment)
	// HandleChildPolicy updates the eds balancer the intra-cluster load balancing policy to use.
	HandleChildPolicy(name string, config json.RawMessage)
	// HandleSubConnStateChange handles state change for SubConn.
	HandleSubConnStateChange(sc balancer.SubConn, state connectivity.State)
	// Close closes the eds balancer.
	Close()
}
-
// xdsBalancer manages xdsClient and the actual balancer that does load balancing (either edsBalancer,
// or fallback LB).
type xdsBalancer struct {
	cc             balancer.ClientConn // *xdsClientConn wrapper, not the raw ClientConn from grpc
	buildOpts      balancer.BuildOptions
	startupTimeout time.Duration // how long the first xds client may stay silent before fallback
	// xdsStaleTimeout, when set, re-arms the fallback timer after contact with
	// the remote is lost. Currently never set; see the TODO in handleXDSClientUpdate.
	xdsStaleTimeout   *time.Duration
	connStateMgr      *connStateMgr
	ctx               context.Context    // cancelled by Close; unblocks run and all channel sends
	cancel            context.CancelFunc
	startup           bool // startup indicates whether this xdsBalancer is in startup stage.
	inFallbackMonitor bool // true while waiting for a fallback condition after losing contact

	// xdsBalancer continuously monitor the channels below, and will handle events from them in sync.
	grpcUpdate      chan interface{}
	xdsClientUpdate chan interface{}
	timer           *time.Timer     // fallback timer; kept drained when not armed
	noSubConnAlert  <-chan struct{} // closed when no SubConn is READY; nil unless monitoring

	client           *client             // may change when passed a different service config
	config           *xdsConfig          // may change when passed a different service config
	xdsLB            edsBalancerInterface
	fallbackLB       balancer.Balancer
	fallbackInitData *addressUpdate // may change when HandleResolved address is called
}
-
// startNewXDSClient creates and starts a new xds client talking to the
// traffic director named in u, installing callbacks that funnel events back
// into the run goroutine. During startup it also arms the startup fallback
// timer. The previous client (if any) is closed by the new one per the
// hand-off protocol described below.
func (x *xdsBalancer) startNewXDSClient(u *xdsConfig) {
	// If the xdsBalancer is in startup stage, then we need to apply the startup timeout for the first
	// xdsClient to get a response from the traffic director.
	if x.startup {
		x.startFallbackMonitoring()
	}

	// Whenever service config gives a new traffic director name, we need to create an xds client to
	// connect to it. However, previous xds client should not be closed until the new one successfully
	// connects to the traffic director (i.e. get an ADS response from the traffic director). Therefore,
	// we let each new client to be responsible to close its immediate predecessor. In this way,
	// xdsBalancer does not to implement complex synchronization to achieve the same purpose.
	prevClient := x.client
	// haveGotADS is true means, this xdsClient has got ADS response from director in the past, which
	// means it can close previous client if it hasn't and it now can send lose contact signal for
	// fallback monitoring.
	var haveGotADS bool

	// set up callbacks for the xds client.
	newADS := func(ctx context.Context, resp proto.Message) error {
		if !haveGotADS {
			if prevClient != nil {
				prevClient.close()
			}
			haveGotADS = true
		}
		return x.newADSResponse(ctx, resp)
	}
	loseContact := func(ctx context.Context) {
		// loseContact signal is only useful when the current xds client has received ADS response before,
		// and has not been closed by later xds client.
		if haveGotADS {
			select {
			case <-ctx.Done():
				return
			default:
			}
			x.loseContact(ctx)
		}
	}
	exitCleanup := func() {
		// Each xds client is responsible to close its predecessor if there's one. There are two paths
		// for a xds client to close its predecessor:
		// 1. Once it receives its first ADS response.
		// 2. It hasn't received its first ADS response yet, but its own successor has received ADS
		//    response (which triggers the exit of it). Therefore, it needs to close its predecessor if
		//    it has one.
		// Here the exitCleanup is for the 2nd path.
		if !haveGotADS && prevClient != nil {
			prevClient.close()
		}
	}
	// u.ChildPolicy == nil selects CDS mode for the client (see the comment in
	// handleGRPCUpdate about nil ChildPolicy triggering CDS requests).
	x.client = newXDSClient(u.BalancerName, x.cc.Target(), u.ChildPolicy == nil, x.buildOpts, newADS, loseContact, exitCleanup)
	go x.client.run()
}
-
// run gets executed in a goroutine once xdsBalancer is created. It monitors updates from grpc,
// xdsClient and load balancer. It synchronizes the operations that happen inside xdsBalancer. It
// exits when xdsBalancer is closed.
func (x *xdsBalancer) run() {
	for {
		select {
		case update := <-x.grpcUpdate:
			x.handleGRPCUpdate(update)
		case update := <-x.xdsClientUpdate:
			x.handleXDSClientUpdate(update)
		case <-x.timer.C: // x.timer.C will block if we are not in fallback monitoring stage.
			x.switchFallback()
		case <-x.noSubConnAlert: // x.noSubConnAlert will block if we are not in fallback monitoring stage.
			x.switchFallback()
		case <-x.ctx.Done():
			// Close was called: release the client and whichever child
			// balancer (eds or fallback) is currently running.
			if x.client != nil {
				x.client.close()
			}
			if x.xdsLB != nil {
				x.xdsLB.Close()
			}
			if x.fallbackLB != nil {
				x.fallbackLB.Close()
			}
			return
		}
	}
}
-
// handleGRPCUpdate processes an update that originated from grpc (resolved
// addresses, SubConn state changes, or a new service config). It runs only on
// the run goroutine, so no locking is needed. Panics on unknown update types,
// which is unreachable because only the three types below are ever sent.
func (x *xdsBalancer) handleGRPCUpdate(update interface{}) {
	switch u := update.(type) {
	case *addressUpdate:
		if x.fallbackLB != nil {
			x.fallbackLB.HandleResolvedAddrs(u.addrs, u.err)
		}
		// Remember the latest addresses so a fallback balancer started later
		// can be initialized with them (see startFallBackBalancer).
		x.fallbackInitData = u
	case *subConnStateUpdate:
		if x.xdsLB != nil {
			x.xdsLB.HandleSubConnStateChange(u.sc, u.state)
		}
		if x.fallbackLB != nil {
			x.fallbackLB.HandleSubConnStateChange(u.sc, u.state)
		}
	case *xdsConfig:
		if x.config == nil {
			// The first time we get config, we just need to start the xdsClient.
			x.startNewXDSClient(u)
			x.config = u
			return
		}
		// With a different BalancerName, we need to create a new xdsClient.
		// If current or previous ChildPolicy is nil, then we also need to recreate a new xdsClient.
		// This is because with nil ChildPolicy xdsClient will do CDS request, while non-nil won't.
		if u.BalancerName != x.config.BalancerName || (u.ChildPolicy == nil) != (x.config.ChildPolicy == nil) {
			x.startNewXDSClient(u)
		}
		// We will update the xdsLB with the new child policy, if we got a different one and it's not nil.
		// The nil case will be handled when the CDS response gets processed, we will update xdsLB at that time.
		if !reflect.DeepEqual(u.ChildPolicy, x.config.ChildPolicy) && u.ChildPolicy != nil && x.xdsLB != nil {
			x.xdsLB.HandleChildPolicy(u.ChildPolicy.Name, u.ChildPolicy.Config)
		}
		if !reflect.DeepEqual(u.FallBackPolicy, x.config.FallBackPolicy) && x.fallbackLB != nil {
			x.fallbackLB.Close()
			x.startFallBackBalancer(u)
		}
		x.config = u
	default:
		// unreachable path
		panic("wrong update type")
	}
}
-
// handleXDSClientUpdate processes an update from the xds client (a CDS or EDS
// response, or a lose-contact signal). Each update carries the originating
// client's ctx; updates from clients that have since been closed are dropped.
// Runs only on the run goroutine. Panics on unknown update types.
func (x *xdsBalancer) handleXDSClientUpdate(update interface{}) {
	switch u := update.(type) {
	case *cdsResp:
		select {
		case <-u.ctx.Done():
			// The client that produced this response was closed; ignore.
			return
		default:
		}
		// Ensures x.xdsLB is non-nil below.
		x.cancelFallbackAndSwitchEDSBalancerIfNecessary()
		// TODO: Get the optional xds record stale timeout from OutlierDetection message. If not exist,
		// reset to 0.
		// x.xdsStaleTimeout = u.OutlierDetection.TO_BE_DEFINED_AND_ADDED
		x.xdsLB.HandleChildPolicy(u.resp.LbPolicy.String(), nil)
	case *edsResp:
		select {
		case <-u.ctx.Done():
			return
		default:
		}
		x.cancelFallbackAndSwitchEDSBalancerIfNecessary()
		x.xdsLB.HandleEDSResponse(u.resp)
	case *loseContact:
		select {
		case <-u.ctx.Done():
			return
		default:
		}
		// if we are already doing fallback monitoring, then we ignore new loseContact signal.
		if x.inFallbackMonitor {
			return
		}
		x.inFallbackMonitor = true
		x.startFallbackMonitoring()
	default:
		panic("unexpected xds client update type")
	}
}
-
// connStateMgr tracks the latest connectivity state reported through the
// wrapped ClientConn and lets a caller wait for the state to leave READY.
type connStateMgr struct {
	mu       sync.Mutex
	curState connectivity.State
	// notify, when non-nil, is closed the next time the state is not READY.
	notify chan struct{}
}
-
- func (c *connStateMgr) updateState(s connectivity.State) {
- c.mu.Lock()
- defer c.mu.Unlock()
- c.curState = s
- if s != connectivity.Ready && c.notify != nil {
- close(c.notify)
- c.notify = nil
- }
- }
-
- func (c *connStateMgr) notifyWhenNotReady() <-chan struct{} {
- c.mu.Lock()
- defer c.mu.Unlock()
- if c.curState != connectivity.Ready {
- ch := make(chan struct{})
- close(ch)
- return ch
- }
- c.notify = make(chan struct{})
- return c.notify
- }
-
// xdsClientConn wraps around the balancer.ClientConn passed in from grpc. The wrapping is to add
// functionality to get notification when no subconn is in READY state.
// TODO: once we have the change that keeps both edsbalancer and fallback balancer alive at the same
// time, we need to make sure to synchronize updates from both entities on the ClientConn.
type xdsClientConn struct {
	// updateState is connStateMgr.updateState; called on every balancer state change.
	updateState func(s connectivity.State)
	balancer.ClientConn
}
-
// UpdateBalancerState records the aggregated connectivity state with the
// connStateMgr (which drives the no-READY-SubConn fallback alert) before
// forwarding the update to the real ClientConn.
func (w *xdsClientConn) UpdateBalancerState(s connectivity.State, p balancer.Picker) {
	w.updateState(s)
	w.ClientConn.UpdateBalancerState(s, p)
}
-
// addressUpdate carries resolved addresses (or a resolver error) from
// HandleResolvedAddrs to the run goroutine.
type addressUpdate struct {
	addrs []resolver.Address
	err   error
}

// subConnStateUpdate carries a SubConn state change from
// HandleSubConnStateChange to the run goroutine.
type subConnStateUpdate struct {
	sc    balancer.SubConn
	state connectivity.State
}
-
- func (x *xdsBalancer) HandleSubConnStateChange(sc balancer.SubConn, state connectivity.State) {
- update := &subConnStateUpdate{
- sc: sc,
- state: state,
- }
- select {
- case x.grpcUpdate <- update:
- case <-x.ctx.Done():
- }
- }
-
- func (x *xdsBalancer) HandleResolvedAddrs(addrs []resolver.Address, err error) {
- update := &addressUpdate{
- addrs: addrs,
- err: err,
- }
- select {
- case x.grpcUpdate <- update:
- case <-x.ctx.Done():
- }
- }
-
- // TODO: once the API is merged, check whether we need to change the function name/signature here.
- func (x *xdsBalancer) HandleBalancerConfig(config json.RawMessage) error {
- var cfg xdsConfig
- if err := json.Unmarshal(config, &cfg); err != nil {
- return errors.New("unable to unmarshal balancer config into xds config")
- }
-
- select {
- case x.grpcUpdate <- &cfg:
- case <-x.ctx.Done():
- }
- return nil
- }
-
// cdsResp wraps a CDS response together with the producing xds client's ctx,
// so stale responses from closed clients can be discarded.
type cdsResp struct {
	ctx  context.Context
	resp *xdspb.Cluster
}

// edsResp wraps an EDS response together with the producing xds client's ctx,
// so stale responses from closed clients can be discarded.
type edsResp struct {
	ctx  context.Context
	resp *xdspb.ClusterLoadAssignment
}
-
- func (x *xdsBalancer) newADSResponse(ctx context.Context, resp proto.Message) error {
- var update interface{}
- switch u := resp.(type) {
- case *xdspb.Cluster:
- if u.GetName() != x.cc.Target() {
- return fmt.Errorf("unmatched service name, got %s, want %s", u.GetName(), x.cc.Target())
- }
- if u.GetType() != xdspb.Cluster_EDS {
- return fmt.Errorf("unexpected service discovery type, got %v, want %v", u.GetType(), xdspb.Cluster_EDS)
- }
- update = &cdsResp{ctx: ctx, resp: u}
- case *xdspb.ClusterLoadAssignment:
- // nothing to check
- update = &edsResp{ctx: ctx, resp: u}
- default:
- grpclog.Warningf("xdsBalancer: got a response that's neither CDS nor EDS, type = %T", u)
- }
-
- select {
- case x.xdsClientUpdate <- update:
- case <-x.ctx.Done():
- case <-ctx.Done():
- }
-
- return nil
- }
-
// loseContact is the update sent to the run goroutine when an xds client
// loses contact with the traffic director. ctx identifies the producing
// client so stale signals from closed clients can be discarded.
type loseContact struct {
	ctx context.Context
}
-
- func (x *xdsBalancer) loseContact(ctx context.Context) {
- select {
- case x.xdsClientUpdate <- &loseContact{ctx: ctx}:
- case <-x.ctx.Done():
- case <-ctx.Done():
- }
- }
-
- func (x *xdsBalancer) switchFallback() {
- if x.xdsLB != nil {
- x.xdsLB.Close()
- x.xdsLB = nil
- }
- x.startFallBackBalancer(x.config)
- x.cancelFallbackMonitoring()
- }
-
// x.cancelFallbackAndSwitchEDSBalancerIfNecessary() will be no-op if we have a working xds client.
// It will cancel fallback monitoring if we are in fallback monitoring stage.
// If there's no running edsBalancer currently, it will create one and initialize it. Also, it will
// shutdown the fallback balancer if there's one running.
func (x *xdsBalancer) cancelFallbackAndSwitchEDSBalancerIfNecessary() {
	// xDS update will cancel fallback monitoring if we are in fallback monitoring stage.
	x.cancelFallbackMonitoring()

	// xDS update will switch balancer back to edsBalancer if we are in fallback.
	if x.xdsLB == nil {
		if x.fallbackLB != nil {
			x.fallbackLB.Close()
			x.fallbackLB = nil
		}
		x.xdsLB = newEDSBalancer(x.cc)
		// With a nil ChildPolicy the policy comes from the CDS response
		// instead (see handleXDSClientUpdate's cdsResp case).
		if x.config.ChildPolicy != nil {
			x.xdsLB.HandleChildPolicy(x.config.ChildPolicy.Name, x.config.ChildPolicy.Config)
		}
	}
}
-
- func (x *xdsBalancer) startFallBackBalancer(c *xdsConfig) {
- if c.FallBackPolicy == nil {
- x.startFallBackBalancer(&xdsConfig{
- FallBackPolicy: &loadBalancingConfig{
- Name: "round_robin",
- },
- })
- return
- }
- // builder will always be non-nil, since when parse JSON into xdsConfig, we check whether the specified
- // balancer is registered or not.
- builder := balancer.Get(c.FallBackPolicy.Name)
- x.fallbackLB = builder.Build(x.cc, x.buildOpts)
- if x.fallbackInitData != nil {
- // TODO: uncomment when HandleBalancerConfig API is merged.
- //x.fallbackLB.HandleBalancerConfig(c.FallBackPolicy.Config)
- x.fallbackLB.HandleResolvedAddrs(x.fallbackInitData.addrs, x.fallbackInitData.err)
- }
- }
-
- // There are three ways that could lead to fallback:
- // 1. During startup (i.e. the first xds client is just created and attempts to contact the traffic
- // director), fallback if it has not received any response from the director within the configured
- // timeout.
- // 2. After xds client loses contact with the remote, fallback if all connections to the backends are
- // lost (i.e. not in state READY).
- // 3. After xds client loses contact with the remote, fallback if the stale eds timeout has been
- // configured through CDS and is timed out.
- func (x *xdsBalancer) startFallbackMonitoring() {
- if x.startup {
- x.startup = false
- x.timer.Reset(x.startupTimeout)
- return
- }
-
- x.noSubConnAlert = x.connStateMgr.notifyWhenNotReady()
- if x.xdsStaleTimeout != nil {
- if !x.timer.Stop() {
- <-x.timer.C
- }
- x.timer.Reset(*x.xdsStaleTimeout)
- }
- }
-
// There are two cases where fallback monitoring should be canceled:
// 1. xDS client returns a new ADS message.
// 2. fallback has been triggered.
func (x *xdsBalancer) cancelFallbackMonitoring() {
	if !x.timer.Stop() {
		select {
		case <-x.timer.C:
			// For cases where some fallback condition happens along with the timeout, but timeout loses
			// the race, so we need to drain the x.timer.C. thus we don't trigger fallback again.
		default:
			// if the timer timeout leads us here, then there's nothing to drain from x.timer.C.
		}
	}
	// Disarm the no-READY-SubConn alert; a nil channel blocks forever in run's select.
	x.noSubConnAlert = nil
	x.inFallbackMonitor = false
}
-
// Close shuts the balancer down. Cancelling the context wakes the run
// goroutine, which closes the xds client and any running child balancer.
func (x *xdsBalancer) Close() {
	x.cancel()
}
-
- func createDrainedTimer() *time.Timer {
- timer := time.NewTimer(0 * time.Millisecond)
- // make sure initially the timer channel is blocking until reset.
- if !timer.Stop() {
- <-timer.C
- }
- return timer
- }
-
// xdsConfig is the service-config representation for this balancer.
// ChildPolicy is the intra-cluster policy (nil means the policy comes from
// the CDS response); FallBackPolicy is used when contact with the traffic
// director is lost.
type xdsConfig struct {
	BalancerName   string
	ChildPolicy    *loadBalancingConfig
	FallBackPolicy *loadBalancingConfig
}
-
- // When unmarshalling json to xdsConfig, we iterate through the childPolicy/fallbackPolicy lists
- // and select the first LB policy which has been registered to be stored in the returned xdsConfig.
- func (p *xdsConfig) UnmarshalJSON(data []byte) error {
- var val map[string]json.RawMessage
- if err := json.Unmarshal(data, &val); err != nil {
- return err
- }
- for k, v := range val {
- switch k {
- case "balancerName":
- if err := json.Unmarshal(v, &p.BalancerName); err != nil {
- return err
- }
- case "childPolicy":
- var lbcfgs []*loadBalancingConfig
- if err := json.Unmarshal(v, &lbcfgs); err != nil {
- return err
- }
- for _, lbcfg := range lbcfgs {
- if balancer.Get(lbcfg.Name) != nil {
- p.ChildPolicy = lbcfg
- break
- }
- }
- case "fallbackPolicy":
- var lbcfgs []*loadBalancingConfig
- if err := json.Unmarshal(v, &lbcfgs); err != nil {
- return err
- }
- for _, lbcfg := range lbcfgs {
- if balancer.Get(lbcfg.Name) != nil {
- p.FallBackPolicy = lbcfg
- break
- }
- }
- }
- }
- return nil
- }
-
// MarshalJSON is an intentional stub: marshalling an xdsConfig back to JSON
// is not supported and yields (nil, nil).
func (p *xdsConfig) MarshalJSON() ([]byte, error) {
	return nil, nil
}
-
// loadBalancingConfig is one LB policy entry from the service config: the
// policy's registered name plus its opaque, policy-specific JSON config.
type loadBalancingConfig struct {
	Name   string
	Config json.RawMessage
}

// MarshalJSON is an intentional stub: marshalling a loadBalancingConfig back
// to JSON is not supported and yields (nil, nil).
func (l *loadBalancingConfig) MarshalJSON() ([]byte, error) {
	return nil, nil
}
-
- func (l *loadBalancingConfig) UnmarshalJSON(data []byte) error {
- var cfg map[string]json.RawMessage
- if err := json.Unmarshal(data, &cfg); err != nil {
- return err
- }
- for name, config := range cfg {
- l.Name = name
- l.Config = config
- }
- return nil
- }
|