@@ -18,6 +18,7 @@ package e2e_test
1818
1919import (
2020 "context"
21+ "errors"
2122 "fmt"
2223 "sort"
2324 "strings"
@@ -31,6 +32,7 @@ import (
3132 "google.golang.org/grpc/internal"
3233 "google.golang.org/grpc/internal/stubserver"
3334 "google.golang.org/grpc/internal/testutils"
35+ "google.golang.org/grpc/internal/testutils/pickfirst"
3436 "google.golang.org/grpc/internal/testutils/xds/e2e"
3537 "google.golang.org/grpc/peer"
3638 "google.golang.org/grpc/resolver"
@@ -596,8 +598,8 @@ func (s) TestAggregateCluster_SwitchEDSAndDNS(t *testing.T) {
596598// DNS resolver yet. Once the DNS resolver pushes an update, the test verifies
597599// that we switch to the DNS cluster and can make a successful RPC. At this
598600// point when the DNS cluster returns an error, the test verifies that RPCs are
599- // still successful. This is the expected behavior because pick_first ( the leaf
600- // policy) ignores resolver errors when it is not in TransientFailure .
601+ // still successful. This is the expected behavior because the cluster resolver
602+ // policy eats errors from DNS Resolver after it has returned an error .
601603func (s ) TestAggregateCluster_BadEDS_GoodToBadDNS (t * testing.T ) {
602604 dnsTargetCh , _ , _ , dnsR , cleanup1 := setupDNS ()
603605 defer cleanup1 ()
@@ -612,8 +614,8 @@ func (s) TestAggregateCluster_BadEDS_GoodToBadDNS(t *testing.T) {
612614 addrs , _ := backendAddressesAndPorts (t , servers )
613615
614616 // Configure an aggregate cluster pointing to an EDS and LOGICAL_DNS
615- // cluster. Also configure an empty endpoints resource for the EDS cluster
616- // that contains no endpoints .
617+ // cluster. Also configure an endpoints resource for the EDS cluster which
618+ // triggers a NACK .
617619 const (
618620 edsClusterName = clusterName + "-eds"
619621 dnsClusterName = clusterName + "-dns"
@@ -698,13 +700,160 @@ func (s) TestAggregateCluster_BadEDS_GoodToBadDNS(t *testing.T) {
698700 }
699701}
700702
703+ // TestAggregateCluster_BadEDS_GoodToBadDNS tests the case where the top-level
704+ // cluster is an aggregate cluster that resolves to an EDS and LOGICAL_DNS
705+ // cluster. The test first sends an EDS response which triggers an NACK. Once
706+ // the DNS resolver pushes an update, the test verifies that we switch to the
707+ // DNS cluster and can make a successful RPC.
708+ func (s ) TestAggregateCluster_BadEDSFromError_GoodToBadDNS (t * testing.T ) {
709+ dnsTargetCh , _ , _ , dnsR , cleanup1 := setupDNS ()
710+ defer cleanup1 ()
711+
712+ // Start an xDS management server.
713+ managementServer , nodeID , bootstrapContents , _ , cleanup2 := e2e .SetupManagementServer (t , e2e.ManagementServerOptions {AllowResourceSubset : true })
714+ defer cleanup2 ()
715+
716+ // Start two test backends.
717+ servers , cleanup3 := startTestServiceBackends (t , 2 )
718+ defer cleanup3 ()
719+ addrs , _ := backendAddressesAndPorts (t , servers )
720+
721+ // Configure an aggregate cluster pointing to an EDS and LOGICAL_DNS
722+ // cluster. Also configure an empty endpoints resource for the EDS cluster
723+ // that contains no endpoints.
724+ const (
725+ edsClusterName = clusterName + "-eds"
726+ dnsClusterName = clusterName + "-dns"
727+ dnsHostName = "dns_host"
728+ dnsPort = uint32 (8080 )
729+ )
730+ nackEndpointResource := e2e .DefaultEndpoint (edsServiceName , "localhost" , nil )
731+ nackEndpointResource .Endpoints = []* v3endpointpb.LocalityLbEndpoints {
732+ {
733+ LoadBalancingWeight : & wrapperspb.UInt32Value {
734+ Value : 0 , // causes an NACK
735+ },
736+ },
737+ }
738+ resources := e2e.UpdateOptions {
739+ NodeID : nodeID ,
740+ Clusters : []* v3clusterpb.Cluster {
741+ makeAggregateClusterResource (clusterName , []string {edsClusterName , dnsClusterName }),
742+ e2e .DefaultCluster (edsClusterName , edsServiceName , e2e .SecurityLevelNone ),
743+ makeLogicalDNSClusterResource (dnsClusterName , dnsHostName , dnsPort ),
744+ },
745+ Endpoints : []* v3endpointpb.ClusterLoadAssignment {nackEndpointResource },
746+ SkipValidation : true ,
747+ }
748+ ctx , cancel := context .WithTimeout (context .Background (), defaultTestTimeout )
749+ defer cancel ()
750+ if err := managementServer .Update (ctx , resources ); err != nil {
751+ t .Fatal (err )
752+ }
753+
754+ // Create xDS client, configure cds_experimental LB policy with a manual
755+ // resolver, and dial the test backends.
756+ cc , cleanup := setupAndDial (t , bootstrapContents )
757+ defer cleanup ()
758+
759+ // Ensure that the DNS resolver is started for the expected target.
760+ select {
761+ case <- ctx .Done ():
762+ t .Fatal ("Timeout when waiting for DNS resolver to be started" )
763+ case target := <- dnsTargetCh :
764+ got , want := target .Endpoint (), fmt .Sprintf ("%s:%d" , dnsHostName , dnsPort )
765+ if got != want {
766+ t .Fatalf ("DNS resolution started for target %q, want %q" , got , want )
767+ }
768+ }
769+
770+ // Update DNS resolver with test backend addresses.
771+ dnsR .UpdateState (resolver.State {Addresses : addrs })
772+
773+ // Ensure that RPCs start getting routed to the first backend since the
774+ // child policy for a LOGICAL_DNS cluster is pick_first by default.
775+ pickfirst .CheckRPCsToBackend (ctx , cc , addrs [0 ])
776+ }
777+
778+ // TestAggregateCluster_BadDNS_GoodEDS tests the case where the top-level
779+ // cluster is an aggregate cluster that resolves to an LOGICAL_DNS and EDS
780+ // cluster. When the DNS Resolver returns an error and EDS cluster returns a
781+ // good update, this test verifies the cluster_resolver balancer correctly falls
782+ // back from the LOGICAL_DNS cluster to the EDS cluster.
783+ func (s ) TestAggregateCluster_BadDNS_GoodEDS (t * testing.T ) {
784+ dnsTargetCh , _ , _ , dnsR , cleanup1 := setupDNS ()
785+ defer cleanup1 ()
786+
787+ // Start an xDS management server.
788+ managementServer , nodeID , bootstrapContents , _ , cleanup2 := e2e .SetupManagementServer (t , e2e.ManagementServerOptions {AllowResourceSubset : true })
789+ defer cleanup2 ()
790+
791+ // Start two test backends.
792+ servers , cleanup3 := startTestServiceBackends (t , 2 )
793+ defer cleanup3 ()
794+ addrs , ports := backendAddressesAndPorts (t , servers )
795+
796+ // Configure an aggregate cluster pointing to an LOGICAL_DNS and EDS
797+ // cluster. Also configure an endpoints resource for the EDS cluster.
798+ const (
799+ edsClusterName = clusterName + "-eds"
800+ dnsClusterName = clusterName + "-dns"
801+ dnsHostName = "dns_host"
802+ dnsPort = uint32 (8080 )
803+ )
804+ resources := e2e.UpdateOptions {
805+ NodeID : nodeID ,
806+ Clusters : []* v3clusterpb.Cluster {
807+ makeAggregateClusterResource (clusterName , []string {dnsClusterName , edsClusterName }),
808+ makeLogicalDNSClusterResource (dnsClusterName , dnsHostName , dnsPort ),
809+ e2e .DefaultCluster (edsClusterName , edsServiceName , e2e .SecurityLevelNone ),
810+ },
811+ Endpoints : []* v3endpointpb.ClusterLoadAssignment {e2e .DefaultEndpoint (edsServiceName , "localhost" , []uint32 {uint32 (ports [0 ])})},
812+ SkipValidation : true ,
813+ }
814+ ctx , cancel := context .WithTimeout (context .Background (), defaultTestTimeout )
815+ defer cancel ()
816+ if err := managementServer .Update (ctx , resources ); err != nil {
817+ t .Fatal (err )
818+ }
819+
820+ // Create xDS client, configure cds_experimental LB policy with a manual
821+ // resolver, and dial the test backends.
822+ cc , cleanup := setupAndDial (t , bootstrapContents )
823+ defer cleanup ()
824+
825+ // Ensure that the DNS resolver is started for the expected target.
826+ select {
827+ case <- ctx .Done ():
828+ t .Fatal ("Timeout when waiting for DNS resolver to be started" )
829+ case target := <- dnsTargetCh :
830+ got , want := target .Endpoint (), fmt .Sprintf ("%s:%d" , dnsHostName , dnsPort )
831+ if got != want {
832+ t .Fatalf ("DNS resolution started for target %q, want %q" , got , want )
833+ }
834+ }
835+
836+ // Push an error through the DNS resolver.
837+ dnsR .ReportError (errors .New ("some error" ))
838+
839+ // RPCs should work, higher level DNS cluster errors so should fallback to
840+ // EDS cluster.
841+ client := testgrpc .NewTestServiceClient (cc )
842+ peer := & peer.Peer {}
843+ if _ , err := client .EmptyCall (ctx , & testpb.Empty {}, grpc .Peer (peer ), grpc .WaitForReady (true )); err != nil {
844+ t .Fatalf ("EmptyCall() failed: %v" , err )
845+ }
846+ if peer .Addr .String () != addrs [0 ].Addr {
847+ t .Fatalf ("EmptyCall() routed to backend %q, want %q" , peer .Addr , addrs [0 ].Addr )
848+ }
849+ }
850+
701851// TestAggregateCluster_BadEDS_BadDNS tests the case where the top-level cluster
702852// is an aggregate cluster that resolves to an EDS and LOGICAL_DNS cluster. When
703853// the EDS request returns a resource that contains no endpoints, the test
704854// verifies that we switch to the DNS cluster. When the DNS cluster returns an
705- // error, the test verifies that RPCs fail with the error returned by the DNS
706- // resolver, and thus, ensures that pick_first (the leaf policy) does not ignore
707- // resolver errors.
855+ // error, the test verifies that RPCs fail with the error triggered by the DNS
856+ // Discovery Mechanism (from sending an empty address list down).
708857func (s ) TestAggregateCluster_BadEDS_BadDNS (t * testing.T ) {
709858 dnsTargetCh , _ , _ , dnsR , cleanup1 := setupDNS ()
710859 defer cleanup1 ()
@@ -769,14 +918,14 @@ func (s) TestAggregateCluster_BadEDS_BadDNS(t *testing.T) {
769918 dnsErr := fmt .Errorf ("DNS error" )
770919 dnsR .ReportError (dnsErr )
771920
772- // Ensure that the error returned from the DNS resolver is reported to the
773- // caller of the RPC .
921+ // Ensure that the error from the DNS Resolver leads to an empty address
922+ // update for both priorities .
774923 _ , err := client .EmptyCall (ctx , & testpb.Empty {})
775924 if code := status .Code (err ); code != codes .Unavailable {
776925 t .Fatalf ("EmptyCall() failed with code %s, want %s" , code , codes .Unavailable )
777926 }
778- if err == nil || ! strings .Contains (err .Error (), dnsErr . Error () ) {
779- t .Fatalf ("EmptyCall() failed with error %v, want %v " , err , dnsErr )
927+ if err == nil || ! strings .Contains (err .Error (), "produced zero addresses" ) {
928+ t .Fatalf ("EmptyCall() failed with error: %v, want: produced zero addresses " , err )
780929 }
781930}
782931
0 commit comments