From 3fe27c84114e058a1e6936baca157d0229724133 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Fri, 17 Jul 2020 14:25:47 -0700 Subject: [PATCH] fix: In federated setup dial all hosts to figure out online host (#10074) In federated NAS gateway setups, one of the multiple hosts in srvRecords was picked at random, which meant that if one of the hosts was down the request could fail, and would only succeed once the client retried. Instead, allow the server to quickly figure out a currently online host so that we can exclude any host which is down. Each attempt to detect a downed node takes at most 300 milliseconds; if a node takes longer than this to respond we simply ignore it and move on to the next node. The total number of attempts is equal to the number of srvRecords; if no server is online we simply fall back to the last dialed host. --- cmd/object-api-utils.go | 34 ++++++++++++++++++++++++++-------- cmd/object-handlers.go | 18 ++++++++++-------- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/cmd/object-api-utils.go b/cmd/object-api-utils.go index 423f81a48..a079ba7aa 100644 --- a/cmd/object-api-utils.go +++ b/cmd/object-api-utils.go @@ -346,18 +346,36 @@ func isMinioReservedBucket(bucketName string) bool { // returns a slice of hosts by reading a slice of DNS records func getHostsSlice(records []dns.SrvRecord) []string { - var hosts []string - for _, r := range records { - hosts = append(hosts, net.JoinHostPort(r.Host, string(r.Port))) + hosts := make([]string, len(records)) + for i, r := range records { + hosts[i] = net.JoinHostPort(r.Host, string(r.Port)) } return hosts } -// returns a host (and corresponding port) from a slice of DNS records -func getHostFromSrv(records []dns.SrvRecord) string { - rand.Seed(time.Now().Unix()) - srvRecord := records[rand.Intn(len(records))] - return net.JoinHostPort(srvRecord.Host, string(srvRecord.Port)) +var rng = rand.New(rand.NewSource(time.Now().UTC().UnixNano())) + +// returns an online host (and corresponding port) from a slice of 
DNS records +func getHostFromSrv(records []dns.SrvRecord) (host string) { + hosts := getHostsSlice(records) + + var d net.Dialer + var retry int + for retry < len(hosts) { + ctx, cancel := context.WithTimeout(GlobalContext, 300*time.Millisecond) + + host = hosts[rng.Intn(len(hosts))] + conn, err := d.DialContext(ctx, "tcp", host) + cancel() + if err != nil { + retry++ + continue + } + conn.Close() + break + } + + return host } // IsCompressed returns true if the object is marked as compressed. diff --git a/cmd/object-handlers.go b/cmd/object-handlers.go index 3810bc682..f1a123147 100644 --- a/cmd/object-handlers.go +++ b/cmd/object-handlers.go @@ -721,6 +721,11 @@ var getRemoteInstanceTransportOnce sync.Once // Returns a minio-go Client configured to access remote host described by destDNSRecord // Applicable only in a federated deployment var getRemoteInstanceClient = func(r *http.Request, host string) (*miniogo.Core, error) { + getRemoteInstanceTransportOnce.Do(func() { + getRemoteInstanceTransport = NewGatewayHTTPTransport() + getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour) + }) + cred := getReqAccessCred(r, globalServerRegion) // In a federated deployment, all the instances share config files // and hence expected to have same credentials. @@ -732,10 +737,6 @@ var getRemoteInstanceClient = func(r *http.Request, host string) (*miniogo.Core, if err != nil { return nil, err } - getRemoteInstanceTransportOnce.Do(func() { - getRemoteInstanceTransport = NewGatewayHTTPTransport() - getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour) - }) return core, nil } @@ -743,6 +744,11 @@ var getRemoteInstanceClient = func(r *http.Request, host string) (*miniogo.Core, // Applicable only in a federated deployment. // The transport does not contain any timeout except for dialing. 
func getRemoteInstanceClientLongTimeout(r *http.Request, host string) (*miniogo.Core, error) { + getRemoteInstanceTransportOnce.Do(func() { + getRemoteInstanceTransport = NewGatewayHTTPTransport() + getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour) + }) + cred := getReqAccessCred(r, globalServerRegion) // In a federated deployment, all the instances share config files // and hence expected to have same credentials. @@ -754,10 +760,6 @@ func getRemoteInstanceClientLongTimeout(r *http.Request, host string) (*miniogo. if err != nil { return nil, err } - getRemoteInstanceTransportOnce.Do(func() { - getRemoteInstanceTransport = NewGatewayHTTPTransport() - getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour) - }) return core, nil }