fix: In federated setup dial all hosts to figure out online host (#10074)

In federated NAS gateway setups, one of the multiple hosts in
srvRecords was picked at random, which meant that if one of
the hosts was down the request could fail and would only
succeed if the client retried. Instead, allow the server to
quickly figure out which host is currently online so that we
can exclude any host which is down.

Each attempt to dial a node waits at most 300 milliseconds;
if the node takes longer than this to respond, we simply
ignore it and move on to another node. The total number of
attempts is equal to the number of srvRecords; if no server
is online, we simply fall back to the last dialed host.
master
Harshavardhana 5 years ago committed by GitHub
parent 14b1c9f8e4
commit 3fe27c8411
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 34
      cmd/object-api-utils.go
  2. 18
      cmd/object-handlers.go

@ -346,18 +346,36 @@ func isMinioReservedBucket(bucketName string) bool {
// returns a slice of hosts by reading a slice of DNS records // returns a slice of hosts by reading a slice of DNS records
func getHostsSlice(records []dns.SrvRecord) []string { func getHostsSlice(records []dns.SrvRecord) []string {
var hosts []string hosts := make([]string, len(records))
for _, r := range records { for i, r := range records {
hosts = append(hosts, net.JoinHostPort(r.Host, string(r.Port))) hosts[i] = net.JoinHostPort(r.Host, string(r.Port))
} }
return hosts return hosts
} }
// returns a host (and corresponding port) from a slice of DNS records var rng = rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
func getHostFromSrv(records []dns.SrvRecord) string {
rand.Seed(time.Now().Unix()) // returns an online host (and corresponding port) from a slice of DNS records
srvRecord := records[rand.Intn(len(records))] func getHostFromSrv(records []dns.SrvRecord) (host string) {
return net.JoinHostPort(srvRecord.Host, string(srvRecord.Port)) hosts := getHostsSlice(records)
var d net.Dialer
var retry int
for retry < len(hosts) {
ctx, cancel := context.WithTimeout(GlobalContext, 300*time.Millisecond)
host = hosts[rng.Intn(len(hosts))]
conn, err := d.DialContext(ctx, "tcp", host)
cancel()
if err != nil {
retry++
continue
}
conn.Close()
break
}
return host
} }
// IsCompressed returns true if the object is marked as compressed. // IsCompressed returns true if the object is marked as compressed.

@ -721,6 +721,11 @@ var getRemoteInstanceTransportOnce sync.Once
// Returns a minio-go Client configured to access remote host described by destDNSRecord // Returns a minio-go Client configured to access remote host described by destDNSRecord
// Applicable only in a federated deployment // Applicable only in a federated deployment
var getRemoteInstanceClient = func(r *http.Request, host string) (*miniogo.Core, error) { var getRemoteInstanceClient = func(r *http.Request, host string) (*miniogo.Core, error) {
getRemoteInstanceTransportOnce.Do(func() {
getRemoteInstanceTransport = NewGatewayHTTPTransport()
getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour)
})
cred := getReqAccessCred(r, globalServerRegion) cred := getReqAccessCred(r, globalServerRegion)
// In a federated deployment, all the instances share config files // In a federated deployment, all the instances share config files
// and hence expected to have same credentials. // and hence expected to have same credentials.
@ -732,10 +737,6 @@ var getRemoteInstanceClient = func(r *http.Request, host string) (*miniogo.Core,
if err != nil { if err != nil {
return nil, err return nil, err
} }
getRemoteInstanceTransportOnce.Do(func() {
getRemoteInstanceTransport = NewGatewayHTTPTransport()
getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour)
})
return core, nil return core, nil
} }
@ -743,6 +744,11 @@ var getRemoteInstanceClient = func(r *http.Request, host string) (*miniogo.Core,
// Applicable only in a federated deployment. // Applicable only in a federated deployment.
// The transport does not contain any timeout except for dialing. // The transport does not contain any timeout except for dialing.
func getRemoteInstanceClientLongTimeout(r *http.Request, host string) (*miniogo.Core, error) { func getRemoteInstanceClientLongTimeout(r *http.Request, host string) (*miniogo.Core, error) {
getRemoteInstanceTransportOnce.Do(func() {
getRemoteInstanceTransport = NewGatewayHTTPTransport()
getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour)
})
cred := getReqAccessCred(r, globalServerRegion) cred := getReqAccessCred(r, globalServerRegion)
// In a federated deployment, all the instances share config files // In a federated deployment, all the instances share config files
// and hence expected to have same credentials. // and hence expected to have same credentials.
@ -754,10 +760,6 @@ func getRemoteInstanceClientLongTimeout(r *http.Request, host string) (*miniogo.
if err != nil { if err != nil {
return nil, err return nil, err
} }
getRemoteInstanceTransportOnce.Do(func() {
getRemoteInstanceTransport = NewGatewayHTTPTransport()
getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour)
})
return core, nil return core, nil
} }

Loading…
Cancel
Save