From a2ccba69e5637383271b2f76a8432cc982c34db9 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 6 May 2020 11:44:06 -0700 Subject: [PATCH] add kes retries upto two times with jitter backoff (#9527) KES calls are not retried and under certain situations when KES is under high load, the request should be retried automatically. --- cmd/crypto/kes.go | 107 ++++++++++++++++++++++++++++---------------- cmd/crypto/retry.go | 65 +++++++++++++++++++++++++++ go.sum | 2 + 3 files changed, 136 insertions(+), 38 deletions(-) create mode 100644 cmd/crypto/retry.go diff --git a/cmd/crypto/kes.go b/cmd/crypto/kes.go index bfc1df752..5c77bf216 100644 --- a/cmd/crypto/kes.go +++ b/cmd/crypto/kes.go @@ -1,4 +1,4 @@ -// MinIO Cloud Storage, (C) 2019 MinIO, Inc. +// MinIO Cloud Storage, (C) 2019-2020 MinIO, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ import ( "crypto/tls" "crypto/x509" "encoding/json" + "errors" "fmt" "io" "io/ioutil" @@ -27,6 +28,10 @@ import ( "os" "path/filepath" "strings" + "time" + + xhttp "github.com/minio/minio/cmd/http" + xnet "github.com/minio/minio/pkg/net" ) // KesConfig contains the configuration required @@ -209,6 +214,18 @@ type kesClient struct { httpClient http.Client } +// Response KES response struct +type response struct { + Plaintext []byte `json:"plaintext"` + Ciphertext []byte `json:"ciphertext,omitempty"` +} + +// Request KES request struct +type request struct { + Ciphertext []byte `json:"ciphertext,omitempty"` + Context []byte `json:"context"` +} + // GenerateDataKey requests a new data key from the KES server. // On success, the KES server will respond with the plaintext key // and the ciphertext key as the plaintext key encrypted with @@ -218,10 +235,7 @@ type kesClient struct { // such that you have to provide the same context when decrypting // the data key. func (c *kesClient) GenerateDataKey(name string, context []byte) ([]byte, []byte, error) { - type Request struct { - Context []byte `json:"context"` - } - body, err := json.Marshal(Request{ + body, err := json.Marshal(request{ Context: context, }) if err != nil { @@ -229,25 +243,57 @@ func (c *kesClient) GenerateDataKey(name string, context []byte) ([]byte, []byte } url := fmt.Sprintf("%s/v1/key/generate/%s", c.addr, url.PathEscape(name)) - resp, err := c.httpClient.Post(url, "application/json", bytes.NewReader(body)) + + const limit = 1 << 20 // A plaintext/ciphertext key pair will never be larger than 1 MB + resp, err := c.postRetry(url, bytes.NewReader(body), limit) if err != nil { return nil, nil, err } + + return resp.Plaintext, resp.Ciphertext, nil +} + +func (c *kesClient) post(url string, body io.Reader, limit int64) (*response, error) { + resp, err := c.httpClient.Post(url, "application/json", body) + if err != nil { + return nil, err + } + + // Drain the entire body to make sure we have re-use connections + defer xhttp.DrainBody(resp.Body) + if resp.StatusCode != http.StatusOK { - return nil, nil, c.parseErrorResponse(resp) + return nil, c.parseErrorResponse(resp) } - defer resp.Body.Close() - type Response struct { - Plaintext []byte `json:"plaintext"` - Ciphertext []byte `json:"ciphertext"` + response := &response{} + if err = json.NewDecoder(io.LimitReader(resp.Body, limit)).Decode(response); err != nil { + return nil, err } - const limit = 1 << 20 // A plaintext/ciphertext key pair will never be larger than 1 MB - var response Response - if err = json.NewDecoder(io.LimitReader(resp.Body, limit)).Decode(&response); err != nil { - return nil, nil, err + return response, nil +} + +func (c *kesClient) postRetry(url string, body io.ReadSeeker, limit int64) (*response, error) { + for i := 0; ; i++ { + body.Seek(0, io.SeekStart) // seek to the beginning of the body. + + response, err := c.post(url, body, limit) + if err == nil { + return response, nil + } + + if !xnet.IsNetworkOrHostDown(err) && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { + return nil, err + } + + // retriable network errors. + remain := retryMax - i + if remain <= 0 { + return response, err + } + + <-time.After(LinearJitterBackoff(retryWaitMin, retryWaitMax, i)) } - return response.Plaintext, response.Ciphertext, nil } // GenerateDataKey decrypts an encrypted data key with the key @@ -257,11 +303,7 @@ func (c *kesClient) GenerateDataKey(name string, context []byte) ([]byte, []byte // The optional context must match the value you provided when // generating the data key. func (c *kesClient) DecryptDataKey(name string, ciphertext, context []byte) ([]byte, error) { - type Request struct { - Ciphertext []byte `json:"ciphertext"` - Context []byte `json:"context"` - } - body, err := json.Marshal(Request{ + body, err := json.Marshal(request{ Ciphertext: ciphertext, Context: context, }) @@ -270,37 +312,26 @@ func (c *kesClient) DecryptDataKey(name string, ciphertext, context []byte) ([]b } url := fmt.Sprintf("%s/v1/key/decrypt/%s", c.addr, url.PathEscape(name)) - resp, err := c.httpClient.Post(url, "application/json", bytes.NewReader(body)) - if err != nil { - return nil, err - } - if resp.StatusCode != http.StatusOK { - return nil, c.parseErrorResponse(resp) - } - defer resp.Body.Close() - type Response struct { - Plaintext []byte `json:"plaintext"` - } const limit = 32 * 1024 // A data key will never be larger than 32 KB - var response Response - if err = json.NewDecoder(io.LimitReader(resp.Body, limit)).Decode(&response); err != nil { + resp, err := c.postRetry(url, bytes.NewReader(body), limit) + if err != nil { return nil, err } - return response.Plaintext, nil + return resp.Plaintext, nil } func (c *kesClient) parseErrorResponse(resp *http.Response) error { if resp.Body == nil { - return nil + return Errorf("%s: no body", http.StatusText(resp.StatusCode)) } - defer resp.Body.Close() const limit = 32 * 1024 // A (valid) error response will not be greater than 32 KB var errMsg strings.Builder if _, err := io.Copy(&errMsg, io.LimitReader(resp.Body, limit)); err != nil { - return err + return Errorf("%s: %s", http.StatusText(resp.StatusCode), err) } + return Errorf("%s: %s", http.StatusText(resp.StatusCode), errMsg.String()) } diff --git a/cmd/crypto/retry.go b/cmd/crypto/retry.go new file mode 100644 index 000000000..b03116586 --- /dev/null +++ b/cmd/crypto/retry.go @@ -0,0 +1,65 @@ +// MinIO Cloud Storage, (C) 2020 MinIO, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package crypto + +import ( + "math/rand" + "time" +) + +// default retry configuration +const ( + retryWaitMin = 500 * time.Millisecond // minimum retry limit. + retryWaitMax = 3 * time.Second // 3 secs worth of max retry. + retryMax = 2 +) + +// LinearJitterBackoff provides the time.Duration for a caller to +// perform linear backoff based on the attempt number and with jitter to +// prevent a thundering herd. +// +// min and max here are *not* absolute values. The number to be multiplied by +// the attempt number will be chosen at random from between them, thus they are +// bounding the jitter. +// +// For instance: +// * To get strictly linear backoff of one second increasing each retry, set +// both to one second (1s, 2s, 3s, 4s, ...) +// * To get a small amount of jitter centered around one second increasing each +// retry, set to around one second, such as a min of 800ms and max of 1200ms +// (892ms, 2102ms, 2945ms, 4312ms, ...) +// * To get extreme jitter, set to a very wide spread, such as a min of 100ms +// and a max of 20s (15382ms, 292ms, 51321ms, 35234ms, ...) +func LinearJitterBackoff(min, max time.Duration, attemptNum int) time.Duration { + // attemptNum always starts at zero but we want to start at 1 for multiplication + attemptNum++ + + if max <= min { + // Unclear what to do here, or they are the same, so return min * + // attemptNum + return min * time.Duration(attemptNum) + } + + // Seed rand; doing this every time is fine + rand := rand.New(rand.NewSource(int64(time.Now().Nanosecond()))) + + // Pick a random number that lies somewhere between the min and max and + // multiply by the attemptNum. attemptNum starts at zero so we always + // increment here. We first get a random percentage, then apply that to the + // difference between min and max, and add to min. + jitter := rand.Float64() * float64(max-min) + jitterMin := int64(jitter) + int64(min) + return time.Duration(jitterMin * int64(attemptNum)) +} diff --git a/go.sum b/go.sum index ac45b0d26..e01412390 100644 --- a/go.sum +++ b/go.sum @@ -233,6 +233,8 @@ github.com/minio/lsync v1.0.1/go.mod h1:tCFzfo0dlvdGl70IT4IAK/5Wtgb0/BrTmo/jE8pA github.com/minio/minio-go/v6 v6.0.53/go.mod h1:DIvC/IApeHX8q1BAMVCXSXwpmrmM+I+iBvhvztQorfI= github.com/minio/minio-go/v6 v6.0.55-0.20200424204115-7506d2996b22 h1:nZEve4vdUhwHBoV18zRvPDgjL6NYyDJE5QJvz3l9bRs= github.com/minio/minio-go/v6 v6.0.55-0.20200424204115-7506d2996b22/go.mod h1:KQMM+/44DSlSGSQWSfRrAZ12FVMmpWNuX37i2AX0jfI= +github.com/minio/minio-go/v6 v6.0.55-0.20200425081427-89eebdef2af0 h1:PdHKpM9h2vqCDr1AjJdK8e/6tRdOSjUNzIqeNmxu7ak= +github.com/minio/minio-go/v6 v6.0.55-0.20200425081427-89eebdef2af0/go.mod h1:KQMM+/44DSlSGSQWSfRrAZ12FVMmpWNuX37i2AX0jfI= github.com/minio/parquet-go v0.0.0-20200414234858-838cfa8aae61 h1:pUSI/WKPdd77gcuoJkSzhJ4wdS8OMDOsOu99MtpXEQA= github.com/minio/parquet-go v0.0.0-20200414234858-838cfa8aae61/go.mod h1:4trzEJ7N1nBTd5Tt7OCZT5SEin+WiAXpdJ/WgPkESA8= github.com/minio/sha256-simd v0.1.1 h1:5QHSlgo3nt5yKOJrC7W8w7X+NFl8cMPZm96iu8kKUJU=