From bc8b936d4bf0fb7c6cfdd9e777e3c5fa467844d1 Mon Sep 17 00:00:00 2001
From: Bala FA
Date: Thu, 26 Oct 2017 10:17:07 -0700
Subject: [PATCH] convert ETag properly for all gateways (#5099)

Previously the ID/ETag returned by the backend service was used as-is,
which breaks tools like s3cmd that interpret the ETag as a checksum of
the data. This is fixed by appending "-1" to the ETag (a standalone
sketch of the conversion follows the patch).

Refer to minio/mint#193 and minio/mint#201.
---
 cmd/gateway-azure.go      | 12 +++---------
 cmd/gateway-azure_test.go | 17 -----------------
 cmd/gateway-b2.go         | 16 +++++++++-------
 cmd/gateway-gcs.go        |  6 +++---
 cmd/utils.go              | 18 ++++++++++++++++++
 cmd/utils_test.go         | 19 +++++++++++++++++++
 6 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/cmd/gateway-azure.go b/cmd/gateway-azure.go
index fd67be0c0..5718f856e 100644
--- a/cmd/gateway-azure.go
+++ b/cmd/gateway-azure.go
@@ -162,11 +162,6 @@ func azurePropertiesToS3Meta(meta storage.BlobMetadata, props storage.BlobProper
 	return s3Metadata
 }
 
-// Append "-1" to etag so that clients do not interpret it as MD5.
-func azureToS3ETag(etag string) string {
-	return canonicalizeETag(etag) + "-1"
-}
-
 // azureObjects - Implements Object layer for Azure blob storage.
 type azureObjects struct {
 	gatewayUnsupported
@@ -420,7 +415,7 @@ func (a *azureObjects) ListObjects(bucket, prefix, marker, delimiter string, max
 			Name:            object.Name,
 			ModTime:         time.Time(object.Properties.LastModified),
 			Size:            object.Properties.ContentLength,
-			ETag:            azureToS3ETag(object.Properties.Etag),
+			ETag:            toS3ETag(object.Properties.Etag),
 			ContentType:     object.Properties.ContentType,
 			ContentEncoding: object.Properties.ContentEncoding,
 		})
@@ -510,7 +505,7 @@ func (a *azureObjects) GetObjectInfo(bucket, object string) (objInfo ObjectInfo,
 	objInfo = ObjectInfo{
 		Bucket:      bucket,
 		UserDefined: meta,
-		ETag:        azureToS3ETag(blob.Properties.Etag),
+		ETag:        toS3ETag(blob.Properties.Etag),
 		ModTime:     time.Time(blob.Properties.LastModified),
 		Name:        object,
 		Size:        blob.Properties.ContentLength,
@@ -629,8 +624,7 @@ func (a *azureObjects) PutObjectPart(bucket, object, uploadID string, partID int
 
 	etag := data.MD5HexString()
 	if etag == "" {
-		// Generate random ETag.
-		etag = azureToS3ETag(getMD5Hash([]byte(mustGetUUID())))
+		etag = genETag()
 	}
 
 	subPartSize, subPartNumber := int64(azureBlockSize), 1
diff --git a/cmd/gateway-azure_test.go b/cmd/gateway-azure_test.go
index 9000eefb6..a5dab2fb1 100644
--- a/cmd/gateway-azure_test.go
+++ b/cmd/gateway-azure_test.go
@@ -25,23 +25,6 @@ import (
 	"github.com/Azure/azure-sdk-for-go/storage"
 )
 
-// Test azureToS3ETag.
-func TestAzureToS3ETag(t *testing.T) {
-	tests := []struct {
-		etag     string
-		expected string
-	}{
-		{`"etag"`, `etag-1`},
-		{"etag", "etag-1"},
-	}
-	for i, test := range tests {
-		got := azureToS3ETag(test.etag)
-		if got != test.expected {
-			t.Errorf("test %d: got:%s expected:%s", i+1, got, test.expected)
-		}
-	}
-}
-
 // Test canonical metadata.
 func TestS3MetaToAzureProperties(t *testing.T) {
 	headers := map[string]string{
diff --git a/cmd/gateway-b2.go b/cmd/gateway-b2.go
index a4f117562..d97e0e326 100644
--- a/cmd/gateway-b2.go
+++ b/cmd/gateway-b2.go
@@ -264,7 +264,7 @@ func (l *b2Objects) ListObjects(bucket string, prefix string, marker string, del
 			Name:        file.Name,
 			ModTime:     file.Timestamp,
 			Size:        file.Size,
-			ETag:        file.Info.ID,
+			ETag:        toS3ETag(file.Info.ID),
 			ContentType: file.Info.ContentType,
 			UserDefined: file.Info.Info,
 		})
@@ -299,7 +299,7 @@ func (l *b2Objects) ListObjectsV2(bucket, prefix, continuationToken, delimiter s
 			Name:        file.Name,
 			ModTime:     file.Timestamp,
 			Size:        file.Size,
-			ETag:        file.Info.ID,
+			ETag:        toS3ETag(file.Info.ID),
 			ContentType: file.Info.ContentType,
 			UserDefined: file.Info.Info,
 		})
@@ -346,7 +346,7 @@ func (l *b2Objects) GetObjectInfo(bucket string, object string) (objInfo ObjectI
 	objInfo = ObjectInfo{
 		Bucket:      bucket,
 		Name:        object,
-		ETag:        fi.ID,
+		ETag:        toS3ETag(fi.ID),
 		Size:        fi.Size,
 		ModTime:     fi.Timestamp,
 		ContentType: fi.ContentType,
@@ -452,7 +452,7 @@ func (l *b2Objects) PutObject(bucket string, object string, data *h2.Reader, met
 	return ObjectInfo{
 		Bucket:      bucket,
 		Name:        object,
-		ETag:        fi.ID,
+		ETag:        toS3ETag(fi.ID),
 		Size:        fi.Size,
 		ModTime:     fi.Timestamp,
 		ContentType: fi.ContentType,
@@ -566,7 +566,7 @@ func (l *b2Objects) PutObjectPart(bucket string, object string, uploadID string,
 	return PartInfo{
 		PartNumber:   partID,
 		LastModified: UTCNow(),
-		ETag:         sha1,
+		ETag:         toS3ETag(sha1),
 		Size:         data.Size(),
 	}, nil
 }
@@ -597,7 +597,7 @@ func (l *b2Objects) ListObjectParts(bucket string, object string, uploadID strin
 	for _, part := range partsList {
 		lpi.Parts = append(lpi.Parts, PartInfo{
 			PartNumber: part.Number,
-			ETag:       part.SHA1,
+			ETag:       toS3ETag(part.SHA1),
 			Size:       part.Size,
 		})
 	}
@@ -627,7 +627,9 @@ func (l *b2Objects) CompleteMultipartUpload(bucket string, object string, upload
 		if i+1 != uploadedPart.PartNumber {
 			return oi, b2ToObjectError(traceError(InvalidPart{}), bucket, object, uploadID)
 		}
-		hashes[uploadedPart.PartNumber] = uploadedPart.ETag
+
+		// Trim "-1" suffix in ETag as PutObjectPart() treats B2 returned SHA1 as ETag.
+		hashes[uploadedPart.PartNumber] = strings.TrimSuffix(uploadedPart.ETag, "-1")
 	}
 
 	if _, err = bkt.File(uploadID, object).CompileParts(0, hashes).FinishLargeFile(l.ctx); err != nil {
diff --git a/cmd/gateway-gcs.go b/cmd/gateway-gcs.go
index afdeed6aa..a19a5b33c 100644
--- a/cmd/gateway-gcs.go
+++ b/cmd/gateway-gcs.go
@@ -581,7 +581,7 @@ func (l *gcsGateway) ListObjects(bucket string, prefix string, marker string, de
 			Bucket:          attrs.Bucket,
 			ModTime:         attrs.Updated,
 			Size:            attrs.Size,
-			ETag:            fmt.Sprintf("%d", attrs.CRC32C),
+			ETag:            toS3ETag(fmt.Sprintf("%d", attrs.CRC32C)),
 			UserDefined:     attrs.Metadata,
 			ContentType:     attrs.ContentType,
 			ContentEncoding: attrs.ContentEncoding,
@@ -723,7 +723,7 @@ func fromGCSAttrsToObjectInfo(attrs *storage.ObjectAttrs) ObjectInfo {
 		Bucket:          attrs.Bucket,
 		ModTime:         attrs.Updated,
 		Size:            attrs.Size,
-		ETag:            fmt.Sprintf("%d", attrs.CRC32C),
+		ETag:            toS3ETag(fmt.Sprintf("%d", attrs.CRC32C)),
 		UserDefined:     attrs.Metadata,
 		ContentType:     attrs.ContentType,
 		ContentEncoding: attrs.ContentEncoding,
@@ -858,7 +858,7 @@ func (l *gcsGateway) PutObjectPart(bucket string, key string, uploadID string, p
 	etag := data.MD5HexString()
 	if etag == "" {
 		// Generate random ETag.
-		etag = getMD5Hash([]byte(mustGetUUID()))
+		etag = genETag()
 	}
 	object := l.client.Bucket(bucket).Object(gcsMultipartDataName(uploadID, partNumber, etag))
 	w := object.NewWriter(l.ctx)
diff --git a/cmd/utils.go b/cmd/utils.go
index 461917c43..d9277a329 100644
--- a/cmd/utils.go
+++ b/cmd/utils.go
@@ -211,3 +211,21 @@ func checkURL(urlStr string) (*url.URL, error) {
 func UTCNow() time.Time {
 	return time.Now().UTC()
 }
+
+// genETag - generate a random, UUID-based ETag.
+func genETag() string {
+	return toS3ETag(getMD5Hash([]byte(mustGetUUID())))
+}
+
+// toS3ETag - convert a backend checksum/ID into an S3 ETag.
+func toS3ETag(etag string) string {
+	etag = canonicalizeETag(etag)
+
+	if !strings.HasSuffix(etag, "-1") {
+		// Tools like s3cmd use the ETag as a checksum to validate the data.
+		// Append "-1" to indicate the ETag is not a checksum.
+		etag += "-1"
+	}
+
+	return etag
+}
diff --git a/cmd/utils_test.go b/cmd/utils_test.go
index f364ce4f7..4ddf4d4f1 100644
--- a/cmd/utils_test.go
+++ b/cmd/utils_test.go
@@ -293,3 +293,22 @@ func TestDumpRequest(t *testing.T) {
 		t.Fatalf("Expected %#v, got %#v", expectedHeader, res.Header)
 	}
 }
+
+// Test toS3ETag()
+func TestToS3ETag(t *testing.T) {
+	testCases := []struct {
+		etag         string
+		expectedETag string
+	}{
+		{`"8019e762"`, `8019e762-1`},
+		{"5d57546eeb86b3eba68967292fba0644", "5d57546eeb86b3eba68967292fba0644-1"},
+		{`"8019e762-1"`, `8019e762-1`},
+		{"5d57546eeb86b3eba68967292fba0644-1", "5d57546eeb86b3eba68967292fba0644-1"},
+	}
+	for i, testCase := range testCases {
+		etag := toS3ETag(testCase.etag)
+		if etag != testCase.expectedETag {
+			t.Fatalf("test %v: expected: %v, got: %v", i+1, testCase.expectedETag, etag)
+		}
+	}
+}
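
For reference, here is a minimal, self-contained sketch of the conversion this patch introduces. It is illustrative only: the real helpers live in cmd/utils.go, and the local canonicalizeETag below merely assumes the project helper of the same name strips surrounding double quotes; the main() driver is not part of the patch.

package main

import (
	"fmt"
	"strings"
)

// canonicalizeETag - strips surrounding double quotes.
// Assumption: this stands in for the cmd/ helper of the same name for
// illustration; it is not the real implementation.
func canonicalizeETag(etag string) string {
	return strings.Trim(etag, `"`)
}

// toS3ETag - same logic as the helper added in cmd/utils.go: append "-1"
// so clients such as s3cmd do not treat the value as an MD5 checksum.
func toS3ETag(etag string) string {
	etag = canonicalizeETag(etag)
	if !strings.HasSuffix(etag, "-1") {
		etag += "-1"
	}
	return etag
}

func main() {
	inputs := []string{
		`"8019e762"`,                         // quoted backend ETag
		"5d57546eeb86b3eba68967292fba0644",   // raw MD5-looking ID
		"5d57546eeb86b3eba68967292fba0644-1", // already converted, left as-is
	}
	for _, in := range inputs {
		fmt.Printf("%-38s -> %s\n", in, toS3ETag(in))
	}
}

Note that a value already carrying the "-1" suffix is left unchanged, which is why CompleteMultipartUpload in the B2 gateway can safely strip the suffix back off with strings.TrimSuffix before handing the SHA1 back to B2.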