parent 63c65b4635
commit 293d246f95
@@ -0,0 +1,4 @@
{
  "format": "fs",
  "version": "1"
}
@@ -0,0 +1,14 @@
{
  "version": "1",
  "format": "fs",
  "minio": {
    "release": "DEVELOPMENT.GOGET"
  },
  "parts": [
    {
      "name": "object1",
      "size": 29,
      "eTag": ""
    }
  ]
}
@@ -0,0 +1,10 @@
{
  "version": "1",
  "format": "fs",
  "uploadIds": [
    {
      "uploadID": "id",
      "startTime": "time"
    }
  ]
}
@@ -0,0 +1,20 @@
{
  "xl": {
    "jbod": [
      "8aa2b1bc-0e5a-49e0-8221-05228336b040",
      "3467a69b-0266-478a-9e10-e819447e4545",
      "d4a4505b-4e4f-4864-befd-4f36adb0bc66",
      "592b6583-ca26-47af-b991-ba6d097e34e8",
      "c7ef69f0-dbf5-4c0e-b167-d30a441bad7e",
      "f0b36ea3-fe96-4f2b-bced-22c7f33e0e0c",
      "b83abf39-e39d-4e7b-8e16-6f9953455a48",
      "7d63dfc9-5441-4243-bd36-de8db0691982",
      "c1bbffc5-81f9-4251-9398-33a959b3ce37",
      "64408f94-26e0-4277-9593-2d703f4d5a91"
    ],
    "disk": "8aa2b1bc-0e5a-49e0-8221-05228336b040",
    "version": "1"
  },
  "format": "xl",
  "version": "1"
}
@@ -0,0 +1,10 @@
{
  "version": "1",
  "format": "xl",
  "uploadIds": [
    {
      "uploadID": "id",
      "startTime": "time"
    }
  ]
}
@@ -0,0 +1,44 @@
{
  "parts": [
    {
      "size": 5242880,
      "etag": "3565c6e741e69a007a5ac7db893a62b5",
      "name": "object1"
    },
    {
      "size": 5242880,
      "etag": "d416712335c280ab1e39498552937764",
      "name": "object2"
    },
    {
      "size": 4338324,
      "etag": "8a98c5c54d81c6c95ed9bdcaeb941aaf",
      "name": "object3"
    }
  ],
  "meta": {
    "md5Sum": "97586a5290d4f5a41328062d6a7da593-3",
    "content-type": "application\/octet-stream",
    "content-encoding": ""
  },
  "minio": {
    "release": "DEVELOPMENT.GOGET"
  },
  "erasure": {
    "index": 2,
    "distribution": [ 1, 3, 4, 2, 5, 8, 7, 6, 9 ],
    "blockSize": 4194304,
    "parity": 5,
    "data": 5
  },
  "checksum": {
    "enable": false
  },
  "stat": {
    "version": 0,
    "modTime": "2016-05-24T00:09:40.122390255Z",
    "size": 14824084
  },
  "format": "xl",
  "version": "1"
}
@@ -0,0 +1,172 @@
/*
 * Minio Cloud Storage, (C) 2016 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

import (
	"io"
	"sync"
)

// cleanupCreateFileOps - cleans up all the temporary files and other
// temporary data upon any failure.
func (e erasure) cleanupCreateFileOps(volume, path string, writers []io.WriteCloser) {
	// Close and remove temporary writers.
	for _, writer := range writers {
		if err := safeCloseAndRemove(writer); err != nil {
			errorIf(err, "Failed to close writer.")
		}
	}
	// Remove any temporary written data.
	for _, disk := range e.storageDisks {
		if err := disk.DeleteFile(volume, path); err != nil {
			errorIf(err, "Unable to delete file.")
		}
	}
}

// writeErasure reads predefined blocks, encodes them and writes to
// configured storage disks.
func (e erasure) writeErasure(volume, path string, reader *io.PipeReader, wcloser *waitCloser) {
	// Release the block writer upon function return.
	defer wcloser.release()

	writers := make([]io.WriteCloser, len(e.storageDisks))

	// Initialize all writers.
	for index, disk := range e.storageDisks {
		writer, err := disk.CreateFile(volume, path)
		if err != nil {
			e.cleanupCreateFileOps(volume, path, writers)
			reader.CloseWithError(err)
			return
		}
		writers[index] = writer
	}

	// Allocate 4MiB block size buffer for reading.
	dataBuffer := make([]byte, erasureBlockSize)
	for {
		// Read up to allocated block size.
		n, err := io.ReadFull(reader, dataBuffer)
		if err != nil {
			// Any unexpected errors, close the pipe reader with error.
			if err != io.ErrUnexpectedEOF && err != io.EOF {
				// Remove all temp writers.
				e.cleanupCreateFileOps(volume, path, writers)
				reader.CloseWithError(err)
				return
			}
		}
		// At EOF break out.
		if err == io.EOF {
			break
		}
		if n > 0 {
			// Split the input buffer into data and parity blocks.
			var dataBlocks [][]byte
			dataBlocks, err = e.ReedSolomon.Split(dataBuffer[0:n])
			if err != nil {
				// Remove all temp writers.
				e.cleanupCreateFileOps(volume, path, writers)
				reader.CloseWithError(err)
				return
			}

			// Encode parity blocks using data blocks.
			err = e.ReedSolomon.Encode(dataBlocks)
			if err != nil {
				// Remove all temp writers upon error.
				e.cleanupCreateFileOps(volume, path, writers)
				reader.CloseWithError(err)
				return
			}

			var wg = &sync.WaitGroup{}
			var wErrs = make([]error, len(writers))
			// Write encoded data to quorum disks in parallel.
			for index, writer := range writers {
				if writer == nil {
					continue
				}
				wg.Add(1)
				// Write encoded data in routine.
				go func(index int, writer io.Writer) {
					defer wg.Done()
					encodedData := dataBlocks[index]
					_, wErr := writers[index].Write(encodedData)
					if wErr != nil {
						wErrs[index] = wErr
						return
					}
					wErrs[index] = nil
				}(index, writer)
			}
			wg.Wait()

			// Cleanup and return on first non-nil error.
			for _, wErr := range wErrs {
				if wErr == nil {
					continue
				}
				// Remove all temp writers upon error.
				e.cleanupCreateFileOps(volume, path, writers)
				reader.CloseWithError(wErr)
				return
			}
		}
	}

	// Close all the writers.
	for _, writer := range writers {
		if writer == nil {
			continue
		}
		// Safely wrote, now rename to its actual location.
		if err := writer.Close(); err != nil {
			// Remove all temp writers upon error.
			e.cleanupCreateFileOps(volume, path, writers)
			reader.CloseWithError(err)
			return
		}
	}

	// Close the pipe reader and return.
	reader.Close()
	return
}

// CreateFile - create a file.
func (e erasure) CreateFile(volume, path string) (writeCloser io.WriteCloser, err error) {
	if !isValidVolname(volume) {
		return nil, errInvalidArgument
	}
	if !isValidPath(path) {
		return nil, errInvalidArgument
	}

	// Initialize pipe for data pipe line.
	pipeReader, pipeWriter := io.Pipe()

	// Initialize a new wait closer, implements both Write and Close.
	wcloser := newWaitCloser(pipeWriter)

	// Start erasure encoding in routine, reading data block by block from pipeReader.
	go e.writeErasure(volume, path, pipeReader, wcloser)

	// Return the writer, caller should start writing to this.
	return wcloser, nil
}
@@ -0,0 +1,60 @@
/*
 * Minio Cloud Storage, (C) 2016 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

import (
	"errors"

	"github.com/klauspost/reedsolomon"
)

// erasure storage layer.
type erasure struct {
	ReedSolomon  reedsolomon.Encoder // Erasure encoder/decoder.
	DataBlocks   int
	ParityBlocks int
	storageDisks []StorageAPI
}

// errUnexpected - returned for any unexpected error.
var errUnexpected = errors.New("Unexpected error - please report at https://github.com/minio/minio/issues")

// newErasure instantiate a new erasure.
func newErasure(disks []StorageAPI) (*erasure, error) {
	// Initialize E.
	e := &erasure{}

	// Calculate data and parity blocks.
	dataBlocks, parityBlocks := len(disks)/2, len(disks)/2

	// Initialize reed solomon encoding.
	rs, err := reedsolomon.New(dataBlocks, parityBlocks)
	if err != nil {
		return nil, err
	}

	// Save the reedsolomon.
	e.DataBlocks = dataBlocks
	e.ParityBlocks = parityBlocks
	e.ReedSolomon = rs

	// Save all the initialized storage disks.
	e.storageDisks = disks

	// Return successfully initialized.
	return e, nil
}
@ -1,150 +0,0 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"io" |
||||
"path" |
||||
) |
||||
|
||||
// ListMultipartUploads - list multipart uploads.
|
||||
func (fs fsObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) { |
||||
return listMultipartUploadsCommon(fs, bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads) |
||||
} |
||||
|
||||
// NewMultipartUpload - initialize a new multipart upload, returns a unique id.
|
||||
func (fs fsObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) { |
||||
meta = make(map[string]string) // Reset the meta value, we are not going to save headers for fs.
|
||||
return newMultipartUploadCommon(fs.storage, bucket, object, meta) |
||||
} |
||||
|
||||
// PutObjectPart - writes the multipart upload chunks.
|
||||
func (fs fsObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) { |
||||
return putObjectPartCommon(fs.storage, bucket, object, uploadID, partID, size, data, md5Hex) |
||||
} |
||||
|
||||
func (fs fsObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) { |
||||
return listObjectPartsCommon(fs.storage, bucket, object, uploadID, partNumberMarker, maxParts) |
||||
} |
||||
|
||||
func (fs fsObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return "", BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify whether the bucket exists.
|
||||
if !isBucketExist(fs.storage, bucket) { |
||||
return "", BucketNotFound{Bucket: bucket} |
||||
} |
||||
if !IsValidObjectName(object) { |
||||
return "", ObjectNameInvalid{ |
||||
Bucket: bucket, |
||||
Object: object, |
||||
} |
||||
} |
||||
if !isUploadIDExists(fs.storage, bucket, object, uploadID) { |
||||
return "", InvalidUploadID{UploadID: uploadID} |
||||
} |
||||
|
||||
// Calculate s3 compatible md5sum for complete multipart.
|
||||
s3MD5, err := completeMultipartMD5(parts...) |
||||
if err != nil { |
||||
return "", err |
||||
} |
||||
|
||||
tempObj := path.Join(tmpMetaPrefix, bucket, object, uploadID, incompleteFile) |
||||
fileWriter, err := fs.storage.CreateFile(minioMetaBucket, tempObj) |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Loop through all parts, validate them and then commit to disk.
|
||||
for i, part := range parts { |
||||
// Construct part suffix.
|
||||
partSuffix := fmt.Sprintf("%.5d.%s", part.PartNumber, part.ETag) |
||||
multipartPartFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix) |
||||
var fi FileInfo |
||||
fi, err = fs.storage.StatFile(minioMetaBucket, multipartPartFile) |
||||
if err != nil { |
||||
if err == errFileNotFound { |
||||
return "", InvalidPart{} |
||||
} |
||||
return "", err |
||||
} |
||||
// All parts except the last part has to be atleast 5MB.
|
||||
if (i < len(parts)-1) && !isMinAllowedPartSize(fi.Size) { |
||||
return "", PartTooSmall{} |
||||
} |
||||
var fileReader io.ReadCloser |
||||
fileReader, err = fs.storage.ReadFile(minioMetaBucket, multipartPartFile, 0) |
||||
if err != nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", clErr |
||||
} |
||||
if err == errFileNotFound { |
||||
return "", InvalidPart{} |
||||
} |
||||
return "", err |
||||
} |
||||
_, err = io.Copy(fileWriter, fileReader) |
||||
if err != nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", clErr |
||||
} |
||||
return "", err |
||||
} |
||||
err = fileReader.Close() |
||||
if err != nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", clErr |
||||
} |
||||
return "", err |
||||
} |
||||
} |
||||
|
||||
err = fileWriter.Close() |
||||
if err != nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", clErr |
||||
} |
||||
return "", err |
||||
} |
||||
|
||||
// Rename the file back to original location, if not delete the
|
||||
// temporary object.
|
||||
err = fs.storage.RenameFile(minioMetaBucket, tempObj, bucket, object) |
||||
if err != nil { |
||||
if derr := fs.storage.DeleteFile(minioMetaBucket, tempObj); derr != nil { |
||||
return "", toObjectErr(derr, minioMetaBucket, tempObj) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Cleanup all the parts if everything else has been safely committed.
|
||||
if err = cleanupUploadedParts(fs.storage, bucket, object, uploadID); err != nil { |
||||
return "", err |
||||
} |
||||
|
||||
// Return md5sum.
|
||||
return s3MD5, nil |
||||
} |
||||
|
||||
// AbortMultipartUpload - aborts a multipart upload.
|
||||
func (fs fsObjects) AbortMultipartUpload(bucket, object, uploadID string) error { |
||||
return abortMultipartUploadCommon(fs.storage, bucket, object, uploadID) |
||||
} |
@@ -0,0 +1,106 @@
package main

import (
	"bytes"
	"encoding/json"
	"io"
	"path"
	"sort"
)

// A fsMetaV1 represents a metadata header mapping keys to sets of values.
type fsMetaV1 struct {
	Version string `json:"version"`
	Format  string `json:"format"`
	Minio   struct {
		Release string `json:"release"`
	} `json:"minio"`
	Checksum struct {
		Enable bool `json:"enable"`
	} `json:"checksum"`
	Parts []objectPartInfo `json:"parts,omitempty"`
}

// ReadFrom - read from implements io.ReaderFrom interface for
// unmarshalling fsMetaV1.
func (m *fsMetaV1) ReadFrom(reader io.Reader) (n int64, err error) {
	var buffer bytes.Buffer
	n, err = buffer.ReadFrom(reader)
	if err != nil {
		return 0, err
	}
	err = json.Unmarshal(buffer.Bytes(), m)
	return n, err
}

// WriteTo - write to implements io.WriterTo interface for marshalling fsMetaV1.
func (m fsMetaV1) WriteTo(writer io.Writer) (n int64, err error) {
	metadataBytes, err := json.Marshal(m)
	if err != nil {
		return 0, err
	}
	p, err := writer.Write(metadataBytes)
	return int64(p), err
}

// SearchObjectPart - search object part name and etag.
func (m fsMetaV1) SearchObjectPart(name string, etag string) int {
	for i, part := range m.Parts {
		if name == part.Name && etag == part.ETag {
			return i
		}
	}
	return -1
}

// AddObjectPart - add a new object part in order.
func (m *fsMetaV1) AddObjectPart(name string, etag string, size int64) {
	m.Parts = append(m.Parts, objectPartInfo{
		Name: name,
		ETag: etag,
		Size: size,
	})
	sort.Sort(byPartName(m.Parts))
}

const (
	fsMetaJSONFile = "fs.json"
)

// readFSMetadata - read `fs.json`.
func (fs fsObjects) readFSMetadata(bucket, object string) (fsMeta fsMetaV1, err error) {
	r, err := fs.storage.ReadFile(bucket, path.Join(object, fsMetaJSONFile), int64(0))
	if err != nil {
		return fsMetaV1{}, err
	}
	defer r.Close()
	_, err = fsMeta.ReadFrom(r)
	if err != nil {
		return fsMetaV1{}, err
	}
	return fsMeta, nil
}

// writeFSMetadata - write `fs.json`.
func (fs fsObjects) writeFSMetadata(bucket, prefix string, fsMeta fsMetaV1) error {
	// Initialize metadata map, save all erasure related metadata.
	fsMeta.Minio.Release = minioReleaseTag
	w, err := fs.storage.CreateFile(bucket, path.Join(prefix, fsMetaJSONFile))
	if err != nil {
		return err
	}
	_, err = fsMeta.WriteTo(w)
	if err != nil {
		if mErr := safeCloseAndRemove(w); mErr != nil {
			return mErr
		}
		return err
	}
	if err = w.Close(); err != nil {
		if mErr := safeCloseAndRemove(w); mErr != nil {
			return mErr
		}
		return err
	}
	return nil
}
@ -0,0 +1,265 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"math/rand" |
||||
"path" |
||||
"sort" |
||||
"strings" |
||||
"time" |
||||
) |
||||
|
||||
// listParams - list object params used for list object map
|
||||
type listParams struct { |
||||
bucket string |
||||
recursive bool |
||||
marker string |
||||
prefix string |
||||
} |
||||
|
||||
// Tree walk result carries results of tree walking.
|
||||
type treeWalkResult struct { |
||||
objInfo ObjectInfo |
||||
err error |
||||
end bool |
||||
} |
||||
|
||||
// Tree walk notify carries a channel which notifies tree walk
|
||||
// results, additionally it also carries information if treeWalk
|
||||
// should be timedOut.
|
||||
type treeWalker struct { |
||||
ch <-chan treeWalkResult |
||||
timedOut bool |
||||
} |
||||
|
||||
// listDir - listDir.
|
||||
func (xl xlObjects) listDir(bucket, prefixDir string, filter func(entry string) bool) (entries []string, err error) { |
||||
// Count for list errors encountered.
|
||||
var listErrCount = 0 |
||||
|
||||
// Loop through and return the first success entry based on the
|
||||
// selected random disk.
|
||||
for listErrCount < len(xl.storageDisks) { |
||||
// Choose a random disk on each attempt, do not hit the same disk all the time.
|
||||
randIndex := rand.Intn(len(xl.storageDisks) - 1) |
||||
disk := xl.storageDisks[randIndex] // Pick a random disk.
|
||||
if entries, err = disk.ListDir(bucket, prefixDir); err == nil { |
||||
// Skip the entries which do not match the filter.
|
||||
for i, entry := range entries { |
||||
if filter(entry) { |
||||
entries[i] = "" |
||||
continue |
||||
} |
||||
if strings.HasSuffix(entry, slashSeparator) && xl.isObject(bucket, path.Join(prefixDir, entry)) { |
||||
entries[i] = strings.TrimSuffix(entry, slashSeparator) |
||||
} |
||||
} |
||||
sort.Strings(entries) |
||||
// Skip the empty strings
|
||||
for len(entries) > 0 && entries[0] == "" { |
||||
entries = entries[1:] |
||||
} |
||||
return entries, nil |
||||
} |
||||
listErrCount++ // Update list error count.
|
||||
} |
||||
|
||||
// Return error at the end.
|
||||
return nil, err |
||||
} |
||||
|
||||
// getRandomDisk - gives a random disk at any point in time from the
|
||||
// available disk pool.
|
||||
func (xl xlObjects) getRandomDisk() (disk StorageAPI) { |
||||
randIndex := rand.Intn(len(xl.storageDisks) - 1) |
||||
disk = xl.storageDisks[randIndex] // Pick a random disk.
|
||||
return disk |
||||
} |
||||
|
||||
// treeWalkXL walks directory tree recursively pushing fileInfo into the channel as and when it encounters files.
|
||||
func (xl xlObjects) treeWalkXL(bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, send func(treeWalkResult) bool, count *int) bool { |
||||
// Example:
|
||||
// if prefixDir="one/two/three/" and marker="four/five.txt" treeWalk is recursively
|
||||
// called with prefixDir="one/two/three/four/" and marker="five.txt"
|
||||
|
||||
// Convert entry to FileInfo
|
||||
entryToObjectInfo := func(entry string) (objInfo ObjectInfo, err error) { |
||||
if strings.HasSuffix(entry, slashSeparator) { |
||||
// Object name needs to be full path.
|
||||
objInfo.Bucket = bucket |
||||
objInfo.Name = path.Join(prefixDir, entry) |
||||
objInfo.Name += slashSeparator |
||||
objInfo.IsDir = true |
||||
return objInfo, nil |
||||
} |
||||
// Set the Mode to a "regular" file.
|
||||
return xl.getObjectInfo(bucket, path.Join(prefixDir, entry)) |
||||
} |
||||
|
||||
var markerBase, markerDir string |
||||
if marker != "" { |
||||
// Ex: if marker="four/five.txt", markerDir="four/" markerBase="five.txt"
|
||||
markerSplit := strings.SplitN(marker, slashSeparator, 2) |
||||
markerDir = markerSplit[0] |
||||
if len(markerSplit) == 2 { |
||||
markerDir += slashSeparator |
||||
markerBase = markerSplit[1] |
||||
} |
||||
} |
||||
entries, err := xl.listDir(bucket, prefixDir, func(entry string) bool { |
||||
return !strings.HasPrefix(entry, entryPrefixMatch) |
||||
}) |
||||
if err != nil { |
||||
send(treeWalkResult{err: err}) |
||||
return false |
||||
} |
||||
if len(entries) == 0 { |
||||
return true |
||||
} |
||||
|
||||
// example:
|
||||
// If markerDir="four/" Search() returns the index of "four/" in the sorted
|
||||
// entries list so we skip all the entries till "four/"
|
||||
idx := sort.Search(len(entries), func(i int) bool { |
||||
return entries[i] >= markerDir |
||||
}) |
||||
entries = entries[idx:] |
||||
*count += len(entries) |
||||
for i, entry := range entries { |
||||
if i == 0 && markerDir == entry { |
||||
if !recursive { |
||||
// Skip as the marker would already be listed in the previous listing.
|
||||
*count-- |
||||
continue |
||||
} |
||||
if recursive && !strings.HasSuffix(entry, slashSeparator) { |
||||
// We should not skip for recursive listing and if markerDir is a directory
|
||||
// for ex. if marker is "four/five.txt" markerDir will be "four/" which
|
||||
// should not be skipped, instead it will need to be treeWalkXL()'ed into.
|
||||
|
||||
// Skip if it is a file though as it would be listed in previous listing.
|
||||
*count-- |
||||
continue |
||||
} |
||||
} |
||||
|
||||
if recursive && strings.HasSuffix(entry, slashSeparator) { |
||||
// If the entry is a directory, we will need recurse into it.
|
||||
markerArg := "" |
||||
if entry == markerDir { |
||||
// We need to pass "five.txt" as marker only if we are
|
||||
// recursing into "four/"
|
||||
markerArg = markerBase |
||||
} |
||||
*count-- |
||||
prefixMatch := "" // Valid only for first level treeWalk and empty for subdirectories.
|
||||
if !xl.treeWalkXL(bucket, path.Join(prefixDir, entry), prefixMatch, markerArg, recursive, send, count) { |
||||
return false |
||||
} |
||||
continue |
||||
} |
||||
*count-- |
||||
objInfo, err := entryToObjectInfo(entry) |
||||
if err != nil { |
||||
// The file got deleted in the interim between ListDir() and StatFile()
|
||||
// Ignore error and continue.
|
||||
continue |
||||
} |
||||
if !send(treeWalkResult{objInfo: objInfo}) { |
||||
return false |
||||
} |
||||
} |
||||
return true |
||||
} |
||||
|
||||
// Initiate a new treeWalk in a goroutine.
|
||||
func (xl xlObjects) startTreeWalkXL(bucket, prefix, marker string, recursive bool) *treeWalker { |
||||
// Example 1
|
||||
// If prefix is "one/two/three/" and marker is "one/two/three/four/five.txt"
|
||||
// treeWalk is called with prefixDir="one/two/three/" and marker="four/five.txt"
|
||||
// and entryPrefixMatch=""
|
||||
|
||||
// Example 2
|
||||
// if prefix is "one/two/th" and marker is "one/two/three/four/five.txt"
|
||||
// treeWalk is called with prefixDir="one/two/" and marker="three/four/five.txt"
|
||||
// and entryPrefixMatch="th"
|
||||
|
||||
ch := make(chan treeWalkResult, maxObjectList) |
||||
walkNotify := treeWalker{ch: ch} |
||||
entryPrefixMatch := prefix |
||||
prefixDir := "" |
||||
lastIndex := strings.LastIndex(prefix, slashSeparator) |
||||
if lastIndex != -1 { |
||||
entryPrefixMatch = prefix[lastIndex+1:] |
||||
prefixDir = prefix[:lastIndex+1] |
||||
} |
||||
count := 0 |
||||
marker = strings.TrimPrefix(marker, prefixDir) |
||||
go func() { |
||||
defer close(ch) |
||||
send := func(walkResult treeWalkResult) bool { |
||||
if count == 0 { |
||||
walkResult.end = true |
||||
} |
||||
timer := time.After(time.Second * 60) |
||||
select { |
||||
case ch <- walkResult: |
||||
return true |
||||
case <-timer: |
||||
walkNotify.timedOut = true |
||||
return false |
||||
} |
||||
} |
||||
xl.treeWalkXL(bucket, prefixDir, entryPrefixMatch, marker, recursive, send, &count) |
||||
}() |
||||
return &walkNotify |
||||
} |
||||
|
||||
// Save the goroutine reference in the map
|
||||
func (xl xlObjects) saveTreeWalkXL(params listParams, walker *treeWalker) { |
||||
xl.listObjectMapMutex.Lock() |
||||
defer xl.listObjectMapMutex.Unlock() |
||||
|
||||
walkers, _ := xl.listObjectMap[params] |
||||
walkers = append(walkers, walker) |
||||
|
||||
xl.listObjectMap[params] = walkers |
||||
} |
||||
|
||||
// Lookup the goroutine reference from map
|
||||
func (xl xlObjects) lookupTreeWalkXL(params listParams) *treeWalker { |
||||
xl.listObjectMapMutex.Lock() |
||||
defer xl.listObjectMapMutex.Unlock() |
||||
|
||||
if walkChs, ok := xl.listObjectMap[params]; ok { |
||||
for i, walkCh := range walkChs { |
||||
if !walkCh.timedOut { |
||||
newWalkChs := walkChs[i+1:] |
||||
if len(newWalkChs) > 0 { |
||||
xl.listObjectMap[params] = newWalkChs |
||||
} else { |
||||
delete(xl.listObjectMap, params) |
||||
} |
||||
return walkCh |
||||
} |
||||
} |
||||
// As all channels are timed out, delete the map entry
|
||||
delete(xl.listObjectMap, params) |
||||
} |
||||
return nil |
||||
} |
@ -1,204 +0,0 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"errors" |
||||
slashpath "path" |
||||
"sync" |
||||
) |
||||
|
||||
// Get the highest integer from a given integer slice.
|
||||
func highestInt(intSlice []int64) (highestInteger int64) { |
||||
highestInteger = int64(0) |
||||
for _, integer := range intSlice { |
||||
if highestInteger < integer { |
||||
highestInteger = integer |
||||
} |
||||
} |
||||
return highestInteger |
||||
} |
||||
|
||||
// Extracts file versions from partsMetadata slice and returns version slice.
|
||||
func listFileVersions(partsMetadata []xlMetaV1, errs []error) (versions []int64) { |
||||
versions = make([]int64, len(partsMetadata)) |
||||
for index, metadata := range partsMetadata { |
||||
if errs[index] == nil { |
||||
versions[index] = metadata.Stat.Version |
||||
} else { |
||||
versions[index] = -1 |
||||
} |
||||
} |
||||
return versions |
||||
} |
||||
|
||||
// reduceError - convert collection of errors into a single
|
||||
// error based on total errors and read quorum.
|
||||
func (xl XL) reduceError(errs []error) error { |
||||
fileNotFoundCount := 0 |
||||
diskNotFoundCount := 0 |
||||
volumeNotFoundCount := 0 |
||||
diskAccessDeniedCount := 0 |
||||
for _, err := range errs { |
||||
if err == errFileNotFound { |
||||
fileNotFoundCount++ |
||||
} else if err == errDiskNotFound { |
||||
diskNotFoundCount++ |
||||
} else if err == errVolumeAccessDenied { |
||||
diskAccessDeniedCount++ |
||||
} else if err == errVolumeNotFound { |
||||
volumeNotFoundCount++ |
||||
} |
||||
} |
||||
// If we have errors with 'file not found' greater than
|
||||
// readQuorum, return as errFileNotFound.
|
||||
// else if we have errors with 'volume not found' greater than
|
||||
// readQuorum, return as errVolumeNotFound.
|
||||
if fileNotFoundCount > len(xl.storageDisks)-xl.readQuorum { |
||||
return errFileNotFound |
||||
} else if volumeNotFoundCount > len(xl.storageDisks)-xl.readQuorum { |
||||
return errVolumeNotFound |
||||
} |
||||
// If we have errors with disk not found equal to the
|
||||
// number of disks, return as errDiskNotFound.
|
||||
if diskNotFoundCount == len(xl.storageDisks) { |
||||
return errDiskNotFound |
||||
} else if diskNotFoundCount > len(xl.storageDisks)-xl.readQuorum { |
||||
// If we have errors with 'disk not found' greater than
|
||||
// readQuorum, return as errFileNotFound.
|
||||
return errFileNotFound |
||||
} |
||||
// If we have errors with disk not found equal to the
|
||||
// number of disks, return as errDiskNotFound.
|
||||
if diskAccessDeniedCount == len(xl.storageDisks) { |
||||
return errVolumeAccessDenied |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// Returns slice of online disks needed.
|
||||
// - slice returing readable disks.
|
||||
// - xlMetaV1
|
||||
// - bool value indicating if healing is needed.
|
||||
// - error if any.
|
||||
func (xl XL) listOnlineDisks(volume, path string) (onlineDisks []StorageAPI, mdata xlMetaV1, heal bool, err error) { |
||||
partsMetadata, errs := xl.getPartsMetadata(volume, path) |
||||
if err = xl.reduceError(errs); err != nil { |
||||
return nil, xlMetaV1{}, false, err |
||||
} |
||||
highestVersion := int64(0) |
||||
onlineDisks = make([]StorageAPI, len(xl.storageDisks)) |
||||
// List all the file versions from partsMetadata list.
|
||||
versions := listFileVersions(partsMetadata, errs) |
||||
|
||||
// Get highest file version.
|
||||
highestVersion = highestInt(versions) |
||||
|
||||
// Pick online disks with version set to highestVersion.
|
||||
onlineDiskCount := 0 |
||||
for index, version := range versions { |
||||
if version == highestVersion { |
||||
mdata = partsMetadata[index] |
||||
onlineDisks[index] = xl.storageDisks[index] |
||||
onlineDiskCount++ |
||||
} else { |
||||
onlineDisks[index] = nil |
||||
} |
||||
} |
||||
|
||||
// If online disks count is lesser than configured disks, most
|
||||
// probably we need to heal the file, additionally verify if the
|
||||
// count is lesser than readQuorum, if not we throw an error.
|
||||
if onlineDiskCount < len(xl.storageDisks) { |
||||
// Online disks lesser than total storage disks, needs to be
|
||||
// healed. unless we do not have readQuorum.
|
||||
heal = true |
||||
// Verify if online disks count are lesser than readQuorum
|
||||
// threshold, return an error if yes.
|
||||
if onlineDiskCount < xl.readQuorum { |
||||
return nil, xlMetaV1{}, false, errReadQuorum |
||||
} |
||||
} |
||||
return onlineDisks, mdata, heal, nil |
||||
} |
||||
|
||||
// Get file.json metadata as a map slice.
|
||||
// Returns error slice indicating the failed metadata reads.
|
||||
// Read lockNS() should be done by caller.
|
||||
func (xl XL) getPartsMetadata(volume, path string) ([]xlMetaV1, []error) { |
||||
errs := make([]error, len(xl.storageDisks)) |
||||
metadataArray := make([]xlMetaV1, len(xl.storageDisks)) |
||||
xlMetaV1FilePath := slashpath.Join(path, xlMetaV1File) |
||||
var wg = &sync.WaitGroup{} |
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
offset := int64(0) |
||||
metadataReader, err := disk.ReadFile(volume, xlMetaV1FilePath, offset) |
||||
if err != nil { |
||||
errs[index] = err |
||||
return |
||||
} |
||||
defer metadataReader.Close() |
||||
|
||||
metadata, err := xlMetaV1Decode(metadataReader) |
||||
if err != nil { |
||||
// Unable to parse file.json, set error.
|
||||
errs[index] = err |
||||
return |
||||
} |
||||
metadataArray[index] = metadata |
||||
}(index, disk) |
||||
} |
||||
wg.Wait() |
||||
return metadataArray, errs |
||||
} |
||||
|
||||
// Writes/Updates `file.json` for given file. updateParts carries
|
||||
// index of disks where `file.json` needs to be updated.
|
||||
//
|
||||
// Returns collection of errors, indexed in accordance with input
|
||||
// updateParts order.
|
||||
// Write lockNS() should be done by caller.
|
||||
func (xl XL) updatePartsMetadata(volume, path string, metadata xlMetaV1, updateParts []bool) []error { |
||||
xlMetaV1FilePath := pathJoin(path, xlMetaV1File) |
||||
errs := make([]error, len(xl.storageDisks)) |
||||
|
||||
for index := range updateParts { |
||||
errs[index] = errors.New("Metadata not updated") |
||||
} |
||||
|
||||
for index, shouldUpdate := range updateParts { |
||||
if !shouldUpdate { |
||||
continue |
||||
} |
||||
writer, err := xl.storageDisks[index].CreateFile(volume, xlMetaV1FilePath) |
||||
errs[index] = err |
||||
if err != nil { |
||||
continue |
||||
} |
||||
err = metadata.Write(writer) |
||||
if err != nil { |
||||
errs[index] = err |
||||
safeCloseAndRemove(writer) |
||||
continue |
||||
} |
||||
writer.Close() |
||||
} |
||||
return errs |
||||
} |
@ -1,287 +0,0 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"io" |
||||
slashpath "path" |
||||
"sync" |
||||
"time" |
||||
) |
||||
|
||||
// Erasure block size.
|
||||
const erasureBlockSize = 4 * 1024 * 1024 // 4MiB.
|
||||
|
||||
// cleanupCreateFileOps - cleans up all the temporary files and other
|
||||
// temporary data upon any failure.
|
||||
func (xl XL) cleanupCreateFileOps(volume, path string, writers ...io.WriteCloser) { |
||||
closeAndRemoveWriters(writers...) |
||||
for _, disk := range xl.storageDisks { |
||||
if err := disk.DeleteFile(volume, path); err != nil { |
||||
errorIf(err, "Unable to delete file.") |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Close and remove writers if they are safeFile.
|
||||
func closeAndRemoveWriters(writers ...io.WriteCloser) { |
||||
for _, writer := range writers { |
||||
if err := safeCloseAndRemove(writer); err != nil { |
||||
errorIf(err, "Failed to close writer.") |
||||
} |
||||
} |
||||
} |
||||
|
||||
// WriteErasure reads predefined blocks, encodes them and writes to
|
||||
// configured storage disks.
|
||||
func (xl XL) writeErasure(volume, path string, reader *io.PipeReader, wcloser *waitCloser) { |
||||
// Release the block writer upon function return.
|
||||
defer wcloser.release() |
||||
|
||||
partsMetadata, errs := xl.getPartsMetadata(volume, path) |
||||
|
||||
// Convert errs into meaningful err to be sent upwards if possible
|
||||
// based on total number of errors and read quorum.
|
||||
err := xl.reduceError(errs) |
||||
if err != nil && err != errFileNotFound { |
||||
reader.CloseWithError(err) |
||||
return |
||||
} |
||||
|
||||
// List all the file versions on existing files.
|
||||
versions := listFileVersions(partsMetadata, errs) |
||||
// Get highest file version.
|
||||
higherVersion := highestInt(versions) |
||||
// Increment to have next higher version.
|
||||
higherVersion++ |
||||
|
||||
writers := make([]io.WriteCloser, len(xl.storageDisks)) |
||||
|
||||
xlMetaV1FilePath := slashpath.Join(path, xlMetaV1File) |
||||
metadataWriters := make([]io.WriteCloser, len(xl.storageDisks)) |
||||
|
||||
// Save additional erasureMetadata.
|
||||
modTime := time.Now().UTC() |
||||
|
||||
createFileError := 0 |
||||
for index, disk := range xl.storageDisks { |
||||
erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index)) |
||||
var writer io.WriteCloser |
||||
writer, err = disk.CreateFile(volume, erasurePart) |
||||
if err != nil { |
||||
// Treat errFileNameTooLong specially
|
||||
if err == errFileNameTooLong { |
||||
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...) |
||||
reader.CloseWithError(err) |
||||
return |
||||
} |
||||
|
||||
createFileError++ |
||||
|
||||
// We can safely allow CreateFile errors up to len(xl.storageDisks) - xl.writeQuorum
|
||||
// otherwise return failure.
|
||||
if createFileError <= len(xl.storageDisks)-xl.writeQuorum { |
||||
continue |
||||
} |
||||
|
||||
// Remove previous temp writers for any failure.
|
||||
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...) |
||||
reader.CloseWithError(errWriteQuorum) |
||||
return |
||||
} |
||||
|
||||
// Create meta data file.
|
||||
var metadataWriter io.WriteCloser |
||||
metadataWriter, err = disk.CreateFile(volume, xlMetaV1FilePath) |
||||
if err != nil { |
||||
createFileError++ |
||||
|
||||
// We can safely allow CreateFile errors up to
|
||||
// len(xl.storageDisks) - xl.writeQuorum otherwise return failure.
|
||||
if createFileError <= len(xl.storageDisks)-xl.writeQuorum { |
||||
continue |
||||
} |
||||
|
||||
// Remove previous temp writers for any failure.
|
||||
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...) |
||||
reader.CloseWithError(errWriteQuorum) |
||||
return |
||||
} |
||||
|
||||
writers[index] = writer |
||||
metadataWriters[index] = metadataWriter |
||||
} |
||||
|
||||
// Allocate 4MiB block size buffer for reading.
|
||||
dataBuffer := make([]byte, erasureBlockSize) |
||||
var totalSize int64 // Saves total incoming stream size.
|
||||
for { |
||||
// Read up to allocated block size.
|
||||
var n int |
||||
n, err = io.ReadFull(reader, dataBuffer) |
||||
if err != nil { |
||||
// Any unexpected errors, close the pipe reader with error.
|
||||
if err != io.ErrUnexpectedEOF && err != io.EOF { |
||||
// Remove all temp writers.
|
||||
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...) |
||||
reader.CloseWithError(err) |
||||
return |
||||
} |
||||
} |
||||
// At EOF break out.
|
||||
if err == io.EOF { |
||||
break |
||||
} |
||||
if n > 0 { |
||||
// Split the input buffer into data and parity blocks.
|
||||
var dataBlocks [][]byte |
||||
dataBlocks, err = xl.ReedSolomon.Split(dataBuffer[0:n]) |
||||
if err != nil { |
||||
// Remove all temp writers.
|
||||
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...) |
||||
reader.CloseWithError(err) |
||||
return |
||||
} |
||||
|
||||
// Encode parity blocks using data blocks.
|
||||
err = xl.ReedSolomon.Encode(dataBlocks) |
||||
if err != nil { |
||||
// Remove all temp writers upon error.
|
||||
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...) |
||||
reader.CloseWithError(err) |
||||
return |
||||
} |
||||
|
||||
var wg = &sync.WaitGroup{} |
||||
var wErrs = make([]error, len(writers)) |
||||
// Loop through and write encoded data to quorum disks.
|
||||
for index, writer := range writers { |
||||
if writer == nil { |
||||
continue |
||||
} |
||||
wg.Add(1) |
||||
go func(index int, writer io.Writer) { |
||||
defer wg.Done() |
||||
encodedData := dataBlocks[index] |
||||
_, wErr := writers[index].Write(encodedData) |
||||
wErrs[index] = wErr |
||||
}(index, writer) |
||||
} |
||||
wg.Wait() |
||||
for _, wErr := range wErrs { |
||||
if wErr == nil { |
||||
continue |
||||
} |
||||
// Remove all temp writers upon error.
|
||||
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...) |
||||
reader.CloseWithError(wErr) |
||||
return |
||||
} |
||||
|
||||
// Update total written.
|
||||
totalSize += int64(n) |
||||
} |
||||
} |
||||
|
||||
// Initialize metadata map, save all erasure related metadata.
|
||||
metadata := xlMetaV1{} |
||||
metadata.Version = "1" |
||||
metadata.Stat.Size = totalSize |
||||
metadata.Stat.ModTime = modTime |
||||
metadata.Minio.Release = minioReleaseTag |
||||
if len(xl.storageDisks) > len(writers) { |
||||
// Save file.version only if we wrote to less disks than all
|
||||
// storage disks.
|
||||
metadata.Stat.Version = higherVersion |
||||
} |
||||
metadata.Erasure.DataBlocks = xl.DataBlocks |
||||
metadata.Erasure.ParityBlocks = xl.ParityBlocks |
||||
metadata.Erasure.BlockSize = erasureBlockSize |
||||
|
||||
// Write all the metadata.
|
||||
// below case is not handled here
|
||||
// Case: when storageDisks is 16 and write quorumDisks is 13,
|
||||
// meta data write failure up to 2 can be considered.
|
||||
// currently we fail for any meta data writes
|
||||
for _, metadataWriter := range metadataWriters { |
||||
if metadataWriter == nil { |
||||
continue |
||||
} |
||||
|
||||
// Write metadata.
|
||||
err = metadata.Write(metadataWriter) |
||||
if err != nil { |
||||
// Remove temporary files.
|
||||
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...) |
||||
reader.CloseWithError(err) |
||||
return |
||||
} |
||||
} |
||||
|
||||
// Close all writers and metadata writers in routines.
|
||||
for index, writer := range writers { |
||||
if writer == nil { |
||||
continue |
||||
} |
||||
// Safely wrote, now rename to its actual location.
|
||||
if err = writer.Close(); err != nil { |
||||
// Remove all temp writers upon error.
|
||||
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...) |
||||
reader.CloseWithError(err) |
||||
return |
||||
} |
||||
|
||||
if metadataWriters[index] == nil { |
||||
continue |
||||
} |
||||
// Safely wrote, now rename to its actual location.
|
||||
if err = metadataWriters[index].Close(); err != nil { |
||||
// Remove all temp writers upon error.
|
||||
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...) |
||||
reader.CloseWithError(err) |
||||
return |
||||
} |
||||
|
||||
} |
||||
|
||||
// Close the pipe reader and return.
|
||||
reader.Close() |
||||
return |
||||
} |
||||
|
||||
// CreateFile - create a file.
|
||||
func (xl XL) CreateFile(volume, path string) (writeCloser io.WriteCloser, err error) { |
||||
if !isValidVolname(volume) { |
||||
return nil, errInvalidArgument |
||||
} |
||||
if !isValidPath(path) { |
||||
return nil, errInvalidArgument |
||||
} |
||||
|
||||
// Initialize pipe for data pipe line.
|
||||
pipeReader, pipeWriter := io.Pipe() |
||||
|
||||
// Initialize a new wait closer, implements both Write and Close.
|
||||
wcloser := newWaitCloser(pipeWriter) |
||||
|
||||
// Start erasure encoding in routine, reading data block by block from pipeReader.
|
||||
go xl.writeErasure(volume, path, pipeReader, wcloser) |
||||
|
||||
// Return the writer, caller should start writing to this.
|
||||
return wcloser, nil |
||||
} |
@ -1,185 +0,0 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"errors" |
||||
"fmt" |
||||
"io" |
||||
slashpath "path" |
||||
) |
||||
|
||||
// healHeal - heals the file at path.
|
||||
func (xl XL) healFile(volume string, path string) error { |
||||
totalBlocks := xl.DataBlocks + xl.ParityBlocks |
||||
needsHeal := make([]bool, totalBlocks) |
||||
var readers = make([]io.Reader, totalBlocks) |
||||
var writers = make([]io.WriteCloser, totalBlocks) |
||||
|
||||
// List all online disks to verify if we need to heal.
|
||||
onlineDisks, metadata, heal, err := xl.listOnlineDisks(volume, path) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
if !heal { |
||||
return nil |
||||
} |
||||
|
||||
for index, disk := range onlineDisks { |
||||
if disk == nil { |
||||
needsHeal[index] = true |
||||
continue |
||||
} |
||||
erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index)) |
||||
// If disk.ReadFile returns error and we don't have read quorum it will be taken care as
|
||||
// ReedSolomon.Reconstruct() will fail later.
|
||||
var reader io.ReadCloser |
||||
offset := int64(0) |
||||
if reader, err = xl.storageDisks[index].ReadFile(volume, erasurePart, offset); err == nil { |
||||
readers[index] = reader |
||||
defer reader.Close() |
||||
} |
||||
} |
||||
|
||||
// create writers for parts where healing is needed.
|
||||
for index, healNeeded := range needsHeal { |
||||
if !healNeeded { |
||||
continue |
||||
} |
||||
erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index)) |
||||
writers[index], err = xl.storageDisks[index].CreateFile(volume, erasurePart) |
||||
if err != nil { |
||||
needsHeal[index] = false |
||||
safeCloseAndRemove(writers[index]) |
||||
continue |
||||
} |
||||
} |
||||
|
||||
// Check if there is atleast one part that needs to be healed.
|
||||
atleastOneHeal := false |
||||
for _, healNeeded := range needsHeal { |
||||
if healNeeded { |
||||
atleastOneHeal = true |
||||
break |
||||
} |
||||
} |
||||
if !atleastOneHeal { |
||||
// Return if healing not needed anywhere.
|
||||
return nil |
||||
} |
||||
|
||||
var totalLeft = metadata.Stat.Size |
||||
for totalLeft > 0 { |
||||
// Figure out the right blockSize.
|
||||
var curBlockSize int64 |
||||
if metadata.Erasure.BlockSize < totalLeft { |
||||
curBlockSize = metadata.Erasure.BlockSize |
||||
} else { |
||||
curBlockSize = totalLeft |
||||
} |
||||
// Calculate the current block size.
|
||||
curBlockSize = getEncodedBlockLen(curBlockSize, metadata.Erasure.DataBlocks) |
||||
enBlocks := make([][]byte, totalBlocks) |
||||
// Loop through all readers and read.
|
||||
for index, reader := range readers { |
||||
// Initialize block slice and fill the data from each parts.
|
||||
// ReedSolomon.Verify() expects that slice is not nil even if the particular
|
||||
// part needs healing.
|
||||
enBlocks[index] = make([]byte, curBlockSize) |
||||
if needsHeal[index] { |
||||
// Skip reading if the part needs healing.
|
||||
continue |
||||
} |
||||
if reader == nil { |
||||
// If ReadFile() had returned error, do not read from this disk.
|
||||
continue |
||||
} |
||||
_, err = io.ReadFull(reader, enBlocks[index]) |
||||
if err != nil && err != io.ErrUnexpectedEOF { |
||||
enBlocks[index] = nil |
||||
} |
||||
} |
||||
|
||||
// Check blocks if they are all zero in length.
|
||||
if checkBlockSize(enBlocks) == 0 { |
||||
return errDataCorrupt |
||||
} |
||||
|
||||
// Verify the blocks.
|
||||
ok, err := xl.ReedSolomon.Verify(enBlocks) |
||||
if err != nil { |
||||
closeAndRemoveWriters(writers...) |
||||
return err |
||||
} |
||||
|
||||
// Verification failed, blocks require reconstruction.
|
||||
if !ok { |
||||
for index, healNeeded := range needsHeal { |
||||
if healNeeded { |
||||
// Reconstructs() reconstructs the parts if the array is nil.
|
||||
enBlocks[index] = nil |
||||
} |
||||
} |
||||
err = xl.ReedSolomon.Reconstruct(enBlocks) |
||||
if err != nil { |
||||
closeAndRemoveWriters(writers...) |
||||
return err |
||||
} |
||||
// Verify reconstructed blocks again.
|
||||
ok, err = xl.ReedSolomon.Verify(enBlocks) |
||||
if err != nil { |
||||
closeAndRemoveWriters(writers...) |
||||
return err |
||||
} |
||||
if !ok { |
||||
// Blocks cannot be reconstructed, corrupted data.
|
||||
err = errors.New("Verification failed after reconstruction, data likely corrupted.") |
||||
closeAndRemoveWriters(writers...) |
||||
return err |
||||
} |
||||
} |
||||
for index, healNeeded := range needsHeal { |
||||
if !healNeeded { |
||||
continue |
||||
} |
||||
_, err := writers[index].Write(enBlocks[index]) |
||||
if err != nil { |
||||
safeCloseAndRemove(writers[index]) |
||||
continue |
||||
} |
||||
} |
||||
totalLeft = totalLeft - metadata.Erasure.BlockSize |
||||
} |
||||
|
||||
// After successful healing Close() the writer so that the temp
|
||||
// files are committed to their location.
|
||||
for _, writer := range writers { |
||||
if writer == nil { |
||||
continue |
||||
} |
||||
writer.Close() |
||||
} |
||||
|
||||
// Update the quorum metadata after heal.
|
||||
errs := xl.updatePartsMetadata(volume, path, metadata, needsHeal) |
||||
for index, healNeeded := range needsHeal { |
||||
if healNeeded && errs[index] != nil { |
||||
return errs[index] |
||||
} |
||||
} |
||||
return nil |
||||
} |
@ -1,61 +0,0 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"io" |
||||
"time" |
||||
) |
||||
|
||||
// A xlMetaV1 represents a metadata header mapping keys to sets of values.
|
||||
type xlMetaV1 struct { |
||||
Version string `json:"version"` |
||||
Stat struct { |
||||
Size int64 `json:"size"` |
||||
ModTime time.Time `json:"modTime"` |
||||
Version int64 `json:"version"` |
||||
} `json:"stat"` |
||||
Erasure struct { |
||||
DataBlocks int `json:"data"` |
||||
ParityBlocks int `json:"parity"` |
||||
BlockSize int64 `json:"blockSize"` |
||||
} `json:"erasure"` |
||||
Minio struct { |
||||
Release string `json:"release"` |
||||
} `json:"minio"` |
||||
} |
||||
|
||||
// Write writes a metadata in wire format.
|
||||
func (m xlMetaV1) Write(writer io.Writer) error { |
||||
metadataBytes, err := json.Marshal(m) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
_, err = writer.Write(metadataBytes) |
||||
return err |
||||
} |
||||
|
||||
// xlMetaV1Decode - file metadata decode.
|
||||
func xlMetaV1Decode(reader io.Reader) (metadata xlMetaV1, err error) { |
||||
decoder := json.NewDecoder(reader) |
||||
// Unmarshalling failed, file possibly corrupted.
|
||||
if err = decoder.Decode(&metadata); err != nil { |
||||
return xlMetaV1{}, err |
||||
} |
||||
return metadata, nil |
||||
} |
@ -1,546 +0,0 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"errors" |
||||
"fmt" |
||||
"math/rand" |
||||
"os" |
||||
slashpath "path" |
||||
"strings" |
||||
|
||||
"path" |
||||
"sync" |
||||
|
||||
"github.com/klauspost/reedsolomon" |
||||
) |
||||
|
||||
const ( |
||||
// XL erasure metadata file.
|
||||
xlMetaV1File = "file.json" |
||||
) |
||||
|
||||
// XL layer structure.
|
||||
type XL struct { |
||||
ReedSolomon reedsolomon.Encoder // Erasure encoder/decoder.
|
||||
DataBlocks int |
||||
ParityBlocks int |
||||
storageDisks []StorageAPI |
||||
readQuorum int |
||||
writeQuorum int |
||||
} |
||||
|
||||
// errUnexpected - returned for any unexpected error.
|
||||
var errUnexpected = errors.New("Unexpected error - please report at https://github.com/minio/minio/issues") |
||||
|
||||
// newXL instantiate a new XL.
|
||||
func newXL(disks []StorageAPI) (StorageAPI, error) { |
||||
// Initialize XL.
|
||||
xl := &XL{} |
||||
|
||||
// Calculate data and parity blocks.
|
||||
dataBlocks, parityBlocks := len(disks)/2, len(disks)/2 |
||||
|
||||
// Initialize reed solomon encoding.
|
||||
rs, err := reedsolomon.New(dataBlocks, parityBlocks) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
// Save the reedsolomon.
|
||||
xl.DataBlocks = dataBlocks |
||||
xl.ParityBlocks = parityBlocks |
||||
xl.ReedSolomon = rs |
||||
|
||||
// Save all the initialized storage disks.
|
||||
xl.storageDisks = disks |
||||
|
||||
// Figure out read and write quorum based on number of storage disks.
|
||||
// Read quorum should be always N/2 + 1 (due to Vandermonde matrix
|
||||
// erasure requirements)
|
||||
xl.readQuorum = len(xl.storageDisks)/2 + 1 |
||||
|
||||
// Write quorum is assumed if we have total disks + 3
|
||||
// parity. (Need to discuss this again)
|
||||
xl.writeQuorum = len(xl.storageDisks)/2 + 3 |
||||
if xl.writeQuorum > len(xl.storageDisks) { |
||||
xl.writeQuorum = len(xl.storageDisks) |
||||
} |
||||
|
||||
// Return successfully initialized.
|
||||
return xl, nil |
||||
} |
||||
|
||||
// MakeVol - make a volume.
|
||||
func (xl XL) MakeVol(volume string) error { |
||||
if !isValidVolname(volume) { |
||||
return errInvalidArgument |
||||
} |
||||
|
||||
// Err counters.
|
||||
createVolErr := 0 // Count generic create vol errs.
|
||||
volumeExistsErrCnt := 0 // Count all errVolumeExists errs.
|
||||
|
||||
// Initialize sync waitgroup.
|
||||
var wg = &sync.WaitGroup{} |
||||
|
||||
// Initialize list of errors.
|
||||
var dErrs = make([]error, len(xl.storageDisks)) |
||||
|
||||
// Make a volume entry on all underlying storage disks.
|
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
// Make a volume inside a go-routine.
|
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
if disk == nil { |
||||
return |
||||
} |
||||
dErrs[index] = disk.MakeVol(volume) |
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all make vol to finish.
|
||||
wg.Wait() |
||||
|
||||
// Loop through all the concocted errors.
|
||||
for _, err := range dErrs { |
||||
if err == nil { |
||||
continue |
||||
} |
||||
// if volume already exists, count them.
|
||||
if err == errVolumeExists { |
||||
volumeExistsErrCnt++ |
||||
continue |
||||
} |
||||
|
||||
// Update error counter separately.
|
||||
createVolErr++ |
||||
} |
||||
// Return err if all disks report volume exists.
|
||||
if volumeExistsErrCnt == len(xl.storageDisks) { |
||||
return errVolumeExists |
||||
} else if createVolErr > len(xl.storageDisks)-xl.writeQuorum { |
||||
// Return errWriteQuorum if errors were more than
|
||||
// allowed write quorum.
|
||||
return errWriteQuorum |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// DeleteVol - delete a volume.
|
||||
func (xl XL) DeleteVol(volume string) error { |
||||
if !isValidVolname(volume) { |
||||
return errInvalidArgument |
||||
} |
||||
|
||||
// Collect if all disks report volume not found.
|
||||
var volumeNotFoundErrCnt int |
||||
|
||||
var wg = &sync.WaitGroup{} |
||||
var dErrs = make([]error, len(xl.storageDisks)) |
||||
|
||||
// Remove a volume entry on all underlying storage disks.
|
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
// Delete volume inside a go-routine.
|
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
dErrs[index] = disk.DeleteVol(volume) |
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all the delete vols to finish.
|
||||
wg.Wait() |
||||
|
||||
// Loop through concocted errors and return anything unusual.
|
||||
for _, err := range dErrs { |
||||
if err != nil { |
||||
// We ignore error if errVolumeNotFound or errDiskNotFound
|
||||
if err == errVolumeNotFound || err == errDiskNotFound { |
||||
volumeNotFoundErrCnt++ |
||||
continue |
||||
} |
||||
return err |
||||
} |
||||
} |
||||
// Return err if all disks report volume not found.
|
||||
if volumeNotFoundErrCnt == len(xl.storageDisks) { |
||||
return errVolumeNotFound |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// ListVols - list volumes.
|
||||
func (xl XL) ListVols() (volsInfo []VolInfo, err error) { |
||||
// Initialize sync waitgroup.
|
||||
var wg = &sync.WaitGroup{} |
||||
|
||||
// Success vols map carries successful results of ListVols from each disks.
|
||||
var successVols = make([][]VolInfo, len(xl.storageDisks)) |
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) // Add each go-routine to wait for.
|
||||
go func(index int, disk StorageAPI) { |
||||
// Indicate wait group as finished.
|
||||
defer wg.Done() |
||||
|
||||
// Initiate listing.
|
||||
vlsInfo, _ := disk.ListVols() |
||||
successVols[index] = vlsInfo |
||||
}(index, disk) |
||||
} |
||||
|
||||
// For all the list volumes running in parallel to finish.
|
||||
wg.Wait() |
||||
|
||||
// Loop through success vols and get aggregated usage values.
|
||||
var vlsInfo []VolInfo |
||||
var total, free int64 |
||||
for _, vlsInfo = range successVols { |
||||
if len(vlsInfo) <= 1 { |
||||
continue |
||||
} |
||||
var vlInfo VolInfo |
||||
for _, vlInfo = range vlsInfo { |
||||
if vlInfo.Name == "" { |
||||
continue |
||||
} |
||||
break |
||||
} |
||||
free += vlInfo.Free |
||||
total += vlInfo.Total |
||||
} |
||||
|
||||
// Save the updated usage values back into the vols.
|
||||
for _, vlInfo := range vlsInfo { |
||||
vlInfo.Free = free |
||||
vlInfo.Total = total |
||||
volsInfo = append(volsInfo, vlInfo) |
||||
} |
||||
|
||||
// NOTE: The assumption here is that volumes across all disks in
|
||||
// readQuorum have consistent view i.e they all have same number
|
||||
// of buckets. This is essentially not verified since healing
|
||||
// should take care of this.
|
||||
return volsInfo, nil |
||||
} |
||||
|
||||
// getAllVolInfo - list bucket volume info from all disks.
|
||||
// Returns error slice indicating the failed volume stat operations.
|
||||
func (xl XL) getAllVolInfo(volume string) ([]VolInfo, []error) { |
||||
// Create errs and volInfo slices of storageDisks size.
|
||||
var errs = make([]error, len(xl.storageDisks)) |
||||
var volsInfo = make([]VolInfo, len(xl.storageDisks)) |
||||
|
||||
// Allocate a new waitgroup.
|
||||
var wg = &sync.WaitGroup{} |
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
// Stat volume on all the disks in a routine.
|
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
volInfo, err := disk.StatVol(volume) |
||||
if err != nil { |
||||
errs[index] = err |
||||
return |
||||
} |
||||
volsInfo[index] = volInfo |
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all the Stat operations to finish.
|
||||
wg.Wait() |
||||
|
||||
// Return the concocted values.
|
||||
return volsInfo, errs |
||||
} |
||||
|
||||
// listAllVolInfo - list all stat volume info from all disks.
|
||||
// Returns
|
||||
// - stat volume info for all online disks.
|
||||
// - boolean to indicate if healing is necessary.
|
||||
// - error if any.
|
||||
func (xl XL) listAllVolInfo(volume string) ([]VolInfo, bool, error) { |
||||
volsInfo, errs := xl.getAllVolInfo(volume) |
||||
notFoundCount := 0 |
||||
for _, err := range errs { |
||||
if err == errVolumeNotFound { |
||||
notFoundCount++ |
||||
			// If we have more 'volume not found' errors than the allowed read
			// quorum, we return errVolumeNotFound.
||||
if notFoundCount > len(xl.storageDisks)-xl.readQuorum { |
||||
return nil, false, errVolumeNotFound |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Calculate online disk count.
|
||||
onlineDiskCount := 0 |
||||
for index := range errs { |
||||
if errs[index] == nil { |
||||
onlineDiskCount++ |
||||
} |
||||
} |
||||
|
||||
var heal bool |
||||
	// If the online disk count is less than the configured disk count, we most
	// probably need to heal the volume; additionally verify that the count is
	// not below readQuorum, otherwise return an error.
	if onlineDiskCount < len(xl.storageDisks) {
		// Online disks are fewer than the total storage disks; the volume needs
		// to be healed, unless we do not have read quorum.
||||
heal = true |
||||
// Verify if online disks count are lesser than readQuorum
|
||||
// threshold, return an error if yes.
|
||||
if onlineDiskCount < xl.readQuorum { |
||||
return nil, false, errReadQuorum |
||||
} |
||||
} |
||||
|
||||
// Return success.
|
||||
return volsInfo, heal, nil |
||||
} |
||||
|
||||
// StatVol - get volume stat info.
|
||||
func (xl XL) StatVol(volume string) (volInfo VolInfo, err error) { |
||||
if !isValidVolname(volume) { |
||||
return VolInfo{}, errInvalidArgument |
||||
} |
||||
|
||||
// List and figured out if we need healing.
|
||||
volsInfo, heal, err := xl.listAllVolInfo(volume) |
||||
if err != nil { |
||||
return VolInfo{}, err |
||||
} |
||||
|
||||
// Heal for missing entries.
|
||||
if heal { |
||||
go func() { |
||||
// Create volume if missing on disks.
|
||||
for index, volInfo := range volsInfo { |
||||
if volInfo.Name != "" { |
||||
continue |
||||
} |
||||
// Volinfo name would be an empty string, create it.
|
||||
xl.storageDisks[index].MakeVol(volume) |
||||
} |
||||
}() |
||||
} |
||||
|
||||
// Loop through all statVols, calculate the actual usage values.
|
||||
var total, free int64 |
||||
for _, volInfo = range volsInfo { |
||||
if volInfo.Name == "" { |
||||
continue |
||||
} |
||||
free += volInfo.Free |
||||
total += volInfo.Total |
||||
} |
||||
// Update the aggregated values.
|
||||
volInfo.Free = free |
||||
volInfo.Total = total |
||||
return volInfo, nil |
||||
} |
||||
|
||||
// isLeafDirectoryXL - check if a given path is leaf directory. i.e
|
||||
// if it contains file xlMetaV1File
|
||||
func isLeafDirectoryXL(disk StorageAPI, volume, leafPath string) (isLeaf bool) { |
||||
_, err := disk.StatFile(volume, path.Join(leafPath, xlMetaV1File)) |
||||
return err == nil |
||||
} |
||||
|
||||
// ListDir - return all the entries at the given directory path.
|
||||
// If an entry is a directory it will be returned with a trailing "/".
|
||||
func (xl XL) ListDir(volume, dirPath string) (entries []string, err error) { |
||||
if !isValidVolname(volume) { |
||||
return nil, errInvalidArgument |
||||
} |
||||
|
||||
// Count for list errors encountered.
|
||||
var listErrCount = 0 |
||||
|
||||
// Loop through and return the first success entry based on the
|
||||
// selected random disk.
|
||||
for listErrCount < len(xl.storageDisks) { |
||||
// Choose a random disk on each attempt, do not hit the same disk all the time.
|
||||
		randIndex := rand.Intn(len(xl.storageDisks))
||||
disk := xl.storageDisks[randIndex] // Pick a random disk.
|
||||
// Initiate a list operation, if successful filter and return quickly.
|
||||
if entries, err = disk.ListDir(volume, dirPath); err == nil { |
||||
for i, entry := range entries { |
||||
isLeaf := isLeafDirectoryXL(disk, volume, path.Join(dirPath, entry)) |
||||
isDir := strings.HasSuffix(entry, slashSeparator) |
||||
if isDir && isLeaf { |
||||
entries[i] = strings.TrimSuffix(entry, slashSeparator) |
||||
} |
||||
} |
||||
// We got the entries successfully return.
|
||||
return entries, nil |
||||
} |
||||
listErrCount++ // Update list error count.
|
||||
} |
||||
// Return error at the end.
|
||||
return nil, err |
||||
} |
||||
|
||||
// Object API.
|
||||
|
||||
// StatFile - stat a file
|
||||
func (xl XL) StatFile(volume, path string) (FileInfo, error) { |
||||
if !isValidVolname(volume) { |
||||
return FileInfo{}, errInvalidArgument |
||||
} |
||||
if !isValidPath(path) { |
||||
return FileInfo{}, errInvalidArgument |
||||
} |
||||
|
||||
_, metadata, heal, err := xl.listOnlineDisks(volume, path) |
||||
if err != nil { |
||||
return FileInfo{}, err |
||||
} |
||||
|
||||
if heal { |
||||
// Heal in background safely, since we already have read quorum disks.
|
||||
go func() { |
||||
hErr := xl.healFile(volume, path) |
||||
errorIf(hErr, "Unable to heal file "+volume+"/"+path+".") |
||||
}() |
||||
} |
||||
|
||||
// Return file info.
|
||||
return FileInfo{ |
||||
Volume: volume, |
||||
Name: path, |
||||
Size: metadata.Stat.Size, |
||||
ModTime: metadata.Stat.ModTime, |
||||
Mode: os.FileMode(0644), |
||||
}, nil |
||||
} |
||||
|
||||
// deleteXLFiles - delete all XL backend files.
|
||||
func (xl XL) deleteXLFiles(volume, path string) error { |
||||
errCount := 0 |
||||
	// Remove the erasure part file and the metadata file on every disk.
||||
for index, disk := range xl.storageDisks { |
||||
erasureFilePart := slashpath.Join(path, fmt.Sprintf("file.%d", index)) |
||||
err := disk.DeleteFile(volume, erasureFilePart) |
||||
if err != nil { |
||||
errCount++ |
||||
|
||||
// We can safely allow DeleteFile errors up to len(xl.storageDisks) - xl.writeQuorum
|
||||
// otherwise return failure.
|
||||
if errCount <= len(xl.storageDisks)-xl.writeQuorum { |
||||
continue |
||||
} |
||||
|
||||
return err |
||||
} |
||||
|
||||
xlMetaV1FilePath := slashpath.Join(path, "file.json") |
||||
err = disk.DeleteFile(volume, xlMetaV1FilePath) |
||||
if err != nil { |
||||
errCount++ |
||||
|
||||
// We can safely allow DeleteFile errors up to len(xl.storageDisks) - xl.writeQuorum
|
||||
// otherwise return failure.
|
||||
if errCount <= len(xl.storageDisks)-xl.writeQuorum { |
||||
continue |
||||
} |
||||
|
||||
return err |
||||
} |
||||
} |
||||
// Return success.
|
||||
return nil |
||||
} |
||||
|
||||
// DeleteFile - delete a file
|
||||
func (xl XL) DeleteFile(volume, path string) error { |
||||
if !isValidVolname(volume) { |
||||
return errInvalidArgument |
||||
} |
||||
if !isValidPath(path) { |
||||
return errInvalidArgument |
||||
} |
||||
|
||||
// Delete all XL files.
|
||||
return xl.deleteXLFiles(volume, path) |
||||
} |
||||
|
||||
// RenameFile - rename file.
|
||||
func (xl XL) RenameFile(srcVolume, srcPath, dstVolume, dstPath string) error { |
||||
// Validate inputs.
|
||||
if !isValidVolname(srcVolume) { |
||||
return errInvalidArgument |
||||
} |
||||
if !isValidPath(srcPath) { |
||||
return errInvalidArgument |
||||
} |
||||
if !isValidVolname(dstVolume) { |
||||
return errInvalidArgument |
||||
} |
||||
if !isValidPath(dstPath) { |
||||
return errInvalidArgument |
||||
} |
||||
|
||||
// Initialize sync waitgroup.
|
||||
var wg = &sync.WaitGroup{} |
||||
|
||||
// Initialize list of errors.
|
||||
var errs = make([]error, len(xl.storageDisks)) |
||||
|
||||
// Rename file on all underlying storage disks.
|
||||
for index, disk := range xl.storageDisks { |
||||
// Append "/" as srcPath and dstPath are either leaf-dirs or non-leaf-dris.
|
||||
// If srcPath is an object instead of prefix we just rename the leaf-dir and
|
||||
// not rename the part and metadata files separately.
|
||||
wg.Add(1) |
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
			err := disk.RenameFile(srcVolume, retainSlash(srcPath), dstVolume, retainSlash(dstPath))
			if err != nil {
				errs[index] = err
				return
			}
			errs[index] = nil
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all RenameFile to finish.
|
||||
wg.Wait() |
||||
|
||||
// Gather err count.
|
||||
var errCount = 0 |
||||
for _, err := range errs { |
||||
if err == nil { |
||||
continue |
||||
} |
||||
errCount++ |
||||
} |
||||
// We can safely allow RenameFile errors up to len(xl.storageDisks) - xl.writeQuorum
|
||||
// otherwise return failure. Cleanup successful renames.
|
||||
if errCount > len(xl.storageDisks)-xl.writeQuorum { |
||||
// Special condition if readQuorum exists, then return success.
|
||||
if errCount <= len(xl.storageDisks)-xl.readQuorum { |
||||
return nil |
||||
} |
||||
// Ignore errors here, delete all successfully written files.
|
||||
xl.deleteXLFiles(dstVolume, dstPath) |
||||
return errWriteQuorum |
||||
} |
||||
return nil |
||||
} |
@ -1,336 +0,0 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"fmt" |
||||
"io" |
||||
"path" |
||||
"strings" |
||||
"sync" |
||||
"time" |
||||
) |
||||
|
||||
// MultipartPartInfo Info of each part kept in the multipart metadata file after
|
||||
// CompleteMultipartUpload() is called.
|
||||
type MultipartPartInfo struct { |
||||
PartNumber int |
||||
ETag string |
||||
Size int64 |
||||
} |
||||
|
||||
// MultipartObjectInfo - contents of the multipart metadata file after
|
||||
// CompleteMultipartUpload() is called.
|
||||
type MultipartObjectInfo struct { |
||||
Parts []MultipartPartInfo |
||||
ModTime time.Time |
||||
Size int64 |
||||
MD5Sum string |
||||
ContentType string |
||||
ContentEncoding string |
||||
// Add more fields here.
|
||||
} |
||||
|
||||
type byMultipartFiles []string |
||||
|
||||
func (files byMultipartFiles) Len() int { return len(files) } |
||||
func (files byMultipartFiles) Less(i, j int) bool { |
||||
first := strings.TrimSuffix(files[i], multipartSuffix) |
||||
second := strings.TrimSuffix(files[j], multipartSuffix) |
||||
return first < second |
||||
} |
||||
func (files byMultipartFiles) Swap(i, j int) { files[i], files[j] = files[j], files[i] } |
||||
|
||||
// GetPartNumberOffset - given an offset for the whole object, return the part and offset in that part.
|
||||
func (m MultipartObjectInfo) GetPartNumberOffset(offset int64) (partIndex int, partOffset int64, err error) { |
||||
partOffset = offset |
||||
for i, part := range m.Parts { |
||||
partIndex = i |
||||
if partOffset < part.Size { |
||||
return |
||||
} |
||||
partOffset -= part.Size |
||||
} |
||||
// Offset beyond the size of the object
|
||||
err = errUnexpected |
||||
return |
||||
} |
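// Editor's note: illustrative sketch, not part of the original change. It
// shows how GetPartNumberOffset above maps a whole-object offset onto a
// (part index, offset-within-part) pair for a hypothetical 3-part object.
package main

import "fmt"

func main() {
	// Parts of 5 MiB, 5 MiB and 4 MiB; an object offset of 11 MiB falls
	// into the third part (index 2) at an offset of 1 MiB.
	sizes := []int64{5 << 20, 5 << 20, 4 << 20}
	offset := int64(11 << 20)
	partIndex := 0
	for i, size := range sizes {
		partIndex = i
		if offset < size {
			break
		}
		offset -= size
	}
	fmt.Println(partIndex, offset) // 2 1048576
}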
||||
|
||||
// getMultipartObjectMeta - read the incomplete meta file and extract meta information, if any.
||||
func getMultipartObjectMeta(storage StorageAPI, metaFile string) (meta map[string]string, err error) { |
||||
meta = make(map[string]string) |
||||
offset := int64(0) |
||||
objMetaReader, err := storage.ReadFile(minioMetaBucket, metaFile, offset) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
// Close the metadata reader.
|
||||
defer objMetaReader.Close() |
||||
|
||||
decoder := json.NewDecoder(objMetaReader) |
||||
err = decoder.Decode(&meta) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return meta, nil |
||||
} |
||||
|
||||
func partNumToPartFileName(partNum int) string { |
||||
return fmt.Sprintf("%.5d%s", partNum, multipartSuffix) |
||||
} |
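// Editor's note: a small self-contained example (not in the original change)
// of the part file naming above; multipartSuffix is assumed to be the
// ".minio.multipart" constant defined elsewhere in this commit.
package main

import "fmt"

const multipartSuffix = ".minio.multipart"

func partNumToPartFileName(partNum int) string {
	return fmt.Sprintf("%.5d%s", partNum, multipartSuffix)
}

func main() {
	fmt.Println(partNumToPartFileName(7)) // 00007.minio.multipart
}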
||||
|
||||
// ListMultipartUploads - list multipart uploads.
|
||||
func (xl xlObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) { |
||||
return listMultipartUploadsCommon(xl, bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads) |
||||
} |
||||
|
||||
// NewMultipartUpload - initialize a new multipart upload, returns a unique id.
|
||||
func (xl xlObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) { |
||||
return newMultipartUploadCommon(xl.storage, bucket, object, meta) |
||||
} |
||||
|
||||
// PutObjectPart - writes the multipart upload chunks.
|
||||
func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) { |
||||
return putObjectPartCommon(xl.storage, bucket, object, uploadID, partID, size, data, md5Hex) |
||||
} |
||||
|
||||
// ListObjectParts - list object parts.
|
||||
func (xl xlObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) { |
||||
return listObjectPartsCommon(xl.storage, bucket, object, uploadID, partNumberMarker, maxParts) |
||||
} |
||||
|
||||
// This function does the following check: suppose the
// object is "a/b/c/d", stat makes sure that objects "a/b/c",
// "a/b" and "a" do not exist.
||||
func (xl xlObjects) parentDirIsObject(bucket, parent string) error { |
||||
var stat func(string) error |
||||
stat = func(p string) error { |
||||
if p == "." { |
||||
return nil |
||||
} |
||||
_, err := xl.getObjectInfo(bucket, p) |
||||
if err == nil { |
||||
// If there is already a file at prefix "p" return error.
|
||||
return errFileAccessDenied |
||||
} |
||||
if err == errFileNotFound { |
||||
// Check if there is a file as one of the parent paths.
|
||||
return stat(path.Dir(p)) |
||||
} |
||||
return err |
||||
} |
||||
return stat(parent) |
||||
} |
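// Editor's note: illustrative sketch, not part of the original change. It
// lists the prefixes that parentDirIsObject above would stat for a given
// parent directory, walking up with path.Dir until it reaches ".".
package main

import (
	"fmt"
	"path"
)

func parentPaths(parent string) (paths []string) {
	for p := parent; p != "."; p = path.Dir(p) {
		paths = append(paths, p)
	}
	return paths
}

func main() {
	fmt.Println(parentPaths("a/b/c")) // [a/b/c a/b a]
}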
||||
|
||||
func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return "", BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify whether the bucket exists.
|
||||
if !isBucketExist(xl.storage, bucket) { |
||||
return "", BucketNotFound{Bucket: bucket} |
||||
} |
||||
if !IsValidObjectName(object) { |
||||
return "", ObjectNameInvalid{ |
||||
Bucket: bucket, |
||||
Object: object, |
||||
} |
||||
} |
||||
if !isUploadIDExists(xl.storage, bucket, object, uploadID) { |
||||
return "", InvalidUploadID{UploadID: uploadID} |
||||
} |
||||
// Hold lock so that
|
||||
// 1) no one aborts this multipart upload
|
||||
// 2) no one does a parallel complete-multipart-upload on this multipart upload
|
||||
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) |
||||
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) |
||||
|
||||
// Calculate s3 compatible md5sum for complete multipart.
|
||||
s3MD5, err := completeMultipartMD5(parts...) |
||||
if err != nil { |
||||
return "", err |
||||
} |
||||
|
||||
var metadata = MultipartObjectInfo{} |
||||
var errs = make([]error, len(parts)) |
||||
|
||||
uploadIDIncompletePath := path.Join(mpartMetaPrefix, bucket, object, uploadID, incompleteFile) |
||||
objMeta, err := getMultipartObjectMeta(xl.storage, uploadIDIncompletePath) |
||||
if err != nil { |
||||
return "", toObjectErr(err, minioMetaBucket, uploadIDIncompletePath) |
||||
} |
||||
|
||||
// Waitgroup to wait for go-routines.
|
||||
var wg = &sync.WaitGroup{} |
||||
|
||||
// Loop through all parts, validate them and then commit to disk.
|
||||
for i, part := range parts { |
||||
// Construct part suffix.
|
||||
partSuffix := fmt.Sprintf("%.5d.%s", part.PartNumber, part.ETag) |
||||
multipartPartFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix) |
||||
var fi FileInfo |
||||
fi, err = xl.storage.StatFile(minioMetaBucket, multipartPartFile) |
||||
if err != nil { |
||||
if err == errFileNotFound { |
||||
return "", InvalidPart{} |
||||
} |
||||
return "", err |
||||
} |
||||
		// All parts except the last part have to be at least 5MB.
||||
if (i < len(parts)-1) && !isMinAllowedPartSize(fi.Size) { |
||||
return "", PartTooSmall{} |
||||
} |
||||
// Update metadata parts.
|
||||
metadata.Parts = append(metadata.Parts, MultipartPartInfo{ |
||||
PartNumber: part.PartNumber, |
||||
ETag: part.ETag, |
||||
Size: fi.Size, |
||||
}) |
||||
metadata.Size += fi.Size |
||||
} |
||||
|
||||
// check if an object is present as one of the parent dir.
|
||||
if err = xl.parentDirIsObject(bucket, path.Dir(object)); err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Save successfully calculated md5sum.
|
||||
metadata.MD5Sum = s3MD5 |
||||
metadata.ContentType = objMeta["content-type"] |
||||
metadata.ContentEncoding = objMeta["content-encoding"] |
||||
|
||||
// Save modTime as well as the current time.
|
||||
metadata.ModTime = time.Now().UTC() |
||||
|
||||
// Create temporary multipart meta file to write and then rename.
|
||||
multipartMetaSuffix := fmt.Sprintf("%s.%s", uploadID, multipartMetaFile) |
||||
tempMultipartMetaFile := path.Join(tmpMetaPrefix, bucket, object, multipartMetaSuffix) |
||||
w, err := xl.storage.CreateFile(minioMetaBucket, tempMultipartMetaFile) |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
encoder := json.NewEncoder(w) |
||||
err = encoder.Encode(&metadata) |
||||
if err != nil { |
||||
if err = safeCloseAndRemove(w); err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
// Close the writer.
|
||||
if err = w.Close(); err != nil { |
||||
if err = safeCloseAndRemove(w); err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Attempt a Rename of multipart meta file to final namespace.
|
||||
multipartObjFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, multipartMetaFile) |
||||
err = xl.storage.RenameFile(minioMetaBucket, tempMultipartMetaFile, minioMetaBucket, multipartObjFile) |
||||
if err != nil { |
||||
if derr := xl.storage.DeleteFile(minioMetaBucket, tempMultipartMetaFile); derr != nil { |
||||
return "", toObjectErr(err, minioMetaBucket, tempMultipartMetaFile) |
||||
} |
||||
return "", toObjectErr(err, bucket, multipartObjFile) |
||||
} |
||||
|
||||
// Loop through and atomically rename the parts to their actual location.
|
||||
for index, part := range parts { |
||||
wg.Add(1) |
||||
go func(index int, part completePart) { |
||||
defer wg.Done() |
||||
partSuffix := fmt.Sprintf("%.5d.%s", part.PartNumber, part.ETag) |
||||
src := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix) |
||||
dst := path.Join(mpartMetaPrefix, bucket, object, uploadID, partNumToPartFileName(part.PartNumber)) |
||||
errs[index] = xl.storage.RenameFile(minioMetaBucket, src, minioMetaBucket, dst) |
||||
errorIf(errs[index], "Unable to rename file %s to %s.", src, dst) |
||||
}(index, part) |
||||
} |
||||
|
||||
// Wait for all the renames to finish.
|
||||
wg.Wait() |
||||
|
||||
// Loop through errs list and return first error.
|
||||
for _, err := range errs { |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
} |
||||
|
||||
// Delete the incomplete file place holder.
|
||||
err = xl.storage.DeleteFile(minioMetaBucket, uploadIDIncompletePath) |
||||
if err != nil { |
||||
return "", toObjectErr(err, minioMetaBucket, uploadIDIncompletePath) |
||||
} |
||||
|
||||
// Hold write lock on the destination before rename
|
||||
nsMutex.Lock(bucket, object) |
||||
defer nsMutex.Unlock(bucket, object) |
||||
|
||||
// Delete if an object already exists.
|
||||
// FIXME: rename it to tmp file and delete only after
|
||||
// the newly uploaded file is renamed from tmp location to
|
||||
// the original location.
|
||||
// Verify if the object is a multipart object.
|
||||
if isMultipartObject(xl.storage, bucket, object) { |
||||
err = xl.deleteMultipartObject(bucket, object) |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
return s3MD5, nil |
||||
} |
||||
err = xl.deleteObject(bucket, object) |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID) |
||||
if err = xl.storage.RenameFile(minioMetaBucket, uploadIDPath, bucket, object); err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
	// Hold the lock so that two parallel complete-multipart-uploads do not
	// leave a stale uploads.json behind.
||||
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object)) |
||||
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object)) |
||||
|
||||
// Validate if there are other incomplete upload-id's present for
|
||||
// the object, if yes do not attempt to delete 'uploads.json'.
|
||||
var entries []string |
||||
if entries, err = xl.storage.ListDir(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object)); err == nil { |
||||
if len(entries) > 1 { |
||||
return s3MD5, nil |
||||
} |
||||
} |
||||
|
||||
uploadsJSONPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile) |
||||
err = xl.storage.DeleteFile(minioMetaBucket, uploadsJSONPath) |
||||
if err != nil { |
||||
return "", toObjectErr(err, minioMetaBucket, uploadsJSONPath) |
||||
} |
||||
|
||||
// Return md5sum.
|
||||
return s3MD5, nil |
||||
} |
||||
|
||||
// AbortMultipartUpload - aborts a multipart upload.
|
||||
func (xl xlObjects) AbortMultipartUpload(bucket, object, uploadID string) error { |
||||
return abortMultipartUploadCommon(xl.storage, bucket, object, uploadID) |
||||
} |
@ -1,581 +0,0 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"crypto/md5" |
||||
"encoding/hex" |
||||
"encoding/json" |
||||
"errors" |
||||
"fmt" |
||||
"io" |
||||
"path" |
||||
"path/filepath" |
||||
"strings" |
||||
"sync" |
||||
|
||||
"github.com/minio/minio/pkg/mimedb" |
||||
) |
||||
|
||||
const ( |
||||
multipartSuffix = ".minio.multipart" |
||||
multipartMetaFile = "00000" + multipartSuffix |
||||
formatConfigFile = "format.json" |
||||
) |
||||
|
||||
// xlObjects - Implements fs object layer.
|
||||
type xlObjects struct { |
||||
storage StorageAPI |
||||
listObjectMap map[listParams][]*treeWalker |
||||
listObjectMapMutex *sync.Mutex |
||||
} |
||||
|
||||
// errMaxDisks - returned for reached maximum of disks.
|
||||
var errMaxDisks = errors.New("Number of disks is higher than the supported maximum count '16'")
||||
|
||||
// errMinDisks - returned for minimum number of disks.
|
||||
var errMinDisks = errors.New("Number of disks is smaller than the supported minimum count '8'")
||||
|
||||
// errNumDisks - returned for odd number of disks.
|
||||
var errNumDisks = errors.New("Number of disks should be a multiple of '2'")
||||
|
||||
const ( |
||||
// Maximum erasure blocks.
|
||||
maxErasureBlocks = 16 |
||||
// Minimum erasure blocks.
|
||||
minErasureBlocks = 8 |
||||
) |
||||
|
||||
func checkSufficientDisks(disks []string) error { |
||||
// Verify total number of disks.
|
||||
totalDisks := len(disks) |
||||
if totalDisks > maxErasureBlocks { |
||||
return errMaxDisks |
||||
} |
||||
if totalDisks < minErasureBlocks { |
||||
return errMinDisks |
||||
} |
||||
|
||||
	// isEven function to verify if a given number is even.
||||
isEven := func(number int) bool { |
||||
return number%2 == 0 |
||||
} |
||||
|
||||
// Verify if we have even number of disks.
|
||||
// only combination of 8, 10, 12, 14, 16 are supported.
|
||||
if !isEven(totalDisks) { |
||||
return errNumDisks |
||||
} |
||||
|
||||
return nil |
||||
} |
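// Editor's note: a standalone restatement (not part of the original change)
// of the disk-count rule enforced by checkSufficientDisks above: between 8
// and 16 disks, and the count must be even.
package main

import (
	"errors"
	"fmt"
)

func validDiskCount(n int) error {
	switch {
	case n > 16:
		return errors.New("too many disks, maximum is 16")
	case n < 8:
		return errors.New("too few disks, minimum is 8")
	case n%2 != 0:
		return errors.New("disk count must be even")
	}
	return nil
}

func main() {
	for _, n := range []int{6, 8, 9, 16, 18} {
		fmt.Println(n, validDiskCount(n))
	}
}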
||||
|
||||
// Depending on the disk type network or local, initialize storage layer.
|
||||
func newStorageLayer(disk string) (storage StorageAPI, err error) { |
||||
if !strings.ContainsRune(disk, ':') || filepath.VolumeName(disk) != "" { |
||||
// Initialize filesystem storage API.
|
||||
return newPosix(disk) |
||||
} |
||||
// Initialize rpc client storage API.
|
||||
return newRPCClient(disk) |
||||
} |
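// Editor's note: illustrative sketch, not part of the original change. It
// isolates the heuristic newStorageLayer above uses to tell local paths
// from network endpoints: a string containing ':' that is not a Windows
// volume name is treated as a remote disk. Note that filepath.VolumeName
// only returns a non-empty value on Windows builds.
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func isLocalDisk(disk string) bool {
	return !strings.ContainsRune(disk, ':') || filepath.VolumeName(disk) != ""
}

func main() {
	fmt.Println(isLocalDisk("/export/disk1"))    // true
	fmt.Println(isLocalDisk("10.0.0.1:/export")) // false, handled by the RPC client
}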
||||
|
||||
// Initialize all storage disks to bootstrap.
|
||||
func bootstrapDisks(disks []string) ([]StorageAPI, error) { |
||||
storageDisks := make([]StorageAPI, len(disks)) |
||||
for index, disk := range disks { |
||||
var err error |
||||
// Intentionally ignore disk not found errors while
|
||||
// initializing POSIX, so that we have successfully
|
||||
// initialized posix Storage. Subsequent calls to XL/Erasure
|
||||
// will manage any errors related to disks.
|
||||
storageDisks[index], err = newStorageLayer(disk) |
||||
if err != nil && err != errDiskNotFound { |
||||
return nil, err |
||||
} |
||||
} |
||||
return storageDisks, nil |
||||
} |
||||
|
||||
// newXLObjects - initialize new xl object layer.
|
||||
func newXLObjects(disks []string) (ObjectLayer, error) { |
||||
if err := checkSufficientDisks(disks); err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
storageDisks, err := bootstrapDisks(disks) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
// Initialize object layer - like creating minioMetaBucket, cleaning up tmp files etc.
|
||||
initObjectLayer(storageDisks...) |
||||
|
||||
// Load saved XL format.json and validate.
|
||||
newDisks, err := loadFormatXL(storageDisks) |
||||
if err != nil { |
||||
switch err { |
||||
case errUnformattedDisk: |
||||
// Save new XL format.
|
||||
errSave := initFormatXL(storageDisks) |
||||
if errSave != nil { |
||||
return nil, errSave |
||||
} |
||||
newDisks = storageDisks |
||||
default: |
||||
// errCorruptedDisk - error.
|
||||
return nil, fmt.Errorf("Unable to recognize backend format, %s", err) |
||||
} |
||||
} |
||||
|
||||
// FIXME: healFormatXL(newDisks)
|
||||
|
||||
storage, err := newXL(newDisks) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
// Return successfully initialized object layer.
|
||||
return xlObjects{ |
||||
storage: storage, |
||||
listObjectMap: make(map[listParams][]*treeWalker), |
||||
listObjectMapMutex: &sync.Mutex{}, |
||||
}, nil |
||||
} |
||||
|
||||
/// Bucket operations
|
||||
|
||||
// MakeBucket - make a bucket.
|
||||
func (xl xlObjects) MakeBucket(bucket string) error { |
||||
nsMutex.Lock(bucket, "") |
||||
defer nsMutex.Unlock(bucket, "") |
||||
return makeBucket(xl.storage, bucket) |
||||
} |
||||
|
||||
// GetBucketInfo - get bucket info.
|
||||
func (xl xlObjects) GetBucketInfo(bucket string) (BucketInfo, error) { |
||||
nsMutex.RLock(bucket, "") |
||||
defer nsMutex.RUnlock(bucket, "") |
||||
return getBucketInfo(xl.storage, bucket) |
||||
} |
||||
|
||||
// ListBuckets - list buckets.
|
||||
func (xl xlObjects) ListBuckets() ([]BucketInfo, error) { |
||||
return listBuckets(xl.storage) |
||||
} |
||||
|
||||
// DeleteBucket - delete a bucket.
|
||||
func (xl xlObjects) DeleteBucket(bucket string) error { |
||||
	nsMutex.Lock(bucket, "")
	defer nsMutex.Unlock(bucket, "")
||||
return deleteBucket(xl.storage, bucket) |
||||
} |
||||
|
||||
/// Object Operations
|
||||
|
||||
// GetObject - get an object.
|
||||
func (xl xlObjects) GetObject(bucket, object string, startOffset int64) (io.ReadCloser, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return nil, BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify if object is valid.
|
||||
if !IsValidObjectName(object) { |
||||
return nil, ObjectNameInvalid{Bucket: bucket, Object: object} |
||||
} |
||||
nsMutex.RLock(bucket, object) |
||||
defer nsMutex.RUnlock(bucket, object) |
||||
if !isMultipartObject(xl.storage, bucket, object) { |
||||
_, err := xl.storage.StatFile(bucket, object) |
||||
if err == nil { |
||||
var reader io.ReadCloser |
||||
reader, err = xl.storage.ReadFile(bucket, object, startOffset) |
||||
if err != nil { |
||||
return nil, toObjectErr(err, bucket, object) |
||||
} |
||||
return reader, nil |
||||
} |
||||
return nil, toObjectErr(err, bucket, object) |
||||
} |
||||
fileReader, fileWriter := io.Pipe() |
||||
info, err := getMultipartObjectInfo(xl.storage, bucket, object) |
||||
if err != nil { |
||||
return nil, toObjectErr(err, bucket, object) |
||||
} |
||||
partIndex, offset, err := info.GetPartNumberOffset(startOffset) |
||||
if err != nil { |
||||
return nil, toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Hold a read lock once more which can be released after the following go-routine ends.
|
||||
// We hold RLock once more because the current function would return before the go routine below
|
||||
	// executes, and would hence release the read lock (because of the deferred nsMutex.RUnlock() call).
||||
nsMutex.RLock(bucket, object) |
||||
go func() { |
||||
defer nsMutex.RUnlock(bucket, object) |
||||
for ; partIndex < len(info.Parts); partIndex++ { |
||||
part := info.Parts[partIndex] |
||||
r, err := xl.storage.ReadFile(bucket, pathJoin(object, partNumToPartFileName(part.PartNumber)), offset) |
||||
if err != nil { |
||||
fileWriter.CloseWithError(err) |
||||
return |
||||
} |
||||
// Reset offset to 0 as it would be non-0 only for the first loop if startOffset is non-0.
|
||||
offset = 0 |
||||
if _, err = io.Copy(fileWriter, r); err != nil { |
||||
switch reader := r.(type) { |
||||
case *io.PipeReader: |
||||
reader.CloseWithError(err) |
||||
case io.ReadCloser: |
||||
reader.Close() |
||||
} |
||||
fileWriter.CloseWithError(err) |
||||
return |
||||
} |
||||
// Close the readerCloser that reads multiparts of an object from the xl storage layer.
|
||||
// Not closing leaks underlying file descriptors.
|
||||
r.Close() |
||||
} |
||||
fileWriter.Close() |
||||
}() |
||||
return fileReader, nil |
||||
} |
||||
|
||||
// Return the partsInfo of a special multipart object.
|
||||
func getMultipartObjectInfo(storage StorageAPI, bucket, object string) (info MultipartObjectInfo, err error) { |
||||
offset := int64(0) |
||||
r, err := storage.ReadFile(bucket, pathJoin(object, multipartMetaFile), offset) |
||||
if err != nil { |
||||
return MultipartObjectInfo{}, err |
||||
} |
||||
decoder := json.NewDecoder(r) |
||||
err = decoder.Decode(&info) |
||||
if err != nil { |
||||
return MultipartObjectInfo{}, err |
||||
} |
||||
return info, nil |
||||
} |
||||
|
||||
// Return ObjectInfo.
|
||||
func (xl xlObjects) getObjectInfo(bucket, object string) (objInfo ObjectInfo, err error) { |
||||
objInfo.Bucket = bucket |
||||
objInfo.Name = object |
||||
// First see if the object was a simple-PUT upload.
|
||||
fi, err := xl.storage.StatFile(bucket, object) |
||||
if err != nil { |
||||
if err != errFileNotFound { |
||||
return ObjectInfo{}, err |
||||
} |
||||
var info MultipartObjectInfo |
||||
// Check if the object was multipart upload.
|
||||
info, err = getMultipartObjectInfo(xl.storage, bucket, object) |
||||
if err != nil { |
||||
return ObjectInfo{}, err |
||||
} |
||||
objInfo.Size = info.Size |
||||
objInfo.ModTime = info.ModTime |
||||
objInfo.MD5Sum = info.MD5Sum |
||||
objInfo.ContentType = info.ContentType |
||||
objInfo.ContentEncoding = info.ContentEncoding |
||||
} else { |
||||
metadata := make(map[string]string) |
||||
offset := int64(0) // To read entire content
|
||||
r, err := xl.storage.ReadFile(bucket, pathJoin(object, "meta.json"), offset) |
||||
if err != nil { |
||||
return ObjectInfo{}, toObjectErr(err, bucket, object) |
||||
} |
||||
decoder := json.NewDecoder(r) |
||||
if err = decoder.Decode(&metadata); err != nil { |
||||
return ObjectInfo{}, toObjectErr(err, bucket, object) |
||||
} |
||||
contentType := metadata["content-type"] |
||||
if len(contentType) == 0 { |
||||
contentType = "application/octet-stream" |
||||
if objectExt := filepath.Ext(object); objectExt != "" { |
||||
content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(objectExt, "."))] |
||||
if ok { |
||||
contentType = content.ContentType |
||||
} |
||||
} |
||||
} |
||||
objInfo.Size = fi.Size |
||||
objInfo.IsDir = fi.Mode.IsDir() |
||||
objInfo.ModTime = fi.ModTime |
||||
objInfo.MD5Sum = metadata["md5Sum"] |
||||
objInfo.ContentType = contentType |
||||
objInfo.ContentEncoding = metadata["content-encoding"] |
||||
} |
||||
return objInfo, nil |
||||
} |
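// Editor's note: illustrative sketch, not part of the original change. It
// isolates the content-type fallback used by getObjectInfo above: default
// to application/octet-stream, then look the file extension up in mimedb.
package main

import (
	"fmt"
	"path/filepath"
	"strings"

	"github.com/minio/minio/pkg/mimedb"
)

func contentTypeFor(object string) string {
	contentType := "application/octet-stream"
	if objectExt := filepath.Ext(object); objectExt != "" {
		if content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(objectExt, "."))]; ok {
			contentType = content.ContentType
		}
	}
	return contentType
}

func main() {
	fmt.Println(contentTypeFor("photos/cat.png")) // expected: image/png
}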
||||
|
||||
// GetObjectInfo - get object info.
|
||||
func (xl xlObjects) GetObjectInfo(bucket, object string) (ObjectInfo, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return ObjectInfo{}, BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify if object is valid.
|
||||
if !IsValidObjectName(object) { |
||||
return ObjectInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object} |
||||
} |
||||
nsMutex.RLock(bucket, object) |
||||
defer nsMutex.RUnlock(bucket, object) |
||||
info, err := xl.getObjectInfo(bucket, object) |
||||
if err != nil { |
||||
return ObjectInfo{}, toObjectErr(err, bucket, object) |
||||
} |
||||
return info, nil |
||||
} |
||||
|
||||
// PutObject - create an object.
|
||||
func (xl xlObjects) PutObject(bucket string, object string, size int64, data io.Reader, metadata map[string]string) (string, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return "", BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify bucket exists.
|
||||
if !isBucketExist(xl.storage, bucket) { |
||||
return "", BucketNotFound{Bucket: bucket} |
||||
} |
||||
if !IsValidObjectName(object) { |
||||
return "", ObjectNameInvalid{ |
||||
Bucket: bucket, |
||||
Object: object, |
||||
} |
||||
} |
||||
// No metadata is set, allocate a new one.
|
||||
if metadata == nil { |
||||
metadata = make(map[string]string) |
||||
} |
||||
nsMutex.Lock(bucket, object) |
||||
defer nsMutex.Unlock(bucket, object) |
||||
|
||||
tempObj := path.Join(tmpMetaPrefix, bucket, object) |
||||
fileWriter, err := xl.storage.CreateFile(minioMetaBucket, tempObj) |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Initialize md5 writer.
|
||||
md5Writer := md5.New() |
||||
|
||||
// Instantiate a new multi writer.
|
||||
multiWriter := io.MultiWriter(md5Writer, fileWriter) |
||||
|
||||
// Instantiate checksum hashers and create a multiwriter.
|
||||
if size > 0 { |
||||
if _, err = io.CopyN(multiWriter, data, size); err != nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", toObjectErr(clErr, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
} else { |
||||
if _, err = io.Copy(multiWriter, data); err != nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", toObjectErr(clErr, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
} |
||||
|
||||
newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil)) |
||||
// Update the md5sum if not set with the newly calculated one.
|
||||
if len(metadata["md5Sum"]) == 0 { |
||||
metadata["md5Sum"] = newMD5Hex |
||||
} |
||||
|
||||
// md5Hex representation.
|
||||
md5Hex := metadata["md5Sum"] |
||||
if md5Hex != "" { |
||||
if newMD5Hex != md5Hex { |
||||
if err = safeCloseAndRemove(fileWriter); err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
return "", BadDigest{md5Hex, newMD5Hex} |
||||
} |
||||
} |
||||
|
||||
err = fileWriter.Close() |
||||
if err != nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", toObjectErr(clErr, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Check if an object is present as one of the parent dir.
|
||||
if err = xl.parentDirIsObject(bucket, path.Dir(object)); err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Delete if an object already exists.
|
||||
// FIXME: rename it to tmp file and delete only after
|
||||
// the newly uploaded file is renamed from tmp location to
|
||||
// the original location.
|
||||
// Verify if the object is a multipart object.
|
||||
if isMultipartObject(xl.storage, bucket, object) { |
||||
err = xl.deleteMultipartObject(bucket, object) |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
} else { |
||||
err = xl.deleteObject(bucket, object) |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
} |
||||
|
||||
err = xl.storage.RenameFile(minioMetaBucket, tempObj, bucket, object) |
||||
if err != nil { |
||||
if dErr := xl.storage.DeleteFile(minioMetaBucket, tempObj); dErr != nil { |
||||
return "", toObjectErr(dErr, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
tempMetaJSONFile := path.Join(tmpMetaPrefix, bucket, object, "meta.json") |
||||
metaWriter, err := xl.storage.CreateFile(minioMetaBucket, tempMetaJSONFile) |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
encoder := json.NewEncoder(metaWriter) |
||||
err = encoder.Encode(&metadata) |
||||
if err != nil { |
||||
if clErr := safeCloseAndRemove(metaWriter); clErr != nil { |
||||
return "", toObjectErr(clErr, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
if err = metaWriter.Close(); err != nil { |
||||
if err = safeCloseAndRemove(metaWriter); err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
metaJSONFile := path.Join(object, "meta.json") |
||||
err = xl.storage.RenameFile(minioMetaBucket, tempMetaJSONFile, bucket, metaJSONFile) |
||||
if err != nil { |
||||
if derr := xl.storage.DeleteFile(minioMetaBucket, tempMetaJSONFile); derr != nil { |
||||
return "", toObjectErr(derr, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Return md5sum, successfully wrote object.
|
||||
return newMD5Hex, nil |
||||
} |
||||
|
||||
// isMultipartObject - verifies if an object is special multipart file.
|
||||
func isMultipartObject(storage StorageAPI, bucket, object string) bool { |
||||
_, err := storage.StatFile(bucket, pathJoin(object, multipartMetaFile)) |
||||
if err != nil { |
||||
if err == errFileNotFound { |
||||
return false |
||||
} |
||||
errorIf(err, "Failed to stat file "+bucket+pathJoin(object, multipartMetaFile)) |
||||
return false |
||||
} |
||||
return true |
||||
} |
||||
|
||||
// deleteMultipartObject - deletes only multipart object.
|
||||
func (xl xlObjects) deleteMultipartObject(bucket, object string) error { |
||||
// Get parts info.
|
||||
info, err := getMultipartObjectInfo(xl.storage, bucket, object) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
// Range through all files and delete it.
|
||||
var wg = &sync.WaitGroup{} |
||||
var errs = make([]error, len(info.Parts)) |
||||
for index, part := range info.Parts { |
||||
wg.Add(1) |
||||
// Start deleting parts in routine.
|
||||
go func(index int, part MultipartPartInfo) { |
||||
defer wg.Done() |
||||
partFileName := partNumToPartFileName(part.PartNumber) |
||||
errs[index] = xl.storage.DeleteFile(bucket, pathJoin(object, partFileName)) |
||||
}(index, part) |
||||
} |
||||
// Wait for all the deletes to finish.
|
||||
wg.Wait() |
||||
// Loop through and validate if any errors, if we are unable to remove any part return
|
||||
// "unexpected" error as returning any other error might be misleading. For ex.
|
||||
// if DeleteFile() had returned errFileNotFound and we return it, then client would see
|
||||
// ObjectNotFound which is misleading.
|
||||
for _, err := range errs { |
||||
if err != nil { |
||||
return errUnexpected |
||||
} |
||||
} |
||||
err = xl.storage.DeleteFile(bucket, pathJoin(object, multipartMetaFile)) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// deleteObject - deletes a regular object.
|
||||
func (xl xlObjects) deleteObject(bucket, object string) error { |
||||
metaJSONFile := path.Join(object, "meta.json") |
||||
// Ignore if meta.json file doesn't exist.
|
||||
if err := xl.storage.DeleteFile(bucket, metaJSONFile); err != nil { |
||||
if err != errFileNotFound { |
||||
return err |
||||
} |
||||
} |
||||
if err := xl.storage.DeleteFile(bucket, object); err != nil { |
||||
if err != errFileNotFound { |
||||
return err |
||||
} |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// DeleteObject - delete the object.
|
||||
func (xl xlObjects) DeleteObject(bucket, object string) error { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
if !IsValidObjectName(object) { |
||||
return ObjectNameInvalid{Bucket: bucket, Object: object} |
||||
} |
||||
nsMutex.Lock(bucket, object) |
||||
defer nsMutex.Unlock(bucket, object) |
||||
// Verify if the object is a multipart object.
|
||||
if isMultipartObject(xl.storage, bucket, object) { |
||||
err := xl.deleteMultipartObject(bucket, object) |
||||
if err != nil { |
||||
return toObjectErr(err, bucket, object) |
||||
} |
||||
return nil |
||||
} |
||||
err := xl.deleteObject(bucket, object) |
||||
if err != nil { |
||||
return toObjectErr(err, bucket, object) |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// ListObjects - list all objects at prefix, delimited by '/'.
|
||||
func (xl xlObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) { |
||||
return listObjectsCommon(xl, bucket, prefix, marker, delimiter, maxKeys) |
||||
} |
@ -0,0 +1,355 @@ |
||||
package main |
||||
|
||||
import ( |
||||
"sort" |
||||
"sync" |
||||
) |
||||
|
||||
/// Bucket operations
|
||||
|
||||
// MakeBucket - make a bucket.
|
||||
func (xl xlObjects) MakeBucket(bucket string) error { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
|
||||
nsMutex.Lock(bucket, "") |
||||
defer nsMutex.Unlock(bucket, "") |
||||
|
||||
// Err counters.
|
||||
createVolErr := 0 // Count generic create vol errs.
|
||||
volumeExistsErrCnt := 0 // Count all errVolumeExists errs.
|
||||
|
||||
// Initialize sync waitgroup.
|
||||
var wg = &sync.WaitGroup{} |
||||
|
||||
// Initialize list of errors.
|
||||
var dErrs = make([]error, len(xl.storageDisks)) |
||||
|
||||
// Make a volume entry on all underlying storage disks.
|
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
// Make a volume inside a go-routine.
|
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
err := disk.MakeVol(bucket) |
||||
if err != nil { |
||||
dErrs[index] = err |
||||
return |
||||
} |
||||
dErrs[index] = nil |
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all make vol to finish.
|
||||
wg.Wait() |
||||
|
||||
// Loop through all the concocted errors.
|
||||
for _, err := range dErrs { |
||||
if err == nil { |
||||
continue |
||||
} |
||||
// if volume already exists, count them.
|
||||
if err == errVolumeExists { |
||||
volumeExistsErrCnt++ |
||||
continue |
||||
} |
||||
|
||||
// Update error counter separately.
|
||||
createVolErr++ |
||||
} |
||||
|
||||
// Return err if all disks report volume exists.
|
||||
if volumeExistsErrCnt == len(xl.storageDisks) { |
||||
return toObjectErr(errVolumeExists, bucket) |
||||
} else if createVolErr > len(xl.storageDisks)-xl.writeQuorum { |
||||
// Return errWriteQuorum if errors were more than
|
||||
// allowed write quorum.
|
||||
return toObjectErr(errWriteQuorum, bucket) |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// getAllBucketInfo - list bucket info from all disks.
|
||||
// Returns error slice indicating the failed volume stat operations.
|
||||
func (xl xlObjects) getAllBucketInfo(bucketName string) ([]BucketInfo, []error) { |
||||
// Create errs and volInfo slices of storageDisks size.
|
||||
var errs = make([]error, len(xl.storageDisks)) |
||||
var volsInfo = make([]VolInfo, len(xl.storageDisks)) |
||||
|
||||
// Allocate a new waitgroup.
|
||||
var wg = &sync.WaitGroup{} |
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
// Stat volume on all the disks in a routine.
|
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
volInfo, err := disk.StatVol(bucketName) |
||||
if err != nil { |
||||
errs[index] = err |
||||
return |
||||
} |
||||
volsInfo[index] = volInfo |
||||
errs[index] = nil |
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all the Stat operations to finish.
|
||||
wg.Wait() |
||||
|
||||
// Return the concocted values.
|
||||
	var bucketsInfo = make([]BucketInfo, 0, len(xl.storageDisks))
||||
for _, volInfo := range volsInfo { |
||||
if IsValidBucketName(volInfo.Name) { |
||||
bucketsInfo = append(bucketsInfo, BucketInfo{ |
||||
Name: volInfo.Name, |
||||
Created: volInfo.Created, |
||||
}) |
||||
} |
||||
} |
||||
return bucketsInfo, errs |
||||
} |
||||
|
||||
// listAllBucketInfo - list all stat volume info from all disks.
|
||||
// Returns
|
||||
// - stat volume info for all online disks.
|
||||
// - boolean to indicate if healing is necessary.
|
||||
// - error if any.
|
||||
func (xl xlObjects) listAllBucketInfo(bucketName string) ([]BucketInfo, bool, error) { |
||||
bucketsInfo, errs := xl.getAllBucketInfo(bucketName) |
||||
notFoundCount := 0 |
||||
for _, err := range errs { |
||||
if err == errVolumeNotFound { |
||||
notFoundCount++ |
||||
			// If we have more 'volume not found' errors than the allowed read
			// quorum, we return errVolumeNotFound.
||||
if notFoundCount > len(xl.storageDisks)-xl.readQuorum { |
||||
return nil, false, errVolumeNotFound |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Calculate online disk count.
|
||||
onlineDiskCount := 0 |
||||
for index := range errs { |
||||
if errs[index] == nil { |
||||
onlineDiskCount++ |
||||
} |
||||
} |
||||
|
||||
var heal bool |
||||
	// If the online disk count is less than the configured disk count, we most
	// probably need to heal the bucket; additionally verify that the count is
	// not below readQuorum, otherwise return an error.
	if onlineDiskCount < len(xl.storageDisks) {
		// Online disks are fewer than the total storage disks; the bucket needs
		// to be healed, unless we do not have read quorum.
||||
heal = true |
||||
// Verify if online disks count are lesser than readQuorum
|
||||
// threshold, return an error if yes.
|
||||
if onlineDiskCount < xl.readQuorum { |
||||
return nil, false, errReadQuorum |
||||
} |
||||
} |
||||
|
||||
// Return success.
|
||||
return bucketsInfo, heal, nil |
||||
} |
||||
|
||||
// Checks whether bucket exists.
|
||||
func (xl xlObjects) isBucketExist(bucketName string) bool { |
||||
// Check whether bucket exists.
|
||||
_, _, err := xl.listAllBucketInfo(bucketName) |
||||
if err != nil { |
||||
if err == errVolumeNotFound { |
||||
return false |
||||
} |
||||
errorIf(err, "Stat failed on bucket "+bucketName+".") |
||||
return false |
||||
} |
||||
return true |
||||
} |
||||
|
||||
// GetBucketInfo - get bucket info.
|
||||
func (xl xlObjects) GetBucketInfo(bucket string) (BucketInfo, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return BucketInfo{}, BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
|
||||
nsMutex.RLock(bucket, "") |
||||
defer nsMutex.RUnlock(bucket, "") |
||||
|
||||
// List and figured out if we need healing.
|
||||
bucketsInfo, heal, err := xl.listAllBucketInfo(bucket) |
||||
if err != nil { |
||||
return BucketInfo{}, toObjectErr(err, bucket) |
||||
} |
||||
|
||||
// Heal for missing entries.
|
||||
if heal { |
||||
go func() { |
||||
// Create bucket if missing on disks.
|
||||
for index, bktInfo := range bucketsInfo { |
||||
if bktInfo.Name != "" { |
||||
continue |
||||
} |
||||
// Bucketinfo name would be an empty string, create it.
|
||||
xl.storageDisks[index].MakeVol(bucket) |
||||
} |
||||
}() |
||||
} |
||||
|
||||
// Loop through all statVols, calculate the actual usage values.
|
||||
var total, free int64 |
||||
var bucketInfo BucketInfo |
||||
for _, bucketInfo = range bucketsInfo { |
||||
if bucketInfo.Name == "" { |
||||
continue |
||||
} |
||||
free += bucketInfo.Free |
||||
total += bucketInfo.Total |
||||
} |
||||
// Update the aggregated values.
|
||||
bucketInfo.Free = free |
||||
bucketInfo.Total = total |
||||
|
||||
return BucketInfo{ |
||||
Name: bucket, |
||||
Created: bucketInfo.Created, |
||||
Total: bucketInfo.Total, |
||||
Free: bucketInfo.Free, |
||||
}, nil |
||||
} |
||||
|
||||
func (xl xlObjects) listBuckets() ([]BucketInfo, error) { |
||||
// Initialize sync waitgroup.
|
||||
var wg = &sync.WaitGroup{} |
||||
|
||||
// Success vols map carries successful results of ListVols from each disks.
|
||||
var successVols = make([][]VolInfo, len(xl.storageDisks)) |
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) // Add each go-routine to wait for.
|
||||
go func(index int, disk StorageAPI) { |
||||
// Indicate wait group as finished.
|
||||
defer wg.Done() |
||||
|
||||
// Initiate listing.
|
||||
volsInfo, _ := disk.ListVols() |
||||
successVols[index] = volsInfo |
||||
}(index, disk) |
||||
} |
||||
|
||||
// For all the list volumes running in parallel to finish.
|
||||
wg.Wait() |
||||
|
||||
// Loop through success vols and get aggregated usage values.
|
||||
var volsInfo []VolInfo |
||||
var total, free int64 |
||||
for _, volsInfo = range successVols { |
||||
var volInfo VolInfo |
||||
for _, volInfo = range volsInfo { |
||||
if volInfo.Name == "" { |
||||
continue |
||||
} |
||||
if !IsValidBucketName(volInfo.Name) { |
||||
continue |
||||
} |
||||
break |
||||
} |
||||
free += volInfo.Free |
||||
total += volInfo.Total |
||||
} |
||||
|
||||
// Save the updated usage values back into the vols.
|
||||
for index, volInfo := range volsInfo { |
||||
volInfo.Free = free |
||||
volInfo.Total = total |
||||
volsInfo[index] = volInfo |
||||
} |
||||
|
||||
// NOTE: The assumption here is that volumes across all disks in
|
||||
// readQuorum have consistent view i.e they all have same number
|
||||
// of buckets. This is essentially not verified since healing
|
||||
// should take care of this.
|
||||
var bucketsInfo []BucketInfo |
||||
for _, volInfo := range volsInfo { |
||||
// StorageAPI can send volume names which are incompatible
|
||||
// with buckets, handle it and skip them.
|
||||
if !IsValidBucketName(volInfo.Name) { |
||||
continue |
||||
} |
||||
bucketsInfo = append(bucketsInfo, BucketInfo{ |
||||
Name: volInfo.Name, |
||||
Created: volInfo.Created, |
||||
Total: volInfo.Total, |
||||
Free: volInfo.Free, |
||||
}) |
||||
} |
||||
return bucketsInfo, nil |
||||
} |
||||
|
||||
// ListBuckets - list buckets.
|
||||
func (xl xlObjects) ListBuckets() ([]BucketInfo, error) { |
||||
bucketInfos, err := xl.listBuckets() |
||||
if err != nil { |
||||
return nil, toObjectErr(err) |
||||
} |
||||
sort.Sort(byBucketName(bucketInfos)) |
||||
return bucketInfos, nil |
||||
} |
||||
|
||||
// DeleteBucket - delete a bucket.
|
||||
func (xl xlObjects) DeleteBucket(bucket string) error { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
|
||||
nsMutex.Lock(bucket, "") |
||||
defer nsMutex.Unlock(bucket, "") |
||||
|
||||
// Collect if all disks report volume not found.
|
||||
var volumeNotFoundErrCnt int |
||||
|
||||
var wg = &sync.WaitGroup{} |
||||
var dErrs = make([]error, len(xl.storageDisks)) |
||||
|
||||
// Remove a volume entry on all underlying storage disks.
|
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
// Delete volume inside a go-routine.
|
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
err := disk.DeleteVol(bucket) |
||||
if err != nil { |
||||
dErrs[index] = err |
||||
return |
||||
} |
||||
dErrs[index] = nil |
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all the delete vols to finish.
|
||||
wg.Wait() |
||||
|
||||
// Loop through collected errors and return anything unusual.
|
||||
for _, err := range dErrs { |
||||
if err != nil { |
||||
// We ignore error if errVolumeNotFound or errDiskNotFound
|
||||
if err == errVolumeNotFound || err == errDiskNotFound { |
||||
volumeNotFoundErrCnt++ |
||||
continue |
||||
} |
||||
return toObjectErr(err, bucket) |
||||
} |
||||
} |
||||
|
||||
// Return err if all disks report volume not found.
|
||||
if volumeNotFoundErrCnt == len(xl.storageDisks) { |
||||
return toObjectErr(errVolumeNotFound, bucket) |
||||
} |
||||
|
||||
return nil |
||||
} |
@ -0,0 +1,116 @@ |
||||
package main |
||||
|
||||
import "strings" |
||||
|
||||
func (xl xlObjects) listObjectsXL(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) { |
||||
// Default is recursive, if delimiter is set then list non recursive.
|
||||
recursive := true |
||||
if delimiter == slashSeparator { |
||||
recursive = false |
||||
} |
||||
|
||||
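// Continue a previously saved tree walk for the same list parameters, if any; otherwise start a fresh walk.
|
||||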
walker := xl.lookupTreeWalkXL(listParams{bucket, recursive, marker, prefix}) |
||||
if walker == nil { |
||||
walker = xl.startTreeWalkXL(bucket, prefix, marker, recursive) |
||||
} |
||||
var objInfos []ObjectInfo |
||||
var eof bool |
||||
var nextMarker string |
||||
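// Gather up to maxKeys entries from the tree walk channel.
|
||||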
for i := 0; i < maxKeys; { |
||||
walkResult, ok := <-walker.ch |
||||
if !ok { |
||||
// Closed channel.
|
||||
eof = true |
||||
break |
||||
} |
||||
// For any walk error return right away.
|
||||
if walkResult.err != nil { |
||||
// File not found is a valid case.
|
||||
if walkResult.err == errFileNotFound { |
||||
return ListObjectsInfo{}, nil |
||||
} |
||||
return ListObjectsInfo{}, toObjectErr(walkResult.err, bucket, prefix) |
||||
} |
||||
objInfo := walkResult.objInfo |
||||
nextMarker = objInfo.Name |
||||
objInfos = append(objInfos, objInfo) |
||||
if walkResult.end { |
||||
eof = true |
||||
break |
||||
} |
||||
i++ |
||||
} |
||||
params := listParams{bucket, recursive, nextMarker, prefix} |
||||
if !eof { |
||||
xl.saveTreeWalkXL(params, walker) |
||||
} |
||||
|
||||
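// Build the final listing; when a delimiter is set, directory entries are reported as common prefixes.
|
||||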
result := ListObjectsInfo{IsTruncated: !eof} |
||||
for _, objInfo := range objInfos { |
||||
// With delimiter set we fill in NextMarker and Prefixes.
|
||||
if delimiter == slashSeparator { |
||||
result.NextMarker = objInfo.Name |
||||
if objInfo.IsDir { |
||||
result.Prefixes = append(result.Prefixes, objInfo.Name) |
||||
continue |
||||
} |
||||
} |
||||
result.Objects = append(result.Objects, ObjectInfo{ |
||||
Name: objInfo.Name, |
||||
ModTime: objInfo.ModTime, |
||||
Size: objInfo.Size, |
||||
IsDir: false, |
||||
}) |
||||
} |
||||
return result, nil |
||||
} |
||||
|
||||
// ListObjects - list all objects at prefix, delimited by '/'.
|
||||
func (xl xlObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return ListObjectsInfo{}, BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify if bucket exists.
|
||||
if !xl.isBucketExist(bucket) { |
||||
return ListObjectsInfo{}, BucketNotFound{Bucket: bucket} |
||||
} |
||||
if !IsValidObjectPrefix(prefix) { |
||||
return ListObjectsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix} |
||||
} |
||||
// Verify if delimiter is anything other than '/', which we do not support.
|
||||
if delimiter != "" && delimiter != slashSeparator { |
||||
return ListObjectsInfo{}, UnsupportedDelimiter{ |
||||
Delimiter: delimiter, |
||||
} |
||||
} |
||||
// Verify if marker has prefix.
|
||||
if marker != "" { |
||||
if !strings.HasPrefix(marker, prefix) { |
||||
return ListObjectsInfo{}, InvalidMarkerPrefixCombination{ |
||||
Marker: marker, |
||||
Prefix: prefix, |
||||
} |
||||
} |
||||
} |
||||
|
||||
// With max keys of zero we have reached eof, return right here.
|
||||
if maxKeys == 0 { |
||||
return ListObjectsInfo{}, nil |
||||
} |
||||
|
||||
// Overflowing count - reset to maxObjectList.
|
||||
if maxKeys < 0 || maxKeys > maxObjectList { |
||||
maxKeys = maxObjectList |
||||
} |
||||
|
||||
// Initiate a list operation, if successful filter and return quickly.
|
||||
listObjInfo, err := xl.listObjectsXL(bucket, prefix, marker, delimiter, maxKeys) |
||||
if err == nil { |
||||
// We got the entries successfully return.
|
||||
return listObjInfo, nil |
||||
} |
||||
|
||||
// Return error at the end.
|
||||
return ListObjectsInfo{}, toObjectErr(err, bucket, prefix) |
||||
} |
@ -0,0 +1,287 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"bytes" |
||||
"encoding/json" |
||||
"io" |
||||
"path" |
||||
"sort" |
||||
"sync" |
||||
"time" |
||||
) |
||||
|
||||
// Erasure block size.
|
||||
const erasureBlockSize = 4 * 1024 * 1024 // 4MiB.
|
||||
|
||||
// objectPartInfo Info of each part kept in the multipart metadata
|
||||
// file after CompleteMultipartUpload() is called.
|
||||
type objectPartInfo struct { |
||||
Name string `json:"name"` |
||||
ETag string `json:"etag"` |
||||
Size int64 `json:"size"` |
||||
} |
||||
|
||||
// xlMetaV1 - represents the per-object metadata stored in `xl.json`.
|
||||
type xlMetaV1 struct { |
||||
Version string `json:"version"` |
||||
Format string `json:"format"` |
||||
Stat struct { |
||||
Size int64 `json:"size"` |
||||
ModTime time.Time `json:"modTime"` |
||||
Version int64 `json:"version"` |
||||
} `json:"stat"` |
||||
Erasure struct { |
||||
DataBlocks int `json:"data"` |
||||
ParityBlocks int `json:"parity"` |
||||
BlockSize int64 `json:"blockSize"` |
||||
Index int `json:"index"` |
||||
Distribution []int `json:"distribution"` |
||||
} `json:"erasure"` |
||||
Checksum struct { |
||||
Enable bool `json:"enable"` |
||||
} `json:"checksum"` |
||||
Minio struct { |
||||
Release string `json:"release"` |
||||
} `json:"minio"` |
||||
Meta map[string]string `json:"meta"` |
||||
Parts []objectPartInfo `json:"parts,omitempty"` |
||||
} |
||||
|
||||
// ReadFrom - read from implements io.ReaderFrom interface for
|
||||
// unmarshalling xlMetaV1.
|
||||
func (m *xlMetaV1) ReadFrom(reader io.Reader) (n int64, err error) { |
||||
var buffer bytes.Buffer |
||||
n, err = buffer.ReadFrom(reader) |
||||
if err != nil { |
||||
return 0, err |
||||
} |
||||
err = json.Unmarshal(buffer.Bytes(), m) |
||||
return n, err |
||||
} |
||||
|
||||
// WriteTo - write to implements io.WriterTo interface for marshalling xlMetaV1.
|
||||
func (m xlMetaV1) WriteTo(writer io.Writer) (n int64, err error) { |
||||
metadataBytes, err := json.Marshal(m) |
||||
if err != nil { |
||||
return 0, err |
||||
} |
||||
p, err := writer.Write(metadataBytes) |
||||
return int64(p), err |
||||
} |
||||
|
||||
// byPartName is a collection satisfying sort.Interface.
|
||||
type byPartName []objectPartInfo |
||||
|
||||
func (t byPartName) Len() int { return len(t) } |
||||
func (t byPartName) Swap(i, j int) { t[i], t[j] = t[j], t[i] } |
||||
func (t byPartName) Less(i, j int) bool { return t[i].Name < t[j].Name } |
||||
|
||||
// SearchObjectPart - searches for part name and etag, returns the
|
||||
// index if found.
|
||||
func (m xlMetaV1) SearchObjectPart(name string, etag string) int { |
||||
for i, part := range m.Parts { |
||||
if name == part.Name && etag == part.ETag { |
||||
return i |
||||
} |
||||
} |
||||
return -1 |
||||
} |
||||
|
||||
// AddObjectPart - add a new object part in order.
|
||||
func (m *xlMetaV1) AddObjectPart(name string, etag string, size int64) { |
||||
m.Parts = append(m.Parts, objectPartInfo{ |
||||
Name: name, |
||||
ETag: etag, |
||||
Size: size, |
||||
}) |
||||
sort.Sort(byPartName(m.Parts)) |
||||
} |
||||
|
||||
// getPartNumberOffset - given an offset for the whole object, return the part and offset in that part.
|
||||
func (m xlMetaV1) getPartNumberOffset(offset int64) (partNumber int, partOffset int64, err error) { |
||||
partOffset = offset |
||||
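// Walk the parts in order, subtracting each part's size until the part containing the offset is found.
|
||||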
for i, part := range m.Parts { |
||||
partNumber = i |
||||
if part.Size == 0 { |
||||
return partNumber, partOffset, nil |
||||
} |
||||
if partOffset < part.Size { |
||||
return partNumber, partOffset, nil |
||||
} |
||||
partOffset -= part.Size |
||||
} |
||||
// Offset beyond the size of the object
|
||||
err = errUnexpected |
||||
return 0, 0, err |
||||
} |
||||
|
||||
// This function does the following check, suppose
|
||||
// object is "a/b/c/d", stat makes sure that objects "a/b/c",
|
||||
// "a/b" and "a" do not exist.
|
||||
func (xl xlObjects) parentDirIsObject(bucket, parent string) bool { |
||||
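// Recursively check each parent prefix until the bucket root (".") is reached.
|
||||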
var isParentDirObject func(string) bool |
||||
isParentDirObject = func(p string) bool { |
||||
if p == "." { |
||||
return false |
||||
} |
||||
if xl.isObject(bucket, p) { |
||||
// If there is already a file at prefix "p" return error.
|
||||
return true |
||||
} |
||||
// Check if there is a file as one of the parent paths.
|
||||
return isParentDirObject(path.Dir(p)) |
||||
} |
||||
return isParentDirObject(parent) |
||||
} |
||||
|
||||
func (xl xlObjects) isObject(bucket, prefix string) bool { |
||||
// Create errs and volInfo slices of storageDisks size.
|
||||
var errs = make([]error, len(xl.storageDisks)) |
||||
|
||||
// Allocate a new waitgroup.
|
||||
var wg = &sync.WaitGroup{} |
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
// Stat file on all the disks in a routine.
|
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
_, err := disk.StatFile(bucket, path.Join(prefix, xlMetaJSONFile)) |
||||
if err != nil { |
||||
errs[index] = err |
||||
return |
||||
} |
||||
errs[index] = nil |
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all the Stat operations to finish.
|
||||
wg.Wait() |
||||
|
||||
var errFileNotFoundCount int |
||||
for _, err := range errs { |
||||
if err != nil { |
||||
if err == errFileNotFound { |
||||
errFileNotFoundCount++ |
||||
// If file-not-found errors exceed the allowed read
|
||||
// quorum, the object does not exist - return false.
|
||||
if errFileNotFoundCount > len(xl.storageDisks)-xl.readQuorum { |
||||
return false |
||||
} |
||||
continue |
||||
} |
||||
errorIf(err, "Unable to access file "+path.Join(bucket, prefix)) |
||||
return false |
||||
} |
||||
} |
||||
return true |
||||
} |
||||
|
||||
// readXLMetadata - read xl metadata.
|
||||
func readXLMetadata(disk StorageAPI, bucket, object string) (xlMeta xlMetaV1, err error) { |
||||
r, err := disk.ReadFile(bucket, path.Join(object, xlMetaJSONFile), int64(0)) |
||||
if err != nil { |
||||
return xlMetaV1{}, err |
||||
} |
||||
defer r.Close() |
||||
_, err = xlMeta.ReadFrom(r) |
||||
if err != nil { |
||||
return xlMetaV1{}, err |
||||
} |
||||
return xlMeta, nil |
||||
} |
||||
|
||||
// deleteXLJson - delete `xl.json` on all disks.
|
||||
func (xl xlObjects) deleteXLMetadata(bucket, object string) error { |
||||
return xl.deleteObject(bucket, path.Join(object, xlMetaJSONFile)) |
||||
} |
||||
|
||||
// renameXLJson - rename `xl.json` on all disks.
|
||||
func (xl xlObjects) renameXLMetadata(srcBucket, srcPrefix, dstBucket, dstPrefix string) error { |
||||
return xl.renameObject(srcBucket, path.Join(srcPrefix, xlMetaJSONFile), dstBucket, path.Join(dstPrefix, xlMetaJSONFile)) |
||||
} |
||||
|
||||
// getDiskDistribution - get disk distribution.
|
||||
func (xl xlObjects) getDiskDistribution() []int { |
||||
var distribution = make([]int, len(xl.storageDisks)) |
||||
for index := range xl.storageDisks { |
||||
distribution[index] = index + 1 |
||||
} |
||||
return distribution |
||||
} |
||||
|
||||
// writeXLJson - write `xl.json` on all disks in order.
|
||||
func (xl xlObjects) writeXLMetadata(bucket, prefix string, xlMeta xlMetaV1) error { |
||||
var wg = &sync.WaitGroup{} |
||||
var mErrs = make([]error, len(xl.storageDisks)) |
||||
|
||||
// Initialize metadata map, save all erasure related metadata.
|
||||
xlMeta.Minio.Release = minioReleaseTag |
||||
xlMeta.Erasure.DataBlocks = xl.dataBlocks |
||||
xlMeta.Erasure.ParityBlocks = xl.parityBlocks |
||||
xlMeta.Erasure.BlockSize = erasureBlockSize |
||||
xlMeta.Erasure.Distribution = xl.getDiskDistribution() |
||||
|
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
go func(index int, disk StorageAPI, metadata xlMetaV1) { |
||||
defer wg.Done() |
||||
|
||||
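// Create `xl.json` under the given prefix on this disk.
|
||||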
metaJSONFile := path.Join(prefix, xlMetaJSONFile) |
||||
metaWriter, mErr := disk.CreateFile(bucket, metaJSONFile) |
||||
if mErr != nil { |
||||
mErrs[index] = mErr |
||||
return |
||||
} |
||||
|
||||
// Save the order.
|
||||
metadata.Erasure.Index = index + 1 |
||||
_, mErr = metadata.WriteTo(metaWriter) |
||||
if mErr != nil { |
||||
if mErr = safeCloseAndRemove(metaWriter); mErr != nil { |
||||
mErrs[index] = mErr |
||||
return |
||||
} |
||||
mErrs[index] = mErr |
||||
return |
||||
} |
||||
if mErr = metaWriter.Close(); mErr != nil { |
||||
if mErr = safeCloseAndRemove(metaWriter); mErr != nil { |
||||
mErrs[index] = mErr |
||||
return |
||||
} |
||||
mErrs[index] = mErr |
||||
return |
||||
} |
||||
mErrs[index] = nil |
||||
}(index, disk, xlMeta) |
||||
} |
||||
|
||||
// Wait for all the routines.
|
||||
wg.Wait() |
||||
|
||||
// FIXME: check for quorum.
|
||||
// Loop through collected errors and return the first one.
|
||||
for _, err := range mErrs { |
||||
if err == nil { |
||||
continue |
||||
} |
||||
return err |
||||
} |
||||
return nil |
||||
} |
@ -0,0 +1,474 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"bytes" |
||||
"encoding/json" |
||||
"io" |
||||
"path" |
||||
"sort" |
||||
"strings" |
||||
"sync" |
||||
"time" |
||||
|
||||
"github.com/skyrings/skyring-common/tools/uuid" |
||||
) |
||||
|
||||
// uploadInfo - metadata about a single in-progress multipart upload.
|
||||
type uploadInfo struct { |
||||
UploadID string `json:"uploadId"` |
||||
Initiated time.Time `json:"initiated"` |
||||
} |
||||
|
||||
// uploadsV1 - on-disk format of 'uploads.json', tracks all upload ids of an object.
|
||||
type uploadsV1 struct { |
||||
Version string `json:"version"` |
||||
Format string `json:"format"` |
||||
Uploads []uploadInfo `json:"uploadIds"` |
||||
} |
||||
|
||||
// byInitiatedTime is a collection satisfying sort.Interface.
|
||||
type byInitiatedTime []uploadInfo |
||||
|
||||
func (t byInitiatedTime) Len() int { return len(t) } |
||||
func (t byInitiatedTime) Swap(i, j int) { t[i], t[j] = t[j], t[i] } |
||||
func (t byInitiatedTime) Less(i, j int) bool { |
||||
return t[i].Initiated.After(t[j].Initiated) |
||||
} |
||||
|
||||
// AddUploadID - adds a new upload id in order of its initiated time.
|
||||
func (u *uploadsV1) AddUploadID(uploadID string, initiated time.Time) { |
||||
u.Uploads = append(u.Uploads, uploadInfo{ |
||||
UploadID: uploadID, |
||||
Initiated: initiated, |
||||
}) |
||||
sort.Sort(byInitiatedTime(u.Uploads)) |
||||
} |
||||
|
||||
func (u uploadsV1) SearchUploadID(uploadID string) int { |
||||
for i, u := range u.Uploads { |
||||
if u.UploadID == uploadID { |
||||
return i |
||||
} |
||||
} |
||||
return -1 |
||||
} |
||||
|
||||
// ReadFrom - read from implements io.ReaderFrom interface for unmarshalling uploads.
|
||||
func (u *uploadsV1) ReadFrom(reader io.Reader) (n int64, err error) { |
||||
var buffer bytes.Buffer |
||||
n, err = buffer.ReadFrom(reader) |
||||
if err != nil { |
||||
return 0, err |
||||
} |
||||
err = json.Unmarshal(buffer.Bytes(), &u) |
||||
return n, err |
||||
} |
||||
|
||||
// WriteTo - write to implements io.WriterTo interface for marshalling uploads.
|
||||
func (u uploadsV1) WriteTo(writer io.Writer) (n int64, err error) { |
||||
metadataBytes, err := json.Marshal(u) |
||||
if err != nil { |
||||
return 0, err |
||||
} |
||||
m, err := writer.Write(metadataBytes) |
||||
return int64(m), err |
||||
} |
||||
|
||||
// getUploadIDs - get saved upload ids.
|
||||
func getUploadIDs(bucket, object string, storageDisks ...StorageAPI) (uploadIDs uploadsV1, err error) { |
||||
uploadJSONPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile) |
||||
var errs = make([]error, len(storageDisks)) |
||||
var uploads = make([]uploadsV1, len(storageDisks)) |
||||
var wg = &sync.WaitGroup{} |
||||
|
||||
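// Read `uploads.json` from all disks in parallel.
|
||||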
for index, disk := range storageDisks { |
||||
wg.Add(1) |
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
r, rErr := disk.ReadFile(minioMetaBucket, uploadJSONPath, int64(0)) |
||||
if rErr != nil { |
||||
errs[index] = rErr |
||||
return |
||||
} |
||||
defer r.Close() |
||||
_, rErr = uploads[index].ReadFrom(r) |
||||
if rErr != nil { |
||||
errs[index] = rErr |
||||
return |
||||
} |
||||
errs[index] = nil |
||||
}(index, disk) |
||||
} |
||||
wg.Wait() |
||||
|
||||
for _, err = range errs { |
||||
if err != nil { |
||||
return uploadsV1{}, err |
||||
} |
||||
} |
||||
|
||||
// FIXME: Not sure if it is correct to simply pick the first successful result and return it.
|
||||
return uploads[0], nil |
||||
} |
||||
|
||||
func updateUploadJSON(bucket, object string, uploadIDs uploadsV1, storageDisks ...StorageAPI) error { |
||||
uploadsPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile) |
||||
var errs = make([]error, len(storageDisks)) |
||||
var wg = &sync.WaitGroup{} |
||||
|
||||
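// Write the updated `uploads.json` to all disks in parallel.
|
||||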
for index, disk := range storageDisks { |
||||
wg.Add(1) |
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
w, wErr := disk.CreateFile(minioMetaBucket, uploadsPath) |
||||
if wErr != nil { |
||||
errs[index] = wErr |
||||
return |
||||
} |
||||
_, wErr = uploadIDs.WriteTo(w) |
||||
if wErr != nil { |
||||
errs[index] = wErr |
||||
return |
||||
} |
||||
if wErr = w.Close(); wErr != nil { |
||||
if clErr := safeCloseAndRemove(w); clErr != nil { |
||||
errs[index] = clErr |
||||
return |
||||
} |
||||
errs[index] = wErr |
||||
return |
||||
} |
||||
}(index, disk) |
||||
} |
||||
|
||||
wg.Wait() |
||||
|
||||
for _, err := range errs { |
||||
if err != nil { |
||||
return err |
||||
} |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// writeUploadJSON - create `uploads.json` or update it with new uploadID.
|
||||
func writeUploadJSON(bucket, object, uploadID string, initiated time.Time, storageDisks ...StorageAPI) error { |
||||
uploadsPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile) |
||||
tmpUploadsPath := path.Join(tmpMetaPrefix, bucket, object, uploadsJSONFile) |
||||
|
||||
var errs = make([]error, len(storageDisks)) |
||||
var wg = &sync.WaitGroup{} |
||||
|
||||
uploadIDs, err := getUploadIDs(bucket, object, storageDisks...) |
||||
if err != nil && err != errFileNotFound { |
||||
return err |
||||
} |
||||
uploadIDs.Version = "1" |
||||
uploadIDs.Format = "xl" |
||||
uploadIDs.AddUploadID(uploadID, initiated) |
||||
|
||||
for index, disk := range storageDisks { |
||||
wg.Add(1) |
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
w, wErr := disk.CreateFile(minioMetaBucket, tmpUploadsPath) |
||||
if wErr != nil { |
||||
errs[index] = wErr |
||||
return |
||||
} |
||||
_, wErr = uploadIDs.WriteTo(w) |
||||
if wErr != nil { |
||||
errs[index] = wErr |
||||
return |
||||
} |
||||
if wErr = w.Close(); wErr != nil { |
||||
if clErr := safeCloseAndRemove(w); clErr != nil { |
||||
errs[index] = clErr |
||||
return |
||||
} |
||||
errs[index] = wErr |
||||
return |
||||
} |
||||
|
||||
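// Rename the temporary `uploads.json` into place only if one does not already exist on this disk.
|
||||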
_, wErr = disk.StatFile(minioMetaBucket, uploadsPath) |
||||
if wErr != nil { |
||||
if wErr == errFileNotFound { |
||||
wErr = disk.RenameFile(minioMetaBucket, tmpUploadsPath, minioMetaBucket, uploadsPath) |
||||
if wErr == nil { |
||||
return |
||||
} |
||||
} |
||||
if dErr := disk.DeleteFile(minioMetaBucket, tmpUploadsPath); dErr != nil { |
||||
errs[index] = dErr |
||||
return |
||||
} |
||||
errs[index] = wErr |
||||
return |
||||
} |
||||
}(index, disk) |
||||
} |
||||
|
||||
wg.Wait() |
||||
|
||||
for _, err = range errs { |
||||
if err != nil { |
||||
return err |
||||
} |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// Wrapper which removes all the uploaded parts.
|
||||
func cleanupUploadedParts(bucket, object, uploadID string, storageDisks ...StorageAPI) error { |
||||
var errs = make([]error, len(storageDisks)) |
||||
var wg = &sync.WaitGroup{} |
||||
for index, disk := range storageDisks { |
||||
wg.Add(1) |
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
err := cleanupDir(disk, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadID)) |
||||
if err != nil { |
||||
errs[index] = err |
||||
return |
||||
} |
||||
errs[index] = nil |
||||
}(index, disk) |
||||
} |
||||
wg.Wait() |
||||
|
||||
for _, err := range errs { |
||||
if err != nil { |
||||
return err |
||||
} |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// listUploadsInfo - list all uploads info.
|
||||
func (xl xlObjects) listUploadsInfo(prefixPath string) (uploads []uploadInfo, err error) { |
||||
disk := xl.getRandomDisk() |
||||
splitPrefixes := strings.SplitN(prefixPath, "/", 3) |
||||
uploadIDs, err := getUploadIDs(splitPrefixes[1], splitPrefixes[2], disk) |
||||
if err != nil { |
||||
if err == errFileNotFound { |
||||
return []uploadInfo{}, nil |
||||
} |
||||
return nil, err |
||||
} |
||||
uploads = uploadIDs.Uploads |
||||
return uploads, nil |
||||
} |
||||
|
||||
// listMetaBucketMultipart - list all objects at a given prefix inside minioMetaBucket.
|
||||
func (xl xlObjects) listMetaBucketMultipart(prefixPath string, markerPath string, recursive bool, maxKeys int) (objInfos []ObjectInfo, eof bool, err error) { |
||||
walker := xl.lookupTreeWalkXL(listParams{minioMetaBucket, recursive, markerPath, prefixPath}) |
||||
if walker == nil { |
||||
walker = xl.startTreeWalkXL(minioMetaBucket, prefixPath, markerPath, recursive) |
||||
} |
||||
|
||||
// newMaxKeys tracks the number of entries which are going to be
|
||||
// returned.
|
||||
var newMaxKeys int |
||||
|
||||
// Following loop gathers and filters out special files inside minio meta volume.
|
||||
for { |
||||
walkResult, ok := <-walker.ch |
||||
if !ok { |
||||
// Closed channel.
|
||||
eof = true |
||||
break |
||||
} |
||||
// For any walk error return right away.
|
||||
if walkResult.err != nil { |
||||
// File not found or Disk not found is a valid case.
|
||||
if walkResult.err == errFileNotFound || walkResult.err == errDiskNotFound { |
||||
return nil, true, nil |
||||
} |
||||
return nil, false, toObjectErr(walkResult.err, minioMetaBucket, prefixPath) |
||||
} |
||||
objInfo := walkResult.objInfo |
||||
var uploads []uploadInfo |
||||
if objInfo.IsDir { |
||||
// List all the upload entries if objInfo.Name is a leaf directory; if
|
||||
// objInfo.Name is not a leaf directory then the resulting
|
||||
// entries are empty.
|
||||
uploads, err = xl.listUploadsInfo(objInfo.Name) |
||||
if err != nil { |
||||
return nil, false, err |
||||
} |
||||
} |
||||
if len(uploads) > 0 { |
||||
for _, upload := range uploads { |
||||
objInfos = append(objInfos, ObjectInfo{ |
||||
Name: path.Join(objInfo.Name, upload.UploadID), |
||||
ModTime: upload.Initiated, |
||||
}) |
||||
newMaxKeys++ |
||||
// If we have reached the maxKeys, it means we have listed
|
||||
// everything that was requested.
|
||||
if newMaxKeys == maxKeys { |
||||
break |
||||
} |
||||
} |
||||
} else { |
||||
// We reach here for a non-recursive case non-leaf entry
|
||||
// OR recursive case with objInfo.Name.
|
||||
if !objInfo.IsDir { // Do not skip non-recursive case directory entries.
|
||||
// Validate if 'objInfo.Name' belongs to an incomplete multipart upload.
|
||||
if !strings.HasSuffix(objInfo.Name, xlMetaJSONFile) { |
||||
continue |
||||
} |
||||
objInfo.Name = path.Dir(objInfo.Name) |
||||
} |
||||
objInfos = append(objInfos, objInfo) |
||||
newMaxKeys++ |
||||
// If we have reached the maxKeys, it means we have listed
|
||||
// everything that was requested.
|
||||
if newMaxKeys == maxKeys { |
||||
break |
||||
} |
||||
} |
||||
} |
||||
|
||||
if !eof && len(objInfos) != 0 { |
||||
// EOF has not been reached, hence save the walker channel to the map so that the walker go-routine
|
||||
// can continue from where it left off for the next list request.
|
||||
lastObjInfo := objInfos[len(objInfos)-1] |
||||
markerPath = lastObjInfo.Name |
||||
xl.saveTreeWalkXL(listParams{minioMetaBucket, recursive, markerPath, prefixPath}, walker) |
||||
} |
||||
|
||||
// Return entries here.
|
||||
return objInfos, eof, nil |
||||
} |
||||
|
||||
// FIXME: Currently the code sorts based on keyName/upload-id which is
|
||||
// not correct based on the S3 specs. According to s3 specs we are
|
||||
// supposed to lexically sort only the keyNames, and then keyNames with
|
||||
// multiple upload ids should be sorted based on their initiated time.
|
||||
// Currently this case is not handled.
|
||||
|
||||
// listMultipartUploadsCommon - lists all multipart uploads, common
|
||||
// function for both object layers.
|
||||
func (xl xlObjects) listMultipartUploadsCommon(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) { |
||||
result := ListMultipartsInfo{} |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return ListMultipartsInfo{}, BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
if !xl.isBucketExist(bucket) { |
||||
return ListMultipartsInfo{}, BucketNotFound{Bucket: bucket} |
||||
} |
||||
if !IsValidObjectPrefix(prefix) { |
||||
return ListMultipartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix} |
||||
} |
||||
// Verify if delimiter is anything other than '/', which we do not support.
|
||||
if delimiter != "" && delimiter != slashSeparator { |
||||
return ListMultipartsInfo{}, UnsupportedDelimiter{ |
||||
Delimiter: delimiter, |
||||
} |
||||
} |
||||
// Verify if marker has prefix.
|
||||
if keyMarker != "" && !strings.HasPrefix(keyMarker, prefix) { |
||||
return ListMultipartsInfo{}, InvalidMarkerPrefixCombination{ |
||||
Marker: keyMarker, |
||||
Prefix: prefix, |
||||
} |
||||
} |
||||
if uploadIDMarker != "" { |
||||
if strings.HasSuffix(keyMarker, slashSeparator) { |
||||
return result, InvalidUploadIDKeyCombination{ |
||||
UploadIDMarker: uploadIDMarker, |
||||
KeyMarker: keyMarker, |
||||
} |
||||
} |
||||
id, err := uuid.Parse(uploadIDMarker) |
||||
if err != nil { |
||||
return result, err |
||||
} |
||||
if id.IsZero() { |
||||
return result, MalformedUploadID{ |
||||
UploadID: uploadIDMarker, |
||||
} |
||||
} |
||||
} |
||||
|
||||
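// Default is recursive, if delimiter is set then list non recursive.
|
||||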
recursive := true |
||||
if delimiter == slashSeparator { |
||||
recursive = false |
||||
} |
||||
|
||||
result.IsTruncated = true |
||||
result.MaxUploads = maxUploads |
||||
|
||||
// Not using path.Join() as it strips off the trailing '/'.
|
||||
multipartPrefixPath := pathJoin(mpartMetaPrefix, pathJoin(bucket, prefix)) |
||||
if prefix == "" { |
||||
// Should have a trailing "/" if prefix is ""
|
||||
// For ex. multipartPrefixPath should be "multipart/bucket/" if prefix is ""
|
||||
multipartPrefixPath += slashSeparator |
||||
} |
||||
multipartMarkerPath := "" |
||||
if keyMarker != "" { |
||||
keyMarkerPath := pathJoin(pathJoin(bucket, keyMarker), uploadIDMarker) |
||||
multipartMarkerPath = pathJoin(mpartMetaPrefix, keyMarkerPath) |
||||
} |
||||
|
||||
// List all the multipart files at prefixPath, starting with marker keyMarkerPath.
|
||||
objInfos, eof, err := xl.listMetaBucketMultipart(multipartPrefixPath, multipartMarkerPath, recursive, maxUploads) |
||||
if err != nil { |
||||
return ListMultipartsInfo{}, err |
||||
} |
||||
|
||||
// Loop through all the received files fill in the multiparts result.
|
||||
for _, objInfo := range objInfos { |
||||
var objectName string |
||||
var uploadID string |
||||
if objInfo.IsDir { |
||||
// All directory entries are common prefixes.
|
||||
uploadID = "" // Upload ids are empty for CommonPrefixes.
|
||||
objectName = strings.TrimPrefix(objInfo.Name, retainSlash(pathJoin(mpartMetaPrefix, bucket))) |
||||
result.CommonPrefixes = append(result.CommonPrefixes, objectName) |
||||
} else { |
||||
uploadID = path.Base(objInfo.Name) |
||||
objectName = strings.TrimPrefix(path.Dir(objInfo.Name), retainSlash(pathJoin(mpartMetaPrefix, bucket))) |
||||
result.Uploads = append(result.Uploads, uploadMetadata{ |
||||
Object: objectName, |
||||
UploadID: uploadID, |
||||
Initiated: objInfo.ModTime, |
||||
}) |
||||
} |
||||
result.NextKeyMarker = objectName |
||||
result.NextUploadIDMarker = uploadID |
||||
} |
||||
result.IsTruncated = !eof |
||||
if !result.IsTruncated { |
||||
result.NextKeyMarker = "" |
||||
result.NextUploadIDMarker = "" |
||||
} |
||||
return result, nil |
||||
} |
||||
|
||||
// isUploadIDExists - verify if a given uploadID exists and is valid.
|
||||
func (xl xlObjects) isUploadIDExists(bucket, object, uploadID string) bool { |
||||
uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID) |
||||
return xl.isObject(minioMetaBucket, uploadIDPath) |
||||
} |
@ -0,0 +1,432 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"crypto/md5" |
||||
"encoding/hex" |
||||
"fmt" |
||||
"io" |
||||
"io/ioutil" |
||||
"path" |
||||
"strconv" |
||||
"time" |
||||
) |
||||
|
||||
// ListMultipartUploads - list multipart uploads.
|
||||
func (xl xlObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) { |
||||
return xl.listMultipartUploadsCommon(bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads) |
||||
} |
||||
|
||||
/// Common multipart object layer functions.
|
||||
|
||||
// newMultipartUploadCommon - initialize a new multipart upload, common function for both object layers.
|
||||
func (xl xlObjects) newMultipartUploadCommon(bucket string, object string, meta map[string]string) (uploadID string, err error) { |
||||
// Verify if bucket name is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return "", BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify whether the bucket exists.
|
||||
if !xl.isBucketExist(bucket) { |
||||
return "", BucketNotFound{Bucket: bucket} |
||||
} |
||||
// Verify if object name is valid.
|
||||
if !IsValidObjectName(object) { |
||||
return "", ObjectNameInvalid{Bucket: bucket, Object: object} |
||||
} |
||||
// No metadata is set, allocate a new one.
|
||||
if meta == nil { |
||||
meta = make(map[string]string) |
||||
} |
||||
|
||||
xlMeta := xlMetaV1{} |
||||
xlMeta.Format = "xl" |
||||
xlMeta.Version = "1" |
||||
// If not set default to "application/octet-stream"
|
||||
if meta["content-type"] == "" { |
||||
meta["content-type"] = "application/octet-stream" |
||||
} |
||||
xlMeta.Meta = meta |
||||
|
||||
// This lock needs to be held for any changes to the directory contents of ".minio/multipart/object/"
|
||||
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object)) |
||||
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object)) |
||||
|
||||
uploadID = getUUID() |
||||
initiated := time.Now().UTC() |
||||
// Create 'uploads.json'
|
||||
if err = writeUploadJSON(bucket, object, uploadID, initiated, xl.storageDisks...); err != nil { |
||||
return "", err |
||||
} |
||||
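// Write `xl.json` for this upload id to a temporary location, then rename it into place.
|
||||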
uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID) |
||||
tempUploadIDPath := path.Join(tmpMetaPrefix, bucket, object, uploadID) |
||||
if err = xl.writeXLMetadata(minioMetaBucket, tempUploadIDPath, xlMeta); err != nil { |
||||
return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath) |
||||
} |
||||
if err = xl.renameXLMetadata(minioMetaBucket, tempUploadIDPath, minioMetaBucket, uploadIDPath); err != nil { |
||||
if dErr := xl.deleteXLMetadata(minioMetaBucket, tempUploadIDPath); dErr != nil { |
||||
return "", toObjectErr(dErr, minioMetaBucket, tempUploadIDPath) |
||||
} |
||||
return "", toObjectErr(err, minioMetaBucket, uploadIDPath) |
||||
} |
||||
// Return success.
|
||||
return uploadID, nil |
||||
} |
||||
|
||||
// NewMultipartUpload - initialize a new multipart upload, returns a unique id.
|
||||
func (xl xlObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) { |
||||
return xl.newMultipartUploadCommon(bucket, object, meta) |
||||
} |
||||
|
||||
// putObjectPartCommon - put object part.
|
||||
func (xl xlObjects) putObjectPartCommon(bucket string, object string, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return "", BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify whether the bucket exists.
|
||||
if !xl.isBucketExist(bucket) { |
||||
return "", BucketNotFound{Bucket: bucket} |
||||
} |
||||
if !IsValidObjectName(object) { |
||||
return "", ObjectNameInvalid{Bucket: bucket, Object: object} |
||||
} |
||||
if !xl.isUploadIDExists(bucket, object, uploadID) { |
||||
return "", InvalidUploadID{UploadID: uploadID} |
||||
} |
||||
// Hold read lock on the uploadID so that no one aborts it.
|
||||
nsMutex.RLock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) |
||||
defer nsMutex.RUnlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) |
||||
|
||||
// Hold write lock on the part so that there is no parallel upload on the part.
|
||||
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID, strconv.Itoa(partID))) |
||||
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID, strconv.Itoa(partID))) |
||||
|
||||
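// Construct the part file name and write the incoming data to a temporary location first.
|
||||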
partSuffix := fmt.Sprintf("object%d", partID) |
||||
tmpPartPath := path.Join(tmpMetaPrefix, bucket, object, uploadID, partSuffix) |
||||
fileWriter, err := xl.erasureDisk.CreateFile(minioMetaBucket, tmpPartPath) |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Initialize md5 writer.
|
||||
md5Writer := md5.New() |
||||
|
||||
// Instantiate a new multi writer.
|
||||
multiWriter := io.MultiWriter(md5Writer, fileWriter) |
||||
|
||||
// Copy the incoming data through the multi writer, validating the size if it is known.
|
||||
if size > 0 { |
||||
if _, err = io.CopyN(multiWriter, data, size); err != nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", toObjectErr(clErr, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
// Reader shouldn't have more data than what is mentioned in the size argument.
|
||||
// Read one more byte from the reader to validate this; the read is
|
||||
// expected to fail, success indicates the reader has more data than expected.
|
||||
if _, err = io.CopyN(ioutil.Discard, data, 1); err == nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", toObjectErr(clErr, bucket, object) |
||||
} |
||||
return "", UnExpectedDataSize{Size: int(size)} |
||||
} |
||||
} else { |
||||
var n int64 |
||||
if n, err = io.Copy(multiWriter, data); err != nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", toObjectErr(clErr, bucket, object) |
||||
} |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
size = n |
||||
} |
||||
|
||||
newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil)) |
||||
if md5Hex != "" { |
||||
if newMD5Hex != md5Hex { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", toObjectErr(clErr, bucket, object) |
||||
} |
||||
return "", BadDigest{md5Hex, newMD5Hex} |
||||
} |
||||
} |
||||
err = fileWriter.Close() |
||||
if err != nil { |
||||
if clErr := safeCloseAndRemove(fileWriter); clErr != nil { |
||||
return "", toObjectErr(clErr, bucket, object) |
||||
} |
||||
return "", err |
||||
} |
||||
|
||||
uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID) |
||||
xlMeta, err := readXLMetadata(xl.getRandomDisk(), minioMetaBucket, uploadIDPath) |
||||
if err != nil { |
||||
return "", toObjectErr(err, minioMetaBucket, uploadIDPath) |
||||
} |
||||
xlMeta.AddObjectPart(partSuffix, newMD5Hex, size) |
||||
|
||||
partPath := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix) |
||||
err = xl.renameObject(minioMetaBucket, tmpPartPath, minioMetaBucket, partPath) |
||||
if err != nil { |
||||
if dErr := xl.deleteObject(minioMetaBucket, tmpPartPath); dErr != nil { |
||||
return "", toObjectErr(dErr, minioMetaBucket, tmpPartPath) |
||||
} |
||||
return "", toObjectErr(err, minioMetaBucket, partPath) |
||||
} |
||||
if err = xl.writeXLMetadata(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadID), xlMeta); err != nil { |
||||
return "", toObjectErr(err, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadID)) |
||||
} |
||||
return newMD5Hex, nil |
||||
} |
||||
|
||||
// PutObjectPart - writes the multipart upload chunks.
|
||||
func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) { |
||||
return xl.putObjectPartCommon(bucket, object, uploadID, partID, size, data, md5Hex) |
||||
} |
||||
|
||||
// listObjectPartsCommon - list object parts, common function across both object layers.
|
||||
func (xl xlObjects) listObjectPartsCommon(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return ListPartsInfo{}, BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify whether the bucket exists.
|
||||
if !xl.isBucketExist(bucket) { |
||||
return ListPartsInfo{}, BucketNotFound{Bucket: bucket} |
||||
} |
||||
if !IsValidObjectName(object) { |
||||
return ListPartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object} |
||||
} |
||||
if !xl.isUploadIDExists(bucket, object, uploadID) { |
||||
return ListPartsInfo{}, InvalidUploadID{UploadID: uploadID} |
||||
} |
||||
// Hold lock so that there is no competing abort-multipart-upload or complete-multipart-upload.
|
||||
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) |
||||
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) |
||||
result := ListPartsInfo{} |
||||
|
||||
disk := xl.getRandomDisk() // Pick a random disk and read `xl.json` from there.
|
||||
uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID) |
||||
xlMeta, err := readXLMetadata(disk, minioMetaBucket, uploadIDPath) |
||||
if err != nil { |
||||
return ListPartsInfo{}, toObjectErr(err, minioMetaBucket, uploadIDPath) |
||||
} |
||||
// Only parts with higher part numbers will be listed.
|
||||
parts := xlMeta.Parts[partNumberMarker:] |
||||
count := maxParts |
||||
for i, part := range parts { |
||||
var fi FileInfo |
||||
partNamePath := path.Join(mpartMetaPrefix, bucket, object, uploadID, part.Name) |
||||
fi, err = disk.StatFile(minioMetaBucket, partNamePath) |
||||
if err != nil { |
||||
return ListPartsInfo{}, toObjectErr(err, minioMetaBucket, partNamePath) |
||||
} |
||||
partNum := i + partNumberMarker + 1 |
||||
result.Parts = append(result.Parts, partInfo{ |
||||
PartNumber: partNum, |
||||
ETag: part.ETag, |
||||
LastModified: fi.ModTime, |
||||
Size: fi.Size, |
||||
}) |
||||
count-- |
||||
if count == 0 { |
||||
break |
||||
} |
||||
} |
||||
// If listed entries are more than maxParts, we set IsTruncated as true.
|
||||
if len(parts) > len(result.Parts) { |
||||
result.IsTruncated = true |
||||
// Make sure to fill next part number marker if IsTruncated is
|
||||
// true for subsequent listing.
|
||||
nextPartNumberMarker := result.Parts[len(result.Parts)-1].PartNumber |
||||
result.NextPartNumberMarker = nextPartNumberMarker |
||||
} |
||||
result.Bucket = bucket |
||||
result.Object = object |
||||
result.UploadID = uploadID |
||||
result.MaxParts = maxParts |
||||
return result, nil |
||||
} |
||||
|
||||
// ListObjectParts - list object parts.
|
||||
func (xl xlObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) { |
||||
return xl.listObjectPartsCommon(bucket, object, uploadID, partNumberMarker, maxParts) |
||||
} |
||||
|
||||
func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return "", BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify whether the bucket exists.
|
||||
if !xl.isBucketExist(bucket) { |
||||
return "", BucketNotFound{Bucket: bucket} |
||||
} |
||||
if !IsValidObjectName(object) { |
||||
return "", ObjectNameInvalid{ |
||||
Bucket: bucket, |
||||
Object: object, |
||||
} |
||||
} |
||||
if !xl.isUploadIDExists(bucket, object, uploadID) { |
||||
return "", InvalidUploadID{UploadID: uploadID} |
||||
} |
||||
// Hold lock so that
|
||||
// 1) no one aborts this multipart upload
|
||||
// 2) no one does a parallel complete-multipart-upload on this multipart upload
|
||||
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) |
||||
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) |
||||
|
||||
// Calculate s3 compatible md5sum for complete multipart.
|
||||
s3MD5, err := completeMultipartMD5(parts...) |
||||
if err != nil { |
||||
return "", err |
||||
} |
||||
|
||||
uploadIDPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID) |
||||
xlMeta, err := readXLMetadata(xl.getRandomDisk(), minioMetaBucket, uploadIDPath) |
||||
if err != nil { |
||||
return "", err |
||||
} |
||||
|
||||
var objectSize int64 |
||||
// Loop through all parts, validate them and then commit to disk.
|
||||
for i, part := range parts { |
||||
// Construct part suffix.
|
||||
partSuffix := fmt.Sprintf("object%d", part.PartNumber) |
||||
if xlMeta.SearchObjectPart(partSuffix, part.ETag) == -1 { |
||||
return "", InvalidPart{} |
||||
} |
||||
// All parts except the last part have to be at least 5MB.
|
||||
if (i < len(parts)-1) && !isMinAllowedPartSize(xlMeta.Parts[i].Size) { |
||||
return "", PartTooSmall{} |
||||
} |
||||
objectSize += xlMeta.Parts[i].Size |
||||
} |
||||
|
||||
// Check if an object is present as one of the parent dirs.
|
||||
if xl.parentDirIsObject(bucket, path.Dir(object)) { |
||||
return "", toObjectErr(errFileAccessDenied, bucket, object) |
||||
} |
||||
|
||||
// Save the final object size and modtime.
|
||||
xlMeta.Stat.Size = objectSize |
||||
xlMeta.Stat.ModTime = time.Now().UTC() |
||||
|
||||
// Save successfully calculated md5sum.
|
||||
xlMeta.Meta["md5Sum"] = s3MD5 |
||||
if err = xl.writeXLMetadata(minioMetaBucket, uploadIDPath, xlMeta); err != nil { |
||||
return "", toObjectErr(err, minioMetaBucket, uploadIDPath) |
||||
} |
||||
|
||||
// Hold write lock on the destination before rename
|
||||
nsMutex.Lock(bucket, object) |
||||
defer nsMutex.Unlock(bucket, object) |
||||
|
||||
// Delete if an object already exists.
|
||||
// FIXME: rename it to tmp file and delete only after
|
||||
// the newly uploaded file is renamed from tmp location to
|
||||
// the original location. Verify if the object is a multipart object.
|
||||
err = xl.deleteObject(bucket, object) |
||||
if err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
if err = xl.renameObject(minioMetaBucket, uploadIDPath, bucket, object); err != nil { |
||||
return "", toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Hold the lock so that two parallel complete-multipart-uploads do not
|
||||
// leave a stale uploads.json behind.
|
||||
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object)) |
||||
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object)) |
||||
|
||||
// Validate if there are other incomplete upload-id's present for
|
||||
// the object, if yes do not attempt to delete 'uploads.json'.
|
||||
uploadIDs, err := getUploadIDs(bucket, object, xl.storageDisks...) |
||||
if err == nil { |
||||
uploadIDIdx := uploadIDs.SearchUploadID(uploadID) |
||||
if uploadIDIdx != -1 { |
||||
uploadIDs.Uploads = append(uploadIDs.Uploads[:uploadIDIdx], uploadIDs.Uploads[uploadIDIdx+1:]...) |
||||
} |
||||
if len(uploadIDs.Uploads) > 0 { |
||||
if err = updateUploadJSON(bucket, object, uploadIDs, xl.storageDisks...); err != nil { |
||||
return "", err |
||||
} |
||||
return s3MD5, nil |
||||
} |
||||
} |
||||
|
||||
err = xl.deleteObject(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object)) |
||||
if err != nil { |
||||
return "", toObjectErr(err, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object)) |
||||
} |
||||
|
||||
// Return md5sum.
|
||||
return s3MD5, nil |
||||
} |
||||
|
||||
// abortMultipartUploadCommon - aborts a multipart upload, common
|
||||
// function used by both object layers.
|
||||
func (xl xlObjects) abortMultipartUploadCommon(bucket, object, uploadID string) error { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
if !xl.isBucketExist(bucket) { |
||||
return BucketNotFound{Bucket: bucket} |
||||
} |
||||
if !IsValidObjectName(object) { |
||||
return ObjectNameInvalid{Bucket: bucket, Object: object} |
||||
} |
||||
if !xl.isUploadIDExists(bucket, object, uploadID) { |
||||
return InvalidUploadID{UploadID: uploadID} |
||||
} |
||||
|
||||
// Hold lock so that there is no competing complete-multipart-upload or put-object-part.
|
||||
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) |
||||
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) |
||||
|
||||
// Cleanup all uploaded parts.
|
||||
if err := cleanupUploadedParts(bucket, object, uploadID, xl.storageDisks...); err != nil { |
||||
return err |
||||
} |
||||
|
||||
// Validate if there are other incomplete upload-id's present for
|
||||
// the object, if yes do not attempt to delete 'uploads.json'.
|
||||
uploadIDs, err := getUploadIDs(bucket, object, xl.storageDisks...) |
||||
if err == nil { |
||||
uploadIDIdx := uploadIDs.SearchUploadID(uploadID) |
||||
if uploadIDIdx != -1 { |
||||
uploadIDs.Uploads = append(uploadIDs.Uploads[:uploadIDIdx], uploadIDs.Uploads[uploadIDIdx+1:]...) |
||||
} |
||||
if len(uploadIDs.Uploads) > 0 { |
||||
return nil |
||||
} |
||||
} |
||||
if err = xl.deleteObject(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object)); err != nil { |
||||
return toObjectErr(err, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object)) |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// AbortMultipartUpload - aborts a multipart upload.
|
||||
func (xl xlObjects) AbortMultipartUpload(bucket, object, uploadID string) error { |
||||
return xl.abortMultipartUploadCommon(bucket, object, uploadID) |
||||
} |
@ -0,0 +1,357 @@ |
||||
package main |
||||
|
||||
import ( |
||||
"crypto/md5" |
||||
"encoding/hex" |
||||
"io" |
||||
"path" |
||||
"path/filepath" |
||||
"strings" |
||||
"sync" |
||||
"time" |
||||
|
||||
"github.com/minio/minio/pkg/mimedb" |
||||
) |
||||
|
||||
/// Object Operations
|
||||
|
||||
// GetObject - get an object.
|
||||
func (xl xlObjects) GetObject(bucket, object string, startOffset int64) (io.ReadCloser, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return nil, BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify if object is valid.
|
||||
if !IsValidObjectName(object) { |
||||
return nil, ObjectNameInvalid{Bucket: bucket, Object: object} |
||||
} |
||||
nsMutex.RLock(bucket, object) |
||||
defer nsMutex.RUnlock(bucket, object) |
||||
fileReader, fileWriter := io.Pipe() |
||||
xlMeta, err := readXLMetadata(xl.getRandomDisk(), bucket, object) |
||||
if err != nil { |
||||
return nil, toObjectErr(err, bucket, object) |
||||
} |
||||
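// Locate the part and the offset within that part where startOffset falls.
|
||||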
partIndex, offset, err := xlMeta.getPartNumberOffset(startOffset) |
||||
if err != nil { |
||||
return nil, toObjectErr(err, bucket, object) |
||||
} |
||||
|
||||
// Hold a read lock once more which can be released after the following go-routine ends.
|
||||
// We hold RLock once more because the current function would return before the go routine below
|
||||
// executes, and would hence release the read lock (because of the defer'ed nsMutex.RUnlock() call).
|
||||
nsMutex.RLock(bucket, object) |
||||
go func() { |
||||
defer nsMutex.RUnlock(bucket, object) |
||||
for ; partIndex < len(xlMeta.Parts); partIndex++ { |
||||
part := xlMeta.Parts[partIndex] |
||||
r, err := xl.erasureDisk.ReadFile(bucket, pathJoin(object, part.Name), offset) |
||||
if err != nil { |
||||
fileWriter.CloseWithError(err) |
||||
return |
||||
} |
||||
// Reset offset to 0 as it would be non-0 only for the first loop if startOffset is non-0.
|
||||
offset = 0 |
||||
if _, err = io.Copy(fileWriter, r); err != nil { |
||||
switch reader := r.(type) { |
||||
case *io.PipeReader: |
||||
reader.CloseWithError(err) |
||||
case io.ReadCloser: |
||||
reader.Close() |
||||
} |
||||
fileWriter.CloseWithError(err) |
||||
return |
||||
} |
||||
// Close the readerCloser that reads multiparts of an object from the xl storage layer.
|
||||
// Not closing leaks underlying file descriptors.
|
||||
r.Close() |
||||
} |
||||
fileWriter.Close() |
||||
}() |
||||
return fileReader, nil |
||||
} |
||||
|
||||
// GetObjectInfo - get object info.
|
||||
func (xl xlObjects) GetObjectInfo(bucket, object string) (ObjectInfo, error) { |
||||
// Verify if bucket is valid.
|
||||
if !IsValidBucketName(bucket) { |
||||
return ObjectInfo{}, BucketNameInvalid{Bucket: bucket} |
||||
} |
||||
// Verify if object is valid.
|
||||
if !IsValidObjectName(object) { |
||||
return ObjectInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object} |
||||
} |
||||
nsMutex.RLock(bucket, object) |
||||
defer nsMutex.RUnlock(bucket, object) |
||||
info, err := xl.getObjectInfo(bucket, object) |
||||
if err != nil { |
||||
return ObjectInfo{}, toObjectErr(err, bucket, object) |
||||
} |
||||
return info, nil |
||||
} |
||||
|
||||
func (xl xlObjects) getObjectInfo(bucket, object string) (objInfo ObjectInfo, err error) { |
||||
// Count for errors encountered.
|
||||
var xlJSONErrCount = 0 |
||||
|
||||
// Loop through and return the first success entry based on the
|
||||
// selected random disk.
|
||||
for xlJSONErrCount < len(xl.storageDisks) { |
||||
// Choose a random disk on each attempt, do not hit the same disk all the time.
|
||||
disk := xl.getRandomDisk() // Pick a random disk.
|
||||
var xlMeta xlMetaV1 |
||||
xlMeta, err = readXLMetadata(disk, bucket, object) |
||||
if err == nil { |
||||
objInfo = ObjectInfo{} |
||||
objInfo.IsDir = false |
||||
objInfo.Bucket = bucket |
||||
objInfo.Name = object |
||||
objInfo.Size = xlMeta.Stat.Size |
||||
objInfo.ModTime = xlMeta.Stat.ModTime |
||||
objInfo.MD5Sum = xlMeta.Meta["md5Sum"] |
||||
objInfo.ContentType = xlMeta.Meta["content-type"] |
||||
objInfo.ContentEncoding = xlMeta.Meta["content-encoding"] |
||||
return objInfo, nil |
||||
} |
||||
xlJSONErrCount++ // Update error count.
|
||||
} |
||||
|
||||
// Return error at the end.
|
||||
return ObjectInfo{}, err |
||||
} |
||||
|
||||
// renameObject - renaming all source objects to destination object across all disks.
|
||||
func (xl xlObjects) renameObject(srcBucket, srcObject, dstBucket, dstObject string) error { |
||||
// Initialize sync waitgroup.
|
||||
var wg = &sync.WaitGroup{} |
||||
|
||||
// Initialize list of errors.
|
||||
var errs = make([]error, len(xl.storageDisks)) |
||||
|
||||
// Rename file on all underlying storage disks.
|
||||
for index, disk := range xl.storageDisks { |
||||
// Append "/" as srcObject and dstObject are either leaf-dirs or non-leaf-dris.
|
||||
// If srcObject is an object instead of prefix we just rename the leaf-dir and
|
||||
// not rename the part and metadata files separately.
|
||||
wg.Add(1) |
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
err := disk.RenameFile(srcBucket, retainSlash(srcObject), dstBucket, retainSlash(dstObject)) |
||||
if err != nil { |
||||
errs[index] = err |
||||
return |
||||
} |
||||
errs[index] = nil |
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all RenameFile to finish.
|
||||
wg.Wait() |
||||
|
||||
// Gather err count.
|
||||
var errCount = 0 |
||||
for _, err := range errs { |
||||
if err == nil { |
||||
continue |
||||
} |
||||
errCount++ |
||||
} |
||||
// We can safely allow RenameFile errors up to len(xl.storageDisks) - xl.writeQuorum
|
||||
// otherwise return failure. Cleanup successful renames.
|
||||
if errCount > len(xl.storageDisks)-xl.writeQuorum { |
||||
// Special condition if readQuorum exists, then return success.
|
||||
if errCount <= len(xl.storageDisks)-xl.readQuorum { |
||||
return nil |
||||
} |
||||
xl.deleteObject(srcBucket, srcObject) |
||||
return errWriteQuorum |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// PutObject - create an object.
func (xl xlObjects) PutObject(bucket string, object string, size int64, data io.Reader, metadata map[string]string) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify bucket exists.
	if !xl.isBucketExist(bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	// No metadata is set, allocate a new one.
	if metadata == nil {
		metadata = make(map[string]string)
	}
	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

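	// The object data is first written to a temporary location under
	// tmpMetaPrefix inside minioMetaBucket and only renamed to its final
	// location once the write and md5 checks succeed, so a failed or
	// aborted upload never becomes visible in the target bucket.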
	tempErasureObj := path.Join(tmpMetaPrefix, bucket, object, "object1")
	tempObj := path.Join(tmpMetaPrefix, bucket, object)
	fileWriter, err := xl.erasureDisk.CreateFile(minioMetaBucket, tempErasureObj)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Initialize md5 writer.
	md5Writer := md5.New()

	// Instantiate a new multi writer.
	multiWriter := io.MultiWriter(md5Writer, fileWriter)

	// Copy the incoming data to the erasure backed file writer and the
	// md5 hasher in a single pass.
	if size > 0 {
		if _, err = io.CopyN(multiWriter, data, size); err != nil {
			if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
				return "", toObjectErr(clErr, bucket, object)
			}
			return "", toObjectErr(err, bucket, object)
		}
	} else {
		if _, err = io.Copy(multiWriter, data); err != nil {
			if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
				return "", toObjectErr(clErr, bucket, object)
			}
			return "", toObjectErr(err, bucket, object)
		}
	}

	// Save additional erasureMetadata.
	modTime := time.Now().UTC()

	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
	// Update the md5sum if not set with the newly calculated one.
	if len(metadata["md5Sum"]) == 0 {
		metadata["md5Sum"] = newMD5Hex
	}
	// If not set, default to "application/octet-stream".
	if metadata["content-type"] == "" {
		contentType := "application/octet-stream"
		if objectExt := filepath.Ext(object); objectExt != "" {
			content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(objectExt, "."))]
			if ok {
				contentType = content.ContentType
			}
		}
		metadata["content-type"] = contentType
	}

	// md5Hex representation.
	md5Hex := metadata["md5Sum"]
	if md5Hex != "" {
		if newMD5Hex != md5Hex {
			if err = safeCloseAndRemove(fileWriter); err != nil {
				return "", toObjectErr(err, bucket, object)
			}
			return "", BadDigest{md5Hex, newMD5Hex}
		}
	}

	err = fileWriter.Close()
	if err != nil {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Check if an object is present as one of the parent dirs.
	if xl.parentDirIsObject(bucket, path.Dir(object)) {
		return "", toObjectErr(errFileAccessDenied, bucket, object)
	}
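	// The check above rejects, for example, creating "a/b/c" when "a/b"
	// already exists as an object, since "a/b" cannot simultaneously be
	// treated as a directory.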

	// Delete if an object already exists.
	err = xl.deleteObject(bucket, object)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	err = xl.renameObject(minioMetaBucket, tempObj, bucket, object)
	if err != nil {
		if dErr := xl.deleteObject(minioMetaBucket, tempObj); dErr != nil {
			return "", toObjectErr(dErr, minioMetaBucket, tempObj)
		}
		return "", toObjectErr(err, bucket, object)
	}

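	// Record the object metadata: for a regular (non-multipart) PUT the
	// entire object is stored as a single part named "object1".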
	xlMeta := xlMetaV1{}
	xlMeta.Version = "1"
	xlMeta.Format = "xl"
	xlMeta.Meta = metadata
	xlMeta.Stat.Size = size
	xlMeta.Stat.ModTime = modTime
	xlMeta.AddObjectPart("object1", newMD5Hex, xlMeta.Stat.Size)
	if err = xl.writeXLMetadata(bucket, object, xlMeta); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Return md5sum, successfully wrote object.
	return newMD5Hex, nil
}

// deleteObject - deletes a regular object.
func (xl xlObjects) deleteObject(bucket, object string) error {
	// Initialize sync waitgroup.
	var wg = &sync.WaitGroup{}

	// Initialize list of errors.
	var dErrs = make([]error, len(xl.storageDisks))

	for index, disk := range xl.storageDisks {
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			dErrs[index] = cleanupDir(disk, bucket, object)
		}(index, disk)
	}

	// Wait for all routines to finish.
	wg.Wait()

	var fileNotFoundCnt, deleteFileErr int
	// Loop through all the collected errors.
	for _, err := range dErrs {
		if err == nil {
			continue
		}
		// If file not found, count them.
		if err == errFileNotFound {
			fileNotFoundCnt++
			continue
		}

		// Update error counter separately.
		deleteFileErr++
	}

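	// A file-not-found from only some of the disks simply means those
	// disks had nothing left to clean up, so it is not treated as a delete
	// failure; only other errors count against the write quorum below.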
	// Return err if all disks report file not found.
	if fileNotFoundCnt == len(xl.storageDisks) {
		return errFileNotFound
	} else if deleteFileErr > len(xl.storageDisks)-xl.writeQuorum {
		// Return errWriteQuorum if errors were more than
		// allowed write quorum.
		return errWriteQuorum
	}

	return nil
}

// DeleteObject - delete the object.
func (xl xlObjects) DeleteObject(bucket, object string) error {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)
	if err := xl.deleteObject(bucket, object); err != nil {
		return toObjectErr(err, bucket, object)
	}
	return nil
}
@ -0,0 +1,177 @@
/*
 * Minio Cloud Storage, (C) 2016 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

import (
	"errors"
	"fmt"
	"path/filepath"
	"strings"
	"sync"
)

const (
	formatConfigFile = "format.json"
	xlMetaJSONFile   = "xl.json"
	uploadsJSONFile  = "uploads.json"
)

// xlObjects - Implements the XL (erasure coded) object layer.
type xlObjects struct {
	storageDisks       []StorageAPI
	erasureDisk        *erasure
	dataBlocks         int
	parityBlocks       int
	readQuorum         int
	writeQuorum        int
	listObjectMap      map[listParams][]*treeWalker
	listObjectMapMutex *sync.Mutex
}

// errMaxDisks - returned when the number of disks is higher than the supported maximum of '16'.
var errMaxDisks = errors.New("Number of disks are higher than supported maximum count '16'")

// errMinDisks - returned when the number of disks is smaller than the supported minimum of '8'.
var errMinDisks = errors.New("Number of disks are smaller than supported minimum count '8'")

// errNumDisks - returned for an odd number of disks.
var errNumDisks = errors.New("Number of disks should be multiples of '2'")

const (
	// Maximum erasure blocks.
	maxErasureBlocks = 16
	// Minimum erasure blocks.
	minErasureBlocks = 8
)

func checkSufficientDisks(disks []string) error {
	// Verify total number of disks.
	totalDisks := len(disks)
	if totalDisks > maxErasureBlocks {
		return errMaxDisks
	}
	if totalDisks < minErasureBlocks {
		return errMinDisks
	}

	// isEven - verifies if a given number is even.
	isEven := func(number int) bool {
		return number%2 == 0
	}

	// Verify if we have an even number of disks.
	// Only combinations of 8, 10, 12, 14 and 16 are supported.
	if !isEven(totalDisks) {
		return errNumDisks
	}

	return nil
}

// Depending on the disk type, network or local, initialize the storage layer.
func newStorageLayer(disk string) (storage StorageAPI, err error) {
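	// A disk endpoint carrying a host portion, e.g. "node1:/export/disk1",
	// selects the network RPC storage backend; plain local paths, including
	// Windows paths like C:\export whose volume name contains the ':',
	// use the local POSIX backend.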
	if !strings.ContainsRune(disk, ':') || filepath.VolumeName(disk) != "" {
		// Initialize filesystem storage API.
		return newPosix(disk)
	}
	// Initialize rpc client storage API.
	return newRPCClient(disk)
}

// Initialize all storage disks to bootstrap.
func bootstrapDisks(disks []string) ([]StorageAPI, error) {
	storageDisks := make([]StorageAPI, len(disks))
	for index, disk := range disks {
		var err error
		// Intentionally ignore disk not found errors while
		// initializing POSIX, so that we have a successfully
		// initialized posix Storage. Subsequent calls to XL/Erasure
		// will manage any errors related to disks.
		storageDisks[index], err = newStorageLayer(disk)
		if err != nil && err != errDiskNotFound {
			return nil, err
		}
	}
	return storageDisks, nil
}

// newXLObjects - initialize new xl object layer.
func newXLObjects(disks []string) (ObjectLayer, error) {
	if err := checkSufficientDisks(disks); err != nil {
		return nil, err
	}

	// Bootstrap disks.
	storageDisks, err := bootstrapDisks(disks)
	if err != nil {
		return nil, err
	}

	// Initialize object layer - like creating minioMetaBucket, cleaning up tmp files etc.
	initObjectLayer(storageDisks...)

	// Load saved XL format.json and validate.
	newPosixDisks, err := loadFormatXL(storageDisks)
	if err != nil {
		switch err {
		case errUnformattedDisk:
			// Save new XL format.
			errSave := initFormatXL(storageDisks)
			if errSave != nil {
				return nil, errSave
			}
			newPosixDisks = storageDisks
		default:
			// errCorruptedDisk and any other error - fail initialization.
			return nil, fmt.Errorf("Unable to recognize backend format, %s", err)
		}
	}

	// FIXME: healFormatXL(newDisks)

	newErasureDisk, err := newErasure(newPosixDisks)
	if err != nil {
		return nil, err
	}

	// Calculate data and parity blocks.
	dataBlocks, parityBlocks := len(newPosixDisks)/2, len(newPosixDisks)/2

	xl := xlObjects{
		storageDisks:       newPosixDisks,
		erasureDisk:        newErasureDisk,
		dataBlocks:         dataBlocks,
		parityBlocks:       parityBlocks,
		listObjectMap:      make(map[listParams][]*treeWalker),
		listObjectMapMutex: &sync.Mutex{},
	}

	// Figure out read and write quorum based on the number of storage disks.
	// Read quorum should always be N/2 + 1 (due to Vandermonde matrix
	// erasure requirements).
	xl.readQuorum = len(xl.storageDisks)/2 + 1

	// Write quorum is assumed to be N/2 + 3, i.e. the data disks plus three
	// parity disks, capped at the total disk count. (Need to discuss this again)
	xl.writeQuorum = len(xl.storageDisks)/2 + 3
	if xl.writeQuorum > len(xl.storageDisks) {
		xl.writeQuorum = len(xl.storageDisks)
	}
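	// For example, with 16 disks: dataBlocks = parityBlocks = 8,
	// readQuorum = 9 and writeQuorum = 11; with the minimum of 8 disks:
	// dataBlocks = parityBlocks = 4, readQuorum = 5 and writeQuorum = 7.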

	// Return successfully initialized object layer.
	return xl, nil
}