You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
minio/pkg/donut/donut-v2.go

638 lines
19 KiB

/*
* Minimalist Object Storage, (C) 2015 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package donut
import (
"bytes"
"crypto/md5"
"encoding/base64"
"encoding/hex"
"errors"
"io"
"io/ioutil"
"log"
"runtime/debug"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/minio/minio/pkg/donut/trove"
"github.com/minio/minio/pkg/iodine"
"github.com/minio/minio/pkg/quick"
)
// totalBuckets is the total number of buckets allowed.
const totalBuckets = 100
// Config is the donut configuration. The json tags give the key used for
// each field when the configuration is serialized.
type Config struct {
// Version of the configuration.
// NOTE(review): the expected format is not visible in this file.
Version string `json:"version"`
// MaxSize is handed to trove.NewCache when the object cache is created and
// is the largest object size CreateObject accepts.
MaxSize uint64 `json:"max-size"`
// Expiration is handed to trove.NewCache when the object cache is created.
Expiration time.Duration `json:"expiration"`
// DonutName names this donut.
// NOTE(review): not referenced by the code visible in this file.
DonutName string `json:"donut-name"`
// NodeDiskMap maps a node to its list of disks; New attaches every entry
// with AttachNode. When the map is empty the API skips all disk calls and
// works purely from its in-memory state.
NodeDiskMap map[string][]string `json:"node-disk-map"`
}
// API holds the state of a donut instance. Methods use value receivers, so
// the fields that must be shared between copies are pointers or maps.
type API struct {
// config is the configuration passed to New.
config *Config
// lock is taken by the API methods around access to storedBuckets and the
// object cache.
lock *sync.RWMutex
// objects caches object data under the key "<bucket>/<object>".
objects *trove.Cache
// multiPartObjects is a second cache, created by New with zero size and
// zero expiration arguments. Presumably holds multipart upload parts;
// its users are not visible in this file.
multiPartObjects *trove.Cache
// storedBuckets is the in-memory view of every known bucket, by name.
storedBuckets map[string]storedBucket
// nodes and buckets are allocated empty by New.
// NOTE(review): they are populated elsewhere (not visible here).
nodes map[string]node
buckets map[string]bucket
}
// storedBucket is the in-memory record kept for one bucket.
type storedBucket struct {
// bucketMetadata carries the bucket's name, creation time and ACL.
bucketMetadata BucketMetadata
// objectMetadata is keyed by "<bucket>/<object>".
objectMetadata map[string]ObjectMetadata
// partMetadata and multiPartSession are allocated by MakeBucket.
// NOTE(review): their key scheme is defined by code outside this file.
partMetadata map[string]PartMetadata
multiPartSession map[string]multiPartSession
}
// multiPartSession describes one multipart upload session.
// NOTE(review): the fields are only declared here; the code that fills and
// reads them is not visible in this file, so the notes below follow the names.
type multiPartSession struct {
// totalParts is presumably the number of parts in the session.
totalParts int
// uploadID is presumably the identifier of the upload session.
uploadID string
// initiated is presumably the time the session was started.
initiated time.Time
}
// New instantiates a new donut from the given configuration.
//
// The configuration is validated with quick.CheckData, the object and
// multipart caches are created, and expiry of cached objects is started with
// a five second argument. When the configuration lists nodes, every node is
// attached and all buckets found by listBuckets are loaded into memory.
//
// It returns an error when the configuration is invalid, when a node has no
// disks (InvalidDisksArgument), or when attaching a node or listing the
// buckets fails.
func New(c *Config) (Interface, error) {
	if err := quick.CheckData(c); err != nil {
		return nil, iodine.New(err, nil)
	}
	a := API{config: c}
	a.storedBuckets = make(map[string]storedBucket)
	a.nodes = make(map[string]node)
	a.buckets = make(map[string]bucket)
	a.objects = trove.NewCache(a.config.MaxSize, a.config.Expiration)
	a.multiPartObjects = trove.NewCache(0, time.Duration(0))
	a.objects.OnExpired = a.expiredObject
	a.multiPartObjects.OnExpired = a.expiredPart
	a.lock = new(sync.RWMutex)

	// set up cache expiration
	a.objects.ExpireObjects(time.Second * 5)

	if len(a.config.NodeDiskMap) == 0 {
		return a, nil
	}
	for nodeName, disks := range a.config.NodeDiskMap {
		if len(disks) == 0 {
			return nil, iodine.New(InvalidDisksArgument{}, nil)
		}
		if err := a.AttachNode(nodeName, disks); err != nil {
			return nil, iodine.New(err, nil)
		}
	}

	// Initialization, populate all buckets into memory
	buckets, err := a.listBuckets()
	if err != nil {
		return nil, iodine.New(err, nil)
	}
	for name, metadata := range buckets {
		stored := a.storedBuckets[name]
		stored.bucketMetadata = metadata
		// The per-bucket maps must exist before anything is written to them:
		// assigning into a nil map panics, and the object and metadata
		// methods write into objectMetadata for preloaded buckets.
		if stored.objectMetadata == nil {
			stored.objectMetadata = make(map[string]ObjectMetadata)
		}
		if stored.partMetadata == nil {
			stored.partMetadata = make(map[string]PartMetadata)
		}
		if stored.multiPartSession == nil {
			stored.multiPartSession = make(map[string]multiPartSession)
		}
		a.storedBuckets[name] = stored
	}
	return a, nil
}
// GetObject writes the whole object to w and returns the number of bytes
// written.
//
// The object cache is consulted first. On a miss, and only when disks are
// configured, the object is read through getObject, streamed to w, and the
// bytes captured by the proxy writer are stored in the cache.
//
// Errors: BucketNameInvalid, ObjectNameInvalid, BucketNotFound,
// ObjectNotFound (cache miss with no disks configured), InternalError (the
// cache refused the object) or any error from reading or copying the data.
// The read lock is released on every return path.
func (donut API) GetObject(w io.Writer, bucket string, object string) (int64, error) {
	donut.lock.RLock()
	if !IsValidBucket(bucket) {
		donut.lock.RUnlock()
		return 0, iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
	}
	if !IsValidObjectName(object) {
		donut.lock.RUnlock()
		return 0, iodine.New(ObjectNameInvalid{Object: object}, nil)
	}
	if _, ok := donut.storedBuckets[bucket]; !ok {
		donut.lock.RUnlock()
		return 0, iodine.New(BucketNotFound{Bucket: bucket}, nil)
	}
	objectKey := bucket + "/" + object
	data, ok := donut.objects.Get(objectKey)
	if ok {
		// Cache hit: serve straight from memory. The lock is dropped before
		// the error check so that a failed copy cannot leave it held.
		written, err := io.CopyN(w, bytes.NewBuffer(data), int64(donut.objects.Len(objectKey)))
		donut.lock.RUnlock()
		if err != nil {
			return 0, iodine.New(err, nil)
		}
		return written, nil
	}
	if len(donut.config.NodeDiskMap) == 0 {
		donut.lock.RUnlock()
		return 0, iodine.New(ObjectNotFound{Object: object}, nil)
	}
	reader, size, err := donut.getObject(bucket, object)
	if err != nil {
		donut.lock.RUnlock()
		return 0, iodine.New(err, nil)
	}
	// new proxy writer to capture data read from disk
	pw := NewProxyWriter(w)
	written, err := io.CopyN(pw, reader, size)
	donut.lock.RUnlock()
	if err != nil {
		return 0, iodine.New(err, nil)
	}
	// cache object read from disk; the read lock has been released above
	// because the write lock cannot be taken while it is held
	donut.lock.Lock()
	cached := donut.objects.Set(objectKey, pw.writtenBytes)
	donut.lock.Unlock()
	pw.writtenBytes = nil
	go debug.FreeOSMemory()
	if !cached {
		return 0, iodine.New(InternalError{}, nil)
	}
	return written, nil
}
// GetPartialObject writes length bytes of the object, beginning at byte
// offset start, to w and returns the number of bytes written.
//
// The object cache is consulted first. On a miss, and only when disks are
// configured, the object is read through getObject: the first start bytes
// are discarded and the next length bytes are copied to w.
//
// A range read never populates the cache. The cache is keyed by the whole
// object, so storing a partial (or empty) buffer under that key would make
// later full reads return wrong data.
//
// Errors: BucketNameInvalid, ObjectNameInvalid, InvalidRange (negative start,
// or start beyond the end of a cached object), ObjectNotFound (cache miss
// with no disks configured) or any error from reading or copying the data.
// The read lock is released on every return path.
func (donut API) GetPartialObject(w io.Writer, bucket, object string, start, length int64) (int64, error) {
	errParams := map[string]string{
		"bucket": bucket,
		"object": object,
		"start":  strconv.FormatInt(start, 10),
		"length": strconv.FormatInt(length, 10),
	}
	donut.lock.RLock()
	if !IsValidBucket(bucket) {
		donut.lock.RUnlock()
		return 0, iodine.New(BucketNameInvalid{Bucket: bucket}, errParams)
	}
	if !IsValidObjectName(object) {
		donut.lock.RUnlock()
		return 0, iodine.New(ObjectNameInvalid{Object: object}, errParams)
	}
	if start < 0 {
		donut.lock.RUnlock()
		return 0, iodine.New(InvalidRange{
			Start:  start,
			Length: length,
		}, errParams)
	}
	objectKey := bucket + "/" + object
	data, ok := donut.objects.Get(objectKey)
	if ok {
		// Slicing past the end of the cached bytes would panic.
		if start > int64(len(data)) {
			donut.lock.RUnlock()
			return 0, iodine.New(InvalidRange{
				Start:  start,
				Length: length,
			}, errParams)
		}
		written, err := io.CopyN(w, bytes.NewBuffer(data[start:]), length)
		donut.lock.RUnlock()
		if err != nil {
			return 0, iodine.New(err, nil)
		}
		return written, nil
	}
	if len(donut.config.NodeDiskMap) == 0 {
		donut.lock.RUnlock()
		return 0, iodine.New(ObjectNotFound{Object: object}, nil)
	}
	reader, _, err := donut.getObject(bucket, object)
	if err != nil {
		donut.lock.RUnlock()
		return 0, iodine.New(err, nil)
	}
	// skip the bytes in front of the requested range
	if _, err := io.CopyN(ioutil.Discard, reader, start); err != nil {
		donut.lock.RUnlock()
		return 0, iodine.New(err, nil)
	}
	written, err := io.CopyN(w, reader, length)
	donut.lock.RUnlock()
	if err != nil {
		return 0, iodine.New(err, nil)
	}
	return written, nil
}
// GetBucketMetadata returns the metadata of the named bucket.
//
// Buckets already held in memory are answered directly. Otherwise, when
// disks are configured, the metadata is fetched with getBucketMetadata,
// remembered in memory and returned.
//
// Errors: BucketNameInvalid, BucketNotFound (unknown bucket with no disks
// configured) or any error from getBucketMetadata.
func (donut API) GetBucketMetadata(bucket string) (BucketMetadata, error) {
	donut.lock.RLock()
	if !IsValidBucket(bucket) {
		donut.lock.RUnlock()
		return BucketMetadata{}, iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
	}
	// Read the entry while the lock is still held.
	if stored, ok := donut.storedBuckets[bucket]; ok {
		donut.lock.RUnlock()
		return stored.bucketMetadata, nil
	}
	if len(donut.config.NodeDiskMap) == 0 {
		donut.lock.RUnlock()
		return BucketMetadata{}, iodine.New(BucketNotFound{Bucket: bucket}, nil)
	}
	bucketMetadata, err := donut.getBucketMetadata(bucket)
	donut.lock.RUnlock()
	if err != nil {
		return BucketMetadata{}, iodine.New(err, nil)
	}

	donut.lock.Lock()
	stored := donut.storedBuckets[bucket]
	stored.bucketMetadata = bucketMetadata
	// A freshly created entry has nil maps; allocate them so later writes
	// into this bucket do not panic.
	if stored.objectMetadata == nil {
		stored.objectMetadata = make(map[string]ObjectMetadata)
	}
	if stored.partMetadata == nil {
		stored.partMetadata = make(map[string]PartMetadata)
	}
	if stored.multiPartSession == nil {
		stored.multiPartSession = make(map[string]multiPartSession)
	}
	donut.storedBuckets[bucket] = stored
	donut.lock.Unlock()
	return bucketMetadata, nil
}
// SetBucketMetadata updates the metadata of an existing bucket.
//
// When disks are configured the metadata map is first handed to
// setBucketMetadata; afterwards the in-memory ACL of the bucket is set to
// metadata["acl"].
//
// Errors: BucketNameInvalid, BucketNotFound or any error from
// setBucketMetadata. The whole operation runs under the write lock, which
// the deferred unlock releases on every return path.
func (donut API) SetBucketMetadata(bucket string, metadata map[string]string) error {
	donut.lock.Lock()
	defer donut.lock.Unlock()
	if !IsValidBucket(bucket) {
		return iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
	}
	stored, ok := donut.storedBuckets[bucket]
	if !ok {
		return iodine.New(BucketNotFound{Bucket: bucket}, nil)
	}
	if len(donut.config.NodeDiskMap) > 0 {
		if err := donut.setBucketMetadata(bucket, metadata); err != nil {
			return iodine.New(err, nil)
		}
	}
	stored.bucketMetadata.ACL = BucketACL(metadata["acl"])
	donut.storedBuckets[bucket] = stored
	return nil
}
// isMD5SumEqual compares two hex encoded md5sums and returns nil when they
// decode to the same bytes.
//
// An error is returned when either argument is blank ("invalid argument"),
// when either argument is not valid hex, or when the decoded sums differ
// ("bad digest, md5sum mismatch").
func isMD5SumEqual(expectedMD5Sum, actualMD5Sum string) error {
	if strings.TrimSpace(expectedMD5Sum) == "" || strings.TrimSpace(actualMD5Sum) == "" {
		return iodine.New(errors.New("invalid argument"), nil)
	}
	want, err := hex.DecodeString(expectedMD5Sum)
	if err != nil {
		return iodine.New(err, nil)
	}
	got, err := hex.DecodeString(actualMD5Sum)
	if err != nil {
		return iodine.New(err, nil)
	}
	if bytes.Equal(want, got) {
		return nil
	}
	return iodine.New(errors.New("bad digest, md5sum mismatch"), nil)
}
// CreateObject stores a new object in the given bucket and returns its
// metadata.
//
// Objects whose declared size exceeds the configured MaxSize are rejected
// with EntityTooLarge before any data is read. Otherwise the work is done by
// createObject, using metadata["contentType"] as the content type, and
// debug.FreeOSMemory is called once it returns.
func (donut API) CreateObject(bucket, key, expectedMD5Sum string, size int64, data io.Reader, metadata map[string]string) (ObjectMetadata, error) {
	if maxSize := donut.config.MaxSize; size > int64(maxSize) {
		tooLarge := EntityTooLarge{
			GenericObjectError: GenericObjectError{Bucket: bucket, Object: key},
			Size:               strconv.FormatInt(size, 10),
			MaxSize:            strconv.FormatUint(maxSize, 10),
		}
		return ObjectMetadata{}, iodine.New(tooLarge, nil)
	}
	created, err := donut.createObject(bucket, key, metadata["contentType"], expectedMD5Sum, size, data)
	// free
	debug.FreeOSMemory()
	// NOTE(review): relies on iodine.New passing a nil error through as nil.
	return created, iodine.New(err, nil)
}
// createObject stores a new object and returns its metadata.
//
// expectedMD5Sum is the optional base64 encoded md5sum of the data; blank
// means "do not verify". contentType defaults to "application/octet-stream"
// when empty. The size parameter is not used by this function.
//
// When disks are configured the data is handed to putObject. Otherwise it is
// read in chunks of up to 1 MiB, hashed with md5 and appended to the object
// cache under "<bucket>/<key>". In both cases the resulting metadata is
// recorded in the bucket's in-memory objectMetadata map under the write lock.
//
// Errors: BucketNameInvalid, ObjectNameInvalid, BucketNotFound, ObjectExists,
// InvalidDigest (expectedMD5Sum is not valid base64), BadDigest (md5sum
// mismatch), InternalError (the cache refused a chunk) or any error from
// putObject or from reading data.
//
// The function acquires the write lock itself, so callers must not hold
// donut.lock when calling it.
//
// NOTE(review): when the in-memory path fails half way, the chunks already
// appended to the cache are left behind; removing them needs a cache API
// that is not visible from this file.
func (donut API) createObject(bucket, key, contentType, expectedMD5Sum string, size int64, data io.Reader) (ObjectMetadata, error) {
	donut.lock.RLock()
	if !IsValidBucket(bucket) {
		donut.lock.RUnlock()
		return ObjectMetadata{}, iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
	}
	if !IsValidObjectName(key) {
		donut.lock.RUnlock()
		return ObjectMetadata{}, iodine.New(ObjectNameInvalid{Object: key}, nil)
	}
	stored, ok := donut.storedBuckets[bucket]
	if !ok {
		donut.lock.RUnlock()
		return ObjectMetadata{}, iodine.New(BucketNotFound{Bucket: bucket}, nil)
	}
	// get object key
	objectKey := bucket + "/" + key
	if _, exists := stored.objectMetadata[objectKey]; exists {
		donut.lock.RUnlock()
		return ObjectMetadata{}, iodine.New(ObjectExists{Object: key}, nil)
	}
	donut.lock.RUnlock()

	// record publishes the metadata of the finished object. It re-reads the
	// bucket under the write lock so that changes made to the bucket since
	// the snapshot above are not overwritten, and it allocates the metadata
	// map when the bucket was stored without one.
	record := func(m ObjectMetadata) {
		donut.lock.Lock()
		defer donut.lock.Unlock()
		current, found := donut.storedBuckets[bucket]
		if !found {
			current = stored
		}
		if current.objectMetadata == nil {
			current.objectMetadata = make(map[string]ObjectMetadata)
		}
		current.objectMetadata[objectKey] = m
		donut.storedBuckets[bucket] = current
	}

	if contentType == "" {
		contentType = "application/octet-stream"
	}
	contentType = strings.TrimSpace(contentType)
	if strings.TrimSpace(expectedMD5Sum) != "" {
		expectedMD5SumBytes, err := base64.StdEncoding.DecodeString(strings.TrimSpace(expectedMD5Sum))
		if err != nil {
			// pro-actively close the connection
			return ObjectMetadata{}, iodine.New(InvalidDigest{Md5: expectedMD5Sum}, nil)
		}
		expectedMD5Sum = hex.EncodeToString(expectedMD5SumBytes)
	}

	if len(donut.config.NodeDiskMap) > 0 {
		objMetadata, err := donut.putObject(bucket, key, expectedMD5Sum, data, map[string]string{"contentType": contentType})
		if err != nil {
			return ObjectMetadata{}, iodine.New(err, nil)
		}
		record(objMetadata)
		return objMetadata, nil
	}

	// calculate md5 while streaming the data into the cache
	hash := md5.New()
	var totalLength int
	for {
		// A fresh buffer per chunk: whether the cache keeps a reference to
		// the slice passed to Append is not visible here, so the buffer is
		// deliberately not reused.
		byteBuffer := make([]byte, 1024*1024)
		length, err := data.Read(byteBuffer)
		if length > 0 {
			hash.Write(byteBuffer[:length])
			if appended := donut.objects.Append(objectKey, byteBuffer[:length]); !appended {
				return ObjectMetadata{}, iodine.New(InternalError{}, nil)
			}
			totalLength += length
			go debug.FreeOSMemory()
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			return ObjectMetadata{}, iodine.New(err, nil)
		}
		// A read of (0, nil) means "nothing happened", not end of stream;
		// keep reading instead of treating it as completion.
	}
	md5Sum := hex.EncodeToString(hash.Sum(nil))
	// Verify if the written object is equal to what is expected, only if it is requested as such
	if strings.TrimSpace(expectedMD5Sum) != "" {
		if err := isMD5SumEqual(strings.TrimSpace(expectedMD5Sum), md5Sum); err != nil {
			return ObjectMetadata{}, iodine.New(BadDigest{}, nil)
		}
	}

	newObject := ObjectMetadata{
		Bucket:   bucket,
		Object:   key,
		Metadata: map[string]string{"contentType": contentType},
		Created:  time.Now().UTC(),
		MD5Sum:   md5Sum,
		Size:     int64(totalLength),
	}
	record(newObject)
	return newObject, nil
}
// MakeBucket creates a bucket with the given name and ACL. A blank ACL
// defaults to "private".
//
// When disks are configured the bucket is first created through makeBucket;
// the in-memory record, with empty object, part and multipart maps, is then
// registered under the write lock.
//
// Errors: TooManyBuckets (totalBuckets buckets already exist),
// BucketNameInvalid, InvalidACL, BucketExists or any error from makeBucket.
func (donut API) MakeBucket(bucketName, acl string) error {
	donut.lock.RLock()
	if len(donut.storedBuckets) >= totalBuckets {
		donut.lock.RUnlock()
		return iodine.New(TooManyBuckets{Bucket: bucketName}, nil)
	}
	if !IsValidBucket(bucketName) {
		donut.lock.RUnlock()
		return iodine.New(BucketNameInvalid{Bucket: bucketName}, nil)
	}
	if !IsValidBucketACL(acl) {
		donut.lock.RUnlock()
		return iodine.New(InvalidACL{ACL: acl}, nil)
	}
	if _, ok := donut.storedBuckets[bucketName]; ok {
		donut.lock.RUnlock()
		return iodine.New(BucketExists{Bucket: bucketName}, nil)
	}
	donut.lock.RUnlock()

	if strings.TrimSpace(acl) == "" {
		// default is private
		acl = "private"
	}
	if len(donut.config.NodeDiskMap) > 0 {
		if err := donut.makeBucket(bucketName, BucketACL(acl)); err != nil {
			return iodine.New(err, nil)
		}
	}

	newBucket := storedBucket{
		objectMetadata:   make(map[string]ObjectMetadata),
		multiPartSession: make(map[string]multiPartSession),
		partMetadata:     make(map[string]PartMetadata),
	}
	newBucket.bucketMetadata.Name = bucketName
	newBucket.bucketMetadata.Created = time.Now().UTC()
	newBucket.bucketMetadata.ACL = BucketACL(acl)

	// storedBuckets is read by other goroutines under the read lock, so the
	// write must hold the write lock. The existence check is repeated because
	// the read lock was released above and another caller may have created
	// the same bucket in the meantime.
	donut.lock.Lock()
	defer donut.lock.Unlock()
	if _, ok := donut.storedBuckets[bucketName]; ok {
		return iodine.New(BucketExists{Bucket: bucketName}, nil)
	}
	donut.storedBuckets[bucketName] = newBucket
	return nil
}
// ListObjects lists the objects of a bucket, honouring the Prefix, Marker,
// Delimiter and Maxkeys fields of resources.
//
// It returns the matching object metadata sorted by key together with an
// updated copy of resources: CommonPrefixes and IsTruncated are filled in,
// and NextMarker is set to the last returned object when the listing is
// truncated, a delimiter is set and at least one object was returned.
//
// When disks are configured the listing comes from listObjects; otherwise it
// is computed from the in-memory object metadata of the bucket.
//
// Errors: BucketNameInvalid, ObjectNameInvalid (invalid prefix),
// BucketNotFound or any error from listObjects.
func (donut API) ListObjects(bucket string, resources BucketResourcesMetadata) ([]ObjectMetadata, BucketResourcesMetadata, error) {
	donut.lock.RLock()
	defer donut.lock.RUnlock()
	if !IsValidBucket(bucket) {
		return nil, BucketResourcesMetadata{IsTruncated: false}, iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
	}
	if !IsValidPrefix(resources.Prefix) {
		return nil, BucketResourcesMetadata{IsTruncated: false}, iodine.New(ObjectNameInvalid{Object: resources.Prefix}, nil)
	}
	if _, ok := donut.storedBuckets[bucket]; !ok {
		return nil, BucketResourcesMetadata{IsTruncated: false}, iodine.New(BucketNotFound{Bucket: bucket}, nil)
	}
	var results []ObjectMetadata
	var keys []string
	if len(donut.config.NodeDiskMap) > 0 {
		listObjects, err := donut.listObjects(
			bucket,
			resources.Prefix,
			resources.Marker,
			resources.Delimiter,
			resources.Maxkeys,
		)
		if err != nil {
			return nil, BucketResourcesMetadata{IsTruncated: false}, iodine.New(err, nil)
		}
		resources.CommonPrefixes = listObjects.CommonPrefixes
		resources.IsTruncated = listObjects.IsTruncated
		for key := range listObjects.Objects {
			keys = append(keys, key)
		}
		sort.Strings(keys)
		for _, key := range keys {
			results = append(results, listObjects.Objects[key])
		}
		// NextMarker can only be derived once results is populated, and only
		// when there is a last element to take it from.
		if resources.IsTruncated && resources.IsDelimiterSet() && len(results) > 0 {
			resources.NextMarker = results[len(results)-1].Object
		}
		return results, resources, nil
	}

	storedBucket := donut.storedBuckets[bucket]
	for key := range storedBucket.objectMetadata {
		if strings.HasPrefix(key, bucket+"/") {
			key = key[len(bucket)+1:]
			if strings.HasPrefix(key, resources.Prefix) {
				if key > resources.Marker {
					keys = append(keys, key)
				}
			}
		}
	}
	if strings.TrimSpace(resources.Prefix) != "" {
		keys = TrimPrefix(keys, resources.Prefix)
	}
	var prefixes []string
	var filteredKeys []string
	if strings.TrimSpace(resources.Delimiter) != "" {
		filteredKeys = HasNoDelimiter(keys, resources.Delimiter)
		prefixes = HasDelimiter(keys, resources.Delimiter)
		prefixes = SplitDelimiter(prefixes, resources.Delimiter)
		prefixes = SortU(prefixes)
	} else {
		filteredKeys = keys
	}
	for _, commonPrefix := range prefixes {
		resources.CommonPrefixes = append(resources.CommonPrefixes, resources.Prefix+commonPrefix)
	}
	// Normalise the prefixes before the loop so that the truncated early
	// return below reports them deduplicated and sorted as well.
	resources.CommonPrefixes = RemoveDuplicates(resources.CommonPrefixes)
	sort.Strings(resources.CommonPrefixes)

	filteredKeys = RemoveDuplicates(filteredKeys)
	sort.Strings(filteredKeys)
	for _, key := range filteredKeys {
		if len(results) == resources.Maxkeys {
			resources.IsTruncated = true
			// With Maxkeys == 0 nothing has been collected yet, so there is
			// no last object to use as the marker.
			if resources.IsDelimiterSet() && len(results) > 0 {
				resources.NextMarker = results[len(results)-1].Object
			}
			return results, resources, nil
		}
		object := storedBucket.objectMetadata[bucket+"/"+resources.Prefix+key]
		results = append(results, object)
	}
	return results, resources, nil
}
// byBucketName implements sort.Interface over bucket metadata, ordering the
// entries by ascending bucket name.
type byBucketName []BucketMetadata

// Len returns the number of entries.
func (s byBucketName) Len() int {
	return len(s)
}

// Swap exchanges the entries at positions i and j.
func (s byBucketName) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether the name at position i sorts before the name at j.
func (s byBucketName) Less(i, j int) bool {
	return s[j].Name > s[i].Name
}
// ListBuckets returns the metadata of every bucket held in memory, sorted by
// bucket name. The returned error is always nil.
func (donut API) ListBuckets() ([]BucketMetadata, error) {
	donut.lock.RLock()
	defer donut.lock.RUnlock()

	var listing []BucketMetadata
	for name := range donut.storedBuckets {
		listing = append(listing, donut.storedBuckets[name].bucketMetadata)
	}
	sort.Sort(byBucketName(listing))
	return listing, nil
}
// GetObjectMetadata returns the metadata of the object key in bucket.
//
// Metadata already held in memory is answered directly. Otherwise, when
// disks are configured, it is fetched with getObjectMetadata, remembered in
// the bucket's in-memory map and returned.
//
// Errors: BucketNameInvalid, ObjectNameInvalid, BucketNotFound,
// ObjectNotFound (unknown object with no disks configured) or any error from
// getObjectMetadata.
func (donut API) GetObjectMetadata(bucket, key string) (ObjectMetadata, error) {
	donut.lock.RLock()
	// check if bucket exists
	if !IsValidBucket(bucket) {
		donut.lock.RUnlock()
		return ObjectMetadata{}, iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
	}
	if !IsValidObjectName(key) {
		donut.lock.RUnlock()
		return ObjectMetadata{}, iodine.New(ObjectNameInvalid{Object: key}, nil)
	}
	stored, ok := donut.storedBuckets[bucket]
	if !ok {
		donut.lock.RUnlock()
		return ObjectMetadata{}, iodine.New(BucketNotFound{Bucket: bucket}, nil)
	}
	objectKey := bucket + "/" + key
	if objMetadata, found := stored.objectMetadata[objectKey]; found {
		donut.lock.RUnlock()
		return objMetadata, nil
	}
	if len(donut.config.NodeDiskMap) == 0 {
		donut.lock.RUnlock()
		return ObjectMetadata{}, iodine.New(ObjectNotFound{Object: key}, nil)
	}
	objMetadata, err := donut.getObjectMetadata(bucket, key)
	donut.lock.RUnlock()
	if err != nil {
		return ObjectMetadata{}, iodine.New(err, nil)
	}

	// update the in-memory view
	donut.lock.Lock()
	if current, exists := donut.storedBuckets[bucket]; exists {
		// Buckets can be stored without an object map; writing into a nil
		// map panics, so allocate it and store the bucket back.
		if current.objectMetadata == nil {
			current.objectMetadata = make(map[string]ObjectMetadata)
		}
		current.objectMetadata[objectKey] = objMetadata
		donut.storedBuckets[bucket] = current
	}
	donut.lock.Unlock()
	return objMetadata, nil
}
// expiredObject is installed by New as the OnExpired callback of the object
// cache. It logs the current cache statistics and removes the expired key,
// expected as a string in a[0], from the object metadata of every bucket.
//
// A call without arguments, or with a first argument that is not a string,
// is logged and otherwise ignored instead of panicking.
//
// NOTE(review): storedBuckets is walked without taking donut.lock. Some
// callers of the cache invoke it while holding the write lock, so whether
// the lock can be taken here safely needs to be confirmed against the cache
// implementation before changing it.
func (donut API) expiredObject(a ...interface{}) {
	cacheStats := donut.objects.Stats()
	log.Printf("CurrentSize: %d, CurrentItems: %d, TotalExpirations: %d",
		cacheStats.Bytes, cacheStats.Items, cacheStats.Expired)
	if len(a) == 0 {
		return
	}
	key, ok := a[0].(string)
	if !ok {
		return
	}
	// loop through all buckets
	for _, storedBucket := range donut.storedBuckets {
		delete(storedBucket.objectMetadata, key)
	}
	debug.FreeOSMemory()
}