fix: rename remaining zone -> pool (#11231)

master
Harshavardhana 4 years ago committed by GitHub
parent eb9172eecb
commit b5d291ea88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 18
      cmd/background-newdisks-heal-ops.go
  2. 12
      cmd/endpoint.go
  3. 176
      cmd/erasure-server-sets.go
  4. 4
      cmd/metacache-server-pool.go
  5. 4
      cmd/peer-rest-server.go
  6. 10
      cmd/prepare-storage.go
  7. 20
      docs/distributed/DESIGN.md
  8. 6
      docs/distributed/README.md
  9. 14
      docs/zh_CN/distributed/DESIGN.md

@ -153,22 +153,22 @@ wait:
continue continue
} }
zoneIdx := globalEndpoints.GetLocalZoneIdx(disk.Endpoint()) poolIdx := globalEndpoints.GetLocalZoneIdx(disk.Endpoint())
if zoneIdx < 0 { if poolIdx < 0 {
continue continue
} }
// Calculate the set index where the current endpoint belongs // Calculate the set index where the current endpoint belongs
z.serverPools[zoneIdx].erasureDisksMu.RLock() z.serverPools[poolIdx].erasureDisksMu.RLock()
// Protect reading reference format. // Protect reading reference format.
setIndex, _, err := findDiskIndex(z.serverPools[zoneIdx].format, format) setIndex, _, err := findDiskIndex(z.serverPools[poolIdx].format, format)
z.serverPools[zoneIdx].erasureDisksMu.RUnlock() z.serverPools[poolIdx].erasureDisksMu.RUnlock()
if err != nil { if err != nil {
printEndpointError(endpoint, err, false) printEndpointError(endpoint, err, false)
continue continue
} }
erasureSetInZoneDisksToHeal[zoneIdx][setIndex] = append(erasureSetInZoneDisksToHeal[zoneIdx][setIndex], disk) erasureSetInZoneDisksToHeal[poolIdx][setIndex] = append(erasureSetInZoneDisksToHeal[poolIdx][setIndex], disk)
} }
buckets, _ := z.ListBuckets(ctx) buckets, _ := z.ListBuckets(ctx)
@ -181,11 +181,11 @@ wait:
for i, setMap := range erasureSetInZoneDisksToHeal { for i, setMap := range erasureSetInZoneDisksToHeal {
for setIndex, disks := range setMap { for setIndex, disks := range setMap {
for _, disk := range disks { for _, disk := range disks {
logger.Info("Healing disk '%s' on %s zone", disk, humanize.Ordinal(i+1)) logger.Info("Healing disk '%s' on %s pool", disk, humanize.Ordinal(i+1))
// So someone changed the drives underneath, healing tracker missing. // So someone changed the drives underneath, healing tracker missing.
if !disk.Healing() { if !disk.Healing() {
logger.Info("Healing tracker missing on '%s', disk was swapped again on %s zone", disk, humanize.Ordinal(i+1)) logger.Info("Healing tracker missing on '%s', disk was swapped again on %s pool", disk, humanize.Ordinal(i+1))
diskID, err := disk.GetDiskID() diskID, err := disk.GetDiskID()
if err != nil { if err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
@ -209,7 +209,7 @@ wait:
continue continue
} }
logger.Info("Healing disk '%s' on %s zone complete", disk, humanize.Ordinal(i+1)) logger.Info("Healing disk '%s' on %s pool complete", disk, humanize.Ordinal(i+1))
if err := disk.Delete(ctx, pathJoin(minioMetaBucket, bucketMetaPrefix), if err := disk.Delete(ctx, pathJoin(minioMetaBucket, bucketMetaPrefix),
healingTrackerFilename, false); err != nil && !errors.Is(err, errFileNotFound) { healingTrackerFilename, false); err != nil && !errors.Is(err, errFileNotFound) {

@ -196,7 +196,7 @@ func NewEndpoint(arg string) (ep Endpoint, e error) {
}, nil }, nil
} }
// ZoneEndpoints represent endpoints in a given zone // ZoneEndpoints represent endpoints in a given pool
// along with its setCount and setDriveCount. // along with its setCount and setDriveCount.
type ZoneEndpoints struct { type ZoneEndpoints struct {
SetCount int SetCount int
@ -207,8 +207,8 @@ type ZoneEndpoints struct {
// EndpointServerPools - list of list of endpoints // EndpointServerPools - list of list of endpoints
type EndpointServerPools []ZoneEndpoints type EndpointServerPools []ZoneEndpoints
// GetLocalZoneIdx returns the zone which endpoint belongs to locally. // GetLocalZoneIdx returns the pool which endpoint belongs to locally.
// if ep is remote this code will return -1 zoneIndex // if ep is remote this code will return -1 poolIndex
func (l EndpointServerPools) GetLocalZoneIdx(ep Endpoint) int { func (l EndpointServerPools) GetLocalZoneIdx(ep Endpoint) int {
for i, zep := range l { for i, zep := range l {
for _, cep := range zep.Endpoints { for _, cep := range zep.Endpoints {
@ -222,7 +222,7 @@ func (l EndpointServerPools) GetLocalZoneIdx(ep Endpoint) int {
return -1 return -1
} }
// Add add zone endpoints // Add add pool endpoints
func (l *EndpointServerPools) Add(zeps ZoneEndpoints) error { func (l *EndpointServerPools) Add(zeps ZoneEndpoints) error {
existSet := set.NewStringSet() existSet := set.NewStringSet()
for _, zep := range *l { for _, zep := range *l {
@ -478,8 +478,8 @@ func (endpoints Endpoints) UpdateIsLocal(foundPrevLocal bool) error {
// participate atleast one disk and be local. // participate atleast one disk and be local.
// //
// In special cases for replica set with expanded // In special cases for replica set with expanded
// zone setups we need to make sure to provide // pool setups we need to make sure to provide
// value of foundPrevLocal from zone1 if we already // value of foundPrevLocal from pool1 if we already
// found a local setup. Only if we haven't found // found a local setup. Only if we haven't found
// previous local we continue to wait to look for // previous local we continue to wait to look for
// atleast one local. // atleast one local.

@ -51,7 +51,7 @@ func (z *erasureServerPools) SingleZone() bool {
return len(z.serverPools) == 1 return len(z.serverPools) == 1
} }
// Initialize new zone of erasure sets. // Initialize new pool of erasure sets.
func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServerPools) (ObjectLayer, error) { func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServerPools) (ObjectLayer, error) {
var ( var (
deploymentID string deploymentID string
@ -127,9 +127,9 @@ func (z *erasureServerPools) SetDriveCount() int {
return z.serverPools[0].SetDriveCount() return z.serverPools[0].SetDriveCount()
} }
type serverPoolsAvailableSpace []zoneAvailableSpace type serverPoolsAvailableSpace []poolAvailableSpace
type zoneAvailableSpace struct { type poolAvailableSpace struct {
Index int Index int
Available uint64 Available uint64
} }
@ -154,10 +154,10 @@ func (z *erasureServerPools) getAvailableZoneIdx(ctx context.Context, size int64
// choose when we reach this many // choose when we reach this many
choose := rand.Uint64() % total choose := rand.Uint64() % total
atTotal := uint64(0) atTotal := uint64(0)
for _, zone := range serverPools { for _, pool := range serverPools {
atTotal += zone.Available atTotal += pool.Available
if atTotal > choose && zone.Available > 0 { if atTotal > choose && pool.Available > 0 {
return zone.Index return pool.Index
} }
} }
// Should not happen, but print values just in case. // Should not happen, but print values just in case.
@ -165,8 +165,8 @@ func (z *erasureServerPools) getAvailableZoneIdx(ctx context.Context, size int64
return -1 return -1
} }
// getServerPoolsAvailableSpace will return the available space of each zone after storing the content. // getServerPoolsAvailableSpace will return the available space of each pool after storing the content.
// If there is not enough space the zone will return 0 bytes available. // If there is not enough space the pool will return 0 bytes available.
// Negative sizes are seen as 0 bytes. // Negative sizes are seen as 0 bytes.
func (z *erasureServerPools) getServerPoolsAvailableSpace(ctx context.Context, size int64) serverPoolsAvailableSpace { func (z *erasureServerPools) getServerPoolsAvailableSpace(ctx context.Context, size int64) serverPoolsAvailableSpace {
if size < 0 { if size < 0 {
@ -208,7 +208,7 @@ func (z *erasureServerPools) getServerPoolsAvailableSpace(ctx context.Context, s
available = 0 available = 0
} }
} }
serverPools[i] = zoneAvailableSpace{ serverPools[i] = poolAvailableSpace{
Index: i, Index: i,
Available: available, Available: available,
} }
@ -216,14 +216,14 @@ func (z *erasureServerPools) getServerPoolsAvailableSpace(ctx context.Context, s
return serverPools return serverPools
} }
// getZoneIdx returns the found previous object and its corresponding zone idx, // getZoneIdx returns the found previous object and its corresponding pool idx,
// if none are found falls back to most available space zone. // if none are found falls back to most available space pool.
func (z *erasureServerPools) getZoneIdx(ctx context.Context, bucket, object string, opts ObjectOptions, size int64) (idx int, err error) { func (z *erasureServerPools) getZoneIdx(ctx context.Context, bucket, object string, opts ObjectOptions, size int64) (idx int, err error) {
if z.SingleZone() { if z.SingleZone() {
return 0, nil return 0, nil
} }
for i, zone := range z.serverPools { for i, pool := range z.serverPools {
objInfo, err := zone.GetObjectInfo(ctx, bucket, object, opts) objInfo, err := pool.GetObjectInfo(ctx, bucket, object, opts)
switch err.(type) { switch err.(type) {
case ObjectNotFound: case ObjectNotFound:
// VersionId was not specified but found delete marker or no versions exist. // VersionId was not specified but found delete marker or no versions exist.
@ -236,7 +236,7 @@ func (z *erasureServerPools) getZoneIdx(ctx context.Context, bucket, object stri
} }
} }
// delete marker not specified means no versions // delete marker not specified means no versions
// exist continue to next zone. // exist continue to next pool.
if !objInfo.DeleteMarker && err != nil { if !objInfo.DeleteMarker && err != nil {
continue continue
} }
@ -376,7 +376,7 @@ func (z *erasureServerPools) CrawlAndGetDataUsage(ctx context.Context, bf *bloom
defer updateTicker.Stop() defer updateTicker.Stop()
var lastUpdate time.Time var lastUpdate time.Time
// We need to merge since we will get the same buckets from each zone. // We need to merge since we will get the same buckets from each pool.
// Therefore to get the exact bucket sizes we must merge before we can convert. // Therefore to get the exact bucket sizes we must merge before we can convert.
var allMerged dataUsageCache var allMerged dataUsageCache
@ -477,8 +477,8 @@ func (z *erasureServerPools) GetObjectNInfo(ctx context.Context, bucket, object
object = encodeDirObject(object) object = encodeDirObject(object)
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
gr, err = zone.GetObjectNInfo(ctx, bucket, object, rs, h, lockType, opts) gr, err = pool.GetObjectNInfo(ctx, bucket, object, rs, h, lockType, opts)
if err != nil { if err != nil {
if isErrObjectNotFound(err) || isErrVersionNotFound(err) { if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
continue continue
@ -500,8 +500,8 @@ func (z *erasureServerPools) GetObject(ctx context.Context, bucket, object strin
object = encodeDirObject(object) object = encodeDirObject(object)
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
if err := zone.GetObject(ctx, bucket, object, startOffset, length, writer, etag, opts); err != nil { if err := pool.GetObject(ctx, bucket, object, startOffset, length, writer, etag, opts); err != nil {
if isErrObjectNotFound(err) || isErrVersionNotFound(err) { if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
continue continue
} }
@ -521,8 +521,8 @@ func (z *erasureServerPools) GetObjectInfo(ctx context.Context, bucket, object s
} }
object = encodeDirObject(object) object = encodeDirObject(object)
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
objInfo, err = zone.GetObjectInfo(ctx, bucket, object, opts) objInfo, err = pool.GetObjectInfo(ctx, bucket, object, opts)
if err != nil { if err != nil {
if isErrObjectNotFound(err) || isErrVersionNotFound(err) { if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
continue continue
@ -538,7 +538,7 @@ func (z *erasureServerPools) GetObjectInfo(ctx context.Context, bucket, object s
return objInfo, ObjectNotFound{Bucket: bucket, Object: object} return objInfo, ObjectNotFound{Bucket: bucket, Object: object}
} }
// PutObject - writes an object to least used erasure zone. // PutObject - writes an object to least used erasure pool.
func (z *erasureServerPools) PutObject(ctx context.Context, bucket string, object string, data *PutObjReader, opts ObjectOptions) (ObjectInfo, error) { func (z *erasureServerPools) PutObject(ctx context.Context, bucket string, object string, data *PutObjReader, opts ObjectOptions) (ObjectInfo, error) {
// Validate put object input args. // Validate put object input args.
if err := checkPutObjectArgs(ctx, bucket, object, z); err != nil { if err := checkPutObjectArgs(ctx, bucket, object, z); err != nil {
@ -556,7 +556,7 @@ func (z *erasureServerPools) PutObject(ctx context.Context, bucket string, objec
return ObjectInfo{}, err return ObjectInfo{}, err
} }
// Overwrite the object at the right zone // Overwrite the object at the right pool
return z.serverPools[idx].PutObject(ctx, bucket, object, data, opts) return z.serverPools[idx].PutObject(ctx, bucket, object, data, opts)
} }
@ -570,8 +570,8 @@ func (z *erasureServerPools) DeleteObject(ctx context.Context, bucket string, ob
if z.SingleZone() { if z.SingleZone() {
return z.serverPools[0].DeleteObject(ctx, bucket, object, opts) return z.serverPools[0].DeleteObject(ctx, bucket, object, opts)
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
objInfo, err = zone.DeleteObject(ctx, bucket, object, opts) objInfo, err = pool.DeleteObject(ctx, bucket, object, opts)
if err == nil { if err == nil {
return objInfo, nil return objInfo, nil
} }
@ -608,8 +608,8 @@ func (z *erasureServerPools) DeleteObjects(ctx context.Context, bucket string, o
return z.serverPools[0].DeleteObjects(ctx, bucket, objects, opts) return z.serverPools[0].DeleteObjects(ctx, bucket, objects, opts)
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
deletedObjects, errs := zone.DeleteObjects(ctx, bucket, objects, opts) deletedObjects, errs := pool.DeleteObjects(ctx, bucket, objects, opts)
for i, derr := range errs { for i, derr := range errs {
if derr != nil { if derr != nil {
derrs[i] = derr derrs[i] = derr
@ -626,7 +626,7 @@ func (z *erasureServerPools) CopyObject(ctx context.Context, srcBucket, srcObjec
cpSrcDstSame := isStringEqual(pathJoin(srcBucket, srcObject), pathJoin(dstBucket, dstObject)) cpSrcDstSame := isStringEqual(pathJoin(srcBucket, srcObject), pathJoin(dstBucket, dstObject))
zoneIdx, err := z.getZoneIdx(ctx, dstBucket, dstObject, dstOpts, srcInfo.Size) poolIdx, err := z.getZoneIdx(ctx, dstBucket, dstObject, dstOpts, srcInfo.Size)
if err != nil { if err != nil {
return objInfo, err return objInfo, err
} }
@ -634,12 +634,12 @@ func (z *erasureServerPools) CopyObject(ctx context.Context, srcBucket, srcObjec
if cpSrcDstSame && srcInfo.metadataOnly { if cpSrcDstSame && srcInfo.metadataOnly {
// Version ID is set for the destination and source == destination version ID. // Version ID is set for the destination and source == destination version ID.
if dstOpts.VersionID != "" && srcOpts.VersionID == dstOpts.VersionID { if dstOpts.VersionID != "" && srcOpts.VersionID == dstOpts.VersionID {
return z.serverPools[zoneIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts) return z.serverPools[poolIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
} }
// Destination is not versioned and source version ID is empty // Destination is not versioned and source version ID is empty
// perform an in-place update. // perform an in-place update.
if !dstOpts.Versioned && srcOpts.VersionID == "" { if !dstOpts.Versioned && srcOpts.VersionID == "" {
return z.serverPools[zoneIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts) return z.serverPools[poolIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
} }
// Destination is versioned, source is not destination version, // Destination is versioned, source is not destination version,
// as a special case look for if the source object is not legacy // as a special case look for if the source object is not legacy
@ -649,7 +649,7 @@ func (z *erasureServerPools) CopyObject(ctx context.Context, srcBucket, srcObjec
// CopyObject optimization where we don't create an entire copy // CopyObject optimization where we don't create an entire copy
// of the content, instead we add a reference. // of the content, instead we add a reference.
srcInfo.versionOnly = true srcInfo.versionOnly = true
return z.serverPools[zoneIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts) return z.serverPools[poolIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
} }
} }
@ -661,7 +661,7 @@ func (z *erasureServerPools) CopyObject(ctx context.Context, srcBucket, srcObjec
MTime: dstOpts.MTime, MTime: dstOpts.MTime,
} }
return z.serverPools[zoneIdx].PutObject(ctx, dstBucket, dstObject, srcInfo.PutObjReader, putOpts) return z.serverPools[poolIdx].PutObject(ctx, dstBucket, dstObject, srcInfo.PutObjReader, putOpts)
} }
func (z *erasureServerPools) ListObjectsV2(ctx context.Context, bucket, prefix, continuationToken, delimiter string, maxKeys int, fetchOwner bool, startAfter string) (ListObjectsV2Info, error) { func (z *erasureServerPools) ListObjectsV2(ctx context.Context, bucket, prefix, continuationToken, delimiter string, maxKeys int, fetchOwner bool, startAfter string) (ListObjectsV2Info, error) {
@ -783,20 +783,20 @@ func (z *erasureServerPools) ListMultipartUploads(ctx context.Context, bucket, p
return z.serverPools[0].ListMultipartUploads(ctx, bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads) return z.serverPools[0].ListMultipartUploads(ctx, bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
} }
var zoneResult = ListMultipartsInfo{} var poolResult = ListMultipartsInfo{}
zoneResult.MaxUploads = maxUploads poolResult.MaxUploads = maxUploads
zoneResult.KeyMarker = keyMarker poolResult.KeyMarker = keyMarker
zoneResult.Prefix = prefix poolResult.Prefix = prefix
zoneResult.Delimiter = delimiter poolResult.Delimiter = delimiter
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
result, err := zone.ListMultipartUploads(ctx, bucket, prefix, keyMarker, uploadIDMarker, result, err := pool.ListMultipartUploads(ctx, bucket, prefix, keyMarker, uploadIDMarker,
delimiter, maxUploads) delimiter, maxUploads)
if err != nil { if err != nil {
return result, err return result, err
} }
zoneResult.Uploads = append(zoneResult.Uploads, result.Uploads...) poolResult.Uploads = append(poolResult.Uploads, result.Uploads...)
} }
return zoneResult, nil return poolResult, nil
} }
// Initiate a new multipart upload on a hashedSet based on object name. // Initiate a new multipart upload on a hashedSet based on object name.
@ -838,14 +838,14 @@ func (z *erasureServerPools) PutObjectPart(ctx context.Context, bucket, object,
return z.serverPools[0].PutObjectPart(ctx, bucket, object, uploadID, partID, data, opts) return z.serverPools[0].PutObjectPart(ctx, bucket, object, uploadID, partID, data, opts)
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
_, err := zone.GetMultipartInfo(ctx, bucket, object, uploadID, opts) _, err := pool.GetMultipartInfo(ctx, bucket, object, uploadID, opts)
if err == nil { if err == nil {
return zone.PutObjectPart(ctx, bucket, object, uploadID, partID, data, opts) return pool.PutObjectPart(ctx, bucket, object, uploadID, partID, data, opts)
} }
switch err.(type) { switch err.(type) {
case InvalidUploadID: case InvalidUploadID:
// Look for information on the next zone // Look for information on the next pool
continue continue
} }
// Any other unhandled errors such as quorum return. // Any other unhandled errors such as quorum return.
@ -867,14 +867,14 @@ func (z *erasureServerPools) GetMultipartInfo(ctx context.Context, bucket, objec
if z.SingleZone() { if z.SingleZone() {
return z.serverPools[0].GetMultipartInfo(ctx, bucket, object, uploadID, opts) return z.serverPools[0].GetMultipartInfo(ctx, bucket, object, uploadID, opts)
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
mi, err := zone.GetMultipartInfo(ctx, bucket, object, uploadID, opts) mi, err := pool.GetMultipartInfo(ctx, bucket, object, uploadID, opts)
if err == nil { if err == nil {
return mi, nil return mi, nil
} }
switch err.(type) { switch err.(type) {
case InvalidUploadID: case InvalidUploadID:
// upload id not found, continue to the next zone. // upload id not found, continue to the next pool.
continue continue
} }
// any other unhandled error return right here. // any other unhandled error return right here.
@ -897,10 +897,10 @@ func (z *erasureServerPools) ListObjectParts(ctx context.Context, bucket, object
if z.SingleZone() { if z.SingleZone() {
return z.serverPools[0].ListObjectParts(ctx, bucket, object, uploadID, partNumberMarker, maxParts, opts) return z.serverPools[0].ListObjectParts(ctx, bucket, object, uploadID, partNumberMarker, maxParts, opts)
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
_, err := zone.GetMultipartInfo(ctx, bucket, object, uploadID, opts) _, err := pool.GetMultipartInfo(ctx, bucket, object, uploadID, opts)
if err == nil { if err == nil {
return zone.ListObjectParts(ctx, bucket, object, uploadID, partNumberMarker, maxParts, opts) return pool.ListObjectParts(ctx, bucket, object, uploadID, partNumberMarker, maxParts, opts)
} }
switch err.(type) { switch err.(type) {
case InvalidUploadID: case InvalidUploadID:
@ -925,14 +925,14 @@ func (z *erasureServerPools) AbortMultipartUpload(ctx context.Context, bucket, o
return z.serverPools[0].AbortMultipartUpload(ctx, bucket, object, uploadID, opts) return z.serverPools[0].AbortMultipartUpload(ctx, bucket, object, uploadID, opts)
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
_, err := zone.GetMultipartInfo(ctx, bucket, object, uploadID, opts) _, err := pool.GetMultipartInfo(ctx, bucket, object, uploadID, opts)
if err == nil { if err == nil {
return zone.AbortMultipartUpload(ctx, bucket, object, uploadID, opts) return pool.AbortMultipartUpload(ctx, bucket, object, uploadID, opts)
} }
switch err.(type) { switch err.(type) {
case InvalidUploadID: case InvalidUploadID:
// upload id not found move to next zone // upload id not found move to next pool
continue continue
} }
return err return err
@ -955,17 +955,17 @@ func (z *erasureServerPools) CompleteMultipartUpload(ctx context.Context, bucket
} }
// Purge any existing object. // Purge any existing object.
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
zone.DeleteObject(ctx, bucket, object, opts) pool.DeleteObject(ctx, bucket, object, opts)
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
result, err := zone.ListMultipartUploads(ctx, bucket, object, "", "", "", maxUploadsList) result, err := pool.ListMultipartUploads(ctx, bucket, object, "", "", "", maxUploadsList)
if err != nil { if err != nil {
return objInfo, err return objInfo, err
} }
if result.Lookup(uploadID) { if result.Lookup(uploadID) {
return zone.CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, opts) return pool.CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, opts)
} }
} }
return objInfo, InvalidUploadID{ return objInfo, InvalidUploadID{
@ -988,8 +988,8 @@ func (z *erasureServerPools) GetBucketInfo(ctx context.Context, bucket string) (
} }
return bucketInfo, nil return bucketInfo, nil
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
bucketInfo, err = zone.GetBucketInfo(ctx, bucket) bucketInfo, err = pool.GetBucketInfo(ctx, bucket)
if err != nil { if err != nil {
if isErrBucketNotFound(err) { if isErrBucketNotFound(err) {
continue continue
@ -1114,8 +1114,8 @@ func (z *erasureServerPools) ListBuckets(ctx context.Context) (buckets []BucketI
if z.SingleZone() { if z.SingleZone() {
buckets, err = z.serverPools[0].ListBuckets(ctx) buckets, err = z.serverPools[0].ListBuckets(ctx)
} else { } else {
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
buckets, err = zone.ListBuckets(ctx) buckets, err = pool.ListBuckets(ctx)
if err != nil { if err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
continue continue
@ -1149,8 +1149,8 @@ func (z *erasureServerPools) HealFormat(ctx context.Context, dryRun bool) (madmi
} }
var countNoHeal int var countNoHeal int
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
result, err := zone.HealFormat(ctx, dryRun) result, err := pool.HealFormat(ctx, dryRun)
if err != nil && !errors.Is(err, errNoHealRequired) { if err != nil && !errors.Is(err, errNoHealRequired) {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
continue continue
@ -1183,8 +1183,8 @@ func (z *erasureServerPools) HealBucket(ctx context.Context, bucket string, opts
// Attempt heal on the bucket metadata, ignore any failures // Attempt heal on the bucket metadata, ignore any failures
_, _ = z.HealObject(ctx, minioMetaBucket, pathJoin(bucketConfigPrefix, bucket, bucketMetadataFile), "", opts) _, _ = z.HealObject(ctx, minioMetaBucket, pathJoin(bucketConfigPrefix, bucket, bucketMetadataFile), "", opts)
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
result, err := zone.HealBucket(ctx, bucket, opts) result, err := pool.HealBucket(ctx, bucket, opts)
if err != nil { if err != nil {
switch err.(type) { switch err.(type) {
case BucketNotFound: case BucketNotFound:
@ -1344,8 +1344,8 @@ func (z *erasureServerPools) HealObject(ctx context.Context, bucket, object, ver
defer lk.RUnlock() defer lk.RUnlock()
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
result, err := zone.HealObject(ctx, bucket, object, versionID, opts) result, err := pool.HealObject(ctx, bucket, object, versionID, opts)
if err != nil { if err != nil {
if isErrObjectNotFound(err) || isErrVersionNotFound(err) { if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
continue continue
@ -1374,12 +1374,12 @@ func (z *erasureServerPools) GetMetrics(ctx context.Context) (*Metrics, error) {
} }
func (z *erasureServerPools) getZoneAndSet(id string) (int, int, error) { func (z *erasureServerPools) getZoneAndSet(id string) (int, int, error) {
for zoneIdx := range z.serverPools { for poolIdx := range z.serverPools {
format := z.serverPools[zoneIdx].format format := z.serverPools[poolIdx].format
for setIdx, set := range format.Erasure.Sets { for setIdx, set := range format.Erasure.Sets {
for _, diskID := range set { for _, diskID := range set {
if diskID == id { if diskID == id {
return zoneIdx, setIdx, nil return poolIdx, setIdx, nil
} }
} }
} }
@ -1419,12 +1419,12 @@ func (z *erasureServerPools) Health(ctx context.Context, opts HealthOptions) Hea
for _, localDiskIDs := range diskIDs { for _, localDiskIDs := range diskIDs {
for _, id := range localDiskIDs { for _, id := range localDiskIDs {
zoneIdx, setIdx, err := z.getZoneAndSet(id) poolIdx, setIdx, err := z.getZoneAndSet(id)
if err != nil { if err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
continue continue
} }
erasureSetUpCount[zoneIdx][setIdx]++ erasureSetUpCount[poolIdx][setIdx]++
} }
} }
@ -1461,16 +1461,16 @@ func (z *erasureServerPools) Health(ctx context.Context, opts HealthOptions) Hea
} }
} }
for zoneIdx := range erasureSetUpCount { for poolIdx := range erasureSetUpCount {
for setIdx := range erasureSetUpCount[zoneIdx] { for setIdx := range erasureSetUpCount[poolIdx] {
if erasureSetUpCount[zoneIdx][setIdx] < writeQuorum { if erasureSetUpCount[poolIdx][setIdx] < writeQuorum {
logger.LogIf(logger.SetReqInfo(ctx, reqInfo), logger.LogIf(logger.SetReqInfo(ctx, reqInfo),
fmt.Errorf("Write quorum may be lost on zone: %d, set: %d, expected write quorum: %d", fmt.Errorf("Write quorum may be lost on pool: %d, set: %d, expected write quorum: %d",
zoneIdx, setIdx, writeQuorum)) poolIdx, setIdx, writeQuorum))
return HealthResult{ return HealthResult{
Healthy: false, Healthy: false,
HealingDrives: len(aggHealStateResult.HealDisks), HealingDrives: len(aggHealStateResult.HealDisks),
ZoneID: zoneIdx, ZoneID: poolIdx,
SetID: setIdx, SetID: setIdx,
WriteQuorum: writeQuorum, WriteQuorum: writeQuorum,
} }
@ -1501,8 +1501,8 @@ func (z *erasureServerPools) PutObjectTags(ctx context.Context, bucket, object s
return z.serverPools[0].PutObjectTags(ctx, bucket, object, tags, opts) return z.serverPools[0].PutObjectTags(ctx, bucket, object, tags, opts)
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
err := zone.PutObjectTags(ctx, bucket, object, tags, opts) err := pool.PutObjectTags(ctx, bucket, object, tags, opts)
if err != nil { if err != nil {
if isErrObjectNotFound(err) || isErrVersionNotFound(err) { if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
continue continue
@ -1530,8 +1530,8 @@ func (z *erasureServerPools) DeleteObjectTags(ctx context.Context, bucket, objec
if z.SingleZone() { if z.SingleZone() {
return z.serverPools[0].DeleteObjectTags(ctx, bucket, object, opts) return z.serverPools[0].DeleteObjectTags(ctx, bucket, object, opts)
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
err := zone.DeleteObjectTags(ctx, bucket, object, opts) err := pool.DeleteObjectTags(ctx, bucket, object, opts)
if err != nil { if err != nil {
if isErrObjectNotFound(err) || isErrVersionNotFound(err) { if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
continue continue
@ -1559,8 +1559,8 @@ func (z *erasureServerPools) GetObjectTags(ctx context.Context, bucket, object s
if z.SingleZone() { if z.SingleZone() {
return z.serverPools[0].GetObjectTags(ctx, bucket, object, opts) return z.serverPools[0].GetObjectTags(ctx, bucket, object, opts)
} }
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
tags, err := zone.GetObjectTags(ctx, bucket, object, opts) tags, err := pool.GetObjectTags(ctx, bucket, object, opts)
if err != nil { if err != nil {
if isErrObjectNotFound(err) || isErrVersionNotFound(err) { if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
continue continue

@ -149,8 +149,8 @@ func (z *erasureServerPools) listPath(ctx context.Context, o listPathOptions) (e
allAtEOF := true allAtEOF := true
mu.Lock() mu.Lock()
// Ask all sets and merge entries. // Ask all sets and merge entries.
for _, zone := range z.serverPools { for _, pool := range z.serverPools {
for _, set := range zone.sets { for _, set := range pool.sets {
wg.Add(1) wg.Add(1)
go func(i int, set *erasureObjects) { go func(i int, set *erasureObjects) {
defer wg.Done() defer wg.Done()

@ -672,8 +672,8 @@ func (s *peerRESTServer) PutBucketNotificationHandler(w http.ResponseWriter, r *
func getLocalDiskIDs(z *erasureServerPools) []string { func getLocalDiskIDs(z *erasureServerPools) []string {
var ids []string var ids []string
for zoneIdx := range z.serverPools { for poolIdx := range z.serverPools {
for _, set := range z.serverPools[zoneIdx].sets { for _, set := range z.serverPools[poolIdx].sets {
disks := set.getDisks() disks := set.getDisks()
for _, disk := range disks { for _, disk := range disks {
if disk == nil { if disk == nil {

@ -228,7 +228,7 @@ func IsServerResolvable(endpoint Endpoint) error {
// connect to list of endpoints and load all Erasure disk formats, validate the formats are correct // connect to list of endpoints and load all Erasure disk formats, validate the formats are correct
// and are in quorum, if no formats are found attempt to initialize all of them for the first // and are in quorum, if no formats are found attempt to initialize all of them for the first
// time. additionally make sure to close all the disks used in this attempt. // time. additionally make sure to close all the disks used in this attempt.
func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, zoneCount, setCount, setDriveCount int, deploymentID string) (storageDisks []StorageAPI, format *formatErasureV3, err error) { func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) (storageDisks []StorageAPI, format *formatErasureV3, err error) {
// Initialize all storage disks // Initialize all storage disks
storageDisks, errs := initStorageDisksWithErrors(endpoints) storageDisks, errs := initStorageDisksWithErrors(endpoints)
@ -272,8 +272,8 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
// All disks report unformatted we should initialized everyone. // All disks report unformatted we should initialized everyone.
if shouldInitErasureDisks(sErrs) && firstDisk { if shouldInitErasureDisks(sErrs) && firstDisk {
logger.Info("Formatting %s zone, %v set(s), %v drives per set.", logger.Info("Formatting %s pool, %v set(s), %v drives per set.",
humanize.Ordinal(zoneCount), setCount, setDriveCount) humanize.Ordinal(poolCount), setCount, setDriveCount)
// Initialize erasure code format on disks // Initialize erasure code format on disks
format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, "", deploymentID, sErrs) format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, "", deploymentID, sErrs)
@ -345,7 +345,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
} }
// Format disks before initialization of object layer. // Format disks before initialization of object layer.
func waitForFormatErasure(firstDisk bool, endpoints Endpoints, zoneCount, setCount, setDriveCount int, deploymentID string) ([]StorageAPI, *formatErasureV3, error) { func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) ([]StorageAPI, *formatErasureV3, error) {
if len(endpoints) == 0 || setCount == 0 || setDriveCount == 0 { if len(endpoints) == 0 || setCount == 0 || setDriveCount == 0 {
return nil, nil, errInvalidArgument return nil, nil, errInvalidArgument
} }
@ -372,7 +372,7 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, zoneCount, setCou
for { for {
select { select {
case <-ticker.C: case <-ticker.C:
storageDisks, format, err := connectLoadInitFormats(tries, firstDisk, endpoints, zoneCount, setCount, setDriveCount, deploymentID) storageDisks, format, err := connectLoadInitFormats(tries, firstDisk, endpoints, poolCount, setCount, setDriveCount, deploymentID)
if err != nil { if err != nil {
tries++ tries++
switch err { switch err {

@ -94,7 +94,7 @@ Input for the key is the object name specified in `PutObject()`, returns a uniqu
- MinIO does erasure coding at the object level not at the volume level, unlike other object storage vendors. This allows applications to choose different storage class by setting `x-amz-storage-class=STANDARD/REDUCED_REDUNDANCY` for each object uploads so effectively utilizing the capacity of the cluster. Additionally these can also be enforced using IAM policies to make sure the client uploads with correct HTTP headers. - MinIO does erasure coding at the object level not at the volume level, unlike other object storage vendors. This allows applications to choose different storage class by setting `x-amz-storage-class=STANDARD/REDUCED_REDUNDANCY` for each object uploads so effectively utilizing the capacity of the cluster. Additionally these can also be enforced using IAM policies to make sure the client uploads with correct HTTP headers.
- MinIO also supports expansion of existing clusters in server pools. Each zone is a self contained entity with same SLA's (read/write quorum) for each object as original cluster. By using the existing namespace for lookup validation MinIO ensures conflicting objects are not created. When no such object exists then MinIO simply uses the least used zone. - MinIO also supports expansion of existing clusters in server pools. Each pool is a self contained entity with same SLA's (read/write quorum) for each object as original cluster. By using the existing namespace for lookup validation MinIO ensures conflicting objects are not created. When no such object exists then MinIO simply uses the least used pool.
__There are no limits on how many server pools can be combined__ __There are no limits on how many server pools can be combined__
@ -104,23 +104,23 @@ minio server http://host{1...32}/export{1...32} http://host{5...6}/export{1...8}
In above example there are two server pools In above example there are two server pools
- 32 * 32 = 1024 drives zone1 - 32 * 32 = 1024 drives pool1
- 2 * 8 = 16 drives zone2 - 2 * 8 = 16 drives pool2
> Notice the requirement of common SLA here original cluster had 1024 drives with 16 drives per erasure set, second zone is expected to have a minimum of 16 drives to match the original cluster SLA or it should be in multiples of 16. > Notice the requirement of common SLA here original cluster had 1024 drives with 16 drives per erasure set, second pool is expected to have a minimum of 16 drives to match the original cluster SLA or it should be in multiples of 16.
MinIO places new objects in server pools based on proportionate free space, per zone. Following pseudo code demonstrates this behavior. MinIO places new objects in server pools based on proportionate free space, per pool. Following pseudo code demonstrates this behavior.
```go ```go
func getAvailableZoneIdx(ctx context.Context) int { func getAvailablePoolIdx(ctx context.Context) int {
serverPools := z.getServerPoolsAvailableSpace(ctx) serverPools := z.getServerPoolsAvailableSpace(ctx)
total := serverPools.TotalAvailable() total := serverPools.TotalAvailable()
// choose when we reach this many // choose when we reach this many
choose := rand.Uint64() % total choose := rand.Uint64() % total
atTotal := uint64(0) atTotal := uint64(0)
for _, zone := range serverPools { for _, pool := range serverPools {
atTotal += zone.Available atTotal += pool.Available
if atTotal > choose && zone.Available > 0 { if atTotal > choose && pool.Available > 0 {
return zone.Index return pool.Index
} }
} }
// Should not happen, but print values just in case. // Should not happen, but print values just in case.

@ -77,10 +77,10 @@ For example:
minio server http://host{1...4}/export{1...16} http://host{5...12}/export{1...16} minio server http://host{1...4}/export{1...16} http://host{5...12}/export{1...16}
``` ```
Now the server has expanded total storage by _(newly_added_servers\*m)_ more disks, taking the total count to _(existing_servers\*m)+(newly_added_servers\*m)_ disks. New object upload requests automatically start using the least used cluster. This expansion strategy works endlessly, so you can perpetually expand your clusters as needed. When you restart, it is immediate and non-disruptive to the applications. Each group of servers in the command-line is called a zone. There are 2 server pools in this example. New objects are placed in server pools in proportion to the amount of free space in each zone. Within each zone, the location of the erasure-set of drives is determined based on a deterministic hashing algorithm. Now the server has expanded total storage by _(newly_added_servers\*m)_ more disks, taking the total count to _(existing_servers\*m)+(newly_added_servers\*m)_ disks. New object upload requests automatically start using the least used cluster. This expansion strategy works endlessly, so you can perpetually expand your clusters as needed. When you restart, it is immediate and non-disruptive to the applications. Each group of servers in the command-line is called a pool. There are 2 server pools in this example. New objects are placed in server pools in proportion to the amount of free space in each pool. Within each pool, the location of the erasure-set of drives is determined based on a deterministic hashing algorithm.
> __NOTE:__ __Each zone you add must have the same erasure coding set size as the original zone, so the same data redundancy SLA is maintained.__ > __NOTE:__ __Each pool you add must have the same erasure coding set size as the original pool, so the same data redundancy SLA is maintained.__
> For example, if your first zone was 8 drives, you could add further server pools of 16, 32 or 1024 drives each. All you have to make sure is deployment SLA is multiples of original data redundancy SLA i.e 8. > For example, if your first pool was 8 drives, you could add further server pools of 16, 32 or 1024 drives each. All you have to make sure is deployment SLA is multiples of original data redundancy SLA i.e 8.
## 3. Test your setup ## 3. Test your setup
To test this setup, access the MinIO server via browser or [`mc`](https://docs.min.io/docs/minio-client-quickstart-guide). To test this setup, access the MinIO server via browser or [`mc`](https://docs.min.io/docs/minio-client-quickstart-guide).

@ -104,24 +104,24 @@ minio server http://host{1...32}/export{1...32} http://host{5...6}/export{1...8}
以上示例有两个区域 以上示例有两个区域
- 32 * 32 = 1024 drives zone1 - 32 * 32 = 1024 drives pool1
- 2 * 8 = 16 drives zone2 - 2 * 8 = 16 drives pool2
> 注意这里对通用SLA的要求,原来的集群有1024个磁盘,每个纠删集合有16个磁盘,第二个区域至少要有16个磁盘才能符合原来集群的SLA,或者应该是16的倍数。 > 注意这里对通用SLA的要求,原来的集群有1024个磁盘,每个纠删集合有16个磁盘,第二个区域至少要有16个磁盘才能符合原来集群的SLA,或者应该是16的倍数。
MinIO根据每个区域的可用空间比例将新对象放置在区域中。以下伪代码演示了此行为。 MinIO根据每个区域的可用空间比例将新对象放置在区域中。以下伪代码演示了此行为。
```go ```go
func getAvailableZoneIdx(ctx context.Context) int { func getAvailablePoolIdx(ctx context.Context) int {
serverPools := z.getServerPoolsAvailableSpace(ctx) serverPools := z.getServerPoolsAvailableSpace(ctx)
total := serverPools.TotalAvailable() total := serverPools.TotalAvailable()
// choose when we reach this many // choose when we reach this many
choose := rand.Uint64() % total choose := rand.Uint64() % total
atTotal := uint64(0) atTotal := uint64(0)
for _, zone := range serverPools { for _, pool := range serverPools {
atTotal += zone.Available atTotal += pool.Available
if atTotal > choose && zone.Available > 0 { if atTotal > choose && pool.Available > 0 {
return zone.Index return pool.Index
} }
} }
// Should not happen, but print values just in case. // Should not happen, but print values just in case.

Loading…
Cancel
Save