fix: startup load time by reusing storageDisks (#9210)

master
Harshavardhana 5 years ago committed by GitHub
parent 0c80bf45d0
commit 6f992134a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      cmd/admin-handlers_test.go
  2. 18
      cmd/config-encrypted.go
  3. 127
      cmd/format-xl.go
  4. 2
      cmd/format-xl_test.go
  5. 5
      cmd/naughty-disk_test.go
  6. 8
      cmd/posix-diskid-check.go
  7. 16
      cmd/posix.go
  8. 53
      cmd/prepare-storage.go
  9. 12
      cmd/server-main.go
  10. 1
      cmd/storage-interface.go
  11. 4
      cmd/storage-rest-client.go
  12. 2
      cmd/storage-rest-server.go
  13. 4
      cmd/test-utils_test.go
  14. 195
      cmd/xl-sets.go
  15. 8
      cmd/xl-sets_test.go
  16. 2
      cmd/xl-v1.go
  17. 9
      cmd/xl-zones.go
  18. 9
      pkg/madmin/heal-commands.go

@ -109,14 +109,14 @@ func initTestXLObjLayer() (ObjectLayer, []string, error) {
return nil, nil, err
}
endpoints := mustGetNewEndpoints(xlDirs...)
format, err := waitForFormatXL(true, endpoints, 1, 1, 16, "")
storageDisks, format, err := waitForFormatXL(true, endpoints, 1, 1, 16, "")
if err != nil {
removeRoots(xlDirs)
return nil, nil, err
}
globalPolicySys = NewPolicySys()
objLayer, err := newXLSets(endpoints, format, 1, 16)
objLayer, err := newXLSets(endpoints, storageDisks, format, 1, 16)
if err != nil {
return nil, nil, err
}

@ -21,7 +21,6 @@ import (
"context"
"errors"
"fmt"
"strings"
"unicode/utf8"
etcd "github.com/coreos/etcd/clientv3"
@ -48,12 +47,18 @@ func handleEncryptedConfigBackend(objAPI ObjectLayer, server bool) error {
// of the object layer.
retryTimerCh := newRetryTimerSimple(doneCh)
var stop bool
rquorum := InsufficientReadQuorum{}
wquorum := InsufficientWriteQuorum{}
bucketNotFound := BucketNotFound{}
for !stop {
select {
case <-retryTimerCh:
if encrypted, err = checkBackendEncrypted(objAPI); err != nil {
if err == errDiskNotFound ||
strings.Contains(err.Error(), InsufficientReadQuorum{}.Error()) {
if errors.Is(err, errDiskNotFound) ||
errors.As(err, &rquorum) ||
errors.As(err, &bucketNotFound) {
logger.Info("Waiting for config backend to be encrypted..")
continue
}
@ -100,9 +105,10 @@ func handleEncryptedConfigBackend(objAPI ObjectLayer, server bool) error {
case <-retryTimerCh:
// Migrate IAM configuration
if err = migrateConfigPrefixToEncrypted(objAPI, globalOldCred, encrypted); err != nil {
if err == errDiskNotFound ||
strings.Contains(err.Error(), InsufficientReadQuorum{}.Error()) ||
strings.Contains(err.Error(), InsufficientWriteQuorum{}.Error()) {
if errors.Is(err, errDiskNotFound) ||
errors.As(err, &rquorum) ||
errors.As(err, &wquorum) ||
errors.As(err, &bucketNotFound) {
logger.Info("Waiting for config backend to be encrypted..")
continue
}

@ -312,7 +312,7 @@ func quorumUnformattedDisks(errs []error) bool {
}
// loadFormatXLAll - load all format config from all input disks in parallel.
func loadFormatXLAll(storageDisks []StorageAPI) ([]*formatXLV3, []error) {
func loadFormatXLAll(storageDisks []StorageAPI, heal bool) ([]*formatXLV3, []error) {
// Initialize list of errors.
g := errgroup.WithNErrs(len(storageDisks))
@ -331,6 +331,11 @@ func loadFormatXLAll(storageDisks []StorageAPI) ([]*formatXLV3, []error) {
return err
}
formats[index] = format
if !heal {
// If no healing required, make the disks valid and
// online.
storageDisks[index].SetDiskID(format.XL.This)
}
return nil
}, index)
}
@ -339,7 +344,15 @@ func loadFormatXLAll(storageDisks []StorageAPI) ([]*formatXLV3, []error) {
return formats, g.Wait()
}
func saveFormatXL(disk StorageAPI, format interface{}) error {
func saveFormatXL(disk StorageAPI, format interface{}, diskID string) error {
if format == nil || disk == nil {
return errDiskNotFound
}
if err := makeFormatXLMetaVolumes(disk); err != nil {
return err
}
// Marshal and write to disk.
formatBytes, err := json.Marshal(format)
if err != nil {
@ -357,7 +370,12 @@ func saveFormatXL(disk StorageAPI, format interface{}) error {
}
// Rename file `uuid.json` --> `format.json`.
return disk.RenameFile(minioMetaBucket, tmpFormat, minioMetaBucket, formatConfigFile)
if err = disk.RenameFile(minioMetaBucket, tmpFormat, minioMetaBucket, formatConfigFile); err != nil {
return err
}
disk.SetDiskID(diskID)
return nil
}
var ignoredHiddenDirectories = []string{
@ -475,7 +493,7 @@ func formatXLGetDeploymentID(refFormat *formatXLV3, formats []*formatXLV3) (stri
func formatXLFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatXLV3) (err error) {
// Attempt to load all `format.json` from all disks.
var sErrs []error
formats, sErrs := loadFormatXLAll(storageDisks)
formats, sErrs := loadFormatXLAll(storageDisks, false)
for i, sErr := range sErrs {
if _, ok := formatCriticalErrors[sErr]; ok {
return fmt.Errorf("Disk %s: %w", endpoints[i], sErr)
@ -519,25 +537,38 @@ func formatXLFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, ref
func formatXLFixLocalDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatXLV3) error {
// If this server was down when the deploymentID was updated
// then we make sure that we update the local disks with the deploymentID.
for index, storageDisk := range storageDisks {
if endpoints[index].IsLocal && storageDisk != nil && storageDisk.IsOnline() {
format, err := loadFormatXL(storageDisk)
if err != nil {
// Disk can be offline etc.
// ignore the errors seen here.
continue
}
if format.ID != "" {
continue
}
if !reflect.DeepEqual(format.XL.Sets, refFormat.XL.Sets) {
continue
}
format.ID = refFormat.ID
if err := saveFormatXL(storageDisk, format); err != nil {
logger.LogIf(context.Background(), err)
return fmt.Errorf("Unable to save format.json, %w", err)
// Initialize errs to collect errors inside go-routine.
g := errgroup.WithNErrs(len(storageDisks))
for index := range storageDisks {
index := index
g.Go(func() error {
if endpoints[index].IsLocal && storageDisks[index] != nil && storageDisks[index].IsOnline() {
format, err := loadFormatXL(storageDisks[index])
if err != nil {
// Disk can be offline etc.
// ignore the errors seen here.
return nil
}
if format.ID != "" {
return nil
}
if !reflect.DeepEqual(format.XL.Sets, refFormat.XL.Sets) {
return nil
}
format.ID = refFormat.ID
if err := saveFormatXL(storageDisks[index], format, format.XL.This); err != nil {
logger.LogIf(context.Background(), err)
return fmt.Errorf("Unable to save format.json, %w", err)
}
}
return nil
}, index)
}
for _, err := range g.Wait() {
if err != nil {
return err
}
}
return nil
@ -670,13 +701,7 @@ func saveFormatXLAll(ctx context.Context, storageDisks []StorageAPI, formats []*
for index := range storageDisks {
index := index
g.Go(func() error {
if formats[index] == nil || storageDisks[index] == nil {
return errDiskNotFound
}
if err := makeFormatXLMetaVolumes(storageDisks[index]); err != nil {
return err
}
return saveFormatXL(storageDisks[index], formats[index])
return saveFormatXL(storageDisks[index], formats[index], formats[index].XL.This)
}, index)
}
@ -738,25 +763,36 @@ func formatXLV3ThisEmpty(formats []*formatXLV3) bool {
// fixFormatXLV3 - fix format XL configuration on all disks.
func fixFormatXLV3(storageDisks []StorageAPI, endpoints Endpoints, formats []*formatXLV3) error {
for i, format := range formats {
if format == nil || !endpoints[i].IsLocal {
continue
}
// NOTE: This code is specifically needed when migrating version
// V1 to V2 to V3, in a scenario such as this we only need to handle
// single sets since we never used to support multiple sets in releases
// with V1 format version.
if len(format.XL.Sets) > 1 {
continue
}
if format.XL.This == "" {
formats[i].XL.This = format.XL.Sets[0][i]
if err := saveFormatXL(storageDisks[i], formats[i]); err != nil {
return err
g := errgroup.WithNErrs(len(formats))
for i := range formats {
i := i
g.Go(func() error {
if formats[i] == nil || !endpoints[i].IsLocal {
return nil
}
// NOTE: This code is specifically needed when migrating version
// V1 to V2 to V3, in a scenario such as this we only need to handle
// single sets since we never used to support multiple sets in releases
// with V1 format version.
if len(formats[i].XL.Sets) > 1 {
return nil
}
if formats[i].XL.This == "" {
formats[i].XL.This = formats[i].XL.Sets[0][i]
if err := saveFormatXL(storageDisks[i], formats[i], formats[i].XL.This); err != nil {
return err
}
}
return nil
}, i)
}
for _, err := range g.Wait() {
if err != nil {
return err
}
}
return nil
}
// initFormatXL - save XL format configuration on all disks.
@ -827,6 +863,9 @@ func ecDrivesNoConfig(drivesPerSet int) int {
// makeFormatXLMetaVolumes creates the MinIO internal (meta) volumes on the
// given disk with a single MakeVolBulk call.
//
// It returns errDiskNotFound when disk is nil; otherwise it returns whatever
// disk.MakeVolBulk returns.
func makeFormatXLMetaVolumes(disk StorageAPI) error {
if disk == nil {
return errDiskNotFound
}
// Attempt to create MinIO internal buckets.
return disk.MakeVolBulk(minioMetaBucket, minioMetaTmpBucket, minioMetaMultipartBucket, dataUsageBucket)
}

@ -112,7 +112,7 @@ func TestFixFormatV3(t *testing.T) {
t.Fatal(err)
}
newFormats, errs := loadFormatXLAll(storageDisks)
newFormats, errs := loadFormatXLAll(storageDisks, false)
for _, err := range errs {
if err != nil && err != errUnformattedDisk {
t.Fatal(err)

@ -78,7 +78,12 @@ func (d *naughtyDisk) calcError() (err error) {
return nil
}
// GetDiskID forwards to the wrapped disk's GetDiskID and returns its
// result unchanged.
func (d *naughtyDisk) GetDiskID() (string, error) {
return d.disk.GetDiskID()
}
// SetDiskID forwards the given id to the wrapped disk's SetDiskID.
func (d *naughtyDisk) SetDiskID(id string) {
d.disk.SetDiskID(id)
}
func (d *naughtyDisk) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (info dataUsageCache, err error) {

@ -32,7 +32,7 @@ func (p *posixDiskIDCheck) String() string {
}
func (p *posixDiskIDCheck) IsOnline() bool {
storedDiskID, err := p.storage.getDiskID()
storedDiskID, err := p.storage.GetDiskID()
if err != nil {
return false
}
@ -51,6 +51,10 @@ func (p *posixDiskIDCheck) Close() error {
return p.storage.Close()
}
// GetDiskID returns the disk ID held by this wrapper (p.diskID), i.e. the
// value last passed to SetDiskID. It does not consult the underlying
// storage and always returns a nil error.
func (p *posixDiskIDCheck) GetDiskID() (string, error) {
return p.diskID, nil
}
// SetDiskID records id as the disk ID this wrapper compares against the ID
// reported by the underlying storage. This is a plain field assignment; no
// locking is performed in this method.
func (p *posixDiskIDCheck) SetDiskID(id string) {
p.diskID = id
}
@ -61,7 +65,7 @@ func (p *posixDiskIDCheck) isDiskStale() bool {
// or create format.json
return false
}
storedDiskID, err := p.storage.getDiskID()
storedDiskID, err := p.storage.GetDiskID()
if err == nil && p.diskID == storedDiskID {
return false
}

@ -424,7 +424,8 @@ func (s *posix) getVolDir(volume string) (string, error) {
return volumeDir, nil
}
func (s *posix) getDiskID() (string, error) {
// GetDiskID - returns the cached disk uuid
func (s *posix) GetDiskID() (string, error) {
s.RLock()
diskID := s.diskID
fileInfo := s.formatFileInfo
@ -440,7 +441,7 @@ func (s *posix) getDiskID() (string, error) {
defer s.Unlock()
// If somebody else updated the disk ID and changed the time, return what they got.
if !s.formatLastCheck.Equal(lastCheck) {
if !lastCheck.IsZero() && !s.formatLastCheck.Equal(lastCheck) && diskID != "" {
// Somebody else got the lock first.
return diskID, nil
}
@ -448,10 +449,13 @@ func (s *posix) getDiskID() (string, error) {
fi, err := os.Stat(formatFile)
if err != nil {
// If the disk is still not initialized.
return "", err
if os.IsNotExist(err) {
return "", errUnformattedDisk
}
return "", errCorruptedFormat
}
if xioutil.SameFile(fi, fileInfo) {
if xioutil.SameFile(fi, fileInfo) && diskID != "" {
// If the file has not changed, just return the cached diskID information.
s.formatLastCheck = time.Now()
return diskID, nil
@ -459,12 +463,12 @@ func (s *posix) getDiskID() (string, error) {
b, err := ioutil.ReadFile(formatFile)
if err != nil {
return "", err
return "", errCorruptedFormat
}
format := &formatXLV3{}
var json = jsoniter.ConfigCompatibleWithStandardLibrary
if err = json.Unmarshal(b, &format); err != nil {
return "", err
return "", errCorruptedFormat
}
s.diskID = format.XL.This
s.formatFileInfo = fi

@ -221,15 +221,14 @@ func IsServerResolvable(endpoint Endpoint) error {
// connect to list of endpoints and load all XL disk formats, validate the formats are correct
// and are in quorum, if no formats are found attempt to initialize all of them for the first
// time. additionally make sure to close all the disks used in this attempt.
func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, zoneCount, setCount, drivesPerSet int, deploymentID string) (*formatXLV3, error) {
func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, zoneCount, setCount, drivesPerSet int, deploymentID string) ([]StorageAPI, *formatXLV3, error) {
// Initialize all storage disks
storageDisks, errs := initStorageDisksWithErrors(endpoints)
defer closeStorageDisks(storageDisks)
for i, err := range errs {
if err != nil {
if err != errDiskNotFound {
return nil, fmt.Errorf("Disk %s: %w", endpoints[i], err)
return nil, nil, fmt.Errorf("Disk %s: %w", endpoints[i], err)
}
if retryCount >= 5 {
logger.Info("Unable to connect to %s: %v\n", endpoints[i], IsServerResolvable(endpoints[i]))
@ -238,11 +237,11 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
}
// Attempt to load all `format.json` from all disks.
formatConfigs, sErrs := loadFormatXLAll(storageDisks)
formatConfigs, sErrs := loadFormatXLAll(storageDisks, false)
// Check if we have
for i, sErr := range sErrs {
if _, ok := formatCriticalErrors[sErr]; ok {
return nil, fmt.Errorf("Disk %s: %w", endpoints[i], sErr)
return nil, nil, fmt.Errorf("Disk %s: %w", endpoints[i], sErr)
}
// not critical error but still print the error, nonetheless, which is perhaps unhandled
if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 5 {
@ -258,7 +257,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
// with expected XL format. For example if a user is
// trying to pool FS backend into an XL set.
if err := checkFormatXLValues(formatConfigs, drivesPerSet); err != nil {
return nil, err
return nil, nil, err
}
// All disks report unformatted, so we should initialize every one of them.
@ -269,24 +268,24 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
// Initialize erasure code format on disks
format, err := initFormatXL(context.Background(), storageDisks, setCount, drivesPerSet, deploymentID)
if err != nil {
return nil, err
return nil, nil, err
}
// Assign globalDeploymentID on first run for the
// minio server managing the first disk
globalDeploymentID = format.ID
return format, nil
return storageDisks, format, nil
}
// Return error when quorum unformatted disks - indicating we are
// waiting for first server to be online.
if quorumUnformattedDisks(sErrs) && !firstDisk {
return nil, errNotFirstDisk
return nil, nil, errNotFirstDisk
}
// Return error when quorum unformatted disks but waiting for rest
// of the servers to be online.
if quorumUnformattedDisks(sErrs) && firstDisk {
return nil, errFirstDiskWait
return nil, nil, errFirstDiskWait
}
// The following function is added to fix a regression which was introduced
@ -295,54 +294,54 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
// the disk UUID association. Below function is called to handle and fix
// this regression, for more info refer https://github.com/minio/minio/issues/5667
if err := fixFormatXLV3(storageDisks, endpoints, formatConfigs); err != nil {
return nil, err
return nil, nil, err
}
// If any of the .This field is still empty, we return error.
if formatXLV3ThisEmpty(formatConfigs) {
return nil, errXLV3ThisEmpty
return nil, nil, errXLV3ThisEmpty
}
format, err := getFormatXLInQuorum(formatConfigs)
if err != nil {
return nil, err
return nil, nil, err
}
if format.ID == "" {
// Not a first disk, wait until first disk fixes deploymentID
if !firstDisk {
return nil, errNotFirstDisk
return nil, nil, errNotFirstDisk
}
if err = formatXLFixDeploymentID(endpoints, storageDisks, format); err != nil {
return nil, err
return nil, nil, err
}
}
globalDeploymentID = format.ID
if err = formatXLFixLocalDeploymentID(endpoints, storageDisks, format); err != nil {
return nil, err
return nil, nil, err
}
// This will always recreate some directories inside .minio.sys of
// the local disk such as tmp, multipart and background-ops
initXLMetaVolumesInLocalDisks(storageDisks, formatConfigs)
return format, nil
return storageDisks, format, nil
}
// Format disks before initialization of object layer.
func waitForFormatXL(firstDisk bool, endpoints Endpoints, zoneCount, setCount, drivesPerSet int, deploymentID string) (format *formatXLV3, err error) {
func waitForFormatXL(firstDisk bool, endpoints Endpoints, zoneCount, setCount, drivesPerSet int, deploymentID string) ([]StorageAPI, *formatXLV3, error) {
if len(endpoints) == 0 || setCount == 0 || drivesPerSet == 0 {
return nil, errInvalidArgument
return nil, nil, errInvalidArgument
}
if err = formatXLMigrateLocalEndpoints(endpoints); err != nil {
return nil, err
if err := formatXLMigrateLocalEndpoints(endpoints); err != nil {
return nil, nil, err
}
if err = formatXLCleanupTmpLocalEndpoints(endpoints); err != nil {
return nil, err
if err := formatXLCleanupTmpLocalEndpoints(endpoints); err != nil {
return nil, nil, err
}
// prepare getElapsedTime() to calculate elapsed time since we started trying formatting disks.
@ -359,7 +358,7 @@ func waitForFormatXL(firstDisk bool, endpoints Endpoints, zoneCount, setCount, d
for {
select {
case <-ticker.C:
format, err := connectLoadInitFormats(tries, firstDisk, endpoints, zoneCount, setCount, drivesPerSet, deploymentID)
storageDisks, format, err := connectLoadInitFormats(tries, firstDisk, endpoints, zoneCount, setCount, drivesPerSet, deploymentID)
if err != nil {
tries++
switch err {
@ -380,12 +379,12 @@ func waitForFormatXL(firstDisk bool, endpoints Endpoints, zoneCount, setCount, d
continue
default:
// For all other unhandled errors we exit and fail.
return nil, err
return nil, nil, err
}
}
return format, nil
return storageDisks, format, nil
case <-globalOSSignalCh:
return nil, fmt.Errorf("Initializing data volumes gracefully stopped")
return nil, nil, fmt.Errorf("Initializing data volumes gracefully stopped")
}
}
}

@ -213,14 +213,15 @@ func initSafeMode(buckets []BucketInfo) (err error) {
for {
rquorum := InsufficientReadQuorum{}
wquorum := InsufficientWriteQuorum{}
bucketNotFound := BucketNotFound{}
var err error
select {
case n := <-retryTimerCh:
if err = initAllSubsystems(buckets, newObject); err != nil {
if errors.Is(err, errDiskNotFound) ||
errors.As(err, &rquorum) ||
errors.As(err, &wquorum) {
errors.As(err, &wquorum) ||
errors.As(err, &bucketNotFound) {
if n < 5 {
logger.Info("Waiting for all sub-systems to be initialized..")
} else {
@ -245,7 +246,6 @@ func initAllSubsystems(buckets []BucketInfo, newObject ObjectLayer) (err error)
if err = globalConfigSys.Init(newObject); err != nil {
return fmt.Errorf("Unable to initialize config system: %w", err)
}
if globalEtcdClient != nil {
// **** WARNING ****
// Migrating to encrypted backend on etcd should happen before initialization of
@ -419,7 +419,6 @@ func serverMain(ctx *cli.Context) {
globalObjectAPI = newObject
globalObjLayerMutex.Unlock()
// Calls New() and initializes all sub-systems.
newAllSubsystems()
// Enable healing to heal drives if possible
@ -428,6 +427,9 @@ func serverMain(ctx *cli.Context) {
initLocalDisksAutoHeal(GlobalContext, newObject)
}
go startBackgroundOps(GlobalContext, newObject)
// Calls New() and initializes all sub-systems.
buckets, err := newObject.ListBuckets(GlobalContext)
if err != nil {
logger.Fatal(err, "Unable to list buckets")
@ -451,8 +453,6 @@ func serverMain(ctx *cli.Context) {
initFederatorBackend(buckets, newObject)
}
go startBackgroundOps(GlobalContext, newObject)
// Disable safe mode operation, after all initialization is over.
globalObjLayerMutex.Lock()
globalSafeMode = false

@ -30,6 +30,7 @@ type StorageAPI interface {
IsOnline() bool // Returns true if disk is online.
Hostname() string // Returns host name if remote host.
Close() error
GetDiskID() (string, error)
SetDiskID(id string)
DiskInfo() (info DiskInfo, err error)

@ -172,6 +172,10 @@ func (client *storageRESTClient) CrawlAndGetDataUsage(ctx context.Context, cache
return newCache, newCache.deserialize(b)
}
// GetDiskID returns the disk ID stored on the client (client.diskID). It
// performs no remote call here and always returns a nil error.
func (client *storageRESTClient) GetDiskID() (string, error) {
return client.diskID, nil
}
// SetDiskID stores id on the client as client.diskID. This is a plain field
// assignment; no remote call or locking happens in this method.
func (client *storageRESTClient) SetDiskID(id string) {
client.diskID = id
}

@ -110,7 +110,7 @@ func (s *storageRESTServer) IsValid(w http.ResponseWriter, r *http.Request) bool
// or create format.json
return true
}
storedDiskID, err := s.storage.getDiskID()
storedDiskID, err := s.storage.GetDiskID()
if err == nil && diskID == storedDiskID {
// If format.json is available and request sent the right disk-id, we allow the request
return true

@ -193,13 +193,13 @@ func prepareXLSets32() (ObjectLayer, []string, error) {
endpoints := append(endpoints1, endpoints2...)
fsDirs := append(fsDirs1, fsDirs2...)
format, err := waitForFormatXL(true, endpoints, 1, 2, 16, "")
storageDisks, format, err := waitForFormatXL(true, endpoints, 1, 2, 16, "")
if err != nil {
removeRoots(fsDirs)
return nil, nil, err
}
objAPI, err := newXLSets(endpoints, format, 2, 16)
objAPI, err := newXLSets(endpoints, storageDisks, format, 2, 16)
if err != nil {
return nil, nil, err
}

@ -45,17 +45,12 @@ type setsStorageAPI [][]StorageAPI
// setsDsyncLockers is encapsulated type for Close()
type setsDsyncLockers [][]dsync.NetLocker
func (s setsStorageAPI) Close() error {
for i := 0; i < len(s); i++ {
for j, disk := range s[i] {
if disk == nil {
continue
}
disk.Close()
s[i][j] = nil
}
func (s setsStorageAPI) Copy() [][]StorageAPI {
copyS := make(setsStorageAPI, len(s))
for i, disks := range s {
copyS[i] = append(copyS[i], disks...)
}
return nil
return copyS
}
// Information of a new disk connection
@ -174,6 +169,25 @@ func connectEndpoint(endpoint Endpoint) (StorageAPI, *formatXLV3, error) {
return disk, format, nil
}
// findDiskIndexByDiskID - returns the i,j'th position of the input `diskID`
// against the reference format.
// - i'th position is the set index
// - j'th position is the disk index in that set
//
// It returns (-1, -1, err) when diskID equals offlineDiskUUID or when diskID
// is not present in any set of refFormat.
func findDiskIndexByDiskID(refFormat *formatXLV3, diskID string) (int, int, error) {
	if diskID == offlineDiskUUID {
		return -1, -1, fmt.Errorf("diskID: %s is offline", diskID)
	}
	// Bound the inner scan by the length of the set being inspected rather
	// than by the first set's length, so sets of differing sizes can neither
	// cause an out-of-range index nor hide trailing entries.
	for i, set := range refFormat.XL.Sets {
		for j, id := range set {
			if id == diskID {
				return i, j, nil
			}
		}
	}
	return -1, -1, fmt.Errorf("diskID: %s not found", diskID)
}
// findDiskIndex - returns the i,j'th position of the input `format` against the reference
// format, after successful validation.
// - i'th position is the set index
@ -198,18 +212,6 @@ func findDiskIndex(refFormat, format *formatXLV3) (int, int, error) {
return -1, -1, fmt.Errorf("diskID: %s not found", format.XL.This)
}
// connectDisksWithQuorum is same as connectDisks but waits
// for quorum number of formatted disks to be online in any given sets.
func (s *xlSets) connectDisksWithQuorum() {
for {
s.connectDisks()
if s.getOnlineDisksCount() > len(s.endpoints)/2 {
return
}
time.Sleep(100 * time.Millisecond)
}
}
// connectDisks - attempt to connect all the endpoints, loads format
// and re-arranges the disks in proper position.
func (s *xlSets) connectDisks() {
@ -288,7 +290,7 @@ func (s *xlSets) GetDisks(setIndex int) func() []StorageAPI {
const defaultMonitorConnectEndpointInterval = time.Second * 10 // Set to 10 secs.
// Initialize new set of erasure coded sets.
func newXLSets(endpoints Endpoints, format *formatXLV3, setCount int, drivesPerSet int) (*xlSets, error) {
func newXLSets(endpoints Endpoints, storageDisks []StorageAPI, format *formatXLV3, setCount int, drivesPerSet int) (*xlSets, error) {
endpointStrings := make([]string, len(endpoints))
for i, endpoint := range endpoints {
if endpoint.IsLocal {
@ -322,13 +324,25 @@ func newXLSets(endpoints Endpoints, format *formatXLV3, setCount int, drivesPerS
// setCount * drivesPerSet with each memory upto blockSizeV1.
bp := bpool.NewBytePoolCap(setCount*drivesPerSet, blockSizeV1, blockSizeV1*2)
for i := 0; i < len(format.XL.Sets); i++ {
for i := 0; i < setCount; i++ {
s.xlDisks[i] = make([]StorageAPI, drivesPerSet)
s.xlLockers[i] = make([]dsync.NetLocker, drivesPerSet)
var endpoints Endpoints
for j := 0; j < drivesPerSet; j++ {
endpoints = append(endpoints, s.endpoints[i*s.drivesPerSet+j])
endpoints = append(endpoints, s.endpoints[i*drivesPerSet+j])
// Rely on endpoints list to initialize, init lockers and available disks.
s.xlLockers[i][j] = newLockAPI(s.endpoints[i*drivesPerSet+j])
if storageDisks[i*drivesPerSet+j] == nil {
continue
}
if diskID, derr := storageDisks[i*drivesPerSet+j].GetDiskID(); derr == nil {
m, n, err := findDiskIndexByDiskID(format, diskID)
if err != nil {
continue
}
s.xlDisks[m][n] = storageDisks[i*drivesPerSet+j]
}
}
// Initialize xl objects for a given set.
@ -345,19 +359,8 @@ func newXLSets(endpoints Endpoints, format *formatXLV3, setCount int, drivesPerS
GlobalMultipartCleanupInterval, GlobalMultipartExpiry, GlobalServiceDoneCh)
}
// Rely on endpoints list to initialize, init lockers.
for i := 0; i < s.setCount; i++ {
for j := 0; j < s.drivesPerSet; j++ {
s.xlLockers[i][j] = newLockAPI(s.endpoints[i*s.drivesPerSet+j])
}
}
// Connect disks right away, but wait until we have `format.json` quorum.
s.connectDisksWithQuorum()
// Start the disk monitoring and connect routine.
go s.monitorAndConnectEndpoints(GlobalContext, defaultMonitorConnectEndpointInterval)
go s.maintainMRFList()
go s.healMRFRoutine()
@ -421,52 +424,40 @@ func (s *xlSets) StorageInfo(ctx context.Context, local bool) StorageInfo {
return storageInfo
}
storageDisks, dErrs := initStorageDisksWithErrors(s.endpoints)
defer closeStorageDisks(storageDisks)
formats, sErrs := loadFormatXLAll(storageDisks)
combineStorageErrors := func(diskErrs []error, storageErrs []error) []error {
for index, err := range diskErrs {
if err != nil {
storageErrs[index] = err
}
}
return storageErrs
}
errs := combineStorageErrors(dErrs, sErrs)
drivesInfo := formatsToDrivesInfo(s.endpoints, formats, errs)
s.xlDisksMu.RLock()
storageDisks := s.xlDisks.Copy()
s.xlDisksMu.RUnlock()
// fill all the available/online endpoints
for k, drive := range drivesInfo {
if drive.UUID == "" {
continue
}
if formats[k] == nil {
continue
}
for i := range formats[k].XL.Sets {
for j, driveUUID := range formats[k].XL.Sets[i] {
if driveUUID == drive.UUID {
storageInfo.Backend.Sets[i][j] = drive
for i := 0; i < s.setCount; i++ {
for j := 0; j < s.drivesPerSet; j++ {
if storageDisks[i][j] == nil {
storageInfo.Backend.Sets[i][j] = madmin.DriveInfo{
State: madmin.DriveStateOffline,
Endpoint: s.endpointStrings[i*s.drivesPerSet+j],
}
continue
}
}
}
// fill all the offline, missing endpoints as well.
for _, drive := range drivesInfo {
if drive.UUID == "" {
for i := range storageInfo.Backend.Sets {
for j := range storageInfo.Backend.Sets[i] {
if storageInfo.Backend.Sets[i][j].Endpoint == drive.Endpoint {
continue
diskID, err := storageDisks[i][j].GetDiskID()
if err != nil {
if err == errUnformattedDisk {
storageInfo.Backend.Sets[i][j] = madmin.DriveInfo{
State: madmin.DriveStateUnformatted,
Endpoint: storageDisks[i][j].String(),
UUID: "",
}
if storageInfo.Backend.Sets[i][j].Endpoint == "" {
storageInfo.Backend.Sets[i][j] = drive
break
} else {
storageInfo.Backend.Sets[i][j] = madmin.DriveInfo{
State: madmin.DriveStateCorrupt,
Endpoint: storageDisks[i][j].String(),
UUID: "",
}
}
continue
}
storageInfo.Backend.Sets[i][j] = madmin.DriveInfo{
State: madmin.DriveStateOk,
Endpoint: storageDisks[i][j].String(),
UUID: diskID,
}
}
}
@ -1347,7 +1338,7 @@ func (s *xlSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) {
}
}(storageDisks)
formats, sErrs := loadFormatXLAll(storageDisks)
formats, sErrs := loadFormatXLAll(storageDisks, false)
if err = checkFormatXLValues(formats, s.drivesPerSet); err != nil {
return err
}
@ -1372,14 +1363,33 @@ func (s *xlSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) {
// with new format.
s.disksConnectDoneCh <- struct{}{}
// Replace the new format.
// Replace with new reference format.
s.format = refFormat
// Close all existing disks and reconnect all the disks.
s.xlDisksMu.Lock()
s.xlDisks.Close()
for _, disk := range storageDisks {
if disk == nil {
continue
}
diskID, err := disk.GetDiskID()
if err != nil {
continue
}
m, n, err := findDiskIndexByDiskID(refFormat, diskID)
if err != nil {
continue
}
if s.xlDisks[m][n] != nil {
s.xlDisks[m][n].Close()
}
s.xlDisks[m][n] = disk
}
s.xlDisksMu.Unlock()
s.connectDisks()
// Restart monitoring loop to monitor reformatted disks again.
go s.monitorAndConnectEndpoints(GlobalContext, defaultMonitorConnectEndpointInterval)
@ -1460,7 +1470,7 @@ func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealRe
markRootDisksAsDown(storageDisks)
formats, sErrs := loadFormatXLAll(storageDisks)
formats, sErrs := loadFormatXLAll(storageDisks, true)
if err = checkFormatXLValues(formats, s.drivesPerSet); err != nil {
return madmin.HealResultItem{}, err
}
@ -1566,9 +1576,28 @@ func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealRe
// Disconnect/relinquish all existing disks, lockers and reconnect the disks, lockers.
s.xlDisksMu.Lock()
s.xlDisks.Close()
for _, disk := range storageDisks {
if disk == nil {
continue
}
diskID, err := disk.GetDiskID()
if err != nil {
continue
}
m, n, err := findDiskIndexByDiskID(refFormat, diskID)
if err != nil {
continue
}
if s.xlDisks[m][n] != nil {
s.xlDisks[m][n].Close()
}
s.xlDisks[m][n] = disk
}
s.xlDisksMu.Unlock()
s.connectDisks()
// Restart our monitoring loop to start monitoring newly formatted disks.
go s.monitorAndConnectEndpoints(GlobalContext, defaultMonitorConnectEndpointInterval)

@ -76,23 +76,23 @@ func TestNewXLSets(t *testing.T) {
}
endpoints := mustGetNewEndpoints(erasureDisks...)
_, err := waitForFormatXL(true, endpoints, 1, 0, 16, "")
_, _, err := waitForFormatXL(true, endpoints, 1, 0, 16, "")
if err != errInvalidArgument {
t.Fatalf("Expecting error, got %s", err)
}
_, err = waitForFormatXL(true, nil, 1, 1, 16, "")
_, _, err = waitForFormatXL(true, nil, 1, 1, 16, "")
if err != errInvalidArgument {
t.Fatalf("Expecting error, got %s", err)
}
// Initializes all erasure disks
format, err := waitForFormatXL(true, endpoints, 1, 1, 16, "")
storageDisks, format, err := waitForFormatXL(true, endpoints, 1, 1, 16, "")
if err != nil {
t.Fatalf("Unable to format disks for erasure, %s", err)
}
if _, err := newXLSets(endpoints, format, 1, 16); err != nil {
if _, err := newXLSets(endpoints, storageDisks, format, 1, 16); err != nil {
t.Fatalf("Unable to initialize erasure")
}
}

@ -182,7 +182,7 @@ func (xl xlObjects) StorageInfo(ctx context.Context, local bool) StorageInfo {
disks = xl.getDisks()
} else {
for i, d := range xl.getDisks() {
if endpoints[i].IsLocal {
if endpoints[i].IsLocal && d.Hostname() == "" {
// Append this local disk since local flag is true
disks = append(disks, d)
}

@ -60,12 +60,13 @@ func newXLZones(endpointZones EndpointZones) (ObjectLayer, error) {
deploymentID string
err error
formats = make([]*formatXLV3, len(endpointZones))
z = &xlZones{zones: make([]*xlSets, len(endpointZones))}
formats = make([]*formatXLV3, len(endpointZones))
storageDisks = make([][]StorageAPI, len(endpointZones))
z = &xlZones{zones: make([]*xlSets, len(endpointZones))}
)
local := endpointZones.FirstLocal()
for i, ep := range endpointZones {
formats[i], err = waitForFormatXL(local, ep.Endpoints, i+1,
storageDisks[i], formats[i], err = waitForFormatXL(local, ep.Endpoints, i+1,
ep.SetCount, ep.DrivesPerSet, deploymentID)
if err != nil {
return nil, err
@ -73,7 +74,7 @@ func newXLZones(endpointZones EndpointZones) (ObjectLayer, error) {
if deploymentID == "" {
deploymentID = formats[i].ID
}
z.zones[i], err = newXLSets(ep.Endpoints, formats[i], ep.SetCount, ep.DrivesPerSet)
z.zones[i], err = newXLSets(ep.Endpoints, storageDisks[i], formats[i], ep.SetCount, ep.DrivesPerSet)
if err != nil {
return nil, err
}

@ -82,10 +82,11 @@ const (
// Drive state constants
const (
DriveStateOk string = "ok"
DriveStateOffline = "offline"
DriveStateCorrupt = "corrupt"
DriveStateMissing = "missing"
DriveStateOk string = "ok"
DriveStateOffline = "offline"
DriveStateCorrupt = "corrupt"
DriveStateMissing = "missing"
DriveStateUnformatted = "unformatted" // only returned by disk
)
// HealDriveInfo - struct for an individual drive info item.

Loading…
Cancel
Save