From 74116204ced74b2516a3f681896b5a28f1dec6b2 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Tue, 18 Aug 2020 14:37:26 -0700 Subject: [PATCH] handle fresh setup with mixed drives (#10273) fresh drive setups when one of the drive is a root drive, we should ignore such a root drive and not proceed to format. This PR handles this properly by marking the disks which are root disk and they are taken offline. --- cmd/background-newdisks-heal-ops.go | 3 +- cmd/erasure-sets.go | 55 +++++++++++++++++------------ cmd/erasure.go | 23 +++++++++++- cmd/format-erasure.go | 27 +++++++------- cmd/prepare-storage.go | 5 ++- cmd/storage-interface.go | 1 + cmd/xl-storage.go | 17 +++++---- pkg/disk/root_disk_unix.go | 20 +++++++++-- pkg/madmin/info-commands.go | 1 + 9 files changed, 101 insertions(+), 51 deletions(-) diff --git a/cmd/background-newdisks-heal-ops.go b/cmd/background-newdisks-heal-ops.go index 7c1275065..e9b341725 100644 --- a/cmd/background-newdisks-heal-ops.go +++ b/cmd/background-newdisks-heal-ops.go @@ -18,6 +18,7 @@ package cmd import ( "context" + "errors" "fmt" "time" @@ -86,7 +87,7 @@ func getLocalDisksToHeal(objAPI ObjectLayer) []Endpoints { // Try to connect to the current endpoint // and reformat if the current disk is not formatted _, _, err := connectEndpoint(endpoint) - if err == errUnformattedDisk { + if errors.Is(err, errUnformattedDisk) { localDisksToHeal = append(localDisksToHeal, endpoint) } } diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index ca92b47e0..091c316b0 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -18,6 +18,7 @@ package cmd import ( "context" + "errors" "fmt" "hash/crc32" "io" @@ -137,7 +138,14 @@ func connectEndpoint(endpoint Endpoint) (StorageAPI, *formatErasureV3, error) { if err != nil { // Close the internal connection to avoid connection leaks. disk.Close() - return nil, nil, err + if errors.Is(err, errUnformattedDisk) { + info, derr := disk.DiskInfo() + if derr != nil && info.RootDisk { + return nil, nil, fmt.Errorf("Disk: %s returned %w but its a root disk refusing to use it", + disk, derr) // make sure to '%w' to wrap the error + } + } + return nil, nil, fmt.Errorf("Disk: %s returned %w", disk, err) // make sure to '%w' to wrap the error } return disk, format, nil @@ -1274,19 +1282,20 @@ func isTestSetup(infos []DiskInfo, errs []error) bool { return rootDiskCount == len(infos) } -func getHealDiskInfos(storageDisks []StorageAPI) ([]DiskInfo, []error) { +func getHealDiskInfos(storageDisks []StorageAPI, errs []error) ([]DiskInfo, []error) { infos := make([]DiskInfo, len(storageDisks)) g := errgroup.WithNErrs(len(storageDisks)) for index := range storageDisks { index := index g.Go(func() error { - var err error - if storageDisks[index] != nil { - infos[index], err = storageDisks[index].DiskInfo() - } else { - // Disk not found. - err = errDiskNotFound + if errs[index] != nil && errs[index] != errUnformattedDisk { + return errs[index] + } + if storageDisks[index] == nil { + return errDiskNotFound } + var err error + infos[index], err = storageDisks[index].DiskInfo() return err }, index) } @@ -1294,19 +1303,18 @@ func getHealDiskInfos(storageDisks []StorageAPI) ([]DiskInfo, []error) { } // Mark root disks as down so as not to heal them. -func markRootDisksAsDown(storageDisks []StorageAPI) { - infos, errs := getHealDiskInfos(storageDisks) - if isTestSetup(infos, errs) { - // Allow healing of disks for test setups to help with testing. - return - } - for i := range storageDisks { - if infos[i].RootDisk { - // We should not heal on root disk. i.e in a situation where the minio-administrator has unmounted a - // defective drive we should not heal a path on the root disk. - logger.Info("Disk `%s` is a root disk. Please ensure the disk is mounted properly, refusing to use root disk.", - storageDisks[i].String()) - storageDisks[i] = nil +func markRootDisksAsDown(storageDisks []StorageAPI, errs []error) { + var infos []DiskInfo + infos, errs = getHealDiskInfos(storageDisks, errs) + if !isTestSetup(infos, errs) { + for i := range storageDisks { + if storageDisks[i] != nil && infos[i].RootDisk { + // We should not heal on root disk. i.e in a situation where the minio-administrator has unmounted a + // defective drive we should not heal a path on the root disk. + logger.Info("Disk `%s` is a root disk. Please ensure the disk is mounted properly, refusing to use root disk.", + storageDisks[i].String()) + storageDisks[i] = nil + } } } } @@ -1326,13 +1334,14 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H } }(storageDisks) - markRootDisksAsDown(storageDisks) - formats, sErrs := loadFormatErasureAll(storageDisks, true) if err = checkFormatErasureValues(formats, s.drivesPerSet); err != nil { return madmin.HealResultItem{}, err } + // Mark all root disks down + markRootDisksAsDown(storageDisks, sErrs) + // Prepare heal-result res = madmin.HealResultItem{ Type: madmin.HealItemMetadata, diff --git a/cmd/erasure.go b/cmd/erasure.go index 26cee44a5..50a6254a2 100644 --- a/cmd/erasure.go +++ b/cmd/erasure.go @@ -154,6 +154,7 @@ func getDisksInfo(disks []StorageAPI, endpoints []string) (disksInfo []madmin.Di UsedSpace: info.Used, AvailableSpace: info.Free, UUID: info.ID, + RootDisk: info.RootDisk, State: diskErrToDriveState(err), } if info.Total > 0 { @@ -175,7 +176,27 @@ func getDisksInfo(disks []StorageAPI, endpoints []string) (disksInfo []madmin.Di onlineDisks[ep]++ } - // Success. + rootDiskCount := 0 + for _, di := range disksInfo { + if di.RootDisk { + rootDiskCount++ + } + } + + if len(disksInfo) == rootDiskCount { + // Success. + return disksInfo, errs, onlineDisks, offlineDisks + } + + // Root disk should be considered offline + for i := range disksInfo { + ep := disksInfo[i].Endpoint + if disksInfo[i].RootDisk { + offlineDisks[ep]++ + onlineDisks[ep]-- + } + } + return disksInfo, errs, onlineDisks, offlineDisks } diff --git a/cmd/format-erasure.go b/cmd/format-erasure.go index dd75c0f06..8b57aacd7 100644 --- a/cmd/format-erasure.go +++ b/cmd/format-erasure.go @@ -335,11 +335,13 @@ func loadFormatErasureAll(storageDisks []StorageAPI, heal bool) ([]*formatErasur return formats, g.Wait() } -func saveFormatErasure(disk StorageAPI, format interface{}, diskID string) error { - if format == nil || disk == nil { +func saveFormatErasure(disk StorageAPI, format *formatErasureV3) error { + if disk == nil || format == nil { return errDiskNotFound } + diskID := format.Erasure.This + if err := makeFormatErasureMetaVolumes(disk); err != nil { return err } @@ -549,7 +551,7 @@ func formatErasureFixLocalDeploymentID(endpoints Endpoints, storageDisks []Stora return nil } format.ID = refFormat.ID - if err := saveFormatErasure(storageDisks[index], format, format.Erasure.This); err != nil { + if err := saveFormatErasure(storageDisks[index], format); err != nil { logger.LogIf(GlobalContext, err) return fmt.Errorf("Unable to save format.json, %w", err) } @@ -695,7 +697,7 @@ func saveFormatErasureAll(ctx context.Context, storageDisks []StorageAPI, format if formats[index] == nil { return errDiskNotFound } - return saveFormatErasure(storageDisks[index], formats[index], formats[index].Erasure.This) + return saveFormatErasure(storageDisks[index], formats[index]) }, index) } @@ -722,13 +724,9 @@ func initStorageDisksWithErrors(endpoints Endpoints) ([]StorageAPI, []error) { g := errgroup.WithNErrs(len(endpoints)) for index := range endpoints { index := index - g.Go(func() error { - storageDisk, err := newStorageAPI(endpoints[index]) - if err != nil { - return err - } - storageDisks[index] = storageDisk - return nil + g.Go(func() (err error) { + storageDisks[index], err = newStorageAPI(endpoints[index]) + return err }, index) } return storageDisks, g.Wait() @@ -773,7 +771,7 @@ func fixFormatErasureV3(storageDisks []StorageAPI, endpoints Endpoints, formats } if formats[i].Erasure.This == "" { formats[i].Erasure.This = formats[i].Erasure.Sets[0][i] - if err := saveFormatErasure(storageDisks[i], formats[i], formats[i].Erasure.This); err != nil { + if err := saveFormatErasure(storageDisks[i], formats[i]); err != nil { return err } } @@ -790,7 +788,7 @@ func fixFormatErasureV3(storageDisks []StorageAPI, endpoints Endpoints, formats } // initFormatErasure - save Erasure format configuration on all disks. -func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, drivesPerSet int, deploymentID string) (*formatErasureV3, error) { +func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, drivesPerSet int, deploymentID string, sErrs []error) (*formatErasureV3, error) { format := newFormatErasureV3(setCount, drivesPerSet) formats := make([]*formatErasureV3, len(storageDisks)) wantAtMost := ecDrivesNoConfig(drivesPerSet) @@ -831,6 +829,9 @@ func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, } } + // Mark all root disks down + markRootDisksAsDown(storageDisks, sErrs) + // Save formats `format.json` across all disks. if err := saveFormatErasureAll(ctx, storageDisks, formats); err != nil { return nil, err diff --git a/cmd/prepare-storage.go b/cmd/prepare-storage.go index 13a56d911..80e6c5e0d 100644 --- a/cmd/prepare-storage.go +++ b/cmd/prepare-storage.go @@ -278,7 +278,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, humanize.Ordinal(zoneCount), setCount, drivesPerSet) // Initialize erasure code format on disks - format, err = initFormatErasure(GlobalContext, storageDisks, setCount, drivesPerSet, deploymentID) + format, err = initFormatErasure(GlobalContext, storageDisks, setCount, drivesPerSet, deploymentID, sErrs) if err != nil { return nil, nil, err } @@ -301,6 +301,9 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, return nil, nil, errFirstDiskWait } + // Mark all root disks down + markRootDisksAsDown(storageDisks, sErrs) + // Following function is added to fix a regressions which was introduced // in release RELEASE.2018-03-16T22-52-12Z after migrating v1 to v2 to v3. // This migration failed to capture '.This' field properly which indicates diff --git a/cmd/storage-interface.go b/cmd/storage-interface.go index 104256cd2..1d0f3e4ef 100644 --- a/cmd/storage-interface.go +++ b/cmd/storage-interface.go @@ -29,6 +29,7 @@ type StorageAPI interface { // Storage operations. IsOnline() bool // Returns true if disk is online. IsLocal() bool + Hostname() string // Returns host name if remote host. Close() error GetDiskID() (string, error) diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 5b02c4b2a..69349578b 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -45,7 +45,6 @@ import ( "github.com/minio/minio/pkg/disk" "github.com/minio/minio/pkg/env" xioutil "github.com/minio/minio/pkg/ioutil" - "github.com/minio/minio/pkg/mountinfo" ) const ( @@ -97,7 +96,7 @@ type xlStorage struct { globalSync bool - diskMount bool // indicates if the path is an actual mount. + rootDisk bool diskID string @@ -240,6 +239,11 @@ func newXLStorage(path string, hostname string) (*xlStorage, error) { return nil, err } + rootDisk, err := disk.IsRootDisk(path) + if err != nil { + return nil, err + } + p := &xlStorage{ diskPath: path, hostname: hostname, @@ -250,13 +254,13 @@ func newXLStorage(path string, hostname string) (*xlStorage, error) { }, }, globalSync: env.Get(config.EnvFSOSync, config.EnableOff) == config.EnableOn, - diskMount: mountinfo.IsLikelyMountPoint(path), // Allow disk usage crawler to run with up to 2 concurrent // I/O ops, if and when activeIOCount reaches this // value disk usage routine suspends the crawler // and waits until activeIOCount reaches below this threshold. maxActiveIOCount: 3, ctx: GlobalContext, + rootDisk: rootDisk, } // Success. @@ -412,16 +416,11 @@ func (s *xlStorage) DiskInfo() (info DiskInfo, err error) { return info, err } - rootDisk, err := disk.IsRootDisk(s.diskPath) - if err != nil { - return info, err - } - info = DiskInfo{ Total: di.Total, Free: di.Free, Used: di.Total - di.Free, - RootDisk: rootDisk, + RootDisk: s.rootDisk, MountPath: s.diskPath, } diff --git a/pkg/disk/root_disk_unix.go b/pkg/disk/root_disk_unix.go index 83d68660e..8af06804d 100644 --- a/pkg/disk/root_disk_unix.go +++ b/pkg/disk/root_disk_unix.go @@ -31,18 +31,32 @@ func IsRootDisk(diskPath string) (bool, error) { if err != nil { return false, err } - rootInfo, err := os.Stat("/etc/hosts") + rootHostsInfo, err := os.Stat("/etc/hosts") + if err != nil { + return false, err + } + rootInfo, err := os.Stat("/") if err != nil { return false, err } diskStat, diskStatOK := diskInfo.Sys().(*syscall.Stat_t) + rootHostsStat, rootHostsStatOK := rootHostsInfo.Sys().(*syscall.Stat_t) rootStat, rootStatOK := rootInfo.Sys().(*syscall.Stat_t) - if diskStatOK && rootStatOK { - if diskStat.Dev == rootStat.Dev { + if diskStatOK && rootHostsStatOK { + if diskStat.Dev == rootHostsStat.Dev { // Indicate if the disk path is on root disk. This is used to indicate the healing // process not to format the drive and end up healing it. rootDisk = true } } + if !rootDisk { + if diskStatOK && rootStatOK { + if diskStat.Dev == rootStat.Dev { + // Indicate if the disk path is on root disk. This is used to indicate the healing + // process not to format the drive and end up healing it. + rootDisk = true + } + } + } return rootDisk, nil } diff --git a/pkg/madmin/info-commands.go b/pkg/madmin/info-commands.go index 2177d1be4..c481dfb8c 100644 --- a/pkg/madmin/info-commands.go +++ b/pkg/madmin/info-commands.go @@ -270,6 +270,7 @@ type ServerProperties struct { // Disk holds Disk information type Disk struct { Endpoint string `json:"endpoint,omitempty"` + RootDisk bool `json:"rootDisk,omitempty"` DrivePath string `json:"path,omitempty"` State string `json:"state,omitempty"` UUID string `json:"uuid,omitempty"`