From b686bb9c836423331da6caa1ef281efa9a4f3ca5 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sat, 31 Oct 2020 01:34:48 -0700 Subject: [PATCH] fix: replaced drive properly by healing the entire drive (#10799) Bonus fixes, we do not need reload format anymore as the replaced drive is healed locally we only need to ensure that drive heal reloads the drive properly. We preserve the UUID of the original order, this means that the replacement in `format.json` doesn't mean that the drive needs to be reloaded into memory anymore. fixes #10791 --- cmd/erasure-server-sets.go | 20 ------- cmd/erasure-sets.go | 115 ++++-------------------------------- cmd/format-erasure.go | 30 ++++------ cmd/fs-v1.go | 6 -- cmd/gateway-unsupported.go | 5 -- cmd/notification.go | 15 ----- cmd/object-api-interface.go | 1 - cmd/peer-rest-client.go | 13 ---- cmd/peer-rest-common.go | 2 - cmd/peer-rest-server.go | 40 ------------- 10 files changed, 20 insertions(+), 227 deletions(-) diff --git a/cmd/erasure-server-sets.go b/cmd/erasure-server-sets.go index 64be83470..4e76a5829 100644 --- a/cmd/erasure-server-sets.go +++ b/cmd/erasure-server-sets.go @@ -1264,17 +1264,6 @@ func (z *erasureServerSets) ListBuckets(ctx context.Context) (buckets []BucketIn return buckets, nil } -func (z *erasureServerSets) ReloadFormat(ctx context.Context, dryRun bool) error { - // No locks needed since reload happens in HealFormat under - // write lock across all nodes. - for _, zone := range z.serverSets { - if err := zone.ReloadFormat(ctx, dryRun); err != nil { - return err - } - } - return nil -} - func (z *erasureServerSets) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) { // Acquire lock on format.json formatLock := z.NewNSLock(ctx, minioMetaBucket, formatConfigFile) @@ -1306,15 +1295,6 @@ func (z *erasureServerSets) HealFormat(ctx context.Context, dryRun bool) (madmin r.After.Drives = append(r.After.Drives, result.After.Drives...) } - // Healing succeeded notify the peers to reload format and re-initialize disks. - // We will not notify peers if healing is not required. - for _, nerr := range globalNotificationSys.ReloadFormat(dryRun) { - if nerr.Err != nil { - logger.GetReqInfo(ctx).SetTags("peerAddress", nerr.Host.String()) - logger.LogIf(ctx, nerr.Err) - } - } - // No heal returned by all serverSets, return errNoHealRequired if countNoHeal == len(z.serverSets) { return r, errNoHealRequired diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index 9640aca3a..8685d5375 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -1141,81 +1141,6 @@ func formatsToDrivesInfo(endpoints Endpoints, formats []*formatErasureV3, sErrs return beforeDrives } -// Reloads the format from the disk, usually called by a remote peer notifier while -// healing in a distributed setup. -func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) { - storageDisks, errs := initStorageDisksWithErrorsWithoutHealthCheck(s.endpoints) - for i, err := range errs { - if err != nil && err != errDiskNotFound { - return fmt.Errorf("Disk %s: %w", s.endpoints[i], err) - } - } - defer func(storageDisks []StorageAPI) { - if err != nil { - closeStorageDisks(storageDisks) - } - }(storageDisks) - - formats, _ := loadFormatErasureAll(storageDisks, false) - if err = checkFormatErasureValues(formats, s.setDriveCount); err != nil { - return err - } - - refFormat, err := getFormatErasureInQuorum(formats) - if err != nil { - return err - } - - s.monitorContextCancel() // turn-off disk monitoring and replace format. - - s.erasureDisksMu.Lock() - - // Replace with new reference format. - s.format = refFormat - - // Close all existing disks and reconnect all the disks. - for _, disk := range storageDisks { - if disk == nil { - continue - } - - diskID, err := disk.GetDiskID() - if err != nil { - continue - } - - m, n, err := findDiskIndexByDiskID(refFormat, diskID) - if err != nil { - continue - } - - if s.erasureDisks[m][n] != nil { - s.erasureDisks[m][n].Close() - } - - s.endpointStrings[m*s.setDriveCount+n] = disk.String() - if !disk.IsLocal() { - // Enable healthcheck disk for remote endpoint. - disk, err = newStorageAPI(disk.Endpoint()) - if err != nil { - continue - } - disk.SetDiskID(diskID) - } - - s.erasureDisks[m][n] = disk - } - - s.erasureDisksMu.Unlock() - - mctx, mctxCancel := context.WithCancel(GlobalContext) - s.monitorContextCancel = mctxCancel - - go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval) - - return nil -} - // If it is a single node Erasure and all disks are root disks, it is most likely a test setup, else it is a production setup. // On a test setup we allow creation of format.json on root disks to help with dev/testing. func isTestSetup(infos []DiskInfo, errs []error) bool { @@ -1335,13 +1260,8 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H } } - // Save formats `format.json` across all disks. - if err = saveFormatErasureAllWithErrs(ctx, storageDisks, sErrs, tmpNewFormats); err != nil { - return madmin.HealResultItem{}, err - } - - refFormat, err = getFormatErasureInQuorum(tmpNewFormats) - if err != nil { + // Save new formats `format.json` on unformatted disks. + if err = saveUnformattedFormat(ctx, storageDisks, tmpNewFormats); err != nil { return madmin.HealResultItem{}, err } @@ -1349,21 +1269,12 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H s.erasureDisksMu.Lock() - // Replace with new reference format. - s.format = refFormat - - // Disconnect/relinquish all existing disks, lockers and reconnect the disks, lockers. - for _, disk := range storageDisks { - if disk == nil { + for index, format := range tmpNewFormats { + if format == nil { continue } - diskID, err := disk.GetDiskID() - if err != nil { - continue - } - - m, n, err := findDiskIndexByDiskID(refFormat, diskID) + m, n, err := findDiskIndexByDiskID(refFormat, format.Erasure.This) if err != nil { continue } @@ -1372,19 +1283,13 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H s.erasureDisks[m][n].Close() } - s.endpointStrings[m*s.setDriveCount+n] = disk.String() - if !disk.IsLocal() { - // Enable healthcheck disk for remote endpoint. - disk, err = newStorageAPI(disk.Endpoint()) - if err != nil { - continue - } - disk.SetDiskID(diskID) - } - s.erasureDisks[m][n] = disk - + s.erasureDisks[m][n] = storageDisks[index] + s.endpointStrings[m*s.setDriveCount+n] = storageDisks[index].String() } + // Replace with new reference format. + s.format = refFormat + s.erasureDisksMu.Unlock() mctx, mctxCancel := context.WithCancel(GlobalContext) diff --git a/cmd/format-erasure.go b/cmd/format-erasure.go index 24bda8d03..08d259425 100644 --- a/cmd/format-erasure.go +++ b/cmd/format-erasure.go @@ -704,28 +704,18 @@ func initErasureMetaVolumesInLocalDisks(storageDisks []StorageAPI, formats []*fo return nil } -// saveFormatErasureAllWithErrs - populates `format.json` on disks in its order. +// saveUnformattedFormat - populates `format.json` on unformatted disks. // also adds `.healing.bin` on the disks which are being actively healed. -func saveFormatErasureAllWithErrs(ctx context.Context, storageDisks []StorageAPI, fErrs []error, formats []*formatErasureV3) error { - g := errgroup.WithNErrs(len(storageDisks)) - - // Write `format.json` to all disks. - for index := range storageDisks { - index := index - g.Go(func() error { - if formats[index] == nil { - return errDiskNotFound - } - if errors.Is(fErrs[index], errUnformattedDisk) { - return saveFormatErasure(storageDisks[index], formats[index], true) - } - return nil - }, index) +func saveUnformattedFormat(ctx context.Context, storageDisks []StorageAPI, formats []*formatErasureV3) error { + for index, format := range formats { + if format == nil { + continue + } + if err := saveFormatErasure(storageDisks[index], format, true); err != nil { + return err + } } - - writeQuorum := getWriteQuorum(len(storageDisks)) - // Wait for the routines to finish. - return reduceWriteQuorumErrs(ctx, g.Wait(), nil, writeQuorum) + return nil } // saveFormatErasureAll - populates `format.json` on disks in its order. diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go index 963fd1f02..f5fb95848 100644 --- a/cmd/fs-v1.go +++ b/cmd/fs-v1.go @@ -1536,12 +1536,6 @@ func (fs *FSObjects) DeleteObjectTags(ctx context.Context, bucket, object string return fs.PutObjectTags(ctx, bucket, object, "", opts) } -// ReloadFormat - no-op for fs, Valid only for Erasure. -func (fs *FSObjects) ReloadFormat(ctx context.Context, dryRun bool) error { - logger.LogIf(ctx, NotImplemented{}) - return NotImplemented{} -} - // HealFormat - no-op for fs, Valid only for Erasure. func (fs *FSObjects) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) { logger.LogIf(ctx, NotImplemented{}) diff --git a/cmd/gateway-unsupported.go b/cmd/gateway-unsupported.go index 6fc3c0973..61fc213d3 100644 --- a/cmd/gateway-unsupported.go +++ b/cmd/gateway-unsupported.go @@ -160,11 +160,6 @@ func (a GatewayUnsupported) DeleteBucketSSEConfig(ctx context.Context, bucket st return NotImplemented{} } -// ReloadFormat - Not implemented stub. -func (a GatewayUnsupported) ReloadFormat(ctx context.Context, dryRun bool) error { - return NotImplemented{} -} - // HealFormat - Not implemented stub func (a GatewayUnsupported) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) { return madmin.HealResultItem{}, NotImplemented{} diff --git a/cmd/notification.go b/cmd/notification.go index 01ec27896..e3777213e 100644 --- a/cmd/notification.go +++ b/cmd/notification.go @@ -140,21 +140,6 @@ func (g *NotificationGroup) Go(ctx context.Context, f func() error, index int, a }() } -// ReloadFormat - calls ReloadFormat REST call on all peers. -func (sys *NotificationSys) ReloadFormat(dryRun bool) []NotificationPeerErr { - ng := WithNPeers(len(sys.peerClients)) - for idx, client := range sys.peerClients { - if client == nil { - continue - } - client := client - ng.Go(GlobalContext, func() error { - return client.ReloadFormat(dryRun) - }, idx, *client.host) - } - return ng.Wait() -} - // DeletePolicy - deletes policy across all peers. func (sys *NotificationSys) DeletePolicy(policyName string) []NotificationPeerErr { ng := WithNPeers(len(sys.peerClients)) diff --git a/cmd/object-api-interface.go b/cmd/object-api-interface.go index 620e1c495..7a727d352 100644 --- a/cmd/object-api-interface.go +++ b/cmd/object-api-interface.go @@ -114,7 +114,6 @@ type ObjectLayer interface { CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error) // Healing operations. - ReloadFormat(ctx context.Context, dryRun bool) error HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (madmin.HealResultItem, error) HealObject(ctx context.Context, bucket, object, versionID string, opts madmin.HealOpts) (madmin.HealResultItem, error) diff --git a/cmd/peer-rest-client.go b/cmd/peer-rest-client.go index e90e0a022..6bae9417d 100644 --- a/cmd/peer-rest-client.go +++ b/cmd/peer-rest-client.go @@ -456,19 +456,6 @@ func (client *peerRESTClient) DeleteBucketMetadata(bucket string) error { return nil } -// ReloadFormat - reload format on the peer node. -func (client *peerRESTClient) ReloadFormat(dryRun bool) error { - values := make(url.Values) - values.Set(peerRESTDryRun, strconv.FormatBool(dryRun)) - - respBody, err := client.call(peerRESTMethodReloadFormat, values, nil, -1) - if err != nil { - return err - } - defer http.DrainBody(respBody) - return nil -} - // cycleServerBloomFilter will cycle the bloom filter to start recording to index y if not already. // The response will contain a bloom filter starting at index x up to, but not including index y. // If y is 0, the response will not update y, but return the currently recorded information diff --git a/cmd/peer-rest-common.go b/cmd/peer-rest-common.go index d62393889..44a8f53e4 100644 --- a/cmd/peer-rest-common.go +++ b/cmd/peer-rest-common.go @@ -50,7 +50,6 @@ const ( peerRESTMethodLoadGroup = "/loadgroup" peerRESTMethodStartProfiling = "/startprofiling" peerRESTMethodDownloadProfilingData = "/downloadprofilingdata" - peerRESTMethodReloadFormat = "/reloadformat" peerRESTMethodCycleBloom = "/cyclebloom" peerRESTMethodTrace = "/trace" peerRESTMethodListen = "/listen" @@ -72,7 +71,6 @@ const ( peerRESTIsGroup = "is-group" peerRESTSignal = "signal" peerRESTProfiler = "profiler" - peerRESTDryRun = "dry-run" peerRESTTraceAll = "all" peerRESTTraceErr = "err" diff --git a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go index 16f4fd154..7d0a0c3da 100644 --- a/cmd/peer-rest-server.go +++ b/cmd/peer-rest-server.go @@ -578,45 +578,6 @@ func (s *peerRESTServer) LoadBucketMetadataHandler(w http.ResponseWriter, r *htt } } -// ReloadFormatHandler - Reload Format. -func (s *peerRESTServer) ReloadFormatHandler(w http.ResponseWriter, r *http.Request) { - if !s.IsValid(w, r) { - s.writeErrorResponse(w, errors.New("Invalid request")) - return - } - - vars := mux.Vars(r) - dryRunString := vars[peerRESTDryRun] - if dryRunString == "" { - s.writeErrorResponse(w, errors.New("dry-run parameter is missing")) - return - } - - var dryRun bool - switch strings.ToLower(dryRunString) { - case "true": - dryRun = true - case "false": - dryRun = false - default: - s.writeErrorResponse(w, errInvalidArgument) - return - } - - objAPI := newObjectLayerFn() - if objAPI == nil { - s.writeErrorResponse(w, errServerNotInitialized) - return - } - - err := objAPI.ReloadFormat(GlobalContext, dryRun) - if err != nil { - s.writeErrorResponse(w, err) - return - } - w.(http.Flusher).Flush() -} - // CycleServerBloomFilterHandler cycles bloom filter on server. func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r *http.Request) { if !s.IsValid(w, r) { @@ -1093,7 +1054,6 @@ func registerPeerRESTHandlers(router *mux.Router) { subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodStartProfiling).HandlerFunc(httpTraceAll(server.StartProfilingHandler)).Queries(restQueries(peerRESTProfiler)...) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodDownloadProfilingData).HandlerFunc(httpTraceHdrs(server.DownloadProfilingDataHandler)) - subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodReloadFormat).HandlerFunc(httpTraceHdrs(server.ReloadFormatHandler)).Queries(restQueries(peerRESTDryRun)...) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodTrace).HandlerFunc(server.TraceHandler) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodListen).HandlerFunc(httpTraceHdrs(server.ListenHandler)) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodBackgroundHealStatus).HandlerFunc(server.BackgroundHealStatusHandler)