fix: replaced drive properly by healing the entire drive (#10799)

Bonus fix: we do not need ReloadFormat anymore. Since the replaced
drive is healed locally, we only need to ensure that the drive heal
reloads the drive properly.

We preserve the drive UUIDs in their original order, which means that
a replacement recorded in `format.json` no longer requires reloading
the format into memory.

fixes #10791
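
The crux of the change, in miniature: `HealFormat` now writes the replacement drive's `format.json` with the UUID of the drive it stands in for, so the UUID-to-slot lookup resolves to the original (set, disk) position and the in-memory layout never needs a reload. Below is a minimal, self-contained sketch; the types and field names are simplified stand-ins modeled on `formatErasureV3` and `findDiskIndexByDiskID` from cmd/format-erasure.go, not verbatim source:

```go
package main

import "fmt"

// formatErasure is a simplified stand-in for formatErasureV3.
type formatErasure struct {
	This string     // UUID of the drive this format.json belongs to
	Sets [][]string // drive UUIDs per erasure set, in a fixed order
}

// findDiskIndexByDiskID mirrors the lookup used in the hunks below:
// it returns the (set, disk) slot a drive UUID occupies in the
// reference format. (Reconstructed for illustration, not verbatim.)
func findDiskIndexByDiskID(ref formatErasure, diskID string) (int, int, error) {
	for m, set := range ref.Sets {
		for n, id := range set {
			if id == diskID {
				return m, n, nil
			}
		}
	}
	return -1, -1, fmt.Errorf("diskID %q not found", diskID)
}

func main() {
	ref := formatErasure{Sets: [][]string{{"uuid-a", "uuid-b"}, {"uuid-c", "uuid-d"}}}

	// A replaced drive is formatted with the UUID it stands in for,
	// so it resolves to the same slot and no format reload is needed.
	healed := formatErasure{This: "uuid-c", Sets: ref.Sets}
	m, n, _ := findDiskIndexByDiskID(ref, healed.This)
	fmt.Println(m, n) // 1 0
}
```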
Harshavardhana · 4 years ago · committed via GitHub
branch master · commit b686bb9c83 · parent 5e5cdc581d
10 changed files:

  1. cmd/erasure-server-sets.go   (20 lines changed)
  2. cmd/erasure-sets.go          (113 lines changed)
  3. cmd/format-erasure.go        (26 lines changed)
  4. cmd/fs-v1.go                 (6 lines changed)
  5. cmd/gateway-unsupported.go   (5 lines changed)
  6. cmd/notification.go          (15 lines changed)
  7. cmd/object-api-interface.go  (1 line changed)
  8. cmd/peer-rest-client.go      (13 lines changed)
  9. cmd/peer-rest-common.go      (2 lines changed)
 10. cmd/peer-rest-server.go      (40 lines changed)

cmd/erasure-server-sets.go

@@ -1264,17 +1264,6 @@ func (z *erasureServerSets) ListBuckets(ctx context.Context) (buckets []BucketIn
 	return buckets, nil
 }

-func (z *erasureServerSets) ReloadFormat(ctx context.Context, dryRun bool) error {
-	// No locks needed since reload happens in HealFormat under
-	// write lock across all nodes.
-	for _, zone := range z.serverSets {
-		if err := zone.ReloadFormat(ctx, dryRun); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
 func (z *erasureServerSets) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) {
 	// Acquire lock on format.json
 	formatLock := z.NewNSLock(ctx, minioMetaBucket, formatConfigFile)
@@ -1306,15 +1295,6 @@ func (z *erasureServerSets) HealFormat(ctx context.Context, dryRun bool) (madmin
 		r.After.Drives = append(r.After.Drives, result.After.Drives...)
 	}

-	// Healing succeeded notify the peers to reload format and re-initialize disks.
-	// We will not notify peers if healing is not required.
-	for _, nerr := range globalNotificationSys.ReloadFormat(dryRun) {
-		if nerr.Err != nil {
-			logger.GetReqInfo(ctx).SetTags("peerAddress", nerr.Host.String())
-			logger.LogIf(ctx, nerr.Err)
-		}
-	}
-
 	// No heal returned by all serverSets, return errNoHealRequired
 	if countNoHeal == len(z.serverSets) {
 		return r, errNoHealRequired

cmd/erasure-sets.go

@@ -1141,81 +1141,6 @@ func formatsToDrivesInfo(endpoints Endpoints, formats []*formatErasureV3, sErrs
 	return beforeDrives
 }

-// Reloads the format from the disk, usually called by a remote peer notifier while
-// healing in a distributed setup.
-func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) {
-	storageDisks, errs := initStorageDisksWithErrorsWithoutHealthCheck(s.endpoints)
-	for i, err := range errs {
-		if err != nil && err != errDiskNotFound {
-			return fmt.Errorf("Disk %s: %w", s.endpoints[i], err)
-		}
-	}
-	defer func(storageDisks []StorageAPI) {
-		if err != nil {
-			closeStorageDisks(storageDisks)
-		}
-	}(storageDisks)
-
-	formats, _ := loadFormatErasureAll(storageDisks, false)
-	if err = checkFormatErasureValues(formats, s.setDriveCount); err != nil {
-		return err
-	}
-
-	refFormat, err := getFormatErasureInQuorum(formats)
-	if err != nil {
-		return err
-	}
-
-	s.monitorContextCancel() // turn-off disk monitoring and replace format.
-
-	s.erasureDisksMu.Lock()
-
-	// Replace with new reference format.
-	s.format = refFormat
-
-	// Close all existing disks and reconnect all the disks.
-	for _, disk := range storageDisks {
-		if disk == nil {
-			continue
-		}
-
-		diskID, err := disk.GetDiskID()
-		if err != nil {
-			continue
-		}
-
-		m, n, err := findDiskIndexByDiskID(refFormat, diskID)
-		if err != nil {
-			continue
-		}
-
-		if s.erasureDisks[m][n] != nil {
-			s.erasureDisks[m][n].Close()
-		}
-
-		s.endpointStrings[m*s.setDriveCount+n] = disk.String()
-		if !disk.IsLocal() {
-			// Enable healthcheck disk for remote endpoint.
-			disk, err = newStorageAPI(disk.Endpoint())
-			if err != nil {
-				continue
-			}
-			disk.SetDiskID(diskID)
-		}
-		s.erasureDisks[m][n] = disk
-	}
-	s.erasureDisksMu.Unlock()
-
-	mctx, mctxCancel := context.WithCancel(GlobalContext)
-	s.monitorContextCancel = mctxCancel
-	go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval)
-	return nil
-}
-
 // If it is a single node Erasure and all disks are root disks, it is most likely a test setup, else it is a production setup.
 // On a test setup we allow creation of format.json on root disks to help with dev/testing.
 func isTestSetup(infos []DiskInfo, errs []error) bool {
@@ -1335,13 +1260,8 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
 		}
 	}

-	// Save formats `format.json` across all disks.
-	if err = saveFormatErasureAllWithErrs(ctx, storageDisks, sErrs, tmpNewFormats); err != nil {
-		return madmin.HealResultItem{}, err
-	}
-
-	refFormat, err = getFormatErasureInQuorum(tmpNewFormats)
-	if err != nil {
+	// Save new formats `format.json` on unformatted disks.
+	if err = saveUnformattedFormat(ctx, storageDisks, tmpNewFormats); err != nil {
 		return madmin.HealResultItem{}, err
 	}
@@ -1349,21 +1269,12 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
 	s.erasureDisksMu.Lock()

-	// Replace with new reference format.
-	s.format = refFormat
-
-	// Disconnect/relinquish all existing disks, lockers and reconnect the disks, lockers.
-	for _, disk := range storageDisks {
-		if disk == nil {
-			continue
-		}
-
-		diskID, err := disk.GetDiskID()
-		if err != nil {
+	for index, format := range tmpNewFormats {
+		if format == nil {
 			continue
 		}

-		m, n, err := findDiskIndexByDiskID(refFormat, diskID)
+		m, n, err := findDiskIndexByDiskID(refFormat, format.Erasure.This)
 		if err != nil {
 			continue
 		}
@@ -1372,18 +1283,12 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
 			s.erasureDisks[m][n].Close()
 		}

-		s.endpointStrings[m*s.setDriveCount+n] = disk.String()
-		if !disk.IsLocal() {
-			// Enable healthcheck disk for remote endpoint.
-			disk, err = newStorageAPI(disk.Endpoint())
-			if err != nil {
-				continue
-			}
-			disk.SetDiskID(diskID)
-		}
-		s.erasureDisks[m][n] = disk
+		s.erasureDisks[m][n] = storageDisks[index]
+		s.endpointStrings[m*s.setDriveCount+n] = storageDisks[index].String()
 	}
+
+	// Replace with new reference format.
+	s.format = refFormat
 	s.erasureDisksMu.Unlock()
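
Net of the two hunks above, the rewritten swap section of `HealFormat` reads roughly as follows. This is reassembled here for readability, not a verbatim excerpt; `s`, `refFormat`, `tmpNewFormats`, and `storageDisks` come from the surrounding function:

```go
s.erasureDisksMu.Lock()
for index, format := range tmpNewFormats {
	if format == nil {
		continue // drive was already formatted; nothing to swap
	}
	// format.Erasure.This carries the preserved UUID, so the healed
	// drive resolves to its original (set, disk) slot.
	m, n, err := findDiskIndexByDiskID(refFormat, format.Erasure.This)
	if err != nil {
		continue
	}
	if s.erasureDisks[m][n] != nil {
		s.erasureDisks[m][n].Close()
	}
	s.erasureDisks[m][n] = storageDisks[index]
	s.endpointStrings[m*s.setDriveCount+n] = storageDisks[index].String()
}
// Replace with new reference format.
s.format = refFormat
s.erasureDisksMu.Unlock()
```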

cmd/format-erasure.go

@@ -704,28 +704,18 @@ func initErasureMetaVolumesInLocalDisks(storageDisks []StorageAPI, formats []*fo
 	return nil
 }

-// saveFormatErasureAllWithErrs - populates `format.json` on disks in its order.
+// saveUnformattedFormat - populates `format.json` on unformatted disks.
 // also adds `.healing.bin` on the disks which are being actively healed.
-func saveFormatErasureAllWithErrs(ctx context.Context, storageDisks []StorageAPI, fErrs []error, formats []*formatErasureV3) error {
-	g := errgroup.WithNErrs(len(storageDisks))
-
-	// Write `format.json` to all disks.
-	for index := range storageDisks {
-		index := index
-		g.Go(func() error {
-			if formats[index] == nil {
-				return errDiskNotFound
-			}
-			if errors.Is(fErrs[index], errUnformattedDisk) {
-				return saveFormatErasure(storageDisks[index], formats[index], true)
-			}
-			return nil
-		}, index)
-	}
-
-	writeQuorum := getWriteQuorum(len(storageDisks))
-	// Wait for the routines to finish.
-	return reduceWriteQuorumErrs(ctx, g.Wait(), nil, writeQuorum)
+func saveUnformattedFormat(ctx context.Context, storageDisks []StorageAPI, formats []*formatErasureV3) error {
+	for index, format := range formats {
+		if format == nil {
+			continue
+		}
+		if err := saveFormatErasure(storageDisks[index], format, true); err != nil {
+			return err
+		}
+	}
+	return nil
 }

 // saveFormatErasureAll - populates `format.json` on disks in its order.
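
For context, a hedged sketch of the calling convention that makes the dropped `fErrs` parameter unnecessary: the caller now leaves `tmpNewFormats[i]` nil for drives that are already formatted, so the nil check inside `saveUnformattedFormat` replaces the old per-disk error check. `newFormatFor` is a hypothetical helper standing in for the format-assignment logic in `HealFormat`:

```go
// Hypothetical shape of the inputs (sketch, not source): entries stay
// nil for drives whose format.json is intact, and carry a fresh format,
// with the preserved UUID, only for drives that reported errUnformattedDisk.
tmpNewFormats := make([]*formatErasureV3, len(storageDisks))
for i, sErr := range sErrs {
	if errors.Is(sErr, errUnformattedDisk) {
		tmpNewFormats[i] = newFormatFor(refFormat, i) // hypothetical helper
	}
}
if err := saveUnformattedFormat(ctx, storageDisks, tmpNewFormats); err != nil {
	return madmin.HealResultItem{}, err
}
```

The serial, fail-fast loop also drops the old write-quorum reduction: a heal that cannot write a fresh format to a replaced drive now surfaces the error directly.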

cmd/fs-v1.go

@@ -1536,12 +1536,6 @@ func (fs *FSObjects) DeleteObjectTags(ctx context.Context, bucket, object string
 	return fs.PutObjectTags(ctx, bucket, object, "", opts)
 }

-// ReloadFormat - no-op for fs, Valid only for Erasure.
-func (fs *FSObjects) ReloadFormat(ctx context.Context, dryRun bool) error {
-	logger.LogIf(ctx, NotImplemented{})
-	return NotImplemented{}
-}
-
 // HealFormat - no-op for fs, Valid only for Erasure.
 func (fs *FSObjects) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) {
 	logger.LogIf(ctx, NotImplemented{})

cmd/gateway-unsupported.go

@@ -160,11 +160,6 @@ func (a GatewayUnsupported) DeleteBucketSSEConfig(ctx context.Context, bucket st
 	return NotImplemented{}
 }

-// ReloadFormat - Not implemented stub.
-func (a GatewayUnsupported) ReloadFormat(ctx context.Context, dryRun bool) error {
-	return NotImplemented{}
-}
-
 // HealFormat - Not implemented stub
 func (a GatewayUnsupported) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) {
 	return madmin.HealResultItem{}, NotImplemented{}

cmd/notification.go

@@ -140,21 +140,6 @@ func (g *NotificationGroup) Go(ctx context.Context, f func() error, index int, a
 	}()
 }

-// ReloadFormat - calls ReloadFormat REST call on all peers.
-func (sys *NotificationSys) ReloadFormat(dryRun bool) []NotificationPeerErr {
-	ng := WithNPeers(len(sys.peerClients))
-	for idx, client := range sys.peerClients {
-		if client == nil {
-			continue
-		}
-		client := client
-		ng.Go(GlobalContext, func() error {
-			return client.ReloadFormat(dryRun)
-		}, idx, *client.host)
-	}
-	return ng.Wait()
-}
-
 // DeletePolicy - deletes policy across all peers.
 func (sys *NotificationSys) DeletePolicy(policyName string) []NotificationPeerErr {
 	ng := WithNPeers(len(sys.peerClients))

cmd/object-api-interface.go

@@ -114,7 +114,6 @@ type ObjectLayer interface {
 	CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error)

 	// Healing operations.
-	ReloadFormat(ctx context.Context, dryRun bool) error
 	HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error)
 	HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (madmin.HealResultItem, error)
 	HealObject(ctx context.Context, bucket, object, versionID string, opts madmin.HealOpts) (madmin.HealResultItem, error)

cmd/peer-rest-client.go

@@ -456,19 +456,6 @@ func (client *peerRESTClient) DeleteBucketMetadata(bucket string) error {
 	return nil
 }

-// ReloadFormat - reload format on the peer node.
-func (client *peerRESTClient) ReloadFormat(dryRun bool) error {
-	values := make(url.Values)
-	values.Set(peerRESTDryRun, strconv.FormatBool(dryRun))
-
-	respBody, err := client.call(peerRESTMethodReloadFormat, values, nil, -1)
-	if err != nil {
-		return err
-	}
-	defer http.DrainBody(respBody)
-	return nil
-}
-
 // cycleServerBloomFilter will cycle the bloom filter to start recording to index y if not already.
 // The response will contain a bloom filter starting at index x up to, but not including index y.
 // If y is 0, the response will not update y, but return the currently recorded information

cmd/peer-rest-common.go

@@ -50,7 +50,6 @@ const (
 	peerRESTMethodLoadGroup             = "/loadgroup"
 	peerRESTMethodStartProfiling        = "/startprofiling"
 	peerRESTMethodDownloadProfilingData = "/downloadprofilingdata"
-	peerRESTMethodReloadFormat          = "/reloadformat"
 	peerRESTMethodCycleBloom            = "/cyclebloom"
 	peerRESTMethodTrace                 = "/trace"
 	peerRESTMethodListen                = "/listen"

@@ -72,7 +71,6 @@ const (
 	peerRESTIsGroup  = "is-group"
 	peerRESTSignal   = "signal"
 	peerRESTProfiler = "profiler"
-	peerRESTDryRun   = "dry-run"
 	peerRESTTraceAll = "all"
 	peerRESTTraceErr = "err"

cmd/peer-rest-server.go

@@ -578,45 +578,6 @@ func (s *peerRESTServer) LoadBucketMetadataHandler(w http.ResponseWriter, r *htt
 	}
 }

-// ReloadFormatHandler - Reload Format.
-func (s *peerRESTServer) ReloadFormatHandler(w http.ResponseWriter, r *http.Request) {
-	if !s.IsValid(w, r) {
-		s.writeErrorResponse(w, errors.New("Invalid request"))
-		return
-	}
-	vars := mux.Vars(r)
-	dryRunString := vars[peerRESTDryRun]
-	if dryRunString == "" {
-		s.writeErrorResponse(w, errors.New("dry-run parameter is missing"))
-		return
-	}
-
-	var dryRun bool
-	switch strings.ToLower(dryRunString) {
-	case "true":
-		dryRun = true
-	case "false":
-		dryRun = false
-	default:
-		s.writeErrorResponse(w, errInvalidArgument)
-		return
-	}
-
-	objAPI := newObjectLayerFn()
-	if objAPI == nil {
-		s.writeErrorResponse(w, errServerNotInitialized)
-		return
-	}
-
-	err := objAPI.ReloadFormat(GlobalContext, dryRun)
-	if err != nil {
-		s.writeErrorResponse(w, err)
-		return
-	}
-
-	w.(http.Flusher).Flush()
-}
-
 // CycleServerBloomFilterHandler cycles bloom filter on server.
 func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r *http.Request) {
 	if !s.IsValid(w, r) {
@@ -1093,7 +1054,6 @@ func registerPeerRESTHandlers(router *mux.Router) {
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodStartProfiling).HandlerFunc(httpTraceAll(server.StartProfilingHandler)).Queries(restQueries(peerRESTProfiler)...)
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodDownloadProfilingData).HandlerFunc(httpTraceHdrs(server.DownloadProfilingDataHandler))
-	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodReloadFormat).HandlerFunc(httpTraceHdrs(server.ReloadFormatHandler)).Queries(restQueries(peerRESTDryRun)...)
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodTrace).HandlerFunc(server.TraceHandler)
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodListen).HandlerFunc(httpTraceHdrs(server.ListenHandler))
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodBackgroundHealStatus).HandlerFunc(server.BackgroundHealStatusHandler)
