From a20d4568a2ef73de2eb4902c5b42f30ff37a24a7 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 5 Aug 2020 13:31:12 -0700 Subject: [PATCH] fix: make sure to use uniform drive count calculation (#10208) It is possible in situations when server was deployed in asymmetric configuration in the past such as ``` minio server ~/fs{1...4}/disk{1...5} ``` Results in setDriveCount of 10 in older releases but with fairly recent releases we have moved to having server affinity which means that a set drive count ascertained from above config will be now '4' While the object layer make sure that we honor `format.json` the storageClass configuration however was by mistake was using the global value obtained by heuristics. Which leads to prematurely using lower parity without being requested by the an administrator. This PR fixes this behavior. --- cmd/admin-handlers-config-kv.go | 6 +++--- cmd/config-current.go | 12 +++++------- cmd/endpoint-ellipses.go | 26 +++++++++++++------------- cmd/endpoint-ellipses_test.go | 2 +- cmd/erasure-sets.go | 5 +++++ cmd/erasure-zones.go | 4 ++++ cmd/erasure.go | 5 +++++ cmd/fs-v1.go | 5 +++++ cmd/gateway-main.go | 2 +- cmd/gateway-unsupported.go | 5 +++++ cmd/globals.go | 3 --- cmd/lock-rest-server.go | 9 +++++++-- cmd/object-api-interface.go | 2 ++ cmd/server-main.go | 4 ++-- cmd/storage-rest_test.go | 2 +- cmd/xl-storage.go | 11 +++++------ 16 files changed, 64 insertions(+), 39 deletions(-) diff --git a/cmd/admin-handlers-config-kv.go b/cmd/admin-handlers-config-kv.go index 89ffa942d..653c165c4 100644 --- a/cmd/admin-handlers-config-kv.go +++ b/cmd/admin-handlers-config-kv.go @@ -136,7 +136,7 @@ func (a adminAPIHandlers) SetConfigKVHandler(w http.ResponseWriter, r *http.Requ return } - if err = validateConfig(cfg); err != nil { + if err = validateConfig(cfg, objectAPI.SetDriveCount()); err != nil { writeCustomErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrAdminConfigBadJSON), err.Error(), r.URL) return } @@ -271,7 +271,7 @@ func (a adminAPIHandlers) RestoreConfigHistoryKVHandler(w http.ResponseWriter, r return } - if err = validateConfig(cfg); err != nil { + if err = validateConfig(cfg, objectAPI.SetDriveCount()); err != nil { writeCustomErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrAdminConfigBadJSON), err.Error(), r.URL) return } @@ -383,7 +383,7 @@ func (a adminAPIHandlers) SetConfigHandler(w http.ResponseWriter, r *http.Reques return } - if err = validateConfig(cfg); err != nil { + if err = validateConfig(cfg, objectAPI.SetDriveCount()); err != nil { writeCustomErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrAdminConfigBadJSON), err.Error(), r.URL) return } diff --git a/cmd/config-current.go b/cmd/config-current.go index a9fe01c92..13c826391 100644 --- a/cmd/config-current.go +++ b/cmd/config-current.go @@ -213,7 +213,7 @@ var ( globalServerConfigMu sync.RWMutex ) -func validateConfig(s config.Config) error { +func validateConfig(s config.Config, setDriveCount int) error { // Disable merging env values with config for validation. env.SetEnvOff() @@ -233,8 +233,7 @@ func validateConfig(s config.Config) error { } if globalIsErasure { - if _, err := storageclass.LookupConfig(s[config.StorageClassSubSys][config.Default], - globalErasureSetDriveCount); err != nil { + if _, err := storageclass.LookupConfig(s[config.StorageClassSubSys][config.Default], setDriveCount); err != nil { return err } } @@ -311,7 +310,7 @@ func validateConfig(s config.Config) error { globalNotificationSys.ConfiguredTargetIDs()) } -func lookupConfigs(s config.Config) { +func lookupConfigs(s config.Config, setDriveCount int) { ctx := GlobalContext var err error @@ -382,8 +381,7 @@ func lookupConfigs(s config.Config) { globalAPIConfig.init(apiConfig) if globalIsErasure { - globalStorageClass, err = storageclass.LookupConfig(s[config.StorageClassSubSys][config.Default], - globalErasureSetDriveCount) + globalStorageClass, err = storageclass.LookupConfig(s[config.StorageClassSubSys][config.Default], setDriveCount) if err != nil { logger.LogIf(ctx, fmt.Errorf("Unable to initialize storage class config: %w", err)) } @@ -601,7 +599,7 @@ func loadConfig(objAPI ObjectLayer) error { } // Override any values from ENVs. - lookupConfigs(srvCfg) + lookupConfigs(srvCfg, objAPI.SetDriveCount()) // hold the mutex lock before a new config is assigned. globalServerConfigMu.Lock() diff --git a/cmd/endpoint-ellipses.go b/cmd/endpoint-ellipses.go index 136cc750e..4da67e154 100644 --- a/cmd/endpoint-ellipses.go +++ b/cmd/endpoint-ellipses.go @@ -329,28 +329,28 @@ var ( // CreateServerEndpoints - validates and creates new endpoints from input args, supports // both ellipses and without ellipses transparently. func createServerEndpoints(serverAddr string, args ...string) ( - endpointZones EndpointZones, setDriveCount int, - setupType SetupType, err error) { + endpointZones EndpointZones, setupType SetupType, err error) { if len(args) == 0 { - return nil, -1, -1, errInvalidArgument + return nil, -1, errInvalidArgument } + var setDriveCount int if v := env.Get(EnvErasureSetDriveCount, ""); v != "" { setDriveCount, err = strconv.Atoi(v) if err != nil { - return nil, -1, -1, config.ErrInvalidErasureSetSize(err) + return nil, -1, config.ErrInvalidErasureSetSize(err) } } if !ellipses.HasEllipses(args...) { setArgs, err := GetAllSets(uint64(setDriveCount), args...) if err != nil { - return nil, -1, -1, err + return nil, -1, err } endpointList, newSetupType, err := CreateEndpoints(serverAddr, false, setArgs...) if err != nil { - return nil, -1, -1, err + return nil, -1, err } endpointZones = append(endpointZones, ZoneEndpoints{ SetCount: len(setArgs), @@ -358,7 +358,7 @@ func createServerEndpoints(serverAddr string, args ...string) ( Endpoints: endpointList, }) setupType = newSetupType - return endpointZones, len(setArgs[0]), setupType, nil + return endpointZones, setupType, nil } var prevSetupType SetupType @@ -366,25 +366,25 @@ func createServerEndpoints(serverAddr string, args ...string) ( for _, arg := range args { setArgs, err := GetAllSets(uint64(setDriveCount), arg) if err != nil { - return nil, -1, -1, err + return nil, -1, err } var endpointList Endpoints endpointList, setupType, err = CreateEndpoints(serverAddr, foundPrevLocal, setArgs...) if err != nil { - return nil, -1, -1, err + return nil, -1, err } if setDriveCount != 0 && setDriveCount != len(setArgs[0]) { - return nil, -1, -1, fmt.Errorf("All zones should have same drive per set ratio - expected %d, got %d", setDriveCount, len(setArgs[0])) + return nil, -1, fmt.Errorf("All zones should have same drive per set ratio - expected %d, got %d", setDriveCount, len(setArgs[0])) } if prevSetupType != UnknownSetupType && prevSetupType != setupType { - return nil, -1, -1, fmt.Errorf("All zones should be of the same setup-type to maintain the original SLA expectations - expected %s, got %s", prevSetupType, setupType) + return nil, -1, fmt.Errorf("All zones should be of the same setup-type to maintain the original SLA expectations - expected %s, got %s", prevSetupType, setupType) } if err = endpointZones.Add(ZoneEndpoints{ SetCount: len(setArgs), DrivesPerSet: len(setArgs[0]), Endpoints: endpointList, }); err != nil { - return nil, -1, -1, err + return nil, -1, err } foundPrevLocal = endpointList.atleastOneEndpointLocal() if setDriveCount == 0 { @@ -393,5 +393,5 @@ func createServerEndpoints(serverAddr string, args ...string) ( prevSetupType = setupType } - return endpointZones, setDriveCount, setupType, nil + return endpointZones, setupType, nil } diff --git a/cmd/endpoint-ellipses_test.go b/cmd/endpoint-ellipses_test.go index 14cf07f71..7c8674741 100644 --- a/cmd/endpoint-ellipses_test.go +++ b/cmd/endpoint-ellipses_test.go @@ -56,7 +56,7 @@ func TestCreateServerEndpoints(t *testing.T) { for _, testCase := range testCases { testCase := testCase t.Run("", func(t *testing.T) { - _, _, _, err := createServerEndpoints(testCase.serverAddr, testCase.args...) + _, _, err := createServerEndpoints(testCase.serverAddr, testCase.args...) if err != nil && testCase.success { t.Errorf("Expected success but failed instead %s", err) } diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index 9164a91c9..ba5fe4f59 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -371,6 +371,11 @@ func (s *erasureSets) NewNSLock(ctx context.Context, bucket string, objects ...s return s.getHashedSet("").NewNSLock(ctx, bucket, objects...) } +// SetDriveCount returns the current drives per set. +func (s *erasureSets) SetDriveCount() int { + return s.drivesPerSet +} + // StorageUsageInfo - combines output of StorageInfo across all erasure coded object sets. // This only returns disk usage info for Zones to perform placement decision, this call // is not implemented in Object interface and is not meant to be used by other object diff --git a/cmd/erasure-zones.go b/cmd/erasure-zones.go index 69c931299..7738fdd6d 100644 --- a/cmd/erasure-zones.go +++ b/cmd/erasure-zones.go @@ -87,6 +87,10 @@ func (z *erasureZones) NewNSLock(ctx context.Context, bucket string, objects ... return z.zones[0].NewNSLock(ctx, bucket, objects...) } +func (z *erasureZones) SetDriveCount() int { + return z.zones[0].SetDriveCount() +} + type zonesAvailableSpace []zoneAvailableSpace type zoneAvailableSpace struct { diff --git a/cmd/erasure.go b/cmd/erasure.go index ea91f88dc..acd6cbfd0 100644 --- a/cmd/erasure.go +++ b/cmd/erasure.go @@ -72,6 +72,11 @@ func (er erasureObjects) NewNSLock(ctx context.Context, bucket string, objects . return er.nsMutex.NewNSLock(ctx, er.getLockers, bucket, objects...) } +// SetDriveCount returns the current drives per set. +func (er erasureObjects) SetDriveCount() int { + return len(er.getDisks()) +} + // Shutdown function for object storage interface. func (er erasureObjects) Shutdown(ctx context.Context) error { // Add any object layer shutdown activities here. diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go index 2384c612d..8ce09c1bc 100644 --- a/cmd/fs-v1.go +++ b/cmd/fs-v1.go @@ -191,6 +191,11 @@ func (fs *FSObjects) NewNSLock(ctx context.Context, bucket string, objects ...st return fs.nsMutex.NewNSLock(ctx, nil, bucket, objects...) } +// SetDriveCount no-op +func (fs *FSObjects) SetDriveCount() int { + return 0 +} + // Shutdown - should be called when process shuts down. func (fs *FSObjects) Shutdown(ctx context.Context) error { fs.fsFormatRlk.Close() diff --git a/cmd/gateway-main.go b/cmd/gateway-main.go index 1f4836ade..3f82c73a5 100644 --- a/cmd/gateway-main.go +++ b/cmd/gateway-main.go @@ -220,7 +220,7 @@ func StartGateway(ctx *cli.Context, gw Gateway) { srvCfg := newServerConfig() // Override any values from ENVs. - lookupConfigs(srvCfg) + lookupConfigs(srvCfg, 0) // hold the mutex lock before a new config is assigned. globalServerConfigMu.Lock() diff --git a/cmd/gateway-unsupported.go b/cmd/gateway-unsupported.go index 70680b58e..2fb919b3d 100644 --- a/cmd/gateway-unsupported.go +++ b/cmd/gateway-unsupported.go @@ -46,6 +46,11 @@ func (a GatewayUnsupported) NewNSLock(ctx context.Context, bucket string, object return nil } +// SetDriveCount no-op +func (a GatewayUnsupported) SetDriveCount() int { + return 0 +} + // ListMultipartUploads lists all multipart uploads. func (a GatewayUnsupported) ListMultipartUploads(ctx context.Context, bucket string, prefix string, keyMarker string, uploadIDMarker string, delimiter string, maxUploads int) (lmi ListMultipartsInfo, err error) { return lmi, NotImplemented{} diff --git a/cmd/globals.go b/cmd/globals.go index 61063e727..066ddd392 100644 --- a/cmd/globals.go +++ b/cmd/globals.go @@ -108,9 +108,6 @@ var globalCLIContext = struct { }{} var ( - // Indicates set drive count. - globalErasureSetDriveCount int - // Indicates if the running minio server is distributed setup. globalIsDistErasure = false diff --git a/cmd/lock-rest-server.go b/cmd/lock-rest-server.go index 620115c9a..df1de8675 100644 --- a/cmd/lock-rest-server.go +++ b/cmd/lock-rest-server.go @@ -236,6 +236,11 @@ func getLongLivedLocks(interval time.Duration) map[Endpoint][]nameLockRequesterI // // We will ignore the error, and we will retry later to get a resolve on this lock func lockMaintenance(ctx context.Context, interval time.Duration) error { + objAPI := newObjectLayerWithoutSafeModeFn() + if objAPI == nil { + return nil + } + // Validate if long lived locks are indeed clean. // Get list of long lived locks to check for staleness. for lendpoint, nlrips := range getLongLivedLocks(interval) { @@ -275,10 +280,10 @@ func lockMaintenance(ctx context.Context, interval time.Duration) error { } // Read locks we assume quorum for be N/2 success - quorum := globalErasureSetDriveCount / 2 + quorum := objAPI.SetDriveCount() / 2 if nlrip.lri.Writer { // For write locks we need N/2+1 success - quorum = globalErasureSetDriveCount/2 + 1 + quorum = objAPI.SetDriveCount()/2 + 1 } // less than the quorum, we have locks expired. diff --git a/cmd/object-api-interface.go b/cmd/object-api-interface.go index 8e4baf657..c68d1cb00 100644 --- a/cmd/object-api-interface.go +++ b/cmd/object-api-interface.go @@ -64,6 +64,8 @@ const ( // ObjectLayer implements primitives for object API layer. type ObjectLayer interface { + SetDriveCount() int // Only implemented by erasure layer + // Locking operations on object. NewNSLock(ctx context.Context, bucket string, objects ...string) RWLocker diff --git a/cmd/server-main.go b/cmd/server-main.go index 4f4630470..c8d4efa4d 100644 --- a/cmd/server-main.go +++ b/cmd/server-main.go @@ -114,9 +114,9 @@ func serverHandleCmdArgs(ctx *cli.Context) { globalMinioHost, globalMinioPort = mustSplitHostPort(globalMinioAddr) endpoints := strings.Fields(env.Get(config.EnvEndpoints, "")) if len(endpoints) > 0 { - globalEndpoints, globalErasureSetDriveCount, setupType, err = createServerEndpoints(globalCLIContext.Addr, endpoints...) + globalEndpoints, setupType, err = createServerEndpoints(globalCLIContext.Addr, endpoints...) } else { - globalEndpoints, globalErasureSetDriveCount, setupType, err = createServerEndpoints(globalCLIContext.Addr, ctx.Args()...) + globalEndpoints, setupType, err = createServerEndpoints(globalCLIContext.Addr, ctx.Args()...) } logger.FatalIf(err, "Invalid command line arguments") diff --git a/cmd/storage-rest_test.go b/cmd/storage-rest_test.go index 51cb905d3..086d1de28 100644 --- a/cmd/storage-rest_test.go +++ b/cmd/storage-rest_test.go @@ -451,7 +451,7 @@ func newStorageRESTHTTPServerClient(t *testing.T) (*httptest.Server, *storageRES prevGlobalServerConfig := globalServerConfig globalServerConfig = newServerConfig() - lookupConfigs(globalServerConfig) + lookupConfigs(globalServerConfig, 0) restClient := newStorageRESTClient(endpoint) diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index b7ea0acc6..03f4c3099 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -249,7 +249,7 @@ func newXLStorage(path string, hostname string) (*xlStorage, error) { return &b }, }, - globalSync: env.Get(config.EnvFSOSync, config.EnableOn) == config.EnableOn, + globalSync: env.Get(config.EnvFSOSync, config.EnableOff) == config.EnableOn, diskMount: mountinfo.IsLikelyMountPoint(path), // Allow disk usage crawler to run with up to 2 concurrent // I/O ops, if and when activeIOCount reaches this @@ -2082,10 +2082,7 @@ func (s *xlStorage) RenameData(srcVolume, srcPath, dataDir, dstVolume, dstPath s return osErrToFileErr(err) } for _, entry := range entries { - if entry == xlStorageFormatFile { - continue - } - if strings.HasSuffix(entry, slashSeparator) { + if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) { continue } if strings.HasPrefix(entry, "part.") { @@ -2102,6 +2099,7 @@ func (s *xlStorage) RenameData(srcVolume, srcPath, dataDir, dstVolume, dstPath s if err != nil { return osErrToFileErr(err) } + legacyDataPath := pathJoin(dstVolumeDir, dstPath, legacyDataDir) // legacy data dir means its old content, honor system umask. if err = os.MkdirAll(legacyDataPath, 0777); err != nil { @@ -2117,7 +2115,8 @@ func (s *xlStorage) RenameData(srcVolume, srcPath, dataDir, dstVolume, dstPath s } for _, entry := range entries { - if entry == xlStorageFormatFile { + // Skip xl.meta renames further, also ignore any directories such as `legacyDataDir` + if entry == xlStorageFormatFile || strings.HasPrefix(entry, SlashSeparator) { continue }