From d1144c2c7e47194fbae57dd634daa27be9ed43e1 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Fri, 14 Feb 2020 03:31:41 +0530 Subject: [PATCH] reference format obtained doesn't need further validation (#8964) We don't need to run validateXLFormats again once we have obtained the reference format, because at this stage another server may be doing a disk heal during startup; once in a while, due to delays, we get false positives and our server doesn't start. A format in quorum, used as the reference format, can be assumed valid and we proceed further, until and unless HealFormat re-inits the disks after a successful heal. Also use separate ports for the healing tests to avoid any conflicts with regular build testing. Fixes #8884 --- buildscripts/verify-healing.sh | 28 ++++++++++++++-------------- cmd/background-heal-ops.go | 1 + cmd/iam-object-store.go | 10 +++++++--- cmd/prepare-storage.go | 11 ----------- cmd/xl-sets.go | 1 + cmd/xl-zones.go | 1 + 6 files changed, 24 insertions(+), 28 deletions(-) diff --git a/buildscripts/verify-healing.sh b/buildscripts/verify-healing.sh index b72bb749d..b584ff74f 100755 --- a/buildscripts/verify-healing.sh +++ b/buildscripts/verify-healing.sh @@ -34,16 +34,16 @@ function start_minio_3_node() { export MINIO_SECRET_KEY=minio123 for i in $(seq 1 3); do - ARGS+=("http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/6/") + ARGS+=("http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/6/") done - "${MINIO[@]}" --address ":9001" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9001.log" 2>&1 & + "${MINIO[@]}" --address ":8001" 
${ARGS[@]} > "${WORK_DIR}/dist-minio-8001.log" 2>&1 & minio_pids[0]=$! - "${MINIO[@]}" --address ":9002" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9002.log" 2>&1 & + "${MINIO[@]}" --address ":8002" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8002.log" 2>&1 & minio_pids[1]=$! - "${MINIO[@]}" --address ":9003" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9003.log" 2>&1 & + "${MINIO[@]}" --address ":8003" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8003.log" 2>&1 & minio_pids[2]=$! sleep "$1" @@ -53,7 +53,7 @@ function start_minio_3_node() { function check_online() { for i in $(seq 1 3); do - if grep -q 'Server switching to safe mode' ${WORK_DIR}/dist-minio-$[9000+$i].log; then + if grep -q 'Server switching to safe mode' ${WORK_DIR}/dist-minio-$[8000+$i].log; then echo "1" fi done @@ -80,7 +80,7 @@ function perform_test_1() { if ! kill "$pid"; then for i in $(seq 1 3); do echo "server$i log:" - cat "${WORK_DIR}/dist-minio-$[9000+$i].log" + cat "${WORK_DIR}/dist-minio-$[8000+$i].log" done echo "FAILED" purge "$WORK_DIR" @@ -98,7 +98,7 @@ function perform_test_1() { if ! kill "$pid"; then for i in $(seq 1 3); do echo "server$i log:" - cat "${WORK_DIR}/dist-minio-$[9000+$i].log" + cat "${WORK_DIR}/dist-minio-$[8000+$i].log" done echo "FAILED" purge "$WORK_DIR" @@ -113,7 +113,7 @@ function perform_test_1() { done for i in $(seq 1 3); do echo "server$i log:" - cat "${WORK_DIR}/dist-minio-$[9000+$i].log" + cat "${WORK_DIR}/dist-minio-$[8000+$i].log" done echo "FAILED" purge "$WORK_DIR" @@ -127,7 +127,7 @@ function perform_test_2() { if ! kill "$pid"; then for i in $(seq 1 3); do echo "server$i log:" - cat "${WORK_DIR}/dist-minio-$[9000+$i].log" + cat "${WORK_DIR}/dist-minio-$[8000+$i].log" done echo "FAILED" purge "$WORK_DIR" @@ -145,7 +145,7 @@ function perform_test_2() { if ! 
kill "$pid"; then for i in $(seq 1 3); do echo "server$i log:" - cat "${WORK_DIR}/dist-minio-$[9000+$i].log" + cat "${WORK_DIR}/dist-minio-$[8000+$i].log" done echo "FAILED" purge "$WORK_DIR" @@ -160,7 +160,7 @@ function perform_test_2() { done for i in $(seq 1 3); do echo "server$i log:" - cat "${WORK_DIR}/dist-minio-$[9000+$i].log" + cat "${WORK_DIR}/dist-minio-$[8000+$i].log" done echo "FAILED" purge "$WORK_DIR" @@ -174,7 +174,7 @@ function perform_test_3() { if ! kill "$pid"; then for i in $(seq 1 3); do echo "server$i log:" - cat "${WORK_DIR}/dist-minio-$[9000+$i].log" + cat "${WORK_DIR}/dist-minio-$[8000+$i].log" done echo "FAILED" purge "$WORK_DIR" @@ -192,7 +192,7 @@ function perform_test_3() { if ! kill "$pid"; then for i in $(seq 1 3); do echo "server$i log:" - cat "${WORK_DIR}/dist-minio-$[9000+$i].log" + cat "${WORK_DIR}/dist-minio-$[8000+$i].log" done echo "FAILED" purge "$WORK_DIR" @@ -207,7 +207,7 @@ function perform_test_3() { done for i in $(seq 1 3); do echo "server$i log:" - cat "${WORK_DIR}/dist-minio-$[9000+$i].log" + cat "${WORK_DIR}/dist-minio-$[8000+$i].log" done echo "FAILED" purge "$WORK_DIR" diff --git a/cmd/background-heal-ops.go b/cmd/background-heal-ops.go index bce2efda6..8e3e813ed 100644 --- a/cmd/background-heal-ops.go +++ b/cmd/background-heal-ops.go @@ -75,6 +75,7 @@ func (h *healRoutine) run() { break } + // Wait and proceed if there are active requests waitForLowHTTPReq(int32(globalEndpoints.Nodes())) var res madmin.HealResultItem diff --git a/cmd/iam-object-store.go b/cmd/iam-object-store.go index aa5944b14..1d3953f35 100644 --- a/cmd/iam-object-store.go +++ b/cmd/iam-object-store.go @@ -593,9 +593,13 @@ func listIAMConfigItems(objectAPI ObjectLayer, pathPrefix string, dirs bool, return } - // Slow down listing and loading for config items to - // reduce load on the server - waitForLowHTTPReq(int32(globalEndpoints.Nodes())) + // Attempt a slow down load only when server is + // active and initialized. 
+ if !globalSafeMode { + // Slow down listing and loading for config items to + // reduce load on the server + waitForLowHTTPReq(int32(globalEndpoints.Nodes())) + } marker = lo.NextMarker lister := dirList(lo) diff --git a/cmd/prepare-storage.go b/cmd/prepare-storage.go index a1f65a60e..ec337c197 100644 --- a/cmd/prepare-storage.go +++ b/cmd/prepare-storage.go @@ -302,17 +302,6 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, return nil, err } - // Validate all format configs with reference format. - if err = validateXLFormats(format, formatConfigs, endpoints, setCount, drivesPerSet); err != nil { - return nil, err - } - - // Get the deploymentID if set. - format.ID, err = formatXLGetDeploymentID(format, formatConfigs) - if err != nil { - return nil, err - } - if format.ID == "" { // Not a first disk, wait until first disk fixes deploymentID if !firstDisk { diff --git a/cmd/xl-sets.go b/cmd/xl-sets.go index 71df46912..87f4eb84b 100644 --- a/cmd/xl-sets.go +++ b/cmd/xl-sets.go @@ -1664,6 +1664,7 @@ func (s *xlSets) HealObjects(ctx context.Context, bucket, prefix string, healObj continue } + // Wait and proceed if there are active requests waitForLowHTTPReq(int32(s.drivesPerSet)) if err := healObject(bucket, entry.Name); err != nil { diff --git a/cmd/xl-zones.go b/cmd/xl-zones.go index 3d0736919..95f1a0117 100644 --- a/cmd/xl-zones.go +++ b/cmd/xl-zones.go @@ -1363,6 +1363,7 @@ func (z *xlZones) HealObjects(ctx context.Context, bucket, prefix string, healOb continue } + // Wait and proceed if there are active requests waitForLowHTTPReq(int32(zoneDrivesPerSet[zoneIndex])) if err := healObject(bucket, entry.Name); err != nil {