reference format obtained doesn't need further validation (#8964)

We don't need to call validateXLFormats again once we have obtained
the reference format. At this stage another server may be doing a
disk heal during startup, and once in a while, due to delays, the
validation reports false positives and our server doesn't start.

A format in quorum, used as the reference format, can be assumed
valid, so we proceed further until and unless HealFormat re-inits
the disks after a successful heal.

Also use separate ports for the healing tests to avoid any
conflicts with regular build testing.

Fixes #8884
master
Harshavardhana 5 years ago committed by GitHub
parent 78125ee853
commit d1144c2c7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 28
      buildscripts/verify-healing.sh
  2. 1
      cmd/background-heal-ops.go
  3. 10
      cmd/iam-object-store.go
  4. 11
      cmd/prepare-storage.go
  5. 1
      cmd/xl-sets.go
  6. 1
      cmd/xl-zones.go

@ -34,16 +34,16 @@ function start_minio_3_node() {
export MINIO_SECRET_KEY=minio123 export MINIO_SECRET_KEY=minio123
for i in $(seq 1 3); do for i in $(seq 1 3); do
ARGS+=("http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/6/") ARGS+=("http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/6/")
done done
"${MINIO[@]}" --address ":9001" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9001.log" 2>&1 & "${MINIO[@]}" --address ":8001" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8001.log" 2>&1 &
minio_pids[0]=$! minio_pids[0]=$!
"${MINIO[@]}" --address ":9002" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9002.log" 2>&1 & "${MINIO[@]}" --address ":8002" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8002.log" 2>&1 &
minio_pids[1]=$! minio_pids[1]=$!
"${MINIO[@]}" --address ":9003" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9003.log" 2>&1 & "${MINIO[@]}" --address ":8003" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8003.log" 2>&1 &
minio_pids[2]=$! minio_pids[2]=$!
sleep "$1" sleep "$1"
@ -53,7 +53,7 @@ function start_minio_3_node() {
function check_online() { function check_online() {
for i in $(seq 1 3); do for i in $(seq 1 3); do
if grep -q 'Server switching to safe mode' ${WORK_DIR}/dist-minio-$[9000+$i].log; then if grep -q 'Server switching to safe mode' ${WORK_DIR}/dist-minio-$[8000+$i].log; then
echo "1" echo "1"
fi fi
done done
@ -80,7 +80,7 @@ function perform_test_1() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -98,7 +98,7 @@ function perform_test_1() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -113,7 +113,7 @@ function perform_test_1() {
done done
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -127,7 +127,7 @@ function perform_test_2() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -145,7 +145,7 @@ function perform_test_2() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -160,7 +160,7 @@ function perform_test_2() {
done done
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -174,7 +174,7 @@ function perform_test_3() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -192,7 +192,7 @@ function perform_test_3() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -207,7 +207,7 @@ function perform_test_3() {
done done
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"

@ -75,6 +75,7 @@ func (h *healRoutine) run() {
break break
} }
// Wait and proceed if there are active requests
waitForLowHTTPReq(int32(globalEndpoints.Nodes())) waitForLowHTTPReq(int32(globalEndpoints.Nodes()))
var res madmin.HealResultItem var res madmin.HealResultItem

@ -593,9 +593,13 @@ func listIAMConfigItems(objectAPI ObjectLayer, pathPrefix string, dirs bool,
return return
} }
// Slow down listing and loading for config items to // Attempt a slow down load only when server is
// reduce load on the server // active and initialized.
waitForLowHTTPReq(int32(globalEndpoints.Nodes())) if !globalSafeMode {
// Slow down listing and loading for config items to
// reduce load on the server
waitForLowHTTPReq(int32(globalEndpoints.Nodes()))
}
marker = lo.NextMarker marker = lo.NextMarker
lister := dirList(lo) lister := dirList(lo)

@ -302,17 +302,6 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
return nil, err return nil, err
} }
// Validate all format configs with reference format.
if err = validateXLFormats(format, formatConfigs, endpoints, setCount, drivesPerSet); err != nil {
return nil, err
}
// Get the deploymentID if set.
format.ID, err = formatXLGetDeploymentID(format, formatConfigs)
if err != nil {
return nil, err
}
if format.ID == "" { if format.ID == "" {
// Not a first disk, wait until first disk fixes deploymentID // Not a first disk, wait until first disk fixes deploymentID
if !firstDisk { if !firstDisk {

@ -1664,6 +1664,7 @@ func (s *xlSets) HealObjects(ctx context.Context, bucket, prefix string, healObj
continue continue
} }
// Wait and proceed if there are active requests
waitForLowHTTPReq(int32(s.drivesPerSet)) waitForLowHTTPReq(int32(s.drivesPerSet))
if err := healObject(bucket, entry.Name); err != nil { if err := healObject(bucket, entry.Name); err != nil {

@ -1363,6 +1363,7 @@ func (z *xlZones) HealObjects(ctx context.Context, bucket, prefix string, healOb
continue continue
} }
// Wait and proceed if there are active requests
waitForLowHTTPReq(int32(zoneDrivesPerSet[zoneIndex])) waitForLowHTTPReq(int32(zoneDrivesPerSet[zoneIndex]))
if err := healObject(bucket, entry.Name); err != nil { if err := healObject(bucket, entry.Name); err != nil {

Loading…
Cancel
Save