reference format obtained doesn't need further validation (#8964)

We don't need to run validateXLFormats again once we have obtained
the reference format. At this stage it is possible that another
server is doing a disk heal during startup, and once in a while,
due to delays, we get false positives and our server doesn't
start.

A format in quorum, used as the reference format, can be assumed
valid, so we proceed further until and unless HealFormat re-inits
the disks after a successful heal.

Also use separate ports for the healing tests to avoid any
conflicts with regular build testing.

Fixes #8884
master
Harshavardhana 4 years ago committed by GitHub
parent 78125ee853
commit d1144c2c7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 28
      buildscripts/verify-healing.sh
  2. 1
      cmd/background-heal-ops.go
  3. 10
      cmd/iam-object-store.go
  4. 11
      cmd/prepare-storage.go
  5. 1
      cmd/xl-sets.go
  6. 1
      cmd/xl-zones.go

@ -34,16 +34,16 @@ function start_minio_3_node() {
export MINIO_SECRET_KEY=minio123
for i in $(seq 1 3); do
ARGS+=("http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/6/")
ARGS+=("http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/6/")
done
"${MINIO[@]}" --address ":9001" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9001.log" 2>&1 &
"${MINIO[@]}" --address ":8001" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8001.log" 2>&1 &
minio_pids[0]=$!
"${MINIO[@]}" --address ":9002" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9002.log" 2>&1 &
"${MINIO[@]}" --address ":8002" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8002.log" 2>&1 &
minio_pids[1]=$!
"${MINIO[@]}" --address ":9003" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9003.log" 2>&1 &
"${MINIO[@]}" --address ":8003" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8003.log" 2>&1 &
minio_pids[2]=$!
sleep "$1"
@ -53,7 +53,7 @@ function start_minio_3_node() {
function check_online() {
for i in $(seq 1 3); do
if grep -q 'Server switching to safe mode' ${WORK_DIR}/dist-minio-$[9000+$i].log; then
if grep -q 'Server switching to safe mode' ${WORK_DIR}/dist-minio-$[8000+$i].log; then
echo "1"
fi
done
@ -80,7 +80,7 @@ function perform_test_1() {
if ! kill "$pid"; then
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log"
cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done
echo "FAILED"
purge "$WORK_DIR"
@ -98,7 +98,7 @@ function perform_test_1() {
if ! kill "$pid"; then
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log"
cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done
echo "FAILED"
purge "$WORK_DIR"
@ -113,7 +113,7 @@ function perform_test_1() {
done
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log"
cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done
echo "FAILED"
purge "$WORK_DIR"
@ -127,7 +127,7 @@ function perform_test_2() {
if ! kill "$pid"; then
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log"
cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done
echo "FAILED"
purge "$WORK_DIR"
@ -145,7 +145,7 @@ function perform_test_2() {
if ! kill "$pid"; then
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log"
cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done
echo "FAILED"
purge "$WORK_DIR"
@ -160,7 +160,7 @@ function perform_test_2() {
done
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log"
cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done
echo "FAILED"
purge "$WORK_DIR"
@ -174,7 +174,7 @@ function perform_test_3() {
if ! kill "$pid"; then
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log"
cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done
echo "FAILED"
purge "$WORK_DIR"
@ -192,7 +192,7 @@ function perform_test_3() {
if ! kill "$pid"; then
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log"
cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done
echo "FAILED"
purge "$WORK_DIR"
@ -207,7 +207,7 @@ function perform_test_3() {
done
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log"
cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done
echo "FAILED"
purge "$WORK_DIR"

@ -75,6 +75,7 @@ func (h *healRoutine) run() {
break
}
// Wait and proceed if there are active requests
waitForLowHTTPReq(int32(globalEndpoints.Nodes()))
var res madmin.HealResultItem

@ -593,9 +593,13 @@ func listIAMConfigItems(objectAPI ObjectLayer, pathPrefix string, dirs bool,
return
}
// Slow down listing and loading for config items to
// reduce load on the server
waitForLowHTTPReq(int32(globalEndpoints.Nodes()))
// Attempt a slow down load only when server is
// active and initialized.
if !globalSafeMode {
// Slow down listing and loading for config items to
// reduce load on the server
waitForLowHTTPReq(int32(globalEndpoints.Nodes()))
}
marker = lo.NextMarker
lister := dirList(lo)

@ -302,17 +302,6 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
return nil, err
}
// Validate all format configs with reference format.
if err = validateXLFormats(format, formatConfigs, endpoints, setCount, drivesPerSet); err != nil {
return nil, err
}
// Get the deploymentID if set.
format.ID, err = formatXLGetDeploymentID(format, formatConfigs)
if err != nil {
return nil, err
}
if format.ID == "" {
// Not a first disk, wait until first disk fixes deploymentID
if !firstDisk {

@ -1664,6 +1664,7 @@ func (s *xlSets) HealObjects(ctx context.Context, bucket, prefix string, healObj
continue
}
// Wait and proceed if there are active requests
waitForLowHTTPReq(int32(s.drivesPerSet))
if err := healObject(bucket, entry.Name); err != nil {

@ -1363,6 +1363,7 @@ func (z *xlZones) HealObjects(ctx context.Context, bucket, prefix string, healOb
continue
}
// Wait and proceed if there are active requests
waitForLowHTTPReq(int32(zoneDrivesPerSet[zoneIndex]))
if err := healObject(bucket, entry.Name); err != nil {

Loading…
Cancel
Save