Add metadata parsing to be inside mutex to slow down (#8952)

Adding mutex slows down the crawler to avoid large
spikes in CPU, also add millisecond interval jitter
in calculation of disk usage to slow down the spikes
further.
master
Harshavardhana 5 years ago committed by GitHub
parent b1bfd75fcf
commit 49df290270
  1. 12
      cmd/data-usage.go

@ -20,6 +20,7 @@ import (
"bytes"
"context"
"encoding/json"
"math/rand"
"os"
"path/filepath"
"sync"
@ -159,13 +160,19 @@ func updateUsage(basePath string, doneCh <-chan struct{}, waitForLowActiveIO fun
}
numWorkers := 4
walkInterval := 1 * time.Millisecond
var mutex sync.Mutex // Mutex to update dataUsageInfo
r := rand.New(rand.NewSource(UTCNow().UnixNano()))
fastWalk(basePath, numWorkers, doneCh, func(path string, typ os.FileMode) error {
// Wait for I/O to go down.
waitForLowActiveIO()
// Randomize sleep intervals, to stagger the walk.
defer time.Sleep(time.Duration(r.Float64() * float64(walkInterval)))
bucket, entry := path2BucketObjectWithBasePath(basePath, path)
if bucket == "" {
return nil
@ -183,6 +190,9 @@ func updateUsage(basePath string, doneCh <-chan struct{}, waitForLowActiveIO fun
return nil
}
mutex.Lock()
defer mutex.Unlock()
if typ&os.ModeDir != 0 {
return nil
}
@ -192,12 +202,10 @@ func updateUsage(basePath string, doneCh <-chan struct{}, waitForLowActiveIO fun
return errSkipFile
}
mutex.Lock()
dataUsageInfo.ObjectsCount++
dataUsageInfo.ObjectsTotalSize += uint64(size)
dataUsageInfo.BucketsSizes[bucket] += uint64(size)
dataUsageInfo.ObjectsSizesHistogram[objSizeToHistoInterval(uint64(size))]++
mutex.Unlock()
return nil
})

Loading…
Cancel
Save