@ -35,12 +35,11 @@ import (
)
)
const (
const (
dataUsageObjName = "usage.json"
dataUsageObjName = ".usage.json"
dataUsageCacheName = "usage-cache.bin"
dataUsageCacheName = ".usage-cache.bin"
dataUsageBucketCacheDir = "usage-caches"
envDataUsageCrawlConf = "MINIO_DISK_USAGE_CRAWL_ENABLE"
dataUsageCrawlConf = "MINIO_DISK_USAGE_CRAWL"
envDataUsageCrawlDelay = "MINIO_DISK_USAGE_CRAWL_DELAY"
dataUsageCrawlDelay = "MINIO_DISK_USAGE_CRAWL_DELAY"
envDataUsageCrawlDebug = "MINIO_DISK_USAGE_CRAWL_DEBUG"
dataUsageDebug = true
dataUsageSleepPerFolder = 1 * time . Millisecond
dataUsageSleepPerFolder = 1 * time . Millisecond
dataUsageSleepDefMult = 10.0
dataUsageSleepDefMult = 10.0
dataUsageUpdateDirCycles = 16
dataUsageUpdateDirCycles = 16
@ -51,11 +50,9 @@ const (
// initDataUsageStats will start the crawler unless disabled.
// initDataUsageStats will start the crawler unless disabled.
func initDataUsageStats ( ) {
func initDataUsageStats ( ) {
dataUsageEnabled , err := config . ParseBool ( env . Get ( dataUsageCrawlConf , config . EnableOn ) )
if env . Get ( envDataUsageCrawlConf , config . EnableOn ) == config . EnableOn {
if err == nil && ! dataUsageEnabled {
return
}
go runDataUsageInfoUpdateRoutine ( )
go runDataUsageInfoUpdateRoutine ( )
}
}
}
// runDataUsageInfoUpdateRoutine will contain the main crawler.
// runDataUsageInfoUpdateRoutine will contain the main crawler.
@ -89,9 +86,6 @@ func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer) {
// data usage calculator role for its lifetime.
// data usage calculator role for its lifetime.
break
break
}
}
if dataUsageDebug {
logger . Info ( color . Green ( "runDataUsageInfo:" ) + " Starting crawler master" )
}
for {
for {
select {
select {
case <- ctx . Done ( ) :
case <- ctx . Done ( ) :
@ -111,14 +105,11 @@ func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer) {
// storeDataUsageInBackend will store all objects sent on the gui channel until closed.
// storeDataUsageInBackend will store all objects sent on the gui channel until closed.
func storeDataUsageInBackend ( ctx context . Context , objAPI ObjectLayer , gui <- chan DataUsageInfo ) {
func storeDataUsageInBackend ( ctx context . Context , objAPI ObjectLayer , gui <- chan DataUsageInfo ) {
for dataUsageInfo := range gui {
for dataUsageInfo := range gui {
dataUsageJSON , err := json . MarshalIndent ( dataUsageInfo , "" , " " )
dataUsageJSON , err := json . Marshal ( dataUsageInfo )
if err != nil {
if err != nil {
logger . LogIf ( ctx , err )
logger . LogIf ( ctx , err )
continue
continue
}
}
if dataUsageDebug {
logger . Info ( color . Green ( "data-usage:" ) + " Received update: %s" , string ( dataUsageJSON ) )
}
size := int64 ( len ( dataUsageJSON ) )
size := int64 ( len ( dataUsageJSON ) )
r , err := hash . NewReader ( bytes . NewReader ( dataUsageJSON ) , size , "" , "" , size , false )
r , err := hash . NewReader ( bytes . NewReader ( dataUsageJSON ) , size , "" , "" , size , false )
if err != nil {
if err != nil {
@ -172,6 +163,9 @@ type folderScanner struct {
newCache dataUsageCache
newCache dataUsageCache
waitForLowActiveIO func ( )
waitForLowActiveIO func ( )
dataUsageCrawlMult float64
dataUsageCrawlDebug bool
newFolders [ ] cachedFolder
newFolders [ ] cachedFolder
existingFolders [ ] cachedFolder
existingFolders [ ] cachedFolder
}
}
@ -194,12 +188,6 @@ func sleepDuration(d time.Duration, x float64) {
// If final is not provided the folders found are returned from the function.
// If final is not provided the folders found are returned from the function.
func ( f * folderScanner ) scanQueuedLevels ( ctx context . Context , folders [ ] cachedFolder , final bool ) ( [ ] cachedFolder , error ) {
func ( f * folderScanner ) scanQueuedLevels ( ctx context . Context , folders [ ] cachedFolder , final bool ) ( [ ] cachedFolder , error ) {
var nextFolders [ ] cachedFolder
var nextFolders [ ] cachedFolder
delayMult := dataUsageSleepDefMult
if mult := os . Getenv ( dataUsageCrawlDelay ) ; mult != "" {
if d , err := strconv . ParseFloat ( mult , 64 ) ; err == nil {
delayMult = d
}
}
done := ctx . Done ( )
done := ctx . Done ( )
for _ , folder := range folders {
for _ , folder := range folders {
select {
select {
@ -207,8 +195,9 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
return nil , ctx . Err ( )
return nil , ctx . Err ( )
default :
default :
}
}
f . waitForLowActiveIO ( )
f . waitForLowActiveIO ( )
sleepDuration ( dataUsageSleepPerFolder , delay Mult )
sleepDuration ( dataUsageSleepPerFolder , f . dataUsag eCraw lMult )
cache := dataUsageEntry { }
cache := dataUsageEntry { }
thisHash := hashPath ( folder . name )
thisHash := hashPath ( folder . name )
@ -218,14 +207,14 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
entName = path . Clean ( path . Join ( folder . name , entName ) )
entName = path . Clean ( path . Join ( folder . name , entName ) )
bucket , _ := path2BucketObjectWithBasePath ( f . root , entName )
bucket , _ := path2BucketObjectWithBasePath ( f . root , entName )
if bucket == "" {
if bucket == "" {
if dataUsageDebug {
if f . dataUsageCrawl Debug {
logger . Info ( color . Green ( "data-usage:" ) + " no bucket (%s,%s)" , f . root , entName )
logger . Info ( color . Green ( "data-usage:" ) + " no bucket (%s,%s)" , f . root , entName )
}
}
return nil
return nil
}
}
if isReservedOrInvalidBucket ( bucket , false ) {
if isReservedOrInvalidBucket ( bucket , false ) {
if dataUsageDebug {
if f . dataUsageCrawl Debug {
logger . Info ( color . Green ( "data-usage:" ) + " invalid bucket: %v, entry: %v" , bucket , entName )
logger . Info ( color . Green ( "data-usage:" ) + " invalid bucket: %v, entry: %v" , bucket , entName )
}
}
return nil
return nil
@ -257,12 +246,12 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
}
}
f . waitForLowActiveIO ( )
f . waitForLowActiveIO ( )
// Dynamic time delay.
// Dynamic time delay.
t := time . Now ( )
t := UTC Now( )
// Get file size, ignore errors.
// Get file size, ignore errors.
size , err := f . getSize ( Item { Path : path . Join ( f . root , entName ) , Typ : typ } )
size , err := f . getSize ( Item { Path : path . Join ( f . root , entName ) , Typ : typ } )
sleepDuration ( time . Since ( t ) , delay Mult )
sleepDuration ( time . Since ( t ) , f . dataUsag eCraw lMult )
if err == errSkipFile {
if err == errSkipFile {
return nil
return nil
}
}
@ -284,12 +273,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
// deepScanFolder will deep scan a folder and return the size if no error occurs.
// deepScanFolder will deep scan a folder and return the size if no error occurs.
func ( f * folderScanner ) deepScanFolder ( ctx context . Context , folder string ) ( * dataUsageEntry , error ) {
func ( f * folderScanner ) deepScanFolder ( ctx context . Context , folder string ) ( * dataUsageEntry , error ) {
var cache dataUsageEntry
var cache dataUsageEntry
delayMult := dataUsageSleepDefMult
if mult := os . Getenv ( dataUsageCrawlDelay ) ; mult != "" {
if d , err := strconv . ParseFloat ( mult , 64 ) ; err == nil {
delayMult = d
}
}
done := ctx . Done ( )
done := ctx . Done ( )
var addDir func ( entName string , typ os . FileMode ) error
var addDir func ( entName string , typ os . FileMode ) error
@ -307,11 +291,12 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat
dirStack = append ( dirStack , entName )
dirStack = append ( dirStack , entName )
err := readDirFn ( path . Join ( dirStack ... ) , addDir )
err := readDirFn ( path . Join ( dirStack ... ) , addDir )
dirStack = dirStack [ : len ( dirStack ) - 1 ]
dirStack = dirStack [ : len ( dirStack ) - 1 ]
sleepDuration ( dataUsageSleepPerFolder , delay Mult )
sleepDuration ( dataUsageSleepPerFolder , f . dataUsag eCraw lMult )
return err
return err
}
}
// Dynamic time delay.
// Dynamic time delay.
t := time . Now ( )
t := UTC Now( )
// Get file size, ignore errors.
// Get file size, ignore errors.
dirStack = append ( dirStack , entName )
dirStack = append ( dirStack , entName )
@ -321,7 +306,7 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat
size , err := f . getSize ( Item { Path : fileName , Typ : typ } )
size , err := f . getSize ( Item { Path : fileName , Typ : typ } )
// Don't sleep for really small amount of time
// Don't sleep for really small amount of time
sleepDuration ( time . Since ( t ) , delay Mult )
sleepDuration ( time . Since ( t ) , f . dataUsag eCraw lMult )
if err == errSkipFile {
if err == errSkipFile {
return nil
return nil
@ -344,14 +329,25 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat
// Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler.
// Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler.
// If the supplied context is canceled the function will return at the first chance.
// If the supplied context is canceled the function will return at the first chance.
func updateUsage ( ctx context . Context , basePath string , cache dataUsageCache , waitForLowActiveIO func ( ) , getSize getSizeFn ) ( dataUsageCache , error ) {
func updateUsage ( ctx context . Context , basePath string , cache dataUsageCache , waitForLowActiveIO func ( ) , getSize getSizeFn ) ( dataUsageCache , error ) {
t := UTCNow ( )
dataUsageDebug := env . Get ( envDataUsageCrawlDebug , config . EnableOff ) == config . EnableOn
defer func ( ) {
if dataUsageDebug {
logger . Info ( color . Green ( "updateUsage" ) + " Crawl time at %s: %v" , basePath , time . Since ( t ) )
}
} ( )
if cache . Info . Name == "" {
if cache . Info . Name == "" {
cache . Info . Name = dataUsageRoot
cache . Info . Name = dataUsageRoot
}
}
var logPrefix , logSuffix string
if dataUsageDebug {
delayMult , err := strconv . ParseFloat ( env . Get ( envDataUsageCrawlDelay , "10.0" ) , 64 )
logPrefix = color . Green ( "data-usage: " )
if err != nil {
logSuffix = color . Blue ( " - %v + %v" , basePath , cache . Info . Name )
logger . LogIf ( ctx , err )
delayMult = dataUsageSleepDefMult
}
}
s := folderScanner {
s := folderScanner {
root : basePath ,
root : basePath ,
getSize : getSize ,
getSize : getSize ,
@ -360,6 +356,12 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
waitForLowActiveIO : waitForLowActiveIO ,
waitForLowActiveIO : waitForLowActiveIO ,
newFolders : nil ,
newFolders : nil ,
existingFolders : nil ,
existingFolders : nil ,
dataUsageCrawlMult : delayMult ,
dataUsageCrawlDebug : dataUsageDebug ,
}
if s . dataUsageCrawlDebug {
logger . Info ( color . Green ( "runDataUsageInfo:" ) + " Starting crawler master" )
}
}
done := ctx . Done ( )
done := ctx . Done ( )
@ -369,14 +371,21 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
if cache . Info . Name != dataUsageRoot {
if cache . Info . Name != dataUsageRoot {
flattenLevels --
flattenLevels --
}
}
if dataUsageDebug {
var logPrefix , logSuffix string
if s . dataUsageCrawlDebug {
logPrefix = color . Green ( "data-usage: " )
logSuffix = color . Blue ( " - %v + %v" , basePath , cache . Info . Name )
}
if s . dataUsageCrawlDebug {
logger . Info ( logPrefix + "Cycle: %v" + logSuffix , cache . Info . NextCycle )
logger . Info ( logPrefix + "Cycle: %v" + logSuffix , cache . Info . NextCycle )
}
}
// Always scan flattenLevels deep. Cache root is level 0.
// Always scan flattenLevels deep. Cache root is level 0.
todo := [ ] cachedFolder { { name : cache . Info . Name } }
todo := [ ] cachedFolder { { name : cache . Info . Name } }
for i := 0 ; i < flattenLevels ; i ++ {
for i := 0 ; i < flattenLevels ; i ++ {
if dataUsageDebug {
if s . dataUsageCrawl Debug {
logger . Info ( logPrefix + "Level %v, scanning %v directories." + logSuffix , i , len ( todo ) )
logger . Info ( logPrefix + "Level %v, scanning %v directories." + logSuffix , i , len ( todo ) )
}
}
select {
select {
@ -392,9 +401,10 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
}
}
}
}
if dataUsageDebug {
if s . dataUsageCrawl Debug {
logger . Info ( logPrefix + "New folders: %v" + logSuffix , s . newFolders )
logger . Info ( logPrefix + "New folders: %v" + logSuffix , s . newFolders )
}
}
// Add new folders first
// Add new folders first
for _ , folder := range s . newFolders {
for _ , folder := range s . newFolders {
select {
select {
@ -419,9 +429,10 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
}
}
}
}
if dataUsageDebug {
if s . dataUsageCrawl Debug {
logger . Info ( logPrefix + "Existing folders: %v" + logSuffix , len ( s . existingFolders ) )
logger . Info ( logPrefix + "Existing folders: %v" + logSuffix , len ( s . existingFolders ) )
}
}
// Do selective scanning of existing folders.
// Do selective scanning of existing folders.
for _ , folder := range s . existingFolders {
for _ , folder := range s . existingFolders {
select {
select {
@ -448,7 +459,7 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
s . newCache . replaceHashed ( h , folder . parent , * du )
s . newCache . replaceHashed ( h , folder . parent , * du )
}
}
s . newCache . Info . LastUpdate = time . Now ( )
s . newCache . Info . LastUpdate = UTC Now( )
s . newCache . Info . NextCycle ++
s . newCache . Info . NextCycle ++
return s . newCache , nil
return s . newCache , nil
}
}