@ -19,7 +19,9 @@ package cmd
import (
import (
"bytes"
"bytes"
"context"
"context"
"crypto/md5"
"crypto/rand"
"crypto/rand"
"encoding/base64"
"encoding/hex"
"encoding/hex"
"errors"
"errors"
"fmt"
"fmt"
@ -70,7 +72,9 @@ type cacheMeta struct {
// Ranges maps cached range to associated filename.
// Ranges maps cached range to associated filename.
Ranges map [ string ] string ` json:"ranges,omitempty" `
Ranges map [ string ] string ` json:"ranges,omitempty" `
// Hits is a counter on the number of times this object has been accessed so far.
// Hits is a counter on the number of times this object has been accessed so far.
Hits int ` json:"hits,omitempty" `
Hits int ` json:"hits,omitempty" `
Bucket string ` json:"bucket,omitempty" `
Object string ` json:"object,omitempty" `
}
}
// RangeInfo has the range, file and range length information for a cached range.
// RangeInfo has the range, file and range length information for a cached range.
@ -130,15 +134,17 @@ type diskCache struct {
online uint32 // ref: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
online uint32 // ref: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
purgeRunning int32
purgeRunning int32
triggerGC chan struct { }
triggerGC chan struct { }
dir string // caching directory
dir string // caching directory
stats CacheDiskStats // disk cache stats for prometheus
stats CacheDiskStats // disk cache stats for prometheus
quotaPct int // max usage in %
quotaPct int // max usage in %
pool sync . Pool
pool sync . Pool
after int // minimum accesses before an object is cached.
after int // minimum accesses before an object is cached.
lowWatermark int
lowWatermark int
highWatermark int
highWatermark int
enableRange bool
enableRange bool
commitWriteback bool
retryWritebackCh chan ObjectInfo
// nsMutex namespace lock
// nsMutex namespace lock
nsMutex * nsLockMap
nsMutex * nsLockMap
// Object functions pointing to the corresponding functions of backend implementation.
// Object functions pointing to the corresponding functions of backend implementation.
@ -156,15 +162,17 @@ func newDiskCache(ctx context.Context, dir string, config cache.Config) (*diskCa
return nil , fmt . Errorf ( "Unable to initialize '%s' dir, %w" , dir , err )
return nil , fmt . Errorf ( "Unable to initialize '%s' dir, %w" , dir , err )
}
}
cache := diskCache {
cache := diskCache {
dir : dir ,
dir : dir ,
triggerGC : make ( chan struct { } , 1 ) ,
triggerGC : make ( chan struct { } , 1 ) ,
stats : CacheDiskStats { Dir : dir } ,
stats : CacheDiskStats { Dir : dir } ,
quotaPct : quotaPct ,
quotaPct : quotaPct ,
after : config . After ,
after : config . After ,
lowWatermark : config . WatermarkLow ,
lowWatermark : config . WatermarkLow ,
highWatermark : config . WatermarkHigh ,
highWatermark : config . WatermarkHigh ,
enableRange : config . Range ,
enableRange : config . Range ,
online : 1 ,
commitWriteback : config . CommitWriteback ,
retryWritebackCh : make ( chan ObjectInfo , 10000 ) ,
online : 1 ,
pool : sync . Pool {
pool : sync . Pool {
New : func ( ) interface { } {
New : func ( ) interface { } {
b := disk . AlignedBlock ( int ( cacheBlkSize ) )
b := disk . AlignedBlock ( int ( cacheBlkSize ) )
@ -174,6 +182,9 @@ func newDiskCache(ctx context.Context, dir string, config cache.Config) (*diskCa
nsMutex : newNSLock ( false ) ,
nsMutex : newNSLock ( false ) ,
}
}
go cache . purgeWait ( ctx )
go cache . purgeWait ( ctx )
if cache . commitWriteback {
go cache . scanCacheWritebackFailures ( ctx )
}
cache . diskSpaceAvailable ( 0 ) // update if cache usage is already high.
cache . diskSpaceAvailable ( 0 ) // update if cache usage is already high.
cache . NewNSLockFn = func ( ctx context . Context , cachePath string ) RWLocker {
cache . NewNSLockFn = func ( ctx context . Context , cachePath string ) RWLocker {
return cache . nsMutex . NewNSLock ( ctx , nil , cachePath , "" )
return cache . nsMutex . NewNSLock ( ctx , nil , cachePath , "" )
@ -323,6 +334,12 @@ func (c *diskCache) purge(ctx context.Context) {
// stat all cached file ranges and cacheDataFile.
// stat all cached file ranges and cacheDataFile.
cachedFiles := fiStatFn ( meta . Ranges , cacheDataFile , pathJoin ( c . dir , name ) )
cachedFiles := fiStatFn ( meta . Ranges , cacheDataFile , pathJoin ( c . dir , name ) )
objInfo := meta . ToObjectInfo ( "" , "" )
objInfo := meta . ToObjectInfo ( "" , "" )
// prevent gc from clearing un-synced commits. This metadata is present when
// cache writeback commit setting is enabled.
status , ok := objInfo . UserDefined [ writeBackStatusHeader ]
if ok && status != CommitComplete . String ( ) {
return nil
}
cc := cacheControlOpts ( objInfo )
cc := cacheControlOpts ( objInfo )
for fname , fi := range cachedFiles {
for fname , fi := range cachedFiles {
if cc != nil {
if cc != nil {
@ -524,7 +541,11 @@ func (c *diskCache) saveMetadata(ctx context.Context, bucket, object string, met
}
}
defer f . Close ( )
defer f . Close ( )
m := & cacheMeta { Version : cacheMetaVersion }
m := & cacheMeta {
Version : cacheMetaVersion ,
Bucket : bucket ,
Object : object ,
}
if err := jsonLoad ( f , m ) ; err != nil && err != io . EOF {
if err := jsonLoad ( f , m ) ; err != nil && err != io . EOF {
return err
return err
}
}
@ -561,7 +582,6 @@ func (c *diskCache) saveMetadata(ctx context.Context, bucket, object string, met
m . Meta [ "etag" ] = etag
m . Meta [ "etag" ] = etag
}
}
}
}
m . Hits ++
m . Hits ++
m . Checksum = CacheChecksumInfoV1 { Algorithm : HighwayHash256S . String ( ) , Blocksize : cacheBlkSize }
m . Checksum = CacheChecksumInfoV1 { Algorithm : HighwayHash256S . String ( ) , Blocksize : cacheBlkSize }
@ -573,22 +593,22 @@ func getCacheSHADir(dir, bucket, object string) string {
}
}
// Cache data to disk with bitrot checksum added for each block of 1MB
// Cache data to disk with bitrot checksum added for each block of 1MB
func ( c * diskCache ) bitrotWriteToCache ( cachePath , fileName string , reader io . Reader , size uint64 ) ( int64 , error ) {
func ( c * diskCache ) bitrotWriteToCache ( cachePath , fileName string , reader io . Reader , size uint64 ) ( int64 , string , error ) {
if err := os . MkdirAll ( cachePath , 0777 ) ; err != nil {
if err := os . MkdirAll ( cachePath , 0777 ) ; err != nil {
return 0 , err
return 0 , "" , err
}
}
filePath := pathJoin ( cachePath , fileName )
filePath := pathJoin ( cachePath , fileName )
if filePath == "" || reader == nil {
if filePath == "" || reader == nil {
return 0 , errInvalidArgument
return 0 , "" , errInvalidArgument
}
}
if err := checkPathLength ( filePath ) ; err != nil {
if err := checkPathLength ( filePath ) ; err != nil {
return 0 , err
return 0 , "" , err
}
}
f , err := os . Create ( filePath )
f , err := os . Create ( filePath )
if err != nil {
if err != nil {
return 0 , osErrToFileErr ( err )
return 0 , "" , osErrToFileErr ( err )
}
}
defer f . Close ( )
defer f . Close ( )
@ -598,12 +618,12 @@ func (c *diskCache) bitrotWriteToCache(cachePath, fileName string, reader io.Rea
bufp := c . pool . Get ( ) . ( * [ ] byte )
bufp := c . pool . Get ( ) . ( * [ ] byte )
defer c . pool . Put ( bufp )
defer c . pool . Put ( bufp )
md5Hash := md5 . New ( )
var n , n2 int
var n , n2 int
for {
for {
n , err = io . ReadFull ( reader , * bufp )
n , err = io . ReadFull ( reader , * bufp )
if err != nil && err != io . EOF && err != io . ErrUnexpectedEOF {
if err != nil && err != io . EOF && err != io . ErrUnexpectedEOF {
return 0 , err
return 0 , "" , err
}
}
eof := err == io . EOF || err == io . ErrUnexpectedEOF
eof := err == io . EOF || err == io . ErrUnexpectedEOF
if n == 0 && size != 0 {
if n == 0 && size != 0 {
@ -612,21 +632,28 @@ func (c *diskCache) bitrotWriteToCache(cachePath, fileName string, reader io.Rea
}
}
h . Reset ( )
h . Reset ( )
if _ , err = h . Write ( ( * bufp ) [ : n ] ) ; err != nil {
if _ , err = h . Write ( ( * bufp ) [ : n ] ) ; err != nil {
return 0 , err
return 0 , "" , err
}
}
hashBytes := h . Sum ( nil )
hashBytes := h . Sum ( nil )
// compute md5Hash of original data stream if writeback commit to cache
if c . commitWriteback {
if _ , err = md5Hash . Write ( ( * bufp ) [ : n ] ) ; err != nil {
return 0 , "" , err
}
}
if _ , err = f . Write ( hashBytes ) ; err != nil {
if _ , err = f . Write ( hashBytes ) ; err != nil {
return 0 , err
return 0 , "" , err
}
}
if n2 , err = f . Write ( ( * bufp ) [ : n ] ) ; err != nil {
if n2 , err = f . Write ( ( * bufp ) [ : n ] ) ; err != nil {
return 0 , err
return 0 , "" , err
}
}
bytesWritten += int64 ( n2 )
bytesWritten += int64 ( n2 )
if eof {
if eof {
break
break
}
}
}
}
return bytesWritten , nil
return bytesWritten , base64 . StdEncoding . EncodeToString ( md5Hash . Sum ( nil ) ) , nil
}
}
func newCacheEncryptReader ( content io . Reader , bucket , object string , metadata map [ string ] string ) ( r io . Reader , err error ) {
func newCacheEncryptReader ( content io . Reader , bucket , object string , metadata map [ string ] string ) ( r io . Reader , err error ) {
@ -663,41 +690,41 @@ func newCacheEncryptMetadata(bucket, object string, metadata map[string]string)
}
}
// Caches the object to disk
// Caches the object to disk
func ( c * diskCache ) Put ( ctx context . Context , bucket , object string , data io . Reader , size int64 , rs * HTTPRangeSpec , opts ObjectOptions , incHitsOnly bool ) error {
func ( c * diskCache ) Put ( ctx context . Context , bucket , object string , data io . Reader , size int64 , rs * HTTPRangeSpec , opts ObjectOptions , incHitsOnly bool ) ( oi ObjectInfo , err error ) {
if ! c . diskSpaceAvailable ( size ) {
if ! c . diskSpaceAvailable ( size ) {
io . Copy ( ioutil . Discard , data )
io . Copy ( ioutil . Discard , data )
return errDiskFull
return oi , errDiskFull
}
}
cachePath := getCacheSHADir ( c . dir , bucket , object )
cachePath := getCacheSHADir ( c . dir , bucket , object )
cLock := c . NewNSLockFn ( ctx , cachePath )
cLock := c . NewNSLockFn ( ctx , cachePath )
if err := cLock . GetLock ( globalOperationTimeout ) ; err != nil {
if err := cLock . GetLock ( globalOperationTimeout ) ; err != nil {
return err
return oi , err
}
}
defer cLock . Unlock ( )
defer cLock . Unlock ( )
meta , _ , numHits , err := c . statCache ( ctx , cachePath )
meta , _ , numHits , err := c . statCache ( ctx , cachePath )
// Case where object not yet cached
// Case where object not yet cached
if os . IsNotExist ( err ) && c . after >= 1 {
if os . IsNotExist ( err ) && c . after >= 1 {
return c . saveMetadata ( ctx , bucket , object , opts . UserDefined , size , nil , "" , false )
return oi , c . saveMetadata ( ctx , bucket , object , opts . UserDefined , size , nil , "" , false )
}
}
// Case where object already has a cache metadata entry but not yet cached
// Case where object already has a cache metadata entry but not yet cached
if err == nil && numHits < c . after {
if err == nil && numHits < c . after {
cETag := extractETag ( meta . Meta )
cETag := extractETag ( meta . Meta )
bETag := extractETag ( opts . UserDefined )
bETag := extractETag ( opts . UserDefined )
if cETag == bETag {
if cETag == bETag {
return c . saveMetadata ( ctx , bucket , object , opts . UserDefined , size , nil , "" , false )
return oi , c . saveMetadata ( ctx , bucket , object , opts . UserDefined , size , nil , "" , false )
}
}
incHitsOnly = true
incHitsOnly = true
}
}
if rs != nil {
if rs != nil {
return c . putRange ( ctx , bucket , object , data , size , rs , opts )
return oi , c . putRange ( ctx , bucket , object , data , size , rs , opts )
}
}
if ! c . diskSpaceAvailable ( size ) {
if ! c . diskSpaceAvailable ( size ) {
return errDiskFull
return oi , errDiskFull
}
}
if err := os . MkdirAll ( cachePath , 0777 ) ; err != nil {
if err := os . MkdirAll ( cachePath , 0777 ) ; err != nil {
return err
return oi , err
}
}
var metadata = cloneMSS ( opts . UserDefined )
var metadata = cloneMSS ( opts . UserDefined )
var reader = data
var reader = data
@ -705,25 +732,39 @@ func (c *diskCache) Put(ctx context.Context, bucket, object string, data io.Read
if globalCacheKMS != nil {
if globalCacheKMS != nil {
reader , err = newCacheEncryptReader ( data , bucket , object , metadata )
reader , err = newCacheEncryptReader ( data , bucket , object , metadata )
if err != nil {
if err != nil {
return err
return oi , err
}
}
actualSize , _ = sio . EncryptedSize ( uint64 ( size ) )
actualSize , _ = sio . EncryptedSize ( uint64 ( size ) )
}
}
n , err := c . bitrotWriteToCache ( cachePath , cacheDataFile , reader , actualSize )
n , md5sum , err := c . bitrotWriteToCache ( cachePath , cacheDataFile , reader , actualSize )
if IsErr ( err , baseErrs ... ) {
if IsErr ( err , baseErrs ... ) {
// take the cache drive offline
// take the cache drive offline
c . setOffline ( )
c . setOffline ( )
}
}
if err != nil {
if err != nil {
removeAll ( cachePath )
removeAll ( cachePath )
return err
return oi , err
}
}
if actualSize != uint64 ( n ) {
if actualSize != uint64 ( n ) {
removeAll ( cachePath )
removeAll ( cachePath )
return IncompleteBody { Bucket : bucket , Object : object }
return oi , IncompleteBody { Bucket : bucket , Object : object }
}
}
return c . saveMetadata ( ctx , bucket , object , metadata , n , nil , "" , incHitsOnly )
if c . commitWriteback {
metadata [ "content-md5" ] = md5sum
if md5bytes , err := base64 . StdEncoding . DecodeString ( md5sum ) ; err == nil {
metadata [ "etag" ] = hex . EncodeToString ( md5bytes )
}
metadata [ writeBackStatusHeader ] = CommitPending . String ( )
}
return ObjectInfo {
Bucket : bucket ,
Name : object ,
ETag : metadata [ "etag" ] ,
Size : n ,
UserDefined : metadata ,
} ,
c . saveMetadata ( ctx , bucket , object , metadata , n , nil , "" , incHitsOnly )
}
}
// Caches the range to disk
// Caches the range to disk
@ -754,7 +795,7 @@ func (c *diskCache) putRange(ctx context.Context, bucket, object string, data io
}
}
cacheFile := MustGetUUID ( )
cacheFile := MustGetUUID ( )
n , err := c . bitrotWriteToCache ( cachePath , cacheFile , reader , actualSize )
n , _ , err := c . bitrotWriteToCache ( cachePath , cacheFile , reader , actualSize )
if IsErr ( err , baseErrs ... ) {
if IsErr ( err , baseErrs ... ) {
// take the cache drive offline
// take the cache drive offline
c . setOffline ( )
c . setOffline ( )
@ -954,3 +995,36 @@ func (c *diskCache) Exists(ctx context.Context, bucket, object string) bool {
}
}
return true
return true
}
}
// queues writeback upload failures on server startup
func ( c * diskCache ) scanCacheWritebackFailures ( ctx context . Context ) {
defer close ( c . retryWritebackCh )
filterFn := func ( name string , typ os . FileMode ) error {
if name == minioMetaBucket {
// Proceed to next file.
return nil
}
cacheDir := pathJoin ( c . dir , name )
meta , _ , _ , err := c . statCachedMeta ( ctx , cacheDir )
if err != nil {
return nil
}
objInfo := meta . ToObjectInfo ( "" , "" )
status , ok := objInfo . UserDefined [ writeBackStatusHeader ]
if ! ok || status == CommitComplete . String ( ) {
return nil
}
select {
case c . retryWritebackCh <- objInfo :
default :
}
return nil
}
if err := readDirFilterFn ( c . dir , filterFn ) ; err != nil {
logger . LogIf ( ctx , err )
return
}
}