From 4752323e1c08d1099c70fe0e3dc4177091073b51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jorge=20Israel=20Pe=C3=B1a?=
Date: Fri, 24 Jul 2020 11:31:51 -0700
Subject: [PATCH] Use hdfs.Readdir() to optimize HDFS directory listings (#10121)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, listing directories on HDFS incurs a per-entry remote Stat()
call penalty, the cost of which can really blow up on directories with
many entries (1,000+), especially when considered in addition to
peripheral calls (such as validation) and the fact that minio is an
intermediary to the client (whereas other clients listed below can query
HDFS directly).

Because listing directories this way is expensive, the Golang HDFS
library provides the [`Client.Open()`] function which creates a
[`FileReader`] that is able to batch multiple calls together through the
[`Readdir()`] function.

This is substantially more efficient for very large directories.

In one case we were witnessing upwards of 20 seconds to list a directory
with 1,500 entries, admittedly large, but the Java hdfs ls utility as
well as the HDFS library sample ls utility were much faster.

Hadoop HDFS DFS (4.02s):

    λ ~/code/minio → use-readdir » time hdfs dfs -ls /directory/with/1500/entries/
    …
    hdfs dfs -ls 5.81s user 0.49s system 156% cpu 4.020 total

Golang HDFS library (0.47s):

    λ ~/code/hdfs → master » time ./hdfs ls -lh /directory/with/1500/entries/
    …
    ./hdfs ls -lh 0.13s user 0.14s system 56% cpu 0.478 total

mc and minio **without** optimization (16.96s):

    λ ~/code/minio → master » time mc ls myhdfs/directory/with/1500/entries/
    …
    ./mc ls 0.22s user 0.29s system 3% cpu 16.968 total

mc and minio **with** optimization (0.40s):

    λ ~/code/minio → use-readdir » time mc ls myhdfs/directory/with/1500/entries/
    …
    ./mc ls 0.13s user 0.28s system 102% cpu 0.403 total

[`Client.Open()`]: https://godoc.org/github.com/colinmarc/hdfs#Client.Open
[`FileReader`]: https://godoc.org/github.com/colinmarc/hdfs#FileReader
[`Readdir()`]: https://godoc.org/github.com/colinmarc/hdfs#FileReader.Readdir
---
 cmd/gateway/hdfs/gateway-hdfs.go | 62 ++++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 6 deletions(-)

diff --git a/cmd/gateway/hdfs/gateway-hdfs.go b/cmd/gateway/hdfs/gateway-hdfs.go
index 66924a19a..0416e77ea 100644
--- a/cmd/gateway/hdfs/gateway-hdfs.go
+++ b/cmd/gateway/hdfs/gateway-hdfs.go
@@ -384,28 +384,78 @@ func (n *hdfsObjects) listDirFactory() minio.ListDirFunc {
 
 // ListObjects lists all blobs in HDFS bucket filtered by prefix.
func (n *hdfsObjects) ListObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (loi minio.ListObjectsInfo, err error) { - if _, err := n.clnt.Stat(n.hdfsPathJoin(bucket)); err != nil { + fileInfos := make(map[string]os.FileInfo) + directoryPath := n.hdfsPathJoin(bucket, prefix) + + if err = n.populateDirectoryListing(directoryPath, fileInfos); err != nil { return loi, hdfsToObjectErr(ctx, err, bucket) } getObjectInfo := func(ctx context.Context, bucket, entry string) (minio.ObjectInfo, error) { - fi, err := n.clnt.Stat(n.hdfsPathJoin(bucket, entry)) - if err != nil { - return minio.ObjectInfo{}, hdfsToObjectErr(ctx, err, bucket, entry) + filePath := path.Clean(n.hdfsPathJoin(bucket, entry)) + fi, ok := fileInfos[filePath] + + // If the file info is not known, this may be a recursive listing and filePath is a + // child of a sub-directory. In this case, obtain that sub-directory's listing. + if !ok { + parentPath := path.Dir(filePath) + + if err := n.populateDirectoryListing(parentPath, fileInfos); err != nil { + return minio.ObjectInfo{}, hdfsToObjectErr(ctx, err, bucket) + } + + fi, ok = fileInfos[filePath] + + if !ok { + err = fmt.Errorf("could not get FileInfo for path '%s'", filePath) + return minio.ObjectInfo{}, hdfsToObjectErr(ctx, err, bucket, entry) + } } - return minio.ObjectInfo{ + + objectInfo := minio.ObjectInfo{ Bucket: bucket, Name: entry, ModTime: fi.ModTime(), Size: fi.Size(), IsDir: fi.IsDir(), AccTime: fi.(*hdfs.FileInfo).AccessTime(), - }, nil + } + + delete(fileInfos, filePath) + + return objectInfo, nil } return minio.ListObjects(ctx, n, bucket, prefix, marker, delimiter, maxKeys, n.listPool, n.listDirFactory(), getObjectInfo, getObjectInfo) } +// Lists a path's direct, first-level entries and populates them in the `fileInfos` cache which maps +// a path entry to an `os.FileInfo`. It also saves the listed path's `os.FileInfo` in the cache. 
+func (n *hdfsObjects) populateDirectoryListing(filePath string, fileInfos map[string]os.FileInfo) error {
+	dirReader, err := n.clnt.Open(filePath)
+	if err != nil {
+		return err
+	}
+	// Close the reader on every return path, including a failed Readdir, so it is not leaked.
+	defer dirReader.Close()
+
+	// Cache the listed path itself under its cleaned form.
+	fileInfos[path.Clean(filePath)] = dirReader.Stat()
+
+	infos, err := dirReader.Readdir(0)
+	if err != nil {
+		return err
+	}
+
+	for _, fileInfo := range infos {
+		// Clean the key so it has the same form as the path.Clean'd key used for the listed path.
+		childPath := path.Clean(n.hdfsPathJoin(filePath, fileInfo.Name()))
+		fileInfos[childPath] = fileInfo
+	}
+
+	return nil
+}
+
 // deleteObject deletes a file path if its empty. If it's successfully deleted,
 // it will recursively move up the tree, deleting empty parent directories
 // until it finds one with files in it. Returns nil for a non-empty directory.