From f53d1de87fc56ab0c463722b7f38de7a5de19d91 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 10 Feb 2021 08:49:48 -0800 Subject: [PATCH] fix: missing data on multiple columns reading parquet (#11499) fixes #11413 --- pkg/s3select/internal/parquet-go/page.go | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/pkg/s3select/internal/parquet-go/page.go b/pkg/s3select/internal/parquet-go/page.go index 22c25793e..22f89fe80 100644 --- a/pkg/s3select/internal/parquet-go/page.go +++ b/pkg/s3select/internal/parquet-go/page.go @@ -21,6 +21,7 @@ import ( "context" "errors" "fmt" + "io" "math" "strings" @@ -108,24 +109,39 @@ func readPage( } repLevelsLen = pageHeader.DataPageHeaderV2.GetRepetitionLevelsByteLength() repLevelsBuf = make([]byte, repLevelsLen) - if _, err = thriftReader.Read(repLevelsBuf); err != nil { + + n, err := io.ReadFull(thriftReader, repLevelsBuf) + if err != nil { return nil, err } + if n != int(repLevelsLen) { + return nil, fmt.Errorf("expected parquet header repetition levels %d, got %d", repLevelsLen, n) + } defLevelsLen = pageHeader.DataPageHeaderV2.GetDefinitionLevelsByteLength() defLevelsBuf = make([]byte, defLevelsLen) - if _, err = thriftReader.Read(defLevelsBuf); err != nil { + + n, err = io.ReadFull(thriftReader, defLevelsBuf) + if err != nil { return nil, err } + if n != int(defLevelsLen) { + return nil, fmt.Errorf("expected parquet header definition levels %d, got %d", defLevelsLen, n) + } } dbLen := pageHeader.GetCompressedPageSize() - repLevelsLen - defLevelsLen if dbLen < 0 { return nil, errors.New("parquet: negative data length") } + dataBuf := make([]byte, dbLen) - if _, err = thriftReader.Read(dataBuf); err != nil { + n, err := io.ReadFull(thriftReader, dataBuf) + if err != nil { return nil, err } + if n != int(dbLen) { + return nil, fmt.Errorf("expected parquet data buffer %d, got %d", dbLen, n) + } if dataBuf, err = compressionCodec(metadata.GetCodec()).uncompress(dataBuf); err != nil { return nil, err