select: Check if CSV is valid utf8 (#10991)

Check whether the first block of data is valid UTF-8.

Fixes #10970
master
Klaus Post 4 years ago committed by GitHub
parent 7cbca43eb1
commit 02aecb2fc1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 11
      pkg/s3select/csv/errors.go
  2. 17
      pkg/s3select/csv/reader.go

@ -16,6 +16,8 @@
package csv package csv
import "errors"
type s3Error struct { type s3Error struct {
code string code string
message string message string
@ -51,3 +53,12 @@ func errCSVParsingError(err error) *s3Error {
cause: err, cause: err,
} }
} }
// errInvalidTextEncodingError builds the "InvalidTextEncoding" error that is
// returned when input data fails UTF-8 validation. It carries status code 400
// and a freshly allocated underlying cause on every call.
func errInvalidTextEncodingError() *s3Error {
	encodingErr := &s3Error{
		code:       "InvalidTextEncoding",
		message:    "UTF-8 encoding is required.",
		statusCode: 400,
	}
	// Attach the low-level cause separately from the client-facing fields.
	encodingErr.cause = errors.New("invalid utf8 encoding")
	return encodingErr
}

@ -23,6 +23,7 @@ import (
"io" "io"
"runtime" "runtime"
"sync" "sync"
"unicode/utf8"
csv "github.com/minio/minio/pkg/csvparser" csv "github.com/minio/minio/pkg/csvparser"
"github.com/minio/minio/pkg/s3select/sql" "github.com/minio/minio/pkg/s3select/sql"
@ -159,6 +160,9 @@ func (r *Reader) startReaders(newReader func(io.Reader) *csv.Reader) error {
r.err = err r.err = err
return err return err
} }
if !utf8.Valid(b) {
return errInvalidTextEncodingError()
}
reader := newReader(bytes.NewReader(b)) reader := newReader(bytes.NewReader(b))
record, err := reader.Read() record, err := reader.Read()
if err != nil { if err != nil {
@ -181,6 +185,13 @@ func (r *Reader) startReaders(newReader func(io.Reader) *csv.Reader) error {
return make([]byte, csvSplitSize+1024) return make([]byte, csvSplitSize+1024)
} }
// Return first block
next, nextErr := r.nextSplit(csvSplitSize, r.bufferPool.Get().([]byte))
// Check if first block is valid.
if !utf8.Valid(next) {
return errInvalidTextEncodingError()
}
// Create queue // Create queue
r.queue = make(chan *queueItem, runtime.GOMAXPROCS(0)) r.queue = make(chan *queueItem, runtime.GOMAXPROCS(0))
r.input = make(chan *queueItem, runtime.GOMAXPROCS(0)) r.input = make(chan *queueItem, runtime.GOMAXPROCS(0))
@ -192,11 +203,10 @@ func (r *Reader) startReaders(newReader func(io.Reader) *csv.Reader) error {
defer close(r.queue) defer close(r.queue)
defer r.readerWg.Done() defer r.readerWg.Done()
for { for {
next, err := r.nextSplit(csvSplitSize, r.bufferPool.Get().([]byte))
q := queueItem{ q := queueItem{
input: next, input: next,
dst: make(chan [][]string, 1), dst: make(chan [][]string, 1),
err: err, err: nextErr,
} }
select { select {
case <-r.close: case <-r.close:
@ -209,10 +219,11 @@ func (r *Reader) startReaders(newReader func(io.Reader) *csv.Reader) error {
return return
case r.input <- &q: case r.input <- &q:
} }
if err != nil { if nextErr != nil {
// Exit on any error. // Exit on any error.
return return
} }
next, nextErr = r.nextSplit(csvSplitSize, r.bufferPool.Get().([]byte))
} }
}() }()

Loading…
Cancel
Save