S3 Select: Parsing tweaks (#8261)

* Don't output empty lines.
* Trim whitespace from byte to int/float/bool conversions.
master
Klaus Post 5 years ago committed by Harshavardhana
parent cb01516a26
commit dac1cf5a9a
  1. 10
      pkg/s3select/select.go
  2. 11
      pkg/s3select/sql/statement.go
  3. 22
      pkg/s3select/sql/value.go
  4. 467
      pkg/s3select/sql/value_test.go

@ -470,13 +470,15 @@ func (s3Select *S3Select) Evaluate(w http.ResponseWriter) {
outputRecord = s3Select.outputRecord()
outputQueue[len(outputQueue)-1] = outputRecord
}
if err = s3Select.statement.Eval(inputRecord, outputRecord); err != nil {
break
}
if outputRecord == nil {
var ok bool
ok, err = s3Select.statement.Eval(inputRecord, outputRecord)
if !ok || err != nil {
// This should not be written.
// Remove it from the queue.
outputQueue = outputQueue[:len(outputQueue)-1]
if err != nil {
break
}
continue
}

@ -219,11 +219,12 @@ func (e *SelectStatement) AggregateRow(input Record) error {
// Eval - evaluates the Select statement for the given record. It
// applies only to non-aggregation queries.
func (e *SelectStatement) Eval(input, output Record) error {
// The function returns whether the statement passed the WHERE clause and should be outputted.
func (e *SelectStatement) Eval(input, output Record) (bool, error) {
ok, err := e.isPassingWhereClause(input)
if err != nil || !ok {
// Either error or row did not pass where clause
return err
return false, err
}
if e.selectAST.Expression.All {
@ -234,13 +235,13 @@ func (e *SelectStatement) Eval(input, output Record) error {
if e.limitValue > -1 {
e.outputCount++
}
return output.CopyFrom(input)
return true, output.CopyFrom(input)
}
for i, expr := range e.selectAST.Expression.Expressions {
v, err := expr.evalNode(input)
if err != nil {
return err
return false, err
}
// Pick output column names
@ -258,7 +259,7 @@ func (e *SelectStatement) Eval(input, output Record) error {
e.outputCount++
}
return nil
return true, nil
}
// LimitReached - returns true if the number of records output has

@ -514,7 +514,7 @@ func inferTypeForArithOp(a *Value) error {
return nil
}
err := fmt.Errorf("Could not convert %s to a number", string(a.value.([]byte)))
err := fmt.Errorf("Could not convert %q to a number", string(a.value.([]byte)))
return errInvalidDataType(err)
}
@ -522,29 +522,29 @@ func inferTypeForArithOp(a *Value) error {
// Converts untyped value into int. The bool return implies success -
// it returns false only if there is a conversion failure.
func (v *Value) bytesToInt() (int64, bool) {
func (v Value) bytesToInt() (int64, bool) {
bytes, _ := v.ToBytes()
i, err := strconv.ParseInt(string(bytes), 10, 64)
i, err := strconv.ParseInt(strings.TrimSpace(string(bytes)), 10, 64)
return i, err == nil
}
// Converts untyped value into float. The bool return implies success
// - it returns false only if there is a conversion failure.
func (v *Value) bytesToFloat() (float64, bool) {
func (v Value) bytesToFloat() (float64, bool) {
bytes, _ := v.ToBytes()
i, err := strconv.ParseFloat(string(bytes), 64)
i, err := strconv.ParseFloat(strings.TrimSpace(string(bytes)), 64)
return i, err == nil
}
// Converts untyped value into bool. The second bool return implies
// success - it returns false in case of a conversion failure.
func (v *Value) bytesToBool() (val bool, ok bool) {
func (v Value) bytesToBool() (val bool, ok bool) {
bytes, _ := v.ToBytes()
ok = true
switch strings.ToLower(string(bytes)) {
case "t", "true":
switch strings.ToLower(strings.TrimSpace(string(bytes))) {
case "t", "true", "1":
val = true
case "f", "false":
case "f", "false", "0":
val = false
default:
ok = false
@ -552,8 +552,8 @@ func (v *Value) bytesToBool() (val bool, ok bool) {
return val, ok
}
// bytesToString - never fails
func (v *Value) bytesToString() string {
// bytesToString - never fails, but returns empty string if value is not bytes.
func (v Value) bytesToString() string {
bytes, _ := v.ToBytes()
return string(bytes)
}

@ -19,6 +19,7 @@ package sql
import (
"fmt"
"math"
"strconv"
"testing"
"time"
)
@ -219,3 +220,469 @@ func TestValue_CSVString(t *testing.T) {
})
}
}
// TestValue_bytesToInt verifies the conversion of untyped byte values to
// int64: plain and signed decimals, the int64 boundaries, out-of-range
// input, surrounding whitespace, and notations that must be rejected.
func TestValue_bytesToInt(t *testing.T) {
	type fields struct {
		value interface{}
	}
	tests := []struct {
		name   string
		fields fields
		want   int64
		wantOK bool
	}{
		{
			name: "zero",
			fields: fields{
				value: []byte("0"),
			},
			want:   0,
			wantOK: true,
		},
		{
			name: "minuszero",
			fields: fields{
				value: []byte("-0"),
			},
			want:   0,
			wantOK: true,
		},
		{
			name: "one",
			fields: fields{
				value: []byte("1"),
			},
			want:   1,
			wantOK: true,
		},
		{
			name: "minusone",
			fields: fields{
				value: []byte("-1"),
			},
			want:   -1,
			wantOK: true,
		},
		{
			name: "plusone",
			fields: fields{
				value: []byte("+1"),
			},
			want:   1,
			wantOK: true,
		},
		{
			name: "max",
			fields: fields{
				value: []byte(strconv.FormatInt(math.MaxInt64, 10)),
			},
			want:   math.MaxInt64,
			wantOK: true,
		},
		{
			name: "min",
			fields: fields{
				value: []byte(strconv.FormatInt(math.MinInt64, 10)),
			},
			want:   math.MinInt64,
			wantOK: true,
		},
		{
			name: "max-overflow",
			fields: fields{
				value: []byte("9223372036854775808"),
			},
			// On a range error strconv.ParseInt returns the largest
			// representable value of the matching sign.
			want:   math.MaxInt64,
			wantOK: false,
		},
		{
			name: "min-underflow",
			fields: fields{
				value: []byte("-9223372036854775809"),
			},
			// On a range error strconv.ParseInt returns the smallest
			// representable value of the matching sign.
			want:   math.MinInt64,
			wantOK: false,
		},
		{
			name: "zerospace",
			fields: fields{
				value: []byte(" 0"),
			},
			want:   0,
			wantOK: true,
		},
		{
			name: "onespace",
			fields: fields{
				value: []byte("1 "),
			},
			want:   1,
			wantOK: true,
		},
		{
			name: "minusonespace",
			fields: fields{
				value: []byte(" -1 "),
			},
			want:   -1,
			wantOK: true,
		},
		{
			name: "plusonespace",
			fields: fields{
				value: []byte("\t+1\t"),
			},
			want:   1,
			wantOK: true,
		},
		{
			// Exponent notation is not an integer.
			name: "scientific",
			fields: fields{
				value: []byte("3e5"),
			},
			want:   0,
			wantOK: false,
		},
		{
			// No support for prefixes
			name: "hex",
			fields: fields{
				value: []byte("0xff"),
			},
			want:   0,
			wantOK: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Built as a plain value, like the float and bool tests.
			v := Value{
				value: tt.fields.value,
			}
			got, got1 := v.bytesToInt()
			if got != tt.want {
				t.Errorf("bytesToInt() got = %v, want %v", got, tt.want)
			}
			if got1 != tt.wantOK {
				t.Errorf("bytesToInt() got1 = %v, want %v", got1, tt.wantOK)
			}
		})
	}
}
// TestValue_bytesToFloat verifies the conversion of untyped byte values to
// float64: integer-looking input, values around the int64 and float64
// limits, out-of-range input, surrounding whitespace, scientific notation,
// and notations that must be rejected.
func TestValue_bytesToFloat(t *testing.T) {
	type fields struct {
		value interface{}
	}
	tests := []struct {
		name   string
		fields fields
		want   float64
		wantOK bool
	}{
		// Copied from TestValue_bytesToInt.
		{
			name: "zero",
			fields: fields{
				value: []byte("0"),
			},
			want:   0,
			wantOK: true,
		},
		{
			// Negative zero compares equal to zero, so want is plain 0.
			name: "minuszero",
			fields: fields{
				value: []byte("-0"),
			},
			want:   0,
			wantOK: true,
		},
		{
			name: "one",
			fields: fields{
				value: []byte("1"),
			},
			want:   1,
			wantOK: true,
		},
		{
			name: "minusone",
			fields: fields{
				value: []byte("-1"),
			},
			want:   -1,
			wantOK: true,
		},
		{
			name: "plusone",
			fields: fields{
				value: []byte("+1"),
			},
			want:   1,
			wantOK: true,
		},
		{
			name: "maxint",
			fields: fields{
				value: []byte(strconv.FormatInt(math.MaxInt64, 10)),
			},
			want:   math.MaxInt64,
			wantOK: true,
		},
		{
			name: "minint",
			fields: fields{
				value: []byte(strconv.FormatInt(math.MinInt64, 10)),
			},
			want:   math.MinInt64,
			wantOK: true,
		},
		{
			name: "max-overflow-int",
			fields: fields{
				value: []byte("9223372036854775808"),
			},
			// Out of range for int64 but a valid float: 2^63, which is
			// also what math.MaxInt64 rounds to as a float64.
			want:   math.MaxInt64,
			wantOK: true,
		},
		{
			name: "min-underflow-int",
			fields: fields{
				value: []byte("-9223372036854775809"),
			},
			// Out of range for int64 but a valid float: it rounds to
			// -2^63, the float64 value of math.MinInt64.
			want:   math.MinInt64,
			wantOK: true,
		},
		{
			name: "max",
			fields: fields{
				value: []byte(strconv.FormatFloat(math.MaxFloat64, 'g', -1, 64)),
			},
			want:   math.MaxFloat64,
			wantOK: true,
		},
		{
			name: "min",
			fields: fields{
				value: []byte(strconv.FormatFloat(-math.MaxFloat64, 'g', -1, 64)),
			},
			want:   -math.MaxFloat64,
			wantOK: true,
		},
		{
			name: "max-overflow",
			fields: fields{
				value: []byte("1.797693134862315708145274237317043567981e+309"),
			},
			// On a range error strconv.ParseFloat returns +Inf.
			want:   math.Inf(1),
			wantOK: false,
		},
		{
			name: "min-underflow",
			fields: fields{
				value: []byte("-1.797693134862315708145274237317043567981e+309"),
			},
			// On a range error strconv.ParseFloat returns -Inf.
			want:   math.Inf(-1),
			wantOK: false,
		},
		{
			name: "smallest-pos",
			fields: fields{
				value: []byte(strconv.FormatFloat(math.SmallestNonzeroFloat64, 'g', -1, 64)),
			},
			want:   math.SmallestNonzeroFloat64,
			wantOK: true,
		},
		{
			// Distinct name so the subtest can be selected with -run and
			// is not reported as "smallest-pos#01".
			name: "smallest-neg",
			fields: fields{
				value: []byte(strconv.FormatFloat(-math.SmallestNonzeroFloat64, 'g', -1, 64)),
			},
			want:   -math.SmallestNonzeroFloat64,
			wantOK: true,
		},
		{
			name: "zerospace",
			fields: fields{
				value: []byte(" 0"),
			},
			want:   0,
			wantOK: true,
		},
		{
			name: "onespace",
			fields: fields{
				value: []byte("1 "),
			},
			want:   1,
			wantOK: true,
		},
		{
			name: "minusonespace",
			fields: fields{
				value: []byte(" -1 "),
			},
			want:   -1,
			wantOK: true,
		},
		{
			name: "plusonespace",
			fields: fields{
				value: []byte("\t+1\t"),
			},
			want:   1,
			wantOK: true,
		},
		{
			name: "scientific",
			fields: fields{
				value: []byte("3e5"),
			},
			want:   300000,
			wantOK: true,
		},
		{
			// No support for prefixes
			name: "hex",
			fields: fields{
				value: []byte("0xff"),
			},
			want:   0,
			wantOK: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			v := Value{
				value: tt.fields.value,
			}
			got, got1 := v.bytesToFloat()
			if got != tt.want {
				t.Errorf("bytesToFloat() got = %v, want %v", got, tt.want)
			}
			if got1 != tt.wantOK {
				t.Errorf("bytesToFloat() got1 = %v, want %v", got1, tt.wantOK)
			}
		})
	}
}
// TestValue_bytesToBool verifies the conversion of untyped byte values to
// bool. Each row gives the raw text, the expected value and whether the
// conversion is expected to succeed.
func TestValue_bytesToBool(t *testing.T) {
	type boolCase struct {
		name    string
		raw     string
		wantVal bool
		wantOk  bool
	}
	cases := []boolCase{
		// Canonical spellings and their one-letter forms.
		{name: "true", raw: "true", wantVal: true, wantOk: true},
		{name: "false", raw: "false", wantVal: false, wantOk: true},
		{name: "t", raw: "t", wantVal: true, wantOk: true},
		{name: "f", raw: "f", wantVal: false, wantOk: true},
		// Numeric spellings.
		{name: "1", raw: "1", wantVal: true, wantOk: true},
		{name: "0", raw: "0", wantVal: false, wantOk: true},
		// Surrounding whitespace.
		{name: "truespace", raw: " true ", wantVal: true, wantOk: true},
		{name: "truetabs", raw: "\ttrue\t", wantVal: true, wantOk: true},
		// Upper-case input.
		{name: "TRUE", raw: "TRUE", wantVal: true, wantOk: true},
		{name: "FALSE", raw: "FALSE", wantVal: false, wantOk: true},
		// Anything else is a conversion failure.
		{name: "invalid", raw: "no", wantVal: false, wantOk: false},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			subject := Value{value: []byte(tc.raw)}
			gotVal, gotOk := subject.bytesToBool()
			if gotVal != tc.wantVal {
				t.Errorf("bytesToBool() gotVal = %v, want %v", gotVal, tc.wantVal)
			}
			if gotOk != tc.wantOk {
				t.Errorf("bytesToBool() gotOk = %v, want %v", gotOk, tc.wantOk)
			}
		})
	}
}

Loading…
Cancel
Save