minio/pkg/s3select/sql/value.go

/*
 * Minio Cloud Storage, (C) 2019 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sql

import (
	"errors"
	"fmt"
	"math"
	"strconv"
	"strings"
	"time"
)

// Sentinel errors returned by the arithmetic and comparison helpers
// defined in this file.
var (
	// Errors produced while evaluating arithmetic operations.
	errArithMismatchedTypes = errors.New("cannot perform arithmetic on mismatched types")
	errArithInvalidOperator = errors.New("invalid arithmetic operator")
	errArithDivideByZero    = errors.New("cannot divide by 0")

	// Errors produced while evaluating comparison operations.
	errCmpMismatchedTypes     = errors.New("cannot compare values of different types")
	errCmpInvalidBoolOperator = errors.New("invalid comparison operator for boolean arguments")
)

// vType represents the concrete type of a `Value`.
type vType int

// Valid values for vType. Numbering starts at 1, so the zero value of
// vType does not correspond to any of the types below.
const (
	// NULL, i.e. a missing value
	typeNull vType = iota + 1

	// boolean
	typeBool

	// string
	typeString

	// 64-bit signed integer
	typeInt

	// 64-bit floating point
	typeFloat

	// timestamp type
	typeTimestamp

	// This type refers to untyped values, e.g. as read from CSV
	typeBytes
)

// Value represents a value of restricted type reduced from an
// expression represented by an ASTNode. The payload is kept in
// `value` and `vType` records which concrete type it holds.
//
// In cases where we are fetching data from a data source (like csv),
// the type may not be determined yet. In these cases, a byte-slice is
// used.
type Value struct {
	// value is the payload. The constructors and setters in this file
	// store bool, string, int64, float64, time.Time or []byte here, and
	// leave it nil for NULL.
	value interface{}
	// vType is the tag describing what `value` currently holds.
	vType vType
}

// GetTypeString returns the upper-case name of the value's type tag.
// A tag that is not one of the known types yields "--".
func (v *Value) GetTypeString() string {
	name := "--"
	switch v.vType {
	case typeNull:
		name = "NULL"
	case typeBool:
		name = "BOOL"
	case typeString:
		name = "STRING"
	case typeInt:
		name = "INT"
	case typeFloat:
		name = "FLOAT"
	case typeTimestamp:
		name = "TIMESTAMP"
	case typeBytes:
		name = "BYTES"
	}
	return name
}

// Repr returns a debugging representation of the value in the form
// `<value>:<TYPE>`. String and byte-slice payloads are wrapped in
// double quotes, NULL has an empty value part, and an unknown type
// tag is rendered as `<value>:INVALID`.
func (v *Value) Repr() string {
	switch v.vType {
	case typeNull:
		return ":NULL"
	case typeBool, typeInt, typeFloat:
		return fmt.Sprintf("%v:%s", v.value, v.GetTypeString())
	case typeTimestamp:
		// Timestamps are stored by value (FromTimestamp and
		// setTimestamp both store a time.Time), so the assertion must
		// be on time.Time; asserting *time.Time would panic.
		return fmt.Sprintf("%s:TIMESTAMP", v.value.(time.Time))
	case typeString:
		return fmt.Sprintf("\"%s\":%s", v.value.(string), v.GetTypeString())
	case typeBytes:
		return fmt.Sprintf("\"%s\":BYTES", string(v.value.([]byte)))
	default:
		return fmt.Sprintf("%v:INVALID", v.value)
	}
}

// FromFloat wraps a float64 in a new Value tagged as a float.
func FromFloat(f float64) *Value {
	out := new(Value)
	out.vType = typeFloat
	out.value = f
	return out
}

// FromInt wraps an int64 in a new Value tagged as an integer.
func FromInt(f int64) *Value {
	out := new(Value)
	out.vType = typeInt
	out.value = f
	return out
}

// FromString wraps a string in a new Value tagged as a string.
func FromString(str string) *Value {
	out := new(Value)
	out.vType = typeString
	out.value = str
	return out
}

// FromBool wraps a bool in a new Value tagged as a boolean.
func FromBool(b bool) *Value {
	out := new(Value)
	out.vType = typeBool
	out.value = b
	return out
}

// FromTimestamp wraps a time.Time (stored by value) in a new Value
// tagged as a timestamp.
func FromTimestamp(t time.Time) *Value {
	out := new(Value)
	out.vType = typeTimestamp
	out.value = t
	return out
}

// FromNull returns a new Value tagged as NULL; it carries no payload.
func FromNull() *Value {
	out := new(Value)
	out.vType = typeNull
	return out
}

// FromBytes wraps a byte-slice in a new Value tagged as untyped bytes.
// The slice is stored as given, without copying.
func FromBytes(b []byte) *Value {
	out := new(Value)
	out.vType = typeBytes
	out.value = b
	return out
}

// ToFloat returns the value as a float64. It succeeds for float
// values and for int values (which are converted); for every other
// type it returns (0, false).
func (v *Value) ToFloat() (val float64, ok bool) {
	if v.vType == typeFloat {
		val, ok = v.value.(float64)
		return val, ok
	}
	if v.vType == typeInt {
		asInt, isInt := v.value.(int64)
		return float64(asInt), isInt
	}
	return 0, false
}

// ToInt returns the int64 payload. It succeeds only when the value is
// tagged as an integer; no conversion from other types is attempted.
func (v *Value) ToInt() (val int64, ok bool) {
	if v.vType != typeInt {
		return 0, false
	}
	val, ok = v.value.(int64)
	return val, ok
}

// ToString returns the string payload. It succeeds only when the
// value is tagged as a string; untyped bytes are not converted.
func (v *Value) ToString() (val string, ok bool) {
	if v.vType != typeString {
		return "", false
	}
	val, ok = v.value.(string)
	return val, ok
}

// ToBool returns the bool payload; the second return value reports
// whether the value is tagged as a boolean.
func (v *Value) ToBool() (val bool, ok bool) {
	if v.vType != typeBool {
		return false, false
	}
	payload := v.value.(bool)
	return payload, true
}

// ToTimestamp returns the time.Time payload when the value is tagged
// as a timestamp; otherwise it returns the zero time and false.
func (v *Value) ToTimestamp() (t time.Time, ok bool) {
	if v.vType != typeTimestamp {
		return t, false
	}
	t = v.value.(time.Time)
	return t, true
}

// ToBytes returns the raw byte-slice payload when the value is still
// untyped; otherwise it returns (nil, false).
func (v *Value) ToBytes() ([]byte, bool) {
	if v.vType != typeBytes {
		return nil, false
	}
	raw := v.value.([]byte)
	return raw, true
}

// IsNull reports whether the value is tagged as NULL (missing).
func (v *Value) IsNull() bool {
	switch v.vType {
	case typeNull:
		return true
	}
	return false
}

// isNumeric reports whether the value is tagged as an int or a float.
func (v *Value) isNumeric() bool {
	switch v.vType {
	case typeInt, typeFloat:
		return true
	}
	return false
}

// setters used internally to mutate values

// setInt overwrites the value in place with the given integer.
func (v *Value) setInt(i int64) {
	v.value, v.vType = i, typeInt
}

// setFloat overwrites the value in place with the given float.
func (v *Value) setFloat(f float64) {
	v.value, v.vType = f, typeFloat
}

// setString overwrites the value in place with the given string.
func (v *Value) setString(s string) {
	v.value, v.vType = s, typeString
}

// setBool overwrites the value in place with the given boolean.
func (v *Value) setBool(b bool) {
	v.value, v.vType = b, typeBool
}

// setTimestamp overwrites the value in place with the given
// timestamp, stored by value.
func (v *Value) setTimestamp(t time.Time) {
	v.value, v.vType = t, typeTimestamp
}

// CSVString renders the value as the text of a CSV field. NULL
// becomes the empty string, timestamps go through FormatSQLTimestamp,
// and an unknown type tag yields a fixed placeholder message.
func (v *Value) CSVString() string {
	switch v.vType {
	case typeNull:
		return ""
	case typeBool:
		return strconv.FormatBool(v.value.(bool))
	case typeString:
		return v.value.(string)
	case typeInt:
		return strconv.FormatInt(v.value.(int64), 10)
	case typeFloat:
		// 'g' with the shortest precision is what fmt's %v verb
		// produces for a float64.
		return strconv.FormatFloat(v.value.(float64), 'g', -1, 64)
	case typeTimestamp:
		return FormatSQLTimestamp(v.value.(time.Time))
	case typeBytes:
		return string(v.value.([]byte))
	}
	return "CSV serialization not implemented for this type"
}

// floatToValue converts a float into a Value, using the int
// representation when the float is a whole number that fits in an
// int64. Fractional values, NaN, infinities and whole numbers outside
// the int64 range stay floats.
func floatToValue(f float64) *Value {
	intPart, fracPart := math.Modf(f)
	// Converting an out-of-range float to int64 gives an
	// implementation-specific result, so guard the range. The upper
	// bound is strict because float64(math.MaxInt64) rounds up to
	// 2^63, which itself does not fit in an int64.
	if fracPart == 0 && intPart >= math.MinInt64 && intPart < math.MaxInt64 {
		return FromInt(int64(intPart))
	}
	return FromFloat(f)
}

// negate flips the sign of an int or float value in place. Values of
// any other type are left untouched.
func (v *Value) negate() {
	if v.vType == typeFloat {
		f := v.value.(float64)
		v.value = -f
		return
	}
	if v.vType == typeInt {
		i := v.value.(int64)
		v.value = -i
	}
}

// Value comparison functions: we do not expose them outside the
// module. Ordering operators "<", ">", ">=", "<=" work on strings and
// numbers. Equality operators "=", "!=" work on strings,
// numbers and booleans.

// Supported comparison operators, spelled as the operator strings
// accepted by compareOp.
const (
	opLt   = "<"  // less than
	opLte  = "<=" // less than or equal
	opGt   = ">"  // greater than
	opGte  = ">=" // greater than or equal
	opEq   = "="  // equal
	opIneq = "!=" // not equal
)

// compareOp evaluates `v op a` for one of the supported comparison
// operators and returns the boolean result.
//
// When numeric types are compared, type promotions could happen. If
// values do not have types (e.g. when reading from CSV), for
// comparison operations, automatic type conversion happens by trying
// to check if the value is a number (first an integer, then a float),
// and falling back to string. This inference mutates `v` and `a` in
// place.
//
// It returns errArithInvalidOperator for an unknown operator, any
// error from type inference, errCmpInvalidBoolOperator (via
// boolCompare) when booleans are compared with an ordering operator,
// and errCmpMismatchedTypes when the operands cannot be compared.
func (v *Value) compareOp(op string, a *Value) (res bool, err error) {
	if !isValidComparisonOperator(op) {
		return false, errArithInvalidOperator
	}

	// Check if type conversion/inference is needed - it is needed
	// if the Value is a byte-slice.
	if err = inferTypesForCmp(v, a); err != nil {
		return false, err
	}

	if v.isNumeric() && a.isNumeric() {
		intV, ok1i := v.ToInt()
		intA, ok2i := a.ToInt()
		if ok1i && ok2i {
			return intCompare(op, intV, intA), nil
		}

		// If both values are numeric, then at least one is
		// float since we got here, so we convert.
		flV, _ := v.ToFloat()
		flA, _ := a.ToFloat()
		return floatCompare(op, flV, flA), nil
	}

	strV, ok1s := v.ToString()
	strA, ok2s := a.ToString()
	if ok1s && ok2s {
		return stringCompare(op, strV, strA), nil
	}

	// Both operands must be booleans. The right-hand side has to be
	// read from `a`; reading it from `v` would compare `v` with itself.
	boolV, ok1b := v.ToBool()
	boolA, ok2b := a.ToBool()
	if ok1b && ok2b {
		return boolCompare(op, boolV, boolA)
	}

	return false, errCmpMismatchedTypes
}

// inferTypesForCmp assigns concrete types to untyped (byte-slice)
// operands ahead of a comparison, mutating `a` and `b` in place.
//
//   - Both typed: nothing changes.
//   - Both untyped: the types are tried in order int, float, mixed
//     int/float, bool, and finally string, which always succeeds.
//   - Exactly one untyped: it is converted to match the typed operand
//     (string, number or bool). An error is returned if the bytes
//     cannot be parsed that way, and errCmpMismatchedTypes if the
//     typed operand is of any other type.
func inferTypesForCmp(a *Value, b *Value) error {
	_, okA := a.ToBytes()
	_, okB := b.ToBytes()
	switch {
	case !okA && !okB:
		// Both Values already have types
		return nil

	case okA && okB:
		// Both Values are untyped so try the types in order:
		// int, float, bool, string

		// Check for numeric inference
		iA, okAi := a.bytesToInt()
		iB, okBi := b.bytesToInt()
		if okAi && okBi {
			a.setInt(iA)
			b.setInt(iB)
			return nil
		}

		fA, okAf := a.bytesToFloat()
		fB, okBf := b.bytesToFloat()
		if okAf && okBf {
			a.setFloat(fA)
			b.setFloat(fB)
			return nil
		}

		// Check if they are an int and float combination. These
		// branches only matter for text that parses as an int but not
		// as a float; with the strconv-based helpers that should not
		// normally occur, since the both-float case above catches it.
		if okAi && okBf {
			a.setInt(iA)
			// `b` must receive its own parsed value, not `a`'s.
			b.setFloat(fB)
			return nil
		}
		if okBi && okAf {
			a.setFloat(fA)
			b.setInt(iB)
			return nil
		}

		// Not numeric types at this point.

		// Check for bool inference
		bA, okAb := a.bytesToBool()
		bB, okBb := b.bytesToBool()
		if okAb && okBb {
			a.setBool(bA)
			b.setBool(bB)
			return nil
		}

		// Fallback to string
		sA := a.bytesToString()
		sB := b.bytesToString()
		a.setString(sA)
		b.setString(sB)
		return nil

	case okA && !okB:
		// Here `a` is untyped, but `b` has a fixed type, so `a` is
		// converted to match `b`.
		switch b.vType {
		case typeString:
			s := a.bytesToString()
			a.setString(s)

		case typeInt, typeFloat:
			if iA, ok := a.bytesToInt(); ok {
				a.setInt(iA)
			} else if fA, ok := a.bytesToFloat(); ok {
				a.setFloat(fA)
			} else {
				return fmt.Errorf("Could not convert %s to a number", string(a.value.([]byte)))
			}

		case typeBool:
			if bA, ok := a.bytesToBool(); ok {
				a.setBool(bA)
			} else {
				return fmt.Errorf("Could not convert %s to a boolean", string(a.value.([]byte)))
			}

		default:
			return errCmpMismatchedTypes
		}
		return nil

	case !okA && okB:
		// swap arguments to avoid repeating code
		return inferTypesForCmp(b, a)

	default:
		// Does not happen
		return nil
	}
}

// Value arithmetic functions: we do not expose them outside the
// module. All arithmetic works only on numeric values with automatic
// promotion to the "larger" type that can represent the value. TODO:
// Add support for large number arithmetic.

// Supported arithmetic operators, spelled as the operator strings
// accepted by arithOp.
const (
	opPlus     = "+" // addition
	opMinus    = "-" // subtraction
	opDivide   = "/" // division
	opMultiply = "*" // multiplication
	opModulo   = "%" // remainder
)

// arithOp computes `v op a` and stores the result in `v`.
//
// For arithmetic operations, if both values are numeric then the
// operation shall succeed. If the types are unknown automatic type
// conversion to a number is attempted; this inference mutates `v` and
// `a` in place. Two ints are combined with integer arithmetic,
// otherwise both operands are promoted to float.
//
// Errors: a failed type inference is returned as is; non-numeric
// operands and an unsupported operator are reported through
// errInvalidDataType wrapping errArithMismatchedTypes and
// errArithInvalidOperator respectively; errors from the arithmetic
// helpers (e.g. division by zero) are returned unchanged. Note that
// `v` is overwritten with the helper's result even when that helper
// returns an error.
func (v *Value) arithOp(op string, a *Value) error {
	err := inferTypeForArithOp(v)
	if err != nil {
		return err
	}

	err = inferTypeForArithOp(a)
	if err != nil {
		return err
	}

	if !v.isNumeric() || !a.isNumeric() {
		return errInvalidDataType(errArithMismatchedTypes)
	}

	// An unknown operator is its own failure and must not be reported
	// as a type mismatch.
	if !isValidArithOperator(op) {
		return errInvalidDataType(errArithInvalidOperator)
	}

	intV, ok1i := v.ToInt()
	intA, ok2i := a.ToInt()
	switch {
	case ok1i && ok2i:
		res, err := intArithOp(op, intV, intA)
		v.setInt(res)
		return err

	default:
		// Convert arguments to float
		flV, _ := v.ToFloat()
		flA, _ := a.ToFloat()
		res, err := floatArithOp(op, flV, flA)
		v.setFloat(res)
		return err
	}
}

// inferTypeForArithOp gives an untyped (byte-slice) value a numeric
// type, trying int first and then float, and mutates it in place.
// Values that already have a type are left alone. If the bytes parse
// as neither, an error wrapped by errInvalidDataType is returned.
func inferTypeForArithOp(a *Value) error {
	raw, untyped := a.ToBytes()
	if !untyped {
		return nil
	}

	if asInt, isInt := a.bytesToInt(); isInt {
		a.setInt(asInt)
		return nil
	}

	if asFloat, isFloat := a.bytesToFloat(); isFloat {
		a.setFloat(asFloat)
		return nil
	}

	return errInvalidDataType(fmt.Errorf("Could not convert %s to a number", string(raw)))
}

// All the bytesTo* functions defined below assume the value is a byte-slice.

// bytesToInt parses the untyped byte payload as a base-10 int64. The
// bool result reports success - it is false only if the conversion
// failed.
func (v *Value) bytesToInt() (int64, bool) {
	raw, _ := v.ToBytes()
	parsed, err := strconv.ParseInt(string(raw), 10, 64)
	if err != nil {
		return parsed, false
	}
	return parsed, true
}

// bytesToFloat parses the untyped byte payload as a float64. The bool
// result reports success - it is false only if the conversion failed.
func (v *Value) bytesToFloat() (float64, bool) {
	raw, _ := v.ToBytes()
	parsed, err := strconv.ParseFloat(string(raw), 64)
	if err != nil {
		return parsed, false
	}
	return parsed, true
}

// bytesToBool parses the untyped byte payload as a boolean, ignoring
// case: "t"/"true" give true and "f"/"false" give false. The second
// result is false when the text is none of these.
func (v *Value) bytesToBool() (val bool, ok bool) {
	raw, _ := v.ToBytes()
	text := strings.ToLower(string(raw))
	if text == "t" || text == "true" {
		return true, true
	}
	if text == "f" || text == "false" {
		return false, true
	}
	return false, false
}

// bytesToString returns the untyped byte payload as a string. It
// never fails; a value that is not a byte-slice gives "".
func (v *Value) bytesToString() string {
	if raw, untyped := v.ToBytes(); untyped {
		return string(raw)
	}
	return ""
}

// minmax folds `a` into the running minimum (isMax == false) or
// maximum (isMax == true) held in `v`, storing the result in `v`. It
// works only on numeric arguments and assumes `v` is already numeric.
// If `a` is untyped, conversion to a numeric type is attempted (first
// int, then float). On the first row `v` simply takes the value of
// `a`. Two ints are compared as ints; otherwise both sides are
// compared as floats.
func (v *Value) minmax(a *Value, isMax, isFirstRow bool) error {
	if err := inferTypeForArithOp(a); err != nil {
		return err
	}
	if !a.isNumeric() {
		return errArithMismatchedTypes
	}

	candInt, candIsInt := a.ToInt()

	// In case of first row, set v to a.
	if isFirstRow {
		if candIsInt {
			v.setInt(candInt)
		} else {
			candFloat, _ := a.ToFloat()
			v.setFloat(candFloat)
		}
		return nil
	}

	if curInt, curIsInt := v.ToInt(); curIsInt && candIsInt {
		best := curInt
		if (isMax && candInt > curInt) || (!isMax && candInt < curInt) {
			best = candInt
		}
		v.setInt(best)
		return nil
	}

	curFloat, _ := v.ToFloat()
	candFloat, _ := a.ToFloat()
	pick := math.Min
	if isMax {
		pick = math.Max
	}
	v.setFloat(pick(curFloat, candFloat))
	return nil
}

// inferTypeAsTimestamp tries to turn a string or untyped value into a
// timestamp by parsing its text with parseSQLTimestamp. On success
// the value is converted in place; if parsing fails, or the value is
// of any other type, it is left unchanged.
func inferTypeAsTimestamp(v *Value) {
	var text string
	if s, isString := v.ToString(); isString {
		text = s
	} else if b, isBytes := v.ToBytes(); isBytes {
		text = string(b)
	} else {
		return
	}

	if parsed, err := parseSQLTimestamp(text); err == nil {
		v.setTimestamp(parsed)
	}
}

// inferTypeAsString converts an untyped value to a string in place.
// It is called when the caller requires a string context to proceed;
// values that already have a type are left unchanged.
func inferTypeAsString(v *Value) {
	if raw, untyped := v.ToBytes(); untyped {
		v.setString(string(raw))
	}
}

// isValidComparisonOperator reports whether op is one of the
// supported comparison operators.
func isValidComparisonOperator(op string) bool {
	switch op {
	case opLt, opLte, opGt, opGte, opEq, opIneq:
		return true
	}
	return false
}

func intCompare(op string, left, right int64) bool {
	switch op {
	case opLt:
		return left < right
	case opLte:
		return left <= right
	case opGt:
		return left > right
	case opGte:
		return left >= right
	case opEq:
		return left == right
	case opIneq:
		return left != right
	}
	// This case does not happen
	return false
}

// floatCompare evaluates `left op right` for float64 operands using
// Go's native float comparison. An unsupported operator gives false;
// callers validate the operator first.
func floatCompare(op string, left, right float64) bool {
	if op == opLt {
		return left < right
	}
	if op == opLte {
		return left <= right
	}
	if op == opGt {
		return left > right
	}
	if op == opGte {
		return left >= right
	}
	if op == opEq {
		return left == right
	}
	if op == opIneq {
		return left != right
	}
	// This case does not happen
	return false
}

// stringCompare evaluates `left op right` for strings, ordered
// lexically by bytes. An unsupported operator gives false; callers
// validate the operator first.
func stringCompare(op string, left, right string) bool {
	order := strings.Compare(left, right)
	switch op {
	case opLt:
		return order < 0
	case opLte:
		return order <= 0
	case opGt:
		return order > 0
	case opGte:
		return order >= 0
	case opEq:
		return order == 0
	case opIneq:
		return order != 0
	}
	// This case does not happen
	return false
}

// boolCompare evaluates `left op right` for booleans. Only the
// equality operators are supported; any other operator returns
// errCmpInvalidBoolOperator.
func boolCompare(op string, left, right bool) (bool, error) {
	if op == opEq {
		return left == right, nil
	}
	if op == opIneq {
		return left != right, nil
	}
	return false, errCmpInvalidBoolOperator
}

// isValidArithOperator reports whether op is one of the supported
// arithmetic operators.
func isValidArithOperator(op string) bool {
	switch op {
	case opPlus, opMinus, opDivide, opMultiply, opModulo:
		return true
	}
	return false
}

// Overflow errors are ignored.
func intArithOp(op string, left, right int64) (int64, error) {
	switch op {
	case opPlus:
		return left + right, nil
	case opMinus:
		return left - right, nil
	case opDivide:
		if right == 0 {
			return 0, errArithDivideByZero
		}
		return left / right, nil
	case opMultiply:
		return left * right, nil
	case opModulo:
		if right == 0 {
			return 0, errArithDivideByZero
		}
		return left % right, nil
	}
	// This does not happen
	return 0, nil
}

// Overflow errors are ignored.
func floatArithOp(op string, left, right float64) (float64, error) {
	switch op {
	case opPlus:
		return left + right, nil
	case opMinus:
		return left - right, nil
	case opDivide:
		if right == 0 {
			return 0, errArithDivideByZero
		}
		return left / right, nil
	case opMultiply:
		return left * right, nil
	case opModulo:
		if right == 0 {
			return 0, errArithDivideByZero
		}
		return math.Mod(left, right), nil
	}
	// This does not happen
	return 0, nil
}