|
|
|
/*
 * Minio Cloud Storage, (C) 2018 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
|
|
package s3select
|
|
|
|
|
|
|
|
import (
|
|
|
|
"math"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/xwb1989/sqlparser"
|
|
|
|
)
|
|
|
|
|
|
|
|
// SelectFuncs contains the relevant values from the parser for S3 Select
|
|
|
|
// Functions
|
|
|
|
type SelectFuncs struct {
|
|
|
|
funcExpr []*sqlparser.FuncExpr
|
|
|
|
index []int
|
|
|
|
}
|
|
|
|
|
|
|
|
// RunSqlParser allows us to easily bundle all the functions from above and run
|
|
|
|
// them in the appropriate order.
|
|
|
|
func (reader *Input) runSelectParser(selectExpression string, myRow chan *Row) {
|
|
|
|
reqCols, alias, myLimit, whereClause, aggFunctionNames, myFuncs, myErr := reader.ParseSelect(selectExpression)
|
|
|
|
if myErr != nil {
|
|
|
|
rowStruct := &Row{
|
|
|
|
err: myErr,
|
|
|
|
}
|
|
|
|
myRow <- rowStruct
|
|
|
|
return
|
|
|
|
}
|
|
|
|
reader.processSelectReq(reqCols, alias, whereClause, myLimit, aggFunctionNames, myRow, myFuncs)
|
|
|
|
}
|
|
|
|
|
|
|
|
// ParseSelect parses the SELECT expression, and effectively tokenizes it into
|
|
|
|
// its separate parts. It returns the requested column names,alias,limit of
|
|
|
|
// records, and the where clause.
|
|
|
|
func (reader *Input) ParseSelect(sqlInput string) ([]string, string, int64, interface{}, []string, *SelectFuncs, error) {
|
|
|
|
// return columnNames, alias, limitOfRecords, whereclause,coalStore, nil
|
|
|
|
|
|
|
|
stmt, err := sqlparser.Parse(sqlInput)
|
|
|
|
var whereClause interface{}
|
|
|
|
var alias string
|
|
|
|
var limit int64
|
|
|
|
myFuncs := &SelectFuncs{}
|
|
|
|
// TODO Maybe can parse their errors a bit to return some more of the s3 errors
|
|
|
|
if err != nil {
|
|
|
|
return nil, "", 0, nil, nil, nil, ErrLexerInvalidChar
|
|
|
|
}
|
|
|
|
switch stmt := stmt.(type) {
|
|
|
|
case *sqlparser.Select:
|
|
|
|
// evaluates the where clause
|
|
|
|
functionNames := make([]string, len(stmt.SelectExprs))
|
|
|
|
columnNames := make([]string, len(stmt.SelectExprs))
|
|
|
|
|
|
|
|
if stmt.Where != nil {
|
|
|
|
switch expr := stmt.Where.Expr.(type) {
|
|
|
|
default:
|
|
|
|
whereClause = expr
|
|
|
|
case *sqlparser.ComparisonExpr:
|
|
|
|
whereClause = expr
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if stmt.SelectExprs != nil {
|
|
|
|
for i := 0; i < len(stmt.SelectExprs); i++ {
|
|
|
|
switch expr := stmt.SelectExprs[i].(type) {
|
|
|
|
case *sqlparser.StarExpr:
|
|
|
|
columnNames[0] = "*"
|
|
|
|
case *sqlparser.AliasedExpr:
|
|
|
|
switch smallerexpr := expr.Expr.(type) {
|
|
|
|
case *sqlparser.FuncExpr:
|
|
|
|
if smallerexpr.IsAggregate() {
|
|
|
|
functionNames[i] = smallerexpr.Name.CompliantName()
|
|
|
|
// Will return function name
|
|
|
|
// Case to deal with if we have functions and not an asterix
|
|
|
|
switch tempagg := smallerexpr.Exprs[0].(type) {
|
|
|
|
case *sqlparser.StarExpr:
|
|
|
|
columnNames[0] = "*"
|
|
|
|
if smallerexpr.Name.CompliantName() != "count" {
|
|
|
|
return nil, "", 0, nil, nil, nil, ErrParseUnsupportedCallWithStar
|
|
|
|
}
|
|
|
|
case *sqlparser.AliasedExpr:
|
|
|
|
switch col := tempagg.Expr.(type) {
|
|
|
|
case *sqlparser.BinaryExpr:
|
|
|
|
return nil, "", 0, nil, nil, nil, ErrParseNonUnaryAgregateFunctionCall
|
|
|
|
case *sqlparser.ColName:
|
|
|
|
columnNames[i] = col.Name.CompliantName()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Case to deal with if COALESCE was used..
|
|
|
|
} else if supportedFunc(smallerexpr.Name.CompliantName()) {
|
|
|
|
if myFuncs.funcExpr == nil {
|
|
|
|
myFuncs.funcExpr = make([]*sqlparser.FuncExpr, len(stmt.SelectExprs))
|
|
|
|
myFuncs.index = make([]int, len(stmt.SelectExprs))
|
|
|
|
}
|
|
|
|
myFuncs.funcExpr[i] = smallerexpr
|
|
|
|
myFuncs.index[i] = i
|
|
|
|
} else {
|
|
|
|
return nil, "", 0, nil, nil, nil, ErrUnsupportedSQLOperation
|
|
|
|
}
|
|
|
|
case *sqlparser.ColName:
|
|
|
|
columnNames[i] = smallerexpr.Name.CompliantName()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// This code retrieves the alias and makes sure it is set to the correct
|
|
|
|
// value, if not it sets it to the tablename
|
|
|
|
if (stmt.From) != nil {
|
|
|
|
for i := 0; i < len(stmt.From); i++ {
|
|
|
|
switch smallerexpr := stmt.From[i].(type) {
|
|
|
|
case *sqlparser.JoinTableExpr:
|
|
|
|
return nil, "", 0, nil, nil, nil, ErrParseMalformedJoin
|
|
|
|
case *sqlparser.AliasedTableExpr:
|
|
|
|
alias = smallerexpr.As.CompliantName()
|
|
|
|
if alias == "" {
|
|
|
|
alias = sqlparser.GetTableName(smallerexpr.Expr).CompliantName()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if stmt.Limit != nil {
|
|
|
|
switch expr := stmt.Limit.Rowcount.(type) {
|
|
|
|
case *sqlparser.SQLVal:
|
|
|
|
// The Value of how many rows we're going to limit by
|
|
|
|
parsedLimit, _ := strconv.Atoi(string(expr.Val[:]))
|
|
|
|
limit = int64(parsedLimit)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if stmt.GroupBy != nil {
|
|
|
|
return nil, "", 0, nil, nil, nil, ErrParseUnsupportedLiteralsGroupBy
|
|
|
|
}
|
|
|
|
if stmt.OrderBy != nil {
|
|
|
|
return nil, "", 0, nil, nil, nil, ErrParseUnsupportedToken
|
|
|
|
}
|
|
|
|
if err := reader.parseErrs(columnNames, whereClause, alias, myFuncs); err != nil {
|
|
|
|
return nil, "", 0, nil, nil, nil, err
|
|
|
|
}
|
|
|
|
return columnNames, alias, limit, whereClause, functionNames, myFuncs, nil
|
|
|
|
}
|
|
|
|
return nil, "", 0, nil, nil, nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// This is the main function, It goes row by row and for records which validate
|
|
|
|
// the where clause it currently prints the appropriate row given the requested
|
|
|
|
// columns.
|
|
|
|
func (reader *Input) processSelectReq(reqColNames []string, alias string, whereClause interface{}, limitOfRecords int64, functionNames []string, myRow chan *Row, myFunc *SelectFuncs) {
|
|
|
|
counter := -1
|
|
|
|
filtrCount := 0
|
|
|
|
functionFlag := false
|
|
|
|
// My values is used to store our aggregation values if we need to store them.
|
|
|
|
myAggVals := make([]float64, len(reqColNames))
|
|
|
|
var columns []string
|
|
|
|
// LowercasecolumnsMap is used in accordance with hasDuplicates so that we can
|
|
|
|
// raise the error "Ambigious" if a case insensitive column is provided and we
|
|
|
|
// have multiple matches.
|
|
|
|
lowercaseColumnsMap := make(map[string]int)
|
|
|
|
hasDuplicates := make(map[string]bool)
|
|
|
|
// ColumnsMap stores our columns and their index.
|
|
|
|
columnsMap := make(map[string]int)
|
|
|
|
if limitOfRecords == 0 {
|
|
|
|
limitOfRecords = math.MaxInt64
|
|
|
|
}
|
|
|
|
|
|
|
|
for {
|
|
|
|
record := reader.ReadRecord()
|
|
|
|
reader.stats.BytesProcessed += processSize(record)
|
|
|
|
if record == nil {
|
|
|
|
if functionFlag {
|
|
|
|
rowStruct := &Row{
|
|
|
|
record: reader.aggFuncToStr(myAggVals) + "\n",
|
|
|
|
}
|
|
|
|
myRow <- rowStruct
|
|
|
|
}
|
|
|
|
close(myRow)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if counter == -1 && reader.options.HeaderOpt && len(reader.header) > 0 {
|
|
|
|
columns = reader.Header()
|
|
|
|
myErr := checkForDuplicates(columns, columnsMap, hasDuplicates, lowercaseColumnsMap)
|
|
|
|
if myErr != nil {
|
|
|
|
rowStruct := &Row{
|
|
|
|
err: myErr,
|
|
|
|
}
|
|
|
|
myRow <- rowStruct
|
|
|
|
return
|
|
|
|
}
|
|
|
|
} else if counter == -1 && len(reader.header) > 0 {
|
|
|
|
columns = reader.Header()
|
|
|
|
}
|
|
|
|
// When we have reached our limit, on what the user specified as the number
|
|
|
|
// of rows they wanted, we terminate our interpreter.
|
|
|
|
if int64(filtrCount) == limitOfRecords && limitOfRecords != 0 {
|
|
|
|
close(myRow)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// The call to the where function clause,ensures that the rows we print match our where clause.
|
|
|
|
condition, myErr := matchesMyWhereClause(record, columnsMap, alias, whereClause)
|
|
|
|
if myErr != nil {
|
|
|
|
rowStruct := &Row{
|
|
|
|
err: myErr,
|
|
|
|
}
|
|
|
|
myRow <- rowStruct
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if condition {
|
|
|
|
// if its an asterix we just print everything in the row
|
|
|
|
if reqColNames[0] == "*" && functionNames[0] == "" {
|
|
|
|
rowStruct := &Row{
|
|
|
|
record: reader.printAsterix(record) + "\n",
|
|
|
|
}
|
|
|
|
myRow <- rowStruct
|
|
|
|
} else if alias != "" {
|
|
|
|
// This is for dealing with the case of if we have to deal with a
|
|
|
|
// request for a column with an index e.g A_1.
|
|
|
|
if representsInt(reqColNames[0]) {
|
|
|
|
// This checks whether any aggregation function was called as now we
|
|
|
|
// no longer will go through printing each row, and only print at the
|
|
|
|
// end
|
|
|
|
if len(functionNames) > 0 && functionNames[0] != "" {
|
|
|
|
functionFlag = true
|
|
|
|
aggregationFunctions(counter, filtrCount, myAggVals, columnsMap, reqColNames, functionNames, record)
|
|
|
|
} else {
|
|
|
|
// The code below finds the appropriate columns of the row given the
|
|
|
|
// indicies provided in the SQL request and utilizes the map to
|
|
|
|
// retrieve the correct part of the row.
|
|
|
|
myQueryRow, myErr := reader.processColNameIndex(record, reqColNames, columns)
|
|
|
|
if myErr != nil {
|
|
|
|
rowStruct := &Row{
|
|
|
|
err: myErr,
|
|
|
|
}
|
|
|
|
myRow <- rowStruct
|
|
|
|
return
|
|
|
|
}
|
|
|
|
rowStruct := &Row{
|
|
|
|
record: myQueryRow + "\n",
|
|
|
|
}
|
|
|
|
myRow <- rowStruct
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// This code does aggregation if we were provided column names in the
|
|
|
|
// form of acutal names rather an indices.
|
|
|
|
if len(functionNames) > 0 && functionNames[0] != "" {
|
|
|
|
functionFlag = true
|
|
|
|
aggregationFunctions(counter, filtrCount, myAggVals, columnsMap, reqColNames, functionNames, record)
|
|
|
|
} else {
|
|
|
|
// This code prints the appropriate part of the row given the filter
|
|
|
|
// and select request, if the select request was based on column
|
|
|
|
// names rather than indices.
|
|
|
|
myQueryRow, myErr := reader.processColNameLiteral(record, reqColNames, columns, columnsMap, myFunc)
|
|
|
|
if myErr != nil {
|
|
|
|
rowStruct := &Row{
|
|
|
|
err: myErr,
|
|
|
|
}
|
|
|
|
myRow <- rowStruct
|
|
|
|
return
|
|
|
|
}
|
|
|
|
rowStruct := &Row{
|
|
|
|
record: myQueryRow + "\n",
|
|
|
|
}
|
|
|
|
myRow <- rowStruct
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
filtrCount++
|
|
|
|
}
|
|
|
|
counter++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// printAsterix helps to print out the entire row if an asterix is used.
|
|
|
|
func (reader *Input) printAsterix(record []string) string {
|
|
|
|
myRow := record[0]
|
|
|
|
for i := 1; i < len(record); i++ {
|
|
|
|
myRow = myRow + reader.options.OutputFieldDelimiter + record[i]
|
|
|
|
}
|
|
|
|
return myRow
|
|
|
|
}
|
|
|
|
|
|
|
|
// processColumnNames is a function which allows for cleaning of column names.
|
|
|
|
func (reader *Input) processColumnNames(reqColNames []string, alias string) error {
|
|
|
|
for i := 0; i < len(reqColNames); i++ {
|
|
|
|
// The code below basically cleans the column name of its alias and other
|
|
|
|
// syntax, so that we can extract its pure name.
|
|
|
|
reqColNames[i] = cleanCol(reqColNames[i], alias)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// processColNameIndex is the function which creates the row for an index based
|
|
|
|
// query.
|
|
|
|
func (reader *Input) processColNameIndex(record []string, reqColNames []string, columns []string) (string, error) {
|
|
|
|
myRow := ""
|
|
|
|
for i := 0; i < len(reqColNames); i++ {
|
|
|
|
// COALESCE AND NULLIF do not support index based access.
|
|
|
|
if reqColNames[0] == "0" {
|
|
|
|
return "", ErrInvalidColumnIndex
|
|
|
|
}
|
|
|
|
// Subtract 1 because AWS Indexing is not 0 based, it starts at 1.
|
|
|
|
mytempindex, err := strconv.Atoi(reqColNames[i])
|
|
|
|
mytempindex = mytempindex - 1
|
|
|
|
if mytempindex > len(columns) {
|
|
|
|
return "", ErrInvalidColumnIndex
|
|
|
|
}
|
|
|
|
myRow = writeRow(myRow, record[mytempindex], reader.options.OutputFieldDelimiter, len(reqColNames))
|
|
|
|
if err != nil {
|
|
|
|
return "", ErrMissingHeaders
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(myRow) > 1000000 {
|
|
|
|
return "", ErrOverMaxRecordSize
|
|
|
|
}
|
|
|
|
if strings.Count(myRow, reader.options.OutputFieldDelimiter) != len(reqColNames)-1 {
|
|
|
|
myRow = qualityCheck(myRow, len(reqColNames)-1-strings.Count(myRow, reader.options.OutputFieldDelimiter), reader.options.OutputFieldDelimiter)
|
|
|
|
}
|
|
|
|
return myRow, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// processColNameLiteral is the function which creates the row for an name based
|
|
|
|
// query.
|
|
|
|
func (reader *Input) processColNameLiteral(record []string, reqColNames []string, columns []string, columnsMap map[string]int, myFunc *SelectFuncs) (string, error) {
|
|
|
|
myRow := ""
|
|
|
|
for i := 0; i < len(reqColNames); i++ {
|
|
|
|
// this is the case to deal with COALESCE.
|
|
|
|
if reqColNames[i] == "" && isValidFunc(myFunc.index, i) {
|
|
|
|
myVal := evaluateFuncExpr(myFunc.funcExpr[i], "", record, columnsMap)
|
|
|
|
myRow = writeRow(myRow, myVal, reader.options.OutputFieldDelimiter, len(reqColNames))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
myTempIndex, notFound := columnsMap[trimQuotes(reqColNames[i])]
|
|
|
|
if !notFound {
|
|
|
|
return "", ErrMissingHeaders
|
|
|
|
}
|
|
|
|
myRow = writeRow(myRow, record[myTempIndex], reader.options.OutputFieldDelimiter, len(reqColNames))
|
|
|
|
}
|
|
|
|
if len(myRow) > 1000000 {
|
|
|
|
return "", ErrOverMaxRecordSize
|
|
|
|
}
|
|
|
|
if strings.Count(myRow, reader.options.OutputFieldDelimiter) != len(reqColNames)-1 {
|
|
|
|
myRow = qualityCheck(myRow, len(reqColNames)-1-strings.Count(myRow, reader.options.OutputFieldDelimiter), reader.options.OutputFieldDelimiter)
|
|
|
|
}
|
|
|
|
return myRow, nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// aggregationFunctions is a function which performs the actual aggregation
|
|
|
|
// methods on the given row, it uses an array defined the the main parsing
|
|
|
|
// function to keep track of values.
|
|
|
|
func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, columnsMap map[string]int, storeReqCols []string, storeFunctions []string, record []string) error {
|
|
|
|
for i := 0; i < len(storeFunctions); i++ {
|
|
|
|
if storeFunctions[i] == "" {
|
|
|
|
i++
|
|
|
|
} else if storeFunctions[i] == "count" {
|
|
|
|
myAggVals[i]++
|
|
|
|
} else {
|
|
|
|
// If column names are provided as an index it'll use this if statement instead of the else/
|
|
|
|
var convAggFloat float64
|
|
|
|
if representsInt(storeReqCols[i]) {
|
|
|
|
myIndex, _ := strconv.Atoi(storeReqCols[i])
|
|
|
|
convAggFloat, _ = strconv.ParseFloat(record[myIndex], 64)
|
|
|
|
|
|
|
|
} else {
|
|
|
|
// case that the columns are in the form of named columns rather than indices.
|
|
|
|
convAggFloat, _ = strconv.ParseFloat(record[columnsMap[trimQuotes(storeReqCols[i])]], 64)
|
|
|
|
|
|
|
|
}
|
|
|
|
// This if statement is for calculating the min.
|
|
|
|
if storeFunctions[i] == "min" {
|
|
|
|
if counter == -1 {
|
|
|
|
myAggVals[i] = math.MaxFloat64
|
|
|
|
}
|
|
|
|
if convAggFloat < myAggVals[i] {
|
|
|
|
myAggVals[i] = convAggFloat
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if storeFunctions[i] == "max" {
|
|
|
|
// This if statement is for calculating the max.
|
|
|
|
if counter == -1 {
|
|
|
|
myAggVals[i] = math.SmallestNonzeroFloat64
|
|
|
|
}
|
|
|
|
if convAggFloat > myAggVals[i] {
|
|
|
|
myAggVals[i] = convAggFloat
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if storeFunctions[i] == "sum" {
|
|
|
|
// This if statement is for calculating the sum.
|
|
|
|
myAggVals[i] += convAggFloat
|
|
|
|
|
|
|
|
} else if storeFunctions[i] == "avg" {
|
|
|
|
// This if statement is for calculating the average.
|
|
|
|
if filtrCount == 0 {
|
|
|
|
myAggVals[i] = convAggFloat
|
|
|
|
} else {
|
|
|
|
myAggVals[i] = (convAggFloat + (myAggVals[i] * float64(filtrCount))) / float64((filtrCount + 1))
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return ErrParseNonUnaryAgregateFunctionCall
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|