From ae4ded7fd1c7b45e90930f4dfb1a50883ab17692 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sun, 29 Nov 2020 13:05:37 -0800 Subject: [PATCH] fix: s3select tests with new minio-py SDK (#10995) --- mint/run/core/minio-go/go.mod | 2 + mint/run/core/s3select/csv.py | 82 +++++++++++++++---------------- mint/run/core/s3select/sql_ops.py | 47 +++++++++--------- mint/run/core/s3select/tests.py | 1 + 4 files changed, 64 insertions(+), 68 deletions(-) diff --git a/mint/run/core/minio-go/go.mod b/mint/run/core/minio-go/go.mod index 0ce4ece6b..0b54bef88 100644 --- a/mint/run/core/minio-go/go.mod +++ b/mint/run/core/minio-go/go.mod @@ -1,3 +1,5 @@ module mint.minio.io/minio-go go 1.14 + +require github.com/minio/minio-go/v7 v7.0.6 // indirect diff --git a/mint/run/core/s3select/csv.py b/mint/run/core/s3select/csv.py index 9f5759af0..cf1eaaaa3 100644 --- a/mint/run/core/s3select/csv.py +++ b/mint/run/core/s3select/csv.py @@ -19,9 +19,13 @@ import io import os from minio import Minio -from minio.select.options import (CSVInput, CSVOutput, InputSerialization, - JSONOutput, OutputSerialization, - RequestProgress, SelectObjectOptions) +from minio.selectrequest import (COMPRESSION_TYPE_NONE, FILE_HEADER_INFO_NONE, + JSON_TYPE_DOCUMENT, QUOTE_FIELDS_ALWAYS, + QUOTE_FIELDS_ASNEEDED, CSVInputSerialization, + CSVOutputSerialization, + JSONInputSerialization, + JSONOutputSerialization, SelectRequest) + from utils import * @@ -93,26 +97,22 @@ def test_csv_input_custom_quote_char(client, log_output): try: for idx, (quote_char, escape_char, data, expected_output) in enumerate(tests): - sql_opts = SelectObjectOptions( - expression="select * from s3object", - input_serialization=InputSerialization( - compression_type="NONE", - csv=CSVInput(file_header_info="NONE", - record_delimiter="\n", - field_delimiter=",", - quote_character=quote_char, - quote_escape_character=escape_char, - comments="#", - allow_quoted_record_delimiter="FALSE",), + sql_opts = SelectRequest( + "select * from s3object", + CSVInputSerialization( + compression_type=COMPRESSION_TYPE_NONE, + file_header_info=FILE_HEADER_INFO_NONE, + record_delimiter="\n", + field_delimiter=",", + quote_character=quote_char, + quote_escape_character=escape_char, + comments="#", + allow_quoted_record_delimiter="FALSE", ), - output_serialization=OutputSerialization( - json=JSONOutput( - record_delimiter="\n", - ) + JSONOutputSerialization( + record_delimiter="\n", ), - request_progress=RequestProgress( - enabled="False" - ) + request_progress=False, ) test_sql_api(f'test_{idx}', client, bucket_name, @@ -150,30 +150,26 @@ def test_csv_output_custom_quote_char(client, log_output): try: for idx, (quote_char, escape_char, input_data, expected_output) in enumerate(tests): - sql_opts = SelectObjectOptions( - expression="select * from s3object", - input_serialization=InputSerialization( - compression_type="NONE", - csv=CSVInput(file_header_info="NONE", - record_delimiter="\n", - field_delimiter=",", - quote_character='"', - quote_escape_character='"', - comments="#", - allow_quoted_record_delimiter="FALSE", - ), + sql_opts = SelectRequest( + "select * from s3object", + CSVInputSerialization( + compression_type=COMPRESSION_TYPE_NONE, + file_header_info=FILE_HEADER_INFO_NONE, + record_delimiter="\n", + field_delimiter=",", + quote_character='"', + quote_escape_character='"', + comments="#", + allow_quoted_record_delimiter="FALSE", ), - output_serialization=OutputSerialization( - csv=CSVOutput(quote_fields="ALWAYS", - record_delimiter="\n", - field_delimiter=",", - quote_character=quote_char, - quote_escape_character=escape_char, - ) + CSVOutputSerialization( + quote_fields=QUOTE_FIELDS_ALWAYS, + record_delimiter="\n", + field_delimiter=",", + quote_character=quote_char, + quote_escape_character=escape_char, ), - request_progress=RequestProgress( - enabled="False" - ) + request_progress=False, ) test_sql_api(f'test_{idx}', client, bucket_name, diff --git a/mint/run/core/s3select/sql_ops.py b/mint/run/core/s3select/sql_ops.py index 7b67c535d..e43b06321 100644 --- a/mint/run/core/s3select/sql_ops.py +++ b/mint/run/core/s3select/sql_ops.py @@ -18,9 +18,12 @@ import io from datetime import datetime -from minio.select.options import (CSVInput, CSVOutput, InputSerialization, - JSONInput, JSONOutput, OutputSerialization, - RequestProgress, SelectObjectOptions) +from minio.selectrequest import (FILE_HEADER_INFO_NONE, JSON_TYPE_DOCUMENT, + QUOTE_FIELDS_ASNEEDED, CSVInputSerialization, + CSVOutputSerialization, + JSONInputSerialization, + JSONOutputSerialization, SelectRequest) + from utils import generate_bucket_name, generate_object_name @@ -42,17 +45,15 @@ def test_sql_expressions_custom_input_output(client, input_bytes, sql_input, continue try: log_output.args['total_tests'] += 1 - options = SelectObjectOptions( - expression=select_expression, - input_serialization=sql_input, - output_serialization=sql_output, - request_progress=RequestProgress( - enabled="False" - ) + sreq = SelectRequest( + select_expression, + sql_input, + sql_output, + request_progress=False ) data = client.select_object_content( - bucket_name, object_name, options) + bucket_name, object_name, sreq) # Get the records records = io.BytesIO() @@ -79,14 +80,13 @@ def test_sql_expressions_custom_input_output(client, input_bytes, sql_input, def test_sql_expressions(client, input_json_bytes, tests, log_output): - input_serialization = InputSerialization( + input_serialization = JSONInputSerialization( compression_type="NONE", - json=JSONInput(json_type="DOCUMENT"), + json_type=JSON_TYPE_DOCUMENT, ) - output_serialization = OutputSerialization( - csv=CSVOutput(quote_fields="ASNEEDED") - ) + output_serialization = CSVOutputSerialization( + quote_fields=QUOTE_FIELDS_ASNEEDED) test_sql_expressions_custom_input_output(client, input_json_bytes, input_serialization, output_serialization, tests, log_output) @@ -392,8 +392,8 @@ def test_sql_select_json(client, log_output): "Select s.rules[1].expr from S3Object s", b'{"expr":"y > x"}\n{}\n'), ] - input_serialization = InputSerialization(json=JSONInput(json_type="DOCUMENT")) - output_serialization = OutputSerialization(json=JSONOutput()) + input_serialization = JSONInputSerialization(json_type=JSON_TYPE_DOCUMENT) + output_serialization = JSONOutputSerialization() try: test_sql_expressions_custom_input_output(client, json_testcontent, input_serialization, output_serialization, tests, log_output) @@ -414,14 +414,11 @@ val4,val5,val6 ("select_1", "SELECT s._2 FROM S3Object as s", b'val2\nval5\n'), ] - input_serialization = InputSerialization( - csv=CSVInput( - file_header_info="NONE", - allow_quoted_record_delimiter="FALSE", - ), + input_serialization = CSVInputSerialization( + file_header_info=FILE_HEADER_INFO_NONE, + allow_quoted_record_delimiter="FALSE", ) - - output_serialization = OutputSerialization(csv=CSVOutput()) + output_serialization = CSVOutputSerialization() try: test_sql_expressions_custom_input_output(client, json_testcontent, input_serialization, output_serialization, tests, log_output) diff --git a/mint/run/core/s3select/tests.py b/mint/run/core/s3select/tests.py index 3ba0eb934..b13091137 100644 --- a/mint/run/core/s3select/tests.py +++ b/mint/run/core/s3select/tests.py @@ -21,6 +21,7 @@ from csv import (test_csv_input_custom_quote_char, test_csv_output_custom_quote_char) from minio import Minio + from sql_ops import (test_sql_datatypes, test_sql_functions_agg_cond_conv, test_sql_functions_date, test_sql_functions_string, test_sql_operators, test_sql_operators_precedence,