Support configurable quote character parameter in Select (#8955)
parent
3ca9f5ffa3
commit
35ecc04223
@ -0,0 +1,18 @@ |
||||
#!/bin/bash -e |
||||
# |
||||
# Mint (C) 2020 Minio, Inc. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
# |
||||
|
||||
python -m pip install minio |
@ -0,0 +1,21 @@ |
||||
|
||||
## `s3select` tests |
||||
This directory serves as the location for Mint tests for s3select features. Top level `mint.sh` calls `run.sh` to execute tests. |
||||
|
||||
## Adding new tests |
||||
New tests are added into `s3select/tests.py` as new functions. |
||||
|
||||
## Running tests manually |
||||
- Set environment variables `MINT_DATA_DIR`, `MINT_MODE`, `SERVER_ENDPOINT`, `ACCESS_KEY`, `SECRET_KEY`, `SERVER_REGION` and `ENABLE_HTTPS` |
||||
- Call `run.sh` with output log file and error log file. for example |
||||
|
||||
```bash |
||||
export MINT_DATA_DIR=~/my-mint-dir |
||||
export MINT_MODE=core |
||||
export SERVER_ENDPOINT="play.min.io" |
||||
export ACCESS_KEY="Q3AM3UQ867SPQQA43P2F" |
||||
export SECRET_KEY="zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" |
||||
export ENABLE_HTTPS=1 |
||||
export SERVER_REGION=us-east-1 |
||||
./run.sh /tmp/output.log /tmp/error.log |
||||
``` |
@ -0,0 +1,28 @@ |
||||
#!/bin/bash |
||||
# |
||||
# Mint (C) 2020 Minio, Inc. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
# |
||||
|
||||
# handle command line arguments |
||||
if [ $# -ne 2 ]; then |
||||
echo "usage: run.sh <OUTPUT-LOG-FILE> <ERROR-LOG-FILE>" |
||||
exit -1 |
||||
fi |
||||
|
||||
output_log_file="$1" |
||||
error_log_file="$2" |
||||
|
||||
# run path style tests |
||||
python "./tests.py" 1>>"$output_log_file" 2>"$error_log_file" |
@ -0,0 +1,304 @@ |
||||
#!/usr/bin/env python |
||||
# -*- coding: utf-8 -*- |
||||
# MinIO Python Library for Amazon S3 Compatible Cloud Storage, |
||||
# (C) 2015-2020 MinIO, Inc. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
# from __future__ import division |
||||
# from __future__ import absolute_import |
||||
|
||||
import os |
||||
import io |
||||
from sys import exit |
||||
import uuid |
||||
import inspect |
||||
import json |
||||
import time |
||||
import traceback |
||||
|
||||
from minio import Minio |
||||
from minio.select.options import (SelectObjectOptions, CSVInput, |
||||
RequestProgress, InputSerialization, |
||||
OutputSerialization, CSVOutput, JsonOutput) |
||||
|
||||
class LogOutput(object): |
||||
""" |
||||
LogOutput is the class for log output. It is required standard for all |
||||
SDK tests controlled by mint. |
||||
Here are its attributes: |
||||
'name': name of the SDK under test, e.g. 's3select' |
||||
'function': name of the method/api under test with its signature |
||||
The following python code can be used to |
||||
pull args information of a <method> and to |
||||
put together with the method name: |
||||
<method>.__name__+'('+', '.join(args_list)+')' |
||||
e.g. 'remove_object(bucket_name, object_name)' |
||||
'args': method/api arguments with their values, in |
||||
dictionary form: {'arg1': val1, 'arg2': val2, ...} |
||||
'duration': duration of the whole test in milliseconds, |
||||
defaults to 0 |
||||
'alert': any extra information user is needed to be alerted about, |
||||
like whether this is a Blocker/Gateway/Server related |
||||
issue, etc., defaults to None |
||||
'message': descriptive error message, defaults to None |
||||
'error': stack-trace/exception message(only in case of failure), |
||||
actual low level exception/error thrown by the program, |
||||
defaults to None |
||||
'status': exit status, possible values are 'PASS', 'FAIL', 'NA', |
||||
defaults to 'PASS' |
||||
""" |
||||
|
||||
PASS = 'PASS' |
||||
FAIL = 'FAIL' |
||||
NA = 'NA' |
||||
|
||||
def __init__(self, meth, test_name): |
||||
self.__args_list = inspect.getargspec(meth).args[1:] |
||||
self.__name = 'minio-py:'+test_name |
||||
self.__function = meth.__name__+'('+', '.join(self.__args_list)+')' |
||||
self.__args = {} |
||||
self.__duration = 0 |
||||
self.__alert = '' |
||||
self.__message = None |
||||
self.__error = None |
||||
self.__status = self.PASS |
||||
self.__start_time = time.time() |
||||
|
||||
@property |
||||
def name(self): return self.__name |
||||
|
||||
@property |
||||
def function(self): return self.__function |
||||
|
||||
@property |
||||
def args(self): return self.__args |
||||
|
||||
@name.setter |
||||
def name(self, val): self.__name = val |
||||
|
||||
@function.setter |
||||
def function(self, val): self.__function = val |
||||
|
||||
@args.setter |
||||
def args(self, val): self.__args = val |
||||
|
||||
def json_report(self, err_msg='', alert='', status=''): |
||||
self.__args = {k: v for k, v in self.__args.items() if v and v != ''} |
||||
entry = {'name': self.__name, |
||||
'function': self.__function, |
||||
'args': self.__args, |
||||
'duration': int(round((time.time() - self.__start_time)*1000)), |
||||
'alert': str(alert), |
||||
'message': str(err_msg), |
||||
'error': traceback.format_exc() if err_msg and err_msg != '' else '', |
||||
'status': status if status and status != '' else |
||||
self.FAIL if err_msg and err_msg != '' else self.PASS |
||||
} |
||||
return json.dumps({k: v for k, v in entry.items() if v and v != ''}) |
||||
|
||||
def generate_bucket_name(): |
||||
return "s3select-test-" + uuid.uuid4().__str__() |
||||
|
||||
|
||||
def test_csv_input_quote_char(client, log_output): |
||||
# Get a unique bucket_name and object_name |
||||
log_output.args['bucket_name'] = bucket_name = generate_bucket_name() |
||||
|
||||
tests = [ |
||||
# Invalid quote character, should fail |
||||
('""', b'col1,col2,col3\n', Exception()), |
||||
# UTF-8 quote character |
||||
('ع', b'\xd8\xb9col1\xd8\xb9,\xd8\xb9col2\xd8\xb9,\xd8\xb9col3\xd8\xb9\n', b'{"_1":"col1","_2":"col2","_3":"col3"}\n'), |
||||
# Only one field is quoted |
||||
('"', b'"col1",col2,col3\n', b'{"_1":"col1","_2":"col2","_3":"col3"}\n'), |
||||
('"', b'"col1,col2,col3"\n', b'{"_1":"col1,col2,col3"}\n'), |
||||
('\'', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'), |
||||
('', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'), |
||||
('', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'), |
||||
('', b'"col1","col2","col3"\n', b'{"_1":"\\"col1\\"","_2":"\\"col2\\"","_3":"\\"col3\\""}\n'), |
||||
('"', b'""""""\n', b'{"_1":"\\"\\""}\n'), |
||||
] |
||||
|
||||
try: |
||||
client.make_bucket(bucket_name) |
||||
|
||||
for idx, (quote_char, object_content, expected_output) in enumerate(tests): |
||||
options = SelectObjectOptions( |
||||
expression="select * from s3object", |
||||
input_serialization=InputSerialization( |
||||
compression_type="NONE", |
||||
csv=CSVInput(FileHeaderInfo="NONE", |
||||
RecordDelimiter="\n", |
||||
FieldDelimiter=",", |
||||
QuoteCharacter=quote_char, |
||||
QuoteEscapeCharacter=quote_char, |
||||
Comments="#", |
||||
AllowQuotedRecordDelimiter="FALSE",), |
||||
), |
||||
output_serialization=OutputSerialization( |
||||
json = JsonOutput( |
||||
RecordDelimiter="\n", |
||||
) |
||||
), |
||||
request_progress=RequestProgress( |
||||
enabled="False" |
||||
) |
||||
) |
||||
|
||||
got_output = b'' |
||||
|
||||
try: |
||||
got_output = exec_select(client, bucket_name, object_content, options, log_output) |
||||
except Exception as select_err: |
||||
if not isinstance(expected_output, Exception): |
||||
raise ValueError('Test {} unexpectedly failed with: {}'.format(idx+1, select_err)) |
||||
else: |
||||
if isinstance(expected_output, Exception): |
||||
raise ValueError('Test {}: expected an exception, got {}'.format(idx+1, got_output)) |
||||
if got_output != expected_output: |
||||
raise ValueError('Test {}: data mismatch. Expected : {}, Received {}'.format(idx+1, expected_output, got_output)) |
||||
|
||||
except Exception as err: |
||||
raise Exception(err) |
||||
finally: |
||||
try: |
||||
client.remove_bucket(bucket_name) |
||||
except Exception as err: |
||||
raise Exception(err) |
||||
|
||||
# Test passes |
||||
print(log_output.json_report()) |
||||
|
||||
def test_csv_output_quote_char(client, log_output): |
||||
# Get a unique bucket_name and object_name |
||||
log_output.args['bucket_name'] = bucket_name = generate_bucket_name() |
||||
|
||||
tests = [ |
||||
# UTF-8 quote character |
||||
("''", b'col1,col2,col3\n', Exception()), |
||||
("'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"), |
||||
("", b'col1,col2,col3\n', b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'), |
||||
('"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'), |
||||
('"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'), |
||||
('"', b'\n', b''), |
||||
] |
||||
|
||||
try: |
||||
client.make_bucket(bucket_name) |
||||
|
||||
for idx, (quote_char, object_content, expected_output) in enumerate(tests): |
||||
options = SelectObjectOptions( |
||||
expression="select * from s3object", |
||||
input_serialization=InputSerialization( |
||||
compression_type="NONE", |
||||
csv=CSVInput(FileHeaderInfo="NONE", |
||||
RecordDelimiter="\n", |
||||
FieldDelimiter=",", |
||||
QuoteCharacter='"', |
||||
QuoteEscapeCharacter='"', |
||||
Comments="#", |
||||
AllowQuotedRecordDelimiter="FALSE",), |
||||
), |
||||
output_serialization=OutputSerialization( |
||||
csv=CSVOutput(QuoteFields="ALWAYS", |
||||
RecordDelimiter="\n", |
||||
FieldDelimiter=",", |
||||
QuoteCharacter=quote_char, |
||||
QuoteEscapeCharacter=quote_char,) |
||||
), |
||||
request_progress=RequestProgress( |
||||
enabled="False" |
||||
) |
||||
) |
||||
|
||||
got_output = b'' |
||||
|
||||
try: |
||||
got_output = exec_select(client, bucket_name, object_content, options, log_output) |
||||
except Exception as select_err: |
||||
if not isinstance(expected_output, Exception): |
||||
raise ValueError('Test {} unexpectedly failed with: {}'.format(idx+1, select_err)) |
||||
else: |
||||
if isinstance(expected_output, Exception): |
||||
raise ValueError('Test {}: expected an exception, got {}'.format(idx+1, got_output)) |
||||
if got_output != expected_output: |
||||
raise ValueError('Test {}: data mismatch. Expected : {}. Received: {}.'.format(idx+1, expected_output, got_output)) |
||||
|
||||
except Exception as err: |
||||
raise Exception(err) |
||||
finally: |
||||
try: |
||||
client.remove_bucket(bucket_name) |
||||
except Exception as err: |
||||
raise Exception(err) |
||||
|
||||
# Test passes |
||||
print(log_output.json_report()) |
||||
|
||||
|
||||
def exec_select(client, bucket_name, object_content, options, log_output): |
||||
log_output.args['object_name'] = object_name = uuid.uuid4().__str__() |
||||
try: |
||||
bytes_content = io.BytesIO(object_content) |
||||
client.put_object(bucket_name, object_name, io.BytesIO(object_content), len(object_content)) |
||||
|
||||
data = client.select_object_content(bucket_name, object_name, options) |
||||
# Get the records |
||||
records = io.BytesIO() |
||||
for d in data.stream(10*1024): |
||||
records.write(d.encode('utf-8')) |
||||
|
||||
return records.getvalue() |
||||
|
||||
except Exception as err: |
||||
raise Exception(err) |
||||
finally: |
||||
try: |
||||
client.remove_object(bucket_name, object_name) |
||||
except Exception as err: |
||||
raise Exception(err) |
||||
|
||||
|
||||
def main(): |
||||
""" |
||||
Functional testing for S3 select. |
||||
""" |
||||
|
||||
try: |
||||
access_key = os.getenv('ACCESS_KEY', 'Q3AM3UQ867SPQQA43P2F') |
||||
secret_key = os.getenv('SECRET_KEY', |
||||
'zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG') |
||||
server_endpoint = os.getenv('SERVER_ENDPOINT', 'play.min.io') |
||||
secure = os.getenv('ENABLE_HTTPS', '1') == '1' |
||||
if server_endpoint == 'play.min.io': |
||||
access_key = 'Q3AM3UQ867SPQQA43P2F' |
||||
secret_key = 'zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG' |
||||
secure = True |
||||
|
||||
client = Minio(server_endpoint, access_key, secret_key, secure=secure) |
||||
|
||||
log_output = LogOutput(client.select_object_content, 'test_csv_input_quote_char') |
||||
test_csv_input_quote_char(client, log_output) |
||||
|
||||
log_output = LogOutput(client.select_object_content, 'test_csv_output_quote_char') |
||||
test_csv_output_quote_char(client, log_output) |
||||
|
||||
|
||||
except Exception as err: |
||||
print(log_output.json_report(err)) |
||||
exit(1) |
||||
|
||||
if __name__ == "__main__": |
||||
# Execute only if run as a script |
||||
main() |
Loading…
Reference in new issue