From be313f17586a3bb5d9691ba892771c9d6f65ee38 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Wed, 25 Sep 2019 16:26:21 -0700 Subject: [PATCH] S3 Select: Workaround java buffer size (#8312) Updates #7475 The Java implementation has a 128KB buffer and a message must be emitted before that buffer is used up. #7475 therefore limits the message size to 128KB. But up to 256 bytes are written to the buffer in each call. This means we must emit a message shorter than 128KB. Therefore we change the limit to 128KB minus 256 bytes. --- pkg/s3select/message.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pkg/s3select/message.go b/pkg/s3select/message.go index ec6a7e0ae..1733e905d 100644 --- a/pkg/s3select/message.go +++ b/pkg/s3select/message.go @@ -62,7 +62,12 @@ var recordsHeader = []byte{ } const ( - maxRecordMessageLength = 128 * 1024 // Chosen for compatibility with AWS JAVA SDK + // Chosen for compatibility with AWS JAVA SDK + // It has a a buffer size of 128K: + // https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-s3/src/main/java/com/amazonaws/services/s3/internal/eventstreaming/MessageDecoder.java#L26 + // but we must make sure there is always space to add 256 bytes: + // https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-s3/src/main/java/com/amazonaws/services/s3/model/SelectObjectContentEventStream.java#L197 + maxRecordMessageLength = (128 << 10) - 256 ) var ( @@ -83,7 +88,7 @@ func newRecordsMessage(payload []byte) []byte { } // payloadLenForMsgLen computes the length of the payload in a record -// message given the length of the message. +// message given the total length of the message. func payloadLenForMsgLen(messageLength int) int { headerLength := len(recordsHeader) payloadLength := messageLength - 4 - 4 - 4 - headerLength - 4