sql, csv: Cache some values between Read() calls to gain performance (#9645)

The benchmark comparison below shows the improvement from this commit (old = before, new = after):

benchmark                                            old ns/op     new ns/op     delta
BenchmarkRead-8                                      2807          2189          -22.02%
BenchmarkReadWithFieldsPerRecord-8                   2802          2179          -22.23%
BenchmarkReadWithoutFieldsPerRecord-8                2824          2181          -22.77%
BenchmarkReadLargeFields-8                           3584          3371          -5.94%
BenchmarkReadReuseRecord-8                           2044          1480          -27.59%
BenchmarkReadReuseRecordWithFieldsPerRecord-8        2056          1483          -27.87%
BenchmarkReadReuseRecordWithoutFieldsPerRecord-8     2047          1482          -27.60%
BenchmarkReadReuseRecordLargeFields-8                2777          2594          -6.59%

benchmark                                            old allocs     new allocs     delta
BenchmarkRead-8                                      26             16             -38.46%
BenchmarkReadWithFieldsPerRecord-8                   26             16             -38.46%
BenchmarkReadWithoutFieldsPerRecord-8                26             16             -38.46%
BenchmarkReadLargeFields-8                           36             24             -33.33%
BenchmarkReadReuseRecord-8                           16             6              -62.50%
BenchmarkReadReuseRecordWithFieldsPerRecord-8        16             6              -62.50%
BenchmarkReadReuseRecordWithoutFieldsPerRecord-8     16             6              -62.50%
BenchmarkReadReuseRecordLargeFields-8                24             12             -50.00%

benchmark                                            old bytes     new bytes     delta
BenchmarkRead-8                                      672           664           -1.19%
BenchmarkReadWithFieldsPerRecord-8                   672           664           -1.19%
BenchmarkReadWithoutFieldsPerRecord-8                672           664           -1.19%
BenchmarkReadLargeFields-8                           3948          3936          -0.30%
BenchmarkReadReuseRecord-8                           32            24            -25.00%
BenchmarkReadReuseRecordWithFieldsPerRecord-8        32            24            -25.00%
BenchmarkReadReuseRecordWithoutFieldsPerRecord-8     32            24            -25.00%
BenchmarkReadReuseRecordLargeFields-8                2988          2976          -0.40%
master
Anis Elleuch 5 years ago committed by GitHub
parent bede525dc9
commit 6542bc4a03
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 57
      pkg/csvparser/reader.go

@ -171,6 +171,14 @@ type Reader struct {
// lastRecord is a record cache and only used when ReuseRecord == true. // lastRecord is a record cache and only used when ReuseRecord == true.
lastRecord []string lastRecord []string
// Caching some values between Read() calls for performance gain
cached bool
cachedQuoteEscapeLen int
cachedQuoteLen int
cachedEncodedQuote []byte
cachedCommaLen int
cachedQuotes string
} }
// NewReader returns a new Reader that reads from r. // NewReader returns a new Reader that reads from r.
@ -295,21 +303,20 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
return nil, errRead return nil, errRead
} }
var quoteEscape = r.QuoteEscape if !r.cached {
var quoteEscapeLen = utf8.RuneLen(quoteEscape) r.cachedQuoteEscapeLen = utf8.RuneLen(r.QuoteEscape)
var quote rune
var quoteLen int
if len(r.Quote) > 0 { if len(r.Quote) > 0 {
quote = r.Quote[0] r.cachedQuoteLen = utf8.RuneLen(r.Quote[0])
quoteLen = utf8.RuneLen(quote) r.cachedEncodedQuote = encodeRune(r.Quote[0])
r.cachedQuotes += string(r.Quote[0])
}
r.cachedCommaLen = utf8.RuneLen(r.Comma)
r.cachedQuotes += string(r.QuoteEscape)
r.cached = true
} }
encodedQuote := encodeRune(quote)
// Parse each field in the record. // Parse each field in the record.
var err error var err error
commaLen := utf8.RuneLen(r.Comma)
recLine := r.numLine // Starting line for record recLine := r.numLine // Starting line for record
r.recordBuffer = r.recordBuffer[:0] r.recordBuffer = r.recordBuffer[:0]
r.fieldIndexes = r.fieldIndexes[:0] r.fieldIndexes = r.fieldIndexes[:0]
@ -318,7 +325,7 @@ parseField:
if r.TrimLeadingSpace { if r.TrimLeadingSpace {
line = bytes.TrimLeftFunc(line, unicode.IsSpace) line = bytes.TrimLeftFunc(line, unicode.IsSpace)
} }
if len(line) == 0 || quoteLen == 0 || nextRune(line) != quote { if len(line) == 0 || r.cachedQuoteLen == 0 || nextRune(line) != r.Quote[0] {
// Non-quoted string field // Non-quoted string field
i := bytes.IndexRune(line, r.Comma) i := bytes.IndexRune(line, r.Comma)
field := line field := line
@ -329,7 +336,7 @@ parseField:
} }
// Check to make sure a quote does not appear in field. // Check to make sure a quote does not appear in field.
if !r.LazyQuotes { if !r.LazyQuotes {
if j := bytes.IndexRune(field, quote); j >= 0 { if j := bytes.IndexRune(field, r.Quote[0]); j >= 0 {
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])]) col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote} err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
break parseField break parseField
@ -338,37 +345,37 @@ parseField:
r.recordBuffer = append(r.recordBuffer, field...) r.recordBuffer = append(r.recordBuffer, field...)
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
if i >= 0 { if i >= 0 {
line = line[i+commaLen:] line = line[i+r.cachedCommaLen:]
continue parseField continue parseField
} }
break parseField break parseField
} else { } else {
// Quoted string field // Quoted string field
line = line[quoteLen:] line = line[r.cachedQuoteLen:]
for { for {
i := bytes.IndexAny(line, string(quote)+string(quoteEscape)) i := bytes.IndexAny(line, r.cachedQuotes)
if i >= 0 { if i >= 0 {
// Hit next quote or escape quote // Hit next quote or escape quote
r.recordBuffer = append(r.recordBuffer, line[:i]...) r.recordBuffer = append(r.recordBuffer, line[:i]...)
escape := nextRune(line[i:]) == quoteEscape escape := nextRune(line[i:]) == r.QuoteEscape
if escape { if escape {
line = line[i+quoteEscapeLen:] line = line[i+r.cachedQuoteEscapeLen:]
} else { } else {
line = line[i+quoteLen:] line = line[i+r.cachedQuoteLen:]
} }
switch rn := nextRune(line); { switch rn := nextRune(line); {
case escape && quoteEscape != quote: case escape && r.QuoteEscape != r.Quote[0]:
r.recordBuffer = append(r.recordBuffer, encodeRune(rn)...) r.recordBuffer = append(r.recordBuffer, encodeRune(rn)...)
line = line[utf8.RuneLen(rn):] line = line[utf8.RuneLen(rn):]
case rn == quote: case rn == r.Quote[0]:
// `""` sequence (append quote). // `""` sequence (append quote).
r.recordBuffer = append(r.recordBuffer, encodedQuote...) r.recordBuffer = append(r.recordBuffer, r.cachedEncodedQuote...)
line = line[quoteLen:] line = line[r.cachedQuoteLen:]
case rn == r.Comma: case rn == r.Comma:
// `",` sequence (end of field). // `",` sequence (end of field).
line = line[commaLen:] line = line[r.cachedCommaLen:]
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
continue parseField continue parseField
case lengthNL(line) == len(line): case lengthNL(line) == len(line):
@ -377,10 +384,10 @@ parseField:
break parseField break parseField
case r.LazyQuotes: case r.LazyQuotes:
// `"` sequence (bare quote). // `"` sequence (bare quote).
r.recordBuffer = append(r.recordBuffer, encodedQuote...) r.recordBuffer = append(r.recordBuffer, r.cachedEncodedQuote...)
default: default:
// `"*` sequence (invalid non-escaped quote). // `"*` sequence (invalid non-escaped quote).
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen]) col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-r.cachedQuoteLen])
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote} err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
break parseField break parseField
} }

Loading…
Cancel
Save