parent
15e2ea2c96
commit
de924605a1
@ -0,0 +1,131 @@ |
|||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in https://golang.org/LICENSE
|
||||||
|
|
||||||
|
package csv_test |
||||||
|
|
||||||
|
import ( |
||||||
|
"encoding/csv" |
||||||
|
"fmt" |
||||||
|
"io" |
||||||
|
"log" |
||||||
|
"os" |
||||||
|
"strings" |
||||||
|
) |
||||||
|
|
||||||
|
// ExampleReader reads a small CSV document one record at a time with Read
// and prints each record as it is decoded.
func ExampleReader() {
	const data = `first_name,last_name,username
"Rob","Pike",rob
Ken,Thompson,ken
"Robert","Griesemer","gri"
`
	reader := csv.NewReader(strings.NewReader(data))

	for {
		fields, err := reader.Read()
		if err != nil {
			// io.EOF marks the normal end of input; anything else is fatal.
			if err == io.EOF {
				break
			}
			log.Fatal(err)
		}

		fmt.Println(fields)
	}
	// Output:
	// [first_name last_name username]
	// [Rob Pike rob]
	// [Ken Thompson ken]
	// [Robert Griesemer gri]
}
||||||
|
|
||||||
|
// This example demonstrates adjusting a csv.Reader's exported fields so that
// it can parse a dialect that uses ';' as the field delimiter and '#' to
// introduce comment lines.
func ExampleReader_options() {
	const data = `first_name;last_name;username
"Rob";"Pike";rob
# lines beginning with a # character are ignored
Ken;Thompson;ken
"Robert";"Griesemer";"gri"
`
	reader := csv.NewReader(strings.NewReader(data))
	reader.Comma, reader.Comment = ';', '#'

	all, err := reader.ReadAll()
	if err != nil {
		log.Fatal(err)
	}

	fmt.Print(all)
	// Output:
	// [[first_name last_name username] [Rob Pike rob] [Ken Thompson ken] [Robert Griesemer gri]]
}
||||||
|
|
||||||
|
// ExampleReader_ReadAll decodes an entire CSV document in a single ReadAll
// call and prints the resulting slice of records.
func ExampleReader_ReadAll() {
	const data = `first_name,last_name,username
"Rob","Pike",rob
Ken,Thompson,ken
"Robert","Griesemer","gri"
`
	all, err := csv.NewReader(strings.NewReader(data)).ReadAll()
	if err != nil {
		log.Fatal(err)
	}

	fmt.Print(all)
	// Output:
	// [[first_name last_name username] [Rob Pike rob] [Ken Thompson ken] [Robert Griesemer gri]]
}
||||||
|
|
||||||
|
// ExampleWriter encodes records one at a time with Write, then flushes the
// buffered output to standard output and checks for a deferred write error.
func ExampleWriter() {
	rows := [][]string{
		{"first_name", "last_name", "username"},
		{"Rob", "Pike", "rob"},
		{"Ken", "Thompson", "ken"},
		{"Robert", "Griesemer", "gri"},
	}

	out := csv.NewWriter(os.Stdout)

	for i := range rows {
		err := out.Write(rows[i])
		if err != nil {
			log.Fatalln("error writing record to csv:", err)
		}
	}

	// Nothing reaches standard output until the buffered data is flushed.
	out.Flush()

	err := out.Error()
	if err != nil {
		log.Fatal(err)
	}
	// Output:
	// first_name,last_name,username
	// Rob,Pike,rob
	// Ken,Thompson,ken
	// Robert,Griesemer,gri
}
||||||
|
|
||||||
|
// ExampleWriter_WriteAll encodes every record with a single WriteAll call and
// then inspects the writer for an error.
func ExampleWriter_WriteAll() {
	rows := [][]string{
		{"first_name", "last_name", "username"},
		{"Rob", "Pike", "rob"},
		{"Ken", "Thompson", "ken"},
		{"Robert", "Griesemer", "gri"},
	}

	out := csv.NewWriter(os.Stdout)
	out.WriteAll(rows) // WriteAll flushes internally, so no explicit Flush is needed.

	err := out.Error()
	if err != nil {
		log.Fatalln("error writing csv:", err)
	}
	// Output:
	// first_name,last_name,username
	// Rob,Pike,rob
	// Ken,Thompson,ken
	// Robert,Griesemer,gri
}
@ -0,0 +1,70 @@ |
|||||||
|
// Copyright 2019 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in https://golang.org/LICENSE
|
||||||
|
|
||||||
|
// +build gofuzz
|
||||||
|
|
||||||
|
package csv |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"fmt" |
||||||
|
"reflect" |
||||||
|
) |
||||||
|
|
||||||
|
func Fuzz(data []byte) int { |
||||||
|
score := 0 |
||||||
|
buf := new(bytes.Buffer) |
||||||
|
|
||||||
|
for _, tt := range []Reader{ |
||||||
|
{}, |
||||||
|
{Comma: ';'}, |
||||||
|
{Comma: '\t'}, |
||||||
|
{LazyQuotes: true}, |
||||||
|
{TrimLeadingSpace: true}, |
||||||
|
{Comment: '#'}, |
||||||
|
{Comment: ';'}, |
||||||
|
} { |
||||||
|
r := NewReader(bytes.NewReader(data)) |
||||||
|
r.Comma = tt.Comma |
||||||
|
r.Comment = tt.Comment |
||||||
|
r.LazyQuotes = tt.LazyQuotes |
||||||
|
r.TrimLeadingSpace = tt.TrimLeadingSpace |
||||||
|
|
||||||
|
records, err := r.ReadAll() |
||||||
|
if err != nil { |
||||||
|
continue |
||||||
|
} |
||||||
|
score = 1 |
||||||
|
|
||||||
|
buf.Reset() |
||||||
|
w := NewWriter(buf) |
||||||
|
w.Comma = tt.Comma |
||||||
|
err = w.WriteAll(records) |
||||||
|
if err != nil { |
||||||
|
fmt.Printf("writer = %#v\n", w) |
||||||
|
fmt.Printf("records = %v\n", records) |
||||||
|
panic(err) |
||||||
|
} |
||||||
|
|
||||||
|
r = NewReader(buf) |
||||||
|
r.Comma = tt.Comma |
||||||
|
r.Comment = tt.Comment |
||||||
|
r.LazyQuotes = tt.LazyQuotes |
||||||
|
r.TrimLeadingSpace = tt.TrimLeadingSpace |
||||||
|
result, err := r.ReadAll() |
||||||
|
if err != nil { |
||||||
|
fmt.Printf("reader = %#v\n", r) |
||||||
|
fmt.Printf("records = %v\n", records) |
||||||
|
panic(err) |
||||||
|
} |
||||||
|
|
||||||
|
if !reflect.DeepEqual(records, result) { |
||||||
|
fmt.Println("records = \n", records) |
||||||
|
fmt.Println("result = \n", records) |
||||||
|
panic("not equal") |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return score |
||||||
|
} |
@ -0,0 +1,402 @@ |
|||||||
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in https://golang.org/LICENSE
|
||||||
|
|
||||||
|
// Package csv reads and writes comma-separated values (CSV) files.
|
||||||
|
// There are many kinds of CSV files; this package supports the format
|
||||||
|
// described in RFC 4180.
|
||||||
|
//
|
||||||
|
// A csv file contains zero or more records of one or more fields per record.
|
||||||
|
// Each record is separated by the newline character. The final record may
|
||||||
|
// optionally be followed by a newline character.
|
||||||
|
//
|
||||||
|
// field1,field2,field3
|
||||||
|
//
|
||||||
|
// White space is considered part of a field.
|
||||||
|
//
|
||||||
|
// Carriage returns before newline characters are silently removed.
|
||||||
|
//
|
||||||
|
// Blank lines are ignored. A line with only whitespace characters (excluding
|
||||||
|
// the ending newline character) is not considered a blank line.
|
||||||
|
//
|
||||||
|
// Fields which start and stop with the quote character " are called
|
||||||
|
// quoted-fields. The beginning and ending quote are not part of the
|
||||||
|
// field.
|
||||||
|
//
|
||||||
|
// The source:
|
||||||
|
//
|
||||||
|
// normal string,"quoted-field"
|
||||||
|
//
|
||||||
|
// results in the fields
|
||||||
|
//
|
||||||
|
// {`normal string`, `quoted-field`}
|
||||||
|
//
|
||||||
|
// Within a quoted-field a quote character followed by a second quote
|
||||||
|
// character is considered a single quote.
|
||||||
|
//
|
||||||
|
// "the ""word"" is true","a ""quoted-field"""
|
||||||
|
//
|
||||||
|
// results in
|
||||||
|
//
|
||||||
|
// {`the "word" is true`, `a "quoted-field"`}
|
||||||
|
//
|
||||||
|
// Newlines and commas may be included in a quoted-field
|
||||||
|
//
|
||||||
|
// "Multi-line
|
||||||
|
// field","comma is ,"
|
||||||
|
//
|
||||||
|
// results in
|
||||||
|
//
|
||||||
|
// {`Multi-line
|
||||||
|
// field`, `comma is ,`}
|
||||||
|
package csv |
||||||
|
|
||||||
|
import ( |
||||||
|
"bufio" |
||||||
|
"bytes" |
||||||
|
"errors" |
||||||
|
"fmt" |
||||||
|
"io" |
||||||
|
"unicode" |
||||||
|
"unicode/utf8" |
||||||
|
) |
||||||
|
|
||||||
|
// ParseError describes a failure to parse CSV input.
// Line numbers count from 1; column numbers count from 0.
type ParseError struct {
	// StartLine is the line on which the record being parsed began.
	StartLine int
	// Line is the line on which the error was detected.
	Line int
	// Column is the rune index within Line at which the error was detected.
	Column int
	// Err is the underlying error.
	Err error
}
||||||
|
|
||||||
|
func (e *ParseError) Error() string { |
||||||
|
if e.Err == ErrFieldCount { |
||||||
|
return fmt.Sprintf("record on line %d: %v", e.Line, e.Err) |
||||||
|
} |
||||||
|
if e.StartLine != e.Line { |
||||||
|
return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err) |
||||||
|
} |
||||||
|
return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err) |
||||||
|
} |
||||||
|
|
||||||
|
// Unwrap returns the underlying error
|
||||||
|
func (e *ParseError) Unwrap() error { return e.Err } |
||||||
|
|
||||||
|
// Errors that may be stored in ParseError.Err.
var (
	// ErrTrailingComma is no longer produced.
	//
	// Deprecated: No longer used.
	ErrTrailingComma = errors.New("extra delimiter at end of line")

	// ErrBareQuote reports a quote character inside a non-quoted field.
	ErrBareQuote = errors.New("bare \" in non-quoted-field")

	// ErrQuote reports a stray or missing quote character in a quoted field.
	ErrQuote = errors.New("extraneous or missing \" in quoted-field")

	// ErrFieldCount reports a record whose number of fields is not the
	// expected one.
	ErrFieldCount = errors.New("wrong number of fields")
)

// errInvalidDelim is returned when the configured Comma or Comment rune
// cannot be used as a delimiter.
var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
||||||
|
|
||||||
|
// validDelim reports whether r may serve as a field or comment delimiter.
// NUL, the quote character, carriage return, newline, the Unicode
// replacement character and anything that is not a valid rune are rejected.
func validDelim(r rune) bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RuneError:
		return false
	}
	return utf8.ValidRune(r)
}
||||||
|
|
||||||
|
// Reader decodes records from CSV-encoded input.
//
// A Reader obtained from NewReader expects input that conforms to RFC 4180.
// Its exported fields may be adjusted to change that behaviour, provided this
// is done before the first call to Read or ReadAll.
//
// Every \r\n sequence in the input, including those inside multiline field
// values, is turned into a plain \n, so the data returned is independent of
// the line-ending convention used by the input.
type Reader struct {
	// Comma separates the fields of a record.
	// NewReader initializes it to ','.
	// It must be a valid rune other than \r, \n, and the Unicode
	// replacement character (0xFFFD).
	Comma rune

	// Comment, when non-zero, marks comment lines: a line whose first
	// character is Comment (with no whitespace before it) is skipped.
	// When whitespace precedes it, the Comment character is treated as
	// part of the field, even if TrimLeadingSpace is true.
	// It must be a valid rune other than \r, \n, and the Unicode
	// replacement character (0xFFFD), and it must differ from Comma.
	Comment rune

	// FieldsPerRecord is the expected number of fields in each record.
	// A positive value makes Read require exactly that many fields.
	// Zero makes Read adopt the field count of the first record, so that
	// later records must match it.
	// A negative value disables the check, allowing records with varying
	// numbers of fields.
	FieldsPerRecord int

	// LazyQuotes relaxes quote handling: a quote may appear in an unquoted
	// field, and a non-doubled quote may appear in a quoted field.
	LazyQuotes bool

	// TrimLeadingSpace causes leading white space in a field to be
	// dropped, even when Comma itself is white space.
	TrimLeadingSpace bool

	// ReuseRecord allows Read to return a slice that shares its backing
	// array with the slice returned by the previous call, for performance.
	// When false (the default), every call to Read returns freshly
	// allocated memory owned by the caller.
	ReuseRecord bool

	TrailingComma bool // Deprecated: No longer used.

	// r is the buffered source that lines are read from.
	r *bufio.Reader

	// numLine is the number of the line currently being read.
	numLine int

	// rawBuffer is scratch space used only by readLine to assemble lines.
	rawBuffer []byte

	// recordBuffer stores the unescaped bytes of every field of the
	// current record back to back; fieldIndexes says where each one ends.
	// For example, the row `a,"b","c""d",e` yields the recordBuffer
	// `abc"de` and the fieldIndexes [1, 2, 5, 6].
	recordBuffer []byte

	// fieldIndexes[i] is the offset in recordBuffer at which field i ends.
	fieldIndexes []int

	// lastRecord caches the previously returned record; it is used only
	// when ReuseRecord is true.
	lastRecord []string
}
||||||
|
|
||||||
|
// NewReader returns a new Reader that reads from r.
|
||||||
|
func NewReader(r io.Reader) *Reader { |
||||||
|
return &Reader{ |
||||||
|
Comma: ',', |
||||||
|
r: bufio.NewReader(r), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Read reads one record (a slice of fields) from r.
|
||||||
|
// If the record has an unexpected number of fields,
|
||||||
|
// Read returns the record along with the error ErrFieldCount.
|
||||||
|
// Except for that case, Read always returns either a non-nil
|
||||||
|
// record or a non-nil error, but not both.
|
||||||
|
// If there is no data left to be read, Read returns nil, io.EOF.
|
||||||
|
// If ReuseRecord is true, the returned slice may be shared
|
||||||
|
// between multiple calls to Read.
|
||||||
|
func (r *Reader) Read() (record []string, err error) { |
||||||
|
if r.ReuseRecord { |
||||||
|
record, err = r.readRecord(r.lastRecord) |
||||||
|
r.lastRecord = record |
||||||
|
} else { |
||||||
|
record, err = r.readRecord(nil) |
||||||
|
} |
||||||
|
return record, err |
||||||
|
} |
||||||
|
|
||||||
|
// ReadAll reads all the remaining records from r.
|
||||||
|
// Each record is a slice of fields.
|
||||||
|
// A successful call returns err == nil, not err == io.EOF. Because ReadAll is
|
||||||
|
// defined to read until EOF, it does not treat end of file as an error to be
|
||||||
|
// reported.
|
||||||
|
func (r *Reader) ReadAll() (records [][]string, err error) { |
||||||
|
for { |
||||||
|
record, err := r.readRecord(nil) |
||||||
|
if err == io.EOF { |
||||||
|
return records, nil |
||||||
|
} |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
records = append(records, record) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// readLine reads the next line (with the trailing endline).
|
||||||
|
// If EOF is hit without a trailing endline, it will be omitted.
|
||||||
|
// If some bytes were read, then the error is never io.EOF.
|
||||||
|
// The result is only valid until the next call to readLine.
|
||||||
|
func (r *Reader) readLine() ([]byte, error) { |
||||||
|
line, err := r.r.ReadSlice('\n') |
||||||
|
if err == bufio.ErrBufferFull { |
||||||
|
r.rawBuffer = append(r.rawBuffer[:0], line...) |
||||||
|
for err == bufio.ErrBufferFull { |
||||||
|
line, err = r.r.ReadSlice('\n') |
||||||
|
r.rawBuffer = append(r.rawBuffer, line...) |
||||||
|
} |
||||||
|
line = r.rawBuffer |
||||||
|
} |
||||||
|
if len(line) > 0 && err == io.EOF { |
||||||
|
err = nil |
||||||
|
// For backwards compatibility, drop trailing \r before EOF.
|
||||||
|
if line[len(line)-1] == '\r' { |
||||||
|
line = line[:len(line)-1] |
||||||
|
} |
||||||
|
} |
||||||
|
r.numLine++ |
||||||
|
// Normalize \r\n to \n on all input lines.
|
||||||
|
if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' { |
||||||
|
line[n-2] = '\n' |
||||||
|
line = line[:n-1] |
||||||
|
} |
||||||
|
return line, err |
||||||
|
} |
||||||
|
|
||||||
|
// lengthNL returns 1 when b ends with a newline byte and 0 otherwise,
// i.e. the number of bytes taken up by a trailing \n.
func lengthNL(b []byte) int {
	if n := len(b); n == 0 || b[n-1] != '\n' {
		return 0
	}
	return 1
}
||||||
|
|
||||||
|
// nextRune decodes and returns the first rune of b. When b is empty or does
// not begin with valid UTF-8, the result is utf8.RuneError.
func nextRune(b []byte) rune {
	first, _ := utf8.DecodeRune(b)
	return first
}
||||||
|
|
||||||
|
func (r *Reader) readRecord(dst []string) ([]string, error) { |
||||||
|
if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) { |
||||||
|
return nil, errInvalidDelim |
||||||
|
} |
||||||
|
|
||||||
|
// Read line (automatically skipping past empty lines and any comments).
|
||||||
|
var line, fullLine []byte |
||||||
|
var errRead error |
||||||
|
for errRead == nil { |
||||||
|
line, errRead = r.readLine() |
||||||
|
if r.Comment != 0 && nextRune(line) == r.Comment { |
||||||
|
line = nil |
||||||
|
continue // Skip comment lines
|
||||||
|
} |
||||||
|
if errRead == nil && len(line) == lengthNL(line) { |
||||||
|
line = nil |
||||||
|
continue // Skip empty lines
|
||||||
|
} |
||||||
|
fullLine = line |
||||||
|
break |
||||||
|
} |
||||||
|
if errRead == io.EOF { |
||||||
|
return nil, errRead |
||||||
|
} |
||||||
|
|
||||||
|
// Parse each field in the record.
|
||||||
|
var err error |
||||||
|
const quoteLen = len(`"`) |
||||||
|
commaLen := utf8.RuneLen(r.Comma) |
||||||
|
recLine := r.numLine // Starting line for record
|
||||||
|
r.recordBuffer = r.recordBuffer[:0] |
||||||
|
r.fieldIndexes = r.fieldIndexes[:0] |
||||||
|
parseField: |
||||||
|
for { |
||||||
|
if r.TrimLeadingSpace { |
||||||
|
line = bytes.TrimLeftFunc(line, unicode.IsSpace) |
||||||
|
} |
||||||
|
if len(line) == 0 || line[0] != '"' { |
||||||
|
// Non-quoted string field
|
||||||
|
i := bytes.IndexRune(line, r.Comma) |
||||||
|
field := line |
||||||
|
if i >= 0 { |
||||||
|
field = field[:i] |
||||||
|
} else { |
||||||
|
field = field[:len(field)-lengthNL(field)] |
||||||
|
} |
||||||
|
// Check to make sure a quote does not appear in field.
|
||||||
|
if !r.LazyQuotes { |
||||||
|
if j := bytes.IndexByte(field, '"'); j >= 0 { |
||||||
|
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])]) |
||||||
|
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote} |
||||||
|
break parseField |
||||||
|
} |
||||||
|
} |
||||||
|
r.recordBuffer = append(r.recordBuffer, field...) |
||||||
|
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) |
||||||
|
if i >= 0 { |
||||||
|
line = line[i+commaLen:] |
||||||
|
continue parseField |
||||||
|
} |
||||||
|
break parseField |
||||||
|
} else { |
||||||
|
// Quoted string field
|
||||||
|
line = line[quoteLen:] |
||||||
|
for { |
||||||
|
i := bytes.IndexByte(line, '"') |
||||||
|
if i >= 0 { |
||||||
|
// Hit next quote.
|
||||||
|
r.recordBuffer = append(r.recordBuffer, line[:i]...) |
||||||
|
line = line[i+quoteLen:] |
||||||
|
switch rn := nextRune(line); { |
||||||
|
case rn == '"': |
||||||
|
// `""` sequence (append quote).
|
||||||
|
r.recordBuffer = append(r.recordBuffer, '"') |
||||||
|
line = line[quoteLen:] |
||||||
|
case rn == r.Comma: |
||||||
|
// `",` sequence (end of field).
|
||||||
|
line = line[commaLen:] |
||||||
|
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) |
||||||
|
continue parseField |
||||||
|
case lengthNL(line) == len(line): |
||||||
|
// `"\n` sequence (end of line).
|
||||||
|
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) |
||||||
|
break parseField |
||||||
|
case r.LazyQuotes: |
||||||
|
// `"` sequence (bare quote).
|
||||||
|
r.recordBuffer = append(r.recordBuffer, '"') |
||||||
|
default: |
||||||
|
// `"*` sequence (invalid non-escaped quote).
|
||||||
|
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen]) |
||||||
|
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote} |
||||||
|
break parseField |
||||||
|
} |
||||||
|
} else if len(line) > 0 { |
||||||
|
// Hit end of line (copy all data so far).
|
||||||
|
r.recordBuffer = append(r.recordBuffer, line...) |
||||||
|
if errRead != nil { |
||||||
|
break parseField |
||||||
|
} |
||||||
|
line, errRead = r.readLine() |
||||||
|
if errRead == io.EOF { |
||||||
|
errRead = nil |
||||||
|
} |
||||||
|
fullLine = line |
||||||
|
} else { |
||||||
|
// Abrupt end of file (EOF or error).
|
||||||
|
if !r.LazyQuotes && errRead == nil { |
||||||
|
col := utf8.RuneCount(fullLine) |
||||||
|
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote} |
||||||
|
break parseField |
||||||
|
} |
||||||
|
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) |
||||||
|
break parseField |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
if err == nil { |
||||||
|
err = errRead |
||||||
|
} |
||||||
|
|
||||||
|
// Create a single string and create slices out of it.
|
||||||
|
// This pins the memory of the fields together, but allocates once.
|
||||||
|
str := string(r.recordBuffer) // Convert to string once to batch allocations
|
||||||
|
dst = dst[:0] |
||||||
|
if cap(dst) < len(r.fieldIndexes) { |
||||||
|
dst = make([]string, len(r.fieldIndexes)) |
||||||
|
} |
||||||
|
dst = dst[:len(r.fieldIndexes)] |
||||||
|
var preIdx int |
||||||
|
for i, idx := range r.fieldIndexes { |
||||||
|
dst[i] = str[preIdx:idx] |
||||||
|
preIdx = idx |
||||||
|
} |
||||||
|
|
||||||
|
// Check or update the expected fields per record.
|
||||||
|
if r.FieldsPerRecord > 0 { |
||||||
|
if len(dst) != r.FieldsPerRecord && err == nil { |
||||||
|
err = &ParseError{StartLine: recLine, Line: recLine, Err: ErrFieldCount} |
||||||
|
} |
||||||
|
} else if r.FieldsPerRecord == 0 { |
||||||
|
r.FieldsPerRecord = len(dst) |
||||||
|
} |
||||||
|
return dst, err |
||||||
|
} |
@ -0,0 +1,509 @@ |
|||||||
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in https://golang.org/LICENSE
|
||||||
|
|
||||||
|
package csv |
||||||
|
|
||||||
|
import ( |
||||||
|
"io" |
||||||
|
"reflect" |
||||||
|
"strings" |
||||||
|
"testing" |
||||||
|
"unicode/utf8" |
||||||
|
) |
||||||
|
|
||||||
|
func TestRead(t *testing.T) { |
||||||
|
tests := []struct { |
||||||
|
Name string |
||||||
|
Input string |
||||||
|
Output [][]string |
||||||
|
Error error |
||||||
|
|
||||||
|
// These fields are copied into the Reader
|
||||||
|
Comma rune |
||||||
|
Comment rune |
||||||
|
UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
|
||||||
|
FieldsPerRecord int |
||||||
|
LazyQuotes bool |
||||||
|
TrimLeadingSpace bool |
||||||
|
ReuseRecord bool |
||||||
|
}{{ |
||||||
|
Name: "Simple", |
||||||
|
Input: "a,b,c\n", |
||||||
|
Output: [][]string{{"a", "b", "c"}}, |
||||||
|
}, { |
||||||
|
Name: "CRLF", |
||||||
|
Input: "a,b\r\nc,d\r\n", |
||||||
|
Output: [][]string{{"a", "b"}, {"c", "d"}}, |
||||||
|
}, { |
||||||
|
Name: "BareCR", |
||||||
|
Input: "a,b\rc,d\r\n", |
||||||
|
Output: [][]string{{"a", "b\rc", "d"}}, |
||||||
|
}, { |
||||||
|
Name: "RFC4180test", |
||||||
|
Input: `#field1,field2,field3 |
||||||
|
"aaa","bb |
||||||
|
b","ccc" |
||||||
|
"a,a","b""bb","ccc" |
||||||
|
zzz,yyy,xxx |
||||||
|
`, |
||||||
|
Output: [][]string{ |
||||||
|
{"#field1", "field2", "field3"}, |
||||||
|
{"aaa", "bb\nb", "ccc"}, |
||||||
|
{"a,a", `b"bb`, "ccc"}, |
||||||
|
{"zzz", "yyy", "xxx"}, |
||||||
|
}, |
||||||
|
UseFieldsPerRecord: true, |
||||||
|
FieldsPerRecord: 0, |
||||||
|
}, { |
||||||
|
Name: "NoEOLTest", |
||||||
|
Input: "a,b,c", |
||||||
|
Output: [][]string{{"a", "b", "c"}}, |
||||||
|
}, { |
||||||
|
Name: "Semicolon", |
||||||
|
Input: "a;b;c\n", |
||||||
|
Output: [][]string{{"a", "b", "c"}}, |
||||||
|
Comma: ';', |
||||||
|
}, { |
||||||
|
Name: "MultiLine", |
||||||
|
Input: `"two |
||||||
|
line","one line","three |
||||||
|
line |
||||||
|
field"`, |
||||||
|
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, |
||||||
|
}, { |
||||||
|
Name: "BlankLine", |
||||||
|
Input: "a,b,c\n\nd,e,f\n\n", |
||||||
|
Output: [][]string{ |
||||||
|
{"a", "b", "c"}, |
||||||
|
{"d", "e", "f"}, |
||||||
|
}, |
||||||
|
}, { |
||||||
|
Name: "BlankLineFieldCount", |
||||||
|
Input: "a,b,c\n\nd,e,f\n\n", |
||||||
|
Output: [][]string{ |
||||||
|
{"a", "b", "c"}, |
||||||
|
{"d", "e", "f"}, |
||||||
|
}, |
||||||
|
UseFieldsPerRecord: true, |
||||||
|
FieldsPerRecord: 0, |
||||||
|
}, { |
||||||
|
Name: "TrimSpace", |
||||||
|
Input: " a, b, c\n", |
||||||
|
Output: [][]string{{"a", "b", "c"}}, |
||||||
|
TrimLeadingSpace: true, |
||||||
|
}, { |
||||||
|
Name: "LeadingSpace", |
||||||
|
Input: " a, b, c\n", |
||||||
|
Output: [][]string{{" a", " b", " c"}}, |
||||||
|
}, { |
||||||
|
Name: "Comment", |
||||||
|
Input: "#1,2,3\na,b,c\n#comment", |
||||||
|
Output: [][]string{{"a", "b", "c"}}, |
||||||
|
Comment: '#', |
||||||
|
}, { |
||||||
|
Name: "NoComment", |
||||||
|
Input: "#1,2,3\na,b,c", |
||||||
|
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, |
||||||
|
}, { |
||||||
|
Name: "LazyQuotes", |
||||||
|
Input: `a "word","1"2",a","b`, |
||||||
|
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, |
||||||
|
LazyQuotes: true, |
||||||
|
}, { |
||||||
|
Name: "BareQuotes", |
||||||
|
Input: `a "word","1"2",a"`, |
||||||
|
Output: [][]string{{`a "word"`, `1"2`, `a"`}}, |
||||||
|
LazyQuotes: true, |
||||||
|
}, { |
||||||
|
Name: "BareDoubleQuotes", |
||||||
|
Input: `a""b,c`, |
||||||
|
Output: [][]string{{`a""b`, `c`}}, |
||||||
|
LazyQuotes: true, |
||||||
|
}, { |
||||||
|
Name: "BadDoubleQuotes", |
||||||
|
Input: `a""b,c`, |
||||||
|
Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote}, |
||||||
|
}, { |
||||||
|
Name: "TrimQuote", |
||||||
|
Input: ` "a"," b",c`, |
||||||
|
Output: [][]string{{"a", " b", "c"}}, |
||||||
|
TrimLeadingSpace: true, |
||||||
|
}, { |
||||||
|
Name: "BadBareQuote", |
||||||
|
Input: `a "word","b"`, |
||||||
|
Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}, |
||||||
|
}, { |
||||||
|
Name: "BadTrailingQuote", |
||||||
|
Input: `"a word",b"`, |
||||||
|
Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote}, |
||||||
|
}, { |
||||||
|
Name: "ExtraneousQuote", |
||||||
|
Input: `"a "word","b"`, |
||||||
|
Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote}, |
||||||
|
}, { |
||||||
|
Name: "BadFieldCount", |
||||||
|
Input: "a,b,c\nd,e", |
||||||
|
Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount}, |
||||||
|
UseFieldsPerRecord: true, |
||||||
|
FieldsPerRecord: 0, |
||||||
|
}, { |
||||||
|
Name: "BadFieldCount1", |
||||||
|
Input: `a,b,c`, |
||||||
|
Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount}, |
||||||
|
UseFieldsPerRecord: true, |
||||||
|
FieldsPerRecord: 2, |
||||||
|
}, { |
||||||
|
Name: "FieldCount", |
||||||
|
Input: "a,b,c\nd,e", |
||||||
|
Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, |
||||||
|
}, { |
||||||
|
Name: "TrailingCommaEOF", |
||||||
|
Input: "a,b,c,", |
||||||
|
Output: [][]string{{"a", "b", "c", ""}}, |
||||||
|
}, { |
||||||
|
Name: "TrailingCommaEOL", |
||||||
|
Input: "a,b,c,\n", |
||||||
|
Output: [][]string{{"a", "b", "c", ""}}, |
||||||
|
}, { |
||||||
|
Name: "TrailingCommaSpaceEOF", |
||||||
|
Input: "a,b,c, ", |
||||||
|
Output: [][]string{{"a", "b", "c", ""}}, |
||||||
|
TrimLeadingSpace: true, |
||||||
|
}, { |
||||||
|
Name: "TrailingCommaSpaceEOL", |
||||||
|
Input: "a,b,c, \n", |
||||||
|
Output: [][]string{{"a", "b", "c", ""}}, |
||||||
|
TrimLeadingSpace: true, |
||||||
|
}, { |
||||||
|
Name: "TrailingCommaLine3", |
||||||
|
Input: "a,b,c\nd,e,f\ng,hi,", |
||||||
|
Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, |
||||||
|
TrimLeadingSpace: true, |
||||||
|
}, { |
||||||
|
Name: "NotTrailingComma3", |
||||||
|
Input: "a,b,c, \n", |
||||||
|
Output: [][]string{{"a", "b", "c", " "}}, |
||||||
|
}, { |
||||||
|
Name: "CommaFieldTest", |
||||||
|
Input: `x,y,z,w |
||||||
|
x,y,z, |
||||||
|
x,y,, |
||||||
|
x,,, |
||||||
|
,,, |
||||||
|
"x","y","z","w" |
||||||
|
"x","y","z","" |
||||||
|
"x","y","","" |
||||||
|
"x","","","" |
||||||
|
"","","","" |
||||||
|
`, |
||||||
|
Output: [][]string{ |
||||||
|
{"x", "y", "z", "w"}, |
||||||
|
{"x", "y", "z", ""}, |
||||||
|
{"x", "y", "", ""}, |
||||||
|
{"x", "", "", ""}, |
||||||
|
{"", "", "", ""}, |
||||||
|
{"x", "y", "z", "w"}, |
||||||
|
{"x", "y", "z", ""}, |
||||||
|
{"x", "y", "", ""}, |
||||||
|
{"x", "", "", ""}, |
||||||
|
{"", "", "", ""}, |
||||||
|
}, |
||||||
|
}, { |
||||||
|
Name: "TrailingCommaIneffective1", |
||||||
|
Input: "a,b,\nc,d,e", |
||||||
|
Output: [][]string{ |
||||||
|
{"a", "b", ""}, |
||||||
|
{"c", "d", "e"}, |
||||||
|
}, |
||||||
|
TrimLeadingSpace: true, |
||||||
|
}, { |
||||||
|
Name: "ReadAllReuseRecord", |
||||||
|
Input: "a,b\nc,d", |
||||||
|
Output: [][]string{ |
||||||
|
{"a", "b"}, |
||||||
|
{"c", "d"}, |
||||||
|
}, |
||||||
|
ReuseRecord: true, |
||||||
|
}, { |
||||||
|
Name: "StartLine1", // Issue 19019
|
||||||
|
Input: "a,\"b\nc\"d,e", |
||||||
|
Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote}, |
||||||
|
}, { |
||||||
|
Name: "StartLine2", |
||||||
|
Input: "a,b\n\"d\n\n,e", |
||||||
|
Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote}, |
||||||
|
}, { |
||||||
|
Name: "CRLFInQuotedField", // Issue 21201
|
||||||
|
Input: "A,\"Hello\r\nHi\",B\r\n", |
||||||
|
Output: [][]string{ |
||||||
|
{"A", "Hello\nHi", "B"}, |
||||||
|
}, |
||||||
|
}, { |
||||||
|
Name: "BinaryBlobField", // Issue 19410
|
||||||
|
Input: "x09\x41\xb4\x1c,aktau", |
||||||
|
Output: [][]string{{"x09A\xb4\x1c", "aktau"}}, |
||||||
|
}, { |
||||||
|
Name: "TrailingCR", |
||||||
|
Input: "field1,field2\r", |
||||||
|
Output: [][]string{{"field1", "field2"}}, |
||||||
|
}, { |
||||||
|
Name: "QuotedTrailingCR", |
||||||
|
Input: "\"field\"\r", |
||||||
|
Output: [][]string{{"field"}}, |
||||||
|
}, { |
||||||
|
Name: "QuotedTrailingCRCR", |
||||||
|
Input: "\"field\"\r\r", |
||||||
|
Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote}, |
||||||
|
}, { |
||||||
|
Name: "FieldCR", |
||||||
|
Input: "field\rfield\r", |
||||||
|
Output: [][]string{{"field\rfield"}}, |
||||||
|
}, { |
||||||
|
Name: "FieldCRCR", |
||||||
|
Input: "field\r\rfield\r\r", |
||||||
|
Output: [][]string{{"field\r\rfield\r"}}, |
||||||
|
}, { |
||||||
|
Name: "FieldCRCRLF", |
||||||
|
Input: "field\r\r\nfield\r\r\n", |
||||||
|
Output: [][]string{{"field\r"}, {"field\r"}}, |
||||||
|
}, { |
||||||
|
Name: "FieldCRCRLFCR", |
||||||
|
Input: "field\r\r\n\rfield\r\r\n\r", |
||||||
|
Output: [][]string{{"field\r"}, {"\rfield\r"}}, |
||||||
|
}, { |
||||||
|
Name: "FieldCRCRLFCRCR", |
||||||
|
Input: "field\r\r\n\r\rfield\r\r\n\r\r", |
||||||
|
Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}}, |
||||||
|
}, { |
||||||
|
Name: "MultiFieldCRCRLFCRCR", |
||||||
|
Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,", |
||||||
|
Output: [][]string{ |
||||||
|
{"field1", "field2\r"}, |
||||||
|
{"\r\rfield1", "field2\r"}, |
||||||
|
{"\r\r", ""}, |
||||||
|
}, |
||||||
|
}, { |
||||||
|
Name: "NonASCIICommaAndComment", |
||||||
|
Input: "a£b,c£ \td,e\n€ comment\n", |
||||||
|
Output: [][]string{{"a", "b,c", "d,e"}}, |
||||||
|
TrimLeadingSpace: true, |
||||||
|
Comma: '£', |
||||||
|
Comment: '€', |
||||||
|
}, { |
||||||
|
Name: "NonASCIICommaAndCommentWithQuotes", |
||||||
|
Input: "a€\" b,\"€ c\nλ comment\n", |
||||||
|
Output: [][]string{{"a", " b,", " c"}}, |
||||||
|
Comma: '€', |
||||||
|
Comment: 'λ', |
||||||
|
}, { |
||||||
|
// λ and θ start with the same byte.
|
||||||
|
// This tests that the parser doesn't confuse such characters.
|
||||||
|
Name: "NonASCIICommaConfusion", |
||||||
|
Input: "\"abθcd\"λefθgh", |
||||||
|
Output: [][]string{{"abθcd", "efθgh"}}, |
||||||
|
Comma: 'λ', |
||||||
|
Comment: '€', |
||||||
|
}, { |
||||||
|
Name: "NonASCIICommentConfusion", |
||||||
|
Input: "λ\nλ\nθ\nλ\n", |
||||||
|
Output: [][]string{{"λ"}, {"λ"}, {"λ"}}, |
||||||
|
Comment: 'θ', |
||||||
|
}, { |
||||||
|
Name: "QuotedFieldMultipleLF", |
||||||
|
Input: "\"\n\n\n\n\"", |
||||||
|
Output: [][]string{{"\n\n\n\n"}}, |
||||||
|
}, { |
||||||
|
Name: "MultipleCRLF", |
||||||
|
Input: "\r\n\r\n\r\n\r\n", |
||||||
|
}, { |
||||||
|
// The implementation may read each line in several chunks if it doesn't fit entirely
|
||||||
|
// in the read buffer, so we should test the code to handle that condition.
|
||||||
|
Name: "HugeLines", |
||||||
|
Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000), |
||||||
|
Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}}, |
||||||
|
Comment: '#', |
||||||
|
}, { |
||||||
|
Name: "QuoteWithTrailingCRLF", |
||||||
|
Input: "\"foo\"bar\"\r\n", |
||||||
|
Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote}, |
||||||
|
}, { |
||||||
|
Name: "LazyQuoteWithTrailingCRLF", |
||||||
|
Input: "\"foo\"bar\"\r\n", |
||||||
|
Output: [][]string{{`foo"bar`}}, |
||||||
|
LazyQuotes: true, |
||||||
|
}, { |
||||||
|
Name: "DoubleQuoteWithTrailingCRLF", |
||||||
|
Input: "\"foo\"\"bar\"\r\n", |
||||||
|
Output: [][]string{{`foo"bar`}}, |
||||||
|
}, { |
||||||
|
Name: "EvenQuotes", |
||||||
|
Input: `""""""""`, |
||||||
|
Output: [][]string{{`"""`}}, |
||||||
|
}, { |
||||||
|
Name: "OddQuotes", |
||||||
|
Input: `"""""""`, |
||||||
|
Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}, |
||||||
|
}, { |
||||||
|
Name: "LazyOddQuotes", |
||||||
|
Input: `"""""""`, |
||||||
|
Output: [][]string{{`"""`}}, |
||||||
|
LazyQuotes: true, |
||||||
|
}, { |
||||||
|
Name: "BadComma1", |
||||||
|
Comma: '\n', |
||||||
|
Error: errInvalidDelim, |
||||||
|
}, { |
||||||
|
Name: "BadComma2", |
||||||
|
Comma: '\r', |
||||||
|
Error: errInvalidDelim, |
||||||
|
}, { |
||||||
|
Name: "BadComma3", |
||||||
|
Comma: '"', |
||||||
|
Error: errInvalidDelim, |
||||||
|
}, { |
||||||
|
Name: "BadComma4", |
||||||
|
Comma: utf8.RuneError, |
||||||
|
Error: errInvalidDelim, |
||||||
|
}, { |
||||||
|
Name: "BadComment1", |
||||||
|
Comment: '\n', |
||||||
|
Error: errInvalidDelim, |
||||||
|
}, { |
||||||
|
Name: "BadComment2", |
||||||
|
Comment: '\r', |
||||||
|
Error: errInvalidDelim, |
||||||
|
}, { |
||||||
|
Name: "BadComment3", |
||||||
|
Comment: utf8.RuneError, |
||||||
|
Error: errInvalidDelim, |
||||||
|
}, { |
||||||
|
Name: "BadCommaComment", |
||||||
|
Comma: 'X', |
||||||
|
Comment: 'X', |
||||||
|
Error: errInvalidDelim, |
||||||
|
}} |
||||||
|
|
||||||
|
for _, tt := range tests { |
||||||
|
t.Run(tt.Name, func(t *testing.T) { |
||||||
|
r := NewReader(strings.NewReader(tt.Input)) |
||||||
|
|
||||||
|
if tt.Comma != 0 { |
||||||
|
r.Comma = tt.Comma |
||||||
|
} |
||||||
|
r.Comment = tt.Comment |
||||||
|
if tt.UseFieldsPerRecord { |
||||||
|
r.FieldsPerRecord = tt.FieldsPerRecord |
||||||
|
} else { |
||||||
|
r.FieldsPerRecord = -1 |
||||||
|
} |
||||||
|
r.LazyQuotes = tt.LazyQuotes |
||||||
|
r.TrimLeadingSpace = tt.TrimLeadingSpace |
||||||
|
r.ReuseRecord = tt.ReuseRecord |
||||||
|
|
||||||
|
out, err := r.ReadAll() |
||||||
|
if !reflect.DeepEqual(err, tt.Error) { |
||||||
|
t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error) |
||||||
|
} else if !reflect.DeepEqual(out, tt.Output) { |
||||||
|
t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) |
||||||
|
} |
||||||
|
}) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// nTimes is an io.Reader that serves the string s back to back, n times in
// total, and reports io.EOF once every repetition has been handed out.
type nTimes struct {
	s   string // payload emitted on each repetition
	n   int    // repetitions that have not been fully delivered yet
	off int    // read offset inside the current repetition of s
}

// Read copies as much of the remaining repetitions of s into p as fits.
// It returns a nil error when p has been filled completely, and io.EOF
// (possibly together with a non-zero byte count) once the repetitions run
// out or when s is empty.
func (r *nTimes) Read(p []byte) (n int, err error) {
	for r.n > 0 && r.s != "" {
		copied := copy(p, r.s[r.off:])
		n += copied
		p = p[copied:]
		r.off += copied

		// A repetition was consumed in full: rewind and count it off.
		if r.off == len(r.s) {
			r.off = 0
			r.n--
		}

		if len(p) == 0 {
			return n, nil
		}
	}
	return n, io.EOF
}
||||||
|
|
||||||
|
// benchmarkRead measures reading the provided CSV rows data.
|
||||||
|
// initReader, if non-nil, modifies the Reader before it's used.
|
||||||
|
func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) { |
||||||
|
b.ReportAllocs() |
||||||
|
r := NewReader(&nTimes{s: rows, n: b.N}) |
||||||
|
if initReader != nil { |
||||||
|
initReader(r) |
||||||
|
} |
||||||
|
for { |
||||||
|
_, err := r.Read() |
||||||
|
if err == io.EOF { |
||||||
|
break |
||||||
|
} |
||||||
|
if err != nil { |
||||||
|
b.Fatal(err) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// benchmarkCSVData is the input shared by the Read benchmarks: ten rows of
// four fields each, five of them unquoted and five quoted, with a growing
// number of empty fields in each group.
const benchmarkCSVData = "x,y,z,w\n" +
	"x,y,z,\n" +
	"x,y,,\n" +
	"x,,,\n" +
	",,,\n" +
	`"x","y","z","w"` + "\n" +
	`"x","y","z",""` + "\n" +
	`"x","y","",""` + "\n" +
	`"x","","",""` + "\n" +
	`"","","",""` + "\n"
||||||
|
|
||||||
|
func BenchmarkRead(b *testing.B) { |
||||||
|
benchmarkRead(b, nil, benchmarkCSVData) |
||||||
|
} |
||||||
|
|
||||||
|
func BenchmarkReadWithFieldsPerRecord(b *testing.B) { |
||||||
|
benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData) |
||||||
|
} |
||||||
|
|
||||||
|
func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) { |
||||||
|
benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData) |
||||||
|
} |
||||||
|
|
||||||
|
func BenchmarkReadLargeFields(b *testing.B) { |
||||||
|
benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv |
||||||
|
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv |
||||||
|
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv |
||||||
|
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv |
||||||
|
`, 3)) |
||||||
|
} |
||||||
|
|
||||||
|
func BenchmarkReadReuseRecord(b *testing.B) { |
||||||
|
benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData) |
||||||
|
} |
||||||
|
|
||||||
|
func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) { |
||||||
|
benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData) |
||||||
|
} |
||||||
|
|
||||||
|
func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) { |
||||||
|
benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData) |
||||||
|
} |
||||||
|
|
||||||
|
func BenchmarkReadReuseRecordLargeFields(b *testing.B) { |
||||||
|
benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv |
||||||
|
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv |
||||||
|
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv |
||||||
|
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv |
||||||
|
`, 3)) |
||||||
|
} |
@ -0,0 +1,167 @@ |
|||||||
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in https://golang.org/LICENSE
|
||||||
|
|
||||||
|
package csv |
||||||
|
|
||||||
|
import ( |
||||||
|
"bufio" |
||||||
|
"io" |
||||||
|
"strings" |
||||||
|
"unicode" |
||||||
|
"unicode/utf8" |
||||||
|
) |
||||||
|
|
||||||
|
// A Writer encodes records as CSV.
//
// A Writer obtained from NewWriter separates fields with ',' and ends each
// record with a newline. Its exported fields may be adjusted to customize
// the output, but only before the first call to Write or WriteAll.
//
// Comma is the field delimiter.
//
// If UseCRLF is true, every output line ends with \r\n instead of \n.
//
// Records are buffered as they are written. Once all data has been written,
// the client should call Flush to guarantee that everything has reached the
// underlying io.Writer, and then call Error to check whether any error
// occurred along the way.
type Writer struct {
	Comma   rune // Field delimiter (NewWriter sets it to ',')
	UseCRLF bool // True to terminate lines with \r\n
	w       *bufio.Writer
}
||||||
|
|
||||||
|
// NewWriter returns a new Writer that writes to w.
|
||||||
|
func NewWriter(w io.Writer) *Writer { |
||||||
|
return &Writer{ |
||||||
|
Comma: ',', |
||||||
|
w: bufio.NewWriter(w), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Write writes a single CSV record to w along with any necessary quoting.
|
||||||
|
// A record is a slice of strings with each string being one field.
|
||||||
|
// Writes are buffered, so Flush must eventually be called to ensure
|
||||||
|
// that the record is written to the underlying io.Writer.
|
||||||
|
func (w *Writer) Write(record []string) error { |
||||||
|
if !validDelim(w.Comma) { |
||||||
|
return errInvalidDelim |
||||||
|
} |
||||||
|
|
||||||
|
for n, field := range record { |
||||||
|
if n > 0 { |
||||||
|
if _, err := w.w.WriteRune(w.Comma); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// If we don't have to have a quoted field then just
|
||||||
|
// write out the field and continue to the next field.
|
||||||
|
if !w.fieldNeedsQuotes(field) { |
||||||
|
if _, err := w.w.WriteString(field); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
continue |
||||||
|
} |
||||||
|
|
||||||
|
if err := w.w.WriteByte('"'); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
for len(field) > 0 { |
||||||
|
// Search for special characters.
|
||||||
|
i := strings.IndexAny(field, "\"\r\n") |
||||||
|
if i < 0 { |
||||||
|
i = len(field) |
||||||
|
} |
||||||
|
|
||||||
|
// Copy verbatim everything before the special character.
|
||||||
|
if _, err := w.w.WriteString(field[:i]); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
field = field[i:] |
||||||
|
|
||||||
|
// Encode the special character.
|
||||||
|
if len(field) > 0 { |
||||||
|
var err error |
||||||
|
switch field[0] { |
||||||
|
case '"': |
||||||
|
_, err = w.w.WriteString(`""`) |
||||||
|
case '\r': |
||||||
|
if !w.UseCRLF { |
||||||
|
err = w.w.WriteByte('\r') |
||||||
|
} |
||||||
|
case '\n': |
||||||
|
if w.UseCRLF { |
||||||
|
_, err = w.w.WriteString("\r\n") |
||||||
|
} else { |
||||||
|
err = w.w.WriteByte('\n') |
||||||
|
} |
||||||
|
} |
||||||
|
field = field[1:] |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
if err := w.w.WriteByte('"'); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
var err error |
||||||
|
if w.UseCRLF { |
||||||
|
_, err = w.w.WriteString("\r\n") |
||||||
|
} else { |
||||||
|
err = w.w.WriteByte('\n') |
||||||
|
} |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
// Flush writes any buffered data to the underlying io.Writer.
|
||||||
|
// To check if an error occurred during the Flush, call Error.
|
||||||
|
func (w *Writer) Flush() { |
||||||
|
w.w.Flush() |
||||||
|
} |
||||||
|
|
||||||
|
// Error reports any error that has occurred during a previous Write or Flush.
|
||||||
|
func (w *Writer) Error() error { |
||||||
|
_, err := w.w.Write(nil) |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
// WriteAll writes multiple CSV records to w using Write and then calls Flush,
|
||||||
|
// returning any error from the Flush.
|
||||||
|
func (w *Writer) WriteAll(records [][]string) error { |
||||||
|
for _, record := range records { |
||||||
|
err := w.Write(record) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
return w.w.Flush() |
||||||
|
} |
||||||
|
|
||||||
|
// fieldNeedsQuotes reports whether our field must be enclosed in quotes.
|
||||||
|
// Fields with a Comma, fields with a quote or newline, and
|
||||||
|
// fields which start with a space must be enclosed in quotes.
|
||||||
|
// We used to quote empty strings, but we do not anymore (as of Go 1.4).
|
||||||
|
// The two representations should be equivalent, but Postgres distinguishes
|
||||||
|
// quoted vs non-quoted empty string during database imports, and it has
|
||||||
|
// an option to force the quoted behavior for non-quoted CSV but it has
|
||||||
|
// no option to force the non-quoted behavior for quoted CSV, making
|
||||||
|
// CSV with quoted empty strings strictly less useful.
|
||||||
|
// Not quoting the empty string also makes this package match the behavior
|
||||||
|
// of Microsoft Excel and Google Drive.
|
||||||
|
// For Postgres, quote the data terminating string `\.`.
|
||||||
|
func (w *Writer) fieldNeedsQuotes(field string) bool { |
||||||
|
if field == "" { |
||||||
|
return false |
||||||
|
} |
||||||
|
if field == `\.` || strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n") { |
||||||
|
return true |
||||||
|
} |
||||||
|
|
||||||
|
r1, _ := utf8.DecodeRuneInString(field) |
||||||
|
return unicode.IsSpace(r1) |
||||||
|
} |
@ -0,0 +1,95 @@ |
|||||||
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in https://golang.org/LICENSE
|
||||||
|
|
||||||
|
package csv |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"errors" |
||||||
|
"testing" |
||||||
|
) |
||||||
|
|
||||||
|
var writeTests = []struct { |
||||||
|
Input [][]string |
||||||
|
Output string |
||||||
|
Error error |
||||||
|
UseCRLF bool |
||||||
|
Comma rune |
||||||
|
}{ |
||||||
|
{Input: [][]string{{"abc"}}, Output: "abc\n"}, |
||||||
|
{Input: [][]string{{"abc"}}, Output: "abc\r\n", UseCRLF: true}, |
||||||
|
{Input: [][]string{{`"abc"`}}, Output: `"""abc"""` + "\n"}, |
||||||
|
{Input: [][]string{{`a"b`}}, Output: `"a""b"` + "\n"}, |
||||||
|
{Input: [][]string{{`"a"b"`}}, Output: `"""a""b"""` + "\n"}, |
||||||
|
{Input: [][]string{{" abc"}}, Output: `" abc"` + "\n"}, |
||||||
|
{Input: [][]string{{"abc,def"}}, Output: `"abc,def"` + "\n"}, |
||||||
|
{Input: [][]string{{"abc", "def"}}, Output: "abc,def\n"}, |
||||||
|
{Input: [][]string{{"abc"}, {"def"}}, Output: "abc\ndef\n"}, |
||||||
|
{Input: [][]string{{"abc\ndef"}}, Output: "\"abc\ndef\"\n"}, |
||||||
|
{Input: [][]string{{"abc\ndef"}}, Output: "\"abc\r\ndef\"\r\n", UseCRLF: true}, |
||||||
|
{Input: [][]string{{"abc\rdef"}}, Output: "\"abcdef\"\r\n", UseCRLF: true}, |
||||||
|
{Input: [][]string{{"abc\rdef"}}, Output: "\"abc\rdef\"\n", UseCRLF: false}, |
||||||
|
{Input: [][]string{{""}}, Output: "\n"}, |
||||||
|
{Input: [][]string{{"", ""}}, Output: ",\n"}, |
||||||
|
{Input: [][]string{{"", "", ""}}, Output: ",,\n"}, |
||||||
|
{Input: [][]string{{"", "", "a"}}, Output: ",,a\n"}, |
||||||
|
{Input: [][]string{{"", "a", ""}}, Output: ",a,\n"}, |
||||||
|
{Input: [][]string{{"", "a", "a"}}, Output: ",a,a\n"}, |
||||||
|
{Input: [][]string{{"a", "", ""}}, Output: "a,,\n"}, |
||||||
|
{Input: [][]string{{"a", "", "a"}}, Output: "a,,a\n"}, |
||||||
|
{Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"}, |
||||||
|
{Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"}, |
||||||
|
{Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"}, |
||||||
|
{Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"}, |
||||||
|
{Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"}, |
||||||
|
{Input: [][]string{{"a", "a", ""}}, Output: "a|a|\n", Comma: '|'}, |
||||||
|
{Input: [][]string{{",", ",", ""}}, Output: ",|,|\n", Comma: '|'}, |
||||||
|
{Input: [][]string{{"foo"}}, Comma: '"', Error: errInvalidDelim}, |
||||||
|
} |
||||||
|
|
||||||
|
func TestWrite(t *testing.T) { |
||||||
|
for n, tt := range writeTests { |
||||||
|
b := &bytes.Buffer{} |
||||||
|
f := NewWriter(b) |
||||||
|
f.UseCRLF = tt.UseCRLF |
||||||
|
if tt.Comma != 0 { |
||||||
|
f.Comma = tt.Comma |
||||||
|
} |
||||||
|
err := f.WriteAll(tt.Input) |
||||||
|
if err != tt.Error { |
||||||
|
t.Errorf("Unexpected error:\ngot %v\nwant %v", err, tt.Error) |
||||||
|
} |
||||||
|
out := b.String() |
||||||
|
if out != tt.Output { |
||||||
|
t.Errorf("#%d: out=%q want %q", n, out, tt.Output) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// errorWriter is an io.Writer whose Write always fails without consuming
// any input.
type errorWriter struct{}

// Write accepts nothing and always returns an error with the text "Test".
func (errorWriter) Write([]byte) (int, error) {
	return 0, errors.New("Test")
}
||||||
|
|
||||||
|
func TestError(t *testing.T) { |
||||||
|
b := &bytes.Buffer{} |
||||||
|
f := NewWriter(b) |
||||||
|
f.Write([]string{"abc"}) |
||||||
|
f.Flush() |
||||||
|
err := f.Error() |
||||||
|
|
||||||
|
if err != nil { |
||||||
|
t.Errorf("Unexpected error: %s\n", err) |
||||||
|
} |
||||||
|
|
||||||
|
f = NewWriter(errorWriter{}) |
||||||
|
f.Write([]string{"abc"}) |
||||||
|
f.Flush() |
||||||
|
err = f.Error() |
||||||
|
|
||||||
|
if err == nil { |
||||||
|
t.Error("Error should not be nil") |
||||||
|
} |
||||||
|
} |
Loading…
Reference in new issue