parent 15e2ea2c96
commit de924605a1
@@ -0,0 +1,131 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in https://golang.org/LICENSE

package csv_test

import (
	"encoding/csv"
	"fmt"
	"io"
	"log"
	"os"
	"strings"
)

func ExampleReader() {
	in := `first_name,last_name,username
"Rob","Pike",rob
Ken,Thompson,ken
"Robert","Griesemer","gri"
`
	r := csv.NewReader(strings.NewReader(in))

	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}

		fmt.Println(record)
	}
	// Output:
	// [first_name last_name username]
	// [Rob Pike rob]
	// [Ken Thompson ken]
	// [Robert Griesemer gri]
}

// This example shows how csv.Reader can be configured to handle other
// types of CSV files.
func ExampleReader_options() {
	in := `first_name;last_name;username
"Rob";"Pike";rob
# lines beginning with a # character are ignored
Ken;Thompson;ken
"Robert";"Griesemer";"gri"
`
	r := csv.NewReader(strings.NewReader(in))
	r.Comma = ';'
	r.Comment = '#'

	records, err := r.ReadAll()
	if err != nil {
		log.Fatal(err)
	}

	fmt.Print(records)
	// Output:
	// [[first_name last_name username] [Rob Pike rob] [Ken Thompson ken] [Robert Griesemer gri]]
}

func ExampleReader_ReadAll() {
	in := `first_name,last_name,username
"Rob","Pike",rob
Ken,Thompson,ken
"Robert","Griesemer","gri"
`
	r := csv.NewReader(strings.NewReader(in))

	records, err := r.ReadAll()
	if err != nil {
		log.Fatal(err)
	}

	fmt.Print(records)
	// Output:
	// [[first_name last_name username] [Rob Pike rob] [Ken Thompson ken] [Robert Griesemer gri]]
}

func ExampleWriter() {
	records := [][]string{
		{"first_name", "last_name", "username"},
		{"Rob", "Pike", "rob"},
		{"Ken", "Thompson", "ken"},
		{"Robert", "Griesemer", "gri"},
	}

	w := csv.NewWriter(os.Stdout)

	for _, record := range records {
		if err := w.Write(record); err != nil {
			log.Fatalln("error writing record to csv:", err)
		}
	}

	// Write any buffered data to the underlying writer (standard output).
	w.Flush()

	if err := w.Error(); err != nil {
		log.Fatal(err)
	}
	// Output:
	// first_name,last_name,username
	// Rob,Pike,rob
	// Ken,Thompson,ken
	// Robert,Griesemer,gri
}

func ExampleWriter_WriteAll() {
	records := [][]string{
		{"first_name", "last_name", "username"},
		{"Rob", "Pike", "rob"},
		{"Ken", "Thompson", "ken"},
		{"Robert", "Griesemer", "gri"},
	}

	w := csv.NewWriter(os.Stdout)
	w.WriteAll(records) // calls Flush internally

	if err := w.Error(); err != nil {
		log.Fatalln("error writing csv:", err)
	}
	// Output:
	// first_name,last_name,username
	// Rob,Pike,rob
	// Ken,Thompson,ken
	// Robert,Griesemer,gri
}
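The examples above allocate a fresh record on every call to Read. As a hedged sketch that is not part of this commit, the ReuseRecord option documented in the reader file below can be combined with Read to avoid those per-record allocations; the helper name readWithReuse is illustrative only, and the imports it needs (encoding/csv, fmt, io) are already used in the examples above.

func readWithReuse(rd io.Reader) error {
	r := csv.NewReader(rd)
	r.ReuseRecord = true // Read may reuse the previous record's backing array.
	for {
		record, err := r.Read()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		// Copy fields out if they must outlive the next call to Read.
		fmt.Println(record)
	}
}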
@@ -0,0 +1,70 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in https://golang.org/LICENSE

// +build gofuzz

package csv

import (
	"bytes"
	"fmt"
	"reflect"
)

func Fuzz(data []byte) int {
	score := 0
	buf := new(bytes.Buffer)

	for _, tt := range []Reader{
		{},
		{Comma: ';'},
		{Comma: '\t'},
		{LazyQuotes: true},
		{TrimLeadingSpace: true},
		{Comment: '#'},
		{Comment: ';'},
	} {
		r := NewReader(bytes.NewReader(data))
		r.Comma = tt.Comma
		r.Comment = tt.Comment
		r.LazyQuotes = tt.LazyQuotes
		r.TrimLeadingSpace = tt.TrimLeadingSpace

		records, err := r.ReadAll()
		if err != nil {
			continue
		}
		score = 1

		buf.Reset()
		w := NewWriter(buf)
		w.Comma = tt.Comma
		err = w.WriteAll(records)
		if err != nil {
			fmt.Printf("writer = %#v\n", w)
			fmt.Printf("records = %v\n", records)
			panic(err)
		}

		r = NewReader(buf)
		r.Comma = tt.Comma
		r.Comment = tt.Comment
		r.LazyQuotes = tt.LazyQuotes
		r.TrimLeadingSpace = tt.TrimLeadingSpace
		result, err := r.ReadAll()
		if err != nil {
			fmt.Printf("reader = %#v\n", r)
			fmt.Printf("records = %v\n", records)
			panic(err)
		}

		if !reflect.DeepEqual(records, result) {
			fmt.Println("records = \n", records)
			fmt.Println("result = \n", result)
			panic("not equal")
		}
	}

	return score
}
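As a hedged sketch that is not part of this commit, the Fuzz entry point above can also be exercised directly from a plain test with a fixed seed input; the test name and seed bytes here are illustrative, the file would need to import "testing", and it would have to sit behind the same gofuzz build tag. It only checks that the harness does not panic, i.e. that the read/write round trip in Fuzz agrees with itself on the seed.

func TestFuzzSeed(t *testing.T) {
	// Runs the seed through every Reader configuration in Fuzz;
	// a panic here means reading and re-writing disagreed.
	Fuzz([]byte("a;b\nc;d\n"))
}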
@@ -0,0 +1,402 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in https://golang.org/LICENSE

// Package csv reads and writes comma-separated values (CSV) files.
// There are many kinds of CSV files; this package supports the format
// described in RFC 4180.
//
// A csv file contains zero or more records of one or more fields per record.
// Each record is separated by the newline character. The final record may
// optionally be followed by a newline character.
//
//	field1,field2,field3
//
// White space is considered part of a field.
//
// Carriage returns before newline characters are silently removed.
//
// Blank lines are ignored. A line with only whitespace characters (excluding
// the ending newline character) is not considered a blank line.
//
// Fields which start and stop with the quote character " are called
// quoted-fields. The beginning and ending quote are not part of the
// field.
//
// The source:
//
//	normal string,"quoted-field"
//
// results in the fields
//
//	{`normal string`, `quoted-field`}
//
// Within a quoted-field a quote character followed by a second quote
// character is considered a single quote.
//
//	"the ""word"" is true","a ""quoted-field"""
//
// results in
//
//	{`the "word" is true`, `a "quoted-field"`}
//
// Newlines and commas may be included in a quoted-field
//
//	"Multi-line
//	field","comma is ,"
//
// results in
//
//	{`Multi-line
//	field`, `comma is ,`}
package csv

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"unicode"
	"unicode/utf8"
)

// A ParseError is returned for parsing errors.
// Line numbers are 1-indexed and columns are 0-indexed.
type ParseError struct {
	StartLine int   // Line where the record starts
	Line      int   // Line where the error occurred
	Column    int   // Column (rune index) where the error occurred
	Err       error // The actual error
}

func (e *ParseError) Error() string {
	if e.Err == ErrFieldCount {
		return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
	}
	if e.StartLine != e.Line {
		return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
	}
	return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
}

// Unwrap returns the underlying error.
func (e *ParseError) Unwrap() error { return e.Err }

// These are the errors that can be returned in ParseError.Err.
var (
	ErrTrailingComma = errors.New("extra delimiter at end of line") // Deprecated: No longer used.
	ErrBareQuote     = errors.New("bare \" in non-quoted-field")
	ErrQuote         = errors.New("extraneous or missing \" in quoted-field")
	ErrFieldCount    = errors.New("wrong number of fields")
)

var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")

func validDelim(r rune) bool {
	return r != 0 && r != '"' && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError
}

// A Reader reads records from a CSV-encoded file.
//
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
// The exported fields can be changed to customize the details before the
// first call to Read or ReadAll.
//
// The Reader converts all \r\n sequences in its input to plain \n,
// including in multiline field values, so that the returned data does
// not depend on which line-ending convention an input file uses.
type Reader struct {
	// Comma is the field delimiter.
	// It is set to comma (',') by NewReader.
	// Comma must be a valid rune and must not be \r, \n,
	// or the Unicode replacement character (0xFFFD).
	Comma rune

	// Comment, if not 0, is the comment character. Lines beginning with the
	// Comment character without preceding whitespace are ignored.
	// With leading whitespace the Comment character becomes part of the
	// field, even if TrimLeadingSpace is true.
	// Comment must be a valid rune and must not be \r, \n,
	// or the Unicode replacement character (0xFFFD).
	// It must also not be equal to Comma.
	Comment rune

	// FieldsPerRecord is the number of expected fields per record.
	// If FieldsPerRecord is positive, Read requires each record to
	// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
	// the number of fields in the first record, so that future records must
	// have the same field count. If FieldsPerRecord is negative, no check is
	// made and records may have a variable number of fields.
	FieldsPerRecord int

	// If LazyQuotes is true, a quote may appear in an unquoted field and a
	// non-doubled quote may appear in a quoted field.
	LazyQuotes bool

	// If TrimLeadingSpace is true, leading white space in a field is ignored.
	// This is done even if the field delimiter, Comma, is white space.
	TrimLeadingSpace bool

	// ReuseRecord controls whether calls to Read may return a slice sharing
	// the backing array of the previous call's returned slice for performance.
	// By default, each call to Read returns newly allocated memory owned by the caller.
	ReuseRecord bool

	TrailingComma bool // Deprecated: No longer used.

	r *bufio.Reader

	// numLine is the current line being read in the CSV file.
	numLine int

	// rawBuffer is a line buffer only used by the readLine method.
	rawBuffer []byte

	// recordBuffer holds the unescaped fields, one after another.
	// The fields can be accessed by using the indexes in fieldIndexes.
	// E.g., For the row `a,"b","c""d",e`, recordBuffer will contain `abc"de`
	// and fieldIndexes will contain the indexes [1, 2, 5, 6].
	recordBuffer []byte

	// fieldIndexes is an index of fields inside recordBuffer.
	// The i'th field ends at offset fieldIndexes[i] in recordBuffer.
	fieldIndexes []int

	// lastRecord is a record cache and only used when ReuseRecord == true.
	lastRecord []string
}

// NewReader returns a new Reader that reads from r.
func NewReader(r io.Reader) *Reader {
	return &Reader{
		Comma: ',',
		r:     bufio.NewReader(r),
	}
}

// Read reads one record (a slice of fields) from r.
// If the record has an unexpected number of fields,
// Read returns the record along with the error ErrFieldCount.
// Except for that case, Read always returns either a non-nil
// record or a non-nil error, but not both.
// If there is no data left to be read, Read returns nil, io.EOF.
// If ReuseRecord is true, the returned slice may be shared
// between multiple calls to Read.
func (r *Reader) Read() (record []string, err error) {
	if r.ReuseRecord {
		record, err = r.readRecord(r.lastRecord)
		r.lastRecord = record
	} else {
		record, err = r.readRecord(nil)
	}
	return record, err
}

// ReadAll reads all the remaining records from r.
// Each record is a slice of fields.
// A successful call returns err == nil, not err == io.EOF. Because ReadAll is
// defined to read until EOF, it does not treat end of file as an error to be
// reported.
func (r *Reader) ReadAll() (records [][]string, err error) {
	for {
		record, err := r.readRecord(nil)
		if err == io.EOF {
			return records, nil
		}
		if err != nil {
			return nil, err
		}
		records = append(records, record)
	}
}

// readLine reads the next line (with the trailing endline).
// If EOF is hit without a trailing endline, it will be omitted.
// If some bytes were read, then the error is never io.EOF.
// The result is only valid until the next call to readLine.
func (r *Reader) readLine() ([]byte, error) {
	line, err := r.r.ReadSlice('\n')
	if err == bufio.ErrBufferFull {
		r.rawBuffer = append(r.rawBuffer[:0], line...)
		for err == bufio.ErrBufferFull {
			line, err = r.r.ReadSlice('\n')
			r.rawBuffer = append(r.rawBuffer, line...)
		}
		line = r.rawBuffer
	}
	if len(line) > 0 && err == io.EOF {
		err = nil
		// For backwards compatibility, drop trailing \r before EOF.
		if line[len(line)-1] == '\r' {
			line = line[:len(line)-1]
		}
	}
	r.numLine++
	// Normalize \r\n to \n on all input lines.
	if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
		line[n-2] = '\n'
		line = line[:n-1]
	}
	return line, err
}

// lengthNL reports the number of bytes for the trailing \n.
func lengthNL(b []byte) int {
	if len(b) > 0 && b[len(b)-1] == '\n' {
		return 1
	}
	return 0
}

// nextRune returns the next rune in b or utf8.RuneError.
func nextRune(b []byte) rune {
	r, _ := utf8.DecodeRune(b)
	return r
}

func (r *Reader) readRecord(dst []string) ([]string, error) {
	if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
		return nil, errInvalidDelim
	}

	// Read line (automatically skipping past empty lines and any comments).
	var line, fullLine []byte
	var errRead error
	for errRead == nil {
		line, errRead = r.readLine()
		if r.Comment != 0 && nextRune(line) == r.Comment {
			line = nil
			continue // Skip comment lines
		}
		if errRead == nil && len(line) == lengthNL(line) {
			line = nil
			continue // Skip empty lines
		}
		fullLine = line
		break
	}
	if errRead == io.EOF {
		return nil, errRead
	}

	// Parse each field in the record.
	var err error
	const quoteLen = len(`"`)
	commaLen := utf8.RuneLen(r.Comma)
	recLine := r.numLine // Starting line for record
	r.recordBuffer = r.recordBuffer[:0]
	r.fieldIndexes = r.fieldIndexes[:0]
parseField:
	for {
		if r.TrimLeadingSpace {
			line = bytes.TrimLeftFunc(line, unicode.IsSpace)
		}
		if len(line) == 0 || line[0] != '"' {
			// Non-quoted string field
			i := bytes.IndexRune(line, r.Comma)
			field := line
			if i >= 0 {
				field = field[:i]
			} else {
				field = field[:len(field)-lengthNL(field)]
			}
			// Check to make sure a quote does not appear in field.
			if !r.LazyQuotes {
				if j := bytes.IndexByte(field, '"'); j >= 0 {
					col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
					err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
					break parseField
				}
			}
			r.recordBuffer = append(r.recordBuffer, field...)
			r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
			if i >= 0 {
				line = line[i+commaLen:]
				continue parseField
			}
			break parseField
		} else {
			// Quoted string field
			line = line[quoteLen:]
			for {
				i := bytes.IndexByte(line, '"')
				if i >= 0 {
					// Hit next quote.
					r.recordBuffer = append(r.recordBuffer, line[:i]...)
					line = line[i+quoteLen:]
					switch rn := nextRune(line); {
					case rn == '"':
						// `""` sequence (append quote).
						r.recordBuffer = append(r.recordBuffer, '"')
						line = line[quoteLen:]
					case rn == r.Comma:
						// `",` sequence (end of field).
						line = line[commaLen:]
						r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
						continue parseField
					case lengthNL(line) == len(line):
						// `"\n` sequence (end of line).
						r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
						break parseField
					case r.LazyQuotes:
						// `"` sequence (bare quote).
						r.recordBuffer = append(r.recordBuffer, '"')
					default:
						// `"*` sequence (invalid non-escaped quote).
						col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
						err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
						break parseField
					}
				} else if len(line) > 0 {
					// Hit end of line (copy all data so far).
					r.recordBuffer = append(r.recordBuffer, line...)
					if errRead != nil {
						break parseField
					}
					line, errRead = r.readLine()
					if errRead == io.EOF {
						errRead = nil
					}
					fullLine = line
				} else {
					// Abrupt end of file (EOF or error).
					if !r.LazyQuotes && errRead == nil {
						col := utf8.RuneCount(fullLine)
						err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
						break parseField
					}
					r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
					break parseField
				}
			}
		}
	}
	if err == nil {
		err = errRead
	}

	// Create a single string and create slices out of it.
	// This pins the memory of the fields together, but allocates once.
	str := string(r.recordBuffer) // Convert to string once to batch allocations
	dst = dst[:0]
	if cap(dst) < len(r.fieldIndexes) {
		dst = make([]string, len(r.fieldIndexes))
	}
	dst = dst[:len(r.fieldIndexes)]
	var preIdx int
	for i, idx := range r.fieldIndexes {
		dst[i] = str[preIdx:idx]
		preIdx = idx
	}

	// Check or update the expected fields per record.
	if r.FieldsPerRecord > 0 {
		if len(dst) != r.FieldsPerRecord && err == nil {
			err = &ParseError{StartLine: recLine, Line: recLine, Err: ErrFieldCount}
		}
	} else if r.FieldsPerRecord == 0 {
		r.FieldsPerRecord = len(dst)
	}
	return dst, err
}
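Because ParseError now carries an Unwrap method, callers on Go 1.13 or later can inspect failures with errors.Is and errors.As. The following is a minimal usage sketch from outside the package and is not part of this commit; the input string is chosen only so that ReadAll returns ErrQuote, and the program layout is illustrative.

package main

import (
	"encoding/csv"
	"errors"
	"fmt"
	"strings"
)

func main() {
	r := csv.NewReader(strings.NewReader("\"unterminated\n"))
	_, err := r.ReadAll()

	// errors.As recovers the *ParseError for its position fields.
	var pe *csv.ParseError
	if errors.As(err, &pe) {
		fmt.Printf("parse error at line %d, column %d\n", pe.Line, pe.Column)
	}
	// errors.Is reaches the sentinel through ParseError.Unwrap.
	if errors.Is(err, csv.ErrQuote) {
		fmt.Println("a closing quote is missing")
	}
}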
@@ -0,0 +1,509 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in https://golang.org/LICENSE

package csv

import (
	"io"
	"reflect"
	"strings"
	"testing"
	"unicode/utf8"
)

func TestRead(t *testing.T) {
	tests := []struct {
		Name   string
		Input  string
		Output [][]string
		Error  error

		// These fields are copied into the Reader
		Comma              rune
		Comment            rune
		UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
		FieldsPerRecord    int
		LazyQuotes         bool
		TrimLeadingSpace   bool
		ReuseRecord        bool
	}{{
		Name:   "Simple",
		Input:  "a,b,c\n",
		Output: [][]string{{"a", "b", "c"}},
	}, {
		Name:   "CRLF",
		Input:  "a,b\r\nc,d\r\n",
		Output: [][]string{{"a", "b"}, {"c", "d"}},
	}, {
		Name:   "BareCR",
		Input:  "a,b\rc,d\r\n",
		Output: [][]string{{"a", "b\rc", "d"}},
	}, {
		Name: "RFC4180test",
		Input: `#field1,field2,field3
"aaa","bb
b","ccc"
"a,a","b""bb","ccc"
zzz,yyy,xxx
`,
		Output: [][]string{
			{"#field1", "field2", "field3"},
			{"aaa", "bb\nb", "ccc"},
			{"a,a", `b"bb`, "ccc"},
			{"zzz", "yyy", "xxx"},
		},
		UseFieldsPerRecord: true,
		FieldsPerRecord:    0,
	}, {
		Name:   "NoEOLTest",
		Input:  "a,b,c",
		Output: [][]string{{"a", "b", "c"}},
	}, {
		Name:   "Semicolon",
		Input:  "a;b;c\n",
		Output: [][]string{{"a", "b", "c"}},
		Comma:  ';',
	}, {
		Name: "MultiLine",
		Input: `"two
line","one line","three
line
field"`,
		Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
	}, {
		Name:  "BlankLine",
		Input: "a,b,c\n\nd,e,f\n\n",
		Output: [][]string{
			{"a", "b", "c"},
			{"d", "e", "f"},
		},
	}, {
		Name:  "BlankLineFieldCount",
		Input: "a,b,c\n\nd,e,f\n\n",
		Output: [][]string{
			{"a", "b", "c"},
			{"d", "e", "f"},
		},
		UseFieldsPerRecord: true,
		FieldsPerRecord:    0,
	}, {
		Name:             "TrimSpace",
		Input:            " a, b, c\n",
		Output:           [][]string{{"a", "b", "c"}},
		TrimLeadingSpace: true,
	}, {
		Name:   "LeadingSpace",
		Input:  " a, b, c\n",
		Output: [][]string{{" a", " b", " c"}},
	}, {
		Name:    "Comment",
		Input:   "#1,2,3\na,b,c\n#comment",
		Output:  [][]string{{"a", "b", "c"}},
		Comment: '#',
	}, {
		Name:   "NoComment",
		Input:  "#1,2,3\na,b,c",
		Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
	}, {
		Name:       "LazyQuotes",
		Input:      `a "word","1"2",a","b`,
		Output:     [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
		LazyQuotes: true,
	}, {
		Name:       "BareQuotes",
		Input:      `a "word","1"2",a"`,
		Output:     [][]string{{`a "word"`, `1"2`, `a"`}},
		LazyQuotes: true,
	}, {
		Name:       "BareDoubleQuotes",
		Input:      `a""b,c`,
		Output:     [][]string{{`a""b`, `c`}},
		LazyQuotes: true,
	}, {
		Name:  "BadDoubleQuotes",
		Input: `a""b,c`,
		Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
	}, {
		Name:             "TrimQuote",
		Input:            ` "a"," b",c`,
		Output:           [][]string{{"a", " b", "c"}},
		TrimLeadingSpace: true,
	}, {
		Name:  "BadBareQuote",
		Input: `a "word","b"`,
		Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
	}, {
		Name:  "BadTrailingQuote",
		Input: `"a word",b"`,
		Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote},
	}, {
		Name:  "ExtraneousQuote",
		Input: `"a "word","b"`,
		Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote},
	}, {
		Name:               "BadFieldCount",
		Input:              "a,b,c\nd,e",
		Error:              &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount},
		UseFieldsPerRecord: true,
		FieldsPerRecord:    0,
	}, {
		Name:               "BadFieldCount1",
		Input:              `a,b,c`,
		Error:              &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
		UseFieldsPerRecord: true,
		FieldsPerRecord:    2,
	}, {
		Name:   "FieldCount",
		Input:  "a,b,c\nd,e",
		Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
	}, {
		Name:   "TrailingCommaEOF",
		Input:  "a,b,c,",
		Output: [][]string{{"a", "b", "c", ""}},
	}, {
		Name:   "TrailingCommaEOL",
		Input:  "a,b,c,\n",
		Output: [][]string{{"a", "b", "c", ""}},
	}, {
		Name:             "TrailingCommaSpaceEOF",
		Input:            "a,b,c, ",
		Output:           [][]string{{"a", "b", "c", ""}},
		TrimLeadingSpace: true,
	}, {
		Name:             "TrailingCommaSpaceEOL",
		Input:            "a,b,c, \n",
		Output:           [][]string{{"a", "b", "c", ""}},
		TrimLeadingSpace: true,
	}, {
		Name:             "TrailingCommaLine3",
		Input:            "a,b,c\nd,e,f\ng,hi,",
		Output:           [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
		TrimLeadingSpace: true,
	}, {
		Name:   "NotTrailingComma3",
		Input:  "a,b,c, \n",
		Output: [][]string{{"a", "b", "c", " "}},
	}, {
		Name: "CommaFieldTest",
		Input: `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`,
		Output: [][]string{
			{"x", "y", "z", "w"},
			{"x", "y", "z", ""},
			{"x", "y", "", ""},
			{"x", "", "", ""},
			{"", "", "", ""},
			{"x", "y", "z", "w"},
			{"x", "y", "z", ""},
			{"x", "y", "", ""},
			{"x", "", "", ""},
			{"", "", "", ""},
		},
	}, {
		Name:  "TrailingCommaIneffective1",
		Input: "a,b,\nc,d,e",
		Output: [][]string{
			{"a", "b", ""},
			{"c", "d", "e"},
		},
		TrimLeadingSpace: true,
	}, {
		Name:  "ReadAllReuseRecord",
		Input: "a,b\nc,d",
		Output: [][]string{
			{"a", "b"},
			{"c", "d"},
		},
		ReuseRecord: true,
	}, {
		Name:  "StartLine1", // Issue 19019
		Input: "a,\"b\nc\"d,e",
		Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
	}, {
		Name:  "StartLine2",
		Input: "a,b\n\"d\n\n,e",
		Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
	}, {
		Name:  "CRLFInQuotedField", // Issue 21201
		Input: "A,\"Hello\r\nHi\",B\r\n",
		Output: [][]string{
			{"A", "Hello\nHi", "B"},
		},
	}, {
		Name:   "BinaryBlobField", // Issue 19410
		Input:  "x09\x41\xb4\x1c,aktau",
		Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
	}, {
		Name:   "TrailingCR",
		Input:  "field1,field2\r",
		Output: [][]string{{"field1", "field2"}},
	}, {
		Name:   "QuotedTrailingCR",
		Input:  "\"field\"\r",
		Output: [][]string{{"field"}},
	}, {
		Name:  "QuotedTrailingCRCR",
		Input: "\"field\"\r\r",
		Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
	}, {
		Name:   "FieldCR",
		Input:  "field\rfield\r",
		Output: [][]string{{"field\rfield"}},
	}, {
		Name:   "FieldCRCR",
		Input:  "field\r\rfield\r\r",
		Output: [][]string{{"field\r\rfield\r"}},
	}, {
		Name:   "FieldCRCRLF",
		Input:  "field\r\r\nfield\r\r\n",
		Output: [][]string{{"field\r"}, {"field\r"}},
	}, {
		Name:   "FieldCRCRLFCR",
		Input:  "field\r\r\n\rfield\r\r\n\r",
		Output: [][]string{{"field\r"}, {"\rfield\r"}},
	}, {
		Name:   "FieldCRCRLFCRCR",
		Input:  "field\r\r\n\r\rfield\r\r\n\r\r",
		Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
	}, {
		Name:  "MultiFieldCRCRLFCRCR",
		Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
		Output: [][]string{
			{"field1", "field2\r"},
			{"\r\rfield1", "field2\r"},
			{"\r\r", ""},
		},
	}, {
		Name:             "NonASCIICommaAndComment",
		Input:            "a£b,c£ \td,e\n€ comment\n",
		Output:           [][]string{{"a", "b,c", "d,e"}},
		TrimLeadingSpace: true,
		Comma:            '£',
		Comment:          '€',
	}, {
		Name:    "NonASCIICommaAndCommentWithQuotes",
		Input:   "a€\" b,\"€ c\nλ comment\n",
		Output:  [][]string{{"a", " b,", " c"}},
		Comma:   '€',
		Comment: 'λ',
	}, {
		// λ and θ start with the same byte.
		// This tests that the parser doesn't confuse such characters.
		Name:    "NonASCIICommaConfusion",
		Input:   "\"abθcd\"λefθgh",
		Output:  [][]string{{"abθcd", "efθgh"}},
		Comma:   'λ',
		Comment: '€',
	}, {
		Name:    "NonASCIICommentConfusion",
		Input:   "λ\nλ\nθ\nλ\n",
		Output:  [][]string{{"λ"}, {"λ"}, {"λ"}},
		Comment: 'θ',
	}, {
		Name:   "QuotedFieldMultipleLF",
		Input:  "\"\n\n\n\n\"",
		Output: [][]string{{"\n\n\n\n"}},
	}, {
		Name:  "MultipleCRLF",
		Input: "\r\n\r\n\r\n\r\n",
	}, {
		// The implementation may read each line in several chunks if it doesn't fit entirely
		// in the read buffer, so we should test the code to handle that condition.
		Name:    "HugeLines",
		Input:   strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
		Output:  [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
		Comment: '#',
	}, {
		Name:  "QuoteWithTrailingCRLF",
		Input: "\"foo\"bar\"\r\n",
		Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
	}, {
		Name:       "LazyQuoteWithTrailingCRLF",
		Input:      "\"foo\"bar\"\r\n",
		Output:     [][]string{{`foo"bar`}},
		LazyQuotes: true,
	}, {
		Name:   "DoubleQuoteWithTrailingCRLF",
		Input:  "\"foo\"\"bar\"\r\n",
		Output: [][]string{{`foo"bar`}},
	}, {
		Name:   "EvenQuotes",
		Input:  `""""""""`,
		Output: [][]string{{`"""`}},
	}, {
		Name:  "OddQuotes",
		Input: `"""""""`,
		Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote},
	}, {
		Name:       "LazyOddQuotes",
		Input:      `"""""""`,
		Output:     [][]string{{`"""`}},
		LazyQuotes: true,
	}, {
		Name:  "BadComma1",
		Comma: '\n',
		Error: errInvalidDelim,
	}, {
		Name:  "BadComma2",
		Comma: '\r',
		Error: errInvalidDelim,
	}, {
		Name:  "BadComma3",
		Comma: '"',
		Error: errInvalidDelim,
	}, {
		Name:  "BadComma4",
		Comma: utf8.RuneError,
		Error: errInvalidDelim,
	}, {
		Name:    "BadComment1",
		Comment: '\n',
		Error:   errInvalidDelim,
	}, {
		Name:    "BadComment2",
		Comment: '\r',
		Error:   errInvalidDelim,
	}, {
		Name:    "BadComment3",
		Comment: utf8.RuneError,
		Error:   errInvalidDelim,
	}, {
		Name:    "BadCommaComment",
		Comma:   'X',
		Comment: 'X',
		Error:   errInvalidDelim,
	}}

	for _, tt := range tests {
		t.Run(tt.Name, func(t *testing.T) {
			r := NewReader(strings.NewReader(tt.Input))

			if tt.Comma != 0 {
				r.Comma = tt.Comma
			}
			r.Comment = tt.Comment
			if tt.UseFieldsPerRecord {
				r.FieldsPerRecord = tt.FieldsPerRecord
			} else {
				r.FieldsPerRecord = -1
			}
			r.LazyQuotes = tt.LazyQuotes
			r.TrimLeadingSpace = tt.TrimLeadingSpace
			r.ReuseRecord = tt.ReuseRecord

			out, err := r.ReadAll()
			if !reflect.DeepEqual(err, tt.Error) {
				t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error)
			} else if !reflect.DeepEqual(out, tt.Output) {
				t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output)
			}
		})
	}
}

// nTimes is an io.Reader which yields the string s n times.
type nTimes struct {
	s   string
	n   int
	off int
}

func (r *nTimes) Read(p []byte) (n int, err error) {
	for {
		if r.n <= 0 || r.s == "" {
			return n, io.EOF
		}
		n0 := copy(p, r.s[r.off:])
		p = p[n0:]
		n += n0
		r.off += n0
		if r.off == len(r.s) {
			r.off = 0
			r.n--
		}
		if len(p) == 0 {
			return
		}
	}
}

// benchmarkRead measures reading the provided CSV rows data.
// initReader, if non-nil, modifies the Reader before it's used.
func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
	b.ReportAllocs()
	r := NewReader(&nTimes{s: rows, n: b.N})
	if initReader != nil {
		initReader(r)
	}
	for {
		_, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			b.Fatal(err)
		}
	}
}

const benchmarkCSVData = `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`

func BenchmarkRead(b *testing.B) {
	benchmarkRead(b, nil, benchmarkCSVData)
}

func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
}

func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
}

func BenchmarkReadLargeFields(b *testing.B) {
	benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}

func BenchmarkReadReuseRecord(b *testing.B) {
	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
}

func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
}

func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
}

func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}
@@ -0,0 +1,167 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in https://golang.org/LICENSE

package csv

import (
	"bufio"
	"io"
	"strings"
	"unicode"
	"unicode/utf8"
)

// A Writer writes records using CSV encoding.
//
// As returned by NewWriter, a Writer writes records terminated by a
// newline and uses ',' as the field delimiter. The exported fields can be
// changed to customize the details before the first call to Write or WriteAll.
//
// Comma is the field delimiter.
//
// If UseCRLF is true, the Writer ends each output line with \r\n instead of \n.
//
// The writes of individual records are buffered.
// After all data has been written, the client should call the
// Flush method to guarantee all data has been forwarded to
// the underlying io.Writer. Any errors that occurred should
// be checked by calling the Error method.
type Writer struct {
	Comma   rune // Field delimiter (set to ',' by NewWriter)
	UseCRLF bool // True to use \r\n as the line terminator
	w       *bufio.Writer
}

// NewWriter returns a new Writer that writes to w.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		Comma: ',',
		w:     bufio.NewWriter(w),
	}
}

// Write writes a single CSV record to w along with any necessary quoting.
// A record is a slice of strings with each string being one field.
// Writes are buffered, so Flush must eventually be called to ensure
// that the record is written to the underlying io.Writer.
func (w *Writer) Write(record []string) error {
	if !validDelim(w.Comma) {
		return errInvalidDelim
	}

	for n, field := range record {
		if n > 0 {
			if _, err := w.w.WriteRune(w.Comma); err != nil {
				return err
			}
		}

		// If we don't have to have a quoted field then just
		// write out the field and continue to the next field.
		if !w.fieldNeedsQuotes(field) {
			if _, err := w.w.WriteString(field); err != nil {
				return err
			}
			continue
		}

		if err := w.w.WriteByte('"'); err != nil {
			return err
		}
		for len(field) > 0 {
			// Search for special characters.
			i := strings.IndexAny(field, "\"\r\n")
			if i < 0 {
				i = len(field)
			}

			// Copy verbatim everything before the special character.
			if _, err := w.w.WriteString(field[:i]); err != nil {
				return err
			}
			field = field[i:]

			// Encode the special character.
			if len(field) > 0 {
				var err error
				switch field[0] {
				case '"':
					_, err = w.w.WriteString(`""`)
				case '\r':
					if !w.UseCRLF {
						err = w.w.WriteByte('\r')
					}
				case '\n':
					if w.UseCRLF {
						_, err = w.w.WriteString("\r\n")
					} else {
						err = w.w.WriteByte('\n')
					}
				}
				field = field[1:]
				if err != nil {
					return err
				}
			}
		}
		if err := w.w.WriteByte('"'); err != nil {
			return err
		}
	}
	var err error
	if w.UseCRLF {
		_, err = w.w.WriteString("\r\n")
	} else {
		err = w.w.WriteByte('\n')
	}
	return err
}

// Flush writes any buffered data to the underlying io.Writer.
// To check if an error occurred during the Flush, call Error.
func (w *Writer) Flush() {
	w.w.Flush()
}

// Error reports any error that has occurred during a previous Write or Flush.
func (w *Writer) Error() error {
	_, err := w.w.Write(nil)
	return err
}

// WriteAll writes multiple CSV records to w using Write and then calls Flush,
// returning any error from the Flush.
func (w *Writer) WriteAll(records [][]string) error {
	for _, record := range records {
		err := w.Write(record)
		if err != nil {
			return err
		}
	}
	return w.w.Flush()
}

// fieldNeedsQuotes reports whether our field must be enclosed in quotes.
// Fields with a Comma, fields with a quote or newline, and
// fields which start with a space must be enclosed in quotes.
// We used to quote empty strings, but we do not anymore (as of Go 1.4).
// The two representations should be equivalent, but Postgres distinguishes
// quoted vs non-quoted empty string during database imports, and it has
// an option to force the quoted behavior for non-quoted CSV but it has
// no option to force the non-quoted behavior for quoted CSV, making
// CSV with quoted empty strings strictly less useful.
// Not quoting the empty string also makes this package match the behavior
// of Microsoft Excel and Google Drive.
// For Postgres, quote the data terminating string `\.`.
func (w *Writer) fieldNeedsQuotes(field string) bool {
	if field == "" {
		return false
	}
	if field == `\.` || strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n") {
		return true
	}

	r1, _ := utf8.DecodeRuneInString(field)
	return unicode.IsSpace(r1)
}
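A small usage sketch, not part of this commit, showing the two writer options this file defines (a non-default Comma plus UseCRLF), with Error checked after Flush as the Writer doc comment above recommends. It assumes a caller outside the package with "bytes" and "encoding/csv" imported; the helper name writeSemicolonCRLF is illustrative only.

func writeSemicolonCRLF(records [][]string) (string, error) {
	var buf bytes.Buffer
	w := csv.NewWriter(&buf)
	w.Comma = ';'    // field delimiter other than ','
	w.UseCRLF = true // terminate each record with \r\n
	for _, rec := range records {
		if err := w.Write(rec); err != nil {
			return "", err
		}
	}
	w.Flush()
	// Error reports any failure from the buffered writes or the Flush.
	return buf.String(), w.Error()
}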
@@ -0,0 +1,95 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in https://golang.org/LICENSE

package csv

import (
	"bytes"
	"errors"
	"testing"
)

var writeTests = []struct {
	Input   [][]string
	Output  string
	Error   error
	UseCRLF bool
	Comma   rune
}{
	{Input: [][]string{{"abc"}}, Output: "abc\n"},
	{Input: [][]string{{"abc"}}, Output: "abc\r\n", UseCRLF: true},
	{Input: [][]string{{`"abc"`}}, Output: `"""abc"""` + "\n"},
	{Input: [][]string{{`a"b`}}, Output: `"a""b"` + "\n"},
	{Input: [][]string{{`"a"b"`}}, Output: `"""a""b"""` + "\n"},
	{Input: [][]string{{" abc"}}, Output: `" abc"` + "\n"},
	{Input: [][]string{{"abc,def"}}, Output: `"abc,def"` + "\n"},
	{Input: [][]string{{"abc", "def"}}, Output: "abc,def\n"},
	{Input: [][]string{{"abc"}, {"def"}}, Output: "abc\ndef\n"},
	{Input: [][]string{{"abc\ndef"}}, Output: "\"abc\ndef\"\n"},
	{Input: [][]string{{"abc\ndef"}}, Output: "\"abc\r\ndef\"\r\n", UseCRLF: true},
	{Input: [][]string{{"abc\rdef"}}, Output: "\"abcdef\"\r\n", UseCRLF: true},
	{Input: [][]string{{"abc\rdef"}}, Output: "\"abc\rdef\"\n", UseCRLF: false},
	{Input: [][]string{{""}}, Output: "\n"},
	{Input: [][]string{{"", ""}}, Output: ",\n"},
	{Input: [][]string{{"", "", ""}}, Output: ",,\n"},
	{Input: [][]string{{"", "", "a"}}, Output: ",,a\n"},
	{Input: [][]string{{"", "a", ""}}, Output: ",a,\n"},
	{Input: [][]string{{"", "a", "a"}}, Output: ",a,a\n"},
	{Input: [][]string{{"a", "", ""}}, Output: "a,,\n"},
	{Input: [][]string{{"a", "", "a"}}, Output: "a,,a\n"},
	{Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"},
	{Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"},
	{Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"},
	{Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"},
	{Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"},
	{Input: [][]string{{"a", "a", ""}}, Output: "a|a|\n", Comma: '|'},
	{Input: [][]string{{",", ",", ""}}, Output: ",|,|\n", Comma: '|'},
	{Input: [][]string{{"foo"}}, Comma: '"', Error: errInvalidDelim},
}

func TestWrite(t *testing.T) {
	for n, tt := range writeTests {
		b := &bytes.Buffer{}
		f := NewWriter(b)
		f.UseCRLF = tt.UseCRLF
		if tt.Comma != 0 {
			f.Comma = tt.Comma
		}
		err := f.WriteAll(tt.Input)
		if err != tt.Error {
			t.Errorf("Unexpected error:\ngot %v\nwant %v", err, tt.Error)
		}
		out := b.String()
		if out != tt.Output {
			t.Errorf("#%d: out=%q want %q", n, out, tt.Output)
		}
	}
}

type errorWriter struct{}

func (e errorWriter) Write(b []byte) (int, error) {
	return 0, errors.New("Test")
}

func TestError(t *testing.T) {
	b := &bytes.Buffer{}
	f := NewWriter(b)
	f.Write([]string{"abc"})
	f.Flush()
	err := f.Error()

	if err != nil {
		t.Errorf("Unexpected error: %s\n", err)
	}

	f = NewWriter(errorWriter{})
	f.Write([]string{"abc"})
	f.Flush()
	err = f.Error()

	if err == nil {
		t.Error("Error should not be nil")
	}
}