minio/pkg/csvparser/writer.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in https://golang.org/LICENSE

package csv

import (
	"bufio"
	"io"
	"strings"
	"unicode"
	"unicode/utf8"
)

// A Writer writes records using CSV encoding.
//
// As returned by NewWriter, a Writer writes records terminated by a
// newline, uses ',' as the field delimiter and '"' as both the quote and
// the quote-escape character. The exported fields can be changed to
// customize the details before the first call to Write or WriteAll.
//
// Comma is the field delimiter.
//
// Quote is the character written before and after a quoted field.
// QuoteEscape is the character written in front of every Quote that
// occurs inside a quoted field.
//
// If AlwaysQuote is true, every field is quoted, including fields that
// would not otherwise need it.
//
// If UseCRLF is true, the Writer ends each output line with \r\n instead of \n.
//
// The writes of individual records are buffered.
// After all data has been written, the client should call the
// Flush method to guarantee all data has been forwarded to
// the underlying io.Writer. Any errors that occurred should
// be checked by calling the Error method.
type Writer struct {
	Comma       rune // Field delimiter (set to ',' by NewWriter)
	Quote       rune // Field quote character (set to '"' by NewWriter)
	QuoteEscape rune // Written before each Quote inside a quoted field (set to '"' by NewWriter)
	AlwaysQuote bool // True to quote all fields
	UseCRLF     bool // True to use \r\n as the line terminator
	w           *bufio.Writer
}

// NewWriter returns a new Writer whose output is buffered and forwarded to w.
// The returned Writer uses ',' as the field delimiter and '"' as both the
// quote and the quote-escape character; AlwaysQuote and UseCRLF are off.
func NewWriter(w io.Writer) *Writer {
	writer := new(Writer)
	writer.Comma = ','
	writer.Quote = '"'
	writer.QuoteEscape = '"'
	writer.w = bufio.NewWriter(w)
	return writer
}

// Write writes a single CSV record to w along with any necessary quoting.
// A record is a slice of strings with each string being one field.
// Writes are buffered, so Flush must eventually be called to ensure
// that the record is written to the underlying io.Writer.
//
// A field is quoted when AlwaysQuote is set or when fieldNeedsQuotes
// reports true for it. Inside a quoted field every Quote is preceded by
// QuoteEscape. When UseCRLF is set, "\n" inside a quoted field is written
// as "\r\n" and "\r" is dropped; otherwise both are written unchanged.
//
// Write returns errInvalidDelim if Comma is not a valid delimiter, and
// otherwise the first error reported by the buffered writer, if any.
func (w *Writer) Write(record []string) error {
	if !validDelim(w.Comma) {
		return errInvalidDelim
	}

	for n, field := range record {
		if n > 0 {
			if _, err := w.w.WriteRune(w.Comma); err != nil {
				return err
			}
		}

		// If we don't have to have a quoted field then just
		// write out the field and continue to the next field.
		if !w.AlwaysQuote && !w.fieldNeedsQuotes(field) {
			if _, err := w.w.WriteString(field); err != nil {
				return err
			}
			continue
		}

		if _, err := w.w.WriteRune(w.Quote); err != nil {
			return err
		}

		// Characters that need special treatment inside a quoted field.
		specialChars := "\r\n" + string(w.Quote)

		for len(field) > 0 {
			// Search for special characters.
			i := strings.IndexAny(field, specialChars)
			if i < 0 {
				i = len(field)
			}

			// Copy verbatim everything before the special character.
			if _, err := w.w.WriteString(field[:i]); err != nil {
				return err
			}
			field = field[i:]
			if len(field) == 0 {
				break
			}

			// Decode the special character in place and step over all of
			// its bytes. Quote may be a multi-byte rune, so advancing by a
			// single byte would leak its trailing bytes into the output.
			r, size := utf8.DecodeRuneInString(field)
			field = field[size:]

			// Encode the special character.
			var err error
			switch r {
			case w.Quote:
				if _, err = w.w.WriteRune(w.QuoteEscape); err == nil {
					_, err = w.w.WriteRune(w.Quote)
				}
			case '\r':
				// With UseCRLF a lone '\r' is dropped; '\n' below emits "\r\n".
				if !w.UseCRLF {
					err = w.w.WriteByte('\r')
				}
			case '\n':
				if w.UseCRLF {
					_, err = w.w.WriteString("\r\n")
				} else {
					err = w.w.WriteByte('\n')
				}
			}
			if err != nil {
				return err
			}
		}
		if _, err := w.w.WriteRune(w.Quote); err != nil {
			return err
		}
	}

	// Terminate the record.
	if w.UseCRLF {
		_, err := w.w.WriteString("\r\n")
		return err
	}
	return w.w.WriteByte('\n')
}

// Flush pushes any buffered data through to the underlying io.Writer.
// The result of the flush is not returned from here; call Error afterwards
// to find out whether it failed.
func (w *Writer) Flush() {
	_ = w.w.Flush()
}

// Error reports any error that has occurred during a previous Write or Flush.
// It probes the buffered writer with a zero-length write and returns
// whatever error that write reports.
func (w *Writer) Error() error {
	var sticky error
	_, sticky = w.w.Write(nil)
	return sticky
}

// WriteAll writes multiple CSV records to w using Write and then calls Flush.
// It stops at the first record for which Write fails and returns that error
// without flushing; otherwise it returns the error, if any, from the Flush.
func (w *Writer) WriteAll(records [][]string) error {
	for i := range records {
		if err := w.Write(records[i]); err != nil {
			return err
		}
	}
	return w.w.Flush()
}

// fieldNeedsQuotes reports whether field must be enclosed in quotes.
//
// Quoting is required for a field that contains the Comma, the Quote, a
// carriage return or a newline, and for a field whose first rune is
// whitespace according to unicode.IsSpace.
//
// The empty string is never quoted. We used to quote empty strings, but we
// do not anymore (as of Go 1.4). The two representations should be
// equivalent, but Postgres distinguishes quoted vs non-quoted empty string
// during database imports; it can force the quoted behavior for non-quoted
// CSV but not the reverse, which makes CSV with quoted empty strings
// strictly less useful. Not quoting the empty string also matches the
// behavior of Microsoft Excel and Google Drive.
//
// For Postgres, the data terminating string `\.` is always quoted.
func (w *Writer) fieldNeedsQuotes(field string) bool {
	switch {
	case field == "":
		return false
	case field == `\.`:
		return true
	case strings.ContainsAny(field, "\r\n"+string(w.Quote)+string(w.Comma)):
		return true
	}

	// Otherwise only leading whitespace forces quoting.
	first, _ := utf8.DecodeRuneInString(field)
	return unicode.IsSpace(first)
}
Import CSV parser library (#8927) The CSV library code is imported from Go 1.13.6 5 years ago			`// Copyright 2011 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in https://golang.org/LICENSE`

			`package csv`

			`import (`
			`"bufio"`
			`"io"`
			`"strings"`
			`"unicode"`
			`"unicode/utf8"`
			`)`

			`// A Writer writes records using CSV encoding.`
			`//`
			`// As returned by NewWriter, a Writer writes records terminated by a`
			`// newline and uses ',' as the field delimiter. The exported fields can be`
			`// changed to customize the details before the first call to Write or WriteAll.`
			`//`
			`// Comma is the field delimiter.`
			`//`
			`// If UseCRLF is true, the Writer ends each output line with \r\n instead of \n.`
			`//`
			`// The writes of individual records are buffered.`
			`// After all data has been written, the client should call the`
			`// Flush method to guarantee all data has been forwarded to`
			`// the underlying io.Writer. Any errors that occurred should`
			`// be checked by calling the Error method.`
			`type Writer struct {`
Support configurable quote character parameter in Select (#8955) 5 years ago			`Comma rune // Field delimiter (set to ',' by NewWriter)`
			`Quote rune // Fields quote character`
sql: Add support of escape quote in CSV (#9231) This commit modifies csv parser, a fork of golang csv parser to support a custom quote escape character. The quote escape character is used to escape the quote character when a csv field contains a quote character as part of data. 5 years ago			`QuoteEscape rune`
Support configurable quote character parameter in Select (#8955) 5 years ago			`AlwaysQuote bool // True to quote all fields`
			`UseCRLF bool // True to use \r\n as the line terminator`
			`w *bufio.Writer`
Import CSV parser library (#8927) The CSV library code is imported from Go 1.13.6 5 years ago			`}`

			`// NewWriter returns a new Writer that writes to w.`
			`func NewWriter(w io.Writer) *Writer {`
			`return &Writer{`
sql: Add support of escape quote in CSV (#9231) This commit modifies csv parser, a fork of golang csv parser to support a custom quote escape character. The quote escape character is used to escape the quote character when a csv field contains a quote character as part of data. 5 years ago			`Comma: ',',`
			`Quote: '"',`
			`QuoteEscape: '"',`
			`w: bufio.NewWriter(w),`
Import CSV parser library (#8927) The CSV library code is imported from Go 1.13.6 5 years ago			`}`
			`}`

			`// Write writes a single CSV record to w along with any necessary quoting.`
			`// A record is a slice of strings with each string being one field.`
			`// Writes are buffered, so Flush must eventually be called to ensure`
			`// that the record is written to the underlying io.Writer.`
			`func (w *Writer) Write(record []string) error {`
			`if !validDelim(w.Comma) {`
			`return errInvalidDelim`
			`}`

			`for n, field := range record {`
			`if n > 0 {`
			`if _, err := w.w.WriteRune(w.Comma); err != nil {`
			`return err`
			`}`
			`}`

			`// If we don't have to have a quoted field then just`
			`// write out the field and continue to the next field.`
Support configurable quote character parameter in Select (#8955) 5 years ago			`if !w.AlwaysQuote && !w.fieldNeedsQuotes(field) {`
Import CSV parser library (#8927) The CSV library code is imported from Go 1.13.6 5 years ago			`if _, err := w.w.WriteString(field); err != nil {`
			`return err`
			`}`
			`continue`
			`}`

Support configurable quote character parameter in Select (#8955) 5 years ago			`if _, err := w.w.WriteRune(w.Quote); err != nil {`
Import CSV parser library (#8927) The CSV library code is imported from Go 1.13.6 5 years ago			`return err`
			`}`
Support configurable quote character parameter in Select (#8955) 5 years ago
			`specialChars := "\r\n" + string(w.Quote)`

Import CSV parser library (#8927) The CSV library code is imported from Go 1.13.6 5 years ago			`for len(field) > 0 {`
			`// Search for special characters.`
Support configurable quote character parameter in Select (#8955) 5 years ago			`i := strings.IndexAny(field, specialChars)`
Import CSV parser library (#8927) The CSV library code is imported from Go 1.13.6 5 years ago			`if i < 0 {`
			`i = len(field)`
			`}`

			`// Copy verbatim everything before the special character.`
			`if _, err := w.w.WriteString(field[:i]); err != nil {`
			`return err`
			`}`
			`field = field[i:]`

			`// Encode the special character.`
			`if len(field) > 0 {`
			`var err error`
Support configurable quote character parameter in Select (#8955) 5 years ago			`switch nextRune([]byte(field)) {`
			`case w.Quote:`
sql: Add support of escape quote in CSV (#9231) This commit modifies csv parser, a fork of golang csv parser to support a custom quote escape character. The quote escape character is used to escape the quote character when a csv field contains a quote character as part of data. 5 years ago			`_, err = w.w.WriteRune(w.QuoteEscape)`
Support configurable quote character parameter in Select (#8955) 5 years ago			`if err != nil {`
			`break`
			`}`
			`_, err = w.w.WriteRune(w.Quote)`
Import CSV parser library (#8927) The CSV library code is imported from Go 1.13.6 5 years ago			`case '\r':`
			`if !w.UseCRLF {`
			`err = w.w.WriteByte('\r')`
			`}`
			`case '\n':`
			`if w.UseCRLF {`
			`_, err = w.w.WriteString("\r\n")`
			`} else {`
			`err = w.w.WriteByte('\n')`
			`}`
			`}`
			`field = field[1:]`
			`if err != nil {`
			`return err`
			`}`
			`}`
			`}`
Support configurable quote character parameter in Select (#8955) 5 years ago			`if _, err := w.w.WriteRune(w.Quote); err != nil {`
Import CSV parser library (#8927) The CSV library code is imported from Go 1.13.6 5 years ago			`return err`
			`}`
			`}`
			`var err error`
			`if w.UseCRLF {`
			`_, err = w.w.WriteString("\r\n")`
			`} else {`
			`err = w.w.WriteByte('\n')`
			`}`
			`return err`
			`}`

			`// Flush writes any buffered data to the underlying io.Writer.`
			`// To check if an error occurred during the Flush, call Error.`
			`func (w *Writer) Flush() {`
			`w.w.Flush()`
			`}`

			`// Error reports any error that has occurred during a previous Write or Flush.`
			`func (w *Writer) Error() error {`
			`_, err := w.w.Write(nil)`
			`return err`
			`}`

			`// WriteAll writes multiple CSV records to w using Write and then calls Flush,`
			`// returning any error from the Flush.`
			`func (w *Writer) WriteAll(records [][]string) error {`
			`for _, record := range records {`
			`err := w.Write(record)`
			`if err != nil {`
			`return err`
			`}`
			`}`
			`return w.w.Flush()`
			`}`

			`// fieldNeedsQuotes reports whether our field must be enclosed in quotes.`
			`// Fields with a Comma, fields with a quote or newline, and`
			`// fields which start with a space must be enclosed in quotes.`
			`// We used to quote empty strings, but we do not anymore (as of Go 1.4).`
			`// The two representations should be equivalent, but Postgres distinguishes`
			`// quoted vs non-quoted empty string during database imports, and it has`
			`// an option to force the quoted behavior for non-quoted CSV but it has`
			`// no option to force the non-quoted behavior for quoted CSV, making`
			`// CSV with quoted empty strings strictly less useful.`
			`// Not quoting the empty string also makes this package match the behavior`
			`// of Microsoft Excel and Google Drive.`
			// For Postgres, quote the data terminating string `\.`.
			`func (w *Writer) fieldNeedsQuotes(field string) bool {`
			`if field == "" {`
			`return false`
			`}`
Support configurable quote character parameter in Select (#8955) 5 years ago			if field == `\.` \|\| strings.ContainsAny(field, "\r\n"+string(w.Quote)+string(w.Comma)) {
Import CSV parser library (#8927) The CSV library code is imported from Go 1.13.6 5 years ago			`return true`
			`}`

			`r1, _ := utf8.DecodeRuneInString(field)`
			`return unicode.IsSpace(r1)`
			`}`