Joe Tsai has uploaded this change for review.
encoding/csv: deprecate ParseError.Column
The csv package is inconsistent about when it uses ParseError.Column.
When reporting ErrFieldCount, it is always set to zero.
When reporting ErrQuote, it reports the line where the record starts, but
the column of where the actual error occured, which is just wrong.
Reporting the error where the parser finally discovered the lack of
a matching quote seemed unhelpful (see #19019), so the other sensible
information is to just print the start of the record.
In this case, the column is useless.
Only when reporting ErrBareQuote is the column marginally useful, but
it is arguably easy to find that problem since this is the easy case
where the entire record is on a single line. However, there is the edge
case where ErrBareQuote is reported because of a mismatched quoted-string.
In this case, the starting line of the record is still the most useful
piece of information.
Deprecating ParseError.Column has the additional benefit that
a future implementation can avoid computing its value,
which computationally requires decoding all UTF-8 runes.
Updates #19019
Fixes #22352
Change-Id: I58c771ff5bd4fc30cad1876878e843f6fecc66bf
---
M src/encoding/csv/reader.go
M src/encoding/csv/reader_test.go
2 files changed, 40 insertions(+), 41 deletions(-)
diff --git a/src/encoding/csv/reader.go b/src/encoding/csv/reader.go
index e49240f..5bfad00 100644
--- a/src/encoding/csv/reader.go
+++ b/src/encoding/csv/reader.go
@@ -61,22 +61,21 @@
)
// A ParseError is returned for parsing errors.
-// The first line is 1. The first column is 0.
type ParseError struct {
- Line int // Line where the error occurred
- Column int // Column (rune index) where the error occurred
+ Line int // Line number of the erroneous record
+ Column int // Deprecated: No longer set.
Err error // The actual error
}
func (e *ParseError) Error() string {
- return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err)
+ return fmt.Sprintf("line %d: %s", e.Line, e.Err)
}
-// These are the errors that can be returned in ParseError.Error
+// These are the errors that can be returned in ParseError.Err.
var (
- ErrTrailingComma = errors.New("extra delimiter at end of line") // no longer used
+ ErrTrailingComma = errors.New("extra delimiter at end of line") // Deprecated: No longer used.
ErrBareQuote = errors.New("bare \" in non-quoted-field")
- ErrQuote = errors.New("extraneous \" in field")
+ ErrQuote = errors.New("extraneous or missing \" in quoted-field")
ErrFieldCount = errors.New("wrong number of fields in line")
)
@@ -85,17 +84,17 @@
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
// The exported fields can be changed to customize the details before the
// first call to Read or ReadAll.
-//
-//
type Reader struct {
// Comma is the field delimiter.
// It is set to comma (',') by NewReader.
Comma rune
+
// Comment, if not 0, is the comment character. Lines beginning with the
// Comment character without preceding whitespace are ignored.
// With leading whitespace the Comment character becomes part of the
// field, even if TrimLeadingSpace is true.
Comment rune
+
// FieldsPerRecord is the number of expected fields per record.
// If FieldsPerRecord is positive, Read requires each record to
// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
@@ -103,18 +102,22 @@
// have the same field count. If FieldsPerRecord is negative, no check is
// made and records may have a variable number of fields.
FieldsPerRecord int
+
// If LazyQuotes is true, a quote may appear in an unquoted field and a
// non-doubled quote may appear in a quoted field.
- LazyQuotes bool
- TrailingComma bool // ignored; here for backwards compatibility
+ LazyQuotes bool
+
// If TrimLeadingSpace is true, leading white space in a field is ignored.
// This is done even if the field delimiter, Comma, is white space.
TrimLeadingSpace bool
+
// ReuseRecord controls whether calls to Read may return a slice sharing
// the backing array of the previous call's returned slice for performance.
// By default, each call to Read returns newly allocated memory owned by the caller.
ReuseRecord bool
+ TrailingComma bool // Deprecated: No longer used.
+
line int
recordLine int // line where the current record started
column int
@@ -143,9 +146,8 @@
// error creates a new ParseError based on err.
func (r *Reader) error(err error) error {
return &ParseError{
- Line: r.recordLine,
- Column: r.column,
- Err: err,
+ Line: r.recordLine,
+ Err: err,
}
}
@@ -202,7 +204,6 @@
if r.FieldsPerRecord > 0 {
if len(record) != r.FieldsPerRecord {
- r.column = 0 // report at start of record
return record, r.error(ErrFieldCount)
}
} else if r.FieldsPerRecord == 0 {
@@ -212,8 +213,7 @@
}
// readRune reads one rune from r, folding \r\n to \n and keeping track
-// of how far into the line we have read. r.column will point to the start
-// of this rune, not the end of this rune.
+// of how far into the line we have read.
func (r *Reader) readRune() (rune, error) {
r1, _, err := r.r.ReadRune()
diff --git a/src/encoding/csv/reader_test.go b/src/encoding/csv/reader_test.go
index 3811629..0657fae 100644
--- a/src/encoding/csv/reader_test.go
+++ b/src/encoding/csv/reader_test.go
@@ -26,9 +26,8 @@
TrimLeadingSpace bool
ReuseRecord bool
- Error string
- Line int // Expected error line if != 0
- Column int // Expected error column if line != 0
+ Error error
+ Line int // Expected error line if != 0
}{
{
Name: "Simple",
@@ -140,7 +139,7 @@
{
Name: "BadDoubleQuotes",
Input: `a""b,c`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 1,
+ Error: ErrBareQuote, Line: 1,
},
{
Name: "TrimQuote",
@@ -151,30 +150,30 @@
{
Name: "BadBareQuote",
Input: `a "word","b"`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 2,
+ Error: ErrBareQuote, Line: 1,
},
{
Name: "BadTrailingQuote",
Input: `"a word",b"`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 10,
+ Error: ErrBareQuote, Line: 1,
},
{
Name: "ExtraneousQuote",
Input: `"a "word","b"`,
- Error: `extraneous " in field`, Line: 1, Column: 3,
+ Error: ErrQuote, Line: 1,
},
{
Name: "BadFieldCount",
UseFieldsPerRecord: true,
Input: "a,b,c\nd,e",
- Error: "wrong number of fields", Line: 2,
+ Error: ErrFieldCount, Line: 2,
},
{
Name: "BadFieldCount1",
UseFieldsPerRecord: true,
FieldsPerRecord: 2,
Input: `a,b,c`,
- Error: "wrong number of fields", Line: 1,
+ Error: ErrFieldCount, Line: 1,
},
{
Name: "FieldCount",
@@ -271,18 +270,16 @@
},
},
{ // issue 19019
- Name: "RecordLine1",
- Input: "a,\"b\nc\"d,e",
- Error: `extraneous " in field`,
- Line: 1,
- Column: 1,
+ Name: "RecordLine1",
+ Input: "a,\"b\nc\"d,e",
+ Error: ErrQuote,
+ Line: 1,
},
{
- Name: "RecordLine2",
- Input: "a,b\n\"d\n\n,e",
- Error: `extraneous " in field`,
- Line: 2,
- Column: 2,
+ Name: "RecordLine2",
+ Input: "a,b\n\"d\n\n,e",
+ Error: ErrQuote,
+ Line: 2,
},
{ // issue 21201
Name: "CRLFInQuotedField",
@@ -311,11 +308,13 @@
}
out, err := r.ReadAll()
perr, _ := err.(*ParseError)
- if tt.Error != "" {
- if err == nil || !strings.Contains(err.Error(), tt.Error) {
- t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
- } else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) {
- t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column)
+ if tt.Error != nil {
+ if perr == nil {
+ t.Errorf("expected ParseError")
+ } else if perr.Err != tt.Error {
+ t.Errorf("%s: got %v, want %q", tt.Name, err, tt.Error)
+ } else if tt.Line != perr.Line {
+ t.Errorf("%s: got line %d, want line %d", tt.Name, perr.Line, tt.Line)
}
} else if err != nil {
t.Errorf("%s: unexpected error %v", tt.Name, err)
To view, visit change 72050. To unsubscribe, or for help writing mail filters, visit settings.
Patch set 1:Run-TryBot +1
TryBots beginning. Status page: https://farmer.golang.org/try?commit=adf84dc5
TryBots are happy.
Patch set 1:TryBot-Result +1
I agree that ParseError.Column isn't really helpful and am in favor of deprecating it.
Maybe it would make sense to replace it with a new field for the current field in the record? (Basically the current len(r.fieldIndexes)+1 with the current implementation) Unlike the column, the number of the field in the record should always be somewhat meaningful as far as I remember, even for ErrFieldCount (where it would be the actual len of the record).
Joe Tsai abandoned this change.
To view, visit change 72050. To unsubscribe, or for help writing mail filters, visit settings.