@ -18,11 +18,16 @@ package csv
import (
import (
"bytes"
"bytes"
"errors"
"fmt"
"io"
"io"
"io/ioutil"
"io/ioutil"
"path/filepath"
"reflect"
"strings"
"strings"
"testing"
"testing"
"github.com/klauspost/compress/zip"
"github.com/minio/minio/pkg/s3select/sql"
"github.com/minio/minio/pkg/s3select/sql"
)
)
@ -49,12 +54,12 @@ func TestRead(t *testing.T) {
QuoteCharacter : defaultQuoteCharacter ,
QuoteCharacter : defaultQuoteCharacter ,
QuoteEscapeCharacter : defaultQuoteEscapeCharacter ,
QuoteEscapeCharacter : defaultQuoteEscapeCharacter ,
CommentCharacter : defaultCommentCharacter ,
CommentCharacter : defaultCommentCharacter ,
AllowQuotedRecordDelimiter : tru e,
AllowQuotedRecordDelimiter : fals e,
unmarshaled : true ,
unmarshaled : true ,
} )
} )
for {
for {
record , err = r . Read ( )
record , err = r . Read ( record )
if err != nil {
if err != nil {
break
break
}
}
@ -72,3 +77,555 @@ func TestRead(t *testing.T) {
}
}
}
}
}
}
type tester interface {
Fatal ( ... interface { } )
}
func openTestFile ( t tester , file string ) [ ] byte {
f , err := ioutil . ReadFile ( filepath . Join ( "testdata/testdata.zip" ) )
if err != nil {
t . Fatal ( err )
}
z , err := zip . NewReader ( bytes . NewReader ( f ) , int64 ( len ( f ) ) )
if err != nil {
t . Fatal ( err )
}
for _ , f := range z . File {
if f . Name == file {
rc , err := f . Open ( )
if err != nil {
t . Fatal ( err )
}
defer rc . Close ( )
b , err := ioutil . ReadAll ( rc )
if err != nil {
t . Fatal ( err )
}
return b
}
}
t . Fatal ( file , "not found in testdata/testdata.zip" )
return nil
}
func TestReadExtended ( t * testing . T ) {
cases := [ ] struct {
file string
recordDelimiter string
fieldDelimiter string
header bool
wantColumns [ ] string
wantTenFields string
totalFields int
} {
{
file : "nyc-taxi-data-100k.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
header : true ,
wantColumns : [ ] string { "trip_id" , "vendor_id" , "pickup_datetime" , "dropoff_datetime" , "store_and_fwd_flag" , "rate_code_id" , "pickup_longitude" , "pickup_latitude" , "dropoff_longitude" , "dropoff_latitude" , "passenger_count" , "trip_distance" , "fare_amount" , "extra" , "mta_tax" , "tip_amount" , "tolls_amount" , "ehail_fee" , "improvement_surcharge" , "total_amount" , "payment_type" , "trip_type" , "pickup" , "dropoff" , "cab_type" , "precipitation" , "snow_depth" , "snowfall" , "max_temp" , "min_temp" , "wind" , "pickup_nyct2010_gid" , "pickup_ctlabel" , "pickup_borocode" , "pickup_boroname" , "pickup_ct2010" , "pickup_boroct2010" , "pickup_cdeligibil" , "pickup_ntacode" , "pickup_ntaname" , "pickup_puma" , "dropoff_nyct2010_gid" , "dropoff_ctlabel" , "dropoff_borocode" , "dropoff_boroname" , "dropoff_ct2010" , "dropoff_boroct2010" , "dropoff_cdeligibil" , "dropoff_ntacode" , "dropoff_ntaname" , "dropoff_puma" } ,
wantTenFields : ` 3389224 , 2 , 2014 - 03 - 26 00 : 26 : 15 , 2014 - 03 - 26 00 : 28 : 38 , N , 1 , - 73.950431823730469 , 40.792251586914063 , - 73.938949584960937 , 40.794425964355469 , 1 , 0.84 , 4.5 , 0.5 , 0.5 , 1 , 0 , , , 6.5 , 1 , 1 , 75 , 74 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1828 , 180 , 1 , Manhattan , 01 8000 , 1018000 , E , MN34 , East Harlem North , 3804
3389225 , 2 , 2014 - 03 - 31 0 9 : 42 : 15 , 2014 - 03 - 31 10 : 01 : 17 , N , 1 , - 73.950340270996094 , 40.792228698730469 , - 73.941970825195313 , 40.842235565185547 , 1 , 4.47 , 17.5 , 0 , 0.5 , 0 , 0 , , , 18 , 2 , 1 , 75 , 244 , green , 0.16 , 0.0 , 0.0 , 56 , 36 , 8.28 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 911 , 251 , 1 , Manhattan , 025100 , 1025100 , E , MN36 , Washington Heights South , 3801
3389226 , 2 , 2014 - 03 - 26 17 : 13 : 28 , 2014 - 03 - 26 17 : 19 : 07 , N , 1 , - 73.949493408203125 , 40.793506622314453 , - 73.943374633789063 , 40.786155700683594 , 1 , 0.82 , 5.5 , 1 , 0.5 , 0 , 0 , , , 7 , 1 , 1 , 75 , 75 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1387 , 164 , 1 , Manhattan , 016400 , 1016400 , E , MN33 , East Harlem South , 3804
3389227 , 2 , 2014 - 03 - 14 21 : 07 : 19 , 2014 - 03 - 14 21 : 11 : 41 , N , 1 , - 73.950538635253906 , 40.792228698730469 , - 73.940811157226563 , 40.809253692626953 , 1 , 1.40 , 6 , 0.5 , 0.5 , 0 , 0 , , , 7 , 2 , 1 , 75 , 42 , green , 0.00 , 0.0 , 0.0 , 46 , 22 , 5.59 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1184 , 208 , 1 , Manhattan , 020 800 , 1020800 , E , MN03 , Central Harlem North - Polo Grounds , 3803
3389228 , 1 , 2014 - 03 - 28 13 : 52 : 56 , 2014 - 03 - 28 14 : 29 : 01 , N , 1 , - 73.950569152832031 , 40.792312622070313 , - 73.868507385253906 , 40.688491821289063 , 2 , 16.10 , 46 , 0 , 0.5 , 0 , 5.33 , , , 51.83 , 2 , , 75 , 63 , green , 0.04 , 0.0 , 0.0 , 62 , 37 , 5.37 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1544 , 1182.02 , 3 , Brooklyn , 118202 , 3118202 , E , BK83 , Cypress Hills - City Line , 4008
3389229 , 2 , 2014 - 03 - 07 0 9 : 46 : 32 , 2014 - 03 - 07 0 9 : 55 : 01 , N , 1 , - 73.952301025390625 , 40.789798736572266 , - 73.935806274414062 , 40.794448852539063 , 1 , 1.67 , 8 , 0 , 0.5 , 2 , 0 , , , 10.5 , 1 , 1 , 75 , 74 , green , 0.00 , 3.9 , 0.0 , 37 , 26 , 7.83 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1553 , 178 , 1 , Manhattan , 017 800 , 1017800 , E , MN34 , East Harlem North , 3804
3389230 , 2 , 2014 - 03 - 17 18 : 23 : 05 , 2014 - 03 - 17 18 : 28 : 38 , N , 1 , - 73.952346801757813 , 40.789844512939453 , - 73.946319580078125 , 40.783851623535156 , 5 , 0.95 , 5.5 , 1 , 0.5 , 0.65 , 0 , , , 7.65 , 1 , 1 , 75 , 263 , green , 0.00 , 0.0 , 0.0 , 35 , 23 , 8.05 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 32 , 156.01 , 1 , Manhattan , 015601 , 1015601 , I , MN32 , Yorkville , 3805
3389231 , 1 , 2014 - 03 - 19 19 : 0 9 : 36 , 2014 - 03 - 19 19 : 12 : 20 , N , 1 , - 73.952377319335938 , 40.789779663085938 , - 73.947494506835938 , 40.796474456787109 , 1 , 0.50 , 4 , 1 , 0.5 , 1 , 0 , , , 6.5 , 1 , , 75 , 75 , green , 0.92 , 0.0 , 0.0 , 46 , 32 , 7.16 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1401 , 174.02 , 1 , Manhattan , 017402 , 1017402 , E , MN33 , East Harlem South , 3804
3389232 , 2 , 2014 - 03 - 20 19 : 06 : 28 , 2014 - 03 - 20 19 : 21 : 35 , N , 1 , - 73.952583312988281 , 40.789516448974609 , - 73.985870361328125 , 40.776973724365234 , 2 , 3.04 , 13 , 1 , 0.5 , 2.8 , 0 , , , 17.3 , 1 , 1 , 75 , 143 , green , 0.00 , 0.0 , 0.0 , 54 , 40 , 8.05 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1742 , 155 , 1 , Manhattan , 015500 , 1015500 , I , MN14 , Lincoln Square , 3806
3389233 , 2 , 2014 - 03 - 29 0 9 : 38 : 12 , 2014 - 03 - 29 0 9 : 44 : 16 , N , 1 , - 73.952728271484375 , 40.789501190185547 , - 73.950935363769531 , 40.775600433349609 , 1 , 1.10 , 6.5 , 0 , 0.5 , 1.3 , 0 , , , 8.3 , 1 , 1 , 75 , 263 , green , 1.81 , 0.0 , 0.0 , 59 , 43 , 10.74 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 2048 , 138 , 1 , Manhattan , 013 800 , 1013800 , I , MN32 , Yorkville , 3805
` ,
totalFields : 308 * 2 + 1 ,
} , {
file : "nyc-taxi-data-tabs-100k.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "\t" ,
header : true ,
wantColumns : [ ] string { "trip_id" , "vendor_id" , "pickup_datetime" , "dropoff_datetime" , "store_and_fwd_flag" , "rate_code_id" , "pickup_longitude" , "pickup_latitude" , "dropoff_longitude" , "dropoff_latitude" , "passenger_count" , "trip_distance" , "fare_amount" , "extra" , "mta_tax" , "tip_amount" , "tolls_amount" , "ehail_fee" , "improvement_surcharge" , "total_amount" , "payment_type" , "trip_type" , "pickup" , "dropoff" , "cab_type" , "precipitation" , "snow_depth" , "snowfall" , "max_temp" , "min_temp" , "wind" , "pickup_nyct2010_gid" , "pickup_ctlabel" , "pickup_borocode" , "pickup_boroname" , "pickup_ct2010" , "pickup_boroct2010" , "pickup_cdeligibil" , "pickup_ntacode" , "pickup_ntaname" , "pickup_puma" , "dropoff_nyct2010_gid" , "dropoff_ctlabel" , "dropoff_borocode" , "dropoff_boroname" , "dropoff_ct2010" , "dropoff_boroct2010" , "dropoff_cdeligibil" , "dropoff_ntacode" , "dropoff_ntaname" , "dropoff_puma" } ,
wantTenFields : ` 3389224 , 2 , 2014 - 03 - 26 00 : 26 : 15 , 2014 - 03 - 26 00 : 28 : 38 , N , 1 , - 73.950431823730469 , 40.792251586914063 , - 73.938949584960937 , 40.794425964355469 , 1 , 0.84 , 4.5 , 0.5 , 0.5 , 1 , 0 , , , 6.5 , 1 , 1 , 75 , 74 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1828 , 180 , 1 , Manhattan , 01 8000 , 1018000 , E , MN34 , East Harlem North , 3804
3389225 , 2 , 2014 - 03 - 31 0 9 : 42 : 15 , 2014 - 03 - 31 10 : 01 : 17 , N , 1 , - 73.950340270996094 , 40.792228698730469 , - 73.941970825195313 , 40.842235565185547 , 1 , 4.47 , 17.5 , 0 , 0.5 , 0 , 0 , , , 18 , 2 , 1 , 75 , 244 , green , 0.16 , 0.0 , 0.0 , 56 , 36 , 8.28 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 911 , 251 , 1 , Manhattan , 025100 , 1025100 , E , MN36 , Washington Heights South , 3801
3389226 , 2 , 2014 - 03 - 26 17 : 13 : 28 , 2014 - 03 - 26 17 : 19 : 07 , N , 1 , - 73.949493408203125 , 40.793506622314453 , - 73.943374633789063 , 40.786155700683594 , 1 , 0.82 , 5.5 , 1 , 0.5 , 0 , 0 , , , 7 , 1 , 1 , 75 , 75 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1387 , 164 , 1 , Manhattan , 016400 , 1016400 , E , MN33 , East Harlem South , 3804
3389227 , 2 , 2014 - 03 - 14 21 : 07 : 19 , 2014 - 03 - 14 21 : 11 : 41 , N , 1 , - 73.950538635253906 , 40.792228698730469 , - 73.940811157226563 , 40.809253692626953 , 1 , 1.40 , 6 , 0.5 , 0.5 , 0 , 0 , , , 7 , 2 , 1 , 75 , 42 , green , 0.00 , 0.0 , 0.0 , 46 , 22 , 5.59 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1184 , 208 , 1 , Manhattan , 020 800 , 1020800 , E , MN03 , Central Harlem North - Polo Grounds , 3803
3389228 , 1 , 2014 - 03 - 28 13 : 52 : 56 , 2014 - 03 - 28 14 : 29 : 01 , N , 1 , - 73.950569152832031 , 40.792312622070313 , - 73.868507385253906 , 40.688491821289063 , 2 , 16.10 , 46 , 0 , 0.5 , 0 , 5.33 , , , 51.83 , 2 , , 75 , 63 , green , 0.04 , 0.0 , 0.0 , 62 , 37 , 5.37 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1544 , 1182.02 , 3 , Brooklyn , 118202 , 3118202 , E , BK83 , Cypress Hills - City Line , 4008
3389229 , 2 , 2014 - 03 - 07 0 9 : 46 : 32 , 2014 - 03 - 07 0 9 : 55 : 01 , N , 1 , - 73.952301025390625 , 40.789798736572266 , - 73.935806274414062 , 40.794448852539063 , 1 , 1.67 , 8 , 0 , 0.5 , 2 , 0 , , , 10.5 , 1 , 1 , 75 , 74 , green , 0.00 , 3.9 , 0.0 , 37 , 26 , 7.83 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1553 , 178 , 1 , Manhattan , 017 800 , 1017800 , E , MN34 , East Harlem North , 3804
3389230 , 2 , 2014 - 03 - 17 18 : 23 : 05 , 2014 - 03 - 17 18 : 28 : 38 , N , 1 , - 73.952346801757813 , 40.789844512939453 , - 73.946319580078125 , 40.783851623535156 , 5 , 0.95 , 5.5 , 1 , 0.5 , 0.65 , 0 , , , 7.65 , 1 , 1 , 75 , 263 , green , 0.00 , 0.0 , 0.0 , 35 , 23 , 8.05 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 32 , 156.01 , 1 , Manhattan , 015601 , 1015601 , I , MN32 , Yorkville , 3805
3389231 , 1 , 2014 - 03 - 19 19 : 0 9 : 36 , 2014 - 03 - 19 19 : 12 : 20 , N , 1 , - 73.952377319335938 , 40.789779663085938 , - 73.947494506835938 , 40.796474456787109 , 1 , 0.50 , 4 , 1 , 0.5 , 1 , 0 , , , 6.5 , 1 , , 75 , 75 , green , 0.92 , 0.0 , 0.0 , 46 , 32 , 7.16 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1401 , 174.02 , 1 , Manhattan , 017402 , 1017402 , E , MN33 , East Harlem South , 3804
3389232 , 2 , 2014 - 03 - 20 19 : 06 : 28 , 2014 - 03 - 20 19 : 21 : 35 , N , 1 , - 73.952583312988281 , 40.789516448974609 , - 73.985870361328125 , 40.776973724365234 , 2 , 3.04 , 13 , 1 , 0.5 , 2.8 , 0 , , , 17.3 , 1 , 1 , 75 , 143 , green , 0.00 , 0.0 , 0.0 , 54 , 40 , 8.05 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1742 , 155 , 1 , Manhattan , 015500 , 1015500 , I , MN14 , Lincoln Square , 3806
3389233 , 2 , 2014 - 03 - 29 0 9 : 38 : 12 , 2014 - 03 - 29 0 9 : 44 : 16 , N , 1 , - 73.952728271484375 , 40.789501190185547 , - 73.950935363769531 , 40.775600433349609 , 1 , 1.10 , 6.5 , 0 , 0.5 , 1.3 , 0 , , , 8.3 , 1 , 1 , 75 , 263 , green , 1.81 , 0.0 , 0.0 , 59 , 43 , 10.74 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 2048 , 138 , 1 , Manhattan , 013 800 , 1013800 , I , MN32 , Yorkville , 3805
` ,
totalFields : 308 * 2 + 1 ,
} , {
file : "nyc-taxi-data-100k-single-delim.csv" ,
recordDelimiter : "^" ,
fieldDelimiter : "," ,
header : true ,
wantColumns : [ ] string { "trip_id" , "vendor_id" , "pickup_datetime" , "dropoff_datetime" , "store_and_fwd_flag" , "rate_code_id" , "pickup_longitude" , "pickup_latitude" , "dropoff_longitude" , "dropoff_latitude" , "passenger_count" , "trip_distance" , "fare_amount" , "extra" , "mta_tax" , "tip_amount" , "tolls_amount" , "ehail_fee" , "improvement_surcharge" , "total_amount" , "payment_type" , "trip_type" , "pickup" , "dropoff" , "cab_type" , "precipitation" , "snow_depth" , "snowfall" , "max_temp" , "min_temp" , "wind" , "pickup_nyct2010_gid" , "pickup_ctlabel" , "pickup_borocode" , "pickup_boroname" , "pickup_ct2010" , "pickup_boroct2010" , "pickup_cdeligibil" , "pickup_ntacode" , "pickup_ntaname" , "pickup_puma" , "dropoff_nyct2010_gid" , "dropoff_ctlabel" , "dropoff_borocode" , "dropoff_boroname" , "dropoff_ct2010" , "dropoff_boroct2010" , "dropoff_cdeligibil" , "dropoff_ntacode" , "dropoff_ntaname" , "dropoff_puma" } ,
wantTenFields : ` 3389224 , 2 , 2014 - 03 - 26 00 : 26 : 15 , 2014 - 03 - 26 00 : 28 : 38 , N , 1 , - 73.950431823730469 , 40.792251586914063 , - 73.938949584960937 , 40.794425964355469 , 1 , 0.84 , 4.5 , 0.5 , 0.5 , 1 , 0 , , , 6.5 , 1 , 1 , 75 , 74 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1828 , 180 , 1 , Manhattan , 01 8000 , 1018000 , E , MN34 , East Harlem North , 3804
3389225 , 2 , 2014 - 03 - 31 0 9 : 42 : 15 , 2014 - 03 - 31 10 : 01 : 17 , N , 1 , - 73.950340270996094 , 40.792228698730469 , - 73.941970825195313 , 40.842235565185547 , 1 , 4.47 , 17.5 , 0 , 0.5 , 0 , 0 , , , 18 , 2 , 1 , 75 , 244 , green , 0.16 , 0.0 , 0.0 , 56 , 36 , 8.28 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 911 , 251 , 1 , Manhattan , 025100 , 1025100 , E , MN36 , Washington Heights South , 3801
3389226 , 2 , 2014 - 03 - 26 17 : 13 : 28 , 2014 - 03 - 26 17 : 19 : 07 , N , 1 , - 73.949493408203125 , 40.793506622314453 , - 73.943374633789063 , 40.786155700683594 , 1 , 0.82 , 5.5 , 1 , 0.5 , 0 , 0 , , , 7 , 1 , 1 , 75 , 75 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1387 , 164 , 1 , Manhattan , 016400 , 1016400 , E , MN33 , East Harlem South , 3804
3389227 , 2 , 2014 - 03 - 14 21 : 07 : 19 , 2014 - 03 - 14 21 : 11 : 41 , N , 1 , - 73.950538635253906 , 40.792228698730469 , - 73.940811157226563 , 40.809253692626953 , 1 , 1.40 , 6 , 0.5 , 0.5 , 0 , 0 , , , 7 , 2 , 1 , 75 , 42 , green , 0.00 , 0.0 , 0.0 , 46 , 22 , 5.59 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1184 , 208 , 1 , Manhattan , 020 800 , 1020800 , E , MN03 , Central Harlem North - Polo Grounds , 3803
3389228 , 1 , 2014 - 03 - 28 13 : 52 : 56 , 2014 - 03 - 28 14 : 29 : 01 , N , 1 , - 73.950569152832031 , 40.792312622070313 , - 73.868507385253906 , 40.688491821289063 , 2 , 16.10 , 46 , 0 , 0.5 , 0 , 5.33 , , , 51.83 , 2 , , 75 , 63 , green , 0.04 , 0.0 , 0.0 , 62 , 37 , 5.37 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1544 , 1182.02 , 3 , Brooklyn , 118202 , 3118202 , E , BK83 , Cypress Hills - City Line , 4008
3389229 , 2 , 2014 - 03 - 07 0 9 : 46 : 32 , 2014 - 03 - 07 0 9 : 55 : 01 , N , 1 , - 73.952301025390625 , 40.789798736572266 , - 73.935806274414062 , 40.794448852539063 , 1 , 1.67 , 8 , 0 , 0.5 , 2 , 0 , , , 10.5 , 1 , 1 , 75 , 74 , green , 0.00 , 3.9 , 0.0 , 37 , 26 , 7.83 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1553 , 178 , 1 , Manhattan , 017 800 , 1017800 , E , MN34 , East Harlem North , 3804
3389230 , 2 , 2014 - 03 - 17 18 : 23 : 05 , 2014 - 03 - 17 18 : 28 : 38 , N , 1 , - 73.952346801757813 , 40.789844512939453 , - 73.946319580078125 , 40.783851623535156 , 5 , 0.95 , 5.5 , 1 , 0.5 , 0.65 , 0 , , , 7.65 , 1 , 1 , 75 , 263 , green , 0.00 , 0.0 , 0.0 , 35 , 23 , 8.05 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 32 , 156.01 , 1 , Manhattan , 015601 , 1015601 , I , MN32 , Yorkville , 3805
3389231 , 1 , 2014 - 03 - 19 19 : 0 9 : 36 , 2014 - 03 - 19 19 : 12 : 20 , N , 1 , - 73.952377319335938 , 40.789779663085938 , - 73.947494506835938 , 40.796474456787109 , 1 , 0.50 , 4 , 1 , 0.5 , 1 , 0 , , , 6.5 , 1 , , 75 , 75 , green , 0.92 , 0.0 , 0.0 , 46 , 32 , 7.16 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1401 , 174.02 , 1 , Manhattan , 017402 , 1017402 , E , MN33 , East Harlem South , 3804
3389232 , 2 , 2014 - 03 - 20 19 : 06 : 28 , 2014 - 03 - 20 19 : 21 : 35 , N , 1 , - 73.952583312988281 , 40.789516448974609 , - 73.985870361328125 , 40.776973724365234 , 2 , 3.04 , 13 , 1 , 0.5 , 2.8 , 0 , , , 17.3 , 1 , 1 , 75 , 143 , green , 0.00 , 0.0 , 0.0 , 54 , 40 , 8.05 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1742 , 155 , 1 , Manhattan , 015500 , 1015500 , I , MN14 , Lincoln Square , 3806
3389233 , 2 , 2014 - 03 - 29 0 9 : 38 : 12 , 2014 - 03 - 29 0 9 : 44 : 16 , N , 1 , - 73.952728271484375 , 40.789501190185547 , - 73.950935363769531 , 40.775600433349609 , 1 , 1.10 , 6.5 , 0 , 0.5 , 1.3 , 0 , , , 8.3 , 1 , 1 , 75 , 263 , green , 1.81 , 0.0 , 0.0 , 59 , 43 , 10.74 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 2048 , 138 , 1 , Manhattan , 013 800 , 1013800 , I , MN32 , Yorkville , 3805
` ,
totalFields : 308 * 2 + 1 ,
} , {
file : "nyc-taxi-data-100k-multi-delim.csv" ,
recordDelimiter : "^Y" ,
fieldDelimiter : "," ,
header : true ,
wantColumns : [ ] string { "trip_id" , "vendor_id" , "pickup_datetime" , "dropoff_datetime" , "store_and_fwd_flag" , "rate_code_id" , "pickup_longitude" , "pickup_latitude" , "dropoff_longitude" , "dropoff_latitude" , "passenger_count" , "trip_distance" , "fare_amount" , "extra" , "mta_tax" , "tip_amount" , "tolls_amount" , "ehail_fee" , "improvement_surcharge" , "total_amount" , "payment_type" , "trip_type" , "pickup" , "dropoff" , "cab_type" , "precipitation" , "snow_depth" , "snowfall" , "max_temp" , "min_temp" , "wind" , "pickup_nyct2010_gid" , "pickup_ctlabel" , "pickup_borocode" , "pickup_boroname" , "pickup_ct2010" , "pickup_boroct2010" , "pickup_cdeligibil" , "pickup_ntacode" , "pickup_ntaname" , "pickup_puma" , "dropoff_nyct2010_gid" , "dropoff_ctlabel" , "dropoff_borocode" , "dropoff_boroname" , "dropoff_ct2010" , "dropoff_boroct2010" , "dropoff_cdeligibil" , "dropoff_ntacode" , "dropoff_ntaname" , "dropoff_puma" } ,
wantTenFields : ` 3389224 , 2 , 2014 - 03 - 26 00 : 26 : 15 , 2014 - 03 - 26 00 : 28 : 38 , N , 1 , - 73.950431823730469 , 40.792251586914063 , - 73.938949584960937 , 40.794425964355469 , 1 , 0.84 , 4.5 , 0.5 , 0.5 , 1 , 0 , , , 6.5 , 1 , 1 , 75 , 74 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1828 , 180 , 1 , Manhattan , 01 8000 , 1018000 , E , MN34 , East Harlem North , 3804
3389225 , 2 , 2014 - 03 - 31 0 9 : 42 : 15 , 2014 - 03 - 31 10 : 01 : 17 , N , 1 , - 73.950340270996094 , 40.792228698730469 , - 73.941970825195313 , 40.842235565185547 , 1 , 4.47 , 17.5 , 0 , 0.5 , 0 , 0 , , , 18 , 2 , 1 , 75 , 244 , green , 0.16 , 0.0 , 0.0 , 56 , 36 , 8.28 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 911 , 251 , 1 , Manhattan , 025100 , 1025100 , E , MN36 , Washington Heights South , 3801
3389226 , 2 , 2014 - 03 - 26 17 : 13 : 28 , 2014 - 03 - 26 17 : 19 : 07 , N , 1 , - 73.949493408203125 , 40.793506622314453 , - 73.943374633789063 , 40.786155700683594 , 1 , 0.82 , 5.5 , 1 , 0.5 , 0 , 0 , , , 7 , 1 , 1 , 75 , 75 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1387 , 164 , 1 , Manhattan , 016400 , 1016400 , E , MN33 , East Harlem South , 3804
3389227 , 2 , 2014 - 03 - 14 21 : 07 : 19 , 2014 - 03 - 14 21 : 11 : 41 , N , 1 , - 73.950538635253906 , 40.792228698730469 , - 73.940811157226563 , 40.809253692626953 , 1 , 1.40 , 6 , 0.5 , 0.5 , 0 , 0 , , , 7 , 2 , 1 , 75 , 42 , green , 0.00 , 0.0 , 0.0 , 46 , 22 , 5.59 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1184 , 208 , 1 , Manhattan , 020 800 , 1020800 , E , MN03 , Central Harlem North - Polo Grounds , 3803
3389228 , 1 , 2014 - 03 - 28 13 : 52 : 56 , 2014 - 03 - 28 14 : 29 : 01 , N , 1 , - 73.950569152832031 , 40.792312622070313 , - 73.868507385253906 , 40.688491821289063 , 2 , 16.10 , 46 , 0 , 0.5 , 0 , 5.33 , , , 51.83 , 2 , , 75 , 63 , green , 0.04 , 0.0 , 0.0 , 62 , 37 , 5.37 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1544 , 1182.02 , 3 , Brooklyn , 118202 , 3118202 , E , BK83 , Cypress Hills - City Line , 4008
3389229 , 2 , 2014 - 03 - 07 0 9 : 46 : 32 , 2014 - 03 - 07 0 9 : 55 : 01 , N , 1 , - 73.952301025390625 , 40.789798736572266 , - 73.935806274414062 , 40.794448852539063 , 1 , 1.67 , 8 , 0 , 0.5 , 2 , 0 , , , 10.5 , 1 , 1 , 75 , 74 , green , 0.00 , 3.9 , 0.0 , 37 , 26 , 7.83 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1553 , 178 , 1 , Manhattan , 017 800 , 1017800 , E , MN34 , East Harlem North , 3804
3389230 , 2 , 2014 - 03 - 17 18 : 23 : 05 , 2014 - 03 - 17 18 : 28 : 38 , N , 1 , - 73.952346801757813 , 40.789844512939453 , - 73.946319580078125 , 40.783851623535156 , 5 , 0.95 , 5.5 , 1 , 0.5 , 0.65 , 0 , , , 7.65 , 1 , 1 , 75 , 263 , green , 0.00 , 0.0 , 0.0 , 35 , 23 , 8.05 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 32 , 156.01 , 1 , Manhattan , 015601 , 1015601 , I , MN32 , Yorkville , 3805
3389231 , 1 , 2014 - 03 - 19 19 : 0 9 : 36 , 2014 - 03 - 19 19 : 12 : 20 , N , 1 , - 73.952377319335938 , 40.789779663085938 , - 73.947494506835938 , 40.796474456787109 , 1 , 0.50 , 4 , 1 , 0.5 , 1 , 0 , , , 6.5 , 1 , , 75 , 75 , green , 0.92 , 0.0 , 0.0 , 46 , 32 , 7.16 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1401 , 174.02 , 1 , Manhattan , 017402 , 1017402 , E , MN33 , East Harlem South , 3804
3389232 , 2 , 2014 - 03 - 20 19 : 06 : 28 , 2014 - 03 - 20 19 : 21 : 35 , N , 1 , - 73.952583312988281 , 40.789516448974609 , - 73.985870361328125 , 40.776973724365234 , 2 , 3.04 , 13 , 1 , 0.5 , 2.8 , 0 , , , 17.3 , 1 , 1 , 75 , 143 , green , 0.00 , 0.0 , 0.0 , 54 , 40 , 8.05 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1742 , 155 , 1 , Manhattan , 015500 , 1015500 , I , MN14 , Lincoln Square , 3806
3389233 , 2 , 2014 - 03 - 29 0 9 : 38 : 12 , 2014 - 03 - 29 0 9 : 44 : 16 , N , 1 , - 73.952728271484375 , 40.789501190185547 , - 73.950935363769531 , 40.775600433349609 , 1 , 1.10 , 6.5 , 0 , 0.5 , 1.3 , 0 , , , 8.3 , 1 , 1 , 75 , 263 , green , 1.81 , 0.0 , 0.0 , 59 , 43 , 10.74 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 2048 , 138 , 1 , Manhattan , 013 800 , 1013800 , I , MN32 , Yorkville , 3805
` ,
totalFields : 308 * 2 + 1 ,
} , {
file : "nyc-taxi-data-noheader-100k.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
header : false ,
wantColumns : [ ] string { "_1" , "_2" , "_3" , "_4" , "_5" , "_6" , "_7" , "_8" , "_9" , "_10" , "_11" , "_12" , "_13" , "_14" , "_15" , "_16" , "_17" , "_18" , "_19" , "_20" , "_21" , "_22" , "_23" , "_24" , "_25" , "_26" , "_27" , "_28" , "_29" , "_30" , "_31" , "_32" , "_33" , "_34" , "_35" , "_36" , "_37" , "_38" , "_39" , "_40" , "_41" , "_42" , "_43" , "_44" , "_45" , "_46" , "_47" , "_48" , "_49" , "_50" , "_51" } ,
wantTenFields : ` 3389224 , 2 , 2014 - 03 - 26 00 : 26 : 15 , 2014 - 03 - 26 00 : 28 : 38 , N , 1 , - 73.950431823730469 , 40.792251586914063 , - 73.938949584960937 , 40.794425964355469 , 1 , 0.84 , 4.5 , 0.5 , 0.5 , 1 , 0 , , , 6.5 , 1 , 1 , 75 , 74 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1828 , 180 , 1 , Manhattan , 01 8000 , 1018000 , E , MN34 , East Harlem North , 3804
3389225 , 2 , 2014 - 03 - 31 0 9 : 42 : 15 , 2014 - 03 - 31 10 : 01 : 17 , N , 1 , - 73.950340270996094 , 40.792228698730469 , - 73.941970825195313 , 40.842235565185547 , 1 , 4.47 , 17.5 , 0 , 0.5 , 0 , 0 , , , 18 , 2 , 1 , 75 , 244 , green , 0.16 , 0.0 , 0.0 , 56 , 36 , 8.28 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 911 , 251 , 1 , Manhattan , 025100 , 1025100 , E , MN36 , Washington Heights South , 3801
3389226 , 2 , 2014 - 03 - 26 17 : 13 : 28 , 2014 - 03 - 26 17 : 19 : 07 , N , 1 , - 73.949493408203125 , 40.793506622314453 , - 73.943374633789063 , 40.786155700683594 , 1 , 0.82 , 5.5 , 1 , 0.5 , 0 , 0 , , , 7 , 1 , 1 , 75 , 75 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1387 , 164 , 1 , Manhattan , 016400 , 1016400 , E , MN33 , East Harlem South , 3804
3389227 , 2 , 2014 - 03 - 14 21 : 07 : 19 , 2014 - 03 - 14 21 : 11 : 41 , N , 1 , - 73.950538635253906 , 40.792228698730469 , - 73.940811157226563 , 40.809253692626953 , 1 , 1.40 , 6 , 0.5 , 0.5 , 0 , 0 , , , 7 , 2 , 1 , 75 , 42 , green , 0.00 , 0.0 , 0.0 , 46 , 22 , 5.59 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1184 , 208 , 1 , Manhattan , 020 800 , 1020800 , E , MN03 , Central Harlem North - Polo Grounds , 3803
3389228 , 1 , 2014 - 03 - 28 13 : 52 : 56 , 2014 - 03 - 28 14 : 29 : 01 , N , 1 , - 73.950569152832031 , 40.792312622070313 , - 73.868507385253906 , 40.688491821289063 , 2 , 16.10 , 46 , 0 , 0.5 , 0 , 5.33 , , , 51.83 , 2 , , 75 , 63 , green , 0.04 , 0.0 , 0.0 , 62 , 37 , 5.37 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1544 , 1182.02 , 3 , Brooklyn , 118202 , 3118202 , E , BK83 , Cypress Hills - City Line , 4008
3389229 , 2 , 2014 - 03 - 07 0 9 : 46 : 32 , 2014 - 03 - 07 0 9 : 55 : 01 , N , 1 , - 73.952301025390625 , 40.789798736572266 , - 73.935806274414062 , 40.794448852539063 , 1 , 1.67 , 8 , 0 , 0.5 , 2 , 0 , , , 10.5 , 1 , 1 , 75 , 74 , green , 0.00 , 3.9 , 0.0 , 37 , 26 , 7.83 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1553 , 178 , 1 , Manhattan , 017 800 , 1017800 , E , MN34 , East Harlem North , 3804
3389230 , 2 , 2014 - 03 - 17 18 : 23 : 05 , 2014 - 03 - 17 18 : 28 : 38 , N , 1 , - 73.952346801757813 , 40.789844512939453 , - 73.946319580078125 , 40.783851623535156 , 5 , 0.95 , 5.5 , 1 , 0.5 , 0.65 , 0 , , , 7.65 , 1 , 1 , 75 , 263 , green , 0.00 , 0.0 , 0.0 , 35 , 23 , 8.05 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 32 , 156.01 , 1 , Manhattan , 015601 , 1015601 , I , MN32 , Yorkville , 3805
3389231 , 1 , 2014 - 03 - 19 19 : 0 9 : 36 , 2014 - 03 - 19 19 : 12 : 20 , N , 1 , - 73.952377319335938 , 40.789779663085938 , - 73.947494506835938 , 40.796474456787109 , 1 , 0.50 , 4 , 1 , 0.5 , 1 , 0 , , , 6.5 , 1 , , 75 , 75 , green , 0.92 , 0.0 , 0.0 , 46 , 32 , 7.16 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1401 , 174.02 , 1 , Manhattan , 017402 , 1017402 , E , MN33 , East Harlem South , 3804
3389232 , 2 , 2014 - 03 - 20 19 : 06 : 28 , 2014 - 03 - 20 19 : 21 : 35 , N , 1 , - 73.952583312988281 , 40.789516448974609 , - 73.985870361328125 , 40.776973724365234 , 2 , 3.04 , 13 , 1 , 0.5 , 2.8 , 0 , , , 17.3 , 1 , 1 , 75 , 143 , green , 0.00 , 0.0 , 0.0 , 54 , 40 , 8.05 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1742 , 155 , 1 , Manhattan , 015500 , 1015500 , I , MN14 , Lincoln Square , 3806
3389233 , 2 , 2014 - 03 - 29 0 9 : 38 : 12 , 2014 - 03 - 29 0 9 : 44 : 16 , N , 1 , - 73.952728271484375 , 40.789501190185547 , - 73.950935363769531 , 40.775600433349609 , 1 , 1.10 , 6.5 , 0 , 0.5 , 1.3 , 0 , , , 8.3 , 1 , 1 , 75 , 263 , green , 1.81 , 0.0 , 0.0 , 59 , 43 , 10.74 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 2048 , 138 , 1 , Manhattan , 013 800 , 1013800 , I , MN32 , Yorkville , 3805
` ,
totalFields : 308 * 2 ,
} ,
}
for i , c := range cases {
t . Run ( c . file , func ( t * testing . T ) {
var err error
var record sql . Record
var result bytes . Buffer
input := openTestFile ( t , c . file )
// Get above block size.
input = append ( input , input ... )
args := ReaderArgs {
FileHeaderInfo : use ,
RecordDelimiter : c . recordDelimiter ,
FieldDelimiter : c . fieldDelimiter ,
QuoteCharacter : defaultQuoteCharacter ,
QuoteEscapeCharacter : defaultQuoteEscapeCharacter ,
CommentCharacter : defaultCommentCharacter ,
AllowQuotedRecordDelimiter : false ,
unmarshaled : true ,
}
if ! c . header {
args . FileHeaderInfo = none
}
r , _ := NewReader ( ioutil . NopCloser ( bytes . NewReader ( input ) ) , & args )
fields := 0
for {
record , err = r . Read ( record )
if err != nil {
break
}
if fields < 10 {
// Write with fixed delimiters, newlines.
err := record . WriteCSV ( & result , ',' )
if err != nil {
t . Error ( err )
}
}
fields ++
}
r . Close ( )
if err != io . EOF {
t . Fatalf ( "Case %d failed with %s" , i , err )
}
if ! reflect . DeepEqual ( r . columnNames , c . wantColumns ) {
t . Errorf ( "Case %d failed: expected %#v, got result %#v" , i , c . wantColumns , r . columnNames )
}
if result . String ( ) != c . wantTenFields {
t . Errorf ( "Case %d failed: expected %v, got result %v" , i , c . wantTenFields , result . String ( ) )
}
if fields != c . totalFields {
t . Errorf ( "Case %d failed: expected %v results %v" , i , c . totalFields , fields )
}
} )
}
}
type errReader struct {
err error
}
func ( e errReader ) Read ( p [ ] byte ) ( n int , err error ) {
return 0 , e . err
}
func TestReadFailures ( t * testing . T ) {
customErr := errors . New ( "unable to read file :(" )
cases := [ ] struct {
file string
recordDelimiter string
fieldDelimiter string
sendErr error
header bool
wantColumns [ ] string
wantFields string
wantErr error
} {
{
file : "truncated-records.csv" ,
recordDelimiter : "^Y" ,
fieldDelimiter : "," ,
header : true ,
wantColumns : [ ] string { "trip_id" , "vendor_id" , "pickup_datetime" , "dropoff_datetime" , "store_and_fwd_flag" , "rate_code_id" , "pickup_longitude" , "pickup_latitude" , "dropoff_longitude" , "dropoff_latitude" , "passenger_count" , "trip_distance" , "fare_amount" , "extra" , "mta_tax" , "tip_amount" , "tolls_amount" , "ehail_fee" , "improvement_surcharge" , "total_amount" , "payment_type" , "trip_type" , "pickup" , "dropoff" , "cab_type" , "precipitation" , "snow_depth" , "snowfall" , "max_temp" , "min_temp" , "wind" , "pickup_nyct2010_gid" , "pickup_ctlabel" , "pickup_borocode" , "pickup_boroname" , "pickup_ct2010" , "pickup_boroct2010" , "pickup_cdeligibil" , "pickup_ntacode" , "pickup_ntaname" , "pickup_puma" , "dropoff_nyct2010_gid" , "dropoff_ctlabel" , "dropoff_borocode" , "dropoff_boroname" , "dropoff_ct2010" , "dropoff_boroct2010" , "dropoff_cdeligibil" , "dropoff_ntacode" , "dropoff_ntaname" , "dropoff_puma" } ,
wantFields : ` 3389224 , 2 , 2014 - 03 - 26 00 : 26 : 15 , 2014 - 03 - 26 00 : 28 : 38 , N , 1 , - 73.950431823730469 , 40.792251586914063 , - 73.938949584960937 , 40.794425964355469 , 1 , 0.84 , 4.5 , 0.5 , 0.5 , 1 , 0 , , , 6.5 , 1 , 1 , 75 , 74 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1828 , 180 , 1 , Manhattan , 01 8000 , 1018000 , E , MN34 , East Harlem North , 3804
3389225 , 2 , 2014 - 03 - 31 0 9 : 42 : 15 , 2014 - 03 - 31 10 : 01 : 17 , N , 1 , - 73.950340270996094 , 40.792228698730469 , - 73.941970825195313 , 40.842235565185547 , 1 , 4.47 , 17.5 , 0 , 0.5 , 0 , 0 , , , 18 , 2 , 1 , 75 , 244 , green , 0.16 , 0.0 , 0.0 , 56 , 36 , 8.28 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 911 , 251 , 1 , Manhattan , 025100
` ,
wantErr : io . EOF ,
} ,
{
file : "truncated-records.csv" ,
recordDelimiter : "^Y" ,
fieldDelimiter : "," ,
sendErr : customErr ,
header : true ,
wantColumns : [ ] string { "trip_id" , "vendor_id" , "pickup_datetime" , "dropoff_datetime" , "store_and_fwd_flag" , "rate_code_id" , "pickup_longitude" , "pickup_latitude" , "dropoff_longitude" , "dropoff_latitude" , "passenger_count" , "trip_distance" , "fare_amount" , "extra" , "mta_tax" , "tip_amount" , "tolls_amount" , "ehail_fee" , "improvement_surcharge" , "total_amount" , "payment_type" , "trip_type" , "pickup" , "dropoff" , "cab_type" , "precipitation" , "snow_depth" , "snowfall" , "max_temp" , "min_temp" , "wind" , "pickup_nyct2010_gid" , "pickup_ctlabel" , "pickup_borocode" , "pickup_boroname" , "pickup_ct2010" , "pickup_boroct2010" , "pickup_cdeligibil" , "pickup_ntacode" , "pickup_ntaname" , "pickup_puma" , "dropoff_nyct2010_gid" , "dropoff_ctlabel" , "dropoff_borocode" , "dropoff_boroname" , "dropoff_ct2010" , "dropoff_boroct2010" , "dropoff_cdeligibil" , "dropoff_ntacode" , "dropoff_ntaname" , "dropoff_puma" } ,
wantFields : ` 3389224 , 2 , 2014 - 03 - 26 00 : 26 : 15 , 2014 - 03 - 26 00 : 28 : 38 , N , 1 , - 73.950431823730469 , 40.792251586914063 , - 73.938949584960937 , 40.794425964355469 , 1 , 0.84 , 4.5 , 0.5 , 0.5 , 1 , 0 , , , 6.5 , 1 , 1 , 75 , 74 , green , 0.00 , 0.0 , 0.0 , 36 , 24 , 11.86 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 1828 , 180 , 1 , Manhattan , 01 8000 , 1018000 , E , MN34 , East Harlem North , 3804
3389225 , 2 , 2014 - 03 - 31 0 9 : 42 : 15 , 2014 - 03 - 31 10 : 01 : 17 , N , 1 , - 73.950340270996094 , 40.792228698730469 , - 73.941970825195313 , 40.842235565185547 , 1 , 4.47 , 17.5 , 0 , 0.5 , 0 , 0 , , , 18 , 2 , 1 , 75 , 244 , green , 0.16 , 0.0 , 0.0 , 56 , 36 , 8.28 , 1267 , 168 , 1 , Manhattan , 016 800 , 1016800 , E , MN33 , East Harlem South , 3804 , 911 , 251 , 1 , Manhattan , 025100
` ,
wantErr : customErr ,
} ,
{
// This works since LazyQuotes is true:
file : "invalid-badbarequote.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
sendErr : nil ,
header : true ,
wantColumns : [ ] string { "header1" , "header2" , "header3" } ,
wantFields : "ok1,ok2,ok3\n" + ` "a ""word""",b ` + "\n" ,
wantErr : io . EOF ,
} ,
{
// This works since LazyQuotes is true:
file : "invalid-baddoubleq.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
sendErr : nil ,
header : true ,
wantColumns : [ ] string { "header1" , "header2" , "header3" } ,
wantFields : "ok1,ok2,ok3\n" + ` "a""""b",c ` + "\n" ,
wantErr : io . EOF ,
} ,
{
// This works since LazyQuotes is true:
file : "invalid-badextraq.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
sendErr : nil ,
header : true ,
wantColumns : [ ] string { "header1" , "header2" , "header3" } ,
wantFields : "ok1,ok2,ok3\n" + ` a word,"b""" ` + "\n" ,
wantErr : io . EOF ,
} ,
{
// This works since LazyQuotes is true:
file : "invalid-badstartline.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
sendErr : nil ,
header : true ,
wantColumns : [ ] string { "header1" , "header2" , "header3" } ,
wantFields : "ok1,ok2,ok3\n" + ` a,"b ` + "\n" + ` c""d,e ` + "\n\"\n" ,
wantErr : io . EOF ,
} ,
{
// This works since LazyQuotes is true:
file : "invalid-badstartline2.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
sendErr : nil ,
header : true ,
wantColumns : [ ] string { "header1" , "header2" , "header3" } ,
wantFields : "ok1,ok2,ok3\n" + ` a,b ` + "\n" + ` "d ` + "\n\ne\"\n" ,
wantErr : io . EOF ,
} ,
{
// This works since LazyQuotes is true:
file : "invalid-badtrailingq.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
sendErr : nil ,
header : true ,
wantColumns : [ ] string { "header1" , "header2" , "header3" } ,
wantFields : "ok1,ok2,ok3\n" + ` a word,"b""" ` + "\n" ,
wantErr : io . EOF ,
} ,
{
// This works since LazyQuotes is true:
file : "invalid-crlfquoted.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
sendErr : nil ,
header : true ,
wantColumns : [ ] string { "header1" , "header2" , "header3" } ,
wantFields : "ok1,ok2,ok3\n" + ` "foo""bar" ` + "\n" ,
wantErr : io . EOF ,
} ,
{
// This works since LazyQuotes is true:
file : "invalid-csv.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
sendErr : nil ,
header : true ,
wantColumns : [ ] string { "header1" , "header2" , "header3" } ,
wantFields : "ok1,ok2,ok3\n" + ` "a""""b",c ` + "\n" ,
wantErr : io . EOF ,
} ,
{
// This works since LazyQuotes is true, but output is very weird.
file : "invalid-oddquote.csv" ,
recordDelimiter : "\n" ,
fieldDelimiter : "," ,
sendErr : nil ,
header : true ,
wantColumns : [ ] string { "header1" , "header2" , "header3" } ,
wantFields : "ok1,ok2,ok3\n" + ` """"""",b,c ` + "\n\"\n" ,
wantErr : io . EOF ,
} ,
{
// Test when file ends with a half separator
file : "endswithhalfsep.csv" ,
recordDelimiter : "%!" ,
fieldDelimiter : "," ,
sendErr : nil ,
header : false ,
wantColumns : [ ] string { "_1" , "_2" , "_3" } ,
wantFields : "a,b,c\na2,b2,c2%\n" ,
wantErr : io . EOF ,
} ,
}
for i , c := range cases {
t . Run ( c . file , func ( t * testing . T ) {
var err error
var record sql . Record
var result bytes . Buffer
input := openTestFile ( t , c . file )
args := ReaderArgs {
FileHeaderInfo : use ,
RecordDelimiter : c . recordDelimiter ,
FieldDelimiter : c . fieldDelimiter ,
QuoteCharacter : defaultQuoteCharacter ,
QuoteEscapeCharacter : defaultQuoteEscapeCharacter ,
CommentCharacter : defaultCommentCharacter ,
AllowQuotedRecordDelimiter : false ,
unmarshaled : true ,
}
if ! c . header {
args . FileHeaderInfo = none
}
inr := io . Reader ( bytes . NewReader ( input ) )
if c . sendErr != nil {
inr = io . MultiReader ( inr , errReader { c . sendErr } )
}
r , _ := NewReader ( ioutil . NopCloser ( inr ) , & args )
fields := 0
for {
record , err = r . Read ( record )
if err != nil {
break
}
// Write with fixed delimiters, newlines.
err := record . WriteCSV ( & result , ',' )
if err != nil {
t . Error ( err )
}
fields ++
}
r . Close ( )
if err != c . wantErr {
t . Fatalf ( "Case %d failed with %s" , i , err )
}
if ! reflect . DeepEqual ( r . columnNames , c . wantColumns ) {
t . Errorf ( "Case %d failed: expected \n%#v, got result \n%#v" , i , c . wantColumns , r . columnNames )
}
if result . String ( ) != c . wantFields {
t . Errorf ( "Case %d failed: expected \n%v\nGot result \n%v" , i , c . wantFields , result . String ( ) )
}
} )
}
}
func BenchmarkReaderBasic ( b * testing . B ) {
args := ReaderArgs {
FileHeaderInfo : use ,
RecordDelimiter : "\n" ,
FieldDelimiter : "," ,
QuoteCharacter : defaultQuoteCharacter ,
QuoteEscapeCharacter : defaultQuoteEscapeCharacter ,
CommentCharacter : defaultCommentCharacter ,
AllowQuotedRecordDelimiter : false ,
unmarshaled : true ,
}
f := openTestFile ( b , "nyc-taxi-data-100k.csv" )
r , err := NewReader ( ioutil . NopCloser ( bytes . NewBuffer ( f ) ) , & args )
if err != nil {
b . Fatalf ( "Reading init failed with %s" , err )
}
defer r . Close ( )
b . ReportAllocs ( )
b . ResetTimer ( )
b . SetBytes ( int64 ( len ( f ) ) )
var record sql . Record
for i := 0 ; i < b . N ; i ++ {
r , err = NewReader ( ioutil . NopCloser ( bytes . NewBuffer ( f ) ) , & args )
if err != nil {
b . Fatalf ( "Reading init failed with %s" , err )
}
for err == nil {
record , err = r . Read ( record )
if err != nil && err != io . EOF {
b . Fatalf ( "Reading failed with %s" , err )
}
}
r . Close ( )
}
}
func BenchmarkReaderHuge ( b * testing . B ) {
args := ReaderArgs {
FileHeaderInfo : use ,
RecordDelimiter : "\n" ,
FieldDelimiter : "," ,
QuoteCharacter : defaultQuoteCharacter ,
QuoteEscapeCharacter : defaultQuoteEscapeCharacter ,
CommentCharacter : defaultCommentCharacter ,
AllowQuotedRecordDelimiter : false ,
unmarshaled : true ,
}
for n := 0 ; n < 11 ; n ++ {
f := openTestFile ( b , "nyc-taxi-data-100k.csv" )
want := 309
for i := 0 ; i < n ; i ++ {
f = append ( f , f ... )
want *= 2
}
b . Run ( fmt . Sprint ( len ( f ) / ( 1 << 10 ) , "K" ) , func ( b * testing . B ) {
b . ReportAllocs ( )
b . SetBytes ( int64 ( len ( f ) ) )
b . ResetTimer ( )
var record sql . Record
for i := 0 ; i < b . N ; i ++ {
r , err := NewReader ( ioutil . NopCloser ( bytes . NewBuffer ( f ) ) , & args )
if err != nil {
b . Fatalf ( "Reading init failed with %s" , err )
}
got := 0
for err == nil {
record , err = r . Read ( record )
if err != nil && err != io . EOF {
b . Fatalf ( "Reading failed with %s" , err )
}
got ++
}
r . Close ( )
if got != want {
b . Errorf ( "want %d records, got %d" , want , got )
}
}
} )
}
}
func BenchmarkReaderReplace ( b * testing . B ) {
args := ReaderArgs {
FileHeaderInfo : use ,
RecordDelimiter : "^" ,
FieldDelimiter : "," ,
QuoteCharacter : defaultQuoteCharacter ,
QuoteEscapeCharacter : defaultQuoteEscapeCharacter ,
CommentCharacter : defaultCommentCharacter ,
AllowQuotedRecordDelimiter : false ,
unmarshaled : true ,
}
f := openTestFile ( b , "nyc-taxi-data-100k-single-delim.csv" )
r , err := NewReader ( ioutil . NopCloser ( bytes . NewBuffer ( f ) ) , & args )
if err != nil {
b . Fatalf ( "Reading init failed with %s" , err )
}
defer r . Close ( )
b . ReportAllocs ( )
b . ResetTimer ( )
b . SetBytes ( int64 ( len ( f ) ) )
var record sql . Record
for i := 0 ; i < b . N ; i ++ {
r , err = NewReader ( ioutil . NopCloser ( bytes . NewBuffer ( f ) ) , & args )
if err != nil {
b . Fatalf ( "Reading init failed with %s" , err )
}
for err == nil {
record , err = r . Read ( record )
if err != nil && err != io . EOF {
b . Fatalf ( "Reading failed with %s" , err )
}
}
r . Close ( )
}
}
func BenchmarkReaderReplaceTwo ( b * testing . B ) {
args := ReaderArgs {
FileHeaderInfo : use ,
RecordDelimiter : "^Y" ,
FieldDelimiter : "," ,
QuoteCharacter : defaultQuoteCharacter ,
QuoteEscapeCharacter : defaultQuoteEscapeCharacter ,
CommentCharacter : defaultCommentCharacter ,
AllowQuotedRecordDelimiter : false ,
unmarshaled : true ,
}
f := openTestFile ( b , "nyc-taxi-data-100k-multi-delim.csv" )
r , err := NewReader ( ioutil . NopCloser ( bytes . NewBuffer ( f ) ) , & args )
if err != nil {
b . Fatalf ( "Reading init failed with %s" , err )
}
defer r . Close ( )
b . ReportAllocs ( )
b . ResetTimer ( )
b . SetBytes ( int64 ( len ( f ) ) )
var record sql . Record
for i := 0 ; i < b . N ; i ++ {
r , err = NewReader ( ioutil . NopCloser ( bytes . NewBuffer ( f ) ) , & args )
if err != nil {
b . Fatalf ( "Reading init failed with %s" , err )
}
for err == nil {
record , err = r . Read ( record )
if err != nil && err != io . EOF {
b . Fatalf ( "Reading failed with %s" , err )
}
}
r . Close ( )
}
}