diff --git a/changelog/csv_un_equal_separator_count.dd b/changelog/csv_un_equal_separator_count.dd new file mode 100644 index 000000000..cfcac98f6 --- /dev/null +++ b/changelog/csv_un_equal_separator_count.dd @@ -0,0 +1,17 @@ +`std.csv` can now optionally handle csv files with variable number of columns. + +By default `std.csv` will throw if the number of columns on a line +is not equal to the number of columns of the first line. +To allow, or disallow, a variable amount of columns a `bool` can be passed to +all overloads of the `csvReader` function as shown below. + +``` +string text = "76,26,22\n1,2\n3,4,5,6"; +auto records = text.csvReader!int(',', '"', true); + +assert(records.equal!equal([ + [76, 26, 22], + [1, 2], + [3, 4, 5, 6] +])); +``` diff --git a/std/csv.d b/std/csv.d index 609cfbb95..7f5c2b24c 100644 --- a/std/csv.d +++ b/std/csv.d @@ -57,7 +57,7 @@ * associative array. Passing null to signify that a header is present. * * ------- - * auto text = "Name,Occupation,Salary\r" + * auto text = "Name,Occupation,Salary\r" ~ * "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"; * * foreach (record; csvReader!(string[string]) @@ -338,14 +338,15 @@ Throws: when the exception is thrown for different types of `Contents`. */ auto csvReader(Contents = string,Malformed ErrorLevel = Malformed.throwException, Range, Separator = char)(Range input, - Separator delimiter = ',', Separator quote = '"') + Separator delimiter = ',', Separator quote = '"', + bool allowInconsistentDelimiterCount = false) if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar) && isSomeChar!(Separator) && !is(Contents T : T[U], U : string)) { return CsvReader!(Contents,ErrorLevel,Range, Unqual!(ElementType!Range),string[]) - (input, delimiter, quote); + (input, delimiter, quote, allowInconsistentDelimiterCount); } /// ditto @@ -353,7 +354,8 @@ auto csvReader(Contents = string, Malformed ErrorLevel = Malformed.throwException, Range, Header, Separator = char) (Range input, Header header, - Separator delimiter = ',', Separator quote = '"') + Separator delimiter = ',', Separator quote = '"', + bool allowInconsistentDelimiterCount = false) if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar) && isSomeChar!(Separator) && isForwardRange!Header @@ -361,7 +363,7 @@ if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar) { return CsvReader!(Contents,ErrorLevel,Range, Unqual!(ElementType!Range),Header) - (input, header, delimiter, quote); + (input, header, delimiter, quote, allowInconsistentDelimiterCount); } /// ditto @@ -369,14 +371,16 @@ auto csvReader(Contents = string, Malformed ErrorLevel = Malformed.throwException, Range, Header, Separator = char) (Range input, Header header, - Separator delimiter = ',', Separator quote = '"') + Separator delimiter = ',', Separator quote = '"', + bool allowInconsistentDelimiterCount = false) if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar) && isSomeChar!(Separator) && is(Header : typeof(null))) { return CsvReader!(Contents,ErrorLevel,Range, Unqual!(ElementType!Range),string[]) - (input, cast(string[]) null, delimiter, quote); + (input, cast(string[]) null, delimiter, quote, + allowInconsistentDelimiterCount); } @@ -474,6 +478,66 @@ The header from the input can always be accessed from the `header` field. assert(records.header == ["a","b","c"]); } +/** +Handcrafted csv files tend to have an variable amount of columns. + +By default `std.csv` will throw if the number of columns on a line +is unequal to the number of columns of the first line. +To allow, or disallow, a variable amount of columns a `bool` can be passed to +all overloads of the `csvReader` function as shown below. +*/ +@safe unittest +{ + import std.algorithm.comparison : equal; + + string text = "76,26,22\n1,2\n3,4,5,6"; + auto records = text.csvReader!int(',', '"', true); + + assert(records.equal!equal([ + [76, 26, 22], + [1, 2], + [3, 4, 5, 6] + ])); +} + +/// ditto +@safe unittest +{ + import std.algorithm.comparison : equal; + + static struct Three + { + int a; + int b; + int c; + } + + string text = "76,26,22\n1,2\n3,4,5,6"; + auto records = text.csvReader!Three(',', '"', true); + + assert(records.equal([ + Three(76, 26, 22), + Three(1, 2, 0), + Three(3, 4, 5) + ])); +} + +/// ditto +@safe unittest +{ + import std.algorithm.comparison : equal; + + auto text = "Name,Occupation,Salary\r" ~ + "Joe,Carpenter,300000\nFred,Blacksmith\r\n"; + + auto r = csvReader!(string[string])(text, null, ',', '"', true); + + assert(r.equal([ + [ "Name" : "Joe", "Occupation" : "Carpenter", "Salary" : "300000" ], + [ "Name" : "Fred", "Occupation" : "Blacksmith" ] + ])); +} + // Test standard iteration over input. @safe pure unittest { @@ -843,6 +907,7 @@ private: Separator _quote; size_t[] indices; bool _empty; + bool _allowInconsistentDelimiterCount; static if (is(Contents == struct) || is(Contents == class)) { Contents recordContent; @@ -884,11 +949,13 @@ public: * } * ------- */ - this(Range input, Separator delimiter, Separator quote) + this(Range input, Separator delimiter, Separator quote, + bool allowInconsistentDelimiterCount) { _input = new Input!(Range, ErrorLevel)(input); _separator = delimiter; _quote = quote; + _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount; if (_input.range.empty) { @@ -920,11 +987,13 @@ public: * matching column is not found or the order did not match that found * in the input (non-struct). */ - this(Range input, Header colHeaders, Separator delimiter, Separator quote) + this(Range input, Header colHeaders, Separator delimiter, Separator quote, + bool allowInconsistentDelimiterCount) { _input = new Input!(Range, ErrorLevel)(input); _separator = delimiter; _quote = quote; + _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount; if (_input.range.empty) { @@ -939,7 +1008,8 @@ public: } auto r = CsvRecord!(string, ErrorLevel, Range, Separator) - (_input, _separator, _quote, indices); + (_input, _separator, _quote, indices, + _allowInconsistentDelimiterCount); size_t colIndex; foreach (col; r) @@ -952,6 +1022,8 @@ public: } // The above loop empties the header row. recordRange._empty = true; + recordRange._allowInconsistentDelimiterCount = + allowInconsistentDelimiterCount; indices.length = colToIndex.length; int i; @@ -1090,12 +1162,14 @@ public: static if (is(Contents == struct) || is(Contents == class)) { recordRange = typeof(recordRange) - (_input, _separator, _quote, null); + (_input, _separator, _quote, null, + _allowInconsistentDelimiterCount); } else { recordRange = typeof(recordRange) - (_input, _separator, _quote, indices); + (_input, _separator, _quote, indices, + _allowInconsistentDelimiterCount); } static if (is(Contents T : T[U], U : string)) @@ -1168,7 +1242,7 @@ public: string str = `76;^26^;22`; int[] ans = [76,26,22]; auto records = CsvReader!(int,Malformed.ignore,string,char,string[]) - (str, ';', '^'); + (str, ';', '^', false); foreach (record; records) { @@ -1203,6 +1277,7 @@ private: Contents curContentsoken; typeof(appender!(dchar[])()) _front; bool _empty; + bool _allowInconsistentDelimiterCount; size_t[] _popCount; public: /* @@ -1214,7 +1289,8 @@ public: * If empty, all columns are returned. List must be in order. */ this(Input!(Range, ErrorLevel)* input, Separator delimiter, - Separator quote, size_t[] indices) + Separator quote, size_t[] indices, + bool allowInconsistentDelimiterCount) { _input = input; _separator = delimiter; @@ -1222,6 +1298,7 @@ public: _front = appender!(dchar[])(); _popCount = indices.dup; + _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount; // If a header was given, each call to popFront will need // to eliminate so many tokens. This calculates @@ -1304,23 +1381,38 @@ public: { _empty = true; static if (ErrorLevel == Malformed.throwException) - if (_input.rowLength != 0) - if (_input.col != _input.rowLength) - throw new CSVException( - format("Row %s's length %s does not match "~ - "previous length of %s.", _input.row, - _input.col, _input.rowLength)); + { + if (_input.rowLength != 0 && _input.col != _input.rowLength + && !_allowInconsistentDelimiterCount) + { + throw new CSVException( + format("Row %s's length %s does not match "~ + "previous length of %s.", _input.row, + _input.col, _input.rowLength)); + } + } return; } else { static if (ErrorLevel == Malformed.throwException) - if (_input.rowLength != 0) - if (_input.col > _input.rowLength) + { + if (_input.rowLength != 0 && _input.col > _input.rowLength) + { + if (!_allowInconsistentDelimiterCount) + { throw new CSVException( format("Row %s's length %s does not match "~ "previous length of %s.", _input.row, _input.col, _input.rowLength)); + } + else + { + _empty = true; + return; + } + } + } } // Separator is left on the end of input from the last call.