mirror of
https://github.com/dlang/phobos.git
synced 2025-04-28 14:10:30 +03:00
std.csv: allow an unequal number of value separators
By default `std.csv` will throw if the number of separators on a line is not equal to the number of separators on the first line. To allow, or disallow, unequal numbers of separators a `bool` can be passed to all overloads of the `csvReader` function as shown below. ``` string text = "76,26,22\n1,2\n3,4,5,6"; auto records = text.csvReader!int(',', '"', true); assert(records.equal!equal([ [76, 26, 22], [1, 2], [3, 4, 5, 6] ])); ``` working in the comments from the PR more review changes one more round of review fixes there is always one left
This commit is contained in:
parent
f9fc1f612a
commit
ba80631325
2 changed files with 131 additions and 22 deletions
17
changelog/csv_un_equal_separator_count.dd
Normal file
17
changelog/csv_un_equal_separator_count.dd
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
`std.csv` can now optionally handle CSV files with a variable number of columns.
|
||||||
|
|
||||||
|
By default `std.csv` will throw if the number of columns on a line
|
||||||
|
is not equal to the number of columns of the first line.
|
||||||
|
To allow, or disallow, a variable number of columns, a `bool` can be passed to
|
||||||
|
all overloads of the `csvReader` function as shown below.
|
||||||
|
|
||||||
|
```
|
||||||
|
string text = "76,26,22\n1,2\n3,4,5,6";
|
||||||
|
auto records = text.csvReader!int(',', '"', true);
|
||||||
|
|
||||||
|
assert(records.equal!equal([
|
||||||
|
[76, 26, 22],
|
||||||
|
[1, 2],
|
||||||
|
[3, 4, 5, 6]
|
||||||
|
]));
|
||||||
|
```
|
128
std/csv.d
128
std/csv.d
|
@ -57,7 +57,7 @@
|
||||||
* associative array. Passing null to signify that a header is present.
|
* associative array. Passing null to signify that a header is present.
|
||||||
*
|
*
|
||||||
* -------
|
* -------
|
||||||
* auto text = "Name,Occupation,Salary\r"
|
* auto text = "Name,Occupation,Salary\r" ~
|
||||||
* "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
|
* "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
|
||||||
*
|
*
|
||||||
* foreach (record; csvReader!(string[string])
|
* foreach (record; csvReader!(string[string])
|
||||||
|
@ -338,14 +338,15 @@ Throws:
|
||||||
when the exception is thrown for different types of `Contents`.
|
when the exception is thrown for different types of `Contents`.
|
||||||
*/
|
*/
|
||||||
auto csvReader(Contents = string,Malformed ErrorLevel = Malformed.throwException, Range, Separator = char)(Range input,
|
auto csvReader(Contents = string,Malformed ErrorLevel = Malformed.throwException, Range, Separator = char)(Range input,
|
||||||
Separator delimiter = ',', Separator quote = '"')
|
Separator delimiter = ',', Separator quote = '"',
|
||||||
|
bool allowInconsistentDelimiterCount = false)
|
||||||
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
||||||
&& isSomeChar!(Separator)
|
&& isSomeChar!(Separator)
|
||||||
&& !is(Contents T : T[U], U : string))
|
&& !is(Contents T : T[U], U : string))
|
||||||
{
|
{
|
||||||
return CsvReader!(Contents,ErrorLevel,Range,
|
return CsvReader!(Contents,ErrorLevel,Range,
|
||||||
Unqual!(ElementType!Range),string[])
|
Unqual!(ElementType!Range),string[])
|
||||||
(input, delimiter, quote);
|
(input, delimiter, quote, allowInconsistentDelimiterCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ditto
|
/// ditto
|
||||||
|
@ -353,7 +354,8 @@ auto csvReader(Contents = string,
|
||||||
Malformed ErrorLevel = Malformed.throwException,
|
Malformed ErrorLevel = Malformed.throwException,
|
||||||
Range, Header, Separator = char)
|
Range, Header, Separator = char)
|
||||||
(Range input, Header header,
|
(Range input, Header header,
|
||||||
Separator delimiter = ',', Separator quote = '"')
|
Separator delimiter = ',', Separator quote = '"',
|
||||||
|
bool allowInconsistentDelimiterCount = false)
|
||||||
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
||||||
&& isSomeChar!(Separator)
|
&& isSomeChar!(Separator)
|
||||||
&& isForwardRange!Header
|
&& isForwardRange!Header
|
||||||
|
@ -361,7 +363,7 @@ if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
||||||
{
|
{
|
||||||
return CsvReader!(Contents,ErrorLevel,Range,
|
return CsvReader!(Contents,ErrorLevel,Range,
|
||||||
Unqual!(ElementType!Range),Header)
|
Unqual!(ElementType!Range),Header)
|
||||||
(input, header, delimiter, quote);
|
(input, header, delimiter, quote, allowInconsistentDelimiterCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ditto
|
/// ditto
|
||||||
|
@ -369,14 +371,16 @@ auto csvReader(Contents = string,
|
||||||
Malformed ErrorLevel = Malformed.throwException,
|
Malformed ErrorLevel = Malformed.throwException,
|
||||||
Range, Header, Separator = char)
|
Range, Header, Separator = char)
|
||||||
(Range input, Header header,
|
(Range input, Header header,
|
||||||
Separator delimiter = ',', Separator quote = '"')
|
Separator delimiter = ',', Separator quote = '"',
|
||||||
|
bool allowInconsistentDelimiterCount = false)
|
||||||
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
||||||
&& isSomeChar!(Separator)
|
&& isSomeChar!(Separator)
|
||||||
&& is(Header : typeof(null)))
|
&& is(Header : typeof(null)))
|
||||||
{
|
{
|
||||||
return CsvReader!(Contents,ErrorLevel,Range,
|
return CsvReader!(Contents,ErrorLevel,Range,
|
||||||
Unqual!(ElementType!Range),string[])
|
Unqual!(ElementType!Range),string[])
|
||||||
(input, cast(string[]) null, delimiter, quote);
|
(input, cast(string[]) null, delimiter, quote,
|
||||||
|
allowInconsistentDelimiterCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -474,6 +478,66 @@ The header from the input can always be accessed from the `header` field.
|
||||||
assert(records.header == ["a","b","c"]);
|
assert(records.header == ["a","b","c"]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Handcrafted CSV files tend to have a variable number of columns.
|
||||||
|
|
||||||
|
By default `std.csv` will throw if the number of columns on a line
|
||||||
|
is unequal to the number of columns of the first line.
|
||||||
|
To allow, or disallow, a variable number of columns, a `bool` can be passed to
|
||||||
|
all overloads of the `csvReader` function as shown below.
|
||||||
|
*/
|
||||||
|
@safe unittest
|
||||||
|
{
|
||||||
|
import std.algorithm.comparison : equal;
|
||||||
|
|
||||||
|
string text = "76,26,22\n1,2\n3,4,5,6";
|
||||||
|
auto records = text.csvReader!int(',', '"', true);
|
||||||
|
|
||||||
|
assert(records.equal!equal([
|
||||||
|
[76, 26, 22],
|
||||||
|
[1, 2],
|
||||||
|
[3, 4, 5, 6]
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// ditto
|
||||||
|
@safe unittest
|
||||||
|
{
|
||||||
|
import std.algorithm.comparison : equal;
|
||||||
|
|
||||||
|
static struct Three
|
||||||
|
{
|
||||||
|
int a;
|
||||||
|
int b;
|
||||||
|
int c;
|
||||||
|
}
|
||||||
|
|
||||||
|
string text = "76,26,22\n1,2\n3,4,5,6";
|
||||||
|
auto records = text.csvReader!Three(',', '"', true);
|
||||||
|
|
||||||
|
assert(records.equal([
|
||||||
|
Three(76, 26, 22),
|
||||||
|
Three(1, 2, 0),
|
||||||
|
Three(3, 4, 5)
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// ditto
|
||||||
|
@safe unittest
|
||||||
|
{
|
||||||
|
import std.algorithm.comparison : equal;
|
||||||
|
|
||||||
|
auto text = "Name,Occupation,Salary\r" ~
|
||||||
|
"Joe,Carpenter,300000\nFred,Blacksmith\r\n";
|
||||||
|
|
||||||
|
auto r = csvReader!(string[string])(text, null, ',', '"', true);
|
||||||
|
|
||||||
|
assert(r.equal([
|
||||||
|
[ "Name" : "Joe", "Occupation" : "Carpenter", "Salary" : "300000" ],
|
||||||
|
[ "Name" : "Fred", "Occupation" : "Blacksmith" ]
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
|
||||||
// Test standard iteration over input.
|
// Test standard iteration over input.
|
||||||
@safe pure unittest
|
@safe pure unittest
|
||||||
{
|
{
|
||||||
|
@ -843,6 +907,7 @@ private:
|
||||||
Separator _quote;
|
Separator _quote;
|
||||||
size_t[] indices;
|
size_t[] indices;
|
||||||
bool _empty;
|
bool _empty;
|
||||||
|
bool _allowInconsistentDelimiterCount;
|
||||||
static if (is(Contents == struct) || is(Contents == class))
|
static if (is(Contents == struct) || is(Contents == class))
|
||||||
{
|
{
|
||||||
Contents recordContent;
|
Contents recordContent;
|
||||||
|
@ -884,11 +949,13 @@ public:
|
||||||
* }
|
* }
|
||||||
* -------
|
* -------
|
||||||
*/
|
*/
|
||||||
this(Range input, Separator delimiter, Separator quote)
|
this(Range input, Separator delimiter, Separator quote,
|
||||||
|
bool allowInconsistentDelimiterCount)
|
||||||
{
|
{
|
||||||
_input = new Input!(Range, ErrorLevel)(input);
|
_input = new Input!(Range, ErrorLevel)(input);
|
||||||
_separator = delimiter;
|
_separator = delimiter;
|
||||||
_quote = quote;
|
_quote = quote;
|
||||||
|
_allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;
|
||||||
|
|
||||||
if (_input.range.empty)
|
if (_input.range.empty)
|
||||||
{
|
{
|
||||||
|
@ -920,11 +987,13 @@ public:
|
||||||
* matching column is not found or the order did not match that found
|
* matching column is not found or the order did not match that found
|
||||||
* in the input (non-struct).
|
* in the input (non-struct).
|
||||||
*/
|
*/
|
||||||
this(Range input, Header colHeaders, Separator delimiter, Separator quote)
|
this(Range input, Header colHeaders, Separator delimiter, Separator quote,
|
||||||
|
bool allowInconsistentDelimiterCount)
|
||||||
{
|
{
|
||||||
_input = new Input!(Range, ErrorLevel)(input);
|
_input = new Input!(Range, ErrorLevel)(input);
|
||||||
_separator = delimiter;
|
_separator = delimiter;
|
||||||
_quote = quote;
|
_quote = quote;
|
||||||
|
_allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;
|
||||||
|
|
||||||
if (_input.range.empty)
|
if (_input.range.empty)
|
||||||
{
|
{
|
||||||
|
@ -939,7 +1008,8 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
auto r = CsvRecord!(string, ErrorLevel, Range, Separator)
|
auto r = CsvRecord!(string, ErrorLevel, Range, Separator)
|
||||||
(_input, _separator, _quote, indices);
|
(_input, _separator, _quote, indices,
|
||||||
|
_allowInconsistentDelimiterCount);
|
||||||
|
|
||||||
size_t colIndex;
|
size_t colIndex;
|
||||||
foreach (col; r)
|
foreach (col; r)
|
||||||
|
@ -952,6 +1022,8 @@ public:
|
||||||
}
|
}
|
||||||
// The above loop empties the header row.
|
// The above loop empties the header row.
|
||||||
recordRange._empty = true;
|
recordRange._empty = true;
|
||||||
|
recordRange._allowInconsistentDelimiterCount =
|
||||||
|
allowInconsistentDelimiterCount;
|
||||||
|
|
||||||
indices.length = colToIndex.length;
|
indices.length = colToIndex.length;
|
||||||
int i;
|
int i;
|
||||||
|
@ -1090,12 +1162,14 @@ public:
|
||||||
static if (is(Contents == struct) || is(Contents == class))
|
static if (is(Contents == struct) || is(Contents == class))
|
||||||
{
|
{
|
||||||
recordRange = typeof(recordRange)
|
recordRange = typeof(recordRange)
|
||||||
(_input, _separator, _quote, null);
|
(_input, _separator, _quote, null,
|
||||||
|
_allowInconsistentDelimiterCount);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
recordRange = typeof(recordRange)
|
recordRange = typeof(recordRange)
|
||||||
(_input, _separator, _quote, indices);
|
(_input, _separator, _quote, indices,
|
||||||
|
_allowInconsistentDelimiterCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
static if (is(Contents T : T[U], U : string))
|
static if (is(Contents T : T[U], U : string))
|
||||||
|
@ -1168,7 +1242,7 @@ public:
|
||||||
string str = `76;^26^;22`;
|
string str = `76;^26^;22`;
|
||||||
int[] ans = [76,26,22];
|
int[] ans = [76,26,22];
|
||||||
auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
|
auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
|
||||||
(str, ';', '^');
|
(str, ';', '^', false);
|
||||||
|
|
||||||
foreach (record; records)
|
foreach (record; records)
|
||||||
{
|
{
|
||||||
|
@ -1203,6 +1277,7 @@ private:
|
||||||
Contents curContentsoken;
|
Contents curContentsoken;
|
||||||
typeof(appender!(dchar[])()) _front;
|
typeof(appender!(dchar[])()) _front;
|
||||||
bool _empty;
|
bool _empty;
|
||||||
|
bool _allowInconsistentDelimiterCount;
|
||||||
size_t[] _popCount;
|
size_t[] _popCount;
|
||||||
public:
|
public:
|
||||||
/*
|
/*
|
||||||
|
@ -1214,7 +1289,8 @@ public:
|
||||||
* If empty, all columns are returned. List must be in order.
|
* If empty, all columns are returned. List must be in order.
|
||||||
*/
|
*/
|
||||||
this(Input!(Range, ErrorLevel)* input, Separator delimiter,
|
this(Input!(Range, ErrorLevel)* input, Separator delimiter,
|
||||||
Separator quote, size_t[] indices)
|
Separator quote, size_t[] indices,
|
||||||
|
bool allowInconsistentDelimiterCount)
|
||||||
{
|
{
|
||||||
_input = input;
|
_input = input;
|
||||||
_separator = delimiter;
|
_separator = delimiter;
|
||||||
|
@ -1222,6 +1298,7 @@ public:
|
||||||
|
|
||||||
_front = appender!(dchar[])();
|
_front = appender!(dchar[])();
|
||||||
_popCount = indices.dup;
|
_popCount = indices.dup;
|
||||||
|
_allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;
|
||||||
|
|
||||||
// If a header was given, each call to popFront will need
|
// If a header was given, each call to popFront will need
|
||||||
// to eliminate so many tokens. This calculates
|
// to eliminate so many tokens. This calculates
|
||||||
|
@ -1304,24 +1381,39 @@ public:
|
||||||
{
|
{
|
||||||
_empty = true;
|
_empty = true;
|
||||||
static if (ErrorLevel == Malformed.throwException)
|
static if (ErrorLevel == Malformed.throwException)
|
||||||
if (_input.rowLength != 0)
|
{
|
||||||
if (_input.col != _input.rowLength)
|
if (_input.rowLength != 0 && _input.col != _input.rowLength
|
||||||
|
&& !_allowInconsistentDelimiterCount)
|
||||||
|
{
|
||||||
throw new CSVException(
|
throw new CSVException(
|
||||||
format("Row %s's length %s does not match "~
|
format("Row %s's length %s does not match "~
|
||||||
"previous length of %s.", _input.row,
|
"previous length of %s.", _input.row,
|
||||||
_input.col, _input.rowLength));
|
_input.col, _input.rowLength));
|
||||||
|
}
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
static if (ErrorLevel == Malformed.throwException)
|
static if (ErrorLevel == Malformed.throwException)
|
||||||
if (_input.rowLength != 0)
|
{
|
||||||
if (_input.col > _input.rowLength)
|
if (_input.rowLength != 0 && _input.col > _input.rowLength)
|
||||||
|
{
|
||||||
|
if (!_allowInconsistentDelimiterCount)
|
||||||
|
{
|
||||||
throw new CSVException(
|
throw new CSVException(
|
||||||
format("Row %s's length %s does not match "~
|
format("Row %s's length %s does not match "~
|
||||||
"previous length of %s.", _input.row,
|
"previous length of %s.", _input.row,
|
||||||
_input.col, _input.rowLength));
|
_input.col, _input.rowLength));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_empty = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Separator is left on the end of input from the last call.
|
// Separator is left on the end of input from the last call.
|
||||||
// This cannot be moved to after the call to csvNextToken as
|
// This cannot be moved to after the call to csvNextToken as
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue