mirror of
https://github.com/dlang/phobos.git
synced 2025-04-27 13:40:20 +03:00
std.csv allow un equal number of value separators
By default `std.csv` will throw if there is an number of separators on a line unequal to the number of separators of the first line. To allow, or disallow, unequal numbers of separators a `bool` can be passed to all overloads of the `csvReader` function as shown below. ``` string text = "76,26,22\n1,2\n3,4,5,6"; auto records = text.csvReader!int(',', '"', true); assert(records.equal!equal([ [76, 26, 22], [1, 2], [3, 4, 5, 6] ])); ``` working in the comments from the PR more review changes one more round of review fixes there is always one left
This commit is contained in:
parent
f9fc1f612a
commit
ba80631325
2 changed files with 131 additions and 22 deletions
136
std/csv.d
136
std/csv.d
|
@ -57,7 +57,7 @@
|
|||
* associative array. Passing null to signify that a header is present.
|
||||
*
|
||||
* -------
|
||||
* auto text = "Name,Occupation,Salary\r"
|
||||
* auto text = "Name,Occupation,Salary\r" ~
|
||||
* "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
|
||||
*
|
||||
* foreach (record; csvReader!(string[string])
|
||||
|
@ -338,14 +338,15 @@ Throws:
|
|||
when the exception is thrown for different types of `Contents`.
|
||||
*/
|
||||
auto csvReader(Contents = string,Malformed ErrorLevel = Malformed.throwException, Range, Separator = char)(Range input,
|
||||
Separator delimiter = ',', Separator quote = '"')
|
||||
Separator delimiter = ',', Separator quote = '"',
|
||||
bool allowInconsistentDelimiterCount = false)
|
||||
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
||||
&& isSomeChar!(Separator)
|
||||
&& !is(Contents T : T[U], U : string))
|
||||
{
|
||||
return CsvReader!(Contents,ErrorLevel,Range,
|
||||
Unqual!(ElementType!Range),string[])
|
||||
(input, delimiter, quote);
|
||||
(input, delimiter, quote, allowInconsistentDelimiterCount);
|
||||
}
|
||||
|
||||
/// ditto
|
||||
|
@ -353,7 +354,8 @@ auto csvReader(Contents = string,
|
|||
Malformed ErrorLevel = Malformed.throwException,
|
||||
Range, Header, Separator = char)
|
||||
(Range input, Header header,
|
||||
Separator delimiter = ',', Separator quote = '"')
|
||||
Separator delimiter = ',', Separator quote = '"',
|
||||
bool allowInconsistentDelimiterCount = false)
|
||||
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
||||
&& isSomeChar!(Separator)
|
||||
&& isForwardRange!Header
|
||||
|
@ -361,7 +363,7 @@ if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
|||
{
|
||||
return CsvReader!(Contents,ErrorLevel,Range,
|
||||
Unqual!(ElementType!Range),Header)
|
||||
(input, header, delimiter, quote);
|
||||
(input, header, delimiter, quote, allowInconsistentDelimiterCount);
|
||||
}
|
||||
|
||||
/// ditto
|
||||
|
@ -369,14 +371,16 @@ auto csvReader(Contents = string,
|
|||
Malformed ErrorLevel = Malformed.throwException,
|
||||
Range, Header, Separator = char)
|
||||
(Range input, Header header,
|
||||
Separator delimiter = ',', Separator quote = '"')
|
||||
Separator delimiter = ',', Separator quote = '"',
|
||||
bool allowInconsistentDelimiterCount = false)
|
||||
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
|
||||
&& isSomeChar!(Separator)
|
||||
&& is(Header : typeof(null)))
|
||||
{
|
||||
return CsvReader!(Contents,ErrorLevel,Range,
|
||||
Unqual!(ElementType!Range),string[])
|
||||
(input, cast(string[]) null, delimiter, quote);
|
||||
(input, cast(string[]) null, delimiter, quote,
|
||||
allowInconsistentDelimiterCount);
|
||||
}
|
||||
|
||||
|
||||
|
@ -474,6 +478,66 @@ The header from the input can always be accessed from the `header` field.
|
|||
assert(records.header == ["a","b","c"]);
|
||||
}
|
||||
|
||||
/**
|
||||
Handcrafted csv files tend to have an variable amount of columns.
|
||||
|
||||
By default `std.csv` will throw if the number of columns on a line
|
||||
is unequal to the number of columns of the first line.
|
||||
To allow, or disallow, a variable amount of columns a `bool` can be passed to
|
||||
all overloads of the `csvReader` function as shown below.
|
||||
*/
|
||||
@safe unittest
|
||||
{
|
||||
import std.algorithm.comparison : equal;
|
||||
|
||||
string text = "76,26,22\n1,2\n3,4,5,6";
|
||||
auto records = text.csvReader!int(',', '"', true);
|
||||
|
||||
assert(records.equal!equal([
|
||||
[76, 26, 22],
|
||||
[1, 2],
|
||||
[3, 4, 5, 6]
|
||||
]));
|
||||
}
|
||||
|
||||
/// ditto
|
||||
@safe unittest
|
||||
{
|
||||
import std.algorithm.comparison : equal;
|
||||
|
||||
static struct Three
|
||||
{
|
||||
int a;
|
||||
int b;
|
||||
int c;
|
||||
}
|
||||
|
||||
string text = "76,26,22\n1,2\n3,4,5,6";
|
||||
auto records = text.csvReader!Three(',', '"', true);
|
||||
|
||||
assert(records.equal([
|
||||
Three(76, 26, 22),
|
||||
Three(1, 2, 0),
|
||||
Three(3, 4, 5)
|
||||
]));
|
||||
}
|
||||
|
||||
/// ditto
|
||||
@safe unittest
|
||||
{
|
||||
import std.algorithm.comparison : equal;
|
||||
|
||||
auto text = "Name,Occupation,Salary\r" ~
|
||||
"Joe,Carpenter,300000\nFred,Blacksmith\r\n";
|
||||
|
||||
auto r = csvReader!(string[string])(text, null, ',', '"', true);
|
||||
|
||||
assert(r.equal([
|
||||
[ "Name" : "Joe", "Occupation" : "Carpenter", "Salary" : "300000" ],
|
||||
[ "Name" : "Fred", "Occupation" : "Blacksmith" ]
|
||||
]));
|
||||
}
|
||||
|
||||
// Test standard iteration over input.
|
||||
@safe pure unittest
|
||||
{
|
||||
|
@ -843,6 +907,7 @@ private:
|
|||
Separator _quote;
|
||||
size_t[] indices;
|
||||
bool _empty;
|
||||
bool _allowInconsistentDelimiterCount;
|
||||
static if (is(Contents == struct) || is(Contents == class))
|
||||
{
|
||||
Contents recordContent;
|
||||
|
@ -884,11 +949,13 @@ public:
|
|||
* }
|
||||
* -------
|
||||
*/
|
||||
this(Range input, Separator delimiter, Separator quote)
|
||||
this(Range input, Separator delimiter, Separator quote,
|
||||
bool allowInconsistentDelimiterCount)
|
||||
{
|
||||
_input = new Input!(Range, ErrorLevel)(input);
|
||||
_separator = delimiter;
|
||||
_quote = quote;
|
||||
_allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;
|
||||
|
||||
if (_input.range.empty)
|
||||
{
|
||||
|
@ -920,11 +987,13 @@ public:
|
|||
* matching column is not found or the order did not match that found
|
||||
* in the input (non-struct).
|
||||
*/
|
||||
this(Range input, Header colHeaders, Separator delimiter, Separator quote)
|
||||
this(Range input, Header colHeaders, Separator delimiter, Separator quote,
|
||||
bool allowInconsistentDelimiterCount)
|
||||
{
|
||||
_input = new Input!(Range, ErrorLevel)(input);
|
||||
_separator = delimiter;
|
||||
_quote = quote;
|
||||
_allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;
|
||||
|
||||
if (_input.range.empty)
|
||||
{
|
||||
|
@ -939,7 +1008,8 @@ public:
|
|||
}
|
||||
|
||||
auto r = CsvRecord!(string, ErrorLevel, Range, Separator)
|
||||
(_input, _separator, _quote, indices);
|
||||
(_input, _separator, _quote, indices,
|
||||
_allowInconsistentDelimiterCount);
|
||||
|
||||
size_t colIndex;
|
||||
foreach (col; r)
|
||||
|
@ -952,6 +1022,8 @@ public:
|
|||
}
|
||||
// The above loop empties the header row.
|
||||
recordRange._empty = true;
|
||||
recordRange._allowInconsistentDelimiterCount =
|
||||
allowInconsistentDelimiterCount;
|
||||
|
||||
indices.length = colToIndex.length;
|
||||
int i;
|
||||
|
@ -1090,12 +1162,14 @@ public:
|
|||
static if (is(Contents == struct) || is(Contents == class))
|
||||
{
|
||||
recordRange = typeof(recordRange)
|
||||
(_input, _separator, _quote, null);
|
||||
(_input, _separator, _quote, null,
|
||||
_allowInconsistentDelimiterCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
recordRange = typeof(recordRange)
|
||||
(_input, _separator, _quote, indices);
|
||||
(_input, _separator, _quote, indices,
|
||||
_allowInconsistentDelimiterCount);
|
||||
}
|
||||
|
||||
static if (is(Contents T : T[U], U : string))
|
||||
|
@ -1168,7 +1242,7 @@ public:
|
|||
string str = `76;^26^;22`;
|
||||
int[] ans = [76,26,22];
|
||||
auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
|
||||
(str, ';', '^');
|
||||
(str, ';', '^', false);
|
||||
|
||||
foreach (record; records)
|
||||
{
|
||||
|
@ -1203,6 +1277,7 @@ private:
|
|||
Contents curContentsoken;
|
||||
typeof(appender!(dchar[])()) _front;
|
||||
bool _empty;
|
||||
bool _allowInconsistentDelimiterCount;
|
||||
size_t[] _popCount;
|
||||
public:
|
||||
/*
|
||||
|
@ -1214,7 +1289,8 @@ public:
|
|||
* If empty, all columns are returned. List must be in order.
|
||||
*/
|
||||
this(Input!(Range, ErrorLevel)* input, Separator delimiter,
|
||||
Separator quote, size_t[] indices)
|
||||
Separator quote, size_t[] indices,
|
||||
bool allowInconsistentDelimiterCount)
|
||||
{
|
||||
_input = input;
|
||||
_separator = delimiter;
|
||||
|
@ -1222,6 +1298,7 @@ public:
|
|||
|
||||
_front = appender!(dchar[])();
|
||||
_popCount = indices.dup;
|
||||
_allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;
|
||||
|
||||
// If a header was given, each call to popFront will need
|
||||
// to eliminate so many tokens. This calculates
|
||||
|
@ -1304,23 +1381,38 @@ public:
|
|||
{
|
||||
_empty = true;
|
||||
static if (ErrorLevel == Malformed.throwException)
|
||||
if (_input.rowLength != 0)
|
||||
if (_input.col != _input.rowLength)
|
||||
throw new CSVException(
|
||||
format("Row %s's length %s does not match "~
|
||||
"previous length of %s.", _input.row,
|
||||
_input.col, _input.rowLength));
|
||||
{
|
||||
if (_input.rowLength != 0 && _input.col != _input.rowLength
|
||||
&& !_allowInconsistentDelimiterCount)
|
||||
{
|
||||
throw new CSVException(
|
||||
format("Row %s's length %s does not match "~
|
||||
"previous length of %s.", _input.row,
|
||||
_input.col, _input.rowLength));
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
static if (ErrorLevel == Malformed.throwException)
|
||||
if (_input.rowLength != 0)
|
||||
if (_input.col > _input.rowLength)
|
||||
{
|
||||
if (_input.rowLength != 0 && _input.col > _input.rowLength)
|
||||
{
|
||||
if (!_allowInconsistentDelimiterCount)
|
||||
{
|
||||
throw new CSVException(
|
||||
format("Row %s's length %s does not match "~
|
||||
"previous length of %s.", _input.row,
|
||||
_input.col, _input.rowLength));
|
||||
}
|
||||
else
|
||||
{
|
||||
_empty = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Separator is left on the end of input from the last call.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue