mirror of
https://github.com/dlang/phobos.git
synced 2025-04-26 21:22:20 +03:00
1890 lines
52 KiB
D
1890 lines
52 KiB
D
//Written in the D programming language
|
|
|
|
/**
|
|
* Implements functionality to read Comma Separated Values and its variants
|
|
* from an $(REF_ALTTEXT input range, isInputRange, std,range,primitives) of `dchar`.
|
|
*
|
|
* Comma Separated Values provide a simple means to transfer and store
|
|
* tabular data. It has been common for programs to use their own
|
|
* variant of the CSV format. This parser will loosely follow the
|
|
* $(HTTP tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhere
|
|
* to the following criteria (differences from RFC-4180 in parentheses):
|
|
*
|
|
* $(UL
|
|
* $(LI A record is separated by a new line (CRLF,LF,CR))
|
|
* $(LI A final record may end with a new line)
|
|
* $(LI A header may be provided as the first record in input)
|
|
* $(LI A record has fields separated by a comma (customizable))
|
|
* $(LI A field containing new lines, commas, or double quotes
|
|
* should be enclosed in double quotes (customizable))
|
|
* $(LI Double quotes in a field are escaped with a double quote)
|
|
* $(LI Each record should contain the same number of fields)
|
|
* )
|
|
*
|
|
* Example:
|
|
*
|
|
* -------
|
|
* import std.algorithm;
|
|
* import std.array;
|
|
* import std.csv;
|
|
* import std.stdio;
|
|
* import std.typecons;
|
|
*
|
|
* void main()
|
|
* {
|
|
* auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
|
|
*
|
|
* foreach (record; csvReader!(Tuple!(string, string, int))(text))
|
|
* {
|
|
* writefln("%s works as a %s and earns $%d per year",
|
|
* record[0], record[1], record[2]);
|
|
* }
|
|
*
|
|
* // To read the same string from the file "filename.csv":
|
|
*
|
|
* auto file = File("filename.csv", "r");
|
|
* foreach (record;
|
|
* file.byLine.joiner("\n").csvReader!(Tuple!(string, string, int)))
|
|
* {
|
|
* writefln("%s works as a %s and earns $%d per year",
|
|
* record[0], record[1], record[2]);
|
|
* }
|
|
*
|
|
* }
|
|
* -------
|
|
*
|
|
* When an input contains a header the `Contents` can be specified as an
|
|
* associative array. Pass null as the header to signify that a header is present.
|
|
*
|
|
* -------
|
|
* auto text = "Name,Occupation,Salary\r" ~
|
|
* "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
|
|
*
|
|
* foreach (record; csvReader!(string[string])
|
|
* (text, null))
|
|
* {
|
|
* writefln("%s works as a %s and earns $%s per year.",
|
|
* record["Name"], record["Occupation"],
|
|
* record["Salary"]);
|
|
* }
|
|
*
|
|
* // To read the same string from the file "filename.csv":
|
|
*
|
|
* auto file = File("filename.csv", "r");
|
|
*
|
|
* foreach (record; csvReader!(string[string])
|
|
* (file.byLine.joiner("\n"), null))
|
|
* {
|
|
* writefln("%s works as a %s and earns $%s per year.",
|
|
* record["Name"], record["Occupation"],
|
|
* record["Salary"]);
|
|
* }
|
|
* -------
|
|
*
|
|
* This module allows content to be iterated by record stored in a struct,
|
|
* class, associative array, or as a range of fields. Upon detection of an
|
|
* error a CSVException is thrown (can be disabled). csvNextToken has been
|
|
* made public to allow for attempted recovery.
|
|
*
|
|
* Disabling exceptions will lift many restrictions specified above. A quote
|
|
* can appear in a field if the field was not quoted. If in a quoted field any
|
|
* quote by itself, not at the end of a field, will end processing for that
|
|
* field. The field is ended when there is no input, even if the quote was not
|
|
* closed.
|
|
*
|
|
* See_Also:
|
|
* $(HTTP en.wikipedia.org/wiki/Comma-separated_values, Wikipedia
|
|
* Comma-separated values)
|
|
*
|
|
* Copyright: Copyright 2011
|
|
* License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
|
|
* Authors: Jesse Phillips
|
|
* Source: $(PHOBOSSRC std/csv.d)
|
|
*/
|
|
module std.csv;
|
|
|
|
import std.conv;
|
|
import std.exception : basicExceptionCtors;
|
|
import std.range.primitives;
|
|
import std.traits;
|
|
|
|
/**
|
|
* Exception containing the row and column for when an exception was thrown.
|
|
*
|
|
* Numbering of both row and col start at one and corresponds to the location
|
|
* in the file rather than any specified header. Special consideration should
|
|
* be made when there is a failure to match the header; see $(LREF
|
|
* HeaderMismatchException) for details.
|
|
*
|
|
* When performing type conversions, $(REF ConvException, std,conv) is stored in
|
|
* the `next` field.
|
|
*/
|
|
class CSVException : Exception
{
    /// One-based location in the input; both stay zero when no location applies.
    size_t row, col;

    // FIXME: Use std.exception.basicExceptionCtors here once
    // https://issues.dlang.org/show_bug.cgi?id=11500 is fixed

    /// Message-first constructor; `row` and `col` keep their zero defaults.
    this(string msg, string file = __FILE__, size_t line = __LINE__,
         Throwable next = null) @nogc @safe pure nothrow
    {
        super(msg, file, line, next);
    }

    /// Chaining constructor; `row` and `col` keep their zero defaults.
    this(string msg, Throwable next, string file = __FILE__,
         size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, file, line, next);
    }

    /// Constructor that records where in the input the problem was found.
    this(string msg, size_t row, size_t col, Throwable next = null,
         string file = __FILE__, size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, next, file, line);
        this.col = col;
        this.row = row;
    }

    /// Renders the exception as "(Row: R, Col: C) message".
    override string toString() @safe pure const
    {
        immutable location = "(Row: " ~ row.to!string ~ ", Col: " ~ col.to!string ~ ") ";
        return location ~ msg;
    }
}
|
|
|
|
///
|
|
@safe unittest
{
    import std.algorithm.searching : count;
    import std.exception : collectException;

    // The second record has two fields while the first one has three.
    string input = "a,b,c\nHello,65";
    auto thrown = collectException!CSVException(input.csvReader.count);
    assert(thrown.toString ==
        "(Row: 0, Col: 0) Row 2's length 2 does not match previous length of 3.");
}
|
|
|
|
///
|
|
@safe unittest
{
    import std.algorithm.searching : count;
    import std.exception : collectException;
    import std.typecons : Tuple;

    // "b" in the first record cannot be converted to the requested int.
    alias Row = Tuple!(string, int);
    string input = "a,b\nHello,65";
    auto thrown = collectException!CSVException(csvReader!Row(input).count);
    assert(thrown.toString ==
        "(Row: 1, Col: 2) Unexpected 'b' when converting from type string to type int");
}
|
|
|
|
// https://issues.dlang.org/show_bug.cgi?id=24478
|
|
@safe unittest
{
    import std.algorithm.searching : count;
    import std.exception : collectException;

    // The data row carries three values but the header only names two.
    string input = "A, B\n1, 2, 3";
    auto reader = csvReader!(string[string])(input, null);
    auto thrown = collectException!CSVException(reader.count);
    assert(thrown.toString == "(Row: 1, Col: 3) row contains more values than header");
}
|
|
|
|
@safe pure unittest
{
    import std.string;

    // Chaining constructor keeps the cause reachable through `next`.
    auto cause = new Exception("Foobar");
    auto chained = new CSVException("args", cause);
    assert(chained.next is cause);

    // Location constructor stores row and column as given.
    size_t expectedRow = 13;
    size_t expectedCol = 37;
    auto located = new CSVException("argv", expectedRow, expectedCol);
    assert(located.row == expectedRow);
    assert(located.col == expectedCol);

    // Both numbers must show up in the rendered text.
    auto rendered = located.toString();
    assert(rendered.indexOf("13") >= 0);
    assert(rendered.indexOf("37") >= 0);
}
|
|
|
|
/**
|
|
* Exception thrown when a Token is identified to not be completed: a quote is
|
|
* found in an unquoted field, data continues after a closing quote, or the
|
|
* quoted field was not closed before data was empty.
|
|
*/
|
|
class IncompleteCellException : CSVException
{
    /**
     * The part of the cell that had been read when the problem was detected.
     *
     * $(LREF csvReader) fills this in. $(LREF csvNextToken) does not,
     * because by then the data has already been written to the
     * caller's output range.
     */
    dstring partialData;

    // Standard (msg, file, line, next) / (msg, next, file, line) constructors.
    mixin basicExceptionCtors;
}
|
|
|
|
///
|
|
@safe unittest
{
    import std.exception : assertThrown;

    // The quote opened before `b` is never closed.
    string unterminated = "a,\"b,c\nHello,65,2.5";
    assertThrown!IncompleteCellException(csvReader(unterminated, ["a", "b", "c"]));
}
|
|
|
|
@safe pure unittest
{
    // The mixed-in chaining constructor must forward the cause to `next`.
    auto cause = new Exception("Foobar");
    auto chained = new IncompleteCellException("args", cause);
    assert(chained.next is cause);
}
|
|
|
|
/**
|
|
* Exception thrown under different conditions based on the type of $(D
|
|
* Contents).
|
|
*
|
|
* Structure, Class, and Associative Array
|
|
* $(UL
|
|
* $(LI When a header is provided but a matching column is not found)
|
|
* )
|
|
*
|
|
* Other
|
|
* $(UL
|
|
* $(LI When a header is provided but a matching column is not found)
|
|
* $(LI Order did not match that found in the input)
|
|
* )
|
|
*
|
|
* Since a row and column is not meaningful when a column specified by the
|
|
* header is not found in the data, both row and col will be zero. Otherwise
|
|
* row is always one and col is the first instance found in header that
|
|
* occurred before the previous starting at one.
|
|
*/
|
|
class HeaderMismatchException : CSVException
{
    // Standard (msg, file, line, next) / (msg, next, file, line) constructors.
    mixin basicExceptionCtors;
}
|
|
|
|
///
|
|
@safe unittest
{
    import std.exception : assertThrown;

    // "invalid" is not a column of the input header.
    string input = "a,b,c\nHello,65,2.5";
    assertThrown!HeaderMismatchException(csvReader(input, ["b", "c", "invalid"]));
}
|
|
|
|
@safe pure unittest
{
    // The mixed-in chaining constructor must forward the cause to `next`.
    auto cause = new Exception("Foobar");
    auto chained = new HeaderMismatchException("args", cause);
    assert(chained.next is cause);
}
|
|
|
|
/**
|
|
* Determines the behavior for when an error is detected.
|
|
*
|
|
* Disabling exceptions makes the parser follow these rules:
|
|
* $(UL
|
|
* $(LI A quote can appear in a field if the field was not quoted.)
|
|
* $(LI If in a quoted field any quote by itself, not at the end of a
|
|
* field, will end processing for that field.)
|
|
* $(LI The field is ended when there is no input, even if the quote was
|
|
* not closed.)
|
|
* $(LI If the given header does not match the order in the input, the
|
|
* content will return as it is found in the input.)
|
|
* $(LI If the given header contains columns not found in the input they
|
|
* will be ignored.)
|
|
* )
|
|
*/
|
|
enum Malformed
{
    /// Incorrect CSV never causes an exception to be thrown.
    ignore,
    /// Incorrect CSV is reported by throwing an exception.
    throwException
}
|
|
|
|
///
|
|
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.searching : count;
    import std.exception : assertThrown;

    // The last cell opens a quote that is never closed.
    string text = "a,b,c\nHello,65,\"2.5";
    assertThrown!IncompleteCellException(csvReader(text).count);

    // ignore the exceptions and try to handle invalid CSV
    auto lenient = csvReader!(string, Malformed.ignore)(text, null);
    auto firstLine = lenient.front;
    assert(equal(firstLine, ["Hello", "65", "2.5"]));
}
|
|
|
|
/**
|
|
Returns an $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
|
|
for iterating over records found in `input`.
|
|
|
|
An optional `header` can be provided. The first record will be read in
|
|
as the header. If `Contents` is a struct then the header provided is
|
|
expected to correspond to the fields in the struct. When `Contents` is
|
|
not a type which can contain the entire record, the `header` must be
|
|
provided in the same order as the input or an exception is thrown.
|
|
|
|
Returns:
|
|
An input range R as defined by
|
|
$(REF isInputRange, std,range,primitives). When `Contents` is a
|
|
struct, class, or an associative array, the element type of R is
|
|
`Contents`, otherwise the element type of R is itself a range with
|
|
element type `Contents`.
|
|
|
|
If a `header` argument is provided,
|
|
the returned range provides a `header` field for accessing the header
|
|
from the input in array form.
|
|
|
|
Throws:
|
|
$(LREF CSVException) When a quote is found in an unquoted field,
|
|
data continues after a closing quote, the quoted field was not
|
|
closed before data was empty, a conversion failed, or when the row's
|
|
length does not match the previous length.
|
|
|
|
$(LREF HeaderMismatchException) when a header is provided but a
|
|
matching column is not found or the order did not match that found in
|
|
the input. Read the exception documentation for specific details of
|
|
when the exception is thrown for different types of `Contents`.
|
|
*/
|
|
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Separator = char)
              (Range input,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && !is(Contents T : T[U], U : string))
{
    // Header-less reader: the separator type follows the input's element type.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), string[]);
    return Reader(input, delimiter, quote, allowInconsistentDelimiterCount);
}
|
|
|
|
/// ditto
|
|
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && isForwardRange!Header
    && isSomeString!(ElementType!Header))
{
    // Reader with a caller-supplied header, which is forwarded unchanged.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), Header);
    return Reader(input, header, delimiter, quote,
                  allowInconsistentDelimiterCount);
}
|
|
|
|
/// ditto
|
|
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && is(Header : typeof(null)))
{
    // A `null` header is passed on as an empty string[] header.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), string[]);
    return Reader(input, cast(string[]) null, delimiter, quote,
                  allowInconsistentDelimiterCount);
}
|
|
|
|
|
|
/**
|
|
The `Contents` of the input can be provided if all the records are the
|
|
same type such as all integer data:
|
|
*/
|
|
@safe unittest
{
    import std.algorithm.comparison : equal;

    string text = "76,26,22";
    auto records = csvReader!int(text);

    // One record, every cell converted to int.
    int[][] expected = [[76, 26, 22]];
    assert(equal!equal(records, expected));
}
|
|
|
|
/**
|
|
Using a struct with modified delimiter:
|
|
*/
|
|
@safe unittest
{
    import std.algorithm.comparison : equal;

    struct Layout
    {
        string name;
        int value;
        double other;
    }

    // Fields are separated by ';' instead of the default ','.
    string text = "Hello;65;2.5\nWorld;123;7.5";
    auto records = csvReader!Layout(text, ';');

    auto expected = [
        Layout("Hello", 65, 2.5),
        Layout("World", 123, 7.5),
    ];
    assert(equal(records, expected));
}
|
|
|
|
/**
|
|
Specifying `ErrorLevel` as $(LREF Malformed.ignore) will lift restrictions
|
|
on the format. This example shows that an exception is not thrown when
|
|
finding a quote in a field not quoted.
|
|
*/
|
|
@safe unittest
{
    string text = "A \" is now part of the data";
    auto records = csvReader!(string, Malformed.ignore)(text);

    // The whole input comes back as the first cell of the first record.
    auto firstRecord = records.front;
    auto firstCell = firstRecord.front;
    assert(firstCell == text);
}
|
|
|
|
/// Read only column "b"
|
|
@safe unittest
{
    import std.algorithm.comparison : equal;

    string text = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // Only the column named "b" is selected.
    auto records = csvReader!int(text, ["b"]);

    int[][] expected = [[65], [123]];
    assert(equal!equal(records, expected));
}
|
|
|
|
/// Read while rearranging the columns by specifying a header with a different order
|
|
@safe unittest
{
    import std.algorithm.comparison : equal;

    // Field order differs from the column order of the input.
    struct Layout
    {
        int value;
        double other;
        string name;
    }

    string text = "a,b,c\nHello,65,2.5\nWorld,123,7.5";

    // The header lists, per struct field, which input column feeds it.
    auto records = csvReader!Layout(text, ["b", "c", "a"]);

    auto expected = [
        Layout(65, 2.5, "Hello"),
        Layout(123, 7.5, "World")
    ];
    assert(equal(records, expected));
}
|
|
|
|
/**
|
|
The header can also be left empty if the input contains a header row
|
|
and all columns should be iterated.
|
|
The header from the input can always be accessed from the `header` field.
|
|
*/
|
|
@safe unittest
{
    string text = "a,b,c\nHello,65,63.63";

    // A null header means "the input has a header; use every column".
    auto records = csvReader(text, null);

    string[] expectedHeader = ["a", "b", "c"];
    assert(records.header == expectedHeader);
}
|
|
|
|
/**
|
|
Handcrafted csv files tend to have a variable number of columns.
|
|
|
|
By default `std.csv` will throw if the number of columns on a line
|
|
is unequal to the number of columns of the first line.
|
|
To allow, or disallow, a variable amount of columns a `bool` can be passed to
|
|
all overloads of the `csvReader` function as shown below.
|
|
*/
|
|
@safe unittest
{
    import std.algorithm.comparison : equal;

    // Rows with three, two and four cells.
    string text = "76,26,22\n1,2\n3,4,5,6";

    // The trailing `true` allows rows of differing length.
    auto records = csvReader!int(text, ',', '"', true);

    int[][] expected = [
        [76, 26, 22],
        [1, 2],
        [3, 4, 5, 6]
    ];
    assert(equal!equal(records, expected));
}
|
|
|
|
/// ditto
|
|
@safe unittest
{
    import std.algorithm.comparison : equal;

    static struct Three
    {
        int a;
        int b;
        int c;
    }

    string text = "76,26,22\n1,2\n3,4,5,6";
    auto records = csvReader!Three(text, ',', '"', true);

    // The short row leaves `c` at its default; the long row's extra cell is dropped.
    auto expected = [
        Three(76, 26, 22),
        Three(1, 2, 0),
        Three(3, 4, 5)
    ];
    assert(equal(records, expected));
}
|
|
|
|
/// ditto
|
|
@safe unittest
{
    import std.algorithm.comparison : equal;

    // The last row has no "Salary" value.
    auto text = "Name,Occupation,Salary\r" ~
        "Joe,Carpenter,300000\nFred,Blacksmith\r\n";

    auto r = csvReader!(string[string])(text, null, ',', '"', true);

    string[string][] expected = [
        [ "Name" : "Joe", "Occupation" : "Carpenter", "Salary" : "300000" ],
        [ "Name" : "Fred", "Occupation" : "Blacksmith" ]
    ];
    assert(equal(r, expected));
}
|
|
|
|
// Test standard iteration over input.
|
|
@safe pure unittest
{
    // Three records of two cells each, including escaped quotes and an
    // embedded new line inside a quoted cell.
    string input = `one,"two ""quoted"""` ~ "\n\"three\nnew line\",\nfive,six";

    int cells;
    foreach (row; csvReader(input))
    {
        foreach (cell; row)
        {
            ++cells;
        }
    }
    assert(cells == 6);
}
|
|
|
|
// Test newline on last record
|
|
@safe pure unittest
{
    // A trailing new line must not produce a third, empty record.
    string input = "one,two\nthree,four\n";
    auto rows = csvReader(input);

    foreach (_; 0 .. 2)
        rows.popFront();

    assert(rows.empty);
}
|
|
|
|
// Test shorter row length
|
|
@safe pure unittest
{
    struct Layout
    {
        string name;
        int value;
    }

    // Rows two and three are missing the `value` cell.
    wstring str = "one,1\ntwo\nthree"w;

    // With Malformed.ignore a missing cell leaves the field at its default.
    Layout[3] ans;
    ans[0].name = "one";
    ans[0].value = 1;
    ans[1].name = "two";
    ans[1].value = 0;
    ans[2].name = "three";
    ans[2].value = 0;

    auto records = csvReader!(Layout,Malformed.ignore)(str);

    int count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        count++;
    }

    // Without this check a reader that yields no records would pass.
    assert(count == ans.length);
}
|
|
|
|
// Test shorter row length exception
|
|
@safe pure unittest
{
    import std.exception;

    struct A
    {
        string a,b,c;
    }

    // Each input has at least one row whose length differs from the first row.
    string[] inputs = [
        "one,1\ntwo",
        "one\ntwo,2,二\nthree,3,三",
        "one\ntwo,2\nthree,3",
        "one,1\ntwo\nthree,3",
    ];

    foreach (input; inputs)
    {
        auto rows = csvReader!A(input);
        // Iterating to the end must trip over the mismatching row.
        assertThrown!CSVException((){ foreach (row; rows) { } }());
    }
}
|
|
|
|
|
|
// Test structure conversion interface with unicode.
|
|
@safe pure unittest
{
    import std.math.algebraic : abs;

    struct Layout
    {
        string name;
        int value;
        double other;
    }

    // UTF-16 input whose first cell starts with a non-BMP code point.
    wstring input = "\U00010143Hello,65,63.63\nWorld,123,3673.562"w;

    Layout[2] expected = [
        Layout("\U00010143Hello", 65, 63.63),
        Layout("World", 123, 3673.562),
    ];

    int seen;
    foreach (row; csvReader!Layout(input))
    {
        const want = expected[seen];
        assert(want.name == row.name);
        assert(want.value == row.value);
        assert(abs(want.other - row.other) < 0.00001);
        ++seen;
    }
    assert(seen == expected.length);
}
|
|
|
|
// Test input conversion interface
|
|
@safe pure unittest
{
    import std.algorithm;

    string str = `76,26,22`;
    int[] ans = [76,26,22];
    auto records = csvReader!int(str);

    // Count the records so the assertion in the loop cannot pass vacuously.
    size_t recordCount;
    foreach (record; records)
    {
        assert(equal(record, ans));
        recordCount++;
    }
    assert(recordCount == 1);
}
|
|
|
|
// Test struct & header interface and same unicode
|
|
@safe unittest
{
    import std.math.algebraic : abs;

    // Field order differs from the column order in the input.
    struct Layout
    {
        int value;
        double other;
        string name;
    }

    string input = "a,b,c\nHello,65,63.63\n➊➋➂❹,123,3673.562";
    auto rows = csvReader!Layout(input, ["b","c","a"]);

    Layout[2] expected = [
        Layout(65, 63.63, "Hello"),
        Layout(123, 3673.562, "➊➋➂❹"),
    ];

    int seen;
    foreach (row; rows)
    {
        const want = expected[seen];
        assert(want.name == row.name);
        assert(want.value == row.value);
        assert(abs(want.other - row.other) < 0.00001);
        ++seen;
    }
    assert(seen == expected.length);
}
|
|
|
|
// Test header interface
|
|
@safe unittest
{
    import std.algorithm;

    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // Selecting a single column by name.
    auto records = csvReader!int(str, ["b"]);

    auto ans = [[65],[123]];
    foreach (record; records)
    {
        assert(equal(record, ans.front));
        ans.popFront();
    }
    // Every expected record must have been produced.
    assert(ans.empty);

    // A header given in a different order than the input is rejected when
    // the contents are read as a plain range of fields.
    try
    {
        csvReader(str, ["c","b"]);
        assert(0);
    }
    catch (HeaderMismatchException e)
    {
        assert(e.col == 2);
    }

    // With Malformed.ignore the columns come back in input order instead.
    auto records2 = csvReader!(string,Malformed.ignore)
        (str, ["b","a"], ',', '"');

    auto ans2 = [["Hello","65"],["World","123"]];
    foreach (record; records2)
    {
        assert(equal(record, ans2.front));
        ans2.popFront();
    }
    assert(ans2.empty);

    // With Malformed.ignore requested columns missing from the input are skipped.
    str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4";
    records2 = csvReader!(string,Malformed.ignore)
        (str, ["a","b","c","d"], ',', '"');

    ans2 = [["Joe","Carpenter"],["Fred","Fly"]];
    foreach (record; records2)
    {
        assert(equal(record, ans2.front));
        ans2.popFront();
    }
    assert(ans2.empty);
}
|
|
|
|
// Test null header interface
|
|
@safe unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // `header` exposes the full header row of the input even when only a
    // subset of the columns was requested ...
    auto records = csvReader(str, ["a"]);
    assert(records.header == ["a","b","c"]);

    // ... and also when a null header is passed, which is the case this
    // test is named after.
    auto nullHeaderRecords = csvReader(str, null);
    assert(nullHeaderRecords.header == ["a","b","c"]);
}
|
|
|
|
// Test unchecked read
|
|
@safe pure unittest
{
    // With Malformed.ignore, quotes inside an unquoted cell are plain data.
    string input = "one \"quoted\"";
    foreach (row; csvReader!(string,Malformed.ignore)(input))
    {
        foreach (cell; row)
        {
            assert(cell == "one \"quoted\"");
        }
    }

    struct Ans
    {
        string a,b;
    }

    // Same rule when the cells are converted into struct fields.
    input = "one \"quoted\",two \"quoted\" end";
    foreach (row; csvReader!(Ans,Malformed.ignore)(input))
    {
        assert(row.a == "one \"quoted\"");
        assert(row.b == "two \"quoted\" end");
    }
}
|
|
|
|
// Test partial data returned
|
|
@safe pure unittest
{
    // The opening quote is never closed.
    string input = "\"one\nnew line";

    bool caught;
    try
    {
        foreach (row; csvReader(input))
        {}
    }
    catch (IncompleteCellException ice)
    {
        caught = true;
        // Everything read so far is handed back to the caller.
        assert(ice.partialData == "one\nnew line");
    }
    // Iterating without an exception counts as a failure.
    assert(caught);
}
|
|
|
|
// Test Windows line break
|
|
@safe pure unittest
{
    // "\r\n" has to be treated as a single record separator.
    string input = "one,two\r\nthree";

    auto rows = csvReader(input);

    auto cells = rows.front;
    assert(cells.front == "one");
    cells.popFront();
    assert(cells.front == "two");

    rows.popFront();
    cells = rows.front;
    assert(cells.front == "three");
}
|
|
|
|
|
|
// Test associative array support with unicode separator
|
|
@safe unittest
{
    // '❁' is a multi-byte separator; the header row is "1", "2", "3".
    string input = "1❁2❁3\n34❁65❁63\n34❁65❁63";

    auto rows = csvReader!(string[string])(input, ["3","1"], '❁');

    int seen;
    foreach (row; rows)
    {
        ++seen;
        assert(row["1"] == "34");
        assert(row["3"] == "63");
    }
    assert(seen == 2);
}
|
|
|
|
// Test restricted range
|
|
@safe unittest
{
    import std.typecons;

    // Minimal input range: only empty/front/popFront, no slicing or save.
    struct InputRange
    {
        dstring text;

        this(dstring txt)
        {
            text = txt;
        }

        @property auto empty()
        {
            return text.empty;
        }

        void popFront()
        {
            text.popFront();
        }

        @property dchar front()
        {
            return text[0];
        }
    }

    auto ir = InputRange("Name,Occupation,Salary\r"d~
        "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"d);

    // Plain range of fields.
    foreach (row; csvReader(ir, cast(string[]) null))
    {
        foreach (cell; row) {}
    }

    // Tuple contents.
    alias Person = Tuple!(string, string, int);
    foreach (row; csvReader!Person(ir, cast(string[]) null)) {}

    // Associative array contents.
    foreach (row; csvReader!(string[string])(ir, cast(string[]) null)) {}
}
|
|
|
|
@safe unittest // const/immutable dchars
{
    import std.algorithm.iteration : map;
    import std.array : array;

    string[][] expected = [["foo", "bar"]];

    // Element type const(dchar).
    const(dchar)[] constInput = "foo,bar\n";
    assert(csvReader(constInput).map!array.array == expected);

    // Element type immutable(dchar).
    immutable(dchar)[] immutableInput = "foo,bar\n";
    assert(csvReader(immutableInput).map!array.array == expected);
}
|
|
|
|
/*
|
|
* This struct is stored on the heap for when the structures
|
|
* are passed around.
|
|
*/
|
|
private pure struct Input(Range, Malformed ErrorLevel)
{
    // The character source being parsed.
    Range range;

    // Current position; CsvReader advances `row` and resets `col` per record.
    size_t row;
    size_t col;

    // Only needed when row lengths are validated.
    static if (ErrorLevel == Malformed.throwException)
    {
        size_t rowLength;
    }
}
|
|
|
|
/*
|
|
* Range for iterating CSV records.
|
|
*
|
|
* This range is returned by the $(LREF csvReader) functions. It can be
|
|
* created in a similar manner to allow `ErrorLevel` be set to $(LREF
|
|
* Malformed).ignore if best guess processing should take place.
|
|
*/
|
|
private struct CsvReader(Contents, Malformed ErrorLevel, Range, Separator, Header)
|
|
if (isSomeChar!Separator && isInputRange!Range
|
|
&& is(immutable ElementType!Range == immutable dchar)
|
|
&& isForwardRange!Header && isSomeString!(ElementType!Header))
|
|
{
|
|
private:
    // State shared with the record ranges this reader creates.
    Input!(Range, ErrorLevel)* _input;
    Separator _separator;
    Separator _quote;
    // Column indices selected through a header; left empty without one.
    size_t[] indices;
    bool _empty;
    bool _allowInconsistentDelimiterCount;

    // Aggregates and associative arrays are assembled into `recordContent`;
    // for any other `Contents` the record range itself is the element.
    static if (is(Contents == struct) || is(Contents == class))
    {
        Contents recordContent;
        CsvRecord!(string, ErrorLevel, Range, Separator) recordRange;
    }
    else static if (is(Contents T : T[U], U : string))
    {
        Contents recordContent;
        CsvRecord!(T, ErrorLevel, Range, Separator) recordRange;
    }
    else
    {
        CsvRecord!(Contents, ErrorLevel, Range, Separator) recordRange;
    }

public:
    /**
     * Header from the input in array form.
     *
     * -------
     * string str = "a,b,c\nHello,65,63.63";
     * auto records = csvReader(str, ["a"]);
     *
     * assert(records.header == ["a","b","c"]);
     * -------
     */
    string[] header;
|
|
|
|
/**
|
|
* Constructor to initialize the input, delimiter and quote for input
|
|
* without a header.
|
|
*
|
|
* -------
|
|
* string str = `76;^26^;22`;
|
|
* int[] ans = [76,26,22];
|
|
* auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
|
|
* (str, ';', '^');
|
|
*
|
|
* foreach (record; records)
|
|
* {
|
|
* assert(equal(record, ans));
|
|
* }
|
|
* -------
|
|
*/
|
|
this(Range input, Separator delimiter, Separator quote,
     bool allowInconsistentDelimiterCount)
{
    _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;
    _separator = delimiter;
    _quote = quote;
    _input = new Input!(Range, ErrorLevel)(input);

    // With no input there is nothing to load; otherwise read the first record.
    _empty = _input.range.empty;
    if (!_empty)
        prime();
}
|
|
|
|
/**
|
|
* Constructor to initialize the input, delimiter and quote for input
|
|
* with a header.
|
|
*
|
|
* -------
|
|
* string str = `high;mean;low\n76;^26^;22`;
|
|
* auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
|
|
* (str, ["high","low"], ';', '^');
|
|
*
|
|
* int[] ans = [76,22];
|
|
* foreach (record; records)
|
|
* {
|
|
* assert(equal(record, ans));
|
|
* }
|
|
* -------
|
|
*
|
|
* Throws:
|
|
* $(LREF HeaderMismatchException) when a header is provided but a
|
|
* matching column is not found or the order did not match that found
|
|
* in the input (non-struct).
|
|
*/
|
|
this(Range input, Header colHeaders, Separator delimiter, Separator quote,
     bool allowInconsistentDelimiterCount)
{
    _input = new Input!(Range, ErrorLevel)(input);
    _separator = delimiter;
    _quote = quote;
    _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;

    if (_input.range.empty)
    {
        _empty = true;
        return;
    }

    // Every requested column starts out as "not found" (size_t.max).
    size_t[string] colToIndex;
    foreach (h; colHeaders)
    {
        colToIndex[h] = size_t.max;
    }

    // Read the header row of the input. `indices` is still empty here, so
    // every column of the row is visited.
    auto r = CsvRecord!(string, ErrorLevel, Range, Separator)
        (_input, _separator, _quote, indices,
         _allowInconsistentDelimiterCount);

    size_t colIndex;
    foreach (col; r)
    {
        header ~= col;
        // Remember the zero-based position of each requested column.
        auto ptr = col in colToIndex;
        if (ptr)
            *ptr = colIndex;
        colIndex++;
    }
    // The above loop empties the header row.
    recordRange._empty = true;
    recordRange._allowInconsistentDelimiterCount =
        allowInconsistentDelimiterCount;

    // Collect the positions in the order the caller listed the headers.
    // NOTE(review): `indices` is sized by the number of distinct header
    // names, so a header range with duplicates would index past the end
    // here — confirm whether duplicates should be rejected explicitly.
    indices.length = colToIndex.length;
    size_t i;
    foreach (h; colHeaders)
    {
        immutable index = colToIndex[h];
        static if (ErrorLevel != Malformed.ignore)
            if (index == size_t.max)
                throw new HeaderMismatchException
                    ("Header not found: " ~ to!string(h));
        indices[i++] = index;
    }

    static if (!is(Contents == struct) && !is(Contents == class))
    {
        static if (is(Contents T : T[U], U : string))
        {
            import std.algorithm.sorting : sort;
            sort(indices);
        }
        else static if (ErrorLevel == Malformed.ignore)
        {
            import std.algorithm.sorting : sort;
            sort(indices);
        }
        else
        {
            import std.algorithm.searching : findAdjacent;
            import std.algorithm.sorting : isSorted;
            if (!isSorted(indices))
            {
                auto ex = new HeaderMismatchException
                    ("Header in input does not match specified header.");
                // Locate the first requested header that sits before its
                // predecessor in the input. `indices` is not sorted, so the
                // result holds at least that offending pair. The result used
                // to be discarded, which made `col` always report the first
                // requested header's zero-based index.
                auto outOfOrder = findAdjacent!"a > b"(indices);
                ex.row = 1;
                // Report the one-based file column of the header that
                // appeared earlier in the input than the one before it.
                ex.col = outOfOrder[1] + 1;

                throw ex;
            }
        }
    }

    popFront();
}
|
|
|
|
/**
|
|
* Part of an input range as defined by
|
|
* $(REF isInputRange, std,range,primitives).
|
|
*
|
|
* Returns:
|
|
* If `Contents` is a struct, will be filled with record data.
|
|
*
|
|
* If `Contents` is a class, will be filled with record data.
|
|
*
|
|
* If `Contents` is an associative array, will be filled
|
|
* with record data.
|
|
*
|
|
* If `Contents` is non-struct, a $(LREF CsvRecord) will be
|
|
* returned.
|
|
*/
|
|
@property auto front()
{
    assert(!empty, "Attempting to fetch the front of an empty CsvReader");

    // `recordContent` is only declared for struct, class and associative
    // array contents; for anything else the record range is the element.
    static if (is(typeof(recordContent)))
        return recordContent;
    else
        return recordRange;
}
|
|
|
|
/**
|
|
* Part of an input range as defined by
|
|
* $(REF isInputRange, std,range,primitives).
|
|
*/
|
|
@property bool empty() @safe @nogc pure nothrow const
{
    // Set by the constructors and by popFront once the input runs out.
    return _empty;
}
|
|
|
|
/**
|
|
* Part of an input range as defined by
|
|
* $(REF isInputRange, std,range,primitives).
|
|
*
|
|
* Throws:
|
|
* $(LREF CSVException) When a quote is found in an unquoted field,
|
|
* data continues after a closing quote, the quoted field was not
|
|
* closed before data was empty, a conversion failed, or when the
|
|
* row's length does not match the previous length.
|
|
*/
|
|
void popFront()
{
    // Consume whatever is left of the current record.
    for (; !recordRange.empty; recordRange.popFront())
    {
    }

    // The first completed record fixes the row length to compare against.
    static if (ErrorLevel == Malformed.throwException)
    {
        if (_input.rowLength == 0)
            _input.rowLength = _input.col;
    }

    _input.col = 0;

    // Step over one record separator: "\r\n", "\r" or "\n".
    if (!_input.range.empty)
    {
        immutable dchar lead = _input.range.front;
        if (lead == '\r')
        {
            _input.range.popFront();
            if (!_input.range.empty && _input.range.front == '\n')
                _input.range.popFront();
        }
        else if (lead == '\n')
        {
            _input.range.popFront();
        }
    }

    // Nothing after the separator: iteration is finished.
    if (_input.range.empty)
    {
        _empty = true;
        return;
    }

    prime();
}
|
|
|
|
private void prime()
{
    if (_empty)
        return;

    ++_input.row;

    // Start a fresh record range. Aggregates map columns to fields below,
    // so their record range is created without a column selection.
    alias Record = typeof(recordRange);
    static if (is(Contents == struct) || is(Contents == class))
    {
        recordRange = Record(_input, _separator, _quote, null,
                             _allowInconsistentDelimiterCount);
    }
    else
    {
        recordRange = Record(_input, _separator, _quote, indices,
                             _allowInconsistentDelimiterCount);
    }

    static if (is(Contents T : T[U], U : string))
    {
        // Associative array: key every value by the header of its column.
        T[U] row;
        try
        {
            while (!recordRange.empty)
            {
                const headerPos = _input.col - 1;
                if (headerPos >= header.length)
                    throw new CSVException("row contains more values than header", _input.row, _input.col);
                row[header[headerPos]] = recordRange.front;
                recordRange.popFront();
            }
        }
        catch (ConvException e)
        {
            throw new CSVException(e.msg, _input.row, _input.col, e);
        }

        recordContent = row;
    }
    else static if (is(Contents == struct) || is(Contents == class))
    {
        // Aggregate: start from a fresh value, then fill field by field.
        static if (is(Contents == class))
            recordContent = new typeof(recordContent)();
        else
            recordContent = typeof(recordContent).init;

        size_t colIndex;
        try
        {
            while (!recordRange.empty)
            {
                auto colData = recordRange.front;
                // Runs even when a conversion throws, so the catch block
                // below sees the one-based column number.
                scope(exit) colIndex++;

                // With a header, field `ti` is fed by column `indices[ti]`;
                // without one, by the column at the same position.
                immutable useHeaderOrder = indices.length > 0;
                foreach (ti, ToType; Fields!(Contents))
                {
                    if ((useHeaderOrder ? indices[ti] : ti) == colIndex)
                    {
                        static if (!isSomeString!ToType) skipWS(colData);
                        recordContent.tupleof[ti] = to!ToType(colData);
                    }
                }
                recordRange.popFront();
            }
        }
        catch (ConvException e)
        {
            throw new CSVException(e.msg, _input.row, colIndex, e);
        }
    }
}
|
|
}
|
|
|
|
@safe pure unittest
{
    import std.algorithm.comparison : equal;

    // ';' separates the fields and '^' takes the role of the quote.
    string str = `76;^26^;22`;
    const expected = [76, 26, 22];

    auto records = CsvReader!(int, Malformed.ignore, string, char, string[])(
            str, ';', '^', false);

    foreach (record; records)
        assert(equal(record, expected));
}
|
|
|
|
// https://issues.dlang.org/show_bug.cgi?id=15545
|
|
// @system due to the catch for Throwable
|
|
// Regression test: input whose last character is a lone carriage return
// must be iterable without anything being thrown.
@system pure unittest
|
|
{
|
|
import std.exception : assertNotThrown;
|
|
// A first row followed by one record that ends in '\r'.
enum failData =
|
|
"name, surname, age
|
|
Joe, Joker, 99\r";
|
|
auto r = csvReader(failData);
|
|
// Walk the whole reader; the loop body is intentionally empty.
assertNotThrown((){foreach (entry; r){}}());
|
|
}
|
|
|
|
/*
 * This input range is accessible through $(LREF CsvReader) when the
 * requested `Contents` type is not a structure, a class or an associative
 * array. It yields the fields of a single record, each converted to
 * `Contents`.
 */
private struct CsvRecord(Contents, Malformed ErrorLevel, Range, Separator)
if (!is(Contents == class) && !is(Contents == struct))
{
    import std.array : appender;
private:
    // Pointer to the parser state: the underlying range plus the row,
    // column and row-length counters used below.
    Input!(Range, ErrorLevel)* _input;
    Separator _separator;
    Separator _quote;
    // Converted value of the field currently at the front.
    Contents curContentsoken;
    // Scratch buffer that receives the characters of each token.
    typeof(appender!(dchar[])()) _front;
    bool _empty;
    bool _allowInconsistentDelimiterCount;
    // For each requested column, the number of fields to skip to reach it
    // from the previously returned one (computed in the constructor).
    size_t[] _popCount;
public:
    /*
     * Params:
     *      input = Pointer to a character $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
     *      delimiter = Separator for each column
     *      quote = Character used for quotation
     *      indices = An array containing which columns will be returned.
     *             If empty, all columns are returned. List must be in order.
     *      allowInconsistentDelimiterCount = When `true`, a row whose length
     *             differs from the recorded row length does not raise a
     *             $(LREF CSVException).
     */
    this(Input!(Range, ErrorLevel)* input, Separator delimiter,
         Separator quote, size_t[] indices,
         bool allowInconsistentDelimiterCount)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;

        _front = appender!(dchar[])();
        _popCount = indices.dup;
        _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;

        // If a header was given, each call to popFront will need
        // to eliminate so many tokens. This calculates
        // how many will be skipped to get to the next header column
        size_t normalizer;
        foreach (ref c; _popCount)
        {
            static if (ErrorLevel == Malformed.ignore)
            {
                // If we are not throwing exceptions
                // a header may not exist, indices are sorted
                // and will be size_t.max if not found.
                if (c == size_t.max)
                    break;
            }
            c -= normalizer;
            normalizer += c + 1;
        }

        prime();
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Returns: The current field converted to `Contents`.
     */
    @property Contents front() @safe pure
    {
        assert(!empty, "Attempting to fetch the front of an empty CsvRecord");
        return curContentsoken;
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property bool empty() @safe pure nothrow @nogc const
    {
        return _empty;
    }

    /*
     * CsvRecord is complete when input
     * is empty or starts with record break
     */
    private bool recordEnd()
    {
        if (_input.range.empty
           || _input.range.front == '\n'
           || _input.range.front == '\r')
        {
            return true;
        }
        return false;
    }


    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, a conversion failed, or when the
     *       row's length does not match the previous length.
     */
    void popFront()
    {
        static if (ErrorLevel == Malformed.throwException)
            import std.format : format;
        // Skip last of record when header is depleted.
        if (_popCount.ptr && _popCount.empty)
            while (!recordEnd())
            {
                prime(1);
            }

        if (recordEnd())
        {
            _empty = true;
            static if (ErrorLevel == Malformed.throwException)
            {
                // At the end of the row its length must equal the recorded
                // one, unless inconsistent rows are explicitly allowed.
                if (_input.rowLength != 0 && _input.col != _input.rowLength
                    && !_allowInconsistentDelimiterCount)
                {
                    throw new CSVException(
                       format("Row %s's length %s does not match "~
                              "previous length of %s.", _input.row,
                              _input.col, _input.rowLength));
                }
            }
            return;
        }
        else
        {
            static if (ErrorLevel == Malformed.throwException)
            {
                // More data follows although the recorded row length has
                // already been exceeded.
                if (_input.rowLength != 0 && _input.col > _input.rowLength)
                {
                    if (!_allowInconsistentDelimiterCount)
                    {
                        throw new CSVException(
                           format("Row %s's length %s does not match "~
                                  "previous length of %s.", _input.row,
                                  _input.col, _input.rowLength));
                    }
                    else
                    {
                        _empty = true;
                        return;
                    }
                }
            }
        }

        // Separator is left on the end of input from the last call.
        // This cannot be moved to after the call to csvNextToken as
        // there may be an empty record after it.
        if (_input.range.front == _separator)
            _input.range.popFront();

        _front.shrinkTo(0);

        prime();
    }

    /*
     * Handles moving to the next skipNum token.
     */
    private void prime(size_t skipNum)
    {
        foreach (i; 0 .. skipNum)
        {
            _input.col++;
            _front.shrinkTo(0);
            // The range can already be exhausted here (a short final row
            // that is not followed by a record break), so check `empty`
            // before looking at its front.
            if (!_input.range.empty && _input.range.front == _separator)
                _input.range.popFront();

            try
                csvNextToken!(Range, ErrorLevel, Separator)
                                   (_input.range, _front, _separator, _quote,false);
            catch (IncompleteCellException ice)
            {
                // Attach the position and the partially read data.
                ice.row = _input.row;
                ice.col = _input.col;
                ice.partialData = _front.data.idup;
                throw ice;
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, _input.col, e);
            }
        }
    }

    /*
     * Reads the next token, skips ahead to the next requested column when a
     * column selection is active, and converts the result to `Contents`.
     */
    private void prime()
    {
        try
        {
            _input.col++;
            csvNextToken!(Range, ErrorLevel, Separator)
                (_input.range, _front, _separator, _quote,false);
        }
        catch (IncompleteCellException ice)
        {
            // Attach the position and the partially read data.
            ice.row = _input.row;
            ice.col = _input.col;
            ice.partialData = _front.data.idup;
            throw ice;
        }

        auto skipNum = _popCount.empty ? 0 : _popCount.front;
        if (!_popCount.empty)
            _popCount.popFront();

        // size_t.max marks a column that was not found (see the
        // constructor): drop the rest of the record and finish.
        if (skipNum == size_t.max)
        {
            while (!recordEnd())
                prime(1);
            _empty = true;
            return;
        }

        if (skipNum)
            prime(skipNum);

        auto data = _front.data;
        static if (!isSomeString!Contents) skipWS(data);
        try curContentsoken = to!Contents(data);
        catch (ConvException e)
        {
            throw new CSVException(e.msg, _input.row, _input.col, e);
        }
    }
}
|
|
|
|
/**
 * Lower level control over parsing CSV
 *
 * Reads a single field from `input` into `ans`, consuming the characters
 * it reads. After each call the input will start with either a delimiter
 * or record break (\n, \r\n, \r) which must be removed for subsequent
 * calls.
 *
 * Params:
 *       input = Any CSV input
 *       ans   = The first field in the input
 *       sep   = The character to represent a comma in the specification
 *       quote = The character to represent a quote in the specification
 *       startQuoted = Whether the input should be considered to already be in
 * quotes
 *
 * Throws:
 *       $(LREF IncompleteCellException) When a quote is found in an unquoted
 *       field, data continues after a closing quote, or the quoted field was
 *       not closed before data was empty.
 */
void csvNextToken(Range, Malformed ErrorLevel = Malformed.throwException,
                           Separator, Output)
                          (ref Range input, ref Output ans,
                           Separator sep, Separator quote,
                           bool startQuoted = false)
if (isSomeChar!Separator && isInputRange!Range
    && is(immutable ElementType!Range == immutable dchar)
    && isOutputRange!(Output, dchar))
{
    // Nothing to read: the input is exhausted or sits on a record break.
    if (input.empty || input.front == '\n' || input.front == '\r')
        return;

    bool quoted = startQuoted;
    bool escQuote;

    // A leading quote opens a quoted field and is not part of the token.
    if (input.front == quote)
    {
        quoted = true;
        input.popFront();
    }

    for (; !input.empty; input.popFront())
    {
        assert(!(quoted && escQuote),
               "Invalid quotation state in csvNextToken");

        // When not quoted the token ends at sep or at a record break.
        if (!quoted)
        {
            if (input.front == sep || input.front == '\r' || input.front == '\n')
                break;
        }

        const bool atQuote = input.front == quote;

        if (quoted || escQuote)
        {
            if (atQuote)
            {
                // `quoted` and `escQuote` are mutually exclusive. A quote
                // seen while quoted is remembered in `escQuote`; a second
                // quote right after it is an escaped quote and puts us
                // back into the quoted state.
                if (escQuote)
                {
                    escQuote = false;
                    quoted = true;
                    ans.put(quote);
                }
                else
                {
                    escQuote = true;
                    quoted = false;
                }
            }
            else
            {
                // A non-quote character directly after a closing quote.
                if (escQuote)
                {
                    static if (ErrorLevel == Malformed.throwException)
                        throw new IncompleteCellException(
                              "Content continues after end quote, " ~
                              "or needs to be escaped.");
                    else static if (ErrorLevel == Malformed.ignore)
                        break;
                }
                ans.put(input.front);
            }
        }
        else
        {
            if (atQuote)
            {
                // Not quoted, but quote found
                static if (ErrorLevel == Malformed.throwException)
                    throw new IncompleteCellException(
                          "Quote located in unquoted token");
                else static if (ErrorLevel == Malformed.ignore)
                    ans.put(quote);
            }
            else
            {
                // Not quoted, non-quote character
                ans.put(input.front);
            }
        }
    }

    // Still inside quotes once the loop is done: the field was never closed.
    static if (ErrorLevel == Malformed.throwException)
        if (quoted && (input.empty || input.front == '\n' || input.front == '\r'))
            throw new IncompleteCellException(
                  "Data continues on future lines or trailing quote");
}
|
|
|
|
///
@safe unittest
{
    import std.array : appender;
    import std.range.primitives : popFront;

    string str = "65,63\n123,3673";
    auto a = appender!(char[])();

    // The first field is read; the delimiter stays at the front of the input.
    csvNextToken(str, a, ',', '"');
    assert(a.data == "65");
    assert(str == ",63\n123,3673");

    // Remove the delimiter and clear the buffer before reading on.
    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str, a, ',', '"');
    assert(a.data == "63");
    assert(str == "\n123,3673");

    // A record break is removed in the same way.
    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str, a, ',', '"');
    assert(a.data == "123");
    assert(str == ",3673");
}
|
|
|
|
// Test csvNextToken on simplest form and correct format.
@safe pure unittest
{
    import std.array;

    string str = "\U00010143Hello,65,63.63\nWorld,123,3673.562";
    auto a = appender!(dchar[])();

    // Drops the delimiter or record break at the front of the input,
    // clears the buffer and reads the following token.
    void advance() @safe pure
    {
        str.popFront();
        a.shrinkTo(0);
        csvNextToken(str, a, ',', '"');
    }

    csvNextToken!string(str, a, ',', '"');
    assert(a.data == "\U00010143Hello");
    assert(str == ",65,63.63\nWorld,123,3673.562");

    advance();
    assert(a.data == "65");
    assert(str == ",63.63\nWorld,123,3673.562");

    advance();
    assert(a.data == "63.63");
    assert(str == "\nWorld,123,3673.562");

    advance();
    assert(a.data == "World");
    assert(str == ",123,3673.562");

    advance();
    assert(a.data == "123");
    assert(str == ",3673.562");

    advance();
    assert(a.data == "3673.562");
    assert(str == "");
}
|
|
|
|
// Test quoted tokens
@safe pure unittest
{
    import std.array;

    string str = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix";
    auto a = appender!(dchar[])();

    // Drops the delimiter or record break at the front of the input,
    // clears the buffer and reads the following token.
    void advance() @safe pure
    {
        str.popFront();
        a.shrinkTo(0);
        csvNextToken(str, a, ',', '"');
    }

    csvNextToken!string(str, a, ',', '"');
    assert(a.data == "one");
    assert(str == `,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix");

    advance();
    assert(a.data == "two");
    assert(str == `,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix");

    // Doubled quotes inside a quoted field come out as a single quote.
    advance();
    assert(a.data == "three \"quoted\"");
    assert(str == `,"",` ~ "\"five\nnew line\"\nsix");

    // An empty quoted field.
    advance();
    assert(a.data == "");
    assert(str == ",\"five\nnew line\"\nsix");

    // A line break inside quotes belongs to the field.
    advance();
    assert(a.data == "five\nnew line");
    assert(str == "\nsix");

    advance();
    assert(a.data == "six");
    assert(str == "");
}
|
|
|
|
// Test empty data is pulled at end of record.
@safe pure unittest
{
    import std.array;

    string str = "one,";
    auto a = appender!(dchar[])();

    csvNextToken(str, a, ',', '"');
    assert(a.data == "one");
    assert(str == ",");

    // The delimiter is deliberately left in place: the next read stops on
    // it immediately and yields an empty token.
    a.shrinkTo(0);
    csvNextToken(str, a, ',', '"');
    assert(a.data == "");
}
|
|
|
|
// Test exceptions
@safe pure unittest
{
    import std.array;

    // A quoted field that is never closed: everything is consumed and the
    // data read so far remains in the buffer.
    string str = "\"one\nnew line";
    auto a = appender!(dchar[])();
    try
    {
        csvNextToken(str, a, ',', '"');
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        assert(a.data == "one\nnew line");
        assert(str == "");
    }

    // A quote inside an unquoted field: reading stops on the quote.
    str = "Hello world\"";
    a = appender!(dchar[])();
    try
    {
        csvNextToken(str, a, ',', '"');
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        assert(a.data == "Hello world");
        assert(str == "\"");
    }

    // With Malformed.ignore stray quotes are copied into the token.
    str = "one, two \"quoted\" end";
    a = appender!(dchar[])();
    csvNextToken!(string, Malformed.ignore)(str, a, ',', '"');
    assert(a.data == "one");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken!(string, Malformed.ignore)(str, a, ',', '"');
    assert(a.data == " two \"quoted\" end");
}
|
|
|
|
// Test modifying token delimiter
@safe pure unittest
{
    import std.array;

    // '|' separates the fields and '/' takes the role of the quote.
    string str = `one|two|/three "quoted"/|//`;
    auto a = appender!(dchar[])();

    // Drops the delimiter at the front of the input, clears the buffer
    // and reads the following token.
    void advance() @safe pure
    {
        str.popFront();
        a.shrinkTo(0);
        csvNextToken(str, a, '|', '/');
    }

    csvNextToken(str, a, '|', '/');
    assert(a.data == "one"d);
    assert(str == `|two|/three "quoted"/|//`);

    advance();
    assert(a.data == "two"d);
    assert(str == `|/three "quoted"/|//`);

    // The double quotes are ordinary characters here.
    advance();
    assert(a.data == `three "quoted"`);
    assert(str == `|//`);

    advance();
    assert(a.data == ""d);
}
|
|
|
|
// https://issues.dlang.org/show_bug.cgi?id=8908
|
|
// Regression test: numeric fields preceded by whitespace are converted,
// both into struct members and as individual `real` fields.
@safe pure unittest
|
|
{
|
|
string csv = ` 1.0, 2.0, 3.0
|
|
4.0, 5.0, 6.0`;
|
|
|
|
static struct Data { real a, b, c; }
|
|
size_t i = 0;
|
|
// Struct contents: each record arrives as a filled `Data`.
foreach (data; csvReader!Data(csv)) with (data)
|
|
{
|
|
int[] row = [cast(int) a, cast(int) b, cast(int) c];
|
|
if (i == 0)
|
|
assert(row == [1, 2, 3]);
|
|
else
|
|
assert(row == [4, 5, 6]);
|
|
++i;
|
|
}
|
|
|
|
i = 0;
|
|
// Scalar contents: each record is a range of `real` fields.
foreach (data; csvReader!real(csv))
|
|
{
|
|
auto a = data.front; data.popFront();
|
|
auto b = data.front; data.popFront();
|
|
auto c = data.front;
|
|
int[] row = [cast(int) a, cast(int) b, cast(int) c];
|
|
if (i == 0)
|
|
assert(row == [1, 2, 3]);
|
|
else
|
|
assert(row == [4, 5, 6]);
|
|
++i;
|
|
}
|
|
}
|
|
|
|
// https://issues.dlang.org/show_bug.cgi?id=21629
@safe pure unittest
{
    import std.typecons : Tuple;

    struct Record
    {
        string a;
        string b;
    }

    // Empty input must give an empty reader for every kind of `Contents`,
    // whether or not a header is passed.
    enum msg = "This should be empty";
    string input = "";
    auto header = ["a" ,"b"];

    assert(csvReader!Record(input).empty, msg);
    assert(csvReader!Record(input, header).empty, msg);
    assert(csvReader!(Tuple!(string,string))(input).empty, msg);
    assert(csvReader!(string[string])(input, header).empty, msg);
    assert(csvReader!(string[string])(input, null).empty, msg);
    assert(csvReader!(int)(input, null).empty, msg);
}
|