mirror of
https://github.com/dlang/phobos.git
synced 2025-04-29 14:40:30 +03:00
1340 lines
35 KiB
D
1340 lines
35 KiB
D
//Written in the D programming language
|
|
|
|
/**
|
|
* Implements functionality to read Comma Separated Values and its variants
|
|
* from an input range.
|
|
*
|
|
* Comma Separated Values provide a simple means to transfer and store
|
|
* tabular data. It has been common for programs to use their own
|
|
* variant of the CSV format. This parser will loosely follow the
|
|
* $(WEB tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhere
|
|
* to the following criteria, differences from RFC-4180 in parentheses.
|
|
*
|
|
* $(UL
|
|
* $(LI A record is separated by a new line (CRLF,LF,CR))
|
|
* $(LI A final record may end with a new line)
|
|
* $(LI A header may be provided as the first record in input)
|
|
* $(LI A record has fields separated by a comma (customizable))
|
|
* $(LI A field containing new lines, commas, or double quotes
|
|
* should be enclosed in double quotes (customizable))
|
|
* $(LI Double quotes in a field are escaped with a double quote)
|
|
* $(LI Each record should contain the same number of fields (not enforced))
|
|
* )
|
|
*
|
|
* Where any input range of characters is accepted for receiving input.
|
|
*
|
|
* Example:
|
|
*
|
|
* -------
|
|
* import std.algorithm;
|
|
* import std.array;
|
|
* import std.csv;
|
|
* import std.stdio;
|
|
* import std.typecons;
|
|
*
|
|
* void main()
|
|
* {
|
|
* auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
|
|
*
|
|
* foreach(record; csvReader!(Tuple!(string,string,int))(text))
|
|
* {
|
|
* writefln("%s works as a %s and earns $%d per year",
|
|
* record[0], record[1], record[2]);
|
|
* }
|
|
* }
|
|
* -------
|
|
*
|
|
* When an input contains a heading the Contents can be specified as an
|
|
* associative array. Pass null to signify that a heading is present.
|
|
*
|
|
* -------
|
|
* auto text = "Name,Occupation,Salary\r" ~
|
|
* "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
|
|
*
|
|
* foreach(record; csvReader!(string[string])
|
|
* (text,cast(string[])null))
|
|
* {
|
|
* writefln("%s works as a %s and earns $%s per year.",
|
|
* record["Name"], record["Occupation"],
|
|
* record["Salary"]);
|
|
* }
|
|
* -------
|
|
*
|
|
* This module allows content to be iterated by record stored in a struct
|
|
* or into a range of fields. Upon detection of an error an
|
|
* IncompleteCellException is thrown (can be disabled). csvNextToken has been
|
|
* made public to allow for attempted recovery.
|
|
*
|
|
* Disabling exceptions will lift many restrictions specified above. A quote
|
|
* can appear in a field if the field was not quoted. If in a quoted field any
|
|
* quote by itself, not at the end of a field, will end processing for that
|
|
* field. The field is ended when there is no input, even if the quote was not
|
|
* closed.
|
|
*
|
|
* See_Also:
|
|
* $(WEB en.wikipedia.org/wiki/Comma-separated_values, Wikipedia
|
|
* Comma-separated values)
|
|
*
|
|
* Copyright: Copyright 2011
|
|
* License: $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
|
|
* Authors: Jesse Phillips
|
|
* Source: $(PHOBOSSRC std/_csv.d)
|
|
*/
|
|
module std.csv;
|
|
|
|
import std.algorithm;
|
|
import std.array;
|
|
import std.conv;
|
|
import std.exception;
|
|
import std.range;
|
|
import std.traits;
|
|
|
|
/**
 * Exception that records the row and column being processed when a
 * failure occurred.
 *
 * The original failure is chained through the $(D next) property and is
 * one of:
 *
 * $(UL
 *     $(LI IncompleteCellException)
 *     $(LI ConvException)
 * )
 */
class CSVException : Exception
{
    /// Row and column that were being processed when the failure occurred.
    size_t row, col;

    /// Builds the message from the position and chains $(D e) as the cause.
    this(size_t row, size_t col, Exception e)
    {
        super(text("(Row: ", row, ", Col: ", col, ") CSV Parse Failure"), e);
        this.row = row;
        this.col = col;
    }
}
|
|
|
|
/**
 * Exception thrown when a token cannot be completed: a quote appears in
 * an unquoted field, data continues after a closing quote, or a quoted
 * field is still open when the data runs out.
 *
 * This Exception will be part of CSVException unless using $(LREF
 * csvNextToken) directly.
 */
class IncompleteCellException : Exception
{
    /// The part of the cell that had been read before the problem was found.
    string partialData;

    /// Stores the partial cell text alongside the error message.
    this(string cellPartial, string msg)
    {
        super(msg);
        this.partialData = cellPartial;
    }
}
|
|
|
|
/**
 * Exception thrown when a heading was supplied but one of its columns is
 * missing from the input, or (for non-struct contents) the supplied order
 * differs from the order found in the input.
 */
class HeadingMismatchException : Exception
{
    /// Forwards the message to the base Exception.
    this(string msg)
    {
        super(msg);
    }
}
|
|
|
|
/**
 * Selects what happens when malformed input is detected.
 *
 * Disabling exceptions will follow these rules:
 * $(UL
 *     $(LI A quote can appear in a field if the field was not quoted.)
 *     $(LI If in a quoted field any quote by itself, not at the end of a
 *     field, will end processing for that field.)
 *     $(LI The field is ended when there is no input, even if the quote was
 *     not closed.)
 *     $(LI If the given header does not match the order in the input, the
 *     content will return as it is found in the input.)
 *     $(LI If the given header contains columns not found in the input they
 *     will be ignored.)
 * )
 */
enum Malformed
{
    /// Incorrect CSV never causes an exception.
    ignore,
    /// Incorrect CSV is reported by throwing an exception.
    throwException
}
|
|
|
|
/**
|
|
* Builds a $(LREF Records) struct for iterating over records found in $(D
|
|
* input).
|
|
*
|
|
* This function simplifies the process for standard text input.
|
|
* For other input, delimited by colon, create Records yourself.
|
|
*
|
|
* The $(D Contents) of the input can be provided if all the records are the
|
|
* same type such as all integer data:
|
|
*
|
|
* -------
|
|
* string str = `76,26,22`;
|
|
* int[] ans = [76,26,22];
|
|
* auto records = csvReader!int(str);
|
|
*
|
|
* foreach(record; records) {
|
|
* assert(equal(record, ans));
|
|
* }
|
|
* -------
|
|
*
|
|
* Example using a struct with modified delimiter:
|
|
*
|
|
* -------
|
|
* string str = "Hello;65;63.63\nWorld;123;3673.562";
|
|
* struct Layout {
|
|
* string name;
|
|
* int value;
|
|
* double other;
|
|
* }
|
|
*
|
|
* auto records = csvReader!Layout(str,';');
|
|
*
|
|
* foreach(record; records) {
|
|
* writeln(record.name);
|
|
* writeln(record.value);
|
|
* writeln(record.other);
|
|
* }
|
|
* -------
|
|
*
|
|
* An optional $(D heading) can be provided. The first record will be read in
|
|
* as the heading. If $(D Contents) is a struct then the heading provided is
|
|
* expected to correspond to the fields in the struct. When $(D Contents) is
|
|
* non-struct the $(D heading) must be provided in the same order as the input
|
|
* or an exception is thrown.
|
|
*
|
|
* Read only column "b":
|
|
*
|
|
* -------
|
|
* string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
|
* auto records = csvReader(str, ["b"]);
|
|
*
|
|
* auto ans = [["65"],["123"]];
|
|
* foreach(record; records) {
|
|
* assert(equal(record, ans.front));
|
|
* ans.popFront();
|
|
* }
|
|
* -------
|
|
*
|
|
* Read from heading of different order:
|
|
*
|
|
* -------
|
|
* string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
|
* struct Layout
|
|
* {
|
|
* int value;
|
|
* double other;
|
|
* string name;
|
|
* }
|
|
*
|
|
* auto records = csvReader!Layout(str, ["b","c","a"]);
|
|
* -------
|
|
*
|
|
* The header can also be left empty if the input contains a header but
|
|
* all columns should be iterated. The heading from the input can always
|
|
* be accessed from the heading field.
|
|
*
|
|
* -------
|
|
* string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
|
* auto records = csvReader(str, cast(string[])null);
|
|
*
|
|
* assert(records.heading == ["a","b","c"]);
|
|
* -------
|
|
*
|
|
* $(LINK2 http://d.puremagic.com/issues/show_bug.cgi?id=2394, IFTI fails for
|
|
* nulls) prevents just sending null or [] as a header.
|
|
*
|
|
* Returns:
|
|
* $(LREF Records) struct which provides a $(XREF range, isInputRange) of
|
|
* each record.
|
|
*
|
|
* Throws:
|
|
* $(LREF CSVException) When a quote is found in an unquoted field,
|
|
* data continues after a closing quote, the quoted field was not
|
|
* closed before data was empty, or a conversion failed.
|
|
*
|
|
* $(LREF HeadingMismatchException) when a heading is provided but a
|
|
* matching column is not found or the order did not match that found in
|
|
* the input (non-struct).
|
|
*/
|
|
auto csvReader(Contents = string, Range, Separator = char)(Range input,
                 Separator delimiter = ',', Separator quote = '"')
    if(isInputRange!Range && isSomeChar!(ElementType!Range)
       && isSomeChar!(Separator) && !is(Contents == class)
       && !is(Contents T : T[U], U : string))
{
    // Heading-less reader: malformed input always throws, and the
    // separator type of the Records follows the element type of the input.
    alias Records!(Contents, Malformed.throwException, Range,
                   ElementType!Range, string[]) Reader;

    return Reader(input, delimiter, quote);
}
|
|
|
|
/// Ditto
auto csvReader(Contents = string, Range, Heading, Separator = char)
                (Range input, Heading heading,
                 Separator delimiter = ',', Separator quote = '"')
    if(isInputRange!Range && isSomeChar!(ElementType!Range)
       && isSomeChar!(Separator) && !is(Contents == class)
       && isForwardRange!Heading
       && isSomeString!(ElementType!Heading))
{
    // Reader with a heading: the heading is handed to the Records
    // constructor, and malformed input always throws.
    alias Records!(Contents, Malformed.throwException, Range,
                   ElementType!Range, Heading) Reader;

    return Reader(input, heading, delimiter, quote);
}
|
|
|
|
// Test standard iteration over input.
unittest
{
    string str = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix";
    auto reader = csvReader(str);

    // Walk every cell of every record and count them.
    int cellsSeen;
    foreach(row; reader)
        foreach(field; row)
            ++cellsSeen;

    assert(cellsSeen == 6);
}
|
|
|
|
// Test newline on last record
unittest
{
    string str = "one,two\nthree,four\n";
    auto reader = csvReader(str);

    // Two records, then the reader must report empty.
    foreach(step; 0 .. 2)
        reader.popFront();

    assert(reader.empty);
}
|
|
|
|
// Test structure conversion interface.
unittest
{
    string str = "Hello,65,63.63\nWorld,123,3673.562";
    struct Layout
    {
        string name;
        int value;
        double other;
    }

    // Tolerant comparison for the converted floating point column.
    static bool near(double a, double b)
    {
        immutable diff = a - b;
        return diff < 1e-9 && diff > -1e-9;
    }

    // Expected contents of each record, in input order.
    Layout[2] ans;
    ans[0].name = "Hello";
    ans[0].value = 65;
    ans[0].other = 63.63;
    ans[1].name = "World";
    ans[1].value = 123;
    ans[1].other = 3673.562;

    auto records = csvReader!Layout(str);

    int count;
    foreach(record; records)
    {
        // Compare against the expectations; overwriting them here would
        // leave the conversion itself unchecked.
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        assert(near(ans[count].other, record.other));
        count++;
    }
    assert(count == ans.length);
}
|
|
|
|
// Test input conversion interface
unittest
{
    string str = `76,26,22`;
    int[] expected = [76,26,22];

    // Every field of each record is converted to int.
    auto reader = csvReader!int(str);
    foreach(row; reader)
        assert(equal(row, expected));
}
|
|
|
|
// Test struct & header interface
unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    struct Layout
    {
        int value;
        double other;
        string name;
    }

    // The heading is listed in struct-field order, which differs from the
    // column order of the input.
    auto records = csvReader!Layout(str, ["b","c","a"]);

    // D-style static array declaration.
    Layout[2] ans;
    ans[0].name = "Hello";
    ans[0].value = 65;
    ans[0].other = 63.63;
    ans[1].name = "World";
    ans[1].value = 123;
    ans[1].other = 3673.562;

    int count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        assert(ans[count].other == record.other);
        count++;
    }
    assert(count == ans.length);

}
|
|
|
|
// Test header interface
unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // Selecting a single column yields only that column per record.
    auto records = csvReader(str, ["b"]);
    auto expected = [["65"],["123"]];
    foreach(row; records)
    {
        assert(equal(row, expected.front));
        expected.popFront();
    }

    // With exceptions enabled a heading in a different order is rejected.
    bool rejected;
    try
    {
        records = csvReader(str, ["b","a"]);
    }
    catch(HeadingMismatchException e)
    {
        rejected = true;
    }
    assert(rejected);

    // With exceptions disabled the columns come back in input order.
    alias Records!(string,Malformed.ignore,string,char,string[]) Lenient;
    auto records2 = Lenient(str, ["b","a"], ',', '"');

    expected = [["Hello","65"],["World","123"]];
    foreach(row; records2)
    {
        assert(equal(row, expected.front));
        expected.popFront();
    }

    // Heading entries that are missing from the input are ignored.
    str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4";
    records2 = Lenient(str, ["a","b","c","d"], ',', '"');

    expected = [["Joe","Carpenter"],["Fred","Fly"]];
    foreach(row; records2)
    {
        assert(equal(row, expected.front));
        expected.popFront();
    }
}
|
|
|
|
// Test null header interface
unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // The heading member holds every column name read from the input,
    // not just the requested ones.
    auto reader = csvReader(str, ["a"]);
    string[] allColumns = ["a","b","c"];

    assert(reader.heading == allColumns);
}
|
|
|
|
// Test unchecked read
unittest
{
    // A quote inside an unquoted field is kept when errors are ignored.
    string str = "one \"quoted\"";
    alias Records!(string,Malformed.ignore,string,char,string[]) LooseCells;
    foreach(row; LooseCells(str,',','"'))
    {
        foreach(field; row)
            assert(field == "one \"quoted\"");
    }

    // The same holds when the record is converted into a struct.
    str = "one \"quoted\",two \"quoted\" end";
    struct Ans
    {
        string a,b;
    }
    alias Records!(Ans,Malformed.ignore,string,char,string[]) LooseStruct;
    foreach(row; LooseStruct(str,',','"'))
    {
        assert(row.a == "one \"quoted\"");
        assert(row.b == "two \"quoted\" end");
    }
}
|
|
|
|
// Test Windows line break
unittest
{
    string str = "one,two\r\nthree";

    auto reader = csvReader(str);

    // First record: both fields before the CRLF.
    auto row = reader.front;
    assert(row.front == "one");
    row.popFront();
    assert(row.front == "two");

    // Second record starts right after the CRLF pair.
    reader.popFront();
    row = reader.front;
    assert(row.front == "three");
}
|
|
|
|
|
|
// Test associative array support
unittest
{
    string str = "1;2;3\n34;65;63\n34;65;63";

    // Records are keyed by the column names taken from the heading line.
    auto reader = csvReader!(string[string])(str,["3","1"],';');

    int rowsSeen;
    foreach(row; reader)
    {
        ++rowsSeen;
        assert(row["1"] == "34");
        assert(row["3"] == "63");
    }
    assert(rowsSeen == 2);
}
|
|
|
|
// Test restricted range
unittest
{
    import std.typecons;

    // Minimal input range over a wstring: only empty, popFront and front.
    struct InputRange
    {
        wstring text;

        this(wstring txt)
        {
            text = txt;
        }

        auto empty()
        {
            return text.empty();
        }

        auto popFront()
        {
            text.popFront();
        }

        wchar front()
        {
            return text[0];
        }
    }

    // Join the two literals explicitly instead of relying on implicit
    // concatenation of adjacent string literals.
    auto ir = InputRange("Name,Occupation,Salary\r"w ~
          "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"w);

    // ir is a struct, so each reader below gets its own copy of the input.
    foreach(record; csvReader(ir, cast(string[])null))
        foreach(cell; record) {}
    foreach(record; csvReader!(Tuple!(string,string,int))
            (ir,cast(string[])null)) {}
    foreach(record; csvReader!(string[string])
            (ir,cast(string[])null)) {}
}
|
|
|
|
/**
 * Range for iterating CSV records.
 *
 * This range is returned by the csvReader functions. It can be
 * created in a similar manner to allow $(D ErrorLevel) be set to $(LREF
 * Malformed).ignore if best guess processing should take place.
 *
 * Example for integer data:
 *
 * -------
 * string str = `76;^26^;22`;
 * int[] ans = [76,26,22];
 * auto records = Records!(int,Malformed.ignore,string,char,string[])
 *       (str, ';', '^');
 *
 * foreach(record; records) {
 *    assert(equal(record, ans));
 * }
 * -------
 *
 */
struct Records(Contents, Malformed ErrorLevel, Range, Separator, Heading)
    if(isSomeChar!Separator && isInputRange!Range
       && isSomeChar!(ElementType!Range) && !is(Contents == class)
       && isForwardRange!Heading && isSomeString!(ElementType!Heading))
{
private:
    Range _input;
    Separator _separator;
    Separator _quote;
    // Column positions taken from the heading, or one entry per struct
    // field when no heading is given.
    size_t[] indices;
    uint _row;
    bool _empty;
    // Struct and associative-array contents are built eagerly into
    // recordContent; any other Contents is served through recordRange.
    static if(is(Contents == struct))
    {
        Contents recordContent;
        Record!(string, ErrorLevel, Range, Separator) recordRange;
    }
    else static if(is(Contents T : T[U], U : string))
    {
        Contents recordContent;
        Record!(T, ErrorLevel, Range, Separator) recordRange;
    }
    else
        Record!(Contents, ErrorLevel, Range, Separator) recordRange;
public:
    /**
     * Heading from the input in array form.
     *
     * -------
     * string str = "a,b,c\nHello,65,63.63";
     * auto records = csvReader(str, ["a"]);
     *
     * assert(records.heading == ["a","b","c"]);
     * -------
     */
    string[] heading;

    /**
     * Constructor to initialize the input, delimiter and quote for input
     * without a heading.
     *
     * -------
     * string str = `76;^26^;22`;
     * int[] ans = [76,26,22];
     * auto records = Records!(int,Malformed.ignore,string,char,string[])
     *       (str, ';', '^');
     *
     * foreach(record; records) {
     *    assert(equal(record, ans));
     * }
     * -------
     */
    this(Range input, Separator delimiter, Separator quote)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;

        static if(is(Contents == struct))
        {
            // Without a heading, struct field i is filled from column i.
            indices.length = FieldTypeTuple!(Contents).length;
            foreach(i, j; FieldTypeTuple!Contents)
                indices[i] = i;
        }
        prime();
    }

    /**
     * Constructor to initialize the input, delimiter and quote for input
     * with a heading.
     *
     * -------
     * string str = "high;mean;low\n76;^26^;22";
     * auto records = Records!(int,Malformed.ignore,string,char,string[])
     *       (str, ["high","low"], ';', '^');
     *
     * int[] ans = [76,22];
     * foreach(record; records) {
     *    assert(equal(record, ans));
     * }
     * -------
     *
     * Throws:
     *       $(LREF HeadingMismatchException) when a heading is provided but a
     *       matching column is not found or the order did not match that found
     *       in the input (non-struct).
     */
    this(Range input, Heading colHeaders, Separator delimiter, Separator quote)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;

        // Every requested column starts out as "not found" (size_t.max).
        size_t[string] colToIndex;
        foreach(h; colHeaders)
        {
            colToIndex[h] = size_t.max;
        }

        // Read the heading record from the input; indices is still empty
        // here, so every column of that record is visited.
        auto r = Record!(string, ErrorLevel, Range, Separator)
            (&_input, _separator, _quote, indices);

        size_t colIndex;
        foreach(col; r)
        {
            heading ~= col;
            auto ptr = col in colToIndex;
            if(ptr)
                *ptr = colIndex;
            colIndex++;
        }

        indices.length = colToIndex.length;
        int i;
        foreach(h; colHeaders)
        {
            immutable index = colToIndex[h];
            static if(ErrorLevel != Malformed.ignore)
                enforceEx!(HeadingMismatchException)(index < size_t.max,
                    "Header not found: " ~ to!string(h));
            indices[i++] = index;
        }

        static if(!is(Contents == struct))
        {
            static if(is(Contents T : T[U], U : string))
            {
                sort(indices);
            }
            else static if(ErrorLevel == Malformed.ignore)
            {
                sort(indices);
            }
            else
            {
                enforceEx!(HeadingMismatchException)(isSorted(indices),
                    "Header in input does not match specified header.");
            }
        }

        // Step past the heading record and load the first data record.
        popFront();
    }

    // After a copy, recordRange must read from the copy's own _input.
    this(this)
    {
        recordRange._input = &_input;
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     *
     * Returns:
     *      If $(D Contents) is a struct, the struct will be filled with record
     *      data.
     *
     *      If $(D Contents) is non-struct, a $(LREF Record) will be returned.
     */
    @property auto front()
    {
        assert(!empty);
        static if(is(Contents == struct))
        {
            return recordContent;
        }
        else static if(is(Contents T : T[U], U : string))
        {
            return recordContent;
        }
        else
        {
            // Re-point at this instance's input before handing out a copy.
            recordRange._input = &_input;
            return recordRange;
        }
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     */
    @property bool empty()
    {
        return _empty;
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, or a conversion failed.
     */
    void popFront()
    {
        recordRange._input = &_input;

        // Consume whatever is left of the current record.
        while(!recordRange.empty)
        {
            recordRange.popFront();
        }

        // Remove the record break: CRLF, LF or a lone CR.
        if(!_input.empty)
        {
            if(_input.front == '\r')
            {
                _input.popFront();
                // The CR may have been the last character of the input, so
                // check for more data before looking for the LF.
                if(!_input.empty && _input.front == '\n')
                    _input.popFront();
            }
            else if(_input.front == '\n')
                _input.popFront();
        }

        if(_input.empty)
            _empty = true;

        prime();
    }

    // Loads the next record: sets up recordRange and, for struct and
    // associative-array contents, fills recordContent.
    private void prime()
    {
        if(_empty)
            return;
        _row++;
        static if(is(Contents == struct))
        {
            // Struct fields are matched through indices below, so the
            // record range itself visits every column.
            recordRange = typeof(recordRange)
                                 (&_input, _separator, _quote, null);
        }
        else
        {
            recordRange = typeof(recordRange)
                                 (&_input, _separator, _quote, indices);
        }

        recordRange._row = _row;

        static if(is(Contents T : T[U], U : string))
        {
            T[U] aa;
            try
            {
                for(; !recordRange.empty; recordRange.popFront())
                {
                    // _col counts from 1; heading is indexed from 0.
                    aa[heading[recordRange._col-1]] = recordRange.front;
                }
            }
            catch(ConvException e)
            {
                throw new CSVException(_row, recordRange._col, e);
            }

            recordContent = aa;
        }
        else static if(is(Contents == struct))
        {
            size_t colIndex;
            try
            {
                foreach(colData; recordRange)
                {
                    scope(exit) colIndex++;
                    if(indices.length > 0)
                    {
                        // Field ti is filled from the column in indices[ti].
                        foreach(ti, ToType; FieldTypeTuple!(Contents))
                        {
                            if(indices[ti] == colIndex)
                            {
                                recordContent.tupleof[ti] = to!ToType(colData);
                            }
                        }
                    }
                    else
                    {
                        // No mapping available: field ti takes column ti.
                        foreach(ti, ToType; FieldTypeTuple!(Contents))
                        {
                            if(ti == colIndex)
                                recordContent.tupleof[ti] = to!ToType(colData);
                        }
                    }
                }
            }
            catch(ConvException e)
            {
                throw new CSVException(_row, colIndex, e);
            }
        }
    }
}
|
|
|
|
// Records built directly with a custom delimiter and quote character.
unittest
{
    string str = `76;^26^;22`;
    int[] expected = [76,26,22];

    alias Records!(int,Malformed.ignore,string,char,string[]) IntRecords;
    auto reader = IntRecords(str, ';', '^');

    foreach(row; reader)
        assert(equal(row, expected));
}
|
|
|
|
/**
 * Range over the fields of one record; returned by a Records when
 * Contents is a non-struct.
 */
private struct Record(Contents, Malformed ErrorLevel, Range, Separator)
    if(!is(Contents == class) && !is(Contents == struct))
{
private:
    Range* _input;
    Separator _separator;
    Separator _quote;
    // Most recently converted field, handed out by front.
    Contents _current;
    typeof(appender!(char[])()) _front;
    bool _empty;
    size_t _col, _row;
    // Per selected column: how many tokens to skip to reach it.
    size_t[] _popCount;
public:
    /*
     * params:
     *      input = Pointer to a character input range
     *      delimiter = Separator for each column
     *      quote = Character used for quotation
     *      indices = An array containing which columns will be returned.
     *             If empty, all columns are returned. List must be in order.
     */
    this(Range* input, Separator delimiter, Separator quote, size_t[] indices)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;
        _front = appender!(char[])();
        _popCount = indices.dup;

        // Turn the absolute column positions into relative skip counts,
        // i.e. how many tokens separate each selected column from the
        // previous one.
        size_t consumed;
        foreach(ref gap; _popCount)
        {
            static if(ErrorLevel == Malformed.ignore)
            {
                // When exceptions are disabled a requested column may be
                // missing; indices are sorted and missing ones are
                // size_t.max, so nothing after this point needs adjusting.
                if(gap == size_t.max)
                    break;
            }
            gap -= consumed;
            consumed += gap + 1;
        }

        prime();
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     */
    @property Contents front()
    {
        assert(!empty);
        return _current;
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     */
    @property bool empty()
    {
        return _empty;
    }

    /*
     * The record is finished when the input is exhausted or the next
     * character is a record break.
     */
    private bool recordEnd()
    {
        return (*_input).empty
            || (*_input).front == '\n'
            || (*_input).front == '\r';
    }


    /**
     * Part of the $(XREF range, isInputRange) interface.
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, or a conversion failed.
     */
    void popFront()
    {
        // A column list was given and has been used up: discard the rest
        // of the record.
        if(_popCount && _popCount.empty)
        {
            while(!recordEnd())
                prime(1);
        }

        if(recordEnd())
        {
            _empty = true;
            return;
        }

        // The previous token left its separator at the head of the input.
        // It has to be removed here, before csvNextToken runs, because an
        // empty field may follow it.
        if((*_input).front == _separator)
            (*_input).popFront();

        _front.shrinkTo(0);
        prime();
    }

    /*
     * Reads skipNum further tokens, leaving the last one read in _front.
     */
    private void prime(size_t skipNum)
    {
        for(size_t done = 0; done < skipNum; ++done)
        {
            _col++;
            _front.shrinkTo(0);
            if((*_input).front == _separator)
                (*_input).popFront();
            try
            {
                csvNextToken!(ErrorLevel, Range, Separator)
                                   (*_input, _front, _separator, _quote,false);
            }
            catch(Exception e)
            {
                throw new CSVException(_row, _col, e);
            }
        }
    }

    /*
     * Reads the next token, applies the pending skip count (if any) and
     * converts the resulting text to Contents.
     */
    private void prime()
    {
        _col++;
        try
        {
            csvNextToken!(ErrorLevel, Range, Separator)
                               (*_input, _front, _separator, _quote,false);

            // Take the skip count for this column, when one is left.
            size_t skipNum = 0;
            if(!_popCount.empty)
            {
                skipNum = _popCount.front;
                _popCount.popFront();
            }

            // size_t.max marks a requested column that is not in the
            // input: drop the rest of the record and finish.
            if(skipNum == size_t.max)
            {
                while(!recordEnd())
                    prime(1);
                _empty = true;
                return;
            }

            if(skipNum != 0)
                prime(skipNum);
            _current = to!Contents(_front.data);
        }
        catch(Exception e)
        {
            throw new CSVException(_row, _col, e);
        }

    }
}
|
|
|
|
/**
|
|
* Lower level control over parsing CSV
|
|
*
|
|
* This function consumes the input. After each call the input will
|
|
* start with either a delimiter or record break (\n, \r\n, \r) which
|
|
* must be removed for subsequent calls.
|
|
*
|
|
* -------
|
|
* string str = "65,63\n123,3673";
|
|
*
|
|
* auto a = appender!(char[]);
|
|
*
|
|
* csvNextToken(str,a,',','"');
|
|
* assert(a.data == "65");
|
|
* assert(str == ",63\n123,3673");
|
|
*
|
|
* str.popFront();
|
|
* a.shrinkTo(0);
|
|
* csvNextToken(str,a,',','"');
|
|
* assert(a.data == "63");
|
|
* assert(str == "\n123,3673");
|
|
*
|
|
* str.popFront();
|
|
* a.shrinkTo(0);
|
|
* csvNextToken(str,a,',','"');
|
|
* assert(a.data == "123");
|
|
* assert(str == ",3673");
|
|
* -------
|
|
*
|
|
* Params:
|
|
* input = Any CSV input
|
|
* ans = The first field in the input
|
|
* sep = The character to represent a comma in the specification
|
|
* quote = The character to represent a quote in the specification
|
|
* startQuoted = Whether the input should be considered to already be in
|
|
* quotes
|
|
*
|
|
*/
|
|
void csvNextToken(Malformed ErrorLevel = Malformed.throwException,
                           Range, Separator)
                          (ref Range input, ref Appender!(char[]) ans,
                           Separator sep, Separator quote,
                           bool startQuoted = false)
    if(isSomeChar!Separator && isInputRange!Range
       && isSomeChar!(ElementType!Range))
{
    // Nothing to read: no input, or the input starts at a record break.
    if(input.empty)
        return;
    if(input.front == '\n' || input.front == '\r')
        return;

    // inQuotes: currently inside a quoted field.
    // pendingQuote: the previous character was a quote seen inside a quoted
    // field; the next character decides whether it was an escape (another
    // quote follows) or the closing quote. The two are never both set.
    bool inQuotes = startQuoted;
    bool pendingQuote;

    if(input.front == quote)
    {
        inQuotes = true;
        input.popFront();
    }

    // A break leaves the current character in the input for the caller.
    for(; !input.empty; input.popFront())
    {
        assert(!(inQuotes && pendingQuote));

        // Outside quotes the token ends at a separator or record break.
        if(!inQuotes)
        {
            if(input.front == sep
               || input.front == '\r'
               || input.front == '\n')
                break;
        }

        if(inQuotes || pendingQuote)
        {
            if(input.front != quote)
            {
                // Quoted field, ordinary character.
                if(pendingQuote)
                {
                    // The field was closed but more content follows.
                    static if(ErrorLevel == Malformed.throwException)
                        throw new IncompleteCellException(ans.data.idup,
                              "Content continues after end quote, " ~
                              "or needs to be escaped.");
                    else static if(ErrorLevel == Malformed.ignore)
                        break;
                }
                ans.put(input.front);
            }
            else if(pendingQuote)
            {
                // Second quote of a pair: an escaped quote character.
                pendingQuote = false;
                inQuotes = true;
                ans.put(quote);
            }
            else
            {
                // First quote: either the closing quote or the start of an
                // escape, decided by the next character.
                pendingQuote = true;
                inQuotes = false;
            }
        }
        else if(input.front == quote)
        {
            // Quote found in a field that was not quoted.
            static if(ErrorLevel == Malformed.throwException)
                throw new IncompleteCellException(ans.data.idup,
                      "Quote located in unquoted token");
            else static if(ErrorLevel == Malformed.ignore)
                ans.put(quote);
        }
        else
        {
            // Unquoted field, ordinary character.
            ans.put(input.front);
        }
    }

    // Still inside quotes at the end of the data: the field was not closed.
    static if(ErrorLevel == Malformed.throwException)
        if(inQuotes && (input.empty || input.front == '\n' || input.front == '\r'))
            throw new IncompleteCellException(ans.data.idup,
                  "Data continues on future lines or trailing quote");

}
|
|
|
|
// Test csvNextToken on simplest form and correct format.
unittest
{
    string str = "Hello,65,63.63\nWorld,123,3673.562";

    // Each entry: the token expected next, then the input left afterwards.
    string[][] steps = [
        ["Hello",    ",65,63.63\nWorld,123,3673.562"],
        ["65",       ",63.63\nWorld,123,3673.562"],
        ["63.63",    "\nWorld,123,3673.562"],
        ["World",    ",123,3673.562"],
        ["123",      ",3673.562"],
        ["3673.562", ""]
    ];

    auto a = appender!(char[]);
    foreach(i, step; steps)
    {
        if(i != 0)
        {
            // Drop the separator or record break left by the previous call.
            str.popFront();
            a.shrinkTo(0);
        }
        csvNextToken(str,a,',','"');
        assert(a.data == step[0]);
        assert(str == step[1]);
    }
}
|
|
|
|
// Test quoted tokens
unittest
{
    string str = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix";

    // Each entry: the token expected next, then the input left afterwards.
    string[][] steps = [
        ["one",              `,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix"],
        ["two",              `,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix"],
        ["three \"quoted\"", `,"",` ~ "\"five\nnew line\"\nsix"],
        ["",                 ",\"five\nnew line\"\nsix"],
        ["five\nnew line",   "\nsix"],
        ["six",              ""]
    ];

    auto a = appender!(char[]);
    foreach(i, step; steps)
    {
        if(i != 0)
        {
            // Drop the separator or record break left by the previous call.
            str.popFront();
            a.shrinkTo(0);
        }
        csvNextToken(str,a,',','"');
        assert(a.data == step[0]);
        assert(str == step[1]);
    }
}
|
|
|
|
// Test empty data is pulled at end of record.
unittest
{
    string str = "one,";
    auto token = appender!(char[]);

    csvNextToken(str,token,',','"');
    assert(token.data == "one");
    assert(str == ",");

    // The separator is still at the front, so the next token is empty.
    token.shrinkTo(0);
    csvNextToken(str,token,',','"');
    assert(token.data == "");
}
|
|
|
|
// Test exceptions
unittest
{
    // A quoted field that is never closed.
    string str = "\"one\nnew line";
    bool unclosedReported;
    try
    {
        auto buf = appender!(char[]);
        csvNextToken(str,buf,',','"');
    }
    catch (IncompleteCellException ice)
    {
        unclosedReported = true;
        assert(ice.partialData == "one\nnew line");
        assert(str == "");
    }
    assert(unclosedReported);

    // A quote in a field that was not quoted.
    str = "Hello world\"";
    bool strayQuoteReported;
    try
    {
        auto buf = appender!(char[]);
        csvNextToken(str,buf,',','"');
    }
    catch (IncompleteCellException ice)
    {
        strayQuoteReported = true;
        assert(ice.partialData == "Hello world");
        assert(str == "\"");
    }
    assert(strayQuoteReported);

    // With errors ignored the same kind of input is read as-is.
    str = "one, two \"quoted\" end";

    auto a = appender!(char[]);
    csvNextToken!(Malformed.ignore)(str,a,',','"');
    assert(a.data == "one");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken!(Malformed.ignore)(str,a,',','"');
    assert(a.data == " two \"quoted\" end");
}
|
|
|
|
|
|
// Test modifying token delimiter
unittest
{
    string str = `one|two|/three "quoted"/|//`;

    // Each entry: the token expected next, then the input left afterwards.
    string[][] steps = [
        ["one",              `|two|/three "quoted"/|//`],
        ["two",              `|/three "quoted"/|//`],
        [`three "quoted"`,   `|//`]
    ];

    auto a = appender!(char[]);
    foreach(i, step; steps)
    {
        if(i != 0)
        {
            // Drop the separator left by the previous call.
            str.popFront();
            a.shrinkTo(0);
        }
        csvNextToken(str,a, '|','/');
        assert(a.data == step[0]);
        assert(str == step[1]);
    }

    // The final field is an empty quoted token.
    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a, '|','/');
    assert(a.data == "");
}
|