mirror of
https://github.com/dlang/phobos.git
synced 2025-04-28 22:21:09 +03:00
Expended CSV documentation.
This commit is contained in:
parent
5fddab421f
commit
f6f5ce9d89
1 changed files with 137 additions and 87 deletions
208
std/csv.d
208
std/csv.d
|
@ -8,7 +8,7 @@
|
|||
* tabular data. It has been common for programs to use their own
|
||||
* variant of the CSV format. This parser will loosely follow the
|
||||
* $(WEB tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhered
|
||||
* to the following rules.
|
||||
* to the following criteria, differences from RFC-4180 in parentheses.
|
||||
*
|
||||
* $(UL
|
||||
* $(LI A record is separated by a new line (CRLF,LF,CR))
|
||||
|
@ -18,7 +18,7 @@
|
|||
* $(LI A field containing new lines, commas, or double quotes
|
||||
* should be enclosed in double quotes (customizable))
|
||||
* $(LI Double quotes in a field are escaped with a double quote)
|
||||
* $(LI Each record should contain the same number of fields)
|
||||
* $(LI Each record should contain the same number of fields (not enforced))
|
||||
* )
|
||||
*
|
||||
* Example:
|
||||
|
@ -30,9 +30,9 @@
|
|||
*
|
||||
* void main()
|
||||
* {
|
||||
* auto text = "Joe,Carpenter,300000\nFred,Fly,4\r\n";
|
||||
* auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
|
||||
*
|
||||
* foreach(record; csvText!(Tuple!(string,string,int))(text))
|
||||
* foreach(record; csvReader!(Tuple!(string,string,int))(text))
|
||||
* {
|
||||
* writefln("%s works as a %s and earns $%d per year",
|
||||
* record[0], record[1], record[2]);
|
||||
|
@ -122,18 +122,22 @@ enum Malformed
|
|||
}
|
||||
|
||||
/**
|
||||
* Builds a RecordList range for iterating over records found in input.
|
||||
* Builds a $(LREF Records) struct for iterating over records found in $(D
|
||||
* input).
|
||||
*
|
||||
* This function simplifies the process for standard text input.
|
||||
* For other input create RecordList yourself.
|
||||
* For other input, delimited by colon, create Records yourself.
|
||||
*
|
||||
* The Contents of the input can be provided if all the records are the same
|
||||
* type such as all integer data:
|
||||
* The $(D ErrorLevel) can be set to $(LREF Malformed).ignore if best guess
|
||||
* processing should take place.
|
||||
*
|
||||
* The $(D Contents) of the input can be provided if all the records are the
|
||||
* same type such as all integer data:
|
||||
*
|
||||
* -------
|
||||
* string str = `76,26,22`;
|
||||
* int[] ans = [76,26,22];
|
||||
* auto records = csvText!int(str);
|
||||
* auto records = csvReader!int(str);
|
||||
*
|
||||
* int count;
|
||||
* foreach(record; records) {
|
||||
|
@ -141,15 +145,6 @@ enum Malformed
|
|||
* }
|
||||
* -------
|
||||
*
|
||||
* The ErrorLevel can be set to $(LREF Malformed).ignore if best guess
|
||||
* processing should take place.
|
||||
*
|
||||
* An optional heading can be provided. The first record will be read in as the
|
||||
* heading. If the Content type is a struct then the heading provided is
|
||||
* expected to correspond to the fields in the struct. When Content is
|
||||
* non-struct the heading must be provided in the same order as the input or an
|
||||
* exception is thrown.
|
||||
*
|
||||
* Example using a struct:
|
||||
*
|
||||
* -------
|
||||
|
@ -160,7 +155,7 @@ enum Malformed
|
|||
* double other;
|
||||
* }
|
||||
*
|
||||
* auto records = csvText!Layout(str);
|
||||
* auto records = csvReader!Layout(str);
|
||||
*
|
||||
* foreach(record; records) {
|
||||
* writeln(record.name);
|
||||
|
@ -169,27 +164,46 @@ enum Malformed
|
|||
* }
|
||||
* -------
|
||||
*
|
||||
* The header can be provided to identify which columns to read in.
|
||||
* An optional $(D heading) can be provided. The first record will be read in
|
||||
* as the heading. If $(D Contents) is a struct then the heading provided is
|
||||
* expected to correspond to the fields in the struct. When $(D Contents) is
|
||||
* non-struct the $(D heading) must be provided in the same order as the input
|
||||
* or an exception is thrown.
|
||||
*
|
||||
* Read only column "b":
|
||||
*
|
||||
* -------
|
||||
* string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
||||
* auto records = csvText(str, ["b"]);
|
||||
* auto records = csvReader(str, ["b"]);
|
||||
*
|
||||
* auto ans = ["65","123"];
|
||||
* foreach(record; records)
|
||||
* foreach(cell; record) {
|
||||
* assert(cell == ans.front);
|
||||
* auto ans = [["65"],["123"]];
|
||||
* foreach(record; records) {
|
||||
* assert(equal(record, ans.front));
|
||||
* ans.popFront();
|
||||
* }
|
||||
* -------
|
||||
*
|
||||
* Read from heading of different order:
|
||||
*
|
||||
* -------
|
||||
* string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
||||
* struct Layout
|
||||
* {
|
||||
* int value;
|
||||
* double other;
|
||||
* string name;
|
||||
* }
|
||||
*
|
||||
* auto records = csvReader!Layout(str, ["b","c","a"]);
|
||||
* -------
|
||||
*
|
||||
* The header can also be left empty if the input contains a header but
|
||||
* all columns should be iterated. The heading from the input can always
|
||||
* be accessed from the heading field.
|
||||
*
|
||||
* -------
|
||||
* string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
||||
* auto records = csvText(str, cast(string[])null);
|
||||
* auto records = csvReader(str, cast(string[])null);
|
||||
*
|
||||
* assert(records.heading == ["a","b","c"]);
|
||||
* -------
|
||||
|
@ -198,11 +212,8 @@ enum Malformed
|
|||
* nulls) prevents just sending null or [] as a header.
|
||||
*
|
||||
* Returns:
|
||||
* If Contents is a struct, the range will return the
|
||||
* struct populated by a single record.
|
||||
*
|
||||
* Otherwise the range will return a $(LREF Record) range of the type
|
||||
* (default string).
|
||||
* $(LREF Records) struct which provides a $(XREF range, InputRange) of
|
||||
* each record.
|
||||
*
|
||||
* Throws:
|
||||
* IncompleteCellException When a quote is found in an unquoted field,
|
||||
|
@ -213,22 +224,23 @@ enum Malformed
|
|||
* column is not found or the order did not match that found in the input
|
||||
* (non-struct).
|
||||
*/
|
||||
auto csvText(Contents = string, Malformed ErrorLevel
|
||||
auto csvReader(Contents = string, Malformed ErrorLevel
|
||||
= Malformed.throwException, Range)(Range input)
|
||||
if(isInputRange!Range && isSomeChar!(ElementType!Range)
|
||||
&& !is(Contents == class))
|
||||
{
|
||||
return RecordList!(Contents,ErrorLevel,Range,ElementType!Range)
|
||||
return Records!(Contents,ErrorLevel,Range,ElementType!Range,string[])
|
||||
(input, ',', '"');
|
||||
}
|
||||
|
||||
/// Ditto
|
||||
auto csvText(Contents = string, Malformed ErrorLevel
|
||||
= Malformed.throwException, Range)(Range input, string[] heading)
|
||||
auto csvReader(Contents = string, Malformed ErrorLevel
|
||||
= Malformed.throwException, Range, Heading)
|
||||
(Range input, Heading heading)
|
||||
if(isInputRange!Range && isSomeChar!(ElementType!Range)
|
||||
&& !is(Contents == class))
|
||||
&& !is(Contents == class) && isInputRange!Heading)
|
||||
{
|
||||
return RecordList!(Contents,ErrorLevel,Range,ElementType!Range)
|
||||
return Records!(Contents,ErrorLevel,Range,ElementType!Range,Heading)
|
||||
(input, ',', '"', heading);
|
||||
}
|
||||
|
||||
|
@ -236,7 +248,7 @@ auto csvText(Contents = string, Malformed ErrorLevel
|
|||
unittest
|
||||
{
|
||||
string str = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix";
|
||||
auto records = csvText(str);
|
||||
auto records = csvReader(str);
|
||||
|
||||
int count;
|
||||
foreach(record; records)
|
||||
|
@ -253,7 +265,7 @@ unittest
|
|||
unittest
|
||||
{
|
||||
string str = "one,two\nthree,four\n";
|
||||
auto records = csvText(str);
|
||||
auto records = csvReader(str);
|
||||
records.popFront();
|
||||
records.popFront();
|
||||
assert(records.empty);
|
||||
|
@ -277,7 +289,7 @@ unittest {
|
|||
ans[1].value = 65;
|
||||
ans[1].other = 663.63;
|
||||
|
||||
auto records = csvText!Layout(str);
|
||||
auto records = csvReader!Layout(str);
|
||||
|
||||
int count;
|
||||
foreach(record; records)
|
||||
|
@ -295,7 +307,7 @@ unittest
|
|||
{
|
||||
string str = `76,26,22`;
|
||||
int[] ans = [76,26,22];
|
||||
auto records = csvText!int(str);
|
||||
auto records = csvReader!int(str);
|
||||
|
||||
foreach(record; records)
|
||||
{
|
||||
|
@ -314,7 +326,7 @@ unittest
|
|||
string name;
|
||||
}
|
||||
|
||||
auto records = csvText!Layout(str, ["b","c","a"]);
|
||||
auto records = csvReader!Layout(str, ["b","c","a"]);
|
||||
|
||||
Layout ans[2];
|
||||
ans[0].name = "Hello";
|
||||
|
@ -340,42 +352,37 @@ unittest
|
|||
unittest
|
||||
{
|
||||
string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
||||
auto records = csvText(str, ["b"]);
|
||||
auto records = csvReader(str, ["b"]);
|
||||
|
||||
auto ans = ["65","123"];
|
||||
foreach(record; records)
|
||||
foreach(cell; record) {
|
||||
assert(cell == ans.front);
|
||||
auto ans = [["65"],["123"]];
|
||||
foreach(record; records) {
|
||||
assert(equal(record, ans.front));
|
||||
ans.popFront();
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
records = csvText(str, ["b","a"]);
|
||||
records = csvReader(str, ["b","a"]);
|
||||
assert(0);
|
||||
}
|
||||
catch(Exception e)
|
||||
{
|
||||
}
|
||||
|
||||
auto records2 = csvText!(string, Malformed.ignore)(str, ["b","a"]);
|
||||
auto records2 = csvReader!(string, Malformed.ignore)(str, ["b","a"]);
|
||||
|
||||
ans = ["Hello","65","World","123"];
|
||||
foreach(record; records2)
|
||||
foreach(cell; record)
|
||||
{
|
||||
assert(cell == ans.front);
|
||||
ans = [["Hello","65"],["World","123"]];
|
||||
foreach(record; records2) {
|
||||
assert(equal(record, ans.front));
|
||||
ans.popFront();
|
||||
}
|
||||
|
||||
str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4";
|
||||
records2 = csvText!(string, Malformed.ignore)(str, ["a","b","c","d"]);
|
||||
records2 = csvReader!(string, Malformed.ignore)(str, ["a","b","c","d"]);
|
||||
|
||||
ans = ["Joe","Carpenter","Fred","Fly"];
|
||||
foreach(record; records2)
|
||||
foreach(cell; record)
|
||||
{
|
||||
assert(cell == ans.front);
|
||||
ans = [["Joe","Carpenter"],["Fred","Fly"]];
|
||||
foreach(record; records2) {
|
||||
assert(equal(record, ans.front));
|
||||
ans.popFront();
|
||||
}
|
||||
}
|
||||
|
@ -384,7 +391,7 @@ unittest
|
|||
unittest
|
||||
{
|
||||
string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
||||
auto records = csvText(str, cast(string[])null);
|
||||
auto records = csvReader(str, ["a"]);
|
||||
|
||||
assert(records.heading == ["a","b","c"]);
|
||||
}
|
||||
|
@ -393,7 +400,7 @@ unittest
|
|||
unittest
|
||||
{
|
||||
string str = "one \"quoted\"";
|
||||
foreach(record; csvText!(string, Malformed.ignore)(str))
|
||||
foreach(record; csvReader!(string, Malformed.ignore)(str))
|
||||
{
|
||||
foreach(cell; record)
|
||||
{
|
||||
|
@ -406,7 +413,7 @@ unittest
|
|||
{
|
||||
string a,b;
|
||||
}
|
||||
foreach(record; csvText!(Ans, Malformed.ignore)(str))
|
||||
foreach(record; csvReader!(Ans, Malformed.ignore)(str))
|
||||
{
|
||||
assert(record.a == "one \"quoted\"");
|
||||
assert(record.b == "two \"quoted\" end");
|
||||
|
@ -418,7 +425,7 @@ unittest
|
|||
{
|
||||
string str = "one,two\r\nthree";
|
||||
|
||||
auto records = csvText(str);
|
||||
auto records = csvReader(str);
|
||||
auto record = records.front;
|
||||
assert(record.front == "one");
|
||||
record.popFront();
|
||||
|
@ -431,7 +438,7 @@ unittest
|
|||
/**
|
||||
* Range for iterating CSV records.
|
||||
*
|
||||
* This range is returned by the csvText functions. It can be
|
||||
* This range is returned by the csvReader functions. It can be
|
||||
* created in a similar manner to allow for custom separation.
|
||||
*
|
||||
* Example for integer data:
|
||||
|
@ -439,7 +446,7 @@ unittest
|
|||
* -------
|
||||
* string str = `76;^26^;22`;
|
||||
* int[] ans = [76,26,22];
|
||||
* auto records = RecordList!(int,Malformed.ignore,string,char)
|
||||
* auto records = Records!(int,Malformed.ignore,string,char,char[])
|
||||
* (str, ';', '^');
|
||||
*
|
||||
* foreach(record; records) {
|
||||
|
@ -448,9 +455,10 @@ unittest
|
|||
* -------
|
||||
*
|
||||
*/
|
||||
struct RecordList(Contents, Malformed ErrorLevel, Range, Separator)
|
||||
struct Records(Contents, Malformed ErrorLevel, Range, Separator, Heading)
|
||||
if(isSomeChar!Separator && isInputRange!Range
|
||||
&& isSomeChar!(ElementType!Range) && !is(Contents == class))
|
||||
&& isSomeChar!(ElementType!Range) && !is(Contents == class)
|
||||
&& isInputRange!Heading)
|
||||
{
|
||||
private:
|
||||
Range _input;
|
||||
|
@ -466,12 +474,32 @@ private:
|
|||
else
|
||||
Record!(Contents, ErrorLevel, Range, Separator) recordRange;
|
||||
public:
|
||||
/// Heading from the input in array form.
|
||||
/**
|
||||
* Heading from the input in array form.
|
||||
*
|
||||
* -------
|
||||
* string str = "a,b,c\nHello,65,63.63";
|
||||
* auto records = csvReader(str, ["a"]);
|
||||
*
|
||||
* assert(records.heading == ["a","b","c"]);
|
||||
* -------
|
||||
*/
|
||||
Range[] heading;
|
||||
|
||||
/**
|
||||
* Constructor to initialize the input, delimiter and quote for input
|
||||
* without a heading.
|
||||
*
|
||||
* -------
|
||||
* string str = `76;^26^;22`;
|
||||
* int[] ans = [76,26,22];
|
||||
* auto records = Records!(int,Malformed.ignore,string,char,string[])
|
||||
* (str, ';', '^');
|
||||
*
|
||||
* foreach(record; records) {
|
||||
* assert(equal(record, ans));
|
||||
* }
|
||||
* -------
|
||||
*/
|
||||
this(Range input, Separator delimiter, Separator quote)
|
||||
{
|
||||
|
@ -492,19 +520,30 @@ public:
|
|||
* Constructor to initialize the input, delimiter and quote for input
|
||||
* with a heading.
|
||||
*
|
||||
* -------
|
||||
* string str = `high;mean;low\n76;^26^;22`;
|
||||
* int[] ans = [76,22];
|
||||
* auto records = Records!(int,Malformed.ignore,string,char,string[])
|
||||
* (str, ';', '^',["high","low"]);
|
||||
*
|
||||
* foreach(record; records) {
|
||||
* assert(equal(record, ans));
|
||||
* }
|
||||
* -------
|
||||
*
|
||||
* Throws:
|
||||
* HeadingMismatchException when a heading is provided but a
|
||||
* matching column is not found or the order did not match that found
|
||||
* in the input (non-struct).
|
||||
*/
|
||||
this(Range input, Separator delimiter, Separator quote, string[] colHeaders)
|
||||
this(Range input, Separator delimiter, Separator quote, Heading colHeaders)
|
||||
{
|
||||
_input = input;
|
||||
_separator = delimiter;
|
||||
_quote = quote;
|
||||
|
||||
size_t[string] colToIndex;
|
||||
foreach(i, h; colHeaders)
|
||||
foreach(h; colHeaders)
|
||||
{
|
||||
colToIndex[h] = size_t.max;
|
||||
}
|
||||
|
@ -522,14 +561,15 @@ public:
|
|||
colIndex++;
|
||||
}
|
||||
|
||||
indices.length = colHeaders.length;
|
||||
foreach(i, h; colHeaders)
|
||||
indices.length = colToIndex.length;
|
||||
int i;
|
||||
foreach(h; colHeaders)
|
||||
{
|
||||
immutable index = colToIndex[h];
|
||||
static if(ErrorLevel != Malformed.ignore)
|
||||
enforceEx!(HeadingMismatchException)(index < size_t.max,
|
||||
"Header not found: " ~ to!string(h));
|
||||
indices[i] = index;
|
||||
indices[i++] = index;
|
||||
}
|
||||
|
||||
static if(!is(Contents == struct))
|
||||
|
@ -554,6 +594,13 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
* Part of the $(XREF range, InputRange) interface.
|
||||
*
|
||||
* Returns:
|
||||
* If $(D Contents) is a struct, the struct will be filled with record
|
||||
* data.
|
||||
*
|
||||
* If $(D Contents) is non-struct, a $(LREF Record) will be returned.
|
||||
*/
|
||||
@property auto front()
|
||||
{
|
||||
|
@ -570,6 +617,7 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
* Part of the $(XREF range, InputRange) interface.
|
||||
*/
|
||||
@property bool empty()
|
||||
{
|
||||
|
@ -577,7 +625,7 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
* Brings the next Record into the front of the range.
|
||||
* Part of the $(XREF range, InputRange) interface.
|
||||
*
|
||||
* Throws:
|
||||
* IncompleteCellException When a quote is found in an unquoted field,
|
||||
|
@ -661,7 +709,7 @@ public:
|
|||
unittest {
|
||||
string str = `76;^26^;22`;
|
||||
int[] ans = [76,26,22];
|
||||
auto records = RecordList!(int,Malformed.ignore,string,char)
|
||||
auto records = Records!(int,Malformed.ignore,string,char,string[])
|
||||
(str, ';', '^');
|
||||
|
||||
foreach(record; records)
|
||||
|
@ -671,7 +719,7 @@ unittest {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returned by a RecordList when Contents is a non-struct.
|
||||
* Returned by a Records when Contents is a non-struct.
|
||||
*/
|
||||
private struct Record(Contents, Malformed ErrorLevel, Range, Separator)
|
||||
if(!is(Contents == class) && !is(Contents == struct))
|
||||
|
@ -685,7 +733,7 @@ private:
|
|||
bool _empty;
|
||||
size_t[] _popCount;
|
||||
public:
|
||||
/**
|
||||
/*
|
||||
* params:
|
||||
* input = Pointer to a character input range
|
||||
* delimiter = Separator for each column
|
||||
|
@ -722,6 +770,7 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
* Part of the $(XREF range, InputRange) interface.
|
||||
*/
|
||||
@property Contents front()
|
||||
{
|
||||
|
@ -730,6 +779,7 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
* Part of the $(XREF range, InputRange) interface.
|
||||
*/
|
||||
@property bool empty()
|
||||
{
|
||||
|
@ -753,7 +803,7 @@ public:
|
|||
|
||||
|
||||
/**
|
||||
* Brings the next Content into the front of the range.
|
||||
* Part of the $(XREF range, InputRange) interface.
|
||||
*
|
||||
* Throws:
|
||||
* IncompleteCellException When a quote is found in an unquoted field,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue