diff --git a/std/csv.d b/std/csv.d index a5e452456..10bb022bf 100644 --- a/std/csv.d +++ b/std/csv.d @@ -8,7 +8,7 @@ * tabular data. It has been common for programs to use their own * variant of the CSV format. This parser will loosely follow the * $(WEB tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhered - * to the following rules. + * to the following criteria, differences from RFC-4180 in parentheses. * * $(UL * $(LI A record is separated by a new line (CRLF,LF,CR)) @@ -18,7 +18,7 @@ * $(LI A field containing new lines, commas, or double quotes * should be enclosed in double quotes (customizable)) * $(LI Double quotes in a field are escaped with a double quote) - * $(LI Each record should contain the same number of fields) + * $(LI Each record should contain the same number of fields (not enforced)) * ) * * Example: @@ -30,9 +30,9 @@ * * void main() * { - * auto text = "Joe,Carpenter,300000\nFred,Fly,4\r\n"; + * auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"; * - * foreach(record; csvText!(Tuple!(string,string,int))(text)) + * foreach(record; csvReader!(Tuple!(string,string,int))(text)) * { * writefln("%s works as a %s and earns $%d per year", * record[0], record[1], record[2]); @@ -122,18 +122,22 @@ enum Malformed } /** - * Builds a RecordList range for iterating over records found in input. + * Builds a $(LREF Records) struct for iterating over records found in $(D + * input). * * This function simplifies the process for standard text input. - * For other input create RecordList yourself. + * For other input, delimited by colon, create Records yourself. * - * The Contents of the input can be provided if all the records are the same - * type such as all integer data: + * The $(D ErrorLevel) can be set to $(LREF Malformed).ignore if best guess + * processing should take place. + * + * The $(D Contents) of the input can be provided if all the records are the + * same type such as all integer data: * * ------- * string str = `76,26,22`; * int[] ans = [76,26,22]; - * auto records = csvText!int(str); + * auto records = csvReader!int(str); * * int count; * foreach(record; records) { @@ -141,15 +145,6 @@ enum Malformed * } * ------- * - * The ErrorLevel can be set to $(LREF Malformed).ignore if best guess - * processing should take place. - * - * An optional heading can be provided. The first record will be read in as the - * heading. If the Content type is a struct then the heading provided is - * expected to correspond to the fields in the struct. When Content is - * non-struct the heading must be provided in the same order as the input or an - * exception is thrown. - * * Example using a struct: * * ------- @@ -160,7 +155,7 @@ enum Malformed * double other; * } * - * auto records = csvText!Layout(str); + * auto records = csvReader!Layout(str); * * foreach(record; records) { * writeln(record.name); @@ -169,18 +164,37 @@ enum Malformed * } * ------- * - * The header can be provided to identify which columns to read in. + * An optional $(D heading) can be provided. The first record will be read in + * as the heading. If $(D Contents) is a struct then the heading provided is + * expected to correspond to the fields in the struct. When $(D Contents) is + * non-struct the $(D heading) must be provided in the same order as the input + * or an exception is thrown. + * + * Read only column "b": * * ------- * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562"; - * auto records = csvText(str, ["b"]); + * auto records = csvReader(str, ["b"]); * - * auto ans = ["65","123"]; - * foreach(record; records) - * foreach(cell; record) { - * assert(cell == ans.front); - * ans.popFront(); - * } + * auto ans = [["65"],["123"]]; + * foreach(record; records) { + * assert(equal(record, ans.front)); + * ans.popFront(); + * } + * ------- + * + * Read from heading of different order: + * + * ------- + * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562"; + * struct Layout + * { + * int value; + * double other; + * string name; + * } + * + * auto records = csvReader!Layout(str, ["b","c","a"]); * ------- * * The header can also be left empty if the input contains a header but @@ -189,7 +203,7 @@ enum Malformed * * ------- * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562"; - * auto records = csvText(str, cast(string[])null); + * auto records = csvReader(str, cast(string[])null); * * assert(records.heading == ["a","b","c"]); * ------- @@ -198,11 +212,8 @@ enum Malformed * nulls) prevents just sending null or [] as a header. * * Returns: - * If Contents is a struct, the range will return the - * struct populated by a single record. - * - * Otherwise the range will return a $(LREF Record) range of the type - * (default string). + * $(LREF Records) struct which provides a $(XREF range, InputRange) of + * each record. * * Throws: * IncompleteCellException When a quote is found in an unquoted field, @@ -213,22 +224,23 @@ enum Malformed * column is not found or the order did not match that found in the input * (non-struct). */ -auto csvText(Contents = string, Malformed ErrorLevel +auto csvReader(Contents = string, Malformed ErrorLevel = Malformed.throwException, Range)(Range input) if(isInputRange!Range && isSomeChar!(ElementType!Range) && !is(Contents == class)) { - return RecordList!(Contents,ErrorLevel,Range,ElementType!Range) + return Records!(Contents,ErrorLevel,Range,ElementType!Range,string[]) (input, ',', '"'); } /// Ditto -auto csvText(Contents = string, Malformed ErrorLevel - = Malformed.throwException, Range)(Range input, string[] heading) +auto csvReader(Contents = string, Malformed ErrorLevel + = Malformed.throwException, Range, Heading) + (Range input, Heading heading) if(isInputRange!Range && isSomeChar!(ElementType!Range) - && !is(Contents == class)) + && !is(Contents == class) && isInputRange!Heading) { - return RecordList!(Contents,ErrorLevel,Range,ElementType!Range) + return Records!(Contents,ErrorLevel,Range,ElementType!Range,Heading) (input, ',', '"', heading); } @@ -236,7 +248,7 @@ auto csvText(Contents = string, Malformed ErrorLevel unittest { string str = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix"; - auto records = csvText(str); + auto records = csvReader(str); int count; foreach(record; records) @@ -253,7 +265,7 @@ unittest unittest { string str = "one,two\nthree,four\n"; - auto records = csvText(str); + auto records = csvReader(str); records.popFront(); records.popFront(); assert(records.empty); @@ -277,7 +289,7 @@ unittest { ans[1].value = 65; ans[1].other = 663.63; - auto records = csvText!Layout(str); + auto records = csvReader!Layout(str); int count; foreach(record; records) @@ -295,7 +307,7 @@ unittest { string str = `76,26,22`; int[] ans = [76,26,22]; - auto records = csvText!int(str); + auto records = csvReader!int(str); foreach(record; records) { @@ -314,7 +326,7 @@ unittest string name; } - auto records = csvText!Layout(str, ["b","c","a"]); + auto records = csvReader!Layout(str, ["b","c","a"]); Layout ans[2]; ans[0].name = "Hello"; @@ -340,51 +352,46 @@ unittest unittest { string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562"; - auto records = csvText(str, ["b"]); + auto records = csvReader(str, ["b"]); - auto ans = ["65","123"]; - foreach(record; records) - foreach(cell; record) { - assert(cell == ans.front); - ans.popFront(); - } + auto ans = [["65"],["123"]]; + foreach(record; records) { + assert(equal(record, ans.front)); + ans.popFront(); + } try { - records = csvText(str, ["b","a"]); + records = csvReader(str, ["b","a"]); assert(0); } catch(Exception e) { } - auto records2 = csvText!(string, Malformed.ignore)(str, ["b","a"]); + auto records2 = csvReader!(string, Malformed.ignore)(str, ["b","a"]); - ans = ["Hello","65","World","123"]; - foreach(record; records2) - foreach(cell; record) - { - assert(cell == ans.front); - ans.popFront(); - } + ans = [["Hello","65"],["World","123"]]; + foreach(record; records2) { + assert(equal(record, ans.front)); + ans.popFront(); + } str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4"; - records2 = csvText!(string, Malformed.ignore)(str, ["a","b","c","d"]); + records2 = csvReader!(string, Malformed.ignore)(str, ["a","b","c","d"]); - ans = ["Joe","Carpenter","Fred","Fly"]; - foreach(record; records2) - foreach(cell; record) - { - assert(cell == ans.front); - ans.popFront(); - } + ans = [["Joe","Carpenter"],["Fred","Fly"]]; + foreach(record; records2) { + assert(equal(record, ans.front)); + ans.popFront(); + } } // Test null header interface unittest { string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562"; - auto records = csvText(str, cast(string[])null); + auto records = csvReader(str, ["a"]); assert(records.heading == ["a","b","c"]); } @@ -393,7 +400,7 @@ unittest unittest { string str = "one \"quoted\""; - foreach(record; csvText!(string, Malformed.ignore)(str)) + foreach(record; csvReader!(string, Malformed.ignore)(str)) { foreach(cell; record) { @@ -406,7 +413,7 @@ unittest { string a,b; } - foreach(record; csvText!(Ans, Malformed.ignore)(str)) + foreach(record; csvReader!(Ans, Malformed.ignore)(str)) { assert(record.a == "one \"quoted\""); assert(record.b == "two \"quoted\" end"); @@ -418,7 +425,7 @@ unittest { string str = "one,two\r\nthree"; - auto records = csvText(str); + auto records = csvReader(str); auto record = records.front; assert(record.front == "one"); record.popFront(); @@ -431,7 +438,7 @@ unittest /** * Range for iterating CSV records. * - * This range is returned by the csvText functions. It can be + * This range is returned by the csvReader functions. It can be * created in a similar manner to allow for custom separation. * * Example for integer data: @@ -439,7 +446,7 @@ unittest * ------- * string str = `76;^26^;22`; * int[] ans = [76,26,22]; - * auto records = RecordList!(int,Malformed.ignore,string,char) + * auto records = Records!(int,Malformed.ignore,string,char,char[]) * (str, ';', '^'); * * foreach(record; records) { @@ -448,9 +455,10 @@ unittest * ------- * */ -struct RecordList(Contents, Malformed ErrorLevel, Range, Separator) +struct Records(Contents, Malformed ErrorLevel, Range, Separator, Heading) if(isSomeChar!Separator && isInputRange!Range - && isSomeChar!(ElementType!Range) && !is(Contents == class)) + && isSomeChar!(ElementType!Range) && !is(Contents == class) + && isInputRange!Heading) { private: Range _input; @@ -466,12 +474,32 @@ private: else Record!(Contents, ErrorLevel, Range, Separator) recordRange; public: - /// Heading from the input in array form. + /** + * Heading from the input in array form. + * + * ------- + * string str = "a,b,c\nHello,65,63.63"; + * auto records = csvReader(str, ["a"]); + * + * assert(records.heading == ["a","b","c"]); + * ------- + */ Range[] heading; /** * Constructor to initialize the input, delimiter and quote for input * without a heading. + * + * ------- + * string str = `76;^26^;22`; + * int[] ans = [76,26,22]; + * auto records = Records!(int,Malformed.ignore,string,char,string[]) + * (str, ';', '^'); + * + * foreach(record; records) { + * assert(equal(record, ans)); + * } + * ------- */ this(Range input, Separator delimiter, Separator quote) { @@ -492,19 +520,30 @@ public: * Constructor to initialize the input, delimiter and quote for input * with a heading. * + * ------- + * string str = `high;mean;low\n76;^26^;22`; + * int[] ans = [76,22]; + * auto records = Records!(int,Malformed.ignore,string,char,string[]) + * (str, ';', '^',["high","low"]); + * + * foreach(record; records) { + * assert(equal(record, ans)); + * } + * ------- + * * Throws: * HeadingMismatchException when a heading is provided but a * matching column is not found or the order did not match that found * in the input (non-struct). */ - this(Range input, Separator delimiter, Separator quote, string[] colHeaders) + this(Range input, Separator delimiter, Separator quote, Heading colHeaders) { _input = input; _separator = delimiter; _quote = quote; size_t[string] colToIndex; - foreach(i, h; colHeaders) + foreach(h; colHeaders) { colToIndex[h] = size_t.max; } @@ -522,14 +561,15 @@ public: colIndex++; } - indices.length = colHeaders.length; - foreach(i, h; colHeaders) + indices.length = colToIndex.length; + int i; + foreach(h; colHeaders) { immutable index = colToIndex[h]; static if(ErrorLevel != Malformed.ignore) enforceEx!(HeadingMismatchException)(index < size_t.max, "Header not found: " ~ to!string(h)); - indices[i] = index; + indices[i++] = index; } static if(!is(Contents == struct)) @@ -554,6 +594,13 @@ public: } /** + * Part of the $(XREF range, InputRange) interface. + * + * Returns: + * If $(D Contents) is a struct, the struct will be filled with record + * data. + * + * If $(D Contents) is non-struct, a $(LREF Record) will be returned. */ @property auto front() { @@ -570,6 +617,7 @@ public: } /** + * Part of the $(XREF range, InputRange) interface. */ @property bool empty() { @@ -577,7 +625,7 @@ public: } /** - * Brings the next Record into the front of the range. + * Part of the $(XREF range, InputRange) interface. * * Throws: * IncompleteCellException When a quote is found in an unquoted field, @@ -661,7 +709,7 @@ public: unittest { string str = `76;^26^;22`; int[] ans = [76,26,22]; - auto records = RecordList!(int,Malformed.ignore,string,char) + auto records = Records!(int,Malformed.ignore,string,char,string[]) (str, ';', '^'); foreach(record; records) @@ -671,7 +719,7 @@ unittest { } /** - * Returned by a RecordList when Contents is a non-struct. + * Returned by a Records when Contents is a non-struct. */ private struct Record(Contents, Malformed ErrorLevel, Range, Separator) if(!is(Contents == class) && !is(Contents == struct)) @@ -685,7 +733,7 @@ private: bool _empty; size_t[] _popCount; public: - /** + /* * params: * input = Pointer to a character input range * delimiter = Separator for each column @@ -722,6 +770,7 @@ public: } /** + * Part of the $(XREF range, InputRange) interface. */ @property Contents front() { @@ -730,6 +779,7 @@ public: } /** + * Part of the $(XREF range, InputRange) interface. */ @property bool empty() { @@ -753,7 +803,7 @@ public: /** - * Brings the next Content into the front of the range. + * Part of the $(XREF range, InputRange) interface. * * Throws: * IncompleteCellException When a quote is found in an unquoted field,