//Written in the D programming language

/**
 * Implements functionality to read Comma Separated Values and its variants
 * from an input range of dchar.
 *
 * Comma Separated Values provide a simple means to transfer and store
 * tabular data. It has been common for programs to use their own
 * variant of the CSV format. This parser will loosely follow the
 * $(WEB tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhere
 * to the following criteria, differences from RFC-4180 in parentheses.
 *
 * $(UL
 *     $(LI A record is separated by a new line (CRLF,LF,CR))
 *     $(LI A final record may end with a new line)
 *     $(LI A header may be provided as the first record in input)
 *     $(LI A record has fields separated by a comma (customizable))
 *     $(LI A field containing new lines, commas, or double quotes
 *          should be enclosed in double quotes (customizable))
 *     $(LI Double quotes in a field are escaped with a double quote)
 *     $(LI Each record should contain the same number of fields (not enforced))
 *   )
 *
 * Example:
 *
 * -------
 * import std.algorithm;
 * import std.array;
 * import std.csv;
 * import std.stdio;
 * import std.typecons;
 *
 * void main()
 * {
 *     auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
 *
 *     foreach(record; csvReader!(Tuple!(string,string,int))(text))
 *     {
 *         writefln("%s works as a %s and earns $%d per year",
 *                  record[0], record[1], record[2]);
 *     }
 * }
 * -------
 *
 * When an input contains a header the Contents can be specified as an
 * associative array. Passing a null header signifies that a header is
 * present in the input.
 *
 * -------
 * auto text = "Name,Occupation,Salary\r" ~
 *     "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
 *
 * foreach(record; csvReader!(string[string])
 *         (text,cast(string[])null))
 * {
 *     writefln("%s works as a %s and earns $%s per year.",
 *              record["Name"], record["Occupation"],
 *              record["Salary"]);
 * }
 * -------
 *
 * This module allows content to be iterated by record stored in a struct
 * or into a range of fields. Upon detection of an error an
 * IncompleteCellException is thrown (can be disabled). csvNextToken has been
 * made public to allow for attempted recovery.
 *
 * Disabling exceptions will lift many restrictions specified above. A quote
 * can appear in a field if the field was not quoted. If in a quoted field any
 * quote by itself, not at the end of a field, will end processing for that
 * field. The field is ended when there is no input, even if the quote was not
 * closed.
 *
 *   See_Also:
 *      $(WEB en.wikipedia.org/wiki/Comma-separated_values, Wikipedia
 *      Comma-separated values)
 *
 *   Copyright: Copyright 2011
 *   License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 *   Authors:   Jesse Phillips
 *   Source:    $(PHOBOSSRC std/_csv.d)
 */
module std.csv;

import std.algorithm;
import std.array;
import std.conv;
import std.exception;
import std.range;
import std.traits;

/**
 * Exception containing the row and column for when an Exception was thrown.
 *
 * This Exception will have one of the following as part of its next property.
 *
 * $(UL
 *     $(LI IncompleteCellException)
 *     $(LI ConvException)
 *  )
 */
class CSVException : Exception
{
    /// Row passed in by the code that raised the failure.
    size_t row;
    /// Column passed in by the code that raised the failure.
    size_t col;

    /*
     * The message embeds both coordinates; the causing exception is chained
     * through the standard `next` property.
     */
    this(size_t row, size_t col, Exception e)
    {
        super(text("(Row: ", row, ", Col: ", col, ") CSV Parse Failure"), e);
        this.row = row;
        this.col = col;
    }
}

/**
 * Exception thrown when a Token is identified to not be completed: a quote is
 * found in an unquoted field, data continues after a closing quote, or the
 * quoted field was not closed before data was empty.
 *
 * This Exception will be part of CSVException unless using $(LREF
 * csvNextToken) directly.
 */
class IncompleteCellException : Exception
{
    /// Data pulled from input before finding a problem
    dstring partialData;

    this(string msg)
    {
        super(msg);
    }
}

/**
 * Exception thrown when a header is provided but a matching column is not
 * found or the order did not match that found in the input (non-struct).
 */
class HeaderMismatchException : Exception
{
    this(string msg)
    {
        super(msg);
    }
}

/**
 * Determines the behavior for when an error is detected.
 *
 * Disabling exceptions will follow these rules:
 * $(UL
 *     $(LI A quote can appear in a field if the field was not quoted.)
 *     $(LI If in a quoted field any quote by itself, not at the end of a
 *     field, will end processing for that field.)
 *     $(LI The field is ended when there is no input, even if the quote was
 *     not closed.)
 *     $(LI If the given header does not match the order in the input, the
 *     content will return as it is found in the input.)
 *     $(LI If the given header contains columns not found in the input they
 *     will be ignored.)
 *  )
 *
 */
enum Malformed
{
    /// No exceptions are thrown due to incorrect CSV.
    ignore,

    /// Use exceptions when input is incorrect CSV.
    throwException
}

/**
 * Builds a $(LREF Records) struct for iterating over records found in $(D
 * input).
 *
 * This function simplifies the process for standard text input.
 * For other input, delimited by colon, create Records yourself.
 *
 * The $(D Contents) of the input can be provided if all the records are the
 * same type such as all integer data:
 *
 * -------
 * string str = `76,26,22`;
 * int[] ans = [76,26,22];
 * auto records = csvReader!int(str);
 *
 * foreach(record; records) {
 *     assert(equal(record, ans));
 * }
 * -------
 *
 * Example using a struct with modified delimiter:
 *
 * -------
 * string str = "Hello;65;63.63\nWorld;123;3673.562";
 * struct Layout {
 *     string name;
 *     int value;
 *     double other;
 * }
 *
 * auto records = csvReader!Layout(str,';');
 *
 * foreach(record; records) {
 *     writeln(record.name);
 *     writeln(record.value);
 *     writeln(record.other);
 * }
 * -------
 *
 * An optional $(D header) can be provided. The first record will be read in
 * as the header. If $(D Contents) is a struct then the header provided is
 * expected to correspond to the fields in the struct. When $(D Contents) is
 * non-struct the $(D header) must be provided in the same order as the input
 * or an exception is thrown.
 *
 * Read only column "b":
 *
 * -------
 * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
 * auto records = csvReader(str, ["b"]);
 *
 * auto ans = [["65"],["123"]];
 * foreach(record; records) {
 *     assert(equal(record, ans.front));
 *     ans.popFront();
 * }
 * -------
 *
 * Read from header of different order:
 *
 * -------
 * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
 * struct Layout
 * {
 *     int value;
 *     double other;
 *     string name;
 * }
 *
 * auto records = csvReader!Layout(str, ["b","c","a"]);
 * -------
 *
 * The header can also be left empty if the input contains a header but
 * all columns should be iterated. The header from the input can always
 * be accessed from the header field.
 *
 * -------
 * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
 * auto records = csvReader(str, cast(string[])null);
 *
 * assert(records.header == ["a","b","c"]);
 * -------
 *
 * $(LINK2 http://d.puremagic.com/issues/show_bug.cgi?id=2394, IFTI fails for
 * nulls) prevents just sending null or [] as a header.
 *
 * Returns:
 *        $(LREF Records) struct which provides a $(XREF range, isInputRange) of
 *        each record.
 *
 * Throws:
 *       $(LREF CSVException) When a quote is found in an unquoted field,
 *       data continues after a closing quote, the quoted field was not
 *       closed before data was empty, or a conversion failed.
 *
 *       $(LREF HeaderMismatchException)  when a header is provided but a
 *       matching column is not found or the order did not match that found in
 *       the input (non-struct).
 */
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Separator = char)
              (Range input, Separator delimiter = ',', Separator quote = '"')
    if(isInputRange!Range && is(ElementType!Range == dchar)
       && isSomeChar!(Separator)
       && !is(Contents == class)
       && !is(Contents T : T[U], U : string))
{
    // No header given: the reader is instantiated with string[] as a
    // placeholder header type and built through the header-less constructor.
    alias Records!(Contents,ErrorLevel,Range,
                   ElementType!Range,string[]) Reader;

    return Reader(input, delimiter, quote);
}

/// Ditto
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"')
    if(isInputRange!Range && is(ElementType!Range == dchar)
       && isSomeChar!(Separator)
       && !is(Contents == class)
       && isForwardRange!Header && isSomeString!(ElementType!Header))
{
    // Header supplied as a forward range of strings.
    alias Records!(Contents,ErrorLevel,Range,
                   ElementType!Range,Header) Reader;

    return Reader(input, header, delimiter, quote);
}

/// Ditto
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"')
    if(isInputRange!Range && is(ElementType!Range == dchar)
       && isSomeChar!(Separator)
       && !is(Contents == class)
       && is(Header : void*))
{
    // Header argument is pointer-like (e.g. a bare null): forward an empty
    // string[] header so the first record is still consumed as the header.
    alias Records!(Contents,ErrorLevel,Range,
                   ElementType!Range,string[]) Reader;

    return Reader(input, cast(string[])null, delimiter, quote);
}

// Test standard iteration over input.
unittest
{
    string str = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix";

    int cells;
    foreach(row; csvReader(str))
        foreach(field; row)
            ++cells;

    assert(cells == 6);
}

// Test newline on last record
unittest
{
    string str = "one,two\nthree,four\n";

    auto records = csvReader(str);
    foreach(step; 0 .. 2)
        records.popFront();

    assert(records.empty);
}

// Test structure conversion interface with unicode.
unittest
{
    wstring str = "\U00010143Hello,65,63.63\nWorld,123,3673.562"w;
    struct Layout
    {
        string name;
        int value;
        double other;
    }

    // Expected values; each parsed record is compared against these.
    Layout[2] ans;
    ans[0].name = "\U00010143Hello";
    ans[0].value = 65;
    ans[0].other = 63.63;

    ans[1].name = "World";
    ans[1].value = 123;
    ans[1].other = 3673.562;

    auto records = csvReader!Layout(str);

    int count;
    foreach(record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        assert(ans[count].other == record.other);
        count++;
    }
    assert(count == ans.length);
}

// Test input conversion interface
unittest
{
    string str = `76,26,22`;
    int[] ans = [76,26,22];
    auto records = csvReader!int(str);

    foreach(record; records)
    {
        assert(equal(record, ans));
    }
}

// Test struct & header interface and same unicode
unittest
{
    string str = "a,b,c\nHello,65,63.63\n➊➋➂❹,123,3673.562";
    struct Layout
    {
        int value;
        double other;
        string name;
    }

    auto records = csvReader!Layout(str, ["b","c","a"]);

    Layout[2] ans;
    ans[0].name = "Hello";
    ans[0].value = 65;
    ans[0].other = 63.63;

    ans[1].name = "➊➋➂❹";
    ans[1].value = 123;
    ans[1].other = 3673.562;

    int count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        assert(ans[count].other == record.other);
        count++;
    }
    assert(count == ans.length);
}

// Test header interface
unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    auto records = csvReader(str, ["b"]);

    auto ans = [["65"],["123"]];
    foreach(record; records)
    {
        assert(equal(record, ans.front));
        ans.popFront();
    }

    try
    {
        records = csvReader(str, ["b","a"]);
        assert(0);
    }
    catch(HeaderMismatchException e)
    {
    }

    auto records2 = csvReader!(string,Malformed.ignore)
       (str, ["b","a"], ',', '"');

    ans = [["Hello","65"],["World","123"]];
    foreach(record; records2)
    {
        assert(equal(record, ans.front));
        ans.popFront();
    }

    str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4";
    records2 = csvReader!(string,Malformed.ignore)
       (str, ["a","b","c","d"], ',', '"');

    ans = [["Joe","Carpenter"],["Fred","Fly"]];
    foreach(record; records2)
    {
        assert(equal(record, ans.front));
        ans.popFront();
    }
}

// Test null header interface
unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    auto records = csvReader(str, ["a"]);

    assert(records.header == ["a","b","c"]);
}

// Test unchecked read
unittest
{
    string str = "one \"quoted\"";
    foreach(record; csvReader!(string,Malformed.ignore)(str))
    {
        foreach(cell; record)
        {
            assert(cell == "one \"quoted\"");
        }
    }

    str = "one \"quoted\",two \"quoted\" end";
    struct Ans
    {
        string a,b;
    }
    foreach(record; csvReader!(Ans,Malformed.ignore)(str))
    {
        assert(record.a == "one \"quoted\"");
        assert(record.b == "two \"quoted\" end");
    }
}

// Test partial data returned
unittest
{
    string str = "\"one\nnew line";

    try
    {
        foreach(record; csvReader(str))
        {}
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        assert(ice.partialData == "one\nnew line");
    }
}

// Test Windows line break
unittest
{
    string str = "one,two\r\nthree";

    auto records = csvReader(str);
    auto record = records.front;
    assert(record.front == "one");
    record.popFront();
    assert(record.front == "two");
    records.popFront();
    record = records.front;
    assert(record.front == "three");
}

// Test lone carriage return as record break, including on the last record
unittest
{
    string str = "one,two\rthree,four\r";

    auto records = csvReader(str);
    records.popFront();
    records.popFront();
    assert(records.empty);
}

// Test associative array support with unicode separator
unittest
{
    string str = "1❁2❁3\n34❁65❁63\n34❁65❁63";

    auto records = csvReader!(string[string])(str,["3","1"],'❁');
    int count;
    foreach(record; records)
    {
        count++;
        assert(record["1"] == "34");
        assert(record["3"] == "63");
    }
    assert(count == 2);
}

// Test restricted range
unittest
{
    import std.typecons;

    // Minimal input range: only empty/front/popFront are available.
    struct InputRange
    {
        dstring text;

        this(dstring txt)
        {
            text = txt;
        }

        auto empty()
        {
            return text.empty();
        }

        auto popFront()
        {
            text.popFront();
        }

        dchar front()
        {
            return text[0];
        }
    }
    auto ir = InputRange("Name,Occupation,Salary\r"d ~
          "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"d);

    foreach(record; csvReader(ir, cast(string[])null))
        foreach(cell; record) {}
    foreach(record; csvReader!(Tuple!(string,string,int))
            (ir,cast(string[])null)) {}
    foreach(record; csvReader!(string[string])
            (ir,cast(string[])null)) {}
}

/**
 * Range for iterating CSV records.
 *
 * This range is returned by the csvReader functions. It can be
 * created in a similar manner to allow $(D ErrorLevel) be set to $(LREF
 * Malformed).ignore if best guess processing should take place.
 *
 * Example for integer data:
 *
 * -------
 * string str = `76;^26^;22`;
 * int[] ans = [76,26,22];
 * auto records = Records!(int,Malformed.ignore,string,char,string[])
 *       (str, ';', '^');
 *
 * foreach(record; records) {
 *    assert(equal(record, ans));
 * }
 * -------
 *
 */
struct Records(Contents, Malformed ErrorLevel, Range, Separator, Header)
    if(isSomeChar!Separator && isInputRange!Range
       && is(ElementType!Range == dchar) && !is(Contents == class)
       && isForwardRange!Header && isSomeString!(ElementType!Header))
{
private:
    Range _input;
    Separator _separator;
    Separator _quote;
    // Column positions selected through the header (or the struct fields).
    size_t[] indices;
    uint _row;
    bool _empty;

    // Struct and associative-array contents are materialised per record in
    // recordContent; any other content type is exposed as a Record range.
    static if(is(Contents == struct))
    {
        Contents recordContent;
        Record!(string, ErrorLevel, Range, Separator) recordRange;
    }
    else static if(is(Contents T : T[U], U : string))
    {
        Contents recordContent;
        Record!(T, ErrorLevel, Range, Separator) recordRange;
    }
    else
        Record!(Contents, ErrorLevel, Range, Separator) recordRange;
public:
    /**
     * Header from the input in array form.
     *
     * -------
     * string str = "a,b,c\nHello,65,63.63";
     * auto records = csvReader(str, ["a"]);
     *
     * assert(records.header == ["a","b","c"]);
     * -------
     */
    string[] header;

    /**
     * Constructor to initialize the input, delimiter and quote for input
     * without a header.
     *
     * -------
     * string str = `76;^26^;22`;
     * int[] ans = [76,26,22];
     * auto records = Records!(int,Malformed.ignore,string,char,string[])
     *       (str, ';', '^');
     *
     * foreach(record; records) {
     *    assert(equal(record, ans));
     * }
     * -------
     */
    this(Range input, Separator delimiter, Separator quote)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;

        static if(is(Contents == struct))
        {
            // Without a header, struct field N is filled from column N.
            indices.length = FieldTypeTuple!(Contents).length;
            foreach(i, j; FieldTypeTuple!Contents)
                indices[i] = i;
        }

        prime();
    }

    /**
     * Constructor to initialize the input, delimiter and quote for input
     * with a header.
     *
     * -------
     * string str = "high;mean;low\n76;^26^;22";
     * auto records = Records!(int,Malformed.ignore,string,char,string[])
     *       (str, ["high","low"], ';', '^');
     *
     * int[] ans = [76,22];
     * foreach(record; records) {
     *    assert(equal(record, ans));
     * }
     * -------
     *
     * Throws:
     *       $(LREF HeaderMismatchException)  when a header is provided but a
     *       matching column is not found or the order did not match that found
     *       in the input (non-struct).
     */
    this(Range input, Header colHeaders, Separator delimiter, Separator quote)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;

        // Every requested column starts out as "not found".
        size_t[string] colToIndex;
        foreach(h; colHeaders)
        {
            colToIndex[h] = size_t.max;
        }

        // Read the first record of the input as the header row.
        auto r = Record!(string, ErrorLevel, Range, Separator)
            (&_input, _separator, _quote, indices);

        size_t colIndex;
        foreach(col; r)
        {
            header ~= col;
            auto ptr = col in colToIndex;
            if(ptr)
                *ptr = colIndex;
            colIndex++;
        }

        indices.length = colToIndex.length;
        int i;
        foreach(h; colHeaders)
        {
            immutable index = colToIndex[h];
            static if(ErrorLevel != Malformed.ignore)
                enforceEx!(HeaderMismatchException)(index < size_t.max,
                    "Header not found: " ~ to!string(h));
            indices[i++] = index;
        }

        static if(!is(Contents == struct))
        {
            static if(is(Contents T : T[U], U : string))
            {
                sort(indices);
            }
            else static if(ErrorLevel == Malformed.ignore)
            {
                sort(indices);
            }
            else
            {
                enforceEx!(HeaderMismatchException)(isSorted(indices),
                    "Header in input does not match specified header.");
            }
        }

        // Steps past the header row's record break and loads the first
        // data record.
        popFront();
    }

    // A copied Records owns its own _input, so the record range must be
    // re-pointed at the copy rather than at the source object's field.
    this(this)
    {
        recordRange._input = &_input;
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     *
     * Returns:
     *      If $(D Contents) is a struct, the struct will be filled with record
     *      data.
     *
     *      If $(D Contents) is non-struct, a $(LREF Record) will be returned.
     */
    @property auto front()
    {
        assert(!empty);
        static if(is(Contents == struct))
        {
            return recordContent;
        }
        else static if(is(Contents T : T[U], U : string))
        {
            return recordContent;
        }
        else
        {
            recordRange._input = &_input;
            return recordRange;
        }
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     */
    @property bool empty()
    {
        return _empty;
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, or a conversion failed.
     */
    void popFront()
    {
        recordRange._input = &_input;

        // Consume whatever is left of the current record.
        while(!recordRange.empty)
        {
            recordRange.popFront();
        }

        // Step over the record break: CRLF, LF or a lone CR.
        if(!_input.empty)
        {
            if(_input.front == '\r')
            {
                _input.popFront();
                // The CR may have been the last character of the input, so
                // the range has to be re-checked before looking for an LF.
                if(!_input.empty && _input.front == '\n')
                    _input.popFront();
            }
            else if(_input.front == '\n')
                _input.popFront();
        }

        if(_input.empty)
            _empty = true;

        prime();
    }

    // Loads the next record; a no-op once the range is exhausted.
    private void prime()
    {
        if(_empty)
            return;
        _row++;
        static if(is(Contents == struct))
        {
            recordRange = typeof(recordRange)
                                 (&_input, _separator, _quote, null);
        }
        else
        {
            recordRange = typeof(recordRange)
                                 (&_input, _separator, _quote, indices);
        }
        // NOTE(review): the Record constructor above already reads its first
        // token, so a failure on that token is reported before this row
        // number has been assigned - confirm whether that is intended.
        recordRange._row = _row;

        static if(is(Contents T : T[U], U : string))
        {
            T[U] aa;
            try
            {
                for(; !recordRange.empty; recordRange.popFront())
                {
                    // _col counts from 1, header is indexed from 0.
                    aa[header[recordRange._col-1]] = recordRange.front;
                }
            }
            catch(ConvException e)
            {
                throw new CSVException(_row, recordRange._col, e);
            }

            recordContent = aa;
        }
        else static if(is(Contents == struct))
        {
            size_t colIndex;
            try
            {
                foreach(colData; recordRange)
                {
                    scope(exit) colIndex++;
                    if(indices.length > 0)
                    {
                        foreach(ti, ToType; FieldTypeTuple!(Contents))
                        {
                            if(indices[ti] == colIndex)
                            {
                                recordContent.tupleof[ti] = to!ToType(colData);
                            }
                        }
                    }
                    else
                    {
                        foreach(ti, ToType; FieldTypeTuple!(Contents))
                        {
                            if(ti == colIndex)
                                recordContent.tupleof[ti] = to!ToType(colData);
                        }
                    }
                }
            }
            catch(ConvException e)
            {
                throw new CSVException(_row, colIndex, e);
            }
        }
    }
}

unittest
{
    string str = `76;^26^;22`;
    int[] ans = [76,26,22];
    auto records = Records!(int,Malformed.ignore,string,char,string[])
          (str, ';', '^');

    foreach(record; records)
    {
        assert(equal(record, ans));
    }
}

/**
 * Returned by a Records when Contents is a non-struct.
*/
private struct Record(Contents, Malformed ErrorLevel, Range, Separator)
    if(!is(Contents == class) && !is(Contents == struct))
{
private:
    Range* _input;
    Separator _separator;
    Separator _quote;
    // Most recently converted field, handed out by front.
    Contents curContentsToken;
    // Raw characters of the field currently being read.
    typeof(appender!(dchar[])()) _front;
    bool _empty;
    size_t _col, _row;
    // Number of tokens to skip before each selected column.
    size_t[] _popCount;
public:
    /*
     * params:
     *      input = Pointer to a character input range
     *      delimiter = Separator for each column
     *      quote = Character used for quotation
     *      indices = An array containing which columns will be returned.
     *             If empty, all columns are returned. List must be in order.
     */
    this(Range* input, Separator delimiter, Separator quote, size_t[] indices)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;
        _front = appender!(dchar[])();
        _popCount = indices.dup;

        // If a header was given, each call to popFront will need
        // to eliminate so many tokens. This calculates
        // how many will be skipped to get to the next header column
        size_t normalizer;
        foreach(ref c; _popCount)
        {
            static if(ErrorLevel == Malformed.ignore)
            {
                // If we are not throwing exceptions
                // a header may not exist, indices are sorted
                // and will be size_t.max if not found.
                if(c == size_t.max)
                    break;
            }
            c -= normalizer;
            normalizer += c + 1;
        }

        prime();
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     */
    @property Contents front()
    {
        assert(!empty);
        return curContentsToken;
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     */
    @property bool empty()
    {
        return _empty;
    }

    /*
     * Record is complete when input
     * is empty or starts with record break
     */
    private bool recordEnd()
    {
        if((*_input).empty
           || (*_input).front == '\n'
           || (*_input).front == '\r')
        {
            return true;
        }
        return false;
    }

    /**
     * Part of the $(XREF range, isInputRange) interface.
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, or a conversion failed.
     */
    void popFront()
    {
        // Skip last of record when header is depleted.
        // A non-null but empty _popCount means column indices were given
        // and every one of them has been consumed; a null _popCount means
        // no indices were given at all.
        if(_popCount.ptr && _popCount.empty)
            while(!recordEnd())
            {
                prime(1);
            }

        if(recordEnd())
        {
            _empty = true;
            return;
        }

        // Separator is left on the end of input from the last call.
        // This cannot be moved to after the call to csvNextToken as
        // there may be an empty record after it.
        if((*_input).front == _separator)
            (*_input).popFront();

        _front.shrinkTo(0);

        prime();
    }

    /*
     * Handles moving to the next skipNum token.
     *
     * Failures raised while reading a token are reported as CSVException.
     */
    private void prime(size_t skipNum)
    {
        foreach(i; 0..skipNum)
        {
            _col++;
            _front.shrinkTo(0);

            // A record shorter than the requested column can leave the
            // input exhausted here; front must not be read in that case.
            // The token is then left empty, as it already is when the short
            // record is followed by a record break.
            if(!(*_input).empty && (*_input).front == _separator)
                (*_input).popFront();

            try
            {
                csvNextToken!(Range, ErrorLevel, Separator)
                             (*_input, _front, _separator, _quote,false);
            }
            catch(Exception e)
            {
                throw new CSVException(_row, _col, e);
            }
        }
    }

    /*
     * Reads the next token, skips forward to the next selected column and
     * converts that column to Contents.
     */
    private void prime()
    {
        _col++;
        try csvNextToken!(Range, ErrorLevel, Separator)
                         (*_input, _front, _separator, _quote,false);
        catch(IncompleteCellException ice)
        {
            // Hand the characters read so far to the caller.
            ice.partialData = _front.data.idup;
            throw ice;
        }

        auto skipNum = _popCount.empty ? 0 : _popCount.front;
        if(!_popCount.empty)
            _popCount.popFront();

        // size_t.max marks a requested column that the header did not
        // contain: drop the rest of the record and finish.
        if(skipNum == size_t.max)
        {
            while(!recordEnd())
                prime(1);
            _empty = true;
            return;
        }

        // prime(skipNum) already reports failures as CSVException, so it is
        // kept outside the try below to avoid wrapping that exception in a
        // second CSVException.
        if(skipNum)
            prime(skipNum);

        try
        {
            curContentsToken = to!Contents(_front.data);
        }
        catch(Exception e)
        {
            throw new CSVException(_row, _col, e);
        }
    }
}

/**
 * Lower level control over parsing CSV
 *
 * This function consumes the input. After each call the input will
 * start with either a delimiter or record break (\n, \r\n, \r) which
 * must be removed for subsequent calls.
*
 * -------
 * string str = "65,63\n123,3673";
 *
 * auto a = appender!(char[]);
 *
 * csvNextToken(str,a,',','"');
 * assert(a.data == "65");
 * assert(str == ",63\n123,3673");
 *
 * str.popFront();
 * a.shrinkTo(0);
 * csvNextToken(str,a,',','"');
 * assert(a.data == "63");
 * assert(str == "\n123,3673");
 *
 * str.popFront();
 * a.shrinkTo(0);
 * csvNextToken(str,a,',','"');
 * assert(a.data == "123");
 * assert(str == ",3673");
 * -------
 *
 * Params:
 *       input = Any CSV input
 *       ans   = The first field in the input
 *       sep   = The character to represent a comma in the specification
 *       quote = The character to represent a quote in the specification
 *       startQuoted = Whether the input should be considered to already be in
 *                     quotes
 *
 * Throws:
 *       $(LREF IncompleteCellException) when $(D ErrorLevel) is $(LREF
 *       Malformed).throwException and a quote is found in an unquoted
 *       token, content follows a closing quote, or a quoted token is not
 *       closed. The characters read up to that point remain in $(D ans).
 */
void csvNextToken(Range, Malformed ErrorLevel = Malformed.throwException,
                           Separator, Output)
                          (ref Range input, ref Output ans,
                           Separator sep, Separator quote,
                           bool startQuoted = false)
    if(isSomeChar!Separator && isInputRange!Range
       && is(ElementType!Range == dchar)
       && isOutputRange!(Output, dchar))
{
    // Nothing to read when the input is exhausted or sits on a record break.
    if(input.empty || input.front == '\n' || input.front == '\r')
        return;

    // The scanner is always in exactly one of three states:
    //   plain        - inQuotes == false, pendingQuote == false
    //   quoted       - inQuotes == true,  pendingQuote == false
    //   quote seen   - inQuotes == false, pendingQuote == true
    // "quote seen" means a quote was just read inside a quoted token; the
    // next character decides whether it was an escaped quote or the closing
    // one.
    bool inQuotes = startQuoted;
    bool pendingQuote;

    if(input.front == quote)
    {
        inQuotes = true;
        input.popFront();
    }

    // The character under inspection is only consumed by the loop increment,
    // so leaving through break or throw keeps it in the input.
    for(; !input.empty; input.popFront())
    {
        assert(!(inQuotes && pendingQuote));
        immutable dchar cur = input.front;

        // Outside of quotes the token ends at a separator or record break.
        if(!inQuotes && (cur == sep || cur == '\r' || cur == '\n'))
            break;

        if(pendingQuote)
        {
            if(cur == quote)
            {
                // Doubled quote: emit one quote and resume the quoted token.
                pendingQuote = false;
                inQuotes = true;
                ans.put(quote);
            }
            else
            {
                // The quoted token was closed, yet more content follows.
                static if(ErrorLevel == Malformed.throwException)
                    throw new IncompleteCellException(
                          "Content continues after end quote, " ~
                          "or needs to be escaped.");
                else static if(ErrorLevel == Malformed.ignore)
                    break;
            }
        }
        else if(inQuotes)
        {
            if(cur == quote)
            {
                // Either the closing quote or the first half of an escape.
                pendingQuote = true;
                inQuotes = false;
            }
            else
            {
                ans.put(cur);
            }
        }
        else
        {
            if(cur == quote)
            {
                // Quote in the middle of an unquoted token.
                static if(ErrorLevel == Malformed.throwException)
                    throw new IncompleteCellException(
                          "Quote located in unquoted token");
                else static if(ErrorLevel == Malformed.ignore)
                    ans.put(quote);
            }
            else
            {
                ans.put(cur);
            }
        }
    }

    // Still inside quotes once scanning stopped: the token was never closed.
    static if(ErrorLevel == Malformed.throwException)
        if(inQuotes && (input.empty || input.front == '\n'
                                    || input.front == '\r'))
            throw new IncompleteCellException(
                  "Data continues on future lines or trailing quote");
}

// Test csvNextToken on simplest form and correct format.
unittest
{
    string str = "\U00010143Hello,65,63.63\nWorld,123,3673.562";

    auto a = appender!(dchar[]);
    csvNextToken!string(str,a,',','"');
    assert(a.data == "\U00010143Hello");
    assert(str == ",65,63.63\nWorld,123,3673.562");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "65");
    assert(str == ",63.63\nWorld,123,3673.562");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "63.63");
    assert(str == "\nWorld,123,3673.562");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "World");
    assert(str == ",123,3673.562");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "123");
    assert(str == ",3673.562");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "3673.562");
    assert(str == "");
}

// Test quoted tokens
unittest
{
    string str = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix";

    auto a = appender!(dchar[]);
    csvNextToken!string(str,a,',','"');
    assert(a.data == "one");
    assert(str == `,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "two");
    assert(str == `,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "three \"quoted\"");
    assert(str == `,"",` ~ "\"five\nnew line\"\nsix");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "");
    assert(str == ",\"five\nnew line\"\nsix");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "five\nnew line");
    assert(str == "\nsix");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "six");
    assert(str == "");
}

// Test empty data is pulled at end of record.
unittest
{
    string str = "one,";
    auto a = appender!(dchar[]);
    csvNextToken(str,a,',','"');
    assert(a.data == "one");
    assert(str == ",");

    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "");
}

// Test exceptions
unittest
{
    string str = "\"one\nnew line";

    typeof(appender!(dchar[])()) a;
    try
    {
        a = appender!(dchar[]);
        csvNextToken(str,a,',','"');
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        assert(a.data == "one\nnew line");
        assert(str == "");
    }

    str = "Hello world\"";

    try
    {
        a = appender!(dchar[]);
        csvNextToken(str,a,',','"');
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        assert(a.data == "Hello world");
        assert(str == "\"");
    }

    str = "one, two \"quoted\" end";

    a = appender!(dchar[]);
    csvNextToken!(string,Malformed.ignore)(str,a,',','"');
    assert(a.data == "one");
    str.popFront();
    a.shrinkTo(0);
    csvNextToken!(string,Malformed.ignore)(str,a,',','"');
    assert(a.data == " two \"quoted\" end");
}

// Test modifying token delimiter
unittest
{
    string str = `one|two|/three "quoted"/|//`;

    auto a = appender!(dchar[]);
    csvNextToken(str,a, '|','/');
    assert(a.data == "one"d);
    assert(str == `|two|/three "quoted"/|//`);

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a, '|','/');
    assert(a.data == "two"d);
    assert(str == `|/three "quoted"/|//`);

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a, '|','/');
    assert(a.data == `three "quoted"`);
    assert(str == `|//`);

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a, '|','/');
    assert(a.data == ""d);
}